3 changes: 2 additions & 1 deletion README.md
@@ -265,7 +265,8 @@ Then you'll need to use a custom chat handler to load the clip model and process
 >>> llm = Llama(
       model_path="./path/to/llava/llama-model.gguf",
       chat_handler=chat_handler,
-      n_ctx=2048 # n_ctx should be increased to accomodate the image embedding
+      n_ctx=2048,  # n_ctx should be increased to accommodate the image embedding
+      logits_all=True,  # needed to make llava work
   )
 >>> llm.create_chat_completion(
     messages = [
50 changes: 50 additions & 0 deletions examples/high_level_api/llava_cli.py
@@ -0,0 +1,50 @@
import base64
from llama_cpp import Llama
from llama_cpp.llama_chat_format import Llava15ChatHandler

llava_folder = "path/to/folder/llava/llava_v1.5_7b/"
file_path = "path/to/your/image.png"

clip_model_path = f"{llava_folder}mmproj-model-f16.gguf"
model_path = f"{llava_folder}llava_v1.5_7b_q4_k.gguf"

def image_to_base64_data_uri(file_path):
    # Read the raw image bytes and wrap them in a base64 data URI.
    with open(file_path, "rb") as img_file:
        base64_data = base64.b64encode(img_file.read()).decode("utf-8")
    return f"data:image/png;base64,{base64_data}"


print("load image")
data_uri = image_to_base64_data_uri(file_path)
print("image encoded")

print("init image handler")
chat_handler = Llava15ChatHandler(
    clip_model_path=clip_model_path,
    verbose=True,
)

llm = Llama(
    model_path=model_path,
    chat_format="llava-1-5",
    chat_handler=chat_handler,
    n_ctx=4096,  # n_ctx should be increased to accommodate the image embedding
    logits_all=True,  # needed to make llava work
)

messages = [
    {"role": "system", "content": "You are an assistant who perfectly describes images."},
    {
        "role": "user",
        "content": [
            {"type": "image_url", "image_url": {"url": data_uri}},
            {"type": "text", "text": "Describe this image in detail please."},
        ],
    },
]
print("init chat completion")
# Call create_chat_completion to get the response
response = llm.create_chat_completion(messages=messages)
print(response)
print()
print()
print(response['choices'][0]['message']['content'])
79 changes: 79 additions & 0 deletions examples/high_level_api/readme/llava_clip.md
@@ -0,0 +1,79 @@
# llava_cli.py

This Python script uses the llama-cpp-python library for image description. It encodes an image into a base64 data URI, initializes a chat handler using `Llava15ChatHandler`, and then generates a detailed description of the image with the LLaVA model.
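For readers skimming this readme on its own, the encoding step described above amounts to a few lines. This sketch matches the helper in `llava_cli.py`:

```python
import base64

def image_to_base64_data_uri(file_path):
    # Read the raw image bytes and wrap them in a base64 data URI
    # that the chat handler accepts as an image_url.
    with open(file_path, "rb") as img_file:
        base64_data = base64.b64encode(img_file.read()).decode("utf-8")
    return f"data:image/png;base64,{base64_data}"
```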

## Prerequisites

- llama-cpp-python
- base64 (Python standard library; no separate install needed)

## Setup

1. Clone the repository:

```bash
git clone https://github.com/abetlen/llama-cpp-python.git
cd llama-cpp-python/examples/high_level_api/
```

2. Install the required dependencies:

```bash
pip install llama-cpp-python
```

3. Download the model files and place them in the folder referenced by the script:

- Download mmproj-model-f16.gguf and llava_v1.5_7b_q4_k.gguf from llama-models-repo and place them in path/to/folder/llava/llava_v1.5_7b/.

- Ensure that the image file is located at path/to/your/image.png.

## Usage

Make sure to set the model folder path and the image path at the top of the `llava_cli.py` file:
```python
llava_folder = "path/to/folder/llava/llava_v1.5_7b/"
file_path = 'path/to/your/image.png'

clip_model_path = f"{llava_folder}mmproj-model-f16.gguf"
model_path = f"{llava_folder}llava_v1.5_7b_q4_k.gguf"
```
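Note that the f-string concatenation above only works if `llava_folder` ends with a trailing slash. A small sketch using `pathlib` from the standard library avoids that pitfall:

```python
from pathlib import Path

llava_folder = Path("path/to/folder/llava/llava_v1.5_7b")

# The `/` operator inserts path separators correctly,
# with or without trailing slashes.
clip_model_path = str(llava_folder / "mmproj-model-f16.gguf")
model_path = str(llava_folder / "llava_v1.5_7b_q4_k.gguf")
```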
Image formats tested (see the note on MIME types below):

- .png
- .jpg
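The helper in `llava_cli.py` hardcodes the `image/png` MIME type in the data URI, and `.jpg` inputs were tested with that as-is. A variant that derives the MIME type from the file extension is arguably cleaner; this sketch uses only the standard library (the `image_to_data_uri` name is just for illustration):

```python
import base64
import mimetypes

def image_to_data_uri(file_path):
    # Guess the MIME type from the file extension; fall back to PNG.
    mime_type, _ = mimetypes.guess_type(file_path)
    if mime_type is None:
        mime_type = "image/png"
    with open(file_path, "rb") as img_file:
        base64_data = base64.b64encode(img_file.read()).decode("utf-8")
    return f"data:{mime_type};base64,{base64_data}"
```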

Run the script using the following command:

```bash
python llava_clip.py
```
The script will:

- load the image and encode it as a base64 data URI,
- initialize the chat handler,
- generate a description of the image.


## Configuration

- `llava_folder`: path to the folder containing the model files.
- `file_path`: path to the image file.
- `clip_model_path`: path to the CLIP (mmproj) model file.
- `model_path`: path to the LLaVA model file.
- `n_ctx`: context size for the Llama model; increase it to accommodate the image embedding.
- `logits_all`: must be set to `True` for LLaVA to work.
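Putting these together, the construction mirrors the one in `llava_cli.py`:

```python
from llama_cpp import Llama
from llama_cpp.llama_chat_format import Llava15ChatHandler

chat_handler = Llava15ChatHandler(
    clip_model_path=clip_model_path,
    verbose=True,
)

llm = Llama(
    model_path=model_path,
    chat_format="llava-1-5",
    chat_handler=chat_handler,
    n_ctx=4096,       # large enough to accommodate the image embedding
    logits_all=True,  # required to make llava work
)
```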

## Output

The script prints the generated description of the image:

- the first output contains the whole response object,
- the second output contains the response text.
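Assuming the OpenAI-style response shape used above, a slightly defensive way to extract the text:

```python
# `response` follows an OpenAI-style chat completion layout.
choices = response.get("choices", [])
if choices:
    print(choices[0]["message"]["content"])
else:
    print("No choices returned:", response)
```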



## License

This project is licensed under the MIT License.

26 changes: 16 additions & 10 deletions llama_cpp/_utils.py
@@ -17,14 +17,18 @@ def __enter__(self):
         if self.disable:
             return self

+        # Check if sys.stdout and sys.stderr have fileno method
+        if not hasattr(self.sys.stdout, 'fileno') or not hasattr(self.sys.stderr, 'fileno'):
+            return self  # Return the instance without making changes
+
         self.outnull_file = self.open(self.os.devnull, "w")
         self.errnull_file = self.open(self.os.devnull, "w")

         self.old_stdout_fileno_undup = self.sys.stdout.fileno()
         self.old_stderr_fileno_undup = self.sys.stderr.fileno()

-        self.old_stdout_fileno = self.os.dup(self.sys.stdout.fileno())
-        self.old_stderr_fileno = self.os.dup(self.sys.stderr.fileno())
+        self.old_stdout_fileno = self.os.dup(self.old_stdout_fileno_undup)
+        self.old_stderr_fileno = self.os.dup(self.old_stderr_fileno_undup)

         self.old_stdout = self.sys.stdout
         self.old_stderr = self.sys.stderr
@@ -40,14 +44,16 @@ def __exit__(self, *_):
         if self.disable:
             return

-        self.sys.stdout = self.old_stdout
-        self.sys.stderr = self.old_stderr
+        # Check if sys.stdout and sys.stderr have fileno method
+        if hasattr(self.sys.stdout, 'fileno') and hasattr(self.sys.stderr, 'fileno'):
+            self.sys.stdout = self.old_stdout
+            self.sys.stderr = self.old_stderr

-        self.os.dup2(self.old_stdout_fileno, self.old_stdout_fileno_undup)
-        self.os.dup2(self.old_stderr_fileno, self.old_stderr_fileno_undup)
+            self.os.dup2(self.old_stdout_fileno, self.old_stdout_fileno_undup)
+            self.os.dup2(self.old_stderr_fileno, self.old_stderr_fileno_undup)

-        self.os.close(self.old_stdout_fileno)
-        self.os.close(self.old_stderr_fileno)
+            self.os.close(self.old_stdout_fileno)
+            self.os.close(self.old_stderr_fileno)

-        self.outnull_file.close()
-        self.errnull_file.close()
+            self.outnull_file.close()
+            self.errnull_file.close()
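The `hasattr` guards added above matter when the interpreter's standard streams have been replaced by objects that expose no `fileno()` at all, as happens in some notebook and embedded environments. A minimal sketch of the failure mode the check avoids; the `DummyStream` class is hypothetical, purely for illustration:

```python
import os
import sys

class DummyStream:
    """Stand-in for a replaced stdout that can write() but has no fileno()."""
    def write(self, text):
        pass

sys.stdout = DummyStream()

# Without the hasattr check, os.dup(sys.stdout.fileno()) would raise
# AttributeError, since DummyStream has no fileno() to duplicate.
if hasattr(sys.stdout, "fileno"):
    saved_fd = os.dup(sys.stdout.fileno())
    os.close(saved_fd)
else:
    print("stdout has no fileno; skipping fd redirection", file=sys.__stderr__)
```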