# Local Visual QA with LLaMA 3.2 Vision
## ABB #6 - Session 2

Code authored by: Shaw Talebi

### imports

In [1]:
import ollama
import gradio as gr
import time

### basic usage

Ensure Ollama is installed and running locally before executing the cells below: https://ollama.com/

In [2]:
# download the vision model into the local Ollama store (the call's result is shown as the cell output)
ollama.pull(model='llama3.2-vision')

ProgressResponse(status='success', completed=None, total=None, digest=None)

In [3]:
# ask the locally served model a question about an image
user_message = {
    'role': 'user',
    'content': "What color is Shaw's hat?",
    'images': ['content/shaw.jpeg'],
}

stream = ollama.chat(model='llama3.2-vision', messages=[user_message], stream=True)

# print each piece of the answer as soon as it arrives
for chunk in stream:
    piece = chunk['message']['content']
    print(piece, end='', flush=True)

Shaw's hat is black.

### gradio UI

In [4]:
# function to refactor Gradio's message history into an ollama compatible format
def refactor_for_ollama(history):
    """
    Convert a chat history into the message list passed to `ollama.chat`.

    A user entry whose content is a tuple is treated as an image upload (the
    tuple's first element is used as the image path). Consecutive image
    entries are collected and merged with the next user text entry into a
    single message carrying both "content" and "images".

    An image upload that is never followed by user text (an assistant turn
    comes next, or the history ends) is emitted as its own user message with
    empty content, so the image is neither dropped nor attached to a later,
    unrelated message.

    Args:
        history (list): Dicts with "role" and "content" keys. User entries
            with string content may also carry an "images" list.

    Returns:
        list: Dicts with "role" and "content" keys; user messages also have
            an "images" list. Entries with any other role or content type
            are skipped.
    """
    refactored = []
    pending_user_message = None  # image upload(s) still waiting for their text

    for entry in history:
        role = entry.get("role")
        content = entry.get("content")

        # Image-only user message: accumulate instead of overwriting, so that
        # several uploads sent together are all kept.
        if role == "user" and isinstance(content, tuple):
            if pending_user_message is None:
                pending_user_message = {"role": "user", "images": [], "content": ""}
            image_path = content[0] if content else None
            if image_path:
                pending_user_message["images"].append(image_path)

        # Text message, possibly completing the pending image upload(s)
        elif role == "user" and isinstance(content, str):
            own_images = list(entry.get("images") or [])
            if pending_user_message is not None:
                pending_user_message["content"] = content
                # keep images attached to the text entry itself as well
                pending_user_message["images"].extend(own_images)
                refactored.append(pending_user_message)
                pending_user_message = None
            else:
                refactored.append({"role": "user", "content": content, "images": own_images})

        # Assistant message: flush any image upload that never received text
        # first, so it stays in its original position in the conversation.
        elif role == "assistant":
            if pending_user_message is not None:
                refactored.append(pending_user_message)
                pending_user_message = None
            refactored.append({"role": role, "content": content})

    # History ended with an image upload that has no accompanying text
    if pending_user_message is not None:
        refactored.append(pending_user_message)

    return refactored

In [5]:
# Function to interact with the Ollama model
def stream_chat(message, history):
    """
    Streams the response from the Ollama model and sends it to the Gradio UI.

    Args:
        message (dict | str): The new user input. Normally a dict with a
            "text" entry and an optional "files" list, where each file is
            either a path or a dict with a "path" key. A plain string is
            also accepted and treated as text with no files.
        history (list): A list of previous conversation messages. It is
            read but not modified.

    Yields:
        str: The response accumulated so far, growing chunk by chunk.
    """
    # Split the incoming message into its text and attached files
    if isinstance(message, str):
        text, files = message, []
    else:
        text = message["text"] or ""
        files = (message["files"] if "files" in message else None) or []

    # Reduce each file to its path (dicts carry it under the 'path' key)
    image_paths = [
        file['path'] if isinstance(file, dict) and 'path' in file else file
        for file in files
    ]

    # Work on a copy: the history list is owned by the caller (the Gradio UI),
    # and appending to it here could duplicate entries in the conversation.
    user_message = {"role": "user", "content": text, "images": image_paths}
    conversation = list(history or []) + [user_message]

    # Initialize streaming from Ollama
    stream = ollama.chat(
        model='llama3.2-vision',
        messages=refactor_for_ollama(conversation),  # Full chat history including the current user message
        stream=True,
    )

    response_text = ""
    for chunk in stream:
        response_text += chunk['message']['content']
        yield response_text  # Send the response incrementally to the UI

In [6]:
# Build the chat UI around stream_chat and start the local server
example_inputs = [
    {"text": "What color is Shaw's hat?", "files": ['content/shaw.jpeg']},
]

demo = gr.ChatInterface(
    fn=stream_chat,           # generator that yields the growing reply
    multimodal=True,          # accept file uploads alongside text
    type="messages",          # history is exchanged as role/content dicts
    examples=example_inputs,  # clickable sample prompt
)

demo.launch()

* Running on local URL:  http://127.0.0.1:7860
* To create a public link, set `share=True` in `launch()`.


