# Chat UI for Multimodal Article QA Assistant

Code authored by: Shaw Talebi

[Blog link](https://medium.com/towards-data-science/multimodal-rag-process-any-file-type-with-ai-e6921342c903) 
| [Video link](https://youtu.be/Y7pNmocrmi8)

### imports

In [2]:
#!pip install gradio

In [3]:
import ollama
from torch import load
import gradio as gr
import time
from functions import *

### Load model and data

In [4]:
# pull model
ollama.pull('llama3.2-vision')

ProgressResponse(status='success', completed=None, total=None, digest=None)

In [5]:
# load article contents
text_content_list = load_from_json('data/text_content.json')
image_content_list = load_from_json('data/image_content.json')

# load embeddings
text_embeddings = load('data/text_embeddings.pt', weights_only=True)
image_embeddings = load('data/image_embeddings.pt', weights_only=True)

### UI

In [6]:
# Function to interact with the Ollama model
def stream_chat(message, history):
    """
    Streams the response from the Ollama model and sends it to the Gradio UI.
    
    Args:
        message (str): The user input message.
        history (list): A list of previous conversation messages.
        
    Yields:
        str: The chatbot's response chunk by chunk.
    """

    # context retrieval
    text_results, image_results = context_retrieval(message["text"], text_embeddings, image_embeddings, text_content_list, image_content_list)

    # construct prompt
    prompt = construct_prompt(message["text"], text_results, image_results)
    
    # Append the user message to the conversation history
    history.append({"role": "user", "content": prompt, "images": [image["image_path"] for image in image_results]})
    
    # Initialize streaming from Ollama
    stream = ollama.chat(
        model='llama3.2-vision',
        messages=history,  # Full chat history including the current user message
        stream=True,
    )
    
    response_text = ""
    for chunk in stream:
        content = chunk['message']['content']
        response_text += content
        yield response_text  # Send the response incrementally to the UI

    # Append the assistant's full response to the history
    history.append({"role": "assistant", "content": response_text})

In [7]:
# Create a Gradio ChatInterface
gr.ChatInterface(
    fn=stream_chat,  # The function handling the chat
    type="messages",  # Using "messages" to enable chat-style conversation
    examples=[{"text": "What is CLIP's contrastive loss function?"}, 
              {"text": "What are the three paths described for making LLMs multimodal?"},
              {"text": "What is an intuitive explanation of multimodal embeddings?"}],  # Example inputs
    multimodal=True,
).launch()

TypeError: ChatInterface.__init__() got an unexpected keyword argument 'type'

In [8]:
def stream_chat(message, history):
    # message is now a plain string
    query = message

    # context retrieval
    text_results, image_results = context_retrieval(
        query, text_embeddings, image_embeddings, text_content_list, image_content_list
    )

    # construct prompt
    prompt = construct_prompt(query, text_results, image_results)

    # Append the user message (with images) to history for Ollama
    history.append({
        "role": "user",
        "content": prompt,
        "images": [image["image_path"] for image in image_results],
    })

    stream = ollama.chat(
        model='llama3.2-vision',
        messages=history,
        stream=True,
    )

    response_text = ""
    for chunk in stream:
        content = chunk["message"]["content"]
        response_text += content
        yield response_text

    history.append({"role": "assistant", "content": response_text})


In [9]:
import gradio as gr

gr.ChatInterface(
    fn=stream_chat,  # your streaming function is already (message, history)
    examples=[
        "What is CLIP's contrastive loss function?",
        "What are the three paths described for making LLMs multimodal?",
        "What is an intuitive explanation of multimodal embeddings?",
    ],
).launch()


* Running on local URL:  http://127.0.0.1:7860
* To create a public link, set `share=True` in `launch()`.


