In [1]:
import base64

def image_to_base64(image_path):
    """Return the contents of the file at ``image_path`` as Base64 text.

    Args:
        image_path: Path of the file to read; it is opened in binary mode.

    Returns:
        str: The Base64 encoding of the file's bytes, decoded as UTF-8.
    """
    with open(image_path, 'rb') as handle:
        raw_bytes = handle.read()
    # The file is already closed here; encoding only needs the bytes.
    return base64.b64encode(raw_bytes).decode('utf-8')

import json

In [25]:
def get_image_desc(image_path,
                   url="http://localhost:11434/api/generate",
                   model="llava",
                   prompt="describe the food in the image?",
                   timeout=300):
    """Ask a vision model served over HTTP to describe the image at ``image_path``.

    The image is Base64-encoded and posted, together with the prompt, to the
    generate endpoint in a single non-streaming request.

    Args:
        image_path: Path of the image file to describe.
        url: Generate endpoint to post to (defaults to the local server the
            notebook was written against).
        model: Name of the model to run.
        prompt: Instruction sent alongside the image.
        timeout: Seconds to wait for the server before giving up. Generous by
            default because local inference can be slow; pass ``None`` to wait
            forever, as the original code did.

    Returns:
        str: The ``response`` field of the server's JSON reply.

    Raises:
        requests.HTTPError: If the server answers with an error status; the
            message includes the response body so the cause is visible.
        requests.Timeout: If the server does not answer within ``timeout``.
        KeyError: If a successful reply has no ``response`` field.
    """
    import requests  # kept function-local, as in the original cell

    payload = {
        "model": model,
        "prompt": prompt,
        "stream": False,
        "images": [image_to_base64(image_path)],
    }
    response = requests.post(url, json=payload, timeout=timeout)
    if not response.ok:
        # Fail loudly with the server's explanation instead of a later,
        # opaque KeyError on the missing 'response' field.
        raise requests.HTTPError(
            f"image description request failed "
            f"({response.status_code}): {response.text}",
            response=response,
        )
    return response.json()['response']

In [3]:
# Define your desired data structure.
from langchain_community.llms import Ollama
from typing import List

from langchain.output_parsers import PydanticOutputParser
from langchain.prompts import PromptTemplate
from langchain_core.pydantic_v1 import BaseModel, Field, validator
from langchain_openai import ChatOpenAI
from langchain_core.messages import HumanMessage
from langchain_mistralai.chat_models import ChatMistralAI

# Structured answer the output parser extracts from the LLM reply: a single
# boolean verdict.
# NOTE(review): documented with comments rather than a class docstring on
# purpose -- pydantic copies a model's docstring into its schema, which would
# presumably change the format instructions sent to the LLM; confirm before
# converting this to a docstring.
class HotDog(BaseModel):
    # Read by query_message: a truthy value yields the "Its a hot dog" reply.
    hot_dog_or_not: bool = Field(description="hot dog or not")

        
class HotDogLLM:
    """Decides, from a text description of food, whether it is a hot dog.

    Builds a ``prompt | llm | parser`` chain whose parser turns the model's
    reply into a ``HotDog`` object. The chain is stored on ``self.chain``.
    """

    def __init__(self, local=False, mistral_api_key=None):
        """Build the chain.

        Args:
            local: When true, use the ``mistral:instruct`` model through
                ``Ollama``; otherwise use ``ChatMistralAI`` with
                ``mistral-small``.
            mistral_api_key: API key for the hosted model. When omitted it is
                read from the ``MISTRAL_API_KEY`` environment variable (empty
                string if unset) so no secret has to live in the notebook.
                Ignored when ``local`` is true.
        """
        if local:
            llm = Ollama(model="mistral:instruct", temperature=0.1)
        else:
            import os

            if mistral_api_key is None:
                mistral_api_key = os.environ.get("MISTRAL_API_KEY", "")
            llm = ChatMistralAI(mistral_api_key=mistral_api_key, model='mistral-small')
        parser = PydanticOutputParser(pydantic_object=HotDog)

        prompt = PromptTemplate(
            template="Answer the user query.\n{format_instructions}\n{query}\n",
            input_variables=["query"],
            partial_variables={"format_instructions": parser.get_format_instructions()},
        )

        self.chain = prompt | llm | parser

    def get(self, text, max_attempts=5):
        """Run the chain on ``text``, retrying when a call or parse fails.

        Args:
            text: Description of the food to classify.
            max_attempts: Maximum number of times the chain is invoked.

        Returns:
            The chain's result from the first successful attempt, or ``None``
            when every attempt failed (callers treat ``None`` as "not a hot
            dog").
        """
        import logging

        log = logging.getLogger(__name__)

        # Same text the original multi-line f-string produced, including the
        # embedded newline and indentation; built once since it does not
        # change between attempts.
        query = (
            "You are a assistant who \n"
            "                should tell if the food mentioned in text "
            f"{text} is hot dog or not"
        )

        for attempt in range(1, max_attempts + 1):
            try:
                return self.chain.invoke({"query": query})
            except Exception as err:
                # Only ordinary errors are retried; KeyboardInterrupt and
                # SystemExit propagate so the cell can still be interrupted.
                log.warning(
                    "HotDogLLM attempt %d/%d failed: %r", attempt, max_attempts, err
                )
        return None


In [4]:
# Shared classifier instance used by the chat handler; local=True selects the
# Ollama-backed model instead of the hosted one.
hd = HotDogLLM(local=True)

In [None]:
import gradio as gr
import base64



def query_message(history, img):
    """Classify the submitted image and append the verdict to the chat history.

    Args:
        history: Current chat transcript as a list of ``(user, bot)`` pairs;
            ``None`` is treated as an empty transcript.
        img: Path of the submitted image file, or a falsy value when nothing
            was submitted.

    Returns:
        list: A new transcript (the input list is not modified). Without an
        image a ``("hi", None)`` entry is appended; otherwise the entry holds
        the verdict text followed by the image embedded as a Markdown data URL.
    """
    import mimetypes

    # Copy so the caller's list is left untouched, and tolerate None.
    history = list(history or [])
    if not img:
        history.append(("hi", None))
        return history

    desc = get_image_desc(img)
    print(desc)
    res = hd.get(desc)

    # Label the data URL with the file's real type; fall back to JPEG (the
    # value that used to be hard-coded) when it cannot be determined.
    mime_type = mimetypes.guess_type(img)[0]
    if not mime_type or not mime_type.startswith("image/"):
        mime_type = "image/jpeg"
    data_url = f"data:{mime_type};base64,{image_to_base64(img)}"

    # A failed classification (res is None) counts as "not a hot dog".
    if res and res.hot_dog_or_not:
        txt = "Its a hot dog"
    else:
        txt = "Its not a hot dog"

    history.append((f"{txt} ![]({data_url})", None))
    return history
    
# UI code: lay out the page, wire the button to query_message, then start the
# server. Component order and nesting below determine the layout.
with gr.Blocks() as app:
    # One row holding the image input and the chat transcript.
    with gr.Row():
        # type="filepath": query_message receives the upload as a path, which
        # it passes on to helpers that open() the file.
        image_box = gr.Image(type="filepath")
   
        chatbot = gr.Chatbot(
            scale = 2,
            height=750
        )


    # Created outside the Row block but still inside Blocks.
    btn = gr.Button("Submit")
    # Inputs are (chat history, image path); the returned history replaces the
    # chatbot's contents.
    clicked = btn.click(query_message,
                        [chatbot,image_box],
                        chatbot
                        )
app.queue()
# NOTE(review): with debug=True this call presumably blocks the cell while the
# server runs -- confirm against the Gradio docs.
app.launch(debug=True)

Running on local URL:  http://127.0.0.1:7860

To create a public link, set `share=True` in `launch()`.


 The image shows a hamburger with what appears to be a beef patty on a sesame seed bun. On top of the burger, there is a slice of tomato and a leaf of lettuce. A small amount of chopped onion is sprinkled on the patty, and it seems to be garnished with some sort of condiment or sauce, though the specific type isn't visible from this angle. The background suggests a casual dining setting, likely a restaurant or cafe. 
 The image shows two hot dogs, each served on a bun. One of the hot dogs has been topped with traditional condiments, including what appears to be mustard and relish, along with diced white onions, sliced pickles, and chopped green herbs, which is likely parsley or cilantro. The other hot dog has been garnished with a variety of colorful toppings, including tomato slices, green leaves that could be lettuce or arugula mix, red onions, and what seems to be a dollop of mayonnaise. Both hot dogs are placed on a flat surface next to each other, and in the background, there is a