In [13]:
%%writefile app_visual_qa.py

import streamlit as st
import os
from dotenv import load_dotenv
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_core.messages import HumanMessage, AIMessage
from langchain_community.chat_message_histories import StreamlitChatMessageHistory
from PIL import Image
import base64
from io import BytesIO

# Load environment variables
load_dotenv()

# Page config. Kept ahead of every other Streamlit call (including the cached
# model factory below) so it remains the first Streamlit command of the run.
st.set_page_config(page_title="Visual QA Bot", page_icon="🖼️")


@st.cache_resource(show_spinner=False)
def _load_llm():
    """Build the Gemini chat model once and reuse it on later script reruns.

    Streamlit re-executes this script on every interaction; caching the
    client avoids constructing a new one each time.
    """
    return ChatGoogleGenerativeAI(
        model="gemini-2.5-flash",
        google_api_key=os.getenv("GEMINI_API_KEY"),
        temperature=0.3,
    )


# Initialize Gemini model
llm = _load_llm()

# Title
st.title("🖼️ Visual QA Bot")
st.markdown("Upload an image and ask questions about it")

# Initialize chat history (stored in st.session_state under "chat_messages")
msg_history = StreamlitChatMessageHistory(key="chat_messages")

# Initialize session state for image
if "uploaded_image" not in st.session_state:
    st.session_state.uploaded_image = None

# Function to encode image
def encode_image(image):
    """Return *image* serialized as PNG and base64-encoded as text.

    Args:
        image: A PIL image (any object exposing ``mode``, ``convert`` and
            ``save``).

    Returns:
        The base64 string of the PNG bytes, without a ``data:`` URI prefix.
    """
    # Pillow's PNG writer only accepts these modes. Others (notably CMYK,
    # which print-oriented JPEGs use) make save() raise, so convert first.
    png_modes = {"1", "L", "LA", "I", "I;16", "P", "RGB", "RGBA"}
    if image.mode not in png_modes:
        image = image.convert("RGB")

    buffered = BytesIO()
    image.save(buffered, format="PNG")
    return base64.b64encode(buffered.getvalue()).decode()

# File uploader
uploaded_file = st.file_uploader("Choose an image", type=["jpg", "jpeg", "png"])

# Keep the stored image in sync with the uploader widget: when the user
# removes the file, the previous image must not linger in session state,
# otherwise later questions are still answered against an image that is no
# longer shown.
if uploaded_file is not None:
    try:
        st.session_state.uploaded_image = Image.open(uploaded_file)
    except OSError as exc:
        # Pillow raises an OSError subclass for files it cannot identify.
        st.session_state.uploaded_image = None
        st.error(f"Could not read the uploaded image: {exc}")
    else:
        st.image(st.session_state.uploaded_image, caption="Uploaded Image", use_container_width=True)
else:
    st.session_state.uploaded_image = None

st.markdown("---")

# Replay the stored conversation so it stays on screen after every rerun.
for past_message in msg_history.messages:
    # Human turns render in the "user" bubble; everything else is shown as
    # coming from the assistant.
    if isinstance(past_message, HumanMessage):
        speaker = "user"
    else:
        speaker = "assistant"

    bubble = st.chat_message(speaker)
    with bubble:
        st.markdown(past_message.content)

# Chat input (always visible)
def _response_text(content):
    """Flatten message content into a single displayable string.

    Message content may be a plain string or a list of blocks (strings or
    dicts). Only string items and dict blocks whose ``type`` is ``"text"``
    contribute to the result; other blocks are ignored.
    """
    if isinstance(content, str):
        return content

    pieces = []
    for block in content:
        if isinstance(block, str):
            pieces.append(block)
        elif isinstance(block, dict) and block.get("type") == "text":
            pieces.append(block.get("text", ""))
    return "".join(pieces)


if prompt := st.chat_input("Ask anything about the image..."):
    # Display user message
    with st.chat_message("user"):
        st.markdown(prompt)

    msg_history.add_user_message(prompt)

    # Generate response
    # NOTE: only the current prompt (plus the image, if any) is sent to the
    # model; earlier turns in msg_history are displayed but not forwarded.
    with st.chat_message("assistant"):
        with st.spinner("Thinking..."):
            try:
                image = st.session_state.uploaded_image
                if image is not None:
                    # With image: send the question and the PNG data URI together
                    img_base64 = encode_image(image)

                    message = HumanMessage(
                        content=[
                            {"type": "text", "text": prompt},
                            {"type": "image_url", "image_url": f"data:image/png;base64,{img_base64}"},
                        ]
                    )
                    response = llm.invoke([message])
                else:
                    # Without image - just text
                    st.warning("⚠️ No image uploaded. Please upload an image to ask visual questions!")
                    response = llm.invoke([HumanMessage(content=prompt)])

                # Normalize before rendering/storing so list-shaped content
                # is shown as text rather than a raw Python structure.
                answer = _response_text(response.content)
                st.markdown(answer)
                msg_history.add_ai_message(answer)

            except Exception as e:
                # UI boundary: surface any model/encoding failure in the chat
                # instead of crashing the app with a traceback.
                st.error(f"Error: {e}")

# Sidebar control for wiping the conversation
clear_clicked = st.sidebar.button("🗑️ Clear Chat")
if clear_clicked:
    # Empty the stored history, then rerun so the page redraws without it.
    msg_history.clear()
    st.rerun()

Overwriting app_visual_qa.py
