# Before exploring the notebook, change the Colab runtime from the default CPU to a GPU.

# Installing necessary packages

In [None]:
# Python packages: byaldi (indexing/retrieval), streamlit (web UI), pyngrok (tunnel).
!pip install byaldi streamlit pyngrok
# System package installed alongside pdf2image.
# NOTE(review): presumably pdf2image relies on the poppler binaries to render PDF pages — confirm.
!sudo apt-get install -y poppler-utils
!pip install pdf2image

# Loading the data source from your local machine to Colab
<img width="1470" alt="Screenshot 2024-12-28 at 4 59 23 PM" src="https://github.com/user-attachments/assets/e3be932b-fbda-49ba-81a7-f7904e4b4200" />
<img width="1470" alt="Screenshot 2024-12-28 at 5 01 09 PM" src="https://github.com/user-attachments/assets/5d5b6244-236a-4807-af9b-bcba0debc568" />
<img width="1470" alt="Screenshot 2024-12-28 at 5 02 44 PM" src="https://github.com/user-attachments/assets/8337d446-7dd9-4928-8631-6800b9dcaf3a" />
<img width="1470" alt="Screenshot 2024-12-28 at 5 03 22 PM" src="https://github.com/user-attachments/assets/6357adae-8de4-45bc-a93b-86e22ccc63cd" />


# Loading the model for building the index


In [None]:
from byaldi import RAGMultiModalModel

# Load the pretrained ColPali v1.2 checkpoint; `RAG` is the handle used for indexing.
RAG = RAGMultiModalModel.from_pretrained(
    "vidore/colpali-v1.2",
)

In [None]:
# Indexing options gathered in one place so they are easy to edit before running.
index_options = dict(
    # The path to your uploaded document
    input_path="xxxxxxxxx",
    # The name you want to give to your index. It'll be saved at `index_root/index_name/`.
    index_name="xxxxxxxxx",
    # Whether the index should store the base64 encoded documents.
    store_collection_with_index=True,
    # Whether to overwrite an index if it already exists. If False, it'll return None
    # and do nothing if `index_root/index_name` exists.
    overwrite=True,
)
RAG.index(**index_options)

The index is saved under the folder name you specified. Now copy the index folder to your Google Drive with the following command (mount Google Drive in Colab first, and replace `/content/internship` with the path of your own index folder):

In [None]:
# Recursively copy the index folder into Google Drive.
# NOTE(review): assumes Drive is already mounted at /content/drive and that the index
# folder is /content/internship — adjust both paths to your own setup.
!cp -r /content/internship /content/drive/MyDrive

Set up the basic RAG model in [Stack AI](https://www.stack-ai.com/) as shown in the screenshot below:
<img width="1470" alt="Screenshot 2024-12-28 at 5 51 42 PM" src="https://github.com/user-attachments/assets/46ac535f-bc22-4cc9-b8cf-818c9aecf15b" />

Follow this [YouTube video](https://youtu.be/ZeZM_Zm7pgc) for a better understanding of the deployment. Once you have saved and published the project, navigate to the Export tab and copy the credentials shown in the Python code section, as follows:

<img width="1470" alt="Screenshot 2024-12-28 at 5 57 58 PM" src="https://github.com/user-attachments/assets/fc74673b-e30b-47c8-8e6a-d91a03f426f8" />


# Loading the necessary packages

In [None]:
import google.generativeai as genai
import base64
from io import BytesIO
from PIL import Image
from byaldi import RAGMultiModalModel
import logging
import torch
import requests
from datetime import datetime
import re

In [None]:
# Stack AI endpoint and request headers. Both values are placeholders: replace them
# with the credentials copied from Stack AI, and avoid committing real keys.
API_URL = "xxxxxxxxxxxxx"
headers = {
    'Authorization': 'Bearer xxxxxxxxxx',
    'Content-Type': 'application/json',
}

Set up an account in [Gemini Cloud](https://ai.google.dev/) and copy-paste your API key into the cell below.

In [None]:
# Logging: module-level logger, INFO and above
logger = logging.getLogger(__name__)
logging.basicConfig(level=logging.INFO)

# Gemini client (the key below is a placeholder; paste your own)
api_key = "xxxxxxxxxxxxxxx"
genai.configure(api_key=api_key)
model = genai.GenerativeModel(
    model_name='gemini-1.5-flash-latest',
)

# RAG model restored from the saved index; loaded a single time here
rag_model = RAGMultiModalModel.from_index(
    "paste your index stored path"
)

In [None]:
# Instruction text that tells the model to answer from the provided image and
# which fallback sentence to use when it cannot.
SYSTEM_CONTEXT = (
    "Please consider the provided image and the user's question for generating the final answer.\n"
    "If you are unable to understand or answer based on the images, please respond with "
    "'I apologize, I cannot determine the answer from these images.'"
)

Now create a new file `app.py` in the `/content` directory with the following code and save the file.
<img width="1470" alt="Screenshot 2024-12-28 at 6 03 10 PM" src="https://github.com/user-attachments/assets/5ae6369c-7f54-4b53-a2c5-5b435fffbb5c" />



```
import streamlit as st
import google.generativeai as genai
import base64
from io import BytesIO
from PIL import Image
from byaldi import RAGMultiModalModel
import time
import logging
import json
from datetime import datetime
import requests
import re

# Configure logging: every record goes both to chatbot.log and to a stream handler
log_handlers = [
    logging.FileHandler('chatbot.log'),
    logging.StreamHandler(),
]
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
    handlers=log_handlers,
)
logger = logging.getLogger(__name__)

# Page setup: browser tab title, wide layout, and the on-page heading
st.set_page_config(layout="wide", page_title="RAG Image Chatbot")
st.title("Dual RAG Chatbot System")

# Gemini client (the key is a placeholder; paste your own before running)
api_key = "gemini api key"
genai.configure(api_key=api_key)
model = genai.GenerativeModel(
    model_name='gemini-1.5-flash-latest',
)

# Stack AI endpoint and request headers (placeholders; paste your own credentials)
API_URL = "xxxxxxxxxxxxxxx"
headers = {
    'Authorization': 'Bearer xxxxxxxxxxxxxxxx',
    'Content-Type': 'application/json',
}

def query(payload, timeout=60):
    """Post ``payload`` to the Stack AI endpoint and return the decoded JSON reply.

    Args:
        payload: JSON-serialisable object sent as the request body.
        timeout: Seconds to wait for the server. Without a timeout, ``requests``
            can block indefinitely on a stalled connection and freeze the app.

    Returns:
        The parsed JSON body of the response.

    Raises:
        requests.exceptions.RequestException: On connection failure, timeout,
            or a non-success HTTP status.
        ValueError: If the response body is not valid JSON.
    """
    response = requests.post(API_URL, headers=headers, json=payload, timeout=timeout)
    # Fail loudly on 4xx/5xx instead of trying to parse an error page as a result.
    response.raise_for_status()
    return response.json()

# Initialize RAG model
@st.cache_resource
def load_rag_model():
    """Load the multi-modal RAG model from the saved index and return it.

    Decorated with ``st.cache_resource``; logs one line each time the body runs.
    """
    index_path = "path to your index"
    logger.info("Initializing RAG model")
    rag_index = RAGMultiModalModel.from_index(index_path)
    return rag_index

def clean_traditional_response(response):
    """Strip the citations section and inline footnote markers from a text answer.

    Everything from the first ``<citations>`` tag or ``Citations:`` heading
    onward is discarded, then inline markers such as ``[^1]`` or ``[^2.3]``
    are removed.

    Args:
        response: Raw answer string.

    Returns:
        The cleaned answer with no leading or trailing whitespace.
    """
    # Keep only the text that precedes the citations block.
    answer = re.split(r'<citations>|Citations:', response)[0]
    # Drop inline footnote markers.
    answer = re.sub(r'\[\^[\d\.]+\]', '', answer)
    # Strip last: removing a marker at either end ("... blue. [^1]") would
    # otherwise leave dangling whitespace behind.
    return answer.strip()


existing_index = load_rag_model()

# Seed each session-state entry on first use only; existing values are left untouched
for state_key, initial_value in (("messages", []), ("show_logs", False)):
    if state_key not in st.session_state:
        st.session_state[state_key] = initial_value

# Instruction text prepended to the user's question before it is sent to Gemini
SYSTEM_CONTEXT = """Please consider the provided image and the user's question for generating the final answer.
If you are unable to understand or answer based on the images, please respond with 'I apologize, I cannot determine the answer from these images.'"""

def base64_to_pil(base64_string):
    """Decode a base64 string into a PIL image.

    If the string contains a ``base64,`` marker, only the segment that follows
    the first marker is decoded; otherwise the whole string is decoded.
    """
    marker = 'base64,'
    encoded = base64_string.split(marker)[1] if marker in base64_string else base64_string
    raw_bytes = base64.b64decode(encoded)
    return Image.open(BytesIO(raw_bytes))

# Sidebar: short description, a history reset button, and the log toggle
with st.sidebar:
    for sidebar_line in (
        "### About",
        "This chatbot combines two RAG systems:",
        "1. Multi-modal RAG (Left)",
        "2. Text-based RAG (Right)",
    ):
        st.markdown(sidebar_line)

    clear_requested = st.button("Clear Chat History")
    if clear_requested:
        # Empty the stored conversation and rerun the script
        st.session_state.messages = []
        st.rerun()

    st.session_state.show_logs = st.checkbox(
        "Show Processing Logs",
        value=st.session_state.show_logs,
    )

# Display chat history: replay every stored message on each rerun.
for message in st.session_state.messages:
    with st.chat_message(message["role"]):
        if message["role"] == "assistant":
            col1, col2 = st.columns(2)
            with col1:
                st.markdown("### Multi-modal RAG")
                st.markdown(message.get("image_response", ""))
                # Assistant messages always carry these keys, but the values are
                # None when the multi-modal step failed, so test the value rather
                # than key membership (formatting or decoding None would raise).
                score = message.get("similarity_score")
                if score is not None:
                    # Same plain two-decimal format used when the answer is first rendered.
                    st.markdown(f"**Similarity Score:** {score:.2f}")
                stored_image = message.get("image")
                if stored_image:
                    st.image(base64_to_pil(stored_image),
                            caption="Retrieved Image",
                            use_column_width=True)
            with col2:
                st.markdown("### Text-based RAG")
                st.markdown(message.get("text_response", ""))
        else:
            st.markdown(message["content"])

# Chat interface: runs once per submitted question.
if prompt := st.chat_input("Ask your question here..."):
    logger.info(f"Received query: {prompt}")

    # Add user message
    st.session_state.messages.append({"role": "user", "content": prompt})
    with st.chat_message("user"):
        st.markdown(prompt)

    # Process query in two columns. The defaults below are what gets stored
    # if one of the two pipelines fails.
    col1, col2 = st.columns(2)
    text_response = ""
    image_response = ""
    image_base64 = None
    similarity_score = None

    # First column - Multi-modal RAG
    with col1:
        st.markdown("### Multi-modal RAG")
        with st.status("Processing multi-modal query...") as status:
            try:
                st.write("🔍 Searching relevant documents...")
                results = existing_index.search(prompt, k=1, return_base64_results=True)
                if not results:
                    # Give a readable message instead of an opaque IndexError.
                    raise ValueError("No matching document was found in the index.")
                similarity_score = results[0]['score']

                st.write("💭 Generating response...")
                image_base64 = results[0]['base64']
                pil_image = base64_to_pil(image_base64)

                # Gemini receives the instruction text, the question, and the retrieved image.
                query_text = f"{SYSTEM_CONTEXT}\n\nUser Question: {prompt}"
                response = model.generate_content([query_text, pil_image])
                image_response = response.text

                status.update(label="✅ Multi-modal processing complete!", state="complete")

                st.markdown(image_response)
                st.markdown(f"**Similarity Score:** {similarity_score:.2f}")
                st.image(pil_image, caption="Retrieved Image", use_column_width=True)
            except Exception as e:
                logger.error(f"Multi-modal RAG error: {str(e)}")
                # Mark the status widget as failed so it does not look like a success.
                status.update(label="Multi-modal processing failed", state="error")
                st.error(f"An error occurred: {str(e)}")

    # Second column - Text-based RAG
    with col2:
        st.markdown("### Text-based RAG")
        with st.status("Processing text query...") as status:
            try:
                st.write("🔍 Processing query...")
                result = query({
                    "in-0": prompt,
                    "user_id": "<USER or Conversation ID>"
                })
                text_response = clean_traditional_response(result['outputs']['out-0'])
                status.update(label="✅ Text processing complete!", state="complete")
                st.markdown(text_response)
            except Exception as e:
                logger.error(f"Text RAG error: {str(e)}")
                # Mark the status widget as failed so it does not look like a success.
                status.update(label="Text processing failed", state="error")
                st.error(f"An error occurred: {str(e)}")

    # Store assistant response in chat history (skipped when both pipelines produced nothing)
    if text_response or image_response:
        st.session_state.messages.append({
            "role": "assistant",
            "text_response": text_response,
            "image_response": image_response,
            "image": image_base64,
            "similarity_score": similarity_score,
            "timestamp": datetime.now().isoformat()
        })

# Show the tail of chatbot.log when the sidebar checkbox is ticked
if st.session_state.show_logs:
    st.divider()
    st.markdown("### Processing Logs")
    try:
        with open('chatbot.log', 'r') as log_file:
            recent_entries = log_file.readlines()[-10:]
        # Only the last ten lines are rendered, one text element per line
        for entry in recent_entries:
            st.text(entry.strip())
    except FileNotFoundError:
        st.info("No logs available yet. Start chatting to generate logs!")
    except Exception as e:
        st.error(f"Error reading logs: {str(e)}")


```


It's time to start the web application. To do that, log in to ngrok and create an authentication token.

In [None]:
# Import and set auth token
from pyngrok import ngrok
import time
# Register the ngrok auth token with pyngrok. The string below is a placeholder:
# replace it with your own token and avoid committing the real value.
ngrok.set_auth_token("your ngrok auth token")

def launch_streamlit_with_ngrok():
    # Kill any existing processes
    !pkill -9 streamlit

    # Start Streamlit
    print("üöÄ Starting Streamlit...")
    !streamlit run /content/app.py &>/content/logs.txt &
    time.sleep(3)  # Give Streamlit time to start

    # Configure and start ngrok tunnel
    print("üåê Creating secure tunnel...")
    # Close any existing tunnels
    ngrok.kill()

    # Create tunnel with explicit configuration
    tunnel = ngrok.connect(
        addr=8501,
        proto="http",  # Explicitly set protocol
        bind_tls=True  # Enable HTTPS
    )

    print("\n‚ú® Your Streamlit app is live at:", tunnel.public_url)
    print("NOTE: The URL will be active as long as this Colab notebook is running")
    return tunnel.public_url

# Launch everything
try:
    url = launch_streamlit_with_ngrok()
except Exception as e:
    print(f"Error: {str(e)}")
    # Try alternative configuration if first attempt fails
    try:
        # Take `.public_url`, as the primary path does, so `url` is a URL string
        # on both paths and the printed line shows the address, not the tunnel object.
        url = ngrok.connect(8501).public_url
        print("\n✨ Your Streamlit app is live at:", url)
    except Exception as fallback_error:
        # Separate name so the first error is not shadowed.
        print(f"Final Error: {str(fallback_error)}")

Once you run the above cell, you should see output like the following:
<img width="1470" alt="Screenshot 2024-12-28 at 6 10 50 PM" src="https://github.com/user-attachments/assets/36507d48-2e2d-43a4-b0ca-3e7f20b32045" />

Then click on the first link; it opens a new tab as follows:
<img width="1470" alt="Screenshot 2024-12-28 at 6 26 58 PM" src="https://github.com/user-attachments/assets/af585cf8-13e2-4d95-841e-0902f665c913" />
