In [27]:
# Importing necessary libraries and packages.
import base64
import getpass
import mimetypes
import os
import shutil

import cv2
import faiss
import fitz
import gradio as gr
import numpy as np
import pandas as pd
from langchain.prompts import ChatPromptTemplate
from langchain_community.docstore.in_memory import InMemoryDocstore
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.vectorstores import FAISS
from langchain_core.documents import Document
from langchain_core.messages import HumanMessage, AIMessage
from langchain_google_genai import ChatGoogleGenerativeAI, GoogleGenerativeAIEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langgraph.graph import START, StateGraph
from PIL import Image
from typing_extensions import List, TypedDict

In [None]:
# Expose the Google API key as an environment variable so the Gemini clients can read it;
# the key is only prompted for when it is not already set.
if not os.environ.get("GOOGLE_API_KEY"):
    os.environ["GOOGLE_API_KEY"] = getpass.getpass("GOOGLE_API_KEY")

# Chat model and embedding model used by the chatbot.
llm = ChatGoogleGenerativeAI(model="gemini-2.0-flash-lite")
embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")

# Embed a throwaway query to learn the embedding width, which sizes the FAISS index.
embedding_dim = len(embeddings.embed_query("hello world"))
index = faiss.IndexFlatL2(embedding_dim)

# Vector store for the chatbot; it starts empty and is filled by uploads.
vector_store = FAISS(
    index=index,
    embedding_function=embeddings,
    index_to_docstore_id={},
    docstore=InMemoryDocstore(),
)
# State carried through the graph: each node reads some keys and returns updates for others.
class State(TypedDict):
    """Keys exchanged between the retrieval and generation nodes of the pipeline."""
    # The user's current query; read by both the retrieval and the generation node.
    question: str
    # Documents returned by the retrieval node and joined into the prompt by `generate`.
    context: List[Document]
    # Text of the LLM reply, produced by `generate`.
    answer: str
    # Earlier chat turns as dicts with "role" and "content" keys. `generate` reads this
    # with `.get`, so callers (such as the evaluation run) may leave it out.
    history: List[dict]
# System prompt for the chatbot: sets its tone and answer format, and tells it to prefer the
# retrieved context over its own knowledge (and to say so when it falls back on the latter).
system_prompt = """
You are a friendly, helpful assistant with a conversational tone. 

Always prioritize the context provided when answering, and only use your own knowledge if the context is clearly unrelated.

Your goal is to simplify complex ideas using:
- Bullet points for clarity
- Approachable analogies or metaphors
- Occasional emojis (only where they help)

Avoid jargon unless the user already used it. Match the user's tone and formality. Keep your answers informative but human.

If the context doesn't mention the answer, feel free to respond from your own training, but *tell the user that's what you're doing*.

EXAMPLE:

User: What is the body effect in MOSFETs?

Answer: Got you! Let's break it down:

### The Body Effect 🧲

- It's a phenomenon in MOSFETs where the voltage between the source and body (VSB) affects how the transistor behaves
- Even if you keep the gate voltage constant, changing VSB will change the threshold voltage (VT)
- That means the transistor might turn on later or earlier

Think of it like a tug-of-war between the gate and body — the body can pull the rope back a little, making it harder for the gate to win.

Let me know if you want examples or more analogies! 😊
"""
# Chat template: the system prompt above, followed by one human message that carries the
# retrieved context and the user's question via the {context} and {question} placeholders.
prompt = ChatPromptTemplate.from_messages([
    ("system", system_prompt),
    ("human", "Context:\n{context}\n\nQuestion: {question}")
])
def retrieve(state: State):
    """Retrieval node: look up documents in the main vector store that match the question.

    Returns a state update of the form ``{"context": [documents]}``.
    """
    query = state["question"]
    matches = vector_store.similarity_search(query)
    return {"context": matches}
def generate(state: State):
    """Generation node: answer the question with the LLM, using retrieved context and chat history.

    Args:
        state: Pipeline state. Reads ``question`` and ``context``. ``history`` is optional;
            when present it is a list of dicts with ``role`` and ``content`` keys.

    Returns:
        dict: A state update of the form ``{"answer": <text of the LLM reply>}``.
    """
    docs_content = "\n\n".join(doc.page_content for doc in state["context"])

    # Map the chat roles we understand onto their LangChain message classes.
    role_to_message = {"user": HumanMessage, "assistant": AIMessage}
    chat_history = []
    # `history` may be absent (evaluation runs) or None, so fall back to an empty list.
    for msg in state.get("history") or []:
        message_cls = role_to_message.get(msg.get("role"))
        if message_cls is None:
            # Turns with an unknown role are skipped.
            continue
        chat_history.append(message_cls(content=msg["content"]))

    # The template renders as [system message, human message].
    system_message, *current_turn = prompt.format_messages(
        question=state["question"],
        context=docs_content,
    )
    # Keep the system prompt first and place the history between it and the new question.
    # Appending the rendered prompt after the history would leave the system message in
    # the middle of the conversation, where chat-model integrations may ignore or reject it.
    full_messages = [system_message, *chat_history, *current_turn]
    response = llm.invoke(full_messages)
    return {"answer": response.content}

# Build the chatbot pipeline: START -> retrieve -> generate.
graph_builder = StateGraph(State)
graph_builder.add_sequence([retrieve, generate])
graph_builder.add_edge(START, "retrieve")
graph = graph_builder.compile()

In [29]:
def extract_images_from_pdf(pdf_path, output_folder="extracted_images"):
    """Extract every embedded image of a PDF and write each one into ``output_folder``.

    Args:
        pdf_path: Path of the PDF to scan.
        output_folder: Directory that receives the image files; created if it is missing.

    Returns:
        list: ``(page_number, image_path)`` tuples, with page numbers starting at 1.
    """
    images = []
    # Open the document in a context manager so it is closed even when extraction fails.
    with fitz.open(pdf_path) as doc:
        os.makedirs(output_folder, exist_ok=True)
        for page_index in range(len(doc)):
            page = doc.load_page(page_index)
            page_number = page_index + 1
            for img_index, img in enumerate(page.get_images(full=True)):
                # The first entry of each image record is the xref used to pull its bytes.
                base_image = doc.extract_image(img[0])
                image_filename = f"page{page_number}_img{img_index}.{base_image['ext']}"
                image_path = os.path.join(output_folder, image_filename)
                with open(image_path, "wb") as f:
                    f.write(base_image["image"])
                images.append((page_number, image_path))
    return images

In [30]:
def is_good_image(image_path):
    """Decide whether an extracted image is worth captioning.

    The image is loaded in grayscale and rejected when it cannot be read, when either side
    is shorter than 100 pixels, when its pixel variance is below 500 (a flat image), or
    when Canny edge detection marks fewer than 500 edge pixels.

    Returns:
        bool: True when the image passes every check, otherwise False.
    """
    gray = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    if gray is None:
        return False

    height, width = gray.shape
    too_small = height < 100 or width < 100
    if too_small:
        return False

    # A low variance means nearly uniform pixel values.
    if np.var(gray) < 500:
        return False

    # Diagrams and figures produce many edge pixels; require at least 500 of them.
    edge_map = cv2.Canny(gray, 100, 200)
    return np.count_nonzero(edge_map) >= 500

In [31]:
def caption_images(select_images):
    """Caption each image with Gemini so the captions can be indexed alongside the text.

    Args:
        select_images: Iterable of image file paths.

    Returns:
        dict: Maps each image path to its caption. Images whose captioning fails map to
        the placeholder ``"Could not generate caption."``; the error is printed.
    """
    captioner = ChatGoogleGenerativeAI(model="gemini-2.0-flash-lite")
    captions = {}
    for image_file_path in select_images:
        try:
            # Reading happens inside the try block so that one unreadable file is reported
            # like any other captioning failure instead of aborting the whole batch.
            with open(image_file_path, "rb") as image_file:
                encoded_image = base64.b64encode(image_file.read()).decode("utf-8")
            # The images are not always PNGs, so derive the MIME type from the file
            # extension and fall back to PNG only when the extension is unknown.
            mime_type = mimetypes.guess_type(image_file_path)[0] or "image/png"
            caption_query = HumanMessage(
                content=[
                    {"type": "text", "text": f"An image is saved under: {image_file_path}. Start your answer with 'This is an image on page number --.' And continue. Remember the page number. Describe the local image."},
                    {"type": "image_url", "image_url": f"data:{mime_type};base64,{encoded_image}"},
                ]
            )
            result_local = captioner.invoke([caption_query])
            captions[image_file_path] = result_local.content
        except Exception as e:
            print(f"Error captioning {image_file_path}: {e}")
            captions[image_file_path] = "Could not generate caption."
    return captions

In [32]:
def handle_uploaded_file(tempfile):
    """Index an uploaded PDF (text chunks plus image captions) into the main vector store.

    Args:
        tempfile: The upload handed over by the upload button: either an object exposing
            the file path as ``.name`` or the path itself.

    Returns:
        str: A status message for the UI, reporting either a parse failure or the number
        of text chunks and image captions that were indexed.
    """
    if tempfile is None:
        return "No file was uploaded."
    # Accept both a file-like object with `.name` and a plain path string.
    filepath = getattr(tempfile, "name", tempfile)
    try:
        docs = PyPDFLoader(filepath).load()
    except Exception as e:
        return f"Couldn't parse the PDF: {e}"

    # Split the text into overlapping chunks and index them.
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
    all_splits = text_splitter.split_documents(docs)
    # Skip the write when there is nothing to add (e.g. a PDF without extractable text);
    # an empty batch gives the index no vectors and may raise.
    if all_splits:
        vector_store.add_documents(documents=all_splits)

    # Extract the embedded images and keep only those that pass the OpenCV quality checks.
    good_images = [
        path
        for _page_number, path in extract_images_from_pdf(filepath)
        if is_good_image(path)
    ]

    # Caption the remaining images and wrap each caption in a Document for indexing.
    image_docs = [
        Document(
            page_content=caption,
            metadata={"source": os.path.basename(image_path), "type": "image"},
        )
        for image_path, caption in caption_images(good_images).items()
    ]
    # Many PDFs have no usable images, so guard the empty batch here as well.
    if image_docs:
        vector_store.add_documents(image_docs)

    return f"File uploaded! {len(all_splits)} text chunks and {len(image_docs)} image captions indexed for RAG."

In [33]:
def handle_query(message, history):
    """Run the retrieve -> generate graph for one chat message and return the answer text."""
    pipeline_input = {"question": message, "history": history}
    return graph.invoke(pipeline_input)["answer"]

In [34]:
# Evaluation setup: the chatbot is graded against a fixed sample PDF (MOSFETs.pdf).
# A separate FAISS store keeps the sample material out of the main vector store.
test_index = faiss.IndexFlatL2(embedding_dim)
test_vector_store = FAISS(
    index=test_index,
    embedding_function=embeddings,
    index_to_docstore_id={},
    docstore=InMemoryDocstore(),
)

# Load the sample PDF, chunk it with the same splitter settings as the upload path, and index it.
eval_loader = PyPDFLoader("MOSFETs.pdf")
eval_docs = eval_loader.load()
text_splitter = RecursiveCharacterTextSplitter(chunk_overlap=200, chunk_size=1000)
eval_splits = text_splitter.split_documents(eval_docs)
_ = test_vector_store.add_documents(eval_splits)


def retrieve_eval(state: State):
    """Retrieval node for evaluation: searches the sample-PDF store instead of the main one."""
    query = state["question"]
    return {"context": test_vector_store.similarity_search(query)}


# Same pipeline shape as the chatbot graph, with the evaluation retriever in front of the
# unchanged `generate` node.
eval_graph_builder = StateGraph(State)
eval_graph_builder.add_sequence([retrieve_eval, generate])
eval_graph_builder.add_edge(START, "retrieve_eval")
eval_graph = eval_graph_builder.compile()
# eval_graph is the pipeline used for evaluation.
# questions.csv holds the list of 10 questions that are asked to the chatbot and then graded.

In [35]:
# Load the evaluation sheet into a dataframe so the answers and grades can be stored beside
# the questions.
df = pd.read_csv("questions.csv", encoding="windows-1252")

# Questions and reference answers, as string columns and as plain lists.
questions = df["Questions"].astype(str)
true_responses = df["True Response"].astype(str)
llm_questions = list(questions)
true_response = list(true_responses)

# Ask the evaluation pipeline every test question and collect the answers in order.
llm_responses = [
    eval_graph.invoke({"question": question})["answer"]
    for question in llm_questions
]

# Store the answers next to their questions and save the sheet to a csv file.
df["LLM Response"] = llm_responses
df.to_csv('llm_output.csv', index=False)
df.head()

[]
[]
[]
[]
[]
[]
[]
[]
[]
[]


Unnamed: 0,Questions,LLM Response,True Response,Grade
0,What is the body effect in MOSFETs?,"Okay, let's break down the body effect in MOSF...",Sure! Here's a breakdown: The Body Effect - It...,
1,What is a FET?,"Based on the provided text, here's what a FET ...",Got it! Let’s simplify: FET (Field Effect Tran...,
2,What is a MOSFET?,"Based on the context you provided, here's what...",Absolutely! Here’s the scoop: MOSFET = Metal-O...,
3,Why are MOSFETs also called IGFETs?,"Based on the provided text, here's why MOSFETs...",Great question! Let’s break it down: IGFET = I...,
4,What is a PMOS?,"Based on the provided text, here's a breakdown...",Happy to help! PMOS (P-type MOSFET) - Uses hol...,


In [36]:
# Calling Gemini to grade the responses.
llm_grader = ChatGoogleGenerativeAI(
    model="gemini-2.0-flash",
    temperature=0,
    max_tokens=None,
    timeout=None,
    max_retries=2,
)

# System prompt instructing the grader. It has its own name so that running this cell does
# not overwrite the chatbot's `system_prompt`.
grader_system_prompt = """
    You are an expert response grader designed for LLMs, tasked with comparing the three given parameters:
        The Query
        The LLM's Response
        The True Response
    You are to compare the LLM's response to the True response and assign a grade to the LLM's response on a scale of 1-4,
    where:
        1 = The response is entirely irrelevant, and does not match the true response in words or gist.
        2 = The response is similar to the truth, but lacks in either information or context.
        3 = The response is close to the true response, but can be improved upon.
        4 = The response matches the true response completely. It is perfect.
    Only put out the grade (1-4) of the response after you have made the comparison. Think about it logically.
    The input is given as three labelled sections: Query, LLM Response and True Response.
"""
# A template for the model to run on.
query_answer = ChatPromptTemplate([
    ("system", grader_system_prompt),
    ("human", "Compare:\n\n{user_input}")
])


def _parse_grade(reply):
    """Return the first digit 1-4 found in the grader's reply as an int.

    Raises:
        ValueError: If the reply contains no digit between 1 and 4.
    """
    for char in str(reply):
        if char in "1234":
            return int(char)
    raise ValueError(f"Grader reply contains no grade between 1 and 4: {reply!r}")


# Pair every LLM response with its reference answer, in question order.
answer_pair = [
    [llm_answer, reference]
    for llm_answer, reference in zip(llm_responses, true_response)
]
# Grades given by the grader, one per question.
grades = []
# Grade one question at a time. The query is sent along with both answers, because the
# system prompt tells the grader it receives all three, and the sections are labelled so
# the grader does not have to interpret a Python list repr.
for question, (llm_answer, reference) in zip(llm_questions, answer_pair):
    user_input = (
        f"Query:\n{question}\n\n"
        f"LLM Response:\n{llm_answer}\n\n"
        f"True Response:\n{reference}"
    )
    formatted = query_answer.format_messages(user_input=user_input)
    response = llm_grader.invoke(formatted)
    # Store the grade as a number; a reply without a usable grade fails here, at its source.
    grades.append(_parse_grade(response.content))
# Updating the dataframe with the new grades.
df["Grade"] = grades
df.head()

Unnamed: 0,Questions,LLM Response,True Response,Grade
0,What is the body effect in MOSFETs?,"Okay, let's break down the body effect in MOSF...",Sure! Here's a breakdown: The Body Effect - It...,3
1,What is a FET?,"Based on the provided text, here's what a FET ...",Got it! Let’s simplify: FET (Field Effect Tran...,3
2,What is a MOSFET?,"Based on the context you provided, here's what...",Absolutely! Here’s the scoop: MOSFET = Metal-O...,3
3,Why are MOSFETs also called IGFETs?,"Based on the provided text, here's why MOSFETs...",Great question! Let’s break it down: IGFET = I...,3
4,What is a PMOS?,"Based on the provided text, here's a breakdown...",Happy to help! PMOS (P-type MOSFET) - Uses hol...,4


In [37]:
# Summarising the grades stored in the dataframe to assess the chatbot's accuracy.
import statistics

# Grades as integers; the conversion fails loudly if any grade is not a plain number.
grade_list = df["Grade"].astype(int)
avg = statistics.mean(grade_list)
# Average grade expressed as a share of the maximum grade (4).
percentage = (avg/4) * 100


def update():
    """Return the evaluation statistics as display text for the stats box.

    Reads the module-level ``avg`` and ``percentage`` at call time.
    """
    return (
        "Current Stats of the Chatbot:\n\n"
        f"Accuracy Score: {avg:.2f}/4\n"
        f"Accuracy Percentage: {percentage:.1f}%"
    )


# Last expression of the cell, so the notebook displays the value.
percentage

In [38]:
# Gradio front end for the RAG pipeline: a chat panel on the left and a narrower side
# column with the PDF upload and the evaluation stats on the right.
with gr.Blocks(fill_width=True, fill_height=True) as demo:
    gr.Markdown("# Conversational RAG ")
    gr.Markdown("Upload a `.pdf` file, and ask the AI about it!")
    with gr.Row():
        # Wide column (scale 4): the chat interface, backed by handle_query.
        with gr.Column(scale=4):
            chatbox = gr.ChatInterface(
                handle_query,
                type="messages",
                flagging_mode="manual",
                flagging_options=["Like", "Spam", "Inappropriate", "Other"],
                save_history=True,
            )
        # Narrow column (scale 1): upload controls and chatbot stats.
        with gr.Column(scale=1):
            file_upload = gr.UploadButton("Upload PDF", file_types=[".pdf"])
            # Textbox that receives the status string returned by handle_uploaded_file.
            output = gr.Textbox()
            file_upload.upload(fn=handle_uploaded_file, inputs=file_upload, outputs=output)
            # Read-only box filled with the text returned by update() when the button is clicked.
            textbox = gr.Textbox(label="📊 Chatbot Stats", interactive=False)
            refresh_button = gr.Button("Check Stats")
            refresh_button.click(fn=update, inputs=[], outputs=textbox)

In [39]:
# Launch the Gradio app; the guard means this only happens when the code runs as the main
# module rather than being imported.
if __name__ == "__main__":
    demo.launch()

* Running on local URL:  http://127.0.0.1:7862
* To create a public link, set `share=True` in `launch()`.


[]
[{'role': 'user', 'metadata': None, 'content': "Hi there! I've uploaded a document here, and just wanted to ask--how many images are there in this document? Ones you can interpret? Diagrams?", 'options': None}, {'role': 'assistant', 'metadata': None, 'content': "Hey there! 👋 I've reviewed the document you uploaded. Based on my analysis, I've found **four** images that I can interpret. They seem to be diagrams related to MOSFETs.", 'options': None}]
[{'role': 'user', 'metadata': None, 'content': "Hi there! I've uploaded a document here, and just wanted to ask--how many images are there in this document? Ones you can interpret? Diagrams?", 'options': None}, {'role': 'assistant', 'metadata': None, 'content': "Hey there! 👋 I've reviewed the document you uploaded. Based on my analysis, I've found **four** images that I can interpret. They seem to be diagrams related to MOSFETs.", 'options': None}, {'role': 'user', 'metadata': None, 'content': 'What diagrams have you seen, and what do the