<a href="https://colab.research.google.com/github/jith101/10th_science_RAG_Project/blob/main/10th_science_RAG_Project.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install --upgrade --quiet pypdf
! pip install langchain_community tiktoken langchain-openai langchainhub chromadb langchain
!pip install --upgrade --quiet  langchain sentence_transformers
!pip install -qU langchain-groq
!pip install -U langchain langchain-community
! pip install streamlit -q



In [None]:
!pip install gradio

Collecting gradio
  Downloading gradio-4.44.0-py3-none-any.whl.metadata (15 kB)
Collecting aiofiles<24.0,>=22.0 (from gradio)
  Downloading aiofiles-23.2.1-py3-none-any.whl.metadata (9.7 kB)
Collecting ffmpy (from gradio)
  Downloading ffmpy-0.4.0-py3-none-any.whl.metadata (2.9 kB)
Collecting gradio-client==1.3.0 (from gradio)
  Downloading gradio_client-1.3.0-py3-none-any.whl.metadata (7.1 kB)
Collecting pydub (from gradio)
  Downloading pydub-0.25.1-py2.py3-none-any.whl.metadata (1.4 kB)
Collecting python-multipart>=0.0.9 (from gradio)
  Downloading python_multipart-0.0.9-py3-none-any.whl.metadata (2.5 kB)
Collecting ruff>=0.2.2 (from gradio)
  Downloading ruff-0.6.6-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (25 kB)
Collecting semantic-version~=2.0 (from gradio)
  Downloading semantic_version-2.10.0-py2.py3-none-any.whl.metadata (9.7 kB)
Collecting tomlkit==0.12.0 (from gradio)
  Downloading tomlkit-0.12.0-py3-none-any.whl.metadata (2.7 kB)
Collecting websocket

## Main code: build the RAG pipeline and launch the Gradio chat UI

In [None]:
import os
import gradio as gr
from langchain_groq import ChatGroq
from langchain_core.prompts import ChatPromptTemplate
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.vectorstores import Chroma
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_community.embeddings import HuggingFaceInferenceAPIEmbeddings

# Credentials: prefer values already present in the environment (for example
# set with `%env` or a secrets manager) so real keys never need to be typed
# into the notebook. The placeholders are only a fallback and must be replaced
# before the remote services will accept requests.
HF_API_KEY = os.environ.get("HUGGINGFACEHUB_API_TOKEN", "< your HuggingFace API Key >")
# setdefault: do not overwrite a real key that is already exported.
os.environ.setdefault("GROQ_API_KEY", "< your groq API Key >")

# Set up embeddings and document loader
embeddings = HuggingFaceInferenceAPIEmbeddings(
    api_key=HF_API_KEY,
    model_name="sentence-transformers/all-MiniLM-l6-v2"
)
file_path = "/content/jesc101.pdf"  # Update this to the correct path for your PDF
loader = PyPDFLoader(file_path)
docs = loader.load()

# Split documents into overlapping chunks before embedding them
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
splits = text_splitter.split_documents(docs)

# Create vector store and expose it as a retriever for the RAG chain
vectorstore = Chroma.from_documents(documents=splits, embedding=embeddings)
retriever = vectorstore.as_retriever()

# Conversation state shared with chatbot():
#   question_h / answer_h - questions and answers of the current conversation window
#   r                     - number of exchanges recorded since the last history reset
question_h = []
answer_h = []
r = 0

# Chat model used to generate answers
llm = ChatGroq(model="llama3-8b-8192")

# Post-processing
def format_docs(docs):
    """Merge the text of the given documents into a single context string.

    Args:
        docs: Iterable of objects exposing a ``page_content`` string attribute.

    Returns:
        The page contents in input order, separated by one blank line; an
        empty string when ``docs`` is empty.
    """
    page_texts = [document.page_content for document in docs]
    separator = "\n\n"
    return separator.join(page_texts)

# Define the chatbot function
def chatbot(question):
    """Answer ``question`` with the RAG chain, carrying short-term conversation history.

    The question is used both as the retrieval query and as the prompt's
    ``{question}`` value. Question/answer pairs already stored in the
    module-level ``question_h`` / ``answer_h`` lists are included in the prompt.
    After every fifth recorded exchange (counted in ``r``) the history is
    cleared and the counter restarts at zero.

    Args:
        question: The student's question as plain text.

    Returns:
        The model's answer as a string.

    Raises:
        Any exception raised by the retriever or the model call propagates to
        the caller; in that case the history and the counter are left unchanged.
    """
    global r, question_h, answer_h

    # Only completed exchanges are stored, so zip() pairs every question with
    # its own answer.
    history_text = "".join(
        f"Q: {q}\nA: {a}\n\n" for q, a in zip(question_h, answer_h)
    )

    instructions = (
        "You are a senior high school teacher, and you are trying to solve a student's doubt. "
        "Use the following pieces of retrieved context to answer the question and provide a detailed response."
    )
    if history_text:
        template = (
            instructions
            + "\nQuestion: {question}\nConversation History\n{history} \nContext: {context} \nAnswer: "
        )
    else:
        template = instructions + "\nQuestion: {question} \nContext: {context} \nAnswer: "

    prompt = ChatPromptTemplate.from_messages([("human", template)])
    if history_text:
        # Supply the history as a template *value* instead of splicing it into
        # the template text: earlier answers may contain "{" or "}" (formulas,
        # code), which would otherwise be parsed as template placeholders.
        prompt = prompt.partial(history=history_text)

    # Create the RAG chain; every turn uses the same configured model.
    rag_chain = (
        {"context": retriever | format_docs, "question": RunnablePassthrough()}
        | prompt
        | llm
        | StrOutputParser()
    )

    # Get response
    resp = rag_chain.invoke(question)

    # Record the exchange only after a successful call so the two history
    # lists can never get out of step.
    question_h.append(question)
    answer_h.append(resp)

    # Increment counter and reset history if needed
    r += 1
    if r >= 5:
        question_h.clear()
        answer_h.clear()
        r = 0

    return resp

# Create the Gradio Blocks interface
with gr.Blocks() as demo:
    chatbot_ui = gr.Chatbot()
    msg = gr.Textbox(label="Enter your question here", placeholder="Type your message...")

    def respond(message, chat_history):
        """Handle one textbox submission.

        Args:
            message: Text the user submitted.
            chat_history: List of ``(user_message, bot_response)`` pairs
                currently shown in the chat widget (may be empty or ``None``).

        Returns:
            A pair ``("", chat_history)``: the empty string clears the textbox
            and the list becomes the new content of the chat widget.
        """
        chat_history = chat_history or []

        # Ignore blank submissions instead of spending a retrieval + model
        # call on them and polluting the conversation history.
        if not message or not message.strip():
            return "", chat_history

        response = chatbot(message)  # Call the existing chatbot function
        chat_history.append((message, response))
        return "", chat_history

    msg.submit(respond, [msg, chatbot_ui], [msg, chatbot_ui])

# Launch the interface
demo.launch()


Setting queue=True in a Colab notebook requires sharing enabled. Setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
Running on public URL: https://ddd9c91c8f3fc25031.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)


