<a href="https://colab.research.google.com/github/itrinku/RAG_CHATBOT/blob/main/Chatbot.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [6]:
from google.colab import files

# Open the Colab upload widget; the returned mapping is keyed by file name.
uploaded = files.upload()

from langchain_community.document_loaders import PyPDFLoader, Docx2txtLoader

# Only the first uploaded file is used. Fail with a clear message instead of
# an IndexError when nothing was uploaded (e.g. the dialog was cancelled).
file_name = next(iter(uploaded), None)
if file_name is None:
    raise ValueError("No file was uploaded.")

# Match the extension case-insensitively so "Resume.PDF" is accepted too.
normalized_name = file_name.lower()
if normalized_name.endswith(".pdf"):
    loader = PyPDFLoader(file_name)
elif normalized_name.endswith(".docx"):
    loader = Docx2txtLoader(file_name)
else:
    raise ValueError("Only PDF or DOCX files are supported.")

# Load the file into the list of documents consumed by the text splitter.
documents = loader.load()

from langchain.text_splitter import RecursiveCharacterTextSplitter

# Split the loaded documents into chunks of up to 500 characters, with a
# 50-character overlap between neighbouring chunks.
splitter = RecursiveCharacterTextSplitter(
    chunk_size=500,
    chunk_overlap=50,
)
docs = splitter.split_documents(documents)

# Import from langchain_community (already used above for the document
# loaders); the `langchain.embeddings` / `langchain.vectorstores` paths are
# deprecated re-exports.
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS

# A document with no extractable text (e.g. a scanned PDF) yields no chunks;
# report that explicitly instead of failing inside the index construction.
if not docs:
    raise ValueError("No text could be extracted from the uploaded document.")

# Embed every chunk with a sentence-transformers model and index the vectors
# in an in-memory FAISS store for similarity search.
embedding = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
vectorstore = FAISS.from_documents(docs, embedding)

import dspy
from transformers import pipeline

# Hugging Face text2text-generation pipeline used to answer questions.
qa_pipeline = pipeline(
    task="text2text-generation",
    model="google/flan-t5-base",
)

class HFWrapper(dspy.Predict):
    """DSPy predictor that answers a question with a Hugging Face pipeline.

    The predictor is declared with the signature ``context, query -> answer``
    and delegates text generation to the pipeline given at construction.
    """

    def __init__(self, pipe):
        """Store ``pipe``, the callable used to generate text from a prompt."""
        super().__init__(signature="context, query -> answer")
        self.pipe = pipe

    def forward(self, context, query):
        """Build a prompt from ``context`` and ``query`` and generate an answer.

        Returns:
            A dict with a single key, ``"answer"``, holding the generated text
            with surrounding whitespace stripped.
        """
        prompt = f"Context:\n{context}\n\nQuestion:\n{query}\n\nAnswer:"
        generations = self.pipe(prompt, max_new_tokens=256)
        # Only the first generated candidate is used.
        generated_text = generations[0]["generated_text"]
        return {"answer": generated_text.strip()}

# Generator shared by the RAG module defined below.
llm = HFWrapper(qa_pipeline)

# The number of results must be passed through `search_kwargs`; a bare `k=3`
# keyword is not a retriever option, so the default result count would be used.
retriever = vectorstore.as_retriever(
    search_type="similarity",
    search_kwargs={"k": 3},
)

class SimpleRAG(dspy.Module):
    """Minimal retrieve-then-generate module.

    Args:
        retriever: Object whose ``invoke(query)`` returns documents exposing
            a ``page_content`` attribute.
        generator: Optional callable accepting ``context=`` and ``query=``
            keyword arguments. Defaults to the module-level ``llm`` so that
            existing ``SimpleRAG(retriever)`` calls keep working.
    """

    def __init__(self, retriever, generator=None):
        super().__init__()
        self.retriever = retriever
        # Allow the generator to be injected rather than always depending on
        # the module-level global.
        self.generate = llm if generator is None else generator

    def forward(self, query):
        """Retrieve documents for ``query`` and return the generator's output."""
        # `invoke` is the current retriever entry point; it replaces the
        # deprecated `get_relevant_documents`.
        docs = self.retriever.invoke(query)
        context = "\n\n".join(doc.page_content for doc in docs)
        return self.generate(context=context, query=query)

import html

from IPython.display import display, Markdown

rag = SimpleRAG(retriever)

query = input("Enter your question: ")
# Call the module itself rather than `.forward(...)`; calling the instance is
# DSPy's intended entry point and dispatches to `forward`.
response = rag(query=query)

answer = response['answer']

# The question is typed by the user and the answer is generated from the
# uploaded document, so both are untrusted text. Escape them before embedding
# them in HTML so characters such as "<" or "&" are shown literally rather
# than being interpreted as markup.
safe_query = html.escape(query)
safe_answer = html.escape(answer)

styled_output = f"""
<div style="border: 2px solid #4CAF50; border-radius: 10px; padding: 20px; background-color: #f4fff4;">
<h3 style="color: #1B5E20;"> <strong>Question Asked 1 </strong></h3>
<blockquote style="color: #0D47A1; font-size: 16px;">{safe_query}</blockquote>
<h3 style="color: #2E7D32;"> <strong>Answer from Document</strong></h3>
<pre style="background-color: #e8f5e9; padding: 15px; border-radius: 8px; font-size: 16px;">{safe_answer}</pre>
<p style="text-align: right; color: #777;"></p>
</div>
"""

display(Markdown(styled_output))


Saving Resume.pdf to Resume (6).pdf


Device set to use cpu


Enter your question: how many years of experience.



<div style="border: 2px solid #4CAF50; border-radius: 10px; padding: 20px; background-color: #f4fff4;">
<h3 style="color: #1B5E20;"> <strong>Question Asked </strong></h3>
<blockquote style="color: #0D47A1; font-size: 16px;">1 how many years of experience.</blockquote>
<h3 style="color: #2E7D32;"> <strong>Answer from Document</strong></h3>
<pre style="background-color: #e8f5e9; padding: 15px; border-radius: 8px; font-size: 16px;">4.1</pre>
<p style="text-align: right; color: #777;"></p>
</div>
