In [None]:
# !pip install langchain_community langchain gradio faiss-gpu

In [None]:
from transformers import pipeline  
from sentence_transformers import SentenceTransformer 
from langchain.text_splitter import RecursiveCharacterTextSplitter  
from langchain_community.vectorstores import FAISS  
from langchain_community.document_loaders import PyPDFLoader 
from langchain.chains import RetrievalQA  
from langchain.embeddings import HuggingFaceEmbeddings
import gradio as gr
import warnings
warnings.filterwarnings('ignore')
from huggingface_hub import login


import os

# Authenticate with the Hugging Face Hub before downloading the model below.
# The token is read from the HF_TOKEN environment variable rather than being
# hard-coded in the notebook, so the secret never ends up in version control
# or in a shared copy of this file. When HF_TOKEN is unset, login() receives
# None and falls back to its interactive prompt.
login(token=os.environ.get("HF_TOKEN"))

# Text-generation pipeline created once at import time and reused by every
# query handled in retriever_qa().
llm_pipeline = pipeline("text-generation", model="meta-llama/Llama-3.2-3B-Instruct", device_map="auto")

def document_loader(file):
    """
    Load a PDF document and return its contents as loaded documents.

    Args:
        file (str | os.PathLike | object): Path to the PDF file, or an
            object that exposes the path through a ``name`` attribute
            (for example an uploaded temp-file handle).
    Returns:
        list: The documents returned by ``PyPDFLoader.load()``.
    Raises:
        AttributeError: If ``file`` is neither path-like nor has ``name``.
    """
    import os

    # A plain path has no ``.name`` attribute (and ``pathlib.Path.name`` is
    # only the basename), so path-likes are resolved with os.fspath() and
    # only other objects are asked for their ``name``.
    if isinstance(file, (str, os.PathLike)):
        pdf_path = os.fspath(file)
    else:
        pdf_path = file.name

    loader = PyPDFLoader(pdf_path)
    return loader.load()

def text_splitter(data, chunk_size=1000, chunk_overlap=50):
    """
    Split the loaded documents into chunks for processing.

    Args:
        data (list): A list of documents to be split.
        chunk_size (int): Maximum chunk length, measured with ``len``
            (i.e. in characters). Defaults to 1000.
        chunk_overlap (int): Number of characters shared between
            consecutive chunks. Defaults to 50.
    Returns:
        list: The chunks produced by ``split_documents``.
    """
    # Named ``splitter`` so the local does not shadow this function's own name.
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
        length_function=len,
    )
    return splitter.split_documents(data)

# Embedding models already constructed in this process, keyed by model name.
_EMBEDDING_MODEL_CACHE = {}


def get_embedding_model(model_name="sentence-transformers/all-MiniLM-L6-v2"):
    """
    Retrieve the embedding model for converting text to embeddings.

    The model is constructed on first use and then reused, so repeated
    queries do not pay the construction cost again.

    Args:
        model_name (str): Name of the sentence-embedding model to use.
            Defaults to ``sentence-transformers/all-MiniLM-L6-v2``.
    Returns:
        HuggingFaceEmbeddings: The embedding model instance.
    """
    model = _EMBEDDING_MODEL_CACHE.get(model_name)
    if model is None:
        model = HuggingFaceEmbeddings(model_name=model_name)
        _EMBEDDING_MODEL_CACHE[model_name] = model
    return model

def vector_database(chunks):
    """
    Build a FAISS vector store by embedding the given chunks.

    Args:
        chunks (list): Document chunks to embed and index.
    Returns:
        FAISS: The vector store built from ``chunks``.
    """
    embedder = get_embedding_model()
    return FAISS.from_documents(chunks, embedder)

def retriever(file):
    """
    Build a retriever over the contents of a PDF file.

    The file is loaded, split into chunks, embedded into a vector store,
    and that store is exposed as a retriever.

    Args:
        file: The PDF input, forwarded unchanged to ``document_loader``.
    Returns:
        The retriever returned by the vector store's ``as_retriever()``.
    """
    pages = document_loader(file)
    pieces = text_splitter(pages)
    store = vector_database(pieces)
    return store.as_retriever()

def retriever_qa(file, query):
    """
    Perform a retrieval-based question-answering process.

    Args:
        file: The uploaded PDF, forwarded to ``retriever``. May be ``None``
            when the user has not uploaded anything yet.
        query (str): The question to be answered.
    Returns:
        str: The generated answer, or a short explanatory message when the
        file or the question is missing or no context could be retrieved.
    """
    # Guard clauses: fail fast with a readable message instead of crashing
    # on a missing upload, or building a whole index for an empty question.
    if file is None:
        return "Please upload a PDF file before asking a question."
    if not query or not query.strip():
        return "Please enter a question."

    retriever_obj = retriever(file)
    # ``invoke`` is the current retriever entry point; it replaces the
    # deprecated ``get_relevant_documents``.
    docs = retriever_obj.invoke(query)
    context = "\n".join(doc.page_content for doc in docs)

    if not context.strip():
        return "No relevant information found in the document."

    prompt = f"Answer the question based on the following context:\n\n{context}\n\nQuestion: {query}\nAnswer:"
    # return_full_text=False requests only the newly generated text, not the
    # prompt echoed back.
    response = llm_pipeline(prompt, return_full_text=False, max_new_tokens=256, temperature=0.5)

    return response[0]['generated_text']

# Input widgets: the PDF to query and the user's question.
pdf_input = gr.File(label="Upload PDF File", file_count="single", file_types=['.pdf'], type="filepath")
query_input = gr.Textbox(label="Input Query", lines=2, placeholder="Type your question here...")

# Output widget showing the text returned by retriever_qa.
answer_output = gr.Textbox(label="Output")

# Wire the widgets to retriever_qa(file, query); input order matches the
# function's parameter order.
rag_application = gr.Interface(
    fn=retriever_qa,
    allow_flagging="never",
    inputs=[pdf_input, query_input],
    outputs=answer_output,
    title="RAG Chatbot",
    description="Upload a PDF document and ask any question. The chatbot will try to answer using the provided document.",
)

# Start the app; share=True also requests a public share link.
rag_application.launch(share=True)


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Device set to use cuda:0


* Running on local URL:  http://127.0.0.1:7860
* Running on public URL: https://9da28826da3b317e85.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)




Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
