<a href="https://colab.research.google.com/github/VedikaSingh1/DocAnalyzer/blob/master/RAG_Backend.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
pip install fastapi uvicorn python-multipart nest_asyncio pyngrok

Collecting pyngrok
  Downloading pyngrok-7.2.1-py3-none-any.whl.metadata (8.3 kB)
Downloading pyngrok-7.2.1-py3-none-any.whl (22 kB)
Installing collected packages: pyngrok
Successfully installed pyngrok-7.2.1


In [None]:
pip install langchain pymupdf faiss-cpu nltk langchain-community langchain_google_genai

Collecting pymupdf
  Downloading PyMuPDF-1.24.14-cp39-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (3.4 kB)
Collecting faiss-cpu
  Downloading faiss_cpu-1.9.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (4.4 kB)
Collecting langchain-community
  Downloading langchain_community-0.3.7-py3-none-any.whl.metadata (2.9 kB)
Collecting langchain_google_genai
  Downloading langchain_google_genai-2.0.5-py3-none-any.whl.metadata (3.6 kB)
Collecting SQLAlchemy<3,>=1.4 (from langchain)
  Downloading SQLAlchemy-2.0.35-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (9.6 kB)
Collecting dataclasses-json<0.7,>=0.5.7 (from langchain-community)
  Downloading dataclasses_json-0.6.7-py3-none-any.whl.metadata (25 kB)
Collecting httpx-sse<0.5.0,>=0.4.0 (from langchain-community)
  Downloading httpx_sse-0.4.0-py3-none-any.whl.metadata (9.0 kB)
Collecting pydantic-settings<3.0.0,>=2.4.0 (from langchain-community)
  Downloading pydantic_settings-2.6.1-p

In [None]:
from fastapi import FastAPI, UploadFile, Form
from fastapi.responses import JSONResponse
import os
from google.colab import userdata
from langchain.document_loaders import PyMuPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain.chains import RetrievalQAWithSourcesChain
from langchain_google_genai import ChatGoogleGenerativeAI, GoogleGenerativeAIEmbeddings
import langchain
import nest_asyncio
from pyngrok import ngrok
import uvicorn
from tempfile import NamedTemporaryFile

In [None]:
# FastAPI application object; the endpoint decorators in later cells
# register their routes on it.
app = FastAPI()

# Module-level state shared between endpoints. Both start out unset and are
# assigned by the upload endpoint once a PDF has been processed.
vectorstore, chain = None, None

In [None]:
# Copy the Gemini API key from Colab's secret store into the process
# environment (presumably where the Google GenAI clients created in the next
# cell look for it — confirm against the langchain_google_genai docs).
os.environ.update(GOOGLE_API_KEY=userdata.get('GOOGLE_API_KEY'))

In [None]:
# Embedding model handed to FAISS when the vectorstore is built.
gemini_embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")

# Chat model used by the retrieval chain to answer questions.
llm = ChatGoogleGenerativeAI(
    **{
        "model": "gemini-1.5-flash-8b",
        "temperature": 0.0,
        "max_tokens": None,   # no explicit cap passed
        "timeout": None,      # no explicit timeout passed
        "max_retries": 2,
    }
)

In [None]:
@app.post("/upload_pdf/")
async def upload_pdf(file: UploadFile):
    """
    Endpoint to upload a PDF and initialize the FAISS vectorstore.

    The upload is written to a temporary file, loaded with PyMuPDFLoader,
    split into chunks, embedded into a FAISS index, and wrapped in a
    RetrievalQAWithSourcesChain. On success the module-level ``vectorstore``
    and ``chain`` are replaced together.

    Args:
        file: The uploaded PDF.

    Returns:
        JSONResponse:
          * 200 with ``message`` and ``num_chunks`` on success.
          * 400 with ``error`` if the document produced no text chunks.
          * 500 with ``error`` and ``details`` if processing failed.
        On any non-200 outcome the previously loaded ``vectorstore`` /
        ``chain`` (if any) are left untouched.
    """
    global vectorstore, chain

    temp_path = None
    try:
        # Save the uploaded file temporarily; the loader needs a path on disk.
        with NamedTemporaryFile(delete=False) as temp_file:
            temp_path = temp_file.name
            temp_file.write(await file.read())

        # Load and process the PDF
        data = PyMuPDFLoader(temp_path).load()

        # Split data into chunks
        text_splitter = RecursiveCharacterTextSplitter(
            separators=['\n\n', '\n', '.', ','],
            chunk_size=500
        )
        docs = text_splitter.split_documents(data)

        # Nothing to index (e.g. no extractable text): report it instead of
        # letting the vectorstore construction fail on an empty list.
        if not docs:
            return JSONResponse(
                {"error": "No text could be extracted from the uploaded PDF."},
                status_code=400
            )

        # Build into locals first so a failure half-way through cannot leave
        # the globals pointing at a mismatched vectorstore/chain pair.
        new_vectorstore = FAISS.from_documents(docs, gemini_embeddings)
        new_chain = RetrievalQAWithSourcesChain.from_llm(
            llm=llm, retriever=new_vectorstore.as_retriever()
        )
    except Exception as e:
        # Same error shape as the query endpoint.
        return JSONResponse(
            {"error": "An error occurred while processing the PDF.", "details": str(e)},
            status_code=500
        )
    finally:
        # Always clean up the temporary file, including on failure.
        if temp_path is not None and os.path.exists(temp_path):
            os.remove(temp_path)

    vectorstore, chain = new_vectorstore, new_chain

    return JSONResponse({"message": "PDF uploaded and processed successfully!", "num_chunks": len(docs)})

In [None]:
@app.post("/query/")
async def query_paper(question: str = Form(...)):
    """
    Single endpoint to handle a query and return the response.

    Args:
        question: The question to ask, sent as a form field.

    Returns:
        JSONResponse:
          * 200 with ``result`` holding the chain's outputs.
          * 400 with ``error`` if no PDF has been uploaded yet or the
            question is blank.
          * 500 with ``error`` and ``details`` if the chain raised.
    """
    # `chain` is only read here, so no `global` declaration is needed.
    if chain is None:
        # Return an error if no PDF has been uploaded yet
        return JSONResponse(
            {"error": "No PDF has been uploaded yet. Please upload a PDF first."},
            status_code=400
        )

    if not question.strip():
        # Reject blank questions instead of spending an LLM call on them.
        return JSONResponse(
            {"error": "The question must not be empty."},
            status_code=400
        )

    try:
        # Run the query through the chain. `invoke` replaces the deprecated
        # direct call `chain(...)`. LangChain's global debug flag is no longer
        # switched on per request: it is process-wide and logs the retrieved
        # document text; enable it once in a setup cell if tracing is needed.
        result = chain.invoke({"question": question}, return_only_outputs=True)

        # Return the result as JSON
        return JSONResponse({"result": result})
    except Exception as e:
        # Handle unexpected errors
        return JSONResponse(
            {"error": "An error occurred while processing the query.", "details": str(e)},
            status_code=500
        )

In [None]:
# Authenticate with ngrok using the token stored in Colab secrets.
ngrok.set_auth_token(userdata.get('NGROK_AUTH'))
# Allow uvicorn to start an event loop inside the notebook's running loop.
nest_asyncio.apply()
# Open a public tunnel to the local port the server listens on.
public_url = ngrok.connect(8000)
print(f"Public URL: {public_url}")
try:
    # Blocks until the server is stopped (e.g. by interrupting the cell).
    uvicorn.run(app, host="0.0.0.0", port=8000)
finally:
    # Close the tunnel when the server stops so re-running this cell does not
    # accumulate open tunnels.
    ngrok.disconnect(public_url.public_url)

Public URL: NgrokTunnel: "https://647f-34-125-191-5.ngrok-free.app" -> "http://localhost:8000"


INFO:     Started server process [610]
INFO:     Waiting for application startup.
INFO:     Application startup complete.
INFO:     Uvicorn running on http://0.0.0.0:8000 (Press CTRL+C to quit)


INFO:     182.66.218.123:0 - "POST /upload_pdf/ HTTP/1.1" 200 OK


  result = chain({"question": question}, return_only_outputs=True)


[32;1m[1;3m[chain/start][0m [1m[chain:RetrievalQAWithSourcesChain] Entering Chain run with input:
[0m{
  "question": "What is backpropagation?"
}
[32;1m[1;3m[chain/start][0m [1m[chain:RetrievalQAWithSourcesChain > chain:MapReduceDocumentsChain] Entering Chain run with input:
[0m[inputs]
[32;1m[1;3m[chain/start][0m [1m[chain:RetrievalQAWithSourcesChain > chain:MapReduceDocumentsChain > chain:LLMChain] Entering Chain run with input:
[0m{
  "input_list": [
    {
      "context": "4\n3) Backward Propagation and Weight Updates:\n• Compute gradients of the loss with respect to the weights\nby the usage of backpropagation:\n∂L\n∂w = Backprop(L, w)\n(18)\n• Update the weights using an optimizer (Adam):\nwt+1 = wt −η ∂L\n∂wt\n(19)\nwhere η stands for learning rate.\n4) Validation:\n• Evaluate the model against the validation data after each\nepoch.\n• Compute the accuracy and loss on the validation set:\nAccuracy = Number of Correct Predictions\nTotal Number of Predictions\n(20)\

INFO:     Shutting down
INFO:     Waiting for application shutdown.
INFO:     Application shutdown complete.
INFO:     Finished server process [610]
