# Project Structure

/fastapi-chatbot
│── /data
│   ├── codeprolk.pdf  # Your PDF
│── main.py            # FastAPI app
│── rag_pipeline.py    # RAG implementation
│── requirements.txt   # Dependencies
│── .env               # API key storage


## 🚀 Code Implementation
🔹 1️⃣ rag_pipeline.py (LangChain & FAISS Retrieval)
This module handles:

Loading PDFs

Chunking & Vector Storage

Querying with Hugging Face API

In [3]:
import os
from dotenv import load_dotenv
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnableLambda, RunnablePassthrough
from huggingface_hub import InferenceClient

# Load environment variables
# load_dotenv() must run before the os.getenv() lookup below so that values
# supplied through a .env file are visible in the process environment.
load_dotenv()
# os.getenv returns None when the variable is not set; the value is passed
# unchanged as the `token` argument of the InferenceClient created later.
HUGGINGFACE_API_KEY = os.getenv("HUGGINGFACE_API_KEY")

# Constants
# PDF_PATH is built from the PARENT of the current working directory, so it
# changes with the directory the process is started from.
# NOTE(review): the project layout lists data/ next to rag_pipeline.py; this
# path only matches that layout when the process runs from a sub-directory of
# the project root -- confirm against how the app is launched.
PDF_PATH = os.path.join(os.path.dirname(os.getcwd()), "data/codeprolk.pdf")
# Model identifier handed to the Hugging Face InferenceClient.
LLM_MODEL = "meta-llama/Llama-3.2-1B"
# Model identifier handed to HuggingFaceEmbeddings.
EMBEDDING_MODEL = "sentence-transformers/all-mpnet-base-v2"

# Initialize Embeddings
# Created once at import time and shared by load_and_preprocess_pdf(), which
# reads this module-level name when building the FAISS index.
embeddings = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL)

def load_and_preprocess_pdf(pdf_path):
    """Build a retriever over the contents of a PDF file.

    The PDF is read with ``PyPDFLoader``, split by a
    ``RecursiveCharacterTextSplitter`` (``chunk_size=400``,
    ``chunk_overlap=50``) and indexed in a FAISS vector store using the
    module-level ``embeddings`` object.

    Args:
        pdf_path: Location of the PDF to index.

    Returns:
        The object returned by ``as_retriever()`` on the FAISS vector store.
    """
    pages = PyPDFLoader(pdf_path).load()

    splitter = RecursiveCharacterTextSplitter(chunk_size=400, chunk_overlap=50)
    pieces = splitter.split_documents(pages)

    # Embedding of every chunk happens here, while the index is built.
    index = FAISS.from_documents(pieces, embedding=embeddings)
    return index.as_retriever()

# Load Retriever
# Runs at import time: the PDF at PDF_PATH is loaded, chunked and indexed
# before anything else in this module can be used.
retriever = load_and_preprocess_pdf(PDF_PATH)

# Initialize Hugging Face LLM Client
# HUGGINGFACE_API_KEY may be None if the environment variable was not set.
llm_client = InferenceClient(model=LLM_MODEL, token=HUGGINGFACE_API_KEY)

# Define Prompt Template
# The template exposes two placeholders, {question} and {context}; the chain
# defined further down must supply a value for each of them.
prompt_template = """
You are a helpful assistant. Answer the question based on the provided context.

Question: {question}

Context: {context}

Answer:
"""
# Wrap the raw template text in a ChatPromptTemplate so it can be composed
# with the other runnables using the `|` operator.
prompt = ChatPromptTemplate.from_template(prompt_template)

def retrieve_context(input_dict):
    """Fetch the passages the retriever returns for a question.

    Args:
        input_dict: Mapping with a ``"question"`` entry holding the query.

    Returns:
        A single string: the ``page_content`` of every retrieved document,
        separated by blank lines.
    """
    hits = retriever.invoke(input_dict["question"])
    passages = [hit.page_content for hit in hits]
    return "\n\n".join(passages)

def query_llm(input_text):
    """Send a prompt to the Hugging Face Inference API and return its reply.

    Args:
        input_text: The prompt. Either a plain string, or a prompt-value
            object exposing ``to_string()`` (which is what a prompt template
            emits when it is the previous step of a chain).

    Returns:
        Whatever ``llm_client.text_generation`` returns for the prompt, with
        generation capped at 200 new tokens.
    """
    # text_generation expects plain text, but when this function follows a
    # prompt template in a chain it receives a prompt-value object instead.
    # Convert it explicitly; plain strings pass through unchanged.
    to_string = getattr(input_text, "to_string", None)
    prompt_text = to_string() if callable(to_string) else str(input_text)

    return llm_client.text_generation(prompt_text, max_new_tokens=200)

# Define RAG Pipeline
# The chain is invoked with a dict of the form {"question": "<text>"}.
# Every entry of the mapping below receives that whole dict as its input:
#   - "context"  -> retrieve_context() looks up passages for the question;
#   - "question" -> only the question text is extracted, so the prompt's
#                   {question} slot is filled with the text itself rather
#                   than with the repr of the input dict (which is what a
#                   bare pass-through would insert).
# The filled prompt is then handed to query_llm(), wrapped in RunnableLambda
# so that a plain function can take part in the `|` composition.
rag_chain = (
    {
        "context": RunnableLambda(retrieve_context),
        "question": RunnableLambda(lambda payload: payload["question"]),
    }
    | prompt
    | RunnableLambda(query_llm)
)

In [4]:
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
#from rag_pipeline import rag_chain

# Create the FastAPI application object that the route decorators below
# register their handlers on.
app = FastAPI(
    title="LangChain Chatbot API",
    version="1.0",
)

# Request Model
# Body schema accepted by the POST /query endpoint.
class QueryRequest(BaseModel):
    # The user's question; the /query handler reads it as request.question
    # and forwards it to the RAG chain.
    question: str

# POST /query
#   request: QueryRequest body carrying the question text.
#   returns: {"answer": <chain output>} on success.
#   raises:  HTTPException(500) with the error text as `detail` if the chain
#            fails for any reason.
@app.post("/query")
async def query_rag(request: QueryRequest):
    """Handles RAG-based question answering."""
    try:
        # Await the chain's async entry point: calling the blocking invoke()
        # directly inside an `async def` handler would stall the event loop
        # (and every other request) for the whole retrieval + LLM round trip.
        response = await rag_chain.ainvoke({"question": request.question})
    except Exception as e:
        # Top-level boundary: translate any failure into a 500 response and
        # keep the original exception attached as the cause for debugging.
        raise HTTPException(status_code=500, detail=str(e)) from e
    return {"answer": response}

# Root Endpoint
# GET / takes no input and always answers with the same welcome payload.
@app.get("/")
async def root():
    greeting = {"message": "Welcome to the LangChain Chatbot API"}
    return greeting