In [1]:
%%writefile app.py
import streamlit as st
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain.embeddings import HuggingFaceEmbeddings
from transformers import pipeline
import tempfile
import os

# Page chrome: wide layout and a title describing the extractive-QA mode.
st.set_page_config(page_title="Chat with your PDF", layout="wide")
st.title("📄 Chat with your PDF (Extractive QA)")

# Streamlit re-executes this script on every interaction, so anything that
# must outlive a single run is kept in session state. Seed both slots with
# None the first time the session sees this script.
for _state_key in ("retriever", "qa_pipeline"):
    if _state_key not in st.session_state:
        st.session_state[_state_key] = None

# Upload widget; yields None until the user picks a PDF.
uploaded_file = st.file_uploader("Upload a PDF", type="pdf")

if uploaded_file:
    # Streamlit re-runs the whole script on every widget interaction (e.g.
    # each question typed below). Remember which upload has already been
    # indexed so the PDF is not re-parsed and re-embedded on every rerun.
    # NOTE(review): name + size is a cheap identity; two different files with
    # the same name and byte count would be treated as the same upload.
    upload_key = (uploaded_file.name, uploaded_file.size)

    if st.session_state.get("indexed_upload") != upload_key:
        # Drop any index built for a previous file before building a new one,
        # so a failure below cannot leave a stale retriever behind.
        st.session_state.retriever = None
        st.session_state["indexed_upload"] = None

        # PyPDFLoader needs a filesystem path, so spill the upload to disk.
        # getvalue() returns the full buffer regardless of the current read
        # position of the uploaded stream.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp_file:
            tmp_file.write(uploaded_file.getvalue())
            pdf_path = tmp_file.name

        try:
            st.info("📚 Loading and chunking PDF...")
            documents = PyPDFLoader(pdf_path).load()
        finally:
            # The temp file was created with delete=False; remove it ourselves
            # so repeated uploads do not accumulate files on disk.
            os.remove(pdf_path)

        text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
        docs = text_splitter.split_documents(documents)

        if not docs:
            # E.g. a scanned/image-only PDF: there is nothing to embed.
            st.error("No extractable text was found in this PDF.")
        else:
            st.info("🔍 Creating vector index...")
            embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
            db = FAISS.from_documents(docs, embeddings)
            st.session_state.retriever = db.as_retriever()
            st.session_state["indexed_upload"] = upload_key

    # The QA model does not depend on the uploaded file; load it once per
    # session instead of on every rerun.
    if st.session_state.qa_pipeline is None:
        st.info("🤖 Loading QA model...")
        st.session_state.qa_pipeline = pipeline("question-answering", model="deepset/roberta-base-squad2", device=-1)

    if st.session_state.retriever is not None:
        st.success("✅ Ready! Ask away below.")

# Question-answering UI: only shown once both an index and a QA model exist.
if st.session_state.retriever and st.session_state.qa_pipeline:
    query = st.text_input("Ask a question about the PDF:")

    # Treat whitespace-only input as "no question" rather than running
    # retrieval and inference on it.
    question = query.strip() if query else ""

    if question:
        result = None
        with st.spinner("Thinking..."):
            retrieved_docs = st.session_state.retriever.get_relevant_documents(question)
            # Concatenate the top three retrieved chunks into one context string.
            context = " ".join(doc.page_content for doc in retrieved_docs[:3])
            # The pipeline rejects an empty context, so skip the model call
            # when retrieval produced no usable text.
            if context.strip():
                result = st.session_state.qa_pipeline(question=question, context=context)

        if result is None:
            st.warning("No relevant text was found in the PDF for this question.")
        else:
            st.markdown(f"**Answer:** {result['answer']}")


Writing app.py
