In [None]:
# Install required packages (run once)
!pip install pdfplumber langchain faiss-cpu sentence-transformers ollama langchain-text-splitters langchain-community

import subprocess
from pathlib import Path

import pdfplumber
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_text_splitters import RecursiveCharacterTextSplitter

In [None]:
# -------------------------------
# Step 1: Extract text from PDFs
# -------------------------------
def extract_pdf_text(pdf_path):
    """Return the concatenated text of all pages of one PDF.

    The file at ``pdf_path`` is opened with pdfplumber and every page is asked
    for its text in page order. Each non-empty page text is followed by a
    newline; pages whose ``extract_text()`` result is falsy (``None`` or an
    empty string) contribute nothing. A PDF with no extractable text yields
    an empty string.
    """
    with pdfplumber.open(pdf_path) as document:
        # Extraction must happen while the document is still open.
        per_page = [page.extract_text() for page in document.pages]
    return "".join(content + "\n" for content in per_page if content)

In [None]:
# -------------------------------
# Step 2: Build vectorstore from multiple PDFs
# -------------------------------
def build_vectorstore_from_pdfs(
    pdf_paths,
    chunk_size=500,
    chunk_overlap=50,
    model_name="sentence-transformers/all-MiniLM-L6-v2",
):
    """Build a single FAISS vectorstore from the text of several PDFs.

    Each PDF is read with ``extract_pdf_text``, split into overlapping chunks,
    and every chunk is embedded and indexed together in one store. Each chunk
    carries ``{"source": <path>}`` metadata so a retrieved chunk can be traced
    back to the PDF it came from.

    Args:
        pdf_paths: Iterable of paths to the PDF files to index.
        chunk_size: Maximum chunk length passed to the text splitter.
        chunk_overlap: Overlap between consecutive chunks passed to the
            text splitter.
        model_name: Name of the HuggingFace embedding model to load.

    Returns:
        The FAISS vectorstore built from all chunks.

    Raises:
        ValueError: If no text chunks were produced (``pdf_paths`` is empty or
            none of the PDFs contained extractable text).
    """
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size, chunk_overlap=chunk_overlap
    )

    all_chunks = []
    all_metadatas = []
    for path in pdf_paths:
        chunks = splitter.split_text(extract_pdf_text(path))
        all_chunks.extend(chunks)
        # One metadata dict per chunk, kept index-aligned with all_chunks.
        all_metadatas.extend({"source": str(path)} for _ in chunks)

    # Fail early with a clear message, before the embedding model is loaded,
    # instead of handing FAISS an empty list of texts.
    if not all_chunks:
        raise ValueError(
            "No text could be extracted from the given PDFs; "
            "cannot build a vectorstore from zero chunks."
        )

    embeddings = HuggingFaceEmbeddings(model_name=model_name)
    return FAISS.from_texts(all_chunks, embeddings, metadatas=all_metadatas)

In [None]:
# -------------------------------
# Step 3: Ask question across PDFs
# -------------------------------
def ask_question(vectorstore, query, k=3, model="llama3"):
    """Answer ``query`` from the indexed PDFs and print the question and answer.

    The ``k`` chunks most similar to ``query`` are retrieved from
    ``vectorstore``, joined into a context block, and sent together with the
    question to a local model through the ``ollama run`` command line. The
    question and the model's reply are printed; nothing is returned.

    Args:
        vectorstore: Store exposing ``similarity_search(query, k=...)`` whose
            results have a ``page_content`` attribute.
        query: The question to ask.
        k: Number of chunks to retrieve as context.
        model: Name of the Ollama model to run.

    Raises:
        RuntimeError: If the ``ollama`` process exits with a non-zero status;
            the message includes the process's stderr output.
    """
    docs = vectorstore.similarity_search(query, k=k)
    context = "\n".join(d.page_content for d in docs)
    prompt = f"Answer the question based on the PDFs:\n\n{context}\n\nQuestion: {query}"

    result = subprocess.run(
        ["ollama", "run", model],
        input=prompt,
        text=True,
        encoding="utf-8",
        errors="replace",  # tolerate undecodable bytes in the model output
        capture_output=True,
    )
    # Surface failures (e.g. unknown model, server not running) instead of
    # silently printing an empty answer.
    if result.returncode != 0:
        details = (result.stderr or "").strip()
        raise RuntimeError(
            f"`ollama run {model}` exited with code {result.returncode}: {details}"
        )

    print("\n--- Question ---")
    print(query)
    print("\n--- Answer ---")
    print(result.stdout)

In [None]:
# -------------------------------
# Example usage
# -------------------------------
# Locate the PDFs in the current user's Downloads folder rather than through
# an absolute path that only exists on one machine.
PDF_DIR = Path.home() / "Downloads"
pdf_paths = [
    str(PDF_DIR / "Shivang_Soni_Enhanced_CV.pdf"),
    str(PDF_DIR / "Research_Paper_On_Artificial_Intelligence_And_Its.pdf"),
]

vectorstore = build_vectorstore_from_pdfs(pdf_paths)

# Ask a question across both PDFs
ask_question(vectorstore, "Summarize Shivang Soni's skills and the main AI research topics.")