In [34]:
import os

import ollama
from langchain.document_loaders import PyPDFLoader
from langchain.embeddings import OllamaEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma
from langchain.vectorstores import FAISS

In [36]:
# --- Notebook configuration -------------------------------------------------
PDF_FOLDER = "pdfs"
# Passed to RecursiveCharacterTextSplitter as chunk_size / chunk_overlap.
CHUNK_SIZE = 1000
CHUNK_OVERLAP = 50
VECTORSTORE_PATH = "rag_vectorstore"

# The indexing function and its call read the three names below, but no cell
# defined them, so a fresh kernel failed with NameError. The values are the
# ones shown in the recorded output of the indexing run.
EMBED_MODEL = "nomic-embed-text"
OUTPUT_DIR = "rag_indexes"
PDF_PATH = f"{PDF_FOLDER}/Attention_Is_All_You_Need.pdf"

In [37]:

# Module-level embedding client using the Ollama model "llama3".
# NOTE(review): the indexing function creates its own OllamaEmbeddings and the
# query cell rebinds `embeddings` with "nomic-embed-text" -- confirm whether
# this instance is still needed.
embeddings = OllamaEmbeddings(model="llama3")

In [27]:
def build_and_store_faiss_index(pdf_path):
    """Build a FAISS index from one PDF's text and save it to disk.

    The text returned by ``extract_text_from_pdf`` is split into chunks of at
    most ``CHUNK_SIZE`` characters with ``CHUNK_OVERLAP`` characters of
    overlap, embedded with the Ollama model named by ``EMBED_MODEL``, and the
    resulting index is saved under ``OUTPUT_DIR`` in a directory named after
    the PDF file (without its extension).

    Args:
        pdf_path: Path of the PDF file to index.

    Returns:
        The path of the directory the index was saved to.

    Raises:
        ValueError: If splitting the extracted text produced no chunks, so
            there is nothing to index.
    """
    print(f" Reading: {pdf_path}")
    text = extract_text_from_pdf(pdf_path)

    print(" Splitting text into chunks...")
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=CHUNK_SIZE,
        chunk_overlap=CHUNK_OVERLAP,
        length_function=len
    )
    chunks = splitter.split_text(text)
    print(f"🔹 Created {len(chunks)} text chunks.")

    # An empty chunk list cannot be turned into an index; fail here with a
    # clear message instead of an obscure error from inside the FAISS builder.
    if not chunks:
        raise ValueError(
            f"No text chunks were produced from {pdf_path!r}; nothing to index."
        )

    print(f"Creating embeddings using {EMBED_MODEL}...")
    embeddings = OllamaEmbeddings(model=EMBED_MODEL)

    print("Building FAISS index...")
    vectorstore = FAISS.from_texts(chunks, embedding=embeddings)

    # One index directory per paper, named after the PDF file stem.
    os.makedirs(OUTPUT_DIR, exist_ok=True)
    paper_name = os.path.splitext(os.path.basename(pdf_path))[0]
    index_path = os.path.join(OUTPUT_DIR, paper_name)
    vectorstore.save_local(index_path)

    print(f"FAISS index saved at: {index_path}")
    # Returned so callers can load the index without rebuilding the path.
    return index_path

In [28]:
if __name__ == "__main__":
    # Build and save the index for the PDF named by PDF_PATH.
    build_and_store_faiss_index(PDF_PATH)

 Reading: pdfs/Attention_Is_All_You_Need.pdf
 Splitting text into chunks...
🔹 Created 94 text chunks.
Creating embeddings using nomic-embed-text...
Building FAISS index...
FAISS index saved at: rag_indexes\Attention_Is_All_You_Need


In [33]:
query = "I have read through the paper. Prepare 4 question to quiz me."

# Load the FAISS index. The directory name must match the saved one exactly,
# including case: the index was written as "Attention_Is_All_You_Need", and a
# lower-cased path only resolves on case-insensitive filesystems.
embeddings = OllamaEmbeddings(model="nomic-embed-text")
index_path = os.path.join("rag_indexes", "Attention_Is_All_You_Need")
vectorstore = FAISS.load_local(
    index_path,
    embeddings,
    # SECURITY: this opts in to pickle deserialization of the stored index.
    # Only load index directories you created yourself.
    allow_dangerous_deserialization=True,
)

# Retrieve the 3 chunks most similar to the query and join them into one
# context string.
docs = vectorstore.similarity_search(query, k=3)
context = "\n\n".join(d.page_content for d in docs)

# Ask the Ollama chat model (llama3), passing the retrieved context with the
# question.
response = ollama.chat(model="llama3", messages=[
    {"role": "system", "content": "You are a research assistant."},
    {"role": "user", "content": f"Answer based on the following context:\n\n{context}\n\nQuestion: {query}"}
])

print(response["message"]["content"])

Here are four questions based on the provided context:

1. What is the purpose of performing attention functions in parallel, according to the text?
a) To compute the dot products between queries and keys
b) To calculate the variance of the dot product
c) To illustrate why dot products get large
d) To facilitate parallel computation

Answer: d) To facilitate parallel computation
