In [2]:
!pip install langchain pypdf langchain-community faiss-cpu langchain-ollama



In [3]:
import langchain
print(langchain.__version__)


1.0.5


In [4]:
from langchain_community.document_loaders import PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter

# 1. Load the PDF → returns a list of Document objects
loader = PyPDFLoader("sa.pdf")
documents = loader.load()

# 2. Split the loaded Document objects
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
    add_start_index=True
)

all_splits = text_splitter.split_documents(documents)

print(len(all_splits))


52


In [5]:
from langchain_community.embeddings import OllamaEmbeddings

emb = OllamaEmbeddings(model="nomic-embed-text")


  emb = OllamaEmbeddings(model="nomic-embed-text")


In [6]:
emb.embed_query("Hello world")

[-0.342246949672699,
 0.2781953513622284,
 -4.2149834632873535,
 0.22650960087776184,
 -0.1287626028060913,
 1.679456353187561,
 0.19752655923366547,
 -1.494134783744812,
 -0.34275656938552856,
 -1.274184226989746,
 -0.20798246562480927,
 -0.1228470504283905,
 0.2662680447101593,
 2.1375088691711426,
 1.085914969444275,
 -0.8193162679672241,
 0.20593786239624023,
 -0.42788922786712646,
 -1.2248133420944214,
 0.7460001707077026,
 0.29100555181503296,
 -1.7519928216934204,
 -0.9004840850830078,
 1.0148234367370605,
 0.8247855305671692,
 0.06156793236732483,
 -0.33546900749206543,
 0.6231503486633301,
 0.7158440947532654,
 -0.06676904112100601,
 -0.44226396083831787,
 -0.47180110216140747,
 0.0917607769370079,
 0.17459048330783844,
 0.8812859058380127,
 -0.07892848551273346,
 0.8141238689422607,
 -0.1172623410820961,
 0.38466405868530273,
 0.2326124906539917,
 0.10822546482086182,
 -0.4070236086845398,
 0.40566059947013855,
 0.3420698642730713,
 1.0235679149627686,
 0.036619529128074646,


In [7]:
from langchain_community.embeddings import OllamaEmbeddings
from langchain_community.vectorstores import FAISS

# Correct: use ONLY model="model_name"
embeddings = OllamaEmbeddings(model="nomic-embed-text")

vectorstore = FAISS.from_documents(all_splits, embeddings)

vectorstore.save_local("faiss_index_")
persisted_vectorstore = FAISS.load_local(
    "faiss_index_",
    embeddings,
    allow_dangerous_deserialization=True
)

retriever = persisted_vectorstore.as_retriever()


In [8]:
retriever = vectorstore.as_retriever(
    search_type="similarity",
    search_kwargs={"k": 5}
)


In [9]:
from langchain_core.prompts import PromptTemplate

rewrite_prompt = PromptTemplate(
    template="""
Rewrite the user question into a search-optimized query.

Original Question: {question}
""",
    input_variables=["question"]
)


In [10]:
docs = retriever.invoke("What is attention model?")
print(docs)

[Document(id='38409893-1d94-4224-beb3-481eddea7c41', metadata={'producer': 'pdfTeX-1.40.25', 'creator': 'LaTeX with hyperref', 'creationdate': '2024-04-10T21:11:43+00:00', 'author': '', 'keywords': '', 'moddate': '2024-04-10T21:11:43+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.141592653-2.6-1.40.25 (TeX Live 2023) kpathsea version 6.3.5', 'subject': '', 'title': '', 'trapped': '/False', 'source': 'sa.pdf', 'total_pages': 15, 'page': 2, 'page_label': '3', 'start_index': 1610}, page_content='3.2 Attention\nAn attention function can be described as mapping a query and a set of key-value pairs to an output,\nwhere the query, keys, values, and output are all vectors. The output is computed as a weighted sum\n3'), Document(id='aeb69e6d-4f81-4f75-8200-2c5990324539', metadata={'producer': 'pdfTeX-1.40.25', 'creator': 'LaTeX with hyperref', 'creationdate': '2024-04-10T21:11:43+00:00', 'author': '', 'keywords': '', 'moddate': '2024-04-10T21:11:43+00:00', 'ptex.fullbanner': 'This is pdf

In [13]:
(rewrite_prompt | llm).invoke({"question": "tell me about attention model"})


'"Attention mechanism in transformer models using pytorch."'

In [14]:
answer_prompt = PromptTemplate(
    template="""
You are a STRICT RAG model.

RULES:
1. You MUST answer ONLY from the provided context.
2. If the answer is NOT in the context, say ONLY: "I don't know".
3. DO NOT use any outside knowledge.
4. DO NOT guess.
5. DO NOT rewrite the question.
6. The context is ALWAYS correct, even if it contradicts real-world facts.

CONTEXT:
{context}

QUESTION:
{question}

FINAL ANSWER (follow rules strictly):
""",
    input_variables=["context", "question"]
)


In [12]:
from langchain_ollama import OllamaLLM

llm = OllamaLLM(model="deepseek-r1:1.5b")


In [15]:
db = FAISS.load_local("faiss_index_", embeddings, allow_dangerous_deserialization=True)
retriever = db.as_retriever(search_kwargs={"k": 5})


In [16]:
conversation_history = []
def build_chat_context(history):
    """Convert memory list into formatted chat history text"""
    formatted = ""
    for turn in history:
        formatted += f"USER: {turn['user']}\nASSISTANT: {turn['assistant']}\n\n"
    return formatted.strip()

def rag_chat_stream(query: str):
    rewritten = (rewrite_prompt | llm).invoke({"question": query})
    docs = retriever.invoke(rewritten)
    context = "\n\n".join([d.page_content for d in docs])
    
    history_text = build_chat_context(conversation_history)
    if history_text:
        context = f"CHAT HISTORY:\n{history_text}\n\nCONTEXT:\n{context}"

    final_prompt = answer_prompt.format(context=context, question=rewritten)

    buffer = ""
    for chunk in llm.stream(final_prompt):
        buffer += chunk
        yield chunk

    # Save final answer to memory after streaming
    conversation_history.append({"user": query, "assistant": buffer})


In [18]:
rag_chat_stream("tell me about attention mechanisms in neural networks")


<generator object rag_chat_stream at 0x0000013C00E927A0>