# RAG Pipeline With History

In [1]:
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_ollama import OllamaLLM
import faiss
from langchain_community.docstore.in_memory import InMemoryDocstore
from langchain_community.vectorstores import FAISS
import os
from langchain_community.retrievers import BM25Retriever
from langchain.retrievers import EnsembleRetriever

In [9]:
import uuid
from datetime import datetime

import torch
from langchain.chains import create_history_aware_retriever, create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.chat_history import BaseChatMessageHistory
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder, PromptTemplate
from langchain_core.runnables.history import RunnableWithMessageHistory

from session_history import SessionMemoryTableOps, SessionLocal, InSessionMemoryOps

In [4]:
# Name of the Ollama model; the same LLM instance is passed to the question-rewriting
# retriever and to the answer-generation chain further down.
MODEL_NAME = "llama3.2"
llm = OllamaLLM(model= MODEL_NAME)

In [5]:
def load_docs():
    """Collect the paths of every PDF file below the current working directory.

    The tree rooted at "." is walked recursively. Any directory whose path
    contains the substring "faiss" or "git" is skipped (this covers the saved
    ``faiss_index`` folder and ``.git``; note that it is a plain substring
    match, so e.g. a folder named "digital" is skipped as well).

    Returns:
        list[str]: Paths relative to the current working directory. Files in
        the top-level directory are returned as bare file names; files in
        sub-directories include the directory part so they can be opened.
    """
    pdf_paths = []

    for root, _dirs, files in os.walk("."):
        # Skip index/VCS folders (substring match on the directory path).
        if "faiss" in root or "git" in root:
            continue
        for file in files:
            if file.endswith(".pdf"):
                # Join with the containing directory so nested PDFs are loadable;
                # normpath drops the leading "./" so top-level files keep bare names.
                pdf_paths.append(os.path.normpath(os.path.join(root, file)))

    return pdf_paths

In [6]:
# Discover the PDFs to index and display the resulting list.
document_loader = load_docs()
document_loader

['ENSC3016_Course_Notes_Part_1_Electromagnetism_Transformers.pdf',
 'Three Phase Power System Fundamentals.pdf',
 'ENSC3016_Course_Notes_Part_2_Electric_Machines.pdf',
 'Electric Machinery Fundamentals Textbook -- Chapman.pdf',
 'ENSC3016 Study Guide 1-Review of Circuit Fundamentals.pdf']

In [27]:
# Local path of the sentence-transformers model used to embed chunks and queries.
embedding_model = "./local_models/all-MiniLM-L6-v2"
# Run the embedding model on the GPU when one is available.
device = "cuda" if torch.cuda.is_available() else "cpu"


def embed_splitting(document_loader, embedding_model, chunk_size=400, chunk_overlap=64):
    """Load the given PDFs, split them into chunks, and build the embedding model.

    Args:
        document_loader: Iterable of PDF file paths to load with ``PyPDFLoader``.
        embedding_model: Name or local path of the HuggingFace embedding model.
        chunk_size: Maximum chunk length, measured in tiktoken tokens.
        chunk_overlap: Number of tokens shared between consecutive chunks.

    Returns:
        tuple: ``(embeddings, splits)`` where ``embeddings`` is the configured
        ``HuggingFaceEmbeddings`` instance (running on the module-level
        ``device``, producing normalized vectors) and ``splits`` is the list of
        chunked documents.

    NOTE(review): the all-MiniLM-L6-v2 model card states that input longer
    than 256 word pieces is truncated, so 400-token chunks may be only
    partially embedded. Consider passing a smaller ``chunk_size`` — TODO confirm.
    """
    embeddings = HuggingFaceEmbeddings(
        model=embedding_model,
        model_kwargs={"device": device},
        encode_kwargs={"normalize_embeddings": True},
    )

    # One PyPDFLoader per file; each load() yields one document per page.
    doc_store = []
    for file in document_loader:
        doc_store.extend(PyPDFLoader(file).load())

    text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )

    splits = text_splitter.split_documents(doc_store)

    return embeddings, splits


In [28]:
# Build the embedding model and the chunked documents from the discovered PDFs.
embeddings, splits = embed_splitting(document_loader, embedding_model)

In [29]:
# Display the embedding model configuration.
embeddings

HuggingFaceEmbeddings(model_name='./local_models/all-MiniLM-L6-v2', cache_folder=None, model_kwargs={'device': 'cuda'}, encode_kwargs={'normalize_embeddings': True}, query_encode_kwargs={}, multi_process=False, show_progress=False)

In [30]:
# Number of chunks produced by the splitter.
len(splits)

402

In [31]:
# Report the installed FAISS version and the number of GPUs FAISS reports.
print("FAISS version", faiss.__version__)
print(faiss.get_num_gpus())

FAISS version 1.9.0
1


In [33]:
# Dimensionality of the embedding vectors, probed with a throwaway query.
dim = len(embeddings.embed_query("test sentence"))
# Flat L2 index; the vector store below is constructed directly on this CPU index,
# so no GPU clone is created (one would only allocate GPU memory without being used).
index = faiss.IndexFlatL2(dim)

if os.path.exists("faiss_index"):
    print("Loading FAISS index from disk...")
    # SECURITY: allow_dangerous_deserialization loads pickled data; only load an
    # index folder that this notebook wrote itself.
    # NOTE(review): a saved index is reused as-is even if the PDFs or chunking changed
    # since it was written — delete the "faiss_index" folder to force a rebuild.
    vector_store = FAISS.load_local("faiss_index", embeddings=embeddings, allow_dangerous_deserialization=True)
else:
    print("Building FAISS index from scratch...")
    vector_store = FAISS(
        embedding_function=embeddings,
        index=index,
        docstore=InMemoryDocstore(),
        index_to_docstore_id={},
    )
    # Embed and add every chunk, then persist so later runs can take the load branch.
    vector_store.add_documents(splits)
    vector_store.save_local("faiss_index")

Building FAISS index from scratch...


In [34]:
# Create the dense (FAISS) retriever once; it is configured to return 4 chunks per query.
semantic_retriever = vector_store.as_retriever(search_kwargs={'k': 4})

In [35]:
# Display the retriever configuration.
semantic_retriever

VectorStoreRetriever(tags=['FAISS', 'HuggingFaceEmbeddings'], vectorstore=<langchain_community.vectorstores.faiss.FAISS object at 0x75841b0cac90>, search_kwargs={'k': 4})

In [37]:
# Keyword (BM25) retriever built over the same chunks, configured to return 4 matches.
bm25_retriever = BM25Retriever.from_documents(splits)
bm25_retriever.k = 4

In [38]:
# Fuse dense (FAISS) and sparse (BM25) results, weighting the dense retriever roughly 2:1.
# EnsembleRetriever has no search_kwargs option (such a keyword is silently ignored); the
# number of fused results is governed by each underlying retriever's own k.
ensemble_retriever = EnsembleRetriever(
    retrievers=[semantic_retriever, bm25_retriever],
    weights=[0.67, 0.33],
)

In [39]:
# System instruction for the question-rewriting step: turn a follow-up question into a
# standalone one, without answering it.
contextualize_q_system_prompt = (
    "Given a chat history and the latest user question "
    "which might reference context in the chat history, formulate a standalone question "
    "which can be understood without the chat history. Do NOT answer the question, "
    "just reformulate it if needed and otherwise return it as is."
)

# Message layout: instruction first, then prior turns, then the newest user question.
contextualize_q_prompt = ChatPromptTemplate([
    ("system", contextualize_q_system_prompt),
    MessagesPlaceholder("chat_history"),
    ("human", "{input}"),
])

# Retriever that consults the chat history before querying the ensemble retriever.
history_aware_retriever = create_history_aware_retriever(
    llm,
    ensemble_retriever,
    contextualize_q_prompt,
)

In [40]:
# Smoke-test the history-aware retriever with a sample question and an empty chat history.
user_input = "Explain transformers"
history_results = history_aware_retriever.invoke({"input": user_input, "chat_history": []})

In [41]:
# Display the raw retrieved documents.
history_results

[Document(id='402198b9-0143-476b-beba-33c8d40d298b', metadata={'producer': 'Microsoft® Word 2013', 'creator': 'Microsoft® Word 2013', 'creationdate': '2019-07-27T15:04:48+08:00', 'author': 'Ali Kharrazi', 'moddate': '2019-07-27T15:04:48+08:00', 'source': 'ENSC3016_Course_Notes_Part_1_Electromagnetism_Transformers.pdf', 'total_pages': 76, 'page': 51, 'page_label': '52'}, page_content='Transformer 52 \n \n \n \n   Figure 6-3 Shell-type transformers. \n \n \n \nFigure 6-4 Flux plot: shell-type transformer \n \n \nToroidal transformers exploit the remarkable properties of toroidal coils described in section 3.6. \nAlthough they are more expensive than shell-type transformers, the performance is better. They are used \nin high -quality electronic equipment and for instrument transformers (see section 6.3) where \nmeasurement accuracy is important. Typical toroidal transformers are shown in figure 6-5. \n \nFigure 6-5 Toroidal transformers.\uf020\n \n \n \n6.2 Transformer Principle: \nThe ac

In [42]:
# Pretty-print each retrieved document with a 1-based position, its source file, and its text.
for position, retrieved in enumerate(history_results, start=1):
    print(f"\nDocument {position}")
    print(f"Source: {retrieved.metadata.get('source', 'unknown')}")
    print(f"Content:\n{retrieved.page_content}")


Document 1
Source: ENSC3016_Course_Notes_Part_1_Electromagnetism_Transformers.pdf
Content:
Transformer 52 
 
 
 
   Figure 6-3 Shell-type transformers. 
 
 
 
Figure 6-4 Flux plot: shell-type transformer 
 
 
Toroidal transformers exploit the remarkable properties of toroidal coils described in section 3.6. 
Although they are more expensive than shell-type transformers, the performance is better. They are used 
in high -quality electronic equipment and for instrument transformers (see section 6.3) where 
measurement accuracy is important. Typical toroidal transformers are shown in figure 6-5. 
 
Figure 6-5 Toroidal transformers.
 
 
 
6.2 Transformer Principle: 
The action of a transformer is most easily understood if the two coils are wound on opposite sides of a 
magnetic core, as shown in the model of figure 6 -6. This form is used for some low -cost transformers, 
but the magnetic coupling is not as good as with the shell-type construction. 
 
 
Figure 6-6  Core-type transformer 

In [43]:
# System prompt for the answering step. The {context} placeholder is filled with the
# retrieved documents; the model is told to refuse when the context lacks the answer.
user_input_prompt = """You are an expert assistant answering based only on the provided context.

    Context:
    {context}
    
    Use all relevant information above to answer the question below. If the answer isn't found in the chunks, say:
    "I cannot answer this question because the necessary information was not found in the provided documents."

    When answering, cite the **source file name** and **slide/page number** if available.
    """

In [44]:
# Answering prompt: system instructions (with retrieved context), then prior chat turns,
# then the newest user question.
prompt_template = ChatPromptTemplate(
    [
        ("system", user_input_prompt),
        MessagesPlaceholder("chat_history"),
        ("human", "{input}"),
    ]
)

In [45]:
# Render each retrieved chunk together with its source file and page label so the model can
# actually cite them, as the system prompt requests; with the default document prompt only
# page_content would reach the LLM.
# Assumes every chunk carries 'source' and 'page_label' metadata, as the PyPDFLoader output
# shown earlier in this notebook does — TODO confirm for other loaders.
document_prompt = PromptTemplate.from_template(
    "Source: {source} (page {page_label})\n{page_content}"
)

# Chain that stuffs all retrieved chunks into the {context} slot of the answering prompt.
question_answer_chain = create_stuff_documents_chain(
    llm, prompt_template, document_prompt=document_prompt
)
# Full RAG chain: history-aware retrieval followed by answer generation.
rag_chain = create_retrieval_chain(history_aware_retriever, question_answer_chain)

In [46]:
# Session store from the local session_history module; used below to register and look up
# sessions and passed to each per-session message history.
history = SessionMemoryTableOps(SessionLocal)

In [49]:
### Per-session chat history management ###
# Maps session_id -> message-history object, so each session reuses a single instance.
chat_history_cache = {}


def get_session_history(session_id: str):
    """Return the message history for ``session_id``, creating and caching it on first use."""
    try:
        return chat_history_cache[session_id]
    except KeyError:
        session_memory = InSessionMemoryOps(session_id, db=history)
        chat_history_cache[session_id] = session_memory
        return session_memory


# Wrap the RAG chain so prior turns are injected under "chat_history" and the "answer"
# output is recorded back into the session's history.
conversational_rag_chain = RunnableWithMessageHistory(
    rag_chain,
    get_session_history,
    input_messages_key="input",
    history_messages_key="chat_history",
    output_messages_key="answer",
)

In [50]:
def pipeline_combined():
    """Run an interactive question/answer loop against the conversational RAG chain.

    First prompts for a session ID: an empty answer starts a new session (registered
    in ``history`` with ``turns_used=0``); a non-empty answer must name an existing
    session, otherwise the prompt repeats. Then reads questions until the user types
    ``exit`` or ``quit`` (case-insensitive, surrounding whitespace ignored), printing
    the chain's ``answer`` for each one. Blank questions are ignored.

    Chat memory is handled by the chain via ``get_session_history``; messages are not
    added manually here.
    """
    while True:
        session_id = input("Enter session ID to resume, or press Enter to start new: ").strip()
        if not session_id:
            # IDs are truncated to 8 characters, so regenerate in the unlikely event
            # that the short ID already belongs to an existing session.
            session_id = str(uuid.uuid4())[:8]
            while history.session_exists(session_id):
                session_id = str(uuid.uuid4())[:8]
            print(f"Starting new session: {session_id}")
            history.add_session(session_id=session_id, turns_used=0)
            break

        if history.session_exists(session_id):
            print(f"Resuming session: {session_id}")
            break

        print(f"Session ID '{session_id}' not found. Please try again.")

    print(f"\nModel {MODEL_NAME} has been initiated with memory. Please feel free to ask questions or type 'exit' to quit.")
    while True:
        user_input = input("You: ").strip()

        if user_input.lower() in ("exit", "quit"):
            print("Session ended. Have a good day.")
            break

        if not user_input:
            # Nothing to ask: avoid an LLM call and an empty turn in the stored history.
            continue

        response = conversational_rag_chain.invoke(
            {"input": user_input},
            config={"configurable": {"session_id": session_id}},
        )
        print(f"LLM: {response['answer']}\n")

In [52]:
# Start the interactive chat loop (blocks on user input until 'exit' or 'quit').
pipeline_combined()

Starting new session: ee59457d

Model llama3.2 has been initiated with memory. Please feel free to ask questions or type 'exit' to quit.
LLM: Transformers are essentially AC devices that transfer energy from one coil to another through magnetically coupled coils. The purpose of a transformer is to either provide electrical isolation between the source and the load or to change the voltage and current levels.

The basic principle of a transformer is shown in figure 6-1, where energy is transferred from the AC source to the lamp through the space between the coils. In this model, the coil connected to the source is termed the primary, and the coil connected to the load is termed the secondary.

According to equation 6-19, the current transformer (CT) ratio is given by:

I_M ≈ N1 / N2

where I_M is the meter current, N1 is the number of turns in the primary winding, and N2 is the number of turns in the secondary winding.

In the case of a shell-type construction (figure 6-3), the coils ar

In [None]:
# One-off, non-interactive call to the conversational chain under a fixed session ID.
# NOTE(review): unlike pipeline_combined(), this does not register "abc123" via
# history.add_session first — confirm InSessionMemoryOps tolerates an unregistered session.
conversational_rag_chain.invoke(
    {"input": "Explain transformers?"},
    config={"configurable": {"session_id": "abc123"}},
)["answer"]

In [None]:
# Inspect the messages stored for the fixed test session.
print(get_session_history("abc123").messages)