# python script v2 issues -> fix in v3

* discrepancy in the results of retriever = vectorstore.as_retriever(search_type='similarity',search_kwargs = {'k':5}) vs get_retrieved_documents_with_scores manual function
    * the retriever results for the former is not returning the top K in correct order but manual function is
    * means the history_aware_retriever is also not getting the top K results
* if change retriever to manual method, means history_aware_retriever wont work already, so need to manually do the refine_query process of the history_aware_retriever
    * history_aware_retriever has 2 parts: 1. refining query with convo context + 2. retrieval with new refined_query
    * try do part 1 with the ollama package without using langchain ollama, and then dont need to stream the output for this part
    * double check that the output of the refined_query is correct
    * then integrate with the the fixed manual retrieval process
* add functionality of setting sim_score_threshold as part of python script argparse
    * references must be top K > sim_score only
    * if no documents >sim_score then return empty list, then in prompt or ?? if empty list then the LLM must say it doesnt know in the answer -> need to think where best to implemetn this part
 
* SUMMARY of v3 fixes
    * fix retrieval process of using similarity score -> currently not returning top K correctly, so use manual function
        * but still using langchain embeddings, vectorstore and retrieval 
    * get rid of using history_aware retriever, need to create a pipeline for query refining with convo history
        * then integrate with the fixed retriever
    * add functionality of setting sim_score_threshold as part of python script argparse
        * need to deal with the empty list returned, LLM say it doesnt know
- v3 fixes will improve retrieval process and reduce hallucinations with the last added functionality
- v4 will be to remove langchain usage of embeddings, vectorstore and retrieval (refer to Nic script and class in telegram chat)
- v5 will be remove langchain usage of ChatMessageHistory, HumanMessage, AIMessage (maybe RecursiveCharacterTextSplitter and chunking can just us
- e langchain, not big issue, trivial easy fix if required in the future)

** consideration: over time the conversation size grows and will need to be aware of total number of tokens - need to Manage Conversation History

In [36]:
print()
# import os
import glob
import time
import argparse
from tqdm import tqdm
import ollama
from langchain_community.document_loaders import PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_core.vectorstores import InMemoryVectorStore
from langchain_ollama import ChatOllama
from langchain.chains import create_history_aware_retriever
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain.chains import create_retrieval_chain
# from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_community.chat_message_histories import ChatMessageHistory
# from langchain_core.chat_history import BaseChatMessageHistory
# from langchain_core.runnables.history import RunnableWithMessageHistory
from langchain_core.messages import HumanMessage, AIMessage
import warnings
# from urllib3.exceptions import NotOpenSSLWarning
# Suppress all warnings
warnings.filterwarnings("ignore")
# Suppress NotOpenSSLWarning from urllib3
warnings.filterwarnings("ignore", module='urllib3')


def load_pdfs(file_paths):
    """
    file_paths must end with .pdf
    PyPDFLoader auto splits the pdf into pages, each page is 1 Document object split by page number
    note that the splitting by page number is not perfect, the actual page number might be +/- 1-2pages.

    returns a dict of key: file_path and value: list of document objects
    """
    documents_dict = {}   
    for f in tqdm(file_paths):
        loader = PyPDFLoader(file_path = f)
        documents = loader.load()
        documents_dict[f] = documents
    return documents_dict

def chunk_list_of_documents(documents):
    """
    input a list of documents as Document objects

    output a list of chunks as Document objects
    """
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size = 500,
        chunk_overlap = 100, # using 20% is a good start
        length_function=len,
        is_separator_regex=False,
        add_start_index=True
    )

    chunks = text_splitter.split_documents(documents)    
    return chunks

def get_session_history(session_id: str):
    """
    if session_id exists, function returns the ChatMessageHistory of that session_id.

    if session_id does not exists, function instantiates a new ChatMessageHistory.
    """
    if session_id not in chat_history_store:
        chat_history_store[session_id] = ChatMessageHistory()
    return chat_history_store[session_id]

def create_huggingface_retriever(folder_path,embedding_model_name):
    """
    folder_path is type str, absolute folder path of the pdf files' location.
    embedding_model_name type str, take key from HG website.

    1. uses load_pdfs and chunk_list_of_documents functions to
    get chunks across the different input pdfs.
    2. sets up huggingface embedding model based on embedding_model_name passed
    3. sets up the vector db and adds the embeds the chunks into the vector db.
    4. sets up retriever object from the filled vector db

    output: retriever_hf created from the HugginFaceEmbeddings
    """
    files_paths = glob.glob(f"{folder_path}/*.pdf")
    print()
    print()

    # load documents from file paths
    print("loading pdfs...")
    documents_dict = load_pdfs(file_paths=files_paths)

    # chunk documents
    print()
    print("chunking documents...")
    all_chunks = []
    for key in tqdm(documents_dict.keys()):
        documents = documents_dict[key]
        chunks = chunk_list_of_documents(documents=documents)
        all_chunks.extend(chunks)
    print(f"number of chunks: {len(all_chunks)}")

    # setup embedding model
    print()
    print("instantiating HuggingFaceEmbeddings...")
    model_kwargs = {'device': 'cpu'}
    encode_kwargs = {'normalize_embeddings': False}
    hf_embedding_model = HuggingFaceEmbeddings(
        model_name=embedding_model_name,
        model_kwargs=model_kwargs,
        encode_kwargs=encode_kwargs
    )
    print("hf_embedding_model created!")

    # setup vectordb, using HF embedding model
    start_time=time.time()
    print()
    print("start process of embedding chunks into vector database...")
    vectorstore_hf = InMemoryVectorStore.from_documents(
        documents=all_chunks,
        embedding=hf_embedding_model
    )
    print("all chunks embedded into vector database!",f"time taken: {round(time.time()-start_time,2)}s")

    # setup retrieval and test with a query and gt_context
    retriever_hf = vectorstore_hf.as_retriever(
        search_type='similarity',
        search_kwargs = {'k':5}
    )
    print("retriever created!")

    return vectorstore_hf,retriever_hf

def instantiate_history_aware_retriever(retriever_hf,llm_model_name):
    """
    retriever_hf is the output of the create_huggingface_retriever function.
    llm_model_name is type str, can be taken from langchain website.

    use langchain to create a history_aware_retriever. 
    The LLM used here is from langchain, not Ollama.

    output: langchain history_aware_retriever object.
    """

    # setup llm chat model using ollama
    llm_model = ChatOllama(
        model=llm_model_name,
        temperature=0 # increase temp for more creative answers
    ) 

    # setup system contextualise input prompt
    system_contextualise_input_prompt = (
        "Given a chat history and the latest user question "
        "which might reference context in the chat history, "
        "formulate a standalone question which can be understood "
        "without the chat history. Do NOT answer the question, "
        "just reformulate it if needed and otherwise return it as is."
    )
    system_input_prompt = ChatPromptTemplate.from_messages(
        [
            ("system", system_contextualise_input_prompt),
            MessagesPlaceholder("chat_history"),
            ("human", "{input}"),
        ]
    )

    # instantiate the history-aware retriever:
    history_aware_retriever = create_history_aware_retriever(
        llm=llm_model,
        retriever=retriever_hf,
        prompt=system_input_prompt
    )

    return history_aware_retriever

def format_chat_history(chat_history):
    """
    input: chat_history is a langchain_community ChatMessageHistory object.

    function formats the messages into dict format 
    instead of Langchain AI and HumanMessage objects.
    this function returns empty list if there is no chat_history
    """

    formatted_chat_history = [
        {"role": "human", "content": message.content} if isinstance(message, HumanMessage) else
        {"role": "ai", "content": message.content}
        for message in chat_history.messages
    ]

    return formatted_chat_history

def manual_rag_with_ollama(retrieved_documents, formatted_chat_history, input_query, ollama_model_name="llama3.1"):
    """
    Manually performs RAG using retrieved documents from history-aware-retriever and streams results from the Ollama model.
    
    Args:
        retrieved_documents (list of Document objects):  output of history-aware-retriever.invoke().
        formatted_chat_history (list of dict): output of format_chat_history function.
        input_query (str): The user's input query.
        ollama_model_name (str): The name of the Ollama model to use.
    """
    
    # Step 1: Format the retrieved documents as context
    retrieved_references = "\n\n".join([doc.page_content for doc in retrieved_documents])
    
    # Step 2: Create a prompt that integrates the retrieved context and input query
    input_prompt = (
        f"You are an assistant for question-answering tasks. You must reference information from the retrieved_references to answer the input_query. "
        f"You must also reference the formatted_chat_history to take into account conversation flow and to ensure that the response is relevant to both the current query and prior conversation. "
        f"Use five sentences maximum and keep the answer concise. Also, if the input_query is specifically a yes or no question, you must only answer yes or no."
        "\n\n"
        f"retrieved_references: \n{retrieved_references}"
        "\n\n"
        f"formatted_chat_history: \n{formatted_chat_history}"
        "\n\n"
        f"input_query: \n{input_query}"
    )

    # Step 3: Pass the prompt to the Ollama LLM and stream the response
    # print("Streaming response from Ollama...")
    print("LLM Response:")

    stream = ollama.chat(
        model=ollama_model_name,
        messages=[{'role': 'user', 'content': input_prompt}],
        stream=True
    )
    response = ''
    # Stream and display the output from Ollama as it generates
    for chunk in stream:
        print(chunk['message']['content'], end='', flush=True)
        response += chunk['message']['content']  # Append each chunk to the answer

    return response

def main(folder_path,embedding_model_name,llm_model_name):
    """
    folder_path (str): absolute folder path of the pdf files' location.
    embedding_model_name (str): model key name that can be taken from HG website.
    llm_model_name (str): model key name that can be taken from Ollama website.

    main function that implements the entire RAG process. 
    session_id is hard-coded to '1' for now since there is no persistence.
    """

    session_id = "1" # hardcode this for temp fix since there is no persistence implemented

    # Initialise embedding and llm model_name
    if embedding_model_name is None:
        embedding_model_name = "sentence-transformers/all-mpnet-base-v2"
    if llm_model_name is None:
        llm_model_name = 'llama3.1'

    # Create the Hugging Face retriever
    retriever_hf = create_huggingface_retriever(folder_path=folder_path,embedding_model_name=embedding_model_name)
    # Create the history-aware-retriever
    history_aware_retriever = instantiate_history_aware_retriever(retriever_hf=retriever_hf,llm_model_name=llm_model_name)

    print()
    print("########################################")
    print("########## START CONVERSATION ##########")
    print("########################################")
    print()
    print("You can now start chatting with the LLM.")
    print("Add '--show references' to the end of the input to view the documents referenced by the LLM.")
    print("Type 'exit' to stop the conversation.")
    print()

    # Initialize the chat loop
    while True:
        # Get user input
        full_user_input = input("User input: ")
        # Only take the first part of the user input if --show references is used
        user_input = full_user_input.split("--")[0]

        # End session if user types 'exit'
        if user_input.lower() == "exit":
            print("Ending session. Goodbye!")
            break

        # get current chat history
        current_chat_history = get_session_history(session_id)
        # format current chat history
        formatted_chat_history = format_chat_history(current_chat_history)

        # retrieve documents using history_aware_retriever
        retrieved_documents = history_aware_retriever.invoke(
            {
                'chat_history':formatted_chat_history,
                'input':user_input
            }
        )

        # invoke manual_rag_with_ollama function  and show the results, need to store for chat history update
        response = manual_rag_with_ollama(
            retrieved_documents=retrieved_documents, 
            formatted_chat_history=formatted_chat_history, 
            input_query=user_input, 
            ollama_model_name=llm_model_name
        )

        # update chat history with latest user input and LLM output - add the input query and response to the current_chat_history
        current_chat_history.add_user_message(user_input)
        current_chat_history.add_ai_message(response)
        
        # Show the references if user requests
        print("\n")
        if len(full_user_input.split("--"))>1: # just check for non empty string will suffice jic of misspelling
            print("### preparing references... ###")
            time.sleep(1)
            print("References:")
            for i,d in enumerate(retrieved_documents):
                time.sleep(1)
                print(f"{i+1} From: page {d.metadata['page']} of {d.metadata['source'].split('/')[-1]}")
                print(f"Content: {d.page_content}")
                print()


# if __name__=="__main__":
#     parser = argparse.ArgumentParser(description="Directly use Conversational RAG with custom PDF documents in terminal.")
#     # Add arguments
#     parser.add_argument("--folder_path", type=str, help="input absolute folder path to folder of pdfs", required=True)
#     parser.add_argument("--embedding_model_name", type=str, help="pass the huggingface embedding model name of your choice", required=False)
#     parser.add_argument("--llm_model_name", type=str, help="pass the ollama llm model name of your choice", required=False)
#     # note that the llm_model_name here is used as the model key for both the langchain ChatOllama model for the history_aware_retriever 
#     # and python Ollama model for the LLM answer generation, if a diff key is used and both packages have different names, there will be issues

#     # Parse the arguments
#     args = parser.parse_args()

#     # create global variable for chat_history_store
#     chat_history_store = {}

#     # Run the main chat function 
#     main(
#         folder_path=args.folder_path,
#         embedding_model_name=args.embedding_model_name,
#         llm_model_name=args.llm_model_name 
#     )

# Issues
# current bottleneck is the history_aware_retriever generating contexts with conversation history reference, but 2-3s shud be fine just like chatgpt
# 3. if cannot find reelvant references, dont return any 
    # - context refernces must be > sim_score, cannot just take top K
# sample queries
# do you know about high dimensional problems in statistical learning? yes or no.
# explain in which cases can ridge regression do it with regards to p and N in high dimensional?  --show references




fix issue of 

In [38]:

session_id = "1" # hardcode this for temp fix since there is no persistence implemented

# Initialise embedding and llm model_name

embedding_model_name = "sentence-transformers/all-mpnet-base-v2"
llm_model_name = 'llama3.1'
folder_path = "/Users/i748920/Desktop/llms-learning/pdf-chatbot-app/data/short-elements-of-statistical-learning-book"

# Create the Hugging Face retriever
vectorstore_hf,retriever_hf = create_huggingface_retriever(folder_path=folder_path,embedding_model_name=embedding_model_name)
# Create the history-aware-retriever
history_aware_retriever = instantiate_history_aware_retriever(retriever_hf=retriever_hf,llm_model_name=llm_model_name)




loading pdfs...


100%|██████████| 1/1 [00:00<00:00, 11.87it/s]



chunking documents...


100%|██████████| 1/1 [00:00<00:00, 3498.17it/s]

number of chunks: 18

instantiating HuggingFaceEmbeddings...





hf_embedding_model created!

start process of embedding chunks into vector database...
all chunks embedded into vector database! time taken: 0.67s
retriever created!


In [28]:
chat_history_store = {}
# get current chat history
current_chat_history = get_session_history(session_id)
# format current chat history
formatted_chat_history = format_chat_history(current_chat_history)

In [30]:
user_input = "do you know about high dimensional problems in statistical learning? yes or no."

In [32]:
retrieved_documents = history_aware_retriever.invoke(
    {
        'chat_history':formatted_chat_history,
        'input':user_input
    }
)

In [34]:
retrieved_documents

[Document(id='4e93f4fe-2de8-4b66-9c82-f39943c40a36', metadata={'source': '/Users/i748920/Desktop/llms-learning/pdf-chatbot-app/data/short-elements-of-statistical-learning-book/chap18 copy.pdf', 'page': 2, 'start_index': 800}, page_content='to eﬃciently estimate the high-dimensional covariance mat rix. In that case,\nmore regularization leads to superior prediction performa nce.\nThus it is not surprising that the analysis of high-dimensio nal data re-\nquires either modiﬁcation of procedures designed for the N >pscenario, or\nentirely new procedures. In this chapter we discuss example s of both kinds\nofapproachesforhighdimensionalclassiﬁcationandregre ssion;thesemeth-'),
 Document(id='c9d819ce-160e-4d48-a2da-ef0d34943e80', metadata={'source': '/Users/i748920/Desktop/llms-learning/pdf-chatbot-app/data/short-elements-of-statistical-learning-book/chap18 copy.pdf', 'page': 0, 'start_index': 0}, page_content='This is page 649\nPrinter: Opaque this\n18\nHigh-Dimensional Problems: p≫N\n18.1 

In [None]:
from typing import List

from langchain_core.documents import Document
from langchain_core.runnables import chain


@chain
def retriever(query: str) -> List[Document]:
    docs, scores = zip(*vectorstore.similarity_search_with_score(query))
    for doc, score in zip(docs, scores):
        doc.metadata["score"] = score

    return docs

In [74]:
from typing import List
from langchain_core.documents import Document

def get_retrieved_documents_with_scores(vectorstore,query,k) -> List[Document]:
    docs, scores = zip(*vectorstore.similarity_search_with_score(query=query,k=k))
    for doc, score in zip(docs, scores):
        doc.metadata["score"] = score

    return docs

In [78]:
docs = get_retrieved_documents_with_scores(vectorstore=vectorstore_hf,query=user_input,k=3)

In [80]:
for d in docs:
    print(d.metadata['score'])
    print(d)
    print()

0.6213172417016296
page_content='to eﬃciently estimate the high-dimensional covariance mat rix. In that case,
more regularization leads to superior prediction performa nce.
Thus it is not surprising that the analysis of high-dimensio nal data re-
quires either modiﬁcation of procedures designed for the N >pscenario, or
entirely new procedures. In this chapter we discuss example s of both kinds
ofapproachesforhighdimensionalclassiﬁcationandregre ssion;thesemeth-' metadata={'source': '/Users/i748920/Desktop/llms-learning/pdf-chatbot-app/data/short-elements-of-statistical-learning-book/chap18 copy.pdf', 'page': 2, 'start_index': 800, 'score': 0.6213172417016296}

0.5919392750876704
page_content='This is page 649
Printer: Opaque this
18
High-Dimensional Problems: p≫N
18.1 When pis Much Bigger than N
In this chapter we discuss prediction problems in which the n umber of
featurespis much larger than the number of observations N, often written
p≫N. Such problems have become of increasing impo

In [82]:
user_input1 = "explain in which cases can ridge regression do it with regards to p and N in high dimensional?  --show references"

In [88]:
docs = get_retrieved_documents_with_scores(vectorstore=vectorstore_hf,query=user_input1,k=3)

In [90]:
for d in docs:
    print(d.metadata['score'])
    print(d)
    print()

0.726056829878058
page_content='over the 100 simulation runs. The p= 1000 case is designed to mimic the
kind of data that we might see in a high-dimensional genomic o r proteomic
dataset, for example.
We ﬁt a ridge regression to the data, with three diﬀerent valu es for the
regularization parameter λ: 0.001, 100, and 1000. When λ= 0.001, this
is nearly the same as least squares regression, with a little regularization
just to ensure that the problem is non-singular when p > N. Figure 18.1' metadata={'source': '/Users/i748920/Desktop/llms-learning/pdf-chatbot-app/data/short-elements-of-statistical-learning-book/chap18 copy.pdf', 'page': 1, 'start_index': 760, 'score': 0.726056829878058}

0.6877578348106839
page_content='just to ensure that the problem is non-singular when p > N. Figure 18.1
shows boxplots oftherelative testerrorachieved bythediﬀ erentestimators
in each scenario. The corresponding average degrees of free dom used in
each ridge-regression ﬁt is indicated (computed using f

In [None]:
from typing import List
from langchain_core.documents import Document

def get_retrieved_documents_with_scores(vectorstore,query,k) -> List[Document]:
    """
    vectorstore: vectorstore object
    query (str): user input query
    k (int): number of top K documents to return
    
    manual function to get retrieved documents from vectorstore and add score to metadata.
    output: List of Document objects.
    """
    docs, scores = zip(*vectorstore.similarity_search_with_score(query=query,k=k))
    for doc, score in zip(docs, scores):
        doc.metadata["score"] = score

    return docs

In [None]:
history_aware_retriever.invoke()

In [None]:
retriever_hf.invoke()

In [92]:
vectorstore_hf

<langchain_core.vectorstores.in_memory.InMemoryVectorStore at 0x347d51f40>

In [104]:
retriever_hf = vectorstore_hf.as_retriever(
    search_type='similarity_score_threshold',
    search_kwargs = {
        'score_threshold':0.9
    }
)

In [106]:
retriever_hf.invoke(input=user_input1)

NotImplementedError: 

In [None]:
vectorstore_hf = InMemoryVectorStore.from_documents(
    documents=all_chunks,
    embedding=hf_embedding_model
)
print("all chunks embedded into vector database!",f"time taken: {round(time.time()-start_time,2)}s")

# setup retrieval and test with a query and gt_context
retriever_hf = vectorstore_hf.as_retriever(
    search_type='similarity',
    search_kwargs = {'k':5}
)
print("retriever created!")

return vectorstore_hf,retriever_hf

In [108]:
retriever_hf = vectorstore_hf.as_retriever(
    search_type='similarity',
    search_kwargs = {'k':5}
)

retriever_hf.invoke(user_input1)

[Document(id='2a7fa81b-0bb3-4d4c-86be-18c1bbf57590', metadata={'source': '/Users/i748920/Desktop/llms-learning/pdf-chatbot-app/data/short-elements-of-statistical-learning-book/chap18 copy.pdf', 'page': 1, 'start_index': 760, 'score': 0.726056829878058}, page_content='over the 100 simulation runs. The p= 1000 case is designed to mimic the\nkind of data that we might see in a high-dimensional genomic o r proteomic\ndataset, for example.\nWe ﬁt a ridge regression to the data, with three diﬀerent valu es for the\nregularization parameter λ: 0.001, 100, and 1000. When λ= 0.001, this\nis nearly the same as least squares regression, with a little regularization\njust to ensure that the problem is non-singular when p > N. Figure 18.1'),
 Document(id='ecd4bf7b-fd87-491b-8fb7-4ab23676d431', metadata={'source': '/Users/i748920/Desktop/llms-learning/pdf-chatbot-app/data/short-elements-of-statistical-learning-book/chap18 copy.pdf', 'page': 1, 'start_index': 1150, 'score': 0.6877578348106839}, page_

In [114]:
retriever_hf = vectorstore_hf.as_retriever(
    # search_type='similarity_score_threshold',
    # search_kwargs={'score_threshold': 0.0}
    search_kwargs={'k':1}
)

retriever_hf.invoke(user_input1)

[Document(id='2a7fa81b-0bb3-4d4c-86be-18c1bbf57590', metadata={'source': '/Users/i748920/Desktop/llms-learning/pdf-chatbot-app/data/short-elements-of-statistical-learning-book/chap18 copy.pdf', 'page': 1, 'start_index': 760, 'score': 0.726056829878058}, page_content='over the 100 simulation runs. The p= 1000 case is designed to mimic the\nkind of data that we might see in a high-dimensional genomic o r proteomic\ndataset, for example.\nWe ﬁt a ridge regression to the data, with three diﬀerent valu es for the\nregularization parameter λ: 0.001, 100, and 1000. When λ= 0.001, this\nis nearly the same as least squares regression, with a little regularization\njust to ensure that the problem is non-singular when p > N. Figure 18.1')]

In [144]:
retriever_hf = vectorstore_hf.as_retriever(
    search_type='similarity',
    search_kwargs = {'k':5}
)

In [146]:
retriever_hf.invoke(user_input1)

[Document(id='2a7fa81b-0bb3-4d4c-86be-18c1bbf57590', metadata={'source': '/Users/i748920/Desktop/llms-learning/pdf-chatbot-app/data/short-elements-of-statistical-learning-book/chap18 copy.pdf', 'page': 1, 'start_index': 760, 'score': 0.726056829878058}, page_content='over the 100 simulation runs. The p= 1000 case is designed to mimic the\nkind of data that we might see in a high-dimensional genomic o r proteomic\ndataset, for example.\nWe ﬁt a ridge regression to the data, with three diﬀerent valu es for the\nregularization parameter λ: 0.001, 100, and 1000. When λ= 0.001, this\nis nearly the same as least squares regression, with a little regularization\njust to ensure that the problem is non-singular when p > N. Figure 18.1'),
 Document(id='ecd4bf7b-fd87-491b-8fb7-4ab23676d431', metadata={'source': '/Users/i748920/Desktop/llms-learning/pdf-chatbot-app/data/short-elements-of-statistical-learning-book/chap18 copy.pdf', 'page': 1, 'start_index': 1150, 'score': 0.6877578348106839}, page_

In [124]:
retriever_hf.invoke(user_input1)

[Document(id='2a7fa81b-0bb3-4d4c-86be-18c1bbf57590', metadata={'source': '/Users/i748920/Desktop/llms-learning/pdf-chatbot-app/data/short-elements-of-statistical-learning-book/chap18 copy.pdf', 'page': 1, 'start_index': 760, 'score': 0.726056829878058}, page_content='over the 100 simulation runs. The p= 1000 case is designed to mimic the\nkind of data that we might see in a high-dimensional genomic o r proteomic\ndataset, for example.\nWe ﬁt a ridge regression to the data, with three diﬀerent valu es for the\nregularization parameter λ: 0.001, 100, and 1000. When λ= 0.001, this\nis nearly the same as least squares regression, with a little regularization\njust to ensure that the problem is non-singular when p > N. Figure 18.1'),
 Document(id='ecd4bf7b-fd87-491b-8fb7-4ab23676d431', metadata={'source': '/Users/i748920/Desktop/llms-learning/pdf-chatbot-app/data/short-elements-of-statistical-learning-book/chap18 copy.pdf', 'page': 1, 'start_index': 1150, 'score': 0.6877578348106839}, page_

In [None]:
retriever_hf.invoke()

In [148]:
from typing import List
from langchain_core.documents import Document

def get_retrieved_documents_with_scores(vectorstore,query,k) -> List[Document]:
    """
    vectorstore: vectorstore object
    query (str): user input query
    k (int): number of top K documents to return
    
    manual function to get retrieved documents from vectorstore and add score to metadata.
    output: List of Document objects.
    """
    docs, scores = zip(*vectorstore.similarity_search_with_score(query=query,k=k))
    for doc, score in zip(docs, scores):
        doc.metadata["score"] = score

    return docs

In [150]:
docs = get_retrieved_documents_with_scores(vectorstore_hf,user_input1,5)

In [152]:
docs

(Document(id='2a7fa81b-0bb3-4d4c-86be-18c1bbf57590', metadata={'source': '/Users/i748920/Desktop/llms-learning/pdf-chatbot-app/data/short-elements-of-statistical-learning-book/chap18 copy.pdf', 'page': 1, 'start_index': 760, 'score': 0.726056829878058}, page_content='over the 100 simulation runs. The p= 1000 case is designed to mimic the\nkind of data that we might see in a high-dimensional genomic o r proteomic\ndataset, for example.\nWe ﬁt a ridge regression to the data, with three diﬀerent valu es for the\nregularization parameter λ: 0.001, 100, and 1000. When λ= 0.001, this\nis nearly the same as least squares regression, with a little regularization\njust to ensure that the problem is non-singular when p > N. Figure 18.1'),
 Document(id='ecd4bf7b-fd87-491b-8fb7-4ab23676d431', metadata={'source': '/Users/i748920/Desktop/llms-learning/pdf-chatbot-app/data/short-elements-of-statistical-learning-book/chap18 copy.pdf', 'page': 1, 'start_index': 1150, 'score': 0.6877578348106839}, page_

# attempt to use manual query refiner llm instead of history_aware_retriever

In [714]:
from langchain_core.messages import SystemMessage, HumanMessage, AIMessage

# 1. setup conversation history

chat_history_store = {}

def get_session_history(session_id: str) -> BaseChatMessageHistory:
    if session_id not in store:
        store[session_id] = ChatMessageHistory()
    return store[session_id]

In [777]:
session_id = "2"
current_chat_history = get_session_history(session_id)
current_chat_history

InMemoryChatMessageHistory(messages=[])

In [779]:
# add some messages

current_chat_history.add_user_message("hi do you know what pythagoras theorem is? just say yes or no")
current_chat_history.add_ai_message("Yes. What would you like to know about it?")
current_chat_history

InMemoryChatMessageHistory(messages=[HumanMessage(content='hi do you know what pythagoras theorem is? just say yes or no'), AIMessage(content='Yes. What would you like to know about it?')])

In [787]:
get_session_history("2")

InMemoryChatMessageHistory(messages=[HumanMessage(content='hi do you know what pythagoras theorem is? just say yes or no'), AIMessage(content='Yes. What would you like to know about it?')])

history_aware_retriever

* Create a chain that takes conversation history and returns documents.
* If there is no chat_history, then the input is just passed directly to the retriever. If there is chat_history, then the prompt and LLM will be used to generate a search query. That search query is then passed to the retriever.

In [722]:
# setup llm model for refining the user input with conversation context

from langchain_ollama import ChatOllama

query_refiner_llm_model_name = "llama3.1"
query_refiner_llm = ChatOllama(
    model=query_refiner_llm_model_name,
    temperature=0 # increase temp for more creative answers
) 

In [724]:
# test
query_refiner_llm.invoke("hi")

AIMessage(content="How's it going? Is there something I can help you with or would you like to chat?", response_metadata={'model': 'llama3.1', 'created_at': '2024-09-18T05:46:38.621339Z', 'message': {'role': 'assistant', 'content': ''}, 'done_reason': 'stop', 'done': True, 'total_duration': 1068434333, 'load_duration': 28997833, 'prompt_eval_count': 11, 'prompt_eval_duration': 310055000, 'eval_count': 21, 'eval_duration': 728042000}, id='run-917dec4c-7b87-4d65-a426-54d0ae073f47-0', usage_metadata={'input_tokens': 11, 'output_tokens': 21, 'total_tokens': 32})

In [748]:
current_chat_history

InMemoryChatMessageHistory(messages=[HumanMessage(content='hi do you know what pythagoras theorem is? just say yes or no'), AIMessage(content='Yes. What would you like to know about it?')])

In [758]:
def refine_input_query_with_chat_history_context(session_id, input_query, query_refiner_llm):
    """
    session_id is tyope str - the sessions_id of the chat history
    input_query is type str - input in the terminal by the user
    query_refiner_llm is a base chat LLM used to refine the input query with context of the chat history
    """
    chat_history = get_session_history(session_id)

    # Setup system contextualise input prompt
    system_contextualize_input_prompt = (
        "Given a chat history and the latest user question "
        "which might reference context in the chat history, "
        "formulate a standalone question which can be understood "
        "without the chat history. Do NOT answer the question, "
        "just reformulate it if needed and otherwise return it as is."
    )
    
    # Prepare the chat prompt template with the chat history
    system_input_prompt_template = ChatPromptTemplate.from_messages(
        [
            ("system", system_contextualize_input_prompt),
            MessagesPlaceholder(variable_name="chat_history"),  # For history
            ("human", "{input}"),  # User's latest input query
        ]
    )

    # Format the chat history as a list of dictionaries (required for MessagesPlaceholder)
    formatted_chat_history = []
    for message in chat_history.messages:
        if isinstance(message, HumanMessage):
            formatted_chat_history.append({"role": "human", "content": message.content})
        elif isinstance(message, AIMessage):
            formatted_chat_history.append({"role": "ai", "content": message.content})

    # Invoke the template with chat history and input query
    system_input_prompt_value = system_input_prompt_template.invoke(
        {
            'chat_history': formatted_chat_history,  # Pass the formatted history
            'input': input_query  # User's current query
        }
    )

    print("chat history:")
    print(system_input_prompt_value.messages)
    
    # Use the query_refiner_llm to get the refined_query
    refined_query = query_refiner_llm.invoke(
        input = system_input_prompt_value.messages
    )
    
    return refined_query

In [760]:
refined_query =  refine_input_query_with_chat_history_context(
    session_id='2',
    input_query="okay explain it.",
    query_refiner_llm=query_refiner_llm
)

chat history:
[SystemMessage(content='Given a chat history and the latest user question which might reference context in the chat history, formulate a standalone question which can be understood without the chat history. Do NOT answer the question, just reformulate it if needed and otherwise return it as is.'), HumanMessage(content='hi do you know what pythagoras theorem is? just say yes or no'), AIMessage(content='Yes. What would you like to know about it?'), HumanMessage(content='okay explain it.')]


In [756]:
system_input_prompt_value.messages

[SystemMessage(content='Given a chat history and the latest user question which might reference context in the chat history, formulate a standalone question which can be understood without the chat history. Do NOT answer the question, just reformulate it if needed and otherwise return it as is.'),
 HumanMessage(content='hi do you know what pythagoras theorem is? just say yes or no'),
 AIMessage(content='Yes. What would you like to know about it?'),
 HumanMessage(content='okay explain it.')]

In [762]:
refined_query.content

"The Pythagorean Theorem states that in a right-angled triangle, the square of the length of the hypotenuse (the side opposite the right angle) is equal to the sum of the squares of the lengths of the other two sides.\n\nMathematically, this can be expressed as:\n\na² + b² = c²\n\nwhere 'a' and 'b' are the lengths of the two shorter sides, and 'c' is the length of the hypotenuse."

issue here is that the refined_query_llm is not returning the refined query but directly answering the question

## others 

In [590]:
current_chat_history.add_user_message("hi can you explain pythagoras theorem to me?")

In [592]:
current_chat_history

InMemoryChatMessageHistory(messages=[HumanMessage(content='hi can you explain pythagoras theorem to me?')])

In [594]:
current_chat_history.add_ai_message("i cant find information on that in the vector db")

In [598]:
current_chat_history.messages

[HumanMessage(content='hi can you explain pythagoras theorem to me?'),
 AIMessage(content='i cant find information on that in the vector db')]

In [688]:
from langchain.schema import AIMessage, HumanMessage

def refine_query_with_context(input_query, session_id, query_refiner_llm):
    """
    Refines the input query by considering the chat history.
    """
    # Get the chat history for the session (assume you have a function to fetch it)
    chat_history = get_session_history(session_id)

    # Setup system contextualize input prompt
    system_contextualize_input_prompt = (
        "Given a chat history and the latest user question "
        "which might reference context in the chat history, "
        "formulate a standalone question which can be understood "
        "without the chat history. Do NOT answer the question, "
        "just reformulate it if needed and otherwise return it as is."
    )

    # Prepare the chat prompt template with the chat history
    system_input_prompt_template = ChatPromptTemplate.from_messages(
        [
            ("system", system_contextualize_input_prompt),
            MessagesPlaceholder(variable_name="chat_history"),  # For history
            ("human", "{input}"),  # User's latest input
        ]
    )

    # Format the chat history as a list of dictionaries (required for MessagesPlaceholder)
    formatted_history = []
    for message in chat_history.messages:
        if isinstance(message, HumanMessage):
            formatted_history.append({"role": "human", "content": message.content})
        elif isinstance(message, AIMessage):
            formatted_history.append({"role": "ai", "content": message.content})

    # Invoke the template with chat history and input query
    refined_query = system_input_prompt_template.invoke(
        {
            'chat_history': formatted_history,  # Pass the formatted history
            'input': input_query  # User's current query
        }
    )

    return refined_query

In [690]:
refined_query = refine_query_with_context(input_query=sample_query,session_id='1',query_refiner_llm=query_refiner_llm)

In [700]:
refined_query.messages

[SystemMessage(content='Given a chat history and the latest user question which might reference context in the chat history, formulate a standalone question which can be understood without the chat history. Do NOT answer the question, just reformulate it if needed and otherwise return it as is.'),
 HumanMessage(content='hi can you explain pythagoras theorem to me?'),
 AIMessage(content='i cant find information on that in the vector db'),
 HumanMessage(content='for high-dimensional problems, with regards to p and N, in what cases can ridge regression exploit the correlation in the features of the dataset?')]

In [704]:
response = query_refiner_llm.invoke(input=refined_query)
response.content

"What is Ridge Regression's ability to exploit feature correlations in high-dimensional spaces when considering p (number of features) and N (sample size)?"

In [None]:
query_refiner_llm.invoke

In [681]:
system_input_prompt_temmplate = refine_query_with_context(input_query=sample_query,session_id='1',query_refiner_llm=query_refiner_llm)
system_input_prompt_temmplate

ValueError: variable chat_history should be a list of base messages, got Human: hi can you explain pythagoras theorem to me?
AI: i cant find information on that in the vector db of type <class 'langchain_core.chat_history.InMemoryChatMessageHistory'>

build entire pipeline in order without langchain as much as possible

In [None]:
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_openai.chat_models import ChatOpenAI

    # setup llm model for refining the user input with conversation context
    prompt_refiner_llm = ChatOllama(
        model=llm_model_name,
        temperature=0 # increase temp for more creative answers
    ) 
    

prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            "You're an assistant who's good at {ability}. Respond in 20 words or fewer",
        ),
        MessagesPlaceholder(variable_name="history"),
        ("human", "{input}"),
    ]
)
runnable = prompt | model

In [571]:
from langchain_community.chat_message_histories import ChatMessageHistory
from langchain_core.chat_history import BaseChatMessageHistory
from langchain_core.runnables.history import RunnableWithMessageHistory

store = {}


def get_session_history(session_id: str) -> BaseChatMessageHistory:
    if session_id not in store:
        store[session_id] = ChatMessageHistory()
    return store[session_id]


with_message_history = RunnableWithMessageHistory(
    runnable,
    get_session_history,
    input_messages_key="input",
    history_messages_key="history",
)

NameError: name 'runnable' is not defined