# Chat With Your Data

## Preserve conversation history

## Install libraries

In [None]:
pip install openai

In [None]:
pip install python-dotenv

In [None]:
pip install langchain

In [None]:
pip install langchain-openai

In [None]:
pip install pypdf

In [None]:
pip install faiss-cpu

In [None]:
pip install langchainhub

## Load OpenAI API Key

In [1]:
import os

from dotenv import load_dotenv, find_dotenv
# Read the local .env file located by find_dotenv(); the return value is discarded.
_ = load_dotenv(find_dotenv())

# Subscript access raises KeyError when the variable is missing from the environment.
OPENAI_API_KEY = os.environ["OPENAI_API_KEY"]

## Prompt model with no knowledge of the Voynich manuscript

In [2]:
from langchain_openai import ChatOpenAI

# Chat model used below: OpenAI GPT-3.5 Turbo, authenticated with the key read from the environment.
llm = ChatOpenAI(
    model="gpt-3.5-turbo-0125",
    openai_api_key=OPENAI_API_KEY,
)

In [3]:
# Baseline: query the bare model, which has no knowledge of the Voynich manuscript beyond pretraining.
llm.invoke(
    "What are the medicinal insights from the Voynich manuscript?"
)

AIMessage(content='The Voynich manuscript is a mysterious document written in an unknown script and language, with detailed illustrations of plants, astrological symbols, and human figures. Some researchers have speculated that the manuscript may contain medicinal insights or herbal remedies based on the plant illustrations and the overall layout of the text.\n\nHowever, due to the enigmatic nature of the manuscript and the inability to decipher the text, it is currently impossible to definitively determine what, if any, medicinal insights are contained within its pages. Some theories suggest that the manuscript may contain information on alchemical practices, herbal medicine, or even early forms of pharmacology.\n\nWithout a clear understanding of the language and symbols used in the manuscript, it is difficult to extract any concrete medicinal insights. The Voynich manuscript continues to be a source of fascination and intrigue for researchers, but its true meaning and purpose remain

In [4]:
# Baseline: ask the bare model about a term, without any retrieved context.
llm.invoke(
    "What is Aetherfloris Ventus?"
)

AIMessage(content='Aetherfloris Ventus is a Latin term that translates to "air flower wind." It is a concept that refers to the combination of air, flowers, and wind in a harmonious and interconnected way, symbolizing the beauty and interconnectedness of nature.', response_metadata={'token_usage': {'completion_tokens': 52, 'prompt_tokens': 16, 'total_tokens': 68}, 'model_name': 'gpt-3.5-turbo-0125', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprobs': None}, id='run-f18b0515-4360-42df-8877-2fe8eeb545f7-0', usage_metadata={'input_tokens': 16, 'output_tokens': 52, 'total_tokens': 68})

## Load vector database from disk

In [5]:
from langchain_openai import OpenAIEmbeddings
from langchain_community.vectorstores import FAISS


# Load the FAISS index saved on disk, embedding queries with the given OpenAI embedding model.
# SECURITY NOTE: allow_dangerous_deserialization=True permits pickle-based loading of the
# index files; only use it on an index directory from a trusted source.
db = FAISS.load_local(
    folder_path="../faiss_index",
    embeddings=OpenAIEmbeddings(
        openai_api_key=OPENAI_API_KEY,
        model="text-embedding-3-small",
    ),
    allow_dangerous_deserialization=True,
)

## Configure retriever
### Use the similarity search capabilities of a vector store to facilitate retrieval

In [6]:
# Wrap the vector store as a retriever: similarity search, k=6 results per query.
retriever = db.as_retriever(
    search_type="similarity",
    search_kwargs={"k": 6},
)

## Implement a chain
### Chain together multiple calls in a logical sequence

In [7]:
from langchain import hub

# Pull the "rlm/rag-prompt" prompt from the hub; it is used as the prompt step of rag_chain.
prompt = hub.pull("rlm/rag-prompt")

In [8]:
# Display the pulled prompt object to inspect its input variables and template text.
prompt

ChatPromptTemplate(input_variables=['context', 'question'], metadata={'lc_hub_owner': 'rlm', 'lc_hub_repo': 'rag-prompt', 'lc_hub_commit_hash': '50442af133e61576e74536c6556cefe1fac147cad032f4377b60c436e6cdcb6e'}, messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context', 'question'], template="You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.\nQuestion: {question} \nContext: {context} \nAnswer:"))])

In [9]:
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough

def format_docs(docs):
    """Join the ``page_content`` of every document into one string.

    Args:
        docs: Iterable of objects exposing a ``page_content`` string attribute.

    Returns:
        The contents in input order, separated by a blank line ("\\n\\n").
        An empty iterable yields an empty string.
    """
    contents = [document.page_content for document in docs]
    separator = "\n\n"
    return separator.join(contents)

# Compose the RAG pipeline as a single chain:
#   1. build the prompt inputs - "context" is the retriever output passed through
#      format_docs, "question" is the chain input passed through unchanged;
#   2. fill the prompt;
#   3. call the chat model;
#   4. convert the chat message to a string.
rag_chain = (
    {
        "context": retriever | format_docs,
        "question": RunnablePassthrough(),
    }
    | prompt
    | llm
    | StrOutputParser()
)

## Send LLM's response to the user

In [10]:
# Stream the chain's answer and print each chunk as it arrives.
for chunk in rag_chain.stream("What are the medicinal insights from the Voynich manuscript?"):
    # end="" keeps the chunks on one continuous line; flush=True pushes each one out immediately.
    print(chunk, end="", flush=True)

The Voynich manuscript contains detailed anatomical diagrams of mythical beings, possibly used for medicinal or alchemical purposes, providing insights into ancient medical knowledge intertwined with fantasy. The manuscript also includes depictions of mysterious herbs with unique properties, possibly indicating their medicinal uses in preparing potions or remedies. Additionally, the manuscript features colorful illustrations of dishes prepared from unknown or exotic ingredients, likely used for special feasts or medicinal purposes, reflecting a cultural crossover between culinary practices and medicinal knowledge.

In [11]:
# Stream the chain's answer and print each chunk as it arrives.
for chunk in rag_chain.stream("What is Aetherfloris Ventus?"):
    # end="" keeps the chunks on one continuous line; flush=True pushes each one out immediately.
    print(chunk, end="", flush=True)

Aetherfloris Ventus is a delicate plant of ethereal beauty with petals lighter than air, appearing to float freely, untethered by gravity. It is believed to have originated from the breath of the winds and nurtured by the whispers of the clouds. The plant's essence, captured in rare vials, is said to bestow the gift of lightness and uplift the spirits.

In [12]:
# Stream the chain's answer and print each chunk as it arrives.
for chunk in rag_chain.stream("What's the most important part of the Voynich manuscript?"):
    # end="" keeps the chunks on one continuous line; flush=True pushes each one out immediately.
    print(chunk, end="", flush=True)

The most important part of the Voynich manuscript is the detailed anatomical diagrams of mythical beings, possibly used for medicinal or alchemical purposes, with annotations explaining the function of each organ and system. These illustrations offer a unique blend of fantasy and precise anatomical accuracy, providing insights into ancient medical knowledge intertwined with fantasy. The annotated diagrams may have served as a practical guide for medicinal and alchemical uses, possibly indicating the functions of various organs and their importance in ancient practices.

## Preserve Conversation History 

In [13]:
from langchain.chains import create_history_aware_retriever
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder

# System instruction for the question-rewriting step: turn the latest user question
# into a standalone question without answering it. Built from adjacent string
# literals so the result is a single line with no embedded newlines.
system_prompt = (
    "Given the chat history and a recent user question "
    "generate a new standalone question "
    "that can be understood without the chat history. Do NOT answer the question, "
    "just reformulate it if needed or otherwise return it as is."
)

# Chat prompt for question rewriting: system instruction, then the prior messages
# (filled in from "chat_history"), then the latest user input.
# NOTE: this rebinds `prompt`, which previously held the prompt pulled from the hub.
prompt = ChatPromptTemplate.from_messages([
    ("system", system_prompt),
    MessagesPlaceholder(variable_name="chat_history"),
    ("human", "{input}"),
])

# Retriever that takes the chat history into account via the prompt above.
retriever_with_history = create_history_aware_retriever(
    llm=llm,
    retriever=retriever,
    prompt=prompt,
)

In [14]:
# Display the full chat prompt template built for question rewriting.
prompt

ChatPromptTemplate(input_variables=['chat_history', 'input'], input_types={'chat_history': typing.List[typing.Union[langchain_core.messages.ai.AIMessage, langchain_core.messages.human.HumanMessage, langchain_core.messages.chat.ChatMessage, langchain_core.messages.system.SystemMessage, langchain_core.messages.function.FunctionMessage, langchain_core.messages.tool.ToolMessage]]}, messages=[SystemMessagePromptTemplate(prompt=PromptTemplate(input_variables=[], template='Given the chat history and a recent user question generate a new standalone question that can be understood without the chat history. Do NOT answer the question, just reformulate it if needed or otherwise return it as is.')), MessagesPlaceholder(variable_name='chat_history'), HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['input'], template='{input}'))])

In [15]:
# List the variable names the prompt expects when it is invoked.
prompt.input_variables

['chat_history', 'input']

In [16]:
# Show the declared types of the prompt's input variables.
prompt.input_types

{'chat_history': typing.List[typing.Union[langchain_core.messages.ai.AIMessage, langchain_core.messages.human.HumanMessage, langchain_core.messages.chat.ChatMessage, langchain_core.messages.system.SystemMessage, langchain_core.messages.function.FunctionMessage, langchain_core.messages.tool.ToolMessage]]}

In [17]:
# Show the ordered message templates that make up the prompt.
prompt.messages

[SystemMessagePromptTemplate(prompt=PromptTemplate(input_variables=[], template='Given the chat history and a recent user question generate a new standalone question that can be understood without the chat history. Do NOT answer the question, just reformulate it if needed or otherwise return it as is.')),
 MessagesPlaceholder(variable_name='chat_history'),
 HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['input'], template='{input}'))]

In [18]:
# Display the composed runnable to inspect how the history-aware retriever is wired.
retriever_with_history

RunnableBinding(bound=RunnableBranch(branches=[(RunnableLambda(lambda x: not x.get('chat_history', False)), RunnableLambda(lambda x: x['input'])
| VectorStoreRetriever(tags=['FAISS', 'OpenAIEmbeddings'], vectorstore=<langchain_community.vectorstores.faiss.FAISS object at 0x7c3d9baf30d0>, search_kwargs={'k': 6}))], default=ChatPromptTemplate(input_variables=['chat_history', 'input'], input_types={'chat_history': typing.List[typing.Union[langchain_core.messages.ai.AIMessage, langchain_core.messages.human.HumanMessage, langchain_core.messages.chat.ChatMessage, langchain_core.messages.system.SystemMessage, langchain_core.messages.function.FunctionMessage, langchain_core.messages.tool.ToolMessage]]}, messages=[SystemMessagePromptTemplate(prompt=PromptTemplate(input_variables=[], template='Given the chat history and a recent user question generate a new standalone question that can be understood without the chat history. Do NOT answer the question, just reformulate it if needed or otherwise 