In [1]:
import os

In [2]:
from dotenv import load_dotenv

# Load settings from a .env file into the process environment so the
# os.getenv lookups in the next cell can see them.
load_dotenv()

True

In [3]:
# Read the provider credentials and LangSmith settings from the process
# environment. A name is bound to None when its variable is not set.
GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY")
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
GROQ_API_KEY = os.environ.get("GROQ_API_KEY")
LANGCHAIN_API_KEY = os.environ.get("LANGCHAIN_API_KEY")
LANGCHAIN_PROJECT_NAME = os.environ.get("LANGCHAIN_PROJECT_NAME")

In [4]:
# Make the provider keys available in the process environment.
# os.environ only accepts strings, so a key that was not found (os.getenv
# returned None) is skipped instead of raising TypeError and aborting the cell.
for _env_name, _env_value in (
    ("GOOGLE_API_KEY", GOOGLE_API_KEY),
    ("OPENAI_API_KEY", OPENAI_API_KEY),
    ("GROQ_API_KEY", GROQ_API_KEY),
):
    if _env_value is not None:
        os.environ[_env_name] = _env_value

# LangSmith tracing settings. The Python names are kept for any cell that
# reads them, but LangChain only sees environment variables, so the values
# are exported as well.
LANGCHAIN_TRACING_V2 = "true"
LANGCHAIN_ENDPOINT = "https://api.smith.langchain.com"
LANGCHAIN_PROJECT = LANGCHAIN_PROJECT_NAME

# Only switch tracing on when an API key is present; tracing without a key
# cannot authenticate against the endpoint.
if LANGCHAIN_API_KEY is not None:
    os.environ["LANGCHAIN_TRACING_V2"] = LANGCHAIN_TRACING_V2
    os.environ["LANGCHAIN_ENDPOINT"] = LANGCHAIN_ENDPOINT
    os.environ["LANGCHAIN_API_KEY"] = LANGCHAIN_API_KEY
    if LANGCHAIN_PROJECT is not None:
        os.environ["LANGCHAIN_PROJECT"] = LANGCHAIN_PROJECT

In [32]:
# Scratch cell: the two model identifiers passed to ChatGoogleGenerativeAI and
# ChatGroq in the cells below. These bare string expressions bind no name.
"gemini-1.0-pro"
"Gemma2-9b-It"

'gemini-1.0-pro'

In [62]:
from langchain_google_genai import ChatGoogleGenerativeAI

# Gemini chat client.
# NOTE(review): the ChatGroq cell that follows rebinds `llm`, so this client is
# not the one the later cells call — confirm whether it is still needed.
llm = ChatGoogleGenerativeAI(model="gemini-1.0-pro")

In [63]:
from langchain_groq import ChatGroq

# Rebinds `llm` to a ChatGroq client. The recorded responses further down
# report model_name 'Gemma2-9b-It', i.e. this is the client actually in use.
llm = ChatGroq(model="Gemma2-9b-It")

In [70]:
from langchain_core.chat_history import BaseChatMessageHistory
from langchain_core.chat_history import InMemoryChatMessageHistory
from langchain_core.runnables.history import RunnableWithMessageHistory

In [64]:
# Session registry: maps a session_id to that session's chat history object.
store = dict()

In [67]:
def get_session_history(session_id: str) -> BaseChatMessageHistory:
    """Return the chat history registered for ``session_id``.

    The first request for an unknown ``session_id`` creates an empty
    ``InMemoryChatMessageHistory``, records it in the module-level ``store``
    and returns it; later requests return that same object.
    """
    try:
        return store[session_id]
    except KeyError:
        # First time this session is seen: register a fresh, empty history.
        fresh_history = InMemoryChatMessageHistory()
        store[session_id] = fresh_history
        return fresh_history

In [68]:
# Scratch check: a freshly constructed history displays as messages=[]
# (see the recorded output). The instance is not bound to any name.
InMemoryChatMessageHistory()

InMemoryChatMessageHistory(messages=[])

In [69]:
# Invocation config: the session_id selects which entry of `store` holds the
# conversation, so calls sharing this config share one history.
config = {
    "configurable": {
        "session_id": "my_new_memory_chat",
    },
}

In [71]:
# Wrap the chat model so that each call uses the history returned by
# get_session_history for the session_id supplied in the call's config.
model_with_memory = RunnableWithMessageHistory(
    runnable=llm,
    get_session_history=get_session_history,
)

In [73]:

# First turn of the session: introduce a fact the model should remember.
model_with_memory.invoke(
    input="my name is dinesh",
    config=config,
)

AIMessage(content='Hello, Dinesh!  \n\nIs there anything I can help you with? 😊 \n', additional_kwargs={}, response_metadata={'token_usage': {'completion_tokens': 21, 'prompt_tokens': 14, 'total_tokens': 35, 'completion_time': 0.038181818, 'prompt_time': 8.2899e-05, 'queue_time': 0.013406251000000001, 'total_time': 0.038264717}, 'model_name': 'Gemma2-9b-It', 'system_fingerprint': 'fp_10c08bf97d', 'finish_reason': 'stop', 'logprobs': None}, id='run-7698f63a-2d19-4aef-b5a2-adb145af4124-0', usage_metadata={'input_tokens': 14, 'output_tokens': 21, 'total_tokens': 35})

In [74]:
# Second turn, same session config: the answer can only be right if the
# previous exchange was replayed from the stored history.
model_with_memory.invoke(
    input="what is my name",
    config=config,
)

AIMessage(content='Your name is Dinesh.  \n\nI remember that! 😄 \n\nIs there anything else I can help you with?\n', additional_kwargs={}, response_metadata={'token_usage': {'completion_tokens': 28, 'prompt_tokens': 47, 'total_tokens': 75, 'completion_time': 0.050909091, 'prompt_time': 0.001444956, 'queue_time': 0.011613703, 'total_time': 0.052354047}, 'model_name': 'Gemma2-9b-It', 'system_fingerprint': 'fp_10c08bf97d', 'finish_reason': 'stop', 'logprobs': None}, id='run-2bae38ef-ef99-4363-a3c4-746886def50d-0', usage_metadata={'input_tokens': 47, 'output_tokens': 28, 'total_tokens': 75})

In [83]:
# Inspect what was recorded for the session: the output lists the human and
# AI messages of both turns, in order.
store['my_new_memory_chat'].messages

[HumanMessage(content='my name is dinesh', additional_kwargs={}, response_metadata={}),
 AIMessage(content='Hello, Dinesh!  \n\nIs there anything I can help you with? 😊 \n', additional_kwargs={}, response_metadata={'token_usage': {'completion_tokens': 21, 'prompt_tokens': 14, 'total_tokens': 35, 'completion_time': 0.038181818, 'prompt_time': 8.2899e-05, 'queue_time': 0.013406251000000001, 'total_time': 0.038264717}, 'model_name': 'Gemma2-9b-It', 'system_fingerprint': 'fp_10c08bf97d', 'finish_reason': 'stop', 'logprobs': None}, id='run-7698f63a-2d19-4aef-b5a2-adb145af4124-0', usage_metadata={'input_tokens': 14, 'output_tokens': 21, 'total_tokens': 35}),
 HumanMessage(content='what is my name', additional_kwargs={}, response_metadata={}),
 AIMessage(content='Your name is Dinesh.  \n\nI remember that! 😄 \n\nIs there anything else I can help you with?\n', additional_kwargs={}, response_metadata={'token_usage': {'completion_tokens': 28, 'prompt_tokens': 47, 'total_tokens': 75, 'completion_t

In [102]:
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_google_genai import GoogleGenerativeAIEmbeddings
from langchain_chroma import Chroma
from langchain.prompts import PromptTemplate
from langchain_core.runnables import RunnableParallel, RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser

In [85]:
# Load the PDF from the notebook's working directory (relative path).
# The chunks retrieved later carry 'page' and 'source' metadata from this step.
loader = PyPDFLoader(file_path="The Illustrated Transformer.pdf")
docs = loader.load()

In [88]:
# Split the loaded documents into chunks: target size 500 with an overlap of
# 50, both measured with len (i.e. in characters).
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=500,
    chunk_overlap=50,
    length_function=len,
)
split_docs = text_splitter.split_documents(documents=docs)

In [91]:
# Embedding client used to vectorise the chunks for the vector store below.
embedding = GoogleGenerativeAIEmbeddings(model="models/embedding-001")

In [92]:
# Embed every chunk and index it in a Chroma vector store.
# NOTE(review): the recorded search outputs below return the identical chunk
# twice, which suggests this cell was run more than once against the same
# collection — confirm, and rebuild the store from a fresh kernel if so.
vector_store = Chroma.from_documents(documents=split_docs, embedding=embedding)

In [94]:
# Sanity-check the index directly with a sample query.
vector_store.similarity_search("what is self attention mechanism")

[Document(metadata={'page': 5, 'source': 'The Illustrated Transformer.pdf'}, page_content='Self-Attention in Detail\nLet’s first look at how to calculate self-attention using vectors, then proceed to look at how it’s actually implemented –using matrices.'),
 Document(metadata={'page': 5, 'source': 'The Illustrated Transformer.pdf'}, page_content='Self-Attention in Detail\nLet’s first look at how to calculate self-attention using vectors, then proceed to look at how it’s actually implemented –using matrices.'),
 Document(metadata={'page': 4, 'source': 'The Illustrated Transformer.pdf'}, page_content='What does “it” in this sentence refer to? Is it referring to the street or to the animal? It’s a simple question to a human,but not as simple to an algorithm.\nWhen the model is processing the word “it”, self-attention allows it to associate “it” with “animal”.\nAs the model processes each word (each position in the input sequence), self attention allows it to look at otherpositions in the 

In [97]:
# Expose the store as a retriever; k=3 asks for three chunks per query
# (the recorded retriever output below contains three documents).
retriever = vector_store.as_retriever(search_kwargs={"k": 3})

In [98]:
# Same sample query through the retriever interface that the chain will use.
retriever.invoke("what is self-attention mechanism")

[Document(metadata={'page': 5, 'source': 'The Illustrated Transformer.pdf'}, page_content='Self-Attention in Detail\nLet’s first look at how to calculate self-attention using vectors, then proceed to look at how it’s actually implemented –using matrices.'),
 Document(metadata={'page': 5, 'source': 'The Illustrated Transformer.pdf'}, page_content='Self-Attention in Detail\nLet’s first look at how to calculate self-attention using vectors, then proceed to look at how it’s actually implemented –using matrices.'),
 Document(metadata={'page': 8, 'source': 'The Illustrated Transformer.pdf'}, page_content='That concludes the self-attention calculation. The resulting vector is one we can send along to the feed-forward neuralnetwork. In the actual implementation, however, this calculation is done in matrix form for faster processing. So let’slook at that now that we’ve seen the intuition of the calculation on the word level.\nMatrix Calculation of Self-Attention')]

In [113]:
# Prompt for the retrieval chain: {context} receives the retrieved chunks and
# {question} the user's query. The query is labelled "Question:" so the model
# is not shown the user's question presented as if it were the answer.
template = """Answer the question based on the following context:
{context}

Question: {question}
"""

In [114]:
# Build the prompt object from the template string; its placeholders are
# {context} and {question}.
prompt = PromptTemplate.from_template(template=template)

In [115]:
# Retrieval chain, applied left to right to the incoming question string:
#   1. fan out: "context" runs the retriever on the question, "question"
#      passes the string through unchanged;
#   2. fill the prompt template with both values;
#   3. call the chat model;
#   4. reduce the model's message to a plain string.
retriever_chain = (
    RunnableParallel({"context": retriever, "question": RunnablePassthrough()})
    | prompt
    | llm
    | StrOutputParser()
)

In [117]:
# Run the full chain on the sample question; the result is a plain string
# because the chain ends in StrOutputParser.
retriever_chain.invoke("what is self-attention mechanism")

"Self-attention is a mechanism in machine learning, particularly in natural language processing, that allows a model to weigh the importance of different words in a sequence when processing that sequence.  \n\nHere's a breakdown based on the provided text:\n\n* **Purpose:** Self-attention helps a model understand the relationships between words in a sentence or paragraph. Some words might be more important for understanding the overall meaning than others.\n* **Vector-based Calculation (Conceptual):**  The text mentions calculating self-attention using vectors. This involves representing each word as a vector and then using mathematical operations to compare the similarity between word vectors. Words that are similar in meaning or context will have vectors that are closer together.\n* **Matrix-based Calculation (Practical):** The actual implementation of self-attention uses matrices for efficiency. Matrices allow for faster and more scalable calculations compared to working with indivi