In [1]:
import os
from typing import Any, Dict, Optional, Sequence

import tiktoken
from dotenv import load_dotenv
from langchain.schema.output_parser import StrOutputParser
from langchain_community.chat_message_histories import ChatMessageHistory
from langchain_core.chat_history import (
    BaseChatMessageHistory,
    InMemoryChatMessageHistory,
)
from langchain_core.globals import set_debug
from langchain_core.messages import SystemMessage
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_core.runnables import (
    RunnableLambda,
    RunnablePassthrough,
    RunnableSerializable,
)
from langchain_core.runnables.history import RunnableWithMessageHistory
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_mistralai import ChatMistralAI
from langchain_openai import ChatOpenAI

from prompts.gen_prompts import GENERAL_PROMPT, RAG_PROMPT
from rag_pinecone import basic_retriever

# Enable LangChain's global debug mode so chain/LLM runs are traced in cell output.
set_debug(True)
# Load variables from a .env file into the process environment
# (the recorded cell output shows it returned True).
load_dotenv()

  from .autonotebook import tqdm as notebook_tqdm


True

In [2]:
# Provider API keys read from the environment; os.getenv yields None for any
# variable that is not set.
google_api_key = os.getenv("GOOGLE_API_KEY")
openai_api_key = os.getenv("OPENAI_API_KEY")
mistralai_api_key = os.getenv("MISTRALAI_API_KEY")

In [3]:
# Per-session chat histories keyed by session_id; populated by get_session_history.
memory_store: Dict[str, BaseChatMessageHistory] = {}

gemini_llm = ChatGoogleGenerativeAI(  # type: ignore
    # Pass the key through unchanged: wrapping it in an f-string turned a
    # missing key (None) into the literal string "None", which hides the
    # configuration error behind an opaque authentication failure.
    google_api_key=google_api_key,
    model="gemini-pro",
)
mistral_llm = ChatMistralAI(
    model="mistral-large-latest",
    # The notebook reads MISTRALAI_API_KEY, which the client does not look up
    # by itself; hand it over explicitly and fall back to MISTRAL_API_KEY so
    # environments that relied on the library default keep working.
    mistral_api_key=mistralai_api_key or os.getenv("MISTRAL_API_KEY"),
)
openai_llm = ChatOpenAI(openai_api_key=openai_api_key, model="gpt-4o")

In [4]:
def summarize_memory(
    stored_session: InMemoryChatMessageHistory,
) -> InMemoryChatMessageHistory:
    """Collapse a session's chat history into a single LLM-written summary.

    The full history is sent to ``gemini_llm`` followed by an instruction to
    distill it. The session is then emptied and the model's reply is stored
    as its only message.

    Args:
        stored_session: The history to condense; it is mutated in place.

    Returns:
        The same ``stored_session`` object, now holding only the summary.
    """
    instruction = (
        "user",
        "The mesages above are from an AI/Human chat session. You need to distill the above chat messages into a single summary message. Include as many specific details as you can. But be sure that it is done in a way that is concise and easy to understand as it will be used to summarize the chat history and used as reference later by the AI",
    )
    prompt = ChatPromptTemplate.from_messages(
        [MessagesPlaceholder(variable_name="history"), instruction]
    )
    summarizer = prompt | gemini_llm
    # The run name is what shows up in the debug trace for this chain.
    summarizer = summarizer.with_config(config={"run_name": "sumarize_memory"})

    # The model is invoked before the history is cleared, so the messages are
    # still present when the prompt is rendered.
    summary = summarizer.invoke({"history": stored_session.messages})
    stored_session.clear()
    stored_session.add_message(summary)
    return stored_session

In [5]:
def check_memory_token_size(
    messages: Sequence[Any],
    max_tokens: int = 100,
    encoding: Optional[Any] = None,
) -> bool:
    """Report whether a list of chat messages exceeds a token budget.

    Args:
        messages: The messages to measure (callers pass ``history.messages``,
            not the history object itself). Each item must expose ``content``.
        max_tokens: Budget to compare against. Defaults to 100, the value that
            was previously hard-coded.
        encoding: Object exposing ``encode(text)`` and returning a sized
            token sequence. Defaults to tiktoken's ``cl100k_base`` encoding.

    Returns:
        True when the combined token count is strictly greater than
        ``max_tokens``, otherwise False (including for an empty list).
    """
    if encoding is None:
        encoding = tiktoken.get_encoding("cl100k_base")

    total_tokens = 0
    for message in messages:
        content = message.content
        # Message content is not guaranteed to be a plain string (it can be a
        # list of content parts); stringify so encode() does not raise.
        text = content if isinstance(content, str) else str(content)
        total_tokens += len(encoding.encode(text))
    return total_tokens > max_tokens


def get_session_history(session_id: str) -> BaseChatMessageHistory:
    """Fetch the chat history for ``session_id``, creating it on first use.

    An unknown session gets a fresh ``ChatMessageHistory`` registered in
    ``memory_store`` and returned immediately. A known session holding more
    than 6 messages whose token size check passes is condensed by
    ``summarize_memory`` before being returned.
    """
    try:
        history = memory_store[session_id]
    except KeyError:
        history = memory_store[session_id] = ChatMessageHistory()
        return history

    # Short-circuit keeps the token count from running on short histories.
    needs_summary = len(history.messages) > 6 and check_memory_token_size(
        history.messages
    )
    return summarize_memory(history) if needs_summary else history

In [16]:
def baisc_conversation(
    query: Dict[str, Any], config: Optional[Dict[str, Dict[str, Any]]] = None
) -> str:
    """Run one turn of the general-purpose chat with per-session history.

    Builds ``GENERAL_PROMPT | openai_llm | StrOutputParser()``, wraps it in a
    ``RunnableWithMessageHistory`` backed by ``get_session_history`` and
    invokes it once.

    Args:
        query: Chain input; the user's text is read from the ``"query"`` key.
        config: Runnable config. Callers in this notebook pass
            ``{"configurable": {"session_id": ...}}`` to select the history.

    Returns:
        The model's reply as a string (the chain ends in ``StrOutputParser``),
        not the runnable itself.
    """
    basic_convo = GENERAL_PROMPT | openai_llm | StrOutputParser()
    with_message_history = RunnableWithMessageHistory(
        basic_convo,
        get_session_history,
        input_messages_key="query",
        history_messages_key="history",
    )
    return with_message_history.invoke(query, config)


# Ask a follow-up question in session "def234"; the answer depends on whatever
# history that session already holds in memory_store.
baisc_conversation(
    {
        "query": "Can you kindly list all the cities we spoke about?",
    },
    config={"configurable": {"session_id": "def234"}},
)

[32;1m[1;3m[chain/start][0m [1m[chain:sumarize_memory] Entering Chain run with input:
[0m[inputs]
[32;1m[1;3m[chain/start][0m [1m[chain:sumarize_memory > prompt:ChatPromptTemplate] Entering Prompt run with input:
[0m[inputs]
[36;1m[1;3m[chain/end][0m [1m[chain:sumarize_memory > prompt:ChatPromptTemplate] [1ms] Exiting Prompt run with output:
[0m[outputs]
[32;1m[1;3m[llm/start][0m [1m[chain:sumarize_memory > llm:ChatGoogleGenerativeAI] Entering LLM run with input:
[0m{
  "prompts": [
    "AI: **Summary of Chat Session**\n\nThe chat focused on two locations:\n\n**Sarasota, Florida:**\n\n* Beautiful city on the southwestern coast of Florida\n* Known for its stunning beaches, especially Siesta Key Beach with white, powdery sand\n* Rich arts community with attractions like the Ringling Museum of Art, Sarasota Opera, and Sarasota Ballet\n* Offers nature and outdoor activities like the Marie Selby Botanical Gardens and Myakka River State Park\n* Has a vibrant dining and sh

'Absolutely! Here are the cities we discussed:\n\n1. Sarasota, Florida\n2. Chicago, Illinois\n3. Kansas City, Missouri\n4. Salt Lake City, Utah\n\nIf you have any more questions or need further details about any of these cities, feel free to ask! 😊'

In [17]:
# Inspect session "def234": number of stored messages, whether they exceed the
# token threshold checked by check_memory_token_size, and the raw messages.
print(len(memory_store["def234"].messages))
print(check_memory_token_size(memory_store["def234"].messages))
print(memory_store["def234"].messages)

3
True
[AIMessage(content='**Summary of Chat Session**\n\nThe chat focused on three locations:\n\n**Sarasota, Florida**\n\n* Beautiful city on the southwestern coast of Florida\n* Known for its stunning beaches, especially Siesta Key Beach with white, powdery sand\n* Rich arts community with attractions like the Ringling Museum of Art, Sarasota Opera, and Sarasota Ballet\n* Offers nature and outdoor activities like the Marie Selby Botanical Gardens and Myakka River State Park\n* Has a vibrant dining and shopping scene, including St. Armands Circle\n\n**Chicago, Illinois**\n\n* Located on the shores of Lake Michigan\n* Famous for its impressive skyline and architecture, including the Willis Tower and Wrigley Building\n* Houses world-class museums such as the Art Institute of Chicago, Museum of Science and Industry, and Field Museum\n* Has a thriving music and theater scene, with renowned institutions like the Chicago Symphony Orchestra and Second City comedy club\n* Known for its culina

In [8]:
docs = await basic_retriever(query="Who is Alis?")
print(len(docs))
print(docs[0].page_content)

4


AttributeError: 'str' object has no attribute 'page_content'

In [None]:
retriever_runnable = RunnableLambda(basic_retriever)
basic_convo = RAG_PROMPT | openai_llm | StrOutputParser()

with_message_history = RunnableWithMessageHistory(
    basic_convo,
    get_session_history,
    input_messages_key="query",
    history_messages_key="history",
)
query = "Who is Alis?"
chain = (
    {"context": retriever_runnable, "query": RunnablePassthrough()}
    | with_message_history
    | StrOutputParser()
)

await chain.ainvoke("Who is Alis?", config={"configurable": {"session_id": "def234"}})

In [None]:
async def basic_rag_conversation(
    query: str, config: Dict[str, Dict[str, Any]]
) -> str:
    """Answer ``query`` with retrieval-augmented generation and session history.

    The question is sent to ``basic_retriever`` to produce the ``context`` and
    is also passed through as ``query``; both feed ``RAG_PROMPT``, which runs
    against ``openai_llm`` inside a ``RunnableWithMessageHistory``.

    Args:
        query: The user's question.
        config: Runnable config. Callers in this notebook pass
            ``{"configurable": {"session_id": ...}}`` to select the history.

    Returns:
        The model's reply as a string (not a runnable).
    """
    basic_convo = RAG_PROMPT | openai_llm | StrOutputParser()
    with_message_history = RunnableWithMessageHistory(
        basic_convo,
        get_session_history,
        input_messages_key="query",
        history_messages_key="history",
    )
    retriever_runnable = RunnableLambda(basic_retriever)
    # basic_convo already parses to str, so the extra trailing StrOutputParser
    # the original appended here was a no-op and has been dropped.
    chain = {
        "context": retriever_runnable,
        "query": RunnablePassthrough(),
    } | with_message_history
    return await chain.ainvoke(query, config)


response = basic_rag_conversation(
    "Who is Alis?", {"configurable": {"session_id": "def234"}}
)

print(await response)

In [None]:
import tiktoken

# Tokenizer used to measure how large the stored chat history has grown.
encoding = tiktoken.get_encoding("cl100k_base")
# encoding = tiktoken.encoding_for_model("gpt-3.5-turbo")
count = []
# Print each message's token count for session "def234", then the total.
for message in memory_store["def234"].messages:
    token_count = len(encoding.encode(message.content))
    print(token_count, "\n______________________\n")
    count.append(token_count)
print(sum(count))

In [None]:
# Display the current contents of the in-memory session store.
memory_store

{'def234': InMemoryChatMessageHistory(messages=[])}

In [None]:
# Module-level tokenizer; the async summarize_memory defined in this cell reads it.
encoding = tiktoken.get_encoding("cl100k_base")


async def summarize_memory(memory_store, session_id: str):
    """Condense one session's history into a single summary message, in place.

    NOTE(review): this redefinition shadows the earlier synchronous
    ``summarize_memory(stored_session)`` that ``get_session_history`` calls.
    Once this cell has run, that call no longer matches the signature.

    Args:
        memory_store: Mapping of session id to chat-history object.
        session_id: Key of the session to summarize.

    Returns:
        None. Does nothing when the session is unknown or empty, or when its
        messages total fewer than 100 tokens per the module-level ``encoding``.
    """
    history = memory_store.get(session_id)
    # The history object itself has no length; inspect its messages, and
    # tolerate sessions that were never created.
    if history is None or not history.messages:
        return

    # Snapshot the messages so clearing the history cannot affect the list
    # handed to the summarization chain.
    messages = list(history.messages)
    total_tokens = 0
    for message in messages:
        content = message.content
        text = content if isinstance(content, str) else str(content)
        total_tokens += len(encoding.encode(text))

    if total_tokens < 100:
        print("Less than 100 tokens")
        return

    summarization_prompt = ChatPromptTemplate.from_messages(
        [
            MessagesPlaceholder(variable_name="history"),
            (
                "user",
                "Distill the above chat messages into a single summary message. Include as many specific details as you can.",
            ),
        ]
    )
    summarization_chain = summarization_prompt | openai_llm
    summary_message = await summarization_chain.ainvoke({"history": messages})
    # Operate on the history object (not its message list) and store the one
    # summary message with add_message.
    history.clear()
    history.add_message(summary_message)
    return

[32;1m[1;3m[chain/start][0m [1m[chain:RunnableSequence] Entering Chain run with input:
[0m{
  "query": "Who is Alis?"
}
[32;1m[1;3m[chain/start][0m [1m[chain:RunnableSequence > chain:RunnableAssign<messages_summarized>] Entering Chain run with input:
[0m{
  "query": "Who is Alis?"
}
[32;1m[1;3m[chain/start][0m [1m[chain:RunnableSequence > chain:RunnableAssign<messages_summarized> > chain:RunnableParallel<messages_summarized>] Entering Chain run with input:
[0m{
  "query": "Who is Alis?"
}
[32;1m[1;3m[chain/start][0m [1m[chain:RunnableSequence > chain:RunnableAssign<messages_summarized> > chain:RunnableParallel<messages_summarized> > chain:summarize_memory] Entering Chain run with input:
[0m{
  "query": "Who is Alis?"
}
[31;1m[1;3m[chain/error][0m [1m[chain:RunnableSequence > chain:RunnableAssign<messages_summarized> > chain:RunnableParallel<messages_summarized> > chain:summarize_memory] [1ms] Chain run errored with error:
[0m"TypeError(\"summarize_memory() miss

TypeError: summarize_memory() missing 1 required positional argument: 'session_id'

In [None]:
chain = (
    RunnablePassthrough.assign(messages_summarized=summarize_memory)
    | with_message_history
)
await chain.ainvoke(
    {"query": "Who is Alis?"}, config={"configurable": {"session_id": "def234"}}
)

In [None]:
# Display the composed runnable to inspect its structure.
chain

RunnableAssign(mapper={
  messages_summarized: RunnableLambda(afunc=summarize_memory)
})
| RunnableWithMessageHistory(bound=RunnableBinding(bound=RunnableBinding(bound=RunnableAssign(mapper={
    history: RunnableBinding(bound=RunnableLambda(_enter_history), config={'run_name': 'load_history'})
  }), config={'run_name': 'insert_history'})
  | RunnableBinding(bound=ChatPromptTemplate(input_variables=['context', 'history', 'query'], input_types={'history': typing.List[typing.Union[langchain_core.messages.ai.AIMessage, langchain_core.messages.human.HumanMessage, langchain_core.messages.chat.ChatMessage, langchain_core.messages.system.SystemMessage, langchain_core.messages.function.FunctionMessage, langchain_core.messages.tool.ToolMessage]]}, messages=[SystemMessagePromptTemplate(prompt=PromptTemplate(input_variables=[], template='You are a helpful and friendly chatbot with peronality name Yumi.\nYour responses MUST be 2000 or fewer characters in length.\n')), MessagesPlaceholder(variable_