In [2]:
from pathlib import Path
import sys

cur_path = Path.cwd()
root_path = cur_path.parent.parent.parent
sys.path.append(str(root_path))
from settings import set_environment

set_environment()

这部分书里很多实现langchain已经弃用了
TODO: 看看store, memory这几个部分的源码，总结一下

## ConversationBufferMemory

- a simple wrapper that stores the messages in a variable
- retains all previous interactions


以下是一个 Automatic history management 的例子

In [29]:
from langchain_core.runnables.history import RunnableWithMessageHistory
from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain.memory import ConversationBufferMemory
from langchain_openai import ChatOpenAI

prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            "You are a helpful assistant. Answer all questions to the best of your ability.",
        ),
        MessagesPlaceholder(variable_name="history"),
    ]
)
llm = ChatOpenAI(model_name="gpt-4o-mini", temperature=0, streaming=True)
chain = prompt | llm

memory = ConversationBufferMemory()
chain_with_history = RunnableWithMessageHistory(
    chain,
    get_session_history=lambda session_id: memory.chat_memory,  # lambda 定义匿名函数, session_id 是参数名, memory 返回值
    input_messages_key="input",
    history_messages_key="history",
    verbose=True
)

chain_with_history.invoke(input={'input': 'Hello'}, config={"configurable": {"session_id": "foo"}})
chain_with_history.invoke(input={'input': 'How`s going on?'}, config={"configurable": {"session_id": "foo"}})
print(memory.chat_memory.messages)

[HumanMessage(content='Hello'), AIMessage(content="Sure! I'm here to help. What questions do you have?", response_metadata={'finish_reason': 'stop', 'model_name': 'gpt-4o-mini-2024-07-18', 'system_fingerprint': 'fp_9722793223'}, id='run-bd743dc3-549a-4ea6-8de8-dc0994d58f87-0'), HumanMessage(content='How`s going on?'), AIMessage(content='Hello! How can I assist you today?', response_metadata={'finish_reason': 'stop', 'model_name': 'gpt-4o-mini-2024-07-18', 'system_fingerprint': 'fp_f33667828e'}, id='run-88ed1613-546f-4aa7-a8cd-603f5d7ae1b1-0')]


## ConversationBufferWindowMemory 
- a memory type provided by LangChain that keeps track  of the interactions in a conversation over time.
- only keeps the last k  interactions, where k is the window size specified.

In [31]:
from langchain.memory import ConversationBufferWindowMemory

memory = ConversationBufferWindowMemory(k=2)

In [32]:
memory.save_context(inputs={"input": "hi"}, outputs={"output": "whats up"})
memory.save_context(inputs={"input": "not much you"}, outputs={"output": "not much"})
memory.load_memory_variables({})

{'history': 'Human: hi\nAI: whats up\nHuman: not much you\nAI: not much'}

## Customize Conversational Memory

In [34]:
from langchain.memory import ConversationBufferMemory
from langchain_openai import ChatOpenAI
from langchain.chains import ConversationChain
from langchain.prompts import PromptTemplate

llm = ChatOpenAI(model_name="gpt-4o-mini", temperature=0, streaming=True)

template = """
    The following is a friendly conversation between a human and an AI. The AI is talkative and provides lots 
    of specific details from its context. If the AI does not know the answer to a question, it truthfully 
    says it does not know.  
    Current conversation:  {history}  Human: {input}  AI Assistant:
    """
prompt = PromptTemplate(template=template, input_variables=["history", "input"])
conversation = ConversationChain(
    prompt=prompt,  
    llm=llm,  
    verbose=True,  
    memory=ConversationBufferMemory(ai_prefix="AI Assistant"),
)

## ConversationSummaryMemory

**Step**: 
1. create an instance of it, passing the language model (llm) as an argument.
2. use the `save_context()` method to save the interaction context, which includes the user input and AI output
3. To retrieve the summarized conversation history, use the `load_memory_variables()` method.

In [36]:
from langchain.memory import ConversationSummaryMemory
from langchain_openai import ChatOpenAI

llm = ChatOpenAI(model_name="gpt-4o-mini", temperature=0, streaming=True)
memory = ConversationSummaryMemory(llm=llm)
memory.save_context({"input": "hi"}, {"output": "whats up"})
memory.load_memory_variables({})

{'history': 'The human greets the AI with "hi," and the AI responds by asking, "what\'s up?"'}

## Storing knowledge graphs

A knowledge graph is a **structured knowledge representation model** that organizes information in the form of: 
- **entities**: can be any concept, object, or thing in the world
- **attributes**: describe the properties or characteristics of these entities
- **relationships**: capture the connections and  associations between entities, providing contextual information and enabling semantic reasoning.

It represents knowledge as a graph, where:
- entities are represented as **nodes**
- relationships between entities are represented as **edges**. 

There’s functionality in LangChain for knowledge graphs for retrieval; <BR>
Provides memory components to automatically create a knowledge graph based on our conversation messages.

In [46]:
from langchain.memory import ConversationKGMemory
from langchain_openai import ChatOpenAI
from IPython.display import SVG

llm = ChatOpenAI(model_name="gpt-4o-mini", temperature=0, streaming=True)
memory = ConversationKGMemory(llm=llm)
memory.save_context({"input": "hi"}, {"output": "whats up"})
memory.kg.draw_graphviz()

<IPython.core.display.SVG object>

## Combining several memory mechanisms

LangChain also allows combining multiple memory strategies using the CombinedMemory class.  This is useful when you want to maintain various aspects of the conversation history.

In [43]:
from langchain_openai import ChatOpenAI
from langchain.memory import ConversationBufferMemory, ConversationSummaryBufferMemory, CombinedMemory
from langchain.prompts import PromptTemplate
from langchain.chains import ConversationChain

llm = ChatOpenAI(model_name="gpt-4o-mini", temperature=0, streaming=True)

cov_memo = ConversationBufferMemory(memory_key="chat_history_lines", input_key="input")
summary_memo = ConversationBufferMemory(llm=llm, input_key="input")
memory = CombinedMemory(memories=[cov_memo, summary_memo])

TEMPLATE = """
    The following is a friendly conversation between a human and an AI. The AI is talkative and provides lots of specific details from its context. If the AI does not know the answer to a question, it truthfully says it does not know.
    Summary of conversation:  {history}
    Current conversation:  {chat_history_lines}
    Human: {input}  AI:
"""

prompt = PromptTemplate(template=TEMPLATE, input_variables=["history", "input", "chat_ history_lines"])
conversation = ConversationChain(llm=llm, verbose=True, memory=memory, prompt=prompt)
conversation.invoke("Hi!")



[1m> Entering new ConversationChain chain...[0m
Prompt after formatting:
[32;1m[1;3m
    The following is a friendly conversation between a human and an AI. The AI is talkative and provides lots of specific details from its context. If the AI does not know the answer to a question, it truthfully says it does not know.
    Summary of conversation:  
    Current conversation:  
    Human: Hi!  AI:
[0m

[1m> Finished chain.[0m


{'input': 'Hi!',
 'chat_history_lines': '',
 'history': '',
 'response': "Hello! How are you today? I'm here to chat about anything on your mind, whether it's a question, a topic you're interested in, or just some friendly banter. What would you like to talk about?"}

## Long-term persistence

Zep provides a persistent backend to store, summarize, and search chat histories using vector embeddings and auto-token counting.

[Zep docs](https://help.getzep.com/concepts)

In [5]:
from langchain_community.memory.zep_memory import ZepMemory
from settings import get_zep_config
import uuid

zep_config = get_zep_config()
session_id = str(uuid.uuid4())

memory = ZepMemory(
    session_id=session_id,
    url=zep_config['url'],
    api_key=zep_config['api_key'],
    memory_key="chat_history",
)

APIError: Failed to connect to Zep server. Please check that:
         - the server is running 
         - the API URL is correct
         - No other process is using the same port
         : None