In [1]:
import os
from dotenv import load_dotenv

# Load variables from the .env file one directory above the notebook into the
# process environment (presumably where the Langfuse keys read below live — confirm).
load_dotenv("../.env")

from langfuse.callback import CallbackHandler

# Langfuse callback handler, passed to later calls via config={"callbacks": [...]}.
# os.environ[...] (not .get) raises KeyError right here if a variable is missing.
langfuse_handler = CallbackHandler(
    public_key=os.environ["LANGFUSE_PUBLIC_KEY"],
    secret_key=os.environ["LANGFUSE_SECRET_KEY"],
    host=os.environ["LANGFUSE_HOST"],
)

### Initializing the LLM Model

In [2]:
from langchain_openai import ChatOpenAI

# Notebook-wide chat model; no temperature / max_tokens overrides at this point.
# NOTE(review): `llm` is rebound with different settings in later cells.
llm = ChatOpenAI(model="gpt-4o-mini")

In [4]:
# Smoke test: a plain string is passed as the prompt; show only the reply text.
out = llm.invoke("What is the capital of France?")
out.content

'The capital of France is Paris.'

### Using Chains

In [5]:
from langchain_core.prompts import PromptTemplate

# Rebinds the notebook-wide `llm`: temperature=0 and replies capped at 30 tokens.
llm = ChatOpenAI(model="gpt-4o-mini", temperature=0, max_tokens=30)
# Template with a single input variable, {product}, filled in at invoke time.
prompt = PromptTemplate.from_template(
    "What is a good name for a company that makes {product}?"
)

# Pipe composition: the formatted prompt is passed on to the chat model.
chain = prompt | llm
out = chain.invoke(input={"product": "software"})

In [6]:
# Show just the text of the chain's reply.
out.content

'Choosing a name for a software company can depend on various factors, including your target audience, the type of software you create, and your brand identity.'

#### No Memory

In [7]:
from langchain_core.messages import HumanMessage

# Stand-alone call: only this single message is sent to the model.
# The Langfuse handler is attached through the per-call config.
llm.invoke(
    input=[HumanMessage(content="Hi! I'm Bob")],
    config={"callbacks": [langfuse_handler]},
)

AIMessage(content='Hi Bob! How can I assist you today?', additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 11, 'prompt_tokens': 11, 'total_tokens': 22, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_name': 'gpt-4o-mini-2024-07-18', 'system_fingerprint': 'fp_0aa8d3e20b', 'finish_reason': 'stop', 'logprobs': None}, id='run-8a696b43-994c-45d2-8ecf-180b7fb74ee0-0', usage_metadata={'input_tokens': 11, 'output_tokens': 11, 'total_tokens': 22, 'input_token_details': {'audio': 0, 'cache_read': 0}, 'output_token_details': {'audio': 0, 'reasoning': 0}})

Unexpected error occurred. Please check your request and contact support: https://langfuse.com/support.


In [8]:
# A second, independent call: the previous "I'm Bob" message is not included in
# the input, so the model has nothing to recall the name from (see the reply below).
llm.invoke(
    input=[HumanMessage(content="What is my name?")],
    config={"callbacks": [langfuse_handler]},
)

AIMessage(content="I'm sorry, but I don't have access to personal information about you unless you share it with me. How can I assist you today?", additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 28, 'prompt_tokens': 12, 'total_tokens': 40, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_name': 'gpt-4o-mini-2024-07-18', 'system_fingerprint': 'fp_d02d531b47', 'finish_reason': 'stop', 'logprobs': None}, id='run-e2255f19-4535-478d-9b01-12901bd5fb89-0', usage_metadata={'input_tokens': 12, 'output_tokens': 28, 'total_tokens': 40, 'input_token_details': {'audio': 0, 'cache_read': 0}, 'output_token_details': {'audio': 0, 'reasoning': 0}})

Unexpected error occurred. Please check your request and contact support: https://langfuse.com/support.
Unexpected error occurred. Please check your request and contact support: https://langfuse.com/support.


### Adding Conversation Memory

#### Without Prompt Template

Reference: [LangGraph persistence concepts](https://langchain-ai.github.io/langgraph/concepts/persistence/)

In [3]:
from langgraph.checkpoint.memory import MemorySaver
from langgraph.graph import START, MessagesState, StateGraph

# Define a new graph whose state is the running list of chat messages
workflow = StateGraph(state_schema=MessagesState)


def call_model(state: MessagesState) -> dict:
    """Send the conversation stored in the graph state to the chat model.

    Args:
        state: Current graph state; ``state["messages"]`` is the message list
            for the active thread, ending with the newest human message.

    Returns:
        A partial state update ``{"messages": response}`` that contains only
        the newly generated AI message, not the whole history.
    """
    response = llm.invoke(state["messages"])
    return {"messages": response}


## Example input to call_model:
# state = {
#     "messages": [
#         {"type": "human", "content": "Hi there!"},
#         {"type": "ai", "content": "Hello! How can I help?"},
#         {"type": "human", "content": "Tell me about LangChain."}
#     ]
# }

## call_model itself returns ONLY the new message:
# {"messages": {"type": "ai", "content": "LangChain is a framework..."}}

## MessagesState merges that update into the stored history, so the state
## after the node has run holds the previous messages plus the new one:
# state["messages"] = [
#     {"type": "human", "content": "Hi there!"},
#     {"type": "ai", "content": "Hello! How can I help?"},
#     {"type": "human", "content": "Tell me about LangChain."},
#     {"type": "ai", "content": "LangChain is a framework..."}  # New message
# ]


# Define the (single) node in the graph first, then point the entry edge at it,
# so the edge never refers to a node that has not been registered yet.
workflow.add_node("model", call_model)
workflow.add_edge(START, "model")

# Add memory: the checkpointer stores the state per thread_id between invocations
memory = MemorySaver()
app = workflow.compile(checkpointer=memory)

In [5]:
# The thread_id identifies one conversation: invocations that pass the same
# thread_id share the history kept by the graph's checkpointer.

config = {"configurable": {"thread_id": "test_v0"}}

In [8]:
from langchain_core.messages import HumanMessage

# First turn on this thread: introduce a name the model should remember.
human_query = "Good morning, my name is Vijay!"

input_messages = [HumanMessage(content=human_query)]

# Only the new message is passed in; `config` selects the conversation thread.
out = app.invoke(input={"messages": input_messages}, config=config)

In [10]:
# Second turn on the same thread (same `config`): only the new question is sent.
human_query_2 = "What is my name?"

out_2 = app.invoke(
    input={"messages": [HumanMessage(content=human_query_2)]}, config=config
)

In [16]:
# The returned state carries the whole thread history, not just the last reply.
out_2["messages"]

[HumanMessage(content='Good morning, my name is Vijay!', additional_kwargs={}, response_metadata={}, id='5e5ce275-132a-4893-976f-33020178d13f'),
 AIMessage(content='Good morning, Vijay! How can I assist you today?', additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 13, 'prompt_tokens': 15, 'total_tokens': 28, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_name': 'gpt-4o-mini-2024-07-18', 'system_fingerprint': 'fp_0aa8d3e20b', 'finish_reason': 'stop', 'logprobs': None}, id='run-043f24be-d446-475b-8465-407555936626-0', usage_metadata={'input_tokens': 15, 'output_tokens': 13, 'total_tokens': 28, 'input_token_details': {'audio': 0, 'cache_read': 0}, 'output_token_details': {'audio': 0, 'reasoning': 0}}),
 HumanMessage(content='What is my name?', additional_kwargs={}, response_metadata

#### With Prompt Template

In [9]:
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_core.messages import HumanMessage, AIMessage, SystemMessage
from langchain_openai import ChatOpenAI

# Rebinds the notebook-wide `llm`; replies are capped at 50 tokens.
llm = ChatOpenAI(model="gpt-4o-mini", max_tokens=50)

# Fixed system instruction followed by the conversation supplied under "messages".
# NOTE(review): "everythin" is a typo, but it is part of the prompt text sent to
# the model, so it is left untouched here.
prompt_template = ChatPromptTemplate(
    [
        SystemMessage(
            content="You are a sarcastic bot. Answer everythin as sarcastically as possible!"
        ),
        MessagesPlaceholder(variable_name="messages"),
    ]
)

In [5]:
from langgraph.checkpoint.memory import MemorySaver
from langgraph.graph import START, MessagesState, StateGraph

# Fresh graph for the prompt-template variant; state is the running message list
workflow = StateGraph(state_schema=MessagesState)


def call_model(state: MessagesState) -> dict:
    """Render the prompt template with the stored history and call the model.

    Args:
        state: Current graph state; its ``"messages"`` entry fills the
            ``MessagesPlaceholder`` of ``prompt_template``.

    Returns:
        A partial state update ``{"messages": response}`` containing only the
        newly generated AI message.
    """
    prompt = prompt_template.invoke(state)
    response = llm.invoke(prompt)
    return {"messages": response}


# Register the node before wiring the entry edge to it, so the edge never
# refers to a node that has not been added yet.
workflow.add_node("model", call_model)
workflow.add_edge(START, "model")

# A new MemorySaver: histories from the previous graph are not carried over
memory = MemorySaver()
app = workflow.compile(checkpointer=memory)

In [15]:
# New thread for this graph; the Langfuse handler is attached via "callbacks".
config = {"configurable": {"thread_id": "test_v1"}, "callbacks": [langfuse_handler]}
input_messages = [HumanMessage(content="What is the capital of France?")]

out = app.invoke(input={"messages": input_messages}, config=config)

In [16]:
# Display the full returned state (the thread's message list).
out

{'messages': [HumanMessage(content='What is the capital of France?', additional_kwargs={}, response_metadata={}, id='1bf34acc-bc56-496d-8e8d-f3b543e7666e'),
  AIMessage(content="Oh, let me think really hard about this one... It's not like it's common knowledge or anything. The capital of France is, drumroll please... Paris! Shocking, I know!", additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 39, 'prompt_tokens': 34, 'total_tokens': 73, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_name': 'gpt-4o-mini-2024-07-18', 'system_fingerprint': 'fp_0aa8d3e20b', 'finish_reason': 'stop', 'logprobs': None}, id='run-cebb3295-c123-45b3-a3d2-648137bed018-0', usage_metadata={'input_tokens': 34, 'output_tokens': 39, 'total_tokens': 73, 'input_token_details': {'audio': 0, 'cache_read': 0}, 'output

In [17]:
# Follow-up on the same thread (`config` is reused from the previous cell).
input_messages = [
    HumanMessage(content="Can you give me just the answer and nothing extra? Thanks")
]
out = app.invoke(input={"messages": input_messages}, config=config)
out

{'messages': [HumanMessage(content='What is the capital of France?', additional_kwargs={}, response_metadata={}, id='1bf34acc-bc56-496d-8e8d-f3b543e7666e'),
  AIMessage(content="Oh, let me think really hard about this one... It's not like it's common knowledge or anything. The capital of France is, drumroll please... Paris! Shocking, I know!", additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 39, 'prompt_tokens': 34, 'total_tokens': 73, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_name': 'gpt-4o-mini-2024-07-18', 'system_fingerprint': 'fp_0aa8d3e20b', 'finish_reason': 'stop', 'logprobs': None}, id='run-cebb3295-c123-45b3-a3d2-648137bed018-0', usage_metadata={'input_tokens': 34, 'output_tokens': 39, 'total_tokens': 73, 'input_token_details': {'audio': 0, 'cache_read': 0}, 'output

#### Prompt Template with Input Variables

In [76]:
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder

# The system entry is given as a ("system", template) tuple so that {language}
# is treated as an input variable and filled in when the prompt is invoked.
prompt_template = ChatPromptTemplate(
    [
        (
            "system",
            "Answer the user's question in {language} language only",
        ),  # A SystemMessage instance won't work here: it is a fixed message, so {language} would not be substituted
        MessagesPlaceholder(variable_name="messages"),
    ]
)

In [77]:
from typing import Sequence
from langchain_core.messages import BaseMessage
from typing_extensions import TypedDict

from langgraph.graph.message import add_messages
from typing_extensions import Annotated


class CustomState(TypedDict):
    """Graph state: the chat history plus the language the model must answer in."""

    # add_messages is the reducer for this key: messages returned by a node are
    # merged into the existing history. A bare Sequence[BaseMessage] annotation
    # would have no reducer attached.
    messages: Annotated[Sequence[BaseMessage], add_messages]
    # Fills the {language} variable of prompt_template.
    language: str


workflow = StateGraph(state_schema=CustomState)


def call_model(state: CustomState) -> dict:
    """Render the prompt from the full state and call the model.

    Args:
        state: Current graph state; both ``"messages"`` and ``"language"``
            are consumed by ``prompt_template``.

    Returns:
        A partial state update ``{"messages": response}`` with only the new
        AI message; ``"language"`` is not modified by this node.
    """
    prompt = prompt_template.invoke(state)
    response = llm.invoke(prompt)
    return {"messages": response}


# Register the node before wiring the entry edge to it, so the edge never
# refers to a node that has not been added yet.
workflow.add_node("model", call_model)
workflow.add_edge(START, "model")

memory = MemorySaver()
app = workflow.compile(checkpointer=memory)

In [78]:
# New thread; the Langfuse handler is attached via "callbacks".
config = {"configurable": {"thread_id": "test_v4"}, "callbacks": [langfuse_handler]}

input_messages = [HumanMessage(content="How many states are currently in India?")]

# Both state keys are supplied on the first turn: the message and the language.
out = app.invoke(
    input={"messages": input_messages, "language": "French"}, config=config
)
out

{'messages': [HumanMessage(content='How many states are currently in India?', additional_kwargs={}, response_metadata={}, id='8167f474-b3f7-4027-b11b-3c2051b462b5'),
  AIMessage(content="L'Inde compte actuellement 28 États.", additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 11, 'prompt_tokens': 27, 'total_tokens': 38, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_name': 'gpt-4o-mini-2024-07-18', 'system_fingerprint': 'fp_72ed7ab54c', 'finish_reason': 'stop', 'logprobs': None}, id='run-56b2f198-276e-4e8c-b1db-8b21e5fb766b-0', usage_metadata={'input_tokens': 27, 'output_tokens': 11, 'total_tokens': 38, 'input_token_details': {'audio': 0, 'cache_read': 0}, 'output_token_details': {'audio': 0, 'reasoning': 0}})],
 'language': 'French'}

In [80]:
# Same thread as the previous cell. "language" is deliberately left out of the
# input to show that it does not have to be sent again: presumably the value
# stored for this thread by the checkpointer ("French") is reused — confirm in the reply.
input_messages = [HumanMessage(content="How many cities are there?")]
out = app.invoke(
    input={
        "messages": input_messages,
        # "language": "Tamil"
    },
    config=config,
)
out

{'messages': [HumanMessage(content='How many states are currently in India?', additional_kwargs={}, response_metadata={}, id='8167f474-b3f7-4027-b11b-3c2051b462b5'),
  AIMessage(content="L'Inde compte actuellement 28 États.", additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 11, 'prompt_tokens': 27, 'total_tokens': 38, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_name': 'gpt-4o-mini-2024-07-18', 'system_fingerprint': 'fp_72ed7ab54c', 'finish_reason': 'stop', 'logprobs': None}, id='run-56b2f198-276e-4e8c-b1db-8b21e5fb766b-0', usage_metadata={'input_tokens': 27, 'output_tokens': 11, 'total_tokens': 38, 'input_token_details': {'audio': 0, 'cache_read': 0}, 'output_token_details': {'audio': 0, 'reasoning': 0}}),
  HumanMessage(content='How many cities are there?', additional_kwargs=

In [81]:
# The entire state is persisted per thread, so parameters only need to be passed
# again when they should change. Here "language" IS passed again, which replaces
# the value used so far on this thread with "Tamil".
input_messages = [HumanMessage(content="What's the national sport?")]
out = app.invoke(input={"messages": input_messages, "language": "Tamil"}, config=config)
out

{'messages': [HumanMessage(content='How many states are currently in India?', additional_kwargs={}, response_metadata={}, id='8167f474-b3f7-4027-b11b-3c2051b462b5'),
  AIMessage(content="L'Inde compte actuellement 28 États.", additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 11, 'prompt_tokens': 27, 'total_tokens': 38, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_name': 'gpt-4o-mini-2024-07-18', 'system_fingerprint': 'fp_72ed7ab54c', 'finish_reason': 'stop', 'logprobs': None}, id='run-56b2f198-276e-4e8c-b1db-8b21e5fb766b-0', usage_metadata={'input_tokens': 27, 'output_tokens': 11, 'total_tokens': 38, 'input_token_details': {'audio': 0, 'cache_read': 0}, 'output_token_details': {'audio': 0, 'reasoning': 0}}),
  HumanMessage(content='How many cities are there?', additional_kwargs=

#### Managing History Size

1. [Trim Messages Method](https://python.langchain.com/api_reference/core/messages/langchain_core.messages.utils.trim_messages.html)
2. [Filter Messages Method](https://python.langchain.com/docs/how_to/filter_messages/)


##### 1 - Trim Messages Method

In [93]:
from langchain_core.messages import trim_messages, SystemMessage

# Scripted conversation used to exercise the trimmer, as (message class, text) pairs.
_turns = [
    (SystemMessage, "you're a good assistant"),
    (HumanMessage, "hi! I'm bob"),
    (AIMessage, "hi!"),
    (HumanMessage, "I like vanilla ice cream"),
    (AIMessage, "nice"),
    (HumanMessage, "whats 2 + 2"),
    (AIMessage, "4"),
    (HumanMessage, "thanks, my name's Bob the Builder"),
    (AIMessage, "no problem!"),
    (HumanMessage, "having fun?"),
    (AIMessage, "yes!"),
]
messages = [make_message(content=text) for make_message, text in _turns]

# Called without messages, trim_messages gives back an object that is applied
# later with .invoke(...). Settings: keep the last messages within a 60-token
# budget (counted with `llm`), keep the system message, never cut a message
# in half, and start the kept window on a human message.
trimmer = trim_messages(
    start_on="human",
    allow_partial=False,
    include_system=True,
    token_counter=llm,
    strategy="last",
    max_tokens=60,
)

trimmer.invoke(messages)

[SystemMessage(content="you're a good assistant", additional_kwargs={}, response_metadata={}),
 HumanMessage(content='whats 2 + 2', additional_kwargs={}, response_metadata={}),
 AIMessage(content='4', additional_kwargs={}, response_metadata={}),
 HumanMessage(content="thanks, my name's Bob the Builder", additional_kwargs={}, response_metadata={}),
 AIMessage(content='no problem!', additional_kwargs={}, response_metadata={}),
 HumanMessage(content='having fun?', additional_kwargs={}, response_metadata={}),
 AIMessage(content='yes!', additional_kwargs={}, response_metadata={})]

In [94]:
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder

# Same template as in the "Prompt Template with Input Variables" section,
# redefined so this section can be read on its own.
# The system entry is a ("system", template) tuple so that {language} is treated
# as an input variable and filled in when the prompt is invoked.
prompt_template = ChatPromptTemplate(
    [
        (
            "system",
            "Answer the user's question in {language} language only",
        ),  # A SystemMessage instance won't work here: it is a fixed message, so {language} would not be substituted
        MessagesPlaceholder(variable_name="messages"),
    ]
)

In [95]:
class CustomState(TypedDict):
    """Graph state: the chat history plus the language the model must answer in."""

    # add_messages is the reducer for this key: messages returned by a node are
    # merged into the existing history. A bare Sequence[BaseMessage] annotation
    # would have no reducer attached.
    messages: Annotated[Sequence[BaseMessage], add_messages]
    # Fills the {language} variable of prompt_template.
    language: str


workflow = StateGraph(state_schema=CustomState)


def call_model(state: CustomState) -> dict:
    """Trim the stored history, render the prompt and call the model.

    Only the prompt sent to the model is trimmed; the state itself is not
    shortened by this node.

    Args:
        state: Current graph state with ``"messages"`` (full history) and
            ``"language"``.

    Returns:
        A partial state update ``{"messages": response}`` with only the new
        AI message.
    """
    trimmed_messages = trimmer.invoke(state["messages"])
    prompt = prompt_template.invoke(
        {"messages": trimmed_messages, "language": state["language"]}
    )
    response = llm.invoke(prompt)
    return {"messages": response}


# Register the node before wiring the entry edge to it, so the edge never
# refers to a node that has not been added yet.
workflow.add_node("model", call_model)
workflow.add_edge(START, "model")

memory = MemorySaver()
app = workflow.compile(checkpointer=memory)

In [96]:
# New thread for the trimming graph; the Langfuse handler is attached via "callbacks".
config = {"configurable": {"thread_id": "test_v10"}, "callbacks": [langfuse_handler]}

# The question that closes the seeded conversation.
message = HumanMessage(content="What is my name again?")

# Pre-written history passed in as the first input on this thread,
# with the new question as its last entry.
messages = [
    SystemMessage(content="you're a good assistant"),
    HumanMessage(content="hi! I'm bob"),
    AIMessage(content="hi!"),
    HumanMessage(content="I like vanilla ice cream"),
    AIMessage(content="nice"),
    HumanMessage(content="whats 2 + 2"),
    AIMessage(content="4"),
    HumanMessage(content="thanks, my name's Bob the Builder"),
    AIMessage(content="no problem!"),
    HumanMessage(content="having fun?"),
    AIMessage(content="yes!"),
    message,
]

out = app.invoke(
    input={"messages": messages, "language": "English"},
    config=config,
)

out

{'messages': [SystemMessage(content="you're a good assistant", additional_kwargs={}, response_metadata={}, id='e76aecf4-7250-42a7-bc8b-6d9b20817c58'),
  HumanMessage(content="hi! I'm bob", additional_kwargs={}, response_metadata={}, id='28b3b910-050a-453e-9985-98369517dc79'),
  AIMessage(content='hi!', additional_kwargs={}, response_metadata={}, id='3783d9c3-48b0-4d7f-b106-fad43c4b6887'),
  HumanMessage(content='I like vanilla ice cream', additional_kwargs={}, response_metadata={}, id='69405165-e883-4393-b4ea-a234e1b2e954'),
  AIMessage(content='nice', additional_kwargs={}, response_metadata={}, id='118a803a-ab77-4c76-9e71-9927088fc7c6'),
  HumanMessage(content='whats 2 + 2', additional_kwargs={}, response_metadata={}, id='dadc8db0-87c5-4822-a99f-0a9399f7fd21'),
  AIMessage(content='4', additional_kwargs={}, response_metadata={}, id='7d184722-d427-4a72-bb07-e0242d80e957'),
  HumanMessage(content="thanks, my name's Bob the Builder", additional_kwargs={}, response_metadata={}, id='57e9ac

In [97]:
# Print only the last message of the returned state, i.e. the model's reply.
out["messages"][-1].pretty_print()


Your name is Bob the Builder.


### Streaming

In [98]:
text = "Write 4 things the rhymes with my name."
input_messages = [HumanMessage(content=text)]

# With stream_mode="messages" each item is unpacked as (message chunk, metadata).
# `config` is reused from the earlier cell, so this turn continues that thread;
# "language" is not passed again and is presumably taken from the persisted state.
for chunk, meta_data in app.stream(
    input={"messages": input_messages},  # reuse the list built above instead of rebuilding it
    config=config,
    stream_mode="messages",
):
    # Print model output only; "|" marks the boundary between streamed chunks.
    if isinstance(chunk, AIMessage):
        print(chunk.content, end="|")

|Here| are| four| things| that| rhyme| with| "|Bob| the| Builder|":

|1|.| Mob| the| H|ilder|
|2|.| Cob| the| S|ilder|
|3|.| Sob| the| G|ilder|
|4|.| Job| the| F|ilder|

|(Note|:| These| are| playful| and| imaginative| rh|ymes|,||