In [1]:
import os
from dotenv import load_dotenv

# Populate the process environment (presumably from a local .env file) before
# any of the os.getenv / os.environ lookups in this notebook run.
load_dotenv()

from openai import AzureOpenAI
# NOTE: no class-level `AzureOpenAI.api_key = ...` assignment here. The key is
# handed to the chat model explicitly through its `api_key=` argument, so
# mutating the SDK class had no effect on the requests that are sent.

from langchain_core.messages import HumanMessage, SystemMessage, AIMessage

In [2]:
from langchain_openai import AzureChatOpenAI
# Azure-hosted chat model shared by the cells below.
# os.environ[...] is used instead of os.getenv(...) so that a missing setting
# fails right here with a KeyError naming the variable, rather than passing
# None into the client and surfacing later as an opaque request error.
llm = AzureChatOpenAI(
    azure_endpoint=os.environ["AZURE_OPENAI_ENDPOINT"],
    azure_deployment=os.environ["AZURE_OPENAI_LLM_MODEL"],
    api_version="2025-01-01-preview",
    api_key=os.environ["AZURE_OPENAI_API_KEY"],
)

In [3]:
# Single-turn call: the model receives only this one human message.
llm.invoke(
    input=[
        HumanMessage(content="Hello! I am Abhi, How are you?"),
    ]
)

AIMessage(content='Hey Abhi! Iâ€™m just a bunch of code, so I donâ€™t have feelingsâ€”but Iâ€™m here and ready to chat! How are *you* doing today?', additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 36, 'prompt_tokens': 18, 'total_tokens': 54, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_provider': 'openai', 'model_name': 'gpt-5-chat-2025-10-03', 'system_fingerprint': 'fp_88bf7c189b', 'id': 'chatcmpl-Cx6F7sRJUGy7oJ8zfsF89wbU0xbcr', 'prompt_filter_results': [{'prompt_index': 0, 'content_filter_results': {'hate': {'filtered': False, 'severity': 'safe'}, 'jailbreak': {'filtered': False, 'detected': False}, 'self_harm': {'filtered': False, 'severity': 'safe'}, 'sexual': {'filtered': False, 'severity': 'safe'}, 'violence': {'filtered': False, 'severity': 'safe'}}}], 'finish_reason': 

In [4]:
# Multi-turn call: the earlier exchange is replayed by hand so the model has
# the context it needs for the final request.
# The assistant turn previously contained mis-encoded apostrophes (UTF-8 bytes
# read as cp1252); they are restored to the intended character here.
llm.invoke([
    HumanMessage(content="Hello! I am Abhi, How are you?"),
    AIMessage(content="Hello Abhi! I’m just a program, so I don’t have feelings, but I’m here and ready to chat with you. How are you doing today?"),
    HumanMessage(content="I am doing great! Can you tell me a joke?"),
])

AIMessage(content='Iâ€™m glad to hear that, Abhi! Sure â€” hereâ€™s a light one for you:  \n\nWhy donâ€™t scientists trust atoms?  \nBecause they make up everything! ðŸ˜„  \n\nWould you like another one?', additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 45, 'prompt_tokens': 72, 'total_tokens': 117, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_provider': 'openai', 'model_name': 'gpt-5-chat-2025-10-03', 'system_fingerprint': 'fp_88bf7c189b', 'id': 'chatcmpl-Cx6F8m6SpLbNrFKXPimbKHdAkj1RD', 'prompt_filter_results': [{'prompt_index': 0, 'content_filter_results': {'hate': {'filtered': False, 'severity': 'safe'}, 'jailbreak': {'filtered': False, 'detected': False}, 'self_harm': {'filtered': False, 'severity': 'safe'}, 'sexual': {'filtered': False, 'severity': 'safe'}, 'violence': {'f

In [5]:
## Message history
from langchain_community.chat_message_histories import ChatMessageHistory
from langchain_core.chat_history import BaseChatMessageHistory
from langchain_core.runnables.history import RunnableWithMessageHistory

In [6]:
# In-memory registry of chat histories keyed by session id.
# Re-running this cell rebinds it to an empty dict.
store = {}

def get_session_history(session_id: str) -> BaseChatMessageHistory:
    """Return the history registered for ``session_id``.

    An empty ``ChatMessageHistory`` is created and registered in ``store``
    the first time a session id is seen.
    """
    try:
        return store[session_id]
    except KeyError:
        # Unknown session: register a fresh history and hand that same object back.
        history = store[session_id] = ChatMessageHistory()
        return history

# Wrap the bare chat model; the history object for each call is obtained
# through get_session_history.
with_message_history = RunnableWithMessageHistory(
    runnable=llm,
    get_session_history=get_session_history,
)

def save_session_history(session_id: str, history: BaseChatMessageHistory) -> None:
    """Register ``history`` under ``session_id`` in ``store``, replacing any existing entry."""
    store.update({session_id: history})

In [7]:
# Per-call configuration carrying the session id of the conversation to use.
config = dict(configurable=dict(session_id="user_12345"))

In [8]:
# Introduce the user within the session selected by `config`.
intro = HumanMessage(content="Hello! I am Abhi. I am an AI engineer.")
response = with_message_history.invoke(input=[intro], config=config)
response.content

'Hi Abhi! That’s great to hear — always nice to meet a fellow AI enthusiast. What kind of AI work are you involved in?'

In [9]:
# Reuses the session id "user_12345", so this call runs against the same
# stored history entry as the introduction.
config_1 = dict(configurable=dict(session_id="user_12345"))

response = with_message_history.invoke(
    input=[HumanMessage(content="Whats' my name? What do I do?")],
    config=config_1,
)

response.content

'Your name is Abhi, and you’re an AI engineer.'

In [10]:
## Using with different session id
# A session id with no entry in `store` yet: get_session_history creates an
# empty history for it.
config_2 = dict(configurable=dict(session_id="user_67890"))

response = with_message_history.invoke(
    input=[HumanMessage(content="Whats' my name? What do I do?")],
    config=config_2,
)

response.content

'I don’t have access to personal data, so I don’t actually know your name or what you do.  \nBut if you tell me a bit about yourself—like what field you work in or what you’re studying—I can tailor my responses to better fit you. Would you like to tell me more?'

In [11]:
## Prompt Template
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder

# Fixed system instruction, followed by whatever message list is supplied
# under the "messages" variable at invocation time.
prompt = ChatPromptTemplate.from_messages(
    messages=[
        SystemMessage(
            content="You are a helpful assistant that answers people's question to best of your ability."
        ),
        MessagesPlaceholder(variable_name="messages"),
    ]
)

In [12]:
# Compose prompt -> model into a single runnable (same composition as `prompt | llm`).
chain = prompt.pipe(llm)

In [13]:
# The dict key must match the placeholder name declared in the prompt.
chain.invoke(
    input={
        "messages": [HumanMessage(content="Hello! I am Abhi.")],
    }
)

AIMessage(content='Hello Abhi! ðŸ˜Š Nice to meet you. How are you doing today?', additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 17, 'prompt_tokens': 33, 'total_tokens': 50, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_provider': 'openai', 'model_name': 'gpt-5-chat-2025-10-03', 'system_fingerprint': 'fp_88bf7c189b', 'id': 'chatcmpl-Cx6FDBClBEJDzTU0hqgrdyNnEJZQV', 'prompt_filter_results': [{'prompt_index': 0, 'content_filter_results': {'hate': {'filtered': False, 'severity': 'safe'}, 'jailbreak': {'filtered': False, 'detected': False}, 'self_harm': {'filtered': False, 'severity': 'safe'}, 'sexual': {'filtered': False, 'severity': 'safe'}, 'violence': {'filtered': False, 'severity': 'safe'}}}], 'finish_reason': 'stop', 'logprobs': None, 'content_filter_results': {'hate': {'filte

In [14]:
# Re-bind the history wrapper around the prompt+model chain instead of the bare model.
# NOTE(review): no input_messages_key is given, yet the wrapper is then invoked
# with a bare message list. Presumably the single-variable prompt accepts that; confirm.
with_message_history = RunnableWithMessageHistory(
    runnable=chain,
    get_session_history=get_session_history,
)

In [15]:
# New session id for the prompt-based chain.
config = dict(configurable=dict(session_id="user_10000"))
response = with_message_history.invoke(
    input=[HumanMessage(content="Hello! I am Abhi. I am an AI engineer.")],
    config=config,
)

response.content

'Hello Abhi! Nice to meet you. That’s great—you’re an AI engineer! What kind of projects are you working on, or what areas of AI are you most interested in?'

In [16]:
## Add more complexity to the prompt
# The system instruction is now a template: {language} must be supplied at
# invocation time alongside the "messages" list.
prompt = ChatPromptTemplate.from_messages(
    messages=[
        (
            "system",
            "You are a helpful assistant that answers people's question to best of your ability. Answer in the give language - {language}.",
        ),
        MessagesPlaceholder(variable_name="messages"),
    ]
)

# Rebuild the chain so that it picks up the new prompt.
chain = prompt.pipe(llm)

In [17]:
# Both template variables are provided: the message list and the reply language.
chain.invoke(
    input={
        "language": "French",
        "messages": [HumanMessage(content="Hello! I am Abhi. I am an AI engineer.")],
    }
)

AIMessage(content='Bonjour Abhi\u202f! EnchantÃ© de faire ta connaissance. Câ€™est gÃ©nial que tu sois ingÃ©nieur en intelligence artificielle\u202f! Sur quoi travailles-tu en ce moment\u202f?', additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 39, 'prompt_tokens': 47, 'total_tokens': 86, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_provider': 'openai', 'model_name': 'gpt-5-chat-2025-10-03', 'system_fingerprint': 'fp_88bf7c189b', 'id': 'chatcmpl-Cx6FFzOOuBjb559T4JYuzLjcJgy8d', 'prompt_filter_results': [{'prompt_index': 0, 'content_filter_results': {'hate': {'filtered': False, 'severity': 'safe'}, 'jailbreak': {'filtered': False, 'detected': False}, 'self_harm': {'filtered': False, 'severity': 'safe'}, 'sexual': {'filtered': False, 'severity': 'safe'}, 'violence': {'filtered': Fa

In [18]:
# The chain input is now a dict with several keys, so the wrapper is told
# which key holds the messages.
with_message_history = RunnableWithMessageHistory(
    runnable=chain,
    get_session_history=get_session_history,
    input_messages_key="messages",
)

config = dict(configurable=dict(session_id="user_10001"))
response = with_message_history.invoke(
    input={
        "language": "Spanish",
        "messages": [HumanMessage(content="Hello! I am Abhi. I am an AI engineer.")],
    },
    config=config,
)

response.content

'¡Hola, Abhi! Encantado de conocerte. Qué interesante que seas ingeniero en inteligencia artificial. ¿En qué tipo de proyectos estás trabajando actualmente?'

In [19]:
# Follow-up question sent with the same `config` as the previous call.
response = with_message_history.invoke(
    input={
        "language": "Spanish",
        "messages": [HumanMessage(content="What's my name and profession?")],
    },
    config=config,
)

response.content

'Tu nombre es Abhi y tu profesión es ingeniero en inteligencia artificial.'

In [27]:
## Manage the Conversation History
from langchain_core.messages import SystemMessage, HumanMessage, AIMessage, trim_messages

# Reusable trimming step: no messages are passed here, and the resulting
# object is later applied with trimmer.invoke(...).
trimmer = trim_messages(
    strategy="last",
    max_tokens=500,
    # token_counter=llm,  # This does not work with AzureChatOpenAI currently
    token_counter="approximate",
    start_on="human",
    include_system=True,
    allow_partial=False,
)

"""
Why token_counter="approximate" or a custom token counter is needed with AzureChatOpenAI:

- trim_messages needs a way to count tokens.
- When a BaseLanguageModel is passed as token_counter, LangChain calls
  get_num_tokens_from_messages() on the model.

- ChatOpenAI works out-of-the-box because it has a concrete model_name
  (e.g., "gpt-4o", "gpt-3.5-turbo") that tiktoken can map to a tokenizer.

- AzureChatOpenAI uses deployments instead of explicit model names.
  As a result, model_name is often None internally.

- During token counting, tiktoken tries to resolve a tokenizer using
  model_name.startswith(...), which crashes when model_name is None.

- Using token_counter="approximate" avoids model-specific tokenizers
  and provides a fast, stable estimate suitable for trimming chat history.

- Using a custom token counter gives exact and Azure-safe token counts
  without relying on AzureChatOpenAI internals.

Conclusion:
- ChatOpenAI → safe to pass the LLM directly as token_counter.
- AzureChatOpenAI → use "approximate" or a custom token counter to avoid
  tokenizer resolution errors.
"""

# Sample conversation used to exercise the trimmer: one system instruction
# followed by alternating human / AI turns.
messages = [
    SystemMessage(
        content="You are a helpful assistant that answers people's question to best of your ability."
    ),
    HumanMessage(content="Hello! I am Abhi. I am an AI engineer."),
    AIMessage(content="Hello Abhi!"),
    HumanMessage(content="Can you tell me what's 4+4?"),
    AIMessage(content="Sure, 4+4 is 8."),
    HumanMessage(content="Thanks!"),
]

# Display what survives trimming.
trimmer.invoke(input=messages)

[SystemMessage(content="You are a helpful assistant that answers people's question to best of your ability.", additional_kwargs={}, response_metadata={}),
 HumanMessage(content='Hello! I am Abhi. I am an AI engineer.', additional_kwargs={}, response_metadata={}),
 AIMessage(content='Hello Abhi!', additional_kwargs={}, response_metadata={}, tool_calls=[], invalid_tool_calls=[]),
 HumanMessage(content="Can you tell me what's 4+4?", additional_kwargs={}, response_metadata={}),
 AIMessage(content='Sure, 4+4 is 8.', additional_kwargs={}, response_metadata={}, tool_calls=[], invalid_tool_calls=[]),
 HumanMessage(content='Thanks!', additional_kwargs={}, response_metadata={})]

In [30]:
from operator import itemgetter
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser

# Pipeline: replace the "messages" entry with its trimmed version, then
# render the prompt, call the model and return the reply as a plain string.
chain = RunnablePassthrough.assign(
    messages=itemgetter("messages") | trimmer
).pipe(prompt, llm, StrOutputParser())

# Ask about an earlier turn of the sample conversation.
response = chain.invoke(
    input={
        "language": "English",
        "messages": messages + [HumanMessage(content="What math problem did I ask for?")],
    }
)

response

'You asked for the result of **4 + 4**.'

In [31]:
#Let's wrap the above in message history runnable

# History wrapper around the trimming chain; "messages" is the input key that
# carries the conversation.
with_message_history = RunnableWithMessageHistory(
    runnable=chain,
    get_session_history=get_session_history,
    input_messages_key="messages",
)

config = dict(configurable=dict(session_id="user_20000"))

In [33]:
# Introduce the user in the session selected by `config`.
# The result variable is spelled `response`, matching the other cells
# (it was previously misspelled `reponse`).
response = with_message_history.invoke(
    {
        "messages": [HumanMessage(content="Hello! I am Abhi. I am an AI engineer.")],
        "language": "English",
    },
    config=config,
)
response

'Hello Abhi! Great to meet you again. Being an AI engineer sounds exciting! Are you currently working on something specific in AI — like machine learning models, natural language processing, or something else?'

In [34]:
# Ask about a math problem in the session selected by `config`.
# The result variable is spelled `response`, matching the other cells
# (it was previously misspelled `reponse`).
response = with_message_history.invoke(
    {
        "messages": [HumanMessage(content="What math problem did I ask for?")],
        "language": "English",
    },
    config=config,
)
response

'You haven’t asked any math problem yet. So far, you’ve just introduced yourself as Abhi, an AI engineer. Would you like to ask a math question now?'

In [35]:
# State a preference that a later turn can ask about.
# The result variable is spelled `response`, matching the other cells
# (it was previously misspelled `reponse`).
response = with_message_history.invoke(
    {
        "messages": [HumanMessage(content="I like statistics. What is formula of MSE?")],
        "language": "English",
    },
    config=config,
)
response

'That’s great, Abhi! Since you like statistics, you probably know that **MSE (Mean Squared Error)** is a common measure of how close predictions are to actual outcomes.  \n\nThe formula for **MSE** is:  \n\n\\[\n\\text{MSE} = \\frac{1}{n} \\sum_{i=1}^{n}(y_i - \\hat{y}_i)^2\n\\]\n\nWhere:  \n- \\( n \\) = number of data points  \n- \\( y_i \\) = actual (true) value  \n- \\( \\hat{y}_i \\) = predicted value  \n\nIt basically averages the squared differences between predicted and actual values, giving more weight to larger errors.  \n\nWould you like me to explain how it relates to variance or how it’s used in model optimization?'

In [36]:
# Ask the model to recall a preference from the session selected by `config`.
# The result variable is spelled `response`, matching the other cells
# (it was previously misspelled `reponse`).
response = with_message_history.invoke(
    {
        "messages": [HumanMessage(content="What do I like?")],
        "language": "English",
    },
    config=config,
)
response

'You mentioned that you like **statistics**! You also introduced yourself as **Abhi**, an **AI engineer** — so it seems you enjoy working with data and artificial intelligence too.'