In [1]:
from dotenv import load_dotenv
from langchain_nvidia_ai_endpoints import ChatNVIDIA
from langchain_core.messages import HumanMessage, AIMessage

from langgraph.checkpoint.memory import MemorySaver
from langgraph.graph import START, MessagesState, StateGraph

from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder

from typing import Sequence
from langchain_core.messages import BaseMessage
from langgraph.graph.message import add_messages
from typing_extensions import Annotated, TypedDict

from langsmith import utils

from langchain_core.messages import SystemMessage, trim_messages

import os

In [2]:
# Load credentials from a local, git-ignored .env file (or the process
# environment) instead of embedding them in the notebook.
# SECURITY: API keys used to be written in plain text in this cell. Any key
# that was ever committed or shared that way must be treated as compromised
# and rotated.
load_dotenv()

# Non-secret LangSmith tracing settings.
os.environ["LANGCHAIN_TRACING_V2"] = "true"
os.environ["LANGCHAIN_ENDPOINT"] = "https://eu.api.smith.langchain.com"
os.environ["LANGCHAIN_PROJECT"] = "pr-juicy-equality-90"

# Secrets must be supplied externally; fail fast with an actionable message
# rather than letting a later API call fail with an opaque auth error.
_missing_keys = [
    name
    for name in ("LANGCHAIN_API_KEY", "NVIDIA_API_KEY")
    if not os.environ.get(name)
]
if _missing_keys:
    raise RuntimeError(
        "Missing required environment variable(s): "
        + ", ".join(_missing_keys)
        + ". Define them in a .env file next to this notebook or export them "
        "before starting the kernel."
    )

In [3]:
# Sanity check: display whether LangSmith tracing is enabled for this kernel.
utils.tracing_is_enabled()

True

In [4]:
# Initialise the NVIDIA-hosted chat model used by every cell below.
# NOTE(review): presumably authenticates via the NVIDIA_API_KEY environment
# variable set earlier in the notebook - confirm against the ChatNVIDIA docs.
model = ChatNVIDIA(model="meta/llama-3.1-405b-instruct")

In [5]:
# Send the first message as a standalone, single-message call.
model.invoke([HumanMessage(content="Hi! I am Deepak")])

AIMessage(content="Namaste Deepak! It's nice to meet you. How are you doing today? Is there something I can help you with or would you like to chat?", additional_kwargs={}, response_metadata={'role': 'assistant', 'content': "Namaste Deepak! It's nice to meet you. How are you doing today? Is there something I can help you with or would you like to chat?", 'token_usage': {'prompt_tokens': 17, 'total_tokens': 50, 'completion_tokens': 33}, 'finish_reason': 'stop', 'model_name': 'meta/llama-3.1-405b-instruct'}, id='run-b5a0b805-25c9-4493-8983-e7a47d41acee-0', usage_metadata={'input_tokens': 17, 'output_tokens': 33, 'total_tokens': 50}, role='assistant')

In [6]:
# Send a follow-up message to see whether the model remembers the name.
# It won't: this call contains only the new message, not the earlier turn.
model.invoke([HumanMessage(content="What was my name?")])

AIMessage(content="I don't have any information about your name. I'm a large language model, I don't have personal interactions or memories, so I don't retain information about individual users. Each time you interact with me, it's a new conversation and I don't have any prior knowledge about you. If you'd like to introduce yourself, I'd be happy to chat with you!", additional_kwargs={}, response_metadata={'role': 'assistant', 'content': "I don't have any information about your name. I'm a large language model, I don't have personal interactions or memories, so I don't retain information about individual users. Each time you interact with me, it's a new conversation and I don't have any prior knowledge about you. If you'd like to introduce yourself, I'd be happy to chat with you!", 'token_usage': {'prompt_tokens': 16, 'total_tokens': 92, 'completion_tokens': 76}, 'finish_reason': 'stop', 'model_name': 'meta/llama-3.1-405b-instruct'}, id='run-6257d71c-1182-4fe9-aef5-f2fda5ef3382-0', usa

In [7]:
# Replay the earlier turns explicitly so the model receives the whole
# conversation and has the context needed to answer the final question.
model.invoke([
    HumanMessage(content="Hi! I am Deepak"),
    AIMessage(content="Hi Deepak! Nice to meet you! Is there something I can help you with?"),
    HumanMessage(content="What was my name?"),
])

AIMessage(content='Your name is Deepak!', additional_kwargs={}, response_metadata={'role': 'assistant', 'content': 'Your name is Deepak!', 'token_usage': {'prompt_tokens': 50, 'total_tokens': 56, 'completion_tokens': 6}, 'finish_reason': 'stop', 'model_name': 'meta/llama-3.1-405b-instruct'}, id='run-fc7c088c-8828-42fb-8858-71157b06761e-0', usage_metadata={'input_tokens': 50, 'output_tokens': 6, 'total_tokens': 56}, role='assistant')

## Prompt template

In [8]:
# Chat prompt: a system instruction with a {language} slot, followed by the
# conversation supplied under the "messages" variable.
prompt = ChatPromptTemplate.from_messages([
    ("system", "You are a helpful translator. Answer all questions in {language}."),
    MessagesPlaceholder(variable_name="messages"),
])

## Message Persistence

In [9]:
# Reusable message trimmer applied to the history before each model call.
# Token counts come from `model`; the budget is 100 tokens, the "last"
# strategy is used, and whole messages only (no partial messages) are kept.
trimmer = trim_messages(
    strategy="last",
    max_tokens=100,
    token_counter=model,
    allow_partial=False,
    include_system=True,
    start_on="human",
)

In [10]:
class State(TypedDict):
    """Schema of the state dict handled by the graph built in this cell."""
    # Conversation history; annotated with the `add_messages` reducer.
    messages: Annotated[Sequence[BaseMessage], add_messages]
    # Value substituted for the {language} slot of the prompt in call_model.
    language: str

# Graph builder that uses State as its state schema.
workflow = StateGraph(state_schema=State)

def call_model(state: State):
    """Graph node: trim the history, then run it through the prompt and model.

    Args:
        state: Current graph state. ``state["messages"]`` is the conversation
            so far and ``state["language"]`` fills the ``{language}`` slot of
            the prompt.

    Returns:
        ``{"messages": response}`` where ``response`` is the model's reply.
        The ``messages`` key of ``State`` is annotated with ``add_messages``.

    Raises:
        KeyError: If ``state`` has no ``"language"`` or ``"messages"`` entry.
    """
    chain = prompt | model
    trimmed_messages = trimmer.invoke(state["messages"])
    # Debug trace of the newest message that survived trimming. The list can
    # be empty (for example an empty history, or nothing fitting the token
    # budget), so guard the [-1] lookup instead of raising IndexError.
    if trimmed_messages:
        print(f"TRIM: ========={trimmed_messages[-1]}=========")
    else:
        print("TRIM: ========= no messages left after trimming =========")
    response = chain.invoke(
        {"messages": trimmed_messages, "language": state["language"]}
    )
    return {"messages": response}

# Register the single node first, then make it the graph's entry point.
workflow.add_node("model", call_model)
workflow.add_edge(START, "model")

# Compile the graph with an in-memory checkpointer.
memory = MemorySaver()
app = workflow.compile(checkpointer=memory)

In [11]:
# Run configuration identifying the conversation thread. Using a distinct
# thread_id per conversation lets a single application serve several
# conversation threads, a common requirement when it has multiple users.
config = {
    "configurable": {
        "thread_id": "abc123",
    },
}

In [12]:
# First turn on this thread: introduce the user and choose the reply language.
query = "Hi! I am Myra"
language = "English"

input_messages = [HumanMessage(query)]
output = app.invoke({"messages": input_messages, "language": language}, config)

# Display every message in the returned state; append [-1].pretty_print()
# to show only the latest reply.
output["messages"]

tokenizer_config.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/1.04M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/665 [00:00<?, ?B/s]



[HumanMessage(content='Hi! I am Myra', additional_kwargs={}, response_metadata={}, id='90fd8e86-2c7f-47cb-a831-89b2d56b1aa7'),
 AIMessage(content='Hello Myra! Nice to meet you. How can I help you today? Do you need some translation help or just want to chat?', additional_kwargs={}, response_metadata={'role': 'assistant', 'content': 'Hello Myra! Nice to meet you. How can I help you today? Do you need some translation help or just want to chat?', 'token_usage': {'prompt_tokens': 34, 'total_tokens': 62, 'completion_tokens': 28}, 'finish_reason': 'stop', 'model_name': 'meta/llama-3.1-405b-instruct'}, id='run-8b17a446-fd32-4760-9751-49996efa1e37-0', usage_metadata={'input_tokens': 34, 'output_tokens': 28, 'total_tokens': 62}, role='assistant')]

In [13]:
# Second turn on the same thread (same `config`). Only the new message is
# sent and "language" is omitted.
# NOTE(review): presumably the earlier messages and the language are restored
# from the checkpoint for this thread_id - confirm.
query = "What is my name?"

input_messages = [HumanMessage(query)]
output = app.invoke({"messages": input_messages}, config)

# Display every message in the returned state; append [-1].pretty_print()
# to show only the latest reply.
output["messages"]



[HumanMessage(content='Hi! I am Myra', additional_kwargs={}, response_metadata={}, id='90fd8e86-2c7f-47cb-a831-89b2d56b1aa7'),
 AIMessage(content='Hello Myra! Nice to meet you. How can I help you today? Do you need some translation help or just want to chat?', additional_kwargs={}, response_metadata={'role': 'assistant', 'content': 'Hello Myra! Nice to meet you. How can I help you today? Do you need some translation help or just want to chat?', 'token_usage': {'prompt_tokens': 34, 'total_tokens': 62, 'completion_tokens': 28}, 'finish_reason': 'stop', 'model_name': 'meta/llama-3.1-405b-instruct'}, id='run-8b17a446-fd32-4760-9751-49996efa1e37-0', usage_metadata={'input_tokens': 34, 'output_tokens': 28, 'total_tokens': 62}, role='assistant'),
 HumanMessage(content='What is my name?', additional_kwargs={}, response_metadata={}, id='5ab708d1-fecf-4903-8af6-278476c0a81d'),
 AIMessage(content='Your name is Myra!', additional_kwargs={}, response_metadata={'role': 'assistant', 'content': 'Your 