### **Load Environment Configurations**

In [13]:
%pip --quiet install python-dotenv

Note: you may need to restart the kernel to use updated packages.


In [14]:
from dotenv import load_dotenv, find_dotenv
import os

load_dotenv(find_dotenv())

OLAMA_BASE_URL = os.getenv('OLAMA_BASE_URL')


### **Load/Setup Vector Database -- Chroma**

In [15]:
%pip install --quiet -U langchain-ollama
%pip install --quiet -U langchain-chroma

Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.


In [16]:
from langchain_ollama import OllamaEmbeddings
from langchain_chroma import Chroma

# Initialize the embeddings
embeddings = OllamaEmbeddings(
    base_url=OLAMA_BASE_URL,
    model="nomic-embed-text",
)

persist_directory = "./chroma_langchain_db"
vector_store = Chroma(
    collection_name="long_term_memories",
    embedding_function=embeddings,
    persist_directory=persist_directory,  # Where to save data locally, remove if not necessary
    create_collection_if_not_exists=True,
)

### Insert Memories

In [17]:
from langchain_core.documents import Document
from uuid import uuid4

document_1 = Document(
    page_content="I love pizza",
    metadata={},
)
document_2 = Document(
    page_content="I prefer Korean food",
    metadata={},
)
document_3 = Document(
    page_content="I love spicy food",
    metadata={},
)
document_4 = Document(
    page_content="I am studying computer science",
    metadata={},
)
document_5 = Document(
    page_content="I am a student",
    metadata={},
)

documents = [
    document_1,
    document_2,
    document_3,
    document_4,
    document_5,
]

uuids = [str(uuid4()) for _ in range(len(documents))]
vector_store.add_documents(documents=documents, ids=uuids)

['b757c7b3-a01c-4ff5-a44f-4f2b23e3faea',
 '893ff4aa-ce4c-4563-840a-8848c5e71955',
 'af1ac112-b001-4b0b-9708-601964d6845b',
 '41b86881-f393-4d4c-a2e7-0da9156b7b00',
 '56351758-9260-4678-a582-c3a9318866b5']

In [18]:
# Test Query
query = "what's my name"

# results = vector_store.similarity_search(query, k=3, filter=None)
results = vector_store.similarity_search_with_score(query, k=3, filter=None)
print(results)
for res, score in results:
    print(f"* [SIM={score:3f}] {res.page_content} [{res.metadata}]")

[(Document(id='e77facaa-ef3d-4128-b853-1ed69197ab34', metadata={}, page_content='Hello'), 1.0494743881539488), (Document(id='df95c508-d11a-43be-933a-96a382ee8bdc', metadata={}, page_content='I am a student'), 1.1332038151490709), (Document(id='56351758-9260-4678-a582-c3a9318866b5', metadata={}, page_content='I am a student'), 1.1332038151490709)]
* [SIM=1.049474] Hello [{}]
* [SIM=1.133204] I am a student [{}]
* [SIM=1.133204] I am a student [{}]


### **Load LLM**

In [19]:
%pip install --quiet -U langchain-ollama

Note: you may need to restart the kernel to use updated packages.


In [20]:
from langchain_ollama.chat_models import ChatOllama

model = ChatOllama(
    base_url=OLAMA_BASE_URL,
    model="llama3.2:1b",
    temperature=0.8,
)

### Multiple Conversation Threads: Multiple Users

In [21]:
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder, SystemMessagePromptTemplate
from langchain_core.messages import AIMessage, HumanMessage, BaseMessage, trim_messages
from langgraph.graph.message import add_messages
from typing import Sequence
from typing_extensions import Annotated, TypedDict
from langchain_core.documents import Document
from uuid import uuid4

prompt_template = ChatPromptTemplate(
    [
        SystemMessagePromptTemplate.from_template("You are a helpful assistant. <memories>{memories}</memories>"),
        MessagesPlaceholder(variable_name="messages"),
    ]
)
class State(TypedDict):
    messages: Annotated[Sequence[BaseMessage], add_messages]

trimmer = trim_messages(
    max_tokens=256,
    strategy="last",
    token_counter=model,
    include_system=True,
    allow_partial=False,
    start_on="human",
)

def call_model(state: State):
    trimmed_messages = trimmer.invoke(state["messages"])

    last_msg = state["messages"][-1]
    if isinstance(last_msg, HumanMessage):
        query = last_msg.content
        results = vector_store.similarity_search_with_score(query, k=2)
        memories = "\n".join(f"{str(res.page_content)}" for res, _ in results)

        # Create memory
        doc = [Document(page_content=query, metadata={},)]
        uuid = [str(uuid4())]
        vector_store.add_documents(documents=doc, ids=uuid)
    else:
        memories = ""
    
    prompt = prompt_template.invoke(
        {
            "messages": trimmed_messages,
            "memories": memories
        }
    )
    response = model.invoke(prompt)
    
    return {"messages": [response] }

In [22]:
from langgraph.graph import START, MessagesState, StateGraph
from langgraph.checkpoint.memory import InMemorySaver 

# Define a new graph
workflow = StateGraph(state_schema=State)

# Define the (single) node in the graph
workflow.add_edge(START, "model")
workflow.add_node("model", call_model)

# Add memory
memory = InMemorySaver()
app = workflow.compile(checkpointer=memory)

### Stream Message as it is Generated

In [23]:
%pip install --quiet -U transformers
%pip install --quiet -U torch

Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.


In [27]:
config = {"configurable": {"thread_id": "1"}}
print("Chat with TESS (type 'exit' to end the conversation)")
while True:
    user_input = input("You: ")
    print("USER:", user_input)
    if user_input.lower() == 'exit':
        break
    
    input_messages = [HumanMessage(user_input)]
    print("TESS: ", end="")
    for chunk, metadata in app.stream(
        {"messages": input_messages},
        config,
        stream_mode="messages",
    ):
        if isinstance(chunk, AIMessage):  # Filter to just model responses
            print(chunk.content, end="|")
    print(end='\n') 

Chat with TESS (type 'exit' to end the conversation)
USER: Hello
TESS: It|'s| nice| to| meet| you|.| Is| there| anything| I| can| help| you| with| or| would| you| like| to| talk| about| something| in| particular|?||
USER: What are the top 10 most spicy peppers?
TESS: When| it| comes| to| spice| levels|,| here|'s| a| list| of| the| top| |10| most| commonly| used| and| revered| chili| peppers|:

|1|.| **|Car|olina| Reaper|**| (|2|,|200|,|000|-|3|,|200|,|000| Sc|ov|ille| Heat| Units| (|SH|U|)):| Known| for| its| intense| heat| and| unique| flavor|.
|2|.| **|Tr|inidad| Mor|uga| Sc|orpion|**| (|1|,|469|,|000|-|2|,|486|,|000| SH|U|):| A| Trinidad|ian| pepper| known| for| its| intense|,| electric| heat|.
|3|.| **|N|aga| J|ol|okia|**| (|855|,|000|-|1|,|041|,|427| SH|U|):| Also| known| as| the| Ghost| Pepper|,| this| Indian| pepper| is| not| for| the| faint| of| heart|.
|4|.| **|Infinity| Chili|**| (|1|,|382|,|118|-|2|,|160|,|000| SH|U|):| A| hybrid| chili| bred| to| be| even| hotter| than| the

In [28]:
config = {"configurable": {"thread_id": "1"}}
state = app.get_state(config).values
for message in state["messages"]:
    message.pretty_print()


Hello, how are you doing?

How can I assist you today?

What are the top 10 most spicy peppers?

Here are ten of the hottest peppers in the world, ranked by their Scoville heat units (SHU):

1. **Carolina Reaper** - 1,569,300 SHU
2. **Trinidad Moruga Scorpion** - 1,469,000 SHU
3. **Naga Viper** - 1,382,118 SHU
4. **Naga Jolokia** - 855,000-1,041,427 SHU (Note: the exact range can vary depending on factors like growing conditions and preparation)
5. **Ghost Pepper (Bhut Jolokia)** - 855,000-1,041,427 SHU
6. **Naga Jolokia** - 855,000-1,041,427 SHU (same as above)
7. **Scotch Bonnet** - 350,000-500,000 SHU
8. **Calypso** - 200,000-300,000 SHU
9. **Infinity Chili** - 223,000-233,000 SHU
10. **Fresno Pepper** - 225,000-350,000 SHU

Please note that the Scoville scale is subjective and can vary depending on factors like the pepper's ripeness, preparation, and individual tolerance. These rankings are approximate and based on average values.

Would you like to know more about spicy peppers o