In [7]:
from langgraph.graph import StateGraph, START, END
from langgraph.store.base import IndexConfig
from langchain_openai import AzureOpenAIEmbeddings
import os
from xmlrpc import client
from azure.cosmos import CosmosClient, PartitionKey
from cosmos_store import CosmosDBStore
from azure.identity import ClientSecretCredential
from azure.keyvault.secrets import SecretClient
from azure.identity import DefaultAzureCredential
import uuid
from langgraph.graph import MessagesState
from cosmos_store import CosmosDBStore
from langchain_openai import AzureChatOpenAI
from pydantic import BaseModel
#from langchain_core.utils.function_calling import type_model
import json


# --- Secrets and endpoints -----------------------------------------------------
# Endpoints and keys are read from Azure Key Vault instead of being hard-coded.
keyVaultName = "akvlab00"
KVUri = f"https://{keyVaultName}.vault.azure.net"

# The same DefaultAzureCredential is used for Key Vault here and for Cosmos DB below.
credential = DefaultAzureCredential()
client_akv = SecretClient(vault_url=KVUri, credential=credential)
cosmosdb_endpoint = client_akv.get_secret(name="cosmosdb-url").value
# NOTE(review): conn_str is fetched but not used by any code visible in this
# notebook (the CosmosClient below authenticates with `credential`) — confirm
# whether this secret read is still needed.
conn_str = client_akv.get_secret(name="cosmosdb-connstr").value
# conn_str = os.environ["COSMOS_CONN_STRING"]
azure_openai_endpoint = client_akv.get_secret(name="aoai-endpoint").value
azure_openai_api_key = client_akv.get_secret(name="aoai-api-key").value
# API version used by the embeddings client below.
azure_openai_api_version = "2024-02-15-preview"

# Connection-string auth is kept for reference; credential-based auth is what runs.
#client = CosmosClient.from_connection_string(conn_str)
cosmos_client = CosmosClient(cosmosdb_endpoint, credential=credential)

# Embeddings for semantic search in long-term memory
emb = AzureOpenAIEmbeddings(
    model="text-embedding-3-small",
    azure_deployment="text-embedding-3-small",
    api_key=azure_openai_api_key,
    api_version=azure_openai_api_version,
    azure_endpoint=azure_openai_endpoint,
)

# Index configuration handed to the store: embedding dimension, the embedding
# client, and which keys of each stored value dict are to be embedded.
index_cfg: IndexConfig = {
    "dims": 1536,
    "embed": emb,
    "fields": ["summary", "text"],  # fields inside your stored value dict
}

store = CosmosDBStore(
    client=cosmos_client,
    database_id="langgraph-test-db",
    container_id="memory-test",
    partition_key_path="/namespaceKey",
    index=index_cfg,  # index config defined above (an earlier "no vector index yet" note no longer applied)
)

# One-time setup (DB + container + indexes/TTL)
store.setup(ttl_seconds=60 * 60 * 24 * 7)  # 7 days, or None for no default TTL

from typing import TypedDict
from datetime import datetime

# Model configuration
# Instantiate the chat model used both for answering (call_model) and, via
# with_structured_output, for memory summarisation.
# Note: this client pins its own api_version, which differs from
# azure_openai_api_version used by the embeddings client.
model = AzureChatOpenAI(
    model="gpt-4o",
    api_key=azure_openai_api_key,
    api_version="2024-12-01-preview",
    azure_endpoint=azure_openai_endpoint,
    temperature=0.5,
)


class ConversationState(MessagesState):
    """Graph state: the message history from MessagesState plus a summary slot."""

    # Latest summary produced by write_long_term_memory; None until that node has run.
    summary: str | None


# Define the typed summary model as a plain pydantic BaseModel (the @type_model decorator below is commented out).
# This does two things: 1) defines the Python shape for memory items, 2) provides the structured-output schema the LLM must follow.

#@type_model
# Structured result of the memory-compression call.
# (Documented with comments rather than a docstring on purpose: a pydantic
# class docstring would become part of the schema passed to the model.)
class MemorySummary(BaseModel):
    # The prompt in write_long_term_memory asks for one of
    # "marketing_insights", "optimization", "inference", "other";
    # this field itself accepts any string.
    category: str
    # Concise summary of the conversation.
    summary: str


# Variant of the chat model bound to the MemorySummary schema; used by
# write_long_term_memory to obtain a typed category/summary pair.
structured_summary_model = model.with_structured_output(MemorySummary)

from langchain_core.messages import SystemMessage, HumanMessage, AIMessage


def _message_to_text(message) -> str:
    if message is None:
        return ""
    content = message.content
    if isinstance(content, str):
        return content
    if isinstance(content, list):
        parts = []
        for part in content:
            if isinstance(part, dict):
                text_part = part.get("text")
                if text_part:
                    parts.append(text_part)
            else:
                parts.append(str(part))
        return " ".join(parts).strip()
    return str(content)


def write_long_term_memory(state: ConversationState, config, *, store):
    """Summarise the conversation and persist it as a long-term memory item.

    The full message history is sent to the structured-output model, which
    returns a ``MemorySummary`` (category + summary). The summary, together
    with the most recent human and AI turns, is written to ``store`` under the
    namespace ``("user", <user_id>, <category>)`` with a fresh UUID key.

    Args:
        state: Current graph state; ``state["messages"]`` is read.
        config: Run config; ``config["configurable"]["user_id"]`` is required.
        store: Store to write to (keyword-only).

    Returns:
        A state update carrying the unchanged messages and the new summary.

    Raises:
        KeyError: If ``user_id`` is missing from ``config["configurable"]``.
    """
    # Local import: the file only imports the `datetime` class at module level.
    from datetime import timezone

    user_id = config["configurable"]["user_id"]
    conversation = state["messages"]

    # Must mirror the category list embedded in the prompt below.
    allowed_categories = ("marketing_insights", "optimization", "inference", "other")

    # LangMem (or your graph) is already maintaining state["messages"].
    # No pruning here; this node is ONLY concerned with long-term memory.

    messages_for_sum = [
        SystemMessage(
            content=(
                "You are a memory compressor. "
                "Given the following conversation, extract:\n"
                "- a high-level category from this fixed set: "
                "[\"marketing_insights\", \"optimization\", \"inference\", \"other\"],\n"
                "- a concise summary capturing key facts, preferences, and goals.\n\n"
                "Return ONLY a structured object with fields: category, summary."
            )
        ),
        *conversation,
    ]

    # LLM returns a *typed* MemorySummary object.
    # NOTE(review): response_format is passed even though the model was already
    # bound via with_structured_output — looks redundant; confirm before removing.
    result: MemorySummary = structured_summary_model.invoke(
        messages_for_sum,
        response_format=MemorySummary,
    )

    # The category becomes part of the storage namespace, so do not trust the
    # model to stay inside the fixed set: normalise and fall back to "other".
    category = (result.category or "").strip().lower()
    if category not in allowed_categories:
        category = "other"
    new_summary = result.summary    # compressed knowledge

    # Extract most recent human/assistant turns for embedding + audit
    last_human = next(
        (m for m in reversed(conversation) if isinstance(m, HumanMessage)),
        None,
    )
    last_ai = next(
        (m for m in reversed(conversation) if isinstance(m, AIMessage)),
        None,
    )

    human_text = _message_to_text(last_human)
    ai_text = _message_to_text(last_ai)

    text_segments = []
    if human_text:
        text_segments.append(f"Human: {human_text}")
    if ai_text:
        text_segments.append(f"AI: {ai_text}")
    memory_text = "\n".join(text_segments)

    # Namespace includes user_id + category
    namespace = ("user", user_id, category)

    store.put(
        namespace,
        key=str(uuid.uuid4()),
        value={
            "summary": new_summary,
            "text": memory_text,
            "category": category,
            # Timezone-aware UTC timestamp; datetime.utcnow() is deprecated and
            # produces a naive value with no offset.
            "created_at": datetime.now(timezone.utc).isoformat(),
            "human_message": human_text,
            "ai_message": ai_text,
        },
        index=True,
    )

    # Preserve the message history for downstream nodes.
    return {"messages": conversation, "summary": new_summary}


# Graph builder whose state schema is ConversationState (messages + summary).
builder = StateGraph(ConversationState)


def call_model(state: ConversationState, config) -> ConversationState:
    """Run the chat model on the current history and append its reply.

    Args:
        state: Current graph state; ``messages`` and (optionally) ``summary`` are read.
        config: Run config, forwarded to the model invocation.

    Returns:
        A state update with the history extended by the model's reply and the
        existing summary (``None`` if absent) passed through.
    """
    history = state["messages"]
    reply = model.invoke(history, config=config)  # the model's reply (an AIMessage)
    return {
        "messages": [*history, reply],
        "summary": state.get("summary"),
    }


# Linear pipeline: START -> llm -> write_long_term_memory -> END.
builder.add_node("llm", call_model)
builder.add_node("write_long_term_memory", write_long_term_memory)
builder.add_edge(START, "llm")
builder.add_edge("llm", "write_long_term_memory")
builder.add_edge("write_long_term_memory", END)

# The store is supplied at compile time; write_long_term_memory declares a
# keyword-only `store` parameter to receive it.
graph = builder.compile(store=store)


In [8]:
# Demo run: one human question through the graph for a single user.
user_id = "charlesc@partnergem.com"
# user_id is read by write_long_term_memory from config["configurable"].
config = {"configurable": {"thread_id": "1", "user_id": user_id}}
payload = {"messages": [HumanMessage(content="Is marketing campaign sales prediction along multiple channels easy to implement across industries?")]}
result = graph.invoke(payload, config=config)
print(result["messages"][-1].content)  # assistant answer

Implementing marketing campaign sales predictions across multiple channels can vary in complexity depending on several factors, including the industry, the availability and quality of data, the specific channels being used, and the overall marketing strategy. Here are some considerations that can impact the ease of implementation:

1. **Data Availability and Quality**: Access to high-quality, comprehensive data is crucial. Industries with well-established digital infrastructures may find it easier to collect and integrate data across channels. In contrast, industries with less mature data practices may face challenges.

2. **Channel Complexity**: Some industries rely on a wide array of channels, including digital (social media, email, search engines) and traditional (TV, print, in-store). The more channels involved, the more complex the prediction model becomes, as each channel may require different data and modeling approaches.

3. **Industry Dynamics**: Certain industries, like e-com

In [9]:
def fetch_user_memories(user_id: str, category: str, *, limit: int = 10):
    """Print and return the most recently updated memories for a user/category.

    Queries the store's Cosmos container directly for documents whose
    ``namespaceKey`` equals ``"user|<user_id>|<category>"``, newest
    ``updated_at`` first.

    Args:
        user_id: User identifier, used verbatim in the namespace key.
        category: Memory category, the last namespace segment.
        limit: Maximum number of documents to print and return (keyword-only).

    Returns:
        A list of at most ``limit`` projected documents (dicts).
    """
    # Local import keeps this notebook cell self-contained.
    from itertools import islice

    namespace = "|".join(("user", user_id, category))

    # NOTE(review): reaches into a private attribute of the store for
    # inspection; there is no public accessor visible in this notebook.
    container = store._container
    query = """
        SELECT c.id,
               c.key,
               ARRAY_LENGTH(c.contentVector) AS vectorLength,
               c["value"] AS docValue,
               c.created_at,
               c.updated_at
        FROM c
        WHERE c.namespaceKey = @namespace
        ORDER BY c.updated_at DESC
    """
    params = [{"name": "@namespace", "value": namespace}]
    results = container.query_items(
        query=query,
        parameters=params,
        enable_cross_partition_query=True,
        max_item_count=limit,
    )
    # max_item_count only sets the page size; the iterator keeps fetching
    # further pages, so the result must be capped explicitly to honour `limit`.
    docs = list(islice(results, limit))

    for doc in docs:
        print(f"id: {doc['id']}")
        # Projected properties that are undefined on a document are omitted
        # from the result, so read the optional ones defensively.
        print(f"key: {doc.get('key')}")
        print(f"created_at: {doc.get('created_at')}")
        print(json.dumps(doc["docValue"], indent=2))
        print("-" * 60)

    return docs

In [10]:
# Inspect what the run above stored under the "inference" category for this user.
fetch_user_memories("charlesc@partnergem.com", category="inference", limit=5)

id: user|charlesc@partnergem.com|inference::a7e30017-0858-4fbc-b587-7e7ef17b3049
key: a7e30017-0858-4fbc-b587-7e7ef17b3049
created_at: 2025-11-26T04:42:22.545498+00:00
{
  "summary": "Implementing multi-channel marketing campaign sales predictions varies in complexity by industry, influenced by data quality, channel diversity, industry dynamics, technological infrastructure, regulatory environment, and organizational readiness. Industries with mature data practices and advanced tech infrastructure find it easier, while others may face challenges.",
  "text": "Human: Is marketing campaign sales prediction along multiple channels easy to implement across industries?\nAI: Implementing marketing campaign sales predictions across multiple channels can vary in complexity depending on several factors, including the industry, the availability and quality of data, the specific channels being used, and the overall marketing strategy. Here are some considerations that can impact the ease of imple

[{'id': 'user|charlesc@partnergem.com|inference::a7e30017-0858-4fbc-b587-7e7ef17b3049',
  'key': 'a7e30017-0858-4fbc-b587-7e7ef17b3049',
  'vectorLength': 1536,
  'docValue': {'summary': 'Implementing multi-channel marketing campaign sales predictions varies in complexity by industry, influenced by data quality, channel diversity, industry dynamics, technological infrastructure, regulatory environment, and organizational readiness. Industries with mature data practices and advanced tech infrastructure find it easier, while others may face challenges.',
   'text': 'Human: Is marketing campaign sales prediction along multiple channels easy to implement across industries?\nAI: Implementing marketing campaign sales predictions across multiple channels can vary in complexity depending on several factors, including the industry, the availability and quality of data, the specific channels being used, and the overall marketing strategy. Here are some considerations that can impact the ease of 