In [None]:
# !pip install -r requirements.txt --quiet
!pip install mem0ai --upgrade

In [2]:
import os
import openai
from dotenv import load_dotenv
from mem0 import Memory, MemoryClient
from openai import AzureOpenAI
from azure.core.credentials import AzureKeyCredential
from azure.search.documents import SearchClient
from azure.search.documents.models import VectorizableTextQuery

# Load environment variables from a .env file (or from your system)
load_dotenv()
print("Environment variables loaded.")


Environment variables loaded.


In [3]:
AZURE_OPENAI_ENDPOINT = os.getenv("AZURE_OPENAI_ENDPOINT")
AZURE_OPENAI_API_KEY = os.getenv("AZURE_OPENAI_API_KEY")
AZURE_OPENAI_CHAT_COMPLETION_DEPLOYED_MODEL_NAME = os.getenv("AZURE_OPENAI_CHAT_COMPLETION_DEPLOYED_MODEL_NAME")
AZURE_OPENAI_EMBEDDING_DEPLOYED_MODEL_NAME = os.getenv("AZURE_OPENAI_EMBEDDING_DEPLOYED_MODEL_NAME")

# Create the Azure OpenAI client (for completions and embeddings)
client = AzureOpenAI(
    azure_endpoint=AZURE_OPENAI_ENDPOINT,
    api_key=AZURE_OPENAI_API_KEY,
    api_version="2024-12-01-preview"
)
print("Azure OpenAI client configured.")


Azure OpenAI client configured.


In [4]:
SEARCH_SERVICE_ENDPOINT = os.getenv("AZURE_SEARCH_SERVICE_ENDPOINT")
SEARCH_SERVICE_API_KEY = os.getenv("AZURE_SEARCH_ADMIN_KEY")
print("Azure AI Search configuration loaded.")


Azure AI Search configuration loaded.


In [5]:
MEM0AI_API_KEY = os.getenv("MEM0AI_API_KEY")
print("Mem0 API key loaded (if applicable).")


Mem0 API key loaded (if applicable).


In [7]:
# Threads index: stores conversation thread history only.
threads_config = {
    "vector_store": {
        "provider": "azure_ai_search",
        "config": {
            "service_name": "fsunavala-ai-search",  # Your Azure AI Search service name
            "api_key": SEARCH_SERVICE_API_KEY,
            "collection_name": "threads",  # Dedicated index for threads
            "embedding_model_dims": 1536,
            "use_compression": True,
        },
    },
    "llm": {
        "provider": "azure_openai",
        "config": {
            "model": AZURE_OPENAI_CHAT_COMPLETION_DEPLOYED_MODEL_NAME,
            "temperature": 0.1,
            "max_tokens": 2000,
            "azure_kwargs": {
                "azure_deployment": AZURE_OPENAI_CHAT_COMPLETION_DEPLOYED_MODEL_NAME,
                "api_version": "2024-10-21",
                "azure_endpoint": AZURE_OPENAI_ENDPOINT,
                "api_key": AZURE_OPENAI_API_KEY,
            },
        },
    },
    "embedder": {
        "provider": "azure_openai",
        "config": {
            "model": AZURE_OPENAI_EMBEDDING_DEPLOYED_MODEL_NAME,
            "embedding_dims": 1536,
            "azure_kwargs": {
                "api_version": "2024-10-21",
                "azure_deployment": AZURE_OPENAI_EMBEDDING_DEPLOYED_MODEL_NAME,
                "azure_endpoint": AZURE_OPENAI_ENDPOINT,
                "api_key": AZURE_OPENAI_API_KEY,
            },
        },
    },
    "version": "v1.1",
}

# Memories index: stores salient conversation memories.
memories_config = {
    "vector_store": {
        "provider": "azure_ai_search",
        "config": {
            "service_name": "fsunavala-ai-search",
            "api_key": SEARCH_SERVICE_API_KEY,
            "collection_name": "memories",  # Dedicated index for memories
            "embedding_model_dims": 1536,
            "use_compression": True,
        },
    },
    "llm": {
        "provider": "azure_openai",
        "config": {
            "model": AZURE_OPENAI_CHAT_COMPLETION_DEPLOYED_MODEL_NAME,
            "temperature": 0.1,
            "max_tokens": 2000,
            "azure_kwargs": {
                "azure_deployment": AZURE_OPENAI_CHAT_COMPLETION_DEPLOYED_MODEL_NAME,
                "api_version": "2024-10-21",
                "azure_endpoint": AZURE_OPENAI_ENDPOINT,
                "api_key": AZURE_OPENAI_API_KEY,
            },
        },
    },
    "embedder": {
        "provider": "azure_openai",
        "config": {
            "model": AZURE_OPENAI_EMBEDDING_DEPLOYED_MODEL_NAME,
            "embedding_dims": 1536,
            "azure_kwargs": {
                "api_version": "2024-10-21",
                "azure_deployment": AZURE_OPENAI_EMBEDDING_DEPLOYED_MODEL_NAME,
                "azure_endpoint": AZURE_OPENAI_ENDPOINT,
                "api_key": AZURE_OPENAI_API_KEY,
            },
        },
    },
    "version": "v1.1",
}

print("Mem0 configurations for Threads and Memories set.")

Mem0 configurations for Threads and Memories set.


In [8]:
threads_memory = Memory.from_config(threads_config)
memories_memory = Memory.from_config(memories_config)

# (We are not creating a mem0 instance for documents, since the "contoso-hr" index already exists.)
# Initialize MemoryClient if needed.
# mem0_client = MemoryClient(api_key=MEM0AI_API_KEY)

print("Memory objects for Threads and Memories initialized.")


Memory objects for Threads and Memories initialized.


In [12]:
# --- Threads: CRUD operations for conversation thread history ---
def add_thread(text, user_id, metadata=None):
    threads_memory.add(text, user_id=user_id, metadata=metadata)

def get_threads(query, limit=3, user_id=None):
    if user_id is None:
        raise ValueError("user_id must be provided for thread search.")
    return threads_memory.search(query, limit=limit, user_id=user_id)

def update_thread(thread_id, new_text):
    threads_memory.update(thread_id, new_text)

def delete_thread(thread_id):
    threads_memory.delete(thread_id)

# --- Memories: CRUD operations for salient conversation memories ---
def add_memory(text, user_id, metadata=None):
    memories_memory.add(text, user_id=user_id, metadata=metadata)

def get_memories(query, limit=3, user_id=None):
    if user_id is None:
        raise ValueError("user_id must be provided for memory search.")
    return memories_memory.search(query, limit=limit, user_id=user_id)

def update_memory(memory_id, new_text):
    memories_memory.update(memory_id, new_text)

def delete_memory(memory_id):
    memories_memory.delete(memory_id)

# --- Document Retrieval ---
# Since you already have an external index "contoso-hr", we use azure-search-documents directly.

# Initialize a SearchClient for your existing "contoso-hr" index.
contoso_hr_index = "default"
search_client_docs = SearchClient(
    endpoint=SEARCH_SERVICE_ENDPOINT,
    index_name=contoso_hr_index,
    credential=AzureKeyCredential(SEARCH_SERVICE_API_KEY)
)

def retrieve_documents(query, limit=3):
    """
    Retrieve documents from your existing "default" index using vector search.
    Assumes that the index schema contains a field for document content (e.g., "content") and a vector field (e.g., "contentVector").
    """
    vector_query = VectorizableTextQuery(text=query, k_nearest_neighbors=limit, fields="text_vector")  
    results = search_client_docs.search(
        search_text=query, # hybrid search
        vector_queries=[vector_query],
        top=limit
    )

    docs = []
    for result in results:
        docs.append(result)
    return docs

print("Helper functions defined.")


Helper functions defined.


In [13]:
def rag_query(query, user_id):
    """
    For a given query and specified user_id, this function:
      - Retrieves relevant documents from the external "contoso-hr" index.
      - Retrieves memories and thread history for that user.
      - Builds a composite, open-ended prompt for the language model.
      - Invokes the model and stores the conversation exchange in both the Threads and Memories indexes.
      
    Detailed debug information is printed at each step.
    """
    print("=== RAG Query Start ===")
    print(f"User: {user_id}")
    print(f"Query: {query}\n")
    
    # Step 1: Retrieve documents from the external knowledge base.
    documents = retrieve_documents(query, limit=3)
    print("Step 1: Retrieved Documents:")
    for idx, doc in enumerate(documents, 1):
        # Use the "chunk" field since that's where your content is stored.
        content = doc.get("chunk", "[No content]") if hasattr(doc, "get") else str(doc)
        print(f"  Doc {idx}: {content[:200]}...")
    
    # Step 2: Retrieve memories for the specific user.
    memories = get_memories(query, limit=3, user_id=user_id)
    print("\nStep 2: Retrieved Memories:")
    if memories:
        for idx, mem in enumerate(memories, 1):
            content = mem.get("content", "[No content]") if hasattr(mem, "get") else str(mem)
            print(f"  Memory {idx}: {content[:200]}...")
    else:
        print("  None")
    
    # Step 3: Retrieve recent thread history for the specific user.
    threads = get_threads(query, limit=3, user_id=user_id)
    print("\nStep 3: Retrieved Thread History:")
    if threads:
        for idx, th in enumerate(threads, 1):
            content = th.get("content", "[No content]") if hasattr(th, "get") else str(th)
            print(f"  Thread {idx}: {content[:200]}...")
    else:
        print("  None")
    
    # Step 4: Build the composite, open-ended prompt.
    # The prompt now invites the model to consider all provided context and use them as it deems relevant.
    prompt = (
        "You are a helpful assistant. Based on the context provided below, "
        "generate the best possible answer to the query. You may consider any or all "
        "of the following context elements (documents, memories, and thread history) as needed.\n\n"
    )
    prompt += f"Query: {query}\n\n"
    
    if documents:
        prompt += "Documents:\n"
        for doc in documents:
            content = doc.get("chunk", "[No content]") if hasattr(doc, "get") else str(doc)
            prompt += f"- {content}\n"
    if memories:
        prompt += "\nMemories:\n"
        for mem in memories:
            content = mem.get("content", "[No content]") if hasattr(mem, "get") else str(mem)
            prompt += f"- {content}\n"
    if threads:
        prompt += "\nThread History:\n"
        for th in threads:
            content = th.get("content", "[No content]") if hasattr(th, "get") else str(th)
            prompt += f"- {content}\n"
    prompt += "\nAnswer:"
    
    print("\nStep 4: Constructed Prompt:")
    print(prompt)
    
    # Step 5: Call the language model using Azure OpenAI.
    response = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": prompt}
        ],
        temperature=0.7,
    )
    # Access the content using attribute access.
    answer = response.choices[0].message.content
    print("\nStep 5: Model Response:")
    print(answer)
    
    # Step 6: Persist the conversation exchange in both Threads and Memories for this user.
    thread_entry = f"Query: {query}\nAnswer: {answer}"
    add_thread(thread_entry, user_id=user_id, metadata={"type": "thread"})
    add_memory(thread_entry, user_id=user_id, metadata={"type": "conversation"})
    print("\nStep 6: Persisted conversation exchange:")
    print(thread_entry)
    
    print("=== RAG Query End ===\n")
    return answer

print("User-specific, open-ended RAG query function defined.")


User-specific, open-ended RAG query function defined.


In [15]:
# Test query for Farzad
farzad_query = "hey remember my name is Farzad"
farzad_answer = rag_query(farzad_query, user_id="Farzad")
print("Farzad's Answer:")
print(farzad_answer)

# Test follow-up query for Farzad about identity
followup_query = "what is my name"
followup_answer = rag_query(followup_query, user_id="Farzad")
print("\nFollow-up for Farzad:")
print(followup_answer)

# Test query for Vinod
vinod_query = "What are the recent changes in our HR policy regarding remote work?"
vinod_answer = rag_query(vinod_query, user_id="Vinod")
print("\nVinod's Answer:")
print(vinod_answer)

# Test query for Kevin
kevin_query = "Can you explain the benefits of the new wellness program?"
kevin_answer = rag_query(kevin_query, user_id="Kevin")
print("\nKevin's Answer:")
print(kevin_answer)


=== RAG Query Start ===
User: Farzad
Query: hey remember my name is Farzad

Step 1: Retrieved Documents:
  Doc 1: "type": "azureblob",
   "subtype": null,
   "credentials": {

https://learn.microsoft.com/en-us/rest/api/searchservice/indexers/get-status
https://learn.microsoft.com/en-us/rest/api/searchservice/inde...
  Doc 2: ise  kadar  ki  sadece  üzere  ve  veya  ya  a  b  c  ç  d  e  f  g  ğ  h  ı  i  j  k  l  m  n  o  ö
p  r  s  ş  t  u  ü  v  y  z  A  B  C  Ç  D  E  F  G  Ğ  H  I  İ  J  K  L  M  N  O  Ö  P  R  S  Ş  ...
  Doc 3: δικής  δικήν  δικιά  δικιάν  δικά
δικιάς  δικιές  δικοί  δικού  δικούς  δικόν  της  των  τον  την  το  τους  τις  τα  τη  ένας
μια  ένα  ενός  μιας  με  σε  αν  εάν  να  δια  εκ  εξ  επί  προ  υπέρ  α...

Step 2: Retrieved Memories:
  Memory 1: results...

Step 3: Retrieved Thread History:
  Thread 1: results...

Step 4: Constructed Prompt:
You are a helpful assistant. Based on the context provided below, generate the best possible answer to the query. You