In [None]:
from sympy.stats.sampling.sample_scipy import scipy
######## Installations

!pip install zep-cloud openai gdown
!pip install scipy

In [None]:
######## Imports

import asyncio
import pandas as pd
import openai
import gdown
import json
import tarfile
from zep_cloud.client import AsyncZep
from zep_cloud import Message
from openai import AsyncOpenAI
from dotenv import load_dotenv
import os
import scipy.stats as stats
from datetime import datetime, timezone
from pydantic import BaseModel, Field
from zep_cloud import Message, EntityEdge, EntityNode
from time import time

In [None]:
######## Download the eval dataset from github

import requests

url = "https://raw.githubusercontent.com/snap-research/locomo/refs/heads/main/data/locomo10.json"

# Fetch the dataset exactly once. The timeout keeps the cell from hanging on a
# stalled connection, and raise_for_status() stops an HTTP error page from
# being parsed and saved as if it were the dataset.
response = requests.get(url, timeout=60)
response.raise_for_status()
data = response.json()

os.makedirs("data", exist_ok=True)

with open("./data/locomo.json", "w") as f:
    json.dump(data, f, indent=2)

print("JSON saved to ./data/locomo.json")

# Build the DataFrame from the local copy written above instead of downloading
# the same URL a second time.
locomo_df = pd.read_json("./data/locomo.json")

In [None]:
# Preview the first rows of the DataFrame loaded from locomo10.json.
locomo_df.head()

In [None]:
######## Start up Zep and OpenAI clients

# Pull ZEP_API_KEY / OPENAI_API_KEY from a local .env file into the process environment.
load_dotenv()

# Alternative (development) endpoint, kept for reference:
# , base_url="https://api.development.getzep.com/api/v2"

# Async Zep client pointed at the hosted v2 API.
zep = AsyncZep(
    api_key=os.environ.get("ZEP_API_KEY"),
    base_url="https://api.getzep.com/api/v2",
)

# Async OpenAI client; the key is read from the environment, never hardcoded.
oai_client = AsyncOpenAI(api_key=os.environ.get("OPENAI_API_KEY"))

In [None]:
######## LongMemEval  - ingest each multi-session as its own single Zep user/session pair

num_users = 10
max_session_count = 35

for user_idx in range(num_users):
    conversation = locomo_df['conversation'].iloc[user_idx]
    user_id = "locomo_experiment_user_" + str(user_idx)
    session_id = "locomo_experiment_session_" + str(user_idx)
    print(user_id)
    
    first_name = locomo_df['conversation'].iloc[user_idx]['speaker_a']
    
    try:
        await zep.user.add(user_id=user_id, first_name=first_name)
        await zep.memory.add_session(
            user_id=user_id,
            session_id=session_id,
        )
    except:
        pass
    
    # Uncomment this code to delete existing users
    # try:
    #     await zep.user.delete(user_id)
    #     await zep.memory.delete(session_id)
    # except:
    #     pass
    # 
    # continue

    for session_idx in range(max_session_count):
        session_key = f'session_{session_idx}'
        print(session_key)
        session = locomo_df['conversation'].iloc[user_idx].get(f'session_{session_idx}')
        if session is None:
            continue
        
        for msg in session:
            # Get date
            session_date = locomo_df['conversation'].iloc[user_idx].get(f'session_{session_idx}_date_time') + ' UTC'
            date_format = '%I:%M %p on %d %B, %Y UTC'
            date_string = datetime.strptime(session_date, date_format).replace(tzinfo=timezone.utc)
            iso_date = date_string.isoformat()
            
            # Get speaker and role
            speaker = msg.get('speaker')
            role_type = 'user' if speaker == first_name else 'norole'
            
            
            await zep.memory.add(session_id=session_id, messages=[Message(role=speaker, role_type=role_type, content=msg.get('text'), created_at=date_string.isoformat())])

In [None]:
# Inspect the raw 'session_1' entry of the first conversation (the kind of
# per-session message list that the ingestion loop iterates over).
locomo_df['conversation'].iloc[0].get('session_1')