In [None]:
import pandas as pd
import json
import os
from openai import OpenAI
import anthropic
import google.generativeai as genai
from pathlib import Path
from dotenv import load_dotenv

# Load API keys from .env file
# This must run before the clients below are built: they read their keys
# from environment variables.
load_dotenv()

# Initialize clients using environment variables
openai_client = OpenAI()  # Will automatically use OPENAI_API_KEY env var
anthropic_client = anthropic.Anthropic()  # Will automatically use ANTHROPIC_API_KEY env var
# os.getenv returns None when GOOGLE_API_KEY is unset; the key is not
# validated here, so a missing key presumably only surfaces on the first
# Gemini request -- TODO confirm.
genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))

# DeepSeek uses OpenAI-compatible API
# (same OpenAI client class, pointed at DeepSeek's base URL with its own key)
deepseek_client = OpenAI(
    api_key=os.getenv("DEEPSEEK_API_KEY"),
    base_url="https://api.deepseek.com/v1"
)

# Load data
# The path is relative to the current working directory. Later code reads the
# columns user_id, linkedin_headline, liked_tweets.0 .. liked_tweets.2,
# cat_gt_1 and cat_gt_2 from this frame.
data_df = pd.read_csv("personalization_data.csv")

# Define the catalog (you'll need to replace this with your actual catalog)
# These names are joined into the prompt text; the ground-truth columns are
# compared against the models' picks by exact string match, so they presumably
# need to use the same spellings -- verify against the CSV.
catalog = ["Electronics", "Fashion", "Home & Garden", "Sports", "Books", 
           "Beauty", "Toys", "Food & Grocery", "Health", "Automotive"]

def create_prompt(row, catalog_items=None):
    """Build the category-recommendation prompt for one user.

    Args:
        row: Mapping-like record (for example a pandas Series) that provides
            the keys ``linkedin_headline`` and ``liked_tweets.0`` through
            ``liked_tweets.2``.
        catalog_items: Optional iterable of category names offered to the
            model. When omitted, the module-level ``catalog`` is used.

    Returns:
        The prompt string: the user data serialized as JSON, the instruction
        text, and the comma-separated catalog, each on its own line.

    Raises:
        KeyError: If ``row`` lacks one of the expected keys.
    """
    import math  # local import: math is not part of the file's import header

    def is_missing(value):
        # Empty CSV cells arrive as float NaN. json.dumps would emit the bare
        # token NaN, which is not valid JSON, so treat them as absent.
        return value is None or (isinstance(value, float) and math.isnan(value))

    if catalog_items is None:
        catalog_items = catalog

    headline = row["linkedin_headline"]
    tweets = [row[f"liked_tweets.{i}"] for i in range(3)]
    user_data = {
        # A missing headline is serialized as JSON null.
        "linkedin_headline": None if is_missing(headline) else headline,
        # Missing tweets are dropped rather than sent as NaN placeholders.
        "liked_tweets": [tweet for tweet in tweets if not is_missing(tweet)],
    }

    prompt = (
        f'json {json.dumps(user_data)}\n'
        f'"From the catalog below, pick the top two categories this user will most likely engage with '
        f'and justify each in ≤25 words. Return JSON: {{ "picks": ["...", "..."], "why": ["...", "..."] }}"\n'
        f'Catalog: {", ".join(catalog_items)}'
    )
    return prompt

def call_chatgpt(prompt):
    """Ask the OpenAI chat model for a JSON answer to *prompt*.

    The request is sent as a single user message with JSON-object response
    formatting, and the reply text is decoded with ``json.loads``.

    Returns:
        The decoded reply, or None when the request or the decoding raised
        (the error is printed, not re-raised).
    """
    request = {
        "model": "gpt-4o-mini",
        "messages": [{"role": "user", "content": prompt}],
        "temperature": 0.7,
        "response_format": {"type": "json_object"},
    }
    try:
        completion = openai_client.chat.completions.create(**request)
        reply_text = completion.choices[0].message.content
        parsed = json.loads(reply_text)
    except Exception as e:
        # Best effort: one failed call must not abort the whole experiment.
        print(f"ChatGPT error: {e}")
        return None
    return parsed

def extract_json_object(text):
    """Return the first JSON object embedded in *text*, or None.

    Model replies often wrap the JSON in prose or code fences. Slicing from
    the first ``{`` to the last ``}`` breaks as soon as any other brace
    appears before or after the object, so each ``{`` is tried in turn and
    the first position that decodes as a complete JSON object wins.

    Args:
        text: Raw reply text; may be empty or None.

    Returns:
        The decoded dict, or None when no decodable object is present.
    """
    if not text:
        return None

    decoder = json.JSONDecoder()
    start = text.find("{")
    while start != -1:
        try:
            # raw_decode stops at the end of the first complete value and
            # ignores whatever trails it.
            candidate, _ = decoder.raw_decode(text[start:])
        except json.JSONDecodeError:
            start = text.find("{", start + 1)
        else:
            return candidate
    return None


def call_claude(prompt):
    """Send *prompt* to Claude and return the JSON object found in its reply.

    Returns:
        The decoded dict, or None when the request failed or the reply held
        no JSON object (a message is printed in both cases).
    """
    try:
        response = anthropic_client.messages.create(
            model="claude-3-5-sonnet-20241022",
            max_tokens=1024,
            temperature=0.7,
            messages=[{"role": "user", "content": prompt}]
        )
        parsed = extract_json_object(response.content[0].text)
        if parsed is None:
            print("Claude error: no JSON object found in response")
        return parsed
    except Exception as e:
        # Best effort: one failed call must not abort the whole experiment.
        print(f"Claude error: {e}")
        return None


def call_gemini(prompt):
    """Send *prompt* to Gemini and return the JSON object found in its reply.

    Returns:
        The decoded dict, or None when the request failed or the reply held
        no JSON object (a message is printed in both cases).
    """
    try:
        model = genai.GenerativeModel('gemini-pro')
        # Use the same sampling temperature as the other three models so the
        # comparison is like for like.
        response = model.generate_content(
            prompt,
            generation_config={"temperature": 0.7},
        )
        parsed = extract_json_object(response.text)
        if parsed is None:
            print("Gemini error: no JSON object found in response")
        return parsed
    except Exception as e:
        # Best effort: one failed call must not abort the whole experiment.
        print(f"Gemini error: {e}")
        return None


def call_deepseek(prompt):
    """Send *prompt* to DeepSeek and return the JSON object found in its reply.

    Returns:
        The decoded dict, or None when the request failed or the reply held
        no JSON object (a message is printed in both cases).
    """
    try:
        response = deepseek_client.chat.completions.create(
            model="deepseek-chat",
            messages=[{"role": "user", "content": prompt}],
            temperature=0.7
        )
        parsed = extract_json_object(response.choices[0].message.content)
        if parsed is None:
            print("DeepSeek error: no JSON object found in response")
        return parsed
    except Exception as e:
        # Best effort: one failed call must not abort the whole experiment.
        print(f"DeepSeek error: {e}")
        return None

# Run experiment: query every model for every user and keep the usable replies.
results = []
models = {
    "ChatGPT": call_chatgpt,
    "Claude": call_claude,
    "Gemini": call_gemini,
    "DeepSeek": call_deepseek,
}

# Only the first 25 users are processed.
for _, user_row in data_df.head(25).iterrows():
    prompt = create_prompt(user_row)
    user_id = user_row["user_id"]

    for model_name, query_model in models.items():
        print(f"Processing user {user_id} with {model_name}...")
        response = query_model(prompt)

        # Failed calls (None) and empty replies are not recorded.
        if not response:
            continue

        results.append({
            "user_id": user_id,
            "model": model_name,
            "response": response,
            "ground_truth_1": user_row["cat_gt_1"],
            "ground_truth_2": user_row["cat_gt_2"],
        })

# Save results: one runs/<model>/p4.json file per model.
output_dir = Path("runs")
output_dir.mkdir(exist_ok=True)

for model_name in models:
    model_dir = output_dir / model_name
    model_dir.mkdir(exist_ok=True)

    # Keep only the records produced by this model.
    model_results = [entry for entry in results if entry["model"] == model_name]
    with open(model_dir / "p4.json", "w") as f:
        json.dump(model_results, f, indent=2)

# Compute hit rates
def _top_two_picks(raw_picks):
    """Normalize a model's ``picks`` value to a list of at most two entries."""
    if isinstance(raw_picks, str):
        # A bare string is a single pick; without this, ``in`` would do a
        # substring match against the ground truth.
        return [raw_picks]
    if isinstance(raw_picks, (list, tuple)):
        # Only the first two picks count towards a top-2 metric, however
        # many the model returned.
        return list(raw_picks[:2])
    return []


def compute_metrics(results, model_names=None):
    """Compute the top-2 hit rate for each model.

    A result counts as a hit when either ground-truth category equals
    (exact string match) one of the model's first two picks. Results whose
    response is missing or has no ``picks`` key are left out of the totals.

    Args:
        results: Iterable of dicts with the keys ``model``, ``response``,
            ``ground_truth_1`` and ``ground_truth_2``.
        model_names: Optional iterable of model names to report on. Defaults
            to the keys of the module-level ``models`` mapping. Models with
            no usable results are still reported, with a hit rate of 0.

    Returns:
        Dict mapping model name to ``{"hit_rate", "total", "hits"}``.
    """
    if model_names is None:
        model_names = models.keys()

    # tallies[name] == [hits, total]
    tallies = {name: [0, 0] for name in model_names}

    for result in results:
        tally = tallies.get(result["model"])
        if tally is None:
            continue  # model was not requested

        response = result["response"]
        if not isinstance(response, dict) or "picks" not in response:
            continue

        picks = _top_two_picks(response["picks"])
        if result["ground_truth_1"] in picks or result["ground_truth_2"] in picks:
            tally[0] += 1
        tally[1] += 1

    metrics = {}
    for name, (hits, total) in tallies.items():
        metrics[name] = {
            "hit_rate": hits / total if total > 0 else 0,
            "total": total,
            "hits": hits,
        }
    return metrics

# Print one summary line per model.
metrics = compute_metrics(results)
print("\nMetrics Summary:")
for model, metric in metrics.items():
    hits, total = metric["hits"], metric["total"]
    print(f"{model}: Hit Rate = {metric['hit_rate']:.2%} ({hits}/{total})")


def _item_or_blank(values, position):
    """Return values[position], or "" when values is too short."""
    return values[position] if len(values) > position else ""


# Create evaluation spreadsheet template: one row per response that carries
# both picks and rationales, with empty plausibility columns for reviewers.
eval_data = []
for result in results:
    response = result["response"]
    if not (response and "picks" in response and "why" in response):
        continue

    picks, reasons = response["picks"], response["why"]
    eval_data.append({
        "user_id": result["user_id"],
        "model": result["model"],
        "pick_1": _item_or_blank(picks, 0),
        "pick_2": _item_or_blank(picks, 1),
        "rationale_1": _item_or_blank(reasons, 0),
        "rationale_2": _item_or_blank(reasons, 1),
        "ground_truth_1": result["ground_truth_1"],
        "ground_truth_2": result["ground_truth_2"],
        "plausibility_1": "",  # To be manually filled
        "plausibility_2": "",  # To be manually filled
    })

eval_df = pd.DataFrame(eval_data)
eval_df.to_csv("experiment_results_for_evaluation.csv", index=False)
print("\nEvaluation template saved to 'experiment_results_for_evaluation.csv'")