# Video Marketing Simulation - AI Model Evaluations

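This notebook runs all 20 personas through each configured AI model (OpenAI, Anthropic, and Google) to determine which video each persona prefers. Results are saved as one JSON file per model in the `results/` directory.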

**Each cell below represents one model evaluation.**

In [2]:
# Setup and Imports
import json
from pathlib import Path
from ai_agent import load_video_analyses, load_personas, evaluate_persona, MODELS
from IPython.display import display, HTML
import pandas as pd

# Load the inputs shared by every evaluation cell below
print("Loading video analyses and personas...")
videos = load_video_analyses()
personas = load_personas()

print(f"✓ Loaded {len(videos)} videos")
print(f"✓ Loaded {len(personas)} personas")

# List each provider with the model names registered under it
print("\nAvailable models:")
for provider, models in MODELS.items():
    print(f"  {provider.upper()}: {len(models)} models")
    for model_name in models:
        print(f"    - {model_name}")

# Make sure the output directory exists before any cell writes to it
results_dir = Path("results")
results_dir.mkdir(exist_ok=True)
print(f"\n✓ Results will be saved to: {results_dir.absolute()}")

Loading video analyses and personas...
✓ Loaded 4 videos
✓ Loaded 20 personas

Available models:
  OPENAI: 4 models
    - gpt-5
    - gpt-4o
    - gpt-4o-mini
    - o1
  ANTHROPIC: 4 models
    - claude-sonnet-4.5
    - claude-haiku-4.5
    - claude-opus-4.1
    - claude-3.7-sonnet
  GOOGLE: 5 models
    - gemini-2.5-pro
    - gemini-2.5-flash
    - gemini-2.5-flash-lite
    - gemini-2.0-flash
    - gemini-2.0-flash-lite

✓ Results will be saved to: /Users/jacobrafati/Projects/video_marketing_simulation/backend/results


---
## OpenAI Models

In [3]:
# OpenAI: GPT-5
# Runs every persona through this model, saves the successful evaluations as JSON
# under results_dir, then displays a per-persona summary table.
provider = 'openai'
model_name = 'gpt-5'

print('=' * 80)
print(f"Evaluating: {provider.upper()} - {model_name}")
print('=' * 80 + '\n')

results = []
for i, persona in enumerate(personas, 1):
    print(f"[{i}/{len(personas)}] Persona {persona['id']}: {persona['name']}...", end=" ")

    try:
        evaluation = evaluate_persona(persona, videos, provider, model_name)

        if evaluation:
            result = {
                'persona_id': persona['id'],
                'persona_name': persona['name'],
                'provider': provider,
                'model': model_name,
                'evaluation': evaluation
            }
            results.append(result)
            print(f"✓ Video {evaluation.get('most_preferred_video')} (Confidence: {evaluation.get('confidence_score')}%)")
        else:
            print("✗ Failed")
    except Exception as e:
        # Best-effort: one failing persona must not abort the remaining ones
        print(f"✗ Error: {e}")

# Save results
output_file = results_dir / f'{provider}_{model_name}_evaluations.json'
if results:
    with open(output_file, 'w', encoding='utf-8') as f:
        json.dump(results, f, indent=2)

    print(f"\n✓ Saved {len(results)} evaluations to: {output_file}")

    # Display summary; .get() mirrors the progress line above so an evaluation
    # missing a key shows None instead of raising KeyError after the run.
    df = pd.DataFrame([{
        'Persona': r['persona_name'],
        'Preferred Video': r['evaluation'].get('most_preferred_video'),
        'Confidence': f"{r['evaluation'].get('confidence_score')}%"
    } for r in results])
    display(df)
else:
    # Every persona failed: do not overwrite a previous good run with an empty list
    print(f"\n✗ No successful evaluations; {output_file} was not written")

Evaluating: OPENAI - gpt-5

[1/20] Persona 1: Persona 1... ✓ Video 2 (Confidence: 78%)
[2/20] Persona 2: Persona 2... ✓ Video 2 (Confidence: 76%)
[3/20] Persona 3: Persona 3... ✓ Video 2 (Confidence: 80%)
[4/20] Persona 4: Persona 4... ✓ Video 2 (Confidence: 74%)
[5/20] Persona 5: Persona 5... ✓ Video 2 (Confidence: 76%)
[6/20] Persona 6: Persona 6... ✓ Video 2 (Confidence: 78%)
[7/20] Persona 7: Persona 7... ✓ Video 2 (Confidence: 76%)
[8/20] Persona 8: Persona 8... ✓ Video 2 (Confidence: 78%)
[9/20] Persona 9: Persona 9... ✓ Video 2 (Confidence: 78%)
[10/20] Persona 10: Persona 10... ✓ Video 2 (Confidence: 78%)
[11/20] Persona 11: Persona 11... ✓ Video 2 (Confidence: 75%)
[12/20] Persona 12: Persona 12... ✓ Video 2 (Confidence: 76%)
[13/20] Persona 13: Persona 13... ✓ Video 2 (Confidence: 76%)
[14/20] Persona 14: Persona 14... ✓ Video 2 (Confidence: 80%)
[15/20] Persona 15: Persona 15... ✓ Video 2 (Confidence: 76%)
[16/20] Persona 16: Persona 16... ✓ Video 2 (Confidence: 82%)
[17/20]

Unnamed: 0,Persona,Preferred Video,Confidence
0,Persona 1,2,78%
1,Persona 2,2,76%
2,Persona 3,2,80%
3,Persona 4,2,74%
4,Persona 5,2,76%
5,Persona 6,2,78%
6,Persona 7,2,76%
7,Persona 8,2,78%
8,Persona 9,2,78%
9,Persona 10,2,78%


In [None]:
# OpenAI: GPT-4o
# Runs every persona through this model, saves the successful evaluations as JSON
# under results_dir, then displays a per-persona summary table.
provider = 'openai'
model_name = 'gpt-4o'

print('=' * 80)
print(f"Evaluating: {provider.upper()} - {model_name}")
print('=' * 80 + '\n')

results = []
for i, persona in enumerate(personas, 1):
    print(f"[{i}/{len(personas)}] Persona {persona['id']}: {persona['name']}...", end=" ")

    try:
        evaluation = evaluate_persona(persona, videos, provider, model_name)

        if evaluation:
            result = {
                'persona_id': persona['id'],
                'persona_name': persona['name'],
                'provider': provider,
                'model': model_name,
                'evaluation': evaluation
            }
            results.append(result)
            print(f"✓ Video {evaluation.get('most_preferred_video')} (Confidence: {evaluation.get('confidence_score')}%)")
        else:
            print("✗ Failed")
    except Exception as e:
        # Best-effort: one failing persona must not abort the remaining ones
        print(f"✗ Error: {e}")

# Save results
output_file = results_dir / f'{provider}_{model_name}_evaluations.json'
if results:
    with open(output_file, 'w', encoding='utf-8') as f:
        json.dump(results, f, indent=2)

    print(f"\n✓ Saved {len(results)} evaluations to: {output_file}")

    # Display summary; .get() mirrors the progress line above so an evaluation
    # missing a key shows None instead of raising KeyError after the run.
    df = pd.DataFrame([{
        'Persona': r['persona_name'],
        'Preferred Video': r['evaluation'].get('most_preferred_video'),
        'Confidence': f"{r['evaluation'].get('confidence_score')}%"
    } for r in results])
    display(df)
else:
    # Every persona failed: do not overwrite a previous good run with an empty list
    print(f"\n✗ No successful evaluations; {output_file} was not written")

In [None]:
# OpenAI: GPT-4o-mini
# Runs every persona through this model, saves the successful evaluations as JSON
# under results_dir, then displays a per-persona summary table.
provider = 'openai'
model_name = 'gpt-4o-mini'

print('=' * 80)
print(f"Evaluating: {provider.upper()} - {model_name}")
print('=' * 80 + '\n')

results = []
for i, persona in enumerate(personas, 1):
    print(f"[{i}/{len(personas)}] Persona {persona['id']}: {persona['name']}...", end=" ")

    try:
        evaluation = evaluate_persona(persona, videos, provider, model_name)

        if evaluation:
            result = {
                'persona_id': persona['id'],
                'persona_name': persona['name'],
                'provider': provider,
                'model': model_name,
                'evaluation': evaluation
            }
            results.append(result)
            print(f"✓ Video {evaluation.get('most_preferred_video')} (Confidence: {evaluation.get('confidence_score')}%)")
        else:
            print("✗ Failed")
    except Exception as e:
        # Best-effort: one failing persona must not abort the remaining ones
        print(f"✗ Error: {e}")

# Save results
output_file = results_dir / f'{provider}_{model_name}_evaluations.json'
if results:
    with open(output_file, 'w', encoding='utf-8') as f:
        json.dump(results, f, indent=2)

    print(f"\n✓ Saved {len(results)} evaluations to: {output_file}")

    # Display summary; .get() mirrors the progress line above so an evaluation
    # missing a key shows None instead of raising KeyError after the run.
    df = pd.DataFrame([{
        'Persona': r['persona_name'],
        'Preferred Video': r['evaluation'].get('most_preferred_video'),
        'Confidence': f"{r['evaluation'].get('confidence_score')}%"
    } for r in results])
    display(df)
else:
    # Every persona failed: do not overwrite a previous good run with an empty list
    print(f"\n✗ No successful evaluations; {output_file} was not written")

In [None]:
# OpenAI: o1
# Runs every persona through this model, saves the successful evaluations as JSON
# under results_dir, then displays a per-persona summary table.
provider = 'openai'
model_name = 'o1'

print('=' * 80)
print(f"Evaluating: {provider.upper()} - {model_name}")
print('=' * 80 + '\n')

results = []
for i, persona in enumerate(personas, 1):
    print(f"[{i}/{len(personas)}] Persona {persona['id']}: {persona['name']}...", end=" ")

    try:
        evaluation = evaluate_persona(persona, videos, provider, model_name)

        if evaluation:
            result = {
                'persona_id': persona['id'],
                'persona_name': persona['name'],
                'provider': provider,
                'model': model_name,
                'evaluation': evaluation
            }
            results.append(result)
            print(f"✓ Video {evaluation.get('most_preferred_video')} (Confidence: {evaluation.get('confidence_score')}%)")
        else:
            print("✗ Failed")
    except Exception as e:
        # Best-effort: one failing persona must not abort the remaining ones
        print(f"✗ Error: {e}")

# Save results
output_file = results_dir / f'{provider}_{model_name}_evaluations.json'
if results:
    with open(output_file, 'w', encoding='utf-8') as f:
        json.dump(results, f, indent=2)

    print(f"\n✓ Saved {len(results)} evaluations to: {output_file}")

    # Display summary; .get() mirrors the progress line above so an evaluation
    # missing a key shows None instead of raising KeyError after the run.
    df = pd.DataFrame([{
        'Persona': r['persona_name'],
        'Preferred Video': r['evaluation'].get('most_preferred_video'),
        'Confidence': f"{r['evaluation'].get('confidence_score')}%"
    } for r in results])
    display(df)
else:
    # Every persona failed: do not overwrite a previous good run with an empty list
    print(f"\n✗ No successful evaluations; {output_file} was not written")

---
## Anthropic Claude Models

In [4]:
# Anthropic: Claude Sonnet 4.5
# Runs every persona through this model, saves the successful evaluations as JSON
# under results_dir, then displays a per-persona summary table.
provider = 'anthropic'
model_name = 'claude-sonnet-4.5'

print('=' * 80)
print(f"Evaluating: {provider.upper()} - {model_name}")
print('=' * 80 + '\n')

results = []
for i, persona in enumerate(personas, 1):
    print(f"[{i}/{len(personas)}] Persona {persona['id']}: {persona['name']}...", end=" ")

    try:
        evaluation = evaluate_persona(persona, videos, provider, model_name)

        if evaluation:
            result = {
                'persona_id': persona['id'],
                'persona_name': persona['name'],
                'provider': provider,
                'model': model_name,
                'evaluation': evaluation
            }
            results.append(result)
            print(f"✓ Video {evaluation.get('most_preferred_video')} (Confidence: {evaluation.get('confidence_score')}%)")
        else:
            print("✗ Failed")
    except Exception as e:
        # Best-effort: one failing persona must not abort the remaining ones
        print(f"✗ Error: {e}")

# Save results
output_file = results_dir / f'{provider}_{model_name}_evaluations.json'
if results:
    with open(output_file, 'w', encoding='utf-8') as f:
        json.dump(results, f, indent=2)

    print(f"\n✓ Saved {len(results)} evaluations to: {output_file}")

    # Display summary; .get() mirrors the progress line above so an evaluation
    # missing a key shows None instead of raising KeyError after the run.
    df = pd.DataFrame([{
        'Persona': r['persona_name'],
        'Preferred Video': r['evaluation'].get('most_preferred_video'),
        'Confidence': f"{r['evaluation'].get('confidence_score')}%"
    } for r in results])
    display(df)
else:
    # Every persona failed: do not overwrite a previous good run with an empty list
    print(f"\n✗ No successful evaluations; {output_file} was not written")

Evaluating: ANTHROPIC - claude-sonnet-4.5

[1/20] Persona 1: Persona 1... ✓ Video 2 (Confidence: 78%)
[2/20] Persona 2: Persona 2... ✓ Video 2 (Confidence: 78%)
[3/20] Persona 3: Persona 3... ✓ Video 2 (Confidence: 78%)
[4/20] Persona 4: Persona 4... ✓ Video 2 (Confidence: 78%)
[5/20] Persona 5: Persona 5... ✓ Video 2 (Confidence: 78%)
[6/20] Persona 6: Persona 6... ✓ Video 2 (Confidence: 78%)
[7/20] Persona 7: Persona 7... ✓ Video 2 (Confidence: 72%)
[8/20] Persona 8: Persona 8... ✓ Video 2 (Confidence: 78%)
[9/20] Persona 9: Persona 9... ✓ Video 2 (Confidence: 78%)
[10/20] Persona 10: Persona 10... ✓ Video 2 (Confidence: 78%)
[11/20] Persona 11: Persona 11... ✓ Video 2 (Confidence: 72%)
[12/20] Persona 12: Persona 12... ✓ Video 2 (Confidence: 78%)
[13/20] Persona 13: Persona 13... ✓ Video 2 (Confidence: 78%)
[14/20] Persona 14: Persona 14... ✓ Video 2 (Confidence: 78%)
[15/20] Persona 15: Persona 15... ✓ Video 2 (Confidence: 72%)
[16/20] Persona 16: Persona 16... ✓ Video 2 (Confidenc

Unnamed: 0,Persona,Preferred Video,Confidence
0,Persona 1,2,78%
1,Persona 2,2,78%
2,Persona 3,2,78%
3,Persona 4,2,78%
4,Persona 5,2,78%
5,Persona 6,2,78%
6,Persona 7,2,72%
7,Persona 8,2,78%
8,Persona 9,2,78%
9,Persona 10,2,78%


In [None]:
# Anthropic: Claude Haiku 4.5
# Runs every persona through this model, saves the successful evaluations as JSON
# under results_dir, then displays a per-persona summary table.
provider = 'anthropic'
model_name = 'claude-haiku-4.5'

print('=' * 80)
print(f"Evaluating: {provider.upper()} - {model_name}")
print('=' * 80 + '\n')

results = []
for i, persona in enumerate(personas, 1):
    print(f"[{i}/{len(personas)}] Persona {persona['id']}: {persona['name']}...", end=" ")

    try:
        evaluation = evaluate_persona(persona, videos, provider, model_name)

        if evaluation:
            result = {
                'persona_id': persona['id'],
                'persona_name': persona['name'],
                'provider': provider,
                'model': model_name,
                'evaluation': evaluation
            }
            results.append(result)
            print(f"✓ Video {evaluation.get('most_preferred_video')} (Confidence: {evaluation.get('confidence_score')}%)")
        else:
            print("✗ Failed")
    except Exception as e:
        # Best-effort: one failing persona must not abort the remaining ones
        print(f"✗ Error: {e}")

# Save results
output_file = results_dir / f'{provider}_{model_name}_evaluations.json'
if results:
    with open(output_file, 'w', encoding='utf-8') as f:
        json.dump(results, f, indent=2)

    print(f"\n✓ Saved {len(results)} evaluations to: {output_file}")

    # Display summary; .get() mirrors the progress line above so an evaluation
    # missing a key shows None instead of raising KeyError after the run.
    df = pd.DataFrame([{
        'Persona': r['persona_name'],
        'Preferred Video': r['evaluation'].get('most_preferred_video'),
        'Confidence': f"{r['evaluation'].get('confidence_score')}%"
    } for r in results])
    display(df)
else:
    # Every persona failed: do not overwrite a previous good run with an empty list
    print(f"\n✗ No successful evaluations; {output_file} was not written")

In [None]:
# Anthropic: Claude Opus 4.1
# Runs every persona through this model, saves the successful evaluations as JSON
# under results_dir, then displays a per-persona summary table.
provider = 'anthropic'
model_name = 'claude-opus-4.1'

print('=' * 80)
print(f"Evaluating: {provider.upper()} - {model_name}")
print('=' * 80 + '\n')

results = []
for i, persona in enumerate(personas, 1):
    print(f"[{i}/{len(personas)}] Persona {persona['id']}: {persona['name']}...", end=" ")

    try:
        evaluation = evaluate_persona(persona, videos, provider, model_name)

        if evaluation:
            result = {
                'persona_id': persona['id'],
                'persona_name': persona['name'],
                'provider': provider,
                'model': model_name,
                'evaluation': evaluation
            }
            results.append(result)
            print(f"✓ Video {evaluation.get('most_preferred_video')} (Confidence: {evaluation.get('confidence_score')}%)")
        else:
            print("✗ Failed")
    except Exception as e:
        # Best-effort: one failing persona must not abort the remaining ones
        print(f"✗ Error: {e}")

# Save results
output_file = results_dir / f'{provider}_{model_name}_evaluations.json'
if results:
    with open(output_file, 'w', encoding='utf-8') as f:
        json.dump(results, f, indent=2)

    print(f"\n✓ Saved {len(results)} evaluations to: {output_file}")

    # Display summary; .get() mirrors the progress line above so an evaluation
    # missing a key shows None instead of raising KeyError after the run.
    df = pd.DataFrame([{
        'Persona': r['persona_name'],
        'Preferred Video': r['evaluation'].get('most_preferred_video'),
        'Confidence': f"{r['evaluation'].get('confidence_score')}%"
    } for r in results])
    display(df)
else:
    # Every persona failed: do not overwrite a previous good run with an empty list
    print(f"\n✗ No successful evaluations; {output_file} was not written")

In [None]:
# Anthropic: Claude 3.7 Sonnet
# Runs every persona through this model, saves the successful evaluations as JSON
# under results_dir, then displays a per-persona summary table.
provider = 'anthropic'
model_name = 'claude-3.7-sonnet'

print('=' * 80)
print(f"Evaluating: {provider.upper()} - {model_name}")
print('=' * 80 + '\n')

results = []
for i, persona in enumerate(personas, 1):
    print(f"[{i}/{len(personas)}] Persona {persona['id']}: {persona['name']}...", end=" ")

    try:
        evaluation = evaluate_persona(persona, videos, provider, model_name)

        if evaluation:
            result = {
                'persona_id': persona['id'],
                'persona_name': persona['name'],
                'provider': provider,
                'model': model_name,
                'evaluation': evaluation
            }
            results.append(result)
            print(f"✓ Video {evaluation.get('most_preferred_video')} (Confidence: {evaluation.get('confidence_score')}%)")
        else:
            print("✗ Failed")
    except Exception as e:
        # Best-effort: one failing persona must not abort the remaining ones
        print(f"✗ Error: {e}")

# Save results
output_file = results_dir / f'{provider}_{model_name}_evaluations.json'
if results:
    with open(output_file, 'w', encoding='utf-8') as f:
        json.dump(results, f, indent=2)

    print(f"\n✓ Saved {len(results)} evaluations to: {output_file}")

    # Display summary; .get() mirrors the progress line above so an evaluation
    # missing a key shows None instead of raising KeyError after the run.
    df = pd.DataFrame([{
        'Persona': r['persona_name'],
        'Preferred Video': r['evaluation'].get('most_preferred_video'),
        'Confidence': f"{r['evaluation'].get('confidence_score')}%"
    } for r in results])
    display(df)
else:
    # Every persona failed: do not overwrite a previous good run with an empty list
    print(f"\n✗ No successful evaluations; {output_file} was not written")

---
## Google Gemini Models

In [5]:
# Google: Gemini 2.5 Pro
# Runs every persona through this model, saves the successful evaluations as JSON
# under results_dir, then displays a per-persona summary table.
provider = 'google'
model_name = 'gemini-2.5-pro'

print('=' * 80)
print(f"Evaluating: {provider.upper()} - {model_name}")
print('=' * 80 + '\n')

results = []
for i, persona in enumerate(personas, 1):
    print(f"[{i}/{len(personas)}] Persona {persona['id']}: {persona['name']}...", end=" ")

    try:
        evaluation = evaluate_persona(persona, videos, provider, model_name)

        if evaluation:
            result = {
                'persona_id': persona['id'],
                'persona_name': persona['name'],
                'provider': provider,
                'model': model_name,
                'evaluation': evaluation
            }
            results.append(result)
            print(f"✓ Video {evaluation.get('most_preferred_video')} (Confidence: {evaluation.get('confidence_score')}%)")
        else:
            print("✗ Failed")
    except Exception as e:
        # Best-effort: one failing persona must not abort the remaining ones
        print(f"✗ Error: {e}")

# Save results
output_file = results_dir / f'{provider}_{model_name}_evaluations.json'
if results:
    with open(output_file, 'w', encoding='utf-8') as f:
        json.dump(results, f, indent=2)

    print(f"\n✓ Saved {len(results)} evaluations to: {output_file}")

    # Display summary; .get() mirrors the progress line above so an evaluation
    # missing a key shows None instead of raising KeyError after the run.
    df = pd.DataFrame([{
        'Persona': r['persona_name'],
        'Preferred Video': r['evaluation'].get('most_preferred_video'),
        'Confidence': f"{r['evaluation'].get('confidence_score')}%"
    } for r in results])
    display(df)
else:
    # Every persona failed: do not overwrite a previous good run with an empty list
    print(f"\n✗ No successful evaluations; {output_file} was not written")

Evaluating: GOOGLE - gemini-2.5-pro

[1/20] Persona 1: Persona 1... ✓ Video 2 (Confidence: 90%)
[2/20] Persona 2: Persona 2... ✓ Video 2 (Confidence: 85%)
[3/20] Persona 3: Persona 3... ✓ Video 2 (Confidence: 90%)
[4/20] Persona 4: Persona 4... ✓ Video 2 (Confidence: 90%)
[5/20] Persona 5: Persona 5... ✓ Video 2 (Confidence: 90%)
[6/20] Persona 6: Persona 6... ✓ Video 3 (Confidence: 90%)
[7/20] Persona 7: Persona 7... ✓ Video 2 (Confidence: 85%)
[8/20] Persona 8: Persona 8... ✓ Video 2 (Confidence: 85%)
[9/20] Persona 9: Persona 9... ✓ Video 2 (Confidence: 90%)
[10/20] Persona 10: Persona 10... ✓ Video 2 (Confidence: 90%)
[11/20] Persona 11: Persona 11... ✓ Video 2 (Confidence: 90%)
[12/20] Persona 12: Persona 12... ✓ Video 2 (Confidence: 90%)
[13/20] Persona 13: Persona 13... ✓ Video 2 (Confidence: 85%)
[14/20] Persona 14: Persona 14... ✓ Video 2 (Confidence: 90%)
[15/20] Persona 15: Persona 15... ✓ Video 2 (Confidence: 85%)
[16/20] Persona 16: Persona 16... ✓ Video 2 (Confidence: 90%

Unnamed: 0,Persona,Preferred Video,Confidence
0,Persona 1,2,90%
1,Persona 2,2,85%
2,Persona 3,2,90%
3,Persona 4,2,90%
4,Persona 5,2,90%
5,Persona 6,3,90%
6,Persona 7,2,85%
7,Persona 8,2,85%
8,Persona 9,2,90%
9,Persona 10,2,90%


In [None]:
# Google: Gemini 2.5 Flash
# Runs every persona through this model, saves the successful evaluations as JSON
# under results_dir, then displays a per-persona summary table.
provider = 'google'
model_name = 'gemini-2.5-flash'

print('=' * 80)
print(f"Evaluating: {provider.upper()} - {model_name}")
print('=' * 80 + '\n')

results = []
for i, persona in enumerate(personas, 1):
    print(f"[{i}/{len(personas)}] Persona {persona['id']}: {persona['name']}...", end=" ")

    try:
        evaluation = evaluate_persona(persona, videos, provider, model_name)

        if evaluation:
            result = {
                'persona_id': persona['id'],
                'persona_name': persona['name'],
                'provider': provider,
                'model': model_name,
                'evaluation': evaluation
            }
            results.append(result)
            print(f"✓ Video {evaluation.get('most_preferred_video')} (Confidence: {evaluation.get('confidence_score')}%)")
        else:
            print("✗ Failed")
    except Exception as e:
        # Best-effort: one failing persona must not abort the remaining ones
        print(f"✗ Error: {e}")

# Save results
output_file = results_dir / f'{provider}_{model_name}_evaluations.json'
if results:
    with open(output_file, 'w', encoding='utf-8') as f:
        json.dump(results, f, indent=2)

    print(f"\n✓ Saved {len(results)} evaluations to: {output_file}")

    # Display summary; .get() mirrors the progress line above so an evaluation
    # missing a key shows None instead of raising KeyError after the run.
    df = pd.DataFrame([{
        'Persona': r['persona_name'],
        'Preferred Video': r['evaluation'].get('most_preferred_video'),
        'Confidence': f"{r['evaluation'].get('confidence_score')}%"
    } for r in results])
    display(df)
else:
    # Every persona failed: do not overwrite a previous good run with an empty list
    print(f"\n✗ No successful evaluations; {output_file} was not written")

In [None]:
# Google: Gemini 2.5 Flash Lite
# Runs every persona through this model, saves the successful evaluations as JSON
# under results_dir, then displays a per-persona summary table.
provider = 'google'
model_name = 'gemini-2.5-flash-lite'

print('=' * 80)
print(f"Evaluating: {provider.upper()} - {model_name}")
print('=' * 80 + '\n')

results = []
for i, persona in enumerate(personas, 1):
    print(f"[{i}/{len(personas)}] Persona {persona['id']}: {persona['name']}...", end=" ")

    try:
        evaluation = evaluate_persona(persona, videos, provider, model_name)

        if evaluation:
            result = {
                'persona_id': persona['id'],
                'persona_name': persona['name'],
                'provider': provider,
                'model': model_name,
                'evaluation': evaluation
            }
            results.append(result)
            print(f"✓ Video {evaluation.get('most_preferred_video')} (Confidence: {evaluation.get('confidence_score')}%)")
        else:
            print("✗ Failed")
    except Exception as e:
        # Best-effort: one failing persona must not abort the remaining ones
        print(f"✗ Error: {e}")

# Save results
output_file = results_dir / f'{provider}_{model_name}_evaluations.json'
if results:
    with open(output_file, 'w', encoding='utf-8') as f:
        json.dump(results, f, indent=2)

    print(f"\n✓ Saved {len(results)} evaluations to: {output_file}")

    # Display summary; .get() mirrors the progress line above so an evaluation
    # missing a key shows None instead of raising KeyError after the run.
    df = pd.DataFrame([{
        'Persona': r['persona_name'],
        'Preferred Video': r['evaluation'].get('most_preferred_video'),
        'Confidence': f"{r['evaluation'].get('confidence_score')}%"
    } for r in results])
    display(df)
else:
    # Every persona failed: do not overwrite a previous good run with an empty list
    print(f"\n✗ No successful evaluations; {output_file} was not written")

In [None]:
# Google: Gemini 2.0 Flash
# Runs every persona through this model, saves the successful evaluations as JSON
# under results_dir, then displays a per-persona summary table.
provider = 'google'
model_name = 'gemini-2.0-flash'

print('=' * 80)
print(f"Evaluating: {provider.upper()} - {model_name}")
print('=' * 80 + '\n')

results = []
for i, persona in enumerate(personas, 1):
    print(f"[{i}/{len(personas)}] Persona {persona['id']}: {persona['name']}...", end=" ")

    try:
        evaluation = evaluate_persona(persona, videos, provider, model_name)

        if evaluation:
            result = {
                'persona_id': persona['id'],
                'persona_name': persona['name'],
                'provider': provider,
                'model': model_name,
                'evaluation': evaluation
            }
            results.append(result)
            print(f"✓ Video {evaluation.get('most_preferred_video')} (Confidence: {evaluation.get('confidence_score')}%)")
        else:
            print("✗ Failed")
    except Exception as e:
        # Best-effort: one failing persona must not abort the remaining ones
        print(f"✗ Error: {e}")

# Save results
output_file = results_dir / f'{provider}_{model_name}_evaluations.json'
if results:
    with open(output_file, 'w', encoding='utf-8') as f:
        json.dump(results, f, indent=2)

    print(f"\n✓ Saved {len(results)} evaluations to: {output_file}")

    # Display summary; .get() mirrors the progress line above so an evaluation
    # missing a key shows None instead of raising KeyError after the run.
    df = pd.DataFrame([{
        'Persona': r['persona_name'],
        'Preferred Video': r['evaluation'].get('most_preferred_video'),
        'Confidence': f"{r['evaluation'].get('confidence_score')}%"
    } for r in results])
    display(df)
else:
    # Every persona failed: do not overwrite a previous good run with an empty list
    print(f"\n✗ No successful evaluations; {output_file} was not written")

In [None]:
# Google: Gemini 2.0 Flash Lite
# Runs every persona through this model, saves the successful evaluations as JSON
# under results_dir, then displays a per-persona summary table.
provider = 'google'
model_name = 'gemini-2.0-flash-lite'

print('=' * 80)
print(f"Evaluating: {provider.upper()} - {model_name}")
print('=' * 80 + '\n')

results = []
for i, persona in enumerate(personas, 1):
    print(f"[{i}/{len(personas)}] Persona {persona['id']}: {persona['name']}...", end=" ")

    try:
        evaluation = evaluate_persona(persona, videos, provider, model_name)

        if evaluation:
            result = {
                'persona_id': persona['id'],
                'persona_name': persona['name'],
                'provider': provider,
                'model': model_name,
                'evaluation': evaluation
            }
            results.append(result)
            print(f"✓ Video {evaluation.get('most_preferred_video')} (Confidence: {evaluation.get('confidence_score')}%)")
        else:
            print("✗ Failed")
    except Exception as e:
        # Best-effort: one failing persona must not abort the remaining ones
        print(f"✗ Error: {e}")

# Save results
output_file = results_dir / f'{provider}_{model_name}_evaluations.json'
if results:
    with open(output_file, 'w', encoding='utf-8') as f:
        json.dump(results, f, indent=2)

    print(f"\n✓ Saved {len(results)} evaluations to: {output_file}")

    # Display summary; .get() mirrors the progress line above so an evaluation
    # missing a key shows None instead of raising KeyError after the run.
    df = pd.DataFrame([{
        'Persona': r['persona_name'],
        'Preferred Video': r['evaluation'].get('most_preferred_video'),
        'Confidence': f"{r['evaluation'].get('confidence_score')}%"
    } for r in results])
    display(df)
else:
    # Every persona failed: do not overwrite a previous good run with an empty list
    print(f"\n✗ No successful evaluations; {output_file} was not written")

---
## Summary: View All Results

In [None]:
# Load and display all results
# Search the directory the evaluation cells write to (results_dir) rather than a
# separate hard-coded 'results/' string, so the listing and the closing message
# always refer to the same location. Sorted for a stable listing order.
all_result_files = sorted(results_dir.glob('*_evaluations.json'))
print(f"Found {len(all_result_files)} result files:\n")

for result_path in all_result_files:
    print(f"  - {result_path.name}")
    try:
        with open(result_path, 'r', encoding='utf-8') as f:
            data = json.load(f)
    except (OSError, json.JSONDecodeError) as e:
        # One unreadable or corrupt file should not hide the remaining ones
        print(f"    ✗ Could not read: {e}")
        continue
    print(f"    Evaluations: {len(data)}")

print(f"\n✓ All results saved in: {results_dir.absolute()}")