# Winoground Results Analysis

This notebook loads every Winoground evaluation result file (one JSON file per model), flattens the overall and per-tag scores into a single pandas DataFrame, and exports that table to CSV.

In [None]:
import pandas as pd
import json
import glob
import os
from pathlib import Path

In [None]:
# Output: the long-format summary table (one row per model/tag pair) is written here.
output_csv = "../results/winoground_summary.csv"

# Input: every *.json file directly inside this directory is treated as one result file.
# Both paths are relative, so they resolve against the kernel's working directory.
results_dir = "../results/winoground/"

In [None]:
# Discover the result files.
# glob returns matches in an order that depends on the file system, so the
# paths are sorted: the row order of the table built from this list (and which
# model ends up first in it) is then the same on every run.
json_files = sorted(glob.glob(os.path.join(results_dir, "*.json")))

# Stop here with a clear message rather than letting a later cell fail with
# an opaque KeyError on an empty DataFrame.
if not json_files:
    raise FileNotFoundError(
        f"No *.json result files found in {os.path.abspath(results_dir)}"
    )

print(f"Found {len(json_files)} result files:")
for file in json_files:
    print(f"  - {os.path.basename(file)}")

In [None]:
# Flatten every result file into DataFrame rows: one 'Overall' row per model
# plus one row per tag.


def _score_row(model, tag, scores, count):
    """Build one summary row for a (model, tag) pair.

    `scores` must provide 'text', 'image' and 'group' entries; each value is
    multiplied by 100 (presumably fraction -> percentage; confirm against the
    evaluation script). `count` is stored unchanged.
    """
    return {
        'model': model,
        'tag': tag,
        'text_score': scores['text'] * 100,
        'image_score': scores['image'] * 100,
        'group_score': scores['group'] * 100,
        'count': count,
    }


rows = []

for json_file in json_files:
    model_name = Path(json_file).stem  # filename without extension is used as the model name

    # Decode explicitly as UTF-8 so the platform's default encoding cannot
    # change how the file is read.
    with open(json_file, 'r', encoding='utf-8') as f:
        data = json.load(f)

    # Winoground results are nested under experiments -> winoground
    winoground_data = data['experiments']['winoground']

    # Overall results; the example count comes from 'total_examples'
    overall = winoground_data['overall']
    rows.append(
        _score_row(model_name, 'Overall', overall, winoground_data['total_examples'])
    )

    # Tag-specific results; each tag entry carries its own 'count'
    for tag, tag_data in winoground_data['by_tag'].items():
        rows.append(_score_row(model_name, tag, tag_data, tag_data['count']))

print(f"Created {len(rows)} rows from {len(json_files)} files")

In [None]:
# Assemble the collected row dicts into a single table
df = pd.DataFrame(rows)

# Report the table's dimensions and column names as a quick sanity check
n_rows_by_cols = df.shape
column_names = df.columns.tolist()
print(f"DataFrame shape: {n_rows_by_cols}")
print("\nColumns:", column_names)

# Preview: the cell's last expression is rendered as its output
print("\nFirst few rows:")
df.head(10)

In [None]:
# Overview of what was loaded: the distinct models and tags in the table
print("Models found:", df['model'].unique())
print("\nTags found:", df['tag'].unique())

# Example counts per tag, read from the rows of the first model in the table only
print("\nSample counts by tag:")
first_model = df['model'].iloc[0]
rows_of_first_model = df.loc[df['model'] == first_model]
tag_counts = rows_of_first_model.set_index('tag')['count']
print(tag_counts)

In [None]:
# Write the long-format table to disk; the DataFrame index is not written
df.to_csv(output_csv, index=False)

# Confirm where the file went and how large the exported table is
print(f"Results saved to: {output_csv}\nCSV shape: {df.shape}")

In [None]:
# Optional wide view for easier reading:
# one row per tag, one column per model, cells hold the group score
pivot_group = df.pivot(index='tag', columns='model', values='group_score')

print("\nGroup Scores by Model and Tag:")
# Rounding is applied to the displayed copy only; pivot_group keeps the unrounded values
pivot_rounded = pivot_group.round(2)
display(pivot_rounded)

In [None]:
# Persist the wide group-score table next to the summary CSV.
# The destination is bound once so the write and the message cannot drift apart.
pivot_csv = "../results/winoground_pivot_group.csv"
pivot_group.to_csv(pivot_csv)
print(f"Pivot table saved to: {pivot_csv}")