In [5]:
import pandas as pd

# Read the long-format similarity scores (one row per caption/model pair)
df = pd.read_csv("semantic_similarity_detailed.csv")

# Reshape to one row per (contest, human_caption) with one score column per
# model, keeping the highest score when a pair occurs more than once.
pivot_df = (
    df.pivot_table(
        index=["contest", "human_caption"],
        columns="ai_model",
        values="semantic_similarity",
        aggfunc="max",  # or mean, depending on your use case
    )
    .reset_index()
    # Drop the leftover 'ai_model' label from the column axis
    .rename_axis(None, axis=1)
    # Shorten the ChatGPT column name; the 'claude' column is already clean
    .rename(columns={"chatgpt (gpt-4o)": "chatgpt"})
)

# Determine which model performed better
def determine_winner(row):
    """Label which model scored higher for a single pivoted row.

    Args:
        row: Mapping-like object (for example a DataFrame row) that exposes
            the "chatgpt" and "claude" similarity scores by key.

    Returns:
        A ``(winner, description)`` tuple. ``winner`` is "chatgpt",
        "claude", "tie" when the scores are equal, or "undetermined" when
        at least one score is missing (NaN/None).
    """
    chatgpt_score = row["chatgpt"]
    claude_score = row["claude"]

    # NaN compares False against everything, so without this guard a row
    # with a missing score would fall through and be reported as a tie.
    if pd.isna(chatgpt_score) or pd.isna(claude_score):
        return "undetermined", "Missing similarity score for at least one model"

    if chatgpt_score > claude_score:
        return "chatgpt", "ChatGPT aligned more closely with human captions"
    if chatgpt_score < claude_score:
        return "claude", "Claude aligned more closely with human captions"
    return "tie", "Both models performed equally"

# Evaluate every row; result_type="expand" spreads each returned tuple
# across two columns of the intermediate frame.
winner_info = pivot_df.apply(determine_winner, axis=1, result_type="expand")

# Attach the winner label and its explanation to the pivoted table
pivot_df[["Who_won", "Description"]] = winner_info

# Write the annotated table to disk (without the row index) and confirm
pivot_df.to_csv("contests_with_model_winner.csv", index=False)
print("✅ Saved to 'contests_with_model_winner.csv'")


✅ Saved to 'contests_with_model_winner.csv'
