In [2]:
import pandas as pd
import json
import os
from tqdm import tqdm
def merge_results():
    """
    Merge GPT and Claude results and determine final decisions
    """
    gpt_results = pd.read_csv('MovieSummaries/character_metadata_with_gpt.tsv', sep='\t')
    claude_results = pd.read_csv('MovieSummaries/character_metadata_with_claude.tsv', sep='\t')
    
    # Merge the results
    final_results = gpt_results.merge(
        claude_results[['Wikipedia movie ID', 'Actor name', 'claude_decision']],
        on=['Wikipedia movie ID', 'Actor name'],
        how='outer'
    )
    
    # Determine agreement and final decision
    final_results['ai_agreement'] = False
    final_results['is_main_character'] = None
    
    mask = (final_results['gpt_decision'].notna() & 
            final_results['claude_decision'].notna() & 
            (final_results['gpt_decision'] == final_results['claude_decision']))
    
    final_results.loc[mask, 'ai_agreement'] = True
    final_results.loc[mask, 'is_main_character'] = final_results.loc[mask, 'gpt_decision']
    
    # Save final results
    final_results.to_csv('MovieSummaries/character_metadata_with_roles_final.tsv', sep='\t', index=False)
    
    # Print statistics
    print("Merge completed!")
    print(f"Total characters: {len(final_results)}")
    print(f"Characters with AI agreement: {final_results['ai_agreement'].sum()}")
    print(f"Main characters (agreed): {final_results[final_results['ai_agreement']]['is_main_character'].sum()}")

if __name__ == "__main__":
    merge_results()

Merge completed!
Total characters: 103071
Characters with AI agreement: 67580
Main characters (agreed): 20773
