In [None]:
import pandas as pd
import itertools

In [None]:
def read_in_file(model_name):
    """Load the results CSV for one model.

    Reads ``../results/1_<model_name>_results.csv`` (malformed lines produce a
    warning instead of an error), removes the 'Ethnicity Probability' column
    and renames the 'group' column to 'Target Ethnicity'.

    Parameters
    ----------
    model_name : str
        Name fragment interpolated into the results file name.

    Returns
    -------
    pandas.DataFrame
        The loaded and relabelled results.
    """
    results_path = f'../results/1_{model_name}_results.csv'
    raw_results = pd.read_csv(results_path, on_bad_lines='warn')

    without_probability = raw_results.drop(columns=['Ethnicity Probability'])
    return without_probability.rename(columns={'group': 'Target Ethnicity'})

In [None]:
def add_ethnicity_group(df):
    """Filter the results and attach a coarse 'Ethnicity Group' label.

    Rows whose 'Ethnicity' is in the excluded list, or whose
    'Target Ethnicity' equals 'nonce', are removed. Every remaining row gets
    an 'Ethnicity Group' of 'white', 'hispanic', 'arab' or 'asian' according
    to its 'Ethnicity'; ethnicities that belong to none of those groups get
    a missing value.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns 'Ethnicity' and 'Target Ethnicity'.

    Returns
    -------
    pandas.DataFrame
        A new, independent frame (the input is not modified) that keeps the
        original index labels of the surviving rows.
    """
    members_by_group = {
        'white': ['ENGLISH', 'FRENCH', 'GERMAN', 'GREEK', 'HUNGARIAN', 'ITALIAN', 'NORDIC', 'DUTCH'],
        'hispanic': ['HISPANIC'],
        'arab': ['ARAB'],
        'asian': ['CHINESE', 'JAPANESE', 'KOREAN', 'THAI'],
    }
    excluded = ['AFRICAN', 'ISRAELI', 'TURKISH', 'BALTIC', 'SLAV', 'INDIAN']

    # Invert the table once so each row is labelled by a single lookup.
    group_of = {
        ethnicity: group
        for group, members in members_by_group.items()
        for ethnicity in members
    }

    keep = ~df['Ethnicity'].isin(excluded) & (df['Target Ethnicity'] != 'nonce')

    # Explicit copy: adding a column to a filtered slice of the caller's
    # frame is what triggers pandas' SettingWithCopyWarning.
    filtered = df.loc[keep].copy()
    filtered['Ethnicity Group'] = filtered['Ethnicity'].map(group_of.get)
    return filtered



In [None]:
def get_proportional_perplexities(df):
    """Add an 'Adjusted Perplexity' column normalised by each group's mean.

    For every value of 'Group' the mean of 'perplexity_1' is divided by the
    overall mean of 'perplexity_1' and rounded to three decimals. Each row's
    'perplexity_1' is then divided by the ratio of its own group.

    Rows whose 'Group' is missing receive a missing 'Adjusted Perplexity'
    instead of aborting the computation, and an empty frame is returned with
    an empty 'Adjusted Perplexity' column.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns 'Group' and 'perplexity_1'.
        NOTE(review): no function visible in this notebook creates a 'Group'
        column - presumably it comes straight from the results CSV; confirm.

    Returns
    -------
    pandas.DataFrame
        A new frame with the input rows in their original order, a fresh
        0..n-1 index, and the extra 'Adjusted Perplexity' column. The input
        frame is left untouched.
    """
    overall_mean = df['perplexity_1'].mean()

    # One aggregation pass; groupby skips missing group labels.
    group_means = df.groupby('Group')['perplexity_1'].mean()

    # The ratio is deliberately rounded to 3 decimals before it is used as a
    # divisor, so previously produced numbers stay reproducible.
    proportional = (group_means / overall_mean).round(3)

    # reset_index returns a new object, so the caller's frame is not mutated.
    adjusted = df.reset_index(drop=True)
    adjusted['Adjusted Perplexity'] = adjusted['perplexity_1'] / adjusted['Group'].map(proportional)
    return adjusted

In [None]:
def get_summary_df(df, apx):
    """Average perplexity for every observed combination of grouping columns.

    One output row is produced for each combination of 'Gender',
    'Ethnicity Group', 'Target Ethnicity' and 'descriptor' that has at least
    one row in ``df``. Rows with a missing value in any of those four columns
    never belong to a combination.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain 'Gender', 'Ethnicity Group', 'Target Ethnicity',
        'descriptor', 'Ethnicity' and the perplexity column selected by
        ``apx``.
    apx : bool
        When truthy, 'Adjusted Perplexity' is averaged; otherwise
        'perplexity_1'.

    Returns
    -------
    pandas.DataFrame
        Columns 'Gender', 'Ethnicity Group', 'Average Perplexity' (mean
        rounded to 2 decimals), 'descriptor', 'Ethnicity' (taken from the
        first row of the combination) and 'Target Ethnicity', sorted by
        'descriptor'. If nothing matches, an empty frame with these columns
        is returned.
    """
    value_column = 'Adjusted Perplexity' if apx else 'perplexity_1'
    key_columns = ['Gender', 'Ethnicity Group', 'Target Ethnicity', 'descriptor']
    output_columns = ['Gender', 'Ethnicity Group', 'Average Perplexity',
                      'descriptor', 'Ethnicity', 'Target Ethnicity']

    # Split the frame once instead of building four boolean masks over the
    # whole frame for every candidate combination.
    cells = {key: cell for key, cell in df.groupby(key_columns, sort=False)}

    # Walk the candidates in the order of first appearance of each value
    # (gender, then ethnicity group, then target ethnicity, then descriptor)
    # so the records are emitted in a deterministic, data-driven order.
    candidates = itertools.product(*(df[column].unique() for column in key_columns))

    cluster_summary = []
    for candidate in candidates:
        cell = cells.get(candidate)
        if cell is None:
            # Combination never occurs in the data.
            continue

        cluster_summary.append({
            'Gender': cell['Gender'].iloc[0],
            'Ethnicity Group': cell['Ethnicity Group'].iloc[0],
            'Average Perplexity': round(cell[value_column].mean(), 2),
            'descriptor': candidate[3],
            'Ethnicity': cell['Ethnicity'].iloc[0],
            'Target Ethnicity': candidate[2],
        })

    # Fixing the columns keeps the sort below valid even with zero records.
    summary_df = pd.DataFrame(cluster_summary, columns=output_columns)
    return summary_df.sort_values(by='descriptor')


In [None]:

def get_min_perplexities(summary_df):
    """Print and return how often the lowest-perplexity group is the target.

    For each ('descriptor', 'Target Ethnicity') pair the row with the
    lowest 'Average Perplexity' is selected. The accuracy is the fraction of
    those selected rows whose 'Ethnicity Group' equals their
    'Target Ethnicity'.

    Parameters
    ----------
    summary_df : pandas.DataFrame
        Must contain 'descriptor', 'Target Ethnicity', 'Ethnicity Group' and
        'Average Perplexity'.

    Returns
    -------
    float
        The unrounded accuracy; the value rounded to 3 decimals is printed.
    """
    cell_keys = ['descriptor', 'Target Ethnicity']

    # Index label of the minimum-perplexity row within each cell.
    best_labels = summary_df.groupby(cell_keys)['Average Perplexity'].idxmin()
    winners = summary_df.loc[best_labels].sort_values(by=['Target Ethnicity', 'descriptor'])

    is_match = winners['Ethnicity Group'] == winners['Target Ethnicity']
    overall_accuracy = is_match.sum() / len(winners)

    print("Overall Accuracy:", round(overall_accuracy, 3))
    return overall_accuracy

In [None]:

def print_full_accuracy_table(model_name, apx):
    """Run the whole accuracy pipeline for one model and print the result.

    Loads the model's results file, attaches ethnicity groups, computes
    adjusted perplexities, summarises them per combination and prints the
    overall accuracy (the printing happens inside ``get_min_perplexities``).

    Parameters
    ----------
    model_name : str
        Model identifier passed to ``read_in_file``.
    apx : bool
        Passed to ``get_summary_df``: truthy averages 'Adjusted Perplexity',
        falsy averages 'perplexity_1'.

    Returns
    -------
    float
        The overall accuracy, so callers can collect it instead of having to
        read it off the printed output.
    """
    results = read_in_file(model_name)
    grouped = add_ethnicity_group(results)
    adjusted = get_proportional_perplexities(grouped)
    summary_df = get_summary_df(adjusted, apx)
    # The former bare `summary_df.head()` was removed: inside a function its
    # value is discarded, so it displayed nothing.
    return get_min_perplexities(summary_df)
    


In [None]:
# Model identifiers; each one is interpolated into a results file name.
model_list = [
    'google-bert_bert-large-cased',
    'roberta-large',
    'gpt2-xl',
    'google_flan-ul2',
    'EleutherAI_gpt-neox-20b',
    'facebook_opt-30b',
    'meta-llama_Meta-Llama-3-8B',
]

for model in model_list:
    print(model)
    # First the accuracy on raw perplexities, then on adjusted perplexities.
    print_full_accuracy_table(model, apx=False)
    print_full_accuracy_table(model, apx=True)
