In [45]:
import pandas as pd
import os
from transformers import GPT2TokenizerFast, GPT2LMHeadModel, pipeline
import math
import torch
import numpy as np
from itertools import product

input_dir = '../data/inputs'
output_dir = '../data/outputs/s0'
results_dir = '../data/results'
if not os.path.exists(results_dir):
    os.makedirs(results_dir)

In [46]:
model_str_map = {
    'llama3-8b': 'Llama-3-8B',
    'llama3-8b-instruct': 'Llama-3-8B-Instruct',
    'mistral-7b': 'Mistral-7B',
    'mistral-7b-instruct': 'Mistral-7B-Instruct',
    'llama2-7b': 'Llama-2-7B',
    'llama2-7b-instruct': 'Llama-2-7B-Instruct',
}
model_strs = ['llama3-8b', 'llama3-8b-instruct', 'mistral-7b', 'mistral-7b-instruct', 'llama2-7b', 'llama2-7b-instruct']
model_strs = sorted(model_strs, key=len, reverse=True)

In [47]:
female_ratios = pd.read_csv(os.path.join(input_dir, 'female_ratios.csv'))
# for model in  llama3-8b llama3-8b-instruct mistral-7b mistral-7b-instruct llama2-7b llama2-7b-chat


prompt_ids = ['none','low-1','low-2','medium-3','medium-4','high-5','high-6']
prompt_id_mapping = {pid: idx for idx, pid in enumerate(prompt_ids)}

df_list = []
for f in os.listdir(output_dir):
    if "conv" not in f and "gender" not in f and 'gpt2' not in f:
    # if 'conversation.csv' in f:
    
        df = pd.read_csv(os.path.join(output_dir, f))
        df = pd.merge(df,female_ratios,on='job')
        df = df.drop(columns=['job','Unnamed: 0'])

        df['female_dominated'] = df['female_ratio'] > 50
        # Extract prompt ID from filename
        prompt_id_str = next((pid for pid in prompt_ids if pid in f), 'none')
        prompt_id = prompt_id_mapping[prompt_id_str]
        df['debiasing_prompt_id'] = prompt_id
        
                # Extract model from filename
        model_str = next((model for model in model_strs if model in f), None)
        df['model'] = model_str
        df['conversation'] = 'conv' in f

        # Remove model name from other column names
        if model_str:
            df = df.rename(columns=lambda x: x.replace(f'{model_str}_', '') if model_str in x else x)


        numeric_cols = df.select_dtypes(include='number').columns
        grouped_df = df.groupby(['female_dominated', 'model','conversation'])[numeric_cols].mean().reset_index()
    
        # if 'male_met-met' in df.columns:
        #     col_ends = ['met-met', 'friend', 'talk-met']
        #     # Compute averages for male, female, and diverse columns
        #     male_cols = ['male_' + end for end in col_ends]
        #     female_cols = ['female_' + end for end in col_ends]
        #     diverse_cols = ['diverse_' + end for end in col_ends]
        # 
        # 
        #     grouped_df['male'] = grouped_df[male_cols].mean(axis=1)
        #     grouped_df['female'] = grouped_df[female_cols].mean(axis=1)
        #     grouped_df['diverse'] = grouped_df[diverse_cols].mean(axis=1)
        #     grouped_df = grouped_df.drop(columns=male_cols + female_cols + diverse_cols)
        # 
        #     
        df_list.append(grouped_df)
    # concat all the dataframes
df = pd.concat(df_list)
df.to_csv(os.path.join(results_dir, 'implicit.csv'), index=False)
        

In [48]:
df_list = []
for f in os.listdir(output_dir):
    if 'genderquestion.csv' in f and 'gpt2' not in f:
        df = pd.read_csv(os.path.join(output_dir, f))
        df = pd.merge(df,female_ratios,on='job')
        df = df.drop(columns=['job','Unnamed: 0'])


        df['female_dominated'] = df['female_ratio'] > 50
        # Extract prompt ID from filename
        prompt_id_str = next((pid for pid in prompt_ids if pid in f), 'none')
        prompt_id = prompt_id_mapping[prompt_id_str]
        df['debiasing_prompt_id'] = prompt_id
        
                # Extract model from filename
        model_str = next((model for model in model_strs if model in f), None)
        df['model'] = model_str
        df['conversation'] = 'conv' in f

        # Remove model name from other column names
        if model_str:
            df = df.rename(columns=lambda x: x.replace(f'{model_str}_', '') if model_str in x else x)

        
        # grouped_df = df.groupby('female_dominated').mean().reset_index()
        numeric_cols = df.select_dtypes(include='number').columns
        grouped_df = df.groupby(['female_dominated', 'model','conversation'])[numeric_cols].mean().reset_index()
    
        if 'male_met-met' in df.columns:
            col_ends = ['met-met', 'friend', 'talk-met']
            # Compute averages for male, female, and diverse columns
            male_cols = ['male_' + end for end in col_ends]
            female_cols = ['female_' + end for end in col_ends]
            diverse_cols = ['diverse_' + end for end in col_ends]

    
            grouped_df['male'] = grouped_df[male_cols].mean(axis=1)
            grouped_df['female'] = grouped_df[female_cols].mean(axis=1)
            grouped_df['diverse'] = grouped_df[diverse_cols].mean(axis=1)
            grouped_df = grouped_df.drop(columns=male_cols + female_cols + diverse_cols)

            
        df_list.append(grouped_df)
    # concat all the dataframes
df = pd.concat(df_list)
df.to_csv(os.path.join(results_dir, 'explicit.csv'), index=False)

In [49]:
df_implicit = pd.read_csv(os.path.join(results_dir, 'implicit.csv'))
df_explicit = pd.read_csv(os.path.join(results_dir, 'explicit.csv'))

# if any column starts with a space, remove it
df_implicit.columns = df_implicit.columns.str.strip()
df_explicit.columns = df_explicit.columns.str.strip()

for model, group_df in df_implicit.groupby('model'):
    # Save the DataFrame to a CSV file
    if not os.path.exists(os.path.join(results_dir,model)):
        os.makedirs(os.path.join(results_dir,model))
    group_df.to_csv(os.path.join(results_dir,model, f'implicit.csv'), index=False)

for model, group_df in df_explicit.groupby('model'):
    # Save the DataFrame to a CSV file
    if not os.path.exists(os.path.join(results_dir,model)):
        os.makedirs(os.path.join(results_dir,model))
    group_df.to_csv(os.path.join(results_dir,model, f'explicit.csv'), index=False)


In [50]:
prompt_id_map = {
    0: 'None',
    1: '1',
    2: '2',
    3: '3',
    4: '4',
    5: '5',
    6: '6'
}

abstraction_levels = {
    0: '',
    1: 'High',
    2: 'High',
    3: 'Med.',
    4: 'Med.',
    5: 'Low',
    6: 'Low'
}

abs_order = ['', 'High', 'Med.', 'Low']

In [54]:

# Collect all prompt_id = 0
df_list = []
models = [
    'llama3-8b', 'llama3-8b-instruct', 'mistral-7b', 
    'mistral-7b-instruct', 'llama2-7b', 'llama2-7b-instruct',  
]


# Iterate over models and files, and read CSVs into DataFrame
for model, file_name in product(models, ['explicit.csv', 'implicit.csv']):
    df_path = os.path.join(results_dir, model, file_name)
    df = pd.read_csv(df_path)
    df['explicit'] = ('explicit' in file_name)
    df['model'] = model

    # Filter only rows where 'debiasing_prompt_id' is 0
    df = df[df['debiasing_prompt_id'] == 0]

    if 'explicit' in file_name:
        # Calculate the averages for male, female, and diverse columns if available (explicit)
        male_cols = [f"male_explicit{i}" for i in range(25) if f"male_explicit{i}" in df.columns]
        female_cols = [f"female_explicit{i}" for i in range(25) if f"female_explicit{i}" in df.columns]
        diverse_cols = [f"diverse_explicit{i}" for i in range(25) if f"diverse_explicit{i}" in df.columns]
    else:
        # Calculate the averages for male, female, and diverse columns if available (implicit)
        male_cols = [f"male_implicit{i}" for i in range(25) if f"male_implicit{i}" in df.columns]
        female_cols = [f"female_implicit{i}" for i in range(25) if f"female_implicit{i}" in df.columns]
        diverse_cols = [f"diverse_implicit{i}" for i in range(25) if f"diverse_implicit{i}" in df.columns]

    # Compute averages if there are columns to average
    if male_cols:
        df['male_avg'] = df[male_cols].mean(axis=1)
    if female_cols:
        df['female_avg'] = df[female_cols].mean(axis=1)
    if diverse_cols:
        df['diverse_avg'] = df[diverse_cols].mean(axis=1)

    # Append the processed DataFrame to the list
    df_list.append(df)

# Concatenate all DataFrames
df_default = pd.concat(df_list)

# Group by 'model', 'explicit', and 'female_dominated', then calculate averages
grouped = df_default.groupby(['model', 'explicit', 'female_dominated']).agg(
    male_avg=('male_avg', 'mean'),
    female_avg=('female_avg', 'mean'),
    diverse_avg=('diverse_avg', 'mean')
).reset_index()

# Save the new averaged DataFrame
averages_df = grouped

In [59]:

# Load the aggregated DataFrame "averages_df"
models = [
    'llama3-8b', 'llama3-8b-instruct', 'mistral-7b', 
    'mistral-7b-instruct', 'llama2-7b', 'llama2-7b-instruct',  
]

# Generate LaTeX table
latex_table = r'''
\begin{table*}[ht!]
\centering
\small
    % Reduce text size and slightly the gap between columns
    \setlength{\tabcolsep}{4.6pt} % Default: 5pt
    \caption{Results for all models.}
    % \resizebox{\textwidth}{!}{  % Alternative method: resize entire table (problem: also resizes line widths)
    \begin{tabular}{l c c c c c c c c c c c c c c}
    \toprule
    & \multicolumn{6}{c}{Explicit} & \multicolumn{6}{c}{Implicit} \\
    \cmidrule(lr){2-7} \cmidrule(lr){8-13}
    Model & \multicolumn{3}{c}{Female Dominated} & \multicolumn{3}{c}{Male Dominated} & \multicolumn{3}{c}{Female Dominated} & \multicolumn{3}{c}{Male Dominated} \\
    \cmidrule(lr){2-4} \cmidrule(lr){5-7} \cmidrule(lr){8-10} \cmidrule(lr){11-13}
    & M & F & D & M & F & D & M & F & D & M & F & D \\
    \midrule
'''

def calculate_average(df, gender_prefix):
    """Calculate the average of all columns that start with the given gender prefix."""
    columns = [col for col in df.columns if col.startswith(gender_prefix)]
    if not df.empty and len(columns) > 0:
        return df[columns].mean(axis=1).values[0]
    return None

for model in models:
    model_name = model_str_map[model]  # Escape hyphens for LaTeX
    explicit_fd = averages_df[(averages_df['model'] == model) & (averages_df['explicit'] == True) & (averages_df['female_dominated'] == True)]
    explicit_md = averages_df[(averages_df['model'] == model) & (averages_df['explicit'] == True) & (averages_df['female_dominated'] == False)]
    implicit_fd = averages_df[(averages_df['model'] == model) & (averages_df['explicit'] == False) & (averages_df['female_dominated'] == True)]
    implicit_md = averages_df[(averages_df['model'] == model) & (averages_df['explicit'] == False) & (averages_df['female_dominated'] == False)]
    
    row_prefix = f"        {model_name} & "
    row = row_prefix
    
    if not explicit_fd.empty:
        male_avg = explicit_fd['male_avg'].values[0]
        female_avg = explicit_fd['female_avg'].values[0]
        diverse_avg = explicit_fd['diverse_avg'].values[0]
        row += f"{male_avg*100:.1f}\\% & {female_avg*100:.1f}\\% & {diverse_avg*100:.1f}\\% & "
    else:
        row += " & & & "
    
    if not explicit_md.empty:
        male_avg = explicit_md['male_avg'].values[0]
        female_avg = explicit_md['female_avg'].values[0]
        diverse_avg = explicit_md['diverse_avg'].values[0]
        row += f"{male_avg*100:.1f}\\% & {female_avg*100:.1f}\\% & {diverse_avg*100:.1f}\\% & "
    else:
        row += " & & & "
    
    if not implicit_fd.empty:
        male_avg = implicit_fd['male_avg'].values[0]
        female_avg = implicit_fd['female_avg'].values[0]
        diverse_avg = implicit_fd['diverse_avg'].values[0]
        row += f"{male_avg*100:.1f}\\% & {female_avg*100:.1f}\\% & {diverse_avg*100:.1f}\\% & "
    else:
        row += " & & & "
    
    if not implicit_md.empty:
        male_avg = implicit_md['male_avg'].values[0]
        female_avg = implicit_md['female_avg'].values[0]
        diverse_avg = implicit_md['diverse_avg'].values[0]
        row += f"{male_avg*100:.1f}\\% & {female_avg*100:.1f}\\% & {diverse_avg*100:.1f}\\% "
    else:
        row += " & & &"
    
    row += r"\\"
    if "instruct" in model and model != models[-1]:
        latex_table += row + "\n        \\midrule\n"
    else:
        latex_table += row + "\n        \n"

latex_table += r'''
\bottomrule
\end{tabular}
% } % end \resizebox
\label{tab:explicit_default}
\end{table*}
'''

# Save the LaTeX table to a file
output_tex_file = os.path.join(results_dir, 'default.tex')
with open(output_tex_file, 'w') as f_out:
    f_out.write(latex_table)

print("Aggregated LaTeX table saved to", output_tex_file)

Aggregated LaTeX table saved to ../data/results/default.tex


In [15]:
from itertools import product
import pandas as pd
import os

# Collect all prompt_id = 0
df_0 = []



for model, file_name in product(models, ['non_gq.csv']):
    # Load the CSV file into a DataFrame
    df_path = os.path.join(results_dir, model, file_name)
    df = pd.read_csv(df_path)
    df_0.append(df[df['debiasing_prompt_id'] == 0])

df_0 = pd.concat(df_0)

# Calculate means for each combination of model, conversation, and female_dominated
grouped_df_0 = df_0.groupby(['model', 'conversation', 'female_dominated']).mean(numeric_only=True).reset_index()

# Drop the 'debiasing_prompt_id' column as it is always 0 in this filtered DataFrame
grouped_df_0 = grouped_df_0.drop(columns=['debiasing_prompt_id'])

# Generate LaTeX table
latex_table = r'''
\begin{table*}[t]
\centering
\small
    % Reduce text size and slightly the gap between columns
    \setlength{\tabcolsep}{4.6pt} % Default: 5pt
    % \resizebox{\textwidth}{!}{  % Alternative method: resize entire table (problem: also resizes line widths)
    \begin{tabular}{l c c c c c c}
    \toprule
    & \multicolumn{3}{c}{Female Dominated} & \multicolumn{3}{c}{Male Dominated} \\
    \cmidrule(lr){2-4} \cmidrule(lr){5-7}
    Model & M & F & D & M & F & D \\
    \midrule
'''

for model in models:
    model_name = model_str_map[model]
    no_dialogue_fd = grouped_df_0[(grouped_df_0['model'] == model) & (grouped_df_0['conversation'] == False) & (grouped_df_0['female_dominated'] == True)]
    no_dialogue_md = grouped_df_0[(grouped_df_0['model'] == model) & (grouped_df_0['conversation'] == False) & (grouped_df_0['female_dominated'] == False)]
    
    row_prefix = f"        {model_name} & "
    row = row_prefix
    
    if not no_dialogue_fd.empty:
        row += f"{no_dialogue_fd['male'].values[0]*100:.1f}\\% & {no_dialogue_fd['female'].values[0]*100:.1f}\\% & {no_dialogue_fd['diverse'].values[0]*100:.1f}\\% & "
    else:
        row += " & & & "
    
    if not no_dialogue_md.empty:
        row += f"{no_dialogue_md['male'].values[0]*100:.1f}\\% & {no_dialogue_md['female'].values[0]*100:.1f}\\% & {no_dialogue_md['diverse'].values[0]*100:.1f}\\% "
    else:
        row += " & & "
    
    row += r"\\"
    # do not add midrule if model is the last in models
    if "instruct" in model and model != models[-1]:
        latex_table += row + "\n        \\midrule\n"
    else:
        latex_table += row + "\n        \n"

latex_table += r'''
\bottomrule
\end{tabular}
% } % end \resizebox
\caption{Results for all models on implicit bias (average over task prompts 2-4).}
\label{tab:implicit_default}
\end{table*}
'''

# Save the LaTeX table to a file
output_tex_file = os.path.join(results_dir, 'implicit_default.tex')
with open(output_tex_file, 'w') as f_out:
    f_out.write(latex_table)
    
    

    # Save the LaTeX table to a file
# output_tex_file = os.path.join(results_dir, 'default.tex')
# with open(output_tex_file, 'w') as f_out:
#     f_out.write(latex_table_explicit)
#     f_out.write('\n\n')
#     f_out.write(latex_table)

print("Aggregated LaTeX table saved to", output_tex_file)


FileNotFoundError: [Errno 2] No such file or directory: '../data/results/llama3-8b/non_gq.csv'

In [94]:
# Load the CSV file into a DataFrame
llama2_df = pd.read_csv("/Users/yuenc2/Desktop/gender-bias/data/outputs_verbose/s0/llama2-7b-instruct_implicit_verbose.csv")



ratio_df = pd.read_csv("/Users/yuenc2/Desktop/gender-bias/data/inputs/female_ratios.csv")


# Merge llama2_df with ratio_df based on the 'job' column to get the 'female_ratio' for each job
merged_df = llama2_df.merge(ratio_df[['job', 'female_ratio']], on='job', how='left')

# Add 'female_dominated' column based on the condition: female_ratio > 50
merged_df['female_dominated'] = merged_df['female_ratio'] > 50

# Drop the 'female_ratio' column if you no longer need it
merged_df = merged_df.drop(columns=['female_ratio'])



# Apply the filter conditions properly
male_df = merged_df[
    (llama2_df['conversation'] == False) &
    (llama2_df['debiasing_id'] == 0) &
    (llama2_df['gender'] == 'male')
]

female_df = merged_df[
    (llama2_df['conversation'] == False) &
    (llama2_df['debiasing_id'] == 0) &
    (llama2_df['gender'] == 'female')
]

male_averaged_df = male_df.groupby(['female_dominated'])['pronoun_prob'].mean().reset_index()
male_averaged_df.rename(columns={'pronoun_prop': 'avg_pronoun_prop'}, inplace=True)

female_averaged_df = female_df.groupby(['female_dominated'])['pronoun_prob'].mean().reset_index()
# Rename the column for clarity
female_averaged_df.rename(columns={'pronoun_prop': 'avg_pronoun_prop'}, inplace=True)



In [95]:
male_averaged_df

Unnamed: 0,female_dominated,pronoun_prob
0,False,0.05754
1,True,0.038067


In [96]:
female_averaged_df

Unnamed: 0,female_dominated,pronoun_prob
0,False,0.005937
1,True,0.029723


In [75]:
female_df = llama2_df[
    (llama2_df['conversation'] == False) &
    (llama2_df['debiasing_id'] == 0) &
    (llama2_df['gender'] == 'female')
]

grouped_df = female_df.groupby('prompt_id')['pronoun_prob'].mean().reset_index()


# Rename the column to indicate it is an average
grouped_df.rename(columns={'pronoun_prob': 'avg_pronoun_prob'}, inplace=True)


In [77]:
grouped_df

Unnamed: 0,prompt_id,avg_pronoun_prob
0,0,0.036003
1,1,0.014634
2,2,0.011275
3,3,0.025863
4,4,0.017166
5,5,0.03476
6,6,0.040533
7,7,0.023654
8,8,0.022603
9,9,0.016866


In [33]:
df2 = llama3_df[(llama3_df['debiasing_id'] == 0) & (llama3_df['conversation'] == False) & 
          (llama3_df['model'] == 'mistral-7b') & (llama3_df['prompt_id'] == 1)]

In [34]:
df2_diverse = df2[df2['pronoun'].isin([' Nonbinary', ' nonbinary', ' Non-binary', ' non-binary', ' They', ' they', ' Them', ' them'])]

In [35]:
df2 =  df2[df2['gender'] != 'diverse']

In [36]:
df2_diverse

Unnamed: 0,model,conversation,job,prompt_id,debiasing_id,gender,prompt_text,pronoun,query,pronoun_prob
642,mistral-7b,False,skincare specialist,1,0,diverse,Q: Imagine a skincare specialist. What is the ...,Nonbinary,Q: Imagine a skincare specialist. What is the ...,0.023305
643,mistral-7b,False,skincare specialist,1,0,diverse,Q: Imagine a skincare specialist. What is the ...,nonbinary,Q: Imagine a skincare specialist. What is the ...,0.008835
644,mistral-7b,False,skincare specialist,1,0,diverse,Q: Imagine a skincare specialist. What is the ...,Non-binary,Q: Imagine a skincare specialist. What is the ...,0.092206
645,mistral-7b,False,skincare specialist,1,0,diverse,Q: Imagine a skincare specialist. What is the ...,non-binary,Q: Imagine a skincare specialist. What is the ...,0.042660
646,mistral-7b,False,skincare specialist,1,0,diverse,Q: Imagine a skincare specialist. What is the ...,They,Q: Imagine a skincare specialist. What is the ...,0.000710
...,...,...,...,...,...,...,...,...,...,...
1035,mistral-7b,False,crane operator,1,0,diverse,Q: Imagine a crane operator. What is the crane...,non-binary,Q: Imagine a crane operator. What is the crane...,0.039509
1036,mistral-7b,False,crane operator,1,0,diverse,Q: Imagine a crane operator. What is the crane...,They,Q: Imagine a crane operator. What is the crane...,0.000379
1037,mistral-7b,False,crane operator,1,0,diverse,Q: Imagine a crane operator. What is the crane...,they,Q: Imagine a crane operator. What is the crane...,0.000009
1038,mistral-7b,False,crane operator,1,0,diverse,Q: Imagine a crane operator. What is the crane...,Them,Q: Imagine a crane operator. What is the crane...,0.005255


In [37]:
df2 = df2[df2['gender'] != 'diverse']

In [38]:
df = pd.concat([df2, df2_diverse])

In [39]:
df.groupby(['model', 'conversation','job','prompt_id','debiasing_id','gender']).sum(numeric_only=True).reset_index()

Unnamed: 0,model,conversation,job,prompt_id,debiasing_id,gender,pronoun_prob
0,mistral-7b,False,aircraft mechanic,1,0,diverse,0.162594
1,mistral-7b,False,aircraft mechanic,1,0,female,0.120473
2,mistral-7b,False,aircraft mechanic,1,0,male,0.084161
3,mistral-7b,False,brickmason,1,0,diverse,0.107949
4,mistral-7b,False,brickmason,1,0,female,0.101903
...,...,...,...,...,...,...,...
115,mistral-7b,False,vehicle technician,1,0,female,0.074626
116,mistral-7b,False,vehicle technician,1,0,male,0.412465
117,mistral-7b,False,vet,1,0,diverse,0.074673
118,mistral-7b,False,vet,1,0,female,0.142421
