In [531]:
import pandas as pd
import os
from transformers import GPT2TokenizerFast, GPT2LMHeadModel, pipeline
import math
import torch
import numpy as np
from itertools import product

# Project directories, relative to the notebook's working directory.
input_dir = '../data/inputs'            # holds female_ratios.csv
output_dir = '../data/outputs/s0_1007'  # per-model CSV files read by the cells below
results_dir = '../data/results'         # aggregated CSV / LaTeX files are written here

# exist_ok=True makes the call idempotent and avoids the check-then-create race.
os.makedirs(results_dir, exist_ok=True)

In [532]:
# Numeric debiasing-prompt id -> label; id 0 is labelled 'None', the rest by their number.
prompt_id_map = {pid: ('None' if pid == 0 else str(pid)) for pid in range(7)}


# Model key (as it appears in file names) -> display name.
model_str_map = dict([
    ('llama3-8b', 'Llama-3-8B'),
    ('llama3-8b-instruct', 'Llama-3-8B-Instruct'),
    ('mistral-7b', 'Mistral-7B'),
    ('mistral-7b-instruct', 'Mistral-7B-Instruct'),
    ('llama2-7b', 'Llama-2-7B'),
    ('llama2-7b-instruct', 'Llama-2-7B-Instruct'),
    ('gemma-7b', 'Gemma-7B'),
    ('gemma-7b-instruct', 'Gemma-7B-Instruct'),
    ('gemma-2-9b', 'Gemma-2-9B'),
    ('gemma-2-9b-instruct', 'Gemma-2-9B-Instruct'),
])

# Model keys ordered longest first, so that a substring search over file names hits
# 'x-instruct' before its prefix 'x'. The sort is stable, so keys of equal length
# keep the dictionary's insertion order.
model_strs = sorted(model_str_map, key=len, reverse=True)

In [406]:
# Build `implicit.csv` in `results_dir`: for every selected file in `output_dir`, tag
# its rows (female_dominated, debiasing_prompt_id, model, conversation) and average the
# numeric columns per (female_dominated, model, conversation) group.
# NOTE(review): `female_ratios` is loaded but never used in this cell (the merge below
# is commented out), so each input file must already contain a `female_ratio` column.
female_ratios = pd.read_csv(os.path.join(input_dir, 'female_ratios.csv'))
# for model in  llama3-8b llama3-8b-instruct mistral-7b mistral-7b-instruct llama2-7b llama2-7b-chat


# Substrings looked for in file names; the list position is the numeric prompt id.
prompt_ids = ['none','low-1','low-2','medium-3','medium-4','high-5','high-6']
prompt_id_mapping = {pid: idx for idx, pid in enumerate(prompt_ids)}

df_list = []
# NOTE(review): os.listdir returns entries in arbitrary order, so the row order of the
# resulting CSV can differ between runs.
for f in os.listdir(output_dir):
    # Only files whose name contains none of "conv", "gender" or "gpt2" are processed.
    if "conv" not in f and "gender" not in f and 'gpt2' not in f:
        # if 'conversation.csv' in f:

        df = pd.read_csv(os.path.join(output_dir, f))
        # df = pd.merge(df,female_ratios,on='job')
        # df = df.drop(columns=['job','Unnamed: 0'])
        # Collapse duplicated columns. NOTE(review): the frame is mutated in place while
        # the column index obtained at loop start is iterated -- confirm this is intended.
        for col in df.columns:
            if '.' in col:
                # Drop the column named like `col` without '.1', then give `col` that
                # name (presumably targets the 'name.1' labels of duplicated CSV headers).
                df.drop(columns=[col.replace('.1','')],inplace=True)
                df.rename(columns={col:col.replace('.1','')},inplace=True)
            if '_x' in col:
                # Drop the '_x' column and rename its '_y' counterpart to the unsuffixed
                # name (presumably left-over merge suffixes -- TODO confirm).
                df.drop(columns=[col],inplace=True)
                df.rename(columns={col.replace('_x','_y'):col.replace('_x','')},inplace=True)

        df['female_dominated'] = df['female_ratio'] > 50
        # Prompt id: first entry of `prompt_ids` contained in the file name, else 'none'
        prompt_id_str = next((pid for pid in prompt_ids if pid in f), 'none')
        prompt_id = prompt_id_mapping[prompt_id_str]
        df['debiasing_prompt_id'] = prompt_id

        # Model: first entry of `model_strs` (sorted longest first) contained in the
        # file name; None when no known model name matches.
        model_str = next((model for model in model_strs if model in f), None)
        df['model'] = model_str
        # Always False in this cell: names containing "conv" were filtered out above.
        df['conversation'] = 'conv' in f
        # NOTE: this overwrites the source file in `output_dir` with the tagged frame.
        df.to_csv(os.path.join(output_dir, f), index=False)

        # Remove the '<model>_' prefix from column names that contain the model name
        if model_str:
            df = df.rename(columns=lambda x: x.replace(f'{model_str}_', '') if model_str in x else x)


        # Mean of every numeric column per group; one frame per input file.
        numeric_cols = df.select_dtypes(include='number').columns
        grouped_df = df.groupby(['female_dominated', 'model','conversation'])[numeric_cols].mean().reset_index()


        df_list.append(grouped_df)

# Concatenate the per-file group means (pd.concat raises if no file was selected).
df = pd.concat(df_list)
# Drop every column that has at least one missing value after concatenation
df = df.dropna(axis=1, how='any')
df.to_csv(os.path.join(results_dir, 'implicit.csv'), index=False)

In [407]:
# Split the aggregated implicit/explicit results into one CSV per model, written to
# <results_dir>/<model>/implicit.csv and <results_dir>/<model>/explicit.csv.
# NOTE(review): 'explicit.csv' is not produced by any cell visible in this notebook;
# it must already exist in `results_dir`.
df_implicit = pd.read_csv(os.path.join(results_dir, 'implicit.csv'))
df_explicit = pd.read_csv(os.path.join(results_dir, 'explicit.csv'))
# Strip surrounding whitespace from the column names
df_implicit.columns = df_implicit.columns.str.strip()
df_explicit.columns = df_explicit.columns.str.strip()

for frame, file_name in ((df_implicit, 'implicit.csv'), (df_explicit, 'explicit.csv')):
    for model, group_df in frame.groupby('model'):
        model_dir = os.path.join(results_dir, model)
        # exist_ok=True replaces the separate existence check and is safe on re-runs.
        os.makedirs(model_dir, exist_ok=True)
        group_df.to_csv(os.path.join(model_dir, file_name), index=False)


In [409]:
# Baseline results: keep only rows with debiasing_prompt_id == 0, average the
# per-column male/female/diverse values of each row, then average those per
# (model, explicit, female_dominated). The result is `averages_df`.
df_list = []
models = [
    'llama3-8b', 'llama3-8b-instruct', 'mistral-7b',
    'mistral-7b-instruct', 'llama2-7b', 'llama2-7b-instruct','gemma-7b', 'gemma-7b-instruct', 'gemma-2-9b', 'gemma-2-9b-instruct']


# One per-model CSV for each of the two settings
for model, file_name in product(models, ['explicit.csv', 'implicit.csv']):
    is_explicit = 'explicit' in file_name
    kind = 'explicit' if is_explicit else 'implicit'

    df_path = os.path.join(results_dir, model, file_name)
    df = pd.read_csv(df_path)
    df['explicit'] = is_explicit
    df['model'] = model

    # Keep only the rows without a debiasing prompt. The explicit copy makes the column
    # assignments below operate on an independent frame, which avoids pandas'
    # SettingWithCopyWarning for writes to a filtered slice.
    df = df[df['debiasing_prompt_id'] == 0].copy()

    # Columns '<gender>_<kind><i>' for i in 0..24; indices missing from the file are skipped.
    male_cols, female_cols, diverse_cols = (
        [name for name in (f"{gender}_{kind}{i}" for i in range(25)) if name in df.columns]
        for gender in ('male', 'female', 'diverse')
    )

    # Row-wise mean over the available columns; no average column is added for a
    # gender without any matching column.
    for avg_name, cols in (('male_avg', male_cols),
                           ('female_avg', female_cols),
                           ('diverse_avg', diverse_cols)):
        if cols:
            df[avg_name] = df[cols].mean(axis=1)

    df_list.append(df)

# Concatenate all DataFrames
df_default = pd.concat(df_list)

# Group by 'model', 'explicit', and 'female_dominated', then calculate averages
grouped = df_default.groupby(['model', 'explicit', 'female_dominated']).agg(
    male_avg=('male_avg', 'mean'),
    female_avg=('female_avg', 'mean'),
    diverse_avg=('diverse_avg', 'mean')
).reset_index()

# Name under which the following cells read the averaged frame
averages_df = grouped

In [410]:
# Models in the row order of the LaTeX table; the rows themselves are filled from
# `averages_df` further down in this cell.
models = [
    'llama3-8b', 'llama3-8b-instruct', 'mistral-7b', 
    'mistral-7b-instruct', 'llama2-7b', 'llama2-7b-instruct',  
    'gemma-7b', 'gemma-7b-instruct', 'gemma-2-9b', 'gemma-2-9b-instruct'
]

# Opening of the LaTeX table. The layout has 13 columns: the model name followed by
# M/F/D for each of the four (setting, dominance) groups. The tabular specification
# therefore declares exactly 1 + 12 columns, matching the \multicolumn/\cmidrule
# spans; surplus declared columns would only add empty padding on the right.
latex_table = r'''
\begin{table*}[ht!]
\centering
\small
    % Reduce text size and slightly the gap between columns
    \setlength{\tabcolsep}{4.6pt} % Default: 5pt
    \caption{Results for all models.}
    % \resizebox{\textwidth}{!}{  % Alternative method: resize entire table (problem: also resizes line widths)
    \begin{tabular}{l c c c c c c c c c c c c}
    \toprule
    & \multicolumn{6}{c}{Explicit} & \multicolumn{6}{c}{Implicit} \\
    \cmidrule(lr){2-7} \cmidrule(lr){8-13}
    Model & \multicolumn{3}{c}{Female Dominated} & \multicolumn{3}{c}{Male Dominated} & \multicolumn{3}{c}{Female Dominated} & \multicolumn{3}{c}{Male Dominated} \\
    \cmidrule(lr){2-4} \cmidrule(lr){5-7} \cmidrule(lr){8-10} \cmidrule(lr){11-13}
    & M & F & D & M & F & D & M & F & D & M & F & D \\
    \midrule
'''

def calculate_average(df, gender_prefix):
    """Return the first row's mean over the columns whose name starts with a prefix.

    Args:
        df: DataFrame with string column names.
        gender_prefix: Prefix selecting the columns to average (e.g. 'male').

    Returns:
        The mean of the selected columns in the first row of ``df``, or ``None``
        when ``df`` is empty or no column name starts with ``gender_prefix``.
    """
    selected = list(filter(lambda name: name.startswith(gender_prefix), df.columns))
    if df.empty or not selected:
        return None
    row_means = df[selected].mean(axis=1)
    return row_means.values[0]

def _default_table_cells(rows, last=False):
    """Render the male/female/diverse averages of ``rows`` as three LaTeX cells.

    Args:
        rows: Slice of ``averages_df`` for one (model, explicit, female_dominated)
            combination; only its first row is used.
        last: True for the final group of a table row, which must not end with a
            column separator.

    Returns:
        The cell text. An empty ``rows`` yields three blank cells, using the same
        number of ``&`` separators as a filled group so the row keeps 13 columns.
    """
    if rows.empty:
        cells = " & & "
    else:
        cells = " & ".join(
            f"{rows[col].values[0]*100:.1f}\\%"
            for col in ('male_avg', 'female_avg', 'diverse_avg')
        ) + " "
    return cells if last else cells + "& "


# Row filters that do not depend on the model
is_explicit = averages_df['explicit'] == True
is_implicit = averages_df['explicit'] == False
is_fd = averages_df['female_dominated'] == True
is_md = averages_df['female_dominated'] == False

for model in models:
    model_name = model_str_map[model]  # display name shown in the first column
    of_model = averages_df['model'] == model
    explicit_fd = averages_df[of_model & is_explicit & is_fd]
    explicit_md = averages_df[of_model & is_explicit & is_md]
    implicit_fd = averages_df[of_model & is_implicit & is_fd]
    implicit_md = averages_df[of_model & is_implicit & is_md]

    row_prefix = f"        {model_name} & "
    row = (
        row_prefix
        + _default_table_cells(explicit_fd)
        + _default_table_cells(explicit_md)
        + _default_table_cells(implicit_fd)
        + _default_table_cells(implicit_md, last=True)
        + r"\\"
    )

    # A rule follows every '*instruct*' model except the last entry of `models`.
    if "instruct" in model and model != models[-1]:
        latex_table += row + "\n        \\midrule\n"
    else:
        latex_table += row + "\n        \n"

latex_table += r'''
\bottomrule
\end{tabular}
% } % end \resizebox
\label{tab:explicit_default}
\end{table*}
'''

# Save the LaTeX table to a file
output_tex_file = os.path.join(results_dir, 'default.tex')
with open(output_tex_file, 'w') as f_out:
    f_out.write(latex_table)

print("Aggregated LaTeX table saved to", output_tex_file)

Aggregated LaTeX table saved to ../data/results/default.tex


# Debiasing Prompts Analysis

In [533]:
# Build `debias_implicit.csv` in `results_dir`: per-file group means of the implicit
# outputs (file names without "conv", "gender" and "gpt2"), restricted to the
# columns with index 0-4.
female_ratios = pd.read_csv(os.path.join(input_dir, 'female_ratios.csv'))
# for model in  llama3-8b llama3-8b-instruct mistral-7b mistral-7b-instruct llama2-7b llama2-7b-chat

# Same CSV read a second time, only to obtain the list of job names.
job_df = pd.read_csv(os.path.join(input_dir, 'female_ratios.csv'))
jobs = job_df['job'].to_list()

# Substrings looked for in file names; the list position is the numeric prompt id.
prompt_ids = ['none','low-1','low-2','medium-3','medium-4','high-5','high-6']
prompt_id_mapping = {pid: idx for idx, pid in enumerate(prompt_ids)}

df_list = []
# NOTE(review): os.listdir returns entries in arbitrary order, so the row order of the
# resulting CSV can differ between runs.
for f in os.listdir(output_dir):
    if "conv" not in f and 'gender' not in f and 'gpt2' not in f:
        df = pd.read_csv(os.path.join(output_dir, f))
        if 'job' not in df.columns:
            # assumes the file's rows are in the same order (and number) as the rows
            # of female_ratios.csv -- TODO confirm
            df['job'] = jobs
        # Attach the columns of female_ratios.csv via the job name, then discard the
        # 'job' column and any column whose name contains 'Unnamed'.
        df = pd.merge(df,female_ratios,on='job')
        to_drop = [col for col in df.columns if 'Unnamed' in col]
        df = df.drop(columns=to_drop + ['job'])
        # Collapse duplicated columns. NOTE(review): the frame is mutated in place while
        # the column index obtained at loop start is iterated -- confirm this is intended.
        for col in df.columns:
            if '.' in col:
                # Drop the column named like `col` without '.1', then give `col` that
                # name (presumably targets the 'name.1' labels of duplicated CSV headers).
                df.drop(columns=[col.replace('.1','')],inplace=True)
                df.rename(columns={col:col.replace('.1','')},inplace=True)
            if '_x' in col:
                # Drop the '_x' column and rename its '_y' counterpart to the unsuffixed
                # name (presumably the suffixes produced by the merge above when the
                # file already carried a column of female_ratios.csv -- TODO confirm).
                df.drop(columns=[col],inplace=True)
                df.rename(columns={col.replace('_x','_y'):col.replace('_x','')},inplace=True)
        # Always False in this cell: names containing 'gender' were filtered out above.
        df['explicit'] = "gender" in f
        df['female_dominated'] = df['female_ratio'] > 50
        # Prompt id: first entry of `prompt_ids` contained in the file name, else 'none'
        prompt_id_str = next((pid for pid in prompt_ids if pid in f), 'none')
        prompt_id = prompt_id_mapping[prompt_id_str]
        df['debiasing_prompt_id'] = prompt_id

        # Model: first entry of `model_strs` (sorted longest first) contained in the
        # file name; None when no known model name matches.
        model_str = next((model for model in model_strs if model in f), None)
        df['model'] = model_str
        # NOTE: this overwrites the source file in `output_dir` with the modified frame
        # (the 'job' and 'Unnamed' columns dropped above are no longer part of it).
        df.to_csv(os.path.join(output_dir, f), index=False)

        # Remove the '<model>_' prefix from column names that contain the model name
        if model_str:
            df = df.rename(columns=lambda x: x.replace(f'{model_str}_', '') if model_str in x else x)


        # Mean of every numeric column per (female_dominated, model); one frame per file.
        numeric_cols = df.select_dtypes(include='number').columns
        grouped_df = df.groupby(['female_dominated', 'model'])[numeric_cols].mean().reset_index()

        df_list.append(grouped_df)

filtered_df_list = []
for df in df_list:
    # Keep the grouping/identifier columns plus the male/female/diverse columns
    # (with and without the '_prob' suffix) for indices 0-4. Every listed column
    # must exist in the frame for the selection below to succeed.
    filtered_columns = [
        'female_dominated', 'model', 'debiasing_prompt_id', 'female_ratio'
    ]

    for i in range(5):
        filtered_columns.extend([
            f'male_implicit{i}_prob', f'female_implicit{i}_prob', f'diverse_implicit{i}_prob',
            f'male_implicit{i}', f'female_implicit{i}', f'diverse_implicit{i}'
        ])

    filtered_df = df[filtered_columns]
    filtered_df_list.append(filtered_df)

# Print the columns of the filtered DataFrames to verify
for df in filtered_df_list:
    print(df.columns)

# Concatenate the per-file frames
all_df = pd.concat(filtered_df_list)
# Drop every column that has at least one missing value after concatenation
all_df = all_df.dropna(axis=1, how='any')
all_df.to_csv(os.path.join(results_dir, 'debias_implicit.csv'), index=False)

Index(['female_dominated', 'model', 'debiasing_prompt_id', 'female_ratio',
       'male_implicit0_prob', 'female_implicit0_prob',
       'diverse_implicit0_prob', 'male_implicit0', 'female_implicit0',
       'diverse_implicit0', 'male_implicit1_prob', 'female_implicit1_prob',
       'diverse_implicit1_prob', 'male_implicit1', 'female_implicit1',
       'diverse_implicit1', 'male_implicit2_prob', 'female_implicit2_prob',
       'diverse_implicit2_prob', 'male_implicit2', 'female_implicit2',
       'diverse_implicit2', 'male_implicit3_prob', 'female_implicit3_prob',
       'diverse_implicit3_prob', 'male_implicit3', 'female_implicit3',
       'diverse_implicit3', 'male_implicit4_prob', 'female_implicit4_prob',
       'diverse_implicit4_prob', 'male_implicit4', 'female_implicit4',
       'diverse_implicit4'],
      dtype='object')
Index(['female_dominated', 'model', 'debiasing_prompt_id', 'female_ratio',
       'male_implicit0_prob', 'female_implicit0_prob',
       'diverse_implicit0_prob

In [534]:
# Build `debias_explicit.csv` in `results_dir`: per-file group means of the explicit
# outputs (file names containing 'gender' but neither "conv" nor 'gpt2'), restricted
# to the columns with index 0-4.
# Relies on `jobs`, `female_ratios` and `model_strs` defined in earlier cells.
# Substrings looked for in file names; the list position is the numeric prompt id.
prompt_ids = ['none','low-1','low-2','medium-3','medium-4','high-5','high-6']
prompt_id_mapping = {pid: idx for idx, pid in enumerate(prompt_ids)}

df_list = []
# NOTE(review): os.listdir returns entries in arbitrary order, so the row order of the
# resulting CSV can differ between runs.
for f in os.listdir(output_dir):
    if "conv" not in f and 'gender' in f and'gpt2' not in f:
        df = pd.read_csv(os.path.join(output_dir, f))
        if 'job' not in df.columns:
            # assumes the file's rows are in the same order (and number) as the rows
            # of female_ratios.csv -- TODO confirm
            df['job'] = jobs
        # Attach the columns of female_ratios.csv via the job name, then discard the
        # 'job' column and any column whose name contains 'Unnamed'.
        df = pd.merge(df,female_ratios,on='job')
        to_drop = [col for col in df.columns if 'Unnamed' in col]
        df = df.drop(columns=to_drop + ['job'])
        # Collapse duplicated columns. NOTE(review): the frame is mutated in place while
        # the column index obtained at loop start is iterated -- confirm this is intended.
        for col in df.columns:
            if '.' in col:
                # Drop the column named like `col` without '.1', then give `col` that
                # name (presumably targets the 'name.1' labels of duplicated CSV headers).
                df.drop(columns=[col.replace('.1','')],inplace=True)
                df.rename(columns={col:col.replace('.1','')},inplace=True)
            if '_x' in col:
                # Drop the '_x' column and rename its '_y' counterpart to the unsuffixed
                # name (presumably the suffixes produced by the merge above when the
                # file already carried a column of female_ratios.csv -- TODO confirm).
                df.drop(columns=[col],inplace=True)
                df.rename(columns={col.replace('_x','_y'):col.replace('_x','')},inplace=True)
        # Always True in this cell: only names containing 'gender' pass the filter above.
        df['explicit'] = "gender" in f
        df['female_dominated'] = df['female_ratio'] > 50
        # Prompt id: first entry of `prompt_ids` contained in the file name, else 'none'
        prompt_id_str = next((pid for pid in prompt_ids if pid in f), 'none')
        prompt_id = prompt_id_mapping[prompt_id_str]
        df['debiasing_prompt_id'] = prompt_id

        # Model: first entry of `model_strs` (sorted longest first) contained in the
        # file name; None when no known model name matches.
        model_str = next((model for model in model_strs if model in f), None)
        df['model'] = model_str
        # NOTE: this overwrites the source file in `output_dir` with the modified frame
        # (the 'job' and 'Unnamed' columns dropped above are no longer part of it).
        df.to_csv(os.path.join(output_dir, f), index=False)

        # Remove the '<model>_' prefix from column names that contain the model name
        if model_str:
            df = df.rename(columns=lambda x: x.replace(f'{model_str}_', '') if model_str in x else x)


        # Mean of every numeric column per (female_dominated, model); one frame per file.
        numeric_cols = df.select_dtypes(include='number').columns
        grouped_df = df.groupby(['female_dominated', 'model'])[numeric_cols].mean().reset_index()

        df_list.append(grouped_df)

filtered_df_list = []
for df in df_list:
    # Keep the grouping/identifier columns plus the male/female/diverse columns
    # (with and without the '_prob' suffix) for indices 0-4. Every listed column
    # must exist in the frame for the selection below to succeed.
    filtered_columns = [
        'female_dominated', 'model', 'debiasing_prompt_id', 'female_ratio'
    ]

    for i in range(5):
        filtered_columns.extend([
            f'male_explicit{i}_prob', f'female_explicit{i}_prob', f'diverse_explicit{i}_prob',
            f'male_explicit{i}', f'female_explicit{i}', f'diverse_explicit{i}'
        ])

    filtered_df = df[filtered_columns]
    filtered_df_list.append(filtered_df)

# Print the columns of the filtered DataFrames to verify
for df in filtered_df_list:
    print(df.columns)

# Concatenate the per-file frames
all_df = pd.concat(filtered_df_list)
# Drop every column that has at least one missing value after concatenation
all_df = all_df.dropna(axis=1, how='any')
all_df.to_csv(os.path.join(results_dir, 'debias_explicit.csv'), index=False)

Index(['female_dominated', 'model', 'debiasing_prompt_id', 'female_ratio',
       'male_explicit0_prob', 'female_explicit0_prob',
       'diverse_explicit0_prob', 'male_explicit0', 'female_explicit0',
       'diverse_explicit0', 'male_explicit1_prob', 'female_explicit1_prob',
       'diverse_explicit1_prob', 'male_explicit1', 'female_explicit1',
       'diverse_explicit1', 'male_explicit2_prob', 'female_explicit2_prob',
       'diverse_explicit2_prob', 'male_explicit2', 'female_explicit2',
       'diverse_explicit2', 'male_explicit3_prob', 'female_explicit3_prob',
       'diverse_explicit3_prob', 'male_explicit3', 'female_explicit3',
       'diverse_explicit3', 'male_explicit4_prob', 'female_explicit4_prob',
       'diverse_explicit4_prob', 'male_explicit4', 'female_explicit4',
       'diverse_explicit4'],
      dtype='object')
Index(['female_dominated', 'model', 'debiasing_prompt_id', 'female_ratio',
       'male_explicit0_prob', 'female_explicit0_prob',
       'diverse_explicit0_prob

In [535]:
# Split the aggregated debiasing results into one CSV per model, written to
# <results_dir>/<model>/debias_implicit.csv and <results_dir>/<model>/debias_explicit.csv.
df_implicit = pd.read_csv(os.path.join(results_dir, 'debias_implicit.csv'))
df_explicit = pd.read_csv(os.path.join(results_dir, 'debias_explicit.csv'))
# Strip surrounding whitespace from the column names
df_implicit.columns = df_implicit.columns.str.strip()
df_explicit.columns = df_explicit.columns.str.strip()

for frame, file_name in ((df_implicit, 'debias_implicit.csv'),
                         (df_explicit, 'debias_explicit.csv')):
    for model, group_df in frame.groupby('model'):
        model_dir = os.path.join(results_dir, model)
        # exist_ok=True replaces the separate existence check and is safe on re-runs.
        os.makedirs(model_dir, exist_ok=True)
        group_df.to_csv(os.path.join(model_dir, file_name), index=False)


In [536]:
 # Update this to your results directory
models = list(model_str_map.keys())

output_tex_file = os.path.join(results_dir, 'aggregated_results.tex')
debias_implicit = pd.read_csv(os.path.join(results_dir, 'debias_implicit.csv'))
debias_explicit = pd.read_csv(os.path.join(results_dir, 'debias_explicit.csv'))

# Start from an empty list. Without this reset the loop appends to whatever
# `df_list` an earlier cell left behind, so unrelated frames get concatenated and
# every re-run of this cell adds another copy of the data.
df_list = []

# Read the per-model debiasing results for both settings and add row-wise
# male/female/diverse averages.
for model, file_name in product(models, ['debias_explicit.csv', 'debias_implicit.csv']):
    is_explicit = 'explicit' in file_name
    kind = 'explicit' if is_explicit else 'implicit'

    df = pd.read_csv(os.path.join(results_dir, model, file_name))
    df['explicit'] = is_explicit
    df['model'] = model

    # Columns '<gender>_<kind><i>' for i in 0..4; indices missing from the file are skipped.
    male_cols, female_cols, diverse_cols = (
        [name for name in (f"{gender}_{kind}{i}" for i in range(5)) if name in df.columns]
        for gender in ('male', 'female', 'diverse')
    )

    # Row-wise mean over the available columns; no average column is added for a
    # gender without any matching column.
    for avg_name, cols in (('male_avg', male_cols),
                           ('female_avg', female_cols),
                           ('diverse_avg', diverse_cols)):
        if cols:
            df[avg_name] = df[cols].mean(axis=1)

    df_list.append(df)

# Concatenate all DataFrames
df_default = pd.concat(df_list)

# Average per model, setting, dominance group and debiasing prompt
grouped = df_default.groupby(['model', 'explicit', 'female_dominated','debiasing_prompt_id']).agg(
    male_avg=('male_avg', 'mean'),
    female_avg=('female_avg', 'mean'),
    diverse_avg=('diverse_avg', 'mean')
).reset_index()

# Name under which the following cells read the averaged frame
averages_df = grouped

In [550]:
# Print every model that has fewer than 14 explicit rows in `averages_df`
# (presumably 7 debiasing prompt ids x 2 female_dominated values -- TODO confirm).
for model in models:
    explicit_row_count = ((averages_df['model'] == model) & (averages_df['explicit'] == True)).sum()
    if explicit_row_count < 14:
        print(model)

In [528]:
 # Update this to your results directory
models = list(model_str_map.keys())

output_tex_file = os.path.join(results_dir, 'aggregated_results.tex')

# Strip '_avg' from the column names ('male_avg' -> 'male', ...). A single rename call
# replaces the in-place rename issued while looping over the very columns being
# renamed; it is a no-op when the cell is run again.
averages_df.rename(columns=lambda col: col.replace('_avg', ''), inplace=True)


def _debias_table_cells(rows, last=False):
    """Render the male/female/diverse averages of ``rows`` as three LaTeX cells.

    Args:
        rows: Rows of one (explicit, female_dominated) combination for a single
            debiasing prompt; only the first row is used.
        last: True for the final group of a table row, which must not end with a
            column separator.

    Returns:
        The cell text; an empty ``rows`` yields three blank cells.
    """
    if rows.empty:
        cells = " & & "
    else:
        cells = " & ".join(
            f"{rows[col].values[0]*100:.1f}\\%" for col in ('male', 'female', 'diverse')
        ) + " "
    return cells if last else cells + "& "


with open(output_tex_file, 'w') as f_out:
    f_out.write(r'\onecolumn')
    for model in models:
        model_name = model.replace('-', '')      # hyphen-free key used in the \label
        model_description = model_str_map[model]

        f_out.write(r'\subsection{' + model_description + '}\n')
        # Rows of this model, separately for the implicit and the explicit setting
        df_implicit = averages_df[(averages_df['explicit'] == False) & (averages_df['model'] == model)]
        df_explicit = averages_df[(averages_df['explicit'] == True) & (averages_df['model'] == model)]

        # Diagnostic output for one model, kept from the original cell
        if model == 'gemma-2-9b-instruct':
            print(df_implicit)

        # Mean per debiasing prompt and dominance group
        grouped_df_implicit = df_implicit.groupby(['model', 'debiasing_prompt_id', 'female_dominated']).mean(numeric_only=True).reset_index()
        grouped_df_explicit = df_explicit.groupby(['model', 'debiasing_prompt_id', 'female_dominated']).mean(numeric_only=True).reset_index()

        # Drop the 'model' column
        model_df_implicit = grouped_df_implicit.drop(columns=['model'])
        model_df_explicit = grouped_df_explicit.drop(columns=['model'])

        # Sort by prompt id to ensure correct order
        model_df_implicit['debiasing_prompt_id'] = model_df_implicit['debiasing_prompt_id'].astype(int)
        model_df_implicit = model_df_implicit.sort_values(by=['debiasing_prompt_id'])

        model_df_explicit['debiasing_prompt_id'] = model_df_explicit['debiasing_prompt_id'].astype(int)
        model_df_explicit = model_df_explicit.sort_values(by=['debiasing_prompt_id'])

        # Re-attach the setting flag before stacking both settings
        model_df_implicit['explicit'] = False
        model_df_explicit['explicit'] = True

        model_df = pd.concat([model_df_implicit, model_df_explicit])

        # The table has 13 columns (ID + 4 groups x M/F/D), so the tabular
        # specification declares exactly 13; a surplus column would add empty
        # padding that \resizebox then scales into the table width.
        latex_table = r'''
\begin{table*}[ht!]
\centering
\small
% Reduce text size and slightly the gap between columns
% \setlength{\tabcolsep}{4.6pt} % Default: 5pt
\caption{Results for ''' + model_str_map[model] + r''' on debiasing prompts.}
  \resizebox{\textwidth}{!}{  % Alternative method: resize entire table (problem: also resizes line widths)
\begin{tabular}{c c c c c c c c c c c c c}
\toprule
& \multicolumn{6}{c}{Explicit} & \multicolumn{6}{c}{Implicit} \\
\cmidrule(lr){2-7} \cmidrule(lr){8-13}
& \multicolumn{3}{c}{Female Dominated} & \multicolumn{3}{c}{Male Dominated} & \multicolumn{3}{c}{Female Dominated} & \multicolumn{3}{c}{Male Dominated} \\
\cmidrule(lr){2-4} \cmidrule(lr){5-7} \cmidrule(lr){8-10} \cmidrule(lr){11-13}
    ID & M & F & D & M & F & D & M & F & D & M & F & D\\
    \midrule
'''

        # One table row per debiasing prompt id
        for prompt_id, prompt_group in model_df.groupby('debiasing_prompt_id'):
            id_label = 'None' if prompt_id == 0 else str(prompt_id)
            row_prefix = f"        {id_label} & "
            explicit_fd = prompt_group[(prompt_group['explicit'] == True) & (prompt_group['female_dominated'] == True)]
            explicit_md = prompt_group[(prompt_group['explicit'] == True) & (prompt_group['female_dominated'] == False)]
            implicit_fd = prompt_group[(prompt_group['explicit'] == False) & (prompt_group['female_dominated'] == True)]
            implicit_md = prompt_group[(prompt_group['explicit'] == False) & (prompt_group['female_dominated'] == False)]

            row = (
                row_prefix
                + _debias_table_cells(explicit_fd)
                + _debias_table_cells(explicit_md)
                + _debias_table_cells(implicit_fd)
                + _debias_table_cells(implicit_md, last=True)
                + r"\\"
            )
            latex_table += row + "\n"

        latex_table += r'''
\bottomrule
\end{tabular}
 } % end \resizebox
\label{tab:''' + model_name + r'''_debias}
\end{table*}

'''

        f_out.write(latex_table)
        # f_out.write('\\clearpage\n\n')

print("Aggregated LaTeX table saved to", output_tex_file)

                  model  explicit  female_dominated  debiasing_prompt_id  \
16  gemma-2-9b-instruct     False             False                  0.0   
17  gemma-2-9b-instruct     False              True                  0.0   

        male    female   diverse  
16  0.669147  0.145919  0.184933  
17  0.072755  0.779234  0.148010  
Aggregated LaTeX table saved to ../data/results/aggregated_results.tex


In [524]:
# Debug inspection: shows `male_cols` as left behind by the last iteration of an
# aggregation loop. NOTE(review): scratch cell; depends on kernel state.
male_cols

[]

In [483]:
# Debug inspection: shows `explicit_md` as left behind by the last iteration of a
# table-building loop. NOTE(review): scratch cell; depends on kernel state.
explicit_md

Unnamed: 0,debiasing_prompt_id,conversation,female_dominated,female_ratio,explicit,male_explicit0_prob,female_explicit0_prob,diverse_explicit0_prob,male_explicit0,female_explicit0,...,diverse_explicit23_prob,male_explicit23,female_explicit23,diverse_explicit23,male_explicit24_prob,female_explicit24_prob,diverse_explicit24_prob,male_explicit24,female_explicit24,diverse_explicit24
0,0,False,False,5.535,True,0.263907,0.038422,0.002154,0.866675,0.126075,...,0.003017,0.72727,0.260405,0.012305,0.224377,0.053976,0.002224,0.799655,0.19227,0.008085


In [484]:
# Debug inspection: shows `implicit_fd` as left behind by the last iteration of a
# table-building loop. NOTE(review): scratch cell; depends on kernel state.
implicit_fd

Unnamed: 0,debiasing_prompt_id,conversation,female_dominated,male_implicit0_prob,female_implicit0_prob,diverse_implicit0_prob,male_implicit0,female_implicit0,diverse_implicit0,male_implicit1_prob,...,female_implicit3,diverse_implicit3,male_implicit4_prob,female_implicit4_prob,diverse_implicit4_prob,male_implicit4,female_implicit4,diverse_implicit4,female_ratio,explicit
1,0,False,True,0.000154,0.000157,1.4e-05,0.42273,0.541995,0.03528,0.000269,...,0.63236,0.0888,0.000266,0.000461,5.8e-05,0.316165,0.626305,0.057525,89.265,False


In [485]:
# Debug inspection: shows `implicit_md` as left behind by the last iteration of a
# table-building loop. NOTE(review): scratch cell; depends on kernel state.
implicit_md

Unnamed: 0,debiasing_prompt_id,conversation,female_dominated,male_implicit0_prob,female_implicit0_prob,diverse_implicit0_prob,male_implicit0,female_implicit0,diverse_implicit0,male_implicit1_prob,...,female_implicit3,diverse_implicit3,male_implicit4_prob,female_implicit4_prob,diverse_implicit4_prob,male_implicit4,female_implicit4,diverse_implicit4,female_ratio,explicit
0,0,False,False,0.000273,9e-05,1.4e-05,0.732905,0.23441,0.03269,0.000723,...,0.225815,0.10685,0.000667,0.000233,9.8e-05,0.664155,0.24412,0.09174,5.535,False
