In [1]:
import os
import random
import numpy as np
import pandas as pd

RANDOM_SEED = 220223

pd.set_option('display.max_rows', 10)
pd.set_option('display.max_columns', None)
pd.set_option('max_colwidth', None)
pd.set_option('display.expand_frame_repr', False)

In [2]:
all_data = pd.read_csv('../data/human_info.csv')
all_data = all_data.assign(
    base_name = all_data['image_path'].apply(lambda x: os.path.splitext(os.path.basename(x))[0])
)

In [None]:
# get_example_paths('../xai_samples/coldnhot/XAI1')
def get_example_paths(xai_path):
    '''
        Get the file paths for all the XAI examples in that folder.
        E.g. xai_samples = get_example_paths('../xai_samples/coldnhot/XAI1')
    '''
    models = ['fair_model', 'dark_undersampled', 'light_undersampled']
    file_paths, subfolder_names = [], []

    for subfolder in models:
        subfolder_path = os.path.join(xai_path, subfolder)
        file_names = [fn for fn in os.listdir(subfolder_path) if not fn.startswith('.') and not fn.endswith('.DS_Store')]
        file_paths.extend([os.path.join(subfolder_path, file_name) for file_name in file_names])

        # Add the subfolder name to the list of subfolder names, with the same length as the number of files in the subfolder
        subfolder_names.extend([subfolder] * len(file_names))

    df = pd.DataFrame({'file_path': file_paths, 'model': subfolder_names})
    df = df.assign(
        # image name, e.g. 734.cam_default.f_1.rgb
        # this is to simplify joining with the human info data
        base_name = df['file_path'].apply(lambda x: os.path.splitext(os.path.basename(x))[0][4:])
    )
    return df


def get_block_order(N_PARTICIPANTS = 50, random_seed = 42):

    # create a list of conditions by n_participants
    # 25 fair first, 25 biased first
    fair_first = ['fair_model'] * (N_PARTICIPANTS // 2)
    biased_first = ['biased_model'] * (N_PARTICIPANTS // 2)
    condition_order = fair_first + biased_first

    np.random.seed(random_seed)
    random.seed(random_seed)
    random.shuffle(condition_order)

    # if the first model is biased, then the second model is fair and vice versa
    df_conditions = pd.DataFrame({
        # 'Participant ID': ['P{:02d}'.format(i+1) for i in range(N_PARTICIPANTS)]
        'Participant ID': [f'P{i + 1:02d}' for i in range(N_PARTICIPANTS)],       # range(1, N_PARTICIPANTS + 1),
        'first_model': condition_order,
        'second_model': ['fair_model' if x == 'biased_model' else 'biased_model' for x in condition_order]
        })

    # replace the biased model name with the specific model
    biased_models = np.repeat(['light_undersampled', 'dark_undersampled'], len(df_conditions) // 2) 
    np.random.shuffle(biased_models)

    df_conditions['biased_model'] = biased_models
    df_conditions.loc[df_conditions['first_model'] == 'biased_model', 'first_model'] = df_conditions['biased_model']
    df_conditions.loc[df_conditions['second_model'] == 'biased_model', 'second_model'] = df_conditions['biased_model']
    df_conditions.drop(columns=['biased_model'], inplace = True)

    df_conditions.set_index('Participant ID', inplace = True)
    return df_conditions

def create_participant_matrix(df, n_trials = 32):
    '''Create matrix for each participant'''
    N_PARTICIPANTS = len(df)
    participant_matrix = pd.DataFrame(
        np.zeros((N_PARTICIPANTS, n_trials), dtype = int), 
        columns = [f'Trial {i+1}' for i in range(n_trials)], 
        index = [f'P{i+1:02d}' for i in range(N_PARTICIPANTS)])
    return participant_matrix

def create_sample_paths(participant_df, condition_df, file_dict, random_seed = None):
    '''
        Participant_df: empty matrix for each participant by trial columns
        Condition_df: dataframe with the condition order for each participant
        File_dict: dictionary with the file paths for each model
    '''
    np.random.seed(RANDOM_SEED)
    random.seed(RANDOM_SEED)

    for participant in participant_df.index:

        first_model = condition_df.loc[participant, 'first_model']
        second_model = condition_df.loc[participant, 'second_model']

        first_model_files = file_dict[first_model]
        second_model_files = file_dict[second_model]

        random.shuffle(first_model_files) 
        random.shuffle(second_model_files) 

        participant_df.loc[participant, 'Trial 1':'Trial 16'] = first_model_files 
        participant_df.loc[participant, 'Trial 17':'Trial 32'] = second_model_files 
    return participant_df

In [None]:
# explanation 1 and 2 for the trials
XAI1_PATHS = get_example_paths('../xai_samples/coldnhot/XAI1')
XAI2_PATHS = get_example_paths('../xai_samples/coldnhot/XAI2')

MODELS = XAI1_PATHS['model'].unique().tolist()

file_paths_dict_xai1 = {}
file_paths_dict_xai2 = {}

for model in MODELS:
    file_paths = XAI1_PATHS.query(f'model == "{model}"').reset_index(drop = True)['file_path'].tolist()
    file_paths_dict_xai1[f'{model}'] = file_paths

for model in MODELS:
    file_paths = XAI2_PATHS.query(f'model == "{model}"').reset_index(drop = True)['file_path'].tolist()
    file_paths_dict_xai2[f'{model}'] = file_paths

# file_paths_dict.keys()        dict_keys(['fair_model', 'dark_undersampled', 'light_undersampled'])
# file_paths_dict['fair_model']       file_paths_dict['dark_undersampled']        file_paths_dict['light_undersampled']

In [13]:
df_conditions = get_block_order(N_PARTICIPANTS = 50, random_seed = RANDOM_SEED)
part_df = create_participant_matrix(df_conditions)

In [14]:
part_paths1 = create_sample_paths(participant_df = part_df, condition_df = df_conditions, file_dict = file_paths_dict_xai1, random_seed = RANDOM_SEED)
part_paths2 = create_sample_paths(participant_df = part_df, condition_df = df_conditions, file_dict = file_paths_dict_xai2, random_seed = RANDOM_SEED)

In [19]:
# to get the human_id
merged_df = pd.merge(XAI1_PATHS, all_data[['sex', 'yaw_direction', 'skin_labels', 'base_name', 'human_id']], on = 'base_name', how = 'left')

In [18]:
merged_df

Unnamed: 0,file_path,model,base_name,sex,yaw_direction,skin_labels
0,../xai_samples/coldnhot/XAI1/fair_model/xai_5135.cam_default.f_1.rgb.png,fair_model,5135.cam_default.f_1.rgb,0.0,middle,light
1,../xai_samples/coldnhot/XAI1/fair_model/xai_6736.cam_default.f_1.rgb.png,fair_model,6736.cam_default.f_1.rgb,1.0,side,dark
2,../xai_samples/coldnhot/XAI1/fair_model/xai_7649.cam_default.f_1.rgb.png,fair_model,7649.cam_default.f_1.rgb,1.0,middle,light
3,../xai_samples/coldnhot/XAI1/fair_model/xai_6866.cam_default.f_1.rgb.png,fair_model,6866.cam_default.f_1.rgb,1.0,middle,light
4,../xai_samples/coldnhot/XAI1/fair_model/xai_2728.cam_default.f_1.rgb.png,fair_model,2728.cam_default.f_1.rgb,1.0,middle,light
...,...,...,...,...,...,...
43,../xai_samples/coldnhot/XAI1/light_undersampled/xai_5248.cam_default.f_1.rgb.png,light_undersampled,5248.cam_default.f_1.rgb,0.0,middle,light
44,../xai_samples/coldnhot/XAI1/light_undersampled/xai_2492.cam_default.f_1.rgb.png,light_undersampled,2492.cam_default.f_1.rgb,1.0,side,dark
45,../xai_samples/coldnhot/XAI1/light_undersampled/xai_9057.cam_default.f_1.rgb.png,light_undersampled,9057.cam_default.f_1.rgb,1.0,side,dark
46,../xai_samples/coldnhot/XAI1/light_undersampled/xai_4839.cam_default.f_1.rgb.png,light_undersampled,4839.cam_default.f_1.rgb,0.0,middle,light


In [21]:
# combine the file paths for both XAI1 and XAI2
pd.concat([
    XAI1_PATHS, XAI2_PATHS
], ignore_index = True)

Unnamed: 0,file_path,model,base_name
0,../xai_samples/coldnhot/XAI1/fair_model/xai_5135.cam_default.f_1.rgb.png,fair_model,5135.cam_default.f_1.rgb
1,../xai_samples/coldnhot/XAI1/fair_model/xai_6736.cam_default.f_1.rgb.png,fair_model,6736.cam_default.f_1.rgb
2,../xai_samples/coldnhot/XAI1/fair_model/xai_7649.cam_default.f_1.rgb.png,fair_model,7649.cam_default.f_1.rgb
3,../xai_samples/coldnhot/XAI1/fair_model/xai_6866.cam_default.f_1.rgb.png,fair_model,6866.cam_default.f_1.rgb
4,../xai_samples/coldnhot/XAI1/fair_model/xai_2728.cam_default.f_1.rgb.png,fair_model,2728.cam_default.f_1.rgb
...,...,...,...
43,../xai_samples/coldnhot/XAI2/light_undersampled/xai_5248.cam_default.f_1.rgb.png,light_undersampled,5248.cam_default.f_1.rgb
44,../xai_samples/coldnhot/XAI2/light_undersampled/xai_2492.cam_default.f_1.rgb.png,light_undersampled,2492.cam_default.f_1.rgb
45,../xai_samples/coldnhot/XAI2/light_undersampled/xai_9057.cam_default.f_1.rgb.png,light_undersampled,9057.cam_default.f_1.rgb
46,../xai_samples/coldnhot/XAI2/light_undersampled/xai_4839.cam_default.f_1.rgb.png,light_undersampled,4839.cam_default.f_1.rgb


In [34]:
# create a df of the file paths for each sample (e.g. input, XAI1, XAI2)
pd.merge(
    XAI1_PATHS.rename(columns = {'file_path': 'file_path1'}),
    XAI2_PATHS.rename(columns = {'file_path': 'file_path2'})[['file_path2', 'base_name']],
    on = 'base_name', how = 'left'
).merge(all_data[['sex', 'yaw_direction', 'skin_labels', 'base_name', 'human_id']], on = 'base_name', how = 'left')

Unnamed: 0,file_path1,model,base_name,file_path2,sex,yaw_direction,skin_labels,human_id
0,../xai_samples/coldnhot/XAI1/fair_model/xai_5135.cam_default.f_1.rgb.png,fair_model,5135.cam_default.f_1.rgb,../xai_samples/coldnhot/XAI2/fair_model/xai_5135.cam_default.f_1.rgb.png,0.0,middle,light,288
1,../xai_samples/coldnhot/XAI1/fair_model/xai_6736.cam_default.f_1.rgb.png,fair_model,6736.cam_default.f_1.rgb,../xai_samples/coldnhot/XAI2/fair_model/xai_6736.cam_default.f_1.rgb.png,1.0,side,dark,73
2,../xai_samples/coldnhot/XAI1/fair_model/xai_7649.cam_default.f_1.rgb.png,fair_model,7649.cam_default.f_1.rgb,../xai_samples/coldnhot/XAI2/fair_model/xai_7649.cam_default.f_1.rgb.png,1.0,middle,light,35
3,../xai_samples/coldnhot/XAI1/fair_model/xai_6866.cam_default.f_1.rgb.png,fair_model,6866.cam_default.f_1.rgb,../xai_samples/coldnhot/XAI2/fair_model/xai_6866.cam_default.f_1.rgb.png,1.0,middle,light,319
4,../xai_samples/coldnhot/XAI1/fair_model/xai_2728.cam_default.f_1.rgb.png,fair_model,2728.cam_default.f_1.rgb,../xai_samples/coldnhot/XAI2/fair_model/xai_2728.cam_default.f_1.rgb.png,1.0,middle,light,63
...,...,...,...,...,...,...,...,...
43,../xai_samples/coldnhot/XAI1/light_undersampled/xai_5248.cam_default.f_1.rgb.png,light_undersampled,5248.cam_default.f_1.rgb,../xai_samples/coldnhot/XAI2/light_undersampled/xai_5248.cam_default.f_1.rgb.png,0.0,middle,light,64
44,../xai_samples/coldnhot/XAI1/light_undersampled/xai_2492.cam_default.f_1.rgb.png,light_undersampled,2492.cam_default.f_1.rgb,../xai_samples/coldnhot/XAI2/light_undersampled/xai_2492.cam_default.f_1.rgb.png,1.0,side,dark,107
45,../xai_samples/coldnhot/XAI1/light_undersampled/xai_9057.cam_default.f_1.rgb.png,light_undersampled,9057.cam_default.f_1.rgb,../xai_samples/coldnhot/XAI2/light_undersampled/xai_9057.cam_default.f_1.rgb.png,1.0,side,dark,11
46,../xai_samples/coldnhot/XAI1/light_undersampled/xai_4839.cam_default.f_1.rgb.png,light_undersampled,4839.cam_default.f_1.rgb,../xai_samples/coldnhot/XAI2/light_undersampled/xai_4839.cam_default.f_1.rgb.png,0.0,middle,light,94


In [31]:
pd.merge(XAI1_PATHS, XAI2_PATHS, on = 'base_name', how = 'left')

Unnamed: 0,file_path_x,model_x,base_name,file_path_y,model_y
0,../xai_samples/coldnhot/XAI1/fair_model/xai_5135.cam_default.f_1.rgb.png,fair_model,5135.cam_default.f_1.rgb,../xai_samples/coldnhot/XAI2/fair_model/xai_5135.cam_default.f_1.rgb.png,fair_model
1,../xai_samples/coldnhot/XAI1/fair_model/xai_6736.cam_default.f_1.rgb.png,fair_model,6736.cam_default.f_1.rgb,../xai_samples/coldnhot/XAI2/fair_model/xai_6736.cam_default.f_1.rgb.png,fair_model
2,../xai_samples/coldnhot/XAI1/fair_model/xai_7649.cam_default.f_1.rgb.png,fair_model,7649.cam_default.f_1.rgb,../xai_samples/coldnhot/XAI2/fair_model/xai_7649.cam_default.f_1.rgb.png,fair_model
3,../xai_samples/coldnhot/XAI1/fair_model/xai_6866.cam_default.f_1.rgb.png,fair_model,6866.cam_default.f_1.rgb,../xai_samples/coldnhot/XAI2/fair_model/xai_6866.cam_default.f_1.rgb.png,fair_model
4,../xai_samples/coldnhot/XAI1/fair_model/xai_2728.cam_default.f_1.rgb.png,fair_model,2728.cam_default.f_1.rgb,../xai_samples/coldnhot/XAI2/fair_model/xai_2728.cam_default.f_1.rgb.png,fair_model
...,...,...,...,...,...
43,../xai_samples/coldnhot/XAI1/light_undersampled/xai_5248.cam_default.f_1.rgb.png,light_undersampled,5248.cam_default.f_1.rgb,../xai_samples/coldnhot/XAI2/light_undersampled/xai_5248.cam_default.f_1.rgb.png,light_undersampled
44,../xai_samples/coldnhot/XAI1/light_undersampled/xai_2492.cam_default.f_1.rgb.png,light_undersampled,2492.cam_default.f_1.rgb,../xai_samples/coldnhot/XAI2/light_undersampled/xai_2492.cam_default.f_1.rgb.png,light_undersampled
45,../xai_samples/coldnhot/XAI1/light_undersampled/xai_9057.cam_default.f_1.rgb.png,light_undersampled,9057.cam_default.f_1.rgb,../xai_samples/coldnhot/XAI2/light_undersampled/xai_9057.cam_default.f_1.rgb.png,light_undersampled
46,../xai_samples/coldnhot/XAI1/light_undersampled/xai_4839.cam_default.f_1.rgb.png,light_undersampled,4839.cam_default.f_1.rgb,../xai_samples/coldnhot/XAI2/light_undersampled/xai_4839.cam_default.f_1.rgb.png,light_undersampled


In [20]:
merged_df

Unnamed: 0,file_path,model,base_name,sex,yaw_direction,skin_labels,human_id
0,../xai_samples/coldnhot/XAI1/fair_model/xai_5135.cam_default.f_1.rgb.png,fair_model,5135.cam_default.f_1.rgb,0.0,middle,light,288
1,../xai_samples/coldnhot/XAI1/fair_model/xai_6736.cam_default.f_1.rgb.png,fair_model,6736.cam_default.f_1.rgb,1.0,side,dark,73
2,../xai_samples/coldnhot/XAI1/fair_model/xai_7649.cam_default.f_1.rgb.png,fair_model,7649.cam_default.f_1.rgb,1.0,middle,light,35
3,../xai_samples/coldnhot/XAI1/fair_model/xai_6866.cam_default.f_1.rgb.png,fair_model,6866.cam_default.f_1.rgb,1.0,middle,light,319
4,../xai_samples/coldnhot/XAI1/fair_model/xai_2728.cam_default.f_1.rgb.png,fair_model,2728.cam_default.f_1.rgb,1.0,middle,light,63
...,...,...,...,...,...,...,...
43,../xai_samples/coldnhot/XAI1/light_undersampled/xai_5248.cam_default.f_1.rgb.png,light_undersampled,5248.cam_default.f_1.rgb,0.0,middle,light,64
44,../xai_samples/coldnhot/XAI1/light_undersampled/xai_2492.cam_default.f_1.rgb.png,light_undersampled,2492.cam_default.f_1.rgb,1.0,side,dark,107
45,../xai_samples/coldnhot/XAI1/light_undersampled/xai_9057.cam_default.f_1.rgb.png,light_undersampled,9057.cam_default.f_1.rgb,1.0,side,dark,11
46,../xai_samples/coldnhot/XAI1/light_undersampled/xai_4839.cam_default.f_1.rgb.png,light_undersampled,4839.cam_default.f_1.rgb,0.0,middle,light,94
