# Visualize data
This notebook converts the pickled data files to CSV so that the structure of the data is easier to inspect.

In [1]:
import sys
import os
import re
import pickle as pkl
import pandas as pd

In [2]:
# Directory with the per-model pickle files, relative to the kernel's starting working directory.
DATA_DIR='../data/bias_data'
# Make DATA_DIR the working directory; the cells below use paths relative to it.
# NOTE(review): the path is relative, so re-running this cell in the same kernel
# resolves it against the already-changed cwd. Restart the kernel before re-running.
%cd {DATA_DIR}



In [3]:
def get_file():
    """Yield the relative path of every ``.pkl`` file one level below the cwd.

    Each sub-directory of the current working directory is scanned
    (non-recursively), except ``model_vocab``, which is skipped. Entries of the
    working directory that are not directories are ignored.

    Yields:
        str: ``<sub-directory>/<file name>`` for each file whose name ends in
        ``.pkl``, in the order reported by ``os.listdir``.
    """
    pickle_name = re.compile(r'^.*\.pkl$')
    for entry in os.listdir('.'):
        if entry != 'model_vocab' and os.path.isdir(entry):
            for name in os.listdir(entry):
                if pickle_name.match(name):
                    yield os.path.join(entry, name)

In [4]:
def load_df(path):
    """Unpickle the file at ``path`` and return its content as a DataFrame.

    Args:
        path: Location of the pickle file to read.

    Returns:
        pandas.DataFrame: ``pd.DataFrame`` built from the unpickled object.

    Note:
        ``pickle.load`` can execute arbitrary code, so only use this on files
        from a trusted source.
    """
    with open(path, "rb") as handle:
        payload = pkl.load(handle)
    return pd.DataFrame(payload)

## Show example dataset
Here we show an example of the model captions.

Each row of the data corresponds to an image in the COCO dataset. It contains the id of the image (`img_id`), the caption produced by the model (`pred`), and the ground truth annotations of the protected attributes gender and race (`bb_gender` and `bb_skin`, respectively).

In [5]:
# Load one captions file as an example; the path is relative to the current working directory.
df = load_df('Show-Tell/gender_val_st10_cap_mw_entries.pkl')
# Bare last expression: the notebook renders the DataFrame as the cell output.
df

Unnamed: 0,img_id,pred,bb_gender,ratio,bb_skin,race,split
0,11340,an elephant is walking through a village .,Male,[0.02577024647887324],Dark,['Dark'],train
1,149783,a man sitting at a table with a laptop .,Female,"[0.18625651041666666, 0.043430989583333336, 0....",Light,"['Light', 'Light', nan]",train
2,249953,a man riding a surfboard on top of a wave .,Male,[0.036435158079625295],,[nan],train
3,465566,a man standing next to a train on a train pla...,Male,[0.08640625],Light,['Light'],train
4,229653,a man is sitting on a couch with a laptop .,Female,[0.1023125],Light,['Light'],train
...,...,...,...,...,...,...,...
10775,872,a baseball player swinging a bat at a ball,Male,"[0.335958465, 0.321149044]",Light,"['Light', 'Light']",val
10776,375812,a young boy in a green shirt and a red frisbee,Male,[0.19226271881838075],Light,['Light'],train
10777,227801,a man is holding a teddy bear in a basket .,Male,[0.03427637771502655],Dark,['Dark'],train
10778,360208,a woman is holding a glass of wine .,Female,"[0.020179036458333334, 0.3531575520833333]",Light,"['Unsure', 'Light']",train


In [6]:
# Show the column names of the example DataFrame.
df.columns

Index(['img_id', 'pred', 'bb_gender', 'ratio', 'bb_skin', 'race', 'split'], dtype='object')

## Make a copy of the data in csv format

In [7]:
# Write a CSV copy next to every pickle file reported by get_file().
# NOTE: `df` is rebound on every iteration, so after this cell it refers to the
# last file converted, not to the example frame displayed earlier.
for src in get_file():
    print(src)
    df = load_df(src)
    # Swap only the file extension. str.replace('.pkl', '.csv') would rewrite
    # every '.pkl' occurrence in the path, including one inside a directory name.
    dst = os.path.splitext(src)[0] + '.csv'
    df.to_csv(dst)

Show-Tell/gender_val_st10_cap_mw_entries.pkl
Show-Tell/gender_val_st10_th10_cap_mw_entries.pkl
Show-Tell/gender_val_st10_th5_cap_mw_entries.pkl
Show-Tell/gender_val_st10_th2_cap_mw_entries.pkl
Show-Tell/race_val_st10_cap_entries.pkl
Show-Attend-Tell/gender_val_sat_masked_seg_obj_cap_mw_entries.pkl
Show-Attend-Tell/gender_val_sat_masked_obj_cap_mw_entries.pkl
Show-Attend-Tell/gender_val_sat_cap_mw_entries.pkl
Show-Attend-Tell/gender_val_sat_masked_both_cap_mw_entries.pkl
Show-Attend-Tell/race_val_sat_cap_entries.pkl
Show-Attend-Tell/gender_val_sat_masked_seg_person_cap_mw_entries.pkl
Show-Attend-Tell/gender_val_sat_masked_person_cap_mw_entries.pkl
Show-Attend-Tell/gender_val_sat_masked_seg_both_cap_mw_entries.pkl
Transformer/race_val_transformer_cap_entries.pkl
Transformer/gender_val_transformer_cap_mw_entries.pkl
Human_Ann/gender_obj_cap_mw_entries.pkl
Human_Ann/race_val_obj_cap_entries.pkl
Oscar/race_val_oscar_cap_entries.pkl
Oscar/gender_val_oscar_cap_mw_entries.pkl
Oscar/gender_val_

## Show example vocabulary
Here we show an example of the vocabulary used for a model.

In [8]:
# Vocabulary pickle of one model; the path is relative to the current working directory.
path = 'model_vocab/fc_vocab.pkl'
with open(path, "rb") as f:
    # NOTE: pickle.load can execute arbitrary code; only load files from a trusted source.
    obj = pkl.load(f)
# Per the output below, the unpickled object is a list of token strings, printed in full.
print(obj)

['<unk>', '<pad>', 'a', 'genderword', 'on', 'with', 'of', 'in', 'standing', 'sitting', 'group', 'people', 'holding', 'table', 'at', 'tennis', 'street', 'field', 'court', 'riding', 'ball', 'the', 'cell', 'phone', 'room', 'to', 'baseball', 'next', 'down', 'skateboard', 'playing', 'little', 'kitchen', 'bat', 'frisbee', 'front', 'player', 'food', 'young', 'snow', 'dog', 'game', 'video', 'racket', 'and', 'surfboard', 'cake', 'an', 'bench', 'umbrella', 'eating', 'beach', 'bed', 'skis', 'living', 'laptop', 'tie', 'doing', 'trick', 'horse', 'couch', 'couple', 'suit', 'pizza', 'computer', 'wave', 'wearing', 'plate', 'motorcycle', 'walking', 'ramp', 'talking', 'water', 'top', 'hot', 'ocean', 'kite', 'laying', 'covered', 'slope', 'swinging', 'bathroom', 'teddy', 'wine', 'bear', 'bunch', 'soccer', 'brushing', 'preparing', 'bus', 'teeth', 'flying', 'mirror', 'glass', 'cat', 'refrigerator', 'bike', 'luggage', 'side', 'boat', 'bicycle', 'train', 'truck', 'giraffe', 'slice', 'book', 'umbrellas', 'bana