In [10]:
%load_ext autoreload
%autoreload 2
import sys
import numpy as np
import PIL.Image
import matplotlib.pyplot as plt
from os.path import join as oj
import pandas as pd
import pickle as pkl
import models
import util
import os
import config
from config import ATTR_TO_INDEX
import viz
import scipy.stats
from tqdm import tqdm
import figs
import matplotlib.image as mpimg
import seaborn as sns
import data
CELEB_IMS_DIR = '../data/celeba-hq/ims/'
CELEB_ANNO_DIR = '../data/celeba-hq/Anno/'


# load and merge all the data
print('loading...')
df = data.load_ids()
labs, labs_full = data.load_labs()
for k in labs.keys():
    df[k] = labs[k].values
for k in labs_full.keys():
    df[k] = labs_full[k].values
print('done loading!')

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
loading...
done loading!


## show some celeb ims

In [6]:
ims = np.array([mpimg.imread(oj(CELEB_IMS_DIR, fname))
                for fname in df.fname_final.values[:200]])

In [None]:
util.plot_grid(ims.reshape(8, 25, *ims.shape[1:]))

In [None]:
N_CELEBS = 6
d = df[df['count_with_this_id'] > 20]
# d = d[d['skin-color'] == 0] # filter by skin color
# d = d[d['gender'] == 1] # filter by skin color

for i in d['id'].unique()[:N_CELEBS]:
    ids = d[d.id == i]
#     print(ids.shape)
    ims = np.array([mpimg.imread(oj(CELEB_IMS_DIR, fname))
                    for fname in ids.fname_final.values])
#     print(ims.shape)
    util.plot_row(ims)
    plt.show()

## look at image statistics

In [None]:
# plt.grid(zorder=-1)
plt.hist(df[df['gender'] == 0]['count_with_this_id'], label='Female')
plt.hist(df[df['gender'] == 1]['count_with_this_id'], label='Male', alpha=0.5)
plt.xlabel('Number of images with same id\nin celeb-a-hq (30k images)')
plt.legend()
plt.show()
# plt.hist(df[df['gender'] == 1])

## label correlations

In [None]:
df.columns.values

In [None]:
keys = [k for k in list(labs.keys()) + ['Eyeglasses']]
d = df[keys]
idxs = df['gender'] == 0
plt.figure(dpi=500)
viz.corrplot(d.corr())
plt.title('Correlations')
keys = np.array([k.capitalize() for k in keys])
# keys = labs.columns.str.capitalize().values
plt.xticks(np.arange(keys.size), keys, rotation='vertical')
plt.yticks(np.arange(keys.size), keys)
plt.colorbar()

means = d.groupby('gender').mean()

In [None]:
BAR_SIZE = 0.45
cols = [s.capitalize() for s in means.columns.values]
plt.figure(dpi=500)
plt.barh(np.arange(len(cols)), means.iloc[0], height=BAR_SIZE, label='F')
plt.barh(np.arange(len(cols)) + 0.5, means.iloc[1], height=BAR_SIZE, label='M')
plt.legend(title='Perceived gender')
plt.yticks(np.arange(len(cols)) + BAR_SIZE / 2, cols)
plt.xlabel('Mean value in CelebA-HQ')
plt.show()

## benchmarking facial dists

In [71]:
facial_dists = pkl.load(open(oj(config.PROCESSED_DIR, '12_facial_dists.pkl'), 'rb'))
ks = list(labs_full.keys()) + list(labs.keys())
vals = {
    k: [] for k in ks
}
for i in tqdm(facial_dists['ids']):
    d = df[df.id == i]
    means = d.mean()
    for k in ks:
        vals[k].append(means[k])
df_facial = pd.DataFrame.from_dict({**vals, **facial_dists} )
df_facial.to_pickle(open(oj(config.PROCESSED_DIR, '12_facial_dists_df.pkl'), 'wb'))
df_facial = pd.read_pickle(open(oj(config.PROCESSED_DIR, '12_facial_dists_df.pkl'), 'rb'))

100%|██████████| 4379/4379 [00:40<00:00, 108.89it/s]


In [73]:
df_facial.keys()

Index(['5_o_Clock_Shadow', 'Arched_Eyebrows', 'Attractive', 'Bags_Under_Eyes',
       'Bald', 'Bangs', 'Big_Lips', 'Big_Nose', 'Black_Hair', 'Blond_Hair',
       'Blurry', 'Brown_Hair', 'Bushy_Eyebrows', 'Chubby', 'Double_Chin',
       'Eyeglasses', 'Goatee', 'Gray_Hair', 'Heavy_Makeup', 'High_Cheekbones',
       'Male', 'Mouth_Slightly_Open', 'Mustache', 'Narrow_Eyes', 'No_Beard',
       'Oval_Face', 'Pale_Skin', 'Pointy_Nose', 'Receding_Hairline',
       'Rosy_Cheeks', 'Sideburns', 'Smiling', 'Straight_Hair', 'Wavy_Hair',
       'Wearing_Earrings', 'Wearing_Hat', 'Wearing_Lipstick',
       'Wearing_Necklace', 'Wearing_Necktie', 'Young', 'gender', 'hair-length',
       'facial-hair', 'makeup', 'skin-color', 'age', 'facial_dists', 'ids'],
      dtype='object')

In [None]:
attr = 'gender'
attr_labs = ['M', 'F']
# attr = 'Eyeglasses'
# attr_labs = ['No', 'Yes']


idxs_attr = (df_facial[attr] > 0.5)
d0 = df_facial[idxs_attr]['facial_dists']
d1 = df_facial[~idxs_attr]['facial_dists']
plt.figure(dpi=300)
plt.boxplot([d0, d1], vert=False, widths=0.6)
plt.axvline(0.6, linestyle='--')
plt.yticks(np.arange(1, 3), attr_labs)
plt.ylabel(attr.capitalize())
plt.xlabel('Facial distance\n(Lower means it is working better)')
print(f'acc {attr_labs[0]} {np.mean(d0 < 0.6):0.2f} acc {attr_labs[1]} {np.mean(d1 < 0.6):0.2f}')
plt.show()

In [91]:
# plt.figure(dpi=300)
# plt.boxplot([d0[k] for k in ks], vert=False, positions=2 * np.arange(len(ks)))
# plt.boxplot([d1[k] for k in ks], vert=False, positions=2 * np.arange(len(ks)) + 1)
# plt.yticks(2 * np.arange(len(ks)) + 0.5, ks)
# plt.show()