In [None]:
%load_ext autoreload
%autoreload 2
import sys
import numpy as np
import PIL.Image
import matplotlib.pyplot as plt
from os.path import join as oj
import pandas as pd
import pickle as pkl
import models
import util
import os
import config
from config import ATTR_TO_INDEX
import viz
import scipy.stats
from tqdm import tqdm
import figs
import matplotlib.image as mpimg
import sklearn.decomposition
import sklearn.manifold
import seaborn as sns
import data
import transects
import face_recognition
import sklearn.metrics
from matching import *
from matplotlib.image import BboxImage
from matplotlib.transforms import Bbox, TransformedBbox


# Load the label table and index it by filename id so rows can be selected
# with the ids parsed from the generated-image directory below.
df = data.load_all_labs()
df = df.set_index('fname_id')

DIR_ORIG = '../data/celeba-hq/ims/'
reg = 0.1  # selects which generated_images_{reg} directory is read (presumably the regularization used at generation time)
DIR_GEN = oj(f'../data_processed/celeba-hq/generated_images_{reg}')

# get fnames
# Match on the real extension: the id is recovered by stripping the last four
# characters and is then parsed with int(), so a substring test such as
# `'npy' in f` would let e.g. 'x.npy.bak' through and break those two steps.
fname_nps = [f for f in sorted(os.listdir(DIR_GEN)) if f.endswith('.npy')] # these start at 00001
fname_ids = np.array([f[:-len('.npy')] for f in fname_nps])
idxs_calculated = np.array([int(x) - 1 for x in fname_ids]) # this starts at 0

# trim df to only have the relevant ids (row order now follows fname_ids)
df = df.loc[fname_ids]

# load the linear model in latent space
coefs, intercepts = transects.get_directions()
coefs = np.array(coefs).squeeze()
intercepts = np.array(intercepts)


# load latents and calculate dists
print('loading latents...')
latents = np.array([np.load(oj(DIR_GEN, f)) for f in fname_nps])
lats = get_lat(latents)
# attribute predictions of the linear model, computed on the latents averaged over axis 1
preds = latents.mean(axis=1) @ coefs.T + intercepts.T
# one weight per predicted attribute; all zeros here
weights = np.zeros(preds.shape[1])
# print(ATTR_TO_INDEX)
# weights[ATTR_TO_INDEX['skin-color']] = 1e2
vecs = join_vecs(preds, lats, weights)

print('calculating dists...')
dists_gan = get_dists(vecs)
print('done!')

# load pairwise facial dists
print('loading facial rec dists...')
# Pass the path directly so numpy opens *and closes* the file itself; the
# previous `np.load(open(..., 'rb'))` left the file handle open.
dists_facial = np.load('processed/13_facial_dists_pairwise.npy')
# keep only the rows/columns of images that have a generated latent
dists_facial = dists_facial[idxs_calculated, :][:, idxs_calculated]
print('done!')

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


loading cached labels




loading latents...
calculating dists...
done!
loading facial rec dists...


# do matching

### find matches for an image

In [None]:
# For each query image: plot the original, its reconstruction, and the closest
# matches returned by calc_matches, annotated with the match values.
# img 38 is a good gender example (try range(38, 45))
N_MATCHES_TO_PLOT = 5

# select subset of indices to use for matching (default: every image)
idxs = np.ones(df.shape[0]).astype(bool)
# idxs = (df['gender'] > 0).values
# idxs = (df['race'] == 'Black').values
# idxs = (df['Eyeglasses'] > 0).values

for im_idx in range(10):
    # first select the query's row, then keep only the candidate columns
    # (swap in dists_facial[im_idx][idxs] to match with the facial-rec distance)
    dists_im = dists_gan[im_idx][idxs]
    fname_ids_for_matching = fname_ids[idxs]

    closest_match_vals, closest_matches_fnames = calc_matches(dists_im, fname_ids_for_matching)

    # load images: original, reconstruction, then the top matches
    fname_id = fname_ids[im_idx]
    im_orig = mpimg.imread(oj(DIR_ORIG, f'{fname_id}.jpg'))
    im_rec = mpimg.imread(oj(DIR_GEN, f'{fname_id}.png'))
    im_matches = []
    for fname_match in closest_matches_fnames[:N_MATCHES_TO_PLOT]:
        im_matches.append(mpimg.imread(oj(DIR_GEN, f'{fname_match}.png')))

    # plot one row per query, labelling each match with its rounded value
    match_labels = closest_match_vals[:N_MATCHES_TO_PLOT].round(3).tolist()
    util.plot_row([im_orig, im_rec] + im_matches,
                  annot_list=['orig', 'rec'] + match_labels,
                  dpi=150)
    plt.show()
# alternative: show_matches(dists_gan, DIR_ORIG, DIR_GEN, im_nums=range(5, 10))

**simple dimensionality reduction (PCA) of the latents**

In [None]:
def plot_image(xs, ys, im):
    '''Place one image thumbnail per (x, y) point on the current axes.

    Reads the module-level `ax` (target axes) and `IM_SIZE` (thumbnail width
    and height, in data units), so both must be defined before calling.

    Params
    ------
    xs, ys
        coordinates of each thumbnail's box origin, in data units;
        should be normalized to 0-1 before plotting
    im
        indexable collection of images; im[i] is drawn at (xs[i], ys[i])

    Returns
    -------
    ax
        the module-level axes the thumbnails were added to
    '''
    points = enumerate(zip(xs, ys))
    for i, (x0, y0) in tqdm(points):
        # IM_SIZE x IM_SIZE box at (x0, y0), expressed in the axes' data coordinates
        box_in_data_coords = TransformedBbox(
            Bbox.from_bounds(x0, y0, IM_SIZE, IM_SIZE), ax.transData
        )
        thumbnail = BboxImage(box_in_data_coords, origin=None, clip_on=False)
        thumbnail.set_data(im[i])
        ax.add_artist(thumbnail)
    return ax

# Scatter image thumbnails at their 2-D PCA coordinates in latent space.
N_IMS = 1000     # number of images / latents used for the projection
IM_SIZE = 0.025  # thumbnail side length, in normalized plot units (read by plot_image)
N_PLOT = 1000    # number of projected points that get a thumbnail

fig = plt.figure(figsize=(20, 20), dpi=100)
ax = fig.add_subplot(111)

# original images, keeping every second pixel along both axes
ims = []
for f in df.fname_final[:N_IMS]:
    ims.append(mpimg.imread(oj(DIR_ORIG, f))[::2, ::2])

# project the latents to 2 dims
pca = sklearn.decomposition.PCA(n_components=2)
l = pca.fit_transform(lats[:N_IMS])
# l = sklearn.manifold.TSNE().fit_transform(lats[:N_IMS])

# rescale each projected dim to [0, 1], as plot_image expects
l_min = l.min(axis=0)
l_max = l.max(axis=0)
l = (l - l_min) / (l_max - l_min)

ax = plot_image(l[:N_PLOT, 0], l[:N_PLOT, 1], ims)
plt.xlabel('Dim 1')
plt.ylabel('Dim 2')
# zoom into a region if desired:
# plt.xlim((0.5, 1))
# plt.ylim((0, 0.5))
plt.show()

# plots for specific ids/matches

**make some plots for a specific match**

In [None]:
# Inspect one hand-picked (query, match) pair, then show every image that
# shares an identity with the query and with the match.
im_idx = 38 # img 38 is a good gender example
# if all images are calculated, then this is just 5-char string of im_idx + 1
fname_id = fname_ids[im_idx]
fname_id2 = '02638' # this is the match

# GAN distances from the query image to every image
dists_im = dists_gan[im_idx]

# the two original images
im, im2 = (mpimg.imread(oj(DIR_ORIG, f'{fid}.jpg')) for fid in (fname_id, fname_id2))

# all rows sharing an identity with the query (d1) and with the match (d2)
d1 = df[df.id == df.loc[fname_id].id]
d2 = df[df.id == df.loc[fname_id2].id]

# row 1: query vs. match; row 2: query's identity; row 3: match's identity
util.plot_row([im, im2])
util.plot_row([mpimg.imread(oj(DIR_ORIG, fname)) for fname in d1.fname_final])
util.plot_row([mpimg.imread(oj(DIR_ORIG, fname)) for fname in d2.fname_final])

**look at images / reconstructions of the same person**

In [None]:
# Show originals (top row) and reconstructions (bottom row) for one of the
# most frequently occurring identities.
N_SAME_PERSON = 5  # how many images of that identity to display

# identities whose image count is within 1 of the maximum
d = df[df['count_with_this_id'] >= df['count_with_this_id'].max() - 1]
# every row belonging to the first such identity
dd = d[d.id == d.iloc[0].id]

# Slice *before* reading: only the first N_SAME_PERSON images are plotted, so
# there is no need to load every image of this identity from disk.
util.plot_row([mpimg.imread(oj(DIR_ORIG, f)) for f in dd.fname_final.iloc[:N_SAME_PERSON]])
util.plot_row([mpimg.imread(oj(DIR_GEN, f'{index}.png')) for index in dd.index[:N_SAME_PERSON]])

# evaluate matching with metrics

**how often do we return the same id?**

In [None]:
# For each distance type, measure over the first N_EVAL images:
#   * acc_topK : whether the query's identity appears among its K nearest images
#   * <column> : fraction of the TOP_K_ATTR nearest images whose value in that
#                column equals the query's value
# Results (one row per distance type) are pickled to processed/13_dist_stats.pkl.
# NOTE(review): queries whose identity occurs only once are included in the
# averages; restricting to count_with_this_id > 1 would need care because the
# distance matrices are indexed by position in the full df -- confirm intent.
dists_dict = {
    'gan': dists_gan,
    'facial': dists_facial
}
accs_keys = ['acc_top1', 'acc_top5', 'acc_top10']
attr_keys = [kk for kk in df.keys() if 'scores' not in kk]
N_EVAL = min(1000, df.shape[0])  # capped so a partially generated dataset does not index past df
TOP_K_ATTR = 10                  # number of nearest images used for the attribute agreement

# identity of every image, in the same positional order as the distance matrices
ids_all = df['id'].values

r = {
    k: [] for k in accs_keys + attr_keys
}

# reverse-sorted keys => rows are appended in the order ['gan', 'facial']
for dist_type in sorted(dists_dict.keys(), reverse=True):
    dists = dists_dict[dist_type]
    lists = {
        k: [] for k in r.keys()
    }

    for im_idx in tqdm(range(N_EVAL)):
        # query row is looked up once per image rather than once per attribute
        orig = df.iloc[im_idx]
        id_orig = orig.id

        # id retention
        dists_im = dists[im_idx]
        matched_idxs = np.argsort(dists_im)
        # positions must refer to the full df (not a filtered copy) to line up with dists
        matched_ids = ids_all[matched_idxs]
        lists['acc_top1'].append(id_orig in matched_ids[:1])
        lists['acc_top5'].append(id_orig in matched_ids[:5])
        lists['acc_top10'].append(id_orig in matched_ids[:10])

        # are other feats matched?
        top_matches = df.iloc[matched_idxs[:TOP_K_ATTR]]
        for k in attr_keys:
            lists[k].append(np.mean(top_matches[k] == orig[k]))

    # average each metric over the evaluated queries
    for k in lists.keys():
        r[k].append(np.mean(lists[k]))

r = pd.DataFrame.from_dict(r)
r.to_pickle('processed/13_dist_stats.pkl')

In [4]:
import pandas as pd

# Reload the cached summary statistics, rounded to 3 decimals.
r = pd.read_pickle('processed/13_dist_stats.pkl').round(3)
r.index = ['gan', 'facial']

# Print every column where the GAN distance scores higher than the
# facial-recognition distance, together with the size of the gap.
gan_scores = r.loc['gan']
facial_scores = r.loc['facial']
for k in r.keys():
    if gan_scores[k] > facial_scores[k]:
        print(k, f"{gan_scores[k] - facial_scores[k]:0.3f}")

# colour-graded view of the full table
r.style.background_gradient()

skin-color 0.007000000000000006
Bangs 0.029999999999999916
Blurry 0.0010000000000000009
Eyeglasses 0.006000000000000005
Pale_Skin 0.007000000000000006
Wavy_Hair 0.02400000000000002
Wearing_Hat 0.02200000000000002


Unnamed: 0,acc_top1,acc_top5,acc_top10,idx,orig_idx,orig_file,proc_md5,final_md5,id,fname_final,count_with_this_id,gender,hair-length,facial-hair,makeup,skin-color,age,5_o_Clock_Shadow,Arched_Eyebrows,Attractive,Bags_Under_Eyes,Bald,Bangs,Big_Lips,Big_Nose,Black_Hair,Blond_Hair,Blurry,Brown_Hair,Bushy_Eyebrows,Chubby,Double_Chin,Eyeglasses,Goatee,Gray_Hair,Heavy_Makeup,High_Cheekbones,Male,Mouth_Slightly_Open,Mustache,Narrow_Eyes,No_Beard,Oval_Face,Pale_Skin,Pointy_Nose,Receding_Hairline,Rosy_Cheeks,Sideburns,Smiling,Straight_Hair,Wavy_Hair,Wearing_Earrings,Wearing_Hat,Wearing_Lipstick,Wearing_Necklace,Wearing_Necktie,Young,face_name_align_pred,race_pred,race4_pred,gender_pred,age_pred,img_names_pred
gan,0.173,0.248,0.289,0.0,0.0,0.0,0.0,0.0,0.045,0.0,0.095,0.843,0.848,0.84,0.681,0.928,0.746,0.812,0.583,0.614,0.649,0.983,0.82,0.541,0.614,0.693,0.788,0.986,0.677,0.728,0.92,0.908,0.965,0.929,0.956,0.681,0.604,0.843,0.569,0.947,0.805,0.84,0.743,0.928,0.576,0.85,0.773,0.899,0.604,0.656,0.676,0.622,0.977,0.768,0.701,0.866,0.746,0.0,0.632,0.775,0.842,0.692,0.0
facial,0.903,0.929,0.933,0.0,0.0,0.0,0.0,0.0,0.577,0.0,0.603,0.989,0.865,0.923,0.725,0.921,0.87,0.917,0.678,0.715,0.681,0.986,0.79,0.739,0.741,0.814,0.846,0.985,0.74,0.856,0.925,0.932,0.959,0.952,0.971,0.725,0.661,0.972,0.611,0.956,0.825,0.923,0.801,0.921,0.668,0.867,0.806,0.938,0.634,0.728,0.652,0.671,0.955,0.842,0.707,0.929,0.87,0.0,0.877,0.934,0.942,0.724,0.0


In [15]:
# Compact summary table (in percent) for a hand-picked set of columns.
rename = {
    'acc_top1': 'ID (top1)',
    'gender': 'Gender',
    'race_pred': 'Race'
}
summary_cols = ['acc_top1', 'gender', 'race_pred', 'Mustache',
                'Blurry', 'Eyeglasses', 'Bangs', 'Wearing_Hat']

# human-readable row labels (overwrites the index of r in place)
r.index = ['GAN dist', 'Facial-rec dist']

# fractions -> percentages, with friendlier column headers
(r[summary_cols] * 100).rename(columns=rename)

Unnamed: 0,ID (top1),Gender,Race,Mustache,Blurry,Eyeglasses,Bangs,Wearing_Hat
GAN dist,17.3,84.3,63.2,94.7,98.6,96.5,82.0,97.7
Facial-rec dist,90.3,98.9,87.7,95.6,98.5,95.9,79.0,95.5
