**Note: this notebook creates the images used for the annotation experiment. The images used in the original annotation experiment are downloadable from [here](https://drive.google.com/drive/folders/1YO_GZ48o30jTnME-z7d8LlcZoJejcNsk)**

In [7]:
%load_ext autoreload
%autoreload 2
import os
from os.path import join as oj

import matplotlib.image as mpimg
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from tqdm import tqdm
import sys
import pickle as pkl
sys.path.append('..')

import data
import style
import config
from config import *
import util
import glob
import json, jsonlines
# load the table returned by data.load_all_labs and index it by fname_id,
# so rows can be selected below by the ids found on disk
df = data.load_all_labs().set_index('fname_id')

# get fnames
# match the '.npy' extension itself rather than the substring 'npy': a stray file
# such as '00001.npy.bak' would otherwise be kept and then mangled by the
# extension stripping, breaking the int() conversion / df.loc lookup below
fname_nps = [f for f in sorted(os.listdir(DIR_GEN)) if f.endswith('.npy')]  # these start at 00001
fname_ids = np.array([os.path.splitext(f)[0] for f in fname_nps])
idxs_calculated = np.array([int(x) - 1 for x in fname_ids])  # this starts at 0

# trim df to only have the relevant ids
df = df.loc[fname_ids]

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


# select celebrities

In [8]:
# keep rows whose count_with_this_id exceeds 2, ordered by that count (largest first)
d = df[df['count_with_this_id'] > 2].sort_values('count_with_this_id', ascending=False)

# collect the unique ids of every (gender, race) subgroup
subgroups = {}
for gender in [0, 1]:
    for race in ['Black', 'White']:
        # one combined mask instead of two successive filters
        in_group = (d['gender'] == gender) & (d['race_pred'] == race)
        dd = d[in_group]
        group_ids = dd.id.unique()
        print(gender, race, '|', group_ids.size, 'ids\t', dd.shape[0], 'photos')
        subgroups[(gender, race)] = group_ids

0 Black | 93 ids	 707 photos
0 White | 1511 ids	 12493 photos
1 Black | 172 ids	 1011 photos
1 White | 885 ids	 5978 photos


# save all pairs

In [None]:
# Note: this code picks slightly different images than the ones used for the paper results.
# The discrepancy arises because, when this code was originally run, only half the images in celeba-hq had been projected.
# The selection procedure was exactly the one given here.
# The originally picked images are available in the gdrive folder referenced in the github.

def pair_plot(im0, im1):
    """Draw two images side by side on the current matplotlib figure.

    Params
    ------
    im0
        image drawn in the left panel, titled 'Real photo'
    im1
        image drawn in the right panel, titled 'Test photo'

    Both images are rendered through util.imshow; nothing is returned and the
    figure is neither saved nor closed here.
    """
    n_rows, n_cols = 1, 2
    panels = (('Real photo', im0), ('Test photo', im1))
    for position, (caption, image) in enumerate(panels, start=1):
        plt.subplot(n_rows, n_cols, position)
        plt.title(caption)
        util.imshow(image)
    plt.tight_layout()
    
def slight_random_crop(im):
    """Trim the same randomly drawn margin off all four sides of an image.

    The margin is a single draw from np.random.randint(20, 35), i.e. an integer
    in [20, 34], and is applied to the first two axes of `im`.
    Returns the cropped view im[margin:-margin, margin:-margin].
    """
    margin = np.random.randint(20, 35)
    window = slice(margin, -margin)
    return im[window, window]

np.random.seed(13)  # makes the margins drawn by slight_random_crop repeatable
N_IMS = 30  # number of ids used from each (gender, race) subgroup
EXPERIMENT_DIR = oj(DIR_PROCESSED, 'projections_annotation_double', 'ims')
os.makedirs(EXPERIMENT_DIR, exist_ok=True)


def _save_pair(im_left, im_right, fname):
    """Draw two images with pair_plot and save the figure as EXPERIMENT_DIR/fname."""
    pair_plot(im_left, im_right)
    plt.savefig(oj(EXPERIMENT_DIR, fname), dpi=300)
    # close the figure so the next pair starts on a fresh one: plt.subplot returns
    # the existing axes when called again with the same grid position, so without
    # this the images of every earlier pair would presumably stay stacked underneath
    # and each save would get slower
    plt.close()


for gender, race in tqdm(subgroups.keys()):
    ids = subgroups[(gender, race)]
    for i in tqdm(ids[:N_IMS]):
        # first two rows of d for this id
        ims = d[d.id == i].iloc[:2]
        fname0, fname1 = ims.fname_final.values[:2]
        im0 = mpimg.imread(oj(DIR_IMS, fname0))
        im1 = mpimg.imread(oj(DIR_IMS, fname1))
        # the matching image in DIR_GEN has the same stem with a .png extension
        im1_rec = mpimg.imread(oj(DIR_GEN, os.path.splitext(fname1)[0] + '.png'))

        # three figures per id: second photo, its DIR_GEN counterpart, and a crop of the first photo
        _save_pair(im0, im1, f'{gender}_{race}_{i}_real.png')
        _save_pair(im0, im1_rec, f'{gender}_{race}_{i}_fake.png')
        _save_pair(im0, slight_random_crop(im0), f'{gender}_{race}_{i}_dup.png')

In [None]:
# number of entries currently in the experiment image folder
ims_dir = oj(DIR_PROCESSED, 'projections_annotation_double', 'ims')
len(os.listdir(ims_dir))