### Dataloader

In [1]:
import torch
from torchvision.datasets import ImageFolder
from torchvision import transforms

from data import MyLiteDataLoader

In [2]:
%matplotlib ipympl

import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import ipympl

In [3]:
dataroot = '/scratch0/ilya/locDoc/data/celeba_partitions/close'
image_size = 32
batch_size = 128
dataset = ImageFolder(root=dataroot,
                           transform=transforms.Compose([
#                               transforms.Resize(image_size),
                               transforms.CenterCrop(image_size),
                               transforms.ToTensor(),
                               transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
                           ]))
dataloader = torch.utils.data.DataLoader(dataset, batch_size=batch_size,
                                         shuffle=True, num_workers=2)
train_loader = MyLiteDataLoader(dataset, batch_size)

In [4]:
train_loader = MyLiteDataLoader(dataset, batch_size)

In [12]:
x,y = train_loader.next()

### Find attrs to split by on the original dataset

In [5]:
import pandas as pd
import numpy as np

In [6]:
from scipy.stats import norm

In [7]:
import imageio
import os

In [8]:
attr_fn = '/scratch0/ilya/locDoc/data/celeba/list_attr_celeba_kaggle.csv'
df = pd.read_csv(attr_fn)

In [47]:
df.columns

Index(['image_id', '5_o_Clock_Shadow', 'Arched_Eyebrows', 'Attractive',
       'Bags_Under_Eyes', 'Bald', 'Bangs', 'Big_Lips', 'Big_Nose',
       'Black_Hair', 'Blond_Hair', 'Blurry', 'Brown_Hair', 'Bushy_Eyebrows',
       'Chubby', 'Double_Chin', 'Eyeglasses', 'Goatee', 'Gray_Hair',
       'Heavy_Makeup', 'High_Cheekbones', 'Male', 'Mouth_Slightly_Open',
       'Mustache', 'Narrow_Eyes', 'No_Beard', 'Oval_Face', 'Pale_Skin',
       'Pointy_Nose', 'Receding_Hairline', 'Rosy_Cheeks', 'Sideburns',
       'Smiling', 'Straight_Hair', 'Wavy_Hair', 'Wearing_Earrings',
       'Wearing_Hat', 'Wearing_Lipstick', 'Wearing_Necklace',
       'Wearing_Necktie', 'Young'],
      dtype='object')

In [48]:
df.shape

(202599, 41)

In [9]:
def split_df(adf, split_attr=None):
    if not split_attr:
        ntrue = [adf[adf[c] == 1].shape[0] for c in adf.columns]
        split_idx = np.argmin(np.abs(np.array(ntrue) - adf.shape[0]/2))
        split_attr = adf.columns[split_idx]
    t = adf[adf[split_attr] == 1]
    f = adf[adf[split_attr] == -1]
    print('Splitting on %s, %i/%i t/f ' % (split_attr, t.shape[0], f.shape[0]))
    return t,f

### Split by Attrs and orientation on original dataset

In [10]:
landmark_fn = '/scratch0/ilya/locDoc/data/celeba/list_landmarks_align_celeba_kaggle.csv'
df2 = pd.read_csv(landmark_fn)

In [11]:
df2.columns

Index(['image_id', 'lefteye_x', 'lefteye_y', 'righteye_x', 'righteye_y',
       'nose_x', 'nose_y', 'leftmouth_x', 'leftmouth_y', 'rightmouth_x',
       'rightmouth_y'],
      dtype='object')

In [12]:
nose_to_left_eye = df2['lefteye_x'] - df2['nose_x']
eye_distance = df2['lefteye_x'] - df2['righteye_x']
rotation = nose_to_left_eye / eye_distance

In [13]:
mu, std = norm.fit(rotation)

In [14]:
plt.figure()
plt.hist(rotation, density=True, bins = 100);
xmin, xmax = plt.xlim()
x = np.linspace(xmin, xmax, 100)
p = norm.pdf(x, mu, std)
plt.plot(x, p, 'k', linewidth=2)
title = "Fit results: mu = %.2f,  std = %.2f" % (mu, std)
plt.title(title)

FigureCanvasNbAgg()

Text(0.5, 1.0, 'Fit results: mu = 0.49,  std = 0.21')

In [144]:
med = rotation.median()
r1 = rotation[rotation > (med-std/2)]
rs = r1[r1 < (med+std/2)]
rl = rotation[rotation > (med+std/2)]
rr = rotation[rotation < (med-std/2)]

rs.shape, rl.shape, rr.shape

((107189,), (46643,), (48767,))

In [184]:
df['rotation'] = rotation
df2['rotation'] = rotation

### Split halfway

In [284]:
center = df[ df['rotation'] > (med-std/2) ]
center = center[center['rotation'] < (med+std/2)]
center.shape

(107189, 42)

In [149]:
left = df[ df['rotation'] > (med+std/2) ]
right = df[ df['rotation'] < (med-std/2) ]

In [272]:
not_center = df[ (df['rotation'] > (med+std/2)) | (df['rotation'] < (med-std/2)) ]

In [288]:
not_center.shape[0]

39494

In [273]:
not_center.shape[0], left.shape[0]+right.shape[0]

AttributeError: 'int' object has no attribute 'shape'

In [278]:
center_1, center_0 =  split_df(center, 'Male')

Splitting on Male, 66224/90216 t/f 


In [279]:
not_center_1, not_center_0 =  split_df(not_center, 'Male')

Splitting on Male, 18210/27949 t/f 


In [280]:
center_11, center_10 =  split_df(center_1, 'Mouth_Slightly_Open')

Splitting on Mouth_Slightly_Open, 28096/38128 t/f 


In [281]:
center_01, center_00 =  split_df(center_0, 'Mouth_Slightly_Open')

Splitting on Mouth_Slightly_Open, 47947/42269 t/f 


In [282]:
not_center_11, not_center_10 =  split_df(not_center_1, 'Mouth_Slightly_Open')

Splitting on Mouth_Slightly_Open, 7750/10460 t/f 


In [283]:
not_center_01, not_center_00 =  split_df(not_center_0, 'Mouth_Slightly_Open')

Splitting on Mouth_Slightly_Open, 14149/13800 t/f 


### Split by visually distinctive

In [317]:
df3 = pd.merge(df, df2, on='image_id')

In [321]:
df = df3
df['rotation'] = rotation

In [322]:
not_center = df[ (df['rotation'] > (med+std*1.1)) | (df['rotation'] < (med-std*1.1)) ]

In [323]:
center = df[ df['rotation'] > (med-std*1.1) ]
center = center[center['rotation'] < (med+std*1.1)]

In [324]:
center.shape[0],  not_center.shape[0]

(163105, 39494)

In [348]:
center_1, center_0 =  split_df(center, 'Eyeglasses')

Splitting on Eyeglasses, 10812/152293 t/f 


In [326]:
not_center_1, not_center_0 =  split_df(not_center, 'Male')

Splitting on Male, 15674/23820 t/f 


In [349]:
center_01, center_00 =  split_df(center_0, 'Male')

Splitting on Male, 60146/92147 t/f 


In [355]:
center_100, center_000 =  split_df(center_00, 'Mouth_Slightly_Open')

Splitting on Mouth_Slightly_Open, 48731/43416 t/f 


In [356]:
center_101, center_001 =  split_df(center_01, 'Mouth_Slightly_Open')

Splitting on Mouth_Slightly_Open, 25342/34804 t/f 


In [374]:
classes = [center_100, center_000, center_101, center_001, center_1, not_center_1, not_center_0]
class_ids = ['0_center_female_openmouth', '1_center_female_closedmouth', '2_center_male_openmouth',
             '3_center_male_closedmouth', '4_center_eyeglasses', '5_profile_male', '6_profile_female']
print([len(c) for c in classes])

[48731, 43416, 25342, 34804, 10812, 15674, 23820]


### Split into 5 visually distinctive classes

In [386]:
center.shape

(163105, 54)

In [393]:
center_1, center_0 =  split_df(center, 'Male')
center_10, center_00 =  split_df(center_0, 'Mouth_Slightly_Open')
center_11, center_01 =  split_df(center_1, 'Mouth_Slightly_Open')

Splitting on Male, 68760/94345 t/f 
Splitting on Mouth_Slightly_Open, 49994/44351 t/f 
Splitting on Mouth_Slightly_Open, 29146/39614 t/f 


In [394]:
classes = [center_10, center_00, center_11, center_01, not_center]
class_ids = ['0_center_female_openmouth', '1_center_female_closedmouth', '2_center_male_openmouth',
             '3_center_male_closedmouth', '5_profile']
print([len(c) for c in classes])
print(sum([len(c) for c in classes]))
print(df.shape)
len(not_center) + len(center)

[49994, 44351, 29146, 39614, 39494]
202599
(202599, 54)


202599

### Split M/F

In [396]:
male, femal =  split_df(df, 'Male')

Splitting on Male, 84434/118165 t/f 


In [397]:
classes = [male, femal]
class_ids = ['0_male', '1_female']
print([len(c) for c in classes])
print(sum([len(c) for c in classes]))
print(df.shape)

[84434, 118165]
202599
(202599, 54)


### split left, center, right

In [None]:
left = df[ df['rotation'] > (med+std/2) ]
right = df[ df['rotation'] < (med-std/2) ]
center = df[ df['rotation'] > (med-std/2) ]
center = center[center['rotation'] < (med+std/2)]

In [None]:
classes = [left, right, center]
class_ids = ['0_left', '1_right', '2_center']
print([len(c) for c in classes])
print(sum([len(c) for c in classes]))
print(df.shape)
len(not_center) + len(center)

### Get crop right

In [223]:
import torch
import torchvision.utils as vutils

In [241]:
def rule_of_thirds_crop(row):
    eye_height = (row['lefteye_y'] + row['righteye_y']) // 2
    mouth_height = (row['leftmouth_y'] + row['rightmouth_y']) // 2
    nose_height = row['nose_y']
    # origin is top left
    thirds_height = mouth_height - eye_height
    is_rotated = (row['rotation'] > (med+std/2)) or (row['rotation'] < (med-std/2))
    # if face is rotated, then thirds dst should be height
    # when face is not rotated, then thirds dst can be eye width (in case head is tilted)
    if is_rotated:
        thirds_dst = thirds_height
    else:
        thirds_dst = row['righteye_x'] - row['lefteye_x']
    top = eye_height - thirds_dst
    bottom = nose_height + thirds_dst



    if True: # is_rotated
        # make up for rotation
        x = row['rotation']
        right_of_nose_prop = (np.cos(np.pi*(x + 1/2)/2) + 1) / 2.0
        left_of_nose_prop = 1 - right_of_nose_prop

        right = int((2*thirds_dst) * right_of_nose_prop + row['nose_x'])
        left = int(row['nose_x'] - ((2*thirds_dst) * left_of_nose_prop))
    # else:
    #     left = landmarks[4] - thirds_height
    #     right = landmarks[4] + thirds_height
    img = imageio.imread(os.path.join(src_folder, row['image_id']))
    [h,w,c] = img.shape
    right = min(right, h)
    left = max(left, 0)
    return img[top:bottom, left:right]

In [372]:
bs = 64
batch = np.zeros((bs,200,200,3))
src_folder = '/scratch0/ilya/locDoc/data/celeba/img_align_celeba'
i = 0
for index, row in classes[6].iterrows():
#     if len(not_center[not_center['image_id'] == row['image_id']]):
    crop = rule_of_thirds_crop(row) / 255.0
    [h,w,c] = crop.shape
    padded = np.pad(crop, ((int(np.floor((200-h)/2.0)),int(np.ceil((200-h)/2.0))),(int(np.floor((200-w)/2.0)),int(np.ceil((200-w)/2.0))),(0,0)), mode='constant')

    batch[i] = padded
    i += 1
    if i == 63:
        break

In [373]:
plt.figure()
grid = vutils.make_grid(torch.Tensor(np.moveaxis(batch,-1,1)), nrow=8, padding=0, normalize=False)
plt.imshow(np.moveaxis(grid.data.cpu().numpy(),0,-1))

FigureCanvasNbAgg()

<matplotlib.image.AxesImage at 0x7f4bc3d93eb8>

In [343]:
plt.close('all')

In [375]:
len(df)

202599

In [382]:
from GAN_training.utils import mkdirp
from tqdm import tqdm_notebook

### Save images now

In [398]:
pbar = tqdm_notebook(total=len(df))
for ci, c in enumerate(classes):
    mkdirp(os.path.join('/scratch0/ilya/locDoc/data/celeba_MFclass', class_ids[ci]))
    for _, row in c.iterrows():
        pbar.update(1)
        crop = rule_of_thirds_crop(row)
        imageio.imsave(os.path.join('/scratch0/ilya/locDoc/data/celeba_MFclass', class_ids[ci], row['image_id']), crop)
pbar.close()

HBox(children=(IntProgress(value=0, max=202599), HTML(value='')))