In [2]:
import sys
sys.path.append('..')

import torch
import torch.nn as nn
from src.data_loader import FairFaceData, CelebData, EmbeddingData
from facenet_pytorch import InceptionResnetV1
import torch.utils.data as torchdata
from torchvision.transforms.functional import to_pil_image
import matplotlib.pyplot as plt
from tqdm import tqdm
from pathlib import Path

from torchvision.models import vgg16
from torchvision.models import VGG16_Weights
from torchvision import datasets
from torchvision import transforms

# Setup Data Storage Paths

In [3]:
# Define Paths
root = Path('..')
preproc_root = root / 'data_processed'
sample_preproc_root = root / 'data_sample_processed'

preproc_root.mkdir(exist_ok=True)
sample_preproc_root.mkdir(exist_ok=True)

# Setup Models

In [4]:
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print(f'Using {device} for inference')

Using cuda for inference


In [5]:
# Define Models in embedding mode.
model1 = InceptionResnetV1(pretrained = 'vggface2', device = device, classify=False)
_ = model1.eval()
model2 = InceptionResnetV1(pretrained = 'casia-webface', device = device, classify=False)
_ = model2.eval()

# model 3 is a vgg16, ImageNet trained
model3 = vgg16(weights=VGG16_Weights.IMAGENET1K_V1)
# remove all but first fully connected
model3.classifier = model3.classifier = nn.Sequential(model3.classifier[0], model3.classifier[1])
model3 = model3.to(device)
model3.eval()

models = [model1, model2, model3]

# Preprocess Embeddings for Celeb Classification 

We use these embeddings for determining an appropriate classification model.

In [6]:
# we only want individuals without eyglasses
celeb_data = CelebData(root = str(root), sample = False)
celeb_data_sample = CelebData(root = str(root), sample = True)

In [7]:
celeb_data.get_all_filter()

Index(['5_o_Clock_Shadow', 'Arched_Eyebrows', 'Attractive', 'Bags_Under_Eyes',
       'Bald', 'Bangs', 'Big_Lips', 'Big_Nose', 'Black_Hair', 'Blond_Hair',
       'Blurry', 'Brown_Hair', 'Bushy_Eyebrows', 'Chubby', 'Double_Chin',
       'Eyeglasses', 'Goatee', 'Gray_Hair', 'Heavy_Makeup', 'High_Cheekbones',
       'Male', 'Mouth_Slightly_Open', 'Mustache', 'Narrow_Eyes', 'No_Beard',
       'Oval_Face', 'Pale_Skin', 'Pointy_Nose', 'Receding_Hairline',
       'Rosy_Cheeks', 'Sideburns', 'Smiling', 'Straight_Hair', 'Wavy_Hair',
       'Wearing_Earrings', 'Wearing_Hat', 'Wearing_Lipstick',
       'Wearing_Necklace', 'Wearing_Necktie', 'Young'],
      dtype='object')

In [8]:
celeb_data = celeb_data.filter_dataset({'Eyeglasses': -1})
celeb_data_sample = celeb_data_sample.filter_dataset({'Eyeglasses': -1})

In [9]:
len(celeb_data)

189406

In [10]:
celeb_dataloader = torchdata.DataLoader(celeb_data, batch_size=128, shuffle=False)
celeb_sample_dataloader = torchdata.DataLoader(celeb_data_sample, batch_size = 10, shuffle = False)

In [11]:
def compute_embeddings(model, dataloader, save: Path = None):
    ''' computes embeddings & results as 0-1 instead of -1 to 1'''
    model.to(device)
    model.eval()

    with torch.no_grad():
        d = enumerate(dataloader)
        idx, (img, gender)  = next(d)
        img = img.float().to(device)
        # outputs logit tensors.
        total = model(img)
        rez = ((gender + 1)/2).int()
        # iterate through all images and compute a euclidian
        for idx, (img, gender) in tqdm(d, total = len(dataloader) -1):
            # move img as float and to device
            img = img.float().to(device)
            
            # outputs logit tensors.
            t = model(img)
            r = ((gender + 1)/2).int()
            
            # combine with current total
            total = torch.vstack([total, t])
            rez = torch.vstack([rez, r])
    
    if save:
        torch.save(total, save / 'embeddings.pt')
        torch.save(rez, save / 'gender.pt')
        
    return total, rez

Model 1 - Facial Recognition

In [None]:
celeb_embeddings = preproc_root / 'celeb_embeddings_vggface2'
celeb_embeddings_sample = sample_preproc_root / 'celeb_embeddings_vggface2'
celeb_embeddings.mkdir(exist_ok=True, parents=True)
celeb_embeddings_sample.mkdir(exist_ok=True, parents=True)

In [57]:
# save sample
compute_embeddings(model1, celeb_sample_dataloader, save = celeb_embeddings_sample)

0it [00:00, ?it/s]


(tensor([[ 0.0582, -0.0461,  0.0283,  ...,  0.0144,  0.0181,  0.0049],
         [ 0.0546, -0.0363,  0.0391,  ...,  0.0110,  0.0078,  0.0103],
         [ 0.0610, -0.0475,  0.0301,  ...,  0.0235,  0.0339, -0.0205],
         ...,
         [ 0.0610, -0.0528,  0.0221,  ...,  0.0141,  0.0063,  0.0121],
         [ 0.0614, -0.0605,  0.0258,  ...,  0.0215,  0.0108, -0.0032],
         [ 0.0629, -0.0428,  0.0356,  ...,  0.0199,  0.0176, -0.0125]],
        device='cuda:0'),
 tensor([[0],
         [0],
         [1],
         [0],
         [0],
         [0],
         [1],
         [1],
         [0],
         [0]], dtype=torch.int32))

In [58]:
# check it worked
test = EmbeddingData(data_dir_name = 'celeb_embeddings_vggface2', root = str(root), device = device, sample = True)
len(test[2][0]), len(test[2][1])

(512, 1)

In [59]:
# save full
compute_embeddings(model1, celeb_dataloader, save = celeb_embeddings)

100%|██████████| 1479/1479 [16:41<00:00,  1.48it/s]


(tensor([[ 0.0581, -0.0461,  0.0283,  ...,  0.0144,  0.0181,  0.0049],
         [ 0.0546, -0.0363,  0.0391,  ...,  0.0110,  0.0078,  0.0103],
         [ 0.0610, -0.0475,  0.0301,  ...,  0.0235,  0.0339, -0.0204],
         ...,
         [ 0.0592, -0.0445,  0.0382,  ...,  0.0038,  0.0049, -0.0049],
         [ 0.0683, -0.0549,  0.0267,  ...,  0.0167,  0.0112,  0.0154],
         [ 0.0508, -0.0473,  0.0299,  ...,  0.0077,  0.0069,  0.0049]],
        device='cuda:0'),
 tensor([[0],
         [0],
         [1],
         ...,
         [1],
         [0],
         [0]], dtype=torch.int32))

In [60]:
# check it worked
test = EmbeddingData(data_dir_name = 'celeb_embeddings_vggface2', root = str(root), device = device, sample = False)
len(test[2][0]), len(test[2][1])

(512, 1)

Model 2 - Facial Recognition

In [61]:
celeb_embeddings = preproc_root / 'celeb_embeddings_casia'
celeb_embeddings_sample = sample_preproc_root / 'celeb_embeddings_casia'
celeb_embeddings.mkdir(exist_ok=True, parents=True)
celeb_embeddings_sample.mkdir(exist_ok=True, parents=True)

In [62]:
# save sample
compute_embeddings(model2, celeb_sample_dataloader, save = celeb_embeddings_sample)

0it [00:00, ?it/s]


(tensor([[-0.0253, -0.0116,  0.0524,  ...,  0.0542, -0.0348,  0.0553],
         [-0.0306, -0.0079,  0.0632,  ...,  0.0519, -0.0409,  0.0541],
         [-0.0276, -0.0101,  0.0577,  ...,  0.0526, -0.0377,  0.0533],
         ...,
         [-0.0194, -0.0087,  0.0514,  ...,  0.0423, -0.0412,  0.0601],
         [-0.0254, -0.0083,  0.0604,  ...,  0.0500, -0.0384,  0.0577],
         [-0.0250, -0.0084,  0.0563,  ...,  0.0508, -0.0391,  0.0569]],
        device='cuda:0'),
 tensor([[0],
         [0],
         [1],
         [0],
         [0],
         [0],
         [1],
         [1],
         [0],
         [0]], dtype=torch.int32))

In [12]:
# save full
compute_embeddings(model2, celeb_dataloader, save = celeb_embeddings)

NameError: name 'celeb_embeddings' is not defined

Model 3 - VGG16, Imagenet

In [14]:
celeb_embeddings = preproc_root / 'celeb_embeddings_vgg16'
celeb_embeddings.mkdir(exist_ok=True, parents=True)

In [15]:
compute_embeddings(model3, celeb_dataloader, save = celeb_embeddings)

100%|██████████| 1479/1479 [15:37<00:00,  1.58it/s]


(tensor([[ 9.4920,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
         [ 0.0000, 10.5899,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
         [ 0.0000,  0.0000, 11.4709,  ...,  0.0000,  0.0000, 24.1227],
         ...,
         [ 0.0000,  0.0000,  0.0000,  ..., 10.9405,  0.0000,  0.0000],
         [ 0.0000, 24.5744,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
         [ 0.0000,  0.0000,  4.5296,  ...,  0.0000,  3.0456,  0.0000]],
        device='cuda:0'),
 tensor([[0],
         [0],
         [1],
         ...,
         [1],
         [0],
         [0]], dtype=torch.int32))

# Preprocess Embeddings for different Ethnicities. 

In [5]:
fface = FairFaceData(root = str(root), sample = False)
fface_sample = FairFaceData(root = str(root), sample = True)

In [6]:
fface.set_filter(['gender'])

In [7]:
fface.get_all_filter()

Index(['age', 'gender', 'race', 'service_test'], dtype='object')

In [8]:
fface.get_attr_map()

{'age': {'50-59': 0,
  '30-39': 1,
  '3-9': 2,
  '20-29': 3,
  '40-49': 4,
  '10-19': 5,
  '60-69': 6,
  '0-2': 7,
  'more than 70': 8},
 'gender': {'Male': 1, 'Female': 0},
 'race': {'East Asian': 0,
  'Indian': 1,
  'Black': 2,
  'White': 3,
  'Middle Eastern': 4,
  'Latino_Hispanic': 5,
  'Southeast Asian': 6},
 'service_test': {True: 0, False: 1}}

In [9]:
# process by ethnicity
eadata = fface.filter_dataset(filter_={'race': 'East Asian'})
inddata = fface.filter_dataset(filter_={'race': 'Indian'})
whitedata = fface.filter_dataset(filter_={'race': 'White'})
blackdata = fface.filter_dataset(filter_={'race': 'Black'})
medata = fface.filter_dataset(filter_={'race': 'Middle Eastern'})
lhdata = fface.filter_dataset(filter_={'race': 'Latino_Hispanic'})

In [11]:
#! Wait to figure out a single model to run the ethnicity on to save time.

: 