## Import Packages

In [None]:
from facenet_pytorch import MTCNN, InceptionResnetV1, fixed_image_standardization, training
import numpy as np
from torch.utils.data import DataLoader
from torchvision import datasets
from matplotlib import pyplot as plt
import seaborn as sns
import pandas as pd
import os
import zipfile 
import torch
from PIL import Image
from torch.utils.data import Dataset
from torchvision import transforms
from sklearn.metrics import accuracy_score
import src
from src.utils.celeba_helper import CelebA_MTCNN_Helper
from imp import reload
import warnings
warnings.filterwarnings('ignore')

In [None]:
# Use the first CUDA GPU when one is available; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda:0')
else:
    device = torch.device('cpu')
print('Running on device: {}'.format(device))

## Define CelebA Dataset

In [None]:
# Locations of the raw CelebA data; the aligned source images are 178 x 218 pixels.
img_folder = 'data/img_align_celeba'
mapping_file = 'data/identity_CelebA.txt'

# Upscale every image to 512 x 512 ahead of MTCNN so the detector has a
# better chance of finding the face.
upscale_for_detection = transforms.Resize((512, 512))

# Original (pre-MTCNN) CelebA dataset.
celeba_dataset = CelebA_MTCNN_Helper(
    img_folder,
    mapping_file,
    transform=upscale_for_detection,
)

## Define MTCNN Module

In [None]:
# MTCNN must emit 160x160 crops: that is the input size the FaceNet
# InceptionResNet was trained on for the VGGFace2 pretrained weights.
mtcnn = MTCNN(
    image_size=160,              # side length of each saved face crop, in pixels
    margin=0,                    # no extra margin around the detected box
    min_face_size=20,
    thresholds=[0.6, 0.7, 0.7],  # detection thresholds passed to MTCNN
    factor=0.709,
    post_process=True,
    keep_all=False,              # keep a single face per image
    device=device,
)

## Perform MTCNN Facial Detection
Iterate through the DataLoader object and obtain cropped faces.

**Only Run this Code Once to Create MTCNN Cropped/Detected CelebA Image Directory**

**MTCNN CelebA Dataset Saved In: 'data/img_align_celeba_mtcnn' Folder**

In [None]:
# Output directory for the MTCNN-processed CelebA images: the source image
# folder path with a suffix appended.
mtcnn_suffix = '_mtcnn'
mtcnn_img_folder = img_folder + mtcnn_suffix
# Bare expression so the notebook displays the resulting path.
mtcnn_img_folder

In [None]:
## Build the DataLoader that feeds the original CelebA images to MTCNN
# Several loader settings depend on whether we are running on a CUDA device.
on_gpu = device.type == 'cuda'

# Number of images per batch
batch_size = 128
# Loader worker processes: none on CUDA, two otherwise
num_workers = 2 if not on_gpu else 0
# Put fetched data tensors in pinned memory only when running on CUDA
pin_memory = on_gpu

loader_options = dict(
    batch_size=batch_size,
    num_workers=num_workers,
    pin_memory=pin_memory,
    collate_fn=training.collate_pil,  # formats a batch of PIL images correctly
    shuffle=False,
)
celeba_dataloader = DataLoader(celeba_dataset, **loader_options)

In [None]:
# Detect, crop and save a face for every image in the original CelebA dataset.
total_batches = len(celeba_dataloader)
for i, (x, y) in enumerate(celeba_dataloader):
    # Each batch arrives as (x, y): x is a list of PIL images and y the
    # matching image file names. Swap every file name for the path the
    # post-MTCNN image should be saved to, using forward slashes throughout.
    y = [
        os.path.join(mtcnn_img_folder, img_file_name).replace('\\', '/')
        for img_file_name in y
    ]

    # Run detection on the batch; results are saved to the paths in y.
    mtcnn(x, save_path=y)
    print('\rBatch {} of {}'.format(i + 1, total_batches), end='')

# Detection is finished: drop the MTCNN module to reduce GPU memory usage.
del mtcnn
    