In [15]:
import warnings 
warnings.filterwarnings('ignore')

import glob
import os
import random, time, copy
import shutil

import numpy as np
import pandas as pd
from tqdm.notebook import tqdm

import matplotlib.pyplot as plt
from matplotlib.patches import Rectangle
from PIL import Image

import torch
import torchvision
from torch import nn, optim
from torchsummary import summary
from torch.utils.data import DataLoader, ConcatDataset
from torchvision import datasets, transforms, models, utils

from facenet_pytorch import MTCNN
from facenet_pytorch import InceptionResnetV1

Matplotlib created a temporary config/cache directory at /tmp/matplotlib-tneb8cpq because the default path (/home/jwong/.cache/matplotlib) is not a writable directory; it is highly recommended to set the MPLCONFIGDIR environment variable to a writable directory, in particular to speed up the import of Matplotlib and to better support multiprocessing.


In [6]:
def create_dataset(src, dst, range_, class_):
    """Copy images of class class_ within range_ from src to dst.

    The regular files in ``src`` are listed in sorted (lexicographic) name
    order and numbered starting at 1. The files whose numbers fall in
    ``range(*range_)`` are copied to ``dst`` and renamed to
    ``'<class_> (<number>).jpg'``. Selecting by number (instead of always
    taking the first files of the listing) is what keeps calls with
    disjoint ranges from copying the same source images.

    Parameters
    ----------
    src : str
        source directory
    dst : str
        destination directory; if it already exists it is deleted and
        recreated, so previous contents are lost
    range_ : tuple
        arguments forwarded to ``range``: 1-based image indices to copy,
        with the stop value exclusive
    class_ : str
        image class, used as the prefix of the copied file names

    Raises
    ------
    IndexError
        if ``range_`` selects an index outside ``1..number of files in src``.
        The check runs before ``dst`` is touched.
    """
    # os.listdir returns entries in arbitrary order; sort so that a given
    # index always refers to the same file across calls and runs.
    file_names = sorted(
        name for name in os.listdir(src)
        if os.path.isfile(os.path.join(src, name))
    )

    indices = list(range(*range_))
    out_of_bounds = [i for i in indices if not 1 <= i <= len(file_names)]
    if out_of_bounds:
        raise IndexError(
            f'range_ {tuple(range_)} selects image indices {out_of_bounds} '
            f'but {src!r} holds {len(file_names)} files (indices are 1-based)'
        )

    if os.path.exists(dst):
        # if existing, delete dir to reset
        shutil.rmtree(dst)
    os.makedirs(dst)

    for i in indices:
        src_file = os.path.join(src, file_names[i - 1])
        dst_file = os.path.join(dst, f'{class_} ({i}).jpg')
        shutil.copyfile(src_file, dst_file)

In [14]:
face_classes = ['angry', 'sad', 'happy', 'surprise', 'fear', 'disgust',
                'neutral']

CROP_DIR = '../datasets/cropped-images'
CROP_FILEPATHS = dict()

# Number of images per class
for face_label in face_classes:
    # Sorted so the stored path list is reproducible; glob order is arbitrary.
    temp_fp = sorted(glob.glob(f'{CROP_DIR}/{face_label}/*'))
    print(f'There are {len(temp_fp)} images for {face_label.title()}.')

    CROP_FILEPATHS[face_label] = temp_fp

print('-----' * 5)

# Specify the train-validation-test partition
partition_train = 0.6
partition_val = 0.2
partition_test = 0.2

# The test split receives whatever is left after train and validation, so
# partition_test is only meaningful if the three fractions add up to 1.
if abs(partition_train + partition_val + partition_test - 1.0) > 1e-9:
    raise ValueError('partition_train + partition_val + partition_test '
                     'must sum to 1')

# Create the train-val-test partition for all classes
TRAIN_DIR, VAL_DIR, TEST_DIR = dict(), dict(), dict()

for class_ in face_classes:
    n_images = len(CROP_FILEPATHS[class_])

    # Image indices are 1-based and each range is half-open: [start, end).
    train_start = 1
    train_end = round(n_images * partition_train) + 1

    val_start = train_end
    val_end = val_start + round(n_images * partition_val)

    # Test takes the remainder so rounding never drops an image.
    test_start = val_end
    test_end = n_images + 1

    # (label used in the report, sub-directory, registry dict, index range)
    splits = [
        ('training', 'train', TRAIN_DIR, (train_start, train_end)),
        ('validation', 'validation', VAL_DIR, (val_start, val_end)),
        ('test', 'test', TEST_DIR, (test_start, test_end)),
    ]

    for _, subdir, registry, bounds in splits:
        dest = f'data/classifier/{subdir}/{class_}'
        registry[class_] = dest
        create_dataset(f'{CROP_DIR}/{class_}', dest,
                       range_=bounds, class_=class_)

    # Report what actually landed on disk rather than the planned counts.
    for label, _, registry, _ in splits:
        print(f'Total {label} images for {class_.title()}: ',
              len(os.listdir(registry[class_])))

    print('-----' * 5)
# NOTE(review): this normalization / data-loading step also appears in the
# following cell (In [16]); consider keeping a single copy.
TRAIN_FDIR = "data/classifier/train"

# Plain resize + tensor conversion, used only to measure the training-set
# statistics (kept under its own name so `data_transforms` is always the
# per-split dict defined below).
stats_transforms = transforms.Compose([
    transforms.Resize(size=(224, 224)),
    transforms.ToTensor()
])

train_data = datasets.ImageFolder(root=TRAIN_FDIR,
                                  transform=stats_transforms)

# Compute for the means and stds (for normalization)
# Images are stacked along a new last axis, then everything except the
# leading axis is flattened to get one mean/std per channel.
# (assumes every image tensor has 3 channels — required by view(3, -1))
imgs = torch.stack([img_t for img_t, _ in train_data], dim=3)
means = imgs.view(3, -1).mean(dim=1).numpy()
stds = imgs.view(3, -1).std(dim=1).numpy()

print(f'Means:           {means}') 
print(f'Std. Deviations: {stds}')

# Per-split pipelines: random augmentation for training only; validation
# and test get the deterministic resize + normalize path.
data_transforms = {
    'train': transforms.Compose([
        transforms.Resize(size=(224,224)),
        transforms.RandomHorizontalFlip(p=0.6),
        transforms.RandomPerspective(p=0.5),
        transforms.ColorJitter(brightness=0.5),
        transforms.ToTensor(),
        transforms.Normalize(means, stds)
    ]),
    'validation': transforms.Compose([
        transforms.Resize(size=(224,224)),
        transforms.ToTensor(),
        transforms.Normalize(means, stds)
    ]),
    'test': transforms.Compose([
        transforms.Resize(size=(224,224)),
        transforms.ToTensor(),
        transforms.Normalize(means, stds)
    ])
}


DATA_DIR = 'data/classifier'

# Loading image data using ImageFolder
image_datasets = {x: datasets.ImageFolder(os.path.join(DATA_DIR, x),
                                          data_transforms[x])
                  for x in ['train', 'validation', 'test']}

# Dataloaders
# drop_last is applied to the training loader only: dropping the final
# partial batch of validation/test would skip samples that dataset_sizes
# still counts, skewing any metric averaged over the dataset size.
dataloaders = {x: DataLoader(image_datasets[x], batch_size=4,
                             shuffle=True, drop_last=(x == 'train'))
               for x in ['train', 'validation', 'test']}

# Size of datasets
dataset_sizes = {x: len(image_datasets[x]) for x in
                 ['train', 'validation', 'test']}

# Class names
class_names = image_datasets['train'].classes

There are 47 images for Angry.
There are 49 images for Sad.
There are 48 images for Happy.
There are 49 images for Surprise.
There are 49 images for Fear.
There are 47 images for Disgust.
There are 49 images for Neutral.
-------------------------
Total training images for Angry:  28
Total validation images for Angry:  9
Total test images for Angry:  10
-------------------------
Total training images for Sad:  29
Total validation images for Sad:  10
Total test images for Sad:  10
-------------------------
Total training images for Happy:  29
Total validation images for Happy:  10
Total test images for Happy:  9
-------------------------
Total training images for Surprise:  29
Total validation images for Surprise:  10
Total test images for Surprise:  10
-------------------------
Total training images for Fear:  29
Total validation images for Fear:  10
Total test images for Fear:  10
-------------------------
Total training images for Disgust:  28
Total validation images for Disgust:  9
T

In [16]:
TRAIN_FDIR = "data/classifier/train"

# Plain resize + tensor conversion, used only to measure the training-set
# statistics (kept under its own name so `data_transforms` is always the
# per-split dict defined below).
stats_transforms = transforms.Compose([
    transforms.Resize(size=(224, 224)),
    transforms.ToTensor()
])

train_data = datasets.ImageFolder(root=TRAIN_FDIR,
                                  transform=stats_transforms)

# Compute for the means and stds (for normalization)
# Images are stacked along a new last axis, then everything except the
# leading axis is flattened to get one mean/std per channel.
# (assumes every image tensor has 3 channels — required by view(3, -1))
imgs = torch.stack([img_t for img_t, _ in train_data], dim=3)
means = imgs.view(3, -1).mean(dim=1).numpy()
stds = imgs.view(3, -1).std(dim=1).numpy()

print(f'Means:           {means}') 
print(f'Std. Deviations: {stds}')

# Per-split pipelines: random augmentation for training only; validation
# and test get the deterministic resize + normalize path.
data_transforms = {
    'train': transforms.Compose([
        transforms.Resize(size=(224,224)),
        transforms.RandomHorizontalFlip(p=0.6),
        transforms.RandomPerspective(p=0.5),
        transforms.ColorJitter(brightness=0.5),
        transforms.ToTensor(),
        transforms.Normalize(means, stds)
    ]),
    'validation': transforms.Compose([
        transforms.Resize(size=(224,224)),
        transforms.ToTensor(),
        transforms.Normalize(means, stds)
    ]),
    'test': transforms.Compose([
        transforms.Resize(size=(224,224)),
        transforms.ToTensor(),
        transforms.Normalize(means, stds)
    ])
}


DATA_DIR = 'data/classifier'

# Loading image data using ImageFolder
image_datasets = {x: datasets.ImageFolder(os.path.join(DATA_DIR, x),
                                          data_transforms[x])
                  for x in ['train', 'validation', 'test']}

# Dataloaders
# drop_last is applied to the training loader only: dropping the final
# partial batch of validation/test would skip samples that dataset_sizes
# still counts, skewing any metric averaged over the dataset size.
dataloaders = {x: DataLoader(image_datasets[x], batch_size=4,
                             shuffle=True, drop_last=(x == 'train'))
               for x in ['train', 'validation', 'test']}

# Size of datasets
dataset_sizes = {x: len(image_datasets[x]) for x in
                 ['train', 'validation', 'test']}

# Class names
class_names = image_datasets['train'].classes

Means:           [0.6235189 0.4881056 0.4277293]
Std. Deviations: [0.25944903 0.22985741 0.22180712]
