## Install Packages

In [1]:
import os
from facenet_pytorch import MTCNN, InceptionResnetV1, training, fixed_image_standardization
import torch
from torch import optim
from torch import nn
from torch.optim.lr_scheduler import MultiStepLR
from torch.utils.tensorboard import SummaryWriter
from torch.utils.data import DataLoader, Dataset, SubsetRandomSampler, SequentialSampler
from torchvision import datasets
from sklearn.neighbors import KNeighborsClassifier
from matplotlib import pyplot as plt
import seaborn as sns
import numpy as np
import pandas as pd
from tqdm import tqdm
from PIL import Image
from torchvision import transforms
from sklearn.metrics import accuracy_score
import src
from src.utils import celeba_helper
from src.utils.center_loss import CenterLoss
from src.utils.similarity_functions import min_norm_2
from src.utils.fine_tuning import set_parameter_requires_grad
from importlib import reload

workers = 0 if os.name == 'nt' else 2
torch.manual_seed(42)

<torch._C.Generator at 0x7fdde0205630>

In [2]:
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
# device = torch.device('cpu')
print('Running on device: {}'.format(device))
nGPU = torch.cuda.device_count()

Running on device: cuda:0


# Define CelebA Dataset and Loader

In [3]:
## Load the dataset
# Path to directory with all the images
img_folder = 'data/img_align_celeba'
mapping_file = 'data/identity_CelebA.txt'

In [4]:
# Spatial size of training images, images are resized to this size.
image_size = 160

# Batch size during training
batch_size = 32
# Batch size during testing
test_batch_size = 256

# Number of workers for the dataloader
num_workers = nGPU * 16

# Whether to put fetched data tensors to pinned memory
pin_memory = True if device.type == 'cuda' else False

In [5]:
transform=transforms.Compose([
    np.float32,
    transforms.ToTensor(),
    transforms.Resize((image_size, image_size)),
    fixed_image_standardization
])

transform_augment=transforms.Compose([
    np.float32,
    transforms.ToTensor(),
    transforms.Resize((image_size, image_size)),
    transforms.ColorJitter(brightness=0.1, contrast=0.1),
    # transforms.RandomAdjustSharpness(sharpness_factor=2),
    transforms.RandomHorizontalFlip(p=0.5),
    fixed_image_standardization
])

# Load the dataset from file and apply transformations
celeba_dataset = celeba_helper.CelebADataset(img_folder, mapping_file, transform)
celeba_dataset_augment = celeba_helper.CelebADataset(img_folder, mapping_file, transform_augment)

# Setup FaceNet

In [6]:
mtcnn = MTCNN(
    image_size=image_size, margin=0, min_face_size=20,
    thresholds=[0.6, 0.7, 0.7], factor=0.709, post_process=True, keep_all=False,
    device=device
)

# One-Shot Learning


## Load Dataset

## Select Files

In [7]:
# Select train files
file_label_mapping = celeba_dataset.get_file_label_mapping()
first_file_for_each_person_df = file_label_mapping.sort_values(by='person_id').groupby('person_id').agg(['min', 'count'])
train_files = np.sort(first_file_for_each_person_df['file_name']['min'].values)
print(f'Length of train set: {len(train_files)} images.')
num_classes = len(np.unique(file_label_mapping['person_id']))

Length of train set: 10177 images.


In [8]:
# Select test files
second_file_for_each_person_df = file_label_mapping[~file_label_mapping['file_name'].isin(first_file_for_each_person_df['file_name']['min'])].sort_values(by='person_id').groupby('person_id').agg(['min', 'count'])

# #1 test file per person 
# test_files = np.sort(second_file_for_each_person_df[second_file_for_each_person_df['file_name']['count'] >= 1]['file_name']['min'].values)

# Maximum number of test files per person
test_files = file_label_mapping[~file_label_mapping['file_name'].isin(first_file_for_each_person_df['file_name']['min'])]['file_name'].values

print(f'Length of test set: {len(test_files)} images.')

Length of test set: 192422 images.


# Predicting

In [9]:
testset_size = 1000

train_inds = [int(elem[:6])-1 for elem in train_files] #convert file names to indices that start at "0"
test_inds = [int(elem[:6])-1 for elem in test_files] #convert file names to indices that start at "0"

np.random.shuffle(test_inds)
val_inds = test_inds[:200]
# test_inds = np.random.choice(test_inds[200:], testset_size)

train_loader = DataLoader(
    celeba_dataset,
    num_workers=workers,
    batch_size=batch_size,
    sampler=SubsetRandomSampler(train_inds),
    prefetch_factor=4
)

# Create data loader with augmented images to concatenate to embeddings of normal ones
aug_loader = DataLoader(
    celeba_dataset_augment,
    num_workers=workers,
    batch_size=batch_size,
    sampler=SubsetRandomSampler(train_inds),
    prefetch_factor=4
)

val_loader = DataLoader(
    celeba_dataset,
    num_workers=workers,
    batch_size=test_batch_size,
    sampler=SubsetRandomSampler(val_inds),
    prefetch_factor=64
)

test_loader = DataLoader(
    celeba_dataset,
    num_workers=workers,
    batch_size=test_batch_size,
    sampler=SubsetRandomSampler(test_inds),
    prefetch_factor=64,
    pin_memory=True
)

## Run Pre-trained Model

In [10]:
model = InceptionResnetV1(pretrained='vggface2').eval().to(device)

In [11]:
# Get embeddings - either from files (if exists) or by running model

train_embeddings_path = 'pytorch_objects/train_embeddings_pretrained_all_1img.pt'
train_labels_path = 'pytorch_objects/train_labels_pretrained_all_1img.pt'
test_embeddings_path = 'pytorch_objects/test_embeddings_pretrained_all_1img.pt'
test_labels_path = 'pytorch_objects/test_labels_pretrained_all_1img.pt'

train_embeddings, train_labels = celeba_helper.get_embeddings_and_file_names(model, train_loader, train_embeddings_path, train_labels_path)
test_embeddings, test_labels = celeba_helper.get_embeddings_and_file_names(model, test_loader, test_embeddings_path, test_labels_path)
aug_embeddings, aug_labels = celeba_helper.get_embeddings_and_file_names(model,aug_loader, save_tensors=False)
    
print(f'TRAIN:     Embeddings: {train_embeddings.shape}.\tLabels: {train_labels.shape}.')
print(f'AUGMENTED: Embeddings: {aug_embeddings.shape}.\tLabels: {aug_labels.shape}.')
print(f'TEST:      Embeddings: {test_embeddings.shape}.\tLabels: {test_labels.shape}.')

100%|██████████| 319/319 [01:03<00:00,  5.00it/s]

TRAIN:     Embeddings: torch.Size([10177, 512]).	Labels: torch.Size([10177]).
AUGMENTED: Embeddings: torch.Size([10177, 512]).	Labels: torch.Size([10177]).
TEST:      Embeddings: torch.Size([192422, 512]).	Labels: torch.Size([192422]).





In [12]:
train_embeddings_plus_aug = torch.cat([train_embeddings, aug_embeddings]).cpu()
train_labels_plus_aug = torch.cat([train_labels, aug_labels]).cpu()
print(f'Total train emmbeddings (incl. augmented): {train_embeddings_plus_aug.shape}')

Total train emmbeddings (incl. augmented): torch.Size([20354, 512])


In [13]:
# Test how embeddings of augmented image to original image and normal images to each other behave
# Ideally, mean_dist_same_label should be close to 0

mean_dist_same_label = 0 
mean_dist_diff_label = 0
rounds = 1000

for i in range(rounds):
    label1 = np.random.randint(0,num_classes)
    label2 = np.random.randint(0,num_classes)

    indices_label1 = np.where(train_labels_plus_aug == label1)[0]
    indices_label2 = np.where(train_labels_plus_aug == label2)[0]
    
    # Calculate distance = embedding(original image of label 1) - embedding(augmented image of label 1)  
    dist_label1 = (train_embeddings_plus_aug[indices_label1[0]] - train_embeddings_plus_aug[indices_label1[1]]).norm().item()
    # Calculate distance = embedding(original image of label 1) - embedding(original image of label 2)
    dist_label1_label2 = (train_embeddings_plus_aug[indices_label1[0]] - train_embeddings_plus_aug[indices_label2[0]]).norm().item()

    # print(f'Distance for the two embeddings for the label {label1} is {dist_label1}.') 
    # print(f'Distance for the embeddings of the original images of {label2} and {label1} is {dist_label1_label2}.') 
    
    mean_dist_same_label += dist_label1 / rounds 
    mean_dist_diff_label += dist_label1_label2 / rounds

print(f'Number of rounds: {rounds}')
print(f'Mean Distance for the two embeddings for the same label is {mean_dist_same_label}.') 
print(f'Mean Distance for the embeddings of the original images of different labels is {mean_dist_diff_label}.') 

Number of rounds: 1000
Mean Distance for the two embeddings for the same label is 1.3929909987449658.
Mean Distance for the embeddings of the original images of different labels is 1.390016651391984.


In [14]:
%%time

knn = KNeighborsClassifier(n_neighbors=1)
knn.fit(train_embeddings, train_labels)
score = knn.score(test_embeddings, test_labels)

print(f'Pre-trained model: Accuracy = {score}.')

Pre-trained model: Accuracy = 0.4908586336281714.
CPU times: user 2min 33s, sys: 50.1 s, total: 3min 23s
Wall time: 1min 1s


In [None]:
%%time

knn = KNeighborsClassifier(n_neighbors=1)
knn.fit(train_embeddings_plus_aug, train_labels_plus_aug)
score = knn.score(test_embeddings, test_labels)

print(f'Pre-trained model: Accuracy = {score}.')

## Fine-tune Model

## General Functions

## Training

In [10]:
print(f'There are {num_classes} classes/people in the training data.')

There are 10177 classes/people in the training data.


### Cross Entropy

In [None]:
import src
reload(src.utils.celeba_helper)

epochs = 20

metrics = {
    'fps': training.BatchTimer(),
    'acc': training.accuracy
}

# Define path where to load from/save to the model
model_path = f'pytorch_objects/resnet_finetuned_celeba_{epochs}_epochs.pth'

if os.path.exists(model_path):
    model = InceptionResnetV1(classify=True, num_classes=num_classes).to(device)
    model.load_state_dict(torch.load(model_path))

else:
    # Load pre-trained model and finetune
    model = InceptionResnetV1(classify=True, pretrained='vggface2', num_classes=num_classes).to(device)
    
    # Only train logits layer
    set_parameter_requires_grad(model=model, feature_extracting=True)
    model.logits.requires_grad_(True)
    
    # Finetune
    model = celeba_helper.finetune_model(model, train_loader, val_loader, loss_fn=torch.nn.CrossEntropyLoss(), metrics=metrics, epochs=epochs, lr=0.001)
    
    # Save model
    torch.save(model.state_dict(), f'pytorch_objects/resnet_finetuned_celeba_{epochs}_epochs.pth')

In [None]:
predictions, labels = celeba_helper.test(test_loader, model, loss_fn)

### Center Loss

In [92]:
def pass_epoch_circle_loss(
    model, loss_fn, loader, optimizer=None, scheduler=None,
    batch_metrics={'time': training.BatchTimer()}, show_running=True,
    device='cpu', writer=None, lr_cent=0.5, alpha=0.1
):
    mode = 'Train' if model.training else 'Valid'
    logger = training.Logger(mode, length=len(loader), calculate_mean=show_running)
    loss = 0
    metrics = {}
    criterion_xent = torch.nn.CrossEntropyLoss() 
    
    for i_batch, (x, y) in enumerate(loader):
        x = x.to(device)
        y = y.to(device)
        features, y_pred = model(x)
        loss_xent = criterion_xent(y_pred, y)
        loss_batch = loss_fn(features, y) * alpha + loss_xent

        
        if model.training:
            loss_batch.backward()
            for param in center_loss.parameters():
                # lr_cent is learning rate for center loss, e.g. lr_cent = 0.5
                param.grad.data *= (lr_cent / (alpha * lr))
            optimizer.step()
            optimizer.zero_grad()

        metrics_batch = {}
        for metric_name, metric_fn in batch_metrics.items():
            metrics_batch[metric_name] = metric_fn(y_pred, y).detach().cpu()
            metrics[metric_name] = metrics.get(metric_name, 0) + metrics_batch[metric_name]
            
        if writer is not None and model.training:
            if writer.iteration % writer.interval == 0:
                writer.add_scalars('loss', {mode: loss_batch.detach().cpu()}, writer.iteration)
                for metric_name, metric_batch in metrics_batch.items():
                    writer.add_scalars(metric_name, {mode: metric_batch}, writer.iteration)
            writer.iteration += 1
        
        loss_batch = loss_batch.detach().cpu()
        loss += loss_batch
        if show_running:
            logger(loss, metrics, i_batch)
        else:
            logger(loss_batch, metrics_batch, i_batch)
    
    if model.training and scheduler is not None:
        scheduler.step()

    loss = loss / (i_batch + 1)
    metrics = {k: v / (i_batch + 1) for k, v in metrics.items()}
            
    if writer is not None and not model.training:
        writer.add_scalars('loss', {mode: loss.detach()}, writer.iteration)
        for metric_name, metric in metrics.items():
            writer.add_scalars(metric_name, {mode: metric})

    return loss, metrics

In [50]:
def finetune_model_circle_loss(model, optimizer, loss_fn=torch.nn.CrossEntropyLoss(), metrics={}, epochs:int=10, lr_cent = 0.5, alpha = 0.1):   
    # Make use of multiple GPUs if available
    CUDA = torch.cuda.is_available()
    nGPU = torch.cuda.device_count()

    if CUDA:
        model = model.cuda()
        if nGPU > 1:
            model = nn.DataParallel(model)
    
    scheduler = MultiStepLR(optimizer, [5, 10])
    
    metric_tracker = {}
    
    writer = SummaryWriter()
    writer.iteration, writer.interval = 0, 10
    
    set_parameter_requires_grad(model=model, feature_extracting=True)
    model.requires_grad_(True)

    for epoch in range(epochs):
        print('\nEpoch {}/{}'.format(epoch + 1, epochs))
        print('-' * 10)

        model.train()
        train_loss, train_metrics = pass_epoch_circle_loss(
            model, loss_fn, train_loader, optimizer, scheduler,
            batch_metrics=metrics, show_running=True, device=device,
            writer=writer, lr_cent = 0.5, alpha = 0.1
        )
        
        model.eval()
        val_loss, val_metrics = pass_epoch_circle_loss(
            model, loss_fn, val_loader,
            batch_metrics=metrics, show_running=True, device=device,
            writer=writer, lr_cent = 0.5, alpha = 0.1
        )

    writer.close()
    
    return model

In [51]:
use_gpu = True

model = InceptionResnetV1(
        classify=True,
        pretrained='vggface2',
        num_classes=len(pd.unique(file_label_mapping['person_id']))
    ).to(device)

In [52]:
class CustomFaceNet(nn.Module):
    def __init__(self, num_classes, feat_dim=2):
        super(CustomFaceNet, self).__init__()
        self.model = InceptionResnetV1(
            pretrained='vggface2',
            num_classes=num_classes
        )
        
        self.fc1 = nn.Linear(512, feat_dim)
        self.prelu_fc1 = nn.PReLU()
        self.fc2 = nn.Linear(feat_dim, self.model.num_classes)
        
    def forward(self, x):
        """Calculate embeddings or logits given a batch of input image tensors.

        Arguments:
            x {torch.tensor} -- Batch of image tensors representing faces.

        Returns:
            torch.tensor -- Batch of embedding vectors or multinomial logits.
        """
        x = self.model.conv2d_1a(x)
        x = self.model.conv2d_2a(x)
        x = self.model.conv2d_2b(x)
        x = self.model.maxpool_3a(x)
        x = self.model.conv2d_3b(x)
        x = self.model.conv2d_4a(x)
        x = self.model.conv2d_4b(x)
        x = self.model.repeat_1(x)
        x = self.model.mixed_6a(x)
        x = self.model.repeat_2(x)
        x = self.model.mixed_7a(x)
        x = self.model.repeat_3(x)
        x = self.model.block8(x)
        x = self.model.avgpool_1a(x)
        x = self.model.dropout(x)
        x = self.model.last_linear(x.view(x.shape[0], -1))
        x = self.model.last_bn(x)
        
        # Center Loss Layers
        x = self.fc1(x)
        x = self.prelu_fc1(x)
        y = self.fc2(x)
        
        return x, y

In [53]:
feat_dim = 4
model = CustomFaceNet(num_classes=num_classes, feat_dim=feat_dim).to(device)

In [54]:
lr = 0.01
epochs = 50

center_loss = CenterLoss(num_classes=num_classes, feat_dim=feat_dim, use_gpu=use_gpu)
params = list(model.fc1.parameters()) + list(model.prelu_fc1.parameters()) + list(model.fc2.parameters()) + list(center_loss.parameters())
optimizer = torch.optim.SGD(params, lr=lr) # here lr is the overall learning rate

model = finetune_model_circle_loss(model, 
                                   loss_fn=center_loss, 
                                   metrics=metrics, 
                                   epochs=epochs, 
                                   optimizer=optimizer, 
                                   lr_cent = 0.1,
                                   alpha = 0.5
                                  )   


Epoch 1/20
----------
Train |   318/318  | loss:    9.6863 | fps:  165.2563 | acc:    0.0000   
Valid |     7/7    | loss:    9.6623 | fps:  340.8924 | acc:    0.0000   

Epoch 2/20
----------
Train |   318/318  | loss:    9.6489 | fps:  166.2873 | acc:    0.0000   
Valid |     7/7    | loss:    9.6585 | fps:  323.0119 | acc:    0.0000   

Epoch 3/20
----------
Train |   318/318  | loss:    9.6207 | fps:  165.8471 | acc:    0.0001   
Valid |     7/7    | loss:    9.6248 | fps:  353.9544 | acc:    0.0000   

Epoch 4/20
----------
Train |   318/318  | loss:    9.5856 | fps:  166.8148 | acc:    0.0001   
Valid |     7/7    | loss:    9.6176 | fps:  352.6427 | acc:    0.0000   

Epoch 5/20
----------
Train |   318/318  | loss:    9.5389 | fps:  163.9182 | acc:    0.0004   
Valid |     7/7    | loss:    9.6109 | fps:  392.4390 | acc:    0.0000   

Epoch 6/20
----------
Train |   318/318  | loss:    9.4506 | fps:  164.9186 | acc:    0.0002   
Valid |     7/7    | loss:    9.5906 | fps:  370

KeyboardInterrupt: 

## Testing

In [118]:
model.classify = False
train_embeddings, train_labels = get_embeddings(model, train_loader, len(train_files), batch_size)
test_embeddings, test_labels = get_embeddings(model, test_loader, len(test_files), batch_size)

 50%|█████     | 159/316 [01:31<01:30,  1.74it/s]
 50%|█████     | 159/316 [01:30<01:29,  1.75it/s]


In [119]:
test_predictions, test_labels = test(test_loader, model, loss_fn)

100%|██████████| 159/159 [01:31<00:00,  1.74it/s]

Test Error: 
 Accuracy: 0.2%, Avg loss: 8.176490 






In [122]:
knn = KNeighborsClassifier(n_neighbors=1)


test_embeddings = test_embeddings.cpu()
test_labels = test_labels.cpu()

knn.fit(train_embeddings, train_labels)
score = knn.score(test_embeddings, test_labels)

print(f'Fine-tuned model: Accuracy = {score}.')

Fine-tuned model: Accuracy = 9.868745682423764e-05.


# Conclusion
Fine-tuning FaceNet on a one-shot dataset does not yield desirable results, even after hyperparameter tuning. The model quickly overfits and performs worse than the default FaceNet which achieves approx. $50\%$ test accuracy on the One-Shot CelebA dataset.

In [None]:
%reload_ext tensorboard
%tensorboard --logdir runs