## Install Packages

In [18]:
import os
from facenet_pytorch import MTCNN, InceptionResnetV1, training, fixed_image_standardization
import torch
from torch import optim
from torch.optim.lr_scheduler import MultiStepLR
from torch.utils.tensorboard import SummaryWriter
from torch.utils.data import DataLoader, Dataset, SubsetRandomSampler, SequentialSampler
from torchvision import datasets
from sklearn.neighbors import KNeighborsClassifier
from matplotlib import pyplot as plt
import seaborn as sns
import numpy as np
import pandas as pd
from tqdm import tqdm
from PIL import Image
from torchvision import transforms
from sklearn.metrics import accuracy_score
import src
from src.utils.celeba_helper import CelebADataset, CelebAClassifier, save_file_names
from importlib import reload

workers = 0 if os.name == 'nt' else 2

In [19]:
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
# device = torch.device('cpu')
print('Running on device: {}'.format(device))
nGPU = torch.cuda.device_count()

Running on device: cuda:0


# Define CelebA Dataset and Loader

In [20]:
## Load the dataset
# Path to directory with all the images
img_folder = 'data/img_align_celeba'
mapping_file = 'data/identity_CelebA.txt'

In [21]:
# Spatial size of training images, images are resized to this size.
image_size = 160

# Batch size during training
batch_size = 32

# Number of workers for the dataloader
num_workers = nGPU * 4

# Whether to put fetched data tensors to pinned memory
pin_memory = True if device.type == 'cuda' else False

In [22]:
transform=transforms.Compose([
    transforms.Resize(image_size),
    np.float32,
    transforms.ToTensor(),
    fixed_image_standardization
])

# Load the dataset from file and apply transformations
celeba_dataset = CelebADataset(img_folder, mapping_file, transform)

In [23]:
## Create a dataloader
celeba_dataloader = DataLoader(celeba_dataset,  # type: ignore
                        batch_size=batch_size,
                        num_workers=num_workers,
                        pin_memory=pin_memory,
                        shuffle=False)

# Setup FaceNet

In [24]:
mtcnn = MTCNN(
    image_size=image_size, margin=0, min_face_size=20,
    thresholds=[0.6, 0.7, 0.7], factor=0.709, post_process=True, keep_all=False,
    device=device
)

model = InceptionResnetV1(pretrained='vggface2').eval().to(device)

In [25]:
classifier = CelebAClassifier(celeba_dataloader, detection_model=mtcnn, embedding_model=model)

# One-Shot Learning


## Load Dataset

## Select Files

In [26]:
# Select train files
file_label_mapping = celeba_dataset.get_file_label_mapping()
first_file_for_each_person_df = file_label_mapping.sort_values(by='person_id').groupby('person_id').agg(['min', 'count'])
train_files = np.sort(first_file_for_each_person_df['file_name']['min'].values)
print(f'Length of train set: {len(train_files)} images.')
display(train_files)

Length of train set: 10177 images.


array(['000001.jpg', '000002.jpg', '000003.jpg', ..., '197332.jpg',
       '197837.jpg', '198787.jpg'], dtype=object)

In [27]:
# Select test files
second_file_for_each_person_df = file_label_mapping[~file_label_mapping['file_name'].isin(first_file_for_each_person_df['file_name']['min'])].sort_values(by='person_id').groupby('person_id').agg(['min', 'count'])

# #1 test file per person 
# test_files = np.sort(second_file_for_each_person_df[second_file_for_each_person_df['file_name']['count'] >= 1]['file_name']['min'].values)

# Maximum number of test files per person
test_files = file_label_mapping[~file_label_mapping['file_name'].isin(first_file_for_each_person_df['file_name']['min'])]['file_name'].values

print(f'Length of test set: {len(test_files)} images.')
display(file_label_mapping[~file_label_mapping['file_name'].isin(first_file_for_each_person_df['file_name']['min'])]['file_name'].values)

Length of test set: 192422 images.


array(['000050.jpg', '000082.jpg', '000210.jpg', ..., '202597.jpg',
       '202598.jpg', '202599.jpg'], dtype=object)

# Predicting

In [28]:
def get_embeddings(model, dataloader, dataset_size, batch_size):
    model.eval()
    embeddings = torch.tensor([])
    
    for idx, batch in tqdm(enumerate(dataloader), total=int(dataset_size/batch_size)):
        imgs, batch_labels = batch
        batch_embeddings = model(imgs.to(device)).detach().cpu()
        
        if not embeddings.numel():
            embeddings = batch_embeddings
            labels = batch_labels
        else:
            embeddings = torch.cat([embeddings, batch_embeddings])
            labels = torch.cat([labels, batch_labels])
    
    return embeddings, labels

In [29]:
train_inds = [int(elem[:6])-1 for elem in train_files] #convert file names to indices that start at "0"
test_inds = [int(elem[:6])-1 for elem in test_files] #convert file names to indices that start at "0"

np.random.shuffle(test_inds)
val_inds = test_inds[:200] #convert file names to indices that start at "0"

train_loader = DataLoader(
    celeba_dataset,
    num_workers=workers,
    batch_size=batch_size,
    sampler=SubsetRandomSampler(train_inds)
)

val_loader = DataLoader(
    celeba_dataset,
    num_workers=workers,
    batch_size=batch_size,
    sampler=SubsetRandomSampler(val_inds)
)

test_loader = DataLoader(
    celeba_dataset,
    num_workers=workers,
    batch_size=batch_size,
    sampler=SubsetRandomSampler(test_inds)
)

## Run Pre-trained Model

In [30]:
model = InceptionResnetV1(
    pretrained='vggface2'
).eval().to(device)

In [31]:
embeddings_path = 'pytorch_objects/train_embeddings_pretrained_all_1img.pickle'
labels_path = 'pytorch_objects/train_labels_pretrained_all_1img'
test_embeddings_path = 'pytorch_objects/test_embeddings_pretrained_all_1img.pickle'
test_labels_path = 'pytorch_objects/test_labels_pretrained_all_1img'

if not os.path.exists(embeddings_path) or not os.path.exists(labels_path):
    train_embeddings, train_labels = get_embeddings(model, train_loader, len(train_files), batch_size)
    torch.save(train_embeddings, embeddings_path)
    save_file_names(train_labels, labels_path)
else:
    train_embeddings = torch.load(embeddings_path)
    train_labels = []
    with open(labels_path, 'r') as fp:
        for line in fp:
            x = line[:-1]
            # add current item to the list
            train_labels.append(x)

            
if not os.path.exists(test_embeddings_path) or not os.path.exists(test_labels_path):
    test_embeddings, test_labels = get_embeddings(model, test_loader, len(test_files), batch_size)
    torch.save(test_embeddings, test_embeddings_path)
    save_file_names(test_labels, test_labels_path)
else:
    test_embeddings = torch.load(test_embeddings_path)
    test_labels = []
    with open(test_labels_path, 'r') as fp:
        for line in fp:
            x = line[:-1]
            # add current item to the list
            test_labels.append(x)

In [32]:
%%time

knn = KNeighborsClassifier(n_neighbors=1)
knn.fit(train_embeddings, train_labels)
score = knn.score(test_embeddings, test_labels)

print(f'Pre-trained model: Accuracy = {score}.')

Pre-trained model: Accuracy = 0.5863570693579736.
CPU times: user 2min 35s, sys: 43.3 s, total: 3min 18s
Wall time: 1min


## Fine-tune Model

## General Functions

In [33]:
def load_model(model, model_path:str):    
    model.load_state_dict(torch.load(model_path))
    model.eval()
    return model

In [None]:
def set_parameter_requires_grad(model, feature_extracting):
    if feature_extracting:
        for param in model.parameters():
            param.requires_grad = False

            
def finetune_model(model, loss_fn=torch.nn.CrossEntropyLoss(), metrics={}, epochs:int=10, lr:float=0.001):   
    # Make use of multiple GPUs if available
    CUDA = torch.cuda.is_available()
    nGPU = torch.cuda.device_count()

    if CUDA:
        model = model.cuda()
        if nGPU > 1:
            model = nn.DataParallel(model)
    
    # Only fine-tune the logits layer
    logits = model.module.logits.parameters() if nGPU > 1 else model.logits.parameters()
    optimizer = optim.Adam(logits, lr=lr)
    
    scheduler = MultiStepLR(optimizer, [5, 10])
    
    metric_tracker = {}
    
    writer = SummaryWriter()
    writer.iteration, writer.interval = 0, 10
    
    # set_parameter_requires_grad(model=model, feature_extracting=True)
    # model.logits.requires_grad_(True)

    for epoch in range(epochs):
        print('\nEpoch {}/{}'.format(epoch + 1, epochs))
        print('-' * 10)

        model.train()
        train_loss, train_metrics = training.pass_epoch(
            model, loss_fn, train_loader, optimizer, scheduler,
            batch_metrics=metrics, show_running=True, device=device,
            writer=writer
        )
        
        
        model.eval()
        val_loss, val_metrics = training.pass_epoch(
            model, loss_fn, val_loader,
            batch_metrics=metrics, show_running=True, device=device,
            writer=writer
        )

    writer.close()
    
    return model

In [41]:
def test(dataloader, model, loss_fn):
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    predictions = torch.tensor([])
    labels = torch.tensor([])
    
    model.eval()
    model.classify = True
    test_loss, correct = 0, 0
    
    with torch.no_grad():
        idx = 0
        for X, y in tqdm(dataloader, total=num_batches):
            idx += 1
            X, y = X.to(device), y.to(device)
            pred = model(X)
            
            if not predictions.numel():
                predictions = pred
                labels = y
            else:
                predictions = torch.cat([predictions, pred])
                labels = torch.cat([labels, y])
            test_loss += loss_fn(pred, y).item()
            correct += (pred.argmax(1) == y).type(torch.float).sum().item()
            
    test_loss /= num_batches
    correct /= size
    print(f"Test Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")
    return predictions, labels

## Training

In [None]:
epochs = 1

metrics = {
    'fps': training.BatchTimer(),
    'acc': training.accuracy
}

model_path = f'pytorch_objects/resnet_finetuned_celeba_{epochs}_epochs.pth'

if os.path.exists(model_path):
    model = InceptionResnetV1(
        classify=True,
        num_classes=len(pd.unique(file_label_mapping['person_id']))
    ).to(device)
    model = load_model(model, model_path)
else:
    model = InceptionResnetV1(
        classify=True,
        pretrained='vggface2',
        num_classes=len(pd.unique(file_label_mapping['person_id']))
    ).to(device)
    
    model = finetune_model(model, loss_fn=torch.nn.CrossEntropyLoss(), metrics=metrics, epochs=epochs, lr=0.001)
    torch.save(model.state_dict(), f'pytorch_objects/resnet_finetuned_celeba_{epochs}_epochs.pth')

## Testing

In [118]:
model.classify = False
train_embeddings, train_labels = get_embeddings(model, train_loader, len(train_files), batch_size)
test_embeddings, test_labels = get_embeddings(model, test_loader, len(test_files), batch_size)

 50%|█████     | 159/316 [01:31<01:30,  1.74it/s]
 50%|█████     | 159/316 [01:30<01:29,  1.75it/s]


In [119]:
test_predictions, test_labels = test(test_loader, model, loss_fn)

100%|██████████| 159/159 [01:31<00:00,  1.74it/s]

Test Error: 
 Accuracy: 0.2%, Avg loss: 8.176490 






In [122]:
knn = KNeighborsClassifier(n_neighbors=1)


test_embeddings = test_embeddings.cpu()
test_labels = test_labels.cpu()

knn.fit(train_embeddings, train_labels)
score = knn.score(test_embeddings, test_labels)

print(f'Fine-tuned model: Accuracy = {score}.')

Fine-tuned model: Accuracy = 9.868745682423764e-05.


# Conclusion
Fine-tuning FaceNet on a one-shot dataset does not yield desirable results, even after hyperparameter tuning. The model quickly overfits and performs worse than the default FaceNet which achieves approx. $50\%$ test accuracy on the One-Shot CelebA dataset.

In [49]:
%reload_ext tensorboard
%tensorboard --logdir runs