## Install Packages

In [91]:
import os
from facenet_pytorch import MTCNN, InceptionResnetV1, training, fixed_image_standardization
import torch
from torch import optim
from torch.optim.lr_scheduler import MultiStepLR
from torch.utils.tensorboard import SummaryWriter
from torch.utils.data import DataLoader, Dataset, SubsetRandomSampler, SequentialSampler
from torchvision import datasets
from sklearn.neighbors import KNeighborsClassifier
from matplotlib import pyplot as plt
import seaborn as sns
import numpy as np
import pandas as pd
from tqdm import tqdm
from PIL import Image
from torchvision import transforms
from sklearn.metrics import accuracy_score
import src
from src.utils.celeba_helper import CelebADataset, CelebAClassifier, save_file_names
from importlib import reload

workers = 0 if os.name == 'nt' else 2

In [92]:
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
# device = torch.device('cpu')
print('Running on device: {}'.format(device))
nGPU = torch.cuda.device_count()

Running on device: cuda:0


# Define CelebA Dataset and Loader

In [93]:
## Load the dataset
# Path to directory with all the images
img_folder = 'data/img_align_celeba'
mapping_file = 'data/identity_CelebA.txt'

In [108]:
# Spatial size of training images, images are resized to this size.
image_size = 160

# Batch size during training
batch_size = 32

# Number of workers for the dataloader
num_workers = nGPU * 4

# Whether to put fetched data tensors to pinned memory
pin_memory = True if device.type == 'cuda' else False

In [95]:
transform=transforms.Compose([
    transforms.Resize(image_size),
    transforms.ToTensor(),
    fixed_image_standardization
])

# Load the dataset from file and apply transformations
celeba_dataset = CelebADataset(img_folder, mapping_file, transform)

In [96]:
## Create a dataloader
celeba_dataloader = DataLoader(celeba_dataset,  # type: ignore
                        batch_size=batch_size,
                        num_workers=num_workers,
                        pin_memory=pin_memory,
                        shuffle=False)

# Setup FaceNet

In [97]:
mtcnn = MTCNN(
    image_size=image_size, margin=0, min_face_size=20,
    thresholds=[0.6, 0.7, 0.7], factor=0.709, post_process=True, keep_all=False,
    device=device
)

model = InceptionResnetV1(pretrained='vggface2').eval().to(device)

In [98]:
classifier = CelebAClassifier(celeba_dataloader, detection_model=mtcnn, embedding_model=model)

# One-Shot Learning


## Load Dataset

## Select Files

In [99]:
# Select train files
file_label_mapping = celeba_dataset.get_file_label_mapping()
first_file_for_each_person_df = file_label_mapping.sort_values(by='person_id').groupby('person_id').agg(['min', 'count'])
train_files = np.sort(first_file_for_each_person_df[first_file_for_each_person_df['file_name']['count'] > 1]['file_name']['min'].values)

In [100]:
# Select test files
second_file_for_each_person_df = file_label_mapping[~file_label_mapping['file_name'].isin(first_file_for_each_person_df['file_name']['min'])].sort_values(by='person_id').groupby('person_id').agg(['min', 'count'])
test_files = np.sort(second_file_for_each_person_df[second_file_for_each_person_df['file_name']['count'] >= 1]['file_name']['min'].values)

The following cells, each might take longer.

In [65]:
for similarity_metric in ['norm_2', 'norm_2_squared', 'cosine_similarity']:
    test_predictions, test_predictions_files = classifier.predict(test_embeddings, train_embeddings, train_face_file_names, similarity_metric)
    accuracy = accuracy_score(test_labels, test_predictions)
    print(f'Accuracy - {similarity_metric}: {np.round(accuracy, 4)}')

KeyboardInterrupt: 

In [None]:
%%time

# fit Support Vector Classifier
from sklearn.svm import SVC
model = SVC(kernel='linear', verbose=True)
model.fit(train_embeddings, train_labels)

[LibSVM]CPU times: user 5min 57s, sys: 8min 53s, total: 14min 50s
Wall time: 14min 52s


SVC(kernel='linear', verbose=True)

In [None]:
%%time
# train_predictions = model.predict(train_embeddings)
test_predictions = model.predict(test_embeddings)
# train_predictions = model.predict(train_embeddings)

# score_train = accuracy_score(train_labels, train_predictions)
score_test = accuracy_score(test_labels, test_predictions)
# score_train = accuracy_score(train_labels, train_predictions)

# print(f'Accuracy: train = {np.round(score_train*100, 3)}%')
print(f'Accuracy: test = {np.round(score_test*100, 3)}%')

Accuracy: test = 69.546%
CPU times: user 2h 32min 4s, sys: 11min 53s, total: 2h 43min 57s
Wall time: 2h 43min 54s


In [None]:
%%time

from sklearn.neighbors import KNeighborsClassifier

knn = KNeighborsClassifier(n_neighbors=1)

knn.fit(train_embeddings, train_labels)

knn.score(test_embeddings, test_labels)

# Predicting

In [101]:
def get_embeddings(model, dataloader, dataset_size, batch_size):
    model.eval()
    embeddings = torch.tensor([])
    
    for idx, batch in tqdm(enumerate(dataloader), total=int(dataset_size/batch_size)):
        imgs, batch_labels = batch
        batch_embeddings = model(imgs.to(device)).detach().cpu()
        
        if not embeddings.numel():
            embeddings = batch_embeddings
            labels = batch_labels
        else:
            embeddings = torch.cat([embeddings, batch_embeddings])
            labels = torch.cat([labels, batch_labels])
    
    return embeddings, labels

In [102]:
train_inds = [int(elem[:6])-1 for elem in train_files] #convert file names to indices that start at "0"
test_inds = [int(elem[:6])-1 for elem in test_files] #convert file names to indices that start at "0"

np.random.shuffle(test_inds)
val_inds = test_inds[:200] #convert file names to indices that start at "0"

train_loader = DataLoader(
    celeba_dataset,
    num_workers=workers,
    batch_size=batch_size,
    sampler=SubsetRandomSampler(train_inds)
)

val_loader = DataLoader(
    celeba_dataset,
    num_workers=workers,
    batch_size=batch_size,
    sampler=SubsetRandomSampler(val_inds)
)

test_loader = DataLoader(
    celeba_dataset,
    num_workers=workers,
    batch_size=batch_size,
    sampler=SubsetRandomSampler(test_inds)
)

## Run Pre-trained Model

In [83]:
model = InceptionResnetV1(
    pretrained='vggface2'
).eval().to(device)

In [None]:
embeddings_path = 'pytorch_objects/train_embeddings_pretrained_all_1img.pickle'
labels_path = 'pytorch_objects/train_labels_pretrained_all_1img'

if not os.path.exists(embeddings_path) or not os.path.exists(labels_path):
    train_embeddings, train_labels = get_embeddings(model, train_loader, len(train_files), batch_size)
    torch.save(train_embeddings, embeddings_path)
    save_file_names(train_labels, labels_path)
else:
    train_embeddings = torch.load(embeddings_path)
    train_labels = []
    with open(labels_path, 'r') as fp:
        for line in fp:
            x = line[:-1]
            # add current item to the list
            train_labels.append(x)

test_embeddings, test_labels = get_embeddings(model, test_loader, len(test_files), batch_size)

In [27]:
%%time

knn = KNeighborsClassifier(n_neighbors=1)
knn.fit(train_embeddings, train_labels)
score = knn.score(test_embeddings, test_labels)

print(f'Pre-trained model: Accuracy = {score}.')

Pre-trained model: Accuracy = 0.0.
CPU times: user 8.85 s, sys: 2.29 s, total: 11.1 s
Wall time: 3.35 s


  score = y_true == y_pred


## Fine-tune Model

In [110]:
def load_model(model, model_path:str):    
    model.load_state_dict(torch.load(model_path))
    model.eval()
    return model

In [114]:
def set_parameter_requires_grad(model, feature_extracting):
    if feature_extracting:
        for param in model.parameters():
            param.requires_grad = False

            
def finetune_model(model, loss_fn=torch.nn.CrossEntropyLoss(), metrics={}, epochs:int=10, lr:float=0.001):   
    # Make use of multiple GPUs if available
    CUDA = torch.cuda.is_available()
    nGPU = torch.cuda.device_count()

    if CUDA:
        model = model.cuda()
        if nGPU > 1:
            model = nn.DataParallel(model)
    
    # Only fine-tune the logits layer
    logits = model.module.logits.parameters() if nGPU > 1 else model.logits.parameters()
    optimizer = optim.Adam(logits, lr=lr)
    
    scheduler = MultiStepLR(optimizer, [5, 10])

    writer = SummaryWriter()
    writer.iteration, writer.interval = 0, 10
    
    # set_parameter_requires_grad(model=model, feature_extracting=True)
    # model.logits.requires_grad_(True)

    for epoch in range(epochs):
        print('\nEpoch {}/{}'.format(epoch + 1, epochs))
        print('-' * 10)

        model.train()
        training.pass_epoch(
            model, loss_fn, train_loader, optimizer, scheduler,
            batch_metrics=metrics, show_running=True, device=device,
            writer=writer
        )
        
        model.eval()
        training.pass_epoch(
            model, loss_fn, val_loader,
            batch_metrics=metrics, show_running=True, device=device,
            writer=writer
        )

    writer.close()
    
    return model

In [115]:
def test(dataloader, model, loss_fn):
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    predictions = torch.tensor([])
    labels = torch.tensor([])
    
    model.eval()
    model.classify = True
    test_loss, correct = 0, 0
    
    with torch.no_grad():
        idx = 0
        for X, y in tqdm(dataloader, total=num_batches):
            idx += 1
            X, y = X.to(device), y.to(device)
            pred = model(X)
            
            if not predictions.numel():
                predictions = pred
                labels = y
            else:
                predictions = torch.cat([predictions, pred])
                labels = torch.cat([labels, y])
            test_loss += loss_fn(pred, y).item()
            correct += (pred.argmax(1) == y).type(torch.float).sum().item()
            
    test_loss /= num_batches
    correct /= size
    print(f"Test Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")
    return predictions, labels

In [116]:
epochs = 20

metrics = {
    'fps': training.BatchTimer(),
    'acc': training.accuracy
}

model_path = f'pytorch_objects/resnet_finetuned_celeba_{epochs}_epochs.pth'

if os.path.exists(model_path):
    model = InceptionResnetV1(
        classify=True,
        num_classes=len(pd.unique(file_label_mapping['person_id']))
    ).to(device)
    model = load_model(model, model_path)
else:
    model = InceptionResnetV1(
        classify=True,
        pretrained='vggface2',
        num_classes=len(pd.unique(file_label_mapping['person_id']))
    ).to(device)
    
    model = finetune_model(model, loss_fn=torch.nn.CrossEntropyLoss(), metrics=metrics, epochs=epochs, lr=0.001)
    torch.save(model.state_dict(), f'pytorch_objects/resnet_finetuned_celeba_{epochs}_epochs.pth')


Epoch 1/20
----------
Train |   159/159  | loss:    9.3846 | fps:   55.5395 | acc:    0.0000   
Valid |     2/2    | loss:    8.9578 | fps:   77.3039 | acc:    0.0078   

Epoch 2/20
----------
Train |   159/159  | loss:    7.9492 | fps:   55.8236 | acc:    0.0183   
Valid |     2/2    | loss:    8.5705 | fps:   79.5687 | acc:    0.0139   

Epoch 3/20
----------
Train |    80/159  | loss:    6.3300 | fps:   55.7178 | acc:    0.1385   

Exception ignored in: <bound method _MultiProcessingDataLoaderIter.__del__ of <torch.utils.data.dataloader._MultiProcessingDataLoaderIter object at 0x7f847b161710>>
Traceback (most recent call last):
  File "/opt/conda/envs/one-shot-face-recognition/lib/python3.6/site-packages/torch/utils/data/dataloader.py", line 1328, in __del__
    self._shutdown_workers()
  File "/opt/conda/envs/one-shot-face-recognition/lib/python3.6/site-packages/torch/utils/data/dataloader.py", line 1320, in _shutdown_workers
    if w.is_alive():
  File "/opt/conda/envs/one-shot-face-recognition/lib/python3.6/multiprocessing/process.py", line 134, in is_alive
    assert self._parent_pid == os.getpid(), 'can only test a child process'
AssertionError: can only test a child process


Train |    81/159  | loss:    6.3353 | fps:   55.7254 | acc:    0.1372   

Exception ignored in: <bound method _MultiProcessingDataLoaderIter.__del__ of <torch.utils.data.dataloader._MultiProcessingDataLoaderIter object at 0x7f847b161710>>
Traceback (most recent call last):
  File "/opt/conda/envs/one-shot-face-recognition/lib/python3.6/site-packages/torch/utils/data/dataloader.py", line 1328, in __del__
    self._shutdown_workers()
  File "/opt/conda/envs/one-shot-face-recognition/lib/python3.6/site-packages/torch/utils/data/dataloader.py", line 1320, in _shutdown_workers
    if w.is_alive():
  File "/opt/conda/envs/one-shot-face-recognition/lib/python3.6/multiprocessing/process.py", line 134, in is_alive
    assert self._parent_pid == os.getpid(), 'can only test a child process'
AssertionError: can only test a child process


Train |   159/159  | loss:    6.5606 | fps:   55.7557 | acc:    0.0780   
Valid |     2/2    | loss:    8.4068 | fps:   78.6381 | acc:    0.0278   

Epoch 4/20
----------
Train |   159/159  | loss:    5.3797 | fps:   55.7625 | acc:    0.1953   
Valid |     2/2    | loss:    8.3897 | fps:   78.8872 | acc:    0.0295   

Epoch 5/20
----------
Train |   159/159  | loss:    4.4122 | fps:   55.6962 | acc:    0.3488   
Valid |     2/2    | loss:    8.4311 | fps:   78.8398 | acc:    0.0356   

Epoch 6/20
----------
Train |   159/159  | loss:    3.1120 | fps:   55.7081 | acc:    0.7476   
Valid |     2/2    | loss:    8.3716 | fps:   79.0073 | acc:    0.0295   

Epoch 7/20
----------
Train |   159/159  | loss:    3.0095 | fps:   55.8299 | acc:    0.7679   
Valid |     2/2    | loss:    8.4862 | fps:   79.0273 | acc:    0.0295   

Epoch 8/20
----------
Train |   159/159  | loss:    2.9118 | fps:   55.7379 | acc:    0.7937   
Valid |     2/2    | loss:    8.4357 | fps:   78.7237 | acc:    0.0356 

In [118]:
model.classify = False
train_embeddings, train_labels = get_embeddings(model, train_loader, len(train_files), batch_size)
test_embeddings, test_labels = get_embeddings(model, test_loader, len(test_files), batch_size)

 50%|█████     | 159/316 [01:31<01:30,  1.74it/s]
 50%|█████     | 159/316 [01:30<01:29,  1.75it/s]


In [119]:
test_predictions, test_labels = test(test_loader, model, loss_fn)

100%|██████████| 159/159 [01:31<00:00,  1.74it/s]

Test Error: 
 Accuracy: 0.2%, Avg loss: 8.176490 






In [122]:
knn = KNeighborsClassifier(n_neighbors=1)


test_embeddings = test_embeddings.cpu()
test_labels = test_labels.cpu()

knn.fit(train_embeddings, train_labels)
score = knn.score(test_embeddings, test_labels)

print(f'Fine-tuned model: Accuracy = {score}.')

Fine-tuned model: Accuracy = 9.868745682423764e-05.


# Conclusion
Fine-tuning FaceNet on a one-shot dataset does not yield desirable results, even after hyperparameter tuning. The model quickly overfits and performs worse than the default FaceNet which achieves approx. $50\%$ test accuracy on the One-Shot CelebA dataset.

In [49]:
%reload_ext tensorboard
%tensorboard --logdir runs