In [None]:
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pickle
import tqdm

from torch.utils.data import DataLoader
from sklearn.decomposition import PCA
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score

Set the device. Using CUDA on CUDA-enabled devices speeds up the use of convolutional networks significantly.

In [None]:
cuda_enabled = torch.cuda.is_available()

# Prefer the second GPU ("cuda:1") as before, but fall back to the first GPU on
# machines that expose only one CUDA device, where "cuda:1" would be an invalid
# device ordinal. Without CUDA, everything runs on the CPU.
if cuda_enabled:
    cuda_index = 1 if torch.cuda.device_count() > 1 else 0
    device = torch.device(f"cuda:{cuda_index}")
else:
    device = torch.device("cpu")

We create the descriptors through transfer learning. We remove the last fully-connected (classification) layers of pre-trained models and use the output of the convolutional part of the respective model as descriptors. We do this for:

- VGG16

These models were chosen because of ... **TODO: List reasons for choosing models**

## Load datasets

In [2]:
# Placeholder values: every name below is a stand-in until the real dataset is
# loaded.

# TODO: Obtain the number of classes
n_classes = 0

# TODO: Actually get a numpy array of data and targets
data_train, data_test = np.zeros((1, 1)), np.zeros((1, 1))
targets_train, targets_test = np.zeros((1, 1)), np.zeros((1, 1))

NameError: name 'np' is not defined

## Visualise datasets

## Initialize models

### VGG16

In [None]:
# Load VGG16 with the default pre-trained weights shipped by torchvision.
model_vgg16 = torchvision.models.vgg16(weights="DEFAULT")

# The model is only used for inference, never trained, so no parameter needs a
# gradient. Freezing all of them at once avoids unnecessary bookkeeping.
model_vgg16.requires_grad_(False)

# Evaluation mode switches layers whose behaviour differs between training and
# inference (e.g. dropout, batch normalisation) to their inference behaviour.
model_vgg16.eval()

# Release cached CUDA memory up front to reduce the risk of memory issues.
if cuda_enabled:
    torch.cuda.empty_cache()

# Swapping the classifier for an identity layer makes the forward pass return
# the input of the former classifier, which serves as the image descriptor.
model_vgg16.classifier = nn.Identity()

# Place the model on the device selected earlier (GPU if CUDA is available).
model_vgg16 = model_vgg16.to(device)

## Baseline

### Obtain image descriptors

In [None]:
def get_descriptors_from_model(model, dataloader):
    """Returns the output of the provided model for all items in the dataloader.

    Args:
        model: A ``torch.nn.Module`` that is called on every batch.
        dataloader: An iterable yielding ``(data, targets)`` pairs. Only
            ``data`` is used; it is moved to the device of the model's
            parameters before the forward pass.

    Returns:
        A flat list of tensors, obtained by iterating over the first dimension
        of each batch output. The tensors stay on the device the model produced
        them on.
    """
    # Inputs have to live on the same device as the model weights, otherwise
    # the forward pass fails once the model has been moved to a GPU. A model
    # without parameters gives no device hint, so its inputs are left alone.
    first_parameter = next(iter(model.parameters()), None)
    model_device = None if first_parameter is None else first_parameter.device

    outputs = []

    # Improve performance by disabling unnecessary gradient calculation
    with torch.no_grad():
        # `tqdm` is imported as a module in this notebook, so the progress-bar
        # callable is `tqdm.tqdm`; calling the module itself raises TypeError.
        for data, _ in tqdm.tqdm(dataloader):
            if model_device is not None:
                data = data.to(model_device)
            output = model(data).detach()
            outputs.extend(output)

    return outputs


def transform_descriptors_to_numpy(descriptors):
    """Converts a list of tensors into a single numpy array.

    Every tensor is first copied to the CPU and turned into a numpy array; the
    resulting arrays are then combined with ``np.array``.
    """
    arrays = []
    for tensor in descriptors:
        arrays.append(tensor.cpu().numpy())
    return np.array(arrays)

In [None]:
# TODO: Actually get descriptors for data
# Until then, both descriptor sets are stand-in 1x1 arrays of zeros.
descriptors_train, descriptors_test = np.zeros((1, 1)), np.zeros((1, 1))

### Perform k-nearest neighbors (kNN) classification

We start by defining helper functions that fit the kNN classifiers and collect their predictions.

In [None]:
def get_knn_classifiers_for_neighbors(n_neighbors_list, data, targets):
    """Fits one kNN classifier per requested number of neighbors.

    Returns a dict that maps every entry of ``n_neighbors_list`` to a
    ``KNeighborsClassifier`` created with that ``n_neighbors`` value and fitted
    on ``data`` and ``targets``.
    """
    return {
        k: KNeighborsClassifier(n_neighbors=k).fit(data, targets)
        for k in n_neighbors_list
    }


def get_knn_classifiers_predictions(knn_classifiers, data):
    """Returns the predictions of every provided classifier for ``data``.

    Args:
        knn_classifiers: Either a dict whose values are fitted classifiers (as
            returned by ``get_knn_classifiers_for_neighbors``) or any other
            iterable of fitted classifiers.
        data: Samples passed unchanged to each classifier's ``predict``.

    Returns:
        A list with one ``predict`` result per classifier, in iteration order
        (insertion order for dict input).
    """
    # Iterating a dict yields its keys (the neighbor counts), not the fitted
    # classifiers, so the values have to be requested explicitly.
    if isinstance(knn_classifiers, dict):
        classifiers = list(knn_classifiers.values())
    else:
        classifiers = list(knn_classifiers)

    predictions = []
    # `tqdm` is imported as a module in this notebook, so the progress-bar
    # callable is `tqdm.tqdm`; calling the module itself raises TypeError.
    for classifier in tqdm.tqdm(classifiers):
        predictions.append(classifier.predict(data))
    return predictions

Next, we create kNN classifiers for each number of neighbors we would like to assess.

In [None]:
# Neighbor counts to assess; one classifier is fitted per entry, using the
# training descriptors and training targets.
n_neighbors_list = [2, 5, 10, 15]
classifiers = get_knn_classifiers_for_neighbors(
    n_neighbors_list=n_neighbors_list,
    data=descriptors_train,
    targets=targets_train,
)

For each of the previously created kNN classifiers, we obtain the predictions for both the training data and the test data. This allows an assessment of the performance of the classifiers.

In [None]:
# Predictions for the descriptors the classifiers were fitted on ...
predictions_train_list = get_knn_classifiers_predictions(
    knn_classifiers=classifiers,
    data=descriptors_train,
)
# ... and for the test descriptors.
predictions_test_list = get_knn_classifiers_predictions(
    knn_classifiers=classifiers,
    data=descriptors_test,
)

We now assess the accuracy of each classifier's predictions on both the training data and the test data.

In [None]:
# Each result is a list of (n_neighbors, accuracy) tuples, in the order of
# `n_neighbors_list`. The prediction lists are expected to follow that same
# order, one entry per neighbor count.
train_accuracies_per_n_neighbors = [
    (n_neighbors, accuracy_score(targets_train, predictions))
    for n_neighbors, predictions in zip(n_neighbors_list,
                                        predictions_train_list)
]

# Test predictions must be scored against the test targets, not the training
# targets.
test_accuracies_per_n_neighbors = [
    (n_neighbors, accuracy_score(targets_test, predictions))
    for n_neighbors, predictions in zip(n_neighbors_list,
                                        predictions_test_list)
]

In [None]:
# Fit the two-component projection on the training descriptors only and reuse
# it for the test descriptors. Both sets are then expressed in the same two
# principal components, which keeps the two scatter plots comparable and avoids
# fitting anything on test data.
pca = PCA(n_components=2).fit(descriptors_train)
pca_training = pca.transform(descriptors_train)
pca_testing = pca.transform(descriptors_test)

In [None]:
def plot_predictions_against_targets(data,
                                     predictions,
                                     targets,
                                     n_classes,
                                     cmap=None):
    """Draws a scatter plot of two-dimensional data which highlights
    differences between targets and predictions through distinct edge and fill
    colors.

    Args:
        data: Array indexed as ``data[:, 0]`` (x) and ``data[:, 1]`` (y).
        predictions: Predicted class value per point; determines the marker
            edge color.
        targets: Target class value per point; determines the marker fill
            color.
        n_classes: Number of classes. Class values are divided by it to obtain
            the position in the colormap.
        cmap: Colormap used for both colorings. Defaults to matplotlib's
            "rainbow" colormap.

    Returns:
        The ``(fig, ax)`` tuple of the created plot.
    """

    if cmap is None:
        cmap = plt.get_cmap("rainbow")

    # Accept lists as well as arrays, and flatten column vectors such as
    # (n, 1) so that exactly one color per point is produced.
    targets = np.asarray(targets).ravel()
    predictions = np.asarray(predictions).ravel()

    # Avoid a division by zero while `n_classes` is still unset (0).
    color_scale = max(n_classes, 1)
    colors_target = cmap(targets / color_scale)
    colors_predictions = cmap(predictions / color_scale)

    fig, ax = plt.subplots()
    # A single scatter call carries both encodings: the fill shows the target
    # class, the edge shows the predicted class. Drawing predictions as a
    # second scatter would repaint every marker with the default fill color
    # and hide the target colors underneath.
    ax.scatter(data[:, 0],
               data[:, 1],
               facecolors=colors_target,
               edgecolors=colors_predictions)

    return (fig, ax)

In [None]:
# Visualise the classifier with the highest test accuracy (the earliest entry
# wins ties). Both prediction lists hold one entry per classifier in the same
# order, so one index selects the matching train and test predictions.
# NOTE(review): no selection criterion for the "best" predictions is defined in
# the visible cells — confirm that test accuracy is the intended one.
best_index = max(
    range(len(predictions_test_list)),
    key=lambda index: accuracy_score(targets_test,
                                     predictions_test_list[index]),
)
best_predictions_train = predictions_train_list[best_index]
best_predictions_test = predictions_test_list[best_index]

_, ax_train_scatter = plot_predictions_against_targets(pca_training,
                                                       best_predictions_train,
                                                       targets_train,
                                                       n_classes)
ax_train_scatter.set_title("Two-component PCA, training data")

_, ax_test_scatter = plot_predictions_against_targets(pca_testing,
                                                      best_predictions_test,
                                                      targets_test,
                                                      n_classes)
# The trailing semicolon suppresses the textual repr of the title object.
ax_test_scatter.set_title("Two-component PCA, test data");