In [1]:
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pickle
import tqdm
import os
import re

from torch.utils.data import DataLoader
from sklearn.decomposition import PCA
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score
from PIL import Image

Set the device. Using CUDA on CUDA-enabled devices speeds up the use of convolutional networks significantly.

In [2]:
cuda_enabled = torch.cuda.is_available()
device = torch.device("cuda:0" if cuda_enabled else "cpu")

We create the descriptors through transfer learning. We remove the last fully-connected (classification) layers of pre-trained models and use the output of the convolutional part of the respective model as descriptors. We do this for:

- VGG16

These models were chosen because of ... **TODO: List reasons for choosing models**

## Load datasets

In [3]:
# Because the dataset contains images with a filename starting with a ".",
# these files are hidden, giving problems down the line with reading. As such,
# rename them by adding "img" as a filename prefix.

base_path = "./data/Incidents-subset"
directories = os.listdir(os.path.expanduser(base_path))
for directory in directories:
    files = os.listdir(os.path.expanduser(base_path + "/" + directory))
    for file in files:
        if re.match(r".*.((jpg)|(png)|(jpeg))", file, re.IGNORECASE) and not re.match(r"img.*", file):
            os.rename(os.path.expanduser(base_path + "/" + directory + "/" + file), os.path.expanduser(base_path + "/" + directory + "/img" + file))

In [4]:
# Some images are corrupted, so we need a check whether this is the case
# before images are added to the dataset. This is done by calling the function
# below in the is_valid_file parameter of the ImageFolder function.

def check_Image(path):
    try:
        im = Image.open(path)
        return True
    except:
        return False

In [5]:
# Use the same transform and normalisation as used in the paper source code
normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                    std=[0.229, 0.224, 0.225])

transform = transforms.Compose([
            transforms.Resize(256),
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            normalize,
        ])

dataset = torchvision.datasets.ImageFolder('./data/Incidents-subset', transform=transform, is_valid_file=check_Image)

In [6]:
n_classes = 12

data_train, data_test = torch.utils.data.random_split(dataset, [0.7, 0.3])

targets_train = [target for (_, target) in data_train]
targets_test = [target for (_, target) in data_test]

training_dl = DataLoader(data_train, batch_size=4, shuffle=False)
testing_dl = DataLoader(data_test, batch_size=4, shuffle=False)



## Visualise datasets

## Initialize models

### VGG16

In [7]:
model_vgg16 = torchvision.models.vgg16(weights="DEFAULT")

for param in model_vgg16.parameters():
    # As the model will not be trained, gradients are not required. Disabling
    # them speeds up performance.
    param.requires_grad = False

# Set model to evaluation mode for reasons
# TODO: Check why model should be set to evaluation mode
model_vgg16.eval()

# Empty CUDA cache to prevent memory issues
if cuda_enabled:
    torch.cuda.empty_cache()

# Replace VGG16 classifier with identity layer to allow descriptor extraction
model_vgg16.classifier = nn.Identity()

# Move model to previously set device, speeding up performance if CUDA-enabled
model_vgg16 = model_vgg16.to(device)

## Baseline

### Obtain image descriptors

In [10]:
def get_descriptors_from_model(model, dataloader):
    """Returns the output of the provided model for all items in the dataloader."""
    outputs = []

    # Improve performance by disabling unnecessary gradient calculation
    with torch.no_grad():
        for data, targets in tqdm.tqdm(dataloader):
            data = data.to(device)
            output = model(data).detach()
            outputs.extend(output)

    return outputs


def transform_descriptors_to_numpy(descriptors):
    """Returns a numpy array derived from a provided list of tensors."""
    return np.array([descriptor.cpu().numpy() for descriptor in descriptors])

In [11]:
descriptors_train = get_descriptors_from_model(model_vgg16, training_dl)
descriptors_test = get_descriptors_from_model(model_vgg16, testing_dl)

100%|██████████| 1288/1288 [02:21<00:00,  9.12it/s]
100%|██████████| 552/552 [00:52<00:00, 10.50it/s]


In [12]:
descriptors_train = transform_descriptors_to_numpy(descriptors_train)
descriptors_test = transform_descriptors_to_numpy(descriptors_test)

### Perform k-nearest neighbors (kNN) classification

First, we perform the predictions.

In [30]:
def get_knn_classifiers_for_neighbors(n_neighbors_list, data, targets):
    classifiers = {}
    for n_neighbors in n_neighbors_list:
        classifier = KNeighborsClassifier(n_neighbors=n_neighbors).fit(data,
                                                                       targets)
        classifiers[n_neighbors] = classifier
    return classifiers


def get_knn_classifiers_predictions(knn_classifiers, data):
    predictions = []
    for classifier in tqdm.tqdm(knn_classifiers):
        predictions.append(knn_classifiers[classifier].predict(data))

First, we create KNN classifiers for all numbers of neighbors we would like to assess.

In [31]:
n_neighbors_list = [2, 5, 10, 15]
classifiers = get_knn_classifiers_for_neighbors(n_neighbors_list,
                                                descriptors_train,
                                                targets_train)

For each of the previously created kNN classifiers, we obtain the predictions for both the training data and the test data. This allows an assessment of the performance of the classifiers.

In [32]:
predictions_train_list = get_knn_classifiers_predictions(classifiers, descriptors_train)
predictions_test_list = get_knn_classifiers_predictions(classifiers, descriptors_test)

100%|██████████| 4/4 [00:24<00:00,  6.18s/it]
100%|██████████| 4/4 [00:15<00:00,  3.87s/it]


We now assess the accuracy of the predictions of each classifier

In [33]:
train_accuracies_per_n_neighbors = []
for i, predictions in enumerate(predictions_train_list):
    accuracy = accuracy_score(targets_train, predictions)
    train_accuracies_per_n_neighbors.append((n_neighbors_list[i], accuracy))

test_accuracies_per_n_neighbors = {}
for i, predictions in enumerate(predictions_test_list):
    accuracy = accuracy_score(targets_train, predictions)
    test_accuracies_per_n_neighbors.append((n_neighbors_list[i], accuracy))

TypeError: 'NoneType' object is not iterable

In [None]:
pca_training = PCA(n_components=2).fit_transform(descriptors_train)
pca_testing = PCA(n_components=2).fit_transform(descriptors_test)

In [None]:
def plot_predictions_against_targets(data,
                                     predictions,
                                     targets,
                                     n_classes,
                                     cmap=None):
    """Draws a scatter plot of two-dimensional data which highlights
    differences between targets and predictions through distinct edge and fill
    colors."""

    if (cmap is None):
        cmap = plt.get_cmap("rainbow")

    colors_target = cmap(targets / n_classes)
    colors_predictions = cmap(predictions / n_classes)

    fig, ax = plt.subplots()
    # Plot targets (ground truth) with fill color representing target class
    ax.scatter(data[:, 0],
               data[:, 1],
               facecolors=colors_target)
    
    # Plot predictions with edge color representing predicted class
    ax.scatter(data[:, 0],
               data[:, 1],
               edgecolors=colors_predictions)

    return (fig, ax)

In [None]:
_, ax_train_scatter = plot_predictions_against_targets(pca_training,
                                                       best_predictions_train,
                                                       targets_train,
                                                       n_classes)
ax_train_scatter.set_title("Two-component PCA, training data")

_, ax_test_scatter = plot_predictions_against_targets(pca_testing,
                                                      best_predictions_test,
                                                      targets_test,
                                                      n_classes)
ax_test_scatter.set_title("Two-component PCA, test data")