In [None]:
%%sh
# Colab does not have Faiss, you need to install it.
pip install faiss-cpu

In [None]:
import cv2
import numpy as np
import matplotlib.pyplot as plt
import copy
import random
from tqdm import tqdm
import torch
import faiss
from torchvision import datasets

In [None]:
class Visualizer():
    """Small convenience wrapper around a grid of matplotlib subplots.

    The grid is always stored as a 2-D array (``squeeze=False``), so every
    ``add_*`` method addresses a subplot by its ``(i, j)`` row/column index,
    even for a 1 x 1 figure.

    Attributes:
        fig: the matplotlib figure that owns the grid.
        axs: 2-D array of axes with shape ``(num_rows, num_cols)``.
    """

    def __init__(self, num_rows=1, num_cols=1, figsize=(5,5), axis_off=True, title='', tight=False, cm=None):
        """Create the figure and its grid of subplots.

        Args:
            num_rows: number of subplot rows.
            num_cols: number of subplot columns.
            figsize: figure size forwarded to ``plt.subplots``.
            axis_off: if True, remove the x/y ticks from every subplot.
            title: super-title of the figure.
            tight: if True, set the top of the subplot area to 0.88 so the
                super-title does not overlap the subplots.
            cm: optional colormap handed to ``plt.set_cmap``. Note that this
                changes pyplot's default colormap, not only this figure's.
        """
        self.fig, self.axs = plt.subplots(num_rows, num_cols, figsize=figsize, squeeze=False)
        # remove ticks (use this figure's axes explicitly instead of relying on
        # pyplot's notion of the "current" figure)
        if axis_off:
            plt.setp(self.fig.get_axes(), xticks=[], yticks=[])
        # set colormap
        if cm is not None:
            plt.set_cmap(cm)
        # set supertitle
        self.fig.suptitle(title)
        if tight:
            self.fig.subplots_adjust(top=0.88)

    def add_image_subplot(self, i, j, image, normalize=False, title_str=''):
        """Show ``image`` in subplot ``(i, j)``.

        Args:
            i, j: row and column of the target subplot.
            image: array to display. 3-D arrays are treated as BGR and get
                their last axis reversed to RGB before being shown.
            normalize: if True, rescale the image to [0, 1] first
                (see ``normalize_image``).
            title_str: subplot title.
        """
        if normalize:
            image = self.normalize_image(image)
        if len(image.shape) == 3:
            # BGR -> RGB
            image = image[:, :, ::-1]
        ax = self.axs[i, j]
        ax.imshow(image)
        ax.set_title(title_str)

    def add_stem_subplot(self, i, j, x, y, title_str=''):
        """Draw a stem plot of ``y`` over ``x`` in subplot ``(i, j)``."""
        ax = self.axs[i, j]
        ax.stem(x, y)
        ax.set_title(title_str)

    def add_subplot(self, i, j, data, title_str=''):
        """Draw a line plot of ``data`` in subplot ``(i, j)``."""
        ax = self.axs[i, j]
        ax.plot(data)
        ax.set_title(title_str)

    def add_bar_subplot(self, i, j, x, y, title_str=''):
        """Draw a bar chart with heights ``y`` at positions ``x`` in subplot ``(i, j)``."""
        ax = self.axs[i, j]
        ax.bar(x, y)
        ax.set_title(title_str)

    def add_scatter_subplot_with_labels(self, i, j, data, labels, legend=None, xlabel=None, ylabel=None, title_str=''):
        """Scatter 2-D points in subplot ``(i, j)``, coloured by ``labels``.

        Args:
            i, j: row and column of the target subplot.
            data: array whose columns 0 and 1 hold the x and y coordinates.
            labels: per-point values used as scatter colours.
            legend: optional list of legend entries, matched to the handles
                returned by ``scatter.legend_elements()``.
            xlabel: optional x-axis label.
            ylabel: optional y-axis label.
            title_str: subplot title.
        """
        ax = self.axs[i, j]
        scatter = ax.scatter(data[:, 0], data[:, 1], c=labels)
        if legend is not None:
            # attach the legend to the addressed subplot, not to whatever
            # pyplot currently considers the active axes
            ax.legend(handles=scatter.legend_elements()[0], labels=legend)
        if xlabel is not None:
            ax.set_xlabel(xlabel)
        # the y label is controlled by `ylabel` alone, independent of `xlabel`
        if ylabel is not None:
            ax.set_ylabel(ylabel)
        ax.set_title(title_str)

    @staticmethod
    def normalize_image(image):
        """Linearly rescale ``image`` to the range [0, 1] as float64.

        A constant image has no dynamic range; it is returned as all zeros
        instead of being divided by zero (which would produce NaNs).
        """
        img = np.float64(image) - np.min(image)
        peak = np.max(img)
        if peak > 0:
            img /= peak
        return img

In [None]:
# download (if necessary) and load both CIFAR10 splits
train_dataset = datasets.CIFAR10('../data', train=True, download=True)
test_dataset = datasets.CIFAR10('../data', train=False, download=True)

# show the first 30 training samples in a 3 x 10 grid
vis = Visualizer(3, 10, figsize=(25,5), title='CIFAR10 samples')
for i in range(30):
    pil_image, label = train_dataset[i]
    row, col = divmod(i, 10)
    # the dataset yields RGB images, while add_image_subplot reverses the
    # channel axis (BGR -> RGB); pre-reverse here so colours come out right
    image = np.array(pil_image)[:, :, ::-1]
    vis.add_image_subplot(row, col, image, title_str=train_dataset.classes[label])


In [None]:
# class indices of the two categories used below (CIFAR10 names boats 'ship')
boat_idx, deer_idx = (train_dataset.class_to_idx[name] for name in ('ship', 'deer'))

def color_feature(image, threshold=128):
    """Count the bright blue-dominant and green-dominant pixels of an image.

    A pixel counts towards a colour when that channel holds the maximum value
    along the last axis (ties go to the lowest channel index, as with
    ``np.argmax``) and that maximum is strictly greater than ``threshold``.
    Channel 1 is counted as green and channel 2 as blue, i.e. the image is
    expected in RGB channel order.

    Args:
        image: array-like of shape (height, width, channels).
        threshold: minimum intensity (exclusive) for a pixel to be counted.

    Returns:
        Array ``[blue_pixels, green_pixels]``.
    """
    image = np.asarray(image)
    dominant_channel = np.argmax(image, axis=2)
    # only sufficiently bright pixels are counted at all
    is_bright = np.max(image, axis=2) > threshold
    blue_pixels = np.sum(is_bright & (dominant_channel == 2))
    green_pixels = np.sum(is_bright & (dominant_channel == 1))
    return np.array([blue_pixels, green_pixels])

def compute_boat_deer_features(dataset):
    """Extract colour features and binary labels for the boat and deer samples.

    Only samples whose target equals the module-level ``boat_idx`` or
    ``deer_idx`` are kept. Every sample is fetched from ``dataset`` exactly
    once, so the per-item work of ``dataset[i]`` is not repeated.

    Args:
        dataset: indexable dataset whose items are ``(image, target)`` pairs.

    Returns:
        features: float array of shape (num_kept, 2) holding the output of
            ``color_feature`` for each kept image.
        labels: array with 0 for boats and 1 for deer, aligned with
            ``features``.
    """
    # restrict dataset to boats and deer
    boat_deer_images = []
    boat_deer_labels = []
    for i in range(len(dataset)):
        sample = dataset[i]
        target = sample[1]
        if target == boat_idx:
            label = 0
        elif target == deer_idx:
            label = 1
        else:
            continue
        boat_deer_images.append(sample[0])
        boat_deer_labels.append(label)
    boat_deer_labels = np.array(boat_deer_labels)

    # compute all features
    features = np.zeros((len(boat_deer_images), 2))
    for i, image in enumerate(tqdm(boat_deer_images)):
        features[i] = color_feature(np.array(image))
    return features, boat_deer_labels

# colour features and boat/deer labels for both splits
train_features, train_labels = compute_boat_deer_features(dataset=train_dataset)
test_features, test_labels = compute_boat_deer_features(dataset=test_dataset)

# scatter the first 200 training samples in feature space
vis = Visualizer(num_rows=1, num_cols=1, figsize=(5,5), axis_off=False, title='Color features')
vis.add_scatter_subplot_with_labels(
    0, 0,
    data=train_features[:200],
    labels=train_labels[:200],
    legend=['boat', 'deer'],
    xlabel='# Blue pixels',
    ylabel='# Green pixels',
)

In [None]:
# compute NN lookup index in faiss
# faiss works on C-contiguous float32 matrices, while the features are stored
# as float64, so convert explicitly instead of relying on an implicit cast
train_vectors = np.ascontiguousarray(train_features, dtype=np.float32)
# take the dimensionality from the data rather than hard-coding it
index = faiss.IndexFlatL2(train_vectors.shape[1])
index.add(train_vectors)

# compute nearest neighbor classification accuracy on the test set
def nn_classify(k=1):
    """Return the k-nearest-neighbour classification accuracy on the test set.

    Uses the module-level ``index`` to look up the ``k`` nearest training
    samples of every row in ``test_features`` and predicts the most frequent
    label among them (ties resolve to the smallest label, as ``np.argmax``
    returns the first maximum).

    Args:
        k: number of neighbours that vote for each test sample.

    Returns:
        Fraction of test samples whose prediction equals ``test_labels``.
    """
    # faiss expects C-contiguous float32 queries
    queries = np.ascontiguousarray(test_features, dtype=np.float32)
    _, indices = index.search(queries, k)
    predictions = np.zeros(len(test_labels))
    for i in range(len(test_labels)):
        neighbors = indices[i, :]
        # faiss pads with -1 when fewer than k neighbours exist; drop those
        # so they do not silently index the last training label
        neighbors = neighbors[neighbors >= 0]
        predictions[i] = np.argmax(np.bincount(train_labels[neighbors]))
    accuracy = np.sum(predictions == test_labels) / len(test_labels)
    return accuracy

# evaluate the classifier for a range of neighbourhood sizes
ks = [1, 2, 5, 10, 50, 100]
accuracies = []
for k in ks:
    accuracies += [nn_classify(k=k)]

# stem plot of accuracy against k
vis = Visualizer(num_rows=1, num_cols=1, figsize=(5,5), axis_off=False, title='Accuracy for different k')
vis.add_stem_subplot(0, 0, x=ks, y=accuracies, title_str='Accuracy')


In [None]:
# create softmax examples
def softmax(x, temperature=1, axis=None):
    """Temperature-scaled softmax.

    The maximum is subtracted before exponentiating. This leaves the result
    mathematically unchanged but prevents overflow (and the resulting NaNs)
    for large values of ``x / temperature``.

    Args:
        x: array-like of scores.
        temperature: divisor applied to the scores before the softmax.
        axis: axis along which to normalise. The default ``None`` normalises
            over all elements.

    Returns:
        Array of the same shape as ``x`` whose entries sum to 1 along ``axis``.
    """
    scaled = np.asarray(x) / temperature
    shifted = scaled - np.max(scaled, axis=axis, keepdims=True)
    weights = np.exp(shifted)
    return weights / np.sum(weights, axis=axis, keepdims=True)

# raw scores for the softmax demo; named `logits` rather than `input` so the
# built-in input() is not shadowed for the rest of the notebook
logits = np.array([5, -2, -3, 3, 7])
positions = np.arange(logits.shape[0])

# plot input
vis = Visualizer(1, 1, figsize=(5,5), axis_off=False, title='Input')
vis.add_bar_subplot(0, 0, positions, logits)

# plot softmax for different temperatures
temperatures = [0.1, 0.5, 1, 2, 10]
vis = Visualizer(1, len(temperatures), figsize=(25,5), axis_off=False, title='Softmax')
for i, temperature in enumerate(temperatures):
    vis.add_bar_subplot(0, i, positions, softmax(logits, temperature=temperature), title_str=f'tau={temperature}')