In [None]:
from tqdm.notebook import tqdm
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split, KFold
import matplotlib.pyplot as plt
from plots import heatmap, annotate_heatmap

In [None]:
def import_data(fn):
    """Load a numeric text file and split it into inputs and labels.

    Parameters
    ----------
    fn : str or path-like
        File readable by ``np.loadtxt``. The first column of every row is
        taken as the label, the remaining columns as the inputs.

    Returns
    -------
    inputs : np.ndarray
        All columns except the first, one row per sample.
    labels : np.ndarray
        The first column, one entry per sample.
    """
    # ndmin=2 keeps a file with a single row two-dimensional; without it
    # loadtxt returns a 1-D array and the column slicing below raises.
    data = np.loadtxt(fn, ndmin=2)
    inputs, labels = data[:, 1:], data[:, 0]
    return inputs, labels

In [None]:
class PolynomialKernel:
    """Polynomial kernel evaluated from a matrix of dot products computed once.

    The constructor stores ``np.dot(X1, X2.T)`` (``X2`` defaults to ``X1``) in
    ``base_kernel_matrix``; ``kernel`` raises a selected sub-block of that
    matrix element-wise to the requested power.
    """

    def __init__(self, X1, X2=None):
        other = X1 if X2 is None else X2
        self.base_kernel_matrix = self.base_kernel(X1, other)

    def base_kernel(self, a, b):
        """Return the dot products between ``a`` and the transpose of ``b``."""
        return np.dot(a, b.T)

    def kernel(self, hparam, row_indices=None, col_indices=None):
        """Return the selected block of the base matrix raised to ``hparam``.

        ``row_indices`` / ``col_indices`` select rows and columns of
        ``base_kernel_matrix``; leaving one as ``None`` selects every row or
        every column respectively.
        """
        gram = self.base_kernel_matrix
        rows = np.arange(gram.shape[0]) if row_indices is None else row_indices
        cols = np.arange(gram.shape[1]) if col_indices is None else col_indices
        selected = gram[np.ix_(rows, cols)]
        return np.power(selected, hparam)
    
def init_coefs(n_classes, train_size):
    """Return an all-zero coefficient matrix of shape (n_classes, train_size)."""
    return np.zeros(shape=(n_classes, train_size))
    
def predict(coefs, kernel_values):
    """Return ``coefs.dot(kernel_values)``: one score per row of ``coefs``
    when ``kernel_values`` is a single vector of kernel evaluations."""
    return coefs.dot(kernel_values)

def sign(x):
    """Map ``x`` element-wise to -1. where it is <= 0 and to +1. elsewhere.

    Note that zero is mapped to -1., unlike ``np.sign``.
    """
    non_positive = x <= 0.
    return np.where(non_positive, -1., 1.)

In [None]:
from time import time

def train(train_kernel, train_y, coefs, n_epochs=1):
    """Run online one-vs-rest kernel perceptron updates over the training set.

    Parameters
    ----------
    train_kernel : np.ndarray
        Kernel matrix between training samples; row ``i`` holds the kernel
        values of sample ``i`` against every training sample.
    train_y : sequence of numbers
        One label per training sample. Label ``y`` is represented by row
        ``int(y) - 1`` of ``coefs`` (so label 0 wraps around to the last row).
    coefs : np.ndarray, shape (n_classes, n_train)
        Coefficient matrix; it is updated IN PLACE and also returned.
    n_epochs : int
        Number of passes over the training set.

    Returns
    -------
    coefs : np.ndarray
        The same array object that was passed in, after the updates.
    mistakes : np.ndarray, shape (n_epochs,)
        Number of samples per epoch whose highest-scoring class, evaluated
        before the update for that sample, was not the true class.
    """
    n_classes = coefs.shape[0]
    # A wide signed counter: uint16 would silently wrap above 65535 mistakes.
    mistakes = np.zeros(n_epochs, dtype=np.int64)
    for epoch in range(n_epochs):
        for i, y in enumerate(train_y):
            class_index = int(y) - 1
            # One-vs-rest targets: +1 for the true class, -1 for every other.
            labels = np.full(n_classes, -1.)
            labels[class_index] = 1.
            predictions = coefs.dot(train_kernel[i])
            # Perceptron rule: each classifier whose margin is non-positive is
            # moved towards its own target. Using the target (rather than the
            # negated sign of the score) also moves a classifier with an
            # exactly zero score in the correct direction.
            coefs[:, i] += np.where(labels * predictions <= 0., labels, 0.)
            # Compare in class-index space, i.e. with the same encoding used
            # for the targets above; the modulo resolves the wrapped index -1.
            if np.argmax(predictions) != class_index % n_classes:
                mistakes[epoch] += 1
    return coefs, mistakes

def test(test_kernel, test_y, coefs):

    mistakes = 0
    for i, y in enumerate(test_y):
        prediction = np.argmax(predict(coefs, test_kernel[i]))
        if prediction != y:
            mistakes += 1

    return mistakes

In [None]:
def execute_run(train_x, train_y, test_x, test_y, d=3, n_classes=3, n_epochs=3):
    """Train a degree-``d`` polynomial kernel perceptron, reporting every epoch.

    The kernels are built once; the model is then trained one epoch at a time
    and evaluated on the test set after each epoch, printing a progress line.

    Returns the coefficients together with the training mistake count and the
    test mistake count of the final epoch. ``n_epochs`` must be at least 1,
    otherwise the returned counts are never assigned.
    """
    degree = d
    train_kernel = PolynomialKernel(train_x).kernel(degree)
    # Rows are test samples, columns are training samples.
    test_kernel = PolynomialKernel(test_x, train_x).kernel(degree)

    coefs = init_coefs(n_classes, train_x.shape[0])
    for i in range(1, n_epochs+1):
        # A single pass per call so that the test error can be reported
        # after every epoch.
        coefs, train_mistakes = train(train_kernel, train_y, coefs, n_epochs=1)
        test_mistakes = test(test_kernel, test_y, coefs)
        print(f"Epoch {i} - {train_mistakes[0]} mistakes out of {train_x.shape[0]} items on training set, test error is {test_mistakes/test_x.shape[0]*100:.3f}%.")

    last_epoch_train_mistakes = train_mistakes[0]
    return coefs, last_epoch_train_mistakes, test_mistakes

In [None]:
# Run on the separate dtrain123 / dtest123 files (three classes) first.
train_x, train_y = import_data("dtrain123.dat")
test_x, test_y = import_data("dtest123.dat")

coefs, *_ = execute_run(
    train_x, train_y, test_x, test_y,
    d=3, n_classes=3, n_epochs=3,
)

### Question 1

In [None]:
# Load the full data set and compute its matrix of dot products once; the
# later cells derive every polynomial kernel from this single object.
X, Y = import_data("zipcombo.dat")
full_kernel = PolynomialKernel(X1=X)

In [None]:
n_runs = 20
ds = range(1, 8)

# One (run, degree) table of error rates per split.
error_rates = {split: np.zeros((n_runs, len(ds))) for split in ('train', 'test')}

for i, d in enumerate(tqdm(ds)):

    # The kernel matrix for this degree is shared by all runs below.
    full_kernel_d = full_kernel.kernel(d)

    for run in range(n_runs):

        # Fresh random 80/20 split of the sample indices.
        train_indices, test_indices = train_test_split(np.arange(X.shape[0]), train_size=0.8, shuffle=True)
        train_kernel = full_kernel_d[np.ix_(train_indices, train_indices)]
        test_kernel = full_kernel_d[np.ix_(test_indices, train_indices)]
        train_y = Y[train_indices]
        test_y = Y[test_indices]

        coefs = init_coefs(n_classes=10, train_size=train_indices.size)
        coefs, train_mistakes = train(train_kernel, train_y, coefs, n_epochs=3)
        test_mistakes = test(test_kernel, test_y, coefs)

        # The training figure is the mistake count of the last epoch.
        error_rates['train'][run, i] = train_mistakes[-1]/train_y.size
        error_rates['test'][run, i] = test_mistakes/test_y.size

# Collapse the runs into "mean ± std" strings, one per degree.
error_rates = {
    split: [f'{m:.3f} ± {s:.3f}' for m, s in zip(np.mean(rates, axis=0), np.std(rates, axis=0))]
    for split, rates in error_rates.items()
}

pd.DataFrame(data=error_rates, index=ds)

### Questions 2 and 3

In [None]:
def test_cm(test_kernel, test_y, coefs):

    mistakes = 0
    cm = np.zeros((10, 10))
    for i, y in enumerate(test_y):
        prediction = np.argmax(predict(coefs, test_kernel[i]))
        cm[int(y), prediction] += 1
        if prediction != y:
            mistakes += 1

    return mistakes, cm

In [None]:
kfold = KFold(n_splits=5, shuffle=True)

confusion_matrix = np.zeros((n_runs, 10, 10))
results = {'d_star': np.zeros((n_runs,)), 'test_error': np.zeros((n_runs,))}

for run in tqdm(range(n_runs)):

    # Fresh random 80/20 split of the sample indices for every run.
    train_indices, test_indices = train_test_split(np.arange(X.shape[0]), train_size=0.8, shuffle=True)

    # Cross-validate every degree using the training part of the split only.
    val_errors = np.zeros((len(ds), kfold.get_n_splits()))
    for i, d in enumerate(ds):
        full_kernel_d = full_kernel.kernel(d)
        for fold, (train_pos, val_pos) in enumerate(kfold.split(train_indices)):
            # KFold.split yields positions *within* train_indices, not dataset
            # row numbers. Map them back before indexing the full kernel and
            # the labels; otherwise the folds would ignore the random split
            # and could contain held-out test rows.
            train_fold, val_fold = train_indices[train_pos], train_indices[val_pos]
            train_kernel, val_kernel = full_kernel_d[np.ix_(train_fold, train_fold)], full_kernel_d[np.ix_(val_fold, train_fold)]
            train_y, val_y = Y[train_fold], Y[val_fold]
            coefs = init_coefs(n_classes=10, train_size=train_fold.size)
            coefs, _ = train(train_kernel, train_y, coefs, n_epochs=3)
            val_mistakes = test(val_kernel, val_y, coefs)
            val_errors[i, fold] = val_mistakes/val_y.size
    # Average over folds; d_star is the position in ds of the best degree.
    val_errors = val_errors.mean(axis=1)
    d_star = np.argmin(val_errors)

    # Retrain on the whole training part with the selected degree and
    # evaluate once on the held-out test part.
    full_kernel_d = full_kernel.kernel(ds[d_star])
    train_kernel, test_kernel = full_kernel_d[np.ix_(train_indices, train_indices)], full_kernel_d[np.ix_(test_indices, train_indices)]
    train_y, test_y = Y[train_indices], Y[test_indices]
    coefs = init_coefs(n_classes=10, train_size=train_indices.size)
    coefs, _ = train(train_kernel, train_y, coefs, n_epochs=3)
    test_mistakes, confusion_matrix[run] = test_cm(test_kernel, test_y, coefs)
    results['d_star'][run] = ds[d_star]; results['test_error'][run] = test_mistakes/test_y.size

In [None]:
# Summary over runs of the selected degree and of the test error.
print(f"d* = {np.mean(results['d_star']):.3f} ± {np.std(results['d_star']):.3f}")
print(f"test error = {np.mean(results['test_error']):.3f} ± {np.std(results['test_error']):.3f}")

# Turn the per-run counts into per-row rates (each row is divided by its own
# total) before averaging over the runs.
row_totals = confusion_matrix.sum(axis=2, keepdims=True)
confusion_matrix = confusion_matrix / row_totals
cm_mean = confusion_matrix.mean(axis=0)
cm_std = confusion_matrix.std(axis=0)
labels = np.array(
    [f"{mean:.1f} ±\n{std:.1f}" for mean, std in zip(cm_mean.ravel(), cm_std.ravel())]
).reshape(cm_mean.shape)

# heatmap / annotate_heatmap come from the local `plots` module; presumably
# they draw the matrix and write one label per cell -- confirm against plots.py.
fig, axs = plt.subplots(figsize=(8, 8))
im, cbar = heatmap(cm_mean, np.arange(10), np.arange(10), ax=axs, cmap="Blues")
texts = annotate_heatmap(im, labels=labels)
fig.tight_layout()
plt.show()

### Question 4

In [None]:
def hardest_test_samples(test_kernel, test_y, coefs):
    """Return the positions of the (up to) five hardest test samples.

    Hardness is measured by the multi-class margin: the score of the true
    class minus the best score among the other classes. A negative margin
    means the sample is misclassified, and the more negative it is the more
    confidently wrong the model is.

    Parameters
    ----------
    test_kernel : np.ndarray, shape (n_test, n_train)
        Kernel values of every test sample against the training samples.
    test_y : sequence of numbers
        One label per test sample.
    coefs : np.ndarray, shape (n_classes, n_train)
        Trained coefficients. Label ``y`` is expected to be represented by
        row ``int(y) - 1`` (label 0 wrapping to the last row), which is the
        encoding ``train`` uses when it builds its targets.

    Returns
    -------
    np.ndarray
        Positions into ``test_y`` of the samples with the smallest margins,
        hardest first. Fewer than five are returned when the test set is
        smaller than that.
    """
    n_classes = coefs.shape[0]
    margins = np.zeros(len(test_y))
    for i, y in enumerate(test_y):
        scores = coefs.dot(test_kernel[i])
        true_class = (int(y) - 1) % n_classes
        # Mask the true class out of a copy to find the strongest rival.
        rival_scores = scores.astype(float)
        rival_scores[true_class] = -np.inf
        margins[i] = scores[true_class] - rival_scores.max()

    # A full stable sort (instead of argpartition with a fixed k) also works
    # for five or fewer samples and returns the hardest sample first.
    return np.argsort(margins, kind="stable")[:5]

In [None]:
# Pick the degree explicitly instead of relying on a loop index left over
# from an earlier cell: use the degree that cross-validation selected most
# often across the runs (the smallest such degree on a tie).
d_star_votes = [int(v) for v in results['d_star']]
d = max(sorted(set(d_star_votes)), key=d_star_votes.count)
full_kernel_d = full_kernel.kernel(d)

# One more random 80/20 split; rows of test_kernel are test samples and
# columns are training samples.
train_indices, test_indices = train_test_split(np.arange(X.shape[0]), train_size=0.8, shuffle=True)
train_kernel, test_kernel = full_kernel_d[np.ix_(train_indices, train_indices)], full_kernel_d[np.ix_(test_indices, train_indices)]
train_y, test_y = Y[train_indices], Y[test_indices]

coefs = init_coefs(n_classes=10, train_size=train_indices.size)
coefs, train_mistakes = train(train_kernel, train_y, coefs, n_epochs=3)
# `indices` are positions within the test split; map them back to row
# numbers of the full data set.
indices = hardest_test_samples(test_kernel, test_y, coefs)
hardest_samples = test_indices[indices]