# Kernel methods for machine learning - image classification challenge

## Imports

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import time
from tqdm import tqdm
%matplotlib inline

### Helper functions

In [None]:
from sklearn.base import BaseEstimator
from sklearn.preprocessing import LabelBinarizer
from sklearn.model_selection import StratifiedKFold, GridSearchCV, ParameterGrid

## Load data

In [None]:
# Feature matrices: header-less CSV files, restricted to their first 3072
# columns (the visualization cell splits them into three 32x32 planes).
Xtr = pd.read_csv('data/Xtr.csv', header=None, sep=',', usecols=range(3072)).to_numpy()
Xte = pd.read_csv('data/Xte.csv', header=None, sep=',', usecols=range(3072)).to_numpy()

# Training labels: only column 1 of the label file is read, then flattened to 1-D.
Ytr = pd.read_csv('data/Ytr.csv', sep=',', usecols=[1]).to_numpy().squeeze()

# Human-readable class names, looked up by label value (classes[Ytr[i]]).
classes = [
    'airplane',
    'automobile',
    'bird',
    'cat',
    'deer',
    'dog',
    'frog',
    'horse',
    'ship',
    'truck',
]

In [None]:
# Notebook display: echo the training label array loaded above.
Ytr

## Visualization

In [None]:
def scale(A):
    """Min-max rescale *A* to the range [0, 1].

    The minimum and maximum are taken over the whole array (no axis), so every
    entry is shifted and divided by the same two numbers.

    Parameters
    ----------
    A : array-like
        Values to rescale.

    Returns
    -------
    numpy.ndarray
        Array of the same shape as *A*. If all entries of *A* are equal, an
        all-zero float array is returned instead of dividing by zero.
    """
    lowest = np.min(A)
    value_range = np.max(A) - lowest
    if value_range == 0:
        # Constant input: (A - min) is all zeros and the ratio 0/0 would be NaN.
        return np.zeros_like(A, dtype=float)
    return (A - lowest) / value_range

# Rescale the whole training matrix in one call: scale() takes a single
# min/max over all of Xtr, so this is a global (not per-image) normalisation.
X = scale(Xtr)

In [None]:
i = 1000
print(classes[Ytr[i]])

# Draw sample i twice: first from the per-image rescaled row, then from the
# globally rescaled matrix X. Each row stores three consecutive 1024-value
# planes that are reshaped to 32x32 and stacked as the R, G, B channels.
# NOTE(review): scale(img) re-normalises each picture just before plotting, so
# both figures are expected to look the same -- confirm whether the second one
# was meant to show the globally scaled values as they are.
for tmp in (scale(Xtr[i]), X[i]):
    r, g, b = (
        tmp[first:last].reshape([32, 32])
        for first, last in ((None, 1024), (1024, 2048), (2048, None))
    )
    img = np.dstack((r, g, b))

    plt.imshow(scale(img))
    plt.show()

## Kernels

In [None]:
def _as_2d(X):
    """Return X as a float array of shape (n_samples, n_features)."""
    X = np.asarray(X, dtype=float)
    if X.ndim == 1:
        # A flat sequence of scalars is treated as n samples with one feature.
        X = X[:, None]
    return X


class Kernel:
    """Base class storing the reference samples and the kernel parameter.

    Parameters
    ----------
    X : array-like
        Reference samples, one per row.
    gamma : float or None
        Kernel parameter; its meaning is defined by the subclass.
    """

    def __init__(self, X, gamma):
        self.X = X
        self.gamma = gamma


class LinearKernel(Kernel):
    """Linear kernel k(a, b) = <a, b>.

    ``gamma`` is accepted so that every kernel shares the same constructor
    signature, but it is not used by the computations.
    """

    def __init__(self, X, gamma=None):
        super().__init__(X, gamma)

    def similarity_matrix(self):
        """Return the (n, n) Gram matrix of dot products between stored samples."""
        X = _as_2d(self.X)
        # One matrix product instead of n*(n+1)/2 Python-level np.dot calls.
        return X @ X.T

    def similarity(self, x):
        """Return the length-n vector of dot products between *x* and each stored sample."""
        return _as_2d(self.X) @ np.atleast_1d(np.asarray(x, dtype=float))


class GaussianKernel(Kernel):
    """Gaussian (RBF) kernel k(a, b) = exp(-gamma * ||a - b||^2)."""

    def __init__(self, X, gamma):
        super().__init__(X, gamma)

    def similarity_matrix(self):
        """Return the (n, n) Gram matrix of the stored samples."""
        X = _as_2d(self.X)
        # ||a - b||^2 = ||a||^2 + ||b||^2 - 2 <a, b>, evaluated for all pairs at once.
        sq_norms = np.sum(X * X, axis=1)
        sq_dists = sq_norms[:, None] + sq_norms[None, :] - 2.0 * (X @ X.T)
        # Rounding in the expansion can leave tiny negative values; a squared
        # distance is never negative and is exactly zero on the diagonal.
        np.maximum(sq_dists, 0.0, out=sq_dists)
        np.fill_diagonal(sq_dists, 0.0)
        return np.exp(-self.gamma * sq_dists)

    def similarity(self, x):
        """Return the length-n vector of kernel values between *x* and each stored sample."""
        diff = _as_2d(self.X) - np.atleast_1d(np.asarray(x, dtype=float))
        return np.exp(-self.gamma * np.sum(diff * diff, axis=1))


# Lookup table from kernel name to kernel class.
kernels = {'linear': LinearKernel, 'rbf': GaussianKernel}

## Algorithms

In [None]:
class KernelRidgeClassifier(BaseEstimator):
    """One-vs-all classifier built on kernel ridge regression.

    For every class a regression on {-1, +1} targets is solved in closed form,
    ``(K + C * n * I) alpha = Y``, where ``K`` is the kernel Gram matrix of the
    training samples and ``n`` the number of training samples. A sample is
    assigned to the class whose regressor gives the highest score.

    Parameters
    ----------
    C : float, default=1.0
        Ridge regularisation factor; the value added to the diagonal of the
        Gram matrix is ``C * n``.
    kernel : {'linear', 'rbf'}, default='rbf'
        Key into the module-level ``kernels`` table.
    gamma : float, default=10
        Parameter forwarded to the kernel constructor.
    verbose : bool, default=True
        When true, print progress messages and show the tqdm progress bar.

    Attributes
    ----------
    K : kernel object
        Kernel instance built on the training samples (set by ``fit``).
    alpha : numpy.ndarray of shape (n_classes, n_samples)
        Dual coefficients, one row per class (set by ``fit``).
    classes_ : numpy.ndarray
        Sorted class labels; row ``k`` of ``alpha`` belongs to ``classes_[k]``.
    """

    def __init__(self, C=1.0, kernel='rbf', gamma=10, verbose=True):
        self.C = C
        self.kernel = kernel
        self.gamma = gamma
        self.verbose = verbose

    def _log(self, message):
        """Print *message* unless verbosity is switched off."""
        if self.verbose:
            print(message)

    def fit(self, X, y):
        """Compute the dual coefficients for every class.

        Parameters
        ----------
        X : array-like
            Training samples, one per row.
        y : array-like
            Class label of each training sample.

        Returns
        -------
        self
        """
        # map labels in {-1, 1}, one column per class in sorted label order
        binarizer = LabelBinarizer(pos_label=1, neg_label=-1)
        Y = binarizer.fit_transform(y)
        self.classes_ = binarizer.classes_
        if len(self.classes_) == 2 and Y.shape[1] == 1:
            # With two classes the binarizer emits one column (+1 for the
            # second class); expand it so that each class has its own column.
            Y = np.hstack([-Y, Y])

        # initialize kernel
        self.K = kernels[self.kernel](X, self.gamma)
        self._log("Start computing kernel similarity matrix...")
        start = time.time()
        K = self.K.similarity_matrix()
        end = time.time()
        self._log(f"Kernel similarity matrix computed in {end - start:.2f} seconds")

        # The regularised system is the same for every class, so it is solved
        # once for all target columns instead of inverting it per class.
        self._log("Fitting alphas...")
        ridge = self.C * len(X)
        self.alpha = np.linalg.solve(K + ridge * np.eye(len(K)), Y).T
        return self

    def predict(self, X):
        """Return the predicted class label of every row of *X*."""
        self._log("Predicting...")
        best = []
        for x in tqdm(X, disable=not self.verbose):
            similarity = self.K.similarity(x)
            # one score per class; keep the position of the highest one
            best.append(np.argmax(self.alpha @ similarity))
        # Translate positions back to labels (identity when labels are 0..k-1).
        return self.classes_[np.array(best, dtype=int)]

## Training

Kernel ridge regression with linear kernel:

In [None]:
# Fit kernel ridge regression with the linear kernel on the full training set
# and report the wall-clock time of the fit.
clf = KernelRidgeClassifier(kernel='linear')
start = time.time()
clf.fit(Xtr, Ytr)
end = time.time()
# ".2f" prints two decimals, like the other timing messages in this notebook.
print(f"fit completed in {end - start:.2f} seconds")

**Kernel ridge regression with Gaussian (RBF) kernel:**

In [None]:
# Fit kernel ridge regression with the Gaussian (RBF) kernel, gamma = 0.1, on
# the full training set and report the wall-clock time of the fit.
clf = KernelRidgeClassifier(kernel='rbf', gamma=0.1)
start = time.time()
clf.fit(Xtr, Ytr)
end = time.time()
# ".2f" prints two decimals, like the other timing messages in this notebook.
print(f"fit completed in {end - start:.2f} seconds")

With cross-validation:

In [None]:
# 5-fold stratified cross-validation with shuffling. No random_state is given,
# so the fold assignment differs from one run to the next.
skf = StratifiedKFold(shuffle=True, n_splits=5)

# NOTE(review): verbose=True is passed to the estimator here -- make sure
# KernelRidgeClassifier.__init__ accepts that keyword.
ridge = KernelRidgeClassifier(verbose=True, kernel='rbf')

# Every (C, gamma) pair of this grid is evaluated.
params = {
    'C': [0.01, 0.1, 1, 10, 100],
    'gamma': [0.01, 0.1, 1, 10, 100],
}

clf = GridSearchCV(estimator=ridge, param_grid=params, cv=skf, scoring='accuracy', verbose=5)
clf.fit(Xtr, Ytr)

# Best parameter combination first, then its mean cross-validated accuracy.
for best in (clf.best_params_, clf.best_score_):
    print(best)

## Predict and export

In [None]:
# Predict the test set with whichever model `clf` currently refers to (the
# last training cell that was run) and report the wall-clock time.
start = time.time()
Yte = clf.predict(Xte)
end = time.time()
print(f"predict completed in {end - start:.2f} seconds")

In [None]:
# Build the submission table directly from the prediction array. Yte is not
# rebound to a dict, so this cell can be run again without breaking.
dataframe = pd.DataFrame({'Prediction': Yte})
# Row ids in the exported file start at 1.
dataframe.index += 1
# NOTE(review): the file name encodes C=1 / gamma=0.1 -- confirm it matches the
# model that actually produced Yte before submitting.
dataframe.to_csv('Yte_pred_krr_rbf_C1_gamma0.1.csv', index_label='Id')

## Tests

In [None]:
from sklearn.datasets import make_classification
from sklearn.linear_model import RidgeClassifier

In [None]:
# Small synthetic 10-class dataset for the checks in the following cells.
# No random_state is passed, so the data changes on every run. This also
# rebinds X, which earlier held the rescaled training matrix.
X, y = make_classification(n_informative=5, n_classes=10)
y

In [None]:
# Fit on the synthetic data and predict the same samples back.
# NOTE(review): verbose=False is passed here -- make sure
# KernelRidgeClassifier.__init__ accepts that keyword.
clf = KernelRidgeClassifier(kernel='rbf', gamma=0.01, verbose=False)
clf.fit(X, y)
clf.predict(X)

In [None]:
# Recompute and display the Gram matrix of the samples the classifier was
# fitted on (clf.K is the kernel object created in fit).
clf.K.similarity_matrix()

In [None]:
# scikit-learn's RidgeClassifier with default settings, fitted on the challenge
# data and applied to the test set -- presumably a reference to compare against.
# Note that this rebinds clf.
clf = RidgeClassifier()
clf.fit(Xtr, Ytr)
clf.predict(Xte)