# Kernels

In [1]:
import numpy as np
from scipy.io import loadmat
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn.metrics import make_scorer, fbeta_score, accuracy_score, roc_auc_score


from scripts.nested_CV import nested_cv, nested_cv_multi
from scripts.skwrapped_kernels import dtw_SVC, rbf_SVC, poly_SVC

In [2]:
# data
# Load the laser dataset from the MATLAB file: "X" is used as the sample matrix
# and "Y" as the label vector in the cells below.
file_path = "../data/laser.mat"
mat = loadmat(file_path)

X = mat["X"]
# Flatten the labels to 1-D without hard-coding the number of samples.
y = mat["Y"].reshape(-1)
# Fail early if samples and labels are misaligned.
assert X.shape[0] == y.shape[0], "X and Y must contain the same number of samples"
# Sign-inverted labels; these are the targets used by the model cells below.
y_flipped = y * -1

print(X.shape, y.shape)

(200, 60) (200,)


In [3]:
# configs
seed = 571489  # for repeatability; passed as random_state to the nested-CV calls
# NOTE(review): the global NumPy RNG is seeded with a separate constant (42),
# not with `seed` — confirm this is intentional.
np.random.seed(42)

# Scorers evaluated in every nested-CV run.
fbeta_scorer = make_scorer(fbeta_score, beta=2)  # F-beta with beta=2
# NOTE(review): `needs_threshold` is deprecated in scikit-learn >= 1.4 in favour
# of `response_method` — confirm against the installed version.
roc_auc_scorer = make_scorer(roc_auc_score, needs_threshold=True)
accuracy_scorer = make_scorer(accuracy_score)

# Metric name -> scorer; the names appear as keys in the reported results.
score_dict = dict(f2=fbeta_scorer, roc_auc=roc_auc_scorer, accuracy=accuracy_scorer)



In [4]:
# normalizing the data: standardise every row of X with that row's own mean and
# standard deviation (axis=1 reduces across the columns of each row).
row_mean = X.mean(axis=1, keepdims=True)
row_std = X.std(axis=1, keepdims=True)
X = (X - row_mean) / row_std


# Support Vector Machines

Support vector machines are classifiers that minimise the hinge loss with an L2 regularizer.

In their dual formulation, they can be combined with kernel methods, which is the approach used in this notebook.

The optimization criterion of the dual SVM is given by:
$$
\max_{\beta} \; \sum_{i=1}^{n} \beta_{i} - \frac{1}{2} \sum_{i,j=1}^{n} \beta_{i}\beta_{j}y_{i}y_{j}k(x_{i},x_{j})
\quad \text{such that} \quad 0 \le \beta_{i} \le \lambda \quad (i = 1, \dots, n),
$$

where $\lambda$ is the regularization coefficient (called `C` in the code below).



## 1. SVC with Polynomial Kernel

### Description
The idea here is to map the data into a higher-dimensional space in which it is linearly separable, so that we can then perform linear classification in that space.

The polynomial kernel is:
$$ k_{\text{poly}}(x, x') = (x^{\text{T}}x' + c)^{p}, $$
where $p$ is the degree of the polynomial and $c$ is a constant offset.

### Nested CV
We run a nested cross validation function for the following hyperparameters of the Polynomial Kernel:
- p - degree of polynomial
- lambda (C) - regularization coefficient

In [5]:
# Candidate values for the polynomial-kernel SVC: regularisation coefficient C
# and polynomial degree.
param_grid_poly = {
    "C": [0.001, 0.01, 0.1, 1, 10, 100],
    "degree": [2, 3, 4, 5, 6],
}

# Nested CV on the sign-flipped labels, scored with every metric in score_dict.
results_poly = nested_cv_multi(
    poly_SVC(),
    X,
    y_flipped,
    param_grid_poly,
    scoring=score_dict,
    random_state=seed,
)

print(">>>>RESULTS<<<<")
for label, key in (
    ("Mean scores over 5 by 5 rounds of Nested CV ", "mean_scores"),
    ("Best parameters ", "star_params"),
):
    print(label, results_poly[key])

>>>>RESULTS<<<<
Mean scores over 5 by 5 rounds of Nested CV  {'f2': 0.9740150485636798, 'roc_auc': 0.9854999999999998, 'accuracy': 0.9649999999999999}
Best parameters  {'C': 0.001, 'degree': 4}


In [6]:
# Dump the full nested-CV result dict for the polynomial kernel: outer scores,
# their mean and std, the best parameters per outer round, and the final choice.
print(results_poly)

{'outer_scores': [{'f2': 0.9803921568627451, 'roc_auc': 0.95, 'accuracy': 0.95}, {'f2': 0.9595959595959596, 'roc_auc': 1.0, 'accuracy': 0.975}, {'f2': 0.9595959595959596, 'roc_auc': 0.9849999999999999, 'accuracy': 0.975}, {'f2': 0.9803921568627451, 'roc_auc': 1.0, 'accuracy': 0.95}, {'f2': 0.9900990099009901, 'roc_auc': 0.9924999999999999, 'accuracy': 0.975}], 'mean_scores': {'f2': 0.9740150485636798, 'roc_auc': 0.9854999999999998, 'accuracy': 0.9649999999999999}, 'std_scores': {'f2': 0.012295113550790568, 'roc_auc': 0.01860107523773829, 'accuracy': 0.012247448713915901}, 'best_params': [{'C': 0.001, 'degree': 5}, {'C': 0.001, 'degree': 6}, {'C': 0.001, 'degree': 5}, {'C': 0.001, 'degree': 4}, {'C': 0.001, 'degree': 4}], 'star_params': {'C': 0.001, 'degree': 4}}


In [7]:
# Refit the polynomial-kernel SVC with the parameters reported as best by the
# nested CV, leaving out samples 35 and 162, then predict those two samples.
held_out = [35, 162]

clf_poly = poly_SVC(C=0.001, degree=4)
clf_poly.fit(
    np.delete(X, held_out, axis=0),
    np.delete(y_flipped, held_out, axis=0),
)

# One prediction per held-out sample, in the order given by `held_out`.
odd_laser_faulty, odd_laser_non_faulty = (
    clf_poly.predict([X[idx]]) for idx in held_out
)
print(odd_laser_faulty, odd_laser_non_faulty)

[1] [-1]


## 2. SVC with RBF

### Description

The idea here is to use the squared Euclidean distance between vectors as a similarity measure. This creates Gaussian "hills" around the samples in the original feature space. Furthermore, the kernel implicitly maps to an infinite-dimensional feature space, in which every data point contributes its own Gaussian "hill" to the decision function.

The RBF kernel is:
$$ k_{\text{RBF}}(x,x') = \exp\left(-\gamma \lVert x-x' \rVert^{2}\right) $$

### Nested CV
We run a nested cross validation function for the following hyperparameters of the RBF Kernel:
- gamma - scaling of the RBF function
- lambda (C) - regularization coefficient


In [8]:
# Candidate values for the RBF-kernel SVC: regularisation coefficient C and
# kernel scale gamma, both on the same logarithmic grid.
log_grid = [0.001, 0.01, 0.1, 1, 10, 100]
param_grid_rbf = {"C": list(log_grid), "gamma": list(log_grid)}

# Nested CV on the sign-flipped labels, scored with every metric in score_dict.
results_rbf = nested_cv_multi(
    rbf_SVC(),
    X,
    y_flipped,
    param_grid=param_grid_rbf,
    scoring=score_dict,
    random_state=seed,
)

print(">>>>RESULTS<<<<")
for label, key in (
    ("Mean scores over 5 by 5 rounds of Nested CV ", "mean_scores"),
    ("Best parameters ", "star_params"),
):
    print(label, results_rbf[key])

>>>>RESULTS<<<<
Mean scores over 5 by 5 rounds of Nested CV  {'f2': 0.97482761248304, 'roc_auc': 0.999, 'accuracy': 0.9349999999999999}
Best parameters  {'C': 1, 'gamma': 0.1}


In [9]:
# Dump the full nested-CV result dict for the RBF kernel: outer scores, their
# mean and std, the best parameters per outer round, and the final choice.
print(results_rbf)

{'outer_scores': [{'f2': 0.9523809523809523, 'roc_auc': 0.9974999999999999, 'accuracy': 0.875}, {'f2': 0.9900990099009901, 'roc_auc': 1.0, 'accuracy': 0.975}, {'f2': 0.9803921568627451, 'roc_auc': 1.0, 'accuracy': 0.95}, {'f2': 0.9803921568627451, 'roc_auc': 0.9974999999999999, 'accuracy': 0.95}, {'f2': 0.970873786407767, 'roc_auc': 1.0, 'accuracy': 0.925}], 'mean_scores': {'f2': 0.97482761248304, 'roc_auc': 0.999, 'accuracy': 0.9349999999999999}, 'std_scores': {'f2': 0.012764240500527638, 'roc_auc': 0.0012247448713916173, 'accuracy': 0.03391164991562633}, 'best_params': [{'C': 10, 'gamma': 0.1}, {'C': 1, 'gamma': 0.1}, {'C': 1, 'gamma': 0.1}, {'C': 1, 'gamma': 0.01}, {'C': 10, 'gamma': 0.1}], 'star_params': {'C': 1, 'gamma': 0.1}}


In [10]:
# Refit the RBF-kernel SVC with the parameters reported as best by the nested
# CV, leaving out samples 35 and 162, then predict those two samples.
held_out = [35, 162]

clf_rbf = rbf_SVC(C=1, gamma=0.1)
clf_rbf.fit(
    np.delete(X, held_out, axis=0),
    np.delete(y_flipped, held_out, axis=0),
)

# One prediction per held-out sample, in the order given by `held_out`.
odd_laser_faulty, odd_laser_non_faulty = (
    clf_rbf.predict([X[idx]]) for idx in held_out
)
print(odd_laser_faulty, odd_laser_non_faulty)

[1] [1]


## 3. SVC with DTW

### Description
The idea here is to measure the similarity of time series that may not be aligned: they may be out of phase, evolve at different rates, and/or have different lengths. The kernel has the form of an RBF kernel, but uses the DTW distance between the time series.

Given a metric $d: X \times X \rightarrow \mathbb{R}_{\geq 0}$ on the input space $X$, the family of *DTW Kernels* is given as:

$$ k_{\text{DTW}}(x, x') = e^{- \lambda d_{\text{DTW}}(x, x'; d)}, $$

for sequences $x, x' \in X^+ := \bigcup_{n \geq 1}{X^n}$ of lengths $|x|$ and $|x'|$. The *DTW distance metric* $d_{\text{DTW}}$ is then given by $\gamma(|x|, |x'|)$, where the helper function $\gamma$ is defined recursively via:

$$ \gamma(i, j) = \begin{cases} d(x_i, x_j') + \min\left(\gamma(i-1, j-1), \gamma(i-1, j), \gamma(i, j-1)\right) & (1 \leq i \leq |x|, \, 1 \leq j \leq |x'|), \\ 
\infty & (i = 0 \vee j = 0) \wedge (i, j) \neq (0, 0), \\
0 & (i, j) = (0, 0). \end{cases}
$$
Source: Lab jupyter notebooks

A more visual explanation (source: [Herman Kamper](https://www.youtube.com/watch?v=9GdbMc4CEhE)): <br>
![image](../diagrams/DTW.jpg)


### Nested CV
We run a nested cross validation function for the following hyperparameters of the DTW Kernel:
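- gamma - scaling of the DTW distance in the exponent of the kernel (the factor written $\lambda$ in the kernel formula above, not the helper function $\gamma(i, j)$)
- d - inner distance between sequence elements (`inner_dist`)
- lambda (C) - regularization coefficient of the SVM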

In [11]:
# Candidate values for the DTW-kernel SVC: regularisation coefficient C, kernel
# parameter gamma, and the inner distance used inside DTW.
param_grid_dtw = dict(
    C=[0.001, 0.01, 0.1, 1, 10, 100],
    gamma=[0.001, 0.01, 0.1, 1],
    inner_dist=["euclidean", "squared euclidean"],
)

# Nested CV on the sign-flipped labels, scored with every metric in score_dict.
results_dtw = nested_cv_multi(
    dtw_SVC(),
    X,
    y_flipped,
    param_grid_dtw,
    scoring=score_dict,
    random_state=seed,
)

print(">>>>RESULTS<<<<")
for label, key in (
    ("Mean scores over 5 by 5 rounds of Nested CV ", "mean_scores"),
    ("Best parameters ", "star_params"),
):
    print(label, results_dtw[key])

>>>>RESULTS<<<<
Mean scores over 5 by 5 rounds of Nested CV  {'f2': 0.998019801980198, 'roc_auc': 0.9995, 'accuracy': 0.9949999999999999}
Best parameters  {'C': 1, 'gamma': 0.1, 'inner_dist': 'euclidean'}


In [12]:
# Dump the full nested-CV result dict for the DTW kernel: outer scores, their
# mean and std, the best parameters per outer round, and the final choice.
print(results_dtw)

{'outer_scores': [{'f2': 0.9900990099009901, 'roc_auc': 0.9974999999999999, 'accuracy': 0.975}, {'f2': 1.0, 'roc_auc': 1.0, 'accuracy': 1.0}, {'f2': 1.0, 'roc_auc': 1.0, 'accuracy': 1.0}, {'f2': 1.0, 'roc_auc': 1.0, 'accuracy': 1.0}, {'f2': 1.0, 'roc_auc': 1.0, 'accuracy': 1.0}], 'mean_scores': {'f2': 0.998019801980198, 'roc_auc': 0.9995, 'accuracy': 0.9949999999999999}, 'std_scores': {'f2': 0.003960396039603964, 'roc_auc': 0.0010000000000000232, 'accuracy': 0.010000000000000009}, 'best_params': [{'C': 1, 'gamma': 0.1, 'inner_dist': 'euclidean'}, {'C': 1, 'gamma': 0.1, 'inner_dist': 'euclidean'}, {'C': 1, 'gamma': 0.1, 'inner_dist': 'euclidean'}, {'C': 1, 'gamma': 0.1, 'inner_dist': 'euclidean'}, {'C': 1, 'gamma': 0.1, 'inner_dist': 'euclidean'}], 'star_params': {'C': 1, 'gamma': 0.1, 'inner_dist': 'euclidean'}}


In [13]:
# Refit the DTW-kernel SVC with the parameters reported as best by the nested
# CV, leaving out samples 35 and 162, then predict those two samples.
held_out = [35, 162]

# inner_dist is passed explicitly so the refit uses every selected
# hyperparameter instead of depending on dtw_SVC's default value.
clf_dtw = dtw_SVC(C=1, gamma=0.1, inner_dist="euclidean")
clf_dtw.fit(
    np.delete(X, held_out, axis=0),
    np.delete(y_flipped, held_out, axis=0),
)

# One prediction per held-out sample, in the order given by `held_out`.
odd_laser_faulty = clf_dtw.predict([X[held_out[0]]])
odd_laser_non_faulty = clf_dtw.predict([X[held_out[1]]])
print(odd_laser_faulty, odd_laser_non_faulty)

[1] [1]
