# Kernals

In [1]:
import numpy as np
import matplotlib.pyplot as plt
from scipy.io import loadmat
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn.metrics import recall_score, roc_auc_score, make_scorer, accuracy_score, fbeta_score
from sklearn.model_selection import StratifiedKFold
import pandas as pd


from scripts.kernals import *
from scripts.nested_CV import nested_cv

In [2]:
# data
file_path = "../data/laser.mat"
mat = loadmat(file_path)

X = mat["X"]
y = mat["Y"].reshape(200)

print(X.shape, y.shape)

(200, 60) (200,)


In [3]:
# configs
seed = 42  # for repeatability
np.random.seed(42)
recall_scorer = make_scorer(recall_score, pos_label=-1)
accuracy_scorer = make_scorer(accuracy_score)
fbeta_scorer = make_scorer(fbeta_score, beta=2, pos_label=-1)


In [4]:
# normalizing the data
X = (X - X.mean(axis=1, keepdims=True)) / X.std(axis=1, keepdims=True)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=seed
)

# Support Vector Machines

Classifiers, that use the hinge loss and use the L2 regularizer

In the dual mode, they can be used in conjunction with kernal methods, which is what is employed in the following notebook.


## 1. SVC with Polynomial Kernal

### Description
The idea here is to map the data into a higher dimensional space, such that data is linearly seperable in that space and we can then perform linear classification


### Nested CV
We run a nested cross validation function for the following hyperparameters of the Polynomial Kernal:
- p - degree of polynomial
- lambda - regularization coefficient

In [None]:
param_grid_poly = {
    'k_param' : [2,3,4,5,6], # degree of polynomial
    'lbda' : [.01, .1, 1, 10, 100],
}
results = nested_cv(KernelEstimator(k_poly), X, y, param_grid=param_grid_poly, scoring=fbeta_scorer, random_state=seed)

print(">>>>RESULTS<<<<")
print("Mean f2 score (of negative class) over 5 by 5 rounds of Nested CV ", results["mean_score"])
print("Best parameters ", results["star_params"])

>>>>RESULTS<<<<
Mean f2 score (of negative class) over 5 by 5 rounds of Nested CV  0.8333333333333334
Best parameters  {'k_param': 2, 'lbda': 0.01}


In [6]:
print(results)

{'mean_score': 0.8333333333333334, 'star_params': {'k_param': 2, 'lbda': 0.01}, 'outer_scores': [0.8333333333333334, 0.8333333333333334, 0.8333333333333334, 0.8333333333333334, 0.8333333333333334], 'std_score': 0.0, 'best_params': [{'k_param': 2, 'lbda': 0.01}, {'k_param': 2, 'lbda': 0.01}, {'k_param': 2, 'lbda': 0.01}, {'k_param': 2, 'lbda': 0.01}, {'k_param': 2, 'lbda': 0.01}]}


In [None]:
clf_poly = KernelEstimator(k_poly, 3, 100)
clf_poly.fit(np.delete(X, [35, 162], axis=0), np.delete(y, [35,162], axis=0))

odd_laser_faulty = clf_poly.predict([X[35]])
odd_laser_non_faulty = clf_poly.predict([X[162]])
print(odd_laser_faulty, odd_laser_non_faulty)

[1.] [1.]


In [8]:
y_preds_poly = clf_poly.predict(X)
print(fbeta_score(y, y_preds_poly, beta=2, pos_label=-1))
print(classification_report(y, y_preds_poly))

0.097799511002445
              precision    recall  f1-score   support

          -1       0.89      0.08      0.15       100
           1       0.52      0.99      0.68       100

    accuracy                           0.54       200
   macro avg       0.70      0.54      0.41       200
weighted avg       0.70      0.54      0.41       200



## 2. SVC with RBF

### Nested CV
We run a nested cross validation function for the following hyperparameters of the Polynomial Kernal:
- gamma - scaling of the RBF function
- lambda - regularization coefficient


In [9]:
param_grid_rbf = {
    'k_param' : [.001,.01,.1,1], # gamma
    'lbda' : [.01, .1, 1]
}
results = nested_cv(KernelEstimator(k_rbf), X, y, param_grid=param_grid_rbf, scoring=fbeta_scorer, random_state=seed)

print(">>>>RESULTS<<<<")
print("Mean f2 score (of negative class) over 5 by 5 rounds of Nested CV ", results["mean_score"])
print("Best parameters ", results["star_params"])

>>>>RESULTS<<<<
Mean f2 score (of negative class) over 5 by 5 rounds of Nested CV  0.8125
Best parameters  {'k_param': 0.001, 'lbda': 0.01}


In [10]:
clf_rbf = KernelEstimator(k_rbf, .001, .001, eta=.001)
clf_rbf.fit(np.delete(X, [35, 162], axis=0), np.delete(y, [35,162], axis=0))

odd_laser_faulty = clf_rbf.predict([X[35]])
odd_laser_non_faulty = clf_rbf.predict([X[162]])
print(odd_laser_faulty, odd_laser_non_faulty)

[-1.] [-1.]


In [11]:
y_preds_rbf = clf_rbf.predict(X)
print(classification_report(y,y_preds_rbf))

              precision    recall  f1-score   support

          -1       0.50      0.99      0.66       100
           1       0.00      0.00      0.00       100

    accuracy                           0.49       200
   macro avg       0.25      0.49      0.33       200
weighted avg       0.25      0.49      0.33       200



## 3. SVC with DTW

### Nested CV
We run a nested cross validation function for the following hyperparameters of the Polynomial Kernal:
- gamma - smoothing of the DTW function
- (maybe) k - distance (Euclidean, manhattan, )
- lambda - regularization coefficient

In [None]:
param_grid_dtw = {
    'k_param' : [.001, .01, .1,1], # gamma
    'lbda' : [.01, .1, 1],
}
results = nested_cv(KernelEstimator(k2_hyp), X, y, param_grid=param_grid_dtw, scoring=fbeta_scorer, random_state=seed)

print(">>>>RESULTS<<<<")
print("Mean f2 score (of negative class) over 3 by 3 rounds of Nested CV ", results["mean_score"])
print("Best parameters ", results["star_params"])

In [None]:
print(results)

{'mean_score': 0.8333333333333334, 'star_params': {'eta': 0.001, 'k_param': 0.001, 'lbda': 0.001}, 'outer_scores': [0.8333333333333334, 0.8333333333333334, 0.8333333333333334, 0.8333333333333334, 0.8333333333333334], 'std_score': 0.0, 'best_params': [{'eta': 0.001, 'k_param': 0.001, 'lbda': 0.001}, {'eta': 0.001, 'k_param': 0.001, 'lbda': 0.001}, {'eta': 0.001, 'k_param': 0.001, 'lbda': 0.001}, {'eta': 0.001, 'k_param': 0.001, 'lbda': 0.001}, {'eta': 0.001, 'k_param': 0.001, 'lbda': 0.001}]}


In [None]:
clf_dtw = KernelEstimator(k2_hyp, .001, .001)
clf_dtw.fit(np.delete(X, [35, 162], axis=0), np.delete(y, [35,162], axis=0))

odd_laser_faulty = clf_dtw.predict([X[35]])
odd_laser_non_faulty = clf_dtw.predict([X[162]])
print(odd_laser_faulty, odd_laser_non_faulty)

[-1.] [-1.]


In [None]:
y_preds_dtw = clf_dtw.predict(X)
print(classification_report(y,y_preds_dtw))

print(y_preds_dtw)

              precision    recall  f1-score   support

          -1       0.50      0.99      0.66       100
           1       0.00      0.00      0.00       100

    accuracy                           0.49       200
   macro avg       0.25      0.49      0.33       200
weighted avg       0.25      0.49      0.33       200

[-1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1.
 -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1.
 -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1.
 -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1.
 -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1.
 -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1.
 -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1.
 -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1.
 -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1.
 -1. -1. -1. -1.