# Kernels

In [65]:
import numpy as np
from scipy.io import loadmat
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn.metrics import make_scorer, fbeta_score, accuracy_score


from scripts.nested_CV import nested_cv
from scripts.skwrapped_kernals import dtw_SVC, rbf_SVC, poly_SVC

In [66]:
# data
file_path = "../data/laser.mat"
mat = loadmat(file_path)

X = mat["X"]
y = mat["Y"].reshape(200)

print(X.shape, y.shape)

(200, 60) (200,)


In [None]:
# configs
seed = 571489  # for repeatability
np.random.seed(42)

fbeta_scorer = make_scorer(fbeta_score, beta=2, pos_label=-1)


In [68]:
# normalizing the data
X = (X - X.mean(axis=1, keepdims=True)) / X.std(axis=1, keepdims=True)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=seed
)

# Support Vector Machines

Classifiers, that use the hinge loss and use the L2 regularizer

In the dual mode, they can be used in conjunction with kernal methods, which is what is employed in the following notebook.


## 1. SVC with Polynomial Kernel

### Description
The idea here is to map the data into a higher dimensional space, such that data is linearly seperable in that space and we can then perform linear classification


### Nested CV
We run a nested cross validation function for the following hyperparameters of the Polynomial Kernal:
- p - degree of polynomial
- lambda (C) - regularization coefficient

In [69]:
param_grid_poly = {
    'C' : [.001,.01,.1,1,10,100],
    'degree' : [2,3,4,5,6]
}

results_poly = nested_cv(poly_SVC(), X, y, param_grid_poly, scoring=fbeta_scorer, random_state=seed)
print(">>>>RESULTS<<<<")
print("Mean f2 score (of negative class) over 5 by 5 rounds of Nested CV ", results_poly["mean_score"])
print("Best parameters ", results_poly["star_params"])

>>>>RESULTS<<<<
Mean f2 score (of negative class) over 5 by 5 rounds of Nested CV  0.9763471378548477
Best parameters  {'C': 0.001, 'degree': 4}


In [70]:
print(results_poly)

{'mean_score': 0.9763471378548477, 'star_params': {'C': 0.001, 'degree': 4}, 'outer_scores': [1.0, 0.9595959595959596, 0.970873786407767, 0.9803921568627451, 0.970873786407767], 'std_score': 0.013537630454547036, 'best_params': [{'C': 0.001, 'degree': 6}, {'C': 0.001, 'degree': 5}, {'C': 0.001, 'degree': 6}, {'C': 0.001, 'degree': 3}, {'C': 0.001, 'degree': 4}]}


In [71]:
clf_poly = poly_SVC(C=.001, degree=6)
clf_poly.fit(np.delete(X, [35, 162], axis=0), np.delete(y, [35,162], axis=0))

odd_laser_faulty = clf_poly.predict([X[35]])
odd_laser_non_faulty = clf_poly.predict([X[162]])
print(odd_laser_faulty, odd_laser_non_faulty)

[-1] [1]


In [72]:
y_preds_poly = clf_poly.predict(X)
print(fbeta_score(y, y_preds_poly, beta=2, pos_label=-1))
print(classification_report(y, y_preds_poly))

1.0
              precision    recall  f1-score   support

          -1       1.00      1.00      1.00       100
           1       1.00      1.00      1.00       100

    accuracy                           1.00       200
   macro avg       1.00      1.00      1.00       200
weighted avg       1.00      1.00      1.00       200



## 2. SVC with RBF

### Nested CV
We run a nested cross validation function for the following hyperparameters of the Polynomial Kernal:
- gamma - scaling of the RBF function
- lambda - regularization coefficient


In [73]:
param_grid_rbf = {
    'C' : [.001,.01,.1,1,10,100],
    'gamma' : [.001,.01, .1 , 1,10,100],
}
results_rbf = nested_cv(rbf_SVC(), X, y, param_grid=param_grid_rbf, scoring=fbeta_scorer, random_state=seed)

print(">>>>RESULTS<<<<")
print("Mean f2 score (of negative class) over 5 by 5 rounds of Nested CV ", results_rbf["mean_score"])
print("Best parameters ", results_rbf["star_params"])

>>>>RESULTS<<<<
Mean f2 score (of negative class) over 5 by 5 rounds of Nested CV  0.9643790627799766
Best parameters  {'C': 1, 'gamma': 0.1}


In [74]:
print(results_rbf)

{'mean_score': 0.9643790627799766, 'star_params': {'C': 1, 'gamma': 0.1}, 'outer_scores': [1.0, 0.9090909090909091, 0.9615384615384616, 0.9803921568627451, 0.970873786407767], 'std_score': 0.030437807868823507, 'best_params': [{'C': 10, 'gamma': 0.1}, {'C': 0.001, 'gamma': 10}, {'C': 1, 'gamma': 0.1}, {'C': 1, 'gamma': 0.01}, {'C': 10, 'gamma': 0.1}]}


In [75]:
clf_rbf = rbf_SVC(C=.001, gamma=10)
clf_rbf.fit(np.delete(X, [35, 162], axis=0), np.delete(y, [35,162], axis=0))

odd_laser_faulty = clf_rbf.predict([X[35]])
odd_laser_non_faulty = clf_rbf.predict([X[162]])
print(odd_laser_faulty, odd_laser_non_faulty)

[1] [1]


In [76]:
y_preds_rbf = clf_rbf.predict(X)
print(classification_report(y,y_preds_rbf))

              precision    recall  f1-score   support

          -1       1.00      0.99      0.99       100
           1       0.99      1.00      1.00       100

    accuracy                           0.99       200
   macro avg       1.00      0.99      0.99       200
weighted avg       1.00      0.99      0.99       200



## 3. SVC with DTW

### Nested CV
We run a nested cross validation function for the following hyperparameters of the Polynomial Kernal:
- gamma - smoothing of the DTW function
- (maybe) k - distance (Euclidean, manhattan, )
- lambda - regularization coefficient

In [77]:
param_grid_dtw = {
    'C': [.001, .01, .1, 1, 10, 100],
    'gamma':[.001, .01, .1, 1]
}

results_dtw = nested_cv(dtw_SVC(), X, y, param_grid_dtw, scoring=fbeta_scorer, random_state=seed)

print(">>>>RESULTS<<<<")
print("Mean f2 score (of negative class) over 5 by 5 rounds of Nested CV ", results_dtw["mean_score"])
print("Best parameters ", results_dtw["star_params"])

>>>>RESULTS<<<<
Mean f2 score (of negative class) over 5 by 5 rounds of Nested CV  0.998019801980198
Best parameters  {'C': 1, 'gamma': 0.1}


In [78]:
clf_dtw = dtw_SVC(C=1, gamma=.1)
clf_dtw.fit(np.delete(X, [35, 162], axis=0), np.delete(y, [35,162], axis=0))

odd_laser_faulty = clf_dtw.predict([X[35]])
odd_laser_non_faulty = clf_dtw.predict([X[162]])
print(odd_laser_faulty, odd_laser_non_faulty)

print(classification_report(y, clf_dtw.predict(X)))

[-1] [-1]
              precision    recall  f1-score   support

          -1       0.99      1.00      1.00       100
           1       1.00      0.99      0.99       100

    accuracy                           0.99       200
   macro avg       1.00      0.99      0.99       200
weighted avg       1.00      0.99      0.99       200

