# Benchmarking SVM2+ Against Parkinson's Dataset

Data from http://www.maxlittle.net/publications/TBME-00342-HTML.html#_Toc237118870

In [1]:
import os
import sys
sys.path.append(os.path.join(os.getcwd(), '../svm2plus/'))
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.svm import SVC
from svm2plus import SVC2Plus
from sklearn.datasets import load_breast_cancer
from sklearn.metrics import precision_recall_fscore_support, accuracy_score

In [2]:
# Original parameter grid as proposed by Vapnik
param_grid_svc = [{'C': np.exp2(np.linspace(-5, 5, 21)),
                   'gamma': np.exp2(np.linspace(-6, 6, 25)),
                   'kernel': ['rbf']}]

# lmbda is a regularization parameter, just like C. So we CV it in the same way.
param_grid_svc2p = [{'C': np.exp2(np.linspace(-5, 5, 21)),
                     'lmbda': np.exp2(np.linspace(-5, 5, 21)),
                     'gamma': np.exp2(np.linspace(-6, 6, 25)),
                     'decision_kernel': ['rbf'],
                     'correcting_kernel': ['rbf']}]

In [3]:
data = pd.read_csv('../data/parksinsons/parkinsons.csv')

privileged_features = ['DFA', 'spread1', 'spread2', 'D2', 'PPE']

X = data.drop(['name', 'status'], axis='columns')
y = data.loc[:, 'status']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=1618)

Z_train = X_train.loc[:, privileged_features]
Z_test = X_test.loc[:, privileged_features]
X.drop(privileged_features, axis='columns', inplace=True)

## SVC

In [6]:
svc = GridSearchCV(SVC(class_weight='balanced'), param_grid_svc, scoring='accuracy', return_train_score=True)
svc.fit(X_train, y_train);
precision_recall_fscore_support(y_test, svc.predict(X_test))

(array([0.83333333, 0.75471698]),
 array([0.27777778, 0.97560976]),
 array([0.41666667, 0.85106383]),
 array([18, 41]))

In [7]:
accuracy_score(y_test, svc.predict(X_test))

0.7627118644067796

## SVC2+

In [8]:
# Train SVC2+ with grid searching
svc2p = GridSearchCV(SVC2Plus(class_weight='balanced'), param_grid_svc2p, scoring='accuracy', return_train_score=True)
svc2p.fit(X=X_train.values, y=y_train, Z=Z_train.values)

preds = svc2p.predict(X_test.values)
precision_recall_fscore_support(y_test, preds)

(array([0.85714286, 0.76923077]),
 array([0.33333333, 0.97560976]),
 array([0.48      , 0.86021505]),
 array([18, 41]))

In [9]:
accuracy_score(y_test, preds)

0.7796610169491526