# Scalable Vector Machines

In [1]:
#Dependencies
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import os

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.svm import SVC 

### 1.) Data Import + PreProcessing


In [4]:
#Read CSV
df = pd.read_csv('data/heart.csv')

<b><u>Split into Test and Train Data

In [7]:
#Create Axis
y = df["target"]
X = df.drop(columns=["target"])

#Perform Test/Train split of the dataset
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1, stratify=y)

#Because Y data is binary, scale only X axis data 
X_scaler = MinMaxScaler().fit(X_train)
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

  return self.partial_fit(X, y)


### 2.) SVM Training 

In [8]:
#Create a linear Model 
model2 = SVC(kernel='linear')
model2.fit(X_train_scaled, y_train)

SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='auto_deprecated',
  kernel='linear', max_iter=-1, probability=False, random_state=None,
  shrinking=True, tol=0.001, verbose=False)

In [9]:
print(f"Training Data Score: {model2.score(X_train_scaled, y_train)}")
print(f"Testing Data Score: {model2.score(X_test_scaled, y_test)}")

Training Data Score: 0.8370044052863436
Testing Data Score: 0.8289473684210527


# Hyperparameter Tuning

In [15]:
from sklearn.model_selection import GridSearchCV
Cs = [0.001, 0.01, 0.1, 1, 10]
gammas = [0.001, 0.01, 0.1, 1]
param_grid = {'C': Cs, 'gamma' : gammas}
grid = GridSearchCV(model2, param_grid, verbose=5)
grid.fit(X_train, y_train)

[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   3 out of   3 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   4 out of   4 | elapsed:    0.0s remaining:    0.0s


Fitting 3 folds for each of 20 candidates, totalling 60 fits
[CV] C=0.001, gamma=0.001 ............................................
[CV] ... C=0.001, gamma=0.001, score=0.6233766233766234, total=   0.0s
[CV] C=0.001, gamma=0.001 ............................................
[CV] ................. C=0.001, gamma=0.001, score=0.68, total=   0.0s
[CV] C=0.001, gamma=0.001 ............................................
[CV] ... C=0.001, gamma=0.001, score=0.7466666666666667, total=   0.0s
[CV] C=0.001, gamma=0.01 .............................................
[CV] .... C=0.001, gamma=0.01, score=0.6233766233766234, total=   0.0s
[CV] C=0.001, gamma=0.01 .............................................
[CV] .................. C=0.001, gamma=0.01, score=0.68, total=   0.0s
[CV] C=0.001, gamma=0.01 .............................................
[CV] .... C=0.001, gamma=0.01, score=0.7466666666666667, total=   0.0s
[CV] C=0.001, gamma=0.1 ..............................................
[CV] ..... C=0.0

[CV] .......... C=10, gamma=1, score=0.7792207792207793, total=   1.8s
[CV] C=10, gamma=1 ...................................................
[CV] .......... C=10, gamma=1, score=0.7866666666666666, total=   3.6s
[CV] C=10, gamma=1 ...................................................
[CV] .......... C=10, gamma=1, score=0.8133333333333334, total=   1.6s


[Parallel(n_jobs=1)]: Done  60 out of  60 | elapsed:   29.9s finished


GridSearchCV(cv='warn', error_score='raise-deprecating',
       estimator=SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='auto_deprecated',
  kernel='linear', max_iter=-1, probability=False, random_state=None,
  shrinking=True, tol=0.001, verbose=False),
       fit_params=None, iid='warn', n_jobs=None,
       param_grid={'C': [0.001, 0.01, 0.1, 1, 10], 'gamma': [0.001, 0.01, 0.1, 1]},
       pre_dispatch='2*n_jobs', refit=True, return_train_score='warn',
       scoring=None, verbose=5)

In [16]:
print(grid.best_params_)
print(grid.best_score_)

{'C': 0.1, 'gamma': 0.001}
0.801762114537445
