# SVM Classifier

In [1]:
# general imports

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
# Read the original (non-reduced) train/test splits from the archive folder.
df_training, df_testing = map(
    pd.read_csv,
    ('../data/archive/train.csv', '../data/archive/test.csv'),
)

# Read the PCA train/test splits stored alongside them.
df_training_pca, df_testing_pca = map(
    pd.read_csv,
    ('../data/archive/train_pca.csv', '../data/archive/test_pca.csv'),
)

In [3]:
# Original data: features are every column except the last two; the label is
# the last column.
# NOTE(review): the second-to-last column is excluded from the features —
# presumably a non-feature identifier; confirm against the CSV header.
x_train, y_train = df_training.iloc[:, :-2], df_training.iloc[:, -1]
x_test, y_test = df_testing.iloc[:, :-2], df_testing.iloc[:, -1]

# PCA data: features are every column except the last; the label is the last
# column.
x_train_pca, y_train_pca = df_training_pca.iloc[:, :-1], df_training_pca.iloc[:, -1]
x_test_pca, y_test_pca = df_testing_pca.iloc[:, :-1], df_testing_pca.iloc[:, -1]

In [4]:
# sklearn imports

from sklearn import svm
from sklearn import preprocessing
from sklearn.model_selection import GridSearchCV
import sklearn
from sklearn.metrics import classification_report

#sorted(sklearn.metrics.SCORERS.keys())

In [6]:
# Hyper-parameters to tune: kernel, C and gamma (gamma is only varied for the
# rbf kernel).

# Candidate grids: one for the rbf kernel, one for the linear kernel.
param_grid = [
    {'kernel': ['rbf'], 'gamma': [1e-3, 1e-4], 'C': [1, 10, 100, 1000]},
    {'kernel': ['linear'], 'C': [1, 10, 100, 1000]},
]

# Best parameter set found for each scoring metric, keyed by metric name.
# Filled inside the loop below so the results survive after `clf` is
# overwritten by the next iteration.
best_param_dict = {}

# Scoring on accuracy, precision_micro and f1_micro.
# NOTE(review): for single-label multiclass targets, micro-averaged precision
# and F1 coincide with accuracy, so the three searches are expected to rank the
# candidates identically — confirm whether all three are needed.
scores = ['accuracy', 'precision_micro', 'f1_micro']
for score in scores:
    print("# Tuning hyper-parameters for %s" % score)
    print()

    # Cross-validated search over param_grid on the PCA training data, using
    # GridSearchCV's default cv and refit settings.
    clf = GridSearchCV(svm.SVC(), param_grid, scoring=score)
    clf.fit(x_train_pca, y_train_pca)

    # Record the winner for this metric (copied so later mutation of either
    # side cannot affect the other).
    best_param_dict[score] = dict(clf.best_params_)

    print("Best parameters set found on training set:")
    print()
    print(clf.best_params_)
    print()
    print("Grid scores on training set:")
    print()
    means = clf.cv_results_['mean_test_score']
    stds = clf.cv_results_['std_test_score']
    # One line per candidate: mean CV score with a +/- 2*std band.
    for mean, std, params in zip(means, stds, clf.cv_results_['params']):
        print("%0.3f (+/-%0.03f) for %r"
                % (mean, std * 2, params))
    print()

    print("Detailed classification report using best parameters:")
    print()
    print("The model is trained on the training set.")
    print("The scores are computed on the test set.")
    print()
    # clf.predict delegates to the estimator refit with the best parameters.
    y_true, y_pred = y_test_pca, clf.predict(x_test_pca)
    print(classification_report(y_true, y_pred))
    print()

# Tuning hyper-parameters for accuracy

Best parameters set found on training set:

{'C': 1000, 'gamma': 0.001, 'kernel': 'rbf'}

Grid scores on training set:

0.885 (+/-0.046) for {'C': 1, 'gamma': 0.001, 'kernel': 'rbf'}
0.786 (+/-0.028) for {'C': 1, 'gamma': 0.0001, 'kernel': 'rbf'}
0.898 (+/-0.064) for {'C': 10, 'gamma': 0.001, 'kernel': 'rbf'}
0.885 (+/-0.047) for {'C': 10, 'gamma': 0.0001, 'kernel': 'rbf'}
0.906 (+/-0.054) for {'C': 100, 'gamma': 0.001, 'kernel': 'rbf'}
0.897 (+/-0.070) for {'C': 100, 'gamma': 0.0001, 'kernel': 'rbf'}
0.908 (+/-0.049) for {'C': 1000, 'gamma': 0.001, 'kernel': 'rbf'}
0.902 (+/-0.060) for {'C': 1000, 'gamma': 0.0001, 'kernel': 'rbf'}
0.900 (+/-0.050) for {'C': 1, 'kernel': 'linear'}
0.897 (+/-0.052) for {'C': 10, 'kernel': 'linear'}
0.897 (+/-0.056) for {'C': 100, 'kernel': 'linear'}
0.896 (+/-0.055) for {'C': 1000, 'kernel': 'linear'}

Detailed classification report using best parameters:

The model is trained on the training set.
The scores are c

ValueError: 'precision_micr0' is not a valid scoring value. Use sorted(sklearn.metrics.SCORERS.keys()) to get valid options.

In [6]:
# Fit a single rbf-kernel SVC with fixed hyper-parameters on the PCA training
# data.
# NOTE(review): C=100 / gamma=0.0001 is not the combination reported as best
# in the recorded grid-search output (C=1000, gamma=0.001) — confirm this
# choice is intentional.
clf = svm.SVC(kernel='rbf', C=100, gamma=0.0001).fit(x_train_pca, y_train_pca)

# Evaluate on the PCA test split and print the per-class report.
y_true = y_test_pca
y_pred = clf.predict(x_test_pca)
print(classification_report(y_true, y_pred))

              precision    recall  f1-score   support

           1       0.89      0.77      0.82       537
           2       0.52      0.43      0.47       532
           3       0.38      0.52      0.44       491
           4       0.59      0.89      0.71       496
           5       0.89      0.67      0.76       471
           6       0.58      0.37      0.45       420

    accuracy                           0.61      2947
   macro avg       0.64      0.61      0.61      2947
weighted avg       0.64      0.61      0.61      2947

