# Support Vector Machine Modeling

In [18]:
# import libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report
from imblearn.over_sampling import SMOTE

# 1.1 Load Data

In [11]:
# read the training and validation splits from the working directory
train, val = (pd.read_csv(csv_path) for csv_path in ("train.csv", "val.csv"))

# 1.2 Preprocessing

First, I need to make sure every class has at least 6 rows: with its default `k_neighbors=5`, SMOTE needs each minority sample to have 5 other samples of the same class to interpolate with.

In [12]:
# count how many training rows belong to each class
train["Type"].value_counts()

2    48
1    34
7    19
3    10
5     6
6     3
Name: Type, dtype: int64

In [13]:
# Top up every class that has fewer than MIN_ROWS_PER_CLASS rows by repeating
# its existing rows. For the counts shown above this duplicates the 3 rows of
# class 6 once (3 -> 6). Because only the shortfall is added, re-running this
# cell does not keep growing the class.
MIN_ROWS_PER_CLASS = 6

class_counts = train["Type"].value_counts()
extra_rows = []
for label, count in class_counts[class_counts < MIN_ROWS_PER_CLASS].items():
    count = int(count)  # plain int so list repetition below is unambiguous
    shortfall = MIN_ROWS_PER_CLASS - count
    class_rows = train[train["Type"] == label]
    repeats = -(-shortfall // count)  # ceiling division
    extra_rows.append(pd.concat([class_rows] * repeats).iloc[:shortfall])

# ignore_index avoids duplicate index labels coming from the repeated rows
train = pd.concat([train, *extra_rows], axis=0, ignore_index=True)

Here I will separate the features from the target and standardize the features (the scaler is fit on the training set only).

In [14]:
# separate the "Type" target from the feature columns for each split
y_train = train["Type"]
X_train = train.drop(columns="Type")

y_val = val["Type"]
X_val = val.drop(columns="Type")

In [15]:
# learn the scaling statistics from the training features only,
# then apply the same transformation to both splits
sc = StandardScaler().fit(X_train)
X_train, X_val = sc.transform(X_train), sc.transform(X_val)

Now I will apply SMOTE to balance the classes.

In [16]:
# SMOTE generates synthetic samples at random; fixing the seed makes the
# resampled training set (and every score computed from it) reproducible
sm = SMOTE(random_state=42)
X_train, y_train = sm.fit_resample(X_train, y_train)

# 2.1 Hyperparameter Tuning

To find the optimal parameters I will use a grid search.

In [21]:
# coarse search: try C across several orders of magnitude
params = {"C": [0.01, 0.1, 1, 10]}
gscv = GridSearchCV(estimator=SVC(), param_grid=params).fit(X_train, y_train)

gscv.best_params_



{'C': 10}

Now I will do a finer-grained search around the best value found above.

In [23]:
# narrower grid of C values centred on 10
params = dict(C=[9, 10, 11, 15])
gscv = GridSearchCV(estimator=SVC(), param_grid=params)
gscv.fit(X_train, y_train)

gscv.best_params_



{'C': 10}

In [24]:
# keep a reference to the best estimator found by the grid search
# (this only binds a variable; nothing is written to disk)
model = gscv.best_estimator_

# 3.1 Evaluation

Here I will see how well the model does on the training and validation sets.

In [25]:
# `model` is gscv.best_estimator_. GridSearchCV (refit=True by default) has
# already refit it on the full X_train / y_train, so no extra fit is needed.

# predictions on the training set (as resampled by SMOTE) and the validation set
train_pred = model.predict(X_train)
val_pred = model.predict(X_val)

# per-class precision / recall / F1 for each split
print("training performance")
print(classification_report(y_train, train_pred))

print("validation performance")
print(classification_report(y_val, val_pred))

training performance
              precision    recall  f1-score   support

           1       0.78      0.94      0.85        48
           2       0.95      0.75      0.84        48
           3       0.94      0.94      0.94        48
           5       1.00      1.00      1.00        48
           6       1.00      1.00      1.00        48
           7       1.00      1.00      1.00        48

    accuracy                           0.94       288
   macro avg       0.94      0.94      0.94       288
weighted avg       0.94      0.94      0.94       288

validation performance
              precision    recall  f1-score   support

           1       0.94      0.89      0.91        18
           2       0.75      1.00      0.86         6
           3       0.80      1.00      0.89         4
           5       1.00      0.75      0.86         4
           6       1.00      1.00      1.00         2
           7       1.00      0.83      0.91         6

    accuracy                     

