# SVM with Linearly Non-Separable Data

In [3]:
# import the necessary packages
import time
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import classification_report,confusion_matrix

In [4]:
# Load the whole table from the CSV file.
df = pd.read_csv('mnist.csv')

# Target: the 'label' column on its own.
label_y = df['label']

# Features: every remaining column once 'label' is removed.
df_feat = df.drop(columns='label')

In [6]:
# Fixed seed so the train/test split -- and therefore every accuracy and
# confusion matrix printed below -- is identical on each Restart & Run All.
SEED = 42

# Values of the SVC penalty parameter C to compare.
C_values = [0.1, 1, 10]

# Hold out half of the rows for evaluation.
X_train, X_test, y_train, y_test = train_test_split(
    df_feat, label_y, test_size=0.5, random_state=SEED
)

for c in C_values:
    # perf_counter is a monotonic, high-resolution clock, so the measured
    # interval cannot be distorted by system clock adjustments.
    start_time = time.perf_counter()
    model = SVC(C=c)                     # only C varies; kernel is left at the library default
    model.fit(X_train, y_train)          # fit the model
    # Take a single reading right after fit() so minutes and seconds are
    # derived from the same elapsed value and exclude printing overhead.
    elapsed = time.perf_counter() - start_time
    minutes, seconds = divmod(elapsed, 60)

    print("-----------------",c,"-----------------")
    print("---%s mins %s secs ---" % (minutes, seconds))  # time taken to train the model

    predictions = model.predict(X_test)  # predict the held-out half
    report = classification_report(y_test, predictions, output_dict=True)
    print("Accuracy: ",report['accuracy'])
    print("confusion_matrix: ",confusion_matrix(y_test,predictions))

    

----------------- 0.1 -----------------
---2.0 mins 43.12374210357666 secs ---
Accuracy:  0.9426666666666667
confusion_matrix:  [[2009    0    2    1    2    8   14    1   11    1]
 [   0 2301   15    4    3    6    2    2   10    2]
 [  10    5 1909   14   21    4   12   23   31    5]
 [   3   12   47 1977    2   77    7   17   25   16]
 [   3    8   12    0 1897    1   17    3    3   56]
 [  12   11    4   31   11 1801   26    3   12    3]
 [  21    5    8    0    8   17 2033    0    6    0]
 [   1   25   21    4   32    4    0 2037    3   68]
 [   7   21   12   33   14   36   11    7 1908   23]
 [  14    7   10   36   53    9    2   41   14 1924]]
----------------- 1 -----------------
---1.0 mins 20.07852053642273 secs ---
Accuracy:  0.9712380952380952
confusion_matrix:  [[2029    0    2    1    2    0    7    1    6    1]
 [   0 2314   15    2    3    2    1    2    4    2]
 [   6    2 1981    4   12    1    3   15    8    2]
 [   2    3   28 2084    1   31    2    8   13   11]
 [ 

Here we can see that the SVM with C (the regularization, or penalty, parameter) equal to 10 gave the best accuracy, 0.976, and was also the fastest to train (1 min 6 sec).

So, out of all three kernels, rbf was the worst performer, while the polynomial kernel with degrees 2 and 4 achieved a high score of 0.96, with accuracy decreasing as the degree increased. We got the highest accuracy with C=10 in the SVM.