In [None]:
# import libraries
import pandas as pd
import numpy as np
from tensorflow.keras.datasets import mnist
from sklearn.preprocessing import MinMaxScaler
import warnings
warnings.filterwarnings("ignore")
from sklearn.svm import SVC, LinearSVC
from sklearn.metrics import f1_score, confusion_matrix, accuracy_score
from sklearn.model_selection import GridSearchCV

## Linear SVM Classifier

In [None]:
# load the MNIST dataset: load_data() returns a (train, test) pair,
# each of which is an (images, labels) pair
mnist_train, mnist_test = mnist.load_data()
x_train, y_train = mnist_train
x_test, y_test = mnist_test

In [None]:
# check shape of data: one line per array, images first and labels second
for split in (x_train, x_test, y_train, y_test):
    print(split.shape)

(60000, 28, 28)
(10000, 28, 28)
(60000,)
(10000,)


In [None]:
# reshape data: flatten every image into one row of pixel features.
# The feature count is derived from all trailing dimensions, so the cell is
# safe to re-run: on an already flattened (n, features) array it is a no-op
# instead of failing on a missing third dimension.
n_train_features = int(np.prod(x_train.shape[1:]))
n_test_features = int(np.prod(x_test.shape[1:]))
x_train = x_train.reshape(x_train.shape[0], n_train_features)
x_test = x_test.reshape(x_test.shape[0], n_test_features)

In [None]:
# scale the data: learn the per-feature min/max from the training set only,
# then apply that same mapping to the test set. Re-fitting the scaler on the
# test set would scale train and test differently and let test data influence
# the preprocessing.
scaler = MinMaxScaler()
x_train_scaled = scaler.fit_transform(x_train)
x_test_scaled = scaler.transform(x_test)

### sampling of data

In [None]:
# create dataframe of train data: the scaled pixel columns followed by the
# label as the LAST column (later cells rely on that position via iloc).
# The label column is named explicitly so it does not collide with pixel
# column 0, which would otherwise leave two columns both called 0.
df_x_train_scaled = pd.DataFrame(x_train_scaled)
df_y_train = pd.DataFrame(y_train, columns=["label"])
df_train = pd.concat([df_x_train_scaled, df_y_train], axis=1)
display(df_train.head())

# create dataframe of test data with the same layout
df_x_test_scaled = pd.DataFrame(x_test_scaled)
df_y_test = pd.DataFrame(y_test, columns=["label"])
df_test = pd.concat([df_x_test_scaled, df_y_test], axis=1)
display(df_test.head())

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,775,776,777,778,779,780,781,782,783,0.1
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,9


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,775,776,777,778,779,780,781,782,783,0.1
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,7
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4


In [None]:
# check shape of dataframes
print(df_train.shape)
print(df_test.shape)

(60000, 785)
(10000, 785)


# sample only 15000 data points to train because of resource constraints on my PC

In [None]:
# draw the training subset with a fixed seed so that every run of the
# notebook trains on the same rows and the reported scores are reproducible
sampled_data = df_train.sample(n=15000, random_state=42)

# extract x and y values (the label is the last column of the frame)
x_train_sample1 = sampled_data.iloc[:, :-1]
y_train_sample1 = sampled_data.iloc[:, -1]

# print shapes
print(sampled_data.shape)
print(x_train_sample1.shape)
print(y_train_sample1.shape)

(15000, 785)
(15000, 784)
(15000,)


In [None]:
# Build Linear SVM Classifier on the sampled training data.
# random_state pins the solver's internal shuffling so the fitted model
# (and every metric derived from y_pred) is the same on each run.
model = LinearSVC(random_state=42)
model.fit(x_train_sample1, y_train_sample1)

# predict labels for the full scaled test set
y_pred = model.predict(x_test_scaled)

In [None]:
# print the first 20 labels predicted by the linear SVM on the test data
print(y_pred[:20])

[7 2 1 0 4 1 4 1 6 9 0 6 9 0 1 5 9 7 3 4]


In [None]:
# compute the confusion matrix of the linear SVM predictions, then show it
# under a header line followed by one blank line
matrix = confusion_matrix(y_test, y_pred)
print("confusion metrix for MNIST Dataset", end="\n\n")
print(matrix)

confusion metrix for MNIST Dataset

[[ 943    0    3    3    2    9    9    3    6    2]
 [   0 1113    3    2    0    3    5    1    8    0]
 [  15   13  887   21    8    7   12   16   47    6]
 [   7    2   18  904    3   24    3   13   27    9]
 [   3    0    7    3  899    2    6    5    6   51]
 [  14    3    3   41    8  755   16   14   30    8]
 [   8    6    6    1    6   22  899    2    7    1]
 [   4   12   22    7    8    7    1  921    4   42]
 [  10   17    9   30   14   39   14   10  821   10]
 [   7    7    2   13   35   15    0   36   16  878]]


### Function to calculate F1 score from scratch

In [None]:
def classwise_f1_score(c_matrix):
    """Compute the F1 score of every class from a square confusion matrix.

    Parameters
    ----------
    c_matrix : sequence of rows (nested list or 2-D array) of counts
        Assumed to use the layout where row ``i`` holds the samples whose
        actual class is ``i`` and column ``j`` the samples predicted as ``j``.
        F1 is symmetric in precision and recall, so a transposed matrix
        yields the same scores.

    Returns
    -------
    list of float
        One F1 score per class, in row order. A class with no actual samples,
        no predicted samples, or no correct predictions scores 0.0 instead of
        raising a division-by-zero error.
    """
    class_wise_f1_score_1 = []
    for i, row in enumerate(c_matrix):
        true_positives = row[i]
        actual_total = sum(row)                         # row sum: samples of class i
        predicted_total = sum(r[i] for r in c_matrix)   # full column sum: predictions of class i

        recall = true_positives / actual_total if actual_total else 0.0
        precision = true_positives / predicted_total if predicted_total else 0.0

        denominator = precision + recall
        f1 = 2 * precision * recall / denominator if denominator else 0.0
        class_wise_f1_score_1.append(f1)
    return class_wise_f1_score_1


# call the from-scratch function on the confusion matrix and keep the
# class-wise F1 scores for the comparison with sklearn (nothing is printed here)
class_wise_f1_score_1 = classwise_f1_score(matrix)

In [None]:
# class-wise F1 score from sklearn: average=None returns one score per class
f1_scores = f1_score(y_test, y_pred, average=None)
class_wise_f1_score_2 = list(f1_scores)

# show the scores given by the sklearn built-in function
print(class_wise_f1_score_2)

[0.9472626820693119, 0.9644714038128249, 0.890562248995984, 0.8884520884520885, 0.9150127226463104, 0.8507042253521128, 0.934997399895996, 0.8989751098096632, 0.843782117163412, 0.871031746031746]


In [None]:
# check both are same or not; compare with a floating-point tolerance, since
# two different implementations may round differently in the last bits and
# an exact == on floats would then report a spurious mismatch
print(np.allclose(class_wise_f1_score_1, class_wise_f1_score_2))

True


In [None]:
# report the accuracy score of the linear SVM on the test set
acc = accuracy_score(y_true=y_test, y_pred=y_pred)
print("accuracy score of linear SVM is :", acc)

accuracy score of linear SVM is : 0.902


# Non Linear SVM

In [None]:
# Non linear model with RBF kernel
model1 = SVC(kernel='rbf')
model1.fit(x_train_sample1, y_train_sample1)

# predict with model1 itself (not the linear `model` from the earlier cell)
# so that acc1 really measures the RBF-kernel classifier
y_pred1 = model1.predict(x_test_scaled)
acc1 = accuracy_score(y_test, y_pred1)
print("accuracy score of Non linear SVM with RBF kernel is :", acc1)

accuracy score of Non linear SVM with RBF kernel is : 0.902


In [None]:
# Non linear model with Polynomial kernel
model2 = SVC(kernel='poly')
model2.fit(x_train_sample1, y_train_sample1)

# predict with model2 itself (not the linear `model` from the earlier cell)
# so that acc2 really measures the polynomial-kernel classifier
y_pred2 = model2.predict(x_test_scaled)
acc2 = accuracy_score(y_test, y_pred2)
print("accuracy score of Non linear SVM with Polynomial kernel is :", acc2)

accuracy score of Non linear SVM with Polynomial kernel is : 0.902


# Find the optimal values of cost C and gamma using Grid Search Cross Validation

In [None]:
# candidate values for the cost C and the RBF width gamma
param_grid = dict(C=[10, 100], gamma=[0.1, 0.01], kernel=['rbf'])

# cross-validated search over every C/gamma combination; refit=True retrains
# the best combination on all of the sampled training data afterwards
grid = GridSearchCV(estimator=SVC(), param_grid=param_grid, refit=True, verbose=2)
grid.fit(x_train_sample1, y_train_sample1)

# predictions of the refitted best model on the test set
y_pred_grid = grid.predict(x_test_scaled)

Fitting 5 folds for each of 4 candidates, totalling 20 fits
[CV] END ........................C=10, gamma=0.1, kernel=rbf; total time= 3.0min
[CV] END ........................C=10, gamma=0.1, kernel=rbf; total time= 3.2min
[CV] END ........................C=10, gamma=0.1, kernel=rbf; total time= 3.1min
[CV] END ........................C=10, gamma=0.1, kernel=rbf; total time= 3.0min
[CV] END ........................C=10, gamma=0.1, kernel=rbf; total time= 3.1min
[CV] END .......................C=10, gamma=0.01, kernel=rbf; total time=  30.4s
[CV] END .......................C=10, gamma=0.01, kernel=rbf; total time=  28.9s
[CV] END .......................C=10, gamma=0.01, kernel=rbf; total time=  29.2s
[CV] END .......................C=10, gamma=0.01, kernel=rbf; total time=  26.8s
[CV] END .......................C=10, gamma=0.01, kernel=rbf; total time=  27.3s
[CV] END .......................C=100, gamma=0.1, kernel=rbf; total time= 2.8min
[CV] END .......................C=100, gamma=0.1,

In [None]:
# print the parameter combination selected by the grid search
print(grid.best_params_)

{'C': 100, 'gamma': 0.01, 'kernel': 'rbf'}


# Build the final model with Best hyperparameters

In [None]:
# pull the tuned values out of the grid search result
best_c = grid.best_params_['C']
best_gamma = grid.best_params_['gamma']

# train an RBF SVM with those values and predict on the test set
final_model = SVC(kernel='rbf', C=best_c, gamma=best_gamma)
final_model.fit(x_train_sample1, y_train_sample1)
y_pred_final = final_model.predict(x_test_scaled)

In [None]:
# compute the confusion matrix of the final model (this rebinds `matrix`),
# then show it under a header line followed by one blank line
matrix = confusion_matrix(y_test, y_pred_final)
print("confusion metrix for MNIST Dataset using Best parameters", end="\n\n")
print(matrix)

confusion metrix for MNIST Dataset using Best parameters

[[ 972    0    1    1    0    2    1    1    2    0]
 [   0 1124    2    1    1    1    4    0    2    0]
 [   6    2  999    2    2    0    1    8   10    2]
 [   0    2    7  979    0    9    1    3    7    2]
 [   1    0    3    0  961    0    5    1    1   10]
 [   4    0    0   16    0  865    3    0    3    1]
 [   7    2    3    0    3    7  935    0    1    0]
 [   1    4   12    4    4    0    0  991    0   12]
 [   5    1    4    8    5    6    3    4  934    4]
 [   5    8    2    9   13    3    1    7    3  958]]


In [None]:
# accuracy of the tuned model on the test set
acc_final = accuracy_score(y_true=y_test, y_pred=y_pred_final)
print("accuracy score of SVC on best choosen parameters is :", acc_final)

accuracy score of SVC on best choosen parameters is : 0.9718


## Build SVM on Support vectors Data

In [None]:
# extract data of only support vectors: support_ holds their row positions in
# the training sample, support_vectors_ the corresponding feature rows
index_of_support_vectors = final_model.support_
support_vectors_x = final_model.support_vectors_

# look up the label (last column of the sampled frame) of each support vector
support_vectors_y = [sampled_data.iloc[i, -1] for i in index_of_support_vectors]

# create dataframe of X and y values of support vectors
df_support_vector_x = pd.DataFrame(support_vectors_x)
df_support_vector_y = pd.DataFrame(support_vectors_y, columns=['label'])
df_support_vector = pd.concat([df_support_vector_x, df_support_vector_y], axis=1)

In [None]:
# split the data into train and test
from sklearn.model_selection import train_test_split

# Pass the labels as the 1-D 'label' Series: fitting SVC on a single-column
# DataFrame makes scikit-learn reshape y and emit a DataConversionWarning.
# NOTE(review): this cell rebinds `y_train` and `final_model`, which earlier
# cells also use; re-run the notebook from the top before re-running them.
X_train, X_test1, y_train, y_test1 = train_test_split(
    df_support_vector_x,
    df_support_vector_y['label'],
    test_size=0.3,
    random_state=20,
)

# train a model with new data (default C and gamma, RBF kernel)
final_model = SVC(kernel='rbf')
final_model.fit(X_train, y_train)

# predict on both splits so train and test accuracy can be compared
y_pred_final1 = final_model.predict(X_train)
y_pred_final2 = final_model.predict(X_test1)

In [None]:
# accuracy of the support-vector model on its own training split
acc_final1 = accuracy_score(y_true=y_train, y_pred=y_pred_final1)
# accuracy of the same model on the held-out split of support vectors
acc_final2 = accuracy_score(y_true=y_test1, y_pred=y_pred_final2)

print("accuracy score of SVC on Support vectors on train data is :", acc_final1)
print("accuracy score of SVC on Support vectors on test data is :", acc_final2)

accuracy score of SVC on Support vectors on train data is : 0.9526479750778816
accuracy score of SVC on Support vectors on test data is : 0.8569353667392883


# Observation with previous models

Training accuracy is higher than that of all the other models except the one trained with the best hyperparameters.

Test accuracy is lower than that of all the other models.

Because splitting the support-vector data changes the final hyperplane of the SVM, the accuracy changes accordingly.