In [None]:
'''
Documentation:
Topic: SVM classifier using sklearn digts dataset.
Dataset: digits dataset (from sklearn.datasets import load_digits)
Methodology:
Part 1: 
Read and parse the initial dataset
Load it into our pandas dataframe
Part 2: 
Use 80% of samples as training data size
Part 3:
Measure accuracy of your model using different kernels such as rbf, poly and linear
Tune your model further using regularization and gamma parameters and try to come up with highest accuracy score.
'''

In [1]:
#importing all necessary libraries
import pandas as pd
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC 
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score

In [2]:
digits = load_digits()
df=pd.DataFrame(digits.data)
# Read dataset to pandas dataframe
df['target']=digits.target
# Display the dataset with feature information
df.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,55,56,57,58,59,60,61,62,63,target
0,0.0,0.0,5.0,13.0,9.0,1.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,6.0,13.0,10.0,0.0,0.0,0.0,0
1,0.0,0.0,0.0,12.0,13.0,5.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,11.0,16.0,10.0,0.0,0.0,1
2,0.0,0.0,0.0,4.0,15.0,12.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,3.0,11.0,16.0,9.0,0.0,2
3,0.0,0.0,7.0,15.0,13.0,1.0,0.0,0.0,0.0,8.0,...,0.0,0.0,0.0,7.0,13.0,13.0,9.0,0.0,0.0,3
4,0.0,0.0,0.0,1.0,11.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,2.0,16.0,4.0,0.0,0.0,4


In [3]:
X=df.drop('target',axis=1)
y=df.target
# create training and test splits
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.20)
#Use 80% of samples as training data size (20% as testing)

In [4]:
print("For GAUSSIAN (RBF) kernel\n")
#Training and prediction
svclassifier = SVC(kernel='rbf') 
svclassifier.fit(X_train, y_train)
y_pred = svclassifier.predict(X_test)
#Evaluation
print(confusion_matrix(y_test,y_pred))
print(classification_report(y_test,y_pred))
print('\nAccuracy Score with GAUSSIAN kernel:',accuracy_score(y_test,y_pred)) 

For GAUSSIAN (RBF) kernel

[[40  0  0  0  0  0  0  0  0  0]
 [ 0 40  0  0  0  0  0  0  0  0]
 [ 0  0 35  0  0  0  0  0  0  0]
 [ 0  0  0 40  0  0  0  0  0  0]
 [ 0  0  0  0 38  0  0  0  0  0]
 [ 0  0  0  0  0 26  0  0  0  0]
 [ 0  0  0  0  0  0 29  0  0  0]
 [ 0  0  0  0  0  0  0 33  0  0]
 [ 0  1  0  0  0  0  0  0 41  0]
 [ 0  0  0  1  0  0  0  0  0 36]]
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        40
           1       0.98      1.00      0.99        40
           2       1.00      1.00      1.00        35
           3       0.98      1.00      0.99        40
           4       1.00      1.00      1.00        38
           5       1.00      1.00      1.00        26
           6       1.00      1.00      1.00        29
           7       1.00      1.00      1.00        33
           8       1.00      0.98      0.99        42
           9       1.00      0.97      0.99        37

    accuracy                           0.99   

In [5]:
print("For LINEAR kernel\n")
#Training and prediction
svclassifier = SVC(kernel='linear') 
svclassifier.fit(X_train, y_train)
y_pred = svclassifier.predict(X_test)
#Evaluation
print(confusion_matrix(y_test,y_pred))
print(classification_report(y_test,y_pred))
print('\nAccuracy Score with LINEAR kernel:',accuracy_score(y_test,y_pred)) 

For LINEAR kernel

[[40  0  0  0  0  0  0  0  0  0]
 [ 0 40  0  0  0  0  0  0  0  0]
 [ 0  0 35  0  0  0  0  0  0  0]
 [ 0  0  0 40  0  0  0  0  0  0]
 [ 0  0  0  0 38  0  0  0  0  0]
 [ 0  0  0  0  0 26  0  0  0  0]
 [ 0  0  0  0  0  0 29  0  0  0]
 [ 0  0  0  0  0  0  0 33  0  0]
 [ 0  1  0  0  1  0  0  0 39  1]
 [ 0  0  0  1  0  0  0  0  0 36]]
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        40
           1       0.98      1.00      0.99        40
           2       1.00      1.00      1.00        35
           3       0.98      1.00      0.99        40
           4       0.97      1.00      0.99        38
           5       1.00      1.00      1.00        26
           6       1.00      1.00      1.00        29
           7       1.00      1.00      1.00        33
           8       1.00      0.93      0.96        42
           9       0.97      0.97      0.97        37

    accuracy                           0.99       360


In [6]:
print("For POLYNOMIAL kernel\n")
#Training and prediction
svclassifier = SVC(kernel='poly',degree=9) 
svclassifier.fit(X_train, y_train)
y_pred = svclassifier.predict(X_test)
#Evaluation
print(confusion_matrix(y_test,y_pred))
print(classification_report(y_test,y_pred))
print('\nAccuracy Score with POLYNOMIAL kernel:',accuracy_score(y_test,y_pred)) 

For POLYNOMIAL kernel

[[39  0  0  0  0  0  1  0  0  0]
 [ 0 39  0  0  0  0  0  0  1  0]
 [ 0  0 34  0  0  0  0  1  0  0]
 [ 0  0  0 40  0  0  0  0  0  0]
 [ 0  0  0  0 38  0  0  0  0  0]
 [ 0  0  0  0  0 26  0  0  0  0]
 [ 0  0  0  0  0  0 29  0  0  0]
 [ 0  0  0  0  0  0  0 33  0  0]
 [ 0  0  0  2  0  0  0  0 40  0]
 [ 0  0  0  2  0  0  0  0  0 35]]
              precision    recall  f1-score   support

           0       1.00      0.97      0.99        40
           1       1.00      0.97      0.99        40
           2       1.00      0.97      0.99        35
           3       0.91      1.00      0.95        40
           4       1.00      1.00      1.00        38
           5       1.00      1.00      1.00        26
           6       0.97      1.00      0.98        29
           7       0.97      1.00      0.99        33
           8       0.98      0.95      0.96        42
           9       1.00      0.95      0.97        37

    accuracy                           0.98       

In [7]:
print("For SIGMOID kernel\n")
#Training and prediction
svclassifier = SVC(kernel='sigmoid') 
svclassifier.fit(X_train, y_train)
y_pred = svclassifier.predict(X_test)
#Evaluation
print(confusion_matrix(y_test,y_pred))
print(classification_report(y_test,y_pred))
print('\nAccuracy Score with SIGMOID kernel:',accuracy_score(y_test,y_pred)) 

For SIGMOID kernel

[[38  0  0  0  2  0  0  0  0  0]
 [ 0 34  0  0  1  0  1  2  0  2]
 [ 0  0 35  0  0  0  0  0  0  0]
 [ 0  2  0 38  0  0  0  0  0  0]
 [ 1  0  0  0 35  0  1  1  0  0]
 [ 0  0  0  0  0 26  0  0  0  0]
 [ 0  0  0  0  0  0 29  0  0  0]
 [ 0  0  0  0  0  0  0 33  0  0]
 [ 0  1  1  0  0  0  0  1 35  4]
 [ 0  1  0  0  0  0  0  1  0 35]]
              precision    recall  f1-score   support

           0       0.97      0.95      0.96        40
           1       0.89      0.85      0.87        40
           2       0.97      1.00      0.99        35
           3       1.00      0.95      0.97        40
           4       0.92      0.92      0.92        38
           5       1.00      1.00      1.00        26
           6       0.94      1.00      0.97        29
           7       0.87      1.00      0.93        33
           8       1.00      0.83      0.91        42
           9       0.85      0.95      0.90        37

    accuracy                           0.94       360

In [8]:
#Tuning with gamma parameters

print("For GAUSSIAN (RBF) kernel with gamma as 0.05\n")
#Training and prediction
svclassifier = SVC(kernel='rbf',gamma=0.05) 
svclassifier.fit(X_train, y_train)
y_pred = svclassifier.predict(X_test)
#Evaluation
print(confusion_matrix(y_test,y_pred))
print(classification_report(y_test,y_pred,zero_division=0))
print('\nAccuracy Score with GAUSSIAN kernel with gamma as 0.05:',accuracy_score(y_test,y_pred)) 

For GAUSSIAN (RBF) kernel with gamma as 0.05

[[ 0  0  0  0  0 40  0  0  0  0]
 [ 0  0  0  0  0 40  0  0  0  0]
 [ 0  0  0  0  0 35  0  0  0  0]
 [ 0  0  0  0  0 40  0  0  0  0]
 [ 0  0  0  0  0 38  0  0  0  0]
 [ 0  0  0  0  0 26  0  0  0  0]
 [ 0  0  0  0  0 29  0  0  0  0]
 [ 0  0  0  0  0 33  0  0  0  0]
 [ 0  0  0  0  0 42  0  0  0  0]
 [ 0  0  0  0  0 37  0  0  0  0]]
              precision    recall  f1-score   support

           0       0.00      0.00      0.00        40
           1       0.00      0.00      0.00        40
           2       0.00      0.00      0.00        35
           3       0.00      0.00      0.00        40
           4       0.00      0.00      0.00        38
           5       0.07      1.00      0.13        26
           6       0.00      0.00      0.00        29
           7       0.00      0.00      0.00        33
           8       0.00      0.00      0.00        42
           9       0.00      0.00      0.00        37

    accuracy               

In [9]:
print("\nFor GAUSSIAN (RBF) kernel with gamma as 0.1\n")
#Training and prediction
svclassifier = SVC(kernel='rbf',gamma=0.1) 
svclassifier.fit(X_train, y_train)
y_pred = svclassifier.predict(X_test)
#Evaluation
print(confusion_matrix(y_test,y_pred))
print(classification_report(y_test,y_pred,zero_division=0))
print('\nAccuracy Score with GAUSSIAN kernel with gamma as 0.1:',accuracy_score(y_test,y_pred)) 


For GAUSSIAN (RBF) kernel with gamma as 0.1

[[ 0  0  0  0  0 40  0  0  0  0]
 [ 0  0  0  0  0 40  0  0  0  0]
 [ 0  0  0  0  0 35  0  0  0  0]
 [ 0  0  0  0  0 40  0  0  0  0]
 [ 0  0  0  0  0 38  0  0  0  0]
 [ 0  0  0  0  0 26  0  0  0  0]
 [ 0  0  0  0  0 29  0  0  0  0]
 [ 0  0  0  0  0 33  0  0  0  0]
 [ 0  0  0  0  0 42  0  0  0  0]
 [ 0  0  0  0  0 37  0  0  0  0]]
              precision    recall  f1-score   support

           0       0.00      0.00      0.00        40
           1       0.00      0.00      0.00        40
           2       0.00      0.00      0.00        35
           3       0.00      0.00      0.00        40
           4       0.00      0.00      0.00        38
           5       0.07      1.00      0.13        26
           6       0.00      0.00      0.00        29
           7       0.00      0.00      0.00        33
           8       0.00      0.00      0.00        42
           9       0.00      0.00      0.00        37

    accuracy               

In [10]:
# the above cells give a red box due to division of 0 by 0. we have used zero_division=0 to remove the warnings