In [1]:
# In machine learning, the "dimension" of a dataset is its number of features.

In [2]:
import numpy as np
import pandas as pd
from sklearn.datasets import load_digits

from sklearn.svm import SVC
from sklearn.decomposition import PCA
from sklearn.pipeline import make_pipeline
from sklearn.model_selection import GridSearchCV, cross_validate, cross_val_score 
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report, confusion_matrix


In [3]:
# Load the digits dataset that ships with scikit-learn and display its
# feature matrix (`.data`); the labels are read later from `.target`.
digits_data = load_digits()
digits_data.data

array([[ 0.,  0.,  5., ...,  0.,  0.,  0.],
       [ 0.,  0.,  0., ..., 10.,  0.,  0.],
       [ 0.,  0.,  0., ..., 16.,  9.,  0.],
       ...,
       [ 0.,  0.,  1., ...,  6.,  0.,  0.],
       [ 0.,  0.,  2., ..., 12.,  0.,  0.],
       [ 0.,  0., 10., ..., 12.,  1.,  0.]])

In [16]:
# Shape of the feature matrix, (n_samples, n_features): the second entry is
# the number of features, i.e. the dimension that PCA reduces further down.
digits_data.data.shape

(1797, 64)

In [4]:
scoring = ['precision_macro', 'recall_macro', 'accuracy']

def evaluate_model(model, X, Y):
    
    print("\nCross Validate:\n")
    cv = cross_validate(model, X, Y, scoring=scoring, cv=5)
    print("\nCross Validation:",model,"\n")
    print("Mean Accuracy:",cv['test_accuracy'].mean())
    print("Mean Precision:",cv['test_precision_macro'].mean())
    print("Mean Recall:",cv['test_recall_macro'].mean())
    print("\n\n")

In [5]:
def get_reports(model, X, Y):
    """Print classification metrics for ``model``'s predictions on ``X``.

    ``model.predict(X)`` is compared against the true labels ``Y``. Accuracy,
    macro-averaged precision / recall / F1, the classification report and the
    confusion matrix are printed in that order. Nothing is returned.
    """
    predictions = model.predict(X)

    print("\nAccuracy:", accuracy_score(Y, predictions))

    # The three macro-averaged metrics share one call signature, so print them in a loop.
    macro_metrics = (
        ("Precision Score:", precision_score),
        ("Recall Score:", recall_score),
        ("f1 Score:", f1_score),
    )
    for label, metric in macro_metrics:
        print(label, metric(Y, predictions, average='macro', zero_division=0))

    report = classification_report(Y, predictions, zero_division=0)
    print("\n \nClassification Report:\n\n", report)

    print("\n \nConfusion Matrix:\n")
    print(confusion_matrix(Y, predictions))

## SVC

In [6]:
# Baseline: an RBF-kernel SVC fitted on the full, un-reduced feature matrix.
# Every sample is used for fitting; no hold-out split is made in this cell.
svc = SVC(kernel='rbf')
svc.fit(digits_data.data, digits_data.target)

SVC()

In [7]:
# NOTE: get_reports scores `svc` on the very data it was fitted on, so its
# numbers are training-set metrics. The cross-validated means printed by
# evaluate_model are the out-of-sample estimate to compare against.
get_reports(svc, digits_data.data, digits_data.target)
evaluate_model(svc, digits_data.data, digits_data.target)


Accuracy: 0.996661101836394
Precision Score: 0.9967027613595763
Recall Score: 0.9966344574965265
f1 Score: 0.9966577171492166

 
Classification Report:

               precision    recall  f1-score   support

           0       1.00      1.00      1.00       178
           1       0.99      1.00      0.99       182
           2       1.00      1.00      1.00       177
           3       0.99      1.00      1.00       183
           4       1.00      1.00      1.00       181
           5       0.99      0.99      0.99       182
           6       1.00      1.00      1.00       181
           7       0.99      1.00      1.00       179
           8       1.00      0.99      0.99       174
           9       0.99      0.98      0.99       180

    accuracy                           1.00      1797
   macro avg       1.00      1.00      1.00      1797
weighted avg       1.00      1.00      1.00      1797


 
Confusion Matrix:

[[178   0   0   0   0   0   0   0   0   0]
 [  0 182   0   0   0

## SVC with PCA

In [9]:
# Fixed seed so the PCA projection (and therefore the grid-search scores) is
# reproducible even when scikit-learn selects its randomized SVD solver.
PCA_RANDOM_STATE = 42

# Reduce the 64 pixel features to 24 principal components before the SVC.
pca = PCA(n_components=24, random_state=PCA_RANDOM_STATE)

# Use a fresh, unfitted SVC rather than the baseline `svc` instance, so the
# pipeline does not alias an estimator that was already fitted on the raw data.
# make_pipeline names the step 'svc' from the class name, so the 'svc__*'
# keys in the parameter grid still resolve.
pca_svc_pipeline = make_pipeline(pca, SVC(kernel='rbf'))
param_grid = {'svc__C':[1, 5, 10, 50],
              'svc__gamma':[0.001, 0.005, 1.0]}

# 5-fold grid search over C and gamma, selecting on macro-averaged recall.
grid_search = GridSearchCV(pca_svc_pipeline, param_grid, cv=5, scoring='recall_macro', n_jobs=-1,verbose=3)
grid_search.fit(digits_data.data, digits_data.target)

# `model` is bound once, to the best pipeline found by the search.
model = grid_search.best_estimator_

Fitting 5 folds for each of 12 candidates, totalling 60 fits


In [11]:
# NOTE: `model` is grid_search.best_estimator_, and grid_search was fitted on
# this same data, so the scores printed by get_reports are in-sample.
# evaluate_model re-runs cross-validation with hyperparameters that were
# already tuned on these samples, so its means are presumably somewhat
# optimistic too -- confirm on data held out from the search.
print("Model:", model)
get_reports(model, digits_data.data, digits_data.target)
evaluate_model(model, digits_data.data, digits_data.target)

Model: Pipeline(steps=[('pca', PCA(n_components=24)), ('svc', SVC(C=10, gamma=0.001))])

Accuracy: 1.0
Precision Score: 1.0
Recall Score: 1.0
f1 Score: 1.0

 
Classification Report:

               precision    recall  f1-score   support

           0       1.00      1.00      1.00       178
           1       1.00      1.00      1.00       182
           2       1.00      1.00      1.00       177
           3       1.00      1.00      1.00       183
           4       1.00      1.00      1.00       181
           5       1.00      1.00      1.00       182
           6       1.00      1.00      1.00       181
           7       1.00      1.00      1.00       179
           8       1.00      1.00      1.00       174
           9       1.00      1.00      1.00       180

    accuracy                           1.00      1797
   macro avg       1.00      1.00      1.00      1797
weighted avg       1.00      1.00      1.00      1797


 
Confusion Matrix:

[[178   0   0   0   0   0   0   0  