In [1]:
import numpy as np

import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap

from sklearn import datasets, metrics
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

from sklearn.svm import SVC
from sklearn.linear_model import SGDClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn import tree
from sklearn.neural_network import MLPClassifier
from sklearn.naive_bayes import GaussianNB

from sklearn.decomposition import PCA
from matplotlib.colors import ListedColormap

from time import time

np.random.seed(42)

In [2]:
# Load the scikit-learn digits dataset and hold out 20% of the samples for testing.
digits = datasets.load_digits()
# stratify=digits.target keeps the per-class proportions equal in both splits.
# random_state pins the shuffle so that re-running this cell on its own
# (without first re-running the cell that calls np.random.seed) still yields
# the same train/test partition.
X_train, X_test, y_train, y_test = train_test_split(
    digits.data,
    digits.target,
    test_size=0.2,
    stratify=digits.target,
    random_state=42,
)

In [None]:
# Report the dimensions of the image array that ships with the dataset.
print(np.shape(digits.images))

In [3]:
# Display name -> unfitted estimator, in the order they will be benchmarked.
# "Linear SVM" is an SGDClassifier with default settings (presumably relying
# on its default loss being the linear-SVM hinge loss — confirm against the
# installed scikit-learn version).
dict_classifiers = dict([
    ("Nearest Neighbors", KNeighborsClassifier(weights='distance', n_neighbors=5)),
    ("Linear SVM", SGDClassifier()),
    ("Decision Tree", tree.DecisionTreeClassifier()),
    ("Naive Bayes", GaussianNB()),
    ("Neural Network", MLPClassifier(alpha=1)),
])

In [4]:
# Fit every candidate on the training split, score it on the held-out split,
# and report accuracy together with wall-clock fit / predict durations.
for name, classifier in dict_classifiers.items():
    fit_began = time()
    classifier.fit(X_train, y_train)  # fits in place; the dict entry is now trained
    fit_seconds = time() - fit_began

    predict_began = time()
    predicted = classifier.predict(X_test)
    predict_seconds = time() - predict_began

    # Fraction of test samples whose predicted label equals the true label.
    accuracy = np.mean(predicted == y_test)

    print("Accuracy of {} : {}".format(name, str(accuracy)))
    print("Training time of {} : {}".format(name, str(fit_seconds)))
    print("Testing time of {} : {}".format(name, str(predict_seconds)))

Accuracy of Nearest Neighbors : 0.983333333333
Training time of Nearest Neighbors : 0.005032777786254883
Testing time of Nearest Neighbors : 0.06562209129333496
Accuracy of Linear SVM : 0.947222222222
Training time of Linear SVM : 0.15979480743408203
Testing time of Linear SVM : 0.0034601688385009766
Accuracy of Decision Tree : 0.838888888889
Training time of Decision Tree : 0.021347761154174805
Testing time of Decision Tree : 0.00032901763916015625
Accuracy of Naive Bayes : 0.811111111111
Training time of Naive Bayes : 0.0034317970275878906
Testing time of Naive Bayes : 0.0023131370544433594




Accuracy of Neural Network : 0.983333333333
Training time of Neural Network : 1.9955041408538818
Testing time of Neural Network : 0.0009899139404296875


In [None]:
# Reduce the digit features to two principal components for plotting.
pca = PCA(n_components=2)
# Learn the projection from the training data only ...
reduced_X_train_pca = pca.fit_transform(X_train)
# ... and apply that SAME projection to the test data. Calling fit_transform
# here would re-fit PCA on X_test, putting the test points in a different
# coordinate system from the one the classifiers are trained in (and leaking
# test-set statistics into the preprocessing).
reduced_X_test_pca = pca.transform(X_test)

In [None]:
# Echo the (un-reduced) training feature matrix returned by train_test_split
# as this cell's output.
X_train

In [None]:
# One estimator per model family, each trained on the 2-D PCA projection of
# the training split. Construction is kept separate from fitting; the fit
# order matches the declaration order.
knn_clf = KNeighborsClassifier(weights='distance', n_neighbors=5)
svm_clf = SGDClassifier(tol=None, max_iter=5)
dt_clf = tree.DecisionTreeClassifier()
nb_clf = GaussianNB()
nn_clf = MLPClassifier(alpha=1)

for estimator in (knn_clf, svm_clf, dt_clf, nb_clf, nn_clf):
    estimator.fit(reduced_X_train_pca, y_train)

In [None]:
def draw_PCA(classifier, reduced_X_train_pca):
    """Plot a classifier's decision regions over 2-D reduced training points.

    The classifier is re-fitted on the first 500 rows of
    ``reduced_X_train_pca``; its predictions over a dense grid are drawn as a
    light-coloured background and the same 500 training points are scattered
    on top in darker colours. The accuracy of the re-fitted classifier on the
    reduced test set is printed.

    Parameters
    ----------
    classifier : estimator exposing ``fit`` and ``predict``
        Mutated in place: after the call it is fitted on the 500-row subset
        only, not on whatever data it was fitted on before.
    reduced_X_train_pca : 2-D array with exactly two columns
        Reduced training features. Rows are assumed to line up with the
        module-level ``y_train`` (the labels are sliced with the same
        ``[0:500]`` range).

    Notes
    -----
    Reads the module-level names ``y_train``, ``reduced_X_test_pca`` and
    ``y_test``; the cells defining them must have been run first.
    """
    # Work on a 500-sample subset (features and matching labels).
    reduced_X_train_pca = reduced_X_train_pca[0:500]
    reduced_y_train = y_train[0:500]

    h = .02  # step size of the prediction grid

    # One colour per class, index i <-> label i. The dark palette is for the
    # data points, the light palette for the decision-region background.
    dark_names = ['black', 'blue', 'purple', 'yellow', 'white', 'red',
                  'lime', 'cyan', 'orange', 'darkmagenta']
    light_names = ['grey', 'royalblue', 'orchid', 'lightyellow', 'snow', 'lightsalmon',
                   'honeydew', 'lightcyan', 'coral', 'magenta']
    colors_dark = ListedColormap(dark_names)
    colors_light = ListedColormap(light_names)

    # Pin the colour normalisation so label k always lands in colour bin k.
    # Without fixed limits matplotlib rescales to the min/max of whatever
    # labels happen to be present, so regions and points could disagree.
    n_classes = len(dark_names)
    class_limits = dict(vmin=-0.5, vmax=n_classes - 0.5)

    classifier.fit(reduced_X_train_pca, reduced_y_train)
    x_min, x_max = reduced_X_train_pca[:, 0].min() - 1, reduced_X_train_pca[:, 0].max() + 1
    y_min, y_max = reduced_X_train_pca[:, 1].min() - 1, reduced_X_train_pca[:, 1].max() + 1
    xx, yy = np.meshgrid(np.arange(x_min, x_max, h),
                         np.arange(y_min, y_max, h))
    # np.c_ stacks the two flattened coordinate arrays as columns, producing
    # one (x, y) row per grid point for the classifier to label.
    Z = classifier.predict(np.c_[xx.ravel(), yy.ravel()])

    predicted = classifier.predict(reduced_X_test_pca)
    print("Accuracy of "+ str(classifier) + " : "+str(np.mean(predicted == y_test)))

    # Put the result into a color plot
    Z = Z.reshape(xx.shape)
    plt.figure()
    plt.pcolormesh(xx, yy, Z, cmap=colors_light, **class_limits)

    # Plot the training points in the dark palette so they stand out against
    # the light region colours.
    # A legend won't work for scatter plot because the scatter plot creates a single object
    # and would only appear as a single item in a legend
    plt.scatter(reduced_X_train_pca[:, 0], reduced_X_train_pca[:, 1], c=reduced_y_train,
                cmap=colors_dark, edgecolor='k', s=20, **class_limits)
    plt.xlim(xx.min(), xx.max())
    plt.ylim(yy.min(), yy.max())

    # Label the figure so it can be read on its own.
    plt.title(type(classifier).__name__ + " decision regions")
    plt.xlabel("Component 1")
    plt.ylabel("Component 2")

    plt.show()

In [None]:
# Display the dataset's target_names attribute as this cell's output
# (presumably the class labels corresponding to digits.target).
digits.target_names

In [None]:
# PCA + kNN: decision regions of the nearest-neighbours classifier.
draw_PCA(classifier=knn_clf, reduced_X_train_pca=reduced_X_train_pca)

In [None]:
# PCA + SVM: decision regions of the SGD-trained linear classifier.
draw_PCA(classifier=svm_clf, reduced_X_train_pca=reduced_X_train_pca)

In [None]:
# PCA + DT: decision regions of the decision tree.
draw_PCA(classifier=dt_clf, reduced_X_train_pca=reduced_X_train_pca)

In [None]:
# PCA + NN: decision regions of the multi-layer perceptron.
draw_PCA(classifier=nn_clf, reduced_X_train_pca=reduced_X_train_pca)

In [None]:
# PCA + NB: decision regions of Gaussian naive Bayes.
draw_PCA(classifier=nb_clf, reduced_X_train_pca=reduced_X_train_pca)