In [1]:
# Import all necessary libs and initial setup
import tensorflow as tf
import numpy as np

from sklearn import neighbors, linear_model, tree
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, recall_score, confusion_matrix

# Dataset size constants: the number of samples train_test_split is asked to
# place in each partition when the data is split further down the notebook.
train_size = 2000  # samples in the training partition
test_size = 500  # samples in the test partition

In [2]:
# Classifiers under comparison, keyed by the short name passed to train_and_test.
# SGD (shuffles the training data) and the decision tree (permutes candidate
# features at each split) are randomized, so both receive a fixed random_state;
# without it the reported scores change on every Restart & Run All.
MODEL_SEED = 42
models = {
    'KNN': neighbors.KNeighborsClassifier(n_neighbors=3),
    'SGD': linear_model.SGDClassifier(max_iter=250, random_state=MODEL_SEED),
    'DT': tree.DecisionTreeClassifier(random_state=MODEL_SEED)
}

def train_and_test(classifier):
    """Fit one of the registered classifiers and print its test-set metrics.

    Args:
        classifier: key into the module-level ``models`` dict selecting which
            estimator to use.

    Reads the module-level ``train_data``, ``train_labels``, ``test_data`` and
    ``test_labels``. Prints the accuracy, the macro-averaged recall and the
    confusion matrix of the predictions; returns nothing.
    """
    # pick the estimator, train it, then predict on the test samples
    estimator = models[classifier]
    fitted = estimator.fit(train_data, train_labels)
    predictions = fitted.predict(test_data)

    # overall fraction of correct predictions
    accuracy = accuracy_score(test_labels, predictions)
    print(f'Accuracy score: {accuracy}')

    # recall averaged over classes with equal weight per class
    recall = recall_score(test_labels, predictions, average='macro')
    print(f'Recall score: {recall}')

    # per-class breakdown of true vs. predicted labels
    confusion = confusion_matrix(test_labels, predictions)
    print(f'Confusion Matrix:\n{confusion}')

# Fetch Fashion-MNIST and pool its predefined train/test portions into one set,
# so that a custom-sized split can be drawn from all available samples.
SPLIT_SEED = 42  # fixed seed: the same subsample is drawn on every re-run
(raw_train_data, raw_train_labels), (raw_test_data, raw_test_labels) = (
    tf.keras.datasets.fashion_mnist.load_data()
)
data = np.concatenate((raw_train_data, raw_test_data), axis=0)
labels = np.concatenate((raw_train_labels, raw_test_labels), axis=0)

# Draw the train/test partitions with the sizes configured at the top of the notebook.
train_data, test_data, train_labels, test_labels = train_test_split(
    data, labels, train_size=train_size, test_size=test_size, random_state=SPLIT_SEED
)
print(train_data.shape, test_data.shape)

# Flatten every image into a single feature vector, one row per sample.
# The row count comes from the array itself rather than from train_size/test_size,
# so this keeps working if those are ever given as fractions.
train_data = train_data.reshape(len(train_data), -1)
test_data = test_data.reshape(len(test_data), -1)
print(train_data.shape, test_data.shape)

(2000, 28, 28) (500, 28, 28)
(2000, 784) (500, 784)


In [3]:
# Print a heading, then fit and evaluate the 'KNN' entry of `models`.
print('K - Nearest Neighbours Classifier\n')
train_and_test('KNN')

K - Nearest Neighbours Classifier

Accuracy score: 0.782
Recall score: 0.7789774433545245
Confusion Matrix:
[[43  0  2  1  0  0  6  0  0  0]
 [ 1 52  0  2  0  0  0  0  0  0]
 [ 2  0 39  0  6  0  9  0  0  0]
 [ 5  0  1 42  3  0  2  0  0  0]
 [ 1  0  7  1 29  0 10  0  0  0]
 [ 0  0  0  0  0 26  1  2  0  6]
 [16  0  7  1  4  0 23  0  0  0]
 [ 0  0  0  0  0  1  0 38  0  6]
 [ 1  0  1  0  0  0  0  0 50  0]
 [ 1  0  0  0  0  0  0  3  0 49]]


In [4]:
# Print a heading, then fit and evaluate the 'SGD' entry of `models`.
print('Stochastic Gradient Descent Classifier\n')
train_and_test('SGD')

Stochastic Gradient Descent Classifier

Accuracy score: 0.796
Recall score: 0.7947246374910082
Confusion Matrix:
[[45  0  3  2  0  0  2  0  0  0]
 [ 0 52  0  2  0  0  0  0  0  1]
 [ 9  0 38  2  4  0  3  0  0  0]
 [ 4  0  1 45  2  0  1  0  0  0]
 [ 2  0  5  2 34  0  5  0  0  0]
 [ 0  0  0  0  0 28  1  1  0  5]
 [18  0  5  2  5  0 20  0  1  0]
 [ 0  0  0  0  0  3  0 37  0  5]
 [ 0  0  0  1  1  0  0  0 50  0]
 [ 0  0  0  0  0  1  0  3  0 49]]


In [5]:
# Print a heading, then fit and evaluate the 'DT' entry of `models`.
print('Decision Tree Classifier\n')
train_and_test('DT')

Decision Tree Classifier

Accuracy score: 0.754
Recall score: 0.757552343297349
Confusion Matrix:
[[32  1  3  2  2  0 11  1  0  0]
 [ 0 51  0  3  0  0  1  0  0  0]
 [ 0  0 39  2 10  0  4  0  1  0]
 [ 1  1  1 40  4  0  5  0  1  0]
 [ 1  2 10  1 27  1  6  0  0  0]
 [ 0  0  0  0  0 33  0  1  0  1]
 [ 9  0  4  4  7  0 23  0  4  0]
 [ 0  0  0  0  0  4  0 35  0  6]
 [ 0  0  1  1  1  0  1  0 48  0]
 [ 0  0  0  0  0  2  0  2  0 49]]
