# MNIST Classifiers using machine learning (without Neural Networks)

In [None]:
# Install python packages
!pip install Pillow scikit-learn python-mnist matplotlib numpy

# Downloading MNIST Dataset
!mkdir -p data
!wget -nc -P ./data http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
!wget -nc -P ./data http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
!wget -nc -P ./data http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
!wget -nc -P ./data http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
    
# Extracting content
!gunzip ./data/train-images-idx3-ubyte.gz ./data/train-labels-idx1-ubyte.gz ./data/t10k-images-idx3-ubyte.gz ./data/t10k-labels-idx1-ubyte.gz

File ‘./data/train-images-idx3-ubyte.gz’ already there; not retrieving.

File ‘./data/train-labels-idx1-ubyte.gz’ already there; not retrieving.

File ‘./data/t10k-images-idx3-ubyte.gz’ already there; not retrieving.

File ‘./data/t10k-labels-idx1-ubyte.gz’ already there; not retrieving.

gzip: ./data/train-images-idx3-ubyte already exists; do you wish to overwrite (y or n)? 

### Loading the Dataset

In [None]:
%matplotlib inline
from mnist import MNIST
from sklearn import datasets
from PIL import Image, ImageDraw
from matplotlib.pyplot import imshow
import numpy as np
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Read the training images and labels from the files extracted into ./data/
mndata = MNIST("./data/")
images, labels = mndata.load_training()

# Select the sample at index 4 (the fifth one in the dataset, a handwritten 9)
image = images[4]
label = labels[4]

# Copy the sample's pixel values into a 28x28 single-channel ("L" mode) picture
# and render it inline
output = Image.new("L", (28, 28))
output.putdata(image)
imshow(np.asarray(output))

# Show the ground-truth label that goes with the picture
print("Label: ",label)

### Splitting the dataset

In [None]:
# The first 10,000 samples are used for fitting the classifiers.
train_x, train_y = images[:10000], labels[:10000]

# The following 100 samples are held out for evaluation; `expected` holds
# their true labels converted to a plain Python list.
test_x = images[10000:10100]
expected = labels[10000:10100].tolist()

## K-Nearest Neighbors

In [None]:
from sklearn.neighbors import KNeighborsClassifier

# Fit a k-nearest-neighbours classifier (default hyperparameters) on the
# training split and predict labels for the held-out samples.
clf = KNeighborsClassifier()
clf.fit(train_x, train_y)
predicted = clf.predict(test_x)

# Evaluate against the held-out labels. All three metrics take
# (true labels, predicted labels), i.e. `expected` and `predicted`;
# the previous `y_test` / `y_pred` names were never defined in this notebook.
print("Accuracy: ", accuracy_score(expected, predicted))
print("\nConfusion Matrix:\n",confusion_matrix(expected, predicted))
print("\nClassification Report:\n",classification_report(expected, predicted))

## Random Forest

In [None]:
from sklearn.ensemble import RandomForestClassifier

# Fit a 100-tree random forest on the training split and predict labels for
# the held-out samples. A fixed random_state makes the forest (and therefore
# the reported metrics) reproducible across Restart & Run All.
clf = RandomForestClassifier(n_estimators=100, random_state=42)
clf.fit(train_x, train_y)
predicted = clf.predict(test_x)

# Evaluate against the held-out labels. All three metrics take
# (true labels, predicted labels), i.e. `expected` and `predicted`;
# the previous `y_test` / `y_pred` names were never defined in this notebook.
print("Accuracy: ", accuracy_score(expected, predicted))
print("\nConfusion Matrix:\n",confusion_matrix(expected, predicted))
print("\nClassification Report:\n",classification_report(expected, predicted))

## SVM - Linear SVC

In [None]:
from sklearn.svm import LinearSVC

# Fit a linear support vector classifier on the training split and predict
# labels for the held-out samples. LinearSVC is linear by construction and
# accepts no `kernel` argument (passing one raises TypeError); kernels are an
# option of sklearn.svm.SVC instead.
clf = LinearSVC()
clf.fit(train_x, train_y)
predicted = clf.predict(test_x)

# Evaluate against the held-out labels. All three metrics take
# (true labels, predicted labels), i.e. `expected` and `predicted`;
# the previous `y_test` / `y_pred` names were never defined in this notebook.
print("Accuracy: ", accuracy_score(expected, predicted))
print("\nConfusion Matrix:\n",confusion_matrix(expected, predicted))
print("\nClassification Report:\n",classification_report(expected, predicted))

## SVM