https://machinelearningmastery.com/how-to-develop-a-convolutional-neural-network-from-scratch-for-mnist-handwritten-digit-classification/


In [10]:
import numpy as np
import pandas as pd
import plotly.express as px

from keras.layers import Conv2D
from keras.layers import Dense
from keras.layers import Flatten
from keras.layers import MaxPooling2D
from keras.models import Sequential
from keras.optimizers import SGD
from keras.utils import to_categorical

from sklearn.datasets import load_digits
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report
from sklearn.model_selection import KFold
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier

In [11]:
# Read the flattened images and their labels from the two CSV files and
# convert both tables to NumPy arrays (images first, then labels).
# NOTE(review): read_csv treats the first row of each file as a header by
# default - confirm that both CSV files really start with a header row.
X, y = (
    pd.read_csv(csv_path).to_numpy()
    for csv_path in ('handwritten_digits_images.csv', 'handwritten_digits_labels.csv')
)
print(X.shape)

(69999, 784)


## Preprocessing steps

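For preprocessing, the labels are one-hot encoded and the pixel values are converted to `float32` and divided by 255 to normalize them to the range 0-1.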

In [12]:
# One-hot encode the digit labels (one column per class).
y = to_categorical(y)

# Cast the pixels to float32 and divide by 255 in a single step so that the
# values end up in the range 0-1 (assumes 8-bit pixel intensities).
X = X.astype('float32') / 255.0

In [13]:
# Sanity check: print the encoded label vector of one arbitrary sample.
print(y[15000])

# Disabled image preview of the same sample. X is still flattened at this
# point, so the row would presumably have to be reshaped to 2-D first.
#px.imshow(X[15000], color_continuous_scale=["white", "black"])

[0. 0. 1. 0. 0. 0. 0. 0. 0. 0.]


In [14]:
# First cut: keep 70 % of the samples for training and hold out the rest.
X_train, X_val_test, y_train, y_val_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

# Second cut: halve the held-out part into a validation and a test set.
X_val, X_test, y_val, y_test = train_test_split(
    X_val_test,
    y_val_test,
    test_size=0.5,
    random_state=42,
)

# Report the resulting shapes so the split sizes can be checked at a glance.
print('Train: X=%s, y=%s' % (X_train.shape, y_train.shape))
print('Test: X=%s, y=%s' % (X_test.shape, y_test.shape))
print('Validate: X=%s, y=%s' % (X_val.shape, y_val.shape))

Train: X=(48999, 784), y=(48999, 10)
Test: X=(10500, 784), y=(10500, 10)
Validate: X=(10500, 784), y=(10500, 10)


In [15]:
# Load scikit-learn's bundled digits dataset for a shape comparison with the
# CSV data. NOTE: despite the variable name, the printed shape shows only 64
# features per sample, unlike the 784 features of the CSV images.
mnist = load_digits()
# Print the shape once; wrapping print() in another print() also emitted the
# inner call's return value, i.e. a stray "None" line.
print(mnist.data.shape)
trainData, testData, trainLabel, testLabel = train_test_split(
    np.array(mnist.data), mnist.target, test_size=0.25, random_state=42
)
print(trainData.shape)
print(X_train.shape)

(1797, 64)
None
(1347, 64)
(48999, 784)


## Candidate algorithms and choice of candidate hyperparameters (and why the others were left out)

- KNeighborsClassifier
- DecisionTreeClassifier
- RandomForestClassifier

## Chosen performance measure
Accuracy on test data (0-100%)

## K Nearest Neighbors Classifier

In [16]:
# Baseline k-NN model with k=5.
# y_train is one-hot encoded; fitting on it directly makes scikit-learn treat
# the task as a multilabel problem (each column predicted independently).
# Convert back to one class index per sample so this is a plain 10-class fit.
model = KNeighborsClassifier(n_neighbors=5)
model.fit(X_train, np.argmax(y_train, axis=1))

KNeighborsClassifier()

In [17]:
# Candidate neighbourhood sizes: odd values of k from 1 to 29.
kVals = range(1, 30, 2)
accuracies = []

# The label arrays are one-hot encoded for the neural net. scikit-learn would
# interpret them as multilabel targets, so convert them back to one class
# index per sample before training and scoring the k-NN models.
y_train_labels = np.argmax(y_train, axis=1)
y_val_labels = np.argmax(y_val, axis=1)
y_test_labels = np.argmax(y_test, axis=1)

# loop over various values of `k` for the k-Nearest Neighbor classifier
for k in kVals:
	# train the k-Nearest Neighbor classifier with the current value of `k`;
	# n_jobs=-1 runs the neighbour search on all available cores
	model = KNeighborsClassifier(n_neighbors=k, n_jobs=-1)
	model.fit(X_train, y_train_labels)

	# evaluate the model on the validation split and record its accuracy
	score = model.score(X_val, y_val_labels)
	print("k=%d, accuracy=%.2f%%" % (k, score * 100))
	accuracies.append(score)

# find the value of k that has the largest validation accuracy
i = int(np.argmax(accuracies))
print("k=%d achieved highest accuracy of %.2f%% on validation data" % (kVals[i],
	accuracies[i] * 100))

# re-train our classifier using the best k value and predict the labels of the
# test data
model = KNeighborsClassifier(n_neighbors=kVals[i], n_jobs=-1)
model.fit(X_train, y_train_labels)
predictions = model.predict(X_test)

# show a final classification report demonstrating the accuracy of the classifier
# for each of the digits
print("EVALUATION ON TESTING DATA")
print(classification_report(y_test_labels, predictions))

KeyboardInterrupt: 

## Decision Tree Classifier

## Random Forest Classifier

## Neural net model

In [None]:
def neural_net(X_train, y_train, X_val, y_val, X_test, y_test, epochs=5):
	"""Build, train and evaluate a small convolutional network.

	The network consists of a Conv2D/MaxPooling2D stage, two further Conv2D
	layers followed by MaxPooling2D, and a Dense(100) layer feeding a
	10-way softmax output. It is trained with SGD (learning rate 0.01,
	momentum 0.9) on categorical cross-entropy and reports accuracy.

	Args:
		X_train, X_val, X_test: image arrays; each is reshaped to
			(n_samples, 28, 28, 1), so flattened (n_samples, 784) input
			is accepted as well as already shaped input.
		y_train, y_val, y_test: targets for the three splits; the
			categorical cross-entropy loss and the 10-unit softmax imply
			one-hot vectors of length 10.
		epochs: number of training epochs.

	Returns:
		The fitted Sequential model. The test accuracy is printed.
	"""
	# The first Conv2D layer declares input_shape=(28, 28, 1); bring every
	# image array into that layout so flattened rows do not fail in fit().
	X_train, X_val, X_test = (
		np.asarray(images).reshape(-1, 28, 28, 1)
		for images in (X_train, X_val, X_test)
	)

	model = Sequential()
	model.add(Conv2D(32, (3, 3), activation='relu', kernel_initializer='he_uniform', input_shape=(28, 28, 1)))
	model.add(MaxPooling2D((2, 2)))
	model.add(Conv2D(64, (3, 3), activation='relu', kernel_initializer='he_uniform'))
	model.add(Conv2D(64, (3, 3), activation='relu', kernel_initializer='he_uniform'))
	model.add(MaxPooling2D((2, 2)))
	model.add(Flatten())
	model.add(Dense(100, activation='relu', kernel_initializer='he_uniform'))
	model.add(Dense(10, activation='softmax'))

	# `learning_rate` is the supported argument name; the old `lr` alias is
	# deprecated and rejected by recent Keras releases.
	opt = SGD(learning_rate=0.01, momentum=0.9)
	model.compile(optimizer=opt, loss='categorical_crossentropy', metrics=['accuracy'])

	# The validation split is only monitored during training; the test split
	# is evaluated once at the end.
	model.fit(X_train, y_train, epochs=epochs, batch_size=32, validation_data=(X_val, y_val), verbose=1)
	_, acc = model.evaluate(X_test, y_test, verbose=1)
	print('Model accuracy on test data:  %.3f' % (acc * 100.0))

	return model


#neural_net = neural_net(X_train, y_train, X_val, y_val, X_test, y_test)


## Model selection schemes that you used. Justify your choices

## What is your final classifier and how does it work.
Neural net
## How well it is expected to perform in production (on unseen data). Justify your estimate

## Measures taken to avoid overfitting 

## Given more resources (time or computing resources), how would you improve your solution
K-fold cross-validation should be used to avoid overfitting to one specific data split, but I did not manage to get TensorFlow to use my GPU through CUDA, so fitting the model took a long time. With more resources I would run the same function on 5 folds of the data.