# Kannada-MNIST CNN

## 1: Train Model & Prepare Case Base

In [None]:
import pickle
import pandas as pd
import numpy as np
import keras

import matplotlib.pyplot as plt
%matplotlib inline

from tensorflow.keras.models import Sequential
from tensorflow.keras.datasets import mnist
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Dense, Dropout, Flatten, Activation
from tensorflow.keras import backend as K

from sklearn.model_selection import cross_val_score, train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.neighbors import KNeighborsClassifier

from copy import deepcopy

In [None]:
# Load data
def load(f):
    """Return the array stored under ``'arr_0'`` in the ``.npz`` archive ``f``.

    Parameters
    ----------
    f : str, path-like or file object
        Archive to read; handed straight to ``np.load``.

    Returns
    -------
    numpy.ndarray
        The array saved under the key ``'arr_0'``.

    Raises
    ------
    KeyError
        If the archive contains no ``'arr_0'`` entry.
    """
    # np.load on an .npz returns an NpzFile that keeps the archive open until
    # it is closed; the context manager releases the handle once the array
    # has been read into memory.
    with np.load(f) as archive:
        return archive['arr_0']

# Read the image arrays and their integer class labels from disk
X_train = load('X_kannada_MNIST_train.npz')
X_test = load('X_kannada_MNIST_test.npz')
y_train = load('y_kannada_MNIST_train.npz')
y_test = load('y_kannada_MNIST_test.npz')

# Image geometry: 28x28 pixels plus a single trailing channel axis
img_rows, img_cols = 28, 28
input_shape = (img_rows, img_cols, 1)

# Convert to float32 and divide by 255, then reshape every sample to
# (img_rows, img_cols, 1)
X_train = (X_train.astype('float32') / 255.0).reshape(len(X_train), *input_shape)
X_test = (X_test.astype('float32') / 255.0).reshape(len(X_test), *input_shape)

# One-hot encoded copies of the labels (the integer labels are kept as well)
oh_y_train = to_categorical(y_train)
oh_y_test = to_categorical(y_test)

# Report the resulting shapes
print('x_train shape:', X_train.shape)
print(len(X_train), 'train samples')
print(len(X_test), 'test samples')


In [None]:
# CNN: two convolution/pooling stages followed by a small dense classifier.
#
# The images are reshaped to (samples, 28, 28, 1), i.e. channels-last, so the
# layers must use data_format='channels_last' and take their input shape from
# `input_shape`. Declaring 'channels_first' with input_shape=(1, 28, 28) does
# not match that data layout and makes fit() fail on a shape mismatch.
model = Sequential()

# Cell 1 - input 28x28x1 -> conv 28x28x32 -> pool 14x14x32
model.add(Conv2D(32, (5, 5), input_shape=input_shape, padding='same', data_format='channels_last'))
model.add(Activation("relu"))
model.add(MaxPooling2D(pool_size=(2, 2), data_format='channels_last'))

# Cell 2 - input 14x14x32 -> conv 14x14x64 -> pool 7x7x64
# (only the first layer of a Sequential model needs an input shape)
model.add(Conv2D(64, (3, 3), padding='same', data_format='channels_last'))
model.add(Activation("relu"))
model.add(MaxPooling2D(pool_size=(2, 2), data_format='channels_last'))

# Output - flatten 7x7x64 and classify into 10 classes
model.add(Flatten())
model.add(Dense(128))
model.add(Activation("relu"))
model.add(Dropout(0.2))
model.add(Dense(50))
model.add(Activation("relu"))
model.add(Dropout(0.2))
model.add(Dense(10))
model.add(Activation("softmax"))

# Name the loss and optimizer by string so that tf.keras resolves its own
# implementations. The model is built from tensorflow.keras layers; passing
# objects from the standalone `keras` package can fail to be recognised
# (e.g. "Could not interpret optimizer identifier"). 'adam' yields an Adam
# optimizer with default settings, the same as Adam().
model.compile(loss='categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])

# NOTE(review): the test split doubles as validation data here, so the
# per-epoch validation metrics are not an independent estimate - confirm
# this is intended.
model.fit(X_train, oh_y_train,
          batch_size=256,
          epochs=30,
          verbose=1,
          validation_data=(X_test, oh_y_test))

# Evaluate the trained network on both splits and print the second metric
# (index 1 of the evaluate() result) as a percentage
for split_label, split_X, split_y in (("Training Set:", X_train, oh_y_train),
                                      ("Test Set:", X_test, oh_y_test)):
    scores = model.evaluate(split_X, split_y)
    print(split_label, "\n%s: %.2f%%" % (model.metrics_names[1], scores[1]*100))

In [None]:
# Flatten every image into a single 28*28-long row for the k-NN classifier
n_pixels = 28*28
knn_X_train, knn_X_test = (
    images.reshape(images.shape[0], n_pixels) for images in (X_train, X_test)
)

In [None]:
# Fit the final k-NN: a single nearest neighbour found by brute-force search
# over the flattened training images, labelled with the integer classes
knn_clf = KNeighborsClassifier(algorithm="brute", n_neighbors=1)
knn_clf.fit(X=knn_X_train, y=y_train)

In [None]:
# Sanity check: accuracy on this particular train/test split, to confirm it
# is not too far from the k-fold estimate.
knn_predictions_test = knn_clf.predict(knn_X_test)
knn_test_accuracy = accuracy_score(y_test, knn_predictions_test)
print("k-NN Accuracy Test:", knn_test_accuracy)

In [None]:
# Class predicted by the network for every test image.
# Sequential.predict_classes() was removed from tf.keras (TF 2.6); the
# equivalent is the arg-max over the softmax outputs of predict().
nn_pred = np.argmax(model.predict(X_test), axis=-1)

# Number of test samples on which k-NN and the network predict the same class.
# np.ravel guards against a column-shaped prediction array broadcasting
# against the 1-D network predictions.
right = int(np.sum(np.ravel(knn_predictions_test) == nn_pred))
print("Agreement:", right/len(nn_pred))

In [None]:
# Confusion matrix of the k-NN predictions against the true test labels
# (default label ordering, no sample weights)
knn_confusion = confusion_matrix(y_true=y_test, y_pred=knn_predictions_test)
knn_confusion

In [None]:
# Check confusion matrix NN
# predict_classes() was removed from tf.keras (TF 2.6); take the arg-max of
# the softmax outputs instead. Recomputed here so the cell does not depend on
# variables from other cells beyond the model and the data.
confusion_matrix(y_test, np.argmax(model.predict(X_test), axis=-1), labels=None, sample_weight=None)

In [None]:
# Save the CBR model to disk
# The context manager closes the file (flushing the pickle) even if dump()
# raises; a bare open() left the handle to the garbage collector.
with open('k-nn_model.sav', 'wb') as knn_model_file:
    pickle.dump(knn_clf, knn_model_file)

In [None]:
# Persist the trained Keras network to a single HDF5 file
nn_model_path = "NN.h5"
model.save(nn_model_path)

In [None]:
# Write every prepared array to disk with np.save, one file per array, named
# after the variable it came from
arrays_to_save = {
    # image tensors and integer labels
    "X_train": X_train,
    "X_test": X_test,
    "y_train": y_train,
    "y_test": y_test,
    # flattened images used by the k-NN
    "knn_X_train": knn_X_train,
    "knn_X_test": knn_X_test,
    # one-hot encoded labels
    "oh_y_train": oh_y_train,
    "oh_y_test": oh_y_test,
}
for file_stem, array in arrays_to_save.items():
    np.save(file_stem, array)