In [None]:
import numpy as np
from sklearn.utils.class_weight import compute_class_weight
import tensorflow as tf
from keras.models import Sequential
from keras.layers import Convolution2D, MaxPooling2D, BatchNormalization, Flatten, Dropout, Dense
from mnist import MNIST
import matplotlib.pyplot as plt

In [None]:
# Run once to download the data (uncomment the commands below).
# Source for the dataset: https://www.nist.gov/node/1298471/emnist-dataset

# !wget https://www.itl.nist.gov/iaui/vip/cs_links/EMNIST/gzip.zip
# !unzip gzip.zip  # on Windows use: Expand-Archive -Path "your_archive.zip" -DestinationPath "destination_folder"
# !rm gzip.zip  # on Windows use: del

In [None]:
# Read the EMNIST "byclass" split from the 'gzip' directory; the loader is
# configured to hand back numpy arrays.
emnist_data = MNIST('gzip', return_type='numpy')
emnist_data.select_emnist('byclass')

# Each loader call yields an (images, labels) pair.
(X_train, y_train) = emnist_data.load_training()
(X_test, y_test) = emnist_data.load_testing()

In [None]:
img_dimension = 28

# Target layout is [n, y, x, 1]; -1 lets numpy infer the number of samples.
image_tensor_shape = (-1, img_dimension, img_dimension, 1)

# Reshape, cast to float32 and divide by 255 so that raw pixel values
# (assumed to lie in 0..255) end up in [0, 1].
x_train = X_train.reshape(image_tensor_shape).astype(np.float32) / 255.0
x_test = X_test.reshape(image_tensor_shape).astype(np.float32) / 255.0

In [None]:
# This cell overwrites y_train / y_test with one-hot matrices at the bottom.
# Recover the integer labels first so that re-running the cell is harmless
# instead of silently computing the classes from one-hot data.
train_labels = np.argmax(y_train, axis=1) if np.ndim(y_train) > 1 else np.asarray(y_train)
test_labels = np.argmax(y_test, axis=1) if np.ndim(y_test) > 1 else np.asarray(y_test)

# get number of classes
unique_classes = np.unique(train_labels)
# The one-hot width must exceed the largest label id, because each label is
# used as a column index. This equals len(unique_classes) when the ids are
# contiguous from 0 and stays valid when an id is missing from the training set.
num_classes = int(unique_classes.max()) + 1

input_shape = (img_dimension, img_dimension, 1)

# weight the classes (to combat the imbalance)
# Key every weight by its actual label id (not by its position in
# unique_classes) so the mapping is right even for non-contiguous ids.
balanced_weights = compute_class_weight(class_weight='balanced', classes=unique_classes, y=train_labels)
class_weights = {int(label): float(weight) for label, weight in zip(unique_classes, balanced_weights)}

# Convert class vectors to binary class matrices
y_train = tf.keras.utils.to_categorical(train_labels, num_classes)
y_test = tf.keras.utils.to_categorical(test_labels, num_classes)

In [None]:
# Model definition: three conv/pool/batch-norm/dropout stages and a dense head
kernel_size = (5, 5)
def createmodel():
    """Assemble the convolutional classifier (compilation is left to the caller).

    Reads the notebook-level ``kernel_size``, ``input_shape`` and
    ``num_classes``.

    Returns:
        A ``Sequential`` model whose last layer is a softmax over
        ``num_classes`` units.
    """
    stack = []
    for stage, filters in enumerate((16, 32, 64)):
        # Only the very first layer of the stack declares the input shape.
        first_layer_kwargs = {'input_shape': input_shape} if stage == 0 else {}
        stack.append(Convolution2D(filters, kernel_size=kernel_size, padding='same',
                                   activation='relu', **first_layer_kwargs))
        stack.append(MaxPooling2D(pool_size=(2, 2)))
        stack.append(BatchNormalization())
        stack.append(Dropout(0.4))

    # Classification head
    stack.append(Flatten())
    stack.append(Dense(256, activation='relu'))
    stack.append(BatchNormalization())
    stack.append(Dropout(0.4))
    stack.append(Dense(num_classes, activation='softmax'))
    return Sequential(stack)
# Instantiate the network, configure it for training, then print its layout
model = createmodel()
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)
model.summary()

In [None]:
#Train model (Takes a few hours to train)
# Stop when the validation loss has not improved for `patience` epochs and
# roll the weights back to the best epoch seen.
es = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    mode='min',
    verbose=1,
    patience=10,
    restore_best_weights=True)
# Early stopping picks the final weights based on the validation data, so the
# validation data must not be the test set: otherwise the test metrics reported
# afterwards are tuned on the very data they are measured on.
# validation_split holds out the last 10% of the training samples (taken
# before shuffling) and the model never trains on them.
# NOTE(review): if the training arrays are ordered (e.g. by class or writer),
# shuffle them once before this cell so the held-out tail is representative.
history = model.fit(x_train, y_train,
          #class_weight=class_weights,
          batch_size=10000,
          epochs=200,
          verbose=1,
          shuffle=True,
          validation_split=0.1,
          callbacks=[es])

In [None]:
# Score the model on the test arrays.
# score[0] is reported as the loss and score[1] as the accuracy.
score = model.evaluate(x=x_test, y=y_test, verbose=1)
test_loss, test_accuracy = score[0], score[1]
print('Test loss:', test_loss)
print('Test accuracy:', test_accuracy)

def plotres(x, metric):
    """Plot the per-epoch training curve of ``metric`` and, if present, its validation curve.

    Args:
        x: Mapping from metric name to a sequence of per-epoch values
            (for example the ``history`` dict of a Keras ``History`` object).
        metric: Key of the training metric in ``x``. The validation curve is
            looked up under ``'val_' + metric``.

    Raises:
        KeyError: If ``metric`` itself is missing from ``x``.
    """
    # Draw on the current axes, exactly as the pyplot-level calls would.
    ax = plt.gca()
    ax.plot(x[metric], label='Train')

    # A run trained without validation data has no 'val_*' entries; plot the
    # training curve alone instead of failing with a KeyError.
    val_key = 'val_' + metric
    if val_key in x:
        ax.plot(x[val_key], label='Validation')

    ax.set_title(metric.upper())
    ax.set_ylabel(metric)
    ax.set_xlabel('Epoch')
    ax.legend()
    plt.show()

# Plot the curves stored on the model's history object, one figure per metric.
training_curves = model.history.history
for curve_name in ("accuracy", "loss"):
    plotres(training_curves, curve_name)

In [None]:
# Run the model over the test images and keep its outputs
# (the final layer is a softmax, so each row holds per-class scores).
y_pred = model.predict(x=x_test)

In [None]:
# Persist the model to disk under the file name below
model.save(filepath='letter_classifier.h5')