In [None]:
# COLAB ONLY

# Install Kaggle from PyPI
!pip install -q kaggle
 
# Kaggle: auth
!mkdir -p ~/.kaggle
!cp kaggle.json ~/.kaggle/
!ls ~/.kaggle
!chmod 600 /root/.kaggle/kaggle.json
 
# Download dataset
!kaggle datasets download -d crawford/emnist
 
# Dataset: extract balanced only
!unzip emnist.zip emnist-bymerge-test.csv
!unzip emnist.zip emnist-bymerge-train.csv
!unzip emnist.zip emnist-bymerge-mapping.txt

In [None]:
import numpy
import pandas
import tensorflow as tf
import matplotlib.pyplot as plt

In [None]:
# The EMNIST CSV files ship without a header row: every line is
# "<class index>,<784 pixel values>". header=None stops pandas from
# consuming the first sample of each file as column names.
train_set = pandas.read_csv('emnist-bymerge-train.csv', header=None)
test_set = pandas.read_csv('emnist-bymerge-test.csv', header=None)

In [None]:
# Shuffle the rows with a fixed seed so the row order -- and therefore the
# samples that end up in the validation split taken at fit time -- is the
# same on every Restart & Run All.
shuffle_rng = numpy.random.default_rng(42)
train_set = train_set.iloc[shuffle_rng.permutation(len(train_set))]
test_set = test_set.iloc[shuffle_rng.permutation(len(test_set))]

In [None]:
# Column 0 is the class index; the remaining columns are the pixels of one
# 28x28 single-channel image per row.
n_train = train_set.shape[0]
n_test = test_set.shape[0]

train_label = numpy.array(train_set.iloc[:, 0].values).reshape((n_train, 1))
train_data = numpy.array(train_set.iloc[:, 1:].values).reshape((n_train, 28, 28, 1))

test_label = numpy.array(test_set.iloc[:, 0].values).reshape((n_test, 1))
test_data = numpy.array(test_set.iloc[:, 1:].values).reshape((n_test, 28, 28, 1))

In [None]:
# Class index -> character.
#   0-9   : digits
#   10-35 : upper-case letters A-Z
#   36-46 : the lower-case letters that have their own class
label = dict(enumerate(
    '0123456789'
    'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
    'abdefghnqrt'
))

In [None]:
# One-hot encode both label arrays over the 47 classes.
# NOTE: this rebinds the names, so the cell must only be run once per kernel.
train_label, test_label = (
    tf.keras.utils.to_categorical(class_ids, 47)
    for class_ids in (train_label, test_label)
)

In [None]:
# Store the pixels as half-precision floats before scaling them.
train_data = train_data.astype(numpy.float16)
test_data = test_data.astype(numpy.float16)

In [None]:
# Scale the pixels by 1/255, in place.
# NOTE: because the arrays are modified in place, re-running this cell would
# divide them a second time.
numpy.divide(train_data, 255, out=train_data)
numpy.divide(test_data, 255, out=test_data)

In [None]:
# Start from an empty sequential model; layers are appended in the next cell.
model = tf.keras.Sequential()

In [None]:
layers = tf.keras.layers

# Full layer stack, listed in the order it is appended to the model.
cnn_stack = [
    # Block 1: two 3x3 convolutions (32 filters) -> max-pool -> dropout
    layers.Conv2D(32, 3, padding="same", input_shape=(28, 28, 1)),
    layers.LeakyReLU(),
    layers.Conv2D(32, 3, padding="same"),
    layers.LeakyReLU(),
    layers.MaxPool2D(pool_size=(2, 2)),
    layers.Dropout(0.25),

    # Block 2: two 3x3 convolutions (64 filters) -> max-pool -> dropout
    layers.Conv2D(64, 3, padding="same"),
    layers.LeakyReLU(),
    layers.Conv2D(64, 3, padding="same"),
    layers.LeakyReLU(),
    layers.MaxPool2D(pool_size=(2, 2)),
    layers.Dropout(0.25),

    # Classifier head: flatten, two dense layers, 47-way softmax
    layers.Flatten(),
    layers.Dense(512, activation='relu'),
    layers.Dense(128, activation='relu'),
    layers.Dense(47, activation="softmax"),
]

# NOTE: re-running this cell appends the whole stack to the model again.
for layer in cnn_stack:
    model.add(layer)

In [None]:
# Adam with default settings; categorical cross-entropy matches the one-hot labels.
model.compile(
    optimizer=tf.keras.optimizers.Adam(),
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# Train for 3 epochs, holding out 10% of the training data for validation.
model.fit(
    x=train_data,
    y=train_label,
    batch_size=256,
    epochs=3,
    validation_split=0.1,
)

In [None]:
# Per-epoch metrics recorded by the last fit() call.
history = model.history.history
print(history.keys())

# One figure per metric: training curve vs. validation curve, saved then shown.
for metric, filename in (('accuracy', 'acc_basic_cnn.png'),
                         ('loss', 'loss_basic_cnn.png')):
    plt.plot(history[metric])
    plt.plot(history['val_' + metric])
    plt.title('model ' + metric)
    plt.ylabel(metric)
    plt.xlabel('epoch')
    plt.legend(['train', 'valid'], loc='upper left')
    plt.savefig(filename)
    plt.show()

In [None]:
# Loss and accuracy of the trained model on the held-out test set.
test_loss, test_acc = model.evaluate(x=test_data, y=test_label)

In [None]:
# Collapse softmax outputs / one-hot labels back to integer class indices.
test_p = model.predict(test_data).argmax(axis=1)  # predicted class per sample
test_l = test_label.argmax(axis=1)                # true class per sample

In [None]:
rows = 5   # number of rows in the figure grid
cols = 10  # number of columns in the figure grid

f = plt.figure(figsize=(2 * cols, 2 * rows))

# Build a display-only uint8 copy of the images that will be shown.
# test_data itself must stay in the normalized scale the model was trained
# on, because it is passed to evaluate() again further down; rescaling it in
# place would also make this cell non-idempotent.
preview = (test_data[:rows * cols].astype('float32') * 255).round().astype('uint8')

for i in range(len(preview)):
    ax = f.add_subplot(rows, cols, i + 1)  # one subplot per sample
    ax.imshow(preview[i].reshape([28, 28]), cmap="gray")
    ax.axis("off")
    # P = predicted character, L = true label
    ax.set_title('P: ' + label[test_p[i]] + ' L: ' + label[test_l[i]], y=-0.2, color="green")

# Save once, after every subplot has been drawn (not on each iteration).
f.savefig('result_cnn.png')
plt.show()

In [None]:
from sklearn import metrics
# Pin the label set so the matrix is always 47x47 and lines up with the 47
# tick labels used when plotting, even if a class never occurs in test_l/test_p.
cm = metrics.confusion_matrix(test_l, test_p, labels=list(range(47)))

In [None]:
import itertools
def plot_confusion_matrix(cm, classes,
                          normalize=False,
                          title='Confusion matrix',
                          cmap=plt.cm.Blues):
    """
    Print a confusion matrix and draw it on the current matplotlib figure.

    Parameters
    ----------
    cm : 2-D array-like
        Confusion matrix. Rows are labelled 'True label' and columns
        'Predicted label' on the plot.
    classes : sequence
        Tick labels for both axes, one per class.
    normalize : bool, default False
        If True, every row is divided by its sum before printing/plotting.
        Rows whose sum is zero are left as all zeros.
    title : str
        Title of the plot.
    cmap : matplotlib colormap
        Colormap used for the heat map.

    Returns
    -------
    None
    """
    cm = numpy.asarray(cm)

    if normalize:
        row_totals = cm.sum(axis=1, keepdims=True)
        # A class with no true samples has a zero row total; a plain division
        # would yield NaN there and also turn cm.max() (used for the text
        # colour threshold below) into NaN. Keep such rows at zero instead.
        cm = numpy.divide(cm, row_totals,
                          out=numpy.zeros(cm.shape, dtype=float),
                          where=row_totals != 0)
        print("Normalized confusion matrix")
    else:
        print('Confusion matrix, without normalization')

    print(cm)

    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()
    tick_marks = numpy.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=45)
    plt.yticks(tick_marks, classes)

    # Write each cell's value on top of the heat map; switch to white text on
    # dark cells so the numbers stay readable.
    fmt = '.2f' if normalize else 'd'
    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(j, i, format(cm[i, j], fmt),
                 horizontalalignment="center",
                 color="white" if cm[i, j] > thresh else "black")

    plt.ylabel('True label')
    plt.xlabel('Predicted label')
    # Run the layout pass last so it can account for the axis labels.
    plt.tight_layout()

# Tick labels are the class indices 0..46 as strings.
class_names = list(map(str, range(47)))
numpy.set_printoptions(precision=2)

# Draw the raw-count matrix first, then the row-normalized one,
# each on its own large figure.
for normalize, title in ((False, 'Confusion matrix, without normalization'),
                         (True, 'Normalized confusion matrix')):
    plt.figure(figsize=(50,20))
    plot_confusion_matrix(cm, classes=class_names, normalize=normalize,
                          title=title)

plt.show()

In [None]:
rows = 5
cols = 10
count = rows * cols  # maximum number of misclassified samples to display

f = plt.figure(figsize=(2 * cols, 2 * rows))

# Indices of the first `count` samples whose prediction differs from the
# true label. Selecting them up front avoids walking the whole test set
# after the grid is already full.
wrong = numpy.flatnonzero(test_l != test_p)[:count]

for sub_plot, i in enumerate(wrong, start=1):
    ax = f.add_subplot(rows, cols, sub_plot)
    # Cast to float32 for display so the image renders regardless of the
    # dtype test_data currently has.
    ax.imshow(test_data[i].reshape([28, 28]).astype('float32'), cmap="gray")
    ax.axis("off")
    # T = true label, P = predicted character
    ax.set_title("T: " + label[test_l[i]] + " P: " + label[test_p[i]], y=-0.2, color="Green")

f.savefig("error_plots_cnn.png")
plt.show()

In [None]:
# Persist the trained model to a single HDF5 file.
model.save(filepath='cnn_model.h5')

In [None]:
# Read the saved model back from disk to check the round trip.
loaded = tf.keras.models.load_model(filepath="cnn_model.h5")

In [None]:
# Evaluate the reloaded model on the test set.
# NOTE(review): test_data must be in the same scale the model was trained on
# for this to be comparable with the earlier evaluate() result -- confirm no
# cell in between has rescaled it.
test_loss, test_acc = loaded.evaluate(x=test_data, y=test_label)