In [None]:
import pandas as pd
import tensorflow as tf
from tensorflow import keras
import numpy as np
import matplotlib.pyplot as plt
import random
from PIL import Image
import cv2
import os
import csv
from sklearn.model_selection import train_test_split

## Convert images to a CSV file

We are using data from the NIST dataset. The problem with this dataset is that the images are 128 x 128 pixels, which makes computation expensive. By convention, we resize the images to 28 x 28 pixels. Here we use the conversion process described in this paper (https://arxiv.org/pdf/1702.05373v1.pdf).

In [None]:
# Just resize the original image to 28 x 28 directly

def simple_convert(img):
    """Resize ``img`` to 28 x 28 with bicubic interpolation and plot it next to the original.

    Parameters
    ----------
    img : image array accepted by ``cv2.resize`` / ``imshow``.

    Returns
    -------
    The resized 28 x 28 image (the original version discarded it and
    returned None, leaving the second subplot empty).
    """
    fig, axs = plt.subplots(1,2)
    axs[0].imshow(img, cmap='gray_r')
    axs[0].set_title('original imgage')
    resized = cv2.resize(img,(28,28),interpolation = cv2.INTER_CUBIC)
    # Show the result in the second panel so the two images can be compared.
    axs[1].imshow(resized, cmap='gray_r')
    axs[1].set_title('resized image')
    return resized

In [None]:
#sample_image = np.uint8(sample_image)
def convert(img,size):
    """Blur ``img``, crop it to its largest inner contour and resize it to ``size``.

    Parameters
    ----------
    img : single-channel image array (as returned by ``cv2.imread(path, 0)``).
    size : target size passed to ``cv2.resize``, e.g. ``(28, 28)``.

    Returns
    -------
    The resized image. When no usable contour is found, the whole blurred
    image is resized instead of a crop.

    Raises
    ------
    ValueError
        If ``img`` is None (for example because ``cv2.imread`` failed).
    """
    if img is None:
        raise ValueError('convert() received None instead of an image')

    blur = cv2.GaussianBlur(img, (5,5),1)
    ret, thresh = cv2.threshold(blur, 127, 255, cv2.THRESH_BINARY+cv2.THRESH_OTSU)

    contours= cv2.findContours(thresh, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
    # findContours returns (image, contours, hierarchy) on OpenCV 3.x but
    # (contours, hierarchy) on 2.x/4.x; the contour list is always second to last.
    ctrs = contours[-2]
    if len(ctrs) ==1:
        return cv2.resize(blur, size, interpolation = cv2.INTER_CUBIC)

    # Area of the full frame (128*128 for the images used in this notebook);
    # a bounding box of that area is the image border, not a character.
    frame_area = img.shape[0]*img.shape[1]
    x,y,w,h = (0,0,0,0)
    for ctr in ctrs:
        x_,y_,w_,h_ = cv2.boundingRect(ctr)
        if w_*h_ == frame_area:
            continue
        if w_*h_ > w*h:
            x,y,w,h = x_,y_,w_,h_

    if w == 0 or h == 0:
        # No contour other than the frame: there is nothing to crop to.
        return cv2.resize(blur, size, interpolation = cv2.INTER_CUBIC)

    cropped = blur[y:y+h,x:x+w]
    # Add a 2-pixel white (255) margin around the crop before scaling it down.
    cropped = cv2.copyMakeBorder(cropped,2,2,2,2,cv2.BORDER_CONSTANT, value = 255)

    return cv2.resize(cropped, size, interpolation = cv2.INTER_CUBIC)

In [None]:
# Now write the images to csv

letters = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz'
# One integer label per letter, in the same order as `letters`.
labels = list(range(len(letters)))
# Directory code of each letter: its lowercase hexadecimal ASCII code
# ('A' -> '41', ..., 'Z' -> '5a', 'a' -> '61', ..., 'z' -> '7a').
# Deriving it from `letters` fixes the hand-written list, which contained
# '4d' and '6d' twice and therefore never produced '4e' ('N') or '6e' ('n').
letter_code = [format(ord(letter), 'x') for letter in letters]

In [None]:
# Root of the NIST "by_class" directory tree. Set the NIST_BY_CLASS_DIR
# environment variable to use a different location; the default is the path
# this notebook was originally written against.
by_class_dir = os.environ.get('NIST_BY_CLASS_DIR', '/Users/taotao/Downloads/by_class/')

with open('../EMNIST_28_28_v2.csv','w',newline = '') as f:
    # Header row: the label followed by one column per pixel of the 28 x 28 image.
    column_name = ['label']
    column_name.extend(['pixel%d'%i for i in range(28*28)])
    writer = csv.writer(f)
    writer.writerow(column_name)
    for i in labels:
        path = os.path.join(by_class_dir, letter_code[i], 'train_'+letter_code[i])
        for img_path in sorted(os.listdir(path)):
            # Flag 0 loads the image as single-channel grayscale.
            img = cv2.imread(os.path.join(path,img_path),0)
            if img is None:
                # cv2.imread returns None for files it cannot decode
                # (e.g. stray non-image files); report and skip them.
                print('skipping unreadable file {}'.format(os.path.join(path,img_path)))
                continue
            img_converted = convert(img,(28,28))
            print('label = {}, file = {}'.format(i, img_path),end = '\r')
            # One CSV row: the integer label followed by the 784 flattened pixel values.
            row_data = [i]
            row_data.extend(img_converted.flatten())
            writer.writerow(row_data)

## Prepare for training

In [None]:
import keras
from keras.models import Sequential
from keras.layers import Dense,Activation,Conv2D
from keras.layers import MaxPool2D,Flatten,Dropout,ZeroPadding2D,BatchNormalization
from sklearn.metrics import confusion_matrix

In [None]:
# We use external code here, from website https://www.kaggle.com/grfiv4/plot-a-confusion-matrix
def plot_confusion_matrix(cm,
                          target_names,
                          title='Confusion matrix',
                          cmap=None,
                          normalize=True):
    """
    Given a sklearn confusion matrix (cm), draw it as an annotated heat map.

    Arguments
    ---------
    cm:           confusion matrix from sklearn.metrics.confusion_matrix

    target_names: given classification classes such as [0, 1, 2]
                  the class names, for example: ['high', 'medium', 'low'];
                  pass None to leave the default tick labels

    title:        the text to display at the top of the matrix

    cmap:         the gradient of the values displayed from matplotlib.pyplot.cm
                  see http://matplotlib.org/examples/color/colormaps_reference.html
                  plt.get_cmap('jet') or plt.cm.Blues
                  (defaults to 'Blues' when None)

    normalize:    If False, plot the raw numbers
                  If True, plot the proportions (each row divided by its sum;
                  rows that sum to zero are shown as all zeros)

    Returns
    -------
    None. The figure is displayed with plt.show().

    Usage
    -----
    plot_confusion_matrix(cm           = cm,                  # confusion matrix created by
                                                              # sklearn.metrics.confusion_matrix
                          normalize    = True,                # show proportions
                          target_names = y_labels_vals,       # list of names of the classes
                          title        = best_estimator_name) # title of graph

    Citation
    --------
    http://scikit-learn.org/stable/auto_examples/model_selection/plot_confusion_matrix.html

    """
    cm = np.asarray(cm)

    # Accuracy is computed from the raw counts, before any normalisation.
    accuracy = np.trace(cm) / float(np.sum(cm))
    misclass = 1 - accuracy

    if cmap is None:
        cmap = plt.get_cmap('Blues')

    if normalize:
        # Normalise BEFORE drawing so that the colours, the printed numbers and
        # the text-colour threshold all refer to the same values. Rows without
        # any sample stay at zero instead of turning into NaN.
        row_totals = cm.sum(axis=1)[:, np.newaxis]
        cm = np.divide(cm.astype('float'), row_totals,
                       out=np.zeros(cm.shape, dtype='float'),
                       where=row_totals != 0)

    plt.figure(figsize=(15, 12))
    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()

    if target_names is not None:
        tick_marks = np.arange(len(target_names))
        plt.xticks(tick_marks, target_names, rotation=45)
        plt.yticks(tick_marks, target_names)

    # Cells darker than this threshold get white text so the number stays readable.
    thresh = cm.max() / 1.5 if normalize else cm.max() / 2
    cell_format = "{:0.4f}" if normalize else "{:,}"
    for i, j in np.ndindex(cm.shape[0], cm.shape[1]):
        plt.text(j, i, cell_format.format(cm[i, j]),
                 horizontalalignment="center",
                 color="white" if cm[i, j] > thresh else "black")

    plt.tight_layout()
    plt.ylabel('True label')
    plt.xlabel('Predicted label\naccuracy={:0.4f}; misclass={:0.4f}'.format(accuracy, misclass))
    plt.show()

In [None]:
#This code is from Bibliography[10]
#do one hot encoding for labels
#aka if original label is 3
#after one-hot, it becomes a 26-long array
#[0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
def one_hot(labels, num_classes=26):
    """One-hot encode integer class labels.

    Parameters
    ----------
    labels : array-like of integer class ids, each in ``[0, num_classes)``.
    num_classes : width of every encoded row (26 by default).

    Returns
    -------
    Float array of shape ``(len(labels), num_classes)`` with a single 1 per row.

    Raises
    ------
    ValueError
        If any label lies outside ``[0, num_classes)``. The previous flat-index
        implementation silently wrote such labels into a neighbouring row.
    """
    class_ids = np.asarray(labels).ravel()
    num_labels = class_ids.shape[0]
    if num_labels and (class_ids.min() < 0 or class_ids.max() >= num_classes):
        raise ValueError(
            'labels must lie in [0, {}), got values between {} and {}'.format(
                num_classes, class_ids.min(), class_ids.max()))
    result = np.zeros((num_labels, num_classes))
    # Row r gets its 1 in column class_ids[r].
    result[np.arange(num_labels), class_ids] = 1
    return result

## Basic NN

In this part, we create a simple neural network model that serves as the baseline.

In [None]:
# Load the 28 x 28 CSV dataset from disk.
alphabet = pd.read_csv('../EMNIST_28_28_v2.csv')

# Column 0 is taken as the label; every remaining column is taken as a feature.
raw_labels = alphabet.iloc[:, 0].values.ravel()
images = alphabet.iloc[:, 1:].values

# Quick sanity check of the array shapes.
print('The dimensions of features are',images.shape)
print('The dimensions of raw labels are',raw_labels.shape)

In [None]:
# NOTE(review): the CSV-writing cell of this notebook emits labels starting at 0
# (0..51), yet this cell subtracts 1 while the CNN and AlexNet cells do not.
# Confirm which label range the CSV on disk really uses before trusting results.
labels=one_hot(raw_labels-1)
# Reshape the flat 784-pixel rows to 28 x 28 x 1 images and scale them to [0, 1].
images=images.reshape(images.shape[0],28,28,1).astype("float32")
images=images/255
# Fixed random_state so the train/test split is the same on every run.
X_train, X_test, Y_train, Y_test = train_test_split(images, labels, test_size = 0.3, random_state = 42)
# Integer class id of every test row (position of the 1 in its one-hot vector).
y = np.argmax(Y_test, axis=1)
letters = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'#label for plots in the future

In [None]:
# Baseline network: flatten the image, one 512-unit hidden layer with dropout,
# then a 26-way softmax output.
baseline = Sequential([
    Flatten(),
    Dense(512, activation='relu'),
    Dropout(0.2),
    Dense(26, activation='softmax'),
])
baseline.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# Train the baseline model; 20% of the training data is held out for validation.
baseline.fit(x=X_train,y=Y_train,batch_size=300,epochs=50,verbose=1,validation_split=0.2)

# Per-epoch metrics recorded by Keras during fit().
baseline_log = baseline.history.history
baseValLoss = baseline_log['val_loss']
baseValAcc = baseline_log['val_acc']
baseAcc = baseline_log['acc']
baseLoss = baseline_log['loss']
epoch = baseline.history.epoch

# One figure per metric: training curve as a line, validation curve as dots.
baseline_curves = (
    ('accuracy', baseAcc, 'b', 'train_acc', baseValAcc, 'bo', 'val_acc'),
    ('loss', baseLoss, 'r', 'train_loss', baseValLoss, 'ro', 'val_loss'),
)
for y_axis_name, train_values, train_fmt, train_name, val_values, val_fmt, val_name in baseline_curves:
    plt.plot(epoch, train_values, train_fmt, label=train_name)
    plt.plot(epoch, val_values, val_fmt, label=val_name)
    plt.xlabel('epoch')
    plt.ylabel(y_axis_name)
    plt.title('Baseline Model')
    plt.legend()
    plt.show()

In [None]:
def _show_baseline_curve(train_values, train_fmt, train_name,
                         val_values, val_fmt, val_name, y_axis_name):
    """Draw one training/validation curve pair of the baseline model and show it."""
    plt.plot(epoch, train_values, train_fmt, label=train_name)
    plt.plot(epoch, val_values, val_fmt, label=val_name)
    plt.xlabel('epoch')
    plt.ylabel(y_axis_name)
    plt.title('Baseline Model')
    plt.legend()
    plt.show()


# Re-draw the baseline curves from the metrics collected by the training cell.
_show_baseline_curve(baseAcc, 'b', 'train_acc', baseValAcc, 'bo', 'val_acc', 'accuracy')
_show_baseline_curve(baseLoss, 'r', 'train_loss', baseValLoss, 'ro', 'val_loss', 'loss')

In [None]:
# Prediction performance of the baseline model on the held-out test split.
class_ids = list(range(26))
# Most probable class per test image.
yPredBase = np.argmax(baseline.predict(X_test), axis=1)
confusion = confusion_matrix(y, yPredBase, labels=class_ids)
plot_confusion_matrix(cm=confusion, normalize=False, target_names=list(letters))

## CNN

In this part, we train a CNN on the CSV file.

As mentioned in the report, the basic architecture of the CNN built with Keras follows this tutorial:
https://machinelearningmastery.com/handwritten-digit-recognition-using-convolutional-neural-networks-python-keras/


In [None]:
#read data from csv file
alphabet = pd.read_csv('../EMNIST_28_28_v2.csv')
#shuffle the data set; the fixed random_state makes the row order (and with it
#the validation split taken later by Keras) identical on every run
alphabet=alphabet.sample(frac=1, random_state=42)
#split features and labels: column 0 is the label, the rest are the features
images=alphabet.iloc[:,1:].values
raw_labels=alphabet.iloc[:,0].values.ravel()

In [None]:
# One-hot encode the integer labels for the CNN.
# NOTE(review): the baseline section encodes `raw_labels-1` while this cell uses
# `raw_labels` unchanged, and the CSV-writing cell emits labels 0..51 for a
# 26-column encoding -- confirm which label range the CSV really contains.
labels=one_hot(raw_labels)

In [None]:
# Turn every flat pixel row into a 28 x 28 single-channel float image,
# then scale the pixel values by 1/255.
images = images.reshape(images.shape[0], 28, 28, 1)
images = images.astype("float32") / 255

In [None]:
# Build the model: start from an empty Sequential container; the following
# cells append the layers to it one by one with cnn.add().
cnn = Sequential()

The following layers are modified code from the reference.

In [None]:
#Layer-1
#Basic Convolutional layer and ReLU Layer===========
#ReLU is the activation applied after every convolutional layer.
#How it works?  If input is x, then output is  max(0, x)
cnn.add(Conv2D(64,kernel_size=(3,3),strides=(1,1),input_shape=(28,28,1), activation='relu'))
cnn.add(Conv2D(64,kernel_size=(3,3),activation='relu'))
#Normalization Layer=========
#The input is channels-last (28,28,1), so the feature axis is the last one.
#The previous axis=1 normalised over the image height instead of the channels.
cnn.add(BatchNormalization(epsilon=1e-6,axis=-1))
#Pooling Layer==========
#Reduce number of parameters and prevent OVERFITTING,usually the pool size is (2,2)
#MaxPool means, if we have a 2*2 block, we choose the biggest number
#  4  5
#  8  1
#Then after MaxPool, the output is 8, a 1*1 block.
cnn.add(MaxPool2D(pool_size=(2,2)))
cnn.add(Dropout(0.5))

In [None]:
#Layer-2
#Padded Convolutional layer===========
#A convolution without padding shrinks its input, so this layer uses
#padding='same': the input is zero-padded and the output keeps the same
#height and width as the input.
cnn.add(Conv2D(64,kernel_size=(3,3),padding='same',activation='relu'))

#Basic Convolutional layer and ReLU Layer===========
cnn.add(Conv2D(64,kernel_size=(3,3),activation='relu'))
#Normalization Layer=========
#Normalise over the channel axis (last axis of the channels-last tensors);
#the previous axis=1 normalised over the image height instead.
cnn.add(BatchNormalization(epsilon=1e-6,axis=-1))
#Pooling Layer==========
cnn.add(MaxPool2D(pool_size=(2,2)))
cnn.add(Dropout(0.5))

In [None]:
#Layer-3
#Padded Convolutional layer (padding='same' keeps the spatial size)===========
cnn.add(Conv2D(64,kernel_size=(3,3),padding='same',activation='relu'))
#Basic Convolutional layer and ReLU Layer===========
cnn.add(Conv2D(64,kernel_size=(3,3),activation='relu'))
#Normalization Layer=========
#Normalise over the channel axis (last axis of the channels-last tensors);
#the previous axis=1 normalised over the image height instead.
cnn.add(BatchNormalization(epsilon=1e-6,axis=-1))
#Pooling Layer==========
cnn.add(MaxPool2D(pool_size=(2,2)))
cnn.add(Dropout(0.5))

In [None]:
# Classification head, appended in order:
#   Dropout(0.25) - randomly drops activations during training (against overfitting)
#   Flatten       - turns the feature maps into one vector per image
#   Dense(512)    - fully connected hidden layer with ReLU
#   Dense(26)     - softmax output, one unit per class
head_layers = (
    Dropout(0.25),
    Flatten(),
    Dense(512, activation='relu'),
    Dense(26, activation='softmax'),
)
for head_layer in head_layers:
    cnn.add(head_layer)

cnn.summary()

In [None]:
# ModelCheckpoint is used below but is not imported anywhere else in this notebook.
from keras.callbacks import ModelCheckpoint

#Compile the model and save check point
#(the network built above is called `cnn`; the name `model` was never defined)
cnn.compile(loss='categorical_crossentropy',optimizer='adam',metrics=['accuracy'])
#Keep only the weights of the epoch with the best validation accuracy.
checkpoint=ModelCheckpoint(filepath="best_weights.hdf5",monitor='val_acc',save_best_only=True)

# Train and Predict
#epochs is the number of passes over the training data (50 here).
#validation_split=0.2 holds out 20% of the data for validation, 80% is used for training
#verbose=1 means print the log, =0 dont print
cnn.fit(images,labels,batch_size=64,epochs=50,verbose=1,validation_split=0.2,callbacks=[checkpoint])
cnn.save("cnn.h5")

In [None]:
# Model performance: per-epoch metrics recorded while fitting `cnn`.
cnn_log = cnn.history.history
cnnValLoss = cnn_log['val_loss']
cnnValAcc = cnn_log['val_acc']
cnnAcc = cnn_log['acc']
cnnLoss = cnn_log['loss']
epoch = cnn.history.epoch


def _draw_cnn_curve(train_values, train_fmt, train_name,
                    val_values, val_fmt, val_name, y_axis_name):
    """Draw one training/validation curve pair on the current figure."""
    plt.plot(epoch, train_values, train_fmt, label=train_name)
    plt.plot(epoch, val_values, val_fmt, label=val_name)
    plt.xlabel('epoch')
    plt.ylabel(y_axis_name)
    plt.title('CNN with 3 Conv Layers')
    plt.legend()


# Accuracy on the first figure, loss on a second one; both are shown at the end.
_draw_cnn_curve(cnnAcc, 'b', 'train_acc', cnnValAcc, 'bo', 'val_acc', 'accuracy')
plt.figure()
_draw_cnn_curve(cnnLoss, 'r', 'train_loss', cnnValLoss, 'ro', 'val_loss', 'loss')
plt.show()

In [None]:
#Load the saved optimal weights as the final model
# 'best_weights.hdf5' is the file name given to the ModelCheckpoint callback
# in the training cell.
cnn.load_weights('best_weights.hdf5')
# Save the complete model under a second file name.
cnn.save('shapes_cnn.h5')

In [None]:
#Draw confusion_matrix
# NOTE(review): X_test / Y_test come from the baseline section's split, while
# the CNN was fitted on the full re-loaded `images` array -- confirm that these
# samples are really unseen by the CNN.
# The trained network is `cnn`; the name `model` was never defined.
Y_pred = np.argmax(cnn.predict(X_test), axis=1)
# Integer class id of every test row (position of the 1 in its one-hot vector).
Y = np.argmax(Y_test, axis=1)
# sklearn returns a plain array; tf.confusion_matrix only built an unevaluated tensor.
confusion = confusion_matrix(Y, Y_pred, labels=list(range(26)))

In [None]:
from sklearn.metrics import confusion_matrix

# Confusion matrix of the CNN predictions with raw counts, one tick per letter.
letters = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
class_ids = list(range(26))
confusion = confusion_matrix(Y, Y_pred, labels=class_ids)
plot_confusion_matrix(
    cm=confusion,
    normalize=False,
    target_names=list(letters),
    title='Confusion Matrix',
)

In [None]:
# All four training curves of the CNN on one shared axis.
# The trained network is `cnn`; the name `model` was never defined.
val_loss = cnn.history.history['val_loss']
val_acc = cnn.history.history['val_acc']
acc = cnn.history.history['acc']
loss = cnn.history.history['loss']
epoch = cnn.history.epoch
plt.plot(epoch,acc, label = 'train_acc')
plt.plot(epoch, loss, label = 'train_loss')
plt.plot(epoch, val_acc, label = 'val_acc')
plt.plot(epoch, val_loss, label = 'val_loss')
plt.xlabel('epoch')
plt.ylabel('acc-loss')

plt.legend()
plt.show()

## AlexNet

In this part, we train AlexNet on the CSV file.

The basic architecture of AlexNet built with TensorFlow follows this tutorial:
https://www.digitalocean.com/community/tutorials/how-to-build-a-neural-network-to-recognize-handwritten-digits-with-tensorflow
The code is NOT exactly the same as the tutorial.


In [None]:
#read data from csv file
alphabet = pd.read_csv('../EMNIST_28_28_v2.csv')
#shuffle the data set; the fixed random_state makes the row order (and with it
#the validation slice taken below) identical on every run
alphabet=alphabet.sample(frac=1, random_state=42)
#split features and labels: column 0 is the label, the rest are the features
# NOTE(review): unlike the baseline and CNN sections, the pixel values are not
# divided by 255 here -- confirm whether that is intended.
images=alphabet.iloc[:,1:].values
raw_labels=alphabet.iloc[:,0].values.ravel()

In [None]:
# One-hot encode the integer labels for AlexNet.
# NOTE(review): the baseline section encodes `raw_labels-1` while this cell uses
# `raw_labels` unchanged, and the CSV-writing cell emits labels 0..51 for a
# 26-column encoding -- confirm which label range the CSV really contains.
labels=one_hot(raw_labels)

In [None]:
#just set validation to be 2000 we have limited hardware resource
validation=2000

# Everything after the first `validation` rows is used for training ...
train_images = images[validation:]
train_labels = labels[validation:]

# ... and the first `validation` rows are held out.
validation_images = images[:validation]
validation_labels = labels[:validation]

# Number of complete batches of 100 samples. Integer division keeps this an
# int (it used to be a float that needed an int() cast before use in range()).
n_batch = train_images.shape[0] // 100

The following code is modified from the reference.

In [None]:
# Graph inputs.
# x: batch of flattened images, 784 values per row.
x=tf.placeholder(tf.float32,[None,784])
# y: batch of one-hot labels, 26 values per row.
y=tf.placeholder(tf.float32,[None,26])
# keep_prob is fed by the training and evaluation cells (0.5 / 1.0).
# NOTE(review): no visible graph operation reads it (there is no dropout op
# in the layer definitions) -- confirm whether dropout was meant to be applied.
keep_prob = tf.placeholder(tf.float32)

In [None]:
# Helpers that create the weights/biases and the convolution/pooling ops
def weight_variable(shape):
    """Create a TensorFlow variable of the given shape filled with tf.random_normal values."""
    initial_values = tf.random_normal(shape)
    return tf.Variable(initial_values)


def bias_variable(shape):
    """Create a bias variable of the given shape filled with tf.random_normal values."""
    initial_values = tf.random_normal(shape)
    return tf.Variable(initial_values)


def conv2d(x, W):
    """Convolve ``x`` with the filter bank ``W`` using stride 1 and 'SAME' padding."""
    unit_strides = [1, 1, 1, 1]
    return tf.nn.conv2d(x, W, strides=unit_strides, padding='SAME')


def max_pool_2x2(x):
    """Max-pool ``x`` with a 2 x 2 window, stride 2 and 'SAME' padding."""
    window = [1, 2, 2, 1]
    step = [1, 2, 2, 1]
    return tf.nn.max_pool(x, ksize=window, strides=step, padding='SAME')

In [None]:
# Reshape every flat 784-value row of `x` into a 28 x 28 single-channel image;
# -1 lets TensorFlow infer the batch dimension.
x_image = tf.reshape(x, [-1, 28, 28, 1])

In [None]:
#The parameters follow this github code as reference
#https://github.com/wwzzyyzzrr/DaChuang/blob/97e07590453a9d6fb3a644ce6abc4a99e2c2d015/Recognition/prediction.py

#Layer-1: 11x11 convolution, 1 -> 64 channels, followed by 2x2 max pooling
W_conv1 = weight_variable([11, 11, 1, 64])
b_conv1 = bias_variable([64])
h_conv1 = tf.nn.relu(conv2d(x_image, W_conv1) + b_conv1)
h_pool1 = max_pool_2x2(h_conv1)

#Layer-2: 5x5 convolution, 64 -> 192 channels, followed by 2x2 max pooling
W_conv2 = weight_variable([5, 5, 64,192])
b_conv2 = bias_variable([192])
h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2)
h_pool2 = max_pool_2x2(h_conv2)

#Layer-3: 3x3 convolution, 192 -> 384 channels
W_conv3 = weight_variable([3, 3, 192,384])
b_conv3 = bias_variable([384])
h_conv3 = tf.nn.relu(conv2d(h_pool2, W_conv3) + b_conv3)

#Layer-4: 3x3 convolution, 384 -> 256 channels
W_conv4 = weight_variable([3, 3, 384,256])
b_conv4 = bias_variable([256])
h_conv4 = tf.nn.relu(conv2d(h_conv3, W_conv4) + b_conv4)

#Layer-5: 3x3 convolution, 256 -> 256 channels, followed by 2x2 max pooling
W_conv5 = weight_variable([3, 3, 256,256])
b_conv5 = bias_variable([256])
h_conv5 = tf.nn.relu(conv2d(h_conv4, W_conv5) + b_conv5)
h_pool5 = max_pool_2x2(h_conv5)

#Fully Connected Layer
# Three 2x2/stride-2 'SAME' poolings shrink 28 -> 14 -> 7 -> 4, so h_pool5
# holds 4*4*256 values per image. The size is written out directly; it used to
# be read from the shape of an extra 4096x1024 variable that was created only
# for that purpose and otherwise never used.
fc_input_size = 4*4*256
W_fc1 = weight_variable([fc_input_size, 1024])
b_fc1 = bias_variable([1024])
dense1 = tf.reshape(h_pool5, [-1, fc_input_size])
dense1 = tf.nn.relu(tf.matmul(dense1, W_fc1) + b_fc1,name='fc1')

W_fc2 = weight_variable([1024, 1024])
b_fc2 = bias_variable([1024])
dense2 = tf.nn.relu(tf.matmul(dense1, W_fc2) + b_fc2,name='fc2')

# Output logits, one per class (softmax is applied inside the loss).
W_out = weight_variable([1024, 26])
b_out = bias_variable([26])
y_conv = tf.matmul(dense2, W_out) + b_out

In [None]:
#Use the mean of cross entropy as loss function
# (y_conv holds raw logits; the softmax is applied inside this op)
loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(labels=y, logits=y_conv))
#Use the Adadelta optimizer to update the parameters; the learning rate is set to 0.1
train_step = tf.train.AdadeltaOptimizer(learning_rate=0.1).minimize(loss)

#set the accuracy: fraction of rows whose arg-max prediction equals the arg-max label
correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_conv, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

#set the name of saved model
# global_step is a non-trainable counter; the training cell assigns the epoch
# number to it and passes it to saver.save() as the checkpoint's global step.
global_step = tf.Variable(0, name='global_step', trainable=False)
saver = tf.train.Saver()

#Initialize
# Created last so that it covers every variable defined above.
init = tf.global_variables_initializer()

In [None]:
# Per-epoch buffers (50 slots each) filled in by the training loop.
the_loss = np.zeros(50)
accuracy_n = np.zeros_like(the_loss)

In [None]:
# Train AlexNet for 50 epochs, recording validation accuracy/loss after every
# epoch and writing a checkpoint per epoch.
# (The indentation of this cell was inconsistent and did not parse.)
with tf.Session() as sess:
    sess.run(init)

    for epoch in range(1,51):
        # One pass over the training data in mini-batches of 100 samples.
        for batch in range(int(n_batch)):
            batch_x = train_images[batch*100:(batch+1)*100]
            batch_y = train_labels[batch*100:(batch+1)*100]

            sess.run(train_step,feed_dict = {x:batch_x,y:batch_y,keep_prob:0.5})

        # Evaluate both metrics on the held-out rows in a single graph run.
        validation_feed = {x:validation_images, y:validation_labels,keep_prob:1.0}
        accuracy_n[epoch-1], the_loss[epoch-1] = sess.run([accuracy, loss],feed_dict=validation_feed)
        # NOTE(review): the recorded loss is divided by 1e13, so the printed and
        # plotted values are NOT the real loss. This looks like a display hack
        # for a very large loss -- confirm and preferably fix the cause instead.
        the_loss[epoch-1]/=10000000000000
        print("Round:" + str(epoch) +",accuracy:"+str(accuracy_n[epoch-1])+",loss:"+str(the_loss[epoch-1]))

        # Store the epoch number in global_step so the checkpoint is written
        # as "../alexnet.ckpt-<epoch>".
        global_step.assign(epoch).eval()
        saver.save(sess,"../alexnet.ckpt",global_step = global_step)

In [None]:
#plot loss and accuracy
# Both series are measured on the validation rows once per epoch, so the axes
# are labelled accordingly. Loss uses the left axis, accuracy the right one.
fig, ax1 = plt.subplots()
ax2 = ax1.twinx()
lns1 = ax1.plot(np.arange(50), the_loss, label="Loss")
lns2 = ax2.plot(np.arange(50), accuracy_n, 'r', label="Accuracy")
ax1.set_xlabel('epoch')
ax1.set_ylabel('validation loss')
ax2.set_ylabel('validation accuracy')
lns = lns1 + lns2
# Separate name for the legend texts: assigning them to `labels` would
# overwrite the one-hot label array used by the cells above.
legend_labels = [line.get_label() for line in lns]
plt.legend(lns, legend_labels, loc=7)

In [None]:
# Restore the checkpoint written after epoch 50 and predict the validation rows.
with tf.Session() as sess:
    sess.run(init)
    saver.restore(sess, "../alexnet.ckpt-50")

    test_x = validation_images
    prediction_feed = {x: test_x, keep_prob: 1.0}
    # Raw network outputs (logits), one row per validation image.
    conv_y_preditct = sess.run(y_conv, feed_dict=prediction_feed)
    # Predicted class = position of the largest logit in each row.
    test_pred = conv_y_preditct.argmax(axis=1)
    print(test_pred.shape)

In [None]:
#Same,plot confusion matrix
letters = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
# Integer class id of every validation row (position of the 1 in its one-hot
# vector). Stored as `y_true` rather than `y`, because `y` is the TensorFlow
# label placeholder that the training and prediction cells feed.
y_true = np.argmax(validation_labels, axis=1)
confusion = confusion_matrix(y_true, test_pred, labels=list(range(26)))
plot_confusion_matrix(cm=confusion,normalize=False,target_names=list(letters))