 #                                         E-MNIST Recognition 

### Preprocessing 

Importing necessary functions to pre-process the dataset.

In [2]:
import numpy as np
import pandas as pd
import keras
from keras.datasets import mnist
from PIL import Image
import PIL.ImageOps
import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
# Read both EMNIST ByClass splits; header=None keeps the first CSV row as data
# instead of treating it as column names.
train, test = [
    pd.read_csv(csv_path, header=None)
    for csv_path in ("emnist-byclass-train.csv", "emnist-byclass-test.csv")
]

In [3]:
# Index i of this string is the character for class label i:
# digits first, then upper-case letters, then lower-case letters.
class_mapping = (
    '0123456789'
    'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
    'abcdefghijklmnopqrstuvwxyz'
)

In [4]:
 #class_map = class_mapping[34]

In [5]:
#arr = np.array(class_map)

In [6]:
#type(arr)

In [7]:
#train.shape

Visualizing the dataset:

In [8]:
#train.head()

In [9]:
#test.head()

Splitting features and labels on both training and testing datasets.

Note that the features were originally held in a DataFrame, while we pass an ndarray into our model; hence we extract the underlying values.

In [10]:
# Column 0 is taken as the label (kept 2-D via the list index [0]);
# every remaining column is taken as a feature.
features_train, labels_train = train.iloc[:, 1:].values, train.iloc[:, [0]].values
features_test, labels_test = test.iloc[:, 1:].values, test.iloc[:, [0]].values

In [11]:
#features_test

In [12]:
#features_test.shape

Since normalization and training work with float values, we convert the integer pixel data to the float32 datatype.

We also normalize the pixel values by dividing them by 255.

In [13]:
# Cast to float32 first so the division by 255 is carried out in float32.
features_train = features_train.astype('float32') / 255
features_test = features_test.astype('float32') / 255

In [14]:
#features_train.shape

One-hot encoding for the labels; there are 62 classes (10 digits, 26 upper-case letters and 26 lower-case letters).

In [15]:
# One-hot encode both label arrays over the 62 classes listed in class_mapping.
labels_train, labels_test = (
    keras.utils.to_categorical(labels, 62)
    for labels in (labels_train, labels_test)
)

In [16]:
#labels_train.shape

### Visualizing the dataset 

In [17]:
#import matplotlib.pyplot as plt
#%matplotlib inline

In [18]:
#plt.imshow(features_train[30].reshape([28,28]), cmap = 'Greys_r')

##### Since the images are not in the correct orientation, we need to flip them horizontally and then rotate them 90 degrees anti-clockwise.

In [19]:
def rotate(img):
    """Return a flattened 784-value image as a correctly oriented 28x28 array.

    Mirroring a square image left-to-right and then rotating it 90 degrees
    anti-clockwise is the same operation as transposing it, so the 28x28
    view of ``img`` is simply transposed.
    """
    return np.reshape(img, (28, 28)).T

We use the `rotate` function defined above to re-orient each image using simple NumPy functions. We then use `np.apply_along_axis` to apply the function to every row (image) of the feature matrices.

In [20]:
# rotate is called once per row (axis 1), turning each flattened image
# into its re-oriented 28x28 form.
features_train = np.apply_along_axis(func1d=rotate, axis=1, arr=features_train)
features_test = np.apply_along_axis(func1d=rotate, axis=1, arr=features_test)

Check the backend's image data format (channels first or channels last) and set the input shape accordingly:

In [21]:
img_rows, img_cols = 28, 28
from keras import backend as K

# Add the single grayscale channel axis where the active backend expects it,
# and record the matching per-sample input shape for the first Conv2D layer.
if K.image_data_format() == 'channels_first':
    # The data must be reshaped on this backend as well; otherwise it keeps
    # its (n, 28, 28) shape and does not match input_shape.
    features_train = features_train.reshape(features_train.shape[0], 1, img_rows, img_cols)
    features_test = features_test.reshape(features_test.shape[0], 1, img_rows, img_cols)
    input_shape = (1, img_rows, img_cols)
    print("Channels First.")
else:
    features_train = features_train.reshape(features_train.shape[0], img_rows, img_cols, 1)
    features_test = features_test.reshape(features_test.shape[0], img_rows, img_cols, 1)
    input_shape = (img_rows, img_cols, 1)
    print("Channels Last.")

Channels Last.


In [22]:
#plt.imshow(features_train[30].reshape([28,28]), cmap = 'Greys_r')
#labels_train[30]
#plt.title("Class" + (features_train[40]) + ', Label :' + (class_mapping[features_train[40]]))

In [23]:
#class_mapping[30]

### Model Layout 

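Using a convolutional network: four convolutional layers with 32, 64, 128 and 256 filters (3x3 kernels, ReLU activation), interleaved with max-pooling and dropout, followed by a 512-unit dense layer and a 62-way softmax output layer.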

In [24]:
from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras.layers import Conv2D, MaxPooling2D, Flatten

In [25]:
# The whole stack is declared up front; layers run in the order listed.
model = Sequential([
    # Convolutional feature extractor.
    Conv2D(32, kernel_size=(3, 3), activation='relu', input_shape=input_shape),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D(2, 2),
    Conv2D(128, (3, 3), activation='relu'),
    MaxPooling2D(2, 2),
    Dropout(0.25),
    Conv2D(256, (3, 3), activation='relu'),
    MaxPooling2D(2, 2),
    # Classifier head: one softmax output per class (62 classes).
    Flatten(),
    Dense(512, activation='relu'),
    Dropout(0.25),
    Dense(62, activation='softmax'),
])

In [26]:
# Categorical cross-entropy pairs with the one-hot labels and softmax output.
model.compile(
    optimizer=keras.optimizers.Adamax(),
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# First training pass on the un-augmented data; the test split doubles as
# the validation set.
model.fit(
    x=features_train,
    y=labels_train,
    epochs=30,
    validation_data=(features_test, labels_test),
    verbose=1,
)

###  Data Augmentation

In [None]:
from keras.preprocessing.image import ImageDataGenerator

# Only small geometric perturbations are enabled; every centering,
# std-normalisation, whitening and flipping option is explicitly disabled.
datagen = ImageDataGenerator(
    # enabled: random geometric jitter
    rotation_range=10,        # random rotation, in degrees
    zoom_range=0.1,           # random zoom
    width_shift_range=0.1,    # random horizontal shift (fraction of total width)
    height_shift_range=0.1,   # random vertical shift (fraction of total height)
    # disabled: dataset-wide and per-sample statistics
    featurewise_center=False,
    samplewise_center=False,
    featurewise_std_normalization=False,
    samplewise_std_normalization=False,
    zca_whitening=False,
    # disabled: flips
    horizontal_flip=False,
    vertical_flip=False,
)

In [None]:
from keras.callbacks import ReduceLROnPlateau

# Multiply the learning rate by 0.5 when 'val_acc' plateaus (patience of 3
# epochs), with 1e-5 as the lower bound.
# NOTE(review): newer Keras releases report this metric as 'val_accuracy' --
# confirm the key against the installed version.
learning_rate_reduction = ReduceLROnPlateau(
    monitor='val_acc',
    factor=0.5,
    patience=3,
    min_lr=0.00001,
    verbose=1,
)

In [None]:
# Continue training on augmented batches; one epoch covers as many full
# batches as fit into the training set.
batch_size = 128
history = model.fit_generator(
    datagen.flow(features_train, labels_train, batch_size=batch_size),
    steps_per_epoch=features_train.shape[0] // batch_size,
    epochs=10,
    validation_data=(features_test, labels_test),
    callbacks=[learning_rate_reduction],
    verbose=1,
)

In [None]:
# evaluate() returns the loss followed by the compiled metric (accuracy).
score = model.evaluate(features_test, labels_test, verbose=0)
test_loss, test_accuracy = score
print("Test loss:", test_loss)
print("Test Accuracy: ", test_accuracy*100, "%")

### Plot Loss and Accuracy

def _plot_history_curves(train_key, val_key, title, ylabel, legend_loc):
    """Plot one recorded metric for the training and validation runs."""
    plt.plot(history.history[train_key])
    plt.plot(history.history[val_key])
    plt.title(title)
    plt.ylabel(ylabel)
    plt.xlabel('epoch')
    plt.legend(['train', 'validation'], loc=legend_loc)
    plt.show()


print(history.history.keys())

# accuracy
_plot_history_curves('acc', 'val_acc', 'model accuracy', 'accuracy', 'lower right')

# loss
_plot_history_curves('loss', 'val_loss', 'model loss', 'loss', 'upper right')

### Testing our model

In [None]:
#im = Image.open('output.png').convert("L") # returns image object
    #out_img = imread('output.png')
#im = PIL.ImageOps.invert(im) 
    #out_img = np.invert(out_img)
#im = im.resize((28,28))
    #out_img = imresize(out_img, (28,28))
#im = np.reshape(im, (1,28,28,1))

In [None]:
#im = Image.open('output.png').convert("L")
#im = PIL.ImageOps.invert(im) 
#plt.imshow(im)

In [None]:
#out =model.predict(im)
#result = np.argmax(out,axis = 1)[0]
#print('Char: ', class_mapping[result])

###  Saving model to deploy into Production

The model is to be saved in JSON format to pass into the script.

In [None]:
# Serialise the model description to a JSON string and write it to model.json.
model_json = model.to_json()
with open("model.json", mode="w") as json_file:
    json_file.write(model_json)

Serializing weights to HDF5 

In [None]:
# Store the trained weights next to model.json so both can be reloaded later.
model.save_weights(filepath="model.h5")
print('Model saved to disk.')

### Save model as Tensorflowjs 

In [4]:
import tensorflowjs as tfjs
from keras.models import model_from_json

In [5]:
# load json and create model
json_file = open('model.json', 'r')
loaded_model_json = json_file.read()
json_file.close()
loaded_model = model_from_json(loaded_model_json)
# load weights into new model
loaded_model.load_weights("model.h5")
print("Loaded model from disk")

Loaded model from disk


In [7]:
# Export the reloaded Keras model with the TensorFlow.js converter; the
# second argument ("model_tfjs") is the output location.
tfjs.converters.save_keras_model(loaded_model, "model_tfjs")