In [None]:
import cv2
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os

from PIL import Image, ImageFilter
%matplotlib inline

import tensorflow as tf
from keras.datasets import mnist
from keras.models import Sequential, load_model
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.optimizers import Adam
from keras.layers.normalization import BatchNormalization
from keras.utils import np_utils
from keras.layers import Conv2D, MaxPooling2D, ZeroPadding2D, GlobalAveragePooling2D
from keras.layers.advanced_activations import LeakyReLU 
from keras.preprocessing.image import ImageDataGenerator

# Seed NumPy's global RNG so NumPy-driven randomness is repeatable.
# NOTE(review): this does not seed TensorFlow/Keras itself — confirm whether
# weight initialisation also needs a fixed seed for reproducible runs.
np.random.seed(25)
# Alias for tf.contrib.learn; not referenced by any cell visible in this notebook.
learn = tf.contrib.learn
# Only show TensorFlow log messages at ERROR level.
tf.logging.set_verbosity(tf.logging.ERROR)

In [None]:
# EMNIST has the same format as MNIST, so the MNIST extraction helpers
# from the TensorFlow MNIST pipeline can be used to read it.
from tensorflow.contrib.learn.python.learn.datasets.mnist import extract_images, extract_labels

# Directory holding the gzipped EMNIST "byclass" files.
# Change this one constant when the dataset lives somewhere else.
EMNIST_DIR = 'C:/Users/thanga/Documents/Python/TFletter/letters/'


def _read_emnist(file_name, extract):
    """Open one gzipped EMNIST file from EMNIST_DIR and decode it.

    Args:
        file_name: name of the .gz file inside EMNIST_DIR.
        extract: decoder called with the open binary file object
            (extract_images or extract_labels).

    Returns:
        Whatever `extract` returns for that file.
    """
    with open(os.path.join(EMNIST_DIR, file_name), 'rb') as f:
        return extract(f)


train_images = _read_emnist('emnist-byclass-train-images-idx3-ubyte.gz', extract_images)
train_labels = _read_emnist('emnist-byclass-train-labels-idx1-ubyte.gz', extract_labels)

test_images = _read_emnist('emnist-byclass-test-images-idx3-ubyte.gz', extract_images)
test_labels = _read_emnist('emnist-byclass-test-labels-idx1-ubyte.gz', extract_labels)

# Convert to np ndarrays: float32 images, int32 labels
X_train = train_images.astype(np.float32)
y_train = np.asarray(train_labels, dtype=np.int32)
X_test = test_images.astype(np.float32)
y_test = np.asarray(test_labels, dtype=np.int32)

# Normalize: divide pixel values by 255 in place
# (assumes the raw images are unsigned bytes, as the "ubyte" file names suggest)
X_train /= 255
X_test /= 255

In [None]:

# Keep at most the first 500000 training examples (images and labels stay aligned).
max_examples = 500000
X_train, y_train = X_train[:max_examples], y_train[:max_examples]

In [None]:
# Keep at most the first 50000 test examples (images and labels stay aligned).
# Note: this rebinds max_examples, which the previous cell set to 500000.
max_examples = 50000
X_test, y_test = X_test[:max_examples], y_test[:max_examples]

In [None]:
# Display the shape of the test image array after truncation to at most 50000 examples.
X_test.shape

In [None]:
# One-hot (categorical) encoding of the integer class labels.
# Lower-case y_* keep the integer labels; upper-case Y_* hold the one-hot rows.
number_of_classes = 62

Y_train, Y_test = (
    np_utils.to_categorical(labels, number_of_classes)
    for labels in (y_train, y_test)
)

# Show the first integer label next to its one-hot row as a quick sanity check.
(y_train[0], Y_train[0])

In [None]:
# Display the shape of the training image array.
X_train.shape

In [None]:
# Display the shape of the integer training labels (not the one-hot Y_train).
y_train.shape

In [None]:
# CNN LAYERS CONSTRUCTION
# Two convolutional stages (32 filters, then 64), each made of two 3x3
# convolutions with ReLU followed by 2x2 max pooling, then a dense head.
#
# Every BatchNormalization layer is passed to model.add(): constructing the
# layer without adding it leaves it out of the network entirely.
model = Sequential()

# Stage 1: 32 filters, input is a 28x28 single-channel image
model.add(Conv2D(32, (3, 3), input_shape=(28, 28, 1)))
model.add(Activation('relu'))
model.add(BatchNormalization(axis=-1))
model.add(Conv2D(32, (3, 3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))

# Stage 2: 64 filters
model.add(BatchNormalization(axis=-1))
model.add(Conv2D(64, (3, 3)))
model.add(Activation('relu'))
model.add(BatchNormalization(axis=-1))
model.add(Conv2D(64, (3, 3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))

model.add(Flatten())

# Fully connected layer
model.add(BatchNormalization())
model.add(Dense(512))
model.add(Activation('relu'))
model.add(BatchNormalization())
model.add(Dropout(0.2))

# Output layer: 62 units, matching the 62 classes used for the one-hot labels
model.add(Dense(62))
model.add(Activation('softmax'))

In [None]:
# Print the layer-by-layer summary of the model built in the previous cell.
model.summary()

In [None]:
# Configure training: Adam with its default settings, categorical
# cross-entropy loss (labels are one-hot), accuracy reported as a metric.
model.compile(
    optimizer=Adam(),
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# Augmenting generator: random rotation, shifts, shear and zoom.
gen = ImageDataGenerator(
    rotation_range=8,
    width_shift_range=0.08,
    height_shift_range=0.08,
    shear_range=0.3,
    zoom_range=0.08,
)

# Second generator with no augmentation options set.
test_gen = ImageDataGenerator()

In [None]:
# Batch iterators over the training and test arrays, 100 samples per batch.
train_generator = gen.flow(
    X_train,
    Y_train,
    batch_size=100,
)
test_generator = test_gen.flow(
    X_test,
    Y_test,
    batch_size=100,
)

In [None]:
# Fit the model and save it
# The step counts use 100 because that is the batch_size given to
# gen.flow / test_gen.flow when the generators were created.
# validation_steps is derived from the TEST set size: deriving it from
# X_train would run many more validation batches than one pass over X_test.
model.fit_generator(
    train_generator,
    steps_per_epoch=X_train.shape[0] // 100,
    epochs=1,
    validation_data=test_generator,
    validation_steps=X_test.shape[0] // 100,
)

save_dir = "C:/tmp/"
model_name = 'keras_emnist_model_2.h5'
# Create the target directory first so model.save does not fail on a fresh machine.
os.makedirs(save_dir, exist_ok=True)
model_path = os.path.join(save_dir, model_name)
model.save(model_path)
print('Saved trained model at %s ' % model_path)


In [None]:
# Reload the model written by the training cell. The file name has to match
# the one passed to model.save() there ('keras_emnist_model_2.h5' in C:/tmp/);
# the previous name 'keras_emnist.h5' is never written by this notebook.
emnist_model = load_model('C:/tmp/keras_emnist_model_2.h5')

In [None]:
# Evaluate the reloaded model on the test images and their one-hot labels.
score = emnist_model.evaluate(X_test, Y_test)
# presumably score[1] is the accuracy metric stored with the saved model — TODO confirm
test_accuracy = score[1]
print()
print('Test accuracy: ', test_accuracy)

In [None]:
# Predict a class index for every test image and write actual-vs-predicted pairs to CSV.
# NOTE(review): this uses the in-memory `model`, while the accuracy above is
# computed with the reloaded `emnist_model` — confirm both are meant to be the same network.
predictions = list(model.predict_classes(X_test))
actuals = list(y_test)

sub = pd.DataFrame(
    {
        'Actual': actuals,
        'Predictions': predictions,
    }
)
sub.to_csv('C:/tmp/output_cnn.csv', index=False)