## Convolutional Neural Network (CNN) Model - ELIRAM AND PROSPER
### ByClass dataset
### The full complement of the NIST Special Database 19 is available in the ByClass split. This dataset has the same image information but differs in the number of images in each class. The dataset has an uneven number of images per class, and there are more digits than letters. The number of letters roughly equates to the frequency of use in the English language.
### train: 697,932
### test: 116,323
### total: 814,255
### classes: ByClass 62 (unbalanced)
## Obtained accuracy: 86.9%

In [None]:
# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input tree and print the full path of every file in it
for root, _, files in os.walk('/kaggle/input'):
    for name in files:
        print(os.path.join(root, name))

## Libraries and Data Import

In [None]:
!pip install python-mnist

In [None]:
import numpy as np
from mnist.loader import MNIST

# Create the python-mnist loader object.
# NOTE(review): the 'input' argument looks unused here because explicit file
# paths are passed to load() below - confirm against the python-mnist docs.
emndata = MNIST('input')
# Load the ByClass training and test sets; each load() call is given an image
# file and its matching label file and returns an (images, labels) pair.
X_train, y_train = emndata.load('../input/emnist/emnist_source_files/emnist-byclass-train-images-idx3-ubyte',
                               '../input/emnist/emnist_source_files/emnist-byclass-train-labels-idx1-ubyte')
X_test, y_test = emndata.load('../input/emnist/emnist_source_files/emnist-byclass-test-images-idx3-ubyte',
                             '../input/emnist/emnist_source_files/emnist-byclass-test-labels-idx1-ubyte')

# Convert everything to numpy arrays. Images are divided by 255.0 to scale the
# pixel values to the interval [0, 1] (assumes 8-bit pixel values - TODO confirm);
# labels are converted as-is.
X_train = np.array(X_train) / 255.0
y_train = np.array(y_train)
X_test = np.array(X_test) / 255.0
y_test = np.array(y_test)

In [None]:
# Inspect the dimensions of the loaded image and label arrays
tuple(arr.shape for arr in (X_train, y_train, X_test, y_test))

# Getting Data ready for pre-processing

In [None]:
# Reshape every image to 28x28 with a trailing single-channel axis
X_train = X_train.reshape(len(X_train), 28, 28, 1)
X_test = X_test.reshape(len(X_test), 28, 28, 1)

In [None]:
# Confirm the image arrays now carry the 28x28x1 layout
tuple(arr.shape for arr in (X_train, X_test))

In [None]:
from matplotlib import pyplot as plt
# Display the first training image (index 0, not a random one).
# The trailing channel axis is dropped so imshow receives a plain 2-D
# (28, 28) array, the same form used when plotting a validation image later on.
plt.imshow(X_train[0].reshape(28, 28))
plt.show()

### Creation of model

In [None]:
from keras.models import Sequential
from keras import optimizers
from keras.layers import Convolution2D, MaxPooling2D, Dropout, Flatten, Dense, Reshape
from keras import backend as K
from keras.constraints import maxnorm
from keras.utils import np_utils

# Cast both image sets to 32-bit floats
train_images, test_images = (
    images.astype('float32') for images in (X_train, X_test)
)

# One-hot encode the integer labels over the 62 ByClass categories
train_labels, test_labels = (
    np_utils.to_categorical(labels, 62) for labels in (y_train, y_test)
)

In [None]:
# Split the training data into a training part (train_images, train_labels)
# and a validation part (X_valid, y_valid).
# The validation part (X_valid, y_valid) is used for the final predictions.
from sklearn.model_selection import train_test_split

train_images, X_valid, train_labels, y_valid = train_test_split(
    train_images,
    train_labels,
    test_size=0.25,
    random_state=2,
)

In [None]:
# Shapes of the training, test and validation subsets (images, then labels)
tuple(
    arr.shape
    for arr in (train_images, train_labels, test_images, test_labels, X_valid, y_valid)
)

# CNN Model building 

In [None]:
# Build the CNN model.
# Architecture: In -> [[Conv2D->relu]*2 -> MaxPool2D -> Dropout]*2 -> Flatten -> Dense -> Dropout -> Out

model = Sequential()

# Block 1: two 5x5 convolutions with 32 filters each, then pooling and dropout.
# Only this first layer declares input_shape; in a Sequential model the deeper
# layers take their input shape from the layer before them.
model.add(Convolution2D(32, (5,5), input_shape=(28,28,1),
                        activation='relu', padding='same',
                        kernel_constraint=maxnorm(3)))
model.add(Convolution2D(32, (5,5),
                        activation='relu',
                        kernel_constraint=maxnorm(3)))
model.add(MaxPooling2D(pool_size=(2,2)))
model.add(Dropout(0.25))

# Block 2: two 3x3 convolutions with 64 filters each, then pooling and dropout.
model.add(Convolution2D(64, (3,3),
                        activation='relu', padding='same',
                        kernel_constraint=maxnorm(3)))
model.add(Convolution2D(64, (3,3),
                        activation='relu',
                        kernel_constraint=maxnorm(3)))
model.add(MaxPooling2D(pool_size=(2,2), strides=(2,2)))
model.add(Dropout(0.25))

# Classifier head: flatten, one hidden dense layer with dropout, then a
# softmax over the 62 classes.
model.add(Flatten())
model.add(Dense(256, activation='relu', kernel_constraint=maxnorm(3)))
model.add(Dropout(0.5))
model.add(Dense(62, activation='softmax'))

# categorical_crossentropy matches the one-hot encoded labels
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
model.summary()

### Training of model and evaluation

In [None]:
# history = model.fit(train_images,train_labels,validation_data=(test_images, test_labels), 
#                         batch_size=128, epochs=5)

In [None]:
# # Evaluating model on test data. 
# scores = model.evaluate(test_images,test_labels, verbose = 0)
# print("Accuracy: %.2f%%"%(scores[1]*100))

## Creating model history graphs

In [None]:
# from matplotlib import pyplot as plt
# print(history.history.keys())
# # summarize history for accuracy
# plt.plot(history.history['accuracy'])
# plt.plot(history.history['val_accuracy'])
# plt.title('Model Accuracy')
# plt.ylabel('Accuracy')
# plt.xlabel('Epoch')
# plt.legend(['Train', 'Test'], loc='upper left')
# plt.grid()
# plt.show()
# # summarize history for loss
# plt.plot(history.history['loss'])
# plt.plot(history.history['val_loss'])
# plt.title('Model loss')
# plt.ylabel('Loss')
# plt.xlabel('Epoch')
# plt.legend(['Train', 'Test'], loc='upper left')
# plt.grid()
# plt.show()

## Predicting a single image using the model

In [None]:
# Load the model 
from keras.models import load_model
from keras.models import model_from_json

# Rebuild the architecture from the saved JSON description. The context
# manager closes the file even if reading it fails.
with open('../input/ml-emnist-recognition-project-model-save/model.json', 'r') as json_file:
    loaded_model_json = json_file.read()
loaded_model = model_from_json(loaded_model_json)

# Restore the trained weights into the rebuilt architecture
loaded_model.load_weights('../input/ml-emnist-recognition-project-model-save/model.h5')

# From here on `model` refers to the restored network
model = loaded_model
print('Model successfully loaded')

# Compile the restored model with the same loss, optimizer and metric as the
# model built above, so that evaluate() reports accuracy
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# Report the accuracy of the loaded model on the test set
# (scores[0] is the loss, scores[1] the accuracy metric)
scores = model.evaluate(test_images,test_labels, verbose = 0)
print("Accuracy: %.2f%%"%(scores[1]*100))

In [None]:
# Creating of a dictionary that maps indexes to the labels
import pandas as pd

# Read the space-delimited mapping file: column 0 becomes the index and the
# single remaining column is squeezed into a Series of character codes.
# DataFrame.squeeze("columns") replaces read_csv's `squeeze=True` argument,
# which was deprecated in pandas 1.4 and removed in pandas 2.0.
label_map = pd.read_csv("../input/emnist/emnist-byclass-mapping.txt", delimiter = ' ', index_col=0,header=None).squeeze("columns")

# Map each position in the mapping file to the character its code stands for
label_dict = {index: chr(label) for index, label in enumerate(label_map)}

label_dict

In [None]:
# Create a function to make an image prediction 

def make_prediction(image_index, sample_dataset, label_map):
    """Return the label the global `model` predicts for one image.

    image_index    -- position of the image inside `sample_dataset`
    sample_dataset -- indexable collection of images; the selected image is
                      reshaped to a batch of shape (-1, 28, 28, 1)
    label_map      -- lookup from predicted class index to its label
    """
    batch = sample_dataset[image_index].reshape(-1,28,28,1)
    class_scores = model.predict(batch)
    # The position of the highest score is the predicted class index
    return label_map[np.argmax(class_scores)]

# Predict the validation image at index 6738
prediction = make_prediction(6738, X_valid, label_dict)

# Print our prediction
print("The predicted caracter is: ", prediction)


# Show the validation image the prediction was made for
char = X_valid[6738].reshape(28,28)
plt.imshow(char)
# show has to be called; the bare attribute `plt.show` does nothing
plt.show()

## Saving the model

In [None]:
# from keras.models import load_model
# from keras.models import model_from_json

# #saves the model info as json file
# model_json = model.to_json()
# with open("model.json", "w") as json_file:
#     json_file.write(model_json)
 

# # Creates a HDF5 file 'model.h5'
# model.save_weights("model.h5")
# print("Saved model to disk")

## Loading the model

In [None]:
# from keras.models import load_model
# from keras.models import model_from_json

# json_file = open('../input/ml-emnist-recognition-project-model-save/model.json', 'r')
# loaded_model_json = json_file.read()
# json_file.close()
# loaded_model = model_from_json(loaded_model_json)

# loaded_model.load_weights('../input/ml-emnist-recognition-project-model-save/model.h5')

# model = loaded_model
# print('Model successfully loaded')

# model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

In [None]:
# #evaluating model on test data. will take time
# scores = model.evaluate(test_images,test_labels, verbose = 0)
# print("Accuracy: %.2f%%"%(scores[1]*100))