# Libraries


In [17]:
import pandas as pd
import numpy as np
from tqdm import tqdm
import sys
import os
import random
from pathlib import Path
import imageio
import skimage
import skimage.io
import skimage.transform
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.graph_objs as go
import plotly.figure_factory as ff
from plotly import tools
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
init_notebook_mode(connected=True)
import scipy
from sklearn.model_selection import train_test_split
from sklearn import metrics
from keras import optimizers
from keras.initializers import Constant
from keras.models import Sequential
from keras.layers import Dense, Conv2D, Flatten, MaxPool2D, Dropout, BatchNormalization,LeakyReLU
from keras.preprocessing.image import ImageDataGenerator
from keras.callbacks import ModelCheckpoint, Callback, EarlyStopping, ReduceLROnPlateau, LearningRateScheduler
from keras.utils import to_categorical
from keras.layers.advanced_activations import LeakyReLU, PReLU
import tensorflow_addons as tfa
import warnings
warnings.filterwarnings("ignore", category=DeprecationWarning)
import tensorflow as tf


# Parameters to be used in the convolutional network

In [27]:
# --- Image location and geometry -------------------------------------------
# Directory prefix that is concatenated with each image file name.
IMAGE_PATH = 'chinese_mnist/data/'
IMAGE_WIDTH, IMAGE_HEIGHT, IMAGE_CHANNELS = 64, 64, 1

# --- Reproducible data splitting -------------------------------------------
# Seed and hold-out fractions passed to `train_test_split`.
RANDOM_STATE = 42
TEST_SIZE = VAL_SIZE = 0.2

# --- Training loop ---------------------------------------------------------
# PATIENCE feeds the early-stopping callback; VERBOSE feeds the callbacks.
BATCH_SIZE, NO_EPOCHS, PATIENCE = 32, 50, 5
VERBOSE = 1

# Data

In [19]:
# Metadata table describing the images (suite_id, sample_id, code, ...).
metadata_csv = 'chinese_mnist/chinese_mnist.csv'
data_df = pd.read_csv(metadata_csv)
print(data_df.shape)
# Show four rows taken from a random draw of 100.
data_df.sample(100).head(4)

(15000, 5)


Unnamed: 0,suite_id,sample_id,code,value,character
5166,23,6,15,100000000,亿
12752,77,2,7,6,六
14746,76,6,9,8,八
2318,37,8,12,100,百


# Checking the number of images stored

In [20]:
# Names of every entry in the image directory (`os.listdir` already returns a list).
image_files = os.listdir(IMAGE_PATH)
print(f"Number of image files: {len(image_files)}")

Number of image files: 15000


# Adding the full name of the image

In [21]:
def create_file_name(x):
    """Build the image file name that belongs to one metadata row.

    Parameters
    ----------
    x : mapping-like row or positional sequence
        Preferably a row that exposes ``suite_id``, ``sample_id`` and ``code``
        by label (e.g. the ``pandas.Series`` handed over by
        ``DataFrame.apply(..., axis=1)``).  A plain positional sequence whose
        first three items are those values is still accepted.

    Returns
    -------
    str
        ``"input_<suite_id>_<sample_id>_<code>.jpg"``.
    """
    try:
        # Label-based access does not depend on the column order and avoids
        # positional ``x[0]`` lookups on a label-indexed Series.
        suite_id, sample_id, code = x["suite_id"], x["sample_id"], x["code"]
    except (KeyError, TypeError, IndexError):
        # Fallback for unlabeled rows (tuple, list, array, ...).
        suite_id, sample_id, code = x[0], x[1], x[2]
    return f"input_{suite_id}_{sample_id}_{code}.jpg"

In [22]:
# Row-wise apply: derive one image file name per metadata row.
data_df["file"] = data_df.apply(create_file_name, axis="columns")
data_df.head()

Unnamed: 0,suite_id,sample_id,code,value,character,file
0,1,1,10,9,九,input_1_1_10.jpg
1,1,10,10,9,九,input_1_10_10.jpg
2,1,2,10,9,九,input_1_2_10.jpg
3,1,3,10,9,九,input_1_3_10.jpg
4,1,4,10,9,九,input_1_4_10.jpg


# Checking that every metadata row has a matching image file


In [23]:
# Count the file names from the metadata that also exist in the image directory.
file_names = data_df['file'].tolist()
matching_names = set(file_names) & set(image_files)
print(f"Matching image names: {len(matching_names)}")

Matching image names: 15000


# Checking the dimension of the images

In [24]:
def read_image_sizes(file_name):
    """Load one image from ``IMAGE_PATH`` and return its array shape as a list."""
    pixels = skimage.io.imread(IMAGE_PATH + file_name)
    return [*pixels.shape]

In [28]:
# Register `progress_apply` on pandas objects so the scan shows a progress bar.
tqdm.pandas()
# One shape per image, stacked into a 2-column array (one row per file).
m = np.stack(data_df['file'].progress_apply(read_image_sizes))
# Image arrays are shaped (rows, cols), i.e. (height, width): label the columns
# in that order, then present them as w, h so the table layout is unchanged.
# The explicit index keeps the concat aligned with data_df row by row.
df = pd.DataFrame(m, columns=['h', 'w'], index=data_df.index)[['w', 'h']]
data_df = pd.concat([data_df, df], axis=1, sort=False)
data_df.head()

100%|███████████████████████████████████████████████████████████████████████████| 15000/15000 [00:15<00:00, 992.12it/s]


Unnamed: 0,suite_id,sample_id,code,value,character,file,w,h
0,1,1,10,9,九,input_1_1_10.jpg,64,64
1,1,10,10,9,九,input_1_10_10.jpg,64,64
2,1,2,10,9,九,input_1_2_10.jpg,64,64
3,1,3,10,9,九,input_1_3_10.jpg,64,64
4,1,4,10,9,九,input_1_4_10.jpg,64,64


# Checking samples and sub-samples of the data

In [29]:
# Distinct writers/suites and the sample ids that occur in the metadata.
suite_count = data_df['suite_id'].nunique()
sample_ids = data_df['sample_id'].unique()
print(f"Number of suites: {suite_count}")
print(f"Samples: {sample_ids}")

Number of suites: 100
Samples: [ 1 10  2  3  4  5  6  7  8  9]


# Creating datasets to build the network

In [30]:
# Hold out TEST_SIZE of all rows as the test set, stratified on `code`.
train_df, test_df = train_test_split(
    data_df,
    test_size=TEST_SIZE,
    random_state=RANDOM_STATE,
    stratify=data_df["code"].to_numpy(),
)

In [31]:
# Carve VAL_SIZE of the remaining training rows off as the validation set,
# again stratified on `code`.
train_df, val_df = train_test_split(
    train_df,
    test_size=VAL_SIZE,
    random_state=RANDOM_STATE,
    stratify=train_df["code"].to_numpy(),
)

In [32]:
# Report the number of rows in each split.
split_reports = (
    ("Train set rows: {}", train_df),
    ("Test  set rows: {}", test_df),
    ("Val   set rows: {}", val_df),
)
for template, split in split_reports:
    print(template.format(len(split)))

Train set rows: 9600
Test  set rows: 3000
Val   set rows: 2400


# Encoding images to create network

In [33]:
def read_image(file_name):
    """Load one image and resize it to the network's input shape.

    Parameters
    ----------
    file_name : str
        File name relative to ``IMAGE_PATH``.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(IMAGE_HEIGHT, IMAGE_WIDTH, IMAGE_CHANNELS)``.
    """
    image = skimage.io.imread(IMAGE_PATH + file_name)
    # Array axes are (rows, cols, channels), so the height comes first.  The
    # channel count comes from IMAGE_CHANNELS instead of a hard-coded 1.
    target_shape = (IMAGE_HEIGHT, IMAGE_WIDTH, IMAGE_CHANNELS)
    return skimage.transform.resize(image, target_shape, mode='reflect')

In [34]:
def categories_encoder(dataset, var='character'):
    """Turn a metadata frame into model inputs and one-hot targets.

    Parameters
    ----------
    dataset : pandas.DataFrame
        Must provide a ``file`` column (image file names) and the ``var`` column.
    var : str
        Name of the label column to one-hot encode.

    Returns
    -------
    tuple
        ``(X, y)``: ``X`` stacks the arrays returned by ``read_image`` along a
        new first axis; ``y`` is the ``pd.get_dummies`` frame for ``var``
        (all categories kept).
    """
    images = [read_image(name) for name in dataset['file']]
    X = np.stack(images)
    y = pd.get_dummies(dataset[var], drop_first=False)
    return X, y

In [35]:
# Encode each split into an image tensor and a one-hot label frame.
X_train, y_train = categories_encoder(train_df)
X_val, y_val = categories_encoder(val_df)
X_test, y_test = categories_encoder(test_df)

# Each split is one-hot encoded on its own, so its columns only cover the
# classes present in that split.  Align validation/test targets to the
# training columns (same set, same order); a class absent from a split becomes
# an all-zero column instead of silently shifting the label positions.
y_val = y_val.reindex(columns=y_train.columns, fill_value=0)
y_test = y_test.reindex(columns=y_train.columns, fill_value=0)

# Creating network topology, inspired by [Xiao et al. (2017)](https://arxiv.org/pdf/1702.07975.pdf)

- The architecture is composed of 7 convolutional layers
- A hidden fully connected layer of 1024 neurons
- PReLU is used as the activation function
- It was not necessary to apply the pruning process used in the paper




In [36]:
# Seven 3x3 "same"-padded convolutions, each followed by a PReLU whose slope
# starts at 0.25, with 2x2 max-pooling closing each of the five stages; then a
# 1024-unit dense layer and a 15-way softmax output.
model = Sequential()

# Stage 1 -- the input shape follows the image constants (height, width, channels).
model.add(Conv2D(filters=64, kernel_size=(3, 3), padding='same',
                 input_shape=(IMAGE_HEIGHT, IMAGE_WIDTH, IMAGE_CHANNELS)))
model.add(PReLU(alpha_initializer=Constant(value=0.25)))
model.add(MaxPool2D(2))

# Stage 2
model.add(Conv2D(filters=128, kernel_size=(3, 3), padding='same'))
model.add(PReLU(alpha_initializer=Constant(value=0.25)))
model.add(MaxPool2D(2))

# Stage 3
model.add(Conv2D(filters=160, kernel_size=(3, 3), padding='same'))
model.add(PReLU(alpha_initializer=Constant(value=0.25)))
model.add(MaxPool2D(2))

# Stage 4 -- two convolutions before pooling
model.add(Conv2D(filters=256, kernel_size=(3, 3), padding='same'))
model.add(PReLU(alpha_initializer=Constant(value=0.25)))
model.add(Conv2D(filters=256, kernel_size=(3, 3), padding='same'))
model.add(PReLU(alpha_initializer=Constant(value=0.25)))
model.add(MaxPool2D(2))

# Stage 5 -- two convolutions before pooling
model.add(Conv2D(filters=384, kernel_size=(3, 3), padding='same'))
model.add(PReLU(alpha_initializer=Constant(value=0.25)))
model.add(Conv2D(filters=384, kernel_size=(3, 3), padding='same'))
model.add(PReLU(alpha_initializer=Constant(value=0.25)))
model.add(MaxPool2D(2))

# Classifier head
model.add(Dropout(0.5))
model.add(Flatten())
model.add(Dense(1024))
model.add(PReLU(alpha_initializer=Constant(value=0.25)))
model.add(Dropout(0.5))
# The softmax layer is the final layer.  The PReLU that used to follow it was
# removed: softmax outputs are never negative, so that PReLU passed them
# through unchanged and only added parameters that could not be trained.
# NOTE(review): weight files saved from the previous topology (with the extra
# layer) presumably no longer match -- retrain before calling load_weights.
model.add(Dense(15, activation='softmax'))
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 64, 64, 64)        640       
_________________________________________________________________
p_re_lu (PReLU)              (None, 64, 64, 64)        262144    
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 32, 32, 64)        0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 32, 32, 128)       73856     
_________________________________________________________________
p_re_lu_1 (PReLU)            (None, 32, 32, 128)       131072    
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 16, 16, 128)       0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 16, 16, 160)       1

# Training the neural network

In [37]:
# Exponential learning-rate decay.  The exponent is offset by NO_EPOCHS, so the
# first epoch (x = 0) already runs at 1e-3 * 0.99 ** NO_EPOCHS.
annealer = LearningRateScheduler(lambda x: 1e-3 * 0.99 ** (x+NO_EPOCHS))
# Stop once the validation loss has not improved for PATIENCE epochs.  The
# training loss was monitored before; it cannot reveal over-fitting, and
# validation data is supplied to `fit`.
earlystopper = EarlyStopping(monitor='val_loss', patience=PATIENCE, verbose=VERBOSE)
# Keep only the weights of the epoch with the best validation accuracy.
checkpointer = ModelCheckpoint('best_model.h5',
                                monitor='val_accuracy',
                                verbose=VERBOSE,
                                save_best_only=True,
                                save_weights_only=True)

In [38]:
# Fit on the training split, validating on the validation split after every
# epoch; the returned object carries the per-epoch metrics in `.history`.
fit_callbacks = [earlystopper, checkpointer, annealer]
train_model = model.fit(
    x=X_train,
    y=y_train,
    validation_data=(X_val, y_val),
    epochs=NO_EPOCHS,
    batch_size=BATCH_SIZE,
    callbacks=fit_callbacks,
    verbose=1,
)

Epoch 1/50
Epoch 00001: val_accuracy improved from -inf to 0.93333, saving model to best_model.h5
Epoch 2/50
Epoch 00002: val_accuracy improved from 0.93333 to 0.96292, saving model to best_model.h5
Epoch 3/50
Epoch 00003: val_accuracy improved from 0.96292 to 0.99208, saving model to best_model.h5
Epoch 4/50
Epoch 00004: val_accuracy did not improve from 0.99208
Epoch 5/50
Epoch 00005: val_accuracy did not improve from 0.99208
Epoch 6/50
Epoch 00006: val_accuracy improved from 0.99208 to 0.99542, saving model to best_model.h5
Epoch 7/50
Epoch 00007: val_accuracy did not improve from 0.99542
Epoch 8/50
Epoch 00008: val_accuracy did not improve from 0.99542
Epoch 9/50
Epoch 00009: val_accuracy did not improve from 0.99542
Epoch 10/50
Epoch 00010: val_accuracy did not improve from 0.99542
Epoch 11/50
Epoch 00011: val_accuracy did not improve from 0.99542
Epoch 12/50
Epoch 00012: val_accuracy did not improve from 0.99542
Epoch 13/50
Epoch 00013: val_accuracy did not improve from 0.99542
E

# Visualizing the evolution of loss and accuracy

In [39]:
def create_trace(x, y, ylabel, color):
    """Return a plotly ``Scatter`` (markers + lines) of ``y`` against ``x``.

    ``ylabel`` becomes the trace name, ``color`` the marker colour, and the
    ``x`` values are also attached as the text of each point.
    """
    return go.Scatter(
        x=x,
        y=y,
        text=x,
        name=ylabel,
        mode="markers+lines",
        marker={"color": color},
    )
    
def plot_accuracy_and_loss(train_model):
    """Plot training/validation accuracy and loss side by side.

    Parameters
    ----------
    train_model : object with a ``history`` mapping
        Must provide the keys ``accuracy``, ``val_accuracy``, ``loss`` and
        ``val_loss`` (the object returned by ``model.fit`` is used in this file).
    """
    history = train_model.history
    # (values, legend label, colour, subplot column)
    series = [
        (history['accuracy'], "Training accuracy", "Green", 1),
        (history['val_accuracy'], "Validation accuracy", "Red", 1),
        (history['loss'], "Training loss", "Blue", 2),
        (history['val_loss'], "Validation loss", "Magenta", 2),
    ]
    # Epochs are numbered from 1 on the x axis.
    epochs = list(range(1, len(series[0][0]) + 1))

    # NOTE(review): `plotly.tools.make_subplots` looks deprecated in favour of
    # `plotly.subplots.make_subplots` -- confirm against the installed version.
    titles = ('Training and validation accuracy',
              'Training and validation loss')
    fig = tools.make_subplots(rows=1, cols=2, subplot_titles=titles)

    # Accuracy curves go to the left panel, loss curves to the right one.
    for values, label, colour, column in series:
        fig.append_trace(create_trace(epochs, values, label, colour), 1, column)

    # Axis titles; both y axes are pinned to the [0, 1] range.
    layout = fig['layout']
    layout['xaxis'].update(title = 'Epoch')
    layout['xaxis2'].update(title = 'Epoch')
    layout['yaxis'].update(title = 'Accuracy', range=[0,1])
    layout['yaxis2'].update(title = 'Loss', range=[0,1])

    iplot(fig, filename='accuracy-loss')

# Draw the accuracy/loss curves from the history returned by `model.fit`.
plot_accuracy_and_loss(train_model)

# Reviewing the metrics in the validation and test sets

In [42]:
def test_accuracy_report(model):
    """Print a per-class report and the overall loss/accuracy on the test set.

    Reads the module-level ``X_test`` and ``y_test``; the class names shown in
    the report are the columns of ``y_test``.

    Parameters
    ----------
    model : object providing ``predict`` and ``evaluate``
        The fitted network to assess.
    """
    probabilities = model.predict(X_test)
    truth_onehot = y_test.values
    # Collapse class scores / one-hot rows to class indices.
    predicted_idx = probabilities.argmax(axis=1)
    truth_idx = truth_onehot.argmax(axis=1)
    report = metrics.classification_report(
        truth_idx, predicted_idx, target_names=y_test.columns)
    print(report)
    test_res = model.evaluate(X_test, truth_onehot, verbose=0)
    loss_value, accuracy_value = test_res[0], test_res[1]
    print('Loss function: %s, accuracy:' % loss_value, accuracy_value)

In [43]:
# Plain assignment: `model_optimal` and `model` are the same object, so loading
# the checkpoint also replaces the weights seen through `model`.
model_optimal = model
model_optimal.load_weights('best_model.h5')
# The score is computed on the test split, so it is labelled as a test score
# (it was previously printed as "Best validation loss").
score = model_optimal.evaluate(X_test, y_test, verbose=0)
print(f'Best model test loss: {score[0]}, accuracy: {score[1]}')

test_accuracy_report(model_optimal)

Best validation loss: 0.02186800166964531, accuracy: 0.996999979019165
              precision    recall  f1-score   support

           一       1.00      1.00      1.00       200
           七       1.00      1.00      1.00       200
           万       1.00      1.00      1.00       200
           三       1.00      1.00      1.00       200
           九       1.00      0.99      1.00       200
           二       1.00      1.00      1.00       200
           五       1.00      1.00      1.00       200
           亿       1.00      1.00      1.00       200
           八       1.00      1.00      1.00       200
           六       1.00      1.00      1.00       200
           十       0.98      0.98      0.98       200
           千       0.98      0.98      0.98       200
           四       0.99      1.00      1.00       200
           百       1.00      0.99      0.99       200
           零       1.00      1.00      1.00       200

    accuracy                           1.00      3000
   macro 

In [44]:
# Loss and accuracy on the test split.
score = model.evaluate(X_test, y_test, verbose=0)
for label, value in zip(('Test loss:', 'Test accuracy:'), score):
    print(label, value)

Test loss: 0.02186800166964531
Test accuracy: 0.996999979019165


In [45]:
# Loss and accuracy on the validation split.
score = model.evaluate(X_val, y_val, verbose=0)
for label, value in zip(('Val loss:', 'Val accuracy:'), score):
    print(label, value)

Val loss: 0.015015154145658016
Val accuracy: 0.9983333349227905


# Conclusions

- The model shows no signs of over-fitting or under-fitting
- Accuracy above 99% was obtained on both the test data and the validation data
- The architecture of the network is not exactly the same as the one used in the paper, but it has the same number of layers.
- The table above reports precision, recall and F1-score per character; the lowest value shown for any character is 0.98
- These results confirm the effectiveness of the approach proposed by Xiao et al.