In [30]:
import os, cv2, csv
import numpy as np
from keras.models import Sequential
from keras.models import load_model
from keras.models import Model
from keras.layers import Input, Dense, Dropout, Flatten, Conv2D, MaxPooling2D
from keras.layers.normalization import BatchNormalization
from keras.utils  import np_utils
from keras.callbacks import ModelCheckpoint, EarlyStopping, TensorBoard

# --- Notebook configuration -------------------------------------------------
# Folders / files the notebook reads from and writes to.
PROCESSED_FOLDER = "processed/"   # directory holding the numbered training images
LABEL_CSV_FILE = 'label.csv'      # CSV whose first column is the label text
MODEL_FOLDER = "model/"           # prefix for checkpoint files

# Number of images loaded by read_train_data (1.jpg .. SIZE.jpg).
SIZE = 5

# Image geometry fed to the network input layer.
WIDTH = 140
HEIGHT = 48

# Character -> class index table for the 19 characters that can appear in a
# label; indices follow the order of the characters in the string below.
dic19 = {char: index for index, char in enumerate('234579ACFHKMNPQRTYZ')}


In [31]:
def to_onelist(text, mapping=None):
    """Convert a label string into a list of one-hot vectors, one per character.

    Args:
        text: Label string; every character must be a key of ``mapping``.
        mapping: Optional dict from character to class index. Defaults to the
            module-level ``dic19`` table. Each one-hot vector has
            ``len(mapping)`` entries, so indices are expected to be
            ``0 .. len(mapping) - 1``.

    Returns:
        A list with ``len(text)`` entries; each entry is a list of ints that is
        all zeros except for a 1 at the index of the corresponding character.

    Raises:
        KeyError: If ``text`` contains a character that is not in ``mapping``.
    """
    if mapping is None:
        # Resolved at call time so the default always follows the current table.
        mapping = dic19
    width = len(mapping)

    label_list = []
    for position, c in enumerate(text):
        try:
            hot_index = mapping[c]
        except KeyError:
            # Same exception type as a plain dict lookup, but say which
            # character of which label was rejected.
            raise KeyError(
                "unsupported character %r at position %d of label %r"
                % (c, position, text)
            ) from None
        onehot = [0] * width
        onehot[hot_index] = 1
        label_list.append(onehot)
    return label_list

In [32]:
# Create the CNN model: a stack of Conv2D / MaxPooling2D stages followed by
# four independent softmax heads ('digit1' .. 'digit4'), one per label character.
print('Creating CNN model...')
tensor_in = Input((HEIGHT, WIDTH, 3))
tensor_out = tensor_in

# Stage 1: 32 filters.
tensor_out = Conv2D(filters=32, kernel_size=(3, 3), padding='same', activation='relu')(tensor_out)
tensor_out = Conv2D(filters=32, kernel_size=(3, 3), activation='relu')(tensor_out)
tensor_out = MaxPooling2D(pool_size=(2, 2))(tensor_out)

# Stage 2: 64 filters.
tensor_out = Conv2D(filters=64, kernel_size=(3, 3), padding='same', activation='relu')(tensor_out)
tensor_out = Conv2D(filters=64, kernel_size=(3, 3), activation='relu')(tensor_out)
tensor_out = MaxPooling2D(pool_size=(2, 2))(tensor_out)

# Stage 3: 128 filters.
tensor_out = Conv2D(filters=128, kernel_size=(3, 3), padding='same', activation='relu')(tensor_out)
tensor_out = Conv2D(filters=128, kernel_size=(3, 3), activation='relu')(tensor_out)
# The input is laid out as (height, width, channels), so the feature axis is
# the last one. axis=1 would normalise per image row instead of per channel.
# NOTE: this changes the BatchNormalization parameter shapes, so weights saved
# from the previous axis=1 graph cannot be loaded into this one with
# load_weights (load_model of a full checkpoint is unaffected).
tensor_out = BatchNormalization(axis=-1)(tensor_out)
tensor_out = MaxPooling2D(pool_size=(2, 2))(tensor_out)

# Stage 4: 256 filters.
tensor_out = Conv2D(filters=256, kernel_size=(3, 3), padding='same', activation='relu')(tensor_out)
tensor_out = Conv2D(filters=256, kernel_size=(3, 3), padding='same', activation='relu')(tensor_out)
tensor_out = MaxPooling2D(pool_size=(2, 2))(tensor_out)

# Stage 5: 512 filters.
tensor_out = Conv2D(filters=512, kernel_size=(3, 3), padding='same', activation='relu')(tensor_out)
tensor_out = BatchNormalization(axis=-1)(tensor_out)  # channels-last, see above
tensor_out = MaxPooling2D(pool_size=(2, 2))(tensor_out)

tensor_out = Flatten()(tensor_out)
tensor_out = Dropout(0.5)(tensor_out)

# One classifier per character position. The head width follows the character
# table so it always matches the one-hot vectors produced by to_onelist.
tensor_out = [Dense(len(dic19), name='digit1', activation='softmax')(tensor_out),
              Dense(len(dic19), name='digit2', activation='softmax')(tensor_out),
              Dense(len(dic19), name='digit3', activation='softmax')(tensor_out),
              Dense(len(dic19), name='digit4', activation='softmax')(tensor_out)]

Creating CNN model...


In [33]:
# Tie the input tensor to the list of per-character output heads.
model = Model(inputs=tensor_in, outputs=tensor_out)

# The same loss is applied to every output head; accuracy is tracked per head.
model.compile(
    optimizer='Adamax',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# Print the layer-by-layer overview of the assembled network.
model.summary()

Model: "model_4"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_4 (InputLayer)            (None, 48, 140, 3)   0                                            
__________________________________________________________________________________________________
conv2d_28 (Conv2D)              (None, 48, 140, 32)  896         input_4[0][0]                    
__________________________________________________________________________________________________
conv2d_29 (Conv2D)              (None, 46, 138, 32)  9248        conv2d_28[0][0]                  
__________________________________________________________________________________________________
max_pooling2d_16 (MaxPooling2D) (None, 23, 69, 32)   0           conv2d_29[0][0]                  
____________________________________________________________________________________________

In [34]:
def read_train_data(filename, size):
    """Load the images ``1.jpg`` .. ``<size>.jpg`` from a directory as one array.

    Args:
        filename: Path of the directory that holds the numbered ``.jpg``
            files. A trailing path separator is optional.
        size: Number of images to load, counted from ``1.jpg``.

    Returns:
        A numpy array stacking the loaded images along a new first axis, with
        every pixel value divided by 255.0. If ``filename`` is not an existing
        directory an empty list is returned (behaviour kept from the original
        implementation).

    Raises:
        FileNotFoundError: If one of the expected image files is missing or
            cannot be decoded by ``cv2.imread``.
    """
    train_data = []
    if os.path.isdir(filename):
        images = []
        for index in range(1, size + 1):
            # os.path.join yields the same path as plain concatenation when
            # `filename` ends with a separator, and a correct one when it
            # does not.
            image_path = os.path.join(filename, str(index) + ".jpg")
            image = cv2.imread(image_path)
            if image is None:
                # cv2.imread signals failure by returning None instead of
                # raising; fail here with the offending path rather than with
                # an opaque TypeError on the division below.
                raise FileNotFoundError(
                    "could not read training image: " + image_path
                )
            images.append(np.array(image) / 255.0)
        train_data = np.stack(images)
    return train_data

In [35]:
def read_label_data(filename, num_chars=4):
    """Read label strings from a CSV file and group their one-hot codes by position.

    The first column of every non-empty row is taken as the label text and
    encoded with ``to_onelist``.

    Args:
        filename: Path of the UTF-8 encoded CSV file.
        num_chars: Number of leading character positions to extract from each
            label. Defaults to 4.

    Returns:
        A list of ``num_chars`` numpy arrays. Array ``i`` holds, for every
        label in file order, the one-hot vector of the label's ``i``-th
        character.

    Raises:
        IndexError: If a label has fewer than ``num_chars`` characters.
        KeyError: If ``to_onelist`` rejects a character of a label.
    """
    # The context manager closes the file even if a row fails to encode;
    # newline='' is what the csv module expects from the file object.
    with open(filename, 'r', encoding='utf8', newline='') as traincsv:
        # csv.reader yields [] for blank lines; skip them instead of failing
        # on row[0].
        read_label = [to_onelist(row[0]) for row in csv.reader(traincsv) if row]

    # Regroup from per-label lists into one list per character position.
    train_label = [[] for _ in range(num_chars)]
    for arr in read_label:
        for index in range(num_chars):
            train_label[index].append(arr[index])
    return [np.asarray(column) for column in train_label]

In [37]:

# Load the training images and their labels into memory.
print("Reading training data...")

# Images 1.jpg .. SIZE.jpg from PROCESSED_FOLDER, scaled by read_train_data.
# NOTE(review): nothing here checks that the number of images matches the
# number of label rows in LABEL_CSV_FILE — confirm the two stay in sync.
train_data = read_train_data(PROCESSED_FOLDER, SIZE)
# List of four arrays, one per character position of the labels.
train_label = read_label_data(LABEL_CSV_FILE)

print("Reading completed")

Reading training data...
Reading completed


array([[[[0.        , 0.        , 0.        ],
         [0.        , 0.        , 0.        ],
         [0.        , 0.        , 0.        ],
         ...,
         [0.        , 0.        , 0.        ],
         [0.        , 0.        , 0.        ],
         [0.        , 0.        , 0.        ]],

        [[0.        , 0.        , 0.        ],
         [0.        , 0.        , 0.        ],
         [0.        , 0.        , 0.        ],
         ...,
         [0.        , 0.        , 0.        ],
         [0.        , 0.        , 0.        ],
         [0.        , 0.        , 0.        ]],

        [[0.        , 0.        , 0.        ],
         [0.        , 0.        , 0.        ],
         [0.        , 0.        , 0.        ],
         ...,
         [0.        , 0.        , 0.        ],
         [0.        , 0.        , 0.        ],
         [0.        , 0.        , 0.        ]],

        ...,

        [[0.        , 0.        , 0.        ],
         [0.        , 0.        , 0.        ]

In [6]:
# Checkpoint file name template; Keras fills in the epoch number and the
# training / validation loss of that epoch.
filepath = MODEL_FOLDER + "{epoch:02d}-{loss:.2f}-{val_loss:.2f}.hdf5"

# With save_best_only=False a checkpoint is written after every epoch.
checkpoint = ModelCheckpoint(
    filepath,
    monitor='val_digit4_acc',
    verbose=1,
    save_best_only=False,
    mode='max',
)

# Stop training once the validation loss has not improved for 8 epochs.
earlystop = EarlyStopping(
    monitor='val_loss',
    patience=8,
    verbose=1,
    mode='auto',
)

# Write TensorBoard logs (including per-epoch histograms) under ./logs.
tensorBoard = TensorBoard(
    log_dir='logs',
    histogram_freq=1,
)

callbacks_list = [tensorBoard, earlystop, checkpoint]

In [7]:
# Resume from a previously saved checkpoint. This rebinds `model`, so the
# network built and compiled in the earlier cells is replaced by the loaded one.
# NOTE(review): the checkpoint file name is hard-coded and must already exist
# under MODEL_FOLDER — confirm before running on a fresh checkout.
model = load_model(MODEL_FOLDER + "25-1.10-3.01.hdf5")

W0916 10:01:58.558650 4711863744 deprecation.py:323] From /usr/local/lib/python3.7/site-packages/tensorflow/python/ops/math_grad.py:1250: add_dispatch_support.<locals>.wrapper (from tensorflow.python.ops.array_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where


In [8]:
# Train for up to 10 epochs, holding out 20% of the samples for validation.
# The callbacks write TensorBoard logs, stop early and save checkpoints.
history = model.fit(
    train_data,
    train_label,
    validation_split=0.2,
    batch_size=50,
    epochs=10,
    verbose=1,
    callbacks=callbacks_list,
)

W0916 10:02:01.011471 4711863744 deprecation_wrapper.py:119] From /usr/local/lib/python3.7/site-packages/keras/callbacks.py:1120: The name tf.summary.histogram is deprecated. Please use tf.compat.v1.summary.histogram instead.

W0916 10:02:01.154010 4711863744 deprecation_wrapper.py:119] From /usr/local/lib/python3.7/site-packages/keras/callbacks.py:1122: The name tf.summary.merge_all is deprecated. Please use tf.compat.v1.summary.merge_all instead.

W0916 10:02:01.158941 4711863744 deprecation_wrapper.py:119] From /usr/local/lib/python3.7/site-packages/keras/callbacks.py:1125: The name tf.summary.FileWriter is deprecated. Please use tf.compat.v1.summary.FileWriter instead.



Train on 2800 samples, validate on 700 samples
Epoch 1/10

Epoch 00001: saving model to model/01-0.95-1.02.hdf5
Epoch 2/10

Epoch 00002: saving model to model/02-0.77-1.01.hdf5
Epoch 3/10

Epoch 00003: saving model to model/03-0.65-0.88.hdf5
Epoch 4/10

Epoch 00004: saving model to model/04-0.57-0.91.hdf5
Epoch 5/10

Epoch 00005: saving model to model/05-0.46-0.85.hdf5
Epoch 6/10

Epoch 00006: saving model to model/06-0.40-0.87.hdf5
Epoch 7/10

Epoch 00007: saving model to model/07-0.35-0.79.hdf5
Epoch 8/10

Epoch 00008: saving model to model/08-0.31-1.03.hdf5
Epoch 9/10

Epoch 00009: saving model to model/09-0.28-0.79.hdf5
Epoch 10/10

Epoch 00010: saving model to model/10-0.22-0.76.hdf5


In [None]:
import matplotlib.pyplot as plt
def show_train_history(train_history, train, validation):
    """Plot one training metric and its validation counterpart per epoch.

    Args:
        train_history: Object exposing a ``history`` mapping from metric name
            to a sequence of per-epoch values (e.g. the value returned by
            ``model.fit``).
        train: Key of the training metric; also used as the y-axis label.
        validation: Key of the validation metric.
    """
    curves = train_history.history
    # Training curve first, then validation, matching the legend order below.
    for metric_key in (train, validation):
        plt.plot(curves[metric_key])

    plt.title('Train History')
    plt.ylabel(train)
    plt.xlabel('Epoch')
    plt.legend(['train', 'validation'], loc='upper left')
    plt.show()

In [None]:
# Plot training vs. validation accuracy of the first character head ('digit1').
# NOTE(review): the '*_acc' key names depend on the installed Keras version
# (some releases report '*_accuracy') — confirm against history.history.keys().
show_train_history(history, 'digit1_acc', 'val_digit1_acc')