In [1]:
import numpy as np
import pandas as pd

from datetime import datetime
from IPython.display import display
import cv2
import os
import time

import matplotlib.pyplot as plt

import tensorflow as tf
import tensorflow.keras.backend as K
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense, Dropout, Flatten, Conv2D, MaxPool2D, BatchNormalization
from tensorflow.keras.layers import Activation, Concatenate, GlobalMaxPooling2D
from tensorflow.keras.layers import GlobalAveragePooling2D, Reshape, Permute, multiply
#from tensorflow.keras.applications.inception_resnet_v2 import InceptionResNetV2, preprocess_input

from tensorflow.keras.applications.xception import Xception, preprocess_input

from tensorflow.keras.utils import Sequence
from tensorflow.keras.optimizers import Adam, SGD
from tensorflow.keras.callbacks import ReduceLROnPlateau, ModelCheckpoint, EarlyStopping, TensorBoard, LambdaCallback
from tensorflow.keras.utils import to_categorical

from sklearn.model_selection import train_test_split

import imgaug as ia
from imgaug import augmenters as iaa

## Create image augmenter

In [2]:
def create_augmenter(train=True):
    '''
        Build the imgaug augmentation pipeline for the data generators.

        Parameters
        ----------
        train : bool
            True  -> return a light geometric augmentation sequence.
            False -> return None (no augmentation).

        Returns
        -------
        iaa.Sequential or None
    '''
    if not train:
        # Nothing to augment outside of training. The data generators in this
        # notebook skip augmentation when the augmenter is falsy.
        return None

    # from https://github.com/aleju/imgaug
    # Sometimes(0.5, ...) applies the given augmenter in 50% of all cases,
    # e.g. Sometimes(0.5, GaussianBlur(0.3)) would blur roughly every second image.
    def sometimes(aug):
        return iaa.Sometimes(0.5, aug)

    return iaa.Sequential(
        [
            # crop (negative values) or pad (positive values) by up to 5% of height/width
            sometimes(iaa.CropAndPad(
                percent=(-0.05, 0.05),
                pad_mode=ia.ALL, # random mode from all available modes will be sampled per image.
                pad_cval=(0, 255) # The constant value to use if the pad mode is constant or the end value to use if the mode is linear_ramp
            )),
            sometimes(iaa.Affine(
                scale={"x": (0.8, 1.2), "y": (0.8, 1.2)}, # scale images to 80-120% of their size, individually per axis
                rotate=(-5, 5), # rotate by -5 to +5 degrees
                shear=(-5, 5), # shear by -5 to +5 degrees
                cval=(0, 255), # if mode is constant, use a cval between 0 and 255
                mode=ia.ALL # use any of scikit-image's warping modes
            )),
        ],
    )

## MultiOutputDataGenerator for Keras multi-output models

In [4]:
# reference: https://www.kaggle.com/mpalermo/keras-pipeline-custom-generator-imgaug
class BaseDataGenerator(Sequence):
    '''
        Generates batches of images (and, optionally, one-hot labels) for Keras.

        Images are taken from an in-memory collection (`images`) or read from
        disk with OpenCV (`images_paths`); `images` is used when both are given.

        Parameters
        ----------
        images : sequence of image arrays, or None
        images_paths : sequence of file paths, or None
        labels : sequence of integer class indices (read when return_label is True)
        batch_size : int
        image_dimensions : tuple
            Target (height, width, channels) of every returned image.
        shuffle : bool
            Reshuffle the sample order at construction and after each epoch.
        augmenter : object exposing augment_images(list_of_images), or None
        preprocessor : callable applied to each resized image;
            when None the image is divided by 255.
        return_label : bool
            When False, batches contain images only.
        total_classes : int or None
            Forwarded to to_categorical as num_classes.

        Raises
        ------
        ValueError
            If neither `images` nor `images_paths` is provided.
    '''
    def __init__(self, images=None, images_paths=None, labels=None, batch_size=64, image_dimensions = (512, 512, 3),
                 shuffle=False, augmenter=None, preprocessor=None,
                 return_label=True, total_classes=None):
        # Validate before anything calls len() on a missing source, otherwise
        # the caller only sees an opaque "NoneType has no len()" TypeError.
        if images is None and images_paths is None:
            raise ValueError("Must give images or images_paths")

        self.labels      = labels              # array of labels
        self.images = images
        self.images_paths = images_paths        # array of image paths
        self.dim          = image_dimensions    # image dimensions
        self.batch_size   = batch_size          # batch size
        self.shuffle      = shuffle             # shuffle bool
        self.augmenter      = augmenter           # augmenter
        self.preprocessor = preprocessor
        self.return_label = return_label
        self.total_classes = total_classes

        if self.images is None:
            self.total_len = len(self.images_paths)
        else:
            self.total_len = len(self.images)

        self.on_epoch_end()

    def __len__(self):
        '''
            Denotes the number of batches per epoch
        '''
        return int(np.ceil(self.total_len / self.batch_size))

    def on_epoch_end(self):
        '''
            Updates indexes after each epoch
        '''
        self.indexes = np.arange(self.total_len)
        if self.shuffle:
            np.random.shuffle(self.indexes)

    @staticmethod
    def _read_image(path):
        '''
            Read one image from disk, failing loudly when OpenCV cannot load it.
        '''
        image = cv2.imread(path)
        if image is None:
            # cv2.imread reports a missing/unreadable file by returning None
            raise OSError("Could not read image: {}".format(path))
        return image

    def gather_batch_item(self, index):
        '''
            Generate one batch of data

            Returns (images, one_hot_labels) when return_label is True,
            otherwise just the images array.
        '''
        # selects indices of data for next batch
        indexes = self.indexes[index*self.batch_size:(index+1)*self.batch_size]

        # select data and load images
        if self.images is None:
            images = [self._read_image(self.images_paths[k]) for k in indexes]
        else:
            images = [self.images[k] for k in indexes]

        # augment first, then resize and preprocess each image
        if self.augmenter:
            images = self.augmenter.augment_images(images)

        # cv2.resize takes dsize as (width, height) while self.dim is
        # (height, width, channels), so the two entries must be swapped.
        target_size = (self.dim[1], self.dim[0])
        images = np.array([self.preprocess_image(cv2.resize(img, target_size)) for img in images])

        if self.return_label:
            labels = np.array([self.labels[k] for k in indexes])
            labels = to_categorical(labels, num_classes=self.total_classes)
            return images, labels
        else:
            return images

    def __getitem__(self, index):
        return self.gather_batch_item(index)

    def preprocess_image(self, images):
        '''
            Scale one image: divide by 255 when no preprocessor was supplied,
            otherwise delegate to the preprocessor.
        '''
        if self.preprocessor is None:
            images = images / 255.
        else:
            images = self.preprocessor(images)
        return images
    
class MultiOutputDataGenerator(BaseDataGenerator):
    '''
        Generates multiple output data for Keras

        Every output named in `output_names` receives the same one-hot label
        batch. When `tta_augmentors` is given, the image batch is replaced by a
        list: the untouched batch followed by one augmented copy per augmentor.
    '''
    def __init__(self, images, images_paths, labels, batch_size=64, image_dimensions = (512, 512, 3),
                 shuffle=False, augmenter=None, preprocessor=None,
                 return_label=True, total_classes=None, output_names=None, tta_augmentors=None):
        # Init parent's parameter
        super().__init__(images, images_paths,
                labels, batch_size, image_dimensions,
                 shuffle, augmenter, preprocessor,
                 return_label, total_classes)

        self.output_names = output_names
        self.tta_augmentors = tta_augmentors

    def __getitem__(self, index):
        '''
            Generate one batch of data for multiple output model

            Returns (images, {output_name: labels}) when return_label is True,
            otherwise images only.

            Raises ValueError when labels are requested but output_names is unset.
        '''
        if self.return_label:
            if self.output_names is None:
                # Fail with a clear message instead of "NoneType is not iterable"
                raise ValueError("output_names is required when return_label=True")
            images, labels = self.gather_batch_item(index)
            # Copy labels to each output name
            output_dict = {output_name: labels for output_name in self.output_names}
        else:
            images = self.gather_batch_item(index)

        # Identity check: `!= None` would broadcast element-wise on array-likes
        if self.tta_augmentors is not None:
            images = self.get_tta_images(images)

        if self.return_label:
            return images, output_dict
        return images

    def get_tta_images(self, images):
        '''
            If test time augmentation is used, apply augmentation to test images.

            Returns a list: [original batch, augmented batch per augmentor...].
        '''
        return [images] + [augmentor.augment_images(images) for augmentor in self.tta_augmentors]

## Process data

In [5]:
# Load the Unicode code point -> character lookup table and preview the first rows.
all_unicodes = pd.read_csv('input/unicode_translation.csv')
all_unicodes.head()

Unnamed: 0,Unicode,char
0,U+0031,1
1,U+0032,2
2,U+0034,4
3,U+0036,6
4,U+0039,9


In [6]:
# Order rows by code point; class indices are later assigned from row order,
# so a fixed ordering keeps the label encoding stable between runs.
all_unicodes = all_unicodes.sort_values('Unicode')

In [7]:
# Lookup tables derived from the (sorted) translation table:
#   unicode_to_encode : code point -> integer class index (row position)
#   unicode_to_word   : code point -> character
#   word_to_unicode   : character  -> code point
unicode_to_encode = {}
unicode_to_word = {}
word_to_unicode = {}

for position, (unicode, word) in enumerate(all_unicodes.values):
    unicode_to_encode[unicode] = position
    unicode_to_word[unicode] = word
    word_to_unicode[word] = unicode

# start_index ends up as the number of rows that were encoded.
start_index = len(all_unicodes)

In [8]:
# pretrain model input size
image_shape = (96, 96, 3)

total_classes = len(all_unicodes)
batch_size = 48
all_img_path = 'input/chars'

In [4]:
def create_path_labels(all_path):
    '''
        Build the sample table from a list of image file names.

        Each file name is expected to look like "<unicode>_<anything>"; the part
        before the first underscore is the label. Returns a DataFrame with the
        columns img_path, label and encode (the integer class index looked up
        in unicode_to_encode). An unknown code point raises KeyError.
    '''
    file_names = list(all_path)
    code_points = [file_name.split('_')[0] for file_name in file_names]
    return pd.DataFrame({
        'img_path': [all_img_path + '/' + file_name for file_name in file_names],
        'label': code_points,
        'encode': [unicode_to_encode[code_point] for code_point in code_points],
    })

In [10]:
# Create csv, only need to run this cell at the first time
char_img_names = list(os.listdir(all_img_path))
df_all = create_path_labels(char_img_names)
df_all.shape
df_all.to_csv("input/all_chars.csv", index=False)

In [11]:
# Reload the sample table (img_path, label, encode) from the CSV written by create_path_labels.
df_all = pd.read_csv("input/all_chars.csv")

In [12]:
# Hold out 10% of the samples for validation; the fixed random_state keeps the split repeatable.
df_train, df_val = train_test_split(df_all, test_size=0.1, random_state=42)

In [13]:
# Sanity check: (rows, columns) of the training split.
df_train.shape

(615117, 3)

In [14]:
# Sanity check: (rows, columns) of the validation split.
df_val.shape

(68347, 3)

In [15]:
# Preview a few training rows (path, Unicode label, integer class index).
df_train.head()

Unnamed: 0,img_path,label,encode
393683,input/chars/U+308B_572-41.jpg,U+308B,120
146670,input/chars/U+305F_349-103.jpg,U+305F,76
471214,input/chars/U+4E91_2707-117.jpg,U+4E91,300
499128,input/chars/U+5229_806-216.jpg,U+5229,567
163714,input/chars/U+3066_1058-232.jpg,U+3066,83


In [17]:
# Settings common to the training and validation generators: images are read
# from disk, preprocessed for Xception, and the labels feed both output heads.
shared_generator_args = dict(
    images=None,
    image_dimensions=image_shape,
    shuffle=True,
    preprocessor=preprocess_input,
    return_label=True,
    total_classes=total_classes,
    output_names=['original_out', 'se_out'],
)

# Training batches are augmented.
train_datagen = MultiOutputDataGenerator(
    images_paths=df_train['img_path'].values,
    labels=df_train['encode'].values,
    batch_size=batch_size,
    augmenter=create_augmenter(train=True),
    **shared_generator_args,
)

# Validation batches are left un-augmented and use a smaller batch size.
val_datagen = MultiOutputDataGenerator(
    images_paths=df_val['img_path'].values,
    labels=df_val['encode'].values,
    batch_size=20,
    augmenter=None,
    **shared_generator_args,
)

In [18]:
# Number of batches per epoch for training and validation.
print(len(train_datagen))
print(len(val_datagen))

12814
3417


## Create Model

In [19]:
def squeeze_excite_block(tensor, ratio=16):
    '''
        Squeeze-and-Excitation block: rescale the channels of `tensor` with
        learned per-channel gates.

        tensor : 4D Keras feature map
        ratio  : reduction factor of the bottleneck Dense layer

        Returns the input multiplied by the sigmoid gates.
    '''
    # From: https://github.com/titu1994/keras-squeeze-excite-network
    channels_first = K.image_data_format() == "channels_first"
    n_channels = K.int_shape(tensor)[1 if channels_first else -1]

    # Squeeze: one value per channel, reshaped so it can be broadcast back.
    gate = GlobalAveragePooling2D()(tensor)
    gate = Reshape((1, 1, n_channels))(gate)

    # Excite: bottleneck MLP ending in sigmoid gates.
    gate = Dense(n_channels // ratio, activation='relu', kernel_initializer='he_normal', use_bias=False)(gate)
    gate = Dense(n_channels, activation='sigmoid', kernel_initializer='he_normal', use_bias=False)(gate)

    # Move the channel axis to the front to match a channels_first input.
    if channels_first:
        gate = Permute((3, 1, 2))(gate)

    return multiply([tensor, gate])

In [20]:
def _pooled_softmax_head(features, name):
    '''
        Classification head: global average + global max pooling, concatenated,
        followed by dropout and a softmax layer called `name`.
    '''
    avg_pooled = GlobalAveragePooling2D()(features)
    max_pooled = GlobalMaxPooling2D()(features)
    merged = Concatenate(axis=-1)([avg_pooled, max_pooled])
    merged = Dropout(0.5)(merged)
    return Dense(total_classes, activation='softmax', name=name)(merged)


# ImageNet-pretrained Xception backbone without its classifier.
pretrained = Xception(include_top=False, weights='imagenet', input_shape=image_shape, pooling=None)
x = pretrained.output

# Original branch: head directly on the backbone features.
original_final = _pooled_softmax_head(x, 'original_out')

# SE branch: same head on squeeze-and-excitation re-weighted features.
se_out = squeeze_excite_block(x)
se_final = _pooled_softmax_head(se_out, 'se_out')

model = Model(inputs=pretrained.input, outputs=[original_final, se_final])

In [21]:
# Print the layer-by-layer architecture and parameter counts.
model.summary()

Model: "model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, 96, 96, 3)]  0                                            
__________________________________________________________________________________________________
block1_conv1 (Conv2D)           (None, 47, 47, 32)   864         input_1[0][0]                    
__________________________________________________________________________________________________
block1_conv1_bn (BatchNormaliza (None, 47, 47, 32)   128         block1_conv1[0][0]               
__________________________________________________________________________________________________
block1_conv1_act (Activation)   (None, 47, 47, 32)   0           block1_conv1_bn[0][0]            
______________________________________________________________________________________________

In [23]:
from tensorflow.keras.callbacks import ModelCheckpoint, LearningRateScheduler, TensorBoard, EarlyStopping

# Keep only the best weights on disk. Create the target folder up front so the
# first checkpoint save cannot fail on a missing directory.
checkpoint_path = 'models/classification/weights.best.Xception_best.hdf5'
os.makedirs(os.path.dirname(checkpoint_path), exist_ok=True)
checkpointer = ModelCheckpoint(filepath=checkpoint_path,
                               verbose=1, save_best_only=True)

# TensorBoard log directory. os.path.join keeps the path portable (a literal
# with backslashes only works on Windows and contains invalid escape sequences);
# makedirs also creates the parent "logs" folder and tolerates re-runs.
logdir = os.path.join("logs", "warmup")
os.makedirs(logdir, exist_ok=True)

tensorboard_callback = TensorBoard(log_dir=logdir)

# Stop when val_loss has not improved for `patience` epochs and roll back to the best weights.
# NOTE(review): patience=15 is larger than the 10 epochs requested by the training
# cell, so early stopping cannot trigger in that run — confirm this is intended.
early_stop = EarlyStopping(monitor="val_loss",
                           mode="min",
                           patience=15,
                           restore_best_weights=True)

In [24]:
# Both heads are trained with categorical cross-entropy and contribute equally
# to the total loss; accuracy is tracked for each head.
head_names = ('original_out', 'se_out')
model.compile(
    optimizer='adam',
    loss=dict.fromkeys(head_names, 'categorical_crossentropy'),
    loss_weights=dict.fromkeys(head_names, 1.),
    metrics=['accuracy'],
)

In [25]:
# Train on the Sequence generators. Model.fit accepts keras.utils.Sequence inputs
# directly; fit_generator is deprecated in TF 2.x and absent from newer Keras releases.
history = model.fit(train_datagen,
                    validation_data=val_datagen,
                    epochs=10,
                    callbacks=[tensorboard_callback, early_stop, checkpointer],
                    verbose=1,
                    )

Epoch 1/10
Epoch 00001: val_loss improved from inf to 0.36380, saving model to models/cls/weights.best.Xception_best.hdf5
Epoch 2/10
Epoch 00002: val_loss improved from 0.36380 to 0.35271, saving model to models/cls/weights.best.Xception_best.hdf5
Epoch 3/10
Epoch 00003: val_loss improved from 0.35271 to 0.34680, saving model to models/cls/weights.best.Xception_best.hdf5
Epoch 4/10
Epoch 00004: val_loss did not improve from 0.34680
Epoch 5/10
Epoch 00005: val_loss did not improve from 0.34680
Epoch 6/10
Epoch 00006: val_loss did not improve from 0.34680
Epoch 7/10
Epoch 00007: val_loss did not improve from 0.34680
Epoch 8/10
  823/12814 [>.............................] - ETA: 55:38 - loss: 0.1314 - original_out_loss: 0.0649 - se_out_loss: 0.0665 - original_out_accuracy: 0.9836 - se_out_accuracy: 0.9832

KeyboardInterrupt: 