In [1]:
import numpy as np
import pandas as pd

from datetime import datetime
from IPython.display import display
import cv2
import os
import time

import matplotlib.pyplot as plt

import tensorflow as tf
import tensorflow.keras.backend as K
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense, Dropout, Flatten, Conv2D, MaxPool2D, BatchNormalization, Lambda
from tensorflow.keras.layers import Activation, Concatenate, GlobalMaxPooling2D
from tensorflow.keras.layers import GlobalAveragePooling2D, Reshape, Permute, multiply
#from tensorflow.keras.applications.inception_resnet_v2 import InceptionResNetV2, preprocess_input

from tensorflow.keras.applications.xception import Xception, preprocess_input

from tensorflow.keras.utils import Sequence
from tensorflow.keras.optimizers import Adam, SGD
from tensorflow.keras.callbacks import ReduceLROnPlateau, ModelCheckpoint, EarlyStopping, TensorBoard, LambdaCallback
from tensorflow.keras.utils import to_categorical

from sklearn.model_selection import train_test_split

import imgaug as ia
from imgaug import augmenters as iaa

In [2]:
def create_augmenter(train=True):
    # from https://github.com/aleju/imgaug
    # Sometimes(0.5, ...) applies the given augmenter in 50% of all cases,
    # e.g. Sometimes(0.5, GaussianBlur(0.3)) would blur roughly every second image.
    sometimes = lambda aug: iaa.Sometimes(0.5, aug)

    # Define our sequence of augmentation steps that will be applied to every image
    # All augmenters with per_channel=0.5 will sample one value _per image_
    # in 50% of all cases. In all other cases they will sample new values
    # _per channel_.
    if train:
        seq = iaa.Sequential(
            [
                # apply the following augmenters to most images
                # crop images by -5% to 10% of their height/width
                sometimes(iaa.CropAndPad(
                    percent=(-0.05, 0.05),
                    pad_mode=ia.ALL, # random mode from all available modes will be sampled per image.
                    pad_cval=(0, 255) # The constant value to use if the pad mode is constant or the end value to use if the mode is linear_ramp
                )),
                sometimes(iaa.Affine(
                    scale={"x": (0.8, 1.2), "y": (0.8, 1.2)}, # scale images to 80-120% of their size, individually per axis
                    rotate=(-5, 5), # rotate by -45 to +45 degrees
                    shear=(-5, 5), # shear by -16 to +16 degrees
                    cval=(0, 255), # if mode is constant, use a cval between 0 and 255
                    mode=ia.ALL # use any of scikit-image's warping modes (see 2nd image from the top for examples)
                )),
            ],
        )
    else:
        pass
    return seq

In [3]:
# reference: https://www.kaggle.com/mpalermo/keras-pipeline-custom-generator-imgaug
class BaseDataGenerator(Sequence):
    '''
        Generates data for Keras
    '''
    def __init__(self, images=None, images_paths=None, labels=None, batch_size=64, image_dimensions = (512, 512, 3),
                 shuffle=False, augmenter=None, preprocessor=None,
                 return_label=True, total_classes=None):
        self.labels      = labels              # array of labels
        self.images = images
        self.images_paths = images_paths        # array of image paths
        self.dim          = image_dimensions    # image dimensions
        self.batch_size   = batch_size          # batch size
        self.shuffle      = shuffle             # shuffle bool
        self.augmenter      = augmenter           # augmenter
        self.preprocessor = preprocessor
        self.return_label = return_label
        self.total_classes = total_classes

        if self.images is None:
            self.total_len = len(self.images_paths)
        else:
            self.total_len = len(self.images)
        
        if images is None and images_paths is None:
            raise Exception("Must give images or images_paths")
        self.on_epoch_end()

    def __len__(self):
        '''
            Denotes the number of batches per epoch
        '''
        return int(np.ceil(self.total_len / self.batch_size))

    def on_epoch_end(self):
        '''
            Updates indexes after each epoch
        '''
        self.indexes = np.arange(self.total_len)
        if self.shuffle:
            np.random.shuffle(self.indexes)

    def gather_batch_item(self, index):
        '''
            Generate one batch of data
        '''
        # selects indices of data for next batch
        indexes = self.indexes[index*self.batch_size:(index+1)*self.batch_size]

        # select data and load images
        if self.images is None:
            images = [cv2.imread(self.images_paths[k]) for k in indexes]
        else:
            images = [self.images[k] for k in indexes]
        

        # preprocess and augment data
        if self.augmenter:
            images = self.augmenter.augment_images(images)

        images= np.array([self.preprocess_image(cv2.resize(img, self.dim[:2])) for img in images])
        
        if self.return_label:
            labels = np.array([self.labels[k] for k in indexes])
            labels = to_categorical(labels, num_classes=self.total_classes)
            return images, labels
        else:
            return images
    
    def __getitem__(self, index):
        return self.gather_batch_item(index)
        
    def preprocess_image(self, images):
        if self.preprocessor is None:
            images = images / 255.
            pass
        else:
            images = self.preprocessor(images)
        return images
    
class MultiOutputDataGenerator(BaseDataGenerator):
    '''
        Generates multiple output data for Keras
    '''
    def __init__(self, images, images_paths, labels, batch_size=64, image_dimensions = (512, 512, 3),
                 shuffle=False, augmenter=None, preprocessor=None,
                 return_label=True, total_classes=None, output_names=None, tta_augmentors=None):
        # Init parent's parameter
        super().__init__(images, images_paths,
                labels, batch_size, image_dimensions,
                 shuffle, augmenter, preprocessor,
                 return_label, total_classes)
        
        self.output_names = output_names
        self.tta_augmentors = tta_augmentors
    
    def __getitem__(self, index):
        '''
            Generate one batch of data for multiple output model
        '''
        if self.return_label:
            images, labels = self.gather_batch_item(index)
            output_dict = {}
            # Copy labels to each output name
            for output_name in self.output_names:
                output_dict[output_name] = labels
            if self.tta_augmentors != None:
                images = self.get_tta_images(images)
            return images, output_dict
        else:
            images = self.gather_batch_item(index)
            if self.tta_augmentors != None:
                images = self.get_tta_images(images)
            return images
    def get_tta_images(self, images):
        '''
            If test time augmentation is used, apply augmentation to test images.
        '''
        aug_images = []
        # Original
        aug_images.append(images)
        for augmentor in self.tta_augmentors:
            aug_images.append(augmentor.augment_images(images))
        images = aug_images
        return images

In [4]:
all_unicodes = pd.read_csv('input/unicode_translation.csv')
all_unicodes.head()

Unnamed: 0,Unicode,char
0,U+0031,1
1,U+0032,2
2,U+0034,4
3,U+0036,6
4,U+0039,9


In [5]:
all_unicodes = all_unicodes.sort_values('Unicode')

In [6]:
unicode_to_word = {}
word_to_unicode = {}
unicode_to_encode = {}
encode_to_unicode = {}

start_index = 0
for unicode, word in all_unicodes.values:
    unicode_to_encode[unicode] = start_index
    encode_to_unicode[start_index] = unicode
    unicode_to_word[unicode] = word
    word_to_unicode[word] = unicode
    start_index += 1

In [7]:
# pretrain model input size
image_shape = (96, 96, 3)

total_classes = len(all_unicodes)
batch_size = 128
base_img_path = 'input/cropped'

In [8]:
def create_test_path_labels(all_path):
    df = pd.DataFrame({'img_name': [], 'img_path': [], 'x': [], 'y': []})
    for img_name in all_path:
        cropped_image_paths = pd.Series(list(os.listdir('{}/{}'.format(base_img_path, img_name))))
        y_x = cropped_image_paths.str.split('_', expand=True)
        temp_df = pd.DataFrame()
        temp_df['x'] = y_x[1].str.replace('.jpg', '')
        temp_df['y'] = y_x[0]
        temp_df['img_name'] = img_name
        temp_df['img_path'] = '{}/{}'.format(base_img_path, img_name) + '/' + cropped_image_paths
        df = df.append(temp_df, sort=False)
    return df

In [9]:
all_cropped_images = list(os.listdir(base_img_path))
df_all = create_test_path_labels(all_cropped_images)
display(df_all.head())
print(df_all.shape)
df_all.to_csv("input/test_cropped_words.csv", index=False)

Unnamed: 0,img_name,img_path,x,y
0,test_00145af3,input/cropped_old/test_00145af3/100_1301.jpg,1301,100
1,test_00145af3,input/cropped_old/test_00145af3/100_972.jpg,972,100
2,test_00145af3,input/cropped_old/test_00145af3/1026_2891.jpg,2891,1026
3,test_00145af3,input/cropped_old/test_00145af3/1027_3005.jpg,3005,1027
4,test_00145af3,input/cropped_old/test_00145af3/102_1184.jpg,1184,102


(1145833, 4)


In [10]:
df_test = pd.read_csv("input/test_cropped_words.csv")
print(df_test.shape)
display(df_test.head())

(1145833, 4)


Unnamed: 0,img_name,img_path,x,y
0,test_00145af3,input/cropped_old/test_00145af3/100_1301.jpg,1301,100
1,test_00145af3,input/cropped_old/test_00145af3/100_972.jpg,972,100
2,test_00145af3,input/cropped_old/test_00145af3/1026_2891.jpg,2891,1026
3,test_00145af3,input/cropped_old/test_00145af3/1027_3005.jpg,3005,1027
4,test_00145af3,input/cropped_old/test_00145af3/102_1184.jpg,1184,102


In [12]:
test_datagen = MultiOutputDataGenerator(images=None, images_paths=df_test['img_path'].values, labels=None,
                              batch_size=batch_size, image_dimensions=image_shape, shuffle=False,
                              augmenter=None,
                                preprocessor=preprocess_input,
                            return_label=False, total_classes=total_classes, output_names=['original_out', 'se_out'])

## Create Model

In [13]:
def squeeze_excite_block(tensor, ratio=16):
    # From: https://github.com/titu1994/keras-squeeze-excite-network
    init = tensor
    channel_axis = 1 if K.image_data_format() == "channels_first" else -1
    filters = K.int_shape(init)[channel_axis]
    se_shape = (1, 1, filters)

    se = GlobalAveragePooling2D()(init)
    se = Reshape(se_shape)(se)
    se = Dense(filters // ratio, activation='relu', kernel_initializer='he_normal', use_bias=False)(se)
    se = Dense(filters, activation='sigmoid', kernel_initializer='he_normal', use_bias=False)(se)

    if K.image_data_format() == 'channels_first':
        se = Permute((3, 1, 2))(se)

    x = multiply([init, se])
    return x

def mix_output_layer(tensors, ratio=[1,1]):
    weights = K.variable([1,1])
    # tensor's shape: [branches number, batch size, classes]
    weights = K.reshape(weights, (len(tensors), 1, 1))
    return K.mean(tensors *  weights, axis=0)

def onehot_to_label(tensor):
    return K.argmax(tensor, axis=-1)

In [14]:
pretrained = Xception(include_top=False, weights='imagenet', input_shape=image_shape, pooling=None)
x = pretrained.output

# Original branch
gavg = GlobalAveragePooling2D()(x)
gmax = GlobalMaxPooling2D()(x)
original_concat = Concatenate(axis=-1)([gavg, gmax,])
original_concat = Dropout(0.5)(original_concat)
original_final = Dense(total_classes, activation='softmax', name='original_out')(original_concat)

# SE branch
se_out = squeeze_excite_block(x)
se_gavg = GlobalAveragePooling2D()(se_out)
se_gmax = GlobalMaxPooling2D()(se_out)
se_concat = Concatenate(axis=-1)([se_gavg, se_gmax,])
se_concat = Dropout(0.5)(se_concat)
se_final = Dense(total_classes, activation='softmax', name='se_out')(se_concat)

model = Model(inputs=pretrained.input, outputs=[original_final, se_final])
model.load_weights('models/classification/weights.best.Xception_best.hdf5', )

In [None]:
# Combine branches to output label directly, doing this can save memory usage

model_output = model.output

mix_branches = Lambda(function=mix_output_layer, arguments={'ratio': [1,1]})([model_output[0], model_output[1]])
onehot_output = Lambda(function=onehot_to_label)(mix_branches)
model = Model(inputs=model.input, outputs=onehot_output)

In [16]:
model.summary()

Model: "model_1"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, 96, 96, 3)]  0                                            
__________________________________________________________________________________________________
block1_conv1 (Conv2D)           (None, 47, 47, 32)   864         input_1[0][0]                    
__________________________________________________________________________________________________
block1_conv1_bn (BatchNormaliza (None, 47, 47, 32)   128         block1_conv1[0][0]               
__________________________________________________________________________________________________
block1_conv1_act (Activation)   (None, 47, 47, 32)   0           block1_conv1_bn[0][0]            
____________________________________________________________________________________________

In [17]:
pred = model.predict_generator(generator=test_datagen,
                                       verbose=1,
                                       )



In [23]:
pred.shape

(1145833,)

In [24]:
df_test['label'] = pred

In [25]:
df_test.to_csv('test_pred.csv', index=False)

In [26]:
df_test.head()

Unnamed: 0,img_name,img_path,x,y,label
0,test_00145af3,input/cropped_old/test_00145af3/100_1301.jpg,1301,100,76
1,test_00145af3,input/cropped_old/test_00145af3/100_972.jpg,972,100,504
2,test_00145af3,input/cropped_old/test_00145af3/1026_2891.jpg,2891,1026,361
3,test_00145af3,input/cropped_old/test_00145af3/1027_3005.jpg,3005,1027,91
4,test_00145af3,input/cropped_old/test_00145af3/102_1184.jpg,1184,102,88


In [27]:
new_df = pd.DataFrame()

## Convert the predicted data to original data format

In [28]:
previous_image_id = 'test_00145af3'
image_ids = []
store_xy = []
centers = []
center_str = ''
max_num = 1200
word_count = 0
for i in df_test.values:
    current_image_id = i[0]
    # Same 
    if current_image_id == previous_image_id:
        if word_count < max_num:
            # First one
            if len(center_str) == 0:
                center_str += encode_to_unicode[i[4]]
                center_str = center_str + ' ' + str(round(i[3]))
                center_str = center_str + ' ' + str(round(i[2]))
            else:
                center_str = center_str + ' ' + encode_to_unicode[i[4]]
                center_str = center_str + ' ' + str(round(i[3]))
                center_str = center_str + ' ' + str(round(i[2]))
    else:
        image_ids.append(previous_image_id)
        centers.append(center_str)
        # Reset
        previous_image_id = current_image_id
        center_str = ''

In [29]:
new_df['image_id'] = np.array(image_ids)
new_df['labels'] = np.array(centers)

In [30]:
new_df

Unnamed: 0,image_id,labels
0,test_00145af3,U+305F 100 1301 U+516D 100 972 U+4F4D 1026 289...
1,test_001c37e2,U+3068 1009 2365 U+4E07 1009 2673 U+4E8B 1011 ...
2,test_003aa33a,U+3053 1000 2200 U+5546 1003 1794 U+751F 1003 ...
3,test_00665e33,U+308B 1057 3004 U+3066 1058 2223 U+4E00 1059 ...
4,test_006964dc,U+3046 1113 2654 U+3066 1114 2011 U+3042 1116 ...
...,...,...
4072,test_ff9f4bbf,U+3057 1003 854 U+3092 1171 1059 U+8896 1171 9...
4073,test_ffb1f141,U+3089 1033 1178 U+308F 1034 835 U+3044 1035 1...
4074,test_ffe0bb66,U+4F55 1150 1389 U+305C 1152 1192 U+307E 1152 ...
4075,test_fff039a9,U+4E8B 1154 3304 U+4F55 1155 2662 U+65AF 1156 ...


In [31]:
sample_submission = pd.read_csv('input/sample_submission.csv')

In [32]:
sample_submission

Unnamed: 0,image_id,labels
0,test_00145af3,U+003F 1 1 U+FF2F 2 2
1,test_001c37e2,U+003F 1 1 U+FF2F 2 2
2,test_003aa33a,U+003F 1 1 U+FF2F 2 2
3,test_00665e33,U+003F 1 1 U+FF2F 2 2
4,test_006964dc,U+003F 1 1 U+FF2F 2 2
...,...,...
4145,test_ffb1f141,U+003F 1 1 U+FF2F 2 2
4146,test_ffe0bb66,U+003F 1 1 U+FF2F 2 2
4147,test_fff039a9,U+003F 1 1 U+FF2F 2 2
4148,test_fff50dbc,U+003F 1 1 U+FF2F 2 2


In [33]:
sample_submission.drop('labels', axis=1,inplace=True)

In [34]:
submission = pd.merge(left=sample_submission, right=new_df, on='image_id', how='left')

In [35]:
submission.head()

Unnamed: 0,image_id,labels
0,test_00145af3,U+305F 100 1301 U+516D 100 972 U+4F4D 1026 289...
1,test_001c37e2,U+3068 1009 2365 U+4E07 1009 2673 U+4E8B 1011 ...
2,test_003aa33a,U+3053 1000 2200 U+5546 1003 1794 U+751F 1003 ...
3,test_00665e33,U+308B 1057 3004 U+3066 1058 2223 U+4E00 1059 ...
4,test_006964dc,U+3046 1113 2654 U+3066 1114 2011 U+3042 1116 ...


In [36]:
submission.shape

(4150, 2)

In [37]:
submission.to_csv('submissions/submission.csv', index=False)