In [1]:
# %tensorflow_version 2.x 
# On Colab, uncomment the magic above to select TensorFlow 2.x
import numpy as np
import pandas as pd 
from tqdm.auto import tqdm
from glob import glob
import time, gc, sys
import cv2

sys.path.append("../tools/")
from helpers import (get_n, get_dummies, resize, plot_loss,
                     MultiOutputDataGenerator, plot_acc,
                    image_from_char)

In [2]:
from tensorflow import keras
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, Conv2D, Flatten, Input, Activation, Concatenate
from tensorflow.keras.layers import MaxPool2D, AveragePooling2D, GlobalAveragePooling2D
from tensorflow.keras.layers import Dropout, BatchNormalization
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ReduceLROnPlateau, ModelCheckpoint, EarlyStopping
from tensorflow.keras.initializers import RandomNormal
from tensorflow.keras.applications import DenseNet121
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
import PIL.Image as Image, PIL.ImageDraw as ImageDraw, PIL.ImageFont as ImageFont
from matplotlib import pyplot as plt
import seaborn as sns
%matplotlib inline

In [3]:
import os
# Folder that holds the input files, relative to this notebook.
DATA_FOLDER = "../../data/"

# List every file found under DATA_FOLDER (recursively), one path per line.
for path in (os.path.join(folder, name)
             for folder, _, names in os.walk(DATA_FOLDER)
             for name in names):
    print(path)

../../data/test_image_data_3.parquet
../../data/train.csv
../../data/test_image_data_1.parquet
../../data/train_image_data_2.parquet
../../data/train_image_data_3.parquet
../../data/test_image_data_2.parquet
../../data/test_image_data_0.parquet
../../data/train_image_data_0.parquet
../../data/class_map.csv
../../data/test.csv
../../data/train_image_data_1.parquet
../../data/sample_submission.csv
../../data/Kalpurush_Fonts/kalpurush-2.ttf


In [4]:
# Read the four CSV tables that sit directly under DATA_FOLDER.
train_df_, test_df_, class_map_df, sample_sub_df = (
    pd.read_csv(DATA_FOLDER + f'/{stem}.csv')
    for stem in ('train', 'test', 'class_map', 'sample_submission')
)

### Number of unique values

In [5]:
# Report how many distinct classes each target column contains.
for label, column in (('grapheme roots', 'grapheme_root'),
                      ('vowel diacritic', 'vowel_diacritic'),
                      ('consonant diacritic', 'consonant_diacritic')):
    print(f'Number of unique {label}: {train_df_[column].nunique()}')

Number of unique grapheme roots: 168
Number of unique vowel diacritic: 11
Number of unique consonant diacritic: 7


In [6]:
# Remove the 'grapheme' column; the training code below does not read it.
# errors='ignore' keeps this cell re-runnable: without it a second execution
# raises KeyError because the column has already been dropped.
train_df_ = train_df_.drop(columns=['grapheme'], errors='ignore')

In [7]:
# Downcast the three label columns to uint8.
label_cols = ['grapheme_root', 'vowel_diacritic', 'consonant_diacritic']
train_df_[label_cols] = train_df_[label_cols].astype('uint8')

In [8]:
# IMG_SIZE: side length of the square images fed to the network.
# N_CHANNELS: channel count used when reshaping image arrays.
IMG_SIZE, N_CHANNELS = 64, 1

## Basic Model

In [9]:
def get_model(base_filters=32, nlevels=4, ncov=4, kernel_size=(3,3), 
              activation='relu', momentum=0.15, dropout_rate=0.3):
  """Build the baseline multi-output CNN (uncompiled).

  The network is a stack of `nlevels` convolutional levels followed by two
  dense layers and three softmax heads named 'root' (168 classes),
  'vowel' (11 classes) and 'consonant' (7 classes).

  Args:
    base_filters: number of filters in the first level; level `i` (0-based)
      uses `2**i * base_filters` filters.
    nlevels: number of convolutional levels (the first one plus `nlevels-1`
      deeper ones).
    ncov: number of `kernel_size` convolutions per level, before the pooling.
    kernel_size: kernel size of the repeated convolutions in each level.
    activation: activation passed to every Conv2D/Dense layer. The string
      'leakyrelu' (any case) is replaced by a `LeakyReLU(alpha=0.05)` layer;
      any other value (string name or callable) is forwarded unchanged.
    momentum: `momentum` argument of every BatchNormalization layer.
    dropout_rate: rate of the Dropout layers inside the convolutional stack.
      The Dropout after the first dense layer is fixed at 0.3.

  Returns:
    A `Model` mapping an `(IMG_SIZE, IMG_SIZE, N_CHANNELS)` input to the list
    `[root, vowel, consonant]` of softmax outputs.

  Side effects:
    Clears the Keras backend session and resets the TensorFlow random seed
    to 2040 on every call.
  """
  keras.backend.clear_session()
  tf.random.set_seed(2040)
  inputs = Input(shape = (IMG_SIZE, IMG_SIZE, N_CHANNELS))
  # Only a string can be matched by name; callables and layer instances are
  # passed straight through instead of crashing on `.lower()`.
  if isinstance(activation, str) and activation.lower() == 'leakyrelu':
    activation = keras.layers.LeakyReLU(alpha=0.05)
  
  # ---- first level: `ncov` convolutions at base width -----------------------
  # The input shape comes from `inputs`, so no `input_shape` kwarg is needed.
  model = Conv2D(filters=base_filters, kernel_size=kernel_size, padding='SAME', 
                 activation=activation)(inputs)
  for _ in range(ncov-1):
    model = Conv2D(filters=base_filters, kernel_size=kernel_size, padding='SAME', 
                   activation=activation)(model)
  model = BatchNormalization(momentum=momentum)(model)
  model = MaxPool2D(pool_size=(2, 2))(model)
  # Unlike the deeper levels, no BatchNormalization follows this 5x5 conv.
  model = Conv2D(filters=base_filters, kernel_size=(5, 5), padding='SAME',
                 activation=activation)(model)
  model = Dropout(rate=dropout_rate)(model)

  # ---- deeper levels: filter count doubles at each level --------------------
  for i in range(nlevels-1):
    level_filters = 2**(i+1)*base_filters
    for _ in range(ncov):
      model = Conv2D(filters=level_filters, kernel_size=kernel_size, 
                     padding='SAME', activation=activation)(model)
    model = BatchNormalization(momentum=momentum)(model)
    model = MaxPool2D(pool_size=(2, 2))(model)
    model = Conv2D(filters=level_filters, kernel_size=(5, 5), 
                   padding='SAME', activation=activation)(model)
    model = BatchNormalization(momentum=momentum)(model)
    model = Dropout(rate=dropout_rate)(model)

  # ---- shared dense trunk ---------------------------------------------------
  model = Flatten()(model)
  model = Dense(1024, activation = activation)(model)
  model = Dropout(rate=0.3)(model)
  dense = Dense(512, activation = activation)(model)

  # ---- one softmax head per target, all fed by the same dense features ------
  head_root = Dense(168, activation = 'softmax', name='root')(dense)
  head_vowel = Dense(11, activation = 'softmax', name='vowel')(dense)
  head_consonant = Dense(7, activation = 'softmax', name='consonant')(dense)

  model = Model(inputs=inputs, outputs=[head_root, head_vowel, head_consonant])
  return model

In [9]:
def build_densenet(SIZE, rate=0.3):
    """Build an uncompiled DenseNet121-based model with three softmax heads.

    A 3x3 convolution first maps the single-channel input to 3 channels so it
    can be fed to the ImageNet-initialised DenseNet121 backbone. The backbone
    output is globally average-pooled and passed through a
    BatchNorm -> Dropout -> Dense(512, relu) -> BatchNorm -> Dropout trunk
    shared by all heads.

    Args:
        SIZE: height and width of the square, single-channel input image.
        rate: dropout rate used for both Dropout layers.

    Returns:
        A `Model` whose outputs are, in order, the 'root' (168-way),
        'vowel' (11-way) and 'consonant' (7-way) softmax layers.
    """
    # The backbone is created before the Input layer, as in the original
    # code, so automatically generated layer names stay the same.
    backbone = DenseNet121(weights='imagenet', include_top=False)

    inputs = Input(shape=(SIZE, SIZE, 1))
    to_rgb = Conv2D(3, (3, 3), padding='same')(inputs)

    features = GlobalAveragePooling2D()(backbone(to_rgb))
    features = Dropout(rate)(BatchNormalization()(features))
    features = Dense(512, activation='relu')(features)
    features = Dropout(rate)(BatchNormalization()(features))

    # One softmax head per target, created in root / vowel / consonant order.
    heads = [
        Dense(n_classes, activation='softmax', name=head_name)(features)
        for head_name, n_classes in (('root', 168), ('vowel', 11), ('consonant', 7))
    ]

    return Model(inputs=inputs, outputs=heads)

In [10]:
# Load a previously saved model from disk rather than building a new one.
# Alternatives for starting from scratch:
#   model = get_model(base_filters=32, activation='relu', nlevels=4, ncov=4,
#                     kernel_size=(3,3))
#   model = build_densenet(SIZE=64, rate=0.3)
saved_model_path = "dense121-3.h5"
model = keras.models.load_model(saved_model_path)
model.summary()

Model: "model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_2 (InputLayer)            [(None, 64, 64, 1)]  0                                            
__________________________________________________________________________________________________
conv2d (Conv2D)                 (None, 64, 64, 3)    30          input_2[0][0]                    
__________________________________________________________________________________________________
densenet121 (Model)             multiple             7037504     conv2d[0][0]                     
__________________________________________________________________________________________________
global_average_pooling2d (Globa (None, 1024)         0           densenet121[1][0]                
______________________________________________________________________________________________

In [11]:
# from tensorflow.keras.utils import plot_model
# plot_model(model, to_file='model.png');

In [12]:
# weights = {'root': 0.333, 'vowel': 0.333, 'consonant':0.333}
# # lr = 1e-3
# # opt = keras.optimizers.RMSprop(learning_rate=lr, rho=0.9)
# model.compile(optimizer=Adam(lr=0.002), loss='categorical_crossentropy', 
#               loss_weights=weights, metrics=['accuracy'])

In [16]:
# One ReduceLROnPlateau per output head: each halves the learning rate
# (factor=0.5, floor 1e-5) when that head's training accuracy has not
# improved for 3 epochs.
targets = ['root', 'vowel', 'consonant']
lr_scheduler = [
    ReduceLROnPlateau(monitor=f'{head}_accuracy',
                      patience=3,
                      verbose=1,
                      factor=0.5,
                      min_lr=0.00001)
    for head in targets
]

# Checkpoint: write the full model to 'dense121-4.h5' whenever the validation
# accuracy of the root head improves.
cp = ModelCheckpoint(
    'dense121-4.h5',
    monitor='val_root_accuracy',
    save_best_only=True,
    save_weights_only=False,
    mode='auto',
    verbose=0,
)

# Early stopping on the same metric (patience of 4 epochs).
# NOTE: `es` is created here but is not part of `cb`, so it is not passed to
# training through `cb`.
es = EarlyStopping(
    monitor='val_root_accuracy',
    mode='auto',
    patience=4,
    min_delta=0,
    verbose=1,
)

# Callbacks handed to model.fit: the three LR schedulers plus the checkpoint.
cb = lr_scheduler + [cp]

In [17]:
# Mini-batch size and number of training epochs passed to model.fit.
batch_size, epochs = 256, 80

In [18]:
# Presumably the height/width of the raw images in pixels; TODO confirm.
# Neither name is referenced elsewhere in the visible part of this notebook.
HEIGHT, WIDTH = 137, 236

In [19]:
histories = []

# Number of classes per label column. The one-hot targets are built with this
# fixed width so they always match the 168/11/7-unit output layers, even when a
# shard happens not to contain every class (pd.get_dummies would silently emit
# fewer columns in that case). Labels are used directly as class indices, the
# same convention the inference cell relies on when it takes argmax.
n_classes = {'grapheme_root': 168, 'vowel_diacritic': 11, 'consonant_diacritic': 7}
label_columns = list(n_classes)

for i in [3]:  # index/indices of the train_image_data_{i}.parquet shard(s) to train on
    # Join the pixel data of this shard with its labels.
    train_df = pd.merge(pd.read_parquet(DATA_FOLDER + f'/train_image_data_{i}.parquet'),
                        train_df_, on='image_id').drop(['image_id'], axis=1)

    # Pixel columns only -> resized to IMG_SIZE and scaled by 1/255.
    X_train = train_df.drop(label_columns, axis=1)
    X_train = resize(X_train, size=IMG_SIZE)/255

    # CNN takes images in shape `(batch_size, h, w, channels)`, so reshape the images
    X_train = X_train.values.reshape(-1, IMG_SIZE, IMG_SIZE, N_CHANNELS)

    # Fixed-width one-hot targets (row `k` of the identity matrix encodes class `k`).
    Y_train_root, Y_train_vowel, Y_train_consonant = (
        np.eye(n_classes[col], dtype='uint8')[train_df[col].to_numpy()]
        for col in label_columns
    )

    # Hold out 8% of the shard for validation (fixed seed for repeatability).
    x_train, x_test, y_train_root, y_test_root, y_train_vowel, y_test_vowel, y_train_consonant, y_test_consonant = \
    train_test_split(X_train, Y_train_root, Y_train_vowel, Y_train_consonant, test_size=0.08, random_state=666)
    del train_df
    del X_train
    del Y_train_root, Y_train_vowel, Y_train_consonant

    # Data augmentation: small rotations, zooms and shifts; no flips, no normalisation.
    datagen = MultiOutputDataGenerator(
        featurewise_center=False,  # set input mean to 0 over the dataset
        samplewise_center=False,  # set each sample mean to 0
        featurewise_std_normalization=False,  # divide inputs by std of the dataset
        samplewise_std_normalization=False,  # divide each input by its std
        zca_whitening=False,  # apply ZCA whitening
        rotation_range=8,  # randomly rotate images in the range (degrees, 0 to 180)
        zoom_range = 0.15, # Randomly zoom image 
        width_shift_range=0.15,  # randomly shift images horizontally (fraction of total width)
        height_shift_range=0.15,  # randomly shift images vertically (fraction of total height)
        horizontal_flip=False,  # randomly flip images
        vertical_flip=False)  # randomly flip images

    # This will just calculate parameters required to augment the given data. This won't perform any augmentations
    datagen.fit(x_train)

    # Fit the model on augmented batches; validate on the untouched hold-out split.
    history = model.fit(datagen.flow(x_train, {'root': y_train_root, 'vowel':y_train_vowel, 'consonant': y_train_consonant}, batch_size=batch_size),
                              epochs = epochs, validation_data = (x_test, [y_test_root, y_test_vowel, y_test_consonant]), 
                              steps_per_epoch=x_train.shape[0] // batch_size, 
                              callbacks=cb)

    histories.append(history)

    # Delete to reduce memory usage
    del x_train, x_test
    del y_train_root, y_test_root
    del y_train_vowel, y_test_vowel
    del y_train_consonant, y_test_consonant
    gc.collect()

HBox(children=(FloatProgress(value=0.0, max=50210.0), HTML(value='')))


  {'root': '...', 'vowel': '...', 'consonant': '...'}
    to  
  ['...', '...', '...']
Train for 180 steps, validate on 4017 samples
Epoch 1/80
Epoch 2/80
Epoch 3/80
Epoch 4/80
Epoch 5/80
Epoch 6/80
Epoch 7/80
Epoch 8/80
Epoch 9/80
Epoch 10/80
Epoch 11/80
Epoch 12/80
Epoch 13/80
Epoch 14/80
Epoch 15/80
Epoch 16/80
Epoch 17/80
Epoch 18/80
Epoch 19/80
Epoch 20/80


Epoch 21/80
Epoch 22/80
Epoch 23/80
Epoch 24/80
Epoch 25/80
Epoch 26/80
Epoch 27/80
Epoch 28/80
Epoch 29/80
Epoch 30/80
Epoch 31/80
Epoch 32/80
Epoch 33/80
Epoch 34/80
Epoch 35/80
Epoch 36/80
Epoch 37/80
Epoch 38/80
Epoch 39/80


Epoch 40/80
Epoch 41/80
Epoch 42/80
Epoch 43/80
Epoch 44/80
Epoch 45/80
Epoch 46/80
Epoch 47/80
Epoch 48/80
Epoch 49/80
Epoch 50/80
Epoch 51/80
Epoch 52/80
Epoch 53/80
Epoch 54/80
Epoch 55/80
Epoch 56/80
Epoch 57/80
Epoch 58/80
Epoch 59/80


Epoch 60/80
Epoch 61/80
Epoch 62/80
Epoch 63/80
Epoch 64/80
Epoch 65/80
Epoch 66/80
Epoch 67/80
Epoch 68/80
Epoch 69/80
Epoch 70/80
Epoch 71/80
Epoch 72/80
Epoch 73/80
Epoch 74/80
Epoch 75/80
Epoch 76/80
Epoch 77/80
Epoch 78/80
Epoch 79/80


Epoch 80/80


In [None]:
# def save_model(base_filters=32, nlevels=4, ncov=4, kernel_size=(3,3), 
#               activation='relu', momentum=0.15, dropout_rate=0.3):
#   filepath = os.path.join(DATA_FOLDER, "saved_model", f'''{str(base_filters)}-{str(nlevels)}-{str(ncov)}-{str(kernel_size[0])}-{activation}-{str(momentum)}-{str(dropout_rate)}.h5''')
#   print(filepath)
#   model.save(filepath)

In [None]:
# save_model()

In [None]:
# import pickle
# with open(os.path.join(DATA_FOLDER, "saved_model", "history_basic_model_leaky"), "wb") as pf:
#   pickle.dump(histories[0].history, pf)

In [None]:
# Plot loss and accuracy curves for every training run that was recorded,
# instead of a hard-coded count that would skip any additional runs.
# `dataset` is the position of the run in `histories`, not the shard number.
for dataset, history in enumerate(histories):
    plot_loss(history, epochs, f'Training Dataset: {dataset}')
    plot_acc(history, epochs, f'Training Dataset: {dataset}')

In [None]:
# Drop the stored training histories and force a garbage-collection pass.
# NOTE: after this cell runs, `histories` no longer exists, so the plotting
# cell above cannot be re-run without training again.
del histories
gc.collect()

In [None]:
# Per-target prediction holders. The key order (root, vowel, consonant) matters:
# the inference cell pairs these keys positionally with the model outputs.
preds_dict = {
    component: []
    for component in ('grapheme_root', 'vowel_diacritic', 'consonant_diacritic')
}

In [None]:
# Order in which the three rows of each image are written to the submission.
components = ['consonant_diacritic', 'grapheme_root', 'vowel_diacritic']
# Target name for each model output, by position. This is the same order as
# the keys of `preds_dict` and as the heads returned by the model builders
# above; it assumes the loaded model was built with that head order.
output_names = ('grapheme_root', 'vowel_diacritic', 'consonant_diacritic')

target = []  # predicted class index for every submission row
row_id = []  # matching '<image_id>_<component>' identifiers

for shard in range(4):
    df_test_img = pd.read_parquet(DATA_FOLDER + f'/test_image_data_{shard}.parquet').set_index('image_id')

    # Same preprocessing as training: resize to IMG_SIZE (passed explicitly so
    # the reshape below cannot disagree with the helper's default), scale by 1/255.
    X_test = resize(df_test_img, size=IMG_SIZE, need_progress_bar=False)/255
    X_test = X_test.values.reshape(-1, IMG_SIZE, IMG_SIZE, N_CHANNELS)

    preds = model.predict(X_test)

    # Most probable class per image, for each of the three outputs.
    for name, probabilities in zip(output_names, preds):
        preds_dict[name] = np.argmax(probabilities, axis=1)

    # Emit one row per (image, component) pair.
    for k, image_id in enumerate(df_test_img.index.values):
        for comp in components:
            row_id.append(image_id + '_' + comp)
            target.append(preds_dict[comp][k])

    # Free the shard before loading the next one.
    del df_test_img
    del X_test
    gc.collect()

df_sample = pd.DataFrame(
    {
        'row_id': row_id,
        'target': target
    },
    columns = ['row_id', 'target']
)
df_sample.to_csv('submission.csv', index=False)
df_sample.head()