In [1]:
# Imports
import pyarrow.parquet as pq
import pandas as pd
import numpy as np
from tqdm.notebook import tqdm
from matplotlib import pyplot as plt
from sklearn.model_selection import train_test_split
import cv2, os, gc, datetime

import tensorflow as tf
gpu_devices = tf.config.experimental.list_physical_devices('GPU')
tf.config.experimental.set_memory_growth(gpu_devices[0], True)

from tensorflow.keras.models import Model
from tensorflow.keras.models import clone_model
from tensorflow.keras.layers import Dense,Conv2D,Flatten,MaxPool2D,Dropout,BatchNormalization, Input
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ReduceLROnPlateau

  return f(*args, **kwds)
  return f(*args, **kwds)
  return f(*args, **kwds)
  return f(*args, **kwds)


In [16]:
# Define Training Data
train_paths = [
    './data/train_image_data_0.parquet',
    './data/train_image_data_1.parquet',
    './data/train_image_data_2.parquet',
    './data/train_image_data_3.parquet'
]

test_paths = [
    './data/test_image_data_0.parquet',
    './data/test_image_data_1.parquet',
    './data/test_image_data_2.parquet',
    './data/test_image_data_3.parquet'
]

HEIGHT = 137
WIDTH = 236
SIZE = 224
BATCH_SIZE = 64

TB_LOG_DIR = "logs/scalars/" + datetime.datetime.now().strftime("%d-%m-%Y_%H:%M:%S")
MODEL_PATH = "saved_model/model.h5"

In [3]:
# Make image dumping directory
train_dir_path = './data/images/train'
test_dir_path = './data/images/test'

if not os.path.isdir(train_dir_path):
    os.makedirs(train_dir_path)
    
if not os.path.isdir(test_dir_path):
    os.makedirs(test_dir_path)
    
model_dir = os.path.split(MODEL_PATH)[0]
if not os.path.isdir(model_dir):
    os.makedirs(model_dir)

In [10]:
# Define functions to crop and resize the images

def bbox(img):
    rows = np.any(img, axis=1)
    cols = np.any(img, axis=0)
    rmin, rmax = np.where(rows)[0][[0, -1]]
    cmin, cmax = np.where(cols)[0][[0, -1]]
    return rmin, rmax, cmin, cmax

def crop_resize(img, size, pad=16):
    #crop a box around pixels large than the threshold 
    #some images contain line at the sides
    ymin,ymax,xmin,xmax = bbox(img[5:-5,5:-5] > 80)
    #cropping may cut too much, so we need to add it back
    xmin = xmin - 13 if (xmin > 13) else 0
    ymin = ymin - 10 if (ymin > 10) else 0
    xmax = xmax + 13 if (xmax < WIDTH - 13) else WIDTH
    ymax = ymax + 10 if (ymax < HEIGHT - 10) else HEIGHT
    img = img[ymin:ymax,xmin:xmax]
    #remove lo intensity pixels as noise
    img[img < 28] = 0
    lx, ly = xmax-xmin,ymax-ymin
    l = max(lx,ly) + pad
    #make sure that the aspect ratio is kept in rescaling
    img = np.pad(img, [((l-ly)//2,), ((l-lx)//2,)], mode='constant')
    return cv2.resize(img,(size,size))

In [5]:
# Preprocess the Train images and write them down
for path in train_paths:
    df = pd.read_parquet(path)

    for row in tqdm(df.values):
        im_name = row[0]
        image = np.array(row[1:])
        image = 255 - image.reshape(HEIGHT, WIDTH).astype(np.uint8)
        image = (image*(255.0/image.max())).astype(np.uint8)
        image = crop_resize(image, SIZE, 20)
        _, image = cv2.threshold(image,0,255,cv2.THRESH_BINARY+cv2.THRESH_OTSU)
        image = cv2.morphologyEx(image, cv2.MORPH_OPEN, np.ones([3,3], np.uint8))
        cv2.imwrite(f"{os.path.join(train_dir_path, im_name)}.jpg", image)
    
    del df, row, im_name, image, _
    gc.collect()

HBox(children=(FloatProgress(value=0.0, max=50210.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=50210.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=50210.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=50210.0), HTML(value='')))




In [16]:
# Preprocess the Test images and write them down
for path in test_paths:
    df = pd.read_parquet(path)

    for row in tqdm(df.values):
        im_name = row[0]
        image = np.array(row[1:])
        image = 255 - image.reshape(HEIGHT, WIDTH).astype(np.uint8)
        image = (image*(255.0/image.max())).astype(np.uint8)
        image = crop_resize(image, SIZE, 20)
        _, image = cv2.threshold(image,0,255,cv2.THRESH_BINARY+cv2.THRESH_OTSU)
        image = cv2.morphologyEx(image, cv2.MORPH_OPEN, np.ones([3,3], np.uint8))
        cv2.imwrite(f"{os.path.join(test_dir_path, im_name)}.jpg", image)
    
    del df, row, im_name, image
    gc.collect()

HBox(children=(FloatProgress(value=0.0, max=3.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=3.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=3.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=3.0), HTML(value='')))




# Train the Model

In [4]:
# Read train labels
train_label_path = "./data/train.csv"
df = pd.read_csv(train_label_path)

In [5]:
# Dropping the last column (not required for training)
df.drop(['grapheme'], axis=1, inplace=True)
df.head()

Unnamed: 0,image_id,grapheme_root,vowel_diacritic,consonant_diacritic
0,Train_0,15,9,5
1,Train_1,159,0,0
2,Train_2,22,3,5
3,Train_3,53,2,2
4,Train_4,71,9,5


In [6]:
# Separating the data from the labels and one hot encoding the labels
y_root = pd.get_dummies(df['grapheme_root'])
y_vowel = pd.get_dummies(df['vowel_diacritic'])
y_consonant = pd.get_dummies(df['consonant_diacritic'])
x_name = df['image_id']

In [7]:
# Train Test Split
x_train_name, x_val_name, y_root_train, y_root_val, y_vowel_train, y_vowel_val, y_consonant_tarin, y_consonant_val = train_test_split(x_name, y_root, y_vowel, y_consonant, test_size=0.2, random_state=3) 

In [8]:
# Verifying the shapes
x_train_name.shape, x_val_name.shape, y_root_train.shape, y_root_val.shape, y_vowel_train.shape, y_vowel_val.shape, y_consonant_tarin.shape, y_consonant_val.shape

((160672,),
 (40168,),
 (160672, 168),
 (40168, 168),
 (160672, 11),
 (40168, 11),
 (160672, 7),
 (40168, 7))

In [9]:
# Create the Data Generator
def data_generator(x, y_root, y_vowel, y_consonant, batch_size=16, saved_img_path='./data/images/train', image_shape=(299, 299)):
    assert len(x) == len(y_root) == len(y_vowel) == len(y_consonant), 'Lengths of all inputs should be same'
    
    num_splits = round(len(x) // batch_size) + 1
    
    x_splits = np.array_split(x, num_splits)
        
    y_root_splits = np.array_split(y_root, num_splits)
    y_vowel_splits = np.array_split(y_vowel, num_splits)
    y_consonant_splits = np.array_split(y_consonant, num_splits)    
    
    i = 0
    
    while True:
        xs = list()
        
        x_batch = x_splits[i].values
        y_root_batch = y_root_splits[i].values
        y_vowel_batch = y_vowel_splits[i].values
        y_consonant_batch = y_consonant_splits[i].values
        
        i += 1
        if i > num_splits-1: i = 0
        
        for x_ in x_batch:
            path = os.path.join(saved_img_path, f"{x_}.jpg")
            image = cv2.resize(cv2.imread(path, 0), image_shape, cv2.INTER_AREA)/255
            xs.append(np.expand_dims(image, axis=2))
        
        yield np.array(xs), [y_root_batch, y_vowel_batch, y_consonant_batch]
        

In [10]:
# Keras imports
from tensorflow.keras.applications.mobilenet_v2 import MobileNetV2
from tensorflow.keras.applications.resnet_v2 import ResNet50V2, ResNet152V2
from tensorflow.keras.applications.xception import Xception
from tensorflow.keras.applications.nasnet import NASNetLarge, NASNetMobile
from tensorflow.keras.metrics import Accuracy, Precision, Recall
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, TensorBoard, ModelCheckpoint
from tensorflow.keras.optimizers import Adam, SGD, RMSprop, Adadelta, Nadam, Adagrad

In [11]:
# Callbacks
tensorboard_callback = TensorBoard(log_dir=TB_LOG_DIR)
checkpoint_callback = ModelCheckpoint(filepath=MODEL_PATH, monitor='val_loss', verbose=0, save_weights_only=False, save_best_only=True)
early_stopping_callback = EarlyStopping(monitor='val_loss', patience=6, verbose=1)
reduce_lr_callback = ReduceLROnPlateau(monitor='val_loss', factor=0.1, min_lr=1e-6, patience=2, verbose=1)

In [19]:
inputs = Input(shape = (SIZE, SIZE, 1))

model = Conv2D(filters=3, kernel_size=(3, 3), padding='same', activation='relu', input_shape=(SIZE, SIZE, 1))(inputs)
model = ResNet50V2(include_top=False, weights='imagenet')(model)

model = Flatten()(model)
model = Dense(1024, activation = "relu")(model)
model = Dropout(rate=0.5)(model)
dense = Dense(512, activation = "relu")(model)

head_root = Dense(168, activation = 'softmax', name="root")(dense)
head_vowel = Dense(11, activation = 'softmax', name="vowel")(dense)
head_consonant = Dense(7, activation = 'softmax', name="consonant")(dense)

model = Model(inputs=inputs, outputs=[head_root, head_vowel, head_consonant])

Downloading data from https://github.com/keras-team/keras-applications/releases/download/resnet/resnet50v2_weights_tf_dim_ordering_tf_kernels_notop.h5


In [20]:
model.summary()

Model: "model_1"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_3 (InputLayer)            [(None, 224, 224, 1) 0                                            
__________________________________________________________________________________________________
conv2d_1 (Conv2D)               (None, 224, 224, 3)  30          input_3[0][0]                    
__________________________________________________________________________________________________
resnet50v2 (Model)              multiple             23564800    conv2d_1[0][0]                   
__________________________________________________________________________________________________
flatten_1 (Flatten)             (None, 100352)       0           resnet50v2[1][0]                 
____________________________________________________________________________________________

In [21]:
opt = Adam(learning_rate=0.001)
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=[Accuracy(), Precision(), Recall()])

In [22]:
train_gen = data_generator(x_train_name, y_root_train, y_vowel_train, y_consonant_tarin, batch_size=BATCH_SIZE, image_shape=(SIZE, SIZE), saved_img_path='/home/ani/Documents/temp')
val_gen = data_generator(x_val_name, y_root_val, y_vowel_val, y_consonant_val, batch_size=BATCH_SIZE, image_shape=(SIZE, SIZE), saved_img_path='/home/ani/Documents/temp')

In [23]:
# Fit the model
history = model.fit_generator(train_gen, epochs=200, validation_data=val_gen,
                              steps_per_epoch=len(x_train_name)//BATCH_SIZE + 1, 
                              validation_steps=len(x_val_name)//BATCH_SIZE + 1,
                              callbacks=[tensorboard_callback,
                                         checkpoint_callback,
                                         early_stopping_callback,
                                         reduce_lr_callback])

Epoch 1/200


ResourceExhaustedError: OOM when allocating tensor with shape[100352,1024] and type float on /job:localhost/replica:0/task:0/device:GPU:0 by allocator GPU_0_bfc [Op:MatMul]