In [None]:
from warnings import filterwarnings
filterwarnings("ignore")

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import cv2 as cv

%matplotlib inline

from tensorflow.keras import Sequential
from tensorflow.keras.layers import *
from tensorflow.keras.models import Model

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder

from tqdm import tqdm_notebook
from notifyme import notify

In [None]:
# Shape (rows, columns) that each flat pixel vector is reshaped to inside `resize`.
SHAPE = (137,236)
# Side length of the square image returned by `resize`; also the height and
# width of the model's input tensor.
SHAPE_NEW = 64

In [None]:
# Label table: the one-hot encoders are fitted on its 'grapheme_root',
# 'vowel_diacritic' and 'consonant_diacritic' columns, and it is merged with
# the image data on 'image_id'.
labels = pd.read_csv("./train.csv")

In [None]:
# One dense (non-sparse) uint16 one-hot encoder per label column, each fitted
# on that single column of `labels`. `fit` returns the encoder itself, so the
# construct-and-fit step can be written as one expression per column.
# NOTE(review): newer scikit-learn releases rename `sparse` to `sparse_output`
# -- confirm against the installed version before upgrading.
grapheme_root_ohe, vowel_diacritic_ohe, consonant_diacritic_ohe = (
    OneHotEncoder(dtype=np.uint16, sparse=False).fit(labels[[target]])
    for target in ('grapheme_root', 'vowel_diacritic', 'consonant_diacritic')
)

# Trailing expression keeps the cell's displayed output (the fitted encoder's repr).
consonant_diacritic_ohe

In [None]:
# Single-output CNN classifier built with the Keras functional API.
#
# The input layer is named 'input' and the output layer 'grapheme_root'; these
# are the same keys that `input_flow` uses in the dicts it yields.
inputs = Input(shape=(SHAPE_NEW, SHAPE_NEW, 1), name='input')

# Intermediate tensors are threaded through `x` so that the name `model` only
# ever refers to the finished Model object.
#
# Convolutional stage 1 (32 filters). The input shape is already fixed by
# `inputs`, so the first layer takes no `input_shape` argument (that argument
# is only meaningful for Sequential models).
x = Conv2D(filters=32, kernel_size=(3, 3), padding='SAME', activation='relu')(inputs)
x = BatchNormalization(momentum=0.15)(x)
x = MaxPool2D(pool_size=(2, 2))(x)
x = Conv2D(filters=32, kernel_size=(5, 5), padding='SAME', activation='relu')(x)
x = Dropout(rate=0.3)(x)

# Convolutional stage 2 (64 filters).
x = Conv2D(filters=64, kernel_size=(3, 3), padding='SAME', activation='relu')(x)
x = BatchNormalization(momentum=0.15)(x)
x = MaxPool2D(pool_size=(2, 2))(x)
x = Conv2D(filters=64, kernel_size=(5, 5), padding='SAME', activation='relu')(x)
x = BatchNormalization(momentum=0.15)(x)
x = Dropout(rate=0.3)(x)

# Fully connected head.
x = Flatten()(x)
x = Dense(1024, activation="relu")(x)
x = Dropout(rate=0.3)(x)
x = Dense(512, activation="relu")(x)

# Softmax output with 168 units.
# NOTE(review): presumably one unit per category learned by `grapheme_root_ohe`
# -- confirm against `grapheme_root_ohe.categories_`.
dense = Dense(168, activation='softmax', name='grapheme_root')(x)

model = Model(inputs=inputs, outputs=[dense])

In [None]:
# Read the image parquet file and join the label columns onto each image row,
# matching rows by 'image_id'.
df = pd.read_parquet("./train_image_data_0.parquet").merge(labels, on='image_id')

In [None]:
def resize(img):
    """Turn a flat pixel vector into a SHAPE_NEW x SHAPE_NEW image.

    The input is reshaped to ``SHAPE``, cast to ``uint16`` and then rescaled
    with OpenCV's ``resize`` (default interpolation).

    Args:
        img: array whose elements can be reshaped to ``SHAPE``.

    Returns:
        The resized 2-D array of shape ``(SHAPE_NEW, SHAPE_NEW)``.
    """
    image_2d = img.reshape(SHAPE).astype(np.uint16)
    target_size = (SHAPE_NEW, SHAPE_NEW)
    return cv.resize(image_2d, target_size)

def input_flow(X,sharpen=1):
    """Endlessly yield single-sample ``(inputs, targets)`` pairs for Keras training.

    Keras generator-based training expects the generator to loop over its data
    indefinitely; a generator that stops after one pass runs out of data as
    soon as a second epoch starts. This generator therefore restarts from the
    first row of ``X`` every time it reaches the end.

    Args:
        X: DataFrame whose rows are laid out so that ``row[1:-4]`` holds the
            flat pixel values and ``row[-4:-3]`` holds the grapheme-root label
            (the slicing used below).
        sharpen: currently unused; kept so existing callers keep working.

    Yields:
        A 2-tuple of dicts:
        ``{'input': array of shape (1, SHAPE_NEW, SHAPE_NEW, 1) scaled by 1/255}``
        and ``{'grapheme_root': one-hot encoded label from grapheme_root_ohe}``.
        If ``X`` has no rows the generator finishes immediately instead of
        spinning forever without yielding.
    """
    n_rows = X.shape[0]
    if n_rows == 0:
        # Nothing to yield; looping forever here would hang the consumer.
        return

    while True:
        for i in range(n_rows):
            row = X.iloc[i].values
            pixels = row[1:-4]
            root_label = row[-4:-3]
            yield (
                {'input': resize(pixels).reshape(1, SHAPE_NEW, SHAPE_NEW, 1) / 255},
                {'grapheme_root': grapheme_root_ohe.transform([root_label])},
            )

In [None]:
# Configure training: Adam optimizer, categorical cross-entropy loss, and
# accuracy reported as a metric.
model.compile(
    loss='categorical_crossentropy',
    metrics=['accuracy'],
    optimizer="adam",
)

# Train for 10 epochs; each epoch draws one generator step per row of `df`.
model.fit_generator(
    generator=input_flow(df),
    epochs=10,
    steps_per_epoch=len(df),
)