# OCR Bengali
# KAGGLE SUBMISSION

## Libraries

In [1]:
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import math as math
import gc
import os


## To see files in Kaggle folder
#for dirname, _, filenames in os.walk('/kaggle/input'):
#for dirname, _, filenames in os.walk('/Users/franf/Python/Projects/OCR_BANGALI'):
#    for filename in filenames:
#        print(os.path.join(dirname, filename))


## Crop and resize functions

In [2]:
def resize(X,size=128):
    """Invert, normalize and resize a batch of images.

    Each pixel value v is mapped to (255 - v) / 255, then every image is
    resized to size x size with tf.image.resize (default interpolation).

    INPUT
        X: 4D array/tensor (training samples, X, Y, Channels);
           presumably pixel values in 0..255 -- confirm against the caller.
        size: side length of the square output images.
    OUTPUT
        tf tensor holding the inverted, normalized and resized images.
    """
    inverted = tf.subtract(255.0, X)
    normalized = tf.math.divide(inverted, 255.0)
    return tf.image.resize(normalized, [size, size])

def crop_and_resize(X,size=64,threshold=0.01):
    """Invert, normalize, crop around the drawn content and resize a batch of images.

    Operations:
      (1) Invert and normalize: each pixel value v becomes (255 - v) / 255.
      (2) Keep only the pixels whose normalized value is above 0.5 and compute,
          per image, the cumulative share of that pixel mass along the rows
          and along the columns.
      (3) Box edges are the first row/column where the cumulative share
          exceeds `threshold` (top/left) and `1 - threshold` (bottom/right),
          so roughly ((1 - threshold*2) x 100)% of the mass lies inside.
          Every margin between the box and the image border is then scaled
          by 0.6, which enlarges the box outwards.
      (4) Crop each image to its box and resize (bilinear) to size x size.

    INPUT
        X: 4D array/tensor (training samples, X, Y, Channels). The reshape to
           (samples, X, Y) below is only valid for single-channel images.
        size: side length of the square output images.
        threshold: share of pixel mass left outside the box on each side,
                   see operation (3).
    OUTPUT
        X_crop: tf tensor (samples, size, size, Channels) with the inverted,
                normalized, cropped and resized images.

    NOTE: an image with no pixel above 0.5 has a zero mass, so the cumulative
    shares are 0/0 for that image; this case is not handled explicitly.
    """
    # Fraction of each border margin that is kept when building the crop box.
    margin_scale = 0.6

    ## (1) Inversion and normalization
    X=tf.math.divide(tf.subtract(255.0,X),255.0)

    shapeX=tf.shape(X)
    m0=shapeX[0]  # number of images
    m1=shapeX[1]  # image height (rows)
    m2=shapeX[2]  # image width (columns)

    ## (2) Pixel mass: only pixels above 0.5 contribute
    X_aux=tf.math.multiply(tf.cast(X,tf.float32),tf.cast((tf.math.greater(X,0.5)),tf.float32))
    # Total mass per image, shaped (samples, 1) so it broadcasts over rows/columns.
    suma=tf.reshape(tf.math.reduce_sum(tf.math.reduce_sum(tf.math.reduce_sum(X_aux,axis=1),axis=1),axis=1),[-1,1])
    # Drop the channel axis once; both profiles below are built from this tensor.
    mass=tf.reshape(X_aux,[m0,m1,m2])

    ## (3) Calculation of rectangles to crop
    # Cumulative share of mass from the top row downwards: (samples, rows).
    y  = tf.math.divide(tf.math.cumsum(tf.math.reduce_sum(mass,axis=2),axis=1),suma)

    # Top edge: first row whose cumulative share exceeds threshold.
    y1 = tf.cast(tf.math.greater(y,threshold),tf.float32)
    y1 = tf.reshape(tf.math.argmax(y1,axis=1),[-1,1])
    y1 = tf.cast(y1,tf.float32)/tf.cast(m1,tf.float32)*margin_scale

    # Bottom edge: first row whose cumulative share exceeds 1 - threshold.
    y2 = tf.cast(tf.math.greater(y,1.0-threshold),tf.float32)
    y2 = tf.reshape(tf.math.argmax(y2,axis=1),[-1,1])
    y2 = 1.0-(1.0-tf.cast(y2,tf.float32)/tf.cast(m1,tf.float32))*margin_scale

    # Cumulative share of mass from the left column rightwards: (samples, columns).
    x  = tf.math.divide(tf.math.cumsum(tf.math.reduce_sum(mass,axis=1),axis=1),suma)

    # Left edge.
    x1 = tf.cast(tf.math.greater(x,threshold),tf.float32)
    x1 = tf.reshape(tf.math.argmax(x1,axis=1),[-1,1])
    x1 = tf.cast(x1,tf.float32)/tf.cast(m2,tf.float32)*margin_scale

    # Right edge.
    x2 = tf.cast(tf.math.greater(x,1.0-threshold),tf.float32)
    x2 = tf.reshape(tf.math.argmax(x2,axis=1),[-1,1])
    x2 = 1.0-(1.0-tf.cast(x2,tf.float32)/tf.cast(m2,tf.float32))*margin_scale

    # One [y1, x1, y2, x2] row per image, in normalized coordinates.
    boxes=tf.cast(tf.reshape(tf.transpose(tf.stack([y1,x1,y2,x2])),[m0,4]),tf.float32)

    ## (4) Crop and resize; box i is applied to image i
    X_crop=tf.image.crop_and_resize(X,
                                     boxes=boxes,
                                     box_indices=tf.range(0,m0,dtype=tf.int32),
                                     crop_size=[size,size],
                                     method='bilinear'
                                     )
    return X_crop





## Layers that pre-process inside the model at test time, to avoid exceeding memory

In [3]:
class ResizeLayer(tf.keras.layers.Layer):
    """Keras layer that applies resize() to its input.

    The layer has no trainable weights; it only stores the target size.

    Arguments:
        size: side length of the square output images (passed to resize()).
        **kwargs: standard Keras Layer arguments such as `name` or `dtype`.
    """
    def __init__(self, size=128, **kwargs):
        super(ResizeLayer, self).__init__(**kwargs)
        self.size = size
    def build(self, input_shape):
        # Nothing to build: the layer owns no weights.
        pass
    def call(self, input):
        return resize(input,self.size)
    def get_config(self):
        # Include `size` so the layer can be rebuilt from its config when the
        # surrounding model is serialized.
        config = super(ResizeLayer, self).get_config()
        config.update({'size': self.size})
        return config
    
class CropResizeLayer(tf.keras.layers.Layer):
    """Keras layer that applies crop_and_resize() to its input.

    The layer has no trainable weights; it only stores the target size.
    crop_and_resize() is called with its default threshold.

    Arguments:
        size: side length of the square output images
              (passed to crop_and_resize()).
        **kwargs: standard Keras Layer arguments such as `name` or `dtype`.
    """
    def __init__(self, size=128, **kwargs):
        super(CropResizeLayer, self).__init__(**kwargs)
        self.size = size
    def build(self, input_shape):
        # Nothing to build: the layer owns no weights.
        pass
    def call(self, input):
        return crop_and_resize(input,self.size)
    def get_config(self):
        # Include `size` so the layer can be rebuilt from its config when the
        # surrounding model is serialized.
        config = super(CropResizeLayer, self).get_config()
        config.update({'size': self.size})
        return config
    
class onlyone(tf.keras.layers.Layer):
    """Keras layer that replaces each row of its input by a one-hot vector.

    The one-hot position is the argmax of the input along axis 1 and the
    one-hot depth is the size of that axis.
    """
    def __init__(self):
        super().__init__()
    def build(self, input_shape):
        # The layer owns no weights.
        pass
    def call(self, input):
        winners = tf.math.argmax(input, axis=1)
        n_classes = input.shape[1]
        return tf.one_hot(winners, n_classes)

## Model

In [4]:
## Folder with the trained models. Keep the trailing slash: file names are
## appended to this string with "+".
Path2models = '/Users/franf/Python/Projects/OCR_BANGALI/Trained models/'

## Models with 64x64 input
Model64 = [
    'OCRB64_DenseNet121_V20200306.h5',
    'OCRB64_DenseNet169_V20200306.h5',
    'OCRB64_ResNet50V2_V20200305.h5',
    'OCRB64MU_DenseNet121_V20200309.h5',
]

## Models with 128x128 input
Model128 = [
    'OCRB128_DenseNet121_V20200304.h5',
    'OCRB128_DenseNet169_V20200228.h5',
    'OCRB128_ResNet50V2_V20200304_V2.h5',
]


In [5]:
## Build one ensemble model: the raw image goes through both pre-processing
## layers, every trained model is applied to the matching pre-processed tensor
## and the three outputs of all models are added element-wise.
N_CHANNELS=1
HEIGHT = 137
WIDTH = 236
#SIZE = 64 or 128

inputs =  tf.keras.layers.Input(shape = (HEIGHT,WIDTH,N_CHANNELS))
model_R  =  ResizeLayer(128)(inputs)      # 128x128 branch
model_CR  =  CropResizeLayer(64)(inputs)  # 64x64 cropped branch

# Per-model outputs, one list per output position.
my0=[]
my1=[]
my2=[]
# Each group of model files is paired with the tensor its models expect.
for model_files, preprocessed in ((Model64, model_CR), (Model128, model_R)):
    for model_file in model_files:
        model=tf.keras.models.load_model(Path2models+model_file)
        miy0,miy1,miy2 =  model(preprocessed)
        # If it is desired one_hot before adding, uncomment this:
        # miy0=onlyone()(miy0)
        # miy1=onlyone()(miy1)
        # miy2=onlyone()(miy2)
        my0.append(miy0)
        my1.append(miy1)
        my2.append(miy2)

# Element-wise sum of the corresponding output of every model.
grapheme = tf.keras.layers.Add()(my0)
vowel = tf.keras.layers.Add()(my1)
consonant = tf.keras.layers.Add()(my2)

model = tf.keras.Model(inputs=inputs, outputs=[grapheme, vowel, consonant])
model.summary()

model.compile(optimizer = tf.optimizers.Adam(),
              loss = ['categorical_crossentropy','categorical_crossentropy','categorical_crossentropy'],
              metrics=['accuracy'])

Model: "model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, 137, 236, 1) 0                                            
__________________________________________________________________________________________________
crop_resize_layer (CropResizeLa (None, 64, 64, 1)    0           input_1[0][0]                    
__________________________________________________________________________________________________
resize_layer (ResizeLayer)      (None, 128, 128, 1)  0           input_1[0][0]                    
__________________________________________________________________________________________________
model_9 (Model)                 [(None, 168), (None, 7352856     crop_resize_layer[0][0]          
______________________________________________________________________________________________

## Prediction and submission file

In [6]:
## Predict every test parquet file with the ensemble model and write
## submission.csv with one row per (image, component).
preds_dict = {
    'grapheme_root': [],
    'vowel_diacritic': [],
    'consonant_diacritic': []
}

# Component names in the order of the model outputs [grapheme, vowel, consonant].
output_names = ('grapheme_root', 'vowel_diacritic', 'consonant_diacritic')
# Order in which the rows of each image are written to the submission file.
components = ['consonant_diacritic', 'grapheme_root', 'vowel_diacritic']
target=[] # model predictions placeholder
row_id=[] # row_id place holder
for file_idx in range(4):
    #df_test_img = pd.read_parquet('/kaggle/input/bengaliai-cv19/test_image_data_{}.parquet'.format(file_idx))
    df_test_img = pd.read_parquet('/Users/franf/Python/Projects/OCR_BANGALI/Data/test_image_data_{}.parquet'.format(file_idx))
    df_test_img.set_index('image_id', inplace=True)
    print(df_test_img.shape)
    # Every row is one flattened image; restore (samples, HEIGHT, WIDTH, 1).
    TESTX=df_test_img.values.reshape(-1, HEIGHT, WIDTH,1)
    print(TESTX.shape)

    preds = model.predict(TESTX)

    # Predicted class index per image for each component.
    for name, pred in zip(output_names, preds):
        preds_dict[name] = np.argmax(pred, axis=1)

    for k, image_id in enumerate(df_test_img.index.values):
        for comp in components:
            row_id.append(image_id+'_'+comp)
            target.append(preds_dict[comp][k])
    # Free the data of this file before loading the next one.
    del df_test_img
    del TESTX
    gc.collect()

df_sample = pd.DataFrame(
    {
        'row_id': row_id,
        'target':target
    },
    columns = ['row_id','target']
)
df_sample.to_csv('submission.csv',index=False)
df_sample.head()


(3, 32332)
(3, 137, 236, 1)
(3, 32332)
(3, 137, 236, 1)
(3, 32332)
(3, 137, 236, 1)
(3, 32332)
(3, 137, 236, 1)


Unnamed: 0,row_id,target
0,Test_0_consonant_diacritic,0
1,Test_0_grapheme_root,3
2,Test_0_vowel_diacritic,0
3,Test_1_consonant_diacritic,0
4,Test_1_grapheme_root,93
