In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import gc
import cv2
from math import ceil

from tensorflow import keras
from sklearn.model_selection import train_test_split
import os

In [None]:
# Dimensions (in pixels) of one raw image row; resize_image() reshapes every
# flattened row to (HEIGHT, WIDTH) before cropping.
WIDTH, HEIGHT = 236, 137

In [None]:
# Side length of the square image handed to the models.
IMG_SIZE = 128
# Output height/width used by resize_image() and the test data generator.
HEIGHT_NEW = WIDTH_NEW = IMG_SIZE
# Number of channels each image is expanded to before prediction.
N_CHANNELS = 1

## CNN Model

In [None]:
# Saved Keras checkpoints of the two models whose outputs are summed at
# prediction time. (Paths of earlier experiments were removed from this cell;
# see the notebook history if they are needed again.)
trainedmodel1 = "../input/pretrainedmodel/densenet121_128x128_1-rr-final.h5"
trainedmodel2 = "../input/pretrainedmodel/dense121_128x128-3.h5"

# Load model 1 first, then model 2, and print the architecture of model 1.
model1, model2 = [
    keras.models.load_model(checkpoint_path)
    for checkpoint_path in (trainedmodel1, trainedmodel2)
]
model1.summary()

In [None]:
# Folder the test parquet files are read from.
DIR = '../input/bengaliai-cv19'
# Number of images per batch served by the test data generator.
BATCH_SIZE = 24

In [None]:
class TestDataGenerator(keras.utils.Sequence):
    """Keras ``Sequence`` that serves flattened test images in order, inputs only.

    Every row of ``X`` is one flattened single-channel image. Batches are
    produced in row order without shuffling and contain no labels, so the
    generator is suitable for prediction.

    Args:
        X: Indexable collection with ``len()``; item ``k`` holds the
            ``img_size[0] * img_size[1]`` flattened pixels of image ``k``.
        batch_size: Maximum number of images per batch.
        img_size: ``(height, width, channels)`` of each image in a batch.
        *args, **kwargs: Accepted for call-site compatibility and ignored.
    """

    def __init__(self, X, batch_size = 16, img_size = (512, 512, 3), *args, **kwargs):
        self.X = X
        self.indices = np.arange(len(self.X))
        self.batch_size = batch_size
        self.img_size = img_size

    def __len__(self):
        """Return the number of batches needed to cover every row of ``X``."""
        return int(ceil(len(self.X) / self.batch_size))

    def __getitem__(self, index):
        """Return batch ``index`` as an array of shape ``(n, *img_size)``.

        ``n`` equals ``batch_size`` except for the final batch, which holds
        only the remaining rows.
        """
        indices = self.indices[index*self.batch_size:(index+1)*self.batch_size]
        return self.__data_generation(indices)

    def __data_generation(self, indices):
        """Build the image array for the given row indices of ``X``."""
        # Allocate exactly one slot per requested row. Sizing by batch_size
        # left uninitialised np.empty() rows in a short final batch, which
        # were then fed to the model and produced surplus predictions.
        batch = np.empty((len(indices), *self.img_size))
        # Take geometry from this instance rather than module globals so the
        # generator honours the img_size it was constructed with.
        n_channels = self.img_size[-1]

        for slot, row in enumerate(indices):
            flat = self.X[row]
            # Replicate the single channel along a new trailing axis, then
            # restore the (height, width, channels) layout.
            stacked = np.stack((flat,)*n_channels, axis=-1)
            batch[slot] = stacked.reshape(self.img_size)

        return batch

In [None]:
# Image Prep
def resize_image(img, WIDTH_NEW, HEIGHT_NEW):
    """Crop a flattened image to the bounding box of its contours and rescale it.

    Args:
        img: Flattened image with ``HEIGHT * WIDTH`` pixels (module-level
            constants). NOTE(review): Otsu thresholding presumably needs
            8-bit data here - confirm the caller passes uint8.
        WIDTH_NEW: Width of the returned image in pixels.
        HEIGHT_NEW: Height of the returned image in pixels.

    Returns:
        1-D ``float16`` array of length ``WIDTH_NEW * HEIGHT_NEW`` holding the
        cropped, resized image with pixel values divided by 255.
    """
    image = img.reshape(HEIGHT, WIDTH)
    # Inverted binary mask. THRESH_OTSU makes OpenCV pick the threshold
    # automatically, so the literal 30 is not the value actually applied.
    _, thresh = cv2.threshold(image, 30, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)
    # findContours returns (image, contours, hierarchy) or (contours, hierarchy)
    # depending on the OpenCV version; contours is second-to-last in both.
    contours = cv2.findContours(thresh, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)[-2]

    if len(contours) > 0:
        # Union of all contour bounding boxes.
        boxes = [cv2.boundingRect(cnt) for cnt in contours]
        xmin = min(x for x, _, _, _ in boxes)
        ymin = min(y for _, y, _, _ in boxes)
        xmax = max(x + w for x, _, w, _ in boxes)
        ymax = max(y + h for _, y, _, h in boxes)
        roi = image[ymin:ymax, xmin:xmax]
    else:
        # No contour found (e.g. a blank image): min()/max() on empty lists
        # used to raise ValueError here. Fall back to the uncropped image.
        roi = image

    image_resized = cv2.resize(roi, (WIDTH_NEW, HEIGHT_NEW), interpolation = cv2.INTER_AREA)
    image_resized = (image_resized/255).astype(np.float16)

    return image_resized.reshape(-1)

In [None]:
# Target names, in the order used to pair them with the model outputs;
# each one becomes the suffix of a submission row id.
tgt_cols = ['grapheme_root','vowel_diacritic','consonant_diacritic']

# Accumulators for the submission, filled by the prediction cells below.
row_ids = list()
targets = list()

In [None]:
# Test data is split over test_image_data_0.parquet .. test_image_data_3.parquet.
N_TEST_FILES = 4


def predict_test_file(file_idx):
    """Predict every target for all images of one test parquet file.

    Reads ``test_image_data_<file_idx>.parquet`` from ``DIR``, crops/resizes
    each row with ``resize_image``, runs ``model1`` and ``model2`` on the
    result and, per target, picks the class with the largest summed output
    of the two models.

    Args:
        file_idx: Index of the parquet file to process.

    Returns:
        ``(row_ids, targets)``: two equally long lists. Rows are ordered image
        by image and, within an image, in ``tgt_cols`` order; each row id is
        ``"<image_id>_<target column>"``.
    """
    df = pd.read_parquet(os.path.join(DIR, 'test_image_data_'+str(file_idx)+'.parquet'))
    image_ids = df['image_id'].values
    df = df.drop(['image_id'], axis = 1)

    n_images = df.shape[0]
    if n_images == 0:
        # Nothing to predict; avoid running the models on an empty generator.
        return [], []

    # Preprocess row by row into a compact float16 matrix.
    X = np.empty([n_images, int(WIDTH_NEW*HEIGHT_NEW)], dtype=np.float16)
    for row in range(n_images):
        # Positional access (iloc) instead of a label lookup. The uint8 cast
        # is applied here per row: the previous whole-frame
        # `df.astype(np.uint8)` discarded its result and never took effect.
        pixels = df.iloc[row].values.astype(np.uint8, copy=False)
        X[row, :] = resize_image(pixels, WIDTH_NEW, HEIGHT_NEW)
    del df
    gc.collect()

    data_generator_test = TestDataGenerator(X, batch_size = BATCH_SIZE, img_size = (HEIGHT_NEW, WIDTH_NEW, N_CHANNELS))

    # Predict with both models.
    # NOTE(review): predict_generator is deprecated in newer TensorFlow
    # releases in favour of predict(); kept to match the target environment.
    # Each call is assumed to return one array per target, ordered like
    # tgt_cols - confirm against the model definition.
    preds1 = model1.predict_generator(data_generator_test)
    preds2 = model2.predict_generator(data_generator_test)

    # Sum the two models' outputs per target and take the arg-max class.
    # Slicing to n_images drops any surplus rows a padded last batch produced.
    head_labels = [
        np.argmax(p1[:n_images] + p2[:n_images], axis=1)
        for p1, p2, _ in zip(preds1, preds2, tgt_cols)
    ]

    file_row_ids, file_targets = [], []
    for pos, image_id in enumerate(image_ids):
        for col, labels in zip(tgt_cols, head_labels):
            file_row_ids.append(image_id+'_'+col)
            file_targets.append(labels[pos])
    return file_row_ids, file_targets


# Process the files one at a time so only one file's pixels are in memory.
for file_idx in range(N_TEST_FILES):
    file_row_ids, file_targets = predict_test_file(file_idx)
    row_ids.extend(file_row_ids)
    targets.extend(file_targets)
    del file_row_ids, file_targets
    gc.collect()

In [None]:
# Assemble the collected predictions into a two-column table, write it to
# submission.csv without the index, and print the first 40 rows as a check.
submission_data = {'row_id': row_ids, 'target': targets}
submit_df = pd.DataFrame(submission_data, columns = ['row_id','target'])
submit_df.to_csv('submission.csv', index = False)
preview = submit_df.head(40)
print(preview)