In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import _pickle as pickle
from tqdm import tqdm_notebook as tqdm
import cv2
from keras.utils import np_utils

from sklearn.model_selection import train_test_split



import keras
import tensorflow
from keras.datasets import mnist
from keras.models import Model, Sequential
from keras.applications import VGG16
from keras.preprocessing.image import ImageDataGenerator
from keras.utils import np_utils
import keras.backend as K
from keras.layers import Input, Conv2D, Lambda, Activation,LeakyReLU,ZeroPadding2D, Dense,MaxPool2D,Reshape, Flatten,MaxPooling2D,Dropout, BatchNormalization,UpSampling2D,GlobalMaxPooling2D, GlobalAveragePooling2D
# from tensorflow.keras.engine.input_layer import Input
from keras.layers import merge
from keras.optimizers import SGD,Adam
from keras.regularizers import l2
import numpy.random as rng
from sklearn.utils import shuffle

from keras.callbacks import ReduceLROnPlateau, EarlyStopping

In [None]:
def save(file, name, folder=""):
    """Pickle an object to ``<name>.pickle``.

    Parameters
    ----------
    file : object
        The Python object to serialise (despite the name, not a file handle).
    name : str
        File name without the ``.pickle`` extension.
    folder : str, optional
        When non-empty, the file is written to ``./<folder>/<name>.pickle``;
        otherwise it is written to ``<name>.pickle``.

    Raises
    ------
    OSError
        If the target file cannot be opened for writing (for example when
        ``folder`` does not exist).
    """
    if folder != "":
        path = './' + folder + '/' + name + '.pickle'
    else:
        path = name + '.pickle'
    # The context manager guarantees the handle is flushed and closed, even if
    # pickling raises; the previous `outfile.close` (no parentheses) never ran.
    with open(path, 'wb') as outfile:
        pickle.dump(file, outfile)
    
def load(name, folder=""):
    """Read back an object pickled in ``<name>.pickle``.

    Parameters
    ----------
    name : str
        File name without the ``.pickle`` extension.
    folder : str, optional
        When non-empty, the file is read from ``./<folder>/<name>.pickle``;
        otherwise from ``<name>.pickle``.

    Returns
    -------
    object
        Whatever object was stored in the pickle file.

    Raises
    ------
    OSError
        If the file cannot be opened for reading.
    """
    if folder != "":
        path = './' + folder + '/' + name + '.pickle'
    else:
        path = name + '.pickle'
    # NOTE(review): unpickling can execute arbitrary code; only load files
    # that come from a trusted source.
    # The context manager closes the handle even if unpickling raises; the
    # previous `outfile.close` (no parentheses) never actually closed it.
    with open(path, 'rb') as infile:
        return pickle.load(infile)

# Rows and columns of a raw image once its flat pixel record is reshaped.
HEIGHT, WIDTH = 137, 236
# Default side length (pixels) of the square image returned by crop_resize.
SIZE = 84
def bbox(img):
    """Return the bounding box of the truthy entries of a 2-D array.

    The result is ``(rmin, rmax, cmin, cmax)``: the first and last row index
    and the first and last column index that contain at least one truthy
    value. Both ends are inclusive.

    Raises ``IndexError`` when ``img`` has no truthy entry at all.
    """
    def _first_and_last(hits):
        # Indices where the 1-D mask is set; pick both ends of that run.
        positions = np.where(hits)[0]
        return positions[0], positions[-1]

    top, bottom = _first_and_last(np.any(img, axis=1))
    left, right = _first_and_last(np.any(img, axis=0))
    return top, bottom, left, right

def crop_resize(img0, size=SIZE, pad=16):
    """Crop an image around its bright content, square it and rescale it.

    Parameters
    ----------
    img0 : numpy.ndarray
        2-D image indexed as ``[row, column]``. Presumably an inverted
        ``HEIGHT`` x ``WIDTH`` uint8 image (bright strokes on a dark
        background) -- TODO confirm against the caller. It is not modified.
    size : int, optional
        Side length of the square output image.
    pad : int, optional
        Extra pixels added to the longest side of the crop before rescaling.

    Returns
    -------
    numpy.ndarray
        ``size`` x ``size`` image, divided by 255.

    Raises
    ------
    IndexError
        Propagated from ``bbox`` when no pixel inside the 5-pixel border
        exceeds the threshold of 80.
    """
    # Crop a box around pixels larger than the threshold; the 5-pixel border
    # is ignored because some images contain a line at the sides.
    # NOTE(review): the indices returned by bbox are relative to the trimmed
    # view (offset by 5 from img0). Kept as-is so the output stays identical;
    # confirm it matches the preprocessing the models were trained with.
    ymin, ymax, xmin, xmax = bbox(img0[5:-5, 5:-5] > 80)
    # Cropping may cut too much, so widen the box again, clamped to the image.
    xmin = xmin - 13 if (xmin > 13) else 0
    ymin = ymin - 10 if (ymin > 10) else 0
    xmax = xmax + 13 if (xmax < WIDTH - 13) else WIDTH
    ymax = ymax + 10 if (ymax < HEIGHT - 10) else HEIGHT
    # Copy the crop: a plain slice is a view, so the thresholding below would
    # otherwise overwrite pixels in the caller's array.
    img = img0[ymin:ymax, xmin:xmax].copy()
    # Remove low-intensity pixels as noise.
    img[img < 28] = 0
    lx, ly = xmax - xmin, ymax - ymin
    l = max(lx, ly) + pad
    # Pad to a square so the aspect ratio is kept when rescaling.
    img = np.pad(img, [((l - ly) // 2,), ((l - lx) // 2,)], mode='constant')
    return cv2.resize(img, (size, size)) / 255

In [None]:
# Parquet shards with the training images.
train = ['train_image_data_0.parquet', 'train_image_data_1.parquet',
         'train_image_data_2.parquet', 'train_image_data_3.parquet']

# Parquet shards with the test images; each one is preprocessed and cached.
test = ['test_image_data_0.parquet', 'test_image_data_1.parquet',
        'test_image_data_2.parquet', 'test_image_data_3.parquet']

# Submission template that lists the row_id values to fill in.
subs = 'sample_submission.csv'

# Names of the pickled models to blend (read via load(name, 'models')).
models = ['resnet18', 'densenet121']

# Blending weight applied to each model's predictions, in the same order.
weights = [0.5, 0.5]

## Resizing and cropping the images 

In [None]:
# Turn every raw test shard into cropped, resized images and cache the result
# as 'test_set_<i>.pickle' (a tuple of image array and image ids).
# Iterating over `test` keeps the shard count in sync with the file list.
for i, shard_path in enumerate(test):
    df = pd.read_parquet(shard_path)
    ids = df['image_id'].values

    # Extract the pixel columns (everything after the first column) once.
    # Indexing `df.iloc[j]` per image builds a mixed-dtype row object each
    # time, which is very slow on frames this wide.
    pixels = df.iloc[:, 1:].values

    # Invert each image (255 - value) so the thresholds in crop_resize see
    # bright content, then crop and rescale it.
    dataset = np.array([
        crop_resize(255 - row.astype(np.uint8).reshape((HEIGHT, WIDTH)))
        for row in tqdm(pixels)
    ])

    save((dataset, ids), 'test_set_' + str(i))

## Loading the preprocessed images and predicting with the model ensemble

In [None]:
def prediction(models, weights, n_shards=4):
    """Blend several models' predictions on the cached test shards.

    Parameters
    ----------
    models : sequence of str
        Names of the pickled models, each read with ``load(name, 'models')``.
        Every model must expose ``predict`` returning one row of scores per
        image: the first 168 columns are read as grapheme-root scores, the
        next 11 as vowel-diacritic scores and the rest as
        consonant-diacritic scores.
    weights : sequence of float
        Blending weight of each model, in the same order as ``models``.
        Must provide at least one weight per model.
    n_shards : int, optional
        Number of cached shards ``test_set_0`` ... ``test_set_<n_shards-1>``
        to read (default 4).

    Returns
    -------
    pandas.DataFrame
        Columns ``row_id`` and ``target`` with three rows per image, in the
        order consonant diacritic, grapheme root, vowel diacritic.

    Raises
    ------
    ValueError
        If ``models`` is empty, there are fewer weights than models, or
        ``n_shards`` is smaller than 1.
    """
    if len(models) == 0:
        raise ValueError("prediction needs at least one model")
    if len(weights) < len(models):
        raise ValueError("prediction needs one weight per model")
    if n_shards < 1:
        raise ValueError("n_shards must be at least 1")

    # Unpickle every model once instead of once per shard.
    # NOTE(review): unpickling can execute arbitrary code; only use model
    # files that come from a trusted source.
    loaded_models = [load(model_name, 'models') for model_name in models]

    preds = [[] for _ in loaded_models]
    id_chunks = []

    for i in range(n_shards):
        dataset, shard_ids = load('test_set_' + str(i))
        id_chunks.append(shard_ids)

        # Add the trailing channel axis before calling predict.
        batch = dataset.reshape((dataset.shape[0], SIZE, SIZE, 1))

        for j, model in enumerate(loaded_models):
            print('data : '+str(i)+' model '+str(j))
            preds[j].extend(np.array(model.predict(batch)))

    ids = np.concatenate(id_chunks)
    preds = np.array(preds)

    # Weighted sum of the per-model score matrices (zip stops at the last model).
    pred = sum(weight * model_pred for weight, model_pred in zip(weights, preds))

    # Column layout of a score row: grapheme root | vowel | consonant.
    n_root, n_vowel = 168, 11
    pred_gr = np.argmax(pred[:, :n_root], axis=1)
    pred_vd = np.argmax(pred[:, n_root:n_root + n_vowel], axis=1)
    pred_cd = np.argmax(pred[:, n_root + n_vowel:], axis=1)

    # Three submission rows per image, always in this component order.
    suffixes = ('_consonant_diacritic', '_grapheme_root', '_vowel_diacritic')
    row_id = [image_id + suffix for image_id in ids for suffix in suffixes]
    target = [
        value
        for triple in zip(pred_cd, pred_gr, pred_vd)
        for value in triple
    ]

    return pd.DataFrame({'row_id': row_id, 'target': target})

In [None]:
# Read the submission template; its row order is the order written back out.
submissions = pd.read_csv(subs)

# Blend the models, then key the predictions by row_id (the column is kept)
# so they can be looked up in the template's order.
df = prediction(models, weights)
df = df.set_index('row_id', drop=False)

# Pull the predicted class for every template row and store it as integers.
submissions['target'] = (
    df.loc[submissions['row_id'].values, 'target'].values.astype(int)
)

submissions.to_csv('submission.csv', index=False)

In [None]:
# Show the completed submission table as this cell's output.
submissions