In [67]:
# !pip install /kaggle/input/my-packages/kaggle/working/packages/*.whl

In [68]:
import numpy as np
import warnings
import cv2
from tqdm import tqdm
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
import random
import pandas as pd
import os
import time

In [69]:
import tensorflow as tf
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.layers import Dense, LSTM, Reshape, BatchNormalization, Input, Conv2D, MaxPool2D, Lambda, Bidirectional, Add, Activation
from tensorflow.keras.models import Model
from tensorflow.keras.activations import relu, sigmoid, softmax
import tensorflow.keras.backend as K
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.callbacks import CSVLogger, TensorBoard, ModelCheckpoint, EarlyStopping, ReduceLROnPlateau
import tensorflow as tf

In [70]:
class Config:
    """Static settings shared by the preprocessing, model-building and decoding cells."""
    # Image-size bounds. Not referenced by any other cell visible in this
    # notebook; presumably statistics of the dataset images — TODO confirm.
    max_width = 2634
    max_height = 163
    min_width = 851
    min_height = 58

    # Target size every image is brought to in img_processing (resize + pad).
    true_width = 2048
    true_height = 128
    # (height, width, channels) passed to CRNN as the network input shape.
    input_shape = (true_height,true_width,1)
    # Per-block settings for CRNN; block number k (1-based) reads entry k-1.
    filters=[32,64,128,128,256,256,512]
    pool_sizes=[3,3,1,1,1,1,(3,1)]
    strides=[3,3,1,1,1,1,1]
    # Units per direction in each bidirectional LSTM layer of CRNN.
    lstm_units = 512

    # time_steps is not referenced by the visible code; presumably the width
    # left after the two stride-3 poolings (2048 -> 682 -> 227) — TODO confirm.
    time_steps = 227
    # Length of the padded label vector fed to the CTC training model.
    max_label_len = 227

    # Index -> character table used when decoding predictions. The position of
    # each entry is the class index, so the order must not change. CRNN is
    # built with len(label) + 1 output classes.
    label=[' ', '#', "'", '(', ')', '+', ',', '-', '.', '/', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', ':', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', 'Á', 'Â', 'Ê', 'Í', 'Ó', 'Ô', 'Ý', 'à', 'á', 'â', 'ã', 'è', 'é', 'ê', 'ì', 'í', 'ò', 'ó', 'ô', 'õ', 'ù', 'ú', 'ý', 'ă', 'Đ', 'đ', 'ĩ', 'ũ', 'Ơ', 'ơ', 'Ư', 'ư', 'ạ', 'Ả', 'ả', 'Ấ', 'ấ', 'ầ', 'ẩ', 'ẫ', 'ậ', 'ắ', 'ằ', 'ẳ', 'ẵ', 'ặ', 'ẹ', 'ẻ', 'ẽ', 'ế', 'ề', 'ể', 'ễ', 'ệ', 'ỉ', 'ị', 'ọ', 'ỏ', 'ố', 'ồ', 'Ổ', 'ổ', 'ỗ', 'ộ', 'ớ', 'ờ', 'Ở', 'ở', 'ỡ', 'ợ', 'ụ', 'ủ', 'Ứ', 'ứ', 'ừ', 'ử', 'ữ', 'ự', 'ỳ', 'ỵ', 'ỷ', 'ỹ']
    

In [71]:
def img_processing(path_img):
    """Load an image file and convert it into the binarised input expected by the CRNN.

    Steps: read -> grayscale -> fit to ``Config.true_height`` x ``Config.true_width``
    -> Gaussian blur -> inverted adaptive threshold -> remove long horizontal and
    vertical strokes -> small morphological closings -> scale to [0, 1].

    Args:
        path_img: Path of an image file readable by ``cv2.imread``.

    Returns:
        Float array of shape ``(Config.true_height, Config.true_width, 1)`` with
        values in ``[0, 1]`` (foreground is 1 because the threshold is inverted).

    Raises:
        OSError: If ``cv2.imread`` cannot read ``path_img``.
    """
    image = cv2.imread(path_img)
    if image is None:
        # cv2.imread reports failure by returning None rather than raising,
        # which would otherwise surface as an obscure cvtColor error.
        raise OSError(f"Could not read image: {path_img!r}")
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    src_h, src_w = gray.shape[:2]
    # Width that keeps the aspect ratio at the target height (at least 1 px,
    # since cv2.resize rejects a zero-sized destination).
    scaled_w = max(1, int(Config.true_height / src_h * src_w))

    if scaled_w <= Config.true_width:
        # Fits: keep the aspect ratio and fill the right side with each row's median.
        resized = cv2.resize(gray, (scaled_w, Config.true_height), interpolation=cv2.INTER_AREA)
        img_padding = np.pad(resized, ((0, 0), (0, Config.true_width - scaled_w)), 'median')
    else:
        # Too wide to pad: squash the original straight to the target size.
        img_padding = cv2.resize(gray, (Config.true_width, Config.true_height), interpolation=cv2.INTER_AREA)

    image_gau = cv2.GaussianBlur(img_padding, (5, 5), 1)

    # Inverted binarisation: dark strokes become 255 on a 0 background.
    thresh = cv2.adaptiveThreshold(
        src=image_gau,
        maxValue=255,
        adaptiveMethod=cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
        thresholdType=cv2.THRESH_BINARY_INV,
        blockSize=11,
        C=9)

    # Opening with a long thin kernel keeps only strokes at least that long,
    # i.e. the horizontal / vertical lines to be removed.
    horizontal_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (30, 1))
    detected_lines_horizontal = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, horizontal_kernel, iterations=1)
    vertical_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (1, 60))
    detected_lines_vertical = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, vertical_kernel, iterations=1)

    # Subtract the detected lines from the binarised image.
    detected_line = cv2.bitwise_or(detected_lines_vertical, detected_lines_horizontal)
    without_lines = cv2.bitwise_and(cv2.bitwise_not(detected_line), thresh)

    # Close small gaps left where a removed line crossed the remaining strokes:
    # first vertically, then horizontally.
    vertical_repair = cv2.getStructuringElement(cv2.MORPH_RECT, (1, 5))
    result = cv2.morphologyEx(without_lines, cv2.MORPH_CLOSE, vertical_repair, iterations=1)
    horizontal_repair = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 1))
    result = cv2.morphologyEx(result, cv2.MORPH_CLOSE, horizontal_repair, iterations=1)

    # Add the channel axis and scale 0/255 to 0.0/1.0.
    img = np.expand_dims(result, axis=2)
    img = img / 255.0
    return img

In [72]:
class CRNN:
    """Convolutional + bidirectional-LSTM sequence recogniser with a CTC training wrapper.

    ``crnn_model`` (built in ``__init__``) maps an image to per-time-step class
    probabilities and is what ``load_weights`` / ``predict`` operate on.
    ``compile`` additionally builds ``self.model``, which wraps ``crnn_model``
    with label inputs and a CTC loss layer for training.
    """
    def __init__(self,input_shape, num_classes,max_label_len, filters= Config.filters, pool_sizes=Config.pool_sizes, strides=Config.strides, lstm_units=Config.lstm_units, dropout_rate=0.2):
        """Store the hyper-parameters and build the inference model immediately.

        Args:
            input_shape: Shape of a single input image, passed to ``Input``.
            num_classes: Size of the softmax output layer.
            max_label_len: Length of the label vector input created in ``compile``.
            filters: Conv filter counts, one entry per block (block k reads index k-1).
            pool_sizes: Pool sizes, indexed like ``filters``.
            strides: Pool strides, indexed like ``filters``.
            lstm_units: Units per direction in each bidirectional LSTM.
            dropout_rate: ``dropout`` argument of both LSTM layers.
        """
        self.input_shape = input_shape
        self.num_classes = num_classes
        self.max_label_len= max_label_len
        self.lstm_units = lstm_units
        self.dropout_rate = dropout_rate

        self.filters = filters
        self.pool_sizes=pool_sizes
        self.strides=strides
        self.crnn_model = None
        self._build_model()

    def _conv_block(self,x,i,MaxPool=False,BatchNormal=False,last_block=False):
        """Apply a 3x3 'same' convolution, optional batch-norm / max-pool, then ReLU.

        Args:
            x: Input tensor.
            i: 1-based block number; settings are read from index ``i - 1``.
            MaxPool: Whether to add a max-pooling layer.
            BatchNormal: Whether to add batch normalisation after the convolution.
            last_block: With ``MaxPool``, pool without passing ``strides``
                (only ``pool_size`` is given); otherwise ``self.strides`` is used.

        Returns:
            The output tensor of the block.
        """
        # Convert the 1-based block number to a list index.
        i=i-1
        x=Conv2D(self.filters[i],(3,3),padding='same')(x)
        if BatchNormal:
            x=BatchNormalization()(x)
        if MaxPool and last_block:
            x=MaxPool2D(pool_size=self.pool_sizes[i])(x)
        elif MaxPool:
            x=MaxPool2D(pool_size=self.pool_sizes[i],strides=self.strides[i])(x)

        # The activation is applied last, after any pooling.
        x=Activation('relu')(x)
        return x

    def _residual_block(self,x,i):
        """Apply conv -> batch-norm, add the block input back, then ReLU.

        Args:
            x: Input tensor; also used as the skip connection.
            i: 1-based block number; ``self.filters[i - 1]`` sets the conv width.

        Returns:
            The output tensor of the block.
        """
        # Convert the 1-based block number to a list index.
        i=i-1
        y=Conv2D(self.filters[i],(3,3),padding='same')(x)
        y=BatchNormalization()(y)
        # NOTE(review): the element-wise add presumably requires self.filters[i]
        # to equal the channel count of x — confirm when changing `filters`.
        y=Add()([y,x])
        y=Activation('relu')(y)
        return y

    def ctc_lambda_func(self,args):
        """Compute the CTC batch cost; used as the body of the 'ctc' Lambda layer.

        Args:
            args: Sequence ``(y_pred, labels, input_length, label_length)``.

        Returns:
            The result of ``K.ctc_batch_cost`` for the batch.
        """
        y_pred, labels, input_length, label_length = args
        return K.ctc_batch_cost(labels, y_pred, input_length, label_length)

    def _build_model(self):
        """Assemble the inference network and store it in ``self.crnn_model``.

        Layout: seven conv/residual blocks, one extra (3,1) max-pool, squeeze of
        axis 1, two bidirectional LSTMs and a per-time-step softmax.
        """
        inputs=Input(shape=self.input_shape)
        # Block 1: conv + max-pool
        x=self._conv_block(inputs,1,MaxPool=True)
        # Block 2: conv + max-pool
        x=self._conv_block(x,2,MaxPool=True)
        # Block 3: conv + batch-norm
        x=self._conv_block(x,3,BatchNormal=True)
        # Block 4: residual
        x=self._residual_block(x,4)
        # Block 5: conv + batch-norm
        x=self._conv_block(x,5,BatchNormal=True)
        # Block 6: residual
        x=self._residual_block(x,6)
        # Block 7: conv + batch-norm + max-pool without explicit strides
        x=self._conv_block(x,7,BatchNormal=True,MaxPool=True,last_block=True)

        # Extra pooling along axis 1 only (pool size is hard-coded here rather
        # than read from self.pool_sizes).
        x = MaxPool2D(pool_size=(3,1))(x)

        # Drop axis 1 so the LSTMs receive a 3-D sequence tensor.
        # NOTE(review): presumably axis 1 (height) has size 1 at this point for
        # the Config defaults — confirm before changing input or pool sizes.
        squeezed = Lambda(lambda x: K.squeeze(x, 1))(x)

        blstm_1 = Bidirectional(LSTM(self.lstm_units, return_sequences=True, dropout = self.dropout_rate))(squeezed)
        blstm_2 = Bidirectional(LSTM(self.lstm_units, return_sequences=True, dropout = self.dropout_rate))(blstm_1)

        # Per-time-step class distribution.
        outputs = Dense(self.num_classes, activation = 'softmax')(blstm_2)

        self.crnn_model = Model(inputs, outputs)

    def summary(self):
        """Print the Keras summary of the inference model."""
        self.crnn_model.summary()

    def get_model(self):
        """Return the inference model (image in, softmax sequence out)."""
        return self.crnn_model

    def compile(self):
        """Build and compile the CTC training model around ``crnn_model``.

        Creates three extra inputs ('the_labels', 'input_length', 'label_length'),
        feeds them with the network output into the 'ctc' Lambda layer, and
        compiles the resulting model with Adam (learning rate 0.0001).

        Returns:
            The compiled training model, also stored as ``self.model``
            (this attribute does not exist until ``compile`` has been called).
        """
        labels = Input(name='the_labels', shape=[self.max_label_len], dtype='float32')
        input_length = Input(name='input_length', shape=[1], dtype='int64')
        label_length = Input(name='label_length', shape=[1], dtype='int64')

        loss_out = Lambda(self.ctc_lambda_func, output_shape=(1,), name='ctc')([self.crnn_model.output, labels, input_length, label_length])

        self.model = Model(inputs=[self.crnn_model.input, labels, input_length, label_length], outputs=loss_out)

        optimizer = tf.keras.optimizers.Adam(learning_rate=0.0001)

        # The 'ctc' layer already outputs the loss value, so the Keras loss
        # function simply passes that output through and ignores y_true.
        self.model.compile(loss = {'ctc': lambda y_true, y_pred: y_pred}, optimizer = optimizer)
        return self.model

    def load_weights(self, path):
        """Load weights from ``path`` into the inference model ``crnn_model``."""
        self.crnn_model.load_weights(path)

    def predict(self, input_batch):
        """Run the inference model on ``input_batch`` and return its output."""
        return self.crnn_model.predict(input_batch)

In [73]:
# Build the recogniser with one output class per character plus one extra
# class (presumably the CTC blank).
crnn = CRNN(
    input_shape=Config.input_shape,
    num_classes=len(Config.label) + 1,
    max_label_len=Config.max_label_len,
)

In [74]:
# Restore the saved parameters into the inference model.
crnn.load_weights(path='./check_weight.hdf5')

In [None]:
def predict_answer(img):
    """Recognise the text in one preprocessed image.

    Args:
        img: A single preprocessed image (as returned by ``img_processing``).

    Returns:
        The decoded string, built from ``Config.label`` using greedy CTC decoding.
    """
    # The model expects a batch, so wrap the single image in a batch of one.
    batch = np.array([img])
    probabilities = crnn.predict(batch)

    # Every sample uses the full number of time steps produced by the model.
    batch_size, n_steps = probabilities.shape[0], probabilities.shape[1]
    sequence_lengths = np.ones(batch_size) * n_steps

    decoded_paths = K.ctc_decode(probabilities, input_length=sequence_lengths, greedy=True)[0]
    class_ids = K.get_value(decoded_paths[0])[0]

    # Entries equal to -1 are padding and carry no character.
    return "".join(Config.label[int(cid)] for cid in class_ids if int(cid) != -1)

In [None]:
import time
import pandas as pd
import os
import cv2

def predict(image_folder, output_file_path=None):
    """Run OCR on every image under ``image_folder`` and collect the results.

    The folder is expected to contain one sub-directory per person, each holding
    that person's image files. Entries of ``image_folder`` that are not
    directories are skipped. Sub-directories and files are visited in sorted
    order so the output is deterministic.

    Args:
        image_folder: Root directory laid out as ``<person_id>/<image file>``.
        output_file_path: Where to write the results as CSV (without index).
            If ``None``, nothing is written.

    Returns:
        A ``pandas.DataFrame`` with columns ``id`` (``<person_id>/<image file>``),
        ``answer`` (recognised text) and ``elapsed_time`` (seconds spent on
        preprocessing plus recognition for that image).
    """
    columns = ['id', 'answer', 'elapsed_time']
    # Collect plain tuples and build the frame once; growing a DataFrame
    # row by row is needlessly slow.
    records = []

    for person_id in sorted(os.listdir(image_folder)):
        person_dir = os.path.join(image_folder, person_id)
        if not os.path.isdir(person_dir):
            # Stray files next to the person folders would make listdir fail.
            continue
        for image_name in sorted(os.listdir(person_dir)):
            fp = os.path.join(person_dir, image_name)
            image_id = os.path.join(person_id, image_name)

            # Time preprocessing + inference with a monotonic clock.
            start = time.perf_counter()
            image = img_processing(fp)
            answer = predict_answer(image)
            elapsed = time.perf_counter() - start

            records.append((image_id, answer, elapsed))

    prediction = pd.DataFrame(records, columns=columns)
    if output_file_path is not None:
        prediction.to_csv(output_file_path, index=False)
    return prediction

In [None]:
# Run inference over the private test images and write the submission CSV.
predict(
    image_folder="private_test/images",
    output_file_path="team_00_private_test_pred.csv",
)