## Data Cleaning

In [1]:
import numpy as np
import pandas as pd

In [2]:
# Load the training labels, discard rows with missing values and rows whose
# label is the placeholder "unreadable" (any casing), then renumber from 0.
train_data = (
    pd.read_csv("written_name_train_v2.csv")
    .dropna()
    .loc[lambda frame: frame['IDENTITY'].str.lower() != 'unreadable']
    .reset_index(drop=True)
)
# Per-column count of remaining nulls, displayed as the cell output.
train_data.isnull().sum()

FILENAME    0
IDENTITY    0
dtype: int64

In [3]:
# Load the test labels, discard rows with missing values and rows whose
# label is the placeholder "unreadable" (any casing), then renumber from 0.
test = (
    pd.read_csv("written_name_test_v2.csv")
    .dropna()
    .loc[lambda frame: frame['IDENTITY'].str.lower() != 'unreadable']
    .reset_index(drop=True)
)
# Per-column count of remaining nulls, displayed as the cell output.
test.isnull().sum()

FILENAME    0
IDENTITY    0
dtype: int64

In [4]:
# Load the validation labels, discard rows with missing values and rows whose
# label is the placeholder "unreadable" (any casing), then renumber from 0.
validation = (
    pd.read_csv("written_name_validation_v2.csv")
    .dropna()
    .loc[lambda frame: frame['IDENTITY'].str.lower() != 'unreadable']
    .reset_index(drop=True)
)
# Per-column count of remaining nulls, displayed as the cell output.
validation.isnull().sum()

FILENAME    0
IDENTITY    0
dtype: int64

In [5]:
# Upper-case the labels of the two splits used for training and validation;
# the label alphabet defined later contains upper-case letters only.
for frame in (train_data, validation):
    frame['IDENTITY'] = frame['IDENTITY'].str.upper()

## Preprocessing

In [6]:
from skimage.io import imread_collection as imd
import imageio as im

In [7]:
# Number of rows (from the top of each cleaned label table) whose images are
# loaded and encoded in the cells below.
train_size=10000
valid_size=1000

In [8]:
def preprocess(img):
    """Place a 2-D image on a white 64x256 canvas and rotate the result.

    The image is anchored at the top-left corner of the canvas. Anything
    beyond 64 rows or 256 columns is cropped away; smaller images leave the
    remaining canvas at 255 (white).

    Parameters
    ----------
    img : 2-D array (rows, cols). Unpacking ``img.shape`` into two values
        raises ``ValueError`` for any other rank.

    Returns
    -------
    Float array of shape (256, 64): the canvas transposed and then flipped
    along its last axis, i.e. rotated 90 degrees clockwise.
    """
    (h, w) = img.shape
    canvas_h, canvas_w = 64, 256

    # Blank white canvas (float64).
    canvas = np.full((canvas_h, canvas_w), 255.0)

    # Copy only the part of the image that fits on the canvas.
    rows, cols = min(h, canvas_h), min(w, canvas_w)
    canvas[:rows, :cols] = img[:rows, :cols]

    # Transpose, then reverse the (now 64-long) last axis.
    return canvas.T[:, ::-1]

In [9]:
def rgb2gray(rgb):
    """Convert an image to a single-channel float luminance array.

    Parameters
    ----------
    rgb : array-like
        Either a colour image whose last axis holds at least three channels
        (R, G, B, optionally followed by others such as alpha, which are
        ignored), or a 2-D image that is already single-channel.

    Returns
    -------
    Float array without the channel axis. Colour input is reduced with the
    weights 0.2989 / 0.5870 / 0.1140; 2-D input is returned as float64 with
    its values unchanged.
    """
    if np.ndim(rgb) == 2:
        # Already grayscale. Without this guard the channel slice below would
        # take the first three *columns* and collapse the image to 1-D.
        return np.asarray(rgb, dtype=np.float64)
    return np.dot(rgb[..., :3], [0.2989, 0.5870, 0.1140])

In [10]:
# Load the first `train_size` training images: read from disk, convert to
# grayscale, pad/rotate with preprocess(), and divide by 255.
train_img=[]
im_main="train_v2/train/"
for i in range(train_size):
    raw = im.imread(im_main + train_data.loc[i, 'FILENAME'])
    img = preprocess(rgb2gray(raw)) / 255
    train_img.append(img)
    # Progress marker every 1000 images.
    if not i % 1000:
        print(i)
    

0
1000
2000
3000
4000
5000
6000
7000
8000
9000


In [11]:
# Load the first `valid_size` validation images: read from disk, convert to
# grayscale, pad/rotate with preprocess(), and divide by 255.
valid_img=[]
im_main="validation_v2/validation/"
for i in range(valid_size):
    raw = im.imread(im_main + validation.loc[i, 'FILENAME'])
    img = preprocess(rgb2gray(raw)) / 255
    valid_img.append(img)
    # Progress marker every 500 images.
    if not i % 500:
        print(i)

0
500


In [12]:
# Stack the per-image arrays and add a trailing channel axis of size 1.
# preprocess() yields 256x64 arrays, so the result is (n_images, 256, 64, 1),
# which matches the model's Input(shape=(256, 64, 1)).
train_img = np.array(train_img).reshape(-1, 256, 64, 1)
valid_img = np.array(valid_img).reshape(-1, 256, 64, 1)

In [13]:
# Sanity check: expect (train_size, 256, 64, 1).
train_img.shape

(10000, 256, 64, 1)

In [31]:
# Sanity check: expect (valid_size, 256, 64, 1).
valid_img.shape

(1000, 256, 64, 1)

## Dimensionality Reduction

In [16]:
from sklearn.decomposition import PCA

In [264]:
# A fractional n_components asks scikit-learn to keep as many components as
# are needed to explain that share of the variance (here 99.95%).
pca = PCA(.9995)

In [271]:
# Fit the PCA on the first training sample only.
# NOTE(review): after the reshape in the preprocessing section each sample is
# (256, 64, 1), while PCA.fit presumably needs a 2-D array. The execution
# counts here are out of order, so confirm what train_img looked like when
# this cell was actually run.
pca.fit(train_img[0])

PCA(copy=True, iterated_power='auto', n_components=0.9995, random_state=None,
    svd_solver='auto', tol=0.0, whiten=False)

In [266]:
# NOTE(review): each fit() call presumably refits the estimator from scratch,
# in which case only the last image fitted determines `pca` after this loop
# (verify against the scikit-learn docs). If the intent was to fit on many
# images jointly, this does not do that.
for i in range(500):
    pca.fit(train_img[i])

In [272]:
# Number of components the most recent fit decided to keep.
pca.n_components_

30

In [None]:
# NOTE(review): this cell has no execution count, i.e. it was never run.
# It writes each transformed sample back into train_img[i]. With the 30
# components reported above, the transformed array would not have the same
# shape as the (256, 64, 1) slot it is assigned to, and the model input is
# declared as (256, 64, 1). TODO confirm intent before running.
for i in range(len(train_img)):
    x=pca.transform(train_img[i])
    train_img[i]=x
    if i%500==0:
        print(i)

In [None]:
# NOTE(review): never executed (no execution count). Same in-place overwrite
# as the training loop, applied to valid_img. TODO confirm intent before
# running.
for i in range(len(valid_img)):
    valid_img[i]=pca.transform(valid_img[i])
    if i%500==0:
        print(i)

In [348]:
# NOTE(review): the recorded output below reports 20809 samples, which does
# not match train_size=10000 used above. It is likely stale output from an
# earlier session.
train_img.shape

(20809, 256, 64, 1)

## Loss Function

In [14]:
from keras.layers import Input

Using TensorFlow backend.
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


In [15]:
from keras.layers import Lambda

In [16]:
# Characters a label may contain; a character's position in this string is
# the integer class used to encode it.
alphabets = u"ABCDEFGHIJKLMNOPQRSTUVWXYZ-' "
# Width of the padded label arrays built below.
max_str_len = 24 
# One output class per character plus one extra (presumably the CTC blank;
# confirm against the Keras backend docs).
num_of_characters = len(alphabets) + 1 
# Sequence length of the model output (the Reshape layer targets (32, 1024)).
num_of_timestamps = 32 


def label_to_num(label):
    """Encode a text label as an array of character indices.

    Each character is replaced by its position in the module-level
    ``alphabets`` string. ``str.find`` is used for the lookup, so a character
    that is not in ``alphabets`` is encoded as -1.

    Parameters
    ----------
    label : str

    Returns
    -------
    numpy.ndarray with one entry per character of ``label``.
    """
    return np.array([alphabets.find(symbol) for symbol in label])

def num_to_label(num):
    """Decode a sequence of character indices back into text.

    Indices are looked up in the module-level ``alphabets`` string. Decoding
    stops at the first -1, which is treated as end-of-sequence padding.

    Parameters
    ----------
    num : iterable of integer indices

    Returns
    -------
    str
    """
    pieces = []
    for index in num:
        if index == -1:
            # Padding marker: everything after it is ignored.
            break
        pieces.append(alphabets[index])
    return "".join(pieces)

In [17]:
# Arrays fed to the CTC training graph for the training split:
#   train_y         - encoded labels, right-padded with -1 to max_str_len
#   train_label_len - true length of each label
#   train_input_len - sequence length handed to CTC (timestamps minus the
#                     two steps that ctc_func slices off)
#   train_output    - all-zero dummy target
train_y = np.full([train_size, max_str_len], -1.0)
train_label_len = np.zeros([train_size, 1])
train_input_len = np.full([train_size, 1], float(num_of_timestamps - 2))
train_output = np.zeros([train_size])

for i in range(train_size):
    identity = train_data.loc[i, 'IDENTITY']
    n_chars = len(identity)
    train_label_len[i] = n_chars
    train_y[i, :n_chars] = label_to_num(identity)

In [18]:
# Arrays fed to the CTC training graph for the validation split:
#   valid_y         - encoded labels, right-padded with -1 to max_str_len
#   valid_label_len - true length of each label
#   valid_input_len - sequence length handed to CTC (timestamps minus the
#                     two steps that ctc_func slices off)
#   valid_output    - all-zero dummy target
valid_y = np.full([valid_size, max_str_len], -1.0)
valid_label_len = np.zeros([valid_size, 1])
valid_input_len = np.full([valid_size, 1], float(num_of_timestamps - 2))
valid_output = np.zeros([valid_size])

for i in range(valid_size):
    identity = validation.loc[i, 'IDENTITY']
    n_chars = len(identity)
    valid_label_len[i] = n_chars
    valid_y[i, :n_chars] = label_to_num(identity)

In [19]:
def ctc_func(args):
    """Compute the batch CTC cost inside a Keras Lambda layer.

    Parameters
    ----------
    args : sequence of four tensors, in this order:
        model predictions, ground-truth labels, input (prediction) lengths,
        label lengths.

    Returns
    -------
    The tensor returned by ``K.ctc_batch_cost``.

    The first two time steps of the predictions are sliced off before the
    cost is computed; the input lengths built elsewhere in this notebook are
    ``num_of_timestamps - 2`` to match.
    """
    predictions, targets, pred_lengths, target_lengths = args
    trimmed = predictions[:, 2:, :]
    return K.ctc_batch_cost(targets, trimmed, pred_lengths, target_lengths)

In [20]:
# Extra graph inputs needed only to compute the CTC loss: the padded encoded
# labels and, per sample, the prediction-sequence length and the label length.
labels = Input(name='gtruth_labels', shape=[max_str_len], dtype='float32')
input_length = Input(name='input_length', shape=[1], dtype='int64')
label_length = Input(name='label_length', shape=[1], dtype='int64')

## Building Model

In [21]:
import keras
from keras.models import Model, Sequential
from keras.layers import Input, Conv2D, MaxPooling2D, Reshape, Bidirectional, LSTM, Dense, Lambda, Activation, BatchNormalization, Dropout

In [22]:
from keras import backend as K
from keras.optimizers import SGD

In [23]:
# CRNN: three conv / batch-norm / max-pool stages, then three bidirectional
# LSTMs and a per-timestep softmax over the character classes.
l0=Input(shape=(256, 64, 1))
# Stage 1: 32 filters; pooling halves both spatial axes -> (128, 32, 32).
l1=Conv2D(32, (3, 3), padding='same',activation='relu')(l0)
l2=BatchNormalization()(l1)
l3=MaxPooling2D(pool_size=(2, 2))(l2)
# Stage 2: 64 filters -> (64, 16, 64) after pooling.
l4=Conv2D(64, (3, 3), padding='same',activation='relu')(l3)
l5=BatchNormalization()(l4)
l6=MaxPooling2D(pool_size=(2, 2))(l5)
l7=Dropout(0.2)(l6)
# Stage 3: 128 filters -> (32, 8, 128) after pooling.
l8=Conv2D(128, (3, 3), padding='same',activation='relu')(l7)
l9=BatchNormalization()(l8)
l10=MaxPooling2D(pool_size=(2, 2))(l9)
l11=Dropout(0.2)(l10)
# Flatten the last two axes (8 * 128 = 1024) so the first axis becomes a
# sequence of 32 time steps for the recurrent layers.
l12=Reshape(target_shape=((32,1024)))(l11)
# Per-timestep projection down to 21 features before the LSTMs.
l13=Dense(21, activation='relu')(l12)
l14=Bidirectional(LSTM(256, return_sequences=True))(l13)
l15=Bidirectional(LSTM(256, return_sequences=True))(l14)
l16=Bidirectional(LSTM(256, return_sequences=True))(l15)
# Class distribution for every time step.
ypred=Dense(num_of_characters,activation='softmax')(l16)
# Image -> per-timestep softmax model. The name `model` is rebound to the
# CTC training graph in a later cell.
model = Model(inputs=l0, outputs=ypred)




In [24]:
# Print the layer-by-layer output shapes and parameter counts.
model.summary()

Model: "model_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, 256, 64, 1)        0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 256, 64, 32)       320       
_________________________________________________________________
batch_normalization_1 (Batch (None, 256, 64, 32)       128       
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 128, 32, 32)       0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 128, 32, 64)       18496     
_________________________________________________________________
batch_normalization_2 (Batch (None, 128, 32, 64)       256       
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 64, 16, 64)        0   

In [26]:
# Compute the CTC loss inside the graph: the Lambda layer named 'ctc' takes
# the softmax output plus the three auxiliary inputs and emits one loss value
# per sample.
ctc_loss = Lambda(ctc_func, output_shape=(1,), name='ctc')([ypred, labels, input_length, label_length])

# Keep an image -> softmax model under its own name. It shares all layers
# (and therefore weights) with the training graph below. Calling predict()
# on the training graph returns CTC loss values, so this is the model to use
# when decoding text.
inference_model = Model(inputs=l0, outputs=ypred)

# Training graph: four inputs, output is the CTC loss. `model` is rebound to
# it, so the compile/fit/save cells that follow operate on this graph.
model = Model(inputs=[l0, labels, input_length, label_length], outputs=ctc_loss)

Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where


In [27]:
# The 'ctc' layer already outputs the loss, so the Keras loss function simply
# returns the layer output (y_pred) and ignores y_true.
model.compile(loss={'ctc': lambda y_true, y_pred: y_pred}, optimizer=SGD(lr = 0.1))

In [29]:
# Train the CTC graph. The targets (train_output / valid_output) are all-zero
# placeholders, because the loss function passed to compile() ignores y_true.
model.fit(x=[train_img, train_y, train_input_len, train_label_len], y=train_output, 
                validation_data=([valid_img, valid_y, valid_input_len, valid_label_len], valid_output),
                epochs=4, batch_size=128)


Train on 10000 samples, validate on 1000 samples
Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4


<keras.callbacks.callbacks.History at 0x1ca653e0cc8>

In [34]:
# Persist the training graph (the model whose output is the CTC loss).
# NOTE(review): it contains a Lambda layer wrapping ctc_func and was compiled
# with a lambda loss, so reloading presumably needs those Python definitions
# available. Verify before relying on this file.
model.save('model.h5')

In [65]:
# Confirm the validation tensor still has the shape the model expects.
valid_img.shape

(1000, 256, 64, 1)

In [74]:
# NOTE(review): at this point `model` is the training graph whose only output
# is the 'ctc' loss layer. `preds` therefore holds one loss value per sample
# (see the (1000, 1) shape printed below), not per-timestep character
# probabilities. Decoding text needs the softmax tensor `ypred`, e.g.
# Model(inputs=l0, outputs=ypred).predict(valid_img). TODO confirm and rework
# the decoding cells that follow accordingly.
preds = model.predict([valid_img, valid_y, valid_input_len, valid_label_len])

In [97]:
from numpy import  newaxis

In [75]:
# Inspect the shape of the prediction output.
preds.shape

(1000, 1)

In [98]:
# Append a trailing axis of size 1, turning the 2-D array into a 3-D one
# before it is passed to K.ctc_decode.
# NOTE(review): re-running this cell fails once preds is already 3-D.
preds = preds[:, :, newaxis]

In [88]:
# One sequence length per sample, all equal to 1.
# NOTE(review): this rebinds `input_length`, which earlier named the Keras
# Input placeholder used to build the CTC layer.
input_length=np.ones(preds.shape[0])
input_length.shape

(1000,)

In [79]:
def ctc_decode(args):
    """Greedy CTC decoding of a batch of predictions.

    Parameters
    ----------
    args : pair ``(y_pred, input_length)``
        ``y_pred`` is the prediction tensor handed to ``K.ctc_decode``.
        ``input_length`` carries one length per sample and must have a
        size-1 axis at position 1, which is squeezed away here.

    Returns
    -------
    Whatever ``K.ctc_decode`` returns for ``greedy=True`` and ``top_paths=1``.
    """
    y_pred, input_length = args
    # Use the Keras backend for the squeeze: `tf` is not imported in the
    # visible cells of this notebook, whereas `K` is.
    seq_len = K.squeeze(input_length, axis=1)

    return K.ctc_decode(y_pred=y_pred, input_length=seq_len, greedy=True, beam_width=100, top_paths=1)


In [None]:
# NOTE(review): this cell has no execution count, i.e. it was never run.
# `decoded` is assigned again two cells below, so this looks like a leftover
# experiment. Consider deleting it.
decoded, _ = K.ctc_decode(preds, input_length)

In [99]:
# Keep element [0][0] of what K.ctc_decode returns (presumably the best-path
# tensor of decoded indices; confirm against the Keras backend docs).
# NOTE(review): this assignment rebinds the name `ctc_decode`, hiding the
# helper function of the same name defined above.
ctc_decode = K.ctc_decode(preds,input_length)[0][0]

In [101]:
# Evaluate the decoded tensor to a concrete array of indices.
decoded=K.get_value(ctc_decode)

In [102]:
# Turn each decoded index row back into text for the first `valid_size` samples.
prediction = [num_to_label(decoded[row]) for row in range(valid_size)]

In [104]:
# Score the predictions against the ground-truth labels.
#   character accuracy: position-by-position matches / total true characters
#   word accuracy:      exact string matches / valid_size
#
# iloc is end-exclusive, so this selects exactly `valid_size` labels. The
# label-based slice .loc[0:valid_size] is end-inclusive and pulled in one
# extra row.
y_true = validation['IDENTITY'].iloc[:valid_size]
correct_char = 0
total_char = 0
correct = 0

for pr, tr in zip(prediction, y_true):
    total_char += len(tr)

    # zip stops at the shorter string, so only overlapping positions count.
    correct_char += sum(p == t for p, t in zip(pr, tr))

    if pr == tr:
        correct += 1

print('Correct characters predicted : %.2f%%' %(correct_char*100/total_char ))
print('Correct words predicted      : %.2f%%' %(correct*100/valid_size ))

Correct characters predicted : 60.00%
Correct words predicted      : 55.00%
