In [1]:
import fnmatch
import cv2
import numpy as np
import string
import time
import math

from keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.utils import Sequence

from keras.layers import Dense, LSTM, Reshape, BatchNormalization, Input, Conv2D, MaxPool2D, Lambda, Bidirectional, TimeDistributed
from keras.models import Model
from keras.activations import relu, sigmoid, softmax
import keras.backend as K
from tensorflow.keras.utils import to_categorical
from keras.callbacks import ModelCheckpoint
from keras.callbacks import EarlyStopping

import os
import tensorflow as tf
import random
import matplotlib.pyplot as plt
from PIL import Image
from sklearn.utils import shuffle
from collections import defaultdict


In [2]:
#char_list = string.ascii_letters+string.digits

punclist='.?,'
#Character sets to choose from.
smallletters=string.ascii_lowercase
capitalletters=string.ascii_uppercase
digits=string.digits
char_list=smallletters+capitalletters+digits+punclist

chars = defaultdict(int)
 
def encode_to_labels(txt):
    # encoding each output word into digits
    dig_lst = []
    for index, char in enumerate(txt):
        try:
            dig_lst.append(char_list.index(char))
        except:
            print(char)
        
    return dig_lst

def find_dominant_color(image):
        #Resizing parameters
        width, height = 150,150
        image = image.resize((width, height),resample = 0)
        #Get colors from image object
        pixels = image.getcolors(width * height)
        #Sort them by count number(first element of tuple)
        sorted_pixels = sorted(pixels, key=lambda t: t[0])
        #Get the most frequent color
        dominant_color = sorted_pixels[-1][1]
        return dominant_color
    
    

def preprocess_img(img, imgSize):
    "put img into target img of size imgSize, transpose for TF and normalize gray-values"


    if img is None:
        img = np.zeros([imgSize[1], imgSize[0]]) 
        print("Image None!")


    (wt, ht) = imgSize
    (h, w) = img.shape
    fx = w / wt
    fy = h / ht
    f = max(fx, fy)
    newSize = (max(min(wt, int(w / f)), 1),
               max(min(ht, int(h / f)), 1))  
    img = cv2.resize(img, newSize, interpolation=cv2.INTER_CUBIC) 
    most_freq_pixel=find_dominant_color(Image.fromarray(img))
    target = np.ones([ht, wt]) * most_freq_pixel  
    target[0:newSize[1], 0:newSize[0]] = img

    img = target

    return img

def counting_characters(labels):
    for label in labels:
        for char in label:
            chars[char] += 1
    return chars




In [3]:

annot=open('content/Data-generator-for-CRNN/annotation.txt','r').readlines()
imagenames=[]
txts=[]

for cnt in annot:

    filename,txt=cnt.split('~')[0],cnt.split('~')[1].split('\n')[0]
    imagenames.append(filename)
    txts.append(txt)
c = list(zip(imagenames, txts))

random.shuffle(c)

imagenames, txts = zip(*c)

class DataGenerator(Sequence):
    i=0
    def __init__(self, dataset,words,max_label_len,char_list, batch_size=64, shuffle=True):
        'Initialization'
        self.batch_size = batch_size
        self.dataset = dataset
        self.words = words
        self.shuffle = shuffle
        self.indexes = 0
        self.max_label_len=max_label_len
        self.char_list=char_list
        self.on_epoch_end()
    
    def __len__(self):
        'Denotes the number of batches per epoch' 
        return int(np.floor(len(self.dataset) / self.batch_size))-1
        #return math.ceil(len(self.dataset) / self.batch_size)
        
    def __getitem__(self, index):# chyba indexuje dla batcha ale nwm
        'Generate one batch of data'
        training_img = []
        training_txt = []
        train_input_length = []
        train_label_length = []
        orig_txt = []
        
        for ind in range(self.batch_size):
            img = cv2.imread('content/Data-generator-for-CRNN/images/'+imagenames[self.i],0)
            img=preprocess_img(img,(128,32))
            img=np.expand_dims(img,axis=-1)
            img = img/255.
            txt = txts[self.i]

            orig_txt.append(txt)   
            train_label_length.append(len(txt))
            train_input_length.append(31)
            training_img.append(img)
            training_txt.append(encode_to_labels(txt))
            self.i=self.i+1

        train_padded_txt = pad_sequences(training_txt, maxlen=self.max_label_len, padding='post', value = len(self.char_list))
        training_img = np.array(training_img)
        train_input_length = np.array(train_input_length)
        train_label_length = np.array(train_label_length)
        return [training_img, train_padded_txt, train_input_length, train_label_length],np.zeros(len(training_img))

    def on_epoch_end(self):
        print("Next epoch: ",self.i)
        self.i=0

max_label_len=20
prop=int(np.floor(len(imagenames)*0.95))# split generated data
trainTuples=imagenames[:prop]
validTuples=imagenames[prop:]

trainTxts=txts[:prop]
validTxts=txts[prop:]  
         
train_generator = DataGenerator(dataset=trainTuples,words=trainTxts,max_label_len=20,char_list=char_list)
valid_generator = DataGenerator(dataset=validTuples,words=validTxts,max_label_len=20,char_list=char_list)

Next epoch:  0
Next epoch:  0


In [4]:
inputs = Input(shape=(32,128,1))
 

conv_1 = Conv2D(64, (3,3), activation = 'relu', padding='same')(inputs)

pool_1 = MaxPool2D(pool_size=(2, 2), strides=2)(conv_1)
 
conv_2 = Conv2D(128, (3,3), activation = 'relu', padding='same')(pool_1)
pool_2 = MaxPool2D(pool_size=(2, 2), strides=2)(conv_2)
 
conv_3 = Conv2D(256, (3,3), activation = 'relu', padding='same')(pool_2)
 
conv_4 = Conv2D(256, (3,3), activation = 'relu', padding='same')(conv_3)

pool_4 = MaxPool2D(pool_size=(2, 1))(conv_4)
 
conv_5 = Conv2D(512, (3,3), activation = 'relu', padding='same')(pool_4)

batch_norm_5 = BatchNormalization()(conv_5)
 
conv_6 = Conv2D(512, (3,3), activation = 'relu', padding='same')(batch_norm_5)
batch_norm_6 = BatchNormalization()(conv_6)

pool_6 = MaxPool2D(pool_size=(2, 1))(batch_norm_6)

 
conv_7 = Conv2D(512, (2,2), activation = 'relu')(pool_6)
 
squeezed = Lambda(lambda x: K.squeeze(x, 1))(conv_7)


blstm_1 = Bidirectional(LSTM(128, return_sequences=True, dropout = 0.2))(squeezed)
blstm_2 = Bidirectional(LSTM(128, return_sequences=True, dropout = 0.2))(blstm_1)
 
outputs = (Dense(len(char_list)+1, activation = 'softmax'))(blstm_2)


act_model = Model(inputs, outputs)

2022-06-18 13:05:57.093704: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:936] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-06-18 13:05:57.122376: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:936] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-06-18 13:05:57.122569: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:936] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-06-18 13:05:57.123571: I tensorflow/core/platform/cpu_feature_guard.cc:151] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags

In [5]:
act_model.summary()

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 32, 128, 1)]      0         
                                                                 
 conv2d (Conv2D)             (None, 32, 128, 64)       640       
                                                                 
 max_pooling2d (MaxPooling2D  (None, 16, 64, 64)       0         
 )                                                               
                                                                 
 conv2d_1 (Conv2D)           (None, 16, 64, 128)       73856     
                                                                 
 max_pooling2d_1 (MaxPooling  (None, 8, 32, 128)       0         
 2D)                                                             
                                                                 
 conv2d_2 (Conv2D)           (None, 8, 32, 256)        295168

In [6]:
labels = Input(name='the_labels', shape=[max_label_len], dtype='float32')
input_length = Input(name='input_length', shape=[1], dtype='int64')
label_length = Input(name='label_length', shape=[1], dtype='int64')
 
 
def ctc_lambda_func(args):
    y_pred, labels, input_length, label_length = args

    return K.ctc_batch_cost(labels, y_pred, input_length, label_length)
 
 
loss_out = Lambda(ctc_lambda_func, output_shape=(1,), name='ctc')([outputs, labels, input_length, label_length])


model = Model(inputs=[inputs, labels, input_length, label_length], outputs=loss_out)

In [7]:
#model.compile(loss={'ctc': lambda y_true, y_pred: y_pred}, optimizer = 'adam',metrics=['accuracy'])
model.compile(loss={'ctc': lambda y_true, y_pred: y_pred}, optimizer=tf.keras.optimizers.Adam(learning_rate=0.0001),metrics=['accuracy'])


es = EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=5) 
    
filepath='CRNN_model.hdf5'
checkpoint = ModelCheckpoint(filepath=filepath, monitor='val_loss', verbose=1, save_best_only=True, mode='auto')
callbacks_list = [checkpoint,es]

In [8]:
epochs = 10
history = model.fit(train_generator,validation_data = valid_generator,epochs = epochs,verbose = 1, callbacks = callbacks_list)

Epoch 1/10


2022-06-18 13:06:03.010924: I tensorflow/stream_executor/cuda/cuda_dnn.cc:368] Loaded cuDNN version 8401
2022-06-18 13:06:03.966078: I tensorflow/core/platform/default/subprocess.cc:304] Start cannot spawn child process: No such file or directory
2022-06-18 13:06:04.862723: I tensorflow/stream_executor/cuda/cuda_blas.cc:1786] TensorFloat-32 will be used for the matrix multiplication. This will only be logged once.



Epoch 1: val_loss improved from inf to 0.15634, saving model to CRNN_model.hdf5
Next epoch:  3800000
Epoch 2/10

Epoch 2: val_loss improved from 0.15634 to 0.08125, saving model to CRNN_model.hdf5
Next epoch:  3799936
Epoch 3/10

Epoch 3: val_loss improved from 0.08125 to 0.06282, saving model to CRNN_model.hdf5
Next epoch:  3799936
Epoch 4/10

Epoch 4: val_loss improved from 0.06282 to 0.04488, saving model to CRNN_model.hdf5
Next epoch:  3799936
Epoch 5/10

Epoch 5: val_loss did not improve from 0.04488
Next epoch:  3799936
Epoch 6/10

Epoch 6: val_loss improved from 0.04488 to 0.04254, saving model to CRNN_model.hdf5
Next epoch:  3799936
Epoch 7/10

Epoch 7: val_loss improved from 0.04254 to 0.03696, saving model to CRNN_model.hdf5
Next epoch:  3799936
Epoch 8/10

Epoch 8: val_loss improved from 0.03696 to 0.02566, saving model to CRNN_model.hdf5
Next epoch:  3799936
Epoch 9/10

Epoch 9: val_loss did not improve from 0.02566
Next epoch:  3799936
Epoch 10/10

Epoch 10: val_loss did 

In [9]:
# load the saved best model weights
act_model.load_weights('CRNN_model.hdf5')
 
# predict outputs on validation images
prediction = act_model.predict(valid_img[10:20])
 
# use CTC decoder
out = K.get_value(K.ctc_decode(prediction, input_length=np.ones(prediction.shape[0])*prediction.shape[1],
                         greedy=True)[0][0])
 
# see the results
i = 10
for x in out:
    print("original_text = ", valid_orig_txt[i])
    print("predicted text = ", end = '')
    for p in x:  
        if int(p) != -1:
            print(char_list[int(p)], end = '')       
    print('\n')
    i+=1

NameError: name 'valid_img' is not defined

In [None]:
plt.plot(history.history['accuracy'])
plt.plot(history.history['val_accuracy'])
plt.title('Model accuracy')
plt.ylabel('Accuracy')
plt.xlabel('Epoch')
plt.legend(['Train','Validation'], loc='upper left')
plt.savefig('my_plot.png')
plt.show
