In [40]:
import tensorflow as tf
from tensorflow.keras import datasets, layers, models, optimizers, losses
from tensorflow.keras.layers import Dense, Flatten, Conv2D, MaxPooling2D, Bidirectional,Input
from tensorflow.keras.layers import BatchNormalization, TimeDistributed,LSTM,Dropout,Reshape
from tensorflow.keras import Model
from tensorflow.keras import backend as K
import numpy as np 
import pandas as pd
import os
from data import preproc as pp
from matplotlib import pyplot as plt
from PIL import Image
import random
import cv2

In [2]:
AUTOTUNE = tf.data.experimental.AUTOTUNE

In [3]:
physical_devices = tf.config.list_physical_devices('GPU')
tf.config.experimental.set_memory_growth(physical_devices[0], True)

In [4]:
#data_dir = '/home/austin/Documents/Github/handwritingnotebook/data/'
data_dir = '/Documents/handwritingnotebook/data/'

In [5]:
data_csv = pd.read_csv(data_dir+'words_csv/2020-06-03 11:39:42.000901.csv')

In [6]:
BATCH_SIZE = 25
IMG_HEIGHT = data_csv['height'].max()
IMG_WIDTH = data_csv['width'].max()
DATASET_SIZE = data_csv.shape[0]
alphabet = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz.,()?!':; "

In [7]:
IMG_HEIGHT

464

In [8]:
IMG_WIDTH

783

In [9]:
data_practice = data_csv.truth.tolist()

In [10]:
#dataset = tf.data.TextLineDataset(data_dir+'words_csv/2020-06-03 11:39:42.000901.csv').skip(1)

In [11]:
def convert_to_onehot(data):
    #Creates a dict, that maps to every char of alphabet an unique int based on position
    char_to_int = dict((c,i) for i,c in enumerate(alphabet))
    encoded_data = []
    #Replaces every char in data with the mapped int
    encoded_data.append([char_to_int[char] for char in data])
    encoded_data = encoded_data[0] # Prints the int encoded array

    #This part now replaces the int by an one-hot array with size alphabet
    letter = [0. for _ in range(len(alphabet)+1)]
    for value in encoded_data:
        #At first, the whole array is initialized with 0
        #Only at the number of the int, 1 is written
        letter[value] = 1.
    letter = tf.convert_to_tensor(letter)
    return letter


In [12]:
# Translation of characters to unique integer values
def text_to_labels(text):
    ret = []
    for char in text:
        ret.append(alphabet.find(char))
    ret = tf.convert_to_tensor(ret)
    return ret


In [13]:
# Reverse translation of numerical classes back to characters
def labels_to_text(labels):
    ret = []
    for c in labels:
        if c == len(alphabet):  # CTC Blank
            ret.append("")
        else:
            ret.append(alphabet[c])
    return "".join(ret)

In [14]:
def preprocess_image(img_path, data_dir, imgSize):
    img_path = data_dir + 'words_screenshot_labeled/' + img_path
    img = cv2.imread(img_path,0)
    (wt, ht) = imgSize
    (h, w) = img.shape
    fx = w / wt
    fy = h / ht
    f = max(fx, fy)
    newSize = (max(min(wt, int(w / f)), 1), max(min(ht, int(h / f)), 1)) # scale according to f (result at least 1 and at most wt or ht)
    img = cv2.resize(img, newSize)
    target = np.ones([ht, wt]) * 255
    target[0:newSize[1], 0:newSize[0]] = img

    # transpose for TF
    img = cv2.transpose(target)

    # normalize
    (m, s) = cv2.meanStdDev(img)
    m = m[0][0]
    s = s[0][0]
    img = img - m
    img = img / s if s>0 else img
    img = tf.convert_to_tensor(img)
    img = tf.expand_dims(img,2)
    return img

In [15]:
def create_img_dataset(dataset,data_dir):
    img_dataset_list = []
    for index, row in dataset.iterrows():
        img_dataset_list.append(preprocess_image(row[0],data_dir,[128,32]))
    data_as_dataset = tf.data.Dataset.from_tensor_slices(img_dataset_list)
    return data_as_dataset

In [16]:
def create_label_dataset(dataset):
    label_dataset_list = []
    for index, row in dataset.iterrows():
        label_dataset_list.append(convert_to_onehot(row[1]))
    data_as_dataset = tf.data.Dataset.from_tensor_slices(label_dataset_list)
    return data_as_dataset

In [17]:
imgs = create_img_dataset(data_csv,data_dir)
labels = create_label_dataset(data_csv)

In [18]:
labeled_ds = tf.data.Dataset.zip((imgs,labels))

In [19]:
train_size = int(0.90 * DATASET_SIZE)
test_size = int(0.10 * DATASET_SIZE)

full_dataset = labeled_ds.shuffle(DATASET_SIZE)
train_ds = full_dataset.take(train_size)
train_ds = train_ds.batch(BATCH_SIZE, drop_remainder=True)
test_ds = full_dataset.skip(train_size)
test_ds = test_ds.batch(BATCH_SIZE, drop_remainder=True)

In [20]:
test_ds

<BatchDataset shapes: ((25, 128, 32, 1), (25, 63)), types: (tf.float64, tf.float32)>

In [62]:
inputs = layers.Input(name="input", shape=(128,32,1))

cnn = layers.Conv2D(filters=16, kernel_size=(3, 3), strides=(1, 1), padding="same")(inputs)
cnn = layers.BatchNormalization()(cnn)
cnn = layers.LeakyReLU(alpha=0.01)(cnn)
cnn = layers.MaxPooling2D(pool_size=(2, 2), strides=(2, 2), padding="valid")(cnn)

cnn = layers.Conv2D(filters=32, kernel_size=(3, 3), strides=(1, 1), padding="same")(cnn)
cnn = layers.BatchNormalization()(cnn)
cnn = layers.LeakyReLU(alpha=0.01)(cnn)
cnn = layers.MaxPooling2D(pool_size=(2, 2), strides=(2, 2), padding="valid")(cnn)

cnn = layers.Dropout(rate=0.2)(cnn)
cnn = layers.Conv2D(filters=48, kernel_size=(3, 3), strides=(1, 1), padding="same")(cnn)
cnn = layers.BatchNormalization()(cnn)
cnn = layers.LeakyReLU(alpha=0.01)(cnn)
cnn = layers.MaxPooling2D(pool_size=(2, 2), strides=(2, 2), padding="valid")(cnn)

cnn = layers.Dropout(rate=0.2)(cnn)
cnn = layers.Conv2D(filters=64, kernel_size=(3, 3), strides=(1, 1), padding="same")(cnn)
cnn = layers.BatchNormalization()(cnn)
cnn = layers.LeakyReLU(alpha=0.01)(cnn)

cnn = layers.Dropout(rate=0.2)(cnn)
cnn = layers.Conv2D(filters=80, kernel_size=(3, 3), strides=(1, 1), padding="same")(cnn)
cnn = layers.BatchNormalization()(cnn)
cnn = layers.LeakyReLU(alpha=0.01)(cnn)

shape = cnn.get_shape()
blstm = layers.Reshape((shape[1], shape[2] * shape[3]))(cnn)

blstm = layers.Bidirectional(LSTM(units=256, return_sequences=True, dropout=0.5))(blstm)
blstm = layers.Bidirectional(LSTM(units=256, return_sequences=True, dropout=0.5))(blstm)
blstm = layers.Bidirectional(LSTM(units=256, return_sequences=True, dropout=0.5))(blstm)
blstm = layers.Bidirectional(LSTM(units=256, return_sequences=True, dropout=0.5))(blstm)
blstm = layers.Bidirectional(LSTM(units=256, return_sequences=True, dropout=0.5))(blstm)

blstm = layers.Dropout(rate=0.5)(blstm)
outputs = layers.Dense(units=63, activation="softmax")(blstm)


In [63]:
model = tf.keras.Model(inputs = inputs, outputs = outputs, name = 'handwriting_ctc')

In [64]:
optimizer = tf.keras.optimizers.RMSprop(learning_rate=0.1)

In [81]:
def ctc_loss_lambda_func(y_true, y_pred):
    """Function for computing the CTC loss"""

    if len(y_true.shape) > 2:
        y_true = tf.squeeze(y_true)

    # y_pred.shape = (batch_size, string_length, alphabet_size_1_hot_encoded)
    # output of every model is softmax
    # so sum across alphabet_size_1_hot_encoded give 1
    #               string_length give string length
    input_length = tf.math.reduce_sum(y_pred, axis=-1, keepdims=False)
    input_length = tf.math.reduce_sum(input_length, axis=-1, keepdims=True)
    
    # y_true strings are padded with 0
    # so sum of non-zero gives number of characters in this string
    label_length = tf.math.count_nonzero(y_true, axis=-1, keepdims=True, dtype="int64")

    loss = tf.nn.ctc_loss(y_true, 
                          y_pred, 
                          label_length, 
                          input_length, 
                          logits_time_major=False
                         )

    # average loss across all entries in the batch
    loss = tf.reduce_mean(loss)

    return loss


In [82]:
model.compile(loss = ctc_loss_lambda_func,
              optimizer= 'adam',
              metrics=['accuracy'])

TypeError: Value passed to parameter 'indices' has DataType float32 not in list of allowed values: uint8, int32, int64

In [79]:
model.summary()

Model: "handwriting_ctc"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input (InputLayer)           [(None, 128, 32, 1)]      0         
_________________________________________________________________
conv2d_15 (Conv2D)           (None, 128, 32, 16)       160       
_________________________________________________________________
batch_normalization_15 (Batc (None, 128, 32, 16)       64        
_________________________________________________________________
leaky_re_lu_15 (LeakyReLU)   (None, 128, 32, 16)       0         
_________________________________________________________________
max_pooling2d_9 (MaxPooling2 (None, 64, 16, 16)        0         
_________________________________________________________________
conv2d_16 (Conv2D)           (None, 64, 16, 32)        4640      
_________________________________________________________________
batch_normalization_16 (Batc (None, 64, 16, 32)    

In [80]:
model.fit(train_ds, epochs=10)

Train for 19 steps
Epoch 1/10
 1/19 [>.............................] - ETA: 52s

ValueError: Dimensions must be equal, but are 25 and 16 for 'metrics/accuracy/Equal' (op: 'Equal') with input shapes: [25], [25,16].