In [1]:
import tensorflow as tf
# Ask TensorFlow which physical GPU devices it can see and report how many.
gpu_devices = tf.config.experimental.list_physical_devices('GPU')
print("Num GPUs Available: ", len(gpu_devices))

Num GPUs Available:  0


In [2]:
import os
import datetime
import string
import sys

# Make modules located two directory levels above the working directory importable.
sys.path.append('../../')

# ---- experiment settings -------------------------------------------------
source = "bentham"   # dataset name; also the stem of the HDF5 file name
arch = "flor"        # architecture identifier handed to the model builder
epochs = 20          # the original author's note records 1000 as an alternative value
batch_size = 16

# ---- file locations (relative to the working directory) -------------------
output_path = os.path.join("..", "output", source, arch)
os.makedirs(output_path, exist_ok=True)  # no error if the directory already exists
source_path = os.path.join("..", "data", source + ".hdf5")
target_path = os.path.join(output_path, "checkpoint_weights.hdf5")

# ---- model input description ----------------------------------------------
input_size = (1024, 128, 1)
max_text_length = 128  # maximum number of characters per text line
# Digits, ASCII letters, punctuation and the space character: exactly the
# first 95 characters of string.printable.
charset_base = string.digits + string.ascii_letters + string.punctuation + " "

print(f"source: {source_path}",
      f"output {output_path}",
      f"target {target_path}",
      f"charset: {charset_base}",
      sep="\n")

source: ../data/bentham.hdf5
output ../output/bentham/flor
target ../output/bentham/flor/checkpoint_weights.hdf5
charset: 0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~ 


In [3]:
from utils.generator import DataGenerator

# Build the data generator from the dataset file and the settings defined above.
dtgen = DataGenerator(
    source=source_path,
    batch_size=batch_size,
    charset=charset_base,
    max_text_length=max_text_length,
)

# Report the number of images in each partition exposed through dtgen.size.
for partition_label, partition_key in (("Train", "train"),
                                       ("Validation", "valid"),
                                       ("Test", "test")):
    print(f"{partition_label} images: {dtgen.size[partition_key]}")

Train images: 8807
Validation images: 1372
Test images: 820


In [4]:
from src.model.model import HTRModel

# Instantiate the HTR model for the chosen architecture; the vocabulary size
# is taken from the data generator's tokenizer.
model = HTRModel(
    architecture=arch,
    input_size=input_size,
    vocab_size=dtgen.tokenizer.vocab_size,
    beam_width=10,
    stop_tolerance=20,
    reduce_tolerance=15,
)

# Compile, then emit the model summary (called with the output directory and
# the file name "summary.txt").
model.compile(learning_rate=0.001)
model.summary(output_path, "summary.txt")

# Per the original note: loads the HDF5 checkpoint weights file if it exists.
model.load_checkpoint(target=target_path)

# Default callbacks, pointed at the output directory and the checkpoint file.
callbacks = model.get_callbacks(
    logdir=output_path,
    checkpoint=target_path,
    verbose=1,
)

Model: "model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input (InputLayer)           [(None, 1024, 128, 1)]    0         
_________________________________________________________________
conv2d (Conv2D)              (None, 512, 64, 16)       160       
_________________________________________________________________
p_re_lu (PReLU)              (None, 512, 64, 16)       16        
_________________________________________________________________
batch_normalization (BatchNo (None, 512, 64, 16)       112       
_________________________________________________________________
full_gated_conv2d (FullGated (None, 512, 64, 16)       4640      
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 512, 64, 32)       4640      
_________________________________________________________________
p_re_lu_1 (PReLU)            (None, 512, 64, 32)       32    

In [5]:
# Wall-clock reference for the total and per-epoch timings reported below.
start_time = datetime.datetime.now()

h = model.fit(
    x=dtgen.next_train_batch(),
    epochs=epochs,
    steps_per_epoch=dtgen.steps['train'],
    validation_data=dtgen.next_valid_batch(),
    validation_steps=dtgen.steps['valid'],
    callbacks=callbacks,
    shuffle=True,
    verbose=1,
)

total_time = datetime.datetime.now() - start_time

# Per-epoch loss curves recorded in the returned history.
loss = h.history['loss']
val_loss = h.history['val_loss']

# Index (0-based) and value of the first epoch with the lowest validation loss.
min_val_loss_i, min_val_loss = min(enumerate(val_loss), key=lambda pair: pair[1])

# len(loss) is the number of epochs actually recorded in the history.
time_epoch = total_time / len(loss)
total_item = dtgen.size['train'] + dtgen.size['valid']

# The report is made of four groups of lines separated by one blank line.
t_corpus = "\n\n".join(
    "\n".join(section)
    for section in (
        (
            f"Total train images:      {dtgen.size['train']}",
            f"Total validation images: {dtgen.size['valid']}",
            f"Batch:                   {dtgen.batch_size}",
        ),
        (
            f"Total time:              {total_time}",
            f"Time per epoch:          {time_epoch}",
            f"Time per item:           {time_epoch / total_item}",
        ),
        (
            f"Total epochs:            {len(loss)}",
            f"Best epoch               {min_val_loss_i + 1}",
        ),
        (
            f"Training loss:           {loss[min_val_loss_i]:.8f}",
            f"Validation loss:         {min_val_loss:.8f}",
        ),
    )
)

# Save the report next to the other run artifacts, then echo it to the cell output.
with open(os.path.join(output_path, "train.txt"), "w") as log_file:
    log_file.write(t_corpus)
    print(t_corpus)

Epoch 1/20

Epoch 00001: val_loss improved from inf to 18.04536, saving model to ../output/bentham/flor/checkpoint_weights.hdf5
Epoch 2/20

Epoch 00002: val_loss improved from 18.04536 to 17.07082, saving model to ../output/bentham/flor/checkpoint_weights.hdf5
Epoch 3/20

Epoch 00003: val_loss improved from 17.07082 to 16.47123, saving model to ../output/bentham/flor/checkpoint_weights.hdf5
Epoch 4/20

Epoch 00004: val_loss improved from 16.47123 to 14.97396, saving model to ../output/bentham/flor/checkpoint_weights.hdf5
Epoch 5/20

Epoch 00005: val_loss did not improve from 14.97396
Epoch 6/20

Epoch 00006: val_loss did not improve from 14.97396
Epoch 7/20

Epoch 00007: val_loss improved from 14.97396 to 14.29444, saving model to ../output/bentham/flor/checkpoint_weights.hdf5
Epoch 8/20

Epoch 00008: val_loss improved from 14.29444 to 13.53467, saving model to ../output/bentham/flor/checkpoint_weights.hdf5
Epoch 9/20

Epoch 00009: val_loss improved from 13.53467 to 13.36485, saving mo

In [None]:
!sudo update-pciids Rishabh.1@


[sudo] password for turing: 