In [1]:
%load_ext autoreload
%autoreload 2
%matplotlib inline

import datetime
from pathlib import Path
import sys
sys.path.append("../../")

import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import tensorflow as tf
import tensorflow_hub as hub

from kerastuner import tuners
from kerastuner import HyperParameters

from tqdm.auto import tqdm

import captchanet

# Root directory holding the dataset, the logs and the saved models.
# It is resolved from the current user's home directory rather than a
# hard-coded `/home/<user>/...` path, so the notebook is not tied to a single
# account (for the original author it resolves to the same location).
data_dir = Path.home() / '.data' / 'Neural_Network' / 'captchanet'
dataset_dir = data_dir / 'dataset_v6'

# TFRecord shards, one sub-directory per split.
train_data_dir = dataset_dir / 'training'
val_data_dir = dataset_dir / 'validation'

tokenizer_path = dataset_dir / "tokenizer.json"

# Output directories are created up front so later cells can write into them.
log_dir = data_dir / 'log'
log_dir.mkdir(exist_ok=True, parents=True)

model_dir = data_dir / 'model'
model_dir.mkdir(exist_ok=True, parents=True)

# Get tokenizer
# NOTE(review): the original cell kept a commented-out call to
# `tf.keras.preprocessing.text.tokenizer_from_json`; the standalone
# `keras_preprocessing` implementation is used instead (reason not recorded).
# Only the function is imported, so no generic module-level name `text` is
# bound in the notebook namespace.
from keras_preprocessing.text import tokenizer_from_json

with open(tokenizer_path) as f:
  tokenizer = tokenizer_from_json(f.read())

In [2]:
# Build the dataset
def make_dataset(data_dir, tokenizer, batch_size, image_size=None, shuffle=False, n=None, max_len_word=10):
  """Build a batched `tf.data` pipeline from the TFRecord files of a directory.

  Args:
    data_dir: `pathlib.Path` of the directory searched (non-recursively) for
      `*.tfrecord` files.
    tokenizer: Tokenizer forwarded to `captchanet.decode_data`.
    batch_size: Number of decoded examples per batch.
    image_size: Forwarded to `captchanet.decode_data` as `image_size`.
    shuffle: When true, records go through a 2048-element shuffle buffer
      before being decoded.
    n: When truthy, only the first `n` records are kept (applied before
      shuffling). A falsy value (`None` or 0) keeps every record.
    max_len_word: Forwarded to `captchanet.decode_data`. Defaults to 10, the
      value that used to be hard-coded here.

  Returns:
    The dataset after `take`/`shuffle`/`map`/`batch`/`prefetch`.

  Raises:
    FileNotFoundError: If `data_dir` contains no `*.tfrecord` file.
  """
  # `glob` yields files in arbitrary order; sorting makes the record order
  # (and therefore `take(n)`) reproducible across runs and machines.
  fnames = sorted(str(p) for p in data_dir.glob("*.tfrecord"))
  if not fnames:
    raise FileNotFoundError(f"No '*.tfrecord' file found in {data_dir}")

  dataset = tf.data.TFRecordDataset(fnames)
  if n:
    dataset = dataset.take(n)
  if shuffle:
    dataset = dataset.shuffle(buffer_size=2048)

  decode_fn = captchanet.decode_data(tokenizer, max_len_word, image_size=image_size)
  dataset = dataset.map(map_func=decode_fn, num_parallel_calls=tf.data.experimental.AUTOTUNE)

  dataset = dataset.batch(batch_size)
  dataset = dataset.prefetch(buffer_size=tf.data.experimental.AUTOTUNE)
  return dataset

# Batch size and target image size shared by the training and validation pipelines.
batch_size = 24
image_size = (299, 299)

# Only the training pipeline is shuffled; `n=None` keeps every record.
train_dataset = make_dataset(
  train_data_dir,
  tokenizer,
  batch_size=batch_size,
  image_size=image_size,
  shuffle=True,
  n=None,
)
val_dataset = make_dataset(
  val_data_dir,
  tokenizer,
  batch_size=batch_size,
  image_size=image_size,
  shuffle=False,
  n=None,
)

W0714 19:35:29.309437 140267627185984 deprecation.py:323] From /home/hadim/conda/envs/captchanet/lib/python3.7/site-packages/tensorflow/python/ops/image_ops_impl.py:1511: div (from tensorflow.python.ops.math_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Deprecated in favor of operator or tf.math.divide.


In [3]:
def build_model(image_shape, image_type, vocabulary_size, max_len_word, params=None, do_fine_tuning=False, do_build=True):
  """Build the captcha model, or return its hyper-parameterised builder.

  The network is a TF-Hub Inception V3 feature extractor followed by dropout
  and `max_len_word` softmax heads (one per character position). The heads are
  concatenated and reshaped to `(max_len_word, vocabulary_size + 1)`.

  Args:
    image_shape: Accepted for interface compatibility but not used: the input
      shape is fixed to `(299, 299, 3)` by the selected hub module.
    image_type: dtype given to the Keras `Input` layer.
    vocabulary_size: Number of tokens; one extra class is added to it (called
      "padding" in the code).
    max_len_word: Number of per-character output heads.
    params: Optional mapping of hyper-parameter name -> value. Each entry is
      registered with `HyperParameters.Fixed` before the model is built,
      presumably so the matching `hp.Choice` calls resolve to it.
    do_fine_tuning: Passed as `trainable` to the hub layer.
    do_build: When false, return the builder function (`_builder(hp)`) instead
      of a model. The values in `params` are not bound to the returned builder.

  Returns:
    The compiled `tf.keras.Model`, or the `_builder` callable when `do_build`
    is false.

  Raises:
    ValueError: If the resolved `optimizer_name` is not one of 'sgd', 'adam'
      or 'rmsprop'.
  """
  supported_optimizers = ('sgd', 'adam', 'rmsprop')

  def _builder(hp):
    """Assemble and compile the model from the hyper-parameters held by `hp`."""
    dropout_rate = hp.Choice('dropout_rate', values=[0.1, 0.2, 0.3, 0.4, 0.5], default=0.2)
    use_regularizer = hp.Choice('use_regularizer', values=[True, False], default=True)
    regularizer_value = hp.Choice('regularizer_value', values=[1e-1, 1e-2, 1e-3, 1e-4, 1e-5], default=1e-4)
    optimizer_name = hp.Choice('optimizer_name', values=['sgd', 'adam', 'rmsprop'], default='sgd')
    starting_lr = hp.Choice('starting_lr', values=[1e-1, 1e-2, 1e-3], default=1e-3)
    momentum = hp.Choice('momentum', values=[0.9, 0.95, 0.99], default=0.9)

    # Fail fast, before the hub module is instantiated: without this check an
    # unexpected name would only surface later as an UnboundLocalError on
    # `optimizer`.
    if optimizer_name not in supported_optimizers:
      raise ValueError(
        f"Unsupported optimizer_name {optimizer_name!r}; expected one of {supported_optimizers}."
      )

    tf.keras.backend.clear_session()

    #module_selection = ("mobilenet_v2", 224, 1280)
    module_selection = ("inception_v3", 299, 2048)

    handle_base, pixels, feature_size = module_selection
    module_handle = f"https://tfhub.dev/google/tf2-preview/{handle_base}/feature_vector/4"
    model_image_shape = (pixels, pixels, 3)

    inputs = tf.keras.layers.Input(name='image', shape=model_image_shape, dtype=image_type)
    x = inputs

    x = hub.KerasLayer(module_handle, output_shape=[feature_size], trainable=do_fine_tuning)(x)
    x = tf.keras.layers.Dropout(dropout_rate)(x)

    # One softmax head per character position, all fed by the same features.
    padding_vocabulary_size = vocabulary_size + 1
    outputs = []
    for i in range(max_len_word):
      # A fresh regularizer instance per head, as before; only the
      # loop-invariant branching is expressed inline.
      regularizer = tf.keras.regularizers.l2(regularizer_value) if use_regularizer else None
      out = tf.keras.layers.Dense(padding_vocabulary_size, kernel_regularizer=regularizer, activation='softmax', name=f'character_{i}')(x)
      outputs.append(out)
    outputs = tf.keras.layers.Concatenate()(outputs)
    outputs = tf.keras.layers.Reshape((max_len_word, padding_vocabulary_size))(outputs)

    model = tf.keras.Model(inputs=inputs, outputs=outputs)

    # Build optimizer. `momentum` only applies to SGD.
    if optimizer_name == 'sgd':
      optimizer = tf.keras.optimizers.SGD(learning_rate=starting_lr, momentum=momentum, nesterov=True)
    elif optimizer_name == 'adam':
      optimizer = tf.keras.optimizers.Adam(learning_rate=starting_lr, amsgrad=True)
    else:
      # Only 'rmsprop' can reach this branch thanks to the validation above.
      optimizer = tf.keras.optimizers.RMSprop(learning_rate=starting_lr)

    # Compile model
    model.compile(loss='categorical_crossentropy', optimizer=optimizer, metrics=['accuracy'])

    return model

  hp = HyperParameters()

  if params:
    for key, value in params.items():
      hp.Fixed(key, value)

  if not do_build:
    return _builder

  return _builder(hp)

In [9]:
# Callbacks
# Every run writes into its own time-stamped sub-directory of `log_dir`.
log_path = log_dir / datetime.datetime.now().strftime("%Y.%m.%d-%H.%M.%S")
log_path.mkdir(exist_ok=True)

# TensorBoard logging: constructed, but not registered in `callbacks` below.
tb = captchanet.LRTensorBoard(log_dir=str(log_path), write_images=False, write_graph=True)

reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(
  monitor='val_loss',
  factor=0.8,
  patience=3,
  min_delta=5e-4,
  min_lr=1e-6,
)

# Early stopping: constructed, but not registered in `callbacks` below.
early_stop = tf.keras.callbacks.EarlyStopping(monitor='val_accuracy', min_delta=1e-3, patience=10)

# Checkpoints: the file name embeds the epoch number and the validation loss.
chkpt_dir = log_path / 'checkpoints'
chkpt_dir.mkdir(exist_ok=True)
chkpt_path = chkpt_dir / 'weights_{epoch:02d}_{val_loss:.3f}.hdf5'
chkpt = tf.keras.callbacks.ModelCheckpoint(
  str(chkpt_path),
  monitor='val_loss',
  save_best_only=True,
  save_weights_only=False,
)

tqdm_progress = captchanet.TQDMCallback()

csv_logger = tf.keras.callbacks.CSVLogger(str(log_path / 'log.csv'))

# Callbacks actually handed to `model.fit`, in registration order.
callbacks = [reduce_lr, chkpt, tqdm_progress, csv_logger]

# Get image shape
# Pull a single batch and drop its leading (batch) dimension.
image, label = list(train_dataset.take(1))[0]
image_shape = image.shape[1:]
image_type = image.dtype

do_fine_tuning = True

# Hyper-parameter values pinned for this training run.
params = {
  'dropout_rate': 0.4,
  'use_regularizer': True,
  'regularizer_value': 1e-5,
  'optimizer_name': 'adam',
  'starting_lr': 1e-2,
  'momentum': 0.95,
}

# Build the model.
vocabulary_size = len(tokenizer.index_word)
model = build_model(
  image_shape,
  image_type,
  vocabulary_size,
  max_len_word=10,
  params=params,
  do_fine_tuning=do_fine_tuning,
)

In [None]:
# Upper bound on the number of training epochs.
n_epochs = 500

# `verbose=0` turns off Keras' own console progress output; `callbacks`
# already contains `captchanet.TQDMCallback`.
model.fit(
  train_dataset,
  epochs=n_epochs,
  validation_data=val_dataset,
  callbacks=callbacks,
  verbose=0,
)

HBox(children=(IntProgress(value=0, description='Training', max=500, style=ProgressStyle(description_width='in…

HBox(children=(IntProgress(value=1, bar_style='info', description='Epoch: 0', max=1, style=ProgressStyle(descr…

In [None]:
# Save model
# Everything needed to reuse the model (weights, tokenizer, training history)
# is written under `model_dir/<version>` and then zipped next to it.
import shutil

model_path = model_dir / 'v7'
model_path.mkdir(exist_ok=True, parents=True)
model.save(str(model_path / 'model'))

# Save tokenizer
with open(model_path / tokenizer_path.name, 'w') as f:
  f.write(tokenizer.to_json())

# Save history
# `model.history` is the History object populated by the `model.fit` call.
history = pd.DataFrame(model.history.history)
history_path = model_path / 'history.csv'
history.to_csv(str(history_path), index=False)

# Pack and zip the model directory
# `.name` (not `.stem`) keeps the whole directory name: with `.stem`, a
# version such as 'v7.1' would be truncated to 'v7' and its archive would
# overwrite the one of another version.
archive_path = model_dir / model_path.name
# Notebook checkpoint folders are removed so they do not end up in the archive.
shutil.rmtree(model_path / '.ipynb_checkpoints', ignore_errors=True)
# Plain strings are passed so the call does not rely on path-like support.
shutil.make_archive(str(archive_path), 'zip', root_dir=str(model_path))

In [None]:
# Check history
# Reload the per-epoch metrics from the CSV file written next to the model.
history = pd.read_csv(history_path)

# Layout: `n` panels laid out on rows of `ncols`; the figure is `size` inches
# tall and `ncols * size * w_h_scale` inches wide.
n = 3
size = 3.5
ncols = 3
w_h_scale = 2
figsize = (ncols * size * w_h_scale, size)
fig, axs = plt.subplots(nrows=n//ncols, ncols=ncols, figsize=figsize)
axs = axs.flatten()

# Panels 0 and 1: the validation curve is drawn first, then the training one.
for ax, metric in zip(axs, ('accuracy', 'loss')):
  ax.plot(history[f'val_{metric}'], label=f'val_{metric}')
  ax.plot(history[metric], label=metric)
  ax.set_xlabel('epoch')
  ax.legend()

# Panel 2: the `lr` column of the history, one value per epoch.
lr_ax = axs[2]
lr_ax.plot(history['lr'])
lr_ax.set_xlabel('epoch')

fig.savefig(model_path / 'history.png')

In [None]:
# Qualitative check: run the model on one random batch of validation
# captchas and plot each image with its true and predicted word.
loaded_model = model

# Perform prediction
model_image_size = (299, 299)
n  = 8
fnames = [str(p) for p in val_data_dir.glob("*.tfrecord")]
dataset = tf.data.TFRecordDataset(fnames)
# Shuffle the serialized records before decoding them (the same order as in
# `make_dataset`), so the 1024-element buffer holds raw records instead of
# decoded images.
dataset = dataset.shuffle(1024)
dataset = dataset.map(map_func=captchanet.decode_data(tokenizer, max_len_word=10, image_size=model_image_size, input_as_dict=True))
dataset = dataset.batch(n)
data = next(iter(dataset.take(1)))

images = data['image']

# The ground-truth `data['label']` entry is not needed here: the true words
# are read from `data['word']` below.
predictions = loaded_model(images)

# Decode
# Arg-max over the last axis gives one token id per character position.
labels = predictions.numpy().argmax(axis=2)
predicted_words = [tokenizer.sequences_to_texts([label])[0] for label in labels]
predicted_words = [word.replace(' ', '') for word in predicted_words]

# Plot
original_images = data['original_image'].numpy()
words = data['word'].numpy()
# NOTE(review): every '0' character is stripped - presumably '0' is the
# padding symbol; confirm that real captchas never contain the digit 0.
words = [w.decode('utf-8').replace('0', '') for w in words]

size = 2
ncols = 2
nrows = n // ncols
# Width / height ratio of the original images, used to size the figure.
ratio = original_images.shape[2] / original_images.shape[1]
figsize = (ncols * size * ratio, size * nrows)
fig, axs = plt.subplots(nrows=nrows, ncols=ncols, figsize=figsize, constrained_layout=True)
axs = axs.flatten()

for image, word, predicted_word, ax in zip(original_images, words, predicted_words, axs):
  ax.imshow(image)

  mark = 'OK' if predicted_word == word else 'WRONG'
  # Named `title` rather than `text`, so that no generic module-level name
  # (such as an imported `text` module) gets overwritten by a string.
  title = f'True: {word} ({len(word)})'
  title += f' - Predicted: {predicted_word} ({len(predicted_word)})'
  title += f" - {mark}"
  ax.set_title(title, fontsize=14)

fig.savefig(model_path / 'example_prediction.png')