In [43]:
import os
from pathlib import Path
import tensorflow as tf
from tensorflow.keras import layers
import pandas as pd
import numpy as np

## Test set loading

In [None]:
# Download test set from Google Cloud Storage
!gsutil -m cp -r "gs://fmnh_datasets/IAM_Words_test/" .
# !gsutil -m cp -r "gs://fmnh_datasets/IAM_Words_test/" .
# !gsutil -m cp -r "gs://fmnh_datasets/IAM_Words_test/" .

In [38]:
# Configuration for the test set: where it lives on disk, which characters the
# model knows about, and how labels/batches are shaped.
data_dir = Path('IAM_Words_test')
metadata_file_name = 'word_metadata2.csv'
CHAR_LIST: str = '\' !"#&()[]*+,-./0123456789:;?ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz'
MAX_LABEL_LENGTH = 30
IMAGE_FORMAT = 'png'
BATCH_SIZE = 64
characters = sorted(set(list(CHAR_LIST)))

# NOTE(review): read_csv applies its default NA handling, so a transcription
# such as "null" or "NA" would presumably be read as NaN and end up as the
# label 'nan' below — confirm against the metadata file.
metadata = pd.read_csv(os.path.join(data_dir, metadata_file_name))

# Every image file below data_dir, as sorted string paths.
images = sorted(list(map(str, list(data_dir.rglob(f'*.{IMAGE_FORMAT}')))))
print(len(images))

# 'image_location' holds backslash-separated paths; keep only the file name so
# it can be matched against the basenames of the files found on disk.
metadata['word_image_basenames'] = metadata['image_location'].map(lambda b: b.split('\\')[-1])

# Build a basename -> transcription lookup in one pass over the metadata
# instead of filtering the whole DataFrame once per image.
image_basenames = [os.path.basename(path) for path in images]
wanted_basenames = set(image_basenames)
transcription_by_basename = {}
for basename, transcription in zip(metadata['word_image_basenames'], metadata['transcription']):
    if basename not in wanted_basenames:
        continue
    if basename in transcription_by_basename:
        # Mirrors the original `.item()` failure on an ambiguous match.
        raise ValueError(f'Multiple metadata rows found for image {basename!r}')
    transcription_by_basename[basename] = transcription

missing_basenames = sorted(wanted_basenames.difference(transcription_by_basename))
if missing_basenames:
    # Mirrors the original `.item()` failure on an empty match.
    raise ValueError(
        f'No metadata row found for {len(missing_basenames)} image(s), '
        f'e.g. {missing_basenames[0]!r}'
    )

# Labels are right-padded with spaces to at least MAX_LABEL_LENGTH characters.
labels = [
    str(transcription_by_basename[basename]).ljust(MAX_LABEL_LENGTH)
    for basename in image_basenames
]

test_images = np.array(images)
test_labels = np.array(labels)

print(f'Testing images ({test_images.shape[0]}) and labels ({test_labels.shape[0]}) loaded.')

10831
Testing images (10831) and labels (10831) loaded.


In [39]:
# Desired image dimensions in pixels; encode_single_sample resizes every
# image to [img_height, img_width].
img_width = 400
img_height = 100

# Factor by which the image is downsampled by the convolutional blocks.
# The model is described as using two convolution blocks, each with a
# pooling layer that downsamples the features by a factor of 2, hence a
# total downsampling factor of 4.
# NOTE(review): this value is not referenced by any other visible cell.
downsample_factor = 4

# Mapping characters to integers, using the sorted character set as the
# vocabulary and no mask token.
char_to_num = layers.experimental.preprocessing.StringLookup(
    vocabulary=list(characters), mask_token=None
)

# Mapping integers back to original characters: the same vocabulary as
# char_to_num, with the lookup inverted.
num_to_char = layers.experimental.preprocessing.StringLookup(
    vocabulary=char_to_num.get_vocabulary(), mask_token=None, invert=True
)

def encode_single_sample(img_path, label):
    """Load one image/label pair and convert it into model-ready tensors.

    Args:
        img_path: Path of the image file to read.
        label: Transcription string for that image.

    Returns:
        A dict with two entries: "image", a float32 single-channel tensor
        resized to [img_height, img_width] and then transposed so that width
        is the first axis, and "label", the characters of ``label`` mapped to
        integers by ``char_to_num``.
    """
    raw_bytes = tf.io.read_file(img_path)

    # Decode to a single (grayscale) channel.
    # NOTE(review): the test files are collected with IMAGE_FORMAT = 'png' yet
    # decoded with decode_jpeg; TensorFlow documents that this op also accepts
    # PNG input — confirm this is intentional.
    pixels = tf.io.decode_jpeg(raw_bytes, channels=1)

    # Scale to float32 in the [0, 1] range, then bring to the fixed size.
    pixels = tf.image.convert_image_dtype(pixels, tf.float32)
    pixels = tf.image.resize(pixels, [img_height, img_width])

    # Swap height and width so the time dimension follows the image width.
    pixels = tf.transpose(pixels, perm=[1, 0, 2])

    # Split the label into characters and map each one to its integer id.
    label_chars = tf.strings.unicode_split(label, input_encoding="UTF-8")

    # A dict is returned because the model is fed by input name.
    return {"image": pixels, "label": char_to_num(label_chars)}

# Build the evaluation pipeline one stage at a time:
# (path, label) pairs -> decoded samples -> batches -> prefetched batches.
test_dataset = tf.data.Dataset.from_tensor_slices((test_images, test_labels))
test_dataset = test_dataset.map(
    encode_single_sample, num_parallel_calls=tf.data.experimental.AUTOTUNE
)
test_dataset = test_dataset.batch(BATCH_SIZE)
test_dataset = test_dataset.prefetch(buffer_size=tf.data.experimental.AUTOTUNE)
print(test_dataset.element_spec)

{'image': TensorSpec(shape=(None, 400, 100, 1), dtype=tf.float32, name=None), 'label': TensorSpec(shape=(None, None), dtype=tf.int64, name=None)}


## Model loading

In [47]:
MODEL_NAME = 'run_55_all'
model_uri = f'gs://iam-model-staging/{MODEL_NAME}/model'
!gsutil -m cp -r $model_uri .

Copying gs://iam-model-staging/run_55_all/model/keras_metadata.pb...
Copying gs://iam-model-staging/run_55_all/model/run_55_all-training_history.csv...
Copying gs://iam-model-staging/run_55_all/model/saved_model.pb...               
Copying gs://iam-model-staging/run_55_all/model/variables/variables.data-00000-of-00001...
Copying gs://iam-model-staging/run_55_all/model/variables/variables.index...
- [5/7 files][ 83.0 MiB/ 83.0 MiB]  99% Done                                    

In [48]:
# Load the model from the ./model directory in the working directory.
prediction_model_filename = Path('./model')
prediction_model = tf.keras.models.load_model(prediction_model_filename)
# Re-compile with a default Adam optimizer; no loss or metrics are configured.
# NOTE(review): the model is only used for predict() in the visible cells, so
# this compile step is presumably optional — confirm before removing.
opt = tf.keras.optimizers.Adam()
prediction_model.compile(optimizer=opt)

# A utility function to decode the output of the network
def decode_batch_predictions(pred):
    """Turn a batch of network outputs into a list of decoded strings.

    Runs greedy CTC decoding over ``pred``, keeps at most MAX_LABEL_LENGTH
    decoded positions per sample, and maps the resulting indices back to
    characters with ``num_to_char``.

    Args:
        pred: Network output; its first two dimensions are read as the number
            of samples and the number of time steps.

    Returns:
        A list with one decoded ``str`` per sample in ``pred``.
    """
    sample_count, time_steps = pred.shape[0], pred.shape[1]
    # Every sample is decoded over the full time dimension.
    sequence_lengths = np.ones(sample_count) * time_steps

    # Greedy search is used here; beam search is an option for harder tasks.
    decoded = tf.keras.backend.ctc_decode(pred, input_length=sequence_lengths, greedy=True)
    best_paths = decoded[0][0][:, :MAX_LABEL_LENGTH]

    # Map each index sequence back to characters and join them into one string.
    return [
        tf.strings.reduce_join(num_to_char(indices)).numpy().decode('utf-8')
        for indices in best_paths
    ]

## Prediction generation

In [49]:
def _strip_padding(text):
    """Remove '[UNK]' tokens and all spaces (label padding) from a decoded string."""
    return text.replace('[UNK]', '').replace(' ', '')


# Collect one small DataFrame per batch and concatenate once at the end.
# DataFrame.append was removed in pandas 2.0 and re-copies the accumulated
# frame on every call, so it is avoided here.
batch_frames = []
for batch in test_dataset:
    # NOTE(review): the batch dict also carries 'label'; the warning in this
    # cell's output suggests the model ignores it — consider passing only
    # batch['image'] once the model's inputs are confirmed.
    preds = prediction_model.predict(batch)
    pred_texts = [_strip_padding(text) for text in decode_batch_predictions(preds)]

    # Decode the ground-truth labels the same way so both columns are comparable.
    orig_texts = [
        _strip_padding(tf.strings.reduce_join(num_to_char(encoded_label)).numpy().decode("utf-8"))
        for encoded_label in batch['label']
    ]

    batch_frames.append(
        pd.DataFrame(list(zip(orig_texts, pred_texts)), columns=['label', 'prediction'])
    )

# An empty dataset still yields an empty frame with the expected columns.
if batch_frames:
    prediction_results = pd.concat(batch_frames, ignore_index=True)
else:
    prediction_results = pd.DataFrame(columns=['label', 'prediction'])

print(prediction_results)
prediction_results.to_csv(f'{MODEL_NAME}-predictions.csv')

  inputs = self._flatten_to_reference_inputs(inputs)


        label prediction
0           A          A
1        more       more
2           a          a
3        Foot       Foot
4         and        and
...       ...        ...
10826      In         In
10827  beside     beside
10828      in         in
10829       '          "
10830    went       went

[10831 rows x 2 columns]
