In [1]:
import sys
import os
# Put the parent of the current working directory on the import path.
# NOTE(review): presumably this is where the project-local `utilities`
# module imported below lives -- confirm against the repository layout.
root_dir = os.path.join(os.getcwd(), os.pardir)
sys.path.append(root_dir)

from pathlib import Path
import tensorflow as tf
import pandas as pd
import numpy as np

from utilities import create_model
from utilities import TestConfiguration
from utilities import TestDataset
# Project test configuration object. The constants below are hard-coded
# here instead of being read from it (see the trailing comments for the
# corresponding config attributes).
config = TestConfiguration()

# Name of the test image set; it is used to build both the storage URI and
# the local image folder. Alternatives kept for reference in the trailing comment.
IMAGE_SET_NAME = 'IAM_Words_test' # 'standley_4058_test' # config.IMAGE_SET_NAME
# Metadata CSV file name handed to TestDataset.create_dataset.
METADATA_FILENAME = 'words_metadata.csv' # config.METADATA_FILE_NAME

2022-07-07 18:15:47.196494: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-07-07 18:15:47.206988: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-07-07 18:15:47.207704: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-07-07 18:15:47.209186: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags

## Load the test set locally

In [None]:
# OPTIONAL: Download test set from Google Cloud Storage
IMAGE_BUCKET = 'fmnh_datasets'

storage_path = f'gs://{IMAGE_BUCKET}/{IMAGE_SET_NAME}/'
!gsutil -m cp -r $storage_path ../images

In [2]:
# Folder that holds the local copy of the selected test image set.
local_image_location = f'../images/{IMAGE_SET_NAME}'
data_dir = Path(local_image_location)

# Build the test dataset from the image folder and its metadata file.
test_dataset = TestDataset()
test_dataset.create_dataset(
    32,  # NOTE(review): presumably the batch size -- confirm against TestDataset
    data_dir,
    METADATA_FILENAME,
)

10828


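## Model loading

Run only **one** of the three loading cells below (download from Google Cloud Storage, load a model from the local filesystem, or reuse an already-downloaded model). Each one overwrites `MODEL_NAME` and `prediction_model`.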

In [3]:
# Load model from Google Cloud Storage
MODEL_BUCKET = 'fmnh_models'
MODEL_NAME = 'iam_train-run_55'
if not os.path.exists('models'):
    os.makedirs('models')

model_uri = f'gs://{MODEL_BUCKET}/{MODEL_NAME}'
!gsutil -m cp -r $model_uri ./models/
prediction_model_filename = Path(f'./models/{MODEL_NAME}')
prediction_model = tf.keras.models.load_model(prediction_model_filename)

Copying gs://fmnh_models/iam_train-run_55/keras_metadata.pb...
Copying gs://fmnh_models/iam_train-run_55/run_55_all-training_history.csv...    
Copying gs://fmnh_models/iam_train-run_55/saved_model.pb...                     
Copying gs://fmnh_models/iam_train-run_55/variables/variables.index...          
Copying gs://fmnh_models/iam_train-run_55/variables/variables.data-00000-of-00001...
- [5/5 files][ 83.0 MiB/ 83.0 MiB] 100% Done                                    
Operation completed over 5 objects/83.0 MiB.                                     


In [None]:
# OR, load model from local filesystem
# Alternative to the Google Cloud Storage cell: reads
# ../transfer_learning/<MODEL_NAME>.model instead of downloading anything.
# MODEL_NAME is reassigned here and is later used to name the predictions CSV.
MODEL_NAME = 'fine_tuned-prediction'
model_location = Path(f'../transfer_learning/{MODEL_NAME}.model')
prediction_model = tf.keras.models.load_model(model_location)

In [5]:
# OR, use already-downloaded model
# Alternative that skips the gsutil copy and loads ./models/<MODEL_NAME>
# directly; it requires that directory to be present from an earlier download.
MODEL_NAME = 'iam_train-run_55'
model_location = Path(f'./models/{MODEL_NAME}')
prediction_model = tf.keras.models.load_model(model_location)

In [6]:
# Compile the loaded model with a default-configured Adam optimizer; no loss
# or metrics are supplied.
# NOTE(review): the model is only used for predict() below, so this compile
# step is presumably just to put the loaded model in a compiled state --
# confirm whether it is actually required.
opt = tf.keras.optimizers.Adam()
prediction_model.compile(optimizer=opt)

## Prediction generation

In [None]:
def _strip_unknown_and_spaces(text):
    """Return `text` with every '[UNK]' marker and every space removed."""
    return text.replace('[UNK]', '').replace(' ', '')


# Collect (label, prediction) pairs for every batch and build the DataFrame
# once at the end. Growing a DataFrame with `DataFrame.append` inside the loop
# is quadratic, and `append` was deprecated in pandas 1.4 and removed in 2.0.
result_rows = []
for batch in test_dataset.test_dataset:
    images = batch['image']
    labels = batch['label']

    # Model output for the batch, decoded to strings by the dataset helper.
    preds = prediction_model.predict(images)
    pred_texts = [
        _strip_unknown_and_spaces(text)
        for text in test_dataset.decode_batch_predictions(preds)
    ]

    # Ground-truth labels: map each encoded label through num_to_char, join
    # the pieces into one string and decode the resulting bytes as UTF-8.
    orig_texts = [
        _strip_unknown_and_spaces(
            tf.strings.reduce_join(test_dataset.num_to_char(label)).numpy().decode("utf-8")
        )
        for label in labels
    ]

    result_rows.extend(zip(orig_texts, pred_texts))

prediction_results = pd.DataFrame(result_rows, columns=['label', 'prediction'])
print(prediction_results)

# Write the results to predictions/<MODEL_NAME>-predictions.csv, creating the
# folder if needed (exist_ok=True keeps the cell safe to re-run).
os.makedirs('predictions', exist_ok=True)
prediction_results.to_csv(Path('predictions', f'{MODEL_NAME}-predictions.csv'))

2022-07-07 18:34:18.961770: I tensorflow/stream_executor/cuda/cuda_dnn.cc:384] Loaded cuDNN version 8200


