In [4]:
import sys
import os
# Put the parent of the current working directory on the import path.
# NOTE(review): presumably this is what makes the local `utilities` package
# importable below — confirm the notebook is always launched from its own folder.
root_dir = os.path.join(os.getcwd(), '..')
sys.path.append(root_dir)

from pathlib import Path
import tensorflow as tf
import pandas as pd
import numpy as np

from utilities import create_model
from utilities import TestConfiguration
from utilities import TestDataset
# Project test configuration object; not read anywhere else in the cells shown here.
config = TestConfiguration()

# Image set and metadata file used by the cells below. Both are hard-coded;
# the trailing comments name the `config` attributes they appear to replace.
# NOTE(review): confirm the override of the configured values is intentional.
IMAGE_SET_NAME = 'standley_4058_test' # config.IMAGE_SET_NAME
METADATA_FILENAME = 'words_metadata.csv' # config.METADATA_FILE_NAME

## Load the test set locally

In [2]:
# Download the test image set from Google Cloud Storage into the current
# working directory.
# NOTE(review): the recorded output of this cell shows it was interrupted
# (Ctrl-C), so the local copy may be incomplete — re-run it to completion
# before trusting any evaluation numbers.
IMAGE_BUCKET = 'fmnh_datasets'

# Bucket folder named after the image set.
storage_path = f'gs://{IMAGE_BUCKET}/{IMAGE_SET_NAME}/'
# `-m` runs the copy in parallel, `-r` copies the folder recursively;
# `$storage_path` is expanded by IPython from the Python variable above.
!gsutil -m cp -r $storage_path .

^C
Caught CTRL-C (signal 2) - exiting


In [5]:
# First argument handed to `create_dataset`.
# NOTE(review): presumably the batch size — confirm against TestDataset.
BATCH_SIZE = 32

# The image set is read from a folder named after it, relative to the
# notebook's working directory.
data_dir = Path(IMAGE_SET_NAME)

# Build the dataset object and populate it from the image folder and its
# metadata file. The call stays last so its result remains the cell output.
test_dataset = TestDataset()
test_dataset.create_dataset(BATCH_SIZE, data_dir, METADATA_FILENAME)

405


## Model loading

In [6]:
# Load model from Google Cloud Storage
# NOTE(review): the next cell ("Load model from local filesystem") rebinds
# both MODEL_NAME and prediction_model. If both cells are run, the locally
# loaded model is the one evaluated and named in the predictions CSV —
# run only the loader you intend to use.
MODEL_BUCKET = 'iam-model-staging'
MODEL_NAME = 'run_55_all'
model_uri = f'gs://{MODEL_BUCKET}/{MODEL_NAME}/model'
# Recursive (`-r`), parallel (`-m`) copy of the remote `model` folder into the
# current working directory; `$model_uri` is expanded by IPython.
!gsutil -m cp -r $model_uri .
# The copy above lands in ./model, which is then loaded as a Keras model.
prediction_model_filename = Path('./model')
prediction_model = tf.keras.models.load_model(prediction_model_filename)

Copying gs://iam-model-staging/run_55_all/model/keras_metadata.pb...
Copying gs://iam-model-staging/run_55_all/model/variables/variables.data-00000-of-00001...
Copying gs://iam-model-staging/run_55_all/model/run_55_all-training_history.csv...
Copying gs://iam-model-staging/run_55_all/model/variables/variables.index...    
Copying gs://iam-model-staging/run_55_all/model/saved_model.pb...               
\ [5/7 files][ 83.0 MiB/ 83.0 MiB]  99% Done                                    

In [7]:
# Load model from local filesystem
# NOTE(review): this rebinds MODEL_NAME and prediction_model, replacing
# whatever the Cloud Storage loader cell set. MODEL_NAME is also used later
# to name the predictions CSV.
MODEL_NAME = 'fine_tuned-prediction'
# Path is relative to the notebook's working directory.
model_location = Path(f'../transfer_learning/{MODEL_NAME}.model')
prediction_model = tf.keras.models.load_model(model_location)

In [8]:
# Compile the loaded model with a default-constructed Adam optimizer.
# No loss or metrics are passed.
# NOTE(review): presumably this is only here so the model can be used for
# inference below — confirm it is still required.
opt = tf.keras.optimizers.Adam()
prediction_model.compile(optimizer=opt)

## Prediction generation

In [11]:
def _strip_special_tokens(text):
    """Return `text` with every '[UNK]' marker and every space removed.

    The same clean-up is applied to ground-truth labels and to decoded
    predictions so the two columns are compared on equal terms.
    """
    return text.replace('[UNK]', '').replace(' ', '')


# Collect (label, prediction) pairs batch by batch and build the DataFrame
# once at the end. `DataFrame.append` was removed in pandas 2.0, and growing
# a frame inside a loop copies it on every iteration.
result_rows = []
for batch in test_dataset.test_dataset:
    images = batch['image']
    labels = batch['label']

    preds = prediction_model.predict(images)
    pred_texts = [
        _strip_special_tokens(text)
        for text in test_dataset.decode_batch_predictions(preds)
    ]

    # Map each encoded label back to characters and join them into one string.
    orig_texts = [
        _strip_special_tokens(
            tf.strings.reduce_join(test_dataset.num_to_char(label)).numpy().decode("utf-8")
        )
        for label in labels
    ]

    result_rows.extend(zip(orig_texts, pred_texts))

# An empty dataset still yields a frame with the two expected columns.
prediction_results = pd.DataFrame(result_rows, columns=['label', 'prediction'])

# `exist_ok=True` avoids the check-then-create race of os.path.exists + makedirs.
predictions_dir = Path('predictions')
predictions_dir.mkdir(parents=True, exist_ok=True)
# The index column is written too, matching the file layout produced before.
prediction_results.to_csv(predictions_dir / f'{MODEL_NAME}-predictions.csv')

# Trailing expression: rendered by the notebook's rich display instead of print().
prediction_results

           label   prediction
0             .)            "
1          Maxon         Maan
2    delitescens  dillilconan
3       radicans     Pilicana
4           Damp         trtt
..           ...          ...
400     ropinqua    pospirnou
401    macrosora        aarsu
402          Chr          po.
403   Polypodium    Beleuiaem
404       plesio          Pas

[405 rows x 2 columns]
