In [1]:
import argparse
import os
import json

In [9]:
import tensorflow as tf

In [2]:
from pandas import to_datetime
from sklearn.metrics import mean_squared_error, mean_absolute_error

In [3]:
from google.cloud import storage
storage_client = storage.Client()

In [4]:
# running local, in the tpuestimator-tensorflow-1-15-vm, on Jupyter Lab
_LOCAL_ROOT_DIR = '{0}/gcp/cbidmltsf'.format(os.getenv("HOME"))

In [5]:
_LOCAL_ROOT_DIR

'/home/jupyter/gcp/cbidmltsf'

In [6]:
_SLDB_ID = 'CPE04015_desbI_H_2017-04-01_00:00:00_2018-02-28_23:00:00_H008001001_D008024001_W004168001'

In [7]:
_BUCKET_NAME = 'cbidmltsf'

In [10]:
# feature spec for parsing serialized tf.Example rows
# (declared here for now, later imported from data)
# every numeric feature is a variable-length float32 list
read_features = {
    key: tf.io.VarLenFeature(dtype=tf.float32)
    for key in ('hourly', 'daily', 'weekly', 'target', 'oh_wd', 'oh_dh')
}
# the timestamp is the only feature stored as a string
read_features['timestamp'] = tf.io.VarLenFeature(dtype=tf.string)

In [11]:
bucket = storage_client.get_bucket(_BUCKET_NAME)

In [12]:
# build the object key of the scaler file inside the GCS bucket
scaler_path = 'sldbs/{}/scaler.save'.format(_SLDB_ID)
blob = bucket.blob(scaler_path)
# temporarily download the scaler to the VM, keeping the original file name
# (relative path, so the file lands in the current working directory)
blob.download_to_filename('scaler.save')

In [13]:
# build the object key of the sldb.json file inside the GCS bucket
sldb_path = 'sldbs/{}/sldb.json'.format(_SLDB_ID)
# note: this rebinds the global name 'blob' used for the previous download
blob = bucket.blob(sldb_path)
# temporarily download the json file to the VM, keeping the original file name
# (relative path, so the file lands in the current working directory)
blob.download_to_filename('sldb.json')

In [14]:
# recover the sldb dictionary from vm's block storage
# JSON text is UTF-8 by specification, so open the file with an explicit encoding
# instead of relying on the locale default
with open('sldb.json', 'r', encoding='utf-8') as sldb_file:
    sldb = json.load(sldb_file)

In [15]:
_EQUIPMENT = sldb['description']['equipment']

In [16]:
_EQUIPMENT

'CPE04015'

In [17]:
# no main function, build it later...

In [18]:
import numpy as np

In [19]:
# module bokeh is not accessible from Jupyter Lab, but it is available from command line
# on /home/developer, so let's continue
from bokeh.plotting import figure, output_file, save

ImportError: No module named 'bokeh'

In [21]:
from sklearn.externals import joblib

In [22]:
model_dir = '086401_083201_043201_TPU_17_00'

In [25]:
saved_model_path = 'models/{0}/export/exporter'.format(model_dir)
saved_model_path

'models/086401_083201_043201_TPU_17_00/export/exporter'

In [26]:
def list_files(bucket_folder, source_bucket=None):
    """List the names of the blobs stored under a folder of a GCS bucket.

    Only blob names that contain a '.' are returned; this is the heuristic used
    here to keep file-like objects and drop folder placeholder entries.

    Args:
        bucket_folder: prefix (folder path inside the bucket) to list.
        source_bucket: object exposing ``list_blobs(prefix=...)`` whose items have
            a ``name`` attribute. When omitted, the notebook-level global
            ``bucket`` is used, which keeps existing calls working unchanged.

    Returns:
        A list with the matching blob names, in the order the bucket yields them.
    """
    if source_bucket is None:
        # backward-compatible default: fall back to the global bucket handle
        source_bucket = bucket
    blobs = source_bucket.list_blobs(prefix=bucket_folder)
    return [blob.name for blob in blobs if '.' in blob.name]

In [27]:
all_files = list_files(saved_model_path)

In [28]:
[file for file in all_files]

['models/086401_083201_043201_TPU_17_00/export/exporter/1583418813/saved_model.pb',
 'models/086401_083201_043201_TPU_17_00/export/exporter/1583418813/variables/variables.data-00000-of-00001',
 'models/086401_083201_043201_TPU_17_00/export/exporter/1583418813/variables/variables.index']

In [29]:
# isolate the names of the subdirectories in export/exporter (one for each training process)
prefix_length = len(saved_model_path)
# every blob name looks like '<saved_model_path>/<id>/...'; take the path component that
# follows the prefix instead of assuming the id is exactly 10 characters long, and
# ignore anything that is not a purely numeric id
saved_model_ids = {
    file[prefix_length + 1:].split('/', 1)[0]
    for file in all_files
}
saved_model_ids = {model_id for model_id in saved_model_ids if model_id.isdigit()}
if not saved_model_ids:
    raise ValueError('no saved model found under {}'.format(saved_model_path))
# get the string that identifies the last saved model directory
# NOTE(review): ids look like Unix timestamps (e.g. '1583418813'), so they are compared
# numerically; a plain string sort would misorder ids with a different number of digits
latest_saved_model_id = max(saved_model_ids, key=int)

In [30]:
latest_saved_model_id

'1583418813'

In [31]:
# full gs:// URI of the most recent exported model: bucket / export folder / model id
_LATEST_SAVED_MODEL_DIR = 'gs://' + '/'.join(
    [_BUCKET_NAME, saved_model_path, latest_saved_model_id]
)

In [32]:
_LATEST_SAVED_MODEL_DIR

'gs://cbidmltsf/models/086401_083201_043201_TPU_17_00/export/exporter/1583418813'

In [33]:
# build a prediction function
predict_fn = tf.contrib.predictor.from_saved_model(_LATEST_SAVED_MODEL_DIR)

The TensorFlow contrib module will not be included in TensorFlow 2.0.
For more information, please see:
  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
  * https://github.com/tensorflow/addons
  * https://github.com/tensorflow/io (for I/O related ops)
If you depend on functionality not listed there, please file an issue.

Instructions for updating:
This function will only be available through the v1 compatibility library as tf.compat.v1.saved_model.loader.load or tf.compat.v1.saved_model.load. There will be a new function for importing SavedModels in Tensorflow 2.0.
INFO:tensorflow:Restoring parameters from gs://cbidmltsf/models/086401_083201_043201_TPU_17_00/export/exporter/1583418813/variables/variables


In [34]:
predict_fn

SavedModelPredictor with feed tensors {'example_bytes': <tf.Tensor 'Placeholder:0' shape=() dtype=string>} and fetch_tensors {'forecast': <tf.Tensor 'output_4_1/Sigmoid:0' shape=(1, 1) dtype=float32>}

In [35]:
# load the scaler from the vm's block storage
# ('scaler.save' is the local copy downloaded from the bucket earlier in this notebook)
# NOTE(review): joblib.load unpickles the file, which can execute arbitrary code;
# only load scaler files that come from a trusted bucket
scaler = joblib.load('scaler.save')



In [36]:
# build the URI of the test TFRecord file; reuse _BUCKET_NAME instead of repeating the
# bucket name literally, so the notebook keeps working if the bucket changes
test_dataset_filename = 'gs://{0}/sldbs/{1}/test.tfrecord'.format(_BUCKET_NAME, _SLDB_ID)

In [37]:
test_dataset_filename

'gs://cbidmltsf/sldbs/CPE04015_desbI_H_2017-04-01_00:00:00_2018-02-28_23:00:00_H008001001_D008024001_W004168001/test.tfrecord'

In [38]:
 test_dataset = tf.data.TFRecordDataset(test_dataset_filename)

In [39]:
test_dataset

<TFRecordDatasetV1 shapes: (), types: tf.string>

In [40]:
# the raw serialized examples are fed to the predictor as they are, so no map() is needed
# (the previous identity map only added a no-op stage to the input pipeline)
dataset = test_dataset
# ToDo: find out if there is a faster way to build the predictions list
# (with no iterator on the dataset)
# Dataset.make_one_shot_iterator() is deprecated; use the v1 compatibility helper,
# in line with the tf.compat.v1.Session used to consume the iterator
iterator = tf.compat.v1.data.make_one_shot_iterator(dataset)
next_element = iterator.get_next()

Instructions for updating:
Use `for ... in dataset:` to iterate over a dataset. If using `tf.estimator`, return the `Dataset` object directly from your input function. As a last resort, you can use `tf.compat.v1.data.make_one_shot_iterator(dataset)`.


In [41]:
predictions_list = []

In [42]:
# run every serialized example of the test dataset through the prediction function
with tf.compat.v1.Session() as sess:
    try:
        while True:
            example = sess.run(next_element)
            predictions_list.append(predict_fn({'example_bytes': example}))
    except tf.errors.OutOfRangeError:
        # expected: sess.run() raises this once the one-shot iterator is exhausted;
        # any other error (bad example, prediction failure, interrupt) must surface
        pass

In [43]:
len(predictions_list)

130

In [44]:
predictions_list[:10]

[{'forecast': array([[0.6250299]], dtype=float32)},
 {'forecast': array([[0.65739983]], dtype=float32)},
 {'forecast': array([[0.601554]], dtype=float32)},
 {'forecast': array([[0.23903506]], dtype=float32)},
 {'forecast': array([[0.3743351]], dtype=float32)},
 {'forecast': array([[0.46934643]], dtype=float32)},
 {'forecast': array([[0.49625847]], dtype=float32)},
 {'forecast': array([[0.60185355]], dtype=float32)},
 {'forecast': array([[0.37686834]], dtype=float32)},
 {'forecast': array([[0.41060016]], dtype=float32)}]

In [45]:
# pull the scalar forecast out of each prediction dictionary and stack them as a column vector
predictions = np.asarray([p['forecast'][0][0] for p in predictions_list]).reshape(-1, 1)
# inverse-scale the predictions, then drop every dimension of size 1
predictions = np.squeeze(scaler.inverse_transform(predictions))

In [47]:
predictions[:10]

array([10.043574 , 10.563725 ,  9.666341 ,  3.8410423,  6.015172 ,
        7.5419044,  7.9743524,  9.671155 ,  6.0558786,  6.5979133],
      dtype=float32)

In [48]:
# target shape of each feature after parsing; every shape is a [length, 1] column
# the three time-resolution components take their length from the sldb description
objective_shapes = {
    resolution: [sldb['components'][resolution]['m'], 1]
    for resolution in ('hourly', 'daily', 'weekly')
}
objective_shapes.update({
    'target': [sldb['components']['hourly']['no_targets'], 1],
    'oh_wd': [7, 1],  # Monday to Sunday
    'oh_dh': [24, 1],  # midnight to 23:00
    'timestamp': [sldb['components']['hourly']['no_targets'], 1],
})

In [49]:
# do not pass objective shapes as a parameter, get them from an outer scope variable instead
def _parse_dataset_function(example_proto):
    """Parse one serialized tf.Example and reshape its features.

    The feature spec (read_features) and the objective shapes (objective_shapes)
    are taken from the enclosing scope instead of being passed as parameters.

    Important: this is a different parse function from the one used in training;
    it returns the target as a feature, so target values and timestamps can be
    used for plotting.

    Args:
        example_proto: a serialized tf.Example.

    Returns:
        A dictionary with the keys 'hourly', 'daily', 'weekly', 'target', 'oh_wd',
        'oh_dh' and 'timestamp', each holding the parsed values reshaped to the
        corresponding entry of objective_shapes.
    """
    # parse the input tf.Example proto using the feature spec dictionary
    parsed = tf.io.parse_single_example(example_proto, read_features)
    feature_keys = ('hourly', 'daily', 'weekly', 'target', 'oh_wd', 'oh_dh', 'timestamp')
    features = {}
    for key in feature_keys:
        # features are parsed as variable-length, so take their values and
        # reshape them to the objective shape of this feature
        features[key] = tf.reshape(parsed[key].values, objective_shapes[key])
    return features

In [50]:
# test_dataset was previously acquired from tfrecord file
# use it again to build arrays for targets and timestamps
# the parse function is passed directly; wrapping it in a lambda added nothing
dataset = test_dataset.map(_parse_dataset_function)
# Dataset.make_one_shot_iterator() is deprecated; use the v1 compatibility helper,
# in line with the tf.compat.v1.Session used to consume the iterator
iterator = tf.compat.v1.data.make_one_shot_iterator(dataset)
next_element = iterator.get_next()

In [51]:
# ToDo: merge the operations in this iterator in the previous use of the same iterator???
# collect the first timestamp and the first target value of every parsed test example
timestamps_list = []
targets_list = []
with tf.compat.v1.Session() as sess:
    try:
        while True:
            item = sess.run(next_element)
            # get the timestamp, then the scalar value, then the string value, then convert it to datetime
            timestamps_list.append(to_datetime(item['timestamp'][0][0].decode()))
            # get the target, then the scalar value
            targets_list.append(item['target'][0][0])
    except tf.errors.OutOfRangeError:
        # expected: sess.run() raises this once the one-shot iterator is exhausted;
        # any other error (decoding, parsing, interrupt) must surface
        pass

In [54]:
timestamps = np.asarray(timestamps_list)
# stack the targets as a column vector, inverse-scale them, then drop every dimension of size 1
targets = np.asarray(targets_list).reshape(-1, 1)
targets = np.squeeze(scaler.inverse_transform(targets))
# so far, predictions and targets are two NumPy arrays

In [57]:
# then evaluate metrics here
mse = mean_squared_error(targets, predictions)
mae = mean_absolute_error(targets, predictions)
# ToDo: persist metrics to a dictionary or database for reporting the complete experiment over a single circuit

In [58]:
mse, mae

(2.542434, 1.18655)

In [42]:
# everything is ready now for plotting