## This is the code to build SLDBs for the autoregressive transformer architecture.
## It supersedes the make_sldb.py script

In [1]:
# replace the variable 'labels' with 'targets', as the latter is more adequate for regression problems

In [2]:
import os
import numpy as np
import pandas as pd
import pyarrow
import json
import joblib

In [3]:
# scale datasets to improve neural networks performance
from sklearn.preprocessing import MinMaxScaler

In [4]:
from datetime import datetime, timedelta

In [5]:
import tensorflow as tf

In [6]:
# files in the time series directory
# scaler.save
# ts.json
# ts.pkl

In [7]:
# files in the SLDB directory:
# train.tfrecord
# eval.tfrecord
# test.tfrecord
# sldb.json

In [8]:
# a dictionary to configure the SLDB
# ToDo: transfer this dictionary to dplstm/configs/sldb_config.py

# modify the dictionary structure:
# no_targets must be the same for all components, then move it to an upper level
# remove components and use the same structure as in architecture_parameters

# ToDo: build all sldb dictionaries on the basis of list-type parameters,
#  by iterating on them to avoid comments on the non-used resolutions, like
#  m = [8, 8, 8], tau = [1, 24, 168], no_targets = [24] or
#  m = [256], tau = [1], no_targets = [24]

# SLDB configuration, assembled key by key
# 'ts' names the persisted time series (it is used to locate the time series folder)
sldb = {'ts': 'CPE04115_H_kw_20201021084001'}
# embedding dimension (m) for each resolution
sldb['embedding'] = {'hourly': 168}
# lag (tau) for each resolution
sldb['tau'] = {'hourly': 1}
# number of target values per example, shared by all resolutions
sldb['no_targets'] = 168

In [9]:
# time series was built and persisted in a different code
# SLDB construction begins here

In [10]:
# load the required time series
# the folder is selected by the 'ts' entry of the SLDB configuration dictionary
time_series_folder = '/home/developer/gcp/cbidmltsf/timeseries/{}'.format(sldb['ts'])
# the time series dataframe is persisted as ts.pkl inside that folder
pickle_filename = '{}/ts.pkl'.format(time_series_folder)
# NOTE(review): read_pickle unpickles the file, so it should only be pointed at files
# produced by trusted code
ts_df = pd.read_pickle(pickle_filename)

In [11]:
ts_df

Unnamed: 0_level_0,kw_scaled
timestamp,Unnamed: 1_level_1
2016-01-01 00:00:00,0.274317
2016-01-01 01:00:00,0.217363
2016-01-01 02:00:00,0.168545
2016-01-01 03:00:00,0.122996
2016-01-01 04:00:00,0.080440
...,...
2018-07-31 19:00:00,0.652287
2018-07-31 20:00:00,0.656872
2018-07-31 21:00:00,0.690028
2018-07-31 22:00:00,0.609612


In [12]:
# expand time series dataframe with six columns for sine-cosine pos encoding over hour, day, month

In [13]:
# prepare sine-cosine positional encoding for the time series
# each constant is the period used to map a calendar index onto the unit circle
hours_in_day = 24
# NOTE(review): a fixed period of 30 is an approximation; the day index comes from
# ts_df.index.day, so for 31-day months day 31 gets the same sin/cos values as day 1
# (2*pi*31/30 equals 2*pi/30 plus a full turn) - confirm this is acceptable
days_in_month = 30
months_in_year = 12

In [14]:
# extract the hour, day, and month of every timestamp in the index as NumPy arrays
timestamp_hour, timestamp_day, timestamp_month = (
    np.array(getattr(ts_df.index, calendar_field))
    for calendar_field in ('hour', 'day', 'month')
)

In [15]:
# map every calendar index onto an angle (one full turn per period),
# then take sine and cosine of that angle and cast the results to float32
hour_angle = 2*np.pi*timestamp_hour/hours_in_day
sin_hour = np.sin(hour_angle).astype(np.float32)
cos_hour = np.cos(hour_angle).astype(np.float32)

day_angle = 2*np.pi*timestamp_day/days_in_month
sin_day = np.sin(day_angle).astype(np.float32)
cos_day = np.cos(day_angle).astype(np.float32)

month_angle = 2*np.pi*timestamp_month/months_in_year
sin_month = np.sin(month_angle).astype(np.float32)
cos_month = np.cos(month_angle).astype(np.float32)

In [16]:
# add one dataframe column per positional encoding component
# every array is converted to a list before being assigned to its column
positional_encoding_columns = (
    ('sin_hour', sin_hour),
    ('cos_hour', cos_hour),
    ('sin_day', sin_day),
    ('cos_day', cos_day),
    ('sin_month', sin_month),
    ('cos_month', cos_month),
)
for column_name, encoding_array in positional_encoding_columns:
    ts_df[column_name] = list(encoding_array)

In [17]:
# review the final time series dataframe
ts_df.head()

Unnamed: 0_level_0,kw_scaled,sin_hour,cos_hour,sin_day,cos_day,sin_month,cos_month
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2016-01-01 00:00:00,0.274317,0.0,1.0,0.207912,0.978148,0.5,0.866025
2016-01-01 01:00:00,0.217363,0.258819,0.965926,0.207912,0.978148,0.5,0.866025
2016-01-01 02:00:00,0.168545,0.5,0.866025,0.207912,0.978148,0.5,0.866025
2016-01-01 03:00:00,0.122996,0.707107,0.707107,0.207912,0.978148,0.5,0.866025
2016-01-01 04:00:00,0.08044,0.866025,0.5,0.207912,0.978148,0.5,0.866025


In [18]:
# evaluation stage is not used for TPU-based training,
# however, evaluation dataset might be useful to get stats from CPU-based training
stages = ['train', 'eval', 'test']

In [19]:
# split data set into train/eval/test at time series level
# to avoid data overlapping at SLDB level
split = np.array([0.8, 0.9, 1.0])

In [20]:
# get the row positions that separate the train, eval, and test portions of the time series
# the number of rows (shape[0]) is used because the dataframe now has more than one column,
# and count() would return one value per column
# the built-in int() replaces np.int, a deprecated alias that is no longer available
# in recent NumPy releases; the truncation towards zero is the same
train_eval_limit = int(ts_df.shape[0]*split[0])
eval_test_limit = int(ts_df.shape[0]*split[1])

In [21]:
# a dictionary to manage the time series for the different model stages
ts = dict()

In [22]:
# get the time series portion for train set: the rows before the train/eval limit,
# selected explicitly by position with .iloc
ts['train'] = ts_df.iloc[:train_eval_limit]
# count() returns the number of non-null values per column; .iloc[0] takes the value of the
# first column by position (plain [0] on a label-indexed Series is deprecated in recent pandas)
print('{0} lectures in train time series from {1} to {2}'.format(ts['train'].count().iloc[0],
                                                                 ts['train'].index[0],
                                                                 ts['train'].index[-1]))

18103 lectures in train time series from 2016-01-01 00:00:00 to 2018-01-24 08:00:00


In [23]:
# get the time series portion for eval set: the rows between the train/eval and
# eval/test limits, selected explicitly by position with .iloc
ts['eval'] = ts_df.iloc[train_eval_limit:eval_test_limit]
# count() returns the number of non-null values per column; .iloc[0] takes the value of the
# first column by position (plain [0] on a label-indexed Series is deprecated in recent pandas)
print('{0} lectures in eval time series from {1} to {2}'.format(ts['eval'].count().iloc[0],
                                                                ts['eval'].index[0],
                                                                ts['eval'].index[-1]))

2263 lectures in eval time series from 2018-01-24 09:00:00 to 2018-04-28 16:00:00


In [24]:
# get the time series portion for test set: the rows from the eval/test limit onwards,
# selected explicitly by position with .iloc
ts['test'] = ts_df.iloc[eval_test_limit:]
# count() returns the number of non-null values per column; .iloc[0] takes the value of the
# first column by position (plain [0] on a label-indexed Series is deprecated in recent pandas)
print('{} lectures in test time series from {} to {}'.format(ts['test'].count().iloc[0],
                                                             ts['test'].index[0],
                                                             ts['test'].index[-1]))

2263 lectures in test time series from 2018-04-28 17:00:00 to 2018-07-31 23:00:00


In [26]:
# one-hot encoding is not required for autoregressive transformer
# then, comment the following function

In [27]:
# a function to one-hot encode a timestamp
# def one_hot_encode(timestamp):
#     # input: a timestamp
#     # output: a 7-bit list encoding the week-day, and a 24-bit list encoding the day-hour
#     fv_weekday = np.zeros(7)
#     fv_hour = np.zeros(24)
#     fv_weekday[timestamp.weekday()] = 1.
#     fv_hour[timestamp.hour] = 1.
#     return list(fv_weekday), list(fv_hour)

In [28]:
# start prototype for building SLDB for transformer

In [29]:
# start with time series for training set
# how many readings (rows) and columns are there in the time series?
ts['train'].shape

(18103, 7)

In [30]:
ts['train']['kw_scaled']

timestamp
2016-01-01 00:00:00    0.274317
2016-01-01 01:00:00    0.217363
2016-01-01 02:00:00    0.168545
2016-01-01 03:00:00    0.122996
2016-01-01 04:00:00    0.080440
                         ...   
2018-01-24 04:00:00    0.073374
2018-01-24 05:00:00    0.084031
2018-01-24 06:00:00    0.180768
2018-01-24 07:00:00    0.264623
2018-01-24 08:00:00    0.305140
Name: kw_scaled, Length: 18103, dtype: float64

In [31]:
ts['train']['sin_hour']

timestamp
2016-01-01 00:00:00    0.000000
2016-01-01 01:00:00    0.258819
2016-01-01 02:00:00    0.500000
2016-01-01 03:00:00    0.707107
2016-01-01 04:00:00    0.866025
                         ...   
2018-01-24 04:00:00    0.866025
2018-01-24 05:00:00    0.965926
2018-01-24 06:00:00    1.000000
2018-01-24 07:00:00    0.965926
2018-01-24 08:00:00    0.866025
Name: sin_hour, Length: 18103, dtype: float64

In [53]:
# is it more efficient to save arrays or tensors to TFRecord, then parse them into TPUEstimator?

In [54]:
# https://stackoverflow.com/questions/62513518/how-to-save-a-tensor-to-tfrecord

In [65]:
x = tf.constant([[2.1, 3.1, 3.1],
                 [1.025, 5.0255, 9.02555]], dtype='float32')

In [66]:
x

<tf.Tensor: shape=(2, 3), dtype=float32, numpy=
array([[2.1    , 3.1    , 3.1    ],
       [1.025  , 5.0255 , 9.02555]], dtype=float32)>

In [67]:
x2 = tf.io.serialize_tensor(x)

In [68]:
x2

<tf.Tensor: shape=(), dtype=string, numpy=b'\x08\x01\x12\x08\x12\x02\x08\x02\x12\x02\x08\x03"\x18ff\x06@ffF@ffF@33\x83?\xe5\xd0\xa0@\xa7h\x10A'>

In [69]:
record_file = 'temp.tfrecord'

In [70]:
with tf.io.TFRecordWriter(record_file) as writer:
    # get value with .numpy()
    writer.write(x2.numpy())

In [71]:
def parse_tensor_f32(x):
    """Parse a serialized tensor back into a tensor of type tf.float32."""
    return tf.io.parse_tensor(x, tf.float32)

In [72]:
# read back the file written above; the path comes from record_file, so that the writer
# and the reader cannot drift apart if the file name changes
dataset = tf.data.TFRecordDataset(record_file).map(parse_tensor_f32)

In [73]:
dataset

<MapDataset shapes: <unknown>, types: tf.float32>

In [74]:
for row in dataset:
    tf.print(row)

[[2.1 3.1 3.1]
 [1.025 5.0255 9.02555]]


In [75]:
recovered_tensors_list = [row for row in dataset]

In [79]:
recovered_tensors_list[0][:1]

<tf.Tensor: shape=(1, 3), dtype=float32, numpy=array([[2.1, 3.1, 3.1]], dtype=float32)>

In [None]:
# also review this link to pass directly from NumPy arrays to TFRecord
# https://stackoverflow.com/questions/45427637/numpy-to-tfrecords-is-there-a-more-simple-way-to-handle-batch-inputs-from-tfrec/45428167#45428167

In [None]:
# SLDB for transformer has the following structure:
# features: kw_scaled, sin_hour, cos_hour, sin_day, cos_day, sin_month, cos_month (?, 168, 7)
# targets: kw_scaled, sin_hour, cos_hour, sin_day, cos_day, sin_month, cos_month (?, 168, 7)

In [None]:
# data structure to convert to TFRecords: list of floats

In [32]:
# build all the possible sub-series of sldb['embedding']['hourly'] elements (the embedding dimension)

In [38]:
dataset = ts['train']

In [39]:
m = sldb['embedding']['hourly']

In [40]:
variable = 'kw_scaled'

In [51]:
# collect windows of m consecutive values of the selected variable
# 'features' is created here so that the cell runs on a fresh kernel; 'sin_hour' is
# deliberately not re-bound, as that name holds a positional encoding array
# built earlier in the notebook
features = list()

# prototype: the trailing [:1] restricts the loop to the first window only
for start_value in range(dataset.shape[0] - m + 1)[:1]:
    end_value = start_value + m
    # .iloc selects the window by row position, regardless of the timestamp index
    features.append(list(dataset[variable].iloc[start_value:end_value]))

In [52]:
features

[[0.2743168806216446,
  0.21736327928290877,
  0.1685451319754877,
  0.1229963510152855,
  0.08044035730610422,
  0.049252771601447254,
  0.06771694414961615,
  0.04966028029780689,
  0.023158269869922754,
  0.06114179908116879,
  0.16809888671103324,
  0.2312611851840375,
  0.28965888578135557,
  0.3168271651804737,
  0.3402728603856615,
  0.3714852374938993,
  0.36978470215452797,
  0.3824701534742829,
  0.4496292910433308,
  0.5634892351077267,
  0.5334598727891104,
  0.5155023745516245,
  0.4356089775870218,
  0.32928794440527764,
  0.23602655779108606,
  0.14696731408384156,
  0.09737443541452018,
  0.04251570767836266,
  0.044440915112684776,
  0.06843589485345969,
  0.127692772531125,
  0.15101760964385602,
  0.18723320188724568,
  0.24428829303439048,
  0.30788211687597355,
  0.3780053766356517,
  0.4135345568923978,
  0.43940206233488555,
  0.4341393121934971,
  0.40126436158262146,
  0.37682158711466807,
  0.4163840188414669,
  0.471797454232745,
  0.5937355222076746,
  0.594

In [19]:
def _one_hot_encode_timestamp(timestamp):
    """Return a 7-element week-day one-hot list and a 24-element day-hour one-hot list."""
    fv_weekday = np.zeros(7)
    fv_hour = np.zeros(24)
    fv_weekday[timestamp.weekday()] = 1.
    fv_hour[timestamp.hour] = 1.
    return list(fv_weekday), list(fv_hour)


def make_sldb_for_autoregressive_transformer(time_series, m, tau, n_targets):
    """
    Build the examples of a supervised learning database from a time series.

    Input:
           time_series: original time series (a pandas Series indexed by timestamps)
           m: embedding dimension (number of lagged values in each feature vector)
           tau: lag (step, in rows, between consecutive lagged values)
           n_targets: number of targets to predict
    Output (five lists, with one element per example):
           features: the m values taken every tau rows before the first target
           targets: the n_targets consecutive values that follow the features
           timestamps: the target timestamps, encoded as bytes ('YYYY-MM-DD HH:MM:SS')
           oh_wds: 7-element one-hot lists for the week-day of the first target timestamp
           oh_dhs: 24-element one-hot lists for the day-hour of the first target timestamp
    """
    # every output list is local to the call, so nothing leaks between calls
    # and no list has to exist in the notebook namespace beforehand
    features = []
    targets = []
    timestamps = []
    oh_wds = []
    oh_dhs = []
    # i is the row position of the first target of each example
    sequence = range(m * tau, time_series.shape[0] - n_targets + 1)
    for i in sequence:
        features.append(list(time_series.iloc[(i - m * tau):i:tau]))
        targets.append(list(time_series.iloc[i:(i + n_targets):1]))
        # get the timestamps for the target values
        targets_timestamps_list = list(time_series.index[i:(i + n_targets):1])
        # timestamps are extracted as bytes for TFRecord persistence
        timestamps.append([timestamp.strftime("%Y-%m-%d %H:%M:%S").encode()
                           for timestamp in targets_timestamps_list])
        # one-hot vectors are built from the first target timestamp only
        oh_wd_vector, oh_dh_vector = _one_hot_encode_timestamp(targets_timestamps_list[0])
        oh_wds.append(oh_wd_vector)
        oh_dhs.append(oh_dh_vector)

    return features, targets, timestamps, oh_wds, oh_dhs

In [20]:
# create a dictionary to temporarily store the following SLDBs:
# train (hourly, daily, weekly, targets, timestamps)
# test (hourly, daily, weekly, targets, timestamps)
# eval (same items): not used for TPU-based training, but kept to get stats from CPU-based training

In [21]:
# one independent, empty 'hourly' container per stage; it is filled when the SLDBs are built
sldb_full = {stage_name: {'hourly': {}} for stage_name in ('train', 'eval', 'test')}

In [23]:
# data resolutions to iterate on (only the hourly one is configured)
resolutions = ['hourly']

In [24]:
# BUILD ALL THE SLDBs!!!
for stage in stages:
    # train, eval, test
    for resolution in resolutions:
        # the builder defined in this notebook returns five lists, in this order:
        # features, targets, timestamps, week-day one-hot vectors, day-hour one-hot vectors
        (sldb_full[stage][resolution]['features'],
         sldb_full[stage][resolution]['targets'],
         sldb_full[stage][resolution]['timestamps'],
         sldb_full[stage][resolution]['oh_wds'],
         sldb_full[stage][resolution]['oh_dhs']) = make_sldb_for_autoregressive_transformer(
            ts[stage][variable],
            sldb['embedding'][resolution],
            sldb['tau'][resolution],
            sldb['no_targets'])

In [25]:
# verify that the target is stored as a no_targets-element list
len(sldb_full['test']['hourly']['targets'][0]) == sldb['no_targets']

True

In [26]:
# a list to iterate on the sldb items
items = ['features', 'targets', 'timestamps', 'oh_wds', 'oh_dhs']

In [27]:
# statistics are collected per stage, in one independent and initially empty 'hourly' dictionary
sldb['stats'] = {stage_name: {'hourly': {}} for stage_name in ('train', 'eval', 'test')}

In [28]:
# report statistics on stages and resolutions of SLDBs
# and persist them to the sldb['stats'] level
for stage in stages:
    for resolution in resolutions:
        resolution_sldb = sldb_full[stage][resolution]
        resolution_stats = sldb['stats'][stage][resolution]
        # timestamps are persisted as bytes, as in b'YYYY-MM-DD HH:MM:SS'
        # but are required as strings, as in 'YYYY-MM-DD HH:MM:SS'
        # both values are the same for every item, so they are decoded once per resolution;
        # each one is the first target timestamp of the first / last example
        from_timestamp_str = resolution_sldb['timestamps'][0][0].decode()
        to_timestamp_str = resolution_sldb['timestamps'][-1][0].decode()
        for item in items:
            # fill the values in the stats sub-dictionary
            item_count = len(resolution_sldb[item])
            resolution_stats[item] = item_count
            # and log them
            print('{0} {3} / {1} / {2} from {4} to {5}'.format(item_count,
                                                               stage,
                                                               resolution,
                                                               item,
                                                               from_timestamp_str,
                                                               to_timestamp_str))
        resolution_stats['from'] = from_timestamp_str
        resolution_stats['to'] = to_timestamp_str

18016 features / train / hourly from 2016-01-03 16:00:00 to 2018-01-23 09:00:00
18016 targets / train / hourly from 2016-01-03 16:00:00 to 2018-01-23 09:00:00
18016 timestamps / train / hourly from 2016-01-03 16:00:00 to 2018-01-23 09:00:00
18016 oh_wds / train / hourly from 2016-01-03 16:00:00 to 2018-01-23 09:00:00
18016 oh_dhs / train / hourly from 2016-01-03 16:00:00 to 2018-01-23 09:00:00
2176 features / eval / hourly from 2018-01-27 01:00:00 to 2018-04-27 17:00:00
2176 targets / eval / hourly from 2018-01-27 01:00:00 to 2018-04-27 17:00:00
2176 timestamps / eval / hourly from 2018-01-27 01:00:00 to 2018-04-27 17:00:00
2176 oh_wds / eval / hourly from 2018-01-27 01:00:00 to 2018-04-27 17:00:00
2176 oh_dhs / eval / hourly from 2018-01-27 01:00:00 to 2018-04-27 17:00:00
2176 features / test / hourly from 2018-05-01 09:00:00 to 2018-07-31 00:00:00
2176 targets / test / hourly from 2018-05-01 09:00:00 to 2018-07-31 00:00:00
2176 timestamps / test / hourly from 2018-05-01 09:00:00 to 2

In [29]:
# the smallest number of feature rows across resolutions is the count every dataset is aligned to
for stage in stages:
    stage_stats = sldb['stats'][stage]
    feature_counts = [stage_stats[resolution]['features'] for resolution in resolutions]
    stage_stats['trimmed_to_count'] = min(feature_counts)
    print('Dataset on {} stage was trimmed to {} rows.'.format(stage, stage_stats['trimmed_to_count']))

Dataset on train stage was trimmed to 18016 rows.
Dataset on eval stage was trimmed to 2176 rows.
Dataset on test stage was trimmed to 2176 rows.


In [30]:
# a new dictionary with final, trimmed data
# hourly, targets, timestamps, oh_wds, oh_dhs are added to every stage later
tfrecords = {stage_name: {} for stage_name in ('train', 'eval', 'test')}

In [31]:
for stage in stages:
    # number of trailing rows to keep for this stage
    value_to_trim = sldb['stats'][stage]['trimmed_to_count']
    hourly_sldb = sldb_full[stage]['hourly']
    # the hourly features are stored under the name of their resolution
    tfrecords[stage]['hourly'] = hourly_sldb['features'][-value_to_trim:]
    # the remaining items keep their names; they are taken from the hourly dataset
    for item_name in ('targets', 'timestamps', 'oh_wds', 'oh_dhs'):
        tfrecords[stage][item_name] = hourly_sldb[item_name][-value_to_trim:]

In [34]:
# verify again specs for the contents in tfrecords dictionary
tfrecords['test']['targets'][0]

[0.27823004873060264,
 0.40741262967066194,
 0.49106657266592824,
 0.5816427403797735,
 0.6710010303927116,
 0.6887106145943898,
 0.614355772136012,
 0.5468139172741853,
 0.5245032035141817,
 0.47839351704796385,
 0.5006414775676535,
 0.5530295869907114,
 0.5636542528878115,
 0.5204056493410911,
 0.3663867304012334,
 0.24111499337604692,
 0.16053983281297213,
 0.12441101048211534,
 0.11958365936611504,
 0.10361722072871249,
 0.11260023086994597,
 0.2184184633978169,
 0.3035010110244274,
 0.3366866289114252]

In [35]:
# a helper that encodes float values for serialized examples
def _float_feature_from_list_of_values(list_of_values):
    """Wrap a list of floats / doubles in a tf.train.Feature that holds a FloatList."""
    float_list = tf.train.FloatList(value=list_of_values)
    return tf.train.Feature(float_list=float_list)

In [36]:
# a helper that encodes byte values for serialized examples
def _bytes_feature_from_list_of_values(list_of_values):
    """Wrap a list of strings / bytes in a tf.train.Feature that holds a BytesList."""
    bytes_list = tf.train.BytesList(value=list_of_values)
    return tf.train.Feature(bytes_list=bytes_list)

In [37]:
# ToDo: pass one-hot vectors as _int_features and decode when reading dataset???

In [38]:
# the SLDB specifications (embedding, tau, number of targets), each zero-padded to three digits,
# form the last part of the SLDB identifier
spec_values = (sldb['embedding']['hourly'], sldb['tau']['hourly'], sldb['no_targets'])
sldb_specs = '{:03d}{:03d}_{:03d}'.format(*spec_values)

sldb_specs

'064001_024'

In [39]:
# build a time-based identifier for the SLDB
sldb_identifier = '{}_{}'.format(sldb['ts'], sldb_specs)
sldb_identifier

'CPE04115_H_kw_20201021084001_064001_024'

In [40]:
sldb_dir = '/home/developer/gcp/cbidmltsf/sldbs/{}'.format(sldb_identifier)

In [41]:
# create the SLDB directory; makedirs also creates any missing parent directory,
# and it still raises FileExistsError when the SLDB directory itself is already there
try:
    os.makedirs(sldb_dir)
    print('Directory {} was created.'.format(sldb_dir))
except FileExistsError:
    print('Error: directory {} already exists.'.format(sldb_dir))

Directory /home/developer/gcp/cbidmltsf/sldbs/CPE04115_H_kw_20201021084001_064001_024 was created.


In [42]:
# now persist SLDBs as TFRecords, one file per stage
for stage in stages:
    N_ROWS = sldb['stats'][stage]['trimmed_to_count']
    filename = '{}/{}.tfrecord'.format(sldb_dir, stage)
    stage_records = tfrecords[stage]
    with tf.io.TFRecordWriter(filename) as writer:
        # every row of the trimmed dataset becomes one serialized example
        for row in range(N_ROWS):
            # individual feature definitions: float lists for the values and one-hot vectors,
            # a bytes list for the target timestamps
            feature_map = {
                'hourly': _float_feature_from_list_of_values(stage_records['hourly'][row]),
                'target': _float_feature_from_list_of_values(stage_records['targets'][row]),
                'oh_wd': _float_feature_from_list_of_values(stage_records['oh_wds'][row]),
                'oh_dh': _float_feature_from_list_of_values(stage_records['oh_dhs'][row]),
                'timestamp': _bytes_feature_from_list_of_values(stage_records['timestamps'][row]),
            }
            example = tf.train.Example(features=tf.train.Features(feature=feature_map))
            writer.write(example.SerializeToString())

In [43]:
# build a path for the json file
json_filename = '{}/sldb.json'.format(sldb_dir)

In [44]:
# persist the final, compact dictionary to JSON
# the file handle gets its own name, so that 'filename' keeps referring to a path string
with open(json_filename, 'w') as json_file:
    json.dump(sldb, json_file, indent=4)

In [45]:
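# do not forget to sync sldbs/ from local to GS after the previous operations!
# caution: the -d flag of gsutil rsync deletes the objects in the destination that are not present in the local directory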

In [46]:
!gsutil rsync -d -r /home/developer/gcp/cbidmltsf/sldbs gs://cbidmltsf/sldbs

Building synchronization state...
Starting synchronization...
Copying file:///home/developer/gcp/cbidmltsf/sldbs/CPE04115_H_kw_20201021084001_064001_024/eval.tfrecord [Content-Type=application/octet-stream]...
Copying file:///home/developer/gcp/cbidmltsf/sldbs/CPE04115_H_kw_20201021084001_064001_024/sldb.json [Content-Type=application/json]...
Copying file:///home/developer/gcp/cbidmltsf/sldbs/CPE04115_H_kw_20201021084001_064001_024/test.tfrecord [Content-Type=application/octet-stream]...
Copying file:///home/developer/gcp/cbidmltsf/sldbs/CPE04115_H_kw_20201021084001_064001_024/train.tfrecord [Content-Type=application/octet-stream]...
\ [4 files][ 23.2 MiB/ 23.2 MiB]    2.2 MiB/s                                   
Operation completed over 4 objects/23.2 MiB.                                     
