In [1]:
import json
import numpy as np
import pandas as pd
import os

In [2]:
from datetime import timedelta

In [3]:
from math import sqrt
from sklearn.metrics import mean_squared_error, mean_absolute_error

In [4]:
from bokeh.plotting import figure, show, output_file, save

from bokeh.io import output_notebook

from bokeh.models import Span, Range1d

from bokeh.palettes import d3

output_notebook()

In [5]:
pd.set_option('display.max_rows', 20)

In [6]:
def symmetric_mean_absolute_percentage_error(targets, predictions):
    '''
    Symmetric mean absolute percentage error (SMAPE), returned as a fraction
    (multiply by 100 to get a percentage).

    Each term is 2*|prediction - target| / (|target| + |prediction|); the terms are
    summed and divided by the length of the first axis (the mean for 1-D inputs).
    A term where target and prediction are both zero counts as zero error.

    targets: a list (or array-like) with the actual values
    predictions: a list (or array-like) with the predicted values

    returns: the SMAPE value
    raises: ValueError if targets and predictions do not have the same shape
    '''
    # lists (or Pandas series) to NumPy arrays
    targets, predictions = np.asarray(targets), np.asarray(predictions)
    # verify predictions and targets have the same shape; fail loudly instead of
    # silently returning None, which only surfaces later as an unrelated TypeError
    if predictions.shape != targets.shape:
        raise ValueError(
            'targets and predictions must have the same shape, got {} and {}'.format(
                targets.shape, predictions.shape))

    absolute_errors = 2*np.abs(predictions - targets)
    scales = np.abs(targets) + np.abs(predictions)
    # a zero scale means target and prediction are both zero, so the matching error
    # is zero as well: dividing by 1 yields a zero term instead of 0/0 = NaN
    scales[scales == 0] = 1

    return np.sum(absolute_errors / scales)/predictions.shape[0]

In [7]:
# build a list to select specific pickle files per model architecture;
# file names follow '<model_id>_<execution>_test_024.pkl' with a two-digit execution number
# (the same pattern applies to the 'DMSLSTM_TPU_006' and 'BSCTRFM_TPU_010' architectures,
# executions 00 to 09, which are not selected here)
selected_pkl_list = [
    'EDSLSTM_TPU_013_{:02d}_test_024.pkl'.format(execution_number)
    for execution_number in range(10)
]

In [8]:
# a Pandas dataframe to store all predictions detail items;
# it starts empty with the columns listed here, and the data is loaded
# from the selected pickle files in a later cell
global_df_columns = [
    'model_id', 'execution', 'dataset',
    'string_timestamps', 'predictions', 'targets',
    'mae', 'rmse', 'smape'
]

global_df = pd.DataFrame(columns=global_df_columns)

In [9]:
global_df

Unnamed: 0,model_id,execution,dataset,string_timestamps,predictions,targets,mae,rmse,smape


In [10]:
# collect prediction items from all available pickle files into a single dataframe
# for available_pickle in avail_pkl_list:
#     buffer_df = pd.read_pickle('../database/predictions_detail/{}'.format(available_pickle))
#     # append buffer to final dataframe
#     df = df.append(buffer_df, ignore_index=True)  

In [11]:
# collect prediction items from all selected pickle files into a single dataframe
# NOTE: unpickling can execute arbitrary code, only load pickle files produced by this project
pickle_frames = [
    pd.read_pickle('../database/predictions_detail/{}'.format(selected_pickle))
    for selected_pickle in selected_pkl_list
]

# DataFrame.append was removed in pandas 2.0; a single pd.concat call replaces it, avoids
# re-copying the accumulated rows on every iteration and, as the dataframe is rebuilt from
# the files, re-running this cell does not duplicate rows
if pickle_frames:
    global_df = pd.concat(pickle_frames, ignore_index=True)
    # expected columns first, then any extra column found in the files (e.g. 'inference');
    # an expected column missing from the files is kept and filled with NaN
    extra_columns = [column for column in global_df.columns if column not in global_df_columns]
    global_df = global_df.reindex(columns=global_df_columns + extra_columns)
else:
    # nothing selected: keep an empty dataframe with the expected columns
    global_df = pd.DataFrame(columns=global_df_columns)

In [12]:
global_df

Unnamed: 0,model_id,execution,dataset,string_timestamps,predictions,targets,mae,rmse,smape,inference
0,EDSLSTM_TPU_013,0,test,"[2018-05-03 16:00:00, 2018-05-03 17:00:00, 201...","[2994.7255859375, 2945.77197265625, 2856.72216...","[2935.38671875, 2798.293212890625, 2750.489990...",54.286067,70.648563,0.020626,024
1,EDSLSTM_TPU_013,0,test,"[2018-05-03 17:00:00, 2018-05-03 18:00:00, 201...","[2919.458984375, 2855.485107421875, 2812.19750...","[2798.293212890625, 2750.489990234375, 2751.09...",57.175028,79.449794,0.021461,024
2,EDSLSTM_TPU_013,0,test,"[2018-05-03 18:00:00, 2018-05-03 19:00:00, 201...","[2825.159912109375, 2809.958251953125, 2908.29...","[2750.489990234375, 2751.091552734375, 2919.40...",61.477341,90.464846,0.022772,024
3,EDSLSTM_TPU_013,0,test,"[2018-05-03 19:00:00, 2018-05-03 20:00:00, 201...","[2808.2451171875, 2912.028564453125, 2964.9411...","[2751.091552734375, 2919.401611328125, 2951.41...",66.452530,99.042837,0.024317,024
4,EDSLSTM_TPU_013,0,test,"[2018-05-03 20:00:00, 2018-05-03 21:00:00, 201...","[2862.04541015625, 2951.86279296875, 2746.2924...","[2919.401611328125, 2951.41650390625, 2723.131...",71.849614,104.112965,0.026099,024
...,...,...,...,...,...,...,...,...,...,...
21205,EDSLSTM_TPU_013,9,test,"[2018-07-30 20:00:00, 2018-07-30 21:00:00, 201...","[3057.451171875, 2844.880859375, 2518.390625, ...","[2799.75, 2921.840087890625, 2782.763427734375...",92.538940,126.555024,0.038371,024
21206,EDSLSTM_TPU_013,9,test,"[2018-07-30 21:00:00, 2018-07-30 22:00:00, 201...","[2891.31884765625, 2521.29443359375, 2162.1452...","[2921.840087890625, 2782.763427734375, 2480.18...",80.237335,112.259492,0.034319,024
21207,EDSLSTM_TPU_013,9,test,"[2018-07-30 22:00:00, 2018-07-30 23:00:00, 201...","[2589.351318359375, 2176.74658203125, 1943.031...","[2782.763427734375, 2480.181640625, 2127.68823...",79.562047,107.152354,0.033709,024
21208,EDSLSTM_TPU_013,9,test,"[2018-07-30 23:00:00, 2018-07-31 00:00:00, 201...","[2267.000732421875, 1967.896484375, 1818.34655...","[2480.181640625, 2127.688232421875, 1933.56994...",67.925257,91.302114,0.028471,024


In [13]:
# day-1 predictions for this model
global_df['string_timestamps'][0]

array(['2018-05-03 16:00:00', '2018-05-03 17:00:00',
       '2018-05-03 18:00:00', '2018-05-03 19:00:00',
       '2018-05-03 20:00:00', '2018-05-03 21:00:00',
       '2018-05-03 22:00:00', '2018-05-03 23:00:00',
       '2018-05-04 00:00:00', '2018-05-04 01:00:00',
       '2018-05-04 02:00:00', '2018-05-04 03:00:00',
       '2018-05-04 04:00:00', '2018-05-04 05:00:00',
       '2018-05-04 06:00:00', '2018-05-04 07:00:00',
       '2018-05-04 08:00:00', '2018-05-04 09:00:00',
       '2018-05-04 10:00:00', '2018-05-04 11:00:00',
       '2018-05-04 12:00:00', '2018-05-04 13:00:00',
       '2018-05-04 14:00:00', '2018-05-04 15:00:00'], dtype='<U19')

In [14]:
# day-7 predictions for this model
global_df['string_timestamps'][144]

array(['2018-05-09 16:00:00', '2018-05-09 17:00:00',
       '2018-05-09 18:00:00', '2018-05-09 19:00:00',
       '2018-05-09 20:00:00', '2018-05-09 21:00:00',
       '2018-05-09 22:00:00', '2018-05-09 23:00:00',
       '2018-05-10 00:00:00', '2018-05-10 01:00:00',
       '2018-05-10 02:00:00', '2018-05-10 03:00:00',
       '2018-05-10 04:00:00', '2018-05-10 05:00:00',
       '2018-05-10 06:00:00', '2018-05-10 07:00:00',
       '2018-05-10 08:00:00', '2018-05-10 09:00:00',
       '2018-05-10 10:00:00', '2018-05-10 11:00:00',
       '2018-05-10 12:00:00', '2018-05-10 13:00:00',
       '2018-05-10 14:00:00', '2018-05-10 15:00:00'], dtype='<U19')

In [15]:
# a Pandas dataframe to store individual prediction-target pairs,
# one row per timestamp for a given model and execution; it is populated in a later cell
buffer_df_columns = [
    'timestamp',
    'model_id', 'execution',
    'prediction', 'target']

predictions_df = pd.DataFrame(columns=buffer_df_columns)

In [16]:
predictions_df

Unnamed: 0,timestamp,model_id,execution,prediction,target


In [17]:
# use the prediction values for the three architectures for local forecasting comparison
# plot a number of 24-hour ahead predictions, starting at the first prediction available,
# against ground truth

In [18]:
# get a filtered dataframe from a given model-execution-dataset-inference combination
dataset, inference = 'test', '024'

# rows of the filtered dataframe used as prediction windows (every 24th row, seven windows)
start_indexes = [0, 24, 48, 72, 96, 120, 144]

# one small dataframe per (execution, window); they are concatenated once at the end
# instead of growing predictions_df inside the loop, which copies all rows on every pass
window_frames = []

for model_id in ['EDSLSTM_TPU_013']:
    for execution in [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]:
        flag = (
            global_df.model_id.eq(model_id)
            & global_df.execution.eq(execution)
            & global_df.dataset.eq(dataset)
            & global_df.inference.eq(inference)
        )
        # reset index to use the same row, drop the index column
        filtered_df = global_df[flag].reset_index(drop=True)

        # pass the array predictions and array targets for selected intervals to dataframe columns
        for start_index in start_indexes:
            window = filtered_df.iloc[start_index]

            # the columns are spelled out here (not taken from buffer_df_columns) because
            # that name is rebound to a different schema further down the notebook;
            # scalar values (model_id, execution) are broadcast to the length of the arrays
            window_frames.append(pd.DataFrame({
                'timestamp': pd.to_datetime(window['string_timestamps']),
                'model_id': window['model_id'],
                'execution': window['execution'],
                'prediction': window['predictions'],
                'target': window['targets'],
            }))

# the dataframe is rebuilt from scratch with a fresh 0..n-1 index,
# so re-running this cell does not duplicate rows
predictions_df = pd.concat(window_frames, ignore_index=True)

In [19]:
predictions_df

Unnamed: 0,timestamp,model_id,execution,prediction,target
0,2018-05-03 16:00:00,EDSLSTM_TPU_013,0,2994.725586,2935.386719
1,2018-05-03 17:00:00,EDSLSTM_TPU_013,0,2945.771973,2798.293213
2,2018-05-03 18:00:00,EDSLSTM_TPU_013,0,2856.722168,2750.489990
3,2018-05-03 19:00:00,EDSLSTM_TPU_013,0,2810.001953,2751.091553
4,2018-05-03 20:00:00,EDSLSTM_TPU_013,0,2944.323242,2919.401611
...,...,...,...,...,...
1675,2018-05-10 11:00:00,EDSLSTM_TPU_013,9,2617.711670,2599.053223
1676,2018-05-10 12:00:00,EDSLSTM_TPU_013,9,2698.030029,2709.363281
1677,2018-05-10 13:00:00,EDSLSTM_TPU_013,9,2742.030762,2810.871582
1678,2018-05-10 14:00:00,EDSLSTM_TPU_013,9,2760.031006,2839.586426


In [20]:
# columns of the dataframe that collects the BigQuery ARIMA forecasts
bigquery_arima_columns = [
    'forecast_timestamp',
    'forecast_value',
    'standard_error',
    'confidence_level',
    'prediction_interval_lower_bound',
    'prediction_interval_upper_bound',
    'confidence_interval_lower_bound',
    'confidence_interval_upper_bound'
]

In [21]:
# empty dataframe with the ARIMA forecast columns; the forecasts are loaded from CSV files in a later cell
bigquery_edslstm_df = pd.DataFrame(columns=bigquery_arima_columns)

In [22]:
bigquery_edslstm_df

Unnamed: 0,forecast_timestamp,forecast_value,standard_error,confidence_level,prediction_interval_lower_bound,prediction_interval_upper_bound,confidence_interval_lower_bound,confidence_interval_upper_bound


In [23]:
# one ARIMA forecast file per day, numbered 01 to 07
csv_files = [
    'arima_edslstm_forecast_{:02d}.csv'.format(day_number)
    for day_number in range(1, 8)
]

In [24]:
# read every ARIMA forecast file once
# NOTE(review): absolute, machine-specific path; consider a path relative to the notebook,
# as used for the pickle files ('../database/predictions_detail/')
forecast_frames = [
    pd.read_csv(
        '/home/developer/gcp/cbidmltsf/database/bigquery/{}'.format(csv_file)
    )
    for csv_file in csv_files
]

# a single concatenation with a fresh 0..n-1 index replaces the concat-in-a-loop pattern;
# the dataframe is rebuilt from the files, so re-running this cell does not duplicate rows
if forecast_frames:
    bigquery_edslstm_df = pd.concat(forecast_frames, ignore_index=True)
    # expected columns first, then any extra column found in the files;
    # an expected column missing from the files is kept and filled with NaN
    extra_columns = [
        column for column in bigquery_edslstm_df.columns if column not in bigquery_arima_columns
    ]
    bigquery_edslstm_df = bigquery_edslstm_df.reindex(columns=bigquery_arima_columns + extra_columns)
else:
    # no file listed: keep an empty dataframe with the expected columns
    bigquery_edslstm_df = pd.DataFrame(columns=bigquery_arima_columns)

In [25]:
bigquery_edslstm_df

Unnamed: 0,forecast_timestamp,forecast_value,standard_error,confidence_level,prediction_interval_lower_bound,prediction_interval_upper_bound,confidence_interval_lower_bound,confidence_interval_upper_bound
0,2018-05-03T16:00:00Z,2954.026482,60.669203,0.9,2854.342663,3053.710301,2854.342663,3053.710301
1,2018-05-03T17:00:00Z,2866.280454,70.187800,0.9,2750.956902,2981.604005,2750.956902,2981.604005
2,2018-05-03T18:00:00Z,2813.009191,74.943120,0.9,2689.872311,2936.146071,2689.872311,2936.146071
3,2018-05-03T19:00:00Z,2814.260236,84.105707,0.9,2676.068573,2952.451899,2676.068573,2952.451899
4,2018-05-03T20:00:00Z,2920.222781,88.824318,0.9,2774.278104,3066.167458,2774.278104,3066.167458
...,...,...,...,...,...,...,...,...
163,2018-05-10T11:00:00Z,2612.015469,118.201405,0.9,2417.802148,2806.228790,2417.802148,2806.228790
164,2018-05-10T12:00:00Z,2719.695063,119.546664,0.9,2523.271385,2916.118741,2523.271385,2916.118741
165,2018-05-10T13:00:00Z,2798.355831,120.876834,0.9,2599.746588,2996.965073,2599.746588,2996.965073
166,2018-05-10T14:00:00Z,2821.522553,122.192457,0.9,2620.751648,3022.293457,2620.751648,3022.293457


In [26]:
# timestamp for DMSLSTM_TPU_006 first prediction is '2018-06-26 00:00:00'
# daily interval endings are '2018-06-26 23:00:00', '2018-06-27 23:00:00', ..., '2018-07-02 23:00:00'
# BigQuery ARIMA is fed with the 8 previous weeks,
# starting on '2018-05-01 00:00:00' - '2018-06-25 23:00:00' (for the first 24-hour ahead prediction)


# timestamp for EDSLSTM_TPU_013 first prediction is '2018-05-03 16:00:00'
# daily interval endings are '2018-05-04 15:00:00', '2018-05-05 15:00:00', ..., '2018-05-10 15:00:00'
# BigQuery ARIMA is fed with the 8 previous weeks,
# prediction for day 1 is based on a model trained from '2018-03-08 16:00:00' to '2018-05-03 15:00:00'
# prediction for day 2 is based on a model trained from '2018-03-09 16:00:00' to '2018-05-04 15:00:00'
# prediction for day 3 is based on a model trained from '2018-03-10 16:00:00' to '2018-05-05 15:00:00'
# prediction for day 4 is based on a model trained from '2018-03-11 16:00:00' to '2018-05-06 15:00:00'
# prediction for day 5 is based on a model trained from '2018-03-12 16:00:00' to '2018-05-07 15:00:00'
# prediction for day 6 is based on a model trained from '2018-03-13 16:00:00' to '2018-05-08 15:00:00'
# prediction for day 7 is based on a model trained from '2018-03-14 16:00:00' to '2018-05-09 15:00:00'


# timestamp for BSCTRFM_TPU_010 first prediction is '2018-05-14 23:00:00'
# daily interval endings are '2018-05-15 22:00:00', '2018-05-16 22:00:00', ..., '2018-05-21 22:00:00'

In [27]:
# build separated dataframes with metrics per model and date interval

In [28]:
# columns of the per-model metrics dataframes: one row per model, execution and date interval
metrics_columns = [
    'model_id', 'execution', 'start_timestamp', 'end_timestamp', 'mae', 'rmse', 'smape'
]

In [29]:
# empty metrics dataframe with the metrics columns
# NOTE(review): edslstm_metrics_df is not referenced again in the visible part of the notebook;
# confirm it is still needed
edslstm_metrics_df = pd.DataFrame(columns=metrics_columns)
edslstm_metrics_df

Unnamed: 0,model_id,execution,start_timestamp,end_timestamp,mae,rmse,smape


In [30]:
# This cell calculates performance metrics for cumulative 1-day, 2-day, ..., up to 7-day periods,
# which is not really useful as all predictions are produced on a 24-hour basis;
# then markdown the cell and change metrics calculation to a day 1, day 2, ... up to day 7 scheme

model_id = 'DMSLSTM_TPU_006'

executions = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]

start_timestamp = '2018-06-26 00:00:00'

end_timestamps = [
    '2018-06-26 23:00:00',
    '2018-06-27 23:00:00',
    '2018-06-28 23:00:00',
    '2018-06-29 23:00:00',
    '2018-06-30 23:00:00',
    '2018-07-01 23:00:00',
    '2018-07-02 23:00:00',
]

# metric rows are collected in a list and turned into a dataframe once at the end;
# dmslstm_metrics_df is created by this cell, no other cell defines it
metrics_rows = []

for execution in executions:

    for end_timestamp in end_timestamps:

        flag = (
            predictions_df.model_id.eq(model_id)
            & predictions_df.execution.eq(execution)
            & predictions_df.timestamp.ge(start_timestamp)
            & predictions_df.timestamp.le(end_timestamp)
        )

        results_df = predictions_df[flag].reset_index(drop=True)

        # no prediction available for this combination (for instance when the pickle files
        # of this model were not selected): the scikit-learn metrics raise on empty input,
        # so the combination is skipped
        if results_df.empty:
            continue

        mae = mean_absolute_error(results_df.target, results_df.prediction)
        rmse = sqrt(mean_squared_error(results_df.target, results_df.prediction))
        smape = symmetric_mean_absolute_percentage_error(results_df.target, results_df.prediction)

        metrics_rows.append([
            model_id, execution,
            start_timestamp, end_timestamp,
            mae, rmse, smape
        ])

# empty (but with the expected columns) when no prediction of this model is loaded
dmslstm_metrics_df = pd.DataFrame(metrics_rows, columns=metrics_columns)

In [31]:
# execution with best metrics over all intervals
# dmslstm_metrics_df.groupby(['execution']).mean().style.highlight_min(color = 'lightgreen', axis = 0)

In [32]:
# execution with worst metrics over all intervals
# dmslstm_metrics_df.groupby(['execution']).mean().style.highlight_max(color = 'yellow', axis = 0)

In [33]:
# metrics average for 10 executions on different intervals
# dmslstm_metrics_df.groupby(['end_timestamp']).mean()

In [34]:
# metrics standard deviation for 10 executions on different intervals
# dmslstm_metrics_df.groupby(['end_timestamp']).std()

In [35]:
# now get metrics from BigQuery ARIMA
# use target values from results_df with execution = 0 (they are the same across executions)

In [36]:
# MAE of the BigQuery ARIMA forecast on seven consecutive 24-row windows;
# targets are read from the first 168 rows of predictions_df
arima_one_week_mae = []

for start_index in range(0, 168, 24):
    end_index = start_index + 24
    window = slice(start_index, end_index)

    mae = mean_absolute_error(
        predictions_df.target.iloc[window],
        bigquery_edslstm_df.forecast_value.iloc[window]
    )
    arima_one_week_mae.append(mae)

    print('ARIMA MAE for {} to {} interval is: {:.4f}'.format(start_index, end_index, mae))

ARIMA MAE for 0 to 24 interval is: 42.7504
ARIMA MAE for 24 to 48 interval is: 94.4938
ARIMA MAE for 48 to 72 interval is: 146.2935
ARIMA MAE for 72 to 96 interval is: 160.2514
ARIMA MAE for 96 to 120 interval is: 104.5032
ARIMA MAE for 120 to 144 interval is: 71.7451
ARIMA MAE for 144 to 168 interval is: 81.3132


In [37]:
# RMSE of the BigQuery ARIMA forecast on seven consecutive 24-row windows;
# targets are read from the first 168 rows of predictions_df
arima_one_week_rmse = []

for start_index in range(0, 168, 24):
    end_index = start_index + 24
    window = slice(start_index, end_index)

    squared_error = mean_squared_error(
        predictions_df.target.iloc[window],
        bigquery_edslstm_df.forecast_value.iloc[window]
    )
    rmse = sqrt(squared_error)
    arima_one_week_rmse.append(rmse)

    print('ARIMA RMSE for {} to {} interval is: {:.4f}'.format(start_index, end_index, rmse))

ARIMA RMSE for 0 to 24 interval is: 54.8031
ARIMA RMSE for 24 to 48 interval is: 112.5502
ARIMA RMSE for 48 to 72 interval is: 199.6776
ARIMA RMSE for 72 to 96 interval is: 167.9489
ARIMA RMSE for 96 to 120 interval is: 164.4347
ARIMA RMSE for 120 to 144 interval is: 94.2247
ARIMA RMSE for 144 to 168 interval is: 102.9821


In [38]:
# SMAPE (as a percentage) of the BigQuery ARIMA forecast on seven consecutive 24-row windows;
# targets are read from the first 168 rows of predictions_df
arima_one_week_smape = []

for start_index in range(0, 168, 24):
    end_index = start_index + 24
    window = slice(start_index, end_index)

    # the function returns a fraction, scale it to a percentage value
    smape = 100*symmetric_mean_absolute_percentage_error(
        predictions_df.target.iloc[window],
        bigquery_edslstm_df.forecast_value.iloc[window]
    )
    arima_one_week_smape.append(smape)

    print('ARIMA SMAPE for {} to {} interval is: {:.4f}'.format(start_index, end_index, smape))

ARIMA SMAPE for 0 to 24 interval is: 1.7587
ARIMA SMAPE for 24 to 48 interval is: 3.6541
ARIMA SMAPE for 48 to 72 interval is: 6.0585
ARIMA SMAPE for 72 to 96 interval is: 7.2724
ARIMA SMAPE for 96 to 120 interval is: 4.2612
ARIMA SMAPE for 120 to 144 interval is: 2.9962
ARIMA SMAPE for 144 to 168 interval is: 3.6075


for end_index in [24, 48, 72, 96, 120, 144, 168]:
    mae = mean_absolute_error(
        predictions_df.target[:end_index],
        bigquery_dmslstm_df.forecast_value[:end_index]
    )
    print('ARIMA MAE for {}-hour interval is: {:.4f}'.format(end_index, mae))

for end_index in [24, 48, 72, 96, 120, 144, 168]:
    rmse = sqrt(mean_squared_error(
        predictions_df.target[:end_index],
        bigquery_dmslstm_df.forecast_value[:end_index]
    ))
    print('ARIMA RMSE for {}-hour interval is: {:.4f}'.format(end_index, rmse))

for end_index in [24, 48, 72, 96, 120, 144, 168]:
    smape = symmetric_mean_absolute_percentage_error(
        predictions_df.target[:end_index],
        bigquery_dmslstm_df.forecast_value[:end_index]
    )
    print('ARIMA SMAPE for {}-hour interval is: {:.4f}'.format(end_index, smape))

In [39]:
# boolean mask selecting every prediction-target pair of a given model in predictions_df
selected_model = 'EDSLSTM_TPU_013'
flag = predictions_df.model_id.eq(selected_model)

In [40]:
# average prediction and target per timestamp over all rows of the selected model;
# the numeric columns are selected explicitly because groupby().mean() no longer drops
# non-numeric columns (model_id) silently: it raises on them in pandas 2.0 and later
average_predictions_df = (
    predictions_df.loc[flag, ['timestamp', 'prediction', 'target']]
    .groupby(['timestamp'])
    .mean()
)

In [41]:
average_predictions_df

Unnamed: 0_level_0,prediction,target
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1
2018-05-03 16:00:00,2979.469702,2935.386719
2018-05-03 17:00:00,2934.481934,2798.293213
2018-05-03 18:00:00,2860.230493,2750.489990
2018-05-03 19:00:00,2817.940405,2751.091553
2018-05-03 20:00:00,2965.644092,2919.401611
...,...,...
2018-05-10 11:00:00,2618.650732,2599.053223
2018-05-10 12:00:00,2711.556079,2709.363281
2018-05-10 13:00:00,2763.647900,2810.871582
2018-05-10 14:00:00,2780.691431,2839.586426


In [42]:
# registry of Bokeh figures, keyed by a short plot name
plots = {}

In [43]:
# a datetime range for the prediction interval
# for datetime in pd.date_range(start='2018-07-02 00:00:00', end='2018-07-02 23:00:00', freq='H'):
#     print(datetime)

In [44]:
# day to plot and the matching row positions (24 rows per day) in the dataframes
day = 7
start, end = 24*(day - 1), 24*day

# marker size shared by all series
size = 8

plots['edslstm'] = figure(
    x_axis_type='datetime',
    plot_width=960,
    plot_height=320,
    title='EDSLSTM (average) vs. ARIMA vs. Real / Day {}.'.format(day),
    toolbar_location=None,
)
edslstm_plot = plots['edslstm']

edslstm_plot.grid.grid_line_alpha = 0.3
edslstm_plot.title.text_font_size = '14pt'

edslstm_plot.xaxis.axis_label = 'Date'
edslstm_plot.yaxis.axis_label = 'Active Power [KW]'

# same font sizes on both axes
for axis in (edslstm_plot.xaxis, edslstm_plot.yaxis):
    axis.axis_label_text_font_size = "14pt"
    axis.major_label_text_font_size = "12pt"

# a custom x range to visually improve the plot:
# one hour of margin on the left, two hours on the right
edslstm_plot.x_range = Range1d(
    start=average_predictions_df.index[start] - timedelta(hours=1),
    end=average_predictions_df.index[end-1] + timedelta(hours=2)
)

# x values of the selected day for the EDSLSTM/real series and for the ARIMA series
selected_hours = average_predictions_df.index[start:end]
arima_hours = pd.to_datetime(bigquery_edslstm_df.forecast_timestamp)[start:end]

# (marker glyph method, x values, y values, legend label), in drawing order;
# the target value in average_predictions_df is the average of targets, then the only target
series_to_draw = [
    ('square', selected_hours, average_predictions_df.prediction[start:end], 'EDSLSTM'),
    ('triangle', arima_hours, bigquery_edslstm_df.forecast_value[start:end], 'ARIMA'),
    ('circle', selected_hours, average_predictions_df.target[start:end], 'Real'),
]

# every series is drawn as hollow markers joined by a line, both under the same legend entry
for marker_name, x_values, y_values, label in series_to_draw:
    draw_marker = getattr(edslstm_plot, marker_name)
    draw_marker(
        x=x_values,
        y=y_values,
        size=size,
        fill_color=None,
        color='black',
        legend_label=label
    )
    edslstm_plot.line(
        x=x_values,
        y=y_values,
        color='black',
        legend_label=label
    )

edslstm_plot.legend.label_text_font_size = '12pt'
edslstm_plot.legend.location = 'bottom_left'

# uncomment the following two lines to save plot
# output_file('/home/developer/gcp/cbidmltsf/datasets/cfe/{}_H_kw.html'.format(device))
# save(fig_kw)

# uncomment the following line to display plot
show(edslstm_plot)

In [45]:
# a Pandas dataframe to store error metrics, one row per model, execution and
# start/end timestamp pair; it is populated in a later cell
# NOTE(review): this rebinds buffer_df_columns, which an earlier cell defines with a different
# schema (timestamp/model_id/execution/prediction/target); re-running that earlier part of the
# notebook after this cell would pick up the columns below
buffer_df_columns = [
    'start_timestamp',
    'end_timestamp',
    'model_id',
    'execution',
    'mae',
    'rmse',
    'smape'
]

day_by_day_predictions_df = pd.DataFrame(columns=buffer_df_columns)

In [46]:
day_by_day_predictions_df

Unnamed: 0,start_timestamp,end_timestamp,model_id,execution,mae,rmse,smape


In [47]:

model_id, dataset, inference = 'EDSLSTM_TPU_013', 'test', '024'
executions = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]

# one row of the filtered dataframe every 24 rows, for num_days consecutive days
num_days = 28
start_indexes = 24*np.arange(num_days)

# one metrics row per (execution, day); the dataframe is built once from this list instead
# of calling pd.concat on every iteration, which copies all accumulated rows each time
day_by_day_rows = []

for execution in executions:

    flag = (
        global_df.model_id.eq(model_id)
        & global_df.execution.eq(execution)
        & global_df.dataset.eq(dataset)
        & global_df.inference.eq(inference)
    )

    # reset index to use the same row, drop the index column
    filtered_df = global_df[flag].reset_index(drop=True)

    for start_index in start_indexes:

        window = filtered_df.iloc[start_index]

        # first and last timestamps of the window, followed by the metrics stored with it
        day_by_day_rows.append([
            pd.to_datetime(window['string_timestamps'][0]),
            pd.to_datetime(window['string_timestamps'][-1]),
            window['model_id'],
            window['execution'],
            window['mae'],
            window['rmse'],
            window['smape']
        ])

# the dataframe is rebuilt from scratch with a default 0..n-1 index,
# so re-running this cell does not duplicate rows
day_by_day_predictions_df = pd.DataFrame(day_by_day_rows, columns=buffer_df_columns)

In [48]:
day_by_day_predictions_df

Unnamed: 0,start_timestamp,end_timestamp,model_id,execution,mae,rmse,smape
0,2018-05-03 16:00:00,2018-05-04 15:00:00,EDSLSTM_TPU_013,0,54.286067,70.648563,0.020626
1,2018-05-04 16:00:00,2018-05-05 15:00:00,EDSLSTM_TPU_013,0,65.790395,72.422381,0.026387
2,2018-05-05 16:00:00,2018-05-06 15:00:00,EDSLSTM_TPU_013,0,68.757655,101.250896,0.027657
3,2018-05-06 16:00:00,2018-05-07 15:00:00,EDSLSTM_TPU_013,0,46.927139,53.531686,0.019349
4,2018-05-07 16:00:00,2018-05-08 15:00:00,EDSLSTM_TPU_013,0,121.353363,172.418498,0.047132
...,...,...,...,...,...,...,...
275,2018-05-26 16:00:00,2018-05-27 15:00:00,EDSLSTM_TPU_013,9,62.703659,78.526338,0.023223
276,2018-05-27 16:00:00,2018-05-28 15:00:00,EDSLSTM_TPU_013,9,75.656754,84.783441,0.027857
277,2018-05-28 16:00:00,2018-05-29 15:00:00,EDSLSTM_TPU_013,9,119.990946,143.788272,0.040315
278,2018-05-29 16:00:00,2018-05-30 15:00:00,EDSLSTM_TPU_013,9,136.834112,149.032396,0.047717


In [49]:
# locating the model execution with the best performance
# (the metric columns are selected before aggregating: groupby().mean() no longer drops
# non-numeric columns such as model_id silently, it raises on them in pandas 2.0 and later)
day_by_day_predictions_df.groupby(['execution'])[['mae', 'rmse', 'smape']].mean()

Unnamed: 0_level_0,mae,rmse,smape
execution,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,80.53736,98.37096,0.031397
1,81.680708,98.814697,0.032065
2,83.215136,101.496831,0.032688
3,79.282793,97.205786,0.031045
4,81.806174,99.32731,0.032085
5,78.879657,95.051772,0.031144
6,81.165047,99.0604,0.031852
7,81.871259,99.181595,0.032242
8,86.678961,105.540888,0.033974
9,84.665324,102.06978,0.033103


In [50]:
# execution 5 presents the best error metrics!

In [51]:
# get the time line, on a daily basis, from the grouped dataframe
# (a single metric column is aggregated so that non-numeric columns such as model_id never
# reach mean(), which raises on them in pandas 2.0 and later; the resulting index is the same)
day_by_day_predictions_df.groupby(['start_timestamp'])['mae'].mean().index

DatetimeIndex(['2018-05-03 16:00:00', '2018-05-04 16:00:00',
               '2018-05-05 16:00:00', '2018-05-06 16:00:00',
               '2018-05-07 16:00:00', '2018-05-08 16:00:00',
               '2018-05-09 16:00:00', '2018-05-10 16:00:00',
               '2018-05-11 16:00:00', '2018-05-12 16:00:00',
               '2018-05-13 16:00:00', '2018-05-14 16:00:00',
               '2018-05-15 16:00:00', '2018-05-16 16:00:00',
               '2018-05-17 16:00:00', '2018-05-18 16:00:00',
               '2018-05-19 16:00:00', '2018-05-20 16:00:00',
               '2018-05-21 16:00:00', '2018-05-22 16:00:00',
               '2018-05-23 16:00:00', '2018-05-24 16:00:00',
               '2018-05-25 16:00:00', '2018-05-26 16:00:00',
               '2018-05-27 16:00:00', '2018-05-28 16:00:00',
               '2018-05-29 16:00:00', '2018-05-30 16:00:00'],
              dtype='datetime64[ns]', name='start_timestamp', freq=None)

In [66]:
# compare metrics: EDSLSTM average predictions vs. ARIMA
# express them as percentage for tabulation

In [86]:
# get a metric average (on the 10 executions) for a given 24-hour interval, first 7 intervals;
# the metric column is selected before aggregating because groupby().mean() raises on
# non-numeric columns (model_id) in pandas 2.0 and later instead of dropping them
edslstm_average_one_week_mae_mean = (
    day_by_day_predictions_df.groupby(['start_timestamp'])['mae'].mean().iloc[:7]
)

In [87]:
for x in edslstm_average_one_week_mae_mean:
    print('{:0.4f}'.format(x))

54.5234
63.8401
65.6171
55.0586
118.6643
55.7155
66.2347


In [88]:
# MAE standard deviation over the executions for the first 7 24-hour intervals;
# the metric column is selected before aggregating because groupby().std() raises on
# non-numeric columns (model_id) in pandas 2.0 and later instead of dropping them
edslstm_average_one_week_mae_std = (
    day_by_day_predictions_df.groupby(['start_timestamp'])['mae'].std().iloc[:7]
)

In [89]:
for x in edslstm_average_one_week_mae_std:
    print('{:0.4f}'.format(x))

2.3951
4.0132
4.4611
5.6558
4.6559
16.8942
4.3673


In [90]:
# RMSE average over the executions for the first 7 24-hour intervals;
# the metric column is selected before aggregating because groupby().mean() raises on
# non-numeric columns (model_id) in pandas 2.0 and later instead of dropping them
edslstm_average_one_week_rmse_mean = (
    day_by_day_predictions_df.groupby(['start_timestamp'])['rmse'].mean().iloc[:7]
)

In [91]:
for x in edslstm_average_one_week_rmse_mean:
    print('{:0.4f}'.format(x))

73.7896
70.4892
92.3386
66.9185
168.3410
67.6061
78.0422


In [92]:
# RMSE standard deviation over the executions for the first 7 24-hour intervals;
# the metric column is selected before aggregating because groupby().std() raises on
# non-numeric columns (model_id) in pandas 2.0 and later instead of dropping them
edslstm_average_one_week_rmse_std = (
    day_by_day_predictions_df.groupby(['start_timestamp'])['rmse'].std().iloc[:7]
)

In [93]:
for x in edslstm_average_one_week_rmse_std:
    print('{:0.4f}'.format(x))

2.5624
3.7769
11.6959
6.9864
4.4768
18.6421
5.2829


In [94]:
# SMAPE average over the executions for the first 7 24-hour intervals, as a percentage;
# the metric column is selected before aggregating because groupby().mean() raises on
# non-numeric columns (model_id) in pandas 2.0 and later instead of dropping them
edslstm_average_one_week_smape_mean = (
    100*day_by_day_predictions_df.groupby(['start_timestamp'])['smape'].mean().iloc[:7]
)

In [95]:
for x in edslstm_average_one_week_smape_mean:
    print('{:0.4f}'.format(x))

2.0528
2.5696
2.6614
2.3029
4.6691
2.2712
2.8146


In [96]:
# SMAPE standard deviation over the executions for the first 7 24-hour intervals, as a percentage;
# the metric column is selected before aggregating because groupby().std() raises on
# non-numeric columns (model_id) in pandas 2.0 and later instead of dropping them
edslstm_average_one_week_smape_std = (
    100*day_by_day_predictions_df.groupby(['start_timestamp'])['smape'].std().iloc[:7]
)

In [97]:
for x in edslstm_average_one_week_smape_std:
    print('{:0.4f}'.format(x))

0.0971
0.1727
0.1491
0.2539
0.1741
0.7302
0.1797


In [98]:
# difference between the EDSLSTM average MAE and the ARIMA MAE of each interval,
# as a percentage of the ARIMA value (positive: the EDSLSTM value is the larger one)
arima_mae_baseline = np.array(arima_one_week_mae)
baseline_mae_pct = 100*(edslstm_average_one_week_mae_mean - arima_mae_baseline)/arima_mae_baseline

In [99]:
for x in baseline_mae_pct:
    print('{:0.2f}'.format(x))

27.54
-32.44
-55.15
-65.64
13.55
-22.34
-18.54


In [100]:
# difference between the EDSLSTM average RMSE and the ARIMA RMSE of each interval,
# as a percentage of the ARIMA value (positive: the EDSLSTM value is the larger one)
arima_rmse_baseline = np.array(arima_one_week_rmse)
baseline_rmse_pct = 100*(edslstm_average_one_week_rmse_mean - arima_rmse_baseline)/arima_rmse_baseline

In [101]:
for x in baseline_rmse_pct:
    print('{:0.2f}'.format(x))

34.64
-37.37
-53.76
-60.16
2.38
-28.25
-24.22


In [102]:
# difference between the EDSLSTM average SMAPE and the ARIMA SMAPE of each interval,
# as a percentage of the ARIMA value (positive: the EDSLSTM value is the larger one)
arima_smape_baseline = np.array(arima_one_week_smape)
baseline_smape_pct = 100*(edslstm_average_one_week_smape_mean - arima_smape_baseline)/arima_smape_baseline

In [103]:
for x in baseline_smape_pct:
    print('{:0.2f}'.format(x))

16.72
-29.68
-56.07
-68.33
9.57
-24.20
-21.98


In [102]:
size = 4

# MAE average over the executions for every 24-hour interval, computed once for both glyphs;
# the metric column is selected before aggregating because groupby().mean() raises on
# non-numeric columns (model_id) in pandas 2.0 and later instead of dropping them
daily_mae_mean = day_by_day_predictions_df.groupby(['start_timestamp'])['mae'].mean()

plots['edslstm_week_metric'] = figure(
    x_axis_type='datetime',
    plot_width=960,
    plot_height=320,
    title='EDSLSTM Performance: {} for {} Consecutive 24-hour Prediction Intervals.'\
    .format('MAE', num_days),
    toolbar_location=None,
)

plots['edslstm_week_metric'].grid.grid_line_alpha=0.3

# a custom x range to visually improve the plot
plots['edslstm_week_metric'].x_range = Range1d(
    start=pd.to_datetime('2018-05-02 16:00:00'),
    end=pd.to_datetime('2018-06-01 16:00:00')
)

plots['edslstm_week_metric'].xaxis.axis_label = 'Date'
plots['edslstm_week_metric'].yaxis.axis_label = 'MAE [KW]'

plots['edslstm_week_metric'].title.text_font_size = '14pt'

plots['edslstm_week_metric'].xaxis.axis_label_text_font_size = "14pt"
plots['edslstm_week_metric'].yaxis.axis_label_text_font_size = "14pt"

plots['edslstm_week_metric'].xaxis.major_label_text_font_size = "12pt"
plots['edslstm_week_metric'].yaxis.major_label_text_font_size = "12pt"


# hollow markers joined by a line, one point per 24-hour interval
plots['edslstm_week_metric'].circle(
    x=daily_mae_mean.index,
    y=daily_mae_mean,
    color='black',
    size=size,
    fill_color=None,
)
plots['edslstm_week_metric'].line(
    x=daily_mae_mean.index,
    y=daily_mae_mean,
    color='black',
)

# horizontal dashed reference line: mean of the ARIMA MAE values in arima_one_week_mae
arima_one_week_mae_average = Span(
    location=np.mean(np.array(arima_one_week_mae)),
    dimension='width',
    line_color='black',
    line_dash='dashed',
    line_width=2)

plots['edslstm_week_metric'].add_layout(arima_one_week_mae_average)

show(plots['edslstm_week_metric'])

In [103]:
size = 4

# RMSE average over the executions for every 24-hour interval, computed once for both glyphs;
# the metric column is selected before aggregating because groupby().mean() raises on
# non-numeric columns (model_id) in pandas 2.0 and later instead of dropping them
daily_rmse_mean = day_by_day_predictions_df.groupby(['start_timestamp'])['rmse'].mean()

plots['edslstm_week_metric'] = figure(
    x_axis_type='datetime',
    plot_width=960,
    plot_height=320,
    title='EDSLSTM Performance: {} for {} Consecutive 24-hour Prediction Intervals.'\
    .format('RMSE', num_days),
    toolbar_location=None,
)

plots['edslstm_week_metric'].grid.grid_line_alpha=0.3

# a custom x range to visually improve the plot
plots['edslstm_week_metric'].x_range = Range1d(
    start=pd.to_datetime('2018-05-02 16:00:00'),
    end=pd.to_datetime('2018-06-01 16:00:00')
)

plots['edslstm_week_metric'].title.text_font_size = '14pt'

plots['edslstm_week_metric'].xaxis.axis_label_text_font_size = "14pt"
plots['edslstm_week_metric'].yaxis.axis_label_text_font_size = "14pt"

plots['edslstm_week_metric'].xaxis.major_label_text_font_size = "12pt"
plots['edslstm_week_metric'].yaxis.major_label_text_font_size = "12pt"

plots['edslstm_week_metric'].xaxis.axis_label = 'Date'
plots['edslstm_week_metric'].yaxis.axis_label = 'RMSE [KW]'

# hollow markers joined by a line, one point per 24-hour interval
plots['edslstm_week_metric'].circle(
    x=daily_rmse_mean.index,
    y=daily_rmse_mean,
    color='black',
    size=size,
    fill_color=None,
)
plots['edslstm_week_metric'].line(
    x=daily_rmse_mean.index,
    y=daily_rmse_mean,
    color='black',
)

# horizontal dashed reference line: mean of the ARIMA RMSE values in arima_one_week_rmse
arima_one_week_rmse_average = Span(
    location=np.mean(np.array(arima_one_week_rmse)),
    dimension='width',
    line_color='black',
    line_dash='dashed',
    line_width=2)

plots['edslstm_week_metric'].add_layout(arima_one_week_rmse_average)


show(plots['edslstm_week_metric'])

In [104]:
size = 4

# Mean SMAPE of all predictions that share a start timestamp, scaled by 100
# to express it as a percentage. The column is selected before aggregating
# and the result is computed once, so both glyphs below reuse it instead of
# re-running the groupby over every column.
smape_pct_by_start = 100 * (
    day_by_day_predictions_df
    .groupby(['start_timestamp'])['smape']
    .mean()
)

plots['edslstm_week_metric'] = figure(
    x_axis_type='datetime',
    plot_width=960,
    plot_height=320,
    title='EDSLSTM Performance: {} for {} Consecutive 24-hour Prediction Intervals.'
    .format('SMAPE', num_days),
    toolbar_location=None,
)
# short alias for the figure just stored in the shared plots dictionary
week_smape_plot = plots['edslstm_week_metric']

week_smape_plot.grid.grid_line_alpha = 0.3

# a custom x range to visually improve the plot
week_smape_plot.x_range = Range1d(
    start=pd.to_datetime('2018-05-02 16:00:00'),
    end=pd.to_datetime('2018-06-01 16:00:00'),
)

week_smape_plot.title.text_font_size = '14pt'

week_smape_plot.xaxis.axis_label_text_font_size = "14pt"
week_smape_plot.yaxis.axis_label_text_font_size = "14pt"

week_smape_plot.xaxis.major_label_text_font_size = "12pt"
week_smape_plot.yaxis.major_label_text_font_size = "12pt"

week_smape_plot.xaxis.axis_label = 'Date'
week_smape_plot.yaxis.axis_label = 'SMAPE [%]'

# hollow markers first, then the connecting line on top of them
week_smape_plot.circle(
    x=smape_pct_by_start.index,
    y=smape_pct_by_start,
    color='black',
    size=size,
    fill_color=None,
)
week_smape_plot.line(
    x=smape_pct_by_start.index,
    y=smape_pct_by_start,
    color='black',
)

# dashed horizontal reference line at the mean of the ARIMA SMAPE values
# NOTE(review): the plotted series is multiplied by 100 but this reference is
# not; confirm arima_one_week_smape is already expressed in percent
arima_one_week_smape_average = Span(
    location=np.mean(np.array(arima_one_week_smape)),
    dimension='width',
    line_color='black',
    line_dash='dashed',
    line_width=2,
)
week_smape_plot.add_layout(arima_one_week_smape_average)

show(week_smape_plot)

In [70]:
# MAE for rolling predictions in 4 weeks (672 rows), from global dataframe
start, end = 0, 672
size = 4

# Parse the first entry of each selected row's timestamps once; the markers
# and the line share the same x values and the same slice of the metric.
rolling_timestamps = [
    pd.to_datetime(row[0])
    for row in global_df['string_timestamps'][start:end]
]
rolling_mae = global_df['mae'][start:end]

plots['edslstm_metric'] = figure(
    x_axis_type='datetime',
    plot_width=960,
    plot_height=320,
    title='EDSLSTM Performance: {} for {} Consecutive 1-hour Rolling Predictions.'
    .format('MAE', end),
    toolbar_location=None,
)
# short alias for the figure just stored in the shared plots dictionary
rolling_mae_plot = plots['edslstm_metric']

rolling_mae_plot.grid.grid_line_alpha = 0.3

rolling_mae_plot.title.text_font_size = '14pt'

rolling_mae_plot.xaxis.axis_label_text_font_size = "14pt"
rolling_mae_plot.yaxis.axis_label_text_font_size = "14pt"

rolling_mae_plot.xaxis.major_label_text_font_size = "12pt"
rolling_mae_plot.yaxis.major_label_text_font_size = "12pt"

rolling_mae_plot.xaxis.axis_label = 'Date'
rolling_mae_plot.yaxis.axis_label = 'MAE [KW]'

# hollow markers first, then the connecting line on top of them
rolling_mae_plot.circle(
    x=rolling_timestamps,
    y=rolling_mae,
    color='black',
    size=size,
    fill_color=None,
)
rolling_mae_plot.line(
    x=rolling_timestamps,
    y=rolling_mae,
    color='black',
)

# dashed horizontal reference line at the mean of the ARIMA MAE values
arima_one_week_mae_average = Span(
    location=np.mean(np.array(arima_one_week_mae)),
    dimension='width',
    line_color='black',
    line_dash='dashed',
    line_width=2,
)
rolling_mae_plot.add_layout(arima_one_week_mae_average)

show(rolling_mae_plot)

In [71]:
# RMSE for rolling predictions in 4 weeks (672 rows), from global dataframe
start, end = 0, 672
size = 4

# Parse the first entry of each selected row's timestamps once; the markers
# and the line share the same x values and the same slice of the metric.
rolling_timestamps = [
    pd.to_datetime(row[0])
    for row in global_df['string_timestamps'][start:end]
]
rolling_rmse = global_df['rmse'][start:end]

plots['edslstm_metric'] = figure(
    x_axis_type='datetime',
    plot_width=960,
    plot_height=320,
    title='EDSLSTM Performance: {} for {} Consecutive 1-hour Rolling Predictions.'
    .format('RMSE', end),
    toolbar_location=None,
)
# short alias for the figure just stored in the shared plots dictionary
rolling_rmse_plot = plots['edslstm_metric']

rolling_rmse_plot.grid.grid_line_alpha = 0.3

rolling_rmse_plot.title.text_font_size = '14pt'

rolling_rmse_plot.xaxis.axis_label_text_font_size = "14pt"
rolling_rmse_plot.yaxis.axis_label_text_font_size = "14pt"

rolling_rmse_plot.xaxis.major_label_text_font_size = "12pt"
rolling_rmse_plot.yaxis.major_label_text_font_size = "12pt"

rolling_rmse_plot.xaxis.axis_label = 'Date'
rolling_rmse_plot.yaxis.axis_label = 'RMSE [KW]'

# hollow markers first, then the connecting line on top of them
rolling_rmse_plot.circle(
    x=rolling_timestamps,
    y=rolling_rmse,
    color='black',
    size=size,
    fill_color=None,
)
rolling_rmse_plot.line(
    x=rolling_timestamps,
    y=rolling_rmse,
    color='black',
)

# dashed horizontal reference line at the mean of the ARIMA RMSE values
arima_one_week_rmse_average = Span(
    location=np.mean(np.array(arima_one_week_rmse)),
    dimension='width',
    line_color='black',
    line_dash='dashed',
    line_width=2,
)
rolling_rmse_plot.add_layout(arima_one_week_rmse_average)

show(rolling_rmse_plot)

In [72]:
# SMAPE for rolling predictions in 4 weeks (672 rows), from global dataframe
start, end = 0, 672
size = 4

# Parse the first entry of each selected row's timestamps once; the markers
# and the line share the same x values and the same slice of the metric.
rolling_timestamps = [
    pd.to_datetime(row[0])
    for row in global_df['string_timestamps'][start:end]
]
# adjust SMAPE for percentage value
rolling_smape_pct = 100 * global_df['smape'][start:end]

plots['edslstm_metric'] = figure(
    x_axis_type='datetime',
    plot_width=960,
    plot_height=320,
    title='EDSLSTM Performance: {} for {} Consecutive 1-hour Rolling Predictions.'
    .format('SMAPE', end),
    toolbar_location=None,
)
# short alias for the figure just stored in the shared plots dictionary
rolling_smape_plot = plots['edslstm_metric']

rolling_smape_plot.grid.grid_line_alpha = 0.3

rolling_smape_plot.title.text_font_size = '14pt'

rolling_smape_plot.xaxis.axis_label_text_font_size = "14pt"
rolling_smape_plot.yaxis.axis_label_text_font_size = "14pt"

rolling_smape_plot.xaxis.major_label_text_font_size = "12pt"
rolling_smape_plot.yaxis.major_label_text_font_size = "12pt"

rolling_smape_plot.xaxis.axis_label = 'Date'
rolling_smape_plot.yaxis.axis_label = 'SMAPE [%]'

# hollow markers first, then the connecting line on top of them
rolling_smape_plot.circle(
    x=rolling_timestamps,
    y=rolling_smape_pct,
    color='black',
    size=size,
    fill_color=None,
)
rolling_smape_plot.line(
    x=rolling_timestamps,
    y=rolling_smape_pct,
    color='black',
)

# dashed horizontal reference line at the mean of the ARIMA SMAPE values
# NOTE(review): the plotted series is multiplied by 100 but this reference is
# not; confirm arima_one_week_smape is already expressed in percent
arima_one_week_smape_average = Span(
    location=np.mean(np.array(arima_one_week_smape)),
    dimension='width',
    line_color='black',
    line_dash='dashed',
    line_width=2,
)
rolling_smape_plot.add_layout(arima_one_week_smape_average)

show(rolling_smape_plot)

In [75]:
! ls -l /home/developer/DEPFIE-SCOM/ScD_Thesis/results

total 208
-rw-rw-r-- 1 developer developer 44853 sep 20 13:27 DMSLSTM_TPU_006_09_loss.csv
-rw-rw-r-- 1 developer developer 44752 sep 20 13:27 DMSLSTM_TPU_006_09_lrs.csv
-rw-rw-r-- 1 developer developer    68 sep 21 12:13 EDSLSTM_TPU_013_05_eval.csv
-rw-rw-r-- 1 developer developer 44489 sep 21 12:13 EDSLSTM_TPU_013_05_loss.csv
-rw-rw-r-- 1 developer developer 44597 sep 21 12:12 EDSLSTM_TPU_013_05_lrs.csv
-rw-rw-r-- 1 developer developer 27526 may 23 11:05 transformer_metrics.ods


In [76]:
# load the '_eval.csv' results file of run EDSLSTM_TPU_013_05 into a dataframe
# (presumably a TensorBoard scalar export of the evaluation loss; TODO confirm)
# NOTE(review): absolute, machine-specific path; adjust before re-running elsewhere
eval_loss_df = pd.read_csv(
    '/home/developer/DEPFIE-SCOM/ScD_Thesis/results/EDSLSTM_TPU_013_05_eval.csv'
)

In [77]:
# inspect the contents of the evaluation results dataframe
eval_loss_df

Unnamed: 0,Wall time,Step,Value
0,1624464000.0,1340,0.000988


In [78]:
# load the '_loss.csv' results file of run EDSLSTM_TPU_013_05 into a dataframe
# (presumably a TensorBoard scalar export of the training loss; TODO confirm)
# NOTE(review): absolute, machine-specific path; adjust before re-running elsewhere
loss_df = pd.read_csv(
    '/home/developer/DEPFIE-SCOM/ScD_Thesis/results/EDSLSTM_TPU_013_05_loss.csv'
)

In [79]:
# inspect the loss records; the display is shortened because the notebook
# sets the pandas option 'display.max_rows' to 20
loss_df

Unnamed: 0,Wall time,Step,Value
0,1.624463e+09,0,0.044428
1,1.624463e+09,2,0.048627
2,1.624463e+09,3,0.048322
3,1.624463e+09,4,0.047089
4,1.624463e+09,7,0.048250
...,...,...,...
995,1.624464e+09,1333,0.001345
996,1.624464e+09,1334,0.001301
997,1.624464e+09,1335,0.001125
998,1.624464e+09,1338,0.001690


In [80]:
# load the '_lrs.csv' results file of run EDSLSTM_TPU_013_05 into a dataframe
# (presumably a TensorBoard scalar export of the learning rate; TODO confirm)
# NOTE(review): absolute, machine-specific path; adjust before re-running elsewhere
lrs_df = pd.read_csv(
    '/home/developer/DEPFIE-SCOM/ScD_Thesis/results/EDSLSTM_TPU_013_05_lrs.csv'
)

In [81]:
# inspect the learning-rate records; the display is shortened because the
# notebook sets the pandas option 'display.max_rows' to 20
lrs_df

Unnamed: 0,Wall time,Step,Value
0,1.624463e+09,0,0.000000
1,1.624463e+09,2,0.000022
2,1.624463e+09,3,0.000033
3,1.624463e+09,4,0.000044
4,1.624463e+09,7,0.000077
...,...,...,...
995,1.624464e+09,1333,0.000003
996,1.624464e+09,1334,0.000003
997,1.624464e+09,1335,0.000003
998,1.624464e+09,1338,0.000003


In [109]:

# learning-rate figure, registered in the shared plots dictionary
plots['edslstm_lrs'] = figure(
    title='EDSLSTM Learning Rate Schedule.',
    plot_width=960,
    plot_height=320,
    toolbar_location=None,
)
lrs_plot = plots['edslstm_lrs']

lrs_plot.grid.grid_line_alpha = 0.3

# fixed horizontal extent, in training steps, to visually improve the plot
lrs_plot.x_range = Range1d(start=0, end=1440)

# title and axis typography
lrs_plot.title.text_font_size = '14pt'
lrs_plot.xaxis.axis_label_text_font_size = "14pt"
lrs_plot.xaxis.major_label_text_font_size = "12pt"
lrs_plot.yaxis.axis_label_text_font_size = "14pt"
lrs_plot.yaxis.major_label_text_font_size = "12pt"

lrs_plot.xaxis.axis_label = 'Training Step'
lrs_plot.yaxis.axis_label = 'Learning Rate'

# learning rate value against training step
lrs_plot.line(
    x=lrs_df['Step'],
    y=lrs_df['Value'],
    color='black',
)

show(lrs_plot)

In [84]:
# final evaluation loss, taken from the last 'Value' entry of the loaded
# evaluation dataframe rather than retyped from its rounded on-screen display
eval_loss = float(eval_loss_df['Value'].iloc[-1])

In [113]:

# training-loss figure, registered in the shared plots dictionary
plots['edslstm_loss'] = figure(
    title='EDSLSTM Training Loss.',
    plot_width=960,
    plot_height=320,
    toolbar_location=None,
)
loss_plot = plots['edslstm_loss']

loss_plot.grid.grid_line_alpha = 0.3

# fixed x and y extents to visually improve the plot
loss_plot.x_range = Range1d(start=0, end=1440)
loss_plot.y_range = Range1d(start=0, end=0.02)

# title and axis typography
loss_plot.title.text_font_size = '14pt'
loss_plot.xaxis.axis_label_text_font_size = "14pt"
loss_plot.xaxis.major_label_text_font_size = "12pt"
loss_plot.yaxis.axis_label_text_font_size = "14pt"
loss_plot.yaxis.major_label_text_font_size = "12pt"

loss_plot.xaxis.axis_label = 'Training Step'
loss_plot.yaxis.axis_label = 'Loss'

# loss value against training step
loss_plot.line(
    x=loss_df['Step'],
    y=loss_df['Value'],
    color='black',
)

# dashed horizontal reference line at the evaluation loss
final_eval_loss = Span(
    location=eval_loss,
    dimension='width',
    line_color='black',
    line_dash='dashed',
    line_width=2,
)
loss_plot.add_layout(final_eval_loss)

show(loss_plot)

In [4]:
from tensorboard.backend.event_processing import event_accumulator

In [5]:
def get_wall_time(path_to_logdir):
    '''
    receives a UNIX path to a TensorBoard logdir of a model
    returns the wall time for the model training process

    path_to_logdir: path of the logdir holding the event files of one model

    returns: the wall time of the last 'loss' tensor event minus the wall time
    of the first one (reported as seconds); the value is also printed

    raises IndexError when the logdir holds no 'loss' tensor events
    '''
    # an event accumulator to the logdir; a size guidance of 0 asks it to
    # retrieve all scalar and tensor events
    ea = event_accumulator.EventAccumulator(
        path_to_logdir,
        size_guidance={
            event_accumulator.SCALARS: 0,
            event_accumulator.TENSORS: 0,
        },
    )
    # loads events from file
    ea.Reload()

    # fetch the 'loss' events once; the first field of each event is its wall time
    loss_events = ea.Tensors('loss')
    if not loss_events:
        raise IndexError(
            "no 'loss' tensor events found in '{}'".format(path_to_logdir)
        )

    # wall time is end time - start time
    wall_time = loss_events[-1][0] - loss_events[0][0]
    print("Wall time for model in '{}' is {} seconds.".format(path_to_logdir,
                                                            wall_time))
    return wall_time

In [6]:
# model directories of the ten EDSLSTM_TPU_013 runs, suffixes 00 through 09
models_list = [
    '/home/developer/gcp/cbidmltsf/models/EDSLSTM_TPU_013_{:02d}'.format(run_index)
    for run_index in range(10)
]

In [7]:
# a dataframe for wall times
# one wall time per entry of models_list, collected in the same order
wall_times_list = list(map(get_wall_time, models_list))

Found more than one graph event per run, or there was a metagraph containing a graph_def, as well as one or more graph events.  Overwriting the graph with the newest event.
Found more than one metagraph event per run. Overwriting the metagraph with the newest event.
Found more than one graph event per run, or there was a metagraph containing a graph_def, as well as one or more graph events.  Overwriting the graph with the newest event.
Found more than one metagraph event per run. Overwriting the metagraph with the newest event.


Wall time for model in '/home/developer/gcp/cbidmltsf/models/EDSLSTM_TPU_013_00' is 28.576439142227173 seconds.
Wall time for model in '/home/developer/gcp/cbidmltsf/models/EDSLSTM_TPU_013_01' is 28.590346097946167 seconds.


Found more than one graph event per run, or there was a metagraph containing a graph_def, as well as one or more graph events.  Overwriting the graph with the newest event.
Found more than one metagraph event per run. Overwriting the metagraph with the newest event.
Found more than one graph event per run, or there was a metagraph containing a graph_def, as well as one or more graph events.  Overwriting the graph with the newest event.
Found more than one metagraph event per run. Overwriting the metagraph with the newest event.


Wall time for model in '/home/developer/gcp/cbidmltsf/models/EDSLSTM_TPU_013_02' is 28.598254919052124 seconds.
Wall time for model in '/home/developer/gcp/cbidmltsf/models/EDSLSTM_TPU_013_03' is 28.64410400390625 seconds.


Found more than one graph event per run, or there was a metagraph containing a graph_def, as well as one or more graph events.  Overwriting the graph with the newest event.
Found more than one metagraph event per run. Overwriting the metagraph with the newest event.
Found more than one graph event per run, or there was a metagraph containing a graph_def, as well as one or more graph events.  Overwriting the graph with the newest event.
Found more than one metagraph event per run. Overwriting the metagraph with the newest event.


Wall time for model in '/home/developer/gcp/cbidmltsf/models/EDSLSTM_TPU_013_04' is 28.65162682533264 seconds.
Wall time for model in '/home/developer/gcp/cbidmltsf/models/EDSLSTM_TPU_013_05' is 28.654091119766235 seconds.


Found more than one graph event per run, or there was a metagraph containing a graph_def, as well as one or more graph events.  Overwriting the graph with the newest event.
Found more than one metagraph event per run. Overwriting the metagraph with the newest event.
Found more than one graph event per run, or there was a metagraph containing a graph_def, as well as one or more graph events.  Overwriting the graph with the newest event.
Found more than one metagraph event per run. Overwriting the metagraph with the newest event.


Wall time for model in '/home/developer/gcp/cbidmltsf/models/EDSLSTM_TPU_013_06' is 28.659013032913208 seconds.
Wall time for model in '/home/developer/gcp/cbidmltsf/models/EDSLSTM_TPU_013_07' is 28.676450967788696 seconds.


Found more than one graph event per run, or there was a metagraph containing a graph_def, as well as one or more graph events.  Overwriting the graph with the newest event.
Found more than one metagraph event per run. Overwriting the metagraph with the newest event.
Found more than one graph event per run, or there was a metagraph containing a graph_def, as well as one or more graph events.  Overwriting the graph with the newest event.
Found more than one metagraph event per run. Overwriting the metagraph with the newest event.


Wall time for model in '/home/developer/gcp/cbidmltsf/models/EDSLSTM_TPU_013_08' is 28.672755002975464 seconds.
Wall time for model in '/home/developer/gcp/cbidmltsf/models/EDSLSTM_TPU_013_09' is 28.656620979309082 seconds.


In [8]:
# inspect the collected wall times, one value per entry of models_list
wall_times_list

[28.576439142227173,
 28.590346097946167,
 28.598254919052124,
 28.64410400390625,
 28.65162682533264,
 28.654091119766235,
 28.659013032913208,
 28.676450967788696,
 28.672755002975464,
 28.656620979309082]

In [9]:
# mean of the collected wall times, printed with four decimals
print('{:0.4f}'.format(np.mean(wall_times_list)))

28.6380


In [10]:
# spread of the collected wall times, printed with four decimals; np.std is
# called with its default ddof=0, i.e. the population standard deviation
print('{:0.4f}'.format(np.std(wall_times_list)))

0.0341
