In [1]:
import json
import numpy as np
import pandas as pd
import os

In [2]:
from datetime import timedelta

In [3]:
from math import sqrt
from sklearn.metrics import mean_squared_error, mean_absolute_error

In [4]:
from bokeh.plotting import figure, show, output_file, save

from bokeh.io import output_notebook

from bokeh.models import Span, Range1d

from bokeh.palettes import d3

output_notebook()

In [5]:
pd.set_option('display.max_rows', 20)

In [6]:
def symmetric_mean_absolute_percentage_error(targets, predictions):
    '''
    predictions: a list with the predicted values
    targets: a list with the actual values
    '''
    import numpy as np
    # lists to NumPy arrays
    targets, predictions = np.array(targets), np.array(predictions)
    # verify predictions and targets have the same shape
    if predictions.shape == targets.shape:
            return(np.sum(2*np.abs(predictions - targets) /
                          (np.abs(targets) + np.abs(predictions)))/predictions.shape[0])

In [7]:
# build a list to select specific pickle files per model architecture
selected_pkl_list = [
    'DMSLSTM_TPU_006_00_test_024.pkl',
    'DMSLSTM_TPU_006_01_test_024.pkl',
    'DMSLSTM_TPU_006_02_test_024.pkl',
    'DMSLSTM_TPU_006_03_test_024.pkl',
    'DMSLSTM_TPU_006_04_test_024.pkl',
    'DMSLSTM_TPU_006_05_test_024.pkl',
    'DMSLSTM_TPU_006_06_test_024.pkl',
    'DMSLSTM_TPU_006_07_test_024.pkl',
    'DMSLSTM_TPU_006_08_test_024.pkl',
    'DMSLSTM_TPU_006_09_test_024.pkl',
    # 'EDSLSTM_TPU_013_00_test_024.pkl',
    # 'EDSLSTM_TPU_013_01_test_024.pkl',
    # 'EDSLSTM_TPU_013_02_test_024.pkl',
    # 'EDSLSTM_TPU_013_03_test_024.pkl',
    # 'EDSLSTM_TPU_013_04_test_024.pkl',
    # 'EDSLSTM_TPU_013_05_test_024.pkl',
    # 'EDSLSTM_TPU_013_06_test_024.pkl',
    # 'EDSLSTM_TPU_013_07_test_024.pkl',
    # 'EDSLSTM_TPU_013_08_test_024.pkl',
    # 'EDSLSTM_TPU_013_09_test_024.pkl',
    # 'BSCTRFM_TPU_010_00_test_024.pkl',
    # 'BSCTRFM_TPU_010_01_test_024.pkl',
    # 'BSCTRFM_TPU_010_02_test_024.pkl',
    # 'BSCTRFM_TPU_010_03_test_024.pkl',
    # 'BSCTRFM_TPU_010_04_test_024.pkl',
    # 'BSCTRFM_TPU_010_05_test_024.pkl',
    # 'BSCTRFM_TPU_010_06_test_024.pkl',
    # 'BSCTRFM_TPU_010_07_test_024.pkl',
    # 'BSCTRFM_TPU_010_08_test_024.pkl',
    # 'BSCTRFM_TPU_010_09_test_024.pkl',
]

In [8]:
# a Pandas dataframe to store all predictions detail items
global_df_columns = [
    'model_id', 'execution', 'dataset',
    'string_timestamps', 'predictions', 'targets',
    'mae', 'rmse', 'smape'
]

global_df = pd.DataFrame(columns=global_df_columns)

In [9]:
global_df

Unnamed: 0,model_id,execution,dataset,string_timestamps,predictions,targets,mae,rmse,smape


In [10]:
# collect prediction items from all available pickle files into a single dataframe
# for available_pickle in avail_pkl_list:
#     buffer_df = pd.read_pickle('../database/predictions_detail/{}'.format(available_pickle))
#     # append buffer to final dataframe
#     df = df.append(buffer_df, ignore_index=True)  

In [11]:
# collect prediction items from all available pickle files into a single dataframe
for selected_pickle in selected_pkl_list:
    buffer_df = pd.read_pickle('../database/predictions_detail/{}'.format(selected_pickle))
    # append buffer to final dataframe
    global_df = global_df.append(buffer_df, ignore_index=True)  

In [12]:
global_df

Unnamed: 0,model_id,execution,dataset,string_timestamps,predictions,targets,mae,rmse,smape,inference
0,DMSLSTM_TPU_006,0,test,"[2018-06-26 00:00:00, 2018-06-26 01:00:00, 201...","[2093.879638671875, 1927.9315185546875, 1822.9...","[2053.264892578125, 1844.294921875, 1765.15002...",52.396067,59.112906,0.021802,024
1,DMSLSTM_TPU_006,0,test,"[2018-06-26 01:00:00, 2018-06-26 02:00:00, 201...","[1901.080810546875, 1807.9185791015625, 1739.8...","[1844.294921875, 1765.1500244140625, 1706.6466...",51.331202,58.302371,0.021373,024
2,DMSLSTM_TPU_006,0,test,"[2018-06-26 02:00:00, 2018-06-26 03:00:00, 201...","[1793.4420166015625, 1720.0396728515625, 1712....","[1765.1500244140625, 1706.6466064453125, 1707....",45.125661,52.517229,0.018894,024
3,DMSLSTM_TPU_006,0,test,"[2018-06-26 03:00:00, 2018-06-26 04:00:00, 201...","[1726.946044921875, 1720.50341796875, 1752.954...","[1706.6466064453125, 1707.993408203125, 1769.0...",45.675191,52.190143,0.018830,024
4,DMSLSTM_TPU_006,0,test,"[2018-06-26 04:00:00, 2018-06-26 05:00:00, 201...","[1722.3309326171875, 1738.3778076171875, 1913....","[1707.993408203125, 1769.0233154296875, 1965.5...",49.626831,57.079129,0.020314,024
...,...,...,...,...,...,...,...,...,...,...
8405,DMSLSTM_TPU_006,9,test,"[2018-07-30 20:00:00, 2018-07-30 21:00:00, 201...","[2926.29736328125, 2936.49853515625, 2790.7832...","[2799.75, 2921.840087890625, 2782.763427734375...",59.274200,71.346521,0.026415,024
8406,DMSLSTM_TPU_006,9,test,"[2018-07-30 21:00:00, 2018-07-30 22:00:00, 201...","[2956.5146484375, 2795.34814453125, 2485.92626...","[2921.840087890625, 2782.763427734375, 2480.18...",60.710444,75.011349,0.027460,024
8407,DMSLSTM_TPU_006,9,test,"[2018-07-30 22:00:00, 2018-07-30 23:00:00, 201...","[2729.295166015625, 2392.3544921875, 2151.1196...","[2782.763427734375, 2480.181640625, 2127.68823...",63.389394,73.793573,0.026784,024
8408,DMSLSTM_TPU_006,9,test,"[2018-07-30 23:00:00, 2018-07-31 00:00:00, 201...","[2315.3046875, 2041.801513671875, 1896.9989013...","[2480.181640625, 2127.688232421875, 1933.56994...",95.103083,109.546923,0.037640,024


In [13]:
# a Pandas dataframe to store all predictions detail items
buffer_df_columns = [
    'timestamp',
    'model_id', 'execution',
    'prediction', 'target']

predictions_df = pd.DataFrame(columns=buffer_df_columns)

In [14]:
predictions_df

Unnamed: 0,timestamp,model_id,execution,prediction,target


In [15]:
# use the prediction values for the three architectures for local forecasting comparison
# plot a number of 24-hour ahead predictions, starting the first prediction available,
# against ground thruth

In [16]:
# get a filtered dataframe from a given model-execution-dataset-inference combination
dataset, inference = 'test', '024'

for model_id in ['DMSLSTM_TPU_006']:
    for execution in [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]:
        flag = \
        global_df.model_id.eq(model_id) & \
        global_df.execution.eq(execution) & \
        global_df.dataset.eq(dataset) & \
        global_df.inference.eq(inference)
        # reset index to use the same row, drop the index column
        filtered_df = global_df[flag].reset_index(drop=True)

        # pass the array predictions and array targets for selected intervals to dataframe columns
        start_indexes = [0, 24, 48, 72, 96, 120, 144]
        for start_index in start_indexes: 

            buffer_df = pd.DataFrame(columns=buffer_df_columns)

            buffer_df['timestamp'] = pd.to_datetime(filtered_df.iloc[start_index]['string_timestamps'])
            buffer_df['model_id'] = filtered_df.iloc[start_index]['model_id']
            buffer_df['execution'] = filtered_df.iloc[start_index]['execution']
            buffer_df['prediction'] = filtered_df.iloc[start_index]['predictions']
            buffer_df['target'] = filtered_df.iloc[start_index]['targets']

            predictions_df = pd.concat([predictions_df, buffer_df])

# reset index to use the same row, drop the index column
predictions_df = predictions_df.reset_index(drop=True)

In [17]:
predictions_df

Unnamed: 0,timestamp,model_id,execution,prediction,target
0,2018-06-26 00:00:00,DMSLSTM_TPU_006,0,2093.879639,2053.264893
1,2018-06-26 01:00:00,DMSLSTM_TPU_006,0,1927.931519,1844.294922
2,2018-06-26 02:00:00,DMSLSTM_TPU_006,0,1822.998535,1765.150024
3,2018-06-26 03:00:00,DMSLSTM_TPU_006,0,1756.306396,1706.646606
4,2018-06-26 04:00:00,DMSLSTM_TPU_006,0,1744.353027,1707.993408
...,...,...,...,...,...
1675,2018-07-02 19:00:00,DMSLSTM_TPU_006,9,2625.929688,2698.608154
1676,2018-07-02 20:00:00,DMSLSTM_TPU_006,9,2726.130127,2740.811768
1677,2018-07-02 21:00:00,DMSLSTM_TPU_006,9,2765.543213,2807.528320
1678,2018-07-02 22:00:00,DMSLSTM_TPU_006,9,2606.041504,2662.060059


In [18]:
bigquery_arima_columns = [
    'forecast_timestamp',
    'forecast_value',
    'standard_error',
    'confidence_level',
    'prediction_interval_lower_bound',
    'prediction_interval_upper_bound',
    'confidence_interval_lower_bound',
    'confidence_interval_upper_bound'
]

In [19]:
bigquery_dmslstm_df = pd.DataFrame(columns=bigquery_arima_columns)

In [20]:
bigquery_dmslstm_df

Unnamed: 0,forecast_timestamp,forecast_value,standard_error,confidence_level,prediction_interval_lower_bound,prediction_interval_upper_bound,confidence_interval_lower_bound,confidence_interval_upper_bound


In [21]:
csv_files = [
    'arima_dmslstm_forecast_01.csv',
    'arima_dmslstm_forecast_02.csv',
    'arima_dmslstm_forecast_03.csv',
    'arima_dmslstm_forecast_04.csv',
    'arima_dmslstm_forecast_05.csv',
    'arima_dmslstm_forecast_06.csv',
    'arima_dmslstm_forecast_07.csv',
]

In [22]:
for csv_file in csv_files:
    buffer_bigquery_dmslstm_df = pd.read_csv(
        '/home/developer/gcp/cbidmltsf/database/bigquery/{}'.format(csv_file)
    )
    bigquery_dmslstm_df = pd.concat([bigquery_dmslstm_df, buffer_bigquery_dmslstm_df])


bigquery_dmslstm_df = bigquery_dmslstm_df.reset_index(drop=True)

In [23]:
bigquery_dmslstm_df

Unnamed: 0,forecast_timestamp,forecast_value,standard_error,confidence_level,prediction_interval_lower_bound,prediction_interval_upper_bound,confidence_interval_lower_bound,confidence_interval_upper_bound
0,2018-06-26T00:00:00Z,2065.622667,42.183845,0.9,1996.311608,2134.933726,1996.311608,2134.933726
1,2018-06-26T01:00:00Z,1847.367862,56.134851,0.9,1755.134307,1939.601417,1755.134307,1939.601417
2,2018-06-26T02:00:00Z,1730.821131,65.136549,0.9,1623.797145,1837.845116,1623.797145,1837.845116
3,2018-06-26T03:00:00Z,1643.273364,71.687604,0.9,1525.485527,1761.061200,1525.485527,1761.061200
4,2018-06-26T04:00:00Z,1629.161044,76.792769,0.9,1502.985059,1755.337029,1502.985059,1755.337029
...,...,...,...,...,...,...,...,...
163,2018-07-02T19:00:00Z,2538.007847,109.880501,0.9,2357.466363,2718.549332,2357.466363,2718.549332
164,2018-07-02T20:00:00Z,2635.984217,111.570375,0.9,2452.666150,2819.302283,2452.666150,2819.302283
165,2018-07-02T21:00:00Z,2633.233707,113.232812,0.9,2447.184139,2819.283276,2447.184139,2819.283276
166,2018-07-02T22:00:00Z,2511.745255,114.869586,0.9,2323.006350,2700.484160,2323.006350,2700.484160


In [24]:
# timestamp for DMSLSTM_TPU_006 first prediction is '2018-06-26 00:00:00'
# daily interval endings are '2018-06-26 23:00:00', '2018-06-27 23:00:00', ..., '2018-07-02 23:00:00'
# BigQuery ARIMA is fed with the 8 previous weeks,
# starting on '2018-05-01 00:00:00' - '2018-06-25 23:00:00' (for the first 24-hour ahead prediction)


# timestamp for EDSLSTM_TPU_013 first prediction is '2018-05-03 16:00:00'
# daily interval endings are '2018-05-04 15:00:00', '2018-05-05 15:00:00', ..., '2018-05-10 15:00:00'

# timestamp for BSCTRFM_TPU_010 first prediction is '2018-05-14 23:00:00'
# daily interval endings are '2018-05-15 22:00:00', '2018-05-16 22:00:00', ..., '2018-05-21 22:00:00'

In [25]:
# build separated dataframes with metrics per model and date interval

In [26]:
metrics_columns = [
    'model_id', 'execution', 'start_timestamp', 'end_timestamp', 'mae', 'rmse', 'smape'
]

In [27]:
dmslstm_metrics_df = pd.DataFrame(columns=metrics_columns)
dmslstm_metrics_df

Unnamed: 0,model_id,execution,start_timestamp,end_timestamp,mae,rmse,smape


In [28]:
# The next cell calculates performace metrics for 1-day, 2-day, ..., up to 7-day periods
# which is not really useful as all predictions are produced on a 24-hour basis,
# then markdown the cell and change metrics calculation to a day 1, day 2, ... up to day 7 scheme

model_id = 'DMSLSTM_TPU_006'

executions = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]

start_timestamp = '2018-06-26 00:00:00'

end_timestamps = [
    '2018-06-26 23:00:00',
    '2018-06-27 23:00:00',
    '2018-06-28 23:00:00',
    '2018-06-29 23:00:00',
    '2018-06-30 23:00:00',
    '2018-07-01 23:00:00',
    '2018-07-02 23:00:00',
]

for execution in executions:
    
    for end_timestamp in end_timestamps:

        flag = \
        predictions_df.model_id.eq(model_id) & \
        predictions_df.execution.eq(execution) & \
        predictions_df.timestamp.ge(start_timestamp) & \
        predictions_df.timestamp.le(end_timestamp)
        
        results_df = predictions_df[flag].reset_index(drop=True)
        
        mae = mean_absolute_error(results_df.target, results_df.prediction)
        rmse = sqrt(mean_squared_error(results_df.target, results_df.prediction))
        smape =  symmetric_mean_absolute_percentage_error(results_df.target, results_df.prediction)

        row_list = [
            model_id, execution,
            start_timestamp, end_timestamp,
            mae, rmse, smape
        ]
        row_metrics_df = pd.DataFrame([row_list], columns=metrics_columns)
        
        dmslstm_metrics_df = pd.concat([dmslstm_metrics_df, row_metrics_df])

In [29]:
# execution with best metrics over all intervals
# dmslstm_metrics_df.groupby(['execution']).mean().style.highlight_min(color = 'lightgreen', axis = 0)

In [30]:
# execution with worst metrics over all intervals
# dmslstm_metrics_df.groupby(['execution']).mean().style.highlight_max(color = 'yellow', axis = 0)

In [31]:
# metrics average for 10 executions on different intervals
# dmslstm_metrics_df.groupby(['end_timestamp']).mean()

In [32]:
# metrics standard deviation for 10 executions on different intervals
# dmslstm_metrics_df.groupby(['end_timestamp']).std()

In [33]:
# now get metrics from BigQuery ARIMA
# use target values from results_df with execution = 0 (they are the same across executions)

In [34]:
arima_one_week_mae = list()

for start_index in [0, 24, 48, 72, 96, 120, 144]:
    end_index = start_index + 24
    mae = mean_absolute_error(
        predictions_df.target[start_index:end_index],
        bigquery_dmslstm_df.forecast_value[start_index:end_index]
    )
    
    arima_one_week_mae.append(mae)
    print('ARIMA MAE for {} to {} interval is: {:.4f}'.format(start_index, end_index, mae))

ARIMA MAE for 0 to 24 interval is: 76.3232
ARIMA MAE for 24 to 48 interval is: 104.6651
ARIMA MAE for 48 to 72 interval is: 59.6213
ARIMA MAE for 72 to 96 interval is: 47.9564
ARIMA MAE for 96 to 120 interval is: 122.5390
ARIMA MAE for 120 to 144 interval is: 123.2520
ARIMA MAE for 144 to 168 interval is: 156.7935


In [35]:
arima_one_week_rmse = list()

for start_index in [0, 24, 48, 72, 96, 120, 144]:
    end_index = start_index + 24
    rmse = sqrt(mean_squared_error(
        predictions_df.target[start_index:end_index],
        bigquery_dmslstm_df.forecast_value[start_index:end_index]
    ))
    
    arima_one_week_rmse.append(rmse)
    print('ARIMA RMSE for {} to {} interval is: {:.4f}'.format(start_index, end_index, rmse))

ARIMA RMSE for 0 to 24 interval is: 96.6190
ARIMA RMSE for 24 to 48 interval is: 130.3832
ARIMA RMSE for 48 to 72 interval is: 78.7560
ARIMA RMSE for 72 to 96 interval is: 59.1122
ARIMA RMSE for 96 to 120 interval is: 156.7504
ARIMA RMSE for 120 to 144 interval is: 165.9855
ARIMA RMSE for 144 to 168 interval is: 164.5955


In [36]:
arima_one_week_smape = list()

for start_index in [0, 24, 48, 72, 96, 120, 144]:
    end_index = start_index + 24
    
    # adjust SMAPE to percentage value
    smape = 100*symmetric_mean_absolute_percentage_error(
        predictions_df.target[start_index:end_index],
        bigquery_dmslstm_df.forecast_value[start_index:end_index]
    )
    
    arima_one_week_smape.append(smape)
    print('ARIMA SMAPE for {} to {} interval is: {:.4f}'.format(start_index, end_index, smape))

ARIMA SMAPE for 0 to 24 interval is: 3.3754
ARIMA SMAPE for 24 to 48 interval is: 4.2182
ARIMA SMAPE for 48 to 72 interval is: 2.4816
ARIMA SMAPE for 72 to 96 interval is: 1.9413
ARIMA SMAPE for 96 to 120 interval is: 5.0354
ARIMA SMAPE for 120 to 144 interval is: 5.3700
ARIMA SMAPE for 144 to 168 interval is: 6.9179


for end_index in [24, 48, 72, 96, 120, 144, 168]:
    mae = mean_absolute_error(
        predictions_df.target[:end_index],
        bigquery_dmslstm_df.forecast_value[:end_index]
    )
    print('ARIMA MAE for {}-hour interval is: {:.4f}'.format(end_index, mae))

for end_index in [24, 48, 72, 96, 120, 144, 168]:
    rmse = sqrt(mean_squared_error(
        predictions_df.target[:end_index],
        bigquery_dmslstm_df.forecast_value[:end_index]
    ))
    print('ARIMA RMSE for {}-hour interval is: {:.4f}'.format(end_index, rmse))

for end_index in [24, 48, 72, 96, 120, 144, 168]:
    smape = symmetric_mean_absolute_percentage_error(
        predictions_df.target[:end_index],
        bigquery_dmslstm_df.forecast_value[:end_index]
    )
    print('ARIMA SMAPE for {}-hour interval is: {:.4f}'.format(end_index, smape))

In [37]:
# a flag to isolate all pairs prediction-target for a given model in predictions_df
selected_model = 'DMSLSTM_TPU_006'
flag = predictions_df['model_id'] == selected_model

In [38]:
average_predictions_df = predictions_df[flag].groupby(['timestamp']).mean()

In [39]:
average_predictions_df

Unnamed: 0_level_0,prediction,target
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1
2018-06-26 00:00:00,2089.254077,2053.264893
2018-06-26 01:00:00,1916.529675,1844.294922
2018-06-26 02:00:00,1820.230493,1765.150024
2018-06-26 03:00:00,1754.403296,1706.646606
2018-06-26 04:00:00,1746.663098,1707.993408
...,...,...
2018-07-02 19:00:00,2633.223975,2698.608154
2018-07-02 20:00:00,2734.921631,2740.811768
2018-07-02 21:00:00,2758.551392,2807.528320
2018-07-02 22:00:00,2601.905981,2662.060059


In [40]:
plots = dict()

In [41]:
# a datetime range for the prediction interval
for datetime in pd.date_range(start='2018-07-02 00:00:00', end='2018-07-02 23:00:00', freq='H'):
    print(datetime)

2018-07-02 00:00:00
2018-07-02 01:00:00
2018-07-02 02:00:00
2018-07-02 03:00:00
2018-07-02 04:00:00
2018-07-02 05:00:00
2018-07-02 06:00:00
2018-07-02 07:00:00
2018-07-02 08:00:00
2018-07-02 09:00:00
2018-07-02 10:00:00
2018-07-02 11:00:00
2018-07-02 12:00:00
2018-07-02 13:00:00
2018-07-02 14:00:00
2018-07-02 15:00:00
2018-07-02 16:00:00
2018-07-02 17:00:00
2018-07-02 18:00:00
2018-07-02 19:00:00
2018-07-02 20:00:00
2018-07-02 21:00:00
2018-07-02 22:00:00
2018-07-02 23:00:00


In [42]:
day = 7
start, end = 24*(day - 1), 24*day

size = 8

plots['dmslstm'] = figure(
    x_axis_type='datetime',
    plot_width=960,
    plot_height=320,
    title='DMSLSTM (average) vs. ARIMA vs. Real / Day {}.'.format(day),
    toolbar_location=None,
)

plots['dmslstm'].grid.grid_line_alpha=0.3

plots['dmslstm'].xaxis.axis_label = 'Date'
plots['dmslstm'].yaxis.axis_label = 'Active Power [KW]'

plots['dmslstm'].title.text_font_size = '14pt'

plots['dmslstm'].xaxis.axis_label_text_font_size = "14pt"
plots['dmslstm'].yaxis.axis_label_text_font_size = "14pt"

plots['dmslstm'].xaxis.major_label_text_font_size = "12pt"
plots['dmslstm'].yaxis.major_label_text_font_size = "12pt"

# a custom x range to visuallly improve the the plot
plots['dmslstm'].x_range = Range1d(
    start=average_predictions_df.index[start] - timedelta(hours=1),
    end=average_predictions_df.index[end-1] + timedelta(hours=2)
)

# the target value in average_predictions_df is the average of targets, then the only target
plots['dmslstm'].square(
    x=average_predictions_df.index[start:end],
    y=average_predictions_df.prediction[start:end],
    size=size,
    fill_color=None,
    color='black',
    legend_label='DMSLSTM'
)
plots['dmslstm'].line(
    x=average_predictions_df.index[start:end],
    y=average_predictions_df.prediction[start:end],
    color='black',
    legend_label='DMSLSTM'
)

plots['dmslstm'].triangle(
    x=pd.to_datetime(bigquery_dmslstm_df.forecast_timestamp)[start:end],
    y=bigquery_dmslstm_df.forecast_value[start:end],
    size=size,
    fill_color=None,
    color='black',
    legend_label='ARIMA'
)
plots['dmslstm'].line(
    x=pd.to_datetime(bigquery_dmslstm_df.forecast_timestamp)[start:end],
    y=bigquery_dmslstm_df.forecast_value[start:end],
    color='black',
    legend_label='ARIMA'
)

plots['dmslstm'].circle(
    x=average_predictions_df.index[start:end],
    y=average_predictions_df.target[start:end],
    size=size,
    fill_color=None,
    color='black',
    legend_label='Real'
)
plots['dmslstm'].line(
    x=average_predictions_df.index[start:end],
    y=average_predictions_df.target[start:end],
    color='black',
    legend_label='Real'
)

plots['dmslstm'].legend.label_text_font_size = '12pt'
plots['dmslstm'].legend.location = 'top_left'

# uncomment the following two lines to save plot
# output_file('/home/developer/gcp/cbidmltsf/datasets/cfe/{}_H_kw.html'.format(device))
# save(fig_kw)

# uncomment the following line to display plot
show(plots['dmslstm'])

In [43]:
# a Pandas dataframe to store all predictions detail items
buffer_df_columns = [
    'start_timestamp',
    'end_timestamp',
    'model_id',
    'execution',
    'mae',
    'rmse',
    'smape'
]

day_by_day_predictions_df = pd.DataFrame(columns=buffer_df_columns)

In [44]:
day_by_day_predictions_df

Unnamed: 0,start_timestamp,end_timestamp,model_id,execution,mae,rmse,smape


In [45]:

model_id, dataset, inference = 'DMSLSTM_TPU_006', 'test', '024'
executions = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]

num_days = 28
start_indexes = 24*np.arange(num_days)

for execution in executions:
    
    flag = \
    global_df.model_id.eq(model_id) & \
    global_df.execution.eq(execution) & \
    global_df.dataset.eq(dataset) & \
    global_df.inference.eq(inference)

    # reset index to use the same row, drop the index column
    filtered_df = global_df[flag].reset_index(drop=True)
    
    for start_index in start_indexes:
        
        list_to_row = [
            pd.to_datetime(filtered_df.iloc[start_index]['string_timestamps'][0]),
            pd.to_datetime(filtered_df.iloc[start_index]['string_timestamps'][-1]),
            filtered_df.iloc[start_index]['model_id'],
            filtered_df.iloc[start_index]['execution'],
            filtered_df.iloc[start_index]['mae'],
            filtered_df.iloc[start_index]['rmse'],
            filtered_df.iloc[start_index]['smape']
        ]
        
        list_to_row_df = pd.DataFrame([list_to_row], columns=buffer_df_columns)

        day_by_day_predictions_df = pd.concat([day_by_day_predictions_df, list_to_row_df])

day_by_day_predictions_df = day_by_day_predictions_df.reset_index(drop=True)

In [46]:
day_by_day_predictions_df

Unnamed: 0,start_timestamp,end_timestamp,model_id,execution,mae,rmse,smape
0,2018-06-26,2018-06-26 23:00:00,DMSLSTM_TPU_006,0,52.396067,59.112906,0.021802
1,2018-06-27,2018-06-27 23:00:00,DMSLSTM_TPU_006,0,86.227798,105.939216,0.033589
2,2018-06-28,2018-06-28 23:00:00,DMSLSTM_TPU_006,0,44.274470,58.762515,0.018125
3,2018-06-29,2018-06-29 23:00:00,DMSLSTM_TPU_006,0,50.163055,63.333390,0.020158
4,2018-06-30,2018-06-30 23:00:00,DMSLSTM_TPU_006,0,85.067256,111.609041,0.034295
...,...,...,...,...,...,...,...
275,2018-07-19,2018-07-19 23:00:00,DMSLSTM_TPU_006,9,80.191879,102.130522,0.030311
276,2018-07-20,2018-07-20 23:00:00,DMSLSTM_TPU_006,9,82.133555,91.754166,0.034834
277,2018-07-21,2018-07-21 23:00:00,DMSLSTM_TPU_006,9,49.197230,56.484748,0.021172
278,2018-07-22,2018-07-22 23:00:00,DMSLSTM_TPU_006,9,59.935099,83.991791,0.024069


In [47]:
# get the time line, on a daily basis, from the grouped dataframe
day_by_day_predictions_df.groupby(['start_timestamp']).mean().index

DatetimeIndex(['2018-06-26', '2018-06-27', '2018-06-28', '2018-06-29',
               '2018-06-30', '2018-07-01', '2018-07-02', '2018-07-03',
               '2018-07-04', '2018-07-05', '2018-07-06', '2018-07-07',
               '2018-07-08', '2018-07-09', '2018-07-10', '2018-07-11',
               '2018-07-12', '2018-07-13', '2018-07-14', '2018-07-15',
               '2018-07-16', '2018-07-17', '2018-07-18', '2018-07-19',
               '2018-07-20', '2018-07-21', '2018-07-22', '2018-07-23'],
              dtype='datetime64[ns]', name='start_timestamp', freq=None)

In [62]:
# get a metric average (on the 10 executions) for a given 24-hour interval
dmslstm_average_one_week_mae_mean = day_by_day_predictions_df.groupby(['start_timestamp']).mean()['mae'][:7]

In [63]:
for x in dmslstm_average_one_week_mae_mean:
    print('{:0.4f}'.format(x))

51.4102
85.5390
47.4907
48.8155
89.2606
85.3257
62.1411


In [64]:
dmslstm_average_one_week_mae_std = day_by_day_predictions_df.groupby(['start_timestamp']).std()['mae'][:7]

In [65]:
for x in dmslstm_average_one_week_mae_std:
    print('{:0.4f}'.format(x))

3.5676
3.6049
2.4059
2.9000
3.9335
4.4253
3.6187


In [68]:
dmslstm_average_one_week_rmse_mean = day_by_day_predictions_df.groupby(['start_timestamp']).mean()['rmse'][:7]

In [69]:
for x in dmslstm_average_one_week_rmse_mean:
    print('{:0.4f}'.format(x))

58.8745
107.4322
63.2627
60.6888
117.4109
104.4914
70.1199


In [70]:
dmslstm_average_one_week_rmse_std = day_by_day_predictions_df.groupby(['start_timestamp']).std()['rmse'][:7]

In [71]:
for x in dmslstm_average_one_week_rmse_std:
    print('{:0.4f}'.format(x))

4.1009
4.4043
3.2333
2.9308
4.6368
6.2990
2.9061


In [74]:
# adjust SMAPE to percentage value
dmslstm_average_one_week_smape_mean = 100*day_by_day_predictions_df.groupby(['start_timestamp']).mean()['smape'][:7]

In [75]:
for x in dmslstm_average_one_week_smape_mean:
    print('{:0.4f}'.format(x))

2.1446
3.3229
1.9352
1.9580
3.5931
4.2671
2.6683


In [76]:
dmslstm_average_one_week_smape_std = 100*day_by_day_predictions_df.groupby(['start_timestamp']).std()['smape'][:7]

In [77]:
for x in dmslstm_average_one_week_smape_std:
    print('{:0.4f}'.format(x))

0.1484
0.1459
0.1008
0.1234
0.1573
0.2278
0.1651


In [54]:
# compare metrics: DMSLSTM average predictions vs. ARIMA
# express them as percentage for tabulation

In [117]:
baseline_mae_pct = -100*(arima_one_week_mae - dmslstm_average_one_week_mae_mean)/arima_one_week_mae

In [118]:
for x in baseline_mae_pct:
    print('{:0.2f}'.format(x))

-32.64
-18.27
-20.35
1.79
-27.16
-30.77
-60.37


In [119]:
baseline_rmse_pct = -100*(arima_one_week_rmse - dmslstm_average_one_week_rmse_mean)/arima_one_week_rmse

In [120]:
for x in baseline_rmse_pct:
    print('{:0.2f}'.format(x))

-39.07
-17.60
-19.67
2.67
-25.10
-37.05
-57.40


In [121]:
baseline_smape_pct = -100*(arima_one_week_smape - dmslstm_average_one_week_smape_mean)/arima_one_week_smape

In [122]:
for x in baseline_smape_pct:
    print('{:0.2f}'.format(x))

-36.46
-21.22
-22.02
0.86
-28.64
-20.54
-61.43


In [54]:
size = 4

plots['dmslstm_week_metric'] = figure(
    x_axis_type='datetime',
    plot_width=960,
    plot_height=320,
    title='DMSLSTM Performance: {} for {} Consecutive 24-hour Prediction Intervals.'\
    .format('MAE', num_days),
    toolbar_location=None,
)

plots['dmslstm_week_metric'].grid.grid_line_alpha=0.3

plots['dmslstm_week_metric'].xaxis.axis_label = 'Date'
plots['dmslstm_week_metric'].yaxis.axis_label = 'MAE [KW]'

plots['dmslstm_week_metric'].title.text_font_size = '14pt'

plots['dmslstm_week_metric'].xaxis.axis_label_text_font_size = "14pt"
plots['dmslstm_week_metric'].yaxis.axis_label_text_font_size = "14pt"

plots['dmslstm_week_metric'].xaxis.major_label_text_font_size = "12pt"
plots['dmslstm_week_metric'].yaxis.major_label_text_font_size = "12pt"


plots['dmslstm_week_metric'].circle(
    x=day_by_day_predictions_df.groupby(['start_timestamp']).mean().index,
    y=day_by_day_predictions_df.groupby(['start_timestamp']).mean()['mae'],
    color='black',
    size=size,
    fill_color=None,
)
plots['dmslstm_week_metric'].line(
    x=day_by_day_predictions_df.groupby(['start_timestamp']).mean().index,
    y=day_by_day_predictions_df.groupby(['start_timestamp']).mean()['mae'],
    color='black',
)

arima_one_week_mae_average = Span(
    location=np.mean(np.array(arima_one_week_mae)),
    dimension='width',
    line_color='black',
    line_dash='dashed',
    line_width=2)

plots['dmslstm_week_metric'].add_layout(arima_one_week_mae_average)


# plots['dmslstm_mae'].legend.location = 'top_left'

# uncomment the following two lines to save plot
# output_file('/home/developer/gcp/cbidmltsf/datasets/cfe/{}_H_kw.html'.format(device))
# save(fig_kw)

# uncomment the following line to display plot
show(plots['dmslstm_week_metric'])

In [55]:
size = 4

plots['dmslstm_week_metric'] = figure(
    x_axis_type='datetime',
    plot_width=960,
    plot_height=320,
    title='DMSLSTM Performance: {} for {} Consecutive 24-hour Prediction Intervals.'\
    .format('RMSE', num_days),
    toolbar_location=None,
)

plots['dmslstm_week_metric'].grid.grid_line_alpha=0.3

plots['dmslstm_week_metric'].title.text_font_size = '14pt'

plots['dmslstm_week_metric'].xaxis.axis_label_text_font_size = "14pt"
plots['dmslstm_week_metric'].yaxis.axis_label_text_font_size = "14pt"

plots['dmslstm_week_metric'].xaxis.major_label_text_font_size = "12pt"
plots['dmslstm_week_metric'].yaxis.major_label_text_font_size = "12pt"

plots['dmslstm_week_metric'].xaxis.axis_label = 'Date'
plots['dmslstm_week_metric'].yaxis.axis_label = 'RMSE [KW]'

plots['dmslstm_week_metric'].circle(
    x=day_by_day_predictions_df.groupby(['start_timestamp']).mean().index,
    y=day_by_day_predictions_df.groupby(['start_timestamp']).mean()['rmse'],
    color='black',
    size=size,
    fill_color=None,
)
plots['dmslstm_week_metric'].line(
    x=day_by_day_predictions_df.groupby(['start_timestamp']).mean().index,
    y=day_by_day_predictions_df.groupby(['start_timestamp']).mean()['rmse'],
    color='black',
)

arima_one_week_rmse_average = Span(
    location=np.mean(np.array(arima_one_week_rmse)),
    dimension='width',
    line_color='black',
    line_dash='dashed',
    line_width=2)

plots['dmslstm_week_metric'].add_layout(arima_one_week_rmse_average)


# plots['dmslstm_mae'].legend.location = 'top_left'

# uncomment the following two lines to save plot
# output_file('/home/developer/gcp/cbidmltsf/datasets/cfe/{}_H_kw.html'.format(device))
# save(fig_kw)

# uncomment the following line to display plot
show(plots['dmslstm_week_metric'])

In [56]:
size = 4

plots['dmslstm_week_metric'] = figure(
    x_axis_type='datetime',
    plot_width=960,
    plot_height=320,
    title='DMSLSTM Performance: {} for {} Consecutive 24-hour Prediction Intervals.'\
    .format('SMAPE', num_days),
    toolbar_location=None,
)

plots['dmslstm_week_metric'].grid.grid_line_alpha=0.3

plots['dmslstm_week_metric'].title.text_font_size = '14pt'

plots['dmslstm_week_metric'].xaxis.axis_label_text_font_size = "14pt"
plots['dmslstm_week_metric'].yaxis.axis_label_text_font_size = "14pt"

plots['dmslstm_week_metric'].xaxis.major_label_text_font_size = "12pt"
plots['dmslstm_week_metric'].yaxis.major_label_text_font_size = "12pt"

plots['dmslstm_week_metric'].xaxis.axis_label = 'Date'
plots['dmslstm_week_metric'].yaxis.axis_label = 'SMAPE [%]'

# adjust SMAPE for percentage value
plots['dmslstm_week_metric'].circle(
    x=day_by_day_predictions_df.groupby(['start_timestamp']).mean().index,
    y=100*day_by_day_predictions_df.groupby(['start_timestamp']).mean()['smape'],
    color='black',
    size=size,
    fill_color=None,
)
# adjust SMAPE for percentage value
plots['dmslstm_week_metric'].line(
    x=day_by_day_predictions_df.groupby(['start_timestamp']).mean().index,
    y=100*day_by_day_predictions_df.groupby(['start_timestamp']).mean()['smape'],
    color='black',
)

    location=np.mean(np.array(arima_one_week_smape)),
    dimension='width',
    line_color='black',
    line_dash='dashed',
    line_width=2)

plots['dmslstm_week_metric'].add_layout(arima_one_week_smape_average)


# plots['dmslstm_mae'].legend.location = 'top_left'

# uncomment the following two lines to save plot
# output_file('/home/developer/gcp/cbidmltsf/datasets/cfe/{}_H_kw.html'.format(device))
# save(fig_kw)

# uncomment the following line to display plot
show(plots['dmslstm_week_metric'])

IndentationError: unexpected indent (<ipython-input-56-e361dce0f06a>, line 40)

In [57]:
# MAE, RMSE, and SMAPE for rolling predictions in 4 weeks
# from global dataframe
start, end = 0, 672
size = 4

plots['dmslstm_metric'] = figure(
    x_axis_type='datetime',
    plot_width=960,
    plot_height=320,
    title='DMSLSTM Performance: {} for {} Consecutive 1-hour Rolling Predictions.'\
    .format('MAE', end),
    toolbar_location=None,
)

plots['dmslstm_metric'].grid.grid_line_alpha=0.3

plots['dmslstm_metric'].title.text_font_size = '14pt'

plots['dmslstm_metric'].xaxis.axis_label_text_font_size = "14pt"
plots['dmslstm_metric'].yaxis.axis_label_text_font_size = "14pt"

plots['dmslstm_metric'].xaxis.major_label_text_font_size = "12pt"
plots['dmslstm_metric'].yaxis.major_label_text_font_size = "12pt"

plots['dmslstm_metric'].xaxis.axis_label = 'Date'
plots['dmslstm_metric'].yaxis.axis_label = 'MAE [KW]'

plots['dmslstm_metric'].circle(
    x=[pd.to_datetime(row[0]) for row in global_df['string_timestamps'][start:end]],
    y=global_df['mae'][start:end],
    color='black',
    size=size,
    fill_color=None,
)
plots['dmslstm_metric'].line(
    x=[pd.to_datetime(row[0]) for row in global_df['string_timestamps'][start:end]],
    y=global_df['mae'][start:end],
    color='black',
)

arima_one_week_mae_average = Span(
    location=np.mean(np.array(arima_one_week_mae)),
    dimension='width',
    line_color='black',
    line_dash='dashed',
    line_width=2)

plots['dmslstm_metric'].add_layout(arima_one_week_mae_average)


# plots['dmslstm_mae'].legend.location = 'top_left'

# uncomment the following two lines to save plot
# output_file('/home/developer/gcp/cbidmltsf/datasets/cfe/{}_H_kw.html'.format(device))
# save(fig_kw)

# uncomment the following line to display plot
show(plots['dmslstm_metric'])

In [58]:
# MAE, RMSE, and SMAPE for rolling predictions in 4 weeks
# from global dataframe
start, end = 0, 672
size = 4

plots['dmslstm_metric'] = figure(
    x_axis_type='datetime',
    plot_width=960,
    plot_height=320,
    title='DMSLSTM Performance: {} for {} Consecutive 1-hour Rolling Predictions.'\
    .format('RMSE', end),
    toolbar_location=None,
)

plots['dmslstm_metric'].grid.grid_line_alpha=0.3

plots['dmslstm_metric'].title.text_font_size = '14pt'

plots['dmslstm_metric'].xaxis.axis_label_text_font_size = "14pt"
plots['dmslstm_metric'].yaxis.axis_label_text_font_size = "14pt"

plots['dmslstm_metric'].xaxis.major_label_text_font_size = "12pt"
plots['dmslstm_metric'].yaxis.major_label_text_font_size = "12pt"

plots['dmslstm_metric'].xaxis.axis_label = 'Date'
plots['dmslstm_metric'].yaxis.axis_label = 'RMSE [KW]'

plots['dmslstm_metric'].circle(
    x=[pd.to_datetime(row[0]) for row in global_df['string_timestamps'][start:end]],
    y=global_df['rmse'][start:end],
    color='black',
    size=size,
    fill_color=None,
)
plots['dmslstm_metric'].line(
    x=[pd.to_datetime(row[0]) for row in global_df['string_timestamps'][start:end]],
    y=global_df['rmse'][start:end],
    color='black',
)

arima_one_week_rmse_average = Span(
    location=np.mean(np.array(arima_one_week_rmse)),
    dimension='width',
    line_color='black',
    line_dash='dashed',
    line_width=2)

plots['dmslstm_metric'].add_layout(arima_one_week_rmse_average)


# plots['dmslstm_mae'].legend.location = 'top_left'

# uncomment the following two lines to save plot
# output_file('/home/developer/gcp/cbidmltsf/datasets/cfe/{}_H_kw.html'.format(device))
# save(fig_kw)

# uncomment the following line to display plot
show(plots['dmslstm_metric'])

In [59]:
# MAE, RMSE, and SMAPE for rolling predictions in 4 weeks
# from global dataframe
start, end = 0, 672
size = 4

plots['dmslstm_metric'] = figure(
    x_axis_type='datetime',
    plot_width=960,
    plot_height=320,
    title='DMSLSTM Performance: {} for {} Consecutive 1-hour Rolling Predictions.'\
    .format('SMAPE', end),
    toolbar_location=None,
)

plots['dmslstm_metric'].grid.grid_line_alpha=0.3

plots['dmslstm_metric'].title.text_font_size = '14pt'

plots['dmslstm_metric'].xaxis.axis_label_text_font_size = "14pt"
plots['dmslstm_metric'].yaxis.axis_label_text_font_size = "14pt"

plots['dmslstm_metric'].xaxis.major_label_text_font_size = "12pt"
plots['dmslstm_metric'].yaxis.major_label_text_font_size = "12pt"

plots['dmslstm_metric'].xaxis.axis_label = 'Date'
plots['dmslstm_metric'].yaxis.axis_label = 'SMAPE [%]'

# adjust SMAPE for percentage value
plots['dmslstm_metric'].circle(
    x=[pd.to_datetime(row[0]) for row in global_df['string_timestamps'][start:end]],
    y=100*global_df['smape'][start:end],
    color='black',
    size=size,
    fill_color=None,
)
# adjust SMAPE for percentage value
plots['dmslstm_metric'].line(
    x=[pd.to_datetime(row[0]) for row in global_df['string_timestamps'][start:end]],
    y=100*global_df['smape'][start:end],
    color='black',
)

arima_one_week_smape_average = Span(
    location=np.mean(np.array(arima_one_week_smape)),
    dimension='width',
    line_color='black',
    line_dash='dashed',
    line_width=2)

plots['dmslstm_metric'].add_layout(arima_one_week_smape_average)


# plots['dmslstm_mae'].legend.location = 'top_left'

# uncomment the following two lines to save plot
# output_file('/home/developer/gcp/cbidmltsf/datasets/cfe/{}_H_kw.html'.format(device))
# save(fig_kw)

# uncomment the following line to display plot
show(plots['dmslstm_metric'])

In [60]:
! ls -l /home/developer/DEPFIE-SCOM/ScD_Thesis/results

total 408
-rw-rw-r-- 1 developer developer    69 sep 22 13:48 BSCTRFM_TPU_010_09_eval.csv
-rw-rw-r-- 1 developer developer 46229 sep 22 13:48 BSCTRFM_TPU_010_09_loss.csv
-rw-rw-r-- 1 developer developer 46008 sep 22 13:48 BSCTRFM_TPU_010_09_lrs.csv
-rw-rw-r-- 1 developer developer    69 sep 23 17:44 BSCTRFM_TPU_011_07_eval.csv
-rw-rw-r-- 1 developer developer 45428 sep 23 17:44 BSCTRFM_TPU_011_07_loss.csv
-rw-rw-r-- 1 developer developer 45874 sep 23 17:44 BSCTRFM_TPU_011_07_lrs.csv
-rw-rw-r-- 1 developer developer 44853 sep 20 13:27 DMSLSTM_TPU_006_09_loss.csv
-rw-rw-r-- 1 developer developer 44752 sep 20 13:27 DMSLSTM_TPU_006_09_lrs.csv
-rw-rw-r-- 1 developer developer    68 sep 21 12:13 EDSLSTM_TPU_013_05_eval.csv
-rw-rw-r-- 1 developer developer 44489 sep 21 12:13 EDSLSTM_TPU_013_05_loss.csv
-rw-rw-r-- 1 developer developer 44597 sep 21 12:12 EDSLSTM_TPU_013_05_lrs.csv
-rw-rw-r-- 1 developer developer 27526 may 23 11:05 transformer_metrics.ods


In [61]:
loss_df = pd.read_csv(
    '/home/developer/DEPFIE-SCOM/ScD_Thesis/results/DMSLSTM_TPU_006_09_loss.csv'
)

In [62]:
loss_df

Unnamed: 0,Wall time,Step,Value
0,1.623424e+09,0,0.048876
1,1.623424e+09,2,0.048492
2,1.623424e+09,3,0.046912
3,1.623424e+09,4,0.046221
4,1.623424e+09,9,0.040796
...,...,...,...
995,1.623424e+09,1573,0.001313
996,1.623424e+09,1574,0.001499
997,1.623424e+09,1576,0.001229
998,1.623424e+09,1578,0.001537


In [63]:
lrs_df = pd.read_csv(
    '/home/developer/DEPFIE-SCOM/ScD_Thesis/results/DMSLSTM_TPU_006_09_lrs.csv'
)

In [64]:
lrs_df

Unnamed: 0,Wall time,Step,Value
0,1.623424e+09,0,0.000000
1,1.623424e+09,2,0.000253
2,1.623424e+09,3,0.000380
3,1.623424e+09,4,0.000507
4,1.623424e+09,9,0.001140
...,...,...,...
995,1.623424e+09,1573,0.000100
996,1.623424e+09,1574,0.000100
997,1.623424e+09,1576,0.000100
998,1.623424e+09,1578,0.000100


In [65]:

plots['dmslstm_lrs'] = figure(
    plot_width=960,
    plot_height=320,
    title='DMSLSTM Learning Rate Schedule.',
    toolbar_location=None,
)

plots['dmslstm_lrs'].grid.grid_line_alpha=0.3

plots['dmslstm_lrs'].title.text_font_size = '14pt'

plots['dmslstm_lrs'].xaxis.axis_label_text_font_size = "14pt"
plots['dmslstm_lrs'].yaxis.axis_label_text_font_size = "14pt"

plots['dmslstm_lrs'].xaxis.major_label_text_font_size = "12pt"
plots['dmslstm_lrs'].yaxis.major_label_text_font_size = "12pt"

plots['dmslstm_lrs'].xaxis.axis_label = 'Training Step'
plots['dmslstm_lrs'].yaxis.axis_label = 'Learning Rate'

# adjust SMAPE for percentage value
plots['dmslstm_lrs'].line(
    x=lrs_df['Step'],
    y=lrs_df['Value'],
    color='black',
)

show(plots['dmslstm_lrs'])

In [66]:
eval_loss = 0.001388

In [67]:

plots['dmslstm_loss'] = figure(
    plot_width=960,
    plot_height=320,
    title='DMSLSTM Training Loss.',
    toolbar_location=None,
)

plots['dmslstm_loss'].grid.grid_line_alpha=0.3

plots['dmslstm_loss'].y_range = Range1d (
    start=0,
    end=0.01
)

plots['dmslstm_loss'].title.text_font_size = '14pt'

plots['dmslstm_loss'].xaxis.axis_label_text_font_size = "14pt"
plots['dmslstm_loss'].yaxis.axis_label_text_font_size = "14pt"

plots['dmslstm_loss'].xaxis.major_label_text_font_size = "12pt"
plots['dmslstm_loss'].yaxis.major_label_text_font_size = "12pt"

plots['dmslstm_loss'].xaxis.axis_label = 'Training Step'
plots['dmslstm_loss'].yaxis.axis_label = 'Loss'

plots['dmslstm_loss'].line(
    x=loss_df['Step'],
    y=loss_df['Value'],
    color='black',
)

final_eval_loss = Span(
    location=eval_loss,
    dimension='width',
    line_color='black',
    line_dash='dashed',
    line_width=2)

plots['dmslstm_loss'].add_layout(final_eval_loss)

show(plots['dmslstm_loss'])

In [6]:
from tensorboard.backend.event_processing import event_accumulator

In [7]:
def get_wall_time(path_to_logdir):
    '''
    receives a UNIX path to a TensorBoard logdir of a model
    returns the wall time for the model training process
    '''
    # an event accumulator to the logdir
    ea = event_accumulator.EventAccumulator(path_to_logdir,
                                            size_guidance={ # see below regarding this argument
                                                # event_accumulator.COMPRESSED_HISTOGRAMS: 500, # not used
                                                # event_accumulator.IMAGES: 4, # not used
                                                # event_accumulator.AUDIO: 4, # not used
                                                event_accumulator.SCALARS: 0, # retrieve all
                                                event_accumulator.TENSORS: 0, # retrieve all
                                                # event_accumulator.HISTOGRAMS: 1 # not used
                                            }
                                           )
    # loads events from file
    ea.Reload()
    
    # wall time is end time - start time
    wall_time = ea.Tensors('loss')[-1][0] - ea.Tensors('loss')[0][0]
    print("Wall time for model in '{}' is {} seconds.".format(path_to_logdir,
                                                            wall_time))
    return wall_time

In [8]:
models_list = [
    '/home/developer/gcp/cbidmltsf/models/DMSLSTM_TPU_006_00',
    '/home/developer/gcp/cbidmltsf/models/DMSLSTM_TPU_006_01',
    '/home/developer/gcp/cbidmltsf/models/DMSLSTM_TPU_006_02',
    '/home/developer/gcp/cbidmltsf/models/DMSLSTM_TPU_006_03',
    '/home/developer/gcp/cbidmltsf/models/DMSLSTM_TPU_006_04',
    '/home/developer/gcp/cbidmltsf/models/DMSLSTM_TPU_006_05',
    '/home/developer/gcp/cbidmltsf/models/DMSLSTM_TPU_006_06',
    '/home/developer/gcp/cbidmltsf/models/DMSLSTM_TPU_006_07',
    '/home/developer/gcp/cbidmltsf/models/DMSLSTM_TPU_006_08',
    '/home/developer/gcp/cbidmltsf/models/DMSLSTM_TPU_006_09',
]

In [9]:
# a dataframe for wall times
wall_times_list = [get_wall_time(model) for model in models_list
    
]

Found more than one graph event per run, or there was a metagraph containing a graph_def, as well as one or more graph events.  Overwriting the graph with the newest event.
Found more than one metagraph event per run. Overwriting the metagraph with the newest event.
Found more than one graph event per run, or there was a metagraph containing a graph_def, as well as one or more graph events.  Overwriting the graph with the newest event.
Found more than one metagraph event per run. Overwriting the metagraph with the newest event.


Wall time for model in '/home/developer/gcp/cbidmltsf/models/DMSLSTM_TPU_006_00' is 24.622194051742554 seconds.


Found more than one graph event per run, or there was a metagraph containing a graph_def, as well as one or more graph events.  Overwriting the graph with the newest event.
Found more than one metagraph event per run. Overwriting the metagraph with the newest event.


Wall time for model in '/home/developer/gcp/cbidmltsf/models/DMSLSTM_TPU_006_01' is 25.001632928848267 seconds.
Wall time for model in '/home/developer/gcp/cbidmltsf/models/DMSLSTM_TPU_006_02' is 24.876068115234375 seconds.


Found more than one graph event per run, or there was a metagraph containing a graph_def, as well as one or more graph events.  Overwriting the graph with the newest event.
Found more than one metagraph event per run. Overwriting the metagraph with the newest event.
Found more than one graph event per run, or there was a metagraph containing a graph_def, as well as one or more graph events.  Overwriting the graph with the newest event.
Found more than one metagraph event per run. Overwriting the metagraph with the newest event.


Wall time for model in '/home/developer/gcp/cbidmltsf/models/DMSLSTM_TPU_006_03' is 24.82333493232727 seconds.


Found more than one graph event per run, or there was a metagraph containing a graph_def, as well as one or more graph events.  Overwriting the graph with the newest event.
Found more than one metagraph event per run. Overwriting the metagraph with the newest event.


Wall time for model in '/home/developer/gcp/cbidmltsf/models/DMSLSTM_TPU_006_04' is 24.68921685218811 seconds.


Found more than one graph event per run, or there was a metagraph containing a graph_def, as well as one or more graph events.  Overwriting the graph with the newest event.
Found more than one metagraph event per run. Overwriting the metagraph with the newest event.


Wall time for model in '/home/developer/gcp/cbidmltsf/models/DMSLSTM_TPU_006_05' is 24.5868980884552 seconds.
Wall time for model in '/home/developer/gcp/cbidmltsf/models/DMSLSTM_TPU_006_06' is 24.339043855667114 seconds.


Found more than one graph event per run, or there was a metagraph containing a graph_def, as well as one or more graph events.  Overwriting the graph with the newest event.
Found more than one metagraph event per run. Overwriting the metagraph with the newest event.
Found more than one graph event per run, or there was a metagraph containing a graph_def, as well as one or more graph events.  Overwriting the graph with the newest event.
Found more than one metagraph event per run. Overwriting the metagraph with the newest event.


Wall time for model in '/home/developer/gcp/cbidmltsf/models/DMSLSTM_TPU_006_07' is 24.257344007492065 seconds.
Wall time for model in '/home/developer/gcp/cbidmltsf/models/DMSLSTM_TPU_006_08' is 24.316033124923706 seconds.


Found more than one graph event per run, or there was a metagraph containing a graph_def, as well as one or more graph events.  Overwriting the graph with the newest event.
Found more than one metagraph event per run. Overwriting the metagraph with the newest event.


Wall time for model in '/home/developer/gcp/cbidmltsf/models/DMSLSTM_TPU_006_09' is 24.376765966415405 seconds.


In [10]:
wall_times_list

[24.622194051742554,
 25.001632928848267,
 24.876068115234375,
 24.82333493232727,
 24.68921685218811,
 24.5868980884552,
 24.339043855667114,
 24.257344007492065,
 24.316033124923706,
 24.376765966415405]

In [11]:
print('{:0.4f}'.format(np.mean(wall_times_list)))

24.5889


In [12]:
print('{:0.4f}'.format(np.std(wall_times_list)))

0.2471


In [13]:
# the ARIMA training time in seconds
arima_time = 10.0

In [29]:
plots['dmslstm_arima_computation'] = figure(
    plot_width=960,
    plot_height=320,
    title='DMSLSTM vs. ARIMA Computing Times for 24-hour Prediction Intervals.',
    toolbar_location=None,
)

plots['dmslstm_arima_computation'].grid.grid_line_alpha=0.3

plots['dmslstm_arima_computation'].xaxis.axis_label = 'Number of 24-hour Prediction Intervals'
plots['dmslstm_arima_computation'].yaxis.axis_label = 'Computing Time [s]'

plots['dmslstm_arima_computation'].title.text_font_size = '14pt'

plots['dmslstm_arima_computation'].xaxis.axis_label_text_font_size = "14pt"
plots['dmslstm_arima_computation'].yaxis.axis_label_text_font_size = "14pt"

plots['dmslstm_arima_computation'].xaxis.major_label_text_font_size = "12pt"
plots['dmslstm_arima_computation'].yaxis.major_label_text_font_size = "12pt"

plots['dmslstm_arima_computation'].line(
    x=1+np.arange(672),
    y=arima_time*(1+np.arange(672)),
    color='black',
)

dmslstm_training_time = Span(
    location=np.mean(wall_times_list),
    dimension='width',
    line_color='black',
    line_dash='dashed',
    line_width=2)

arima_007_predictions = Span(
    location=7,
    dimension='height',
    line_color='black',
    line_dash='dashed',
    line_width=2
)
arima_028_predictions = Span(
    location=28,
    dimension='height',
    line_color='black',
    line_dash='dashed',
    line_width=2
)
arima_672_predictions = Span(
    location=672,
    dimension='height',
    line_color='black',
    line_dash='dashed',
    line_width=2
)

plots['dmslstm_arima_computation'].add_layout(
    dmslstm_training_time
)
plots['dmslstm_arima_computation'].add_layout(
    arima_007_predictions
)
plots['dmslstm_arima_computation'].add_layout(
    arima_028_predictions
)
plots['dmslstm_arima_computation'].add_layout(
    arima_672_predictions
)

show(plots['dmslstm_arima_computation'])