In [1]:
import json
import numpy as np
import pandas as pd
import os

In [2]:
from datetime import timedelta

In [3]:
from math import sqrt
from sklearn.metrics import mean_squared_error, mean_absolute_error

In [4]:
from bokeh.plotting import figure, show, output_file, save

from bokeh.io import output_notebook

from bokeh.models import Span, Range1d

from bokeh.palettes import d3

output_notebook()

In [5]:
pd.set_option('display.max_rows', 20)

In [6]:
def symmetric_mean_absolute_percentage_error(targets, predictions):
    '''
    Symmetric mean absolute percentage error (SMAPE), returned as a fraction.

    Computes sum(2*|prediction - target| / (|target| + |prediction|)) / n,
    where n is the length of the first axis of the inputs.
    Multiply the result by 100 to express it as a percentage.

    targets: a list (or array-like) with the actual values
    predictions: a list (or array-like) with the predicted values,
                 same shape as targets

    returns: the SMAPE value as a float

    raises: ValueError when targets and predictions have different shapes
            (the previous implementation silently returned None in that case)
    '''
    # lists (or Series) to NumPy float arrays
    targets = np.asarray(targets, dtype=float)
    predictions = np.asarray(predictions, dtype=float)

    # verify predictions and targets have the same shape
    if predictions.shape != targets.shape:
        raise ValueError(
            'targets and predictions must have the same shape, got {} and {}'.format(
                targets.shape, predictions.shape
            )
        )

    numerator = 2*np.abs(predictions - targets)
    denominator = np.abs(targets) + np.abs(predictions)

    # a zero denominator means target and prediction are both zero (a perfect hit),
    # so that term contributes 0 instead of the NaN produced by 0/0
    terms = np.divide(
        numerator,
        denominator,
        out=np.zeros_like(numerator),
        where=denominator != 0
    )

    return np.sum(terms)/predictions.shape[0]

In [7]:
# pickle files with the prediction details to load: one file per execution (00 to 09)
# of the selected model architecture, named '<model_id>_<execution>_test_024.pkl'
# (only BSCTRFM_TPU_010 is selected here; swap the prefix for DMSLSTM_TPU_006 or
# EDSLSTM_TPU_013 to load the files of the other architectures)
selected_pkl_list = [
    'BSCTRFM_TPU_010_{:02d}_test_024.pkl'.format(execution_number)
    for execution_number in range(10)
]

In [8]:
# column names of the dataframe that accumulates every prediction detail item:
# identification fields, then the per-window sequences, then the stored error metrics
global_df_columns = (
    ['model_id', 'execution', 'dataset']
    + ['string_timestamps', 'predictions', 'targets']
    + ['mae', 'rmse', 'smape']
)

# start from an empty dataframe that only carries the column names
global_df = pd.DataFrame(columns=global_df_columns)

In [9]:
global_df

Unnamed: 0,model_id,execution,dataset,string_timestamps,predictions,targets,mae,rmse,smape


In [10]:
# collect prediction items from all available pickle files into a single dataframe
# for available_pickle in avail_pkl_list:
#     buffer_df = pd.read_pickle('../database/predictions_detail/{}'.format(available_pickle))
#     # append buffer to final dataframe
#     df = df.append(buffer_df, ignore_index=True)  

In [11]:
# collect prediction items from the selected pickle files into a single dataframe
# - DataFrame.append was deprecated in pandas 1.4 and removed in 2.0, so the buffers
#   are gathered in a list and concatenated once
# - global_df is rebuilt from scratch, so re-running this cell does not duplicate rows
# NOTE(review): pd.read_pickle can execute arbitrary code, only load trusted files
buffer_dfs = [
    pd.read_pickle('../database/predictions_detail/{}'.format(selected_pickle))
    for selected_pickle in selected_pkl_list
]

if buffer_dfs:
    global_df = pd.concat(buffer_dfs, ignore_index=True)
else:
    # nothing selected: keep an empty dataframe with the expected columns
    global_df = pd.DataFrame(columns=global_df_columns)

In [12]:
global_df

Unnamed: 0,model_id,execution,dataset,string_timestamps,predictions,targets,mae,rmse,smape,inference
0,BSCTRFM_TPU_010,0,test,"[2018-05-14 23:00:00, 2018-05-15 00:00:00, 201...","[2382.16748046875, 2077.376220703125, 1927.182...","[2451.085, 2157.115, 1955.9133333333332, 1843....",74.396584,96.334578,0.027189,024
1,BSCTRFM_TPU_010,0,test,"[2018-05-15 00:00:00, 2018-05-15 01:00:00, 201...","[2155.89404296875, 1972.978515625, 1890.491455...","[2157.115, 1955.9133333333332, 1843.0166666666...",62.046877,86.687252,0.022528,024
2,BSCTRFM_TPU_010,0,test,"[2018-05-15 01:00:00, 2018-05-15 02:00:00, 201...","[1962.076904296875, 1888.2723388671875, 1804.4...","[1955.9133333333332, 1843.0166666666667, 1821....",69.695597,95.546980,0.024663,024
3,BSCTRFM_TPU_010,0,test,"[2018-05-15 02:00:00, 2018-05-15 03:00:00, 201...","[1883.4368896484375, 1783.55126953125, 1759.25...","[1843.0166666666667, 1821.4583333333333, 1782....",72.489587,94.985545,0.026480,024
4,BSCTRFM_TPU_010,0,test,"[2018-05-15 03:00:00, 2018-05-15 04:00:00, 201...","[1804.1121826171875, 1771.7735595703125, 1776....","[1821.4583333333333, 1782.45, 1804.09333333333...",77.848004,101.649048,0.027913,024
...,...,...,...,...,...,...,...,...,...,...
13435,BSCTRFM_TPU_010,9,test,"[2018-07-09 18:00:00, 2018-07-09 19:00:00, 201...","[2707.915771484375, 2609.11962890625, 2701.229...","[2663.6699999999996, 2692.1733333333336, 2778....",52.550846,65.249803,0.021148,024
13436,BSCTRFM_TPU_010,9,test,"[2018-07-09 19:00:00, 2018-07-09 20:00:00, 201...","[2658.371826171875, 2732.99462890625, 2793.747...","[2692.1733333333336, 2778.8350000000005, 2831....",54.897324,69.462673,0.022312,024
13437,BSCTRFM_TPU_010,9,test,"[2018-07-09 20:00:00, 2018-07-09 21:00:00, 201...","[2737.962646484375, 2808.086181640625, 2676.10...","[2778.8350000000005, 2831.001666666667, 2681.7...",63.490341,75.818934,0.026157,024
13438,BSCTRFM_TPU_010,9,test,"[2018-07-09 21:00:00, 2018-07-09 22:00:00, 201...","[2797.225830078125, 2695.39599609375, 2403.328...","[2831.001666666667, 2681.786666666667, 2392.92...",59.448185,75.072112,0.024196,024


In [13]:
# timestamps of the day-1 prediction window (row 0 of global_df):
# 24 hourly steps starting at '2018-05-14 23:00:00'
global_df['string_timestamps'][0]

['2018-05-14 23:00:00',
 '2018-05-15 00:00:00',
 '2018-05-15 01:00:00',
 '2018-05-15 02:00:00',
 '2018-05-15 03:00:00',
 '2018-05-15 04:00:00',
 '2018-05-15 05:00:00',
 '2018-05-15 06:00:00',
 '2018-05-15 07:00:00',
 '2018-05-15 08:00:00',
 '2018-05-15 09:00:00',
 '2018-05-15 10:00:00',
 '2018-05-15 11:00:00',
 '2018-05-15 12:00:00',
 '2018-05-15 13:00:00',
 '2018-05-15 14:00:00',
 '2018-05-15 15:00:00',
 '2018-05-15 16:00:00',
 '2018-05-15 17:00:00',
 '2018-05-15 18:00:00',
 '2018-05-15 19:00:00',
 '2018-05-15 20:00:00',
 '2018-05-15 21:00:00',
 '2018-05-15 22:00:00']

In [14]:
# timestamps of the day-7 prediction window (row 144 = 6 days * 24 hourly rows):
# 24 hourly steps starting at '2018-05-20 23:00:00'
global_df['string_timestamps'][144]

['2018-05-20 23:00:00',
 '2018-05-21 00:00:00',
 '2018-05-21 01:00:00',
 '2018-05-21 02:00:00',
 '2018-05-21 03:00:00',
 '2018-05-21 04:00:00',
 '2018-05-21 05:00:00',
 '2018-05-21 06:00:00',
 '2018-05-21 07:00:00',
 '2018-05-21 08:00:00',
 '2018-05-21 09:00:00',
 '2018-05-21 10:00:00',
 '2018-05-21 11:00:00',
 '2018-05-21 12:00:00',
 '2018-05-21 13:00:00',
 '2018-05-21 14:00:00',
 '2018-05-21 15:00:00',
 '2018-05-21 16:00:00',
 '2018-05-21 17:00:00',
 '2018-05-21 18:00:00',
 '2018-05-21 19:00:00',
 '2018-05-21 20:00:00',
 '2018-05-21 21:00:00',
 '2018-05-21 22:00:00']

In [15]:
# columns of the long-format dataframe: one row per predicted timestamp,
# pairing each prediction with its target for a given model execution
buffer_df_columns = ['timestamp', 'model_id', 'execution', 'prediction', 'target']

# empty dataframe that only carries the column names
predictions_df = pd.DataFrame(columns=buffer_df_columns)

In [16]:
predictions_df

Unnamed: 0,timestamp,model_id,execution,prediction,target


In [17]:
# use the prediction values for the three architectures for local forecasting comparison
# plot a number of 24-hour ahead predictions, starting the first prediction available,
# against ground truth

In [18]:
# Build predictions_df in long format (one row per predicted timestamp) from seven
# consecutive 24-hour windows of every execution of the selected model.
#
# For each model-execution-dataset-inference combination, the rows of global_df are
# filtered and re-indexed from 0; rows 0, 24, ..., 144 hold the windows for day 1 to day 7.
#
# The per-window buffers are gathered in a list and concatenated once, which
# - avoids the quadratic cost of growing a dataframe with pd.concat inside the loop
# - rebuilds predictions_df from scratch, so re-running this cell does not duplicate rows
# - no longer depends on buffer_df_columns, a name that is rebound later in the notebook
dataset, inference = 'test', '024'

# first row of each 24-hour window in the filtered dataframe
start_indexes = [0, 24, 48, 72, 96, 120, 144]

prediction_buffers = []

for model_id in ['BSCTRFM_TPU_010']:
    for execution in [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]:
        flag = \
        global_df.model_id.eq(model_id) & \
        global_df.execution.eq(execution) & \
        global_df.dataset.eq(dataset) & \
        global_df.inference.eq(inference)
        # reset index to use the same row, drop the index column
        filtered_df = global_df[flag].reset_index(drop=True)

        for start_index in start_indexes:
            window_row = filtered_df.iloc[start_index]

            # expand the sequences stored in the row into one dataframe row per timestamp;
            # the scalar model_id and execution are broadcast to the window length
            buffer_df = pd.DataFrame({
                'timestamp': pd.to_datetime(window_row['string_timestamps']),
                'model_id': window_row['model_id'],
                'execution': window_row['execution'],
                'prediction': window_row['predictions'],
                'target': window_row['targets'],
            })

            prediction_buffers.append(buffer_df)

# single concatenation, with a fresh 0..n-1 index
predictions_df = pd.concat(prediction_buffers, ignore_index=True)

In [19]:
predictions_df

Unnamed: 0,timestamp,model_id,execution,prediction,target
0,2018-05-14 23:00:00,BSCTRFM_TPU_010,0,2382.167480,2451.085000
1,2018-05-15 00:00:00,BSCTRFM_TPU_010,0,2077.376221,2157.115000
2,2018-05-15 01:00:00,BSCTRFM_TPU_010,0,1927.182373,1955.913333
3,2018-05-15 02:00:00,BSCTRFM_TPU_010,0,1867.201782,1843.016667
4,2018-05-15 03:00:00,BSCTRFM_TPU_010,0,1785.679077,1821.458333
...,...,...,...,...,...
1675,2018-05-21 18:00:00,BSCTRFM_TPU_010,9,2862.639648,2841.735000
1676,2018-05-21 19:00:00,BSCTRFM_TPU_010,9,2817.707520,2777.203333
1677,2018-05-21 20:00:00,BSCTRFM_TPU_010,9,2853.515869,2851.368333
1678,2018-05-21 21:00:00,BSCTRFM_TPU_010,9,2922.482910,2980.845000


In [20]:
# column names of the BigQuery ARIMA forecast files: the forecast itself,
# followed by the lower/upper bounds of the prediction and confidence intervals
bigquery_arima_columns = [
    'forecast_timestamp',
    'forecast_value',
    'standard_error',
    'confidence_level',
] + [
    '{}_interval_{}_bound'.format(interval_kind, bound_side)
    for interval_kind in ('prediction', 'confidence')
    for bound_side in ('lower', 'upper')
]

In [21]:
# empty placeholder dataframe carrying only the BigQuery ARIMA forecast column names
bigquery_bsctrfm_df = pd.DataFrame(columns=bigquery_arima_columns)

In [22]:
bigquery_bsctrfm_df

Unnamed: 0,forecast_timestamp,forecast_value,standard_error,confidence_level,prediction_interval_lower_bound,prediction_interval_upper_bound,confidence_interval_lower_bound,confidence_interval_upper_bound


In [24]:
# BigQuery ARIMA forecast files, one per forecast day (01 to 07)
csv_files = [
    'arima_bsctrfm_forecast_{:02d}.csv'.format(forecast_day)
    for forecast_day in range(1, 8)
]

In [25]:
# read every BigQuery ARIMA forecast file and stack them, in csv_files order,
# into a single dataframe with a fresh 0..n-1 index
# - the files are concatenated once instead of growing the dataframe inside a loop
# - the dataframe is rebuilt from scratch, so re-running this cell does not duplicate rows
# NOTE(review): this absolute path is machine-specific, while the pickle files are read
# from the relative '../database/...' location; consider a shared configurable base path
bigquery_bsctrfm_df = pd.concat(
    [
        pd.read_csv(
            '/home/developer/gcp/cbidmltsf/database/bigquery/{}'.format(csv_file)
        )
        for csv_file in csv_files
    ],
    ignore_index=True
)

In [26]:
bigquery_bsctrfm_df

Unnamed: 0,forecast_timestamp,forecast_value,standard_error,confidence_level,prediction_interval_lower_bound,prediction_interval_upper_bound,confidence_interval_lower_bound,confidence_interval_upper_bound
0,2018-05-14T23:00:00Z,2468.502288,62.970652,0.9,2365.037025,2571.967550,2365.037025,2571.967550
1,2018-05-15T00:00:00Z,2216.765495,72.737819,0.9,2097.252081,2336.278909,2097.252081,2336.278909
2,2018-05-15T01:00:00Z,2017.767872,78.265392,0.9,1889.172263,2146.363481,1889.172263,2146.363481
3,2018-05-15T02:00:00Z,1911.130583,87.725004,0.9,1766.992157,2055.269009,1766.992157,2055.269009
4,2018-05-15T03:00:00Z,1835.874515,92.640870,0.9,1683.658972,1988.090057,1683.658972,1988.090057
...,...,...,...,...,...,...,...,...
163,2018-05-21T18:00:00Z,2901.328266,119.031339,0.9,2705.751305,3096.905228,2705.751305,3096.905228
164,2018-05-21T19:00:00Z,2846.886231,120.462841,0.9,2648.957209,3044.815253,2648.957209,3044.815253
165,2018-05-21T20:00:00Z,2952.512151,121.877506,0.9,2752.258734,3152.765567,2752.258734,3152.765567
166,2018-05-21T21:00:00Z,2970.495280,123.275923,0.9,2767.944164,3173.046396,2767.944164,3173.046396


In [27]:
# timestamp for DMSLSTM_TPU_006 first prediction is '2018-06-26 00:00:00'
# daily interval endings are '2018-06-26 23:00:00', '2018-06-27 23:00:00', ..., '2018-07-02 23:00:00'
# BigQuery ARIMA is fed with the 8 previous weeks,
# starting on '2018-05-01 00:00:00' - '2018-06-25 23:00:00' (for the first 24-hour ahead prediction)


# timestamp for EDSLSTM_TPU_013 first prediction is '2018-05-03 16:00:00'
# BigQuery ARIMA is fed with the 8 previous weeks,
# prediction for day 1 is based on a model trained from '2018-03-08 16:00:00' to '2018-05-03 15:00:00'
# prediction for day 2 is based on a model trained from '2018-03-09 16:00:00' to '2018-05-04 15:00:00'
# prediction for day 3 is based on a model trained from '2018-03-10 16:00:00' to '2018-05-05 15:00:00'
# prediction for day 4 is based on a model trained from '2018-03-11 16:00:00' to '2018-05-06 15:00:00'
# prediction for day 5 is based on a model trained from '2018-03-12 16:00:00' to '2018-05-07 15:00:00'
# prediction for day 6 is based on a model trained from '2018-03-13 16:00:00' to '2018-05-08 15:00:00'
# prediction for day 7 is based on a model trained from '2018-03-14 16:00:00' to '2018-05-09 15:00:00'


# timestamp for BSCTRFM_TPU_010 first prediction is '2018-05-14 23:00:00'
# BigQuery ARIMA is fed with the 8 previous weeks,
# prediction for day 1 is based on a model trained from '2018-03-19 23:00:00' to '2018-05-14 22:00:00'
# prediction for day 2 is based on a model trained from '2018-03-20 23:00:00' to '2018-05-15 22:00:00'
# prediction for day 3 is based on a model trained from '2018-03-21 23:00:00' to '2018-05-16 22:00:00'
# prediction for day 4 is based on a model trained from '2018-03-22 23:00:00' to '2018-05-17 22:00:00'
# prediction for day 5 is based on a model trained from '2018-03-23 23:00:00' to '2018-05-18 22:00:00'
# prediction for day 6 is based on a model trained from '2018-03-24 23:00:00' to '2018-05-19 22:00:00'
# prediction for day 7 is based on a model trained from '2018-03-25 23:00:00' to '2018-05-20 22:00:00'


In [28]:
# build separated dataframes with metrics per model and date interval

In [30]:
# columns of the per-interval metrics dataframes: the model execution,
# the evaluated date interval, and the three error metrics
metrics_columns = (
    ['model_id', 'execution']
    + ['start_timestamp', 'end_timestamp']
    + ['mae', 'rmse', 'smape']
)

In [31]:
# empty dataframe (column names only) for the BSCTRFM metrics per date interval
bsctrfm_metrics_df = pd.DataFrame(columns=metrics_columns)
# display it to confirm the expected columns
bsctrfm_metrics_df

Unnamed: 0,model_id,execution,start_timestamp,end_timestamp,mae,rmse,smape


In [32]:
# This cell calculates performance metrics for cumulative 1-day, 2-day, ..., up to 7-day
# periods, which is not really useful as all predictions are produced on a 24-hour basis;
# the day 1, day 2, ..., day 7 scheme used further below is the preferred one.
#
# The cell was copied from the DMSLSTM notebook and still filtered on 'DMSLSTM_TPU_006'
# and its dates, which are not present in predictions_df (the metrics functions fail on
# the resulting empty selection), and it accumulated into dmslstm_metrics_df, which is
# never defined here. It now targets the model, dates and dataframe of this notebook.

model_id = 'BSCTRFM_TPU_010'

executions = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]

# first predicted timestamp for BSCTRFM_TPU_010
start_timestamp = '2018-05-14 23:00:00'

# last timestamp of the day-1, day-2, ..., day-7 prediction windows
end_timestamps = [
    '2018-05-15 22:00:00',
    '2018-05-16 22:00:00',
    '2018-05-17 22:00:00',
    '2018-05-18 22:00:00',
    '2018-05-19 22:00:00',
    '2018-05-20 22:00:00',
    '2018-05-21 22:00:00',
]

# rows are gathered in a list and the dataframe is built once,
# so re-running the cell does not duplicate rows
metrics_rows = []

for execution in executions:

    for end_timestamp in end_timestamps:

        flag = \
        predictions_df.model_id.eq(model_id) & \
        predictions_df.execution.eq(execution) & \
        predictions_df.timestamp.ge(pd.Timestamp(start_timestamp)) & \
        predictions_df.timestamp.le(pd.Timestamp(end_timestamp))

        results_df = predictions_df[flag].reset_index(drop=True)

        # the scikit-learn metrics raise on empty input; skip intervals without data
        if results_df.empty:
            continue

        mae = mean_absolute_error(results_df.target, results_df.prediction)
        rmse = sqrt(mean_squared_error(results_df.target, results_df.prediction))
        # SMAPE is kept as a fraction here (not multiplied by 100)
        smape = symmetric_mean_absolute_percentage_error(results_df.target, results_df.prediction)

        metrics_rows.append([
            model_id, execution,
            start_timestamp, end_timestamp,
            mae, rmse, smape
        ])

bsctrfm_metrics_df = pd.DataFrame(metrics_rows, columns=metrics_columns)

In [33]:
# execution with best metrics over all intervals
# dmslstm_metrics_df.groupby(['execution']).mean().style.highlight_min(color = 'lightgreen', axis = 0)

In [34]:
# execution with worst metrics over all intervals
# dmslstm_metrics_df.groupby(['execution']).mean().style.highlight_max(color = 'yellow', axis = 0)

In [35]:
# metrics average for 10 executions on different intervals
# dmslstm_metrics_df.groupby(['end_timestamp']).mean()

In [36]:
# metrics standard deviation for 10 executions on different intervals
# dmslstm_metrics_df.groupby(['end_timestamp']).std()

In [37]:
# now get metrics from BigQuery ARIMA
# use target values from results_df with execution = 0 (they are the same across executions)

In [38]:
# MAE of the BigQuery ARIMA forecast for each of the seven 24-hour windows;
# targets are taken from the first 168 rows of predictions_df (execution 0)
arima_one_week_mae = []

for start_index in range(0, 168, 24):
    end_index = start_index + 24
    window = slice(start_index, end_index)

    mae = mean_absolute_error(
        predictions_df['target'].iloc[window],
        bigquery_bsctrfm_df['forecast_value'].iloc[window]
    )

    arima_one_week_mae += [mae]
    print('ARIMA MAE for {} to {} interval is: {:.4f}'.format(start_index, end_index, mae))

ARIMA MAE for 0 to 24 interval is: 160.6660
ARIMA MAE for 24 to 48 interval is: 100.7960
ARIMA MAE for 48 to 72 interval is: 90.7451
ARIMA MAE for 72 to 96 interval is: 71.1904
ARIMA MAE for 96 to 120 interval is: 58.2965
ARIMA MAE for 120 to 144 interval is: 124.5701
ARIMA MAE for 144 to 168 interval is: 65.9860


In [39]:
# RMSE of the BigQuery ARIMA forecast for each of the seven 24-hour windows;
# targets are taken from the first 168 rows of predictions_df (execution 0)
arima_one_week_rmse = []

for start_index in range(0, 168, 24):
    end_index = start_index + 24
    window = slice(start_index, end_index)

    squared_error = mean_squared_error(
        predictions_df['target'].iloc[window],
        bigquery_bsctrfm_df['forecast_value'].iloc[window]
    )
    rmse = sqrt(squared_error)

    arima_one_week_rmse += [rmse]
    print('ARIMA RMSE for {} to {} interval is: {:.4f}'.format(start_index, end_index, rmse))

ARIMA RMSE for 0 to 24 interval is: 205.4609
ARIMA RMSE for 24 to 48 interval is: 117.6361
ARIMA RMSE for 48 to 72 interval is: 104.4511
ARIMA RMSE for 72 to 96 interval is: 86.0171
ARIMA RMSE for 96 to 120 interval is: 71.5076
ARIMA RMSE for 120 to 144 interval is: 144.9715
ARIMA RMSE for 144 to 168 interval is: 75.4550


In [40]:
# SMAPE (as a percentage) of the BigQuery ARIMA forecast for each of the seven
# 24-hour windows; targets are taken from the first 168 rows of predictions_df (execution 0)
arima_one_week_smape = []

for start_index in range(0, 168, 24):
    end_index = start_index + 24
    window = slice(start_index, end_index)

    smape_fraction = symmetric_mean_absolute_percentage_error(
        predictions_df['target'].iloc[window],
        bigquery_bsctrfm_df['forecast_value'].iloc[window]
    )
    # the helper returns a fraction, scale it to a percentage value
    smape = 100*smape_fraction

    arima_one_week_smape += [smape]
    print('ARIMA SMAPE for {} to {} interval is: {:.4f}'.format(start_index, end_index, smape))

ARIMA SMAPE for 0 to 24 interval is: 5.8152
ARIMA SMAPE for 24 to 48 interval is: 4.3112
ARIMA SMAPE for 48 to 72 interval is: 3.6705
ARIMA SMAPE for 72 to 96 interval is: 2.8952
ARIMA SMAPE for 96 to 120 interval is: 2.4724
ARIMA SMAPE for 120 to 144 interval is: 4.8394
ARIMA SMAPE for 144 to 168 interval is: 2.7351


for end_index in [24, 48, 72, 96, 120, 144, 168]:
    mae = mean_absolute_error(
        predictions_df.target[:end_index],
        bigquery_dmslstm_df.forecast_value[:end_index]
    )
    print('ARIMA MAE for {}-hour interval is: {:.4f}'.format(end_index, mae))

for end_index in [24, 48, 72, 96, 120, 144, 168]:
    rmse = sqrt(mean_squared_error(
        predictions_df.target[:end_index],
        bigquery_dmslstm_df.forecast_value[:end_index]
    ))
    print('ARIMA RMSE for {}-hour interval is: {:.4f}'.format(end_index, rmse))

for end_index in [24, 48, 72, 96, 120, 144, 168]:
    smape = symmetric_mean_absolute_percentage_error(
        predictions_df.target[:end_index],
        bigquery_dmslstm_df.forecast_value[:end_index]
    )
    print('ARIMA SMAPE for {}-hour interval is: {:.4f}'.format(end_index, smape))

In [41]:
# boolean mask selecting every prediction-target pair of one model in predictions_df
selected_model = 'BSCTRFM_TPU_010'
flag = predictions_df.model_id.eq(selected_model)

In [42]:
# average prediction and target per timestamp over all executions of the selected model
# - only the numeric columns are aggregated: since pandas 2.0 groupby().mean() no longer
#   drops non-numeric columns and raises TypeError on the string model_id column
# - the mask is computed here from selected_model instead of reusing the global `flag`,
#   a name that other cells rebind to masks over different dataframes
average_predictions_df = (
    predictions_df
    .loc[
        predictions_df['model_id'] == selected_model,
        ['timestamp', 'prediction', 'target']
    ]
    .groupby('timestamp')
    .mean()
)

In [43]:
average_predictions_df

Unnamed: 0_level_0,prediction,target
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1
2018-05-14 23:00:00,2416.858569,2451.085000
2018-05-15 00:00:00,2134.943774,2157.115000
2018-05-15 01:00:00,1938.502502,1955.913333
2018-05-15 02:00:00,1839.837280,1843.016667
2018-05-15 03:00:00,1772.579114,1821.458333
...,...,...
2018-05-21 18:00:00,2950.280566,2841.735000
2018-05-21 19:00:00,2891.046826,2777.203333
2018-05-21 20:00:00,2934.424463,2851.368333
2018-05-21 21:00:00,3009.908936,2980.845000


In [44]:
# registry of the Bokeh figures built in this notebook, keyed by a short name
plots = {}

In [45]:
# a datetime range for the prediction interval
# for datetime in pd.date_range(start='2018-07-02 00:00:00', end='2018-07-02 23:00:00', freq='H'):
#     print(datetime)

In [104]:
# 24-hour window to plot: day N covers rows 24*(N-1) to 24*N - 1 of the averaged predictions
day = 7
start, end = 24*(day - 1), 24*day

# marker size for the three series
size = 8

bsctrfm_plot = figure(
    x_axis_type='datetime',
    plot_width=960,
    plot_height=320,
    title='BSCTRFM (average) vs. ARIMA vs. Real / Day {}.'.format(day),
    toolbar_location=None,
)
plots['bsctrfm'] = bsctrfm_plot

# grid, title and axes styling
bsctrfm_plot.grid.grid_line_alpha = 0.3
bsctrfm_plot.title.text_font_size = '14pt'

bsctrfm_plot.xaxis.axis_label = 'Date'
bsctrfm_plot.xaxis.axis_label_text_font_size = "14pt"
bsctrfm_plot.xaxis.major_label_text_font_size = "12pt"

bsctrfm_plot.yaxis.axis_label = 'Active Power [KW]'
bsctrfm_plot.yaxis.axis_label_text_font_size = "14pt"
bsctrfm_plot.yaxis.major_label_text_font_size = "12pt"

# a custom x range to visually improve the plot:
# one hour of margin on the left, three hours on the right
bsctrfm_plot.x_range = Range1d(
    start=average_predictions_df.index[start] - timedelta(hours=1),
    end=average_predictions_df.index[end-1] + timedelta(hours=3)
)

window_timestamps = average_predictions_df.index[start:end]
arima_timestamps = pd.to_datetime(bigquery_bsctrfm_df.forecast_timestamp)[start:end]

# (marker glyph, x values, y values, legend label) for each series, in drawing order;
# targets are identical across executions, so their average is the single real series
series_to_draw = [
    ('square', window_timestamps, average_predictions_df.prediction[start:end], 'BSCTRFM'),
    ('triangle', arima_timestamps, bigquery_bsctrfm_df.forecast_value[start:end], 'ARIMA'),
    ('circle', window_timestamps, average_predictions_df.target[start:end], 'Real'),
]

# every series is drawn as hollow black markers joined by a black line
for marker_name, x_values, y_values, series_label in series_to_draw:
    draw_marker = getattr(bsctrfm_plot, marker_name)
    draw_marker(
        x=x_values,
        y=y_values,
        size=size,
        fill_color=None,
        color='black',
        legend_label=series_label
    )
    bsctrfm_plot.line(
        x=x_values,
        y=y_values,
        color='black',
        legend_label=series_label
    )

bsctrfm_plot.legend.label_text_font_size = '12pt'
bsctrfm_plot.legend.location = 'bottom_right'

# to save the plot instead of displaying it, call output_file(<path>)
# followed by save(plots['bsctrfm'])

# display plot
show(plots['bsctrfm'])

In [59]:
# columns of the day-by-day metrics dataframe: one row per 24-hour prediction window,
# with its first and last timestamps, the model execution and the stored error metrics
# NOTE: this rebinds buffer_df_columns, which an earlier cell defined with the
# columns of predictions_df ('timestamp', 'model_id', 'execution', 'prediction', 'target')
buffer_df_columns = [
    'start_timestamp',
    'end_timestamp',
    'model_id',
    'execution',
    'mae',
    'rmse',
    'smape'
]

# empty dataframe that only carries the column names
day_by_day_predictions_df = pd.DataFrame(columns=buffer_df_columns)

In [60]:
day_by_day_predictions_df

Unnamed: 0,start_timestamp,end_timestamp,model_id,execution,mae,rmse,smape


In [61]:

# Collect the stored error metrics of num_days consecutive 24-hour prediction windows
# for every execution of the selected model, one row per window.
#
# The rows are gathered in a list and the dataframe is built once, which
# - avoids the quadratic cost of growing a dataframe with pd.concat inside the loop
# - rebuilds day_by_day_predictions_df from scratch, so re-running this cell
#   does not duplicate rows
# - spells the columns out locally instead of relying on buffer_df_columns,
#   a name that is bound to two different lists in this notebook
model_id, dataset, inference = 'BSCTRFM_TPU_010', 'test', '024'
executions = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]

num_days = 28
# first row of each 24-hour window in the filtered dataframe: 0, 24, 48, ...
start_indexes = 24*np.arange(num_days)

day_by_day_columns = [
    'start_timestamp',
    'end_timestamp',
    'model_id',
    'execution',
    'mae',
    'rmse',
    'smape'
]

day_by_day_rows = []

for execution in executions:

    flag = \
    global_df.model_id.eq(model_id) & \
    global_df.execution.eq(execution) & \
    global_df.dataset.eq(dataset) & \
    global_df.inference.eq(inference)

    # reset index to use the same row, drop the index column
    filtered_df = global_df[flag].reset_index(drop=True)

    for start_index in start_indexes:

        window_row = filtered_df.iloc[start_index]

        day_by_day_rows.append([
            # first and last timestamps of the 24-hour window
            pd.to_datetime(window_row['string_timestamps'][0]),
            pd.to_datetime(window_row['string_timestamps'][-1]),
            window_row['model_id'],
            window_row['execution'],
            # metrics as stored with the prediction window
            window_row['mae'],
            window_row['rmse'],
            window_row['smape']
        ])

day_by_day_predictions_df = pd.DataFrame(day_by_day_rows, columns=day_by_day_columns)

In [62]:
day_by_day_predictions_df

Unnamed: 0,start_timestamp,end_timestamp,model_id,execution,mae,rmse,smape
0,2018-05-14 23:00:00,2018-05-15 22:00:00,BSCTRFM_TPU_010,0,74.396584,96.334578,0.027189
1,2018-05-15 23:00:00,2018-05-16 22:00:00,BSCTRFM_TPU_010,0,51.234515,61.179318,0.019998
2,2018-05-16 23:00:00,2018-05-17 22:00:00,BSCTRFM_TPU_010,0,80.001159,108.871460,0.028382
3,2018-05-17 23:00:00,2018-05-18 22:00:00,BSCTRFM_TPU_010,0,40.819563,54.001510,0.016045
4,2018-05-18 23:00:00,2018-05-19 22:00:00,BSCTRFM_TPU_010,0,111.405819,136.456637,0.041445
...,...,...,...,...,...,...,...
275,2018-06-06 23:00:00,2018-06-07 22:00:00,BSCTRFM_TPU_010,9,67.483329,86.348940,0.023644
276,2018-06-07 23:00:00,2018-06-08 22:00:00,BSCTRFM_TPU_010,9,185.437452,202.100992,0.066679
277,2018-06-08 23:00:00,2018-06-09 22:00:00,BSCTRFM_TPU_010,9,167.095758,184.523043,0.063405
278,2018-06-09 23:00:00,2018-06-10 22:00:00,BSCTRFM_TPU_010,9,54.556858,64.787909,0.022029


In [63]:
# locating the model execution with the best performance
# the metric columns are selected before averaging: since pandas 2.0 groupby().mean()
# no longer drops non-numeric columns and raises TypeError on the string model_id column
day_by_day_predictions_df.groupby('execution')[['mae', 'rmse', 'smape']].mean()

Unnamed: 0_level_0,mae,rmse,smape
execution,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,120.925417,153.75598,0.04327
1,115.515397,144.561357,0.041641
2,108.819283,137.565761,0.03923
3,110.204448,136.065884,0.039688
4,113.32831,142.979444,0.040517
5,114.04771,143.775385,0.040788
6,106.804105,134.190353,0.038488
7,113.762571,145.369576,0.040619
8,112.246697,141.153605,0.040235
9,107.060319,133.956683,0.03833


In [64]:
# execution 9 presents the best RMSE and SMAPE; execution 6 has a marginally lower MAE

In [65]:
# get the time line, on a daily basis, from the grouped dataframe
# one metric column is selected before averaging: since pandas 2.0 groupby().mean()
# raises TypeError on the string model_id column; only the group index is used here
day_by_day_predictions_df.groupby('start_timestamp')['mae'].mean().index

DatetimeIndex(['2018-05-14 23:00:00', '2018-05-15 23:00:00',
               '2018-05-16 23:00:00', '2018-05-17 23:00:00',
               '2018-05-18 23:00:00', '2018-05-19 23:00:00',
               '2018-05-20 23:00:00', '2018-05-21 23:00:00',
               '2018-05-22 23:00:00', '2018-05-23 23:00:00',
               '2018-05-24 23:00:00', '2018-05-25 23:00:00',
               '2018-05-26 23:00:00', '2018-05-27 23:00:00',
               '2018-05-28 23:00:00', '2018-05-29 23:00:00',
               '2018-05-30 23:00:00', '2018-05-31 23:00:00',
               '2018-06-01 23:00:00', '2018-06-02 23:00:00',
               '2018-06-03 23:00:00', '2018-06-04 23:00:00',
               '2018-06-05 23:00:00', '2018-06-06 23:00:00',
               '2018-06-07 23:00:00', '2018-06-08 23:00:00',
               '2018-06-09 23:00:00', '2018-06-10 23:00:00'],
              dtype='datetime64[ns]', name='start_timestamp', freq=None)

In [66]:
# MAE average (on the 10 executions) for each 24-hour interval
# the metric column is selected before aggregating: since pandas 2.0 groupby().mean()
# raises TypeError on the string model_id column
for x in day_by_day_predictions_df.groupby('start_timestamp')['mae'].mean():
    print('{:0.4f}'.format(x))

64.6655
52.6684
65.7693
59.7849
101.5035
53.7973
59.6968
119.1675
108.9169
129.0831
103.6068
78.2596
93.4480
174.7484
173.5844
192.1428
237.0113
219.0880
166.4328
87.0250
113.5668
101.0596
98.0263
89.5259
152.3215
121.2434
53.5468
73.9094


In [67]:
# MAE dispersion (standard deviation on the 10 executions) for each 24-hour interval
# the metric column is selected before aggregating: since pandas 2.0 groupby().std()
# raises on the string model_id column
for x in day_by_day_predictions_df.groupby('start_timestamp')['mae'].std():
    print('{:0.4f}'.format(x))

8.9164
8.4937
11.9562
10.8768
19.4086
7.9980
14.0281
16.1417
10.8790
20.8458
14.6988
23.8437
19.0957
16.1358
12.6701
24.9868
22.9690
36.2141
32.3473
29.5908
15.2069
19.7201
30.4310
28.2809
28.6180
32.1482
9.6247
11.1283


In [68]:
# RMSE average (on the 10 executions) for each 24-hour interval
# the metric column is selected before aggregating: since pandas 2.0 groupby().mean()
# raises TypeError on the string model_id column
for x in day_by_day_predictions_df.groupby('start_timestamp')['rmse'].mean():
    print('{:0.4f}'.format(x))

83.4325
64.3000
85.0910
77.2394
126.1072
68.3452
75.7845
155.8474
124.4687
185.5226
130.7779
99.1861
117.0175
234.1270
232.0722
232.1140
306.0482
255.0934
207.4743
107.7918
159.7830
127.7329
130.5655
115.1421
171.0037
139.2196
62.7484
83.4110


In [69]:
# RMSE dispersion (standard deviation on the 10 executions) for each 24-hour interval
# the metric column is selected before aggregating: since pandas 2.0 groupby().std()
# raises on the string model_id column
for x in day_by_day_predictions_df.groupby('start_timestamp')['rmse'].std():
    print('{:0.4f}'.format(x))

9.3795
9.8424
13.3558
11.3353
24.6160
10.8775
18.3563
18.6745
11.8409
24.9019
18.7762
31.1938
27.7006
22.4828
11.9252
28.8784
32.3337
48.8588
45.0533
41.6147
24.5419
23.6918
42.2199
35.0554
24.3772
32.7313
11.4139
11.3093


In [70]:
# SMAPE average (on the 10 executions) for each 24-hour interval
# the metric column is selected before aggregating: since pandas 2.0 groupby().mean()
# raises TypeError on the string model_id column

# adjust SMAPE to percentage value
for x in 100*day_by_day_predictions_df.groupby('start_timestamp')['smape'].mean():
    print('{:0.4f}'.format(x))

2.4165
2.0586
2.3545
2.2421
3.8451
2.0752
2.2252
4.2969
4.7740
4.4260
3.5686
2.9517
3.4849
5.9051
5.6067
6.2818
7.6998
7.6163
6.0265
3.3593
3.9790
3.6135
3.2967
3.1042
5.6089
4.7022
2.2123
3.0542


In [71]:
# SMAPE dispersion (standard deviation on the 10 executions) for each 24-hour interval
# the metric column is selected before aggregating: since pandas 2.0 groupby().std()
# raises on the string model_id column

# adjust SMAPE to percentage value
for x in 100*day_by_day_predictions_df.groupby('start_timestamp')['smape'].std():
    print('{:0.4f}'.format(x))

0.3511
0.3452
0.4458
0.4069
0.6716
0.2847
0.5001
0.5798
0.4391
0.7475
0.4982
0.8457
0.6627
0.5490
0.4907
0.9186
0.7580
1.2360
1.1946
1.0679
0.5004
0.7428
0.9921
0.9149
0.9553
1.1751
0.3974
0.5014


In [72]:
# MAE per 24-hour prediction interval (averaged over the executions), with the
# one-week ARIMA MAE average drawn as a dashed horizontal reference line
size = 4

# the metric column is selected before averaging (since pandas 2.0 groupby().mean()
# raises TypeError on the string model_id column) and the groupby is evaluated once
# instead of once per glyph argument
daily_mae_mean = day_by_day_predictions_df.groupby('start_timestamp')['mae'].mean()

plots['bsctrfm_week_metric'] = figure(
    x_axis_type='datetime',
    plot_width=960,
    plot_height=320,
    title='BSCTRFM Performance: {} for {} Consecutive 24-hour Prediction Intervals.'\
    .format('MAE', num_days),
    toolbar_location=None,
)

plots['bsctrfm_week_metric'].grid.grid_line_alpha=0.3

# a custom x range to visually improve the plot
# plots['bsctrfm_week_metric'].x_range = Range1d(
#     start=pd.to_datetime('2018-05-02 16:00:00'),
#     end=pd.to_datetime('2018-06-01 16:00:00')
# )

plots['bsctrfm_week_metric'].xaxis.axis_label = 'Date'
plots['bsctrfm_week_metric'].yaxis.axis_label = 'MAE [KW]'

plots['bsctrfm_week_metric'].title.text_font_size = '14pt'

plots['bsctrfm_week_metric'].xaxis.axis_label_text_font_size = "14pt"
plots['bsctrfm_week_metric'].yaxis.axis_label_text_font_size = "14pt"

plots['bsctrfm_week_metric'].xaxis.major_label_text_font_size = "12pt"
plots['bsctrfm_week_metric'].yaxis.major_label_text_font_size = "12pt"


# hollow black markers joined by a black line
plots['bsctrfm_week_metric'].circle(
    x=daily_mae_mean.index,
    y=daily_mae_mean,
    color='black',
    size=size,
    fill_color=None,
)
plots['bsctrfm_week_metric'].line(
    x=daily_mae_mean.index,
    y=daily_mae_mean,
    color='black',
)

# horizontal dashed line at the average of the seven daily ARIMA MAE values
arima_one_week_mae_average = Span(
    location=np.mean(np.array(arima_one_week_mae)),
    dimension='width',
    line_color='black',
    line_dash='dashed',
    line_width=2)

plots['bsctrfm_week_metric'].add_layout(arima_one_week_mae_average)

show(plots['bsctrfm_week_metric'])

In [73]:
# RMSE per 24-hour prediction interval (averaged over the executions), with the
# one-week ARIMA RMSE average drawn as a dashed horizontal reference line
size = 4

# the metric column is selected before averaging (since pandas 2.0 groupby().mean()
# raises TypeError on the string model_id column) and the groupby is evaluated once
# instead of once per glyph argument
daily_rmse_mean = day_by_day_predictions_df.groupby('start_timestamp')['rmse'].mean()

plots['bsctrfm_week_metric'] = figure(
    x_axis_type='datetime',
    plot_width=960,
    plot_height=320,
    title='BSCTRFM Performance: {} for {} Consecutive 24-hour Prediction Intervals.'\
    .format('RMSE', num_days),
    toolbar_location=None,
)

plots['bsctrfm_week_metric'].grid.grid_line_alpha=0.3

# a custom x range to visually improve the plot
# plots['bsctrfm_week_metric'].x_range = Range1d(
#     start=pd.to_datetime('2018-05-02 16:00:00'),
#     end=pd.to_datetime('2018-06-01 16:00:00')
# )

plots['bsctrfm_week_metric'].title.text_font_size = '14pt'

plots['bsctrfm_week_metric'].xaxis.axis_label_text_font_size = "14pt"
plots['bsctrfm_week_metric'].yaxis.axis_label_text_font_size = "14pt"

plots['bsctrfm_week_metric'].xaxis.major_label_text_font_size = "12pt"
plots['bsctrfm_week_metric'].yaxis.major_label_text_font_size = "12pt"

plots['bsctrfm_week_metric'].xaxis.axis_label = 'Date'
plots['bsctrfm_week_metric'].yaxis.axis_label = 'RMSE [KW]'

# hollow black markers joined by a black line
plots['bsctrfm_week_metric'].circle(
    x=daily_rmse_mean.index,
    y=daily_rmse_mean,
    color='black',
    size=size,
    fill_color=None,
)
plots['bsctrfm_week_metric'].line(
    x=daily_rmse_mean.index,
    y=daily_rmse_mean,
    color='black',
)

# horizontal dashed line at the average of the seven daily ARIMA RMSE values
arima_one_week_rmse_average = Span(
    location=np.mean(np.array(arima_one_week_rmse)),
    dimension='width',
    line_color='black',
    line_dash='dashed',
    line_width=2)

plots['bsctrfm_week_metric'].add_layout(arima_one_week_rmse_average)


show(plots['bsctrfm_week_metric'])

In [74]:
# Weekly SMAPE plot: mean SMAPE (as a percentage) per start timestamp against
# the ARIMA average.
size = 4

week_fig = figure(
    x_axis_type='datetime',
    plot_width=960,
    plot_height=320,
    title=(
        'BSCTRFM Performance: {} for {} Consecutive 24-hour Prediction Intervals.'
        .format('SMAPE', num_days)
    ),
    toolbar_location=None,
)
# NOTE(review): this key is also used by the MAE and RMSE cells, so each cell
# overwrites the previous figure in `plots` -- confirm that is intended.
plots['bsctrfm_week_metric'] = week_fig

week_fig.grid.grid_line_alpha = 0.3

# a custom x range to visually improve the plot
# week_fig.x_range = Range1d(
#     start=pd.to_datetime('2018-05-02 16:00:00'),
#     end=pd.to_datetime('2018-06-01 16:00:00')
# )

week_fig.title.text_font_size = '14pt'

week_fig.xaxis.axis_label_text_font_size = "14pt"
week_fig.yaxis.axis_label_text_font_size = "14pt"

week_fig.xaxis.major_label_text_font_size = "12pt"
week_fig.yaxis.major_label_text_font_size = "12pt"

week_fig.xaxis.axis_label = 'Date'
week_fig.yaxis.axis_label = 'SMAPE [%]'

# SMAPE averaged per start timestamp and scaled to a percentage. Computed once
# and shared by both glyphs (previously the full-frame groupby/mean was
# evaluated four times). Selecting the column before aggregating also keeps
# .mean() away from unrelated columns.
smape_pct_by_start = (
    100 * day_by_day_predictions_df.groupby(['start_timestamp'])['smape'].mean()
)

week_fig.circle(
    x=smape_pct_by_start.index,
    y=smape_pct_by_start,
    color='black',
    size=size,
    fill_color=None,
)
week_fig.line(
    x=smape_pct_by_start.index,
    y=smape_pct_by_start,
    color='black',
)

# dashed horizontal reference line at the mean of arima_one_week_smape
# NOTE(review): the model series above is multiplied by 100 but this baseline
# is not -- confirm arima_one_week_smape is already expressed in percent.
arima_one_week_smape_average = Span(
    location=np.mean(np.array(arima_one_week_smape)),
    dimension='width',
    line_color='black',
    line_dash='dashed',
    line_width=2)

week_fig.add_layout(arima_one_week_smape_average)


show(week_fig)

In [75]:
# MAE for rolling predictions in 4 weeks
# from global dataframe
start, end = 0, 672
size = 4

rolling_fig = figure(
    x_axis_type='datetime',
    plot_width=960,
    plot_height=320,
    # the number of plotted predictions is the slice length, not its end index
    title=(
        'BSCTRFM Performance: {} for {} Consecutive 1-hour Rolling Predictions.'
        .format('MAE', end - start)
    ),
    toolbar_location=None,
)
# NOTE(review): this key is also used by the RMSE and SMAPE cells, so each cell
# overwrites the previous figure in `plots` -- confirm that is intended.
plots['bsctrfm_metric'] = rolling_fig

rolling_fig.grid.grid_line_alpha = 0.3

rolling_fig.title.text_font_size = '14pt'

rolling_fig.xaxis.axis_label_text_font_size = "14pt"
rolling_fig.yaxis.axis_label_text_font_size = "14pt"

rolling_fig.xaxis.major_label_text_font_size = "12pt"
rolling_fig.yaxis.major_label_text_font_size = "12pt"

rolling_fig.xaxis.axis_label = 'Date'
rolling_fig.yaxis.axis_label = 'MAE [KW]'

# x values: element [0] of each 'string_timestamps' entry parsed to a datetime
# (presumably the first timestamp of each prediction window -- TODO confirm).
# Built once and shared by both glyphs instead of being recomputed per glyph.
rolling_timestamps = [
    pd.to_datetime(row[0]) for row in global_df['string_timestamps'][start:end]
]
rolling_mae = global_df['mae'][start:end]

rolling_fig.circle(
    x=rolling_timestamps,
    y=rolling_mae,
    color='black',
    size=size,
    fill_color=None,
)
rolling_fig.line(
    x=rolling_timestamps,
    y=rolling_mae,
    color='black',
)

# dashed horizontal reference line at the mean of arima_one_week_mae
arima_one_week_mae_average = Span(
    location=np.mean(np.array(arima_one_week_mae)),
    dimension='width',
    line_color='black',
    line_dash='dashed',
    line_width=2)

rolling_fig.add_layout(arima_one_week_mae_average)

show(rolling_fig)

In [76]:
# RMSE for rolling predictions in 4 weeks
# from global dataframe
start, end = 0, 672
size = 4

rolling_fig = figure(
    x_axis_type='datetime',
    plot_width=960,
    plot_height=320,
    # the number of plotted predictions is the slice length, not its end index
    title=(
        'BSCTRFM Performance: {} for {} Consecutive 1-hour Rolling Predictions.'
        .format('RMSE', end - start)
    ),
    toolbar_location=None,
)
# NOTE(review): this key is also used by the MAE and SMAPE cells, so each cell
# overwrites the previous figure in `plots` -- confirm that is intended.
plots['bsctrfm_metric'] = rolling_fig

rolling_fig.grid.grid_line_alpha = 0.3

rolling_fig.title.text_font_size = '14pt'

rolling_fig.xaxis.axis_label_text_font_size = "14pt"
rolling_fig.yaxis.axis_label_text_font_size = "14pt"

rolling_fig.xaxis.major_label_text_font_size = "12pt"
rolling_fig.yaxis.major_label_text_font_size = "12pt"

rolling_fig.xaxis.axis_label = 'Date'
rolling_fig.yaxis.axis_label = 'RMSE [KW]'

# x values: element [0] of each 'string_timestamps' entry parsed to a datetime
# (presumably the first timestamp of each prediction window -- TODO confirm).
# Built once and shared by both glyphs instead of being recomputed per glyph.
rolling_timestamps = [
    pd.to_datetime(row[0]) for row in global_df['string_timestamps'][start:end]
]
rolling_rmse = global_df['rmse'][start:end]

rolling_fig.circle(
    x=rolling_timestamps,
    y=rolling_rmse,
    color='black',
    size=size,
    fill_color=None,
)
rolling_fig.line(
    x=rolling_timestamps,
    y=rolling_rmse,
    color='black',
)

# dashed horizontal reference line at the mean of arima_one_week_rmse
arima_one_week_rmse_average = Span(
    location=np.mean(np.array(arima_one_week_rmse)),
    dimension='width',
    line_color='black',
    line_dash='dashed',
    line_width=2)

rolling_fig.add_layout(arima_one_week_rmse_average)

show(rolling_fig)

In [77]:
# SMAPE for rolling predictions in 4 weeks
# from global dataframe
start, end = 0, 672
size = 4

rolling_fig = figure(
    x_axis_type='datetime',
    plot_width=960,
    plot_height=320,
    # the number of plotted predictions is the slice length, not its end index
    title=(
        'BSCTRFM Performance: {} for {} Consecutive 1-hour Rolling Predictions.'
        .format('SMAPE', end - start)
    ),
    toolbar_location=None,
)
# NOTE(review): this key is also used by the MAE and RMSE cells, so each cell
# overwrites the previous figure in `plots` -- confirm that is intended.
plots['bsctrfm_metric'] = rolling_fig

rolling_fig.grid.grid_line_alpha = 0.3

rolling_fig.title.text_font_size = '14pt'

rolling_fig.xaxis.axis_label_text_font_size = "14pt"
rolling_fig.yaxis.axis_label_text_font_size = "14pt"

rolling_fig.xaxis.major_label_text_font_size = "12pt"
rolling_fig.yaxis.major_label_text_font_size = "12pt"

rolling_fig.xaxis.axis_label = 'Date'
rolling_fig.yaxis.axis_label = 'SMAPE [%]'

# x values: element [0] of each 'string_timestamps' entry parsed to a datetime
# (presumably the first timestamp of each prediction window -- TODO confirm).
# Built once and shared by both glyphs instead of being recomputed per glyph.
rolling_timestamps = [
    pd.to_datetime(row[0]) for row in global_df['string_timestamps'][start:end]
]
# adjust SMAPE for percentage value
rolling_smape_pct = 100*global_df['smape'][start:end]

rolling_fig.circle(
    x=rolling_timestamps,
    y=rolling_smape_pct,
    color='black',
    size=size,
    fill_color=None,
)
rolling_fig.line(
    x=rolling_timestamps,
    y=rolling_smape_pct,
    color='black',
)

# dashed horizontal reference line at the mean of arima_one_week_smape
# NOTE(review): the model series above is multiplied by 100 but this baseline
# is not -- confirm arima_one_week_smape is already expressed in percent.
arima_one_week_smape_average = Span(
    location=np.mean(np.array(arima_one_week_smape)),
    dimension='width',
    line_color='black',
    line_dash='dashed',
    line_width=2)

rolling_fig.add_layout(arima_one_week_smape_average)

show(rolling_fig)

In [78]:
# shell escape: list the contents of the results directory read by the cells below
! ls -l /home/developer/DEPFIE-SCOM/ScD_Thesis/results

total 308
-rw-rw-r-- 1 developer developer    69 sep 22 13:48 BSCTRFM_TPU_010_09_eval.csv
-rw-rw-r-- 1 developer developer 46229 sep 22 13:48 BSCTRFM_TPU_010_09_loss.csv
-rw-rw-r-- 1 developer developer 46008 sep 22 13:48 BSCTRFM_TPU_010_09_lrs.csv
-rw-rw-r-- 1 developer developer 44853 sep 20 13:27 DMSLSTM_TPU_006_09_loss.csv
-rw-rw-r-- 1 developer developer 44752 sep 20 13:27 DMSLSTM_TPU_006_09_lrs.csv
-rw-rw-r-- 1 developer developer    68 sep 21 12:13 EDSLSTM_TPU_013_05_eval.csv
-rw-rw-r-- 1 developer developer 44489 sep 21 12:13 EDSLSTM_TPU_013_05_loss.csv
-rw-rw-r-- 1 developer developer 44597 sep 21 12:12 EDSLSTM_TPU_013_05_lrs.csv
-rw-rw-r-- 1 developer developer 27526 may 23 11:05 transformer_metrics.ods


In [79]:
# load the evaluation-loss CSV for run BSCTRFM_TPU_010_09 into a dataframe
eval_loss_df = pd.read_csv(
    filepath_or_buffer='/home/developer/DEPFIE-SCOM/ScD_Thesis/results/BSCTRFM_TPU_010_09_eval.csv',
)

In [80]:
# display the evaluation-loss records (columns: Wall time, Step, Value)
eval_loss_df

Unnamed: 0,Wall time,Step,Value
0,1627837000.0,6800,0.000689


In [81]:
# load the training-loss CSV for run BSCTRFM_TPU_010_09 into a dataframe
loss_df = pd.read_csv(
    filepath_or_buffer='/home/developer/DEPFIE-SCOM/ScD_Thesis/results/BSCTRFM_TPU_010_09_loss.csv',
)

In [82]:
# display the training-loss records (truncated by the display.max_rows option)
loss_df

Unnamed: 0,Wall time,Step,Value
0,1.627837e+09,4,0.096661
1,1.627837e+09,18,0.023388
2,1.627837e+09,26,0.015887
3,1.627837e+09,27,0.014355
4,1.627837e+09,40,0.007880
...,...,...,...
995,1.627837e+09,6763,0.000144
996,1.627837e+09,6776,0.000144
997,1.627837e+09,6779,0.000144
998,1.627837e+09,6785,0.000146


In [83]:
# load the learning-rate-schedule CSV for run BSCTRFM_TPU_010_09 into a dataframe
lrs_df = pd.read_csv(
    filepath_or_buffer='/home/developer/DEPFIE-SCOM/ScD_Thesis/results/BSCTRFM_TPU_010_09_lrs.csv',
)

In [84]:
# display the learning-rate records (truncated by the display.max_rows option)
lrs_df

Unnamed: 0,Wall time,Step,Value
0,1.627837e+09,4,0.000014
1,1.627837e+09,18,0.000062
2,1.627837e+09,26,0.000090
3,1.627837e+09,27,0.000093
4,1.627837e+09,40,0.000138
...,...,...,...
995,1.627837e+09,6763,0.000035
996,1.627837e+09,6776,0.000035
997,1.627837e+09,6779,0.000035
998,1.627837e+09,6785,0.000035


In [85]:

# Learning-rate schedule: recorded learning rate versus training step.
lrs_fig = figure(
    title='BSCTRFM Learning Rate Schedule.',
    plot_width=960,
    plot_height=320,
    toolbar_location=None,
)
plots['bsctrfm_lrs'] = lrs_fig

lrs_fig.grid.grid_line_alpha = 0.3

# a custom x range to visually improve the plot
# lrs_fig.x_range = Range1d(
#     start=0,
#     end=1440
# )

lrs_fig.title.text_font_size = '14pt'

# same font sizes on both axes: 14pt for axis labels, 12pt for tick labels
for axis in (lrs_fig.xaxis, lrs_fig.yaxis):
    axis.axis_label_text_font_size = "14pt"
    axis.major_label_text_font_size = "12pt"

lrs_fig.xaxis.axis_label = 'Training Step'
lrs_fig.yaxis.axis_label = 'Learning Rate'

# learning rate ('Value') plotted against the training step ('Step')
lrs_fig.line(x=lrs_df['Step'], y=lrs_df['Value'], color='black')

show(lrs_fig)

In [87]:
# Final evaluation loss, read from the last row of the eval CSV loaded above
# rather than hand-copied from its display-rounded value (previously 0.000689).
eval_loss = float(eval_loss_df['Value'].iloc[-1])

In [98]:

# Training-loss curve with the final evaluation loss as a dashed reference line.
loss_fig = figure(
    title='BSCTRFM Training Loss.',
    plot_width=960,
    plot_height=320,
    toolbar_location=None,
)
plots['bsctrfm_loss'] = loss_fig

loss_fig.grid.grid_line_alpha = 0.3

# a custom x range to visually improve the plot
# loss_fig.x_range = Range1d(
#     start=0,
#     end=1440
# )

# a custom y range to visually improve the plot; loss values above 0.005
# fall outside the visible area
loss_fig.y_range = Range1d(start=0, end=0.005)

loss_fig.title.text_font_size = '14pt'

# same font sizes on both axes: 14pt for axis labels, 12pt for tick labels
for axis in (loss_fig.xaxis, loss_fig.yaxis):
    axis.axis_label_text_font_size = "14pt"
    axis.major_label_text_font_size = "12pt"

loss_fig.xaxis.axis_label = 'Training Step'
loss_fig.yaxis.axis_label = 'Loss'

# training loss ('Value') plotted against the training step ('Step')
loss_fig.line(x=loss_df['Step'], y=loss_df['Value'], color='black')

# dashed horizontal line at the evaluation loss
final_eval_loss = Span(
    location=eval_loss,
    dimension='width',
    line_color='black',
    line_dash='dashed',
    line_width=2,
)
loss_fig.add_layout(final_eval_loss)

show(loss_fig)