In [3]:
import pandas as pd

from base.training import mse_weighted
from common import Predictor, DataStorage, L2Threshold
from common.model_utils import load_model_tflite
from simulation.model_simulator import ModelSimulator
from simulation.model_simulator import VIOLATION_COLUMN

# Truthiness check on the imported object; the printed line confirms the import resolved.
# NOTE(review): presumably this also keeps the otherwise-unused import alive - confirm.
if mse_weighted:
    print("mse_weighted loaded")

# NOTE(review): unpickling can execute arbitrary code - only load trusted files.
df_baseline = pd.read_pickle("data/zamg_vienna_hourly.pickle")

THRESHOLD = 1.0
features = ["TL", "P", "RF", "SO"]
# Select the modelled columns and drop incomplete rows in one chained step.
# This avoids calling dropna(inplace=True) on a frame that was just derived
# from another one, and makes the cell safe to re-run.
df_baseline = df_baseline[features].dropna()

# Subsets by calendar year of the index.
df_2019_2019 = df_baseline[df_baseline.index.year == 2019]
df_2010_2019 = df_baseline[df_baseline.index.year <= 2019]
# df_2010_2019 already holds exactly the `features` columns, so it is only
# re-sampled to an hourly index here.
initial_df = df_2010_2019.asfreq('H')

# Rows from 2020 onwards form the simulation input.
sim_data: pd.DataFrame = df_baseline.loc[df_baseline.index.year >= 2020]

# NOTE(review): the meaning of the two [0] arguments is defined by L2Threshold - confirm.
threshold = L2Threshold(THRESHOLD, [0], [0])

sim_data

mse_weighted loaded


Unnamed: 0_level_0,TL,P,RF,SO
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2020-01-01 00:00:00,5.0,1012.4,60.0,0.0
2020-01-01 00:10:00,5.0,1012.5,60.0,0.0
2020-01-01 00:20:00,4.9,1012.5,61.0,0.0
2020-01-01 00:30:00,4.9,1012.5,61.0,0.0
2020-01-01 00:40:00,4.9,1012.6,60.0,0.0
...,...,...,...,...
2021-12-31 23:10:00,15.3,999.2,59.0,0.0
2021-12-31 23:20:00,15.2,999.3,59.0,0.0
2021-12-31 23:30:00,15.2,999.3,59.0,0.0
2021-12-31 23:40:00,15.0,999.7,59.0,0.0


In [4]:
def print_df_stats(name: str, df: pd.DataFrame, columns: "list[str] | None" = None):
    """Print the index range, row count and per-column summary statistics of a frame.

    Args:
        name: Heading printed above the statistics.
        df: Frame to summarise; its index minimum/maximum are reported as
            start and end date.
        columns: Columns to summarise (min, mean, median, max). Defaults to the
            notebook-level ``features`` list, which keeps existing two-argument
            calls working unchanged.
    """
    if columns is None:
        # Fall back to the notebook-wide feature list only when the caller
        # did not say which columns to report.
        columns = features

    print(f"{name}")
    print("---------------------------------------------------")
    print(f"Start date: {df.index.min()}")
    print(f"End date: {df.index.max()}")
    print(f"Size of dataframe: {len(df)}")
    for var in columns:
        metric: pd.Series = df[var]
        print(f"    {var}: Min={metric.min()}, Mean={metric.mean()}, Median={metric.median()}, Max={metric.max()}")
    # Prints two blank lines (the newline in the string plus print's own).
    print("\n")


# Summarise the full data set and both training-era subsets, in that order.
for label, frame in (
    ("baseline", df_baseline),
    ("df_2010_2019", df_2010_2019),
    ("df_2019_2019", df_2019_2019),
):
    print_df_stats(label, frame)

baseline
---------------------------------------------------
Start date: 2010-01-01 01:00:00
End date: 2021-12-31 23:50:00
Size of dataframe: 631146
    TL: Min=-14.1, Mean=12.65017117011489, Median=12.5, Max=39.4
    P: Min=955.7, Mean=995.6065873295034, Median=995.6, Max=1025.3
    RF: Min=8.0, Mean=66.88900138267005, Median=68.0, Max=100.0
    SO: Min=0.0, Mean=858.1486882591349, Median=0.0, Max=3600.0


df_2010_2019
---------------------------------------------------
Start date: 2010-01-01 01:00:00
End date: 2019-12-31 23:50:00
Size of dataframe: 525882
    TL: Min=-14.1, Mean=12.66363132160193, Median=12.6, Max=39.4
    P: Min=955.7, Mean=995.5006133822164, Median=995.5, Max=1022.2
    RF: Min=15.0, Mean=67.47230367268703, Median=69.0, Max=100.0
    SO: Min=0.0, Mean=857.6400066935169, Median=0.0, Max=3600.0


df_2019_2019
---------------------------------------------------
Start date: 2019-01-01 00:00:00
End date: 2019-12-31 23:50:00
Size of dataframe: 52560
    TL: Min=-6.0, Mea

In [9]:
# Vienna 2010-2019 simple dense
# NOTE(review): `metadata`, `data_storage` and `predictor` are shared names that the
# 2019-2019 setup cell also assigns; run the matching simulation cell right after this one.
base_model_2010_2019 = load_model_tflite("models/zamg_vienna_2010_2019_simple_dense")
metadata = base_model_2010_2019.metadata

# Seed the storage with the hourly 2010-2019 history before the predictor is built.
data_storage = DataStorage(metadata.input_features, metadata.output_features)
data_storage.add_measurement_df(initial_df)

# The first simulated timestamp is handed to update_prediction_horizon.
simulation_start = sim_data.index.min()
predictor = Predictor(base_model_2010_2019, data_storage)
predictor.update_prediction_horizon(simulation_start)

In [4]:
# Run the simulation data through the current `predictor` and keep the result frame.
# NOTE(review): `predictor` is whichever one was assigned most recently; this cell
# expects the 2010-2019 setup cell to have run immediately before it.
# NOTE(review): 600 is presumably the step in seconds (sim_data is sampled
# every 10 minutes) - TODO confirm against ModelSimulator.
model_simulator = ModelSimulator(predictor, threshold, sim_data, 600)

results_2010_2019 = model_simulator.run()
results_2010_2019

Unnamed: 0,TL,P,RF,SO,prediction,error,model,violation
2020-01-01 00:00:00,5.0,1012.4,60.0,0.0,5.1,0.1,zamg_vienna_2010_2019_simple_dense,0
2020-01-01 00:10:00,5.0,1012.5,60.0,0.0,5.032051,0.032051,zamg_vienna_2010_2019_simple_dense,0
2020-01-01 00:20:00,4.9,1012.5,61.0,0.0,4.964102,0.064102,zamg_vienna_2010_2019_simple_dense,0
2020-01-01 00:30:00,4.9,1012.5,61.0,0.0,4.896153,0.003847,zamg_vienna_2010_2019_simple_dense,0
2020-01-01 00:40:00,4.9,1012.6,60.0,0.0,4.828204,0.071796,zamg_vienna_2010_2019_simple_dense,0
...,...,...,...,...,...,...,...,...
2021-12-31 23:10:00,15.3,999.2,59.0,0.0,15.058929,0.241071,zamg_vienna_2010_2019_simple_dense,0
2021-12-31 23:20:00,15.2,999.3,59.0,0.0,14.990402,0.209598,zamg_vienna_2010_2019_simple_dense,0
2021-12-31 23:30:00,15.2,999.3,59.0,0.0,14.921875,0.278125,zamg_vienna_2010_2019_simple_dense,0
2021-12-31 23:40:00,15.0,999.7,59.0,0.0,14.853349,0.146651,zamg_vienna_2010_2019_simple_dense,0


In [5]:
# Rows flagged as threshold violations by the simulator (element-wise comparison).
is_violation = results_2010_2019[VIOLATION_COLUMN] == True
violations_2010_2019 = results_2010_2019.loc[is_violation]

n_violations = len(violations_2010_2019)
n_measurements = len(results_2010_2019)

print(f"Simulation data: {results_2010_2019.index.min()} - {results_2010_2019.index.max()}")
print(f"Violations: {n_violations}")
print(f"Measurements: {n_measurements}")
print(f"Violations / Measurements = {n_violations / n_measurements}")

Simulation data: 2020-01-01 00:00:00 - 2021-12-31 23:50:00
Violations: 4293
Measurements: 105264
Violations / Measurements = 0.04078317373461012


In [10]:
# Vienna 2019-2019 simple dense
# NOTE(review): `metadata`, `data_storage` and `predictor` are shared names that the
# 2010-2019 setup cell also assigns; run the matching simulation cell right after this one.
base_model_2019_2019 = load_model_tflite("models/zamg_vienna_2019_2019_simple_dense")
metadata = base_model_2019_2019.metadata

# Seed the storage with the hourly 2010-2019 history before the predictor is built.
data_storage = DataStorage(metadata.input_features, metadata.output_features)
data_storage.add_measurement_df(initial_df)

# The first simulated timestamp is handed to update_prediction_horizon.
simulation_start = sim_data.index.min()
predictor = Predictor(base_model_2019_2019, data_storage)
predictor.update_prediction_horizon(simulation_start)

In [2]:
# Run the simulation data through the current `predictor` and keep the result frame.
# NOTE(review): `predictor` is whichever one was assigned most recently; this cell
# expects the 2019-2019 setup cell to have run immediately before it.
# NOTE(review): 600 is presumably the step in seconds (sim_data is sampled
# every 10 minutes) - TODO confirm against ModelSimulator.
model_simulator = ModelSimulator(predictor, threshold, sim_data, 600)

results_2019_2019 = model_simulator.run()
results_2019_2019

Unnamed: 0,TL,P,RF,SO,prediction,error,model,violation
2020-01-01 00:00:00,5.0,1012.4,60.0,0.0,5.1,0.1,zamg_vienna_2019_2019_simple_dense,0
2020-01-01 00:10:00,5.0,1012.5,60.0,0.0,4.882985,0.117015,zamg_vienna_2019_2019_simple_dense,0
2020-01-01 00:20:00,4.9,1012.5,61.0,0.0,4.66597,0.23403,zamg_vienna_2019_2019_simple_dense,0
2020-01-01 00:30:00,4.9,1012.5,61.0,0.0,4.448955,0.451045,zamg_vienna_2019_2019_simple_dense,0
2020-01-01 00:40:00,4.9,1012.6,60.0,0.0,4.23194,0.66806,zamg_vienna_2019_2019_simple_dense,0
...,...,...,...,...,...,...,...,...
2021-12-31 23:10:00,15.3,999.2,59.0,0.0,14.849399,0.450601,zamg_vienna_2019_2019_simple_dense,0
2021-12-31 23:20:00,15.2,999.3,59.0,0.0,14.791105,0.408895,zamg_vienna_2019_2019_simple_dense,0
2021-12-31 23:30:00,15.2,999.3,59.0,0.0,14.73281,0.46719,zamg_vienna_2019_2019_simple_dense,0
2021-12-31 23:40:00,15.0,999.7,59.0,0.0,14.674515,0.325485,zamg_vienna_2019_2019_simple_dense,0


In [3]:
# Rows flagged as threshold violations by the simulator (element-wise comparison).
is_violation = results_2019_2019[VIOLATION_COLUMN] == True
violations_2019_2019 = results_2019_2019.loc[is_violation]

n_violations = len(violations_2019_2019)
n_measurements = len(results_2019_2019)

print(f"Simulation data: {results_2019_2019.index.min()} - {results_2019_2019.index.max()}")
print(f"Violations: {n_violations}")
print(f"Measurements: {n_measurements}")
print(f"Violations / Measurements = {n_violations / n_measurements}")

Simulation data: 2020-01-01 00:00:00 - 2021-12-31 23:50:00
Violations: 5861
Measurements: 105264
Violations / Measurements = 0.05567905456756346


In [14]:
from base.learning_strategy import RetrainStrategy
from common.model_utils import save_model
import os
from base.model import Model

# Directory under which train_and_save_model stores each retrained model.
models_dir = "models"
# Strategy object whose train_model() is called by train_and_save_model.
learning_strategy = RetrainStrategy()


def train_and_save_model(base_model: Model, model_id: str, train_df: pd.DataFrame) -> Model:
    """Retrain from ``base_model`` on ``train_df`` and persist the result.

    The notebook-level ``learning_strategy`` performs the training, receiving the
    training frame together with the base model's metadata and underlying model.
    The returned model's ``metadata.uuid`` is set to ``model_id`` and the model is
    saved to ``<models_dir>/<model_id>``.

    Args:
        base_model: Model whose ``metadata`` and ``model`` are passed to the strategy.
        model_id: Identifier stored as the new model's uuid and used as its directory name.
        train_df: Training data handed to the strategy.

    Returns:
        The model returned by ``learning_strategy.train_model``, after tagging and saving.
    """
    # NOTE(review): whether train_model mutates base_model.model in place is not
    # visible here - confirm before reusing one base_model for several runs.
    target_dir = os.path.join(models_dir, model_id)
    retrained = learning_strategy.train_model(train_df, base_model.metadata, base_model.model)
    retrained.metadata.uuid = model_id
    save_model(retrained, target_dir)
    return retrained

In [7]:
# Specialized models: temperature
length = len(df_2010_2019)
fraction = length // 3
df_sorted = df_2010_2019.sort_values(by=['TL'], ascending=[True])
df_low_temperature: pd.DataFrame = df_sorted.iloc[:fraction]
df_high_temperature: pd.DataFrame = df_sorted.iloc[2 * fraction:]

print_df_stats("vienna high temperature", df_high_temperature)
print_df_stats("vienna low temperature", df_low_temperature)

vienna high temperature
---------------------------------------------------
Start date: 2010-03-19 11:50:00
End date: 2019-11-03 21:40:00
Size of dataframe: 175294
    TL: Min=17.1, Mean=22.56135121567196, Median=21.7, Max=39.4
    P: Min=969.2, Mean=994.6786050863121, Median=994.8, Max=1012.0
    RF: Min=15.0, Mean=56.86127306125709, Median=56.0, Max=100.0
    SO: Min=0.0, Mean=1602.5739956872453, Median=1113.9999999999998, Max=3600.0


vienna low temperature
---------------------------------------------------
Start date: 2010-01-01 01:00:00
End date: 2019-12-31 23:50:00
Size of dataframe: 175294
    TL: Min=-14.1, Mean=2.8464225814916655, Median=3.5, Max=8.1
    P: Min=955.7, Mean=996.8641877835712, Median=997.3, Max=1022.2
    RF: Min=19.0, Mean=75.63445792021783, Median=76.0, Max=100.0
    SO: Min=0.0, Mean=348.24226347355494, Median=0.0, Max=3600.0




In [16]:
from common.model_utils import load_model_from_savemodel

# Load the SavedModel variant of the 2010-2019 base model from the same
# `models_dir` that train_and_save_model writes to, instead of repeating the
# directory name as a literal.
base_model = load_model_from_savemodel(os.path.join(models_dir, "zamg_vienna_2010_2019_simple_dense"))

In [17]:
# Retrain on the high-temperature subset and store the result under its own id.
train_and_save_model(
    base_model=base_model,
    model_id="vienna_2010_2019_high_temp_simple_dense",
    train_df=df_high_temperature,
)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.loc[:, 'Day sin'] = np.sin(timestamp_s * (2 * np.pi / day)) / normalization_factor
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.loc[:, 'Day cos'] = np.cos(timestamp_s * (2 * np.pi / day)) / normalization_factor
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.loc[:, 'Year sin'] = np.sin(ti

Epoch 1/100
5474/5474 - 4s - loss: 0.1363 - mean_squared_error: 0.0305 - mean_absolute_error: 0.0556 - root_mean_squared_error: 0.1748 - 4s/epoch - 789us/step
Epoch 2/100
5474/5474 - 3s - loss: 1.0287e-04 - mean_squared_error: 2.5300e-05 - mean_absolute_error: 0.0030 - root_mean_squared_error: 0.0050 - 3s/epoch - 598us/step
Epoch 3/100
5474/5474 - 3s - loss: 1.0308e-04 - mean_squared_error: 2.4800e-05 - mean_absolute_error: 0.0031 - root_mean_squared_error: 0.0050 - 3s/epoch - 591us/step
Epoch 4/100
5474/5474 - 3s - loss: 9.2697e-05 - mean_squared_error: 2.2691e-05 - mean_absolute_error: 0.0029 - root_mean_squared_error: 0.0048 - 3s/epoch - 580us/step
Epoch 5/100
5474/5474 - 5s - loss: 9.5292e-05 - mean_squared_error: 2.3209e-05 - mean_absolute_error: 0.0029 - root_mean_squared_error: 0.0048 - 5s/epoch - 904us/step
Epoch 6/100
5474/5474 - 5s - loss: 8.9047e-05 - mean_squared_error: 2.2049e-05 - mean_absolute_error: 0.0028 - root_mean_squared_error: 0.0047 - 5s/epoch - 878us/step
Epoch 

INFO:tensorflow:Assets written to: models\vienna_2010_2019_high_temp_simple_dense\assets


INFO:tensorflow:Assets written to: C:\Users\gabri\AppData\Local\Temp\tmph0lg_hup\assets


INFO:tensorflow:Assets written to: C:\Users\gabri\AppData\Local\Temp\tmph0lg_hup\assets


<base.model.Model at 0x22b264fa190>

In [18]:
# Retrain on the low-temperature subset and store the result under its own id.
train_and_save_model(
    base_model=base_model,
    model_id="vienna_2010_2019_low_temp_simple_dense",
    train_df=df_low_temperature,
)

Epoch 1/100


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.loc[:, 'Day sin'] = np.sin(timestamp_s * (2 * np.pi / day)) / normalization_factor
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.loc[:, 'Day cos'] = np.cos(timestamp_s * (2 * np.pi / day)) / normalization_factor
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.loc[:, 'Year sin'] = np.sin(ti

5474/5474 - 6s - loss: 0.1659 - mean_squared_error: 0.0334 - mean_absolute_error: 0.0570 - root_mean_squared_error: 0.1826 - 6s/epoch - 1ms/step
Epoch 2/100
5474/5474 - 5s - loss: 1.2824e-04 - mean_squared_error: 2.8516e-05 - mean_absolute_error: 0.0032 - root_mean_squared_error: 0.0053 - 5s/epoch - 965us/step
Epoch 3/100
5474/5474 - 5s - loss: 1.1691e-04 - mean_squared_error: 2.6613e-05 - mean_absolute_error: 0.0033 - root_mean_squared_error: 0.0052 - 5s/epoch - 944us/step
Epoch 4/100
5474/5474 - 5s - loss: 1.1259e-04 - mean_squared_error: 2.5754e-05 - mean_absolute_error: 0.0032 - root_mean_squared_error: 0.0051 - 5s/epoch - 933us/step
Epoch 5/100
5474/5474 - 5s - loss: 1.0670e-04 - mean_squared_error: 2.4623e-05 - mean_absolute_error: 0.0032 - root_mean_squared_error: 0.0050 - 5s/epoch - 929us/step
Epoch 6/100
5474/5474 - 5s - loss: 1.0421e-04 - mean_squared_error: 2.4153e-05 - mean_absolute_error: 0.0031 - root_mean_squared_error: 0.0049 - 5s/epoch - 935us/step
Epoch 7/100
5474/547

INFO:tensorflow:Assets written to: models\vienna_2010_2019_low_temp_simple_dense\assets


INFO:tensorflow:Assets written to: C:\Users\gabri\AppData\Local\Temp\tmpanfuja5v\assets


INFO:tensorflow:Assets written to: C:\Users\gabri\AppData\Local\Temp\tmpanfuja5v\assets


<base.model.Model at 0x22b26503a90>