In [None]:
import mlflow
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
import matplotlib.pyplot as plt
from sklearn.metrics import r2_score
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from statsmodels.tsa.seasonal import seasonal_decompose
from sklearn.metrics import mean_absolute_error,mean_squared_error

from tbats import BATS, TBATS

from green_city.utils import span
from green_city.plotting import plot_decomposition
from green_city.mlflow_config import get_mlflow_config
plt.rcParams['figure.figsize'] = [25, 8]

In [None]:
# Which building's preprocessed CSV is analysed.
building_nr = 5
# Series to forecast (an alternative target would be "dhw_heating_kWh").
column_to_predict = "equipment_electric_power_kWh"
# Share of the daily series used for fitting; the remainder is held out.
split_ratio = 0.75
# Seasonal periods, counted in daily samples.
seasonalities = [365, 7]

# Notebook-wide parameters that do not change between experiments;
# they are logged to the mlflow server with every run.
global_params = dict([
    ("building nr", building_nr),
    ("predicted feature", column_to_predict),
    ("resolution", "daily"),
])

In [None]:
## MLFLOW ##
# Read the project's MLflow settings once, then point the client at the
# configured tracking server and experiment.
flow_conf = get_mlflow_config()
tracking_uri = flow_conf["TRACKING_URI"]
mlflow.set_tracking_uri(tracking_uri)
print("MLFlow experiment name:", flow_conf["EXPERIMENT_NAME"])
mlflow.set_experiment(flow_conf["EXPERIMENT_NAME"]);

In [None]:
def get_daily_df(building_nr, column_to_predict, data_dir="../data/preprocessed"):
    """Load one building's preprocessed CSV and sum a column per 24-row block.

    Rows are grouped by their position in the file (``row // 24``), so the
    file is presumably one row per hour in chronological order -- TODO
    confirm against the preprocessing step. A trailing block with fewer
    than 24 rows is still summed and therefore yields a partial total.

    Only the requested column is aggregated. The timestamp column is neither
    converted nor summed: it does not appear in the result, and summing
    timestamps is not a valid operation (recent pandas versions raise on it).

    Parameters
    ----------
    building_nr : int
        Number used to build the file name ``Building_{building_nr}.csv``.
    column_to_predict : str
        Name of the CSV column to aggregate.
    data_dir : str, optional
        Directory containing the preprocessed CSV files.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``tday`` (0-based block number) with the single column
        ``actual`` holding the per-block sums.

    Raises
    ------
    KeyError
        If ``column_to_predict`` is not a column of the CSV file.
    """
    hourly = pd.read_csv(f"{data_dir}/Building_{building_nr}.csv")
    daily = (
        hourly[[column_to_predict]]
        # read_csv yields a 0-based positional index, so // 24 numbers the blocks
        .assign(tday=hourly.index // 24)
        .groupby("tday")
        .sum()
        .rename(columns={column_to_predict: "actual"})
    )
    return daily

df = get_daily_df(building_nr, column_to_predict)

# Top panel: the daily series; bottom panel: its autocorrelation for
# lags up to 370 samples.
fig, (ax1, ax2) = plt.subplots(nrows=2, ncols=1)
df["actual"].plot(ax=ax1, xlabel="");
plot_acf(df["actual"], lags=370, ax=ax2);
fig.subplots_adjust(hspace=0.2)

## Seasonal decomposition

In [None]:
def add_seasonal_trend_cols(df, to_row, periods=None):
    """Add trend and seasonal columns to ``df`` in place and extend them forward.

    The additive decomposition is fitted on rows ``0 .. to_row - 1`` of
    ``df["actual"]`` only. For rows from ``to_row`` on, the trend is held at
    its last fitted value and every seasonal component is continued
    periodically.

    The arithmetic below treats index labels as positions, i.e. it assumes
    ``df`` has a 0-based consecutive integer index.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the column ``actual``. Columns ``trend`` and
        ``seasonal_<period>`` are (re)created, so repeated calls start from
        a clean state.
    to_row : int
        First row that is *not* used for fitting.
    periods : sequence of int, optional
        Seasonal periods in samples. The first entry also provides the
        trend. Defaults to the notebook-level ``seasonalities``.

    Returns
    -------
    None
        ``df`` is modified in place.
    """
    if periods is None:
        periods = seasonalities
    periods = list(periods)

    seasonal_cols = [f"seasonal_{s}" for s in periods]
    df[["trend", *seasonal_cols]] = np.nan

    future_positions = np.arange(to_row, len(df))

    for i, (s, col) in enumerate(zip(periods, seasonal_cols)):
        #calculate seasonal components for each seasonality
        history = df.loc[:to_row-1, ["actual"]]
        if i == 0:
            decomp = seasonal_decompose(history, model='add', period=s, extrapolate_trend=1)
            df.loc[:to_row-1, "trend"] = decomp.trend
            #extrapolate with last constant. could be done better
            df.loc[to_row:, "trend"] = df.loc[to_row-1, "trend"]
        else:
            decomp = seasonal_decompose(history, model='add', period=s)
        df.loc[:to_row-1, col] = decomp.seasonal

        #extrapolate decomposition data to future: the fitted seasonal
        #component repeats every s rows, so the value for row p is the
        #fitted value at row p % s. Unlike copying one contiguous slice,
        #this also works when the future is longer than the fitted history.
        df.loc[to_row:, col] = df[col].to_numpy()[future_positions % s]

        assert not df[col].isna().any()
        assert not df["trend"].isna().any()

In [None]:
# Starting the MLFlow run
# A run left open by an earlier, interrupted execution of the notebook
# would make start_run() fail, so close it first. This keeps the cell
# re-runnable. The run started here stays open until mlflow.end_run()
# is called in the logging cell.
if mlflow.active_run() is not None:
    mlflow.end_run()
r = mlflow.start_run(run_name="seasonal decomposition")
print("run-uuid:", r.info.run_uuid)

# Generating the decomposition
split_pos = int(len(df)*split_ratio)
add_seasonal_trend_cols(df, split_pos)

# Prediction = trend + every configured seasonal component, added in the
# order given by `seasonalities` (so it follows the config instead of
# hard-coding the column names).
df["pred_by_decomp"] = sum(
    (df[f"seasonal_{s}"] for s in seasonalities),
    df["trend"],
)

#### Getting the scores and plotting the results

In [None]:
##########
# Scores #
##########
# Errors are measured only on the rows from split_pos onward, i.e. the
# part of the series that was not used for fitting the decomposition.
holdout = df[split_pos:]
mae = mean_absolute_error(holdout["actual"], holdout["pred_by_decomp"])
mse = mean_squared_error(holdout["actual"], holdout["pred_by_decomp"])
print(f"mae: {mae}, mse: {mse}")

In [None]:
##########
# MLFLOW #
##########
# Running this cell uploads the records of the model to the mlflow
# server and then closes the currently active run via mlflow.end_run().
# API reference: https://mlflow.org/docs/latest/python_api/mlflow.html

# Global parameters first, followed by the ones specific to this model.
run_params = {
    **global_params,
    "model": "seasonal_decompose",
    "prediction_length": len(df[split_pos:]),
}
for name, value in run_params.items():
    mlflow.log_param(name, value)

for name, value in {"mse": mse, "mae": mae}.items():
    mlflow.log_metric(name, value)

mlflow.end_run()

In [None]:
############
# Plotting #
############
def plt_predictions(low=0, high=None, ax=None):
    """Plot actual values, decomposition prediction and trend for rows ``low:high``.

    Reads the notebook-level ``df`` (columns ``actual``, ``pred_by_decomp``
    and ``trend``) and ``split_pos``.

    Parameters
    ----------
    low : int
        First row (positional) to draw.
    high : int or None
        Row at which to stop (exclusive). ``None`` draws up to and including
        the last row; the previous default of ``-1`` silently dropped it.
    ax : matplotlib Axes, optional
        Axes to draw on. If omitted, pandas picks the current axes.
    """
    window = df[low:high]
    # Keep the axes returned by the first call so that all lines and the
    # legend end up on the same axes even when `ax` was not supplied.
    ax = window["actual"].plot(color="black", label="actual", ax=ax)
    window["pred_by_decomp"].plot(color="orange", label="prediction (fitted range)", ax=ax)
    # The part from split_pos on is drawn again in green on top of the orange line.
    strt = max(low, split_pos)
    df[strt:high]["pred_by_decomp"].plot(color="green", label="prediction (forecast)", ax=ax)
    window["trend"].plot(label="trend", ax=ax)
    ax.legend()

# Figure 1: prediction over the whole series (top) and a window of
# 50 rows on either side of the train/forecast split (bottom).
fig, (ax1, ax2) = plt.subplots(nrows=2, ncols=1)
fig.suptitle("Decomposition prediction")
plt_predictions(ax=ax1)
plt_predictions(split_pos-50, split_pos+50, ax=ax2)

# Figure 2: prediction error (prediction minus actual) and its autocorrelation.
fig, (ax1, ax2) = plt.subplots(nrows=2, ncols=1)
fig.suptitle("Errors")
residuals = df["pred_by_decomp"] - df["actual"]
ax1.plot(residuals);
plot_acf(residuals, lags=370, ax=ax2);


## TBATS

In [None]:
# MLFLOW:
# Running this cell will automatically upload the records of the model
# to the mlflow server. The "with" statement ends the run when the block
# is left, so no explicit mlflow.end_run() is needed here. See
# https://mlflow.org/docs/latest/python_api/mlflow.html

with mlflow.start_run(run_name="TBATS") as r:
    print("run-uuid:", r.info.run_uuid)

    ###########
    # Fitting #
    ###########
    df_tbats = get_daily_df(building_nr, column_to_predict)
    # Forecast at most 300 steps, but never past the end of the data:
    # otherwise the forecast could not be stored next to the actual values.
    tbats_forecast_window = min(300, len(df_tbats) - split_pos)
    # Use the notebook-wide seasonal periods (ascending order) instead of
    # a second, hard-coded copy of them.
    estimator = TBATS(seasonal_periods=sorted(seasonalities))
    fitted_model = estimator.fit(df_tbats.actual[:split_pos])
    forecast = fitted_model.forecast(steps=tbats_forecast_window)
    df_tbats["forecast"] = np.nan
    df_tbats.loc[split_pos:split_pos+tbats_forecast_window-1, "forecast"] = forecast

    # Draw on a dedicated figure rather than on whatever axes are current.
    fig_tbats, ax_tbats = plt.subplots()
    fig_tbats.suptitle("TBATS forecast")
    df_tbats["actual"].plot(ax=ax_tbats, label="actual")
    df_tbats["forecast"].plot(ax=ax_tbats, label="forecast")
    ax_tbats.legend()

    ###########
    # Scoring #
    ###########
    # Only the forecast window is scored. Note that mae/mse overwrite the
    # notebook-level values computed for the decomposition model.
    df_forcastwindow = df_tbats[split_pos:split_pos+tbats_forecast_window]
    mae = mean_absolute_error(df_forcastwindow["actual"],df_forcastwindow["forecast"])
    mse = mean_squared_error(df_forcastwindow["actual"],df_forcastwindow["forecast"])
    print(f"mae: {mae}, mse: {mse}")

    ##################
    # MLFLOW logging #
    ##################
    #global parameters
    for k, v in global_params.items():
        mlflow.log_param(k, v)
    #individual parameters
    mlflow.log_param("model", "TBATS")
    mlflow.log_param("prediction_length", tbats_forecast_window)
    mlflow.log_metric("mse", mse)
    mlflow.log_metric("mae", mae)