In [None]:
import numpy as np
import pandas as pd
from datetime import datetime
from pathlib import Path
import matplotlib.pyplot as plt
from statsmodels.graphics.tsaplots import plot_acf
from tbats import TBATS
from green_city.utils import metrics_dict, datetime2index, index2datetime

# Default figure size (width, height in inches) for every plot in this notebook.
plt.rcParams.update({'figure.figsize': [25, 8]})

In [None]:
## MLFLOW ##
import mlflow
from green_city.mlflow_config import get_mlflow_config

# Point mlflow at the tracking server and experiment named in the project config.
flow_conf = get_mlflow_config()
tracking_uri = flow_conf["TRACKING_URI"]
mlflow.set_tracking_uri(tracking_uri)
# Trailing semicolon suppresses the cell's output repr.
mlflow.set_experiment(flow_conf["EXPERIMENT_NAME"]);

In [None]:
## DB CONNECTION ##
from urllib.parse import quote

from sqlalchemy import create_engine
from decouple import Config, RepositoryEnv

# Connection settings are read from the env-style file ../.db_credentials.
config = Config(RepositoryEnv("../.db_credentials"))

# Raw (unescaped) credentials, exactly as stored in the credentials file.
db_connection_credentials = {
    "database": config('POSTGRES_DB'),
    "user": config('POSTGRES_USER'),
    "password": config('POSTGRES_PASSWORD'),
    "host": config('POSTGRES_HOST'),
    "port": config('POSTGRES_PORT'),
}

# Percent-encode user and password before placing them in the URL: characters
# such as "@", ":", "/" or "%" would otherwise be read as URL delimiters and
# produce a malformed or mis-parsed connection string. safe="" also encodes "/".
DB_STRING = "postgresql://{user}:{password}@{host}:{port}/{database}".format(
    **{
        **db_connection_credentials,
        "user": quote(str(db_connection_credentials["user"]), safe=""),
        "password": quote(str(db_connection_credentials["password"]), safe=""),
    }
)
db = create_engine(DB_STRING)

In [None]:
# Run-wide settings; they are copied into the parameters logged for every mlflow run.
global_params = dict(
    feature="net_load_kW",
    # building_nr=5,
    building_nr="all",
    resolution="H",
    pred_steps=24,
    model="baseline_Y",
)

# Guard: the baseline shifts further down are expressed in multiples of 24 rows,
# so only the "H" resolution is accepted.
assert global_params["resolution"] == "H"

# Load the dataset
def get_df(building_nr, feature, data_dir=Path("../data/preprocessed")):
    """Load one preprocessed CSV and return its target column with timestamps.

    Parameters
    ----------
    building_nr : int or str
        Building number used to build the file name ``Building_<nr>.csv``;
        the special value ``"all"`` selects ``Agg_buildings.csv``.
    feature : str
        Name of the CSV column to load; it is renamed to ``"actual"``.
    data_dir : path-like, optional
        Directory containing the CSV files. Defaults to ``../data/preprocessed``.

    Returns
    -------
    pandas.DataFrame
        Columns ``["actual", "datetime"]`` (``datetime`` parsed to a datetime
        dtype), with the default integer index named ``"id"``.

    Raises
    ------
    KeyError
        If ``feature`` or ``"datetime"`` is not a column of the CSV.
    """
    if building_nr == "all":
        filename = "Agg_buildings.csv"
    else:
        filename = f"Building_{building_nr}.csv"
    df = (
        pd.read_csv(Path(data_dir) / filename)
        [[feature, "datetime"]]
        # pd.to_datetime instead of .astype('datetime64'): pandas >= 2.0
        # rejects casts to a unit-less datetime64 dtype with a TypeError.
        .assign(datetime=lambda frame: pd.to_datetime(frame["datetime"]))
        .rename(columns={feature: "actual"})
    )
    df.index.name = "id"
    return df

# Load the series selected in global_params and preview the first rows.
df = get_df(
    building_nr=global_params["building_nr"],
    feature=global_params["feature"],
)
df.head()

In [None]:
# Row ids after which a forecast window starts; one mlflow run is logged per id.
indices = [32135, 33311, 26478, 33357, 30387, 30794, 31800, 28783]

# The baseline columns do not depend on the forecast window, so they are
# computed once here instead of being re-derived on every loop iteration.
# Shifts are counted in rows; the row counts below assume hourly data.
df["baseline_Y"] = df["actual"].shift(365*24)  # value from 365 days (8760 rows) earlier
df["baseline_W"] = df["actual"].shift(7*24)    # value from 7 days (168 rows) earlier

# Forecast horizon in rows, taken from the shared config rather than hard-coded.
pred_steps = global_params["pred_steps"]

for index in indices:
    # .loc slices by label and includes both ends, so this window holds
    # exactly pred_steps rows: index+1 ... index+pred_steps.
    pred_from, pred_to = (index + 1, index + pred_steps)

    # Add "W" to this list to also evaluate the weekly baseline.
    for shift in ["Y"]:
        with mlflow.start_run(run_name=f"baseline_{shift}_{index}") as r:
            scores = metrics_dict(
                df.loc[pred_from:pred_to, "actual"],
                df.loc[pred_from:pred_to, f"baseline_{shift}"],
                ["mae", "mse", "r2_score"]
            )
            # Per-run parameters override the shared defaults (including "model").
            params = global_params.copy()
            params.update({
                "index": index,
                "datetime": index2datetime(index),
                #"model": f"baseline_{shift}"
                "model": "baseline"
            })
            mlflow.log_params(params)
            mlflow.log_metrics(scores)

            # Store the predictions of this window, tagged with the mlflow run id,
            # by appending them to the "forecast" table.
            forecasts = (
                df.loc[pred_from:pred_to, [f"baseline_{shift}"]]
                .assign(run_id=r.info.run_id)
                .rename(columns={f"baseline_{shift}": "prediction"})
            )
            forecasts.to_sql("forecast", con=db, if_exists="append")