# TS Prophet
## Data split in training and test. The last 365 days are the test data.

In [1]:
from audioop import cross
import itertools
from matplotlib import units
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

from prophet import Prophet
from prophet.diagnostics import cross_validation
from prophet.diagnostics import performance_metrics
from prophet.plot import add_changepoints_to_plot
from prophet.plot import plot_cross_validation_metric
from prophet.plot import plot_plotly, plot_components_plotly

plt.style.use('fivethirtyeight')

from sklearn.metrics import mean_absolute_error,mean_squared_error

from green_city.utils import metrics_dict, datetime2index, index2datetime

%matplotlib inline

In [2]:
## MLFLOW ##
import mlflow
from green_city.mlflow_config import get_mlflow_config

flow_conf = get_mlflow_config()
tracking_uri = flow_conf["TRACKING_URI"]
mlflow.set_tracking_uri(flow_conf["TRACKING_URI"])
mlflow.set_experiment(flow_conf["EXPERIMENT_NAME"]);

In [3]:
## DB CONNECTION ##
from sqlalchemy import create_engine
from decouple import Config, RepositoryEnv

config = Config(RepositoryEnv("../.db_credentials"))

db_connection_credentials = {
    "database": config('POSTGRES_DB'),
    "user": config('POSTGRES_USER'),
    "password": config('POSTGRES_PASSWORD'),
    "host": config('POSTGRES_HOST'),
    "port": config('POSTGRES_PORT'),
}
DB_STRING = "postgresql://{user}:{password}@{host}:{port}/{database}".format(**db_connection_credentials)
db = create_engine(DB_STRING)

### Load data and feature engineering

In [4]:
building = 5
# Load data
df = pd.read_csv(f"../data/preprocessed/Building_{building}.csv").astype({'datetime': 'datetime64'})#.set_index('datetime')

In [5]:
df = df.rename(columns={'datetime':'ds',
                        'net_load_kWh':'y'})
# df.head(5)

In [6]:
def is_winter_season(ds):
    date = pd.to_datetime(ds)
    return(date.month < 4 or date.month > 9)

df['winter'] = df['ds'].apply(is_winter_season)
df['summer'] = ~df['ds'].apply(is_winter_season)

In [7]:
df_holiday = df[['ds','holiday']].query('holiday')
df_holiday['holiday'] = 'Holiday'

### Split data in training and test data

In [10]:
df_train = df[:365*3*24]
df_test = df[365*3*24:]


## Prophet simple model. Three years training

In [11]:
my_model_simple = Prophet(interval_width = 0.95, seasonality_mode="additive")

In [12]:
my_model_simple.fit(df_train)

Initial log joint probability = -807.326


<prophet.forecaster.Prophet at 0x13fa44fa0>

    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
      99       47996.1    0.00499351       258.451           1           1      122   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
     199       48025.4     0.0139303       710.728           1           1      228   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
     299       48037.3    0.00110004       218.145      0.7697      0.7697      342   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
     315       48039.4   0.000111663       472.897   1.763e-07       0.001      399  LS failed, Hessian reset 
     399         48046    0.00148367       327.591      0.5189      0.5189      500   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
     499       48052.1    0.00499085       447.872           1           1      6

In [13]:
pred_indices = [32135, 33311, 26478, 33357, 30387, 30794, 31800, 28783]


In [15]:
for index in pred_indices:
    with mlflow.start_run(run_name='Prophet Simple Model') as r:
        ### Predict ##
        # my_model_simple.fit(df.loc[:index])
        df_future = df.copy().loc[index+1:index+24]
        df_forecast = my_model_simple.predict(df_future)
        df_forecast.index = df_future.index
        df_forecast.index.name = 'id'
        df_forecast['error'] = df_future.y - df_forecast.yhat
        df_forecast = df_forecast[['yhat', 'error']]
        
        ## Evaluate ##
        metrics = metrics_dict(df_future.y, df_forecast.yhat, ["mae", "mse", "r2_score"])
        print("mae: {mae}, mse: {mse}, r2: {r2_score}".format(**metrics))
        mlflow.log_metrics(metrics)

        ## Parameters log ##
        params = {
            'building_nr': building,
            'datetime':index2datetime(index)
        }
        mlflow.log_params(params)

        forecast = df_forecast[['yhat']].assign(run_id = r.info.run_id).rename(columns={"yhat": "prediction"})
        # forecasts.to_sql("forecast", con=db, if_exists="append")

mae: 5.381091005581283, mse: 47.32219039105721, r2: 0.6440722191468136
mae: 10.536584487366788, mse: 140.72746586087422, r2: -0.5485944775992997
mae: 6.671270476282078, mse: 77.8964513625216, r2: -0.3620809874510411
mae: 10.193935633411757, mse: 136.30121427849497, r2: -0.02802957848261345
mae: 5.738748299421961, mse: 56.968331333133854, r2: 0.6243512217481284
mae: 5.3202301169115955, mse: 44.38111760262769, r2: 0.6314194949223908
mae: 5.278088350438547, mse: 36.56512023161958, r2: 0.6520552918424901
mae: 6.235855971940784, mse: 64.41949969740966, r2: 0.4406090898519034


In [16]:
param_grid_simple = {
    'interval_width': [0.95],
    'seasonality_mode':['additive'],
    'yearly_seasonality':['auto'],
    'changepoint_prior_scale':[0.05],
    'seasonality_prior_scale':[10]
}
all_params = [dict(zip(param_grid_simple.keys(), v)) for v in itertools.product(*param_grid_simple.values())]


In [17]:
def my_prophet(df, all_params):
        rmses = []
        for params in all_params:
                m = Prophet(**params#,
                        # holidays=df_holidays 
                # weekly_seasonality=False
                )
        # m.add_seasonality(name='weekly_on_winter', period = 7, fourier_order=10, condition_name='winter')
        # m.add_seasonality(name='weekly_on_summer', period = 7, fourier_order=10, condition_name='summer')
                m.fit(df_train)
                df_cv = cross_validation(m, horizon='180 days', parallel='processes')
                df_p = performance_metrics(df_cv, rolling_window=1)
                rmses.append(df_p['rmse'].values[0])
        tuning_results = pd.DataFrame(all_params)
        tuning_results['rmse'] = rmses
        print(tuning_results)
        best_params=all_params[np.argmin(rmses)]
        print(best_params)

        my_model = Prophet(**best_params)
        my_model.fit(df)
        return my_model

Supress logging

In [19]:
import warnings
import logging
import os

def my_prophet(df, all_params):
    warnings.simplefilter("ignore", DeprecationWarning)
    warnings.simplefilter("ignore", FutureWarning)
    logging.getLogger('prophet').setLevel(logging.ERROR) #Notice that i had modified the name from 'fbprophet' to just 'prophet'
    rmses = []
    for params in all_params:
            m = Prophet(**params#,
                    # holidays=df_holidays 
            # weekly_seasonality=False
            )
    # m.add_seasonality(name='weekly_on_winter', period = 7, fourier_order=10, condition_name='winter')
    # m.add_seasonality(name='weekly_on_summer', period = 7, fourier_order=10, condition_name='summer')
            m.fit(df_train)
            df_cv = cross_validation(m, horizon='180 days', parallel='processes')
            df_p = performance_metrics(df_cv, rolling_window=1)
            rmses.append(df_p['rmse'].values[0])
    tuning_results = pd.DataFrame(all_params)
    tuning_results['rmse'] = rmses
    print(tuning_results)
    best_params=all_params[np.argmin(rmses)]
    print(best_params)

    my_model = Prophet(**best_params)
    my_model.fit(df)
    return my_model

In [20]:
for index in pred_indices:
    with mlflow.start_run(run_name='Prophet Simple Model w/Params') as r:
        ### Predict ##
        my_model = my_prophet(df=df.loc[:index], all_params=all_params)
        df_future = df.copy().loc[index+1:index+24]
        df_forecast = my_model_simple.predict(df_future)
        df_forecast.index = df_future.index
        df_forecast.index.name = 'id'
        df_forecast['error'] = df_future.y - df_forecast.yhat
        df_forecast = df_forecast[['yhat', 'error']]
        
        ## Evaluate ##
        metrics = metrics_dict(df_future.y, df_forecast.yhat, ["mae", "mse", "r2_score"])
        print(index)
        print("mae: {mae}, mse: {mse}, r2: {r2_score}".format(**metrics))
        mlflow.log_metrics(metrics)

        ## Parameters log ##
        params = {
            'building_nr': building,
            'datetime':index2datetime(index)
        }
        mlflow.log_params(params)

        forecasts = df_forecast[['yhat']].assign(run_id = r.info.run_id).rename(columns={"yhat": "prediction"})
        # forecasts.to_sql("forecast", con=db, if_exists="append")

Initial log joint probability = -807.326
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
      99       47996.1    0.00499351       258.451           1           1      122   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
     199       48025.4     0.0139303       710.728           1           1      228   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
     299       48037.3    0.00110004       218.145      0.7697      0.7697      342   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
     315       48039.4   0.000111663       472.897   1.763e-07       0.001      399  LS failed, Hessian reset 
     399         48046    0.00148367       327.591      0.5189      0.5189      500   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
     499       48052.1    0.00499085    

In [None]:
future_dates = my_model.make_future_dataframe(periods = 365)
# future_dates

### Add holidays

In [None]:
df_holiday = df[['ds','holiday']].query('holiday')
df_holiday['holiday'] = 'Holiday'
# df_holiday

In [None]:
df_workday = df[['ds','workday']].query('workday')
df_workday['holiday'] = 'Workday'
df_workday = df_workday.drop('workday', axis=1)

In [None]:
df_holidays = pd.concat([df_holiday, df_workday], ignore_index=True)

In [None]:
# df_holidays

In [None]:
my_model = Prophet(interval_width = 0.95, seasonality_mode='additive',
                    holidays=df_holidays)
my_model.fit(df)

In [None]:
forecast = my_model.predict(future_dates)

In [None]:
forecast[(forecast['Holiday']+ forecast['Workday']).abs()>0][
    ['ds','Holiday','Workday']]
# forecast[(forecast['Holiday']+ forecast['Workday']).abs()>0][
#     ['ds','Holiday','Workday']][-10:]

In [None]:
fig = my_model.plot_components(forecast)

In [None]:
df_cv = cross_validation(my_model, horizon = '180 days', parallel='processes')

In [None]:
df_cv_pm = performance_metrics(df_cv)

In [None]:
df_cv_pm

### Fourier Order for Seasonalities

In [None]:
my_model = Prophet(interval_width = 0.95, seasonality_mode='additive',
                    holidays=df_holidays, yearly_seasonality=3)
my_model.fit(df)

In [None]:
fig = my_model.plot_components(forecast)

In [None]:
df_cv = cross_validation(my_model, horizon = '180 days', parallel='processes')
df_cv_pm = performance_metrics(df_cv)
df_cv_pm

### Assume different weekly sesonalities. Summer vs Winter

In [None]:
def is_winter_season(ds):
    date = pd.to_datetime(ds)
    return(date.month < 4 or date.month > 9)

df['winter'] = df['ds'].apply(is_winter_season)
df['summer'] = ~df['ds'].apply(is_winter_season)

In [None]:
my_model = Prophet(interval_width = 0.95, seasonality_mode='additive',
                    holidays=df_holidays, weekly_seasonality=False)
my_model.add_seasonality(name='weekly_on_winter', period = 7, fourier_order=10, condition_name='winter')
my_model.add_seasonality(name='weekly_on_summer', period = 7, fourier_order=10, condition_name='summer')
my_model.fit(df)

In [None]:
future_dates['winter'] = future_dates['ds'].apply(is_winter_season)
future_dates['summer'] = ~future_dates['ds'].apply(is_winter_season)
forecast = my_model.predict(future_dates)
forecast

In [None]:
fig = my_model.plot(forecast, uncertainty=True)

In [None]:
fig = my_model.plot_components(forecast)

In [None]:
df_cv = cross_validation(my_model, horizon = '180 days', parallel='processes')
df_cv_pm = performance_metrics(df_cv)
df_cv_pm

In [None]:
from prophet.plot import plot_cross_validation_metric
fig = plot_cross_validation_metric(df_cv, metric='rmse')

### Review Holidays parameter. Select only true holidays, do not count for working days

In [None]:
df_holidays = df_holiday
my_model = Prophet(interval_width = 0.95, seasonality_mode='additive',
                    holidays=df_holidays, weekly_seasonality=False)
my_model.add_seasonality(name='weekly_on_winter', period = 7, fourier_order=10, condition_name='winter')
my_model.add_seasonality(name='weekly_on_summer', period = 7, fourier_order=10, condition_name='summer')
my_model.fit(df)
forecast = my_model.predict(future_dates)
fig = my_model.plot(forecast, uncertainty=True)

In [None]:
fig = my_model.plot_components(forecast)

In [None]:
df_cv = cross_validation(my_model, horizon = '180 days', parallel='processes')
df_cv_pm = performance_metrics(df_cv)
df_cv_pm

In [None]:
fig = plot_cross_validation_metric(df_cv, metric='rmse')

### Hyperparameter tunning

In [None]:
param_grid = {
    'changepoint_prior_scale':[0.0001, 0.01, 0.1, 0.5],
    'seasonality_prior_scale':[0.01, 0.1, 1.0, 10.0]
}

# Generate all combinations of parameters
all_params = [dict(zip(param_grid.keys(), v)) for v in itertools.product(*param_grid.values())]
rmses = [] # Store the RMSEs for each params her

# Use cross valiation to evaluate all parameters
for params in all_params:
    m = Prophet(**params, 
                interval_width = 0.95, 
                seasonality_mode='additive',
                holidays=df_holidays, 
                weekly_seasonality=False)
    m.add_seasonality(name='weekly_on_winter', period = 7, fourier_order=10, condition_name='winter')
    m.add_seasonality(name='weekly_on_summer', period = 7, fourier_order=10, condition_name='summer')
    m.fit(df)
    df_cv = cross_validation(m, horizon='180 days', parallel='processes')
    df_p = performance_metrics(df_cv, rolling_window=1)
    rmses.append(df_p['rmse'].values[0])

#Find the best parameter
tuning_results = pd.DataFrame(all_params)
tuning_results['rmse'] = rmses
print(tuning_results)

In [None]:
best_params = all_params[np.argmin(rmses)]
print(best_params)

### Train best model

In [None]:
df_holidays = df_holiday
my_model = Prophet(interval_width = 0.95, seasonality_mode='additive',
                    holidays=df_holidays, weekly_seasonality=False,
                    **best_params)
my_model.add_seasonality(name='weekly_on_winter', period = 7, fourier_order=10, condition_name='winter')
my_model.add_seasonality(name='weekly_on_summer', period = 7, fourier_order=10, condition_name='summer')
my_model.fit(df)
forecast = my_model.predict(future_dates)
fig = my_model.plot(forecast, uncertainty=True)

In [None]:
fig = my_model.plot_components(forecast)

In [None]:
df_cv = cross_validation(my_model, horizon = '180 days', parallel='processes')
df_cv_pm = performance_metrics(df_cv)
df_cv_pm

## Assesment with training data

In [None]:
y_test['winter'] = y_test['ds'].apply(is_winter_season)
y_test['summer'] = ~y_test['ds'].apply(is_winter_season)
forecast = my_model.predict(y_test.drop('y', axis=1))
forecast

In [None]:
print(f"Mean Absolute Error = {mean_absolute_error(y_test['y'], forecast['yhat']).round(3)}")
print(f"Mean Squared Error = {mean_squared_error(y_test['y'], forecast['yhat']).round(3)}")

In [None]:
df.columns


### Add outdoor temperature as additional regressor.

In [None]:
df_holidays = df_holiday
my_model = Prophet(interval_width = 0.95, seasonality_mode='additive',
                    holidays=df_holidays, weekly_seasonality=False,
                    **best_params)
my_model.add_seasonality(name='weekly_on_winter', period = 7, fourier_order=10, condition_name='winter')
my_model.add_seasonality(name='weekly_on_summer', period = 7, fourier_order=10, condition_name='summer')
my_model.add_regressor(name='outdoor_temp')
my_model.add_regressor(name='pred_24h_outdoor_temp')
my_model.add_regressor(name='direct_solar_W_m2')
my_model.add_regressor(name='pred_24h_direct_solar_W_m2')

my_model.fit(df)
forecast = my_model.predict(y_test)
forecast
# forecast = my_model.predict(future_dates)
fig = my_model.plot(forecast, uncertainty=True)

In [None]:
print(f"Mean Absolute Error = {mean_absolute_error(y_test['y'], forecast['yhat']).round(3)}")
print(f"Mean Squared Error = {mean_squared_error(y_test['y'], forecast['yhat']).round(3)}")