In [31]:
import pandas as pd
# from datetime import datetime
# from typing import Tuple
# import codecs, json
from greykite.common.data_loader import DataLoader
from greykite.framework.templates.autogen.forecast_config import ForecastConfig
from greykite.framework.templates.autogen.forecast_config import MetadataParam
from greykite.framework.templates.forecaster import Forecaster, ForecastResult
from greykite.framework.templates.model_templates import ModelTemplateEnum
from greykite.framework.utils.result_summary import summarize_grid_search_results

# Inputs

In [32]:
LOADS = 'out/loads.csv'

# (unix) timestamp of time offset
INITIAL_TIME = 1247270400

# time slot length
DELTA = 10 * 60
DELTA_ALIAS = '10T' # see: https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#timeseries-offset-aliases

# # adjusts flexibility of trend; increase to make more flexible; default is `0.05`
# CHANGEPOINT_PRIOR_SCALE = 0.05

# # number of samples of the predicted distribution
# NUMBER_OF_SAMPLES = 300

# number of predicted time slots
WINDOW = 100

# Preparation

In [33]:
l = pd.read_csv(LOADS)

In [34]:
metadata = MetadataParam(
    time_col="ds",
    value_col="y",
    freq=DELTA_ALIAS
)

## Preparing date times

In [35]:
def prepare_df(load_type: str) -> pd.DataFrame:
  df = l[[load_type]].rename(columns={load_type: 'y'})
  df['ds'] = df.index
  df['ds'] = (INITIAL_TIME + df['ds'] * DELTA).apply(lambda x: datetime.fromtimestamp(x))
  return df

## Making predictions

In [36]:
def predict(df: pd.DataFrame) -> ForecastResult:
  print(df)
  forecaster = Forecaster()
  return forecaster.run_forecast_config(
      df=df,
      config=ForecastConfig(
          model_template=ModelTemplateEnum.SILVERKITE.name,
          forecast_horizon=WINDOW,
          coverage=0.95,
          metadata_param=metadata
      )
  )

# Execution

In [39]:
predictions = {}

for load_type in l.columns:
  df = prepare_df(load_type)
  prediction = predict(df)
  predictions[load_type] = prediction
  summary = prediction.model[-1].summary(max_colwidth=30)
  print(summary)
  ts = prediction.timeseries
  ts.plot().show()
  backtest = prediction.backtest
  backtest.plot().show()

      y                  ds
0    34 2009-07-11 02:00:00
1    31 2009-07-11 02:10:00
2    49 2009-07-11 02:20:00
3    45 2009-07-11 02:30:00
4    27 2009-07-11 02:40:00
..   ..                 ...
139  62 2009-07-12 01:10:00
140  33 2009-07-12 01:20:00
141  22 2009-07-12 01:30:00
142  46 2009-07-12 01:40:00
143  89 2009-07-12 01:50:00

[144 rows x 2 columns]
Fitting 1 folds for each of 1 candidates, totalling 1 fits



Not enough training data to forecast the full forecast_horizon. Exercise extra caution with forecasted values after 72 periods.


test_horizon should be <= than 1/3 of the data set size to allow enough data to train a backtest model. Consider reducing to 48. If this is smaller than the forecast_horizon, you will need to make a trade-off between setting test_horizon=forecast_horizon and having enough data left over to properly train a realistic backtest model.


There are no CV splits under the requested settings. Decrease `forecast_horizon` and/or `min_train_periods`. Using default 90/10 CV split


Requested holiday 'Easter Monday [England, Wales, Northern Ireland]' does not occur in the provided countries


Requested holiday 'Easter Monday [England, Wales, Northern Ireland]' does not occur in the provided countries


Requested holiday 'Easter Monday [England, Wales, Northern Ireland]' does not occur in the provided countries


Input data has many null values. Missing 40.98% of one in


Number of observations: 144,   Number of features: 58
Method: Ridge regression
Number of nonzero features: 7
Regularization parameter: 8.111e-05

Residuals:
         Min           1Q       Median           3Q          Max
      -38.84       -18.56       -5.504        9.877        87.28

                 Pred_col Estimate Std. Err Pr(>)_boot sig. code             95%CI
                Intercept    53.21    4.269     <2e-16       ***    (44.46, 62.26)
  events_Chinese New Year       0.       0.      1.000                    (0., 0.)
events_Chinese New Year-1       0.       0.      1.000                    (0., 0.)
events_Chinese New Year-2       0.       0.      1.000                    (0., 0.)
events_Chinese New Year+1       0.       0.      1.000                    (0., 0.)
events_Chinese New Year+2       0.       0.      1.000                    (0., 0.)
     events_Christmas Day       0.       0.      1.000                    (0., 0.)
   events_Christmas Day-1       0.       0.    

# Store predictions

Saves predictions as a JSON dump. For each load type the dump constains a list of predictions for each time slot (past and future). A prediction consists of a list of samples.

In [38]:
json.dump({k: v.tolist() for k, v in predictions.items()}, codecs.open('out/predictions.json', 'w', encoding='utf-8'), separators=(',', ':'), sort_keys=True, indent=4)

AttributeError: 'ForecastResult' object has no attribute 'tolist'