# LinkedIn Silverkite

In [1]:
import warnings
# Suppress every warning category for the rest of the notebook session to keep cell output clean.
# NOTE(review): this also hides deprecation warnings raised by the libraries used below;
# consider narrowing the filter to specific categories.
warnings.simplefilter('ignore')

In [2]:
# libraries
import ast
from pathlib import Path

import numpy as np
import pandas as pd
from greykite.framework.templates.autogen.forecast_config import *
from greykite.framework.templates.forecaster import Forecaster
from greykite.framework.templates.model_templates import ModelTemplateEnum
from greykite.common.features.timeseries_features import *
from greykite.common.evaluation import EvaluationMetricEnum
from greykite.framework.utils.result_summary import summarize_grid_search_results
from plotly.offline import iplot

In [3]:
# Read the historical demand data and the rows to forecast (Demand is empty there).
# NOTE(review): the Date column is displayed as M/D/YYYY (e.g. 1/1/2021), not YYYY-MM-DD.
df, future_df = (
    pd.read_csv(csv_path)
    for csv_path in ('../../_data/nyc_data.csv', '../../_data/future.csv')
)
future_df.head()

Unnamed: 0,Date,Demand,Easter,Thanksgiving,Christmas,Temperature,Marketing
0,1/1/2021,,0,0,0,5.0,154.221
1,1/2/2021,,0,0,0,11.11,264.805
2,1/3/2021,,0,0,0,3.89,115.499
3,1/4/2021,,0,0,0,6.67,124.65
4,1/5/2021,,0,0,0,5.56,77.968


In [4]:
# Load the best hyper-parameters saved by the parameter-tuning notebook.
# The first CSV column holds the parameter names and becomes the index.
parameters = pd.read_csv(
    "../../02 Parameter Tuning/03 Linkedin Silverkite/out/best_params_silverkite.csv",
    index_col=0,
)
parameters

Unnamed: 0,"[('estimator__growth_term', 'sqrt'), ('estimator__fit_algorithm_dict', {'fit_algorithm': 'linear'})]"
mean_test_RMSE,47.8
param_estimator__fit_algorithm_dict,{'fit_algorithm': 'linear'}
param_estimator__growth_term,sqrt


In [5]:
# Pull the tuned values out of the single-column parameters frame.
# `.loc[...]` returns the row as a Series whose only label is the long column name,
# so the value is taken by position with `.iloc[0]`. Plain `[0]` on a string-labelled
# Series relies on a deprecated positional fallback of Series.__getitem__.
growth_term_param = parameters.loc["param_estimator__growth_term"].iloc[0]
fit_algorithm_param = parameters.loc["param_estimator__fit_algorithm_dict"].iloc[0]

In [6]:
# Stack the future rows under the history and renumber the index from 0.
# NOTE: this cell reassigns `df` from itself, so re-running it appends the future rows again.
df = pd.concat([df, future_df], ignore_index=True)
df.head()

Unnamed: 0,Date,Demand,Easter,Thanksgiving,Christmas,Temperature,Marketing
0,1/1/2015,720.000885,0,0,0,3.68,41.305
1,1/2/2015,581.276773,0,0,0,4.73,131.574
2,1/3/2015,754.117039,0,0,0,7.23,162.7
3,1/4/2015,622.252774,0,0,0,10.96,160.281
4,1/5/2015,785.373319,0,0,0,6.92,51.077


In [7]:
# Inspect the end of the combined frame: the appended January 2021 rows carry
# regressor values but no Demand yet.
df.tail()

Unnamed: 0,Date,Demand,Easter,Thanksgiving,Christmas,Temperature,Marketing
2218,1/27/2021,,0,0,0,3.33,39.664
2219,1/28/2021,,0,0,0,1.67,195.314
2220,1/29/2021,,0,0,0,-2.78,235.894
2221,1/30/2021,,0,0,0,1.11,152.752
2222,1/31/2021,,0,0,0,4.44,158.62


In [8]:
# Name the target column `y`, the value column name given to MetadataParam below.
df = df.rename({"Demand": "y"}, axis="columns")
# Show only the header row to confirm the new column name.
df.head(0)

Unnamed: 0,Date,y,Easter,Thanksgiving,Christmas,Temperature,Marketing


## Silverkite Preparations

In [9]:
# Describe the time series: which columns hold time and value, the daily frequency,
# and the last date used for training (later rows are the ones to forecast).
metadata = MetadataParam(
    time_col="Date",
    value_col="y",
    freq="D",
    train_end_date=pd.Timestamp("2020-12-31"),
)
metadata

MetadataParam(anomaly_info=None, date_format=None, freq='D', time_col='Date', train_end_date=Timestamp('2020-12-31 00:00:00'), value_col='y')

In [10]:
# Growth component: use the growth term selected during parameter tuning.
growth = {"growth_term": growth_term_param}
growth

{'growth_term': 'sqrt'}

In [11]:
# Seasonality components: every period is set to "auto".
seasonality = {
    f"{period}_seasonality": "auto"
    for period in ("yearly", "quarterly", "monthly", "weekly", "daily")
}
seasonality

{'yearly_seasonality': 'auto',
 'quarterly_seasonality': 'auto',
 'monthly_seasonality': 'auto',
 'weekly_seasonality': 'auto',
 'daily_seasonality': 'auto'}

In [12]:
# Events and holidays.
# - US holidays are looked up, with New Year's Day modelled as its own effect.
# - Holidays get 2 days before and 2 days after by default; New Year's Day is
#   overridden to 3 days before and 1 day after.
# - Two election dates are supplied as a custom daily event.
events = {
    "holidays_to_model_separately": ["New Year's Day"],
    "holiday_lookup_countries": ["US"],
    "holiday_pre_num_days": 2,
    "holiday_post_num_days": 2,
    "holiday_pre_post_num_dict": {"New Year's Day": (3, 1)},
    "daily_event_df_dict": {
        "elections": pd.DataFrame({
            "date": ["2016-11-08", "2020-11-03"],
            "event_name": ["elections", "elections"],
        }),
    },
}
events

{'holidays_to_model_separately': ["New Year's Day"],
 'holiday_lookup_countries': ['US'],
 'holiday_pre_num_days': 2,
 'holiday_post_num_days': 2,
 'holiday_pre_post_num_dict': {"New Year's Day": (3, 1)},
 'daily_event_df_dict': {'elections':          date event_name
  0  2016-11-08  elections
  1  2020-11-03  elections}}

In [13]:
# Changepoints capture changes in the trend; the detection method is set to "auto".
changepoints = {"changepoints_dict": {"method": "auto"}}

In [14]:
# External regressors: columns of `df` passed to the model as explanatory variables.
regressors = {"regressor_cols": ["Easter", "Temperature", "Marketing"]}
regressors

{'regressor_cols': ['Easter', 'Temperature', 'Marketing']}

In [15]:
# Lagged regressors: the same three columns, each with its lag setting left on "auto".
lagged_regressors = {
    "lagged_regressor_dict": dict.fromkeys(("Temperature", "Easter", "Marketing"), "auto"),
}

In [16]:
# Autoregression -> depends on the forecasting horizon; configuration is left on "auto".
autoregression = {"autoreg_dict": "auto"}

In [17]:
# Display the raw tuned value: after the CSV round trip it is a string holding the
# repr of a dict, not a dict, so it has to be parsed before use.
fit_algorithm_param

"{'fit_algorithm': 'linear'}"

In [18]:
# Fitting algorithm.
# The tuned value was read from the CSV as the string repr of a Python dict, so it is
# parsed back with ast.literal_eval. That function understands Python literals such as
# None and tuples, which a YAML parser would read as plain strings or fail on.
custom = dict(fit_algorithm_dict=ast.literal_eval(fit_algorithm_param))
custom

{'fit_algorithm_dict': {'fit_algorithm': 'linear'}}

## Silverkite Model

In [19]:
# Bundle every component dictionary defined above into the model specification.
model_components = ModelComponentsParam(
    growth=growth,
    seasonality=seasonality,
    events=events,
    changepoints=changepoints,
    regressors=regressors,
    lagged_regressors=lagged_regressors,
    autoregression=autoregression,
    custom=custom,
)

In [20]:
# Cross-validation layout: expanding training window, at most 50 splits,
# 16 periods between consecutive splits.
evaluation_period = EvaluationPeriodParam(
    cv_expanding_window=True,
    cv_max_splits=50,
    cv_periods_between_splits=16,
    # NOTE(review): len(df) counts the appended future rows as well; presumably 31 is
    # the forecast horizon and 180 the span reserved for CV folds. Confirm.
    cv_min_train_periods=len(df) - 180 - 31,
)

In [21]:
# Metric used to select the best model during cross-validation: RMSE.
evaluation_metric = EvaluationMetricParam(
    cv_selection_metric=EvaluationMetricEnum.RootMeanSquaredError.name,
)

In [22]:
# Full forecast configuration: Silverkite template, 31-step horizon, plus the
# metadata, model components and evaluation settings built above.
config = ForecastConfig(
    model_template=ModelTemplateEnum.SILVERKITE.name,
    forecast_horizon=31,
    metadata_param=metadata,
    model_components_param=model_components,
    evaluation_period_param=evaluation_period,
    evaluation_metric_param=evaluation_metric,
)

In [23]:
# Run the forecast described by `config` on the combined data frame.
forecaster = Forecaster()
result = forecaster.run_forecast_config(config=config, df=df)

Fitting 8 folds for each of 1 candidates, totalling 8 fits


In [27]:
# Model summary: the last step of the fitted pipeline is the estimator.
fitted_estimator = result.model[-1]
summary = fitted_estimator.summary()
print(summary)


Number of observations: 2192,   Number of features: 182
Method: Ordinary least squares
Number of nonzero features: 182

Residuals:
         Min           1Q       Median           3Q          Max
      -165.2       -32.13      -0.9338        31.22        235.2

            Pred_col Estimate Std. Err   t value Pr(>|t|) sig. code              95%CI
           Intercept    64.06    3.195     20.05   <2e-16       ***     (57.79, 70.32)
events_New Years Day   -16.02    22.53   -0.7108    0.477              (-60.21, 28.17)
 events_N...rs Day-1   -48.39    22.51     -2.15    0.032         *   (-92.53, -4.255)
 events_N...rs Day-2    -7.17    22.53   -0.3182    0.750              (-51.36, 37.02)
 events_N...rs Day-3   -41.31    22.48    -1.838    0.066         .     (-85.4, 2.778)
 events_N...rs Day+1  -0.1647    22.45 -0.007335    0.994              (-44.18, 43.86)
        events_Other   -6.531    6.752   -0.9673    0.334               (-19.77, 6.71)
      events_Other-1   -5.075    6.749   

In [28]:
# Plot the forecast broken down into its fitted components.
fig = result.forecast.plot_components()
iplot(figure_or_data=fig)

In [31]:
# Keep the timestamp and the point forecast, and label the forecast with the model name.
forecast = (
    result.forecast.df[["ts", "forecast"]]
    .rename(columns={"forecast": "silverkite"})
)
# The last 31 rows are the future dates (31 is the forecast horizon set in the config).
predictions_silverkite = forecast.tail(31)
predictions_silverkite

Unnamed: 0,ts,silverkite
2192,2021-01-01,751.509648
2193,2021-01-02,836.244799
2194,2021-01-03,754.438222
2195,2021-01-04,898.366173
2196,2021-01-05,781.204767
2197,2021-01-06,915.563305
2198,2021-01-07,827.511744
2199,2021-01-08,755.982269
2200,2021-01-09,823.150542
2201,2021-01-10,738.205218


In [32]:
# Export the future-date predictions to CSV.
# DataFrame.to_csv does not create missing parent directories, so the output folder
# is created first; this lets the notebook run on a fresh checkout.
output_path = Path("./out/predictions_silverkite.csv")
output_path.parent.mkdir(parents=True, exist_ok=True)
predictions_silverkite.to_csv(output_path)