# Backtest External Models

We continue by illustrating how to use the `Backtest` object to gauge the performance of external models. `Backtest` is designed to work with any model object that has a `fit` method and a `predict` method; all you need to do is write a few callback functions.

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
plt.style.use('ggplot')

from orbit.backtest.backtest import TimeSeriesSplitter, Backtest

## Load data

In [2]:
# Load the iclaims example data; the 'week' column is parsed as dates.
data_path = "../examples/data/iclaims_example.csv"
raw_data = pd.read_csv(data_path, parse_dates=['week'])

# Work on a copy so that raw_data itself is left untouched.
data = raw_data.copy()
# Optional log transformation. It is currently disabled, so the models below
# are fitted on the untransformed values. Uncomment to enable:
# data[['claims', 'trend.unemploy', 'trend.filling', 'trend.job']] = \
#     data[['claims', 'trend.unemploy', 'trend.filling', 'trend.job']].apply(np.log, axis=1)

# Quick sanity check: dimensions and the first few rows.
print(data.shape)
data.head(5)

(443, 5)


Unnamed: 0,week,claims,trend.unemploy,trend.filling,trend.job
0,2010-01-03,651215,1.183973,0.72014,1.119669
1,2010-01-10,825891,1.183973,0.814896,1.178599
2,2010-01-17,659173,1.203382,0.739091,1.119669
3,2010-01-24,507651,1.164564,0.814896,1.107883
4,2010-01-31,538617,1.086926,0.776993,1.072525


## Sklearn model object - Random Forest

Declare a `TimeSeriesSplitter` object and a `Backtest` object.

In [3]:
# Split `data` into 3 backtest splits with forecast_len=20, using 'week' as
# the date column.
# NOTE(review): the exact roles of min_train_len and incremental_len (training
# window sizing / step between splits) are defined by TimeSeriesSplitter --
# confirm against its documentation.
splitter = TimeSeriesSplitter(data, min_train_len=200, incremental_len=20, forecast_len=20, n_splits=3, date_col='week')

In [4]:
bt = Backtest(splitter=splitter)

Next, we instantiate an sklearn model.

In [5]:
from sklearn.ensemble import RandomForestRegressor

# Random forest with 50 trees. No random_state is passed, so predictions and
# scores can differ slightly from one run to the next.
mod = RandomForestRegressor(n_estimators = 50)

Create callback functions for the `RandomForestRegressor` model.

In [6]:
def fit_callback_sklearn(model, train_df, response_col, regressor_col, date_col):
    """Fit callback for estimators that expose ``fit(X, y)``.

    Parameters
    ----------
    model : object
        Estimator with a ``fit(X, y)`` method.
    train_df : pandas.DataFrame
        Training rows for the current split.
    response_col : str
        Name of the column passed to ``fit`` as the target.
    regressor_col : list of str
        Names of the columns passed to ``fit`` as the features.
    date_col : str
        Not used by this callback.

    Returns
    -------
    None
    """
    target = train_df[response_col]
    features = train_df[regressor_col]
    model.fit(features, target)

def predict_callback_sklearn(model, test_df, response_col, regressor_col, date_col):
    """Predict callback for estimators that expose ``predict(X)``.

    Parameters
    ----------
    model : object
        Estimator with a ``predict(X)`` method.
    test_df : pandas.DataFrame
        Rows of the current split to predict on.
    response_col : str
        Not used by this callback.
    regressor_col : list of str
        Names of the columns passed to ``predict`` as the features.
    date_col : str
        Name of the date column copied from ``test_df`` into the result.

    Returns
    -------
    pandas.DataFrame
        Two columns: ``date_col`` (taken from ``test_df``) and
        ``'prediction'`` (the output of ``model.predict``).
    """
    predicted = model.predict(test_df[regressor_col])
    columns = {date_col: test_df[date_col], 'prediction': predicted}
    return pd.DataFrame(columns)

# Column names needed by the sklearn callbacks. The same dict is handed to
# fit_score() as both fit_args and predict_args, so it serves
# fit_callback_sklearn() and predict_callback_sklearn() alike.
fit_predict_args = {
    'response_col': 'claims',
    'regressor_col': ['trend.unemploy', 'trend.filling', 'trend.job'],
    'date_col': 'week',
}

In [7]:
# Backtest the random forest using the sklearn callbacks.
# predicted_col must match the 'prediction' column produced by
# predict_callback_sklearn().
# NOTE(review): fit_args / predict_args are presumably forwarded to the
# callbacks as keyword arguments -- confirm against the Backtest implementation.
bt.fit_score(
    mod,
    response_col='claims',
    predicted_col='prediction',
    fit_callback=fit_callback_sklearn,
    predict_callback=predict_callback_sklearn,
    fit_args=fit_predict_args,
    predict_args=fit_predict_args
)

# Show the first rows of the collected predictions.
bt.get_predictions().head(5)

Unnamed: 0,steps,week,claims,prediction,part,split_key
0,1,2017-05-07,215040,248052.376,test,0
1,2,2017-05-14,206905,317408.198,test,0
2,3,2017-05-21,210544,414146.85,test,0
3,4,2017-05-28,232138,244482.9,test,0
4,5,2017-06-04,212696,288049.5,test,0


In [8]:
bt.get_scores()

Unnamed: 0,n_splits,forecast_len,incremental_len,wmape,smape,rmsse
0,3,20,20,0.266317,0.218235,1.811515


## Prophet model object

In [9]:
from fbprophet import Prophet
import inspect

In [10]:
def model_callback_prophet(model, **kwargs):
    """Build a new object of the same type as ``model``.

    Parameters
    ----------
    model : object
        Existing model; only its type is used.
    **kwargs
        Keyword arguments passed to the type's constructor.

    Returns
    -------
    object
        A freshly constructed instance of ``type(model)``.
    """
    return type(model)(**kwargs)

def fit_callbacks_prophet(model, train_df, date_col, response_col, regressor_col):
    """Fit callback for a Prophet-style model.

    Parameters
    ----------
    model : object
        Model exposing ``add_regressor(name)`` and ``fit(df)``.
    train_df : pandas.DataFrame
        Training rows for the current split; it is not modified.
    date_col : str
        Column renamed to ``"ds"`` before fitting.
    response_col : str
        Column renamed to ``"y"`` before fitting.
    regressor_col : list of str or None
        Each name is registered through ``model.add_regressor``; ``None``
        registers nothing.

    Returns
    -------
    None
    """
    # rename() returns a new frame, so the caller's frame keeps its columns.
    prophet_df = train_df.rename(columns={date_col: "ds", response_col: "y"})

    extra_regressors = [] if regressor_col is None else regressor_col
    for name in extra_regressors:
        model.add_regressor(name)

    model.fit(prophet_df)

def pred_callbacks_prophet(model, test_df, date_col, response_col, regressor_col):
    """Predict callback for a Prophet-style model.

    Parameters
    ----------
    model : object
        Model exposing ``predict(df)`` that returns a frame with ``'ds'``
        and ``'yhat'`` columns.
    test_df : pandas.DataFrame
        Rows of the current split to predict on; it is not modified.
    date_col : str
        Column renamed to ``"ds"`` for the model and restored in the result.
    response_col : str
        Column renamed to ``"y"`` for the model.
    regressor_col : list of str or None
        Not used by this callback.

    Returns
    -------
    pandas.DataFrame
        Two columns: ``date_col`` and ``'prediction'``.
    """
    prophet_input = test_df.rename(columns={date_col: "ds", response_col: "y"})
    forecast = model.predict(prophet_input)

    # Rename the model's output columns in place, back to the caller's names.
    forecast.rename(columns={'ds': date_col, 'yhat': 'prediction'}, inplace=True)
    return forecast[[date_col, 'prediction']]

# Column names needed by the Prophet callbacks; the keys match the callbacks'
# parameter names. This rebinds the fit_predict_args name used earlier for the
# sklearn example.
fit_predict_args = {
    'response_col': 'claims',
    'date_col': 'week',
    'regressor_col': ['trend.unemploy', 'trend.filling', 'trend.job']
}

In [11]:
mod = Prophet()

Note: if the cell below raises an error related to pickling, you can avoid it by setting `save_model=False` or by upgrading Python to 3.7.

In [12]:
# Backtest Prophet using the Prophet callbacks.
# model_callback_prophet() constructs a new instance of the model's type.
# NOTE(review): Backtest presumably calls it for each split so that
# add_regressor()/fit() always run on an unfitted model -- confirm against the
# Backtest implementation.
bt.fit_score(
    mod,
    response_col='claims',
    predicted_col='prediction',
    fit_callback=fit_callbacks_prophet,
    predict_callback=pred_callbacks_prophet,
    model_callback=model_callback_prophet,
    fit_args=fit_predict_args,
    predict_args=fit_predict_args,
)

INFO:fbprophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.


In [13]:
bt.get_predictions().head()

Unnamed: 0,steps,week,claims,prediction,part,split_key
0,1,2017-05-07,215040,201359.63744,test,0
1,2,2017-05-14,206905,192141.755352,test,0
2,3,2017-05-21,210544,197247.881505,test,0
3,4,2017-05-28,232138,208456.090372,test,0
4,5,2017-06-04,212696,216694.353579,test,0


In [14]:
bt.get_scores()

Unnamed: 0,n_splits,forecast_len,incremental_len,wmape,smape,rmsse
0,3,20,20,0.075781,0.07822,0.507137
