In [1]:
from utils import runwrite
import matplotlib.pyplot as plt

# strategy.py

In [None]:
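# Planned strategy API (sketch only; none of these are defined in this notebook yet):
# AutoRegressiveStrategy(horizon_length=100, horizon_step=2)
# MultiOutputStrategy(horizon_length=100)
# DirectStrategy(horizon_length=100, horizon_step=...)  # TODO: decide on horizon_step
# CerqueiraArbitrage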

In [2]:
%%runwrite ../timegym/strategy.py

import numpy as np
import pandas as pd

In [3]:
# Synthetic daily series of Gaussian noise used by the demos below.
# A seeded generator keeps the displayed outputs reproducible across
# "Restart Kernel -> Run All".
rng = np.random.default_rng(42)
df = pd.DataFrame(rng.normal(size=1000), columns=['target'],
                  index=pd.date_range('2012-12-12', periods=1000))
df

Unnamed: 0,target
2012-12-12,-0.493436
2012-12-13,-0.262520
2012-12-14,0.281611
2012-12-15,-1.271695
2012-12-16,0.713204
...,...
2015-09-03,0.859353
2015-09-04,1.608761
2015-09-05,0.744984
2015-09-06,-0.046160


## Multi-output target forecast

In [4]:
%%runwrite -a ../timegym/strategy.py

def multioutput_target(df, horizon=10, dropna=True):
    """Append the next ``horizon`` future values of every column as new columns.

    For each column ``col`` of ``df`` and each ``h`` in ``1..horizon`` a column
    named ``'<col>_t+<h>'`` holding ``df[col].shift(-h)`` is appended. New
    columns are ordered column by column, then by increasing ``h``.

    Parameters
    ----------
    df : pandas.DataFrame
        Input frame. It is never modified; a new frame is returned.
    horizon : int, default 10
        Number of look-ahead steps. A value below 1 adds no columns.
    dropna : bool, default True
        When true, every row containing a missing value is dropped. This
        removes the trailing rows whose future values are unknown, and also
        any row that already had a missing value in ``df``.

    Returns
    -------
    pandas.DataFrame
        The original columns followed by the shifted target columns.

    Notes
    -----
    Generated names are assumed not to clash with existing column names.
    """
    # Build every shifted series up front and attach them with one concat:
    # inserting columns one at a time fragments the frame and makes pandas
    # emit a PerformanceWarning once the horizon gets wide.
    shifted = [
        df[col].shift(-h).rename(f'{col}_t+{h}')
        for col in df.columns
        for h in range(1, 1 + horizon)
    ]
    out = pd.concat([df, *shifted], axis=1) if shifted else df.copy()
    if dropna:
        out = out.dropna()
    return out

In [5]:
multioutput_target(df)

Unnamed: 0,target,target_t+1,target_t+2,target_t+3,target_t+4,target_t+5,target_t+6,target_t+7,target_t+8,target_t+9,target_t+10
2012-12-12,-0.493436,-0.262520,0.281611,-1.271695,0.713204,-0.043433,0.691720,-0.898219,-1.224259,0.177200,-0.147052
2012-12-13,-0.262520,0.281611,-1.271695,0.713204,-0.043433,0.691720,-0.898219,-1.224259,0.177200,-0.147052,-0.812313
2012-12-14,0.281611,-1.271695,0.713204,-0.043433,0.691720,-0.898219,-1.224259,0.177200,-0.147052,-0.812313,0.701799
2012-12-15,-1.271695,0.713204,-0.043433,0.691720,-0.898219,-1.224259,0.177200,-0.147052,-0.812313,0.701799,-1.015486
2012-12-16,0.713204,-0.043433,0.691720,-0.898219,-1.224259,0.177200,-0.147052,-0.812313,0.701799,-1.015486,0.187275
...,...,...,...,...,...,...,...,...,...,...,...
2015-08-24,0.445250,-1.091299,-0.091886,1.459718,0.753539,-1.059421,-0.570290,-1.432866,0.766799,1.338253,0.859353
2015-08-25,-1.091299,-0.091886,1.459718,0.753539,-1.059421,-0.570290,-1.432866,0.766799,1.338253,0.859353,1.608761
2015-08-26,-0.091886,1.459718,0.753539,-1.059421,-0.570290,-1.432866,0.766799,1.338253,0.859353,1.608761,0.744984
2015-08-27,1.459718,0.753539,-1.059421,-0.570290,-1.432866,0.766799,1.338253,0.859353,1.608761,0.744984,-0.046160


## One-step forecast

A one-step-ahead target can be used to forecast further ahead recursively, by feeding each prediction back in as the input for the next step.

In [6]:
%%runwrite -a ../timegym/strategy.py

def onestep_target(df, dropna=True):
    """Append the next-step (``t+1``) value of every column of ``df``.

    Thin wrapper around ``multioutput_target`` with the horizon fixed to one
    step; ``dropna`` is forwarded unchanged.
    """
    single_step = 1
    with_next_step = multioutput_target(df, dropna=dropna, horizon=single_step)
    return with_next_step

In [7]:
onestep_target(df)

Unnamed: 0,target,target_t+1
2012-12-12,-0.493436,-0.262520
2012-12-13,-0.262520,0.281611
2012-12-14,0.281611,-1.271695
2012-12-15,-1.271695,0.713204
2012-12-16,0.713204,-0.043433
...,...,...
2015-09-02,1.338253,0.859353
2015-09-03,0.859353,1.608761
2015-09-04,1.608761,0.744984
2015-09-05,0.744984,-0.046160


## Step-ahead target

In [8]:
%%runwrite -a ../timegym/strategy.py

def step_ahead_target(df, step=5, dropna=True):
    """Append, for every column, its value ``step`` rows ahead.

    Parameters
    ----------
    df : pandas.DataFrame
        Input frame; it is copied, never modified.
    step : int, default 5
        Look-ahead distance. Each new column is named ``'<col>_t+<step>'``
        and holds ``df[col].shift(-step)``.
    dropna : bool, default True
        When true, rows containing any missing value are removed from the
        result before it is returned.

    Returns
    -------
    pandas.DataFrame
        The original columns followed by one shifted column per input column.
    """
    result = df.copy()
    # Snapshot the labels first, since columns are appended while looping.
    original_columns = list(result.columns)
    for name in original_columns:
        result[f'{name}_t+{step}'] = result[name].shift(-step)
    if not dropna:
        return result
    return result.dropna()

In [9]:
step_ahead_target(df)

Unnamed: 0,target,target_t+5
2012-12-12,-0.493436,-0.043433
2012-12-13,-0.262520,0.691720
2012-12-14,0.281611,-0.898219
2012-12-15,-1.271695,-1.224259
2012-12-16,0.713204,0.177200
...,...,...
2015-08-29,-1.059421,0.859353
2015-08-30,-0.570290,1.608761
2015-08-31,-1.432866,0.744984
2015-09-01,0.766799,-0.046160


In [None]:
# TODO: compare the Direct, Recursive, DirRec and Multi-output strategies.
# E.g. to predict t+2, a hybrid pipeline could be trained on the historical data from t-h to t plus the prediction for t+1.