In [1]:
import numpy as np
import pandas as pd

In [2]:
# Fix the legacy global seed so the synthetic series is reproducible.
np.random.seed(14)
# 'y' is drawn before 'x' so the random stream is consumed in the same order.
data = dict(
    y=np.random.randint(0, 200, size=50),
    x=np.random.randint(0, 2000, size=50),
)

In [3]:
# Wrap the two generated series in a DataFrame (one column per series)
# and preview the first rows.
df = pd.DataFrame(data=data)
df.head(n=5)

Unnamed: 0,y,x
0,107,1359
1,88,1705
2,12,639
3,150,1301
4,71,525


# Data Preparation

In [4]:
# convert time-series data to supervised data
def series_to_supervised(data, n_in, n_out=1, dropnan=True, col_name=None):
    """Reframe a time series as a supervised-learning table of lags and leads.

    For every variable ``j`` the columns ``t-n_in[j] ... t-1`` are produced
    first (grouped by variable), followed by all variables at ``t``, then all
    variables at ``t+1``, and so on up to ``t+n_out-1``.

    Parameters
    ----------
    data : list, numpy array or DataFrame
        Observations, one row per time step and one column per variable.
        A flat list / 1-D array is treated as a single variable.
    n_in : int or sequence of int
        Number of lagged steps per variable. A scalar is applied to every
        variable; a sequence is indexed by variable position.
    n_out : int, default 1
        Number of forecast steps, starting at ``t``.
    dropnan : bool, default True
        Drop the rows that contain NaN introduced by shifting.
    col_name : sequence, optional
        Label of each variable, used as the prefix of the generated column
        names. Defaults to the DataFrame's own column labels when ``data`` is
        a DataFrame, otherwise to ``'var'`` for every variable.

    Returns
    -------
    (agg, names) : (DataFrame, list of str)
        The reframed table and its column names, formatted as
        ``'<label> <position>(t-k)'``, ``'<label> <position>(t)'`` or
        ``'<label> <position>(t+k)'`` with a 1-based variable position.
    """
    df = pd.DataFrame(data)
    # Count variables from the frame itself so flat lists, nested lists and
    # 1-D arrays are all sized correctly.
    n_vars = df.shape[1]

    # A scalar lag applies to every variable.
    if np.ndim(n_in) == 0:
        n_in = [int(n_in)] * n_vars

    # The labels are only used to build column names; fall back to something
    # sensible instead of failing on ``None[j]``.
    if col_name is None:
        if isinstance(data, pd.DataFrame):
            col_name = list(data.columns)
        else:
            col_name = ['var'] * n_vars

    cols, names = [], []

    # input sequence (t-n, ... t-1)
    for j in range(n_vars):
        for i in range(n_in[j], 0, -1):
            # Positional access keeps this working when the frame has
            # non-integer column labels.
            cols.append(df.iloc[:, j].shift(i))
            names.append('%s %d(t-%d)' % (col_name[j], j + 1, i))

    # forecast sequence (t, t+1, ... t+n)
    for i in range(n_out):
        cols.append(df.shift(-i))
        suffix = '(t)' if i == 0 else '(t+%d)' % i
        names.extend('%s %d%s' % (col_name[j], j + 1, suffix)
                     for j in range(n_vars))

    # put it all together
    agg = pd.concat(cols, axis=1)
    agg.columns = names
    # drop rows with NaN values (the first max-lag rows and the last n_out-1 rows)
    if dropnan:
        agg = agg.dropna()
    return agg, names

In [5]:
# Use the same number of lagged steps for every column of df.
n_lag = 2
lag = [n_lag] * df.shape[1]
# series_to_supervised returns (frame, names); only the frame is kept here.
transform_df1 = series_to_supervised(
    df.values,
    lag,
    n_out=2,
    col_name=df.columns,
)[0]

In [6]:
# Preview the reframed table: lagged columns first, then the t and t+1 columns.
transform_df1.head(n=5)

Unnamed: 0,y 1(t-2),y 1(t-1),x 2(t-2),x 2(t-1),y 1(t),x 2(t),y 1(t+1),x 2(t+1)
2,107.0,88.0,1359.0,1705.0,12,639,150.0,1301.0
3,88.0,12.0,1705.0,639.0,150,1301,71.0,525.0
4,12.0,150.0,639.0,1301.0,71,525,102.0,323.0
5,150.0,71.0,1301.0,525.0,102,323,156.0,61.0
6,71.0,102.0,525.0,323.0,156,61,138.0,330.0


In [7]:
def transform_data(df, n_lag=1, n_out=1):
    """Split a DataFrame of series into lagged inputs X and multi-step targets y.

    Every column of ``df`` is lagged ``n_lag`` steps via
    ``series_to_supervised``. ``X`` holds all lagged columns; ``y`` holds the
    first column of ``df`` at each of the ``n_out`` forecast steps
    (t, t+1, ...).

    Returns
    -------
    (X, y) : tuple of numpy arrays
    """
    n_features = df.shape[1]
    # One integer lag per column, all equal to n_lag.
    lags = [int(v) for v in n_lag * np.ones(n_features)]

    supervised = series_to_supervised(
        df.values, lags, n_out=n_out, col_name=df.columns
    )[0]
    values = supervised.values

    # The first sum(lags) columns are the lagged inputs; the rest are forecasts.
    n_obs = sum(lags)
    X = values[:, :n_obs]
    forecasts = values[:, n_obs:]

    # Forecast columns come in groups of n_features per step; keep only the
    # first column of each group.
    target_idx = n_features * np.arange(n_out)
    y = forecasts[:, target_idx]
    return X, y

In [8]:
# Two lagged steps per column as inputs, two forecast steps as targets.
X, y = transform_data(df=df, n_lag=2, n_out=2)

In [9]:
# Wrap the input array in a DataFrame to preview its first rows.
pd.DataFrame(X).head(n=5)

Unnamed: 0,0,1,2,3
0,107.0,88.0,1359.0,1705.0
1,88.0,12.0,1705.0,639.0
2,12.0,150.0,639.0,1301.0
3,150.0,71.0,1301.0,525.0
4,71.0,102.0,525.0,323.0


In [10]:
# Wrap the target array in a DataFrame to preview its first rows.
pd.DataFrame(y).head(n=5)

Unnamed: 0,0,1
0,12.0,150.0
1,150.0,71.0
2,71.0,102.0
3,102.0,156.0
4,156.0,138.0


# Simple Modelling

In [11]:
from sklearn.multioutput import RegressorChain
from sklearn.linear_model import LinearRegression
from xgboost import XGBRegressor

In [12]:
# Chain one linear regression per target column of y.
linear_model = LinearRegression()
# The estimator is passed positionally: scikit-learn 1.7 deprecates the
# `base_estimator` keyword in favour of `estimator`, and the first positional
# argument works under both names.
chain = RegressorChain(linear_model).fit(X, y)
# NOTE: predictions are made on the same X used for fitting (in-sample).
prediction_lr = chain.predict(X)

In [13]:
# Chain one gradient-boosted regressor per target column of y.
xgb = XGBRegressor()
# The estimator is passed positionally: scikit-learn 1.7 deprecates the
# `base_estimator` keyword in favour of `estimator`, and the first positional
# argument works under both names.
# NOTE: this rebinds `chain`, replacing the chain fitted in the previous cell.
chain = RegressorChain(xgb).fit(X, y)
# NOTE: predictions are made on the same X used for fitting (in-sample).
prediction_xgb = chain.predict(X)