In [2]:
from pandas import DataFrame
from pandas import concat
import pandas

# One-Step Univariate Forecasting

In [3]:
# data is a list or a 2D numpy array
# n_in is number of lag observations
# n_out is the number of observations as output
def series_to_supervised(data, n_in=1, n_out=1, dropnan=True):
    """Reframe a time series as a supervised-learning table.

    Each output row holds ``n_in`` lagged copies of the observations
    (columns ``varK(t-n_in)`` ... ``varK(t-1)``) followed by ``n_out``
    copies looking forward (columns ``varK(t)``, ``varK(t+1)``, ...),
    where ``K`` is the 1-based position of the variable in ``data``.

    Args:
        data: Anything ``DataFrame(data)`` accepts: a flat list or 1D
            array (one variable), or a list of rows / 2D array / DataFrame
            (one variable per column).
        n_in: Number of lag observations used as input columns.
        n_out: Number of observations used as output columns; the first
            one is the current step ``t``.
        dropnan: If true, rows containing NaN (introduced by shifting at
            the start and end of the series) are dropped.

    Returns:
        A DataFrame with ``(n_in + n_out) * n_vars`` columns.

    Raises:
        ValueError: If ``n_in`` and ``n_out`` together request no columns.
    """
    df = DataFrame(data)
    # Count variables on the constructed frame instead of inspecting the
    # type of ``data``: a list of rows is multivariate, and a 1D array or
    # Series has no ``shape[1]`` to read.
    n_vars = df.shape[1]
    cols, names = [], []
    # input sequence (t-n_in, ..., t-1)
    for i in range(n_in, 0, -1):
        cols.append(df.shift(i))
        names += ['var%d(t-%d)' % (j + 1, i) for j in range(n_vars)]
    # forecast sequence (t, t+1, ..., t+n_out-1)
    for i in range(n_out):
        cols.append(df.shift(-i))
        if i == 0:
            names += ['var%d(t)' % (j + 1) for j in range(n_vars)]
        else:
            names += ['var%d(t+%d)' % (j + 1, i) for j in range(n_vars)]
    if not cols:
        raise ValueError(
            'n_in and n_out must request at least one column '
            '(got n_in=%r, n_out=%r)' % (n_in, n_out)
        )
    # all together
    agg = concat(cols, axis=1)
    agg.columns = names
    # drop rows with NaN values (rebinding rather than mutating in place)
    if dropnan:
        agg = agg.dropna()
    return agg


In [4]:
# Smoke test: frame the series 0..9 using two lag observations as inputs
values = list(range(10))
data = series_to_supervised(values, n_in=2)
data

Unnamed: 0,var1(t-2),var1(t-1),var1(t)
2,0.0,1.0,2
3,1.0,2.0,3
4,2.0,3.0,4
5,3.0,4.0,5
6,4.0,5.0,6
7,5.0,6.0,7
8,6.0,7.0,8
9,7.0,8.0,9


# Multi-Step or Sequence Forecasting


In [5]:
values = list(range(10))
# Passing n_out as well adds forecast columns to each row:
# every t-n column is an input and every t+n column is a forecast;
# the current observation (t) is treated as an output
data = series_to_supervised(values, n_in=2, n_out=2)
print(data)

   var1(t-2)  var1(t-1)  var1(t)  var1(t+1)
2        0.0        1.0        2        3.0
3        1.0        2.0        3        4.0
4        2.0        3.0        4        5.0
5        3.0        4.0        5        6.0
6        4.0        5.0        6        7.0
7        5.0        6.0        7        8.0
8        6.0        7.0        8        9.0


# Multivariate Forecasting

In [8]:
# Two parallel observation series: ob1 = 0..9 and ob2 = 50..59
raw = DataFrame({'ob1': list(range(10)), 'ob2': list(range(50, 60))})
values = raw.values
# Reframe the multivariate dataset with 1 lag step as input and 2 output steps
data = series_to_supervised(values, n_in=1, n_out=2)
print(data)

   var1(t-1)  var2(t-1)  var1(t)  var2(t)  var1(t+1)  var2(t+1)
1        0.0       50.0        1       51        2.0       52.0
2        1.0       51.0        2       52        3.0       53.0
3        2.0       52.0        3       53        4.0       54.0
4        3.0       53.0        4       54        5.0       55.0
5        4.0       54.0        5       55        6.0       56.0
6        5.0       55.0        6       56        7.0       57.0
7        6.0       56.0        7       57        8.0       58.0
8        7.0       57.0        8       58        9.0       59.0
