# Reframing Time Series as Supervised Learning

In [1]:
import pandas as pd
import numpy as np

### generating data

In [5]:
# Pair a ten-step series with a copy of itself shifted down one row, so each
# row holds the current value (t) next to the previous one (t-1). The first
# row has no predecessor, so its lag value is NaN.
df = pd.DataFrame({"t": list(range(10))})
df["t-1"] = df["t"].shift(1)
df

Unnamed: 0,t,t-1
0,0,
1,1,0.0
2,2,1.0
3,3,2.0
4,4,3.0
5,5,4.0
6,6,5.0
7,7,6.0
8,8,7.0
9,9,8.0


In [8]:
def series_to_supervised(data, n_in=1, n_out=1, dropnan=True):
    """
    Frame a time series as a supervised learning dataset.

    Arguments:
        data: Sequence of observations: a list, NumPy array, or pandas
            Series/DataFrame. 1-D input is treated as a single variable;
            2-D input is read as one row per time step and one column
            per variable.
        n_in: Number of lag observations as input (X).
        n_out: Number of observations as output (y).
        dropnan: Boolean whether or not to drop rows with NaN values.
    Returns:
        Pandas DataFrame of series framed for supervised learning, with
        columns named var<j>(t-<i>), var<j>(t) and var<j>(t+<i>).
    """
    df = pd.DataFrame(data)
    # Count variables from the frame itself rather than from the input's
    # type, so 1-D arrays, Series and nested lists are all handled.
    n_vars = df.shape[1]
    cols, names = [], []
    # input sequence (t-n, ... t-1)
    for i in range(n_in, 0, -1):
        cols.append(df.shift(i))
        names += ['var%d(t-%d)' % (j + 1, i) for j in range(n_vars)]
    # forecast sequence (t, t+1, ... t+n)
    for i in range(n_out):
        cols.append(df.shift(-i))
        if i == 0:
            names += ['var%d(t)' % (j + 1) for j in range(n_vars)]
        else:
            names += ['var%d(t+%d)' % (j + 1, i) for j in range(n_vars)]
    # put it all together
    agg = pd.concat(cols, axis=1)
    agg.columns = names
    # Shifting leaves NaN in the rows at either edge that have no complete
    # history or future; drop those rows when requested.
    if dropnan:
        agg = agg.dropna()
    return agg

In [10]:
# Univariate example: one lag column (t-1) as input and the current step (t)
# as output, using the function's default window sizes.
values = list(range(10))
data = series_to_supervised(values)
data

Unnamed: 0,var1(t-1),var1(t)
1,0.0,1
2,1.0,2
3,2.0,3
4,3.0,4
5,4.0,5
6,5.0,6
7,6.0,7
8,7.0,8
9,8.0,9


In [11]:
# Univariate example with two lag observations (t-2, t-1) as input for each
# current step (t).
values = list(range(10))
data = series_to_supervised(values, n_in=2)
data

Unnamed: 0,var1(t-2),var1(t-1),var1(t)
2,0.0,1.0,2
3,1.0,2.0,3
4,2.0,3.0,4
5,3.0,4.0,5
6,4.0,5.0,6
7,5.0,6.0,7
8,6.0,7.0,8
9,7.0,8.0,9


In [12]:
# Univariate example with two lag observations as input and two output steps
# (t and t+1).
values = list(range(10))
data = series_to_supervised(values, n_in=2, n_out=2)
data

Unnamed: 0,var1(t-2),var1(t-1),var1(t),var1(t+1)
2,0.0,1.0,2,3.0
3,1.0,2.0,3,4.0
4,2.0,3.0,4,5.0
5,3.0,4.0,5,6.0
6,4.0,5.0,6,7.0
7,5.0,6.0,7,8.0
8,6.0,7.0,8,9.0


In [13]:
# Longer univariate series: five lag observations (t-5 ... t-1) as input and
# the current step (t) as output.
values = list(range(20))
data = series_to_supervised(values, n_in=5, n_out=1)
data

Unnamed: 0,var1(t-5),var1(t-4),var1(t-3),var1(t-2),var1(t-1),var1(t)
5,0.0,1.0,2.0,3.0,4.0,5
6,1.0,2.0,3.0,4.0,5.0,6
7,2.0,3.0,4.0,5.0,6.0,7
8,3.0,4.0,5.0,6.0,7.0,8
9,4.0,5.0,6.0,7.0,8.0,9
10,5.0,6.0,7.0,8.0,9.0,10
11,6.0,7.0,8.0,9.0,10.0,11
12,7.0,8.0,9.0,10.0,11.0,12
13,8.0,9.0,10.0,11.0,12.0,13
14,9.0,10.0,11.0,12.0,13.0,14


### multivariate forecast

In [14]:
# Two parallel observed series covering the same ten time steps.
raw = pd.DataFrame({
    'ob1': list(range(10)),
    'ob2': list(range(50, 60)),
})
raw

Unnamed: 0,ob1,ob2
0,0,50
1,1,51
2,2,52
3,3,53
4,4,54
5,5,55
6,6,56
7,7,57
8,8,58
9,9,59


In [15]:
# Pull the underlying 2-D NumPy array out of the DataFrame: one row per time
# step, one column per observed variable.
values = raw.to_numpy()
values

array([[ 0, 50],
       [ 1, 51],
       [ 2, 52],
       [ 3, 53],
       [ 4, 54],
       [ 5, 55],
       [ 6, 56],
       [ 7, 57],
       [ 8, 58],
       [ 9, 59]])

In [18]:
# Frame the two-variable array with a single lag step and a single output
# step (the function's defaults, spelled out here for clarity).
data = series_to_supervised(values, n_in=1, n_out=1)
data

Unnamed: 0,var1(t-1),var2(t-1),var1(t),var2(t)
1,0.0,50.0,1,51
2,1.0,51.0,2,52
3,2.0,52.0,3,53
4,3.0,53.0,4,54
5,4.0,54.0,5,55
6,5.0,55.0,6,56
7,6.0,56.0,7,57
8,7.0,57.0,8,58
9,8.0,58.0,9,59
