## Time Series to Supervised

In [2]:
import numpy as np

In [3]:
def split_sequence(sequence, n_steps):
    """Turn a sequence into sliding-window inputs and next-value targets.

    For every start index ``s`` that still has an element directly after
    the window, the input is ``sequence[s:s + n_steps]`` and the target is
    ``sequence[s + n_steps]``.

    Parameters
    ----------
    sequence : indexable, sliceable sequence (e.g. list or NumPy array)
        The observations in order.
    n_steps : int
        Number of consecutive observations in each input window.

    Returns
    -------
    tuple of (np.ndarray, np.ndarray)
        ``(X, y)`` where ``X[k]`` is the k-th window and ``y[k]`` is the
        value that immediately follows it.
    """
    total = len(sequence)
    # Keep only the starts whose target index (start + n_steps) is still
    # inside the sequence.
    starts = [s for s in range(total) if s + n_steps <= total - 1]

    windows = [sequence[s:s + n_steps] for s in starts]
    targets = [sequence[s + n_steps] for s in starts]
    return np.array(windows), np.array(targets)

- Feature : A column in a dataset, such as a lag observation for a time series dataset
- Sample : A row in a dataset, such as an input and output sequence for a time series

In [12]:
seq = np.array([1,2,3,4,5,6,7,8,9,10])

In [13]:
X, y = split_sequence(seq, 3)

In [14]:
print(X.shape, y.shape)

(7, 3) (7,)


In [15]:
# Show each input window next to the value that follows it.
for window, target in zip(X, y):
    print(window, target)

[1 2 3] 4
[2 3 4] 5
[3 4 5] 6
[4 5 6] 7
[5 6 7] 8
[6 7 8] 9
[7 8 9] 10


An LSTM layer must specify the shape of its input data. The input to every CNN and LSTM layer must be three-dimensional.
- Sample : One sequence is one sample. A batch is comprised of one or more samples
- Time Steps : One time step is one point of observation in the sample. One sample is comprised of multiple time steps
- Features : One feature is one observation at a time step. One time step is comprised of one or more features.

In [22]:
# [samples, timesteps] -> [samples, timesteps, features]
# Append a trailing axis of length 1 so each time step carries one feature.
X_reshaped = X[:, :, np.newaxis]

In [23]:
X.shape

(7, 3)

In [24]:
X_reshaped.shape

(7, 3, 1)

## Exercise

In [37]:
# Synthetic two-column series with n rows:
# column 0 counts 1..n, column 1 is ten times that count.
n = 5000
data = [[step, step * 10] for step in range(1, n + 1)]

In [38]:
data = np.array(data)

In [39]:
print(data, data.shape)

[[    1    10]
 [    2    20]
 [    3    30]
 ...
 [ 4998 49980]
 [ 4999 49990]
 [ 5000 50000]] (5000, 2)


In [40]:
data = data[:, 1]
# If the time series is uniformly spaced in time and has no missing values,
# the time column can be dropped; only the second column is kept here.
data.shape

(5000,)

### Split

In [41]:
# Cut the series into consecutive, non-overlapping chunks of `length` values.
length = 200
samples = [data[start:start + length] for start in range(0, n, length)]

In [45]:
len(samples)

25

### Reshape Subsequences

In [46]:
data = np.array(samples)

In [47]:
data.shape

(25, 200)

In [48]:
# Add a trailing axis of size 1: [samples, timesteps] -> [samples, timesteps, features].
data = data.reshape((len(samples), length, 1))

In [49]:
data.shape

(25, 200, 1)

In [53]:
data

array([[[   10],
        [   20],
        [   30],
        ...,
        [ 1980],
        [ 1990],
        [ 2000]],

       [[ 2010],
        [ 2020],
        [ 2030],
        ...,
        [ 3980],
        [ 3990],
        [ 4000]],

       [[ 4010],
        [ 4020],
        [ 4030],
        ...,
        [ 5980],
        [ 5990],
        [ 6000]],

       ...,

       [[44010],
        [44020],
        [44030],
        ...,
        [45980],
        [45990],
        [46000]],

       [[46010],
        [46020],
        [46030],
        ...,
        [47980],
        [47990],
        [48000]],

       [[48010],
        [48020],
        [48030],
        ...,
        [49980],
        [49990],
        [50000]]])