In [1]:
import os, inspect, sys
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM

# Resolve the project root from the kernel's working directory.
# Assumes the kernel was started in the notebook's folder (the Jupyter default).
# The working directory is used instead of the current frame's file name because
# recent ipykernel releases compile cells under temporary file names, which
# would make the derived root point at a temp directory.
CURRENT_DIR = os.getcwd()
ROOT_DIR = os.path.dirname(CURRENT_DIR)
# Guarded so that re-running this cell does not stack duplicate path entries.
if ROOT_DIR not in sys.path:
    sys.path.insert(0, ROOT_DIR)

from reb.src.pyts import series_to_supervised

Using TensorFlow backend.


In [2]:
# Path of the processed monthly data set, expressed relative to the project root.
data_path_parts = ("reb", "data", "ext", "data_monthly_processed.csv")
ffname = os.path.join(ROOT_DIR, *data_path_parts)

In [3]:
# Load the monthly series and make sure DATE holds datetimes.
# NOTE(review): parse_dates should already yield datetimes, so the explicit
# conversion looks redundant — confirm against the CSV before removing it.
df = (
    pd.read_csv(ffname, parse_dates=["DATE"])
    .assign(DATE=lambda frame: pd.to_datetime(frame["DATE"], format="%Y-%m"))
)
df.head()


Unnamed: 0,DATE,PAYEMS,USPRIV,AWHNONAG,CES9091000001,USGOVT,UNEMPLOY,LNS13023622,LNS13023706,LNS13023654,LNS13026511,MVPHGFD027MNFRBDAL,MORTGAGE30US,DGS10
0,1987-01-01,100683,83638,34.7,3060,17045,7892,49.7,11.0,13.5,36.1,1743.4,9.2,7.1
1,1987-02-01,100915,83879,34.9,3064,17036,7865,48.5,13.1,12.6,35.9,1768.3,9.1,7.3
2,1987-03-01,101164,84100,34.7,3072,17064,7862,48.2,12.6,12.5,35.7,1756.0,9.0,7.2
3,1987-04-01,101502,84393,34.7,3073,17109,7542,49.0,12.6,12.8,36.2,1701.8,9.8,8.0
4,1987-05-01,101728,84616,34.8,3078,17112,7574,47.6,12.5,12.4,35.2,1713.6,10.6,8.6


Make a clean copy of the data. This lets us modify the working frame freely while always keeping the original data available for reference.

In [4]:
# Independent snapshot of the freshly loaded frame; later cells only touch `df`.
df_original = df.copy(deep=True)

Re-index the data frame by its time stamps.

In [5]:
# Index the working frame by time stamp.
# Built from the untouched copy instead of mutating `df` in place, so this cell
# can be re-run without a KeyError once "DATE" has already become the index.
df = df_original.set_index("DATE")
df.head()

Unnamed: 0_level_0,PAYEMS,USPRIV,AWHNONAG,CES9091000001,USGOVT,UNEMPLOY,LNS13023622,LNS13023706,LNS13023654,LNS13026511,MVPHGFD027MNFRBDAL,MORTGAGE30US,DGS10
DATE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
1987-01-01,100683,83638,34.7,3060,17045,7892,49.7,11.0,13.5,36.1,1743.4,9.2,7.1
1987-02-01,100915,83879,34.9,3064,17036,7865,48.5,13.1,12.6,35.9,1768.3,9.1,7.3
1987-03-01,101164,84100,34.7,3072,17064,7862,48.2,12.6,12.5,35.7,1756.0,9.0,7.2
1987-04-01,101502,84393,34.7,3073,17109,7542,49.0,12.6,12.8,36.2,1701.8,9.8,8.0
1987-05-01,101728,84616,34.8,3078,17112,7574,47.6,12.5,12.4,35.2,1713.6,10.6,8.6


In [6]:
# Rescale every column to the [0, 1] range.
# NOTE(review): the scaler is fit on all rows, i.e. before the train/validation
# split performed further down, so validation data influences the scaling.
values = df.values.astype("float32")
n_variables = values.shape[1]  # number of columns (series) in the frame
scaler = MinMaxScaler(feature_range=(0, 1))
values_scaled = scaler.fit_transform(values)

In [7]:
# column offset of the variable to forecast (0 = first column of the frame)
index_target = 0

# set model parameters
n_lags = 3        # past time steps fed to the model
n_sequences = 3   # future time steps the model emits
n_units = 30      # size of the LSTM layer

# number of leading rows used for training (80% of the scaled series)
train_fraction = 0.8
n_train = int(values_scaled.shape[0] * train_fraction)

In [8]:
# training configuration
n_epochs, sz_batch = 10, 10
optimizer, loss = "adam", "mse"
verbose = 1

# show the dimensions of the scaled data as the cell output
values_scaled.shape

(380, 13)

In [9]:
# Reframe the scaled series as a supervised-learning table using the project helper.
# Judging by the column labels of the result, each row holds the lagged steps
# (starting at t-3) followed by the forecast steps (up to t+2) for all variables.
# NOTE(review): helper semantics are inferred from its output, not its source — confirm.
df_reframed = series_to_supervised(values_scaled, n_lags, n_sequences)
df_reframed.head()

Unnamed: 0,var1(t-3),var2(t-3),var3(t-3),var4(t-3),var5(t-3),var6(t-3),var7(t-3),var8(t-3),var9(t-3),var10(t-3),...,var4(t+2),var5(t+2),var6(t+2),var7(t+2),var8(t+2),var9(t+2),var10(t+2),var11(t+2),var12(t+2),var13(t+2)
3,0.0,0.0,0.894735,0.472574,0.00151,0.244251,0.324675,0.470085,0.569892,0.357401,...,0.507736,0.014765,0.194205,0.277056,0.641026,0.451613,0.361011,0.00343,0.9,0.8625
4,0.004774,0.005566,1.0,0.4782,0.0,0.241516,0.272727,0.649573,0.473118,0.350181,...,0.513361,0.020805,0.181035,0.268398,0.717949,0.483871,0.34657,0.002324,0.875,0.8625
5,0.009898,0.010669,0.894735,0.489451,0.004698,0.241212,0.25974,0.606838,0.462366,0.34296,...,0.520394,0.022819,0.180326,0.212121,0.709402,0.419355,0.317689,0.00383,0.875,0.9125
6,0.016853,0.017436,0.894735,0.490858,0.012248,0.208793,0.294372,0.606838,0.494624,0.361011,...,0.528833,0.016611,0.164218,0.199134,0.709402,0.408602,0.314079,0.0,0.95,0.9875
7,0.021504,0.022586,0.947367,0.49789,0.012752,0.212035,0.233766,0.598291,0.451613,0.32491,...,0.544304,0.039094,0.176882,0.277056,0.675214,0.580645,0.317689,0.005073,1.0,1.0


In [10]:
# Chronological split: the first n_train rows train the model,
# the remaining rows are held out for validation.
values = df_reframed.values
train_values = values[:n_train]
valid_values = values[n_train:]


In [11]:
# split into inputs and targets
n_train = train_values.shape[0]
n_valid = valid_values.shape[0]
# the first n_lags * n_variables columns are the lagged inputs
n_observations = n_lags * n_variables
# The target variable sits at offset `index_target` inside every forecast step,
# i.e. on every n_variables-th column after the lagged inputs. The slice is
# open-ended on purpose: using the row counts (n_train / n_valid) as the column
# stop would silently drop forecast steps whenever a split has fewer rows than
# the reframed table has columns.
target_columns = slice(n_observations + index_target, None, n_variables)
x_train, y_train = train_values[:, :n_observations], train_values[:, target_columns]
x_valid, y_valid = valid_values[:, :n_observations], valid_values[:, target_columns]
# one target column is expected per forecast step
assert y_train.shape[1] == n_sequences, "unexpected number of train target steps"
assert y_valid.shape[1] == n_sequences, "unexpected number of valid target steps"
print(f"Train Inputs Shape: {x_train.shape}, Train Targets Shape: {y_train.shape}")
print(f"Valid Inputs Shape: {x_valid.shape}, Valid Targets Shape: {y_valid.shape}")

Train Inputs Shape: (304, 39), Train Targets Shape: (304, 3)
Valid Inputs Shape: (71, 39), Valid Targets Shape: (71, 3)


In [12]:
# The LSTM layer consumes 3-D input: (samples, time steps, variables).
lstm_step_shape = (n_lags, n_variables)
x_train = x_train.reshape((n_train, *lstm_step_shape))
x_valid = x_valid.reshape((n_valid, *lstm_step_shape))
print(f"Train Inputs Shape: {x_train.shape}, Train Targets Shape: {y_train.shape}")
print(f"Valid Inputs Shape: {x_valid.shape}, Valid Targets Shape: {y_valid.shape}")

Train Inputs Shape: (304, 3, 13), Train Targets Shape: (304, 3)
Valid Inputs Shape: (71, 3, 13), Valid Targets Shape: (71, 3)


In [13]:
# One LSTM layer followed by a dense layer with one output per forecast step.
model = Sequential([
    LSTM(n_units, input_shape=(n_lags, n_variables)),
    Dense(n_sequences),
])
model.compile(optimizer=optimizer, loss=loss)

In [14]:
# Fit on the training split and evaluate on the validation split after each epoch.
# shuffle=False keeps the samples in their original (chronological) order.
fit_options = dict(
    epochs=n_epochs,
    batch_size=sz_batch,
    validation_data=(x_valid, y_valid),
    verbose=verbose,
    shuffle=False,
)
history = model.fit(x_train, y_train, **fit_options)

Train on 304 samples, validate on 71 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
