In [1]:
import os, inspect, sys
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM

CURRENT_DIR = os.path.dirname(inspect.getabsfile(inspect.currentframe()))
ROOT_DIR = os.path.dirname(CURRENT_DIR)
sys.path.insert(0, ROOT_DIR)

from reb.src.pyts import series_to_supervised

Using TensorFlow backend.


In [2]:
ffname = os.path.join(ROOT_DIR, "reb", "data", "ext", "data_monthly_processed.csv")

In [3]:
df = pd.read_csv(ffname, parse_dates=["DATE"])
df.DATE = pd.to_datetime(df.DATE, format="%Y-%m")
df.head()


Unnamed: 0,DATE,PAYEMS,USPRIV,AWHNONAG,CES9091000001,USGOVT,UNEMPLOY,LNS13023622,LNS13023706,LNS13023654,LNS13026511,MVPHGFD027MNFRBDAL,MORTGAGE30US,DGS10
0,1987-01-01,100683,83638,34.7,3060,17045,7892,49.7,11.0,13.5,36.1,1743.4,9.2,7.1
1,1987-02-01,100915,83879,34.9,3064,17036,7865,48.5,13.1,12.6,35.9,1768.3,9.1,7.3
2,1987-03-01,101164,84100,34.7,3072,17064,7862,48.2,12.6,12.5,35.7,1756.0,9.0,7.2
3,1987-04-01,101502,84393,34.7,3073,17109,7542,49.0,12.6,12.8,36.2,1701.8,9.8,8.0
4,1987-05-01,101728,84616,34.8,3078,17112,7574,47.6,12.5,12.4,35.2,1713.6,10.6,8.6


Make a clen copy of data. This allows us to modify freely while we have always the original data for any further reference.

In [4]:
df_original = df.copy() 

Reindex data frame per the time stamps

In [5]:
df.set_index("DATE", inplace=True)
df.head()

Unnamed: 0_level_0,PAYEMS,USPRIV,AWHNONAG,CES9091000001,USGOVT,UNEMPLOY,LNS13023622,LNS13023706,LNS13023654,LNS13026511,MVPHGFD027MNFRBDAL,MORTGAGE30US,DGS10
DATE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
1987-01-01,100683,83638,34.7,3060,17045,7892,49.7,11.0,13.5,36.1,1743.4,9.2,7.1
1987-02-01,100915,83879,34.9,3064,17036,7865,48.5,13.1,12.6,35.9,1768.3,9.1,7.3
1987-03-01,101164,84100,34.7,3072,17064,7862,48.2,12.6,12.5,35.7,1756.0,9.0,7.2
1987-04-01,101502,84393,34.7,3073,17109,7542,49.0,12.6,12.8,36.2,1701.8,9.8,8.0
1987-05-01,101728,84616,34.8,3078,17112,7574,47.6,12.5,12.4,35.2,1713.6,10.6,8.6


In [6]:
# rescale data
values = df.values
values = values.astype("float32")
scaler = MinMaxScaler(feature_range=(0, 1))
values_scaled = scaler.fit_transform(values)
n_variables = values.shape[1]

In [7]:
index_target = 0
# set model parameters
n_lags = 6
n_sequences = 6
n_train = int(values.shape[0] * 0.8)
n_units = 40

In [8]:
# set train parameters
optimizer = "adam"
loss = "mse"
n_epochs = 20
sz_batch = 10
verbose = 1


In [9]:
df_reframed = series_to_supervised(values_scaled, n_lags, n_sequences)
df_reframed.head()

Unnamed: 0,var1(t-6),var2(t-6),var3(t-6),var4(t-6),var5(t-6),var6(t-6),var7(t-6),var8(t-6),var9(t-6),var10(t-6),...,var4(t+5),var5(t+5),var6(t+5),var7(t+5),var8(t+5),var9(t+5),var10(t+5),var11(t+5),var12(t+5),var13(t+5)
6,0.0,0.0,0.894735,0.472574,0.00151,0.244251,0.324675,0.470085,0.569892,0.357401,...,0.565401,0.052181,0.147401,0.142857,0.683761,0.44086,0.252708,0.007567,0.9125,0.9375
7,0.004774,0.005566,1.0,0.4782,0.0,0.241516,0.272727,0.649573,0.473118,0.350181,...,0.563994,0.055201,0.149124,0.12987,0.803419,0.451613,0.238267,0.011244,0.8875,0.9
8,0.009898,0.010669,0.894735,0.489451,0.004698,0.241212,0.25974,0.606838,0.462366,0.34296,...,0.555556,0.059228,0.146692,0.155844,0.717949,0.473118,0.249097,0.013499,0.825,0.8375
9,0.016853,0.017436,0.894735,0.490858,0.012248,0.208793,0.294372,0.606838,0.494624,0.361011,...,0.547117,0.06896,0.141323,0.134199,0.871795,0.473118,0.238267,0.01281,0.825,0.8625
10,0.021504,0.022586,0.947367,0.49789,0.012752,0.212035,0.233766,0.598291,0.451613,0.32491,...,0.538678,0.069799,0.113464,0.069264,0.811966,0.451613,0.187726,0.010095,0.8625,0.9


In [10]:
# create train/valid data
# split into train and test sets
values = df_reframed.values
train_values, valid_values = values[:n_train, :], values[n_train:, :]


In [11]:
# split into input and targets
n_train, n_ = train_values.shape
n_valid, n_ = valid_values.shape
n_features = n_lags * n_variables


In [12]:
# split into input and targets
n_train = train_values.shape[0]
n_valid = valid_values.shape[0]
n_observations = n_lags * n_variables
x_train, y_train = train_values[:, :n_observations], train_values[:, n_observations+index_target:n_train:n_variables]
x_valid, y_valid = valid_values[:, :n_observations], valid_values[:, n_observations+index_target:n_valid:n_variables]
print(f"Train Inputs Shape: {x_train.shape}, Train Targets Shape: {y_train.shape}")
print(f"Valid Inputs Shape: {x_valid.shape}, Valid Targets Shape: {y_valid.shape}")

Train Inputs Shape: (304, 78), Train Targets Shape: (304, 6)
Valid Inputs Shape: (65, 78), Valid Targets Shape: (65, 0)


In [13]:
# reshape data as required by ltsm
x_train = x_train.reshape((n_train, n_lags, n_variables))
x_valid = x_valid.reshape((n_valid, n_lags, n_variables))
print(f"Train Inputs Shape: {x_train.shape}, Train Targets Shape: {y_train.shape}")
print(f"Valid Inputs Shape: {x_valid.shape}, Valid Targets Shape: {y_valid.shape}")

Train Inputs Shape: (304, 6, 13), Train Targets Shape: (304, 6)
Valid Inputs Shape: (65, 6, 13), Valid Targets Shape: (65, 0)


In [14]:
# build model
model = Sequential()
model.add(LSTM(n_units, input_shape=(n_lags, n_variables)))
model.add(Dense(n_sequences))
model.compile(loss=loss, optimizer=optimizer)

In [15]:
# train model
history = model.fit(x_train, y_train,
                    epochs=n_epochs,
                    batch_size=sz_batch,
                    validation_data=(x_valid, y_valid),
                    verbose=verbose,
                    shuffle=False)

ValueError: Error when checking target: expected dense_1 to have shape (6,) but got array with shape (0,)

In [None]:
# plot history
figsize = (12, 7)
titlefontsize = 20
xtickfontsize = 15
ytickfontsize = 15
labelfontsize = 19
legendfontsize = 19
linewidth = 3
fig = plt.figure(figsize=figsize)
ax = fig.subplots(1, 1)
ax.plot(np.arange(1, n_epochs+1), history.history['loss'],
        "-",
        linewidth=linewidth,
        label='Train Loss')
ax.plot(np.arange(1, n_epochs+1), history.history['val_loss'],
        "-",
        linewidth=linewidth,
        label='Valid Loss')
ax.set_xlabel("Epoch #", fontsize=labelfontsize)
ax.set_ylabel("Loss - " + loss.upper(), fontsize=labelfontsize)
ax.tick_params(
    axis='x',          
    which='both',      
    labelsize=xtickfontsize)
ax.tick_params(
    axis='y',    
    labelsize=ytickfontsize)
ax.set_title("Train Loss " +  f"({loss})".upper() + " vs Epoch",
        fontsize=titlefontsize,
        fontweight="bold"
    )
ax.legend(loc="upper right",
          fontsize=legendfontsize,
          framealpha=0.8,
          fancybox=True,
          frameon=True,
          shadow=False,
          edgecolor="k")
ax.set_xlim([0, n_epochs+1])
plt.tight_layout()
fname = f"loss-plot-valid.png"
# fig.savefig(os.path.join(ROOT_DIR, "reports", "figures", fname), transparent=False, dpi=dpi)
plt.show()

In [None]:
# make a prediction
yhat_valid = model.predict(x_valid)

In [None]:
temp = x_valid.reshape((n_valid, n_lags*n_variables))
temp[-n_sequences*n_variables:][:, index_target:n_sequences*n_variables:n_variables] = \
    y_valid.reshape((n_valid, n_sequences))
temp = temp.reshape((-1, n_variables))
y_valid = scaler.inverse_transform(temp)[:, index_target]

temp = x_valid.reshape((n_valid, n_lags*n_variables))
temp[-n_sequences*n_variables:][:, index_target:n_sequences*n_variables:n_variables] = \
    yhat_valid.reshape((n_valid, n_sequences))
temp = temp.reshape((-1, n_variables))
yhat_valid = scaler.inverse_transform(temp)[:, index_target]

In [None]:
n = 12
ncols = 3
fig, axs = plt.subplots(nrows=int(np.ceil(n/ncols)),
                        ncols=ncols,
                        figsize=(12, 22),
                        sharex=True)
for k in range(n):
    i = k // ncols
    j = k % ncols
    axs[i, j].plot(np.arange(1,n_sequences+1),
                   y_valid[i*n_sequences:i*n_sequences+n_sequences],
                   color="blue",
                   label="True")
    axs[i, j].plot(np.arange(1,n_sequences+1),
                   yhat_valid[i*n_sequences:i*n_sequences+n_sequences],
                   color="black",
                   label="Predicted")
    
    
    axs[i, j].legend(loc="best",
                     fontsize=legendfontsize,
                     framealpha=0.8,
                     fancybox=True,
                     frameon=True,
                     shadow=False,
                     edgecolor="k")
    
    axs[i, j].set_ylim([0, 1.1*max(y_valid)])
#     print(y_valid[i*n_sequences:i*n_sequences+n_sequences].shape) 
    