In [13]:
import numpy as np
import pandas
import pylab as plt  # matplotlib的一个子包
from sklearn.preprocessing import MinMaxScaler
from math import sqrt
from sklearn.utils import shuffle
from scipy.sparse import coo_matrix
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error # 平方绝对误差
from sklearn.metrics import r2_score  # R square

import sys 
# Put the project root on sys.path so the `utils` imports below can be resolved.
# The location is an absolute path specific to the author's machine.
# NOTE(review): this is a raw string that also doubles every backslash, so the
# resulting path literally contains `\\` pairs; presumably Windows tolerates the
# repeated separators — confirm, and consider a configurable project root.
sys.path.append(r'C:\\Users\\demo\\workplace\\Time-Series-Prediction-with-LSTM\\')  # must be an absolute path
from utils import eemd_tools, data_tools, networks_factory, data_metrics
from utils.constants import const

In [14]:
# Seed NumPy's global random generator so NumPy-driven randomness repeats between runs.
np.random.seed(seed=7)

# 导入数据

In [15]:
# Read the multi-column series stored under the project's data directory.
data_multi_path = const.PROJECT_DIR + "data/eemd/apple/data_multi.npy"
data_multi = np.load(data_multi_path)

# `shape` is an attribute of the array, not a method call.
print("# shape", data_multi.shape)

# shape (1901, 4)


## Data preprocessing: scale features to the range (-1, 1)

In [16]:
# Scale every column of data_multi into the range [-1, 1].
scaler = MinMaxScaler(feature_range=(-1, 1))
scaled = scaler.fit_transform(data_multi)

# Third argument of series_to_supervised
# (presumably the number of forecast steps — TODO confirm against data_tools).
output = 1

# Look-back window sizes to evaluate.
# Other grids that were tried: 5..40 step 5, 10..60 step 10, or [10] as a quick single test.
lags = [5, 7, 9, 11, 13, 15, 17, 19, 21]

# Negative positions of the trailing columns to discard: the last
# (n_columns - 1) columns of each reframed table, i.e. the ones we do not
# want to predict. This does not depend on the lag, so it is computed once.
index_drop = [-j - 1 for j in range(data_multi.shape[1] - 1)]

# One supervised-learning array per lag, in the same order as `lags`.
multi_data_supervised = []
for lag in lags:
    reframed = data_tools.series_to_supervised(scaled, lag, output)
    # Rebind instead of mutating in place so the helper's return value is untouched.
    reframed = reframed.drop(reframed.columns[index_drop], axis=1)
    multi_data_supervised.append(reframed.values)
    print("# shape:", reframed.shape)
    # Sanity check: framing with `lag` look-back steps should cost exactly `lag` rows.
    print(len(data_multi) == len(reframed) + lag)

print("-----------done--------------")
print(len(multi_data_supervised))

# shape: (1896, 21)
True
# shape: (1894, 29)
True
# shape: (1892, 37)
True
# shape: (1890, 45)
True
# shape: (1888, 53)
True
# shape: (1886, 61)
True
# shape: (1884, 69)
True
# shape: (1882, 77)
True
# shape: (1880, 85)
True
-----------done--------------
9


# LSTM

In [17]:
# Epoch count and batch size are taken from the constants defined in networks_factory.
n_epoch, n_batch_size = networks_factory.EPOCHS, networks_factory.BATCH_SIZE

# Candidate LSTM layer widths swept by the training loop.
n_lstm_neurons = [4, 8, 16, 24, 32, 40, 48]

## Test set is used for validation; hidden_layers=1

In [18]:
# Grid search: for every look-back window (lag) and every LSTM width, train a
# single-hidden-layer LSTM and print RMSE / MAPE / Dstat for both splits.
# NOTE(review): the test split is also passed to fit() as validation_data
# together with the networks_factory.ES callback, so the "Test" scores are not
# measured on data that was fully held out from training decisions.
for time_steps, data_supervised in zip(lags, multi_data_supervised):
    # Ordered 80/20 split: the first 80% of rows train, the remainder test.
    train_size = int(len(data_supervised) * 0.80)
    train_data = data_supervised[:train_size, :]
    test_data = data_supervised[train_size:, :]

    # The last column is the target; all preceding columns are inputs.
    train_X, train_Y = train_data[:, :-1], train_data[:, -1]
    test_X, test_Y = test_data[:, :-1], test_data[:, -1]

    # Reshape input to be 3D [samples, timesteps, features]. The number of
    # timesteps equals the lag that produced this dataset.
    print("-----------time steps: %d--------------" % time_steps)
    n_dims = train_X.shape[1] // time_steps
    train_X = train_X.reshape((train_X.shape[0], time_steps, n_dims))
    test_X = test_X.reshape((test_X.shape[0], time_steps, n_dims))

    for n_lstm_neuron in n_lstm_neurons:
        print("-----------n_lstm_neuron: %d--------------" % n_lstm_neuron)
        # Create the LSTM network (`lenth` is the keyword spelling the factory expects).
        # `s` and `history` are not read in this cell; they are kept in case later cells use them.
        s, model = networks_factory.create_lstm_model(lstm_neurons=n_lstm_neuron, hidden_layers=1,
                                                      lenth=time_steps, dims=n_dims, n_out=1)

        model.compile(loss='mae', optimizer='adam')
        # Fit the network silently; networks_factory.ES is passed as the only callback.
        history = model.fit(train_X, train_Y, epochs=n_epoch, batch_size=n_batch_size,
                            validation_data=(test_X, test_Y),
                            verbose=0, callbacks=[networks_factory.ES])
        print("# Finished Training...")

        # Make predictions on both splits.
        train_predict = model.predict(train_X)
        test_predict = model.predict(test_X)
        # Invert predictions and targets with the fitted scaler
        # (presumably back to the original data scale — confirm in data_tools).
        inv_trainP, inv_trainY = data_tools.inv_transform_multi(scaler, train_X, train_predict, train_Y)
        inv_testP, inv_testY = data_tools.inv_transform_multi(scaler, test_X, test_predict, test_Y)

        # Calculate RMSE, MAPE, Dstat.
        # NOTE(review): arguments are passed as (prediction, actual). MSE is
        # symmetric so RMSE is unaffected, but confirm that data_metrics.MAPE
        # and data_metrics.Dstat expect this order.
        train_rmse = sqrt(mean_squared_error(inv_trainP, inv_trainY))
        test_rmse = sqrt(mean_squared_error(inv_testP, inv_testY))
        print('Train RMSE: %.4f, Test RMSE: %.4f' % (train_rmse, test_rmse))
        train_mape = data_metrics.MAPE(inv_trainP, inv_trainY)
        test_mape = data_metrics.MAPE(inv_testP, inv_testY)
        print('Train MAPE: %.4f, Test MAPE: %.4f' % (train_mape, test_mape))
        train_ds = data_metrics.Dstat(inv_trainP, inv_trainY)
        test_ds = data_metrics.Dstat(inv_testP, inv_testY)
        print('Train Dstat: %.4f, Test Dstat: %.4f' % (train_ds, test_ds))

    print("")

print("# All Done!")

-----------time steps: 5--------------
-----------n_lstm_neuron: 4--------------
Epoch 00286: early stopping
# Finished Training...
Train RMSE: 0.5640, Test RMSE: 0.2283
Train MAPE: 0.0300, Test MAPE: 0.0189
Train Dstat: 0.7122, Test Dstat: 0.8285
-----------n_lstm_neuron: 8--------------
Epoch 00352: early stopping
# Finished Training...
Train RMSE: 0.5698, Test RMSE: 0.2324
Train MAPE: 0.0321, Test MAPE: 0.0200
Train Dstat: 0.6997, Test Dstat: 0.7678
-----------n_lstm_neuron: 16--------------
Epoch 00281: early stopping
# Finished Training...
Train RMSE: 0.5600, Test RMSE: 0.2413
Train MAPE: 0.0304, Test MAPE: 0.0212
Train Dstat: 0.7149, Test Dstat: 0.7203
-----------n_lstm_neuron: 24--------------
Epoch 00291: early stopping
# Finished Training...
Train RMSE: 0.5687, Test RMSE: 0.3152
Train MAPE: 0.0331, Test MAPE: 0.0322
Train Dstat: 0.7003, Test Dstat: 0.6781
-----------n_lstm_neuron: 32--------------
Epoch 00232: early stopping
# Finished Training...
Train RMSE: 0.6641, Test RMSE

Epoch 00283: early stopping
# Finished Training...
Train RMSE: 0.5609, Test RMSE: 0.2450
Train MAPE: 0.0300, Test MAPE: 0.0226
Train Dstat: 0.7213, Test Dstat: 0.7427
-----------n_lstm_neuron: 32--------------
Epoch 00229: early stopping
# Finished Training...
Train RMSE: 0.5600, Test RMSE: 0.2433
Train MAPE: 0.0303, Test MAPE: 0.0222
Train Dstat: 0.7299, Test Dstat: 0.7480
-----------n_lstm_neuron: 40--------------
Epoch 00293: early stopping
# Finished Training...
Train RMSE: 0.5665, Test RMSE: 0.2916
Train MAPE: 0.0338, Test MAPE: 0.0294
Train Dstat: 0.6855, Test Dstat: 0.6631
-----------n_lstm_neuron: 48--------------
Epoch 00257: early stopping
# Finished Training...
Train RMSE: 0.5860, Test RMSE: 0.2861
Train MAPE: 0.0354, Test MAPE: 0.0287
Train Dstat: 0.7339, Test Dstat: 0.7401

-----------time steps: 17--------------
-----------n_lstm_neuron: 4--------------
Epoch 00290: early stopping
# Finished Training...
Train RMSE: 0.5714, Test RMSE: 0.2231
Train MAPE: 0.0307, Test MAPE: 