https://machinelearningmastery.com/time-series-forecasting-long-short-term-memory-network-python/

## One-Step univariate time series forecast

In [1]:
# univariate data preparation
from keras.models import Sequential
from keras.layers import LSTM
from keras.layers import Dense

import pandas as pd
import numpy as np
import myLibrary as mL
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error
from math import sqrt
from matplotlib import pyplot

In [2]:
# Station identifier passed to mL.get_buoy_data; it is also the suffix of the
# column names used further down (e.g. "WTMP_42036").
STATION_ID = "42036"    # 33.61% NaN values -> Best value for GOM and 2022
# Year passed to mL.get_buoy_data (kept as a string).
YEAR = "2022"

In [3]:
# Load the measurements for the configured station and year.
data = mL.get_buoy_data(STATION_ID, YEAR)

# Drop the measurement columns that are not used in this univariate experiment.
# "WTMP" is deliberately absent from the list, so that column is kept.
# The column names are derived from STATION_ID so that changing the station in
# the config cell does not leave stale hard-coded names behind.
unused_measurements = ["WDIR", "WSPD", "WVHT", "APD", "MWD", "PRES", "ATMP", "DEWP"]
data = data.drop(columns=[f"{name}_{STATION_ID}" for name in unused_measurements])

from disc


In [4]:
print(type(data))
print("NaN values: ", data.isna().sum().sum())
# Forward fill: every NaN is replaced by the last valid value above it in the
# same column. `DataFrame.ffill()` replaces the deprecated
# `fillna(method='ffill', inplace=True)` call. NaNs at the very start of a
# column have no predecessor and would stay NaN, hence the check below.
data = data.ffill()
print("Remaining NaN values: ", data.isna().sum().sum())
# msno.matrix(data)

<class 'pandas.core.frame.DataFrame'>
NaN values:  57
Remaining NaN values:  0


In [5]:
# Extract the remaining WTMP column as a plain NumPy array. The column name is
# built from STATION_ID instead of being hard-coded, so it follows the config cell.
raw_values = data[f"WTMP_{STATION_ID}"].values
print(type(raw_values))
raw_values

<class 'numpy.ndarray'>


array([23. , 23. , 23. , ..., 22.1, 22.2, 22.3])

### Transform Time Series to Stationary

Differencing is a preprocessing step that converts the absolute values into delta values, i.e. the change from one observation to the next. This should result in a more skillful forecast.

TODO: Run the experiment without this step as well, to check whether differencing really improves the result!

In [6]:
# create a differenced series
def difference(dataset, interval=1):
    """Return the differences between observations `interval` positions apart.

    Element ``k`` of the result equals ``dataset[k + interval] - dataset[k]``.
    Elements are addressed by position, never by index label, so a pandas
    Series with an arbitrary index works the same way as a NumPy array or list.

    Args:
        dataset: 1-D sequence of numbers (NumPy array, list or pandas Series).
        interval: Non-negative lag between the two observations being subtracted.

    Returns:
        pd.Series with ``max(len(dataset) - interval, 0)`` elements and a
        default integer index.

    Raises:
        ValueError: If `interval` is negative.
    """
    if interval < 0:
        raise ValueError("interval must be non-negative")
    # Converting to an ndarray guarantees positional indexing; `dataset[i]` on
    # a pandas Series would be a label lookup and fail for non-default indexes.
    values = np.asarray(dataset)
    n_diffs = len(values) - interval
    if n_diffs <= 0:
        # Not enough observations to form a single difference.
        return pd.Series([], dtype="float64")
    return pd.Series(values[interval:] - values[:n_diffs])

In [7]:
# invert differenced value
def inverse_difference(history, yhat, interval=1):
    """Convert a predicted delta back into an absolute value.

    Args:
        history: Indexable sequence of absolute observations.
        yhat: Predicted delta to be added to the reference observation.
        interval: Distance from the end of `history` of the observation
            that the delta refers to (``history[-interval]``).

    Returns:
        ``yhat`` plus the observation `interval` positions from the end.
    """
    reference = history[-interval]
    return yhat + reference

In [8]:
# Difference the raw series with the default interval of 1; the result has one
# element fewer than raw_values because the first observation has no predecessor.
diff_series = difference(raw_values)
diff_series

0       0.0
1       0.0
2       0.0
3       0.0
4       0.0
       ... 
8754    0.0
8755    0.0
8756    0.1
8757    0.1
8758    0.1
Length: 8759, dtype: float64

### Transform to Supervised Learning Problem

The LSTM model expects the data to be divided into input (X) and output (y). The model learns a function that maps an input X to an output y. In this example, X is the observation at time step t-1 and y is the value at time step t.

TODO: Consider using a series of n previous time steps as input X.

In [9]:
# frame a sequence as a supervised learning problem
def timeseries_to_supervised(data, lag=1):
    """Build a table of lagged copies of `data` followed by `data` itself.

    The columns are ordered shift(1), shift(2), ..., shift(lag), original.
    Cells that have no value after shifting (the first rows) are set to 0.

    Args:
        data: Anything accepted by ``pd.DataFrame`` (e.g. a pandas Series).
        lag: Number of lagged copies to place in front of the original.

    Returns:
        pd.DataFrame with the lagged columns and the original column(s).
    """
    frame = pd.DataFrame(data)
    lagged = []
    for step in range(1, lag + 1):
        lagged.append(frame.shift(step))
    combined = pd.concat(lagged + [frame], axis=1)
    # shifting introduces NaN at the top of each lagged column
    return combined.fillna(0)

In [10]:
# Frame the differenced series with a lag of 1: each row becomes
# [previous delta, current delta], i.e. [X, y].
supervised = timeseries_to_supervised(diff_series, 1)
# Work on the underlying NumPy array from here on.
supervised_values = supervised.values
supervised_values

array([[0. , 0. ],
       [0. , 0. ],
       [0. , 0. ],
       ...,
       [0. , 0.1],
       [0.1, 0.1],
       [0.1, 0.1]])

### Train / Test split

In [11]:
test_hours = 7 * 24  # one week forecast

# hold out the final `test_hours` rows for testing; everything before them is
# used for training
train = supervised_values[:-test_hours]
test = supervised_values[-test_hours:]

In [12]:
# Display the training split (each row: [lagged delta, target delta]).
train

array([[ 0. ,  0. ],
       [ 0. ,  0. ],
       [ 0. ,  0. ],
       ...,
       [ 0.1, -0.1],
       [-0.1,  0. ],
       [ 0. , -0.1]])

In [13]:
# Display the held-out test split (the last `test_hours` rows).
test

array([[-0.1,  0.1],
       [ 0.1,  0. ],
       [ 0. , -0.1],
       [-0.1,  0. ],
       [ 0. , -0.1],
       [-0.1,  0. ],
       [ 0. ,  0. ],
       [ 0. , -0.1],
       [-0.1,  0. ],
       [ 0. ,  0. ],
       [ 0. , -0.1],
       [-0.1,  0. ],
       [ 0. ,  0. ],
       [ 0. ,  0. ],
       [ 0. ,  0. ],
       [ 0. ,  0. ],
       [ 0. ,  0. ],
       [ 0. ,  0. ],
       [ 0. ,  0. ],
       [ 0. ,  0. ],
       [ 0. ,  0. ],
       [ 0. ,  0. ],
       [ 0. , -0.1],
       [-0.1,  0. ],
       [ 0. , -0.1],
       [-0.1, -0.1],
       [-0.1,  0. ],
       [ 0. , -0.2],
       [-0.2, -0.1],
       [-0.1,  0. ],
       [ 0. ,  0. ],
       [ 0. ,  0. ],
       [ 0. , -0.1],
       [-0.1,  0. ],
       [ 0. ,  0. ],
       [ 0. , -0.1],
       [-0.1,  0. ],
       [ 0. ,  0.1],
       [ 0.1,  0. ],
       [ 0. ,  0.1],
       [ 0.1,  0.1],
       [ 0.1,  0. ],
       [ 0. ,  0.1],
       [ 0.1,  0. ],
       [ 0. ,  0. ],
       [ 0. ,  0. ],
       [ 0. , -0.1],
       [-0.1,

### Transform Time Series to Scale (=Normalisation ?!)
The default activation function for LSTMs is the hyperbolic tangent (tanh), which outputs values between -1 and 1. We use the MinMaxScaler class to rescale our data to this range. To make the experiment fair, the min and max values are determined from the training data only. Otherwise, we would contaminate the experiment with knowledge from the test data.

In [14]:
# sanity check: report the container type of both splits before scaling
for split in (train, test):
    print(type(split))

<class 'numpy.ndarray'>
<class 'numpy.ndarray'>


In [15]:
# scale train and test data to [-1, 1]
def scale(train, test):
    """Fit a MinMaxScaler on `train` and apply it to both splits.

    The scaler is fitted on the training data only, so no information from
    the test data influences the scaling parameters.

    Args:
        train: 2-D array used to fit the scaler and then transformed.
        test: 2-D array transformed with the scaler fitted on `train`.

    Returns:
        Tuple ``(scaler, train_scaled, test_scaled)``.
    """
    scaler = MinMaxScaler(feature_range=(-1, 1)).fit(train)

    def _as_2d(values):
        # no-op for arrays that are already (rows, columns)
        return values.reshape(values.shape[0], values.shape[1])

    train_scaled = scaler.transform(_as_2d(train))
    test_scaled = scaler.transform(_as_2d(test))
    return scaler, train_scaled, test_scaled

In [16]:
# inverse scaling for a forecasted value
def invert_scale(scaler, X, value):
    """Undo the scaling of a single forecasted value.

    The scaler works on complete rows, so the input features `X` and the
    forecast `value` are joined into one row, inverse-transformed together,
    and only the last entry (the forecast) is returned.

    Args:
        scaler: Object providing ``inverse_transform`` for a 2-D array.
        X: Iterable with the scaled input feature(s) of the row.
        value: Scaled forecast belonging to that row.

    Returns:
        The forecast in the scaler's original value range.
    """
    row = list(X)
    row.append(value)
    row_array = np.array(row)
    as_matrix = row_array.reshape(1, len(row_array))
    return scaler.inverse_transform(as_matrix)[0, -1]

In [17]:
# Fit the scaler on the training split and scale both splits; the fitted scaler
# is kept so that forecasts can be transformed back later.
scaler, train_scaled, test_scaled = scale(train, test)

In [18]:
# Display the scaled training split.
train_scaled

array([[ 0.04,  0.04],
       [ 0.04,  0.04],
       [ 0.04,  0.04],
       ...,
       [ 0.12, -0.04],
       [-0.04,  0.04],
       [ 0.04, -0.04]])

In [19]:
# Display the scaled test split (scaled with the parameters fitted on the training split).
test_scaled

array([[-0.04,  0.12],
       [ 0.12,  0.04],
       [ 0.04, -0.04],
       [-0.04,  0.04],
       [ 0.04, -0.04],
       [-0.04,  0.04],
       [ 0.04,  0.04],
       [ 0.04, -0.04],
       [-0.04,  0.04],
       [ 0.04,  0.04],
       [ 0.04, -0.04],
       [-0.04,  0.04],
       [ 0.04,  0.04],
       [ 0.04,  0.04],
       [ 0.04,  0.04],
       [ 0.04,  0.04],
       [ 0.04,  0.04],
       [ 0.04,  0.04],
       [ 0.04,  0.04],
       [ 0.04,  0.04],
       [ 0.04,  0.04],
       [ 0.04,  0.04],
       [ 0.04, -0.04],
       [-0.04,  0.04],
       [ 0.04, -0.04],
       [-0.04, -0.04],
       [-0.04,  0.04],
       [ 0.04, -0.12],
       [-0.12, -0.04],
       [-0.04,  0.04],
       [ 0.04,  0.04],
       [ 0.04,  0.04],
       [ 0.04, -0.04],
       [-0.04,  0.04],
       [ 0.04,  0.04],
       [ 0.04, -0.04],
       [-0.04,  0.04],
       [ 0.04,  0.12],
       [ 0.12,  0.04],
       [ 0.04,  0.12],
       [ 0.12,  0.12],
       [ 0.12,  0.04],
       [ 0.04,  0.12],
       [ 0.

### LSTM Model

The LSTM layer expects input to be in a matrix with the dimensions: [samples, time steps, features].

Samples: independent observations i.e. rows of data
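Time steps: number of consecutive observations of a variable contained in one sample (in this case 1, because each sample holds a single lagged value)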
Features: number of features (in this case 1)

In [20]:
# TODO: understand this in more detail!
# fit an LSTM network to training data
def fit_lstm(train, batch_size, nb_epoch, neurons):
    """Build and train a stateful single-layer LSTM followed by a Dense(1) output.

    Args:
        train: 2-D array; the last column is the target, all preceding
            columns are the input features.
        batch_size: Batch size, used both for the fixed input shape of the
            LSTM layer and for fitting.
        nb_epoch: Number of passes over the training data.
        neurons: First argument passed to the LSTM layer (its number of units).

    Returns:
        The compiled and trained Keras model.
    """
    features, targets = train[:, 0:-1], train[:, -1]
    n_samples, n_features = features.shape[0], features.shape[1]
    # LSTM input layout: [samples, time steps, features] with one time step
    features = features.reshape(n_samples, 1, n_features)

    model = Sequential()
    model.add(LSTM(neurons, batch_input_shape=(batch_size, 1, n_features), stateful=True))
    model.add(Dense(1))
    model.compile(loss='mean_squared_error', optimizer='adam')

    # Train one epoch at a time: the layer is stateful, so its state is reset
    # explicitly after each pass; shuffle=False keeps the samples in time order.
    for _ in range(nb_epoch):
        model.fit(features, targets, epochs=1, batch_size=batch_size, verbose=0, shuffle=False)
        model.reset_states()
    return model

In [21]:
# make a one-step forecast
def forecast_lstm(model, batch_size, X):
    """Predict a single value from one row of input features.

    Args:
        model: Object with a Keras-style ``predict(X, batch_size=...)`` method.
        batch_size: Batch size forwarded to ``model.predict``.
        X: 1-D array with the input feature(s) of one sample.

    Returns:
        The scalar at position ``[0, 0]`` of the model output.
    """
    # one sample, one time step, len(X) features
    model_input = X.reshape(1, 1, len(X))
    prediction = model.predict(model_input, batch_size=batch_size)
    return prediction[0, 0]

In [22]:
# repeat experiment (currently disabled):
#repeats = 5
#error_scores = list()
#for r in range(repeats):
#print("Repeat #", r)
# fit the model
# larger configuration that was tried: fit_lstm(train_scaled, 1, 200, 50)
lstm_model = fit_lstm(train_scaled, batch_size=1, nb_epoch=2, neurons=5)

2023-04-11 21:36:08.412268: W tensorflow/tsl/platform/profile_utils/cpu_utils.cc:128] Failed to get CPU frequency: 0 Hz


In [24]:
# forecast the entire training dataset to build up state for forecasting:
# take the input column and lay it out as [samples, time steps, features]
train_reshaped = train_scaled[:, 0].reshape(-1, 1, 1)
train_reshaped

array([[[ 0.04]],

       [[ 0.04]],

       [[ 0.04]],

       ...,

       [[ 0.12]],

       [[-0.04]],

       [[ 0.04]]])

In [25]:
# Run the whole training input through the model with batch size 1; the
# returned predictions are only displayed here, not stored.
lstm_model.predict(train_reshaped, batch_size=1)



array([[0.02328026],
       [0.02234987],
       [0.02189929],
       ...,
       [0.0259742 ],
       [0.02236029],
       [0.02322393]], dtype=float32)

In [31]:
# Shape check: the feature part of the first test row (everything but the last column).
test_scaled[0, 0:-1].shape

(1,)

In [23]:
# walk-forward validation on the test data
predictions = []
n_test = len(test_scaled)
for i, row in enumerate(test_scaled):
    # split the row into input feature(s) and the expected scaled delta
    # (y is not used below; only X feeds the forecast)
    X, y = row[0:-1], row[-1]
    # make one-step forecast (still scaled and differenced)
    yhat = forecast_lstm(lstm_model, 1, X)  # yhat = predicted y
    # invert scaling
    yhat = invert_scale(scaler, X, yhat)
    # invert differencing: add the delta to the observation that precedes the
    # forecasted time step, counted from the end of raw_values
    yhat = inverse_difference(raw_values, yhat, n_test + 1 - i)
    # store forecast
    predictions.append(yhat)

# report performance against the last `test_hours` raw observations
actual = raw_values[-test_hours:]
mae = mean_absolute_error(actual, predictions)
mse = mean_squared_error(actual, predictions)
print('MAE: ', mae)
print('MSE: ', mse)

MAE:  0.04423395445891926
MSE:  0.003429429850092122


took about 20 min to train ....

## Evaluation

In [None]:
import matplotlib.pyplot as plt

# Plot the forecast against the held-out observations on one set of axes
fig, ax = plt.subplots()
ax.plot(predictions, label="Prediction")
ax.plot(raw_values[-test_hours:], label="Ground Truth")

# Labels, title and legend
ax.set_xlabel("Timesteps")
ax.set_ylabel("Temperature")
ax.set_title("LSTM Prediction")
ax.legend()

# Show the plot
plt.show()

print('MAE: ', mae)
print('MSE: ', mse)