#  Multistep Timeseries LSTM Forecasting - Single Variable - Tensorflow

# Import Libraries

In [1]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
import matplotlib.pyplot as plt
import torch.nn.functional as F
from sklearn.preprocessing import MinMaxScaler
import numpy as np
import matplotlib.pyplot as plt
import datetime
import matplotlib.dates as mdates
from statsmodels.tsa.seasonal import seasonal_decompose
from torch.utils.data import Dataset, DataLoader
from torch.optim.lr_scheduler import CosineAnnealingLR
from torch.optim import AdamW
import time

from pandas import DataFrame
from pandas import Series
from pandas import concat
from pandas import read_csv
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import MinMaxScaler
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from math import sqrt
from matplotlib import pyplot
from numpy import array
from keras.layers import Dropout
from keras.metrics import MeanAbsoluteError

# Inputs

In [2]:
path = "/Users/aparuchuri/Desktop/AI_Ml/fabric-aiml/fabric_stats.csv"
filtered_path ='/Users/aparuchuri/Desktop/AI_Ml/fabric-aiml/filtered_data1.csv'
# configure
n_lag = 30 # Number of observations to look into past per prediction 
n_seq = 4500 # Number of points to predict into future [60,180,540,1620,4860]
n_test = 1 # Number of test samples 
n_epochs = 500  #Number of iterations of training 
n_batch = 1 # Dont Change , Feed entire batch 
n_neurons = 64 # number of learning units per layer (capacity per layer)

# Developing ML Model

In [3]:
#a.) convert time series into supervised learning problem
def series_to_supervised(data, n_in=1, n_out=1, dropnan=True):
    n_vars = 1 if type(data) is list else data.shape[1]
    df = DataFrame(data)
    cols, names = list(), list()
    # input sequence (t-n, ... t-1)
    for i in range(n_in, 0, -1):
        cols.append(df.shift(i))
        names += [('var%d(t-%d)' % (j+1, i)) for j in range(n_vars)]

    # forecast sequence (t, t+1, ... t+n)
    for i in range(0, n_out):
        cols.append(df.shift(-i))
        if i == 0:
            names += [('var%d(t)' % (j+1)) for j in range(n_vars)]
        else:
            names += [('var%d(t+%d)' % (j+1, i)) for j in range(n_vars)]
        # put it all together
        agg = concat(cols, axis=1)
        agg.columns = names
        # drop rows with NaN values
        if dropnan:
            agg.dropna(inplace=True)
        return agg

#b.) create a differenced series

def difference(dataset, interval=1):
    diff = list()
    for i in range(interval, len(dataset)):
        value = dataset[i] - dataset[i - interval]
        diff.append(value)
    return Series(diff)
 
#c.) transform series into train and test sets for supervised learning

def prepare_data(series, n_test, n_lag, n_seq):
    # extract raw values
    raw_values = series.values
    # transform data to be stationary
    diff_series = difference(raw_values, 1)
    diff_values = diff_series.values
    diff_values = diff_values.reshape(len(diff_values), 1)
    # rescale values to -1, 1
    scaler = MinMaxScaler(feature_range=(-1, 1))
    scaled_values = scaler.fit_transform(diff_values)
    scaled_values = scaled_values.reshape(len(scaled_values), 1)
    # transform into supervised learning problem X, y
    supervised = series_to_supervised(scaled_values, n_lag, n_seq)
    supervised_values = supervised.values
    # split into train and test sets
    train, test = supervised_values[0:-n_test], supervised_values[-n_test:]
    return scaler, train, test
 
#d.) Define the LSTM network 

def fit_lstm(train, n_lag, n_seq, n_batch, nb_epoch, n_neurons):
    # reshape training into [samples, timesteps, features]
    X, y = train[:, 0:n_lag], train[:, n_lag:]
    X = X.reshape(X.shape[0], 1, X.shape[1])
    # design network
    model = Sequential()
    model.add(LSTM(n_neurons, batch_input_shape=(n_batch, X.shape[1], X.shape[2]), stateful=True, return_sequences=True))
    model.add(Dropout(0.2))  # Add dropout with a dropout rate of 0.2
    model.add(LSTM(n_neurons, return_sequences=True))
    model.add(Dropout(0.2))  # Add dropout with a dropout rate of 0.2
    model.add(LSTM(n_neurons, return_sequences=True))
    model.add(Dropout(0.2))  # Add dropout with a dropout rate of 0.2
    model.add(LSTM(n_neurons))
    model.add(Dropout(0.2))  # Add dropout with a dropout rate of 0.2
    model.add(Dense(y.shape[1]))
    model.compile(loss='mean_squared_error', optimizer='adam', metrics=[MeanAbsoluteError()])
    # fit network
    for i in range(nb_epoch):
        model.fit(X, y, epochs=1, batch_size=n_batch, verbose=1, shuffle=False)
        model.reset_states()
    return model
 
#e.) forecast with an LSTM,

def forecast_lstm(model, X, n_batch):
    # reshape input pattern to [samples, timesteps, features]
    X = X.reshape(1, 1, len(X))
    # make forecast
    forecast = model.predict(X, batch_size=n_batch)
    # convert to array
    return [x for x in forecast[0, :]]
 
#f.)  evaluate the persistence model

def make_forecasts(model, n_batch, train, test, n_lag, n_seq):
    forecasts = list()
    for i in range(len(test)):
        X, y = test[i, 0:n_lag], test[i, n_lag:]
        # make forecast
        forecast = forecast_lstm(model, X, n_batch)
        # store the forecast
        forecasts.append(forecast)
    return forecasts
 
#g.) invert differenced forecast- reverse the stationary form

def inverse_difference(last_ob, forecast):
    # invert first forecast
    inverted = list()
    inverted.append(forecast[0] + last_ob)
    # propagate difference forecast using inverted first value
    for i in range(1, len(forecast)):
        inverted.append(forecast[i] + inverted[i-1])
    return inverted
 
#h.) inverse data transform on forecasts - reverse the scaled transformation applied earlier on data

def inverse_transform(series, forecasts, scaler, n_test):
    inverted = list()
    for i in range(len(forecasts)):
        # create array from forecast
        forecast = array(forecasts[i])
        forecast = forecast.reshape(1, len(forecast))
        # invert scaling
        inv_scale = scaler.inverse_transform(forecast)
        inv_scale = inv_scale[0, :]
        # invert differencing
        index = len(series) - n_test + i - 1
        last_ob = series.values[index]
        inv_diff = inverse_difference(last_ob, inv_scale)
        # store
        inverted.append(inv_diff)
    return inverted
 
#i.) evaluate the model with RMSE 

def evaluate_forecasts(test, forecasts, n_lag, n_seq):
    all_rmse = []
    for i in range(n_seq):
        actual = [row[i] for row in test]
        predicted = [forecast[i] for forecast in forecasts]
        rmse = sqrt(mean_squared_error(actual, predicted))
        all_rmse.append(rmse)
        print('t+%d RMSE: %f' % ((i+1), rmse))
    return all_rmse
 
#j.) plot the forecasts
def plot_forecasts(series, forecasts, n_test):
    # plot the entire dataset in blue
    pyplot.plot(series.values)
    # plot the forecasts in red
    for i in range(len(forecasts)):
        off_s = len(series) - n_test + i - 1
        off_e = off_s + len(forecasts[i]) + 1
        xaxis = [x for x in range(off_s, off_e)]
        yaxis = [series.values[off_s]] + forecasts[i]
        pyplot.plot(xaxis, yaxis, color='red')
    # show the plot
    pyplot.show()

In [4]:


device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

#Load and clean dataset

sampling_time = 60


# Load the data from the CSV file
df = pd.read_csv(path)
df['time'] = pd.to_datetime(df['time'], unit='ns')
df.set_index('time', inplace=True)

# Data Cleaning

pfe_spec = 'FPC0:PIC0:NPU0:DP0' # can specify other FPCs/PICs
rate_bps_lower_limit = 0
rate_bps_upper_limit = 1.8e12


# Step 1: Drop First 1000 elements
df = df[1000:]

# Filter for one FPC: FPC0:PIC0:NPU0:DP0
df_per_fpc_per_pfe = df[df['fru']== pfe_spec]

# Filter for in-rate-bps >0 and out-rate-bps >0

df_filtered = df_per_fpc_per_pfe[(df_per_fpc_per_pfe['out-rate-bps'] > rate_bps_lower_limit) & (df_per_fpc_per_pfe['out-rate-bps'] < rate_bps_upper_limit)]

df_filtered = df_filtered[3200:] #removing all points before and including straight line in the dataset
df_filtered['out-rate-bps'] = df_filtered['out-rate-bps']/1e12
df_filtered.to_csv(filtered_path, index=False)

cpu


In [5]:
# load the dataset
series = read_csv(filtered_path, usecols=[8], engine='python')
print(series)

      out-rate-bps
0         0.474935
1         0.474480
2         0.474978
3         0.474912
4         0.474406
...            ...
3718      0.494484
3719      0.844184
3720      0.633698
3721      0.633637
3722      0.633774

[3723 rows x 1 columns]


In [None]:
#prepare data
scaler, train, test = prepare_data(series, n_test, n_lag, n_seq)
#fit model
start_time = time.time()
model = fit_lstm(train, n_lag, n_seq, n_batch, n_epochs, n_neurons)
end_time = time.time()
training_time = end_time - start_time
print(f"Total training time: {training_time:.2f} seconds")

#forecasts
forecasts = make_forecasts(model, n_batch, train, test, n_lag, n_seq)
#inverse transform forecasts and test
forecasts = inverse_transform(series, forecasts, scaler, n_test+2)

actual = [row[n_lag:] for row in test]
actual = inverse_transform(series, actual, scaler, n_test+2)

#evaluate forecasts
evaluate_forecasts(actual, forecasts, n_lag, n_seq)


 548/3691 [===>..........................] - ETA: 4s - loss: 0.0167 - mean_absolute_error: 0.0574

In [None]:
#plot forecasts
n_test = 1
plot_forecasts(series, forecasts, n_test)

In [None]:
# Evaluate the model fit on the train dataset
train_forecasts = make_forecasts(model, n_batch, train, train, n_lag, n_seq)
train_forecasts = inverse_transform(series, train_forecasts, scaler, len(train) + n_seq - n_lag - 1)
train_actual = [row[n_lag:] for row in train]
train_actual = inverse_transform(series, train_actual, scaler, len(train) + n_seq - n_lag - 1)

# Evaluate and print RMSE for train dataset
# print("Train Dataset Evaluation:")
# evaluate_forecasts(train_actual, train_forecasts, n_lag, n_seq)


In [None]:
# Plot train forecasts
n_test = 1
plot_forecasts(series, train_forecasts, n_test)

## Theory for Hyper Parameters:

Changing each of the configuration parameters will have different effects on the LSTM model and its predictions. Here's an explanation of what each parameter controls:

n_lag: This parameter defines the number of lag observations (input features) to use for predicting the next time step. Increasing n_lag allows the model to consider more past observations, potentially capturing more complex patterns. However, it also increases the dimensionality of the input data and may require more training data to learn effectively.

n_seq: The n_seq parameter determines the number of forecasted observations (output labels) to predict. Increasing n_seq extends the forecast horizon, allowing the model to make predictions further into the future. However, longer forecast horizons are generally more challenging, and the accuracy of the model's predictions may decrease.

n_test: The n_test parameter specifies the number of samples to reserve for testing the model's performance. It determines how many time steps from the end of the dataset will be used for evaluation. Changing n_test allows you to control the size of the test set and assess the model's accuracy on a specific timeframe.

n_epochs: This parameter defines the number of training epochs, which is the number of times the entire training dataset is passed through the LSTM model during training. Increasing n_epochs allows the model to train for a longer duration, potentially improving its accuracy. However, setting n_epochs too high can lead to overfitting if the model starts memorizing the training data without generalizing well to unseen data.

n_batch: The n_batch parameter determines the batch size, which is the number of samples used in each mini-batch during training. Changing n_batch affects the speed and stability of the training process. Larger batch sizes can lead to faster training, but they may also require more memory. Smaller batch sizes provide more frequent updates to the model's weights but may result in slower training progress.

n_neurons: The n_neurons parameter controls the number of neurons (units) in the LSTM layer. Increasing n_neurons can enable the model to capture more complex patterns in the data but also increases the model's capacity and the number of trainable parameters. Too many neurons may lead to overfitting, while too few neurons may limit the model's ability to learn complex relationships.
By adjusting these configuration parameters, you can experiment with different settings to find the optimal values that balance model complexity, training time, and predictive accuracy for your specific dataset and task. It often requires some trial and error to determine the best combination of these parameters.