<a href="https://colab.research.google.com/github/hepuliu/Masters_Thesis/blob/main/LSTMSS.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Master Thesis Simulation - LSTMSS Model

*Flood Prevention with Machine Learning - Hepu Liu*

---

**This Notebook is Dedicated to the LSTM-seq2seq Model Simulations**

### Overall Project Simulation Steps
1. Process discharge data from Waldangelbach Station

2. Process precipitation data from Baiertal Station

3. Build Prediction Models

4. Evaluation of NSE

### Variable Naming Conventions

- Weather Stations Naming: ('p' for precipitation, 'd' for discharge, 'a' for different stations, 'r' for result)

  - da: Waldangelbach Station
  - pa: Baiertal Station
  - pr: combined/resulting precipitation
  - dr: predicted/resulting discharge

- Variable Naming Conventions:
  - df: data frame
  - trs: training set
  - tes: testing set
  - lstm: LSTM
  - cnn: CNN
  - lstmss: LSTM-seq2seq


## Importing Libraries

In [328]:
# importing libraries
import csv
import numpy as np
from numpy import array
from numpy import hstack
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from pandas import DataFrame
from pandas import concat
from math import sqrt
from numpy import concatenate
from numpy import loadtxt
from pandas import DataFrame
from pandas import concat
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import mean_squared_error
from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras.layers import LSTM
from keras.layers import RepeatVector
from keras.layers import TimeDistributed
from sklearn.model_selection import train_test_split
from google.colab import drive
# Mount Google Drive so the dataset paths under /content/drive/MyDrive resolve.
# The bare attribute reference `drive.mount` only echoes the function object
# and mounts nothing, so the function has to be called with the mount point.
drive.mount('/content/drive')

<function google.colab.drive.mount>

## Importing Datasets

In [329]:
# # Making dataframe for LSTMSS
# df_lstmss = pd.read_csv('/content/drive/MyDrive/thesis/dataset/cleaned_df/df_fbp.csv')
# df_lstmss.to_csv('/content/drive/MyDrive/thesis/dataset/cleaned_df/df_lstmss.csv', index=False)
# df_lstmss


In [330]:
# Load the prepared LSTMSS dataframe from Google Drive and preview its first rows
df_lstmss = pd.read_csv(
    filepath_or_buffer='/content/drive/MyDrive/thesis/dataset/cleaned_df/df_lstmss.csv'
)
df_lstmss.head(n=5)

Unnamed: 0,ds,y,temp,rad,preci
0,2007-01-01 00:00:00,0.226,10.0,0.0,2.6
1,2007-01-01 01:00:00,0.248,10.58,0.0,0.8
2,2007-01-01 02:00:00,0.248,11.22,0.0,0.2
3,2007-01-01 03:00:00,0.32,11.42,0.0,0.6
4,2007-01-01 04:00:00,0.346,11.58,0.0,0.0


In [331]:
# Data Processing for Multivariable LSTMSS - Small Testing Sample Set
# df_lstmss = df_lstmss.iloc[:5000, :]
# Index by the 'ds' timestamp column, keep the columns in the order
# temp, rad, preci, y (so 'y' ends up at column index 3), and convert
# the frame to a plain NumPy array.
# NOTE: this rebinds df_lstmss from a DataFrame to an ndarray, so the cell
# cannot be re-run without re-running the loading cell first.
df_lstmss = (
    df_lstmss
    .set_index('ds')
    .loc[:, ['temp', 'rad', 'preci', 'y']]
    .to_numpy()
)
df_lstmss

array([[10.   ,  0.   ,  2.6  ,  0.226],
       [10.58 ,  0.   ,  0.8  ,  0.248],
       [11.22 ,  0.   ,  0.2  ,  0.248],
       ...,
       [ 6.25 ,  0.   ,  0.1  ,  0.232],
       [ 6.26 ,  0.   ,  0.   ,  0.226],
       [ 6.26 ,  0.   ,  0.   ,  0.226]])

## Data Processing

In [332]:
# LSTMSS - split into train and test sets
# The first n rows (9 * 365 * 24) form the training set, everything after
# them is the test set.
n = 9*365*24
# n = 3000
trs_lstmss, tes_lstmss = df_lstmss[:n], df_lstmss[n:]

In [333]:
# LSTMSS
# convert history into inputs and outputs
def to_supervised(train, n_input, n_out, target_col=3):
    """Convert a 2-D history array into overlapping input/output windows.

    A window starts at every row for which both the input span and the
    following output span fit completely inside ``train``; consecutive
    windows are shifted by one row.

    Parameters
    ----------
    train : 2-D array indexed as ``train[row_slice, column]``
        History with one row per time step and one column per variable.
    n_input : int
        Number of consecutive rows in each input window (expected >= 1).
    n_out : int
        Number of rows directly after the input window whose target value
        is collected as the output (expected >= 1).
    target_col : int, optional
        Column index of the variable to predict. Defaults to 3, the column
        this function previously had hard-coded.

    Returns
    -------
    (X, y) : tuple of numpy arrays
        ``X`` has shape ``(n_windows, n_input, n_columns)`` and ``y`` has
        shape ``(n_windows, n_out)``. When the history is too short for a
        single window, both arrays are empty with shape ``(0,)``.
    """
    data = train
    # Number of start rows for which input + output windows both fit.
    n_windows = len(data) - n_input - n_out + 1
    X, y = list(), list()
    # step over the history one time step at a time
    for in_start in range(max(n_windows, 0)):
        # end of the input window == start of the output window
        in_end = in_start + n_input
        out_end = in_end + n_out
        X.append(data[in_start:in_end, :])
        y.append(data[in_end:out_end, target_col])
    return array(X), array(y)

# window sizes: 10 past time steps as input, 3 following target values as output
n_input = 10
n_out = 3
trs_lstmss_X, trs_lstmss_y = to_supervised(trs_lstmss, n_input, n_out)
tes_lstmss_X, tes_lstmss_y = to_supervised(tes_lstmss, n_input, n_out)

# training hyper-parameters
verbose = 2
epochs = 50
batch_size = 42

# network dimensions are read off the windowed training arrays
n_timesteps, n_features = trs_lstmss_X.shape[1:3]
n_outputs = trs_lstmss_y.shape[1]

# give the targets a trailing feature axis: [samples, timesteps, features]
trs_lstmss_y = trs_lstmss_y[:, :, np.newaxis]
tes_lstmss_y = tes_lstmss_y[:, :, np.newaxis]

# encoder LSTM -> repeat the encoding once per output step -> decoder LSTM
# -> per-step dense layers producing one value per output step
model = Sequential([
    LSTM(100, activation='relu', input_shape=(n_timesteps, n_features)),
    RepeatVector(n_outputs),
    LSTM(100, activation='relu', return_sequences=True),
    TimeDistributed(Dense(50, activation='relu')),
    TimeDistributed(Dense(1)),
])
model.compile(optimizer='adam', loss='mse')

# fit network (the returned History object is the cell's displayed value)
model.fit(
    x=trs_lstmss_X,
    y=trs_lstmss_y,
    batch_size=batch_size,
    epochs=epochs,
    verbose=verbose,
)


Epoch 1/50
1877/1877 - 28s - loss: 0.5887 - 28s/epoch - 15ms/step
Epoch 2/50
1877/1877 - 23s - loss: 0.0195 - 23s/epoch - 12ms/step
Epoch 3/50
1877/1877 - 23s - loss: 0.0188 - 23s/epoch - 12ms/step
Epoch 4/50
1877/1877 - 22s - loss: 0.0118 - 22s/epoch - 12ms/step
Epoch 5/50
1877/1877 - 24s - loss: 0.0545 - 24s/epoch - 13ms/step
Epoch 6/50
1877/1877 - 23s - loss: 0.0121 - 23s/epoch - 12ms/step
Epoch 7/50
1877/1877 - 22s - loss: 0.0125 - 22s/epoch - 12ms/step
Epoch 8/50
1877/1877 - 22s - loss: 0.0089 - 22s/epoch - 12ms/step
Epoch 9/50
1877/1877 - 22s - loss: 0.0089 - 22s/epoch - 12ms/step
Epoch 10/50
1877/1877 - 24s - loss: 0.0075 - 24s/epoch - 13ms/step
Epoch 11/50
1877/1877 - 23s - loss: 0.0071 - 23s/epoch - 12ms/step
Epoch 12/50
1877/1877 - 23s - loss: 0.0066 - 23s/epoch - 12ms/step
Epoch 13/50
1877/1877 - 23s - loss: 0.0059 - 23s/epoch - 12ms/step
Epoch 14/50
1877/1877 - 23s - loss: 0.0056 - 23s/epoch - 12ms/step
Epoch 15/50
1877/1877 - 25s - loss: 0.0055 - 25s/epoch - 13ms/step
Epoc

<keras.callbacks.History at 0x7f8cebcdc790>

## LSTM Model Prediction


In [334]:
# LSTMSS Prediction
# Run the fitted model over the training windows first, then the test windows.
trs_lstmss_pred, tes_lstmss_pred = (
    model.predict(windows) for windows in (trs_lstmss_X, tes_lstmss_X)
)


In [335]:
# Collapse predictions and targets to single-column arrays of shape (-1, 1)
# so they can be compared element by element in the metric calculations.
trs_lstmss_pred = np.reshape(trs_lstmss_pred, (-1, 1))
trs_lstmss_y = np.reshape(trs_lstmss_y, (-1, 1))
tes_lstmss_pred = np.reshape(tes_lstmss_pred, (-1, 1))
tes_lstmss_y = np.reshape(tes_lstmss_y, (-1, 1))

In [336]:
def _nse(observed, simulated):
    """Return 1 - sum((simulated - observed)^2) / sum((observed - mean(observed))^2)."""
    squared_error_sum = np.sum((simulated - observed) ** 2)
    squared_deviation_sum = np.sum((observed - np.mean(observed)) ** 2)
    return 1 - (squared_error_sum / squared_deviation_sum)


def _rmse(observed, simulated):
    """Return the square root of the mean squared error between the two arrays."""
    return np.sqrt(mean_squared_error(observed, simulated))


# calculate NSE
nse_trs_lstmss = _nse(trs_lstmss_y, trs_lstmss_pred)
nse_tes_lstmss = _nse(tes_lstmss_y, tes_lstmss_pred)

print('Train NSE: %.3f' % nse_trs_lstmss)
print('Test NSE: %.3f' % nse_tes_lstmss)

# calculate RMSE
rmse_trs_lstmss = _rmse(trs_lstmss_y, trs_lstmss_pred)
rmse_tes_lstmss = _rmse(tes_lstmss_y, tes_lstmss_pred)

print('Train RMSE: %.3f' % rmse_trs_lstmss)
print('Test RMSE: %.3f' % rmse_tes_lstmss)

Train NSE: 0.874
Test NSE: 0.824
Train RMSE: 0.086
Test RMSE: 0.113


## Evaluation

In [337]:
# # LSTM
# # Training Accuracy Evaluations
# # make a prediction
# yhat = model.predict(train_X)
# train_X = train_X.reshape((train_X.shape[0], train_X.shape[2]))
# # invert scaling for forecast
# inv_yhat = concatenate((yhat, train_X[:, 1:]), axis=1)
# inv_yhat = scaler.inverse_transform(inv_yhat)
# inv_yhat = inv_yhat[:,0]
# # invert scaling for actual
# train_y = train_y.reshape((len(train_y), 1))
# inv_y = concatenate((train_y, train_X[:, 1:]), axis=1)
# inv_y = scaler.inverse_transform(inv_y)
# inv_y = inv_y[:,0]

# # calculate RMSE
# rmse = sqrt(mean_squared_error(inv_y, inv_yhat))
# print('Test RMSE: %.3f' % rmse)

# # calculate NSE
# nse = 1-(np.sum((inv_yhat-inv_y)**2)/np.sum((inv_y-np.mean(inv_y))**2))
# print('Test NSE: %.3f' % nse)

# # plot history
# plt.plot(inv_y, label='train')
# plt.plot(inv_yhat, label='predict')
# plt.legend()
# plt.show()

In [338]:
# # LSTM
# # Testing Accuracy Evaluations
# # make a prediction
# yhat = model.predict(test_X)
# test_X = test_X.reshape((test_X.shape[0], test_X.shape[2]))
# # invert scaling for forecast
# inv_yhat = concatenate((yhat, test_X[:, 1:]), axis=1)
# inv_yhat = scaler.inverse_transform(inv_yhat)
# inv_yhat = inv_yhat[:,0]
# # invert scaling for actual
# test_y = test_y.reshape((len(test_y), 1))
# inv_y = concatenate((test_y, test_X[:, 1:]), axis=1)
# inv_y = scaler.inverse_transform(inv_y)
# inv_y = inv_y[:,0]

# # calculate RMSE
# rmse = sqrt(mean_squared_error(inv_y, inv_yhat))
# print('Test RMSE: %.3f' % rmse)

# # calculate NSE
# nse = 1-(np.sum((inv_yhat-inv_y)**2)/np.sum((inv_y-np.mean(inv_y))**2))
# print('Test NSE: %.3f' % nse)

# # plot history
# plt.plot(inv_y, label='train')
# plt.plot(inv_yhat, label='predict')
# plt.legend()
# plt.show()

# Archive

In [339]:
# # NSE Calculation and Plot

# o = np.array([1,2,3,4,5,6,7,8,9,10,2,3,4,5,6,7])
# m = np.array([1.1,2.2,3.2,4,5,6.1,7.2,8.5,8,10.5,1,2,4,5,6,7])
# # nse = 1-(np.sum((p-t)**2)/np.sum((t-np.mean(t))**2))
# # print('Test NSE: %.3f' % nse)
# # plot


# fig= plt.figure(figsize=(14, 4))
# plt.title('Insert Title')
# plt.plot(o, label='observed', color='#00688b', linewidth=0.5)
# plt.plot(m, label='model', color='#ee7600', linewidth=0.5)
# plt.plot([], [], ' ', label='NSE = %.3f' % nse)
# plt.ylabel('y label')
# plt.xlabel('x label')
# plt.legend()
# plt.show()