<a href="https://colab.research.google.com/github/hepuliu/Masters_Thesis/blob/main/LSTM.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Master Thesis Simulation - LSTM Model

*Flood Prevention with Machine Learning - Hepu Liu*

---

**This Notebook is Dedicated to the LSTM Model Simulations**

### Overall Project Simulation Steps
1. Process discharge data from Waldangelbach Station

2. Process precipitation data from Baiertal Station

3. Build Prediction Models

4. Evaluation of NSE

### Variable Naming Conventions

- Weather Stations Naming: ('p' for precipitation, 'd' for discharge, 'a' for different stations, 'r' for result)

  - da: Waldangelbach Station
  - pa: Baiertal Station
  - pr: combined/resulting precipitation
  - dr: predicted/resulting discharge

- Variable Naming Conventions:
  - df: data frame
  - trs: training set
  - tes: testing set
  - lstm: LSTM
  - cnn: CNN
  - lstmss: LSTM-seq2seq


## Importing Libraries

In [91]:
# importing libraries
import csv
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from pandas import DataFrame
from pandas import concat
from math import sqrt
from numpy import concatenate
from numpy import loadtxt
from pandas import DataFrame
from pandas import concat
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import mean_squared_error
from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras.layers import LSTM
from sklearn.model_selection import train_test_split
from google.colab import drive
# Mount Google Drive so the dataset path under /content/drive is readable.
# The call parentheses matter: a bare `drive.mount` only echoes the function
# object and leaves the drive unmounted.
drive.mount('/content/drive')

<function google.colab.drive.mount>

## Importing Datasets

In [92]:
# Load the cleaned LSTM dataframe from the mounted Google Drive and preview
# its first rows.
df_lstm = pd.read_csv(
  '/content/drive/MyDrive/thesis/dataset/cleaned_df/df_lstm.csv',
)
df_lstm.head()

Unnamed: 0,ds,y,temp,rad,preci
0,2007-01-01 00:00:00,0.226,10.0,0.0,2.6
1,2007-01-01 01:00:00,0.248,10.58,0.0,0.8
2,2007-01-01 02:00:00,0.248,11.22,0.0,0.2
3,2007-01-01 03:00:00,0.32,11.42,0.0,0.6
4,2007-01-01 04:00:00,0.346,11.58,0.0,0.0


In [93]:
# Data Processing for Multivariable LSTM - Small Testing Sample Set
# df_lstm = df_lstm.iloc[:1500, :]
# Move the 'ds' column into the index so it is excluded from `df_lstm.values`.
# The guard keeps this cell idempotent: once 'ds' is the index it is no longer
# a column, and calling set_index('ds') a second time would raise a KeyError.
if 'ds' in df_lstm.columns:
  df_lstm = df_lstm.set_index('ds')

## Data Processing

In [94]:
# LSTM
# convert series to supervised learning
def series_to_supervised(data, n_in=1, n_out=1, dropnan=True):
  """Frame a time series as a supervised-learning table of shifted columns.

  Args:
    data: Anything accepted by ``pandas.DataFrame(...)`` - a flat list, a
      nested list of rows, or a 1-D / 2-D array. Each resulting column is
      treated as one variable.
    n_in: Number of lagged steps to emit as inputs (t-n_in ... t-1).
    n_out: Number of steps to emit as outputs (t ... t+n_out-1).
    dropnan: If True, rows containing NaN (introduced at the edges by the
      shifting) are removed.

  Returns:
    DataFrame whose columns are named ``var<j>(t-<i>)``, ``var<j>(t)`` and
    ``var<j>(t+<i>)``, with ``j`` counting variables from 1. The original
    row index is preserved for the rows that remain.
  """
  df = DataFrame(data)
  # Take the variable count from the constructed frame instead of inspecting
  # the input type: a nested list has several columns, and a 1-D array has no
  # shape[1], so the frame is the only reliable source for the column count.
  n_vars = df.shape[1]
  cols, names = list(), list()
  # input sequence (t-n, ... t-1)
  for i in range(n_in, 0, -1):
    cols.append(df.shift(i))
    names += ['var%d(t-%d)' % (j + 1, i) for j in range(n_vars)]
  # forecast sequence (t, t+1, ... t+n)
  for i in range(n_out):
    cols.append(df.shift(-i))
    if i == 0:
      names += ['var%d(t)' % (j + 1) for j in range(n_vars)]
    else:
      names += ['var%d(t+%d)' % (j + 1, i) for j in range(n_vars)]
  # put it all together
  agg = concat(cols, axis=1)
  agg.columns = names
  # drop rows with NaN values
  if dropnan:
    agg = agg.dropna()
  return agg

# assign df values
values = df_lstm.values
# ensure all data is float
values = values.astype('float32')
# normalize features to [0, 1]
# NOTE(review): the scaler is fitted on the full series, i.e. before the
# train/test split performed later in the notebook, so test-period min/max
# values influence the scaling of the training data - confirm this is intended.
scaler = MinMaxScaler(feature_range=(0, 1))
scaled = scaler.fit_transform(values)
# frame as supervised learning: one lagged step in, one step out
df_lstm_reframed = series_to_supervised(scaled, 1, 1)
# drop columns not to be predicted: keep every lagged input column plus the
# first variable at time t, and discard the remaining time-t columns.
# Deriving the cut from the feature count (instead of hard-coded positions
# 5, 6, 7) gives the same result for four features and stays correct if the
# number of input columns changes.
n_features = scaled.shape[1]
df_lstm_reframed = df_lstm_reframed.drop(columns=df_lstm_reframed.columns[n_features + 1:])
df_lstm_reframed

Unnamed: 0,var1(t-1),var2(t-1),var3(t-1),var4(t-1),var1(t)
1,0.020047,0.497151,0.0,0.123223,0.023067
2,0.023067,0.506872,0.0,0.037915,0.023067
3,0.023067,0.517600,0.0,0.009479,0.032953
4,0.032953,0.520952,0.0,0.028436,0.036523
5,0.036523,0.523634,0.0,0.000000,0.036523
...,...,...,...,...,...
105148,0.020047,0.432953,0.0,0.000000,0.020871
105149,0.020871,0.433289,0.0,0.000000,0.023067
105150,0.023067,0.433959,0.0,0.004739,0.020871
105151,0.020871,0.434294,0.0,0.004739,0.020047


In [95]:
# LSTM
# chronological split: the first n rows train the model, the rest test it
values = df_lstm_reframed.values
n = 9*365*24
# n = 1000
trs_lstm, tes_lstm = values[:n], values[n:]
# last column is the target, everything before it is model input
trs_lstm_X = trs_lstm[:, :-1]
trs_lstm_y = trs_lstm[:, -1]
tes_lstm_X = tes_lstm[:, :-1]
tes_lstm_y = tes_lstm[:, -1]
# insert a length-1 timestep axis -> 3D [samples, timesteps, features]
trs_lstm_X = np.expand_dims(trs_lstm_X, axis=1)
tes_lstm_X = np.expand_dims(tes_lstm_X, axis=1)
print(*(arr.shape for arr in (trs_lstm_X, trs_lstm_y, tes_lstm_X, tes_lstm_y)))

(78840, 1, 4) (78840,) (26312, 1, 4) (26312,)


## LSTM Model Prediction


In [96]:
# LSTM
# network: a 50-unit LSTM layer followed by a single-output Dense layer;
# the input shape is (timesteps, features) taken from the training array
model_lstm = Sequential([
  LSTM(50, input_shape=(trs_lstm_X.shape[1], trs_lstm_X.shape[2])),
  Dense(1),
])
model_lstm.compile(optimizer='adam', loss='mae')

# train without shuffling; the test arrays are passed as validation data,
# so the reported val_loss is computed on the test set
history = model_lstm.fit(
  trs_lstm_X,
  trs_lstm_y,
  validation_data=(tes_lstm_X, tes_lstm_y),
  epochs=50,
  batch_size=46,
  shuffle=False,
  verbose=2,
)

# # plot history
# plt.plot(history.history['loss'], label='train')
# plt.plot(history.history['val_loss'], label='test')
# plt.legend()
# plt.show()


# fig= plt.figure(figsize=(14, 4)) - This part doesnt work yet
# plt.title('Insert Title')
# plt.plot(o, label='observed', color='#00688b', linewidth=0.5)
# plt.plot(m, label='model_lstm', color='#ee7600', linewidth=0.5)
# plt.plot([], [], ' ', label='NSE = %.3f' % nse)
# plt.ylabel('y label')
# plt.ylabel('x label')
# plt.legend()
# plt.show()


Epoch 1/50
1714/1714 - 6s - loss: 0.0078 - val_loss: 0.0036 - 6s/epoch - 4ms/step
Epoch 2/50
1714/1714 - 4s - loss: 0.0021 - val_loss: 0.0028 - 4s/epoch - 2ms/step
Epoch 3/50
1714/1714 - 5s - loss: 0.0021 - val_loss: 0.0035 - 5s/epoch - 3ms/step
Epoch 4/50
1714/1714 - 4s - loss: 0.0020 - val_loss: 0.0025 - 4s/epoch - 2ms/step
Epoch 5/50
1714/1714 - 4s - loss: 0.0020 - val_loss: 0.0024 - 4s/epoch - 2ms/step
Epoch 6/50
1714/1714 - 5s - loss: 0.0020 - val_loss: 0.0026 - 5s/epoch - 3ms/step
Epoch 7/50
1714/1714 - 4s - loss: 0.0019 - val_loss: 0.0037 - 4s/epoch - 2ms/step
Epoch 8/50
1714/1714 - 4s - loss: 0.0019 - val_loss: 0.0037 - 4s/epoch - 2ms/step
Epoch 9/50
1714/1714 - 5s - loss: 0.0019 - val_loss: 0.0028 - 5s/epoch - 3ms/step
Epoch 10/50
1714/1714 - 5s - loss: 0.0019 - val_loss: 0.0031 - 5s/epoch - 3ms/step
Epoch 11/50
1714/1714 - 5s - loss: 0.0019 - val_loss: 0.0025 - 5s/epoch - 3ms/step
Epoch 12/50
1714/1714 - 4s - loss: 0.0018 - val_loss: 0.0027 - 4s/epoch - 2ms/step
Epoch 13/50
1

## Evaluation

In [97]:
# LSTM
# Training Accuracy Evaluations
# make a prediction
trs_yhat = model_lstm.predict(trs_lstm_X)
# Collapse the timestep axis into a 2D [samples, features] array under a new
# name. trs_lstm_X / trs_lstm_y are deliberately left untouched so this cell
# can be re-run: overwriting them made a second run fail, because predict()
# would receive a 2D array and shape[2] would no longer exist.
trs_X_2d = trs_lstm_X.reshape((trs_lstm_X.shape[0], trs_lstm_X.shape[2]))
# invert scaling for forecast: the scaler was fitted on all feature columns,
# so the prediction is padded with the remaining input columns before
# inverse_transform and only column 0 is kept
trs_inv_yhat = concatenate((trs_yhat, trs_X_2d[:, 1:]), axis=1)
trs_inv_yhat = scaler.inverse_transform(trs_inv_yhat)[:, 0]
# invert scaling for actual (same padding, with the true target in column 0)
trs_y_col = trs_lstm_y.reshape((len(trs_lstm_y), 1))
trs_inv_y = concatenate((trs_y_col, trs_X_2d[:, 1:]), axis=1)
trs_inv_y = scaler.inverse_transform(trs_inv_y)[:, 0]

# calculate RMSE
rmse = sqrt(mean_squared_error(trs_inv_y, trs_inv_yhat))
print('Train RMSE: %.3f' % rmse)

# calculate NSE: 1 - (sum of squared errors / sum of squared deviations of the
# observations from their mean)
nse = 1-(np.sum((trs_inv_yhat-trs_inv_y)**2)/np.sum((trs_inv_y-np.mean(trs_inv_y))**2))
print('Train NSE: %.3f' % nse)

# # plot history
# plt.plot(inv_y, label='train')
# plt.plot(trs_inv_yhat, label='predict')
# plt.legend()
# plt.show()

Train RMSE: 0.052
Train NSE: 0.954


In [98]:
# LSTM
# Testing Accuracy Evaluations
# make a prediction
tes_yhat = model_lstm.predict(tes_lstm_X)
# Collapse the timestep axis into a 2D [samples, features] array under a new
# name. tes_lstm_X / tes_lstm_y are deliberately left untouched so this cell
# can be re-run: overwriting them made a second run fail, because predict()
# would receive a 2D array and shape[2] would no longer exist.
tes_X_2d = tes_lstm_X.reshape((tes_lstm_X.shape[0], tes_lstm_X.shape[2]))
# invert scaling for forecast: the scaler was fitted on all feature columns,
# so the prediction is padded with the remaining input columns before
# inverse_transform and only column 0 is kept
tes_inv_yhat = concatenate((tes_yhat, tes_X_2d[:, 1:]), axis=1)
tes_inv_yhat = scaler.inverse_transform(tes_inv_yhat)[:, 0]
# invert scaling for actual (same padding, with the true target in column 0)
tes_y_col = tes_lstm_y.reshape((len(tes_lstm_y), 1))
tes_inv_y = concatenate((tes_y_col, tes_X_2d[:, 1:]), axis=1)
tes_inv_y = scaler.inverse_transform(tes_inv_y)[:, 0]

# calculate RMSE
rmse = sqrt(mean_squared_error(tes_inv_y, tes_inv_yhat))
print('Test RMSE: %.3f' % rmse)

# calculate NSE: 1 - (sum of squared errors / sum of squared deviations of the
# observations from their mean)
nse = 1-(np.sum((tes_inv_yhat-tes_inv_y)**2)/np.sum((tes_inv_y-np.mean(tes_inv_y))**2))
print('Test NSE: %.3f' % nse)

# # plot history
# plt.plot(inv_y, label='train')
# plt.plot(tes_inv_yhat, label='predict')
# plt.legend()
# plt.show()

Test RMSE: 0.054
Test NSE: 0.961


# Archive

In [99]:
# # NSE Calculation and Plot

# o = np.array([1,2,3,4,5,6,7,8,9,10,2,3,4,5,6,7])
# m = np.array([1.1,2.2,3.2,4,5,6.1,7.2,8.5,8,10.5,1,2,4,5,6,7])
# # nse = 1-(np.sum((p-t)**2)/np.sum((t-np.mean(t))**2))
# # print('Test NSE: %.3f' % nse)
# # plot


# fig= plt.figure(figsize=(14, 4))
# plt.title('Insert Title')
# plt.plot(o, label='observed', color='#00688b', linewidth=0.5)
# plt.plot(m, label='model', color='#ee7600', linewidth=0.5)
# plt.plot([], [], ' ', label='NSE = %.3f' % nse)
# plt.ylabel('y label')
# plt.ylabel('x label')
# plt.legend()
# plt.show()