In [1]:
import pandas as pd
import numpy as np
from sklearn.metrics import mean_absolute_percentage_error as mape
import os

## Dataset

In [2]:
# Relative directory that holds the input CSV files.
BASE_DIR_PATH = '../Datasets'
# Full path of the combined Sto_Nino / Montalban dataset read below.
DATASET_FILE = os.path.join(
    BASE_DIR_PATH,
    '2016_2017_sto_nino_and_montalban.csv',
)

In [3]:
# Read the whole dataset into memory; the bare name on the last line
# renders a truncated preview of the frame.
df = pd.read_csv(filepath_or_buffer=DATASET_FILE)
df

Unnamed: 0.1,Unnamed: 0,datetime,month,day,hour,t,Station,x,Rainfall_Aries,Rainfall_Boso,Rainfall_Campana,Rainfall_Nangka,Rainfall_Oro,Waterlevel,Discharge,Cross_Section,Velocity,fric_coeff,slope
0,0,2016-01-01 00:00:00,1,1,0,0.0,Montalban,0,0,1,2,0,0,21.03,14.842428,630.90,0.023526,0.035,0.002000
1,1,2016-01-01 00:00:00,1,1,0,0.0,Sto_Nino,14420,0,1,2,0,0,12.18,21.033407,803.88,0.026165,0.033,0.000667
2,2,2016-01-01 01:00:00,1,1,1,3600.0,Montalban,0,0,1,1,1,0,21.03,14.842428,630.90,0.023526,0.035,0.002000
3,3,2016-01-01 01:00:00,1,1,1,3600.0,Sto_Nino,14420,0,1,1,1,0,12.19,21.280072,804.54,0.026450,0.033,0.000667
4,4,2016-01-01 02:00:00,1,1,2,7200.0,Montalban,0,1,1,1,0,1,21.03,14.842428,630.90,0.023526,0.035,0.002000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
35035,35035,2017-12-31 21:00:00,12,31,21,63147600.0,Sto_Nino,14420,0,0,0,0,0,12.44,28.244204,821.04,0.034401,0.033,0.000667
35036,35036,2017-12-31 22:00:00,12,31,22,63151200.0,Montalban,0,0,0,0,0,0,21.18,17.224575,635.40,0.027108,0.035,0.002000
35037,35037,2017-12-31 22:00:00,12,31,22,63151200.0,Sto_Nino,14420,0,0,0,0,0,12.44,28.244204,821.04,0.034401,0.033,0.000667
35038,35038,2017-12-31 23:00:00,12,31,23,63154800.0,Montalban,0,0,0,0,0,0,21.18,17.224575,635.40,0.027108,0.035,0.002000


In [4]:
# Only the station label and its water level are used from here on,
# so narrow the frame to those two columns.
columns_of_interest = ['Station', 'Waterlevel']
df = df.loc[:, columns_of_interest]
df

Unnamed: 0,Station,Waterlevel
0,Montalban,21.03
1,Sto_Nino,12.18
2,Montalban,21.03
3,Sto_Nino,12.19
4,Montalban,21.03
...,...,...
35035,Sto_Nino,12.44
35036,Montalban,21.18
35037,Sto_Nino,12.44
35038,Montalban,21.18


In [5]:
# Hold out the last 25% of the rows (by position) as the test split;
# the first 75% is skipped here.
test_start = int(0.75 * len(df))
test_df = df.iloc[test_start:]
test_df

Unnamed: 0,Station,Waterlevel
26280,Montalban,21.26
26281,Sto_Nino,11.89
26282,Montalban,21.26
26283,Sto_Nino,11.87
26284,Montalban,21.25
...,...,...
35035,Sto_Nino,12.44
35036,Montalban,21.18
35037,Sto_Nino,12.44
35038,Montalban,21.18


In [6]:
# Summarise the test split: index range, row count, and per-column
# non-null counts and dtypes.
test_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8760 entries, 26280 to 35039
Data columns (total 2 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   Station     8760 non-null   object 
 1   Waterlevel  8760 non-null   float64
dtypes: float64(1), object(1)
memory usage: 137.0+ KB


In [7]:
# Keep only the Sto_Nino rows of the test split and renumber them from 0
# so they can be compared position-by-position with the prediction files.
is_sto_nino = test_df['Station'] == 'Sto_Nino'
sto_nino_df = test_df.loc[is_sto_nino].reset_index(drop=True)
sto_nino_df

Unnamed: 0,Station,Waterlevel
0,Sto_Nino,11.89
1,Sto_Nino,11.87
2,Sto_Nino,11.85
3,Sto_Nino,11.84
4,Sto_Nino,11.84
...,...,...
4375,Sto_Nino,12.44
4376,Sto_Nino,12.44
4377,Sto_Nino,12.44
4378,Sto_Nino,12.44


In [8]:
# Keep only the Montalban rows of the test split and renumber them from 0
# so they can be compared position-by-position with the prediction files.
is_montalban = test_df['Station'] == 'Montalban'
montalban_df = test_df.loc[is_montalban].reset_index(drop=True)
montalban_df

Unnamed: 0,Station,Waterlevel
0,Montalban,21.26
1,Montalban,21.26
2,Montalban,21.25
3,Montalban,21.24
4,Montalban,21.00
...,...,...
4375,Montalban,21.18
4376,Montalban,21.18
4377,Montalban,21.18
4378,Montalban,21.18


In [9]:
# Ground-truth water levels per station; these are the y_true inputs
# for every MAPE computed further down.
sto_nino_true = sto_nino_df.loc[:, 'Waterlevel']
montalban_true = montalban_df.loc[:, 'Waterlevel']

## Predictions

### Univariate vs. Multivariate LSTM vs. Window 1, Sto Nino predictions

In [10]:
# Univariate LSTM predictions. header=None: the first line of the file is
# data, not column names, so the column is labelled explicitly.
uni_lstm_pred = pd.read_csv(
    'results/univariate_lstm_predictions.csv',
    header=None,
    names=['Waterlevel'],
)
uni_lstm_pred

Unnamed: 0,Waterlevel
0,12.1242
1,12.4815
2,12.7188
3,12.7825
4,12.7183
...,...
4369,12.4478
4370,12.4478
4371,12.4478
4372,12.4478


In [11]:
# Multivariate LSTM predictions. header=None: the first line of the file is
# data, not column names, so the column is labelled explicitly.
multi_lstm_pred = pd.read_csv(
    'results/multivariate_lstm_predictions.csv',
    header=None,
    names=['Waterlevel'],
)
multi_lstm_pred

Unnamed: 0,Waterlevel
0,12.8262
1,12.5147
2,12.4972
3,12.4814
4,12.3783
...,...
4369,12.4038
4370,12.4038
4371,12.4038
4372,12.4038


In [12]:
# Drop the first 6 true values and renumber from 0 so the truth has the same
# length as the uni-/multivariate LSTM prediction files it is scored against.
# NOTE(review): presumably the models consume 6 prior observations before
# their first forecast -- confirm against the training notebook.
sto_nino_true_minus6 = sto_nino_true.iloc[6:].reset_index(drop=True)
sto_nino_true_minus6

0       12.63
1       12.81
2       12.80
3       12.68
4       12.51
        ...  
4369    12.44
4370    12.44
4371    12.44
4372    12.44
4373    12.44
Name: Waterlevel, Length: 4374, dtype: float64

In [13]:
# MAPE of the univariate LSTM on Sto_Nino. The metric divides by the true
# values, so argument order matters; scikit-learn returns a fraction
# (0.01 == 1%), not a value scaled to 0-100.
mape(y_true=sto_nino_true_minus6, y_pred=uni_lstm_pred)

0.0029477697176881886

In [14]:
# MAPE of the multivariate LSTM on Sto_Nino, scored against the same trimmed
# truth as the univariate model. The result is a fraction (0.01 == 1%).
mape(y_true=sto_nino_true_minus6, y_pred=multi_lstm_pred)

0.005641047051748804

In [15]:
# Window/lag-1 LSTM predictions for Sto_Nino. header=None: the first line of
# the file is data, not column names, so the column is labelled explicitly.
window1_lstm_pred = pd.read_csv(
    'results/window_lag1_sto_nino.csv',
    header=None,
    names=['Waterlevel'],
)
window1_lstm_pred

Unnamed: 0,Waterlevel
0,11.9897
1,11.9600
2,11.9264
3,11.8698
4,11.9328
...,...
4374,12.6201
4375,12.6187
4376,12.6267
4377,12.6324


In [16]:
# Drop the first true value and renumber from 0 so the truth has the same
# length as the window/lag-1 prediction file it is scored against.
sto_nino_true_minus1 = sto_nino_true.iloc[1:].reset_index(drop=True)
sto_nino_true_minus1

0       11.87
1       11.85
2       11.84
3       11.84
4       12.26
        ...  
4374    12.44
4375    12.44
4376    12.44
4377    12.44
4378    12.44
Name: Waterlevel, Length: 4379, dtype: float64

In [17]:
# MAPE of the window/lag-1 LSTM on Sto_Nino; the result is a fraction
# (0.01 == 1%), with the ground truth passed as y_true.
mape(y_true=sto_nino_true_minus1, y_pred=window1_lstm_pred)

0.012258915314412939

### Lag 1 vs. Window 6, Montalban predictions

In [18]:
# Lag-1 (no window) predictions for Montalban. header=None: the first line of
# the file is data, not column names, so the column is labelled explicitly.
lag1_montalban_pred = pd.read_csv(
    'results/no_window_lag1_montalban.csv',
    header=None,
    names=['Waterlevel'],
)
lag1_montalban_pred

Unnamed: 0,Waterlevel
0,21.1852
1,21.1718
2,21.1777
3,21.3101
4,20.8579
...,...
4375,21.1458
4376,21.1230
4377,21.0986
4378,21.0689


In [27]:
# Display the untrimmed Montalban ground-truth series, which is what the
# lag-1 (no window) predictions are scored against.
montalban_true

0       21.26
1       21.26
2       21.25
3       21.24
4       21.00
        ...  
4375    21.18
4376    21.18
4377    21.18
4378    21.18
4379    21.18
Name: Waterlevel, Length: 4380, dtype: float64

In [20]:
# MAPE of the lag-1 (no window) Montalban predictions against the full test
# truth; the result is a fraction (0.01 == 1%).
# NOTE(review): unlike the other comparisons, no leading true values are
# dropped here -- confirm the prediction file lines up row-for-row with
# montalban_true.
mape(y_true=montalban_true, y_pred=lag1_montalban_pred)

0.007989598638535097

In [21]:
# Window/lag-6 predictions for Montalban. header=None: the first line of the
# file is data, not column names, so the column is labelled explicitly.
window6_montalban_pred = pd.read_csv(
    'results/window_lag6_montalban.csv',
    header=None,
    names=['Waterlevel'],
)
window6_montalban_pred

Unnamed: 0,Waterlevel
0,21.4771
1,20.6538
2,20.7376
3,21.0371
4,21.0394
...,...
4369,21.3919
4370,21.3509
4371,21.2947
4372,21.2220


In [22]:
# Drop the first 6 true values and renumber from 0 so the truth has the same
# length as the window/lag-6 prediction file it is scored against.
montalban_true_minus6 = montalban_true.iloc[6:].reset_index(drop=True)
montalban_true_minus6

0       21.33
1       21.34
2       21.34
3       21.33
4       21.32
        ...  
4369    21.18
4370    21.18
4371    21.18
4372    21.18
4373    21.18
Name: Waterlevel, Length: 4374, dtype: float64

In [23]:
# MAPE of the window/lag-6 Montalban predictions; the result is a fraction
# (0.01 == 1%), with the ground truth passed as y_true.
mape(y_true=montalban_true_minus6, y_pred=window6_montalban_pred)

0.010442554792629687