In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler, StandardScaler
import warnings
warnings.filterwarnings('ignore')
from scipy import stats
%matplotlib inline
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import Sequential, layers, callbacks
from tensorflow.keras.layers import Dense, LSTM, Dropout, GRU, Bidirectional

In [None]:
# Seed Python's `random`, NumPy and TensorFlow in one call. Seeding only
# TensorFlow leaves the other two generators unseeded, so runs could differ.
SEED = 1234
keras.utils.set_random_seed(SEED)

### DATASET

In [None]:
# Load the raw readings; the file uses ';' as field separator.
df = pd.read_csv('PasutRapi.csv', sep=';')

In [None]:
# Preview the first rows of the raw frame.
df.head()

In [None]:
# Parse timestamps with an explicit day/month/year hour:minute format.
df['Time (UTC)'] = pd.to_datetime(df['Time (UTC)'], format="%d/%m/%Y %H:%M")
# Convert the level column to numbers; anything unparseable becomes NaN
# (errors='coerce').
# NOTE(review): coercion is silent - check the non-null count printed below to
# see how many readings were lost.
df['WaterLevel(m)'] = pd.to_numeric(df['WaterLevel(m)'], errors='coerce')
# Show dtypes and non-null counts after the conversions.
df.info()

In [None]:
# Give the timestamp column a short name; reassign rather than mutate in place.
df = df.rename(columns={'Time (UTC)': 'time'})
df

In [None]:
# Columns to draw against the timestamp.
plot_cols = ['WaterLevel(m)']

# Whole series, indexed by time.
plot_features = df.set_index('time')[plot_cols]
_ = plot_features.plot(subplots=True)

# Zoom on the first 480 rows only.
plot_features = df.set_index('time')[plot_cols].iloc[:480]
_ = plot_features.plot(subplots=True)

### INSPECT AND CLEANUP

In [None]:
# Summary statistics, one row per column.
df.describe().T

In [None]:
# Keep only rows whose level lies strictly between 0.2 and 3.5. NaN readings
# fail both comparisons, so they are dropped as well.
# This builds a new, filtered frame and rebinds `df` to it; nothing is edited
# in place.
level = df['WaterLevel(m)']
df = df.loc[(level > 0.2) & (level < 3.5)]

In [None]:
# Summary statistics again, now on the filtered frame.
df.describe().T

In [None]:
# Columns to draw against the timestamp.
plot_cols = ['WaterLevel(m)']

# Whole filtered series, indexed by time.
plot_features = df.set_index('time')[plot_cols]
_ = plot_features.plot(subplots=True)

# Zoom on the first 480 remaining rows.
plot_features = df.set_index('time')[plot_cols].iloc[:480]
_ = plot_features.plot(subplots=True)

### FEATURE ENGINEERING

In [None]:
# Use the timestamp column as the index and drop it from the columns.
# 'time' was already parsed to datetime when the file was loaded, so it is not
# re-parsed here (the previous format string did not match the data anyway).
# The guard lets the cell be re-run after 'time' has become the index.
if 'time' in df.columns:
    df = df.set_index('time')

In [None]:
# Chronological split: the first 80% of rows train the model, the rest test it.
train_size = int(0.8 * len(df))

train_data, test_data = df.iloc[:train_size], df.iloc[train_size:]

In [None]:
# Fit the scaler on the training rows only, then apply the same scaling to the
# test rows.
scaler = MinMaxScaler()
train_scaled = scaler.fit_transform(train_data)
test_scaled = scaler.transform(test_data)

In [None]:
# Create input dataset
def create_dataset (X, look_back = 1):
    """Cut a sequence into sliding windows and their next-step targets.

    Sample ``k`` consists of ``X[k:k + look_back]`` as input and
    ``X[k + look_back]`` as target, for every ``k`` that leaves room for a
    target.

    Parameters
    ----------
    X : sequence or ndarray
        Ordered observations (rows are time steps).
    look_back : int, default 1
        Number of consecutive steps in each input window.

    Returns
    -------
    tuple of ndarray
        ``(windows, targets)``. Both are empty arrays when ``X`` has no more
        than ``look_back`` rows.
    """
    n_samples = len(X) - look_back
    windows = [X[start:start + look_back] for start in range(n_samples)]
    targets = [X[start + look_back] for start in range(n_samples)]
    return np.array(windows), np.array(targets)
# Number of past time steps in each input window.
LOOK_BACK = 30
X_train, y_train = create_dataset(train_scaled, look_back=LOOK_BACK)
X_test, y_test = create_dataset(test_scaled, look_back=LOOK_BACK)
# Print data shape
for _label, _arr in (
    ('X_train.shape: ', X_train),
    ('y_train.shape: ', y_train),
    ('X_test.shape: ', X_test),
    ('y_test.shape: ', y_test),
):
    print(_label, _arr.shape)

In [None]:
# Create BiLSTM model
def create_bilstm(units, input_shape=None):
    """Build and compile a two-layer bidirectional LSTM regressor.

    Parameters
    ----------
    units : int
        Number of LSTM units per direction in each recurrent layer.
    input_shape : tuple of int, optional
        Shape of one input window, ``(timesteps, n_features)``. When omitted
        it is taken from the notebook-level ``X_train`` (``X_train.shape[1:]``),
        which is what the function used before this parameter existed.

    Returns
    -------
    keras.Sequential
        Model ending in a single-unit ``Dense`` layer, compiled with the Adam
        optimizer and mean-squared-error loss.
    """
    if input_shape is None:
        input_shape = X_train.shape[1:]

    model = Sequential()
    # Declare the input explicitly rather than through the `input_shape=`
    # keyword of the first layer.
    model.add(keras.Input(shape=tuple(input_shape)))
    # First recurrent layer returns the full sequence so the second one can
    # consume it.
    model.add(Bidirectional(LSTM(units=units, return_sequences=True)))
    # Hidden layer
    model.add(Bidirectional(LSTM(units=units)))
    model.add(Dense(1))
    # Compile model
    model.compile(optimizer='adam', loss='mse')
    return model
model_bilstm = create_bilstm(64)
# Create GRU model
def create_gru(units, input_shape=None, dropout=0.2):
    """Build and compile a two-layer GRU regressor with dropout.

    Parameters
    ----------
    units : int
        Number of units in each GRU layer.
    input_shape : tuple of int, optional
        Shape of one input window, ``(timesteps, n_features)``. When omitted
        it is taken from the notebook-level ``X_train`` (``X_train.shape[1:]``),
        which is what the function used before this parameter existed.
    dropout : float, default 0.2
        Rate of the ``Dropout`` layer placed after each GRU layer.

    Returns
    -------
    keras.Sequential
        Model ending in a single-unit ``Dense`` layer, compiled with the Adam
        optimizer and mean-squared-error loss.
    """
    if input_shape is None:
        input_shape = X_train.shape[1:]

    model = Sequential()
    # Declare the input explicitly rather than through the `input_shape=`
    # keyword of the first layer.
    model.add(keras.Input(shape=tuple(input_shape)))
    # First recurrent layer returns the full sequence so the second one can
    # consume it.
    model.add(GRU(units=units, return_sequences=True))
    model.add(Dropout(dropout))
    # Hidden layer
    model.add(GRU(units=units))
    model.add(Dropout(dropout))
    model.add(Dense(units=1))
    # Compile model
    model.compile(optimizer='adam', loss='mse')
    return model
model_gru = create_gru(64)

In [None]:
def fit_model(model, epochs=2, batch_size=16, patience=10, validation_split=0.2):
    """Train ``model`` on the notebook-level ``X_train`` / ``y_train``.

    Parameters
    ----------
    model : keras.Model
        Compiled model to train (modified in place by ``fit``).
    epochs : int, default 2
        Maximum number of passes over the training data.
    batch_size : int, default 16
        Samples per gradient update.
    patience : int, default 10
        Epochs without ``val_loss`` improvement before early stopping.
    validation_split : float, default 0.2
        Fraction of the training data passed to ``fit`` as validation data.

    Returns
    -------
    keras.callbacks.History
        The history object returned by ``model.fit``.

    Notes
    -----
    With the defaults, ``patience`` (10) is larger than ``epochs`` (2), so the
    early-stopping callback can never fire; raise ``epochs`` or lower
    ``patience`` for it to have an effect.
    """
    early_stop = keras.callbacks.EarlyStopping(monitor='val_loss',
                                               patience=patience)
    # shuffle=False keeps the windows in chronological order during training.
    history = model.fit(X_train, y_train,
                        epochs=epochs,
                        validation_split=validation_split,
                        batch_size=batch_size,
                        shuffle=False,
                        callbacks=[early_stop])
    return history
history_gru = fit_model(model_gru)
history_bilstm = fit_model(model_bilstm)

1385/41758 ━━━━━━━━━━━━━━━━━━━━ 39:18 58ms/step - loss: 0.0054

In [None]:
# Map the scaled targets back to the original scale for plotting and metrics.
# NOTE(review): y_test / y_train are overwritten, so this cell is not
# idempotent: running it twice applies the inverse transform twice, and
# re-running the training cell afterwards would fit on unscaled y_train.
# Run it exactly once, after training.
y_test = scaler.inverse_transform(y_test)
y_train = scaler.inverse_transform(y_train)

In [None]:
def plot_loss (history, model_name):
    """Draw training and validation loss per epoch for one fitted model.

    Parameters
    ----------
    history : object with a ``history`` dict
        Must provide the ``'loss'`` and ``'val_loss'`` series.
    model_name : str
        Appended to the figure title.
    """
    _, ax = plt.subplots(figsize=(10, 6))
    for curve in ('loss', 'val_loss'):
        ax.plot(history.history[curve])
    ax.set_title('Model Train vs Validation Loss for ' + model_name)
    ax.set_ylabel('Loss')
    ax.set_xlabel('epoch')
    ax.legend(['Train loss', 'Validation loss'], loc='upper right')

plot_loss(history_gru, 'GRU')
plot_loss(history_bilstm, 'Bidirectional LSTM')

In [None]:
# Make prediction
def prediction(model, X=None):
    """Predict with ``model`` and return the result on the original scale.

    Parameters
    ----------
    model : keras.Model
        Fitted model exposing ``predict``.
    X : array-like, optional
        Input windows to predict on. Defaults to the notebook-level
        ``X_test``, which is what the function always used before.

    Returns
    -------
    ndarray
        Model output passed through ``scaler.inverse_transform`` (the
        notebook-level scaler).
    """
    if X is None:
        X = X_test
    # Named differently from the function so the function is not shadowed.
    scaled_output = model.predict(X)
    return scaler.inverse_transform(scaled_output)
prediction_gru = prediction(model_gru)
prediction_bilstm = prediction(model_bilstm)
# Plot test data vs prediction
def plot_future(prediction, model_name, y_test):
    """Overlay the observed test targets and one model's predictions.

    Parameters
    ----------
    prediction : array-like
        Predicted values, one per test sample.
    model_name : str
        Appended to the figure title.
    y_test : array-like
        Observed values; must have as many samples as ``prediction`` because
        both are drawn against the same x positions.
    """
    predicted = np.asarray(prediction)
    observed = np.asarray(y_test)
    steps = np.arange(len(predicted))

    plt.figure(figsize=(10, 6))
    plt.plot(steps, observed, label='Test   data')
    plt.plot(steps, predicted, label='Prediction')
    plt.title('Test data vs prediction for ' + model_name)
    plt.legend(loc='upper left')
    plt.xlabel('Time (Hour)')
    # The plotted column is 'WaterLevel(m)', i.e. metres rather than millimetres.
    plt.ylabel('Water Level (m)')

plot_future(prediction_gru, 'GRU', y_test)
plot_future(prediction_bilstm, 'Bidirectional LSTM', y_test)

In [None]:
from sklearn.metrics import r2_score,mean_absolute_error,mean_squared_error
from numpy import sqrt

def evaluate_prediction(predictions, actual, model_name):
    """Print regression metrics for one model and return them.

    Parameters
    ----------
    predictions : array-like
        Predicted values.
    actual : array-like
        Observed values with the same number of elements.
    model_name : str
        Printed as the header of the report.

    Returns
    -------
    dict
        Keys ``'r2'``, ``'mse'``, ``'rmse'`` and ``'mae'``.

    Raises
    ------
    ValueError
        If ``predictions`` and ``actual`` hold a different number of elements.
    """
    predictions = np.asarray(predictions, dtype=float)
    actual = np.asarray(actual, dtype=float)
    if predictions.shape != actual.shape:
        if predictions.size != actual.size:
            raise ValueError(
                'predictions and actual differ in size: {} vs {}'.format(
                    predictions.shape, actual.shape))
        # e.g. (n, 1) vs (n,): align the shapes explicitly, otherwise an
        # element-wise comparison would silently broadcast to (n, n).
        predictions = predictions.reshape(actual.shape)

    # Each metric is computed once.
    r2 = r2_score(actual, predictions)
    mse = mean_squared_error(actual, predictions)
    rmse = np.sqrt(mse)
    mae = mean_absolute_error(actual, predictions)

    # Header first, so every figure below it is attributed to the right model.
    print(model_name + ':')
    print('R2 Score: {:.4f}'.format(r2))
    print('Mean Squared Error: {:.4f}'.format(mse))
    print('Mean Absolute Error: {:.4f}'.format(mae))
    print('Root Mean Square Error: {:.4f}'.format(rmse))
    print('')
    return {'r2': r2, 'mse': mse, 'rmse': rmse, 'mae': mae}
# Results are assigned so the returned dict is not echoed as the cell output.
metrics_gru = evaluate_prediction(prediction_gru, y_test, 'GRU')
metrics_bilstm = evaluate_prediction(prediction_bilstm, y_test, 'Bidirectional LSTM')

In [None]:
# Make prediction for new data
# `prediction` is already defined in the "Make prediction" cell above; the
# identical copy that used to live here has been removed so there is a single
# definition to maintain.
# NOTE(review): these calls still predict on X_test, i.e. the test split, not
# on data the notebook has not seen - confirm that is what "new data" means.
prediction_gru = prediction(model_gru)
prediction_bilstm = prediction(model_bilstm)
# Plot history and future
def plot_multi_step(history, prediction1, prediction2):
    """Plot an observed series followed by the GRU and BiLSTM predictions.

    Parameters
    ----------
    history : array-like
        Observed values, drawn at x positions ``0 .. len(history) - 1``.
    prediction1 : array-like
        GRU predictions, drawn starting at x position ``len(history)``.
    prediction2 : array-like
        BiLSTM predictions, drawn starting at x position ``len(history)``.
    """
    observed = np.asarray(history)
    n_history = len(observed)

    plt.figure(figsize=(15, 6))
    plt.plot(np.arange(n_history), observed, label='History')
    for series, label in ((prediction1, 'Forecasted for GRU'),
                          (prediction2, 'Forecasted for BiLSTM')):
        values = np.asarray(series)
        # Each series gets an x-range of its own length, so the two
        # predictions no longer have to be equally long.
        plt.plot(np.arange(n_history, n_history + len(values)), values,
                 label=label)

    plt.legend(loc='upper right')
    plt.xlabel('Time step (hour)')
    # The plotted column is 'WaterLevel(m)', i.e. metres rather than millimetres.
    plt.ylabel('Water Level (m)')

# NOTE(review): `df` already contains the test period, and the predictions were
# made on X_test, so the "forecast" segment is drawn after observations that
# cover the same time span - confirm this is the intended picture.
plot_multi_step(df, prediction_gru, prediction_bilstm)

In [None]:
# Make prediction for new data
# `prediction` is already defined in the "Make prediction" cell above; the
# identical copy that used to live here has been removed so there is a single
# definition to maintain.
prediction_bilstm = prediction(model_bilstm)

In [None]:
# First BiLSTM prediction; slicing with [:1] (rather than [0]) keeps the
# leading axis, so `new` stays an array of length one.
new = np.array(prediction_bilstm)[:1]
# Display it as a DataFrame.
pd.DataFrame(new)

In [None]:
# Last row of df, kept as a one-row DataFrame (not a scalar).
last = df.tail(1)

In [None]:
# Plot history and future
def plot_multi_step(history, prediction1):
    """Plot an observed series followed by the BiLSTM predictions.

    The prediction line is green when the notebook-level ``last`` value is
    greater than the notebook-level ``new`` value, and red otherwise.

    Parameters
    ----------
    history : array-like
        Observed values, drawn at x positions ``0 .. len(history) - 1``.
    prediction1 : array-like
        Predictions, drawn starting at x position ``len(history)``.

    Raises
    ------
    ValueError
        If ``last - new`` does not reduce to exactly one element.
    """
    observed = np.asarray(history)
    forecast = np.asarray(prediction1)
    n_history = len(observed)

    # `DataFrame.bool()` is deprecated; take the single comparison result from
    # the underlying array instead. `.item()` still fails loudly unless there
    # is exactly one element.
    last_above_new = bool(np.asarray((last - new) > 0).item())
    line_color = 'green' if last_above_new else 'red'

    plt.figure(figsize=(15, 6))
    plt.plot(np.arange(n_history), observed, label='History')
    plt.plot(np.arange(n_history, n_history + len(forecast)), forecast,
             label='Forecasted for BiLSTM', color=line_color)
    plt.legend(loc='upper right')
    plt.xlabel('Time step (hour)')
    # The plotted column is 'WaterLevel(m)', i.e. metres rather than millimetres.
    plt.ylabel('Water Level (m)')

plot_multi_step(df, prediction_bilstm)

In [None]:
# Plot history and future
def plot_multi_step(history, prediction1):
    """Plot only the BiLSTM predictions, colored by direction.

    The line is green when the notebook-level ``last`` value is greater than
    the notebook-level ``new`` value, and red otherwise.

    Parameters
    ----------
    history : array-like
        Not used by this variant; kept so existing calls keep working.
    prediction1 : array-like
        Predictions to draw.

    Raises
    ------
    ValueError
        If ``last - new`` does not reduce to exactly one element.
    """
    # `DataFrame.bool()` is deprecated; take the single comparison result from
    # the underlying array instead. `.item()` still fails loudly unless there
    # is exactly one element.
    last_above_new = bool(np.asarray((last - new) > 0).item())
    line_color = 'green' if last_above_new else 'red'

    plt.figure(figsize=(15, 6))
    plt.plot(np.asarray(prediction1),
             label='Forecasted for BiLSTM', color=line_color)
    plt.legend(loc='upper right')
    plt.xlabel('Time step (hour)')
    # The plotted column is 'WaterLevel(m)', i.e. metres rather than millimetres.
    plt.ylabel('Water Level (m)')

plot_multi_step(df, prediction_bilstm)