In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf

from datetime import datetime
from time import time

from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split

from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from keras.layers import Bidirectional
from keras.layers import SimpleRNN
from keras.layers import GRU

from tensorflow.keras.callbacks import Callback

In [None]:
def RNN_dataset(data, n_times, n_features):
    """Cut a 2-D array into overlapping input windows with one target each.

    Window ``k`` contains rows ``k`` to ``k + n_times - 1`` of the first
    ``n_features`` columns of ``data``. Its target is the value in the last
    column of row ``k + n_times``, i.e. the row right after the window.

    Parameters
    ----------
    data : 2-D numpy array
        Samples along the first axis, variables along the second.
    n_times : int
        Number of consecutive rows in each window.
    n_features : int
        Number of leading columns copied into each window.

    Returns
    -------
    X : numpy array, shape (len(data) - n_times, n_times, n_features)
    Y : numpy array, shape (len(data) - n_times,)
    """
    n_windows = len(data) - n_times

    X = np.zeros((n_windows, n_times, n_features))
    Y = np.zeros(n_windows)

    for start in range(n_windows):
        stop = start + n_times

        X[start] = data[start:stop, :n_features]
        # The target is taken from the row that immediately follows the window.
        Y[start] = data[stop, -1]

    return X, Y

def RNN_dataset_pred(data, n_times, n_features):
    """Cut a 2-D array into overlapping input windows (no targets).

    Window ``k`` contains rows ``k`` to ``k + n_times - 1`` of the first
    ``n_features`` columns of ``data``.

    Parameters
    ----------
    data : 2-D numpy array
        Samples along the first axis, variables along the second.
    n_times : int
        Number of consecutive rows in each window.
    n_features : int
        Number of leading columns copied into each window.

    Returns
    -------
    X : numpy array, shape (len(data) - n_times, n_times, n_features)
    """
    n_windows = len(data) - n_times

    X = np.zeros((n_windows, n_times, n_features))

    for start in range(n_windows):
        X[start] = data[start:start + n_times, :n_features]

    return X
    
def preprocessing(data, n_times=24, test_size=0.2):
    """Scale the data, build windowed samples, drop incomplete ones and split.

    The last column of ``data`` is the target; all preceding columns are the
    input features. A ``MinMaxScaler`` is fitted on the full array (features
    and target together), the scaled array is cut into windows with
    ``RNN_dataset``, and every sample whose target or whose window contains a
    NaN in *any* feature is discarded before the train/test split.

    Parameters
    ----------
    data : 2-D numpy array
        Samples along the first axis; features first, target in the last column.
    n_times : int, default 24
        Number of consecutive rows per input window.
    test_size : float, default 0.2
        Forwarded to ``train_test_split``.

    Returns
    -------
    X_train, X_test, Y_train, Y_test : numpy arrays
        Output of ``train_test_split`` (``random_state=4``) on the clean samples.
    scaler : MinMaxScaler
        The scaler fitted on ``data``.
    X_new, Y_new : numpy arrays
        All clean samples, in their original order, before the split.
    """
    scaler = MinMaxScaler()
    scaled = scaler.fit_transform(data)

    n_features = data.shape[-1] - 1

    X, Y = RNN_dataset(scaled, n_times, n_features)

    # A sample is usable only if its target and every value of every feature
    # in its window are present. Checking all features (not just the first
    # two) keeps NaNs out of the training data regardless of n_features.
    incomplete = np.isnan(Y) | np.isnan(X).any(axis=(1, 2))

    X_new = X[~incomplete]
    Y_new = Y[~incomplete]

    # NOTE(review): consecutive windows overlap, so a shuffled split places
    # near-identical samples on both sides — confirm this is intended.
    X_train, X_test, Y_train, Y_test = train_test_split(X_new, Y_new, test_size=test_size, random_state=4)

    print("Training set:", X_train.shape, "Test set:", X_test.shape)

    return X_train, X_test, Y_train, Y_test, scaler, X_new, Y_new

class PrintCrossPoint(Callback):
    """Keras callback that reports the first epoch at which ``val_loss < loss``.

    Training is never interrupted; the epoch is only recorded and printed once
    training has finished.
    """

    def __init__(self):
        # Let the base class set up the state Keras expects on every callback.
        super().__init__()

        # First epoch (1-based) where val_loss < loss; "" until that happens.
        self.epoch_cross = ""
        # Number of epochs completed so far.
        self.epoch = 0

    def on_epoch_end(self, epoch, logs=None):
        """Record the first epoch whose validation loss is below the training loss."""
        self.epoch += 1

        logs = logs or {}

        current_train_loss = logs.get("loss")
        current_val_loss = logs.get("val_loss")

        # Without validation data "val_loss" is missing; comparing None would raise.
        if current_train_loss is None or current_val_loss is None:
            return

        if current_val_loss < current_train_loss and self.epoch_cross == "":
            self.epoch_cross = self.epoch

    def on_train_end(self, logs=None):
        """Print the recorded epoch, if any (Keras calls this hook with ``logs`` only)."""
        # NOTE(review): the message says "higher" while the condition above
        # tests val_loss < loss — confirm which of the two is intended.
        if self.epoch_cross != "":
            print("Validation loss higher than training loss from epoch %s!" % self.epoch_cross)
        
class StopCrossPoint(Callback):
    """Keras callback that stops training once ``val_loss < loss`` after an epoch."""

    def __init__(self):
        # Let the base class set up the state Keras expects on every callback.
        super().__init__()

        # Number of epochs completed so far (1-based in the printed message).
        self.epoch = 0

    def on_epoch_end(self, epoch, logs=None):
        """Request a stop when the validation loss is below the training loss."""
        self.epoch += 1

        logs = logs or {}

        current_train_loss = logs.get("loss")
        current_val_loss = logs.get("val_loss")

        # Without validation data "val_loss" is missing; comparing None would raise.
        if current_train_loss is None or current_val_loss is None:
            return

        if current_val_loss < current_train_loss:

            # NOTE(review): the message says "higher" while the condition
            # tests val_loss < loss — confirm which of the two is intended.
            print("Validation loss higher than training loss from epoch %s!" % self.epoch)

            self.model.stop_training = True

# Load data, resample and construct training and validation sets

In [None]:
# Read the Palma Bay series and convert its "Time" column to datetimes.
df_f = pd.read_csv("Datos pH Baleares/Palma_Bay.csv").assign(
    Time=lambda frame: pd.to_datetime(frame["Time"])
)

In [None]:
plt.scatter(df_f["Time"], df_f["PH"], s=1)

# Overlay in red the two intervals (bounds excluded) that the next cell removes.
for _start, _end in (
    (datetime(2019, 8, 15), datetime(2019, 9, 1)),
    (datetime(2020, 6, 15), datetime(2020, 7, 1)),
):
    _inside = (df_f["Time"] > _start) & (df_f["Time"] < _end)
    plt.plot(df_f["Time"][_inside], df_f["PH"][_inside], color='r')

plt.xticks(rotation=30);

In [None]:
#Delete red points
# Remove every row whose timestamp lies strictly inside either flagged interval.
df_final = df_f.drop(
    index=df_f[
        ((df_f["Time"] > datetime(2019, 8, 15)) & (df_f["Time"] < datetime(2019, 9, 1)))
        | ((df_f["Time"] > datetime(2020, 6, 15)) & (df_f["Time"] < datetime(2020, 7, 1)))
    ].index
)

plt.scatter(df_final["Time"], df_final["PH"], s=1)

plt.xticks(rotation=30);

# Choose window size 

In [None]:
# Number of consecutive samples in each input window; passed as n_times to
# preprocessing() and RNN_dataset_pred(), and used to offset df_final when
# predictions are aligned with observations.
window_size = 6

In [None]:
#data_resampled = df_final[df_final["DO(umol kg-1)"].astype('str') != 'nan'][["Tempertaure (ºC)", "DO(umol kg-1)", "pHT"]].values
# Keep only rows that have an Oxygen value. The last selected column (PH) is
# the target; the columns before it are the model inputs.
data_resampled = df_final.loc[
    df_final["Oxygen"].astype('str') != 'nan',
    ["Temperature", "Oxygen", "Salinity", "PH"],
].to_numpy()

(X_train, X_test, Y_train, Y_test,
 scaler, X_scaled, Y_scaled) = preprocessing(data_resampled, n_times=window_size, test_size=0.1)

In [None]:
#data_new = df_final[["Tempertaure (ºC)", "DO(umol kg-1)"]].values
# Feature columns for every row of df_final (including rows without PH).
data_new = df_final[["Temperature", "Oxygen", "Salinity"]].to_numpy()

n_features = data_new.shape[-1]

# Apply the fitted min-max transform by hand (x * scale_ + min_), using only
# the parameters of the feature columns, which come first in the scaler.
scaled_new = data_new * scaler.scale_[:n_features] + scaler.min_[:n_features]

X_to_predict = RNN_dataset_pred(scaled_new, window_size, n_features)

# RNN 

In [None]:
# Stop when the training loss no longer improves, or when StopCrossPoint fires.
callback = tf.keras.callbacks.EarlyStopping(monitor='loss', patience=2, min_delta=0.00001)
callback_2 = StopCrossPoint()

# The timer covers model construction as well as fitting.
t0 = time()

# design network
model_RNN = Sequential()
model_RNN.add(SimpleRNN(3, activation='tanh', input_shape=(X_train.shape[1], X_train.shape[2])))
# Sigmoid output: targets were min-max scaled by preprocessing().
model_RNN.add(Dense(1, activation='sigmoid'))
model_RNN.compile(loss='mse', optimizer='adam')

# fit network
history_RNN = model_RNN.fit(
    X_train,
    Y_train,
    epochs=500,
    steps_per_epoch=10,
    #batch_size=100,
    validation_data=(X_test, Y_test),
    verbose=0,
    shuffle=False,
    callbacks=[callback, callback_2]
    )

time_elapsed_RNN = time() - t0
epochs_used_RNN = len(history_RNN.history['loss'])

# Losses are stored multiplied by 100 (shown as percentages in the figure).
final_train_loss_RNN = (history_RNN.history['loss'][-1]*100)
final_val_loss_RNN = (history_RNN.history['val_loss'][-1]*100)

init_train_loss_RNN = (history_RNN.history['loss'][0]*100)

print("Finished in", time_elapsed_RNN, "s using", epochs_used_RNN, "epochs")

# Predict for every window of df_final, then invert the target's min-max scaling.
y_pred_RNN = model_RNN.predict(X_to_predict)

y_pred_noscale_RNN = (y_pred_RNN - scaler.min_[-1]) / scaler.scale_[-1]

# Predictions (still scaled) on all clean training/validation samples.
yhat_RNN = model_RNN.predict(X_scaled)

In [None]:
# Four-panel summary of the SimpleRNN run. The pyplot calls act on the
# current subplot, so the order of the statements below matters.
plt.figure(figsize=(16, 12))

# Panel 1 (top-left): training and validation loss per epoch.
plt.subplot(2, 2, 1)
plt.plot(history_RNN.history['loss'], label='Train loss', lw=3)
plt.plot(history_RNN.history['val_loss'], label='Validation loss', lw=3)

plt.xticks(fontsize=16)
plt.yticks(fontsize=16)

plt.xlabel("Epoch", fontsize=30, labelpad=10)
plt.ylabel("Loss (MSE)", fontsize=30, labelpad=10)

# Annotation placement is relative to the run: x at half the epochs used,
# y at a fraction of the (x100) first-epoch training loss.
plt.text(epochs_used_RNN*0.5, init_train_loss_RNN*0.006,
         "Final train loss: %.2f%%" % final_train_loss_RNN, fontsize=16)
plt.text(epochs_used_RNN*0.5, init_train_loss_RNN*0.005,
         "Final val loss: %.2f%%" % final_val_loss_RNN, fontsize=16)

plt.legend(fontsize=20);

# Panel 3 (bottom-left): scaled targets and model output on the clean samples,
# plotted against sample position.
plt.subplot(2, 2, 3)
plt.plot(Y_scaled, lw=3)
plt.plot(yhat_RNN, lw=3)

plt.xticks(fontsize=16)
plt.yticks(fontsize=16)

plt.xlabel("Time", fontsize=30, labelpad=10)
plt.ylabel("pH", fontsize=30, labelpad=10)

# Panel 2 (top-right): predicted vs true (both scaled) with the identity line.
plt.subplot(2, 2, 2)
plt.scatter(yhat_RNN, Y_scaled, s=20)
plt.plot(np.linspace(0, 1, 100), np.linspace(0, 1, 100), color='k', lw=3)

plt.xlabel("Predicted values", fontsize=30, labelpad=10)
plt.ylabel("True values", fontsize=30, labelpad=10)

plt.xticks(fontsize=16)
plt.yticks(fontsize=16)

# Panel 4 (bottom-right): observed PH and de-scaled predictions against the
# df_final index; the first prediction belongs to row number window_size.
plt.subplot(2,2,4)
plt.scatter(df_final.index, df_final["PH"], s=10)
plt.scatter(df_final.index[window_size:], y_pred_noscale_RNN, s=1, color='r')

plt.xticks(fontsize=16)
plt.yticks(fontsize=16)

plt.xlabel("Time", fontsize=30)
plt.ylabel("pH", fontsize=30)

plt.tight_layout()
plt.subplots_adjust(hspace=0.3, wspace=0.2)

#plt.savefig("RNN_.png", dpi=300, bbox_inches='tight')

In [None]:
# PH and timestamps for the rows that have a prediction: prediction k belongs
# to the k-th row after the first window_size rows (selected by position).
ph_tail = df_final["PH"].iloc[window_size:]
time_tail = df_final["Time"].iloc[window_size:]

# Where an observation exists, it replaces the model output (in place), so the
# trend is fitted to observations with gaps filled by predictions.
has_observation = ph_tail.notna().to_numpy()
y_pred_noscale_RNN[has_observation, 0] = ph_tail[has_observation].to_numpy()

# Elapsed time since the first predicted row, in years of 365 days. The
# reference is taken by position so it still works if low index labels were dropped.
time_delta = time_tail - time_tail.iloc[0]

time_years = (time_delta / np.timedelta64(1, 'm')).to_numpy() / (60*24*365)

# Rows with neither an observation nor a finite prediction are left out of the fit.
has_value = ~np.isnan(y_pred_noscale_RNN[:, 0])

Y_TREND = y_pred_noscale_RNN[has_value, 0]
X_TREND = time_years[has_value]

reg = LinearRegression().fit(X_TREND.reshape(-1,1), Y_TREND)

slope_RNN = reg.coef_[0]
intercept_RNN = reg.intercept_

print("m:", slope_RNN, "n:", intercept_RNN)

# LSTM 

In [None]:
# Two stopping rules: plateau of the training loss, and StopCrossPoint.
callback = tf.keras.callbacks.EarlyStopping(monitor='loss', patience=2, min_delta=0.00001)
callback_2 = StopCrossPoint()

# Timing starts before the network is built.
t0 = time()

# design network
model_LSTM = Sequential([
    LSTM(3, input_shape=(X_train.shape[1], X_train.shape[2])),
    Dense(1, activation='sigmoid'),
])
model_LSTM.compile(optimizer='adam', loss='mse')

# fit network
history_LSTM = model_LSTM.fit(
    X_train,
    Y_train,
    validation_data=(X_test, Y_test),
    epochs=500,
    steps_per_epoch=10,
    #batch_size=100,
    shuffle=False,
    verbose=0,
    callbacks=[callback, callback_2],
)

time_elapsed_LSTM = time() - t0
epochs_used_LSTM = len(history_LSTM.history['loss'])

# Losses are kept multiplied by 100 for display as percentages.
final_train_loss_LSTM = 100 * history_LSTM.history['loss'][-1]
final_val_loss_LSTM = 100 * history_LSTM.history['val_loss'][-1]

init_train_loss_LSTM = 100 * history_LSTM.history['loss'][0]

print("Finished in", time_elapsed_LSTM, "s using", epochs_used_LSTM, "epochs")

# Predictions for every window of df_final, mapped back to PH units.
y_pred_LSTM = model_LSTM.predict(X_to_predict)

y_pred_noscale_LSTM = (y_pred_LSTM - scaler.min_[-1]) / scaler.scale_[-1]

# Scaled predictions on the clean samples used for training/validation.
yhat_LSTM = model_LSTM.predict(X_scaled)

In [None]:
# Four-panel summary of the LSTM run. The pyplot calls act on the current
# subplot, so the order of the statements below matters.
plt.figure(figsize=(16, 12))

# Panel 1 (top-left): training and validation loss per epoch.
plt.subplot(2, 2, 1)
plt.plot(history_LSTM.history['loss'], label='Train loss', lw=3)
plt.plot(history_LSTM.history['val_loss'], label='Validation loss', lw=3)

plt.xticks(fontsize=16)
plt.yticks(fontsize=16)

plt.xlabel("Epoch", fontsize=30, labelpad=10)
plt.ylabel("Loss (MSE)", fontsize=30, labelpad=10)

# Annotation placement is relative to the run: x at half the epochs used,
# y at a fraction of the (x100) first-epoch training loss.
plt.text(epochs_used_LSTM*0.5, init_train_loss_LSTM*0.006,
         "Final train loss: %.2f%%" % final_train_loss_LSTM, fontsize=16)
plt.text(epochs_used_LSTM*0.5, init_train_loss_LSTM*0.005,
         "Final val loss: %.2f%%" % final_val_loss_LSTM, fontsize=16)

plt.legend(fontsize=20);

# Panel 3 (bottom-left): scaled targets and model output on the clean samples,
# plotted against sample position.
plt.subplot(2, 2, 3)
plt.plot(Y_scaled, lw=3)
plt.plot(yhat_LSTM, lw=3)

plt.xticks(fontsize=16)
plt.yticks(fontsize=16)

plt.xlabel("Time", fontsize=30, labelpad=10)
plt.ylabel("pH", fontsize=30, labelpad=10)

# Panel 2 (top-right): predicted vs true (both scaled) with the identity line.
plt.subplot(2, 2, 2)
plt.scatter(yhat_LSTM, Y_scaled, s=20)
plt.plot(np.linspace(0, 1, 100), np.linspace(0, 1, 100), color='k', lw=3)

plt.xlabel("Predicted values", fontsize=30, labelpad=10)
plt.ylabel("True values", fontsize=30, labelpad=10)

plt.xticks(fontsize=16)
plt.yticks(fontsize=16)

# Panel 4 (bottom-right): observed PH and de-scaled predictions against the
# df_final index; the first prediction belongs to row number window_size.
plt.subplot(2,2,4)
plt.scatter(df_final.index, df_final["PH"], s=10)
plt.scatter(df_final.index[window_size:], y_pred_noscale_LSTM, s=1, color='r')

plt.xticks(fontsize=16)
plt.yticks(fontsize=16)

plt.xlabel("Time", fontsize=30)
plt.ylabel(r"$pH_T$", fontsize=30)

plt.tight_layout()
plt.subplots_adjust(hspace=0.3, wspace=0.2)

In [None]:
# PH and timestamps for the rows that have a prediction: prediction k belongs
# to the k-th row after the first window_size rows (selected by position).
ph_tail = df_final["PH"].iloc[window_size:]
time_tail = df_final["Time"].iloc[window_size:]

# Where an observation exists, it replaces the model output (in place), so the
# trend is fitted to observations with gaps filled by predictions.
has_observation = ph_tail.notna().to_numpy()
y_pred_noscale_LSTM[has_observation, 0] = ph_tail[has_observation].to_numpy()

# Elapsed time since the first predicted row, in years of 365 days. The
# reference is taken by position so it still works if low index labels were dropped.
time_delta = time_tail - time_tail.iloc[0]

time_years = (time_delta / np.timedelta64(1, 'm')).to_numpy() / (60*24*365)

# Rows with neither an observation nor a finite prediction are left out of the fit.
has_value = ~np.isnan(y_pred_noscale_LSTM[:, 0])

Y_TREND = y_pred_noscale_LSTM[has_value, 0]
X_TREND = time_years[has_value]

reg = LinearRegression().fit(X_TREND.reshape(-1,1), Y_TREND)

slope_LSTM = reg.coef_[0]
intercept_LSTM = reg.intercept_

print("m:", slope_LSTM, "n:", intercept_LSTM)

## Bidirectional LSTM 

In [None]:
#final_train_loss_BI_LSTM = 100

#while final_train_loss_BI_LSTM > 0.4:

# Stop when the training loss no longer improves, or when StopCrossPoint fires.
callback = tf.keras.callbacks.EarlyStopping(monitor='loss', patience=2, min_delta=0.00001)
callback_2 = StopCrossPoint()

# The timer covers model construction as well as fitting.
t0 = time()

# design network
model_BI_LSTM = Sequential()
model_BI_LSTM.add(Bidirectional(LSTM(3, activation='tanh',
                              input_shape=(X_train.shape[1], X_train.shape[2]))))
model_BI_LSTM.add(Dense(1, activation='sigmoid'))
model_BI_LSTM.compile(loss='mse', optimizer='adam')

# fit network
history_BI_LSTM = model_BI_LSTM.fit(
    X_train,
    Y_train,
    epochs=500,
    steps_per_epoch=10,
    #batch_size=100,
    validation_data=(X_test, Y_test),
    verbose=0,
    shuffle=False,
    callbacks=[callback, callback_2]
    )

time_elapsed_BI_LSTM = time() - t0
epochs_used_BI_LSTM = len(history_BI_LSTM.history['loss'])

# Losses are stored multiplied by 100 (shown as percentages in the figure).
final_train_loss_BI_LSTM = (history_BI_LSTM.history['loss'][-1]*100)
final_val_loss_BI_LSTM = (history_BI_LSTM.history['val_loss'][-1]*100)

init_train_loss_BI_LSTM = (history_BI_LSTM.history['loss'][0]*100)

print("Finished in", time_elapsed_BI_LSTM, "s using", epochs_used_BI_LSTM, "epochs")

y_pred_BI_LSTM = model_BI_LSTM.predict(X_to_predict)

# Invert the target's min-max scaling: x = (x_scaled - min_) / scale_.
y_noscaled = (Y_scaled - scaler.min_[-1]) / scaler.scale_[-1]

y_pred_noscale_BI_LSTM = (y_pred_BI_LSTM - scaler.min_[-1]) / scaler.scale_[-1]

yhat_BI_LSTM = model_BI_LSTM.predict(X_scaled)

yhat_BI_LSTM_noscale = (yhat_BI_LSTM - scaler.min_[-1]) / scaler.scale_[-1]

# Gap-filled series to export: observed PH where available, model output elsewhere.
# An explicit copy keeps the assignments below from touching df_final.
df_to_save = df_final[["Time", "PH"]].copy()

df_to_save["DataType"] = ""

# Prediction k was produced from rows k .. k + window_size - 1 and therefore
# belongs to row k + window_size; the first window_size rows have no prediction
# and keep an empty DataType.
predicted_rows = np.arange(window_size, len(df_to_save))
is_gap = df_to_save["PH"].iloc[window_size:].isna().to_numpy()

ph_column = df_to_save.columns.get_loc("PH")
type_column = df_to_save.columns.get_loc("DataType")

df_to_save.iloc[predicted_rows, type_column] = np.where(is_gap, "Prediction", "Observation")
df_to_save.iloc[predicted_rows[is_gap], ph_column] = y_pred_noscale_BI_LSTM[is_gap, 0]

In [None]:
# PH and timestamps for the rows that have a prediction: prediction k belongs
# to the k-th row after the first window_size rows (selected by position).
ph_tail = df_final["PH"].iloc[window_size:]
time_tail = df_final["Time"].iloc[window_size:]

# Where an observation exists, it replaces the model output (in place), so the
# trend is fitted to observations with gaps filled by predictions.
has_observation = ph_tail.notna().to_numpy()
y_pred_noscale_BI_LSTM[has_observation, 0] = ph_tail[has_observation].to_numpy()

# Elapsed time since the first predicted row, in years of 365 days. The
# reference is taken by position so it still works if low index labels were dropped.
time_delta = time_tail - time_tail.iloc[0]

time_years = (time_delta / np.timedelta64(1, 'm')).to_numpy() / (60*24*365)

# Rows with neither an observation nor a finite prediction are left out of the fit.
has_value = ~np.isnan(y_pred_noscale_BI_LSTM[:, 0])

Y_TREND = y_pred_noscale_BI_LSTM[has_value, 0]
X_TREND = time_years[has_value]

reg = LinearRegression().fit(X_TREND.reshape(-1,1), Y_TREND)

slope_BI_LSTM = reg.coef_[0]
intercept_BI_LSTM = reg.intercept_

print("m:", slope_BI_LSTM, "n:", intercept_BI_LSTM)

# Four-panel figure; the pyplot calls act on the current subplot, so the
# order of the statements below matters.
plt.figure(figsize=(16, 12))

# a) training and validation loss per epoch.
plt.subplot(2, 2, 1)
plt.plot(history_BI_LSTM.history['loss'], label='Train loss', lw=3)
plt.plot(history_BI_LSTM.history['val_loss'], label='Validation loss', lw=3)

plt.text(-1, 0.0075, "a)", fontsize=40)

plt.xticks(fontsize=16)
plt.yticks(fontsize=16)

plt.xlabel("Epoch", fontsize=30, labelpad=10)
plt.ylabel("Loss (MSE)", fontsize=30, labelpad=10)

plt.text(epochs_used_BI_LSTM*0.5, init_train_loss_BI_LSTM*0.006,
         "Final train loss: %.2f%%" % final_train_loss_BI_LSTM, fontsize=16)
plt.text(epochs_used_BI_LSTM*0.5, init_train_loss_BI_LSTM*0.005,
         "Final val loss: %.2f%%" % final_val_loss_BI_LSTM, fontsize=16)

plt.legend(fontsize=20);

# c) de-scaled targets and model output on the clean samples.
plt.subplot(2, 2, 3)
plt.plot(y_noscaled, lw=3)
plt.plot(yhat_BI_LSTM_noscale, lw=3)

plt.text(-1, 8.0, "c)", fontsize=40)

plt.xticks(fontsize=16)
plt.yticks(fontsize=16)

plt.xlabel("Training data sequence", fontsize=30, labelpad=10)
plt.ylabel(r"pH$_\mathrm{T}$", fontsize=30, labelpad=10)

# b) predicted vs true (both scaled) with the identity line.
plt.subplot(2, 2, 2)
plt.scatter(yhat_BI_LSTM, Y_scaled, s=20)
plt.plot(np.linspace(0, 1, 100), np.linspace(0, 1, 100), color='k', lw=3)

plt.text(0, 0.9, "b)", fontsize=40)

plt.xlabel("Predicted values", fontsize=30, labelpad=10)
plt.ylabel("True values", fontsize=30, labelpad=10)

plt.xticks(fontsize=16)
plt.yticks(fontsize=16)

# d) gap-filled series (observations and predictions) with the fitted trend.
plt.subplot(2,2,4)
#plt.scatter(X_TREND, Y_TREND, s=1)
#plt.plot(X_TREND, slope_BI_LSTM*X_TREND + intercept_BI_LSTM, color='k', lw=3,
#         label=r'y=%.4fx + %.4f'% (slope_BI_LSTM, intercept_BI_LSTM))

plt.scatter(df_to_save["Time"][df_to_save["DataType"] == "Observation"],
            df_to_save["PH"][df_to_save["DataType"] == "Observation"], s=1, lw=3)

plt.scatter(df_to_save["Time"][df_to_save["DataType"] == "Prediction"],
            df_to_save["PH"][df_to_save["DataType"] == "Prediction"], s=1, lw=3)

# Evaluate the trend line with the same time origin as the regression (the
# first predicted row); another origin would shift the line along the time axis.
time_delta_plot = df_final["Time"] - time_tail.iloc[0]

time_years_plot = (time_delta_plot / np.timedelta64(1, 'm')).to_numpy() / (60*24*365)

plt.plot(df_to_save["Time"], slope_BI_LSTM*time_years_plot + intercept_BI_LSTM, color='k', lw=3,
         label=r'y=%.4fx + %.4f'% (slope_BI_LSTM, intercept_BI_LSTM))

plt.text(datetime(2012, 2, 1), 7.98, "d)", fontsize=40)

plt.xticks(fontsize=16, rotation=30)
plt.yticks(fontsize=16)

plt.xlabel("Time [years]", fontsize=30)
plt.ylabel(r"pH$_\mathrm{T}$", fontsize=30)

plt.legend(fontsize=20)

plt.tight_layout()
plt.subplots_adjust(hspace=0.3, wspace=0.3)

#plt.savefig("Best_bidirectional_LSTM.pdf", dpi=300, bbox_inches='tight')

In [None]:
#df_to_save.to_csv("Palma_data_w_predictions_bis.csv")

In [None]:
#model_BI_LSTM.save('Bidirectional_LSTM_Palma_final_bis.h5')

# Bidirectional GRU 

In [None]:
# Two stopping rules: plateau of the training loss, and StopCrossPoint.
callback = tf.keras.callbacks.EarlyStopping(monitor='loss', patience=2, min_delta=0.00001)
callback_2 = StopCrossPoint()

# Timing starts before the network is built.
t0 = time()

# design network
model_BI_GRU = Sequential([
    Bidirectional(GRU(1, activation='tanh',
                      input_shape=(X_train.shape[1], X_train.shape[2]))),
    Dense(1, activation='sigmoid'),
])
model_BI_GRU.compile(optimizer='adam', loss='mse')

# fit network
history_BI_GRU = model_BI_GRU.fit(
    X_train,
    Y_train,
    validation_data=(X_test, Y_test),
    epochs=500,
    steps_per_epoch=10,
    #batch_size=100,
    shuffle=False,
    verbose=0,
    callbacks=[callback, callback_2],
)

time_elapsed_BI_GRU = time() - t0
epochs_used_BI_GRU = len(history_BI_GRU.history['loss'])

# Losses are kept multiplied by 100 for display as percentages.
final_train_loss_BI_GRU = 100 * history_BI_GRU.history['loss'][-1]
final_val_loss_BI_GRU = 100 * history_BI_GRU.history['val_loss'][-1]

init_train_loss_BI_GRU = 100 * history_BI_GRU.history['loss'][0]

print("Finished in", time_elapsed_BI_GRU, "s using", epochs_used_BI_GRU, "epochs")

# Predictions for every window of df_final, mapped back to PH units.
y_pred_BI_GRU = model_BI_GRU.predict(X_to_predict)

y_pred_noscale_BI_GRU = (y_pred_BI_GRU - scaler.min_[-1]) / scaler.scale_[-1]

# Scaled predictions on the clean samples used for training/validation.
yhat_BI_GRU = model_BI_GRU.predict(X_scaled)

In [None]:
# Four-panel summary of the bidirectional GRU run. The pyplot calls act on the
# current subplot, so the order of the statements below matters.
plt.figure(figsize=(16, 12))

# Panel 1 (top-left): training and validation loss per epoch.
plt.subplot(2, 2, 1)
plt.plot(history_BI_GRU.history['loss'], label='Train loss', lw=3)
plt.plot(history_BI_GRU.history['val_loss'], label='Validation loss', lw=3)

plt.xticks(fontsize=16)
plt.yticks(fontsize=16)

plt.xlabel("Epoch", fontsize=30, labelpad=10)
plt.ylabel("Loss (MSE)", fontsize=30, labelpad=10)

# Annotation placement is relative to the run: x at half the epochs used,
# y at a fraction of the (x100) first-epoch training loss.
plt.text(epochs_used_BI_GRU*0.5, init_train_loss_BI_GRU*0.006,
         "Final train loss: %.2f%%" % final_train_loss_BI_GRU, fontsize=16)
plt.text(epochs_used_BI_GRU*0.5, init_train_loss_BI_GRU*0.005,
         "Final val loss: %.2f%%" % final_val_loss_BI_GRU, fontsize=16)

plt.legend(fontsize=20);

# Panel 3 (bottom-left): scaled targets and model output on the clean samples,
# plotted against sample position.
plt.subplot(2, 2, 3)
plt.plot(Y_scaled, lw=3)
plt.plot(yhat_BI_GRU, lw=3)

plt.xticks(fontsize=16)
plt.yticks(fontsize=16)

plt.xlabel("Time", fontsize=30, labelpad=10)
plt.ylabel("pH", fontsize=30, labelpad=10)

# Panel 2 (top-right): predicted vs true (both scaled) with the identity line.
plt.subplot(2, 2, 2)
plt.scatter(yhat_BI_GRU, Y_scaled, s=20)
plt.plot(np.linspace(0, 1, 100), np.linspace(0, 1, 100), color='k', lw=3)

plt.xlabel("Predicted values", fontsize=30, labelpad=10)
plt.ylabel("True values", fontsize=30, labelpad=10)

plt.xticks(fontsize=16)
plt.yticks(fontsize=16)

# Panel 4 (bottom-right): observed PH and de-scaled predictions against the
# df_final index; the first prediction belongs to row number window_size.
plt.subplot(2,2,4)
plt.scatter(df_final.index, df_final["PH"], s=10)
plt.scatter(df_final.index[window_size:], y_pred_noscale_BI_GRU, s=1, color='r')

plt.xticks(fontsize=16)
plt.yticks(fontsize=16)

plt.xlabel("Time", fontsize=30)
plt.ylabel("pH", fontsize=30)

plt.tight_layout()
plt.subplots_adjust(hspace=0.3, wspace=0.2)

#plt.savefig("Best_bidirectional_LSTM.png", dpi=300, bbox_inches='tight')

In [None]:
# PH and timestamps for the rows that have a prediction: prediction k belongs
# to the k-th row after the first window_size rows (selected by position).
ph_tail = df_final["PH"].iloc[window_size:]
time_tail = df_final["Time"].iloc[window_size:]

# Where an observation exists, it replaces the model output (in place), so the
# trend is fitted to observations with gaps filled by predictions.
has_observation = ph_tail.notna().to_numpy()
y_pred_noscale_BI_GRU[has_observation, 0] = ph_tail[has_observation].to_numpy()

# Elapsed time since the first predicted row, in years of 365 days. The
# reference is taken by position so it still works if low index labels were dropped.
time_delta = time_tail - time_tail.iloc[0]

time_years = (time_delta / np.timedelta64(1, 'm')).to_numpy() / (60*24*365)

# Rows with neither an observation nor a finite prediction are left out of the fit.
has_value = ~np.isnan(y_pred_noscale_BI_GRU[:, 0])

Y_TREND = y_pred_noscale_BI_GRU[has_value, 0]
X_TREND = time_years[has_value]

reg = LinearRegression().fit(X_TREND.reshape(-1,1), Y_TREND)

slope_BI_GRU = reg.coef_[0]
intercept_BI_GRU = reg.intercept_

print("m:", slope_BI_GRU, "n:", intercept_BI_GRU)

# Results per architecture

In [None]:
# One entry per trained model, in the same order in every list.
architectures = ["RNN", "LSTM", "Bidirectional LSTM", "Bidirectional GRU"]
final_train_losses = [
    final_train_loss_RNN, final_train_loss_LSTM,
    final_train_loss_BI_LSTM, final_train_loss_BI_GRU,
]
final_val_losses = [
    final_val_loss_RNN, final_val_loss_LSTM,
    final_val_loss_BI_LSTM, final_val_loss_BI_GRU,
]
training_times = [
    time_elapsed_RNN, time_elapsed_LSTM,
    time_elapsed_BI_LSTM, time_elapsed_BI_GRU,
]
training_epochs = [
    epochs_used_RNN, epochs_used_LSTM,
    epochs_used_BI_LSTM, epochs_used_BI_GRU,
]
trends = [slope_RNN, slope_LSTM, slope_BI_LSTM, slope_BI_GRU]

# Key order fixes the column order of the table and of the CSV.
d = {
    'Architecture': architectures,
    'Final training loss': final_train_losses,
    'Final validation loss': final_val_losses,
    'Training Epochs': training_epochs,
    'Training time [s]': training_times,
    'Predicted trends': trends,
}

df_results = pd.DataFrame(data=d)

# The window size is part of the file name, so runs with different windows do not overwrite each other.
df_results.to_csv("Results_window_size_%s.csv" % window_size)

df_results

## Trend 

In [None]:
# X_TREND, Y_TREND and reg all come from the trend cell that was run last, so
# the line parameters are read from that same fit. This keeps the line
# consistent with the plotted points and avoids undefined names on a fresh kernel.
slope = reg.coef_[0]
intercept = reg.intercept_

plt.figure(figsize=(8, 6))

plt.plot(X_TREND, (X_TREND * slope + intercept), lw=3, color='k', label=r"PH$=%.4f$y$ + %.2f$" % (slope, intercept))
plt.scatter(X_TREND, Y_TREND, color='r', s=1)

#plt.text(-0.3, 7.92, r"PH$=-0.011$y$+8.19$", fontsize=20)

# NOTE(review): tick labels assume the time axis starts in 2013 and spans
# nine years — confirm against the data.
plt.xticks(np.arange(0, 9, 1), np.arange(2013, 2022, 1), fontsize=20, rotation=45)
plt.yticks(fontsize=20)

plt.ylabel("PH", fontsize=30)

plt.legend(loc="lower left", fontsize=20)

#plt.savefig("Trend_Bidirectional_LSTM.png", dpi=300, bbox_inches="tight")

# Save models

In [None]:
# Persist each trained network to its own HDF5 file.
for _model, _path in (
    (model_RNN, "RNN_Palma.h5"),
    (model_LSTM, 'LSTM_Palma.h5'),
    (model_BI_LSTM, 'Bidirectional_LSTM_Palma.h5'),
    (model_BI_GRU, 'Bidirectional_GRU_Palma.h5'),
):
    _model.save(_path)