In [None]:
# Enable IPython's autoreload extension in mode 2: imported modules are
# re-imported before each cell execution, so edits to local modules are
# picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
from pathlib import Path
import numpy as np
import pandas as pd
import datetime as dt
import matplotlib.pyplot as plt
import math
from gee_scripts.parameters import explain_vars, response_var, west_region_ids, center_region_ids


from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.callbacks import EarlyStopping, TensorBoard, ModelCheckpoint

# importing libraries
from keras.models import Sequential
from keras.layers import LSTM
from keras.layers import Dense
from keras.layers import SimpleRNN
from keras.layers import Dropout
from keras.layers import GRU, Bidirectional
from keras.optimizers import SGD
from sklearn import metrics
from sklearn.metrics import mean_squared_error

In [None]:
# Load the field measurements, keep only readings of at most 400 cm taken
# after 2018, and index the result chronologically by date.
df = (
    pd.read_csv("data/field_data_unique_coords_and_location.csv", parse_dates=["date"])
    .loc[lambda frame: (frame.gwl_cm <= 400) & (frame.date.dt.year > 2018)]
    .set_index("date")
    .sort_index()
)

# Split by island using the region id lists from gee_scripts.parameters;
# only the response column is kept.
in_kalimantan = df.region_id.isin(center_region_ids)
in_sumatra = df.region_id.isin(west_region_ids)
kalimantan_df = df.loc[in_kalimantan, ["gwl_cm"]]
sumatra_df = df.loc[in_sumatra, ["gwl_cm"]]

# Aggregate into 3-day bins using the mean of all readings in each bin.
kalimantan_resampled = kalimantan_df.resample('3D').mean()
sumatra_resampled = sumatra_df.resample('3D').mean()

# One panel per island, drawn with the pandas/matplotlib plotting API.
fig, ax = plt.subplots(2, 1, figsize=(10, 10))
for panel, series, region in zip(
    ax,
    (kalimantan_resampled, sumatra_resampled),
    ("Kalimantan", "Sumatra"),
):
    series.plot(ax=panel, title=region)

plt.show()

In [None]:
# Display the resampled Kalimantan series for a quick visual check.
kalimantan_resampled

In [None]:
# Series used for modelling: the Kalimantan gwl_cm column.
# NOTE(review): this is the raw per-reading frame, not `kalimantan_resampled`;
# confirm that is intended before relying on fixed-length windows downstream.
data = kalimantan_df[["gwl_cm"]]

# Chronological 80/20 split: the first 80 % of rows (rounded up) are used for
# training and the remainder for testing. No shuffling is done.
training_data_len = math.ceil(0.8 * len(data))

train_data = data.iloc[:training_data_len]
test_data = data.iloc[training_data_len:]
print(train_data.shape, test_data.shape)

In [None]:
# Display the training split for inspection.
train_data

In [None]:
# Training targets as a single-column 2-D array (n_samples, 1), the layout
# scikit-learn scalers expect.
dataset_train = train_data.gwl_cm.to_numpy().reshape(-1, 1)
dataset_train.shape


In [None]:
# Learn the min/max of the training values and map them onto [0, 1].
scaler = MinMaxScaler(feature_range=(0, 1))
scaler.fit(dataset_train)
scaled_train = scaler.transform(dataset_train)

print(scaled_train[:5])


In [None]:
# Test targets as a single-column 2-D array (n_samples, 1).
dataset_test = np.reshape(test_data.gwl_cm.values, (-1, 1))

# Scale the test values with the min/max learned from the TRAINING data.
# The scaler must not be re-fitted here: re-fitting would put train and test
# on different scales and make later `scaler.inverse_transform` calls use
# test-set statistics. Test values outside the training range can therefore
# fall slightly outside [0, 1], which is expected.
scaled_test = scaler.transform(dataset_test)
print(*scaled_test[:5])


In [None]:
# Build supervised samples from the scaled training series: every input is a
# run of 50 consecutive values and its target is the value right after it.
window_starts = range(len(scaled_train) - 50)
X_train = [scaled_train[start:start + 50, 0] for start in window_starts]
y_train = [scaled_train[start + 50, 0] for start in window_starts]

# Debug echo: show the sample lists as they look after one window and after
# two windows (fewer if the series is too short to produce them).
for shown in range(1, min(2, len(X_train)) + 1):
    print(X_train[:shown])
    print(y_train[:shown])
    print()


In [None]:
# Same windowing as for training, applied to the scaled test series: 50
# consecutive values as input, the following value as target.
window_starts = range(len(scaled_test) - 50)
X_test = [scaled_test[start:start + 50, 0] for start in window_starts]
y_test = [scaled_test[start + 50, 0] for start in window_starts]


In [None]:
# Stack the window lists into NumPy arrays.
X_train = np.array(X_train)
y_train = np.array(y_train)

# Add a trailing feature axis: inputs become (samples, timesteps, 1) and
# targets become (samples, 1).
X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], 1)
y_train = y_train.reshape(y_train.shape[0], 1)
print("X_train :",X_train.shape,"y_train :",y_train.shape)


In [None]:
# Stack the window lists into NumPy arrays.
X_test = np.array(X_test)
y_test = np.array(y_test)

# Add a trailing feature axis: inputs become (samples, timesteps, 1) and
# targets become (samples, 1).
X_test = X_test.reshape(X_test.shape[0], X_test.shape[1], 1)
y_test = y_test.reshape(y_test.shape[0], 1)
print("X_test :",X_test.shape,"y_test :",y_test.shape)


In [None]:
def get_callbacks(model_name: str = 'model'):
    """Build the Keras callbacks used when fitting a model.

    Args:
        model_name: Label for the run. It prefixes the checkpoint file names
            and names the TensorBoard log sub-directory.

    Returns:
        A list ``[ModelCheckpoint, EarlyStopping, TensorBoard]``.

    Side effects:
        Creates ``data/15_nn_logs/<model_name>`` if it does not exist.
    """
    log_dir = f'data/15_nn_logs/{model_name}'

    # Save the full model (not only weights) after every epoch. The file name
    # is the model name directly followed by 'epoch_NN.h5', with no separator,
    # and the path is relative to the current working directory.
    save_every_epoch = ModelCheckpoint(
        model_name + 'epoch_{epoch:02d}.h5',
        save_weights_only=False,
        save_best_only=False,
        verbose=1,
    )

    # Stop once the TRAINING loss ('loss', not 'val_loss') has failed to
    # improve by at least 0.001 for 5 consecutive epochs, then roll the model
    # back to the weights of its best epoch.
    stop_on_plateau = EarlyStopping(
        monitor='loss',
        min_delta=0.001,
        patience=5,
        verbose=1,
        restore_best_weights=True,
    )

    # TensorBoard logs (graph, weight images, per-epoch histograms) go into a
    # per-model directory, created up front.
    Path(log_dir).mkdir(parents=True, exist_ok=True)
    board = TensorBoard(
        log_dir=log_dir,
        write_graph=True,
        write_images=True,
        histogram_freq=1,
    )

    return [save_every_epoch, stop_on_plateau, board]


In [None]:
# Stacked SimpleRNN regressor: four recurrent layers of 50 units (dropout
# after the first) followed by a single-unit output layer.
regressor = Sequential([
    SimpleRNN(
        units=50,
        activation="tanh",
        return_sequences=True,
        input_shape=(X_train.shape[1], 1),
    ),
    Dropout(0.2),
    SimpleRNN(units=50, activation="tanh", return_sequences=True),
    SimpleRNN(units=50, activation="tanh", return_sequences=True),
    # Last recurrent layer returns only its final state.
    SimpleRNN(units=50),
    # Sigmoid keeps the prediction inside (0, 1).
    Dense(units=1, activation='sigmoid'),
])

# SGD with Nesterov momentum, optimising mean squared error.
rnn_optimizer = SGD(learning_rate=0.01, decay=1e-6, momentum=0.9, nesterov=True)
regressor.compile(optimizer=rnn_optimizer, loss="mean_squared_error")

# Train for a single epoch, then print the layer summary.
regressor.fit(
    X_train,
    y_train,
    epochs=1,
    batch_size=32,
    callbacks=get_callbacks("simple_rnn"),
)
regressor.summary()


In [None]:
# Two-layer LSTM regressor with a small dense head and a linear output.
regressorLSTM = Sequential()

# First LSTM returns the full sequence so the second LSTM can consume it;
# the second returns only its final state.
regressorLSTM.add(LSTM(50, return_sequences = True, input_shape = (X_train.shape[1],1)))
regressorLSTM.add(LSTM(50, return_sequences = False))
regressorLSTM.add(Dense(25))

# Single linear unit: the prediction for the value following the window.
regressorLSTM.add(Dense(1))

# This is a regression problem, so track mean absolute error next to the MSE
# loss. "accuracy" is a classification metric and carries no information for
# a continuous target.
regressorLSTM.compile(
    optimizer = 'adam',
    loss = 'mean_squared_error',
    metrics = ["mean_absolute_error"],
)

# Fit, then print the layer summary.
regressorLSTM.fit(
    X_train,
    y_train,
    batch_size = 8,
    epochs = 12,
    callbacks=get_callbacks("LSTM")
)
regressorLSTM.summary()

In [None]:
# Stacked GRU regressor: four GRU layers of 50 units (dropout after the
# first) followed by a single-unit ReLU output.
regressorGRU = Sequential([
    GRU(
        units=50,
        return_sequences=True,
        input_shape=(X_train.shape[1], 1),
        activation='tanh',
    ),
    Dropout(0.2),
    GRU(units=50, return_sequences=True, activation='tanh'),
    GRU(units=50, return_sequences=True, activation='tanh'),
    # Last recurrent layer returns only its final state.
    GRU(units=50, activation='tanh'),
    # ReLU keeps the prediction non-negative.
    Dense(units=1, activation='relu'),
])

# Plain-momentum SGD (no Nesterov), optimising mean squared error.
gru_optimizer = SGD(learning_rate=0.01, decay=1e-7, momentum=0.9, nesterov=False)
regressorGRU.compile(optimizer=gru_optimizer, loss='mean_squared_error')

# Fit with one sample per gradient step, then print the layer summary.
regressorGRU.fit(
    X_train,
    y_train,
    epochs=20,
    batch_size=1,
    callbacks=get_callbacks("GRU"),
)

regressorGRU.summary()


In [None]:
# Predict the test windows with the LSTM and GRU models (still in scaled
# units). The SimpleRNN model `regressor` is not evaluated here.
y_LSTM, y_GRU = (
    fitted_model.predict(X_test) for fitted_model in (regressorLSTM, regressorGRU)
)

In [None]:
# Convert the scaled predictions back to the original gwl_cm units using the
# notebook-level `scaler`. No SimpleRNN predictions are converted here.
y_LSTM_O, y_GRU_O = (
    scaler.inverse_transform(scaled_pred) for scaled_pred in (y_LSTM, y_GRU)
)


In [None]:
# Compare each model's test-set predictions with the observed series.
# Only models that actually have predictions get a panel; the SimpleRNN has
# none, so no empty panel is reserved for it.
model_predictions = [
    # (panel title, legend label, predictions in original units, line colour)
    ("LSTM", "y_LSTM", y_LSTM_O, "orange"),
    ("GRU", "y_GRU", y_GRU_O, "red"),
]

fig, axs = plt.subplots(len(model_predictions), figsize=(18, 12), sharex=True, sharey=True)
fig.suptitle('Model Predictions')

for ax, (title, label, y_pred, color) in zip(axs, model_predictions):
    # Context: training series (first 150 rows omitted from the plot) and the
    # full test series.
    ax.plot(train_data.index[150:], train_data.gwl_cm[150:], label="train_data", color="b")
    ax.plot(test_data.index, test_data.gwl_cm, label="test_data", color="g")
    # The first 50 test rows only seed the first input window, so the
    # predictions line up with the test index from row 50 onwards.
    ax.plot(test_data.index[50:], y_pred, label=label, color=color)
    ax.legend()
    ax.set_title(title)
    ax.set_ylabel("gwl_cm")

# The x axis is the shared date index; label it once on the bottom panel.
axs[-1].set_xlabel("Date")

plt.show()


In [None]:
# Observed vs predicted on the test split.
# Plots the predictions that exist in this notebook (LSTM and GRU). The
# SimpleRNN predictions are never computed, so referencing them would raise a
# NameError on a fresh kernel.
fig, ax = plt.subplots(1, 1, figsize=(25, 5))

# Predictions start at test row 50 because the first 50 rows form the first
# input window.
ax.plot(test_data.index[50:], test_data.gwl_cm[50:], label="Observed", color="b")
ax.plot(test_data.index[50:], y_LSTM_O, label="Predicted (LSTM)", color="r")
ax.plot(test_data.index[50:], y_GRU_O, label="Predicted (GRU)", color="orange")
ax.set_title("Observed vs predicted gwl_cm (test split)")
ax.set_xlabel("Date")
ax.set_ylabel("gwl_cm")
ax.legend()
plt.show()
