Import Dependacies

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import RobustScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score
from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras.optimizers import Adam
from keras.callbacks import EarlyStopping

# Load and preprocess the data
df = pd.read_csv('../data/lawas/lawas-rainfall-daily.csv', encoding='latin')
df["DateTime"] = pd.to_datetime(df["DateTime"],format="%Y-%m-%d")
df.set_index("DateTime",inplace=True)

Feature Engineering (Lagged Day - 30 Days)

In [2]:
for lag in range(1, 31):
    df[f'Rainfall_lag_{lag}'] = df['Rainfall'].shift(lag)
    df[f'Temperature_lag_{lag}'] = df['Temperature'].shift(lag)
    df[f'Humidity_lag_{lag}'] = df['Humidity'].shift(lag)

In [3]:
# Scaling the features and target separately
feature_scaler = RobustScaler()
target_scaler = RobustScaler()

# Separate features and target (Rainfall)
features = df.drop(columns=['Rainfall']).reset_index(drop=True)
target = df['Rainfall']

# Scale features and target
features_scaled = feature_scaler.fit_transform(features)
target_scaled = target_scaler.fit_transform(target.values.reshape(-1, 1))

# Split the data into training and testing sets
x_train, x_test, y_train, y_test = train_test_split(
    features_scaled, target_scaled, 
    test_size=0.30, random_state=100, shuffle=False
)

In [4]:
nn_model = Sequential()
nn_model.add(Dense(12, input_dim=x_train.shape[1], activation='relu'))
nn_model.add(Dense(1))
nn_model.compile(loss='mean_squared_error', optimizer='adam')
early_stop = EarlyStopping(monitor='loss', patience=10, verbose=1)
history = nn_model.fit(x_train, y_train, epochs=50, batch_size=1, verbose=1, shuffle=False)

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/50
[1m4733/4733[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 2ms/step - loss: 2.3092
Epoch 2/50
[1m4733/4733[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 3ms/step - loss: 2.1341
Epoch 3/50
[1m4733/4733[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 2ms/step - loss: 2.1340
Epoch 4/50
[1m4733/4733[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 2ms/step - loss: 2.1341
Epoch 5/50
[1m4733/4733[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 2ms/step - loss: 2.1341
Epoch 6/50
[1m4733/4733[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 2ms/step - loss: 2.1341
Epoch 7/50
[1m4733/4733[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 2ms/step - loss: 2.1341
Epoch 8/50
[1m4733/4733[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 2ms/step - loss: 2.1341
Epoch 9/50
[1m4733/4733[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 1ms/step - loss: 2.1341
Epoch 10/50
[1m4733/4733[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m 

In [5]:
y_pred_test_nn = nn_model.predict(x_test)
y_train_pred_nn = nn_model.predict(x_train)
print("The R2 score on the Train set is:\t{:0.3f}".format(r2_score(y_train, y_train_pred_nn)))
print("The R2 score on the Test set is:\t{:0.3f}".format(r2_score(y_test, y_pred_test_nn)))

[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
[1m148/148[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
The R2 score on the Train set is:	-0.000
The R2 score on the Test set is:	-0.001
