Import dependencies

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout, Bidirectional
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.optimizers import Adam


Import dataset

In [4]:
# Load the daily rainfall CSV, parse the 'DateTime' column into timestamps
# and use it as the index of the frame.
df = pd.read_csv('lawas/lawas-rainfall-daily.csv', encoding='latin')
df = df.assign(DateTime=pd.to_datetime(df['DateTime'])).set_index('DateTime')

# Columns carried into the multivariate working frame; .copy() detaches
# df_multi from df so later column additions do not touch the raw data.
feature_columns = [
    'Rainfall (mm)',
    'TOTAL',
    'ClimAdjust',
    'ANOM',
    'Temperature',
    'Dew Point',
    'Humidity',
    'Wind Speed',
    'Pressure',
]
df_multi = df.loc[:, feature_columns].copy()

Feature engineering (lagged features, rolling statistics, date-based features)

In [3]:
# Columns that receive lagged copies and rolling-window statistics.
key_columns = ['Rainfall (mm)', 'Temperature', 'Dew Point', 'TOTAL']
lag_steps = (30, 60, 90)
roll_windows = (7, 14, 30)

# Lagged features: each key column shifted down by 30, 60 and 90 rows
# (rows correspond to days only if the index is a gap-free daily series).
for col in key_columns:
    source = df_multi[col]
    for lag in lag_steps:
        df_multi[f'{col}_lag{lag}'] = source.shift(lag)

# Rolling mean / standard deviation / sum over 7-, 14- and 30-row windows.
# NOTE(review): each window includes the current row, so the
# 'Rainfall (mm)' rolling features contain the same-day value - confirm
# this is intended if same-day rainfall is the prediction target.
for col in key_columns:
    source = df_multi[col]
    for window in roll_windows:
        roller = source.rolling(window)
        prefix = f'{col}_roll{window}'
        df_multi[prefix + '_mean'] = roller.mean()
        df_multi[prefix + '_std'] = roller.std()
        df_multi[prefix + '_sum'] = roller.sum()

# Calendar features taken from the DatetimeIndex.
# A season feature (month % 12 // 3 + 1) was sketched here but is disabled.
timestamps = df_multi.index
for part in ('day', 'month', 'year'):
    df_multi[part] = getattr(timestamps, part)

# Preview the last rows of the engineered frame.
df_multi.tail()


Unnamed: 0_level_0,Rainfall (mm),TOTAL,ClimAdjust,ANOM,Temperature,Dew Point,Humidity,Wind Speed,Pressure,Wind,...,TOTAL_roll7_sum,TOTAL_roll14_mean,TOTAL_roll14_std,TOTAL_roll14_sum,TOTAL_roll30_mean,TOTAL_roll30_std,TOTAL_roll30_sum,day,month,year
DateTime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2021-01-27,0.0,25.56,26.55,-0.99,81.333333,74.833333,81.875,6.083333,29.6625,WNW,...,178.92,25.56,0.0,357.84,25.549,0.033564,766.47,27,1,2021
2021-01-28,61.5,25.56,26.55,-0.99,81.958333,76.25,83.375,7.416667,29.6875,WNW,...,178.92,25.56,0.0,357.84,25.552667,0.027908,766.58,28,1,2021
2021-01-29,4.0,25.56,26.55,-0.99,82.541667,74.041667,76.875,6.958333,29.675,NW,...,178.92,25.56,0.0,357.84,25.556333,0.020083,766.69,29,1,2021
2021-01-30,9.0,25.56,26.55,-0.99,81.041667,74.25,81.125,6.583333,29.695,NW,...,178.92,25.56,0.0,357.84,25.56,0.0,766.8,30,1,2021
2021-01-31,0.0,25.56,26.55,-0.99,81.0,75.0,84.0,12.0,29.72,NW,...,178.92,25.56,0.0,357.84,25.56,0.0,766.8,31,1,2021
