In [2]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from tensorflow.keras.callbacks import EarlyStopping
import joblib
import math

levels = pd.read_csv('chennai_reservoir_levels.csv')
rainfall = pd.read_csv('chennai_reservoir_rainfall.csv')

In [3]:
levels.columns = [col.strip().upper() for col in levels.columns]
rainfall.columns = [col.strip().upper() for col in rainfall.columns]

levels['DATE'] = pd.to_datetime(levels['DATE'], format='%d-%m-%Y')
rainfall['DATE'] = pd.to_datetime(rainfall['DATE'], format='%d-%m-%Y')

rainfall['TOTAL_RAIN'] = rainfall[['POONDI', 'CHOLAVARAM', 'REDHILLS', 'CHEMBARAMBAKKAM']].sum(axis=1)

df = levels[['DATE', 'POONDI']].merge(rainfall[['DATE', 'TOTAL_RAIN']], on='DATE', how='left')

df.set_index('DATE', inplace=True)
df = df.fillna(0) 

features = ['POONDI', 'TOTAL_RAIN']
dataset = df[features].values.astype('float32')

In [4]:
scaler = MinMaxScaler(feature_range=(0, 1))
scaled_data = scaler.fit_transform(dataset)

train_size = int(len(scaled_data) * 0.8)
train_data = scaled_data[0:train_size]
test_data = scaled_data[train_size:len(scaled_data)]

In [5]:
def create_dataset(dataset, look_back=60):
    X, Y = [], []
    for i in range(len(dataset) - look_back - 1):
        # X takes the sequence of all features for the look_back period
        a = dataset[i:(i + look_back), :]
        X.append(a)
        # Y takes only the target variable (POONDI level) at the next time step
        Y.append(dataset[i + look_back, 0])
    return np.array(X), np.array(Y)

In [6]:
look_back = 60
X_train, y_train = create_dataset(train_data, look_back)
X_test, y_test = create_dataset(test_data, look_back)

X_train = np.reshape(X_train, (X_train.shape[0], X_train.shape[1], X_train.shape[2]))
X_test = np.reshape(X_test, (X_test.shape[0], X_test.shape[1], X_test.shape[2]))

In [7]:
model = Sequential()
model.add(LSTM(64, return_sequences=True, input_shape=(look_back, X_train.shape[2])))
model.add(LSTM(64, return_sequences=False))
model.add(Dense(32, activation='relu'))
model.add(Dense(1))

es = EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=10)

model.compile(optimizer='adam', loss='mean_squared_error')

  super().__init__(**kwargs)


In [9]:
history = model.fit(
    X_train, 
    y_train, 
    validation_split=0.1, 
    epochs=50, 
    batch_size=16, 
    verbose=1,
    callbacks=[es]
)

train_predict = model.predict(X_train)
test_predict = model.predict(X_test)

Epoch 1/50
[1m275/275[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 20ms/step - loss: 5.0761e-04 - val_loss: 9.4640e-05
Epoch 2/50
[1m275/275[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 20ms/step - loss: 4.9813e-04 - val_loss: 6.6475e-05
Epoch 3/50
[1m275/275[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 19ms/step - loss: 4.8513e-04 - val_loss: 7.7160e-05
Epoch 4/50
[1m275/275[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 19ms/step - loss: 4.8022e-04 - val_loss: 1.1867e-04
Epoch 5/50
[1m275/275[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 19ms/step - loss: 4.8760e-04 - val_loss: 9.6880e-05
Epoch 6/50
[1m275/275[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 20ms/step - loss: 4.9775e-04 - val_loss: 7.5396e-05
Epoch 7/50
[1m275/275[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 20ms/step - loss: 4.9780e-04 - val_loss: 8.3463e-05
Epoch 8/50
[1m275/275[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 23ms/step - loss: 4.

In [11]:
import json

# --- Inverse Transform Logic ---
# We need a dummy array because the scaler expects 2 columns (Level, Rain)
# but our prediction is just 1 column (Level).
def inverse_transform_prediction(prediction, scaler, n_features):
    dummy_array = np.zeros((len(prediction), n_features))
    dummy_array[:, 0] = prediction.flatten()
    return scaler.inverse_transform(dummy_array)[:, 0]

n_features = X_train.shape[2]

# Convert scaled predictions back to MCFT
train_predict_rescaled = inverse_transform_prediction(train_predict, scaler, n_features)
y_train_rescaled = inverse_transform_prediction(y_train.reshape(-1, 1), scaler, n_features)
test_predict_rescaled = inverse_transform_prediction(test_predict, scaler, n_features)
y_test_rescaled = inverse_transform_prediction(y_test.reshape(-1, 1), scaler, n_features)

# --- Calculate Statistics ---
train_rmse = math.sqrt(mean_squared_error(y_train_rescaled, train_predict_rescaled))
test_rmse = math.sqrt(mean_squared_error(y_test_rescaled, test_predict_rescaled))
test_mae = mean_absolute_error(y_test_rescaled, test_predict_rescaled)

print("\n--- Model Evaluation (Multi-Variate) ---")
print(f"Train RMSE: {train_rmse:.2f} MCFT")
print(f"Test RMSE: {test_rmse:.2f} MCFT")
print(f"Test MAE: {test_mae:.2f} MCFT")

# --- Save Artifacts ---
metrics = {
    "test_rmse": round(test_rmse, 2),
    "test_mae": round(test_mae, 2),
    "look_back": look_back,
    "lstm_units": 64,
    "n_features": X_train.shape[2],
    "train_samples": X_train.shape[0]
}

with open('model_metrics.json', 'w') as f:
    json.dump(metrics, f, indent=4)

model.save('poondi_multi_variate_lstm_model.h5')
joblib.dump(scaler, 'multi_variate_scaler.pkl')
print("\n✅ Model and Metrics Saved Successfully!")




--- Model Evaluation (Multi-Variate) ---
Train RMSE: 74.38 MCFT
Test RMSE: 35.50 MCFT
Test MAE: 21.43 MCFT

✅ Model and Metrics Saved Successfully!
