In [12]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from tensorflow.keras.callbacks import EarlyStopping
import joblib
import math
import json

# 1. Load Data
# Ensure 'Aquifer_Petrignano.csv' is in your folder
df = pd.read_csv('Aquifer_Petrignano.csv')

# 2. Rename columns for clarity
df = df.rename(columns={
    'Date': 'DATE',
    'Rainfall_Bastia_Umbra': 'RAIN',
    'Depth_to_Groundwater_P25': 'DEPTH',
    'Temperature_Bastia_Umbra': 'TEMP',
    'Volume_C10_Petrignano': 'DEMAND'
})

# 3. Filter for required columns
df = df[['DATE', 'RAIN', 'TEMP', 'DEPTH', 'DEMAND']]

# 4. Data Cleaning
# Convert Date
df['DATE'] = pd.to_datetime(df['DATE'], format='%d/%m/%Y')
df = df.sort_values('DATE')

# Convert Demand to Positive (Absolute value)
df['DEMAND'] = df['DEMAND'].abs()

# Handle Missing Values (Interpolation)
# We remove the very first rows if they are empty, then fill gaps
df = df.dropna(subset=['DEMAND']) # Drop rows where target is totally missing
df = df.interpolate(method='linear', limit_direction='forward')
df = df.fillna(0) # Fill any remaining gaps with 0

# Set Date as Index
df = df.set_index('DATE')

# Select Features: We use Rain, Temp, and Depth to predict Demand
features = ['DEMAND', 'RAIN', 'TEMP', 'DEPTH']
dataset = df[features].values.astype('float32')

In [13]:
# --- 2. Scaling and Splitting ---

# Scale all features to range [0, 1]
scaler = MinMaxScaler(feature_range=(0, 1))
scaled_data = scaler.fit_transform(dataset)

# Split into Train and Test sets (80/20 split)
train_size = int(len(scaled_data) * 0.8)
train_data = scaled_data[0:train_size, :]
test_data = scaled_data[train_size:len(scaled_data), :]

print(f"Total Data Points: {len(scaled_data)}")
print(f"Training Set: {len(train_data)}")
print(f"Testing Set: {len(test_data)}")

Total Data Points: 5025
Training Set: 4020
Testing Set: 1005


In [14]:
def create_dataset(dataset, look_back=60):
    X, Y = [], []
    for i in range(len(dataset) - look_back - 1):
        a = dataset[i:(i + look_back), :]
        X.append(a)
        Y.append(dataset[i + look_back, 0])
    return np.array(X), np.array(Y)

look_back = 60
X_train, y_train = create_dataset(train_data, look_back)
X_test, y_test = create_dataset(test_data, look_back)

X_train = np.reshape(X_train, (X_train.shape[0], X_train.shape[1], X_train.shape[2]))
X_test = np.reshape(X_test, (X_test.shape[0], X_test.shape[1], X_test.shape[2]))

In [15]:
# --- 4. Model Architecture and Training ---

model = Sequential()
# Layer 1: LSTM with Return Sequences (to pass data to next LSTM layer)
model.add(LSTM(64, return_sequences=True, input_shape=(look_back, X_train.shape[2])))
# Layer 2: Standard LSTM
model.add(LSTM(64, return_sequences=False))
# Layer 3: Dense Layer for interpretation
model.add(Dense(32, activation='relu'))
# Layer 4: Output Layer (Predicting 1 value: DEMAND)
model.add(Dense(1))

# Setup Early Stopping
es = EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=10)

# Compile and Train
model.compile(optimizer='adam', loss='mean_squared_error')
history = model.fit(
    X_train, 
    y_train, 
    validation_split=0.1, 
    epochs=50, 
    batch_size=16, 
    verbose=1, 
    callbacks=[es]
)

# Generate Predictions
train_predict = model.predict(X_train)
test_predict = model.predict(X_test)

Epoch 1/50


  super().__init__(**kwargs)


[1m223/223[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 36ms/step - loss: 0.0118 - val_loss: 0.0021
Epoch 2/50
[1m223/223[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 36ms/step - loss: 0.0043 - val_loss: 0.0018
Epoch 3/50
[1m223/223[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 36ms/step - loss: 0.0041 - val_loss: 0.0025
Epoch 4/50
[1m223/223[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 34ms/step - loss: 0.0040 - val_loss: 0.0046
Epoch 5/50
[1m223/223[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 37ms/step - loss: 0.0039 - val_loss: 0.0015
Epoch 6/50
[1m223/223[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 39ms/step - loss: 0.0037 - val_loss: 0.0016
Epoch 7/50
[1m223/223[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 36ms/step - loss: 0.0036 - val_loss: 0.0014
Epoch 8/50
[1m223/223[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 34ms/step - loss: 0.0035 - val_loss: 0.0017
Epoch 9/50
[1m223/223[0m [32m━━━━━━━━━━

In [16]:
# --- 5. Save Model and Scaler ---
# We need to save the scaler to inverse_transform the predictions later
joblib.dump(scaler, 'scaler.pkl')

# Save the trained Keras model
model.save('water_demand_lstm_model.h5')

print("Model and Scaler saved successfully!")



Model and Scaler saved successfully!
