In [1]:
#import libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.callbacks import EarlyStopping, TensorBoard
from tensorflow.keras.regularizers import l2
from tensorflow.keras import backend as K
import tensorflow as tf
import os

In [2]:
from datetime import datetime
from tensorflow.python.framework.ops import disable_eager_execution

disable_eager_execution()

In [3]:
tf.random.set_seed(36)

### Dataset

In [4]:
BASE_DIR_PATH = '/Users/neilb/Documents/dsci_thesis/Datasets'
DATASET_FILE = os.path.join(BASE_DIR_PATH, 'compiled_data_2016_2017.csv')

In [5]:
df = pd.read_csv(DATASET_FILE)
df.head()

Unnamed: 0.1,Unnamed: 0,index,Rainfall_Aries,Rainfall_Boso,Rainfall_Campana,Rainfall_Nangka,Rainfall_Oro,Waterlevel_Sto_Nino,Waterlevel_Montalban,Discharge_Sto_Nino,Discharge_San_Jose,Cross_Section_Sto_Nino,Cross_Section_Montalban,Velocity_Sto_Nino,Velocity_Montalban,datetime,t,x
0,0,0,0,1,2,0,0,12.18,21.03,21.033407,14.842428,803.88,630.9,0.026165,0.023526,2016-01-01 00:00:00,0.0,14420
1,1,1,0,1,1,1,0,12.19,21.03,21.280072,14.842428,804.54,630.9,0.02645,0.023526,2016-01-01 01:00:00,3600.0,14420
2,2,2,1,1,1,0,1,12.19,21.03,21.280072,14.842428,804.54,630.9,0.02645,0.023526,2016-01-01 02:00:00,7200.0,14420
3,3,3,0,0,0,1,0,12.2,21.03,21.529056,14.842428,805.2,630.9,0.026738,0.023526,2016-01-01 03:00:00,10800.0,14420
4,4,4,1,1,1,0,0,12.2,21.03,21.529056,14.842428,805.2,630.9,0.026738,0.023526,2016-01-01 04:00:00,14400.0,14420


In [6]:
n = len(df)

In [7]:
df['friction_coeff'] = [0.033 for i in range(n)]
df['slope'] = [1/1500 for i in range(n)]
df.head()

Unnamed: 0.1,Unnamed: 0,index,Rainfall_Aries,Rainfall_Boso,Rainfall_Campana,Rainfall_Nangka,Rainfall_Oro,Waterlevel_Sto_Nino,Waterlevel_Montalban,Discharge_Sto_Nino,Discharge_San_Jose,Cross_Section_Sto_Nino,Cross_Section_Montalban,Velocity_Sto_Nino,Velocity_Montalban,datetime,t,x,friction_coeff,slope
0,0,0,0,1,2,0,0,12.18,21.03,21.033407,14.842428,803.88,630.9,0.026165,0.023526,2016-01-01 00:00:00,0.0,14420,0.033,0.000667
1,1,1,0,1,1,1,0,12.19,21.03,21.280072,14.842428,804.54,630.9,0.02645,0.023526,2016-01-01 01:00:00,3600.0,14420,0.033,0.000667
2,2,2,1,1,1,0,1,12.19,21.03,21.280072,14.842428,804.54,630.9,0.02645,0.023526,2016-01-01 02:00:00,7200.0,14420,0.033,0.000667
3,3,3,0,0,0,1,0,12.2,21.03,21.529056,14.842428,805.2,630.9,0.026738,0.023526,2016-01-01 03:00:00,10800.0,14420,0.033,0.000667
4,4,4,1,1,1,0,0,12.2,21.03,21.529056,14.842428,805.2,630.9,0.026738,0.023526,2016-01-01 04:00:00,14400.0,14420,0.033,0.000667


In [8]:
train_2016_2017 = df[:int(0.50*n)]
val_2016_2017 = df[int(0.50*n):int(0.75*n)]
test_2016_2017 = df[int(0.75*n):]

In [9]:
X_train_2016_2017 = np.array(train_2016_2017[['x','t', 'Discharge_Sto_Nino', 'friction_coeff', 'slope', 'Rainfall_Aries', 'Rainfall_Boso', 'Rainfall_Campana', 'Rainfall_Nangka', 'Rainfall_Oro']].values.tolist())
X_val_2016_2017 = np.array(val_2016_2017[['x','t', 'Discharge_Sto_Nino', 'friction_coeff', 'slope', 'Rainfall_Aries', 'Rainfall_Boso', 'Rainfall_Campana', 'Rainfall_Nangka', 'Rainfall_Oro']].values.tolist())
X_test_2016_2017 = np.array(test_2016_2017[['x','t', 'Discharge_Sto_Nino', 'friction_coeff', 'slope', 'Rainfall_Aries', 'Rainfall_Boso', 'Rainfall_Campana', 'Rainfall_Nangka', 'Rainfall_Oro']].values.tolist())
Y_train_2016_2017 = np.array(train_2016_2017[['Velocity_Sto_Nino','Waterlevel_Sto_Nino']].values.tolist())
Y_val_2016_2017 = np.array(val_2016_2017[['Velocity_Sto_Nino','Waterlevel_Sto_Nino']].values.tolist())
Y_test_2016_2017 = np.array(test_2016_2017[['Velocity_Sto_Nino','Waterlevel_Sto_Nino']].values.tolist())

### Defining evaluation metrics

In [10]:
def r_square(y_true, y_pred):
    x = y_true
    y = y_pred
    mx = K.mean(x, axis=0)
    my = K.mean(y, axis=0)
    xm, ym = x - mx, y - my
    r_num = K.square(K.sum(xm * ym))
    x_square_sum = K.sum(xm * xm)
    y_square_sum = K.sum(ym * ym)
    r_den = (x_square_sum * y_square_sum) + K.epsilon()
    
    r = r_num / r_den
    return r

In [11]:
def NSE(y_true, y_pred):

    y_pred = K.flatten(y_pred)
    y_true = K.flatten(y_true)

    
    SS_res =  K.sum(K.square(y_true - y_pred)) 
    SS_tot = K.sum(K.square(y_true - K.mean(y_true))) 
    
    return ( 1 - SS_res/(SS_tot + K.epsilon()) )

In [12]:
def custom_loss(grads_inputs, physics_weight=1.0):
    """Modified loss function with adjustable physics weight and improved numerical stability"""
    du_dx, du_dt, dh_dx, fric_coeff, slope = (grads_inputs[:,i] for i in range(5))
    g = K.constant(9.81)  # More precise gravity constant
    
    def loss(y_true, y_pred):
        # Data loss
        mse_loss = K.mean(K.square(y_pred - y_true), axis=0)
        data_loss = K.sum(mse_loss)
        
        # Physics loss with improved numerical stability
        u = y_pred[:,0]  # velocity
        h = y_pred[:,1]  # water level
        
        # Saint-Venant equation terms
        momentum_eq = (
            du_dt +                                         # ∂u/∂t
            u * du_dx +                                     # u∂u/∂x
            g * dh_dx +                                     # g∂h/∂x
            g * slope +                                     # gS₀
            g * K.square(fric_coeff) * K.square(y_true[:,0]) /       # gn²u²/h^(4/3)
            (K.pow(y_true[:,1], 4/3) + K.epsilon())
        )
        
        physics_loss = K.mean(K.square(momentum_eq))
        
        # Combined loss with weighting
        return data_loss + physics_weight * physics_loss
    
    return loss

### Defining model

In [13]:
def create_pinn_model(n1, n2, n3):
    lstm_model = tf.keras.models.Sequential([
        tf.keras.layers.Reshape((1, 10), input_shape=(10,)),  # Reshape to 3D
        tf.keras.layers.LSTM(n1, return_sequences=True),
        tf.keras.layers.Flatten(),  # Add Flatten layer to handle dimension mismatch
        tf.keras.layers.Dense(units=n2, activation='relu'),
        tf.keras.layers.Dense(units=n3, activation='relu'),
        tf.keras.layers.Dense(units=2)
    ])

    return lstm_model

## Training Loop

In [14]:
def train_pinn(X_train, Y_train, X_val, Y_val, n1=64, n2=64, n3=64, 
               reg_const=0, physics_weight=1.0, epochs=20, 
               batch_size=32, patience=2):
    
    # Create and compile model
    model = create_pinn_model(n1, n2, n3)
    
    # Calculate gradients for physics loss
    grads_u = K.gradients(model.output[:,0], model.input)[0]
    grads_h = K.gradients(model.output[:,1], model.input)[0]
    du_dx, du_dt, dh_dx = grads_u[:,0], grads_u[:,1], grads_h[:,0]
    calc_grads_inputs = K.stack(
        (du_dx, du_dt, dh_dx, model.input[:,3], model.input[:,4]), 
        axis=1
    )
    
    # Compile with custom loss
    model.compile(
        optimizer='adam',
        loss=custom_loss(calc_grads_inputs),
        metrics=['mse', NSE, r_square]
    )
    
    # Train with early stopping
    early_stopping = EarlyStopping(
        monitor='val_loss',
        patience=patience
    )
    
    history = model.fit(
        X_train, Y_train,
        epochs=epochs,
        batch_size=batch_size,
        validation_data=(X_val, Y_val),
        callbacks=[early_stopping],
        verbose=1
    )

    return model, history

In [15]:
model, history = train_pinn(X_train_2016_2017, Y_train_2016_2017, X_val_2016_2017, Y_val_2016_2017)

Train on 8760 samples, validate on 4380 samples
Epoch 1/20

  updates = self.state_updates


Epoch 2/20
Epoch 3/20
Epoch 4/20


## Testing

In [16]:
X_test_2016_2017.shape

(4380, 10)

In [17]:
predictions = model.predict(X_test_2016_2017)
predictions

  updates=self.state_updates,


array([[ 0.05704231, 12.502163  ],
       [ 0.05704231, 12.502163  ],
       [ 0.05704231, 12.502163  ],
       ...,
       [ 0.05704231, 12.502163  ],
       [ 0.05704231, 12.502163  ],
       [ 0.05704231, 12.502163  ]], dtype=float32)

In [18]:
y_pred_h = predictions[:,1]
y_pred_v = predictions[:,0]

In [19]:
Y_test_2016_2017

array([[ 0.01888235, 11.89      ],
       [ 0.01844587, 11.87      ],
       [ 0.0180173 , 11.85      ],
       ...,
       [ 0.03440052, 12.44      ],
       [ 0.03440052, 12.44      ],
       [ 0.03440052, 12.44      ]])

In [20]:
y_true_h = Y_test_2016_2017[:,1]
y_true_v = Y_test_2016_2017[:,0]

In [21]:
mse_h = np.mean(np.square(y_true_h - y_pred_h))
mse_v = np.mean(np.square(y_true_v - y_pred_v))

mse_h, mse_v

(0.32846772676671043, 0.0017727264578701)

In [22]:
(mse_h + mse_v)/2

0.16512022661229026

In [23]:
SS_res_h = np.sum(np.square(y_true_h - y_pred_h)) 
SS_tot_h = np.sum(np.square(y_true_h - np.mean(y_true_h)))

NSE_h = 1 - SS_res_h/(SS_tot_h)
NSE_h

-0.0579383618526601

In [24]:
SS_res_v = np.sum(np.square(y_true_v - y_pred_v)) 
SS_tot_v = np.sum(np.square(y_true_v - np.mean(y_true_v)))

NSE_v = 1 - SS_res_v/(SS_tot_v)
NSE_v

-0.04993967642249375

In [25]:
np.savetxt('pinn_predictions.csv', y_pred_h, delimiter=',', fmt='%.4f')