# Machine learning applied to 0D reactors with multiple dt prediction: NODE method

In [None]:
# Set to True when running on Google Colab: this enables the repository clone, the
# package installation and the Google Drive mount below, and switches the project
# imports and the data folder accordingly.
use_colab = False

## Google colab preparation

These lines enable running the notebook on Google Colab. We need to `git clone` the repository to get access to the Python scripts. This must be repeated for every new runtime, because the cloned files are lost when the runtime ends.

In [None]:
import os

if use_colab:
    !git clone -b cost_course_exercices https://github.com/cmehl/ML_chem.git
    
    !pip install cantera

    # Mount Google Drive
    from google.colab import drive
    drive.mount('/content/drive')

    # Create a folder in the root directory
    if not os.path.isdir("/content/drive/MyDrive/ML_chem_data"):
        !mkdir -p "/content/drive/MyDrive/ML_chem_data"
    else:
        print("Folder /content/drive/MyDrive/ML_chem_data already exists")

## Imports and options

In [None]:
# Reload imported modules automatically before executing a cell, so that edits to the
# project's Python files are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
import os
import time
import json
import joblib
import numpy as np
import pandas as pd

import cantera as ct

import torch
import torch.nn as nn
import torch.optim as optim
import torchdiffeq as tdf

import matplotlib.pyplot as plt
import seaborn as sns
sns.set_theme("notebook")

if use_colab:
    from ML_chem.chem_ai.cantera_runs import compute_nn_cantera_0D_homo
    from ML_chem.chem_ai.utils import get_molar_mass_atomic_matrix
    from ML_chem.chem_ai.utils import StandardScaler
else:
    from chem_ai.cantera_runs import compute_nn_cantera_0D_homo
    from chem_ai.utils import get_molar_mass_atomic_matrix
    from chem_ai.utils import StandardScaler

We set the default PyTorch precision to double. This slows the training down slightly, but double precision is the usual standard for reacting-flow CFD applications.

In [None]:
# From here on, floating-point tensors and layer parameters created without an
# explicit dtype are double precision.
torch.set_default_dtype(torch.float64)

We identify the device (CPU or GPU) available on the machine. This will be used by pytorch to identify the device on which to train and use the model:

In [None]:
# Use the first CUDA device when one is visible, otherwise stay on the CPU.
_use_gpu = torch.cuda.is_available()
device = torch.device('cuda:0' if _use_gpu else 'cpu')
print('Running on the GPU' if _use_gpu else 'Running on the CPU')

## Preliminary

We define the folder containing the desired database:

In [None]:
# Case folder holding the database: on the mounted Drive for Colab, local otherwise.
folder = (
    "/content/drive/MyDrive/ML_chem_data/case_0D_test_multidt"
    if use_colab
    else "./case_0D_test_multidt_node"
)

We load the parameters stored in the JSON file of the database folder:

In [None]:
# Read the database settings saved in the case folder.
params_path = os.path.join(folder, "dtb_params.json")
with open(params_path, "r") as file:
    dtb_params = json.load(file)

# Unpack the entries used in the rest of the notebook.
fuel, mech_file, log_transform, threshold, p, dt = (
    dtb_params["fuel"],
    dtb_params["mech_file"],
    dtb_params["log_transform"],
    dtb_params["threshold"],
    dtb_params["p"],
    dtb_params["dt"],
)

# Echo them (the stored "dt" entry is reported as dt_min).
print(f"fuel={fuel}")
print(f"mech_file={mech_file}")
print(f"log_transform={log_transform}")
print(f"threshold={threshold}")
print(f"p={p}")
print(f"dt_min={dt}")

We load the scaler:

In [None]:
# Scaler object stored with the processed database (presumably written by the
# database-generation step); its `mean` and `std` attributes are used further down.
# NOTE: joblib.load unpickles the file, so only load scalers from a trusted source.
Xscaler = joblib.load(os.path.join(folder, "processed_database", "Xscaler.pkl"))

We load the training and validation databases:

In [None]:
# Inputs and targets are stored in <folder>/processed_database ...
processed_dir = os.path.join(folder, "processed_database")

X_train = np.load(os.path.join(processed_dir, "X_train.npy"))
X_val = np.load(os.path.join(processed_dir, "X_val.npy"))
Y_train = np.load(os.path.join(processed_dir, "Y_train.npy"))
Y_val = np.load(os.path.join(processed_dir, "Y_val.npy"))

# ... while the dt arrays sit directly in the case folder.
dt_array_train, dt_array_val = (
    np.load(os.path.join(folder, "dt_array_train.npy")),
    np.load(os.path.join(folder, "dt_array_val.npy")),
)

Number of input and output dimensions, and number of dt values:

In [None]:
# Number of samples in each set (first axis).
n_s_train, n_s_val = len(X_train), len(X_val)

# Input size (axis 1 of X), output size and number of dt values (axes 1 and 2 of Y).
n_in = X_train.shape[1]
n_out, nb_dt = Y_train.shape[1], Y_train.shape[2]

In [None]:
# Cantera object built from the mechanism file named in the database parameters;
# only its list of species names is used here.
gas = ct.Solution(mech_file)
# Matrix returned by the project helper for these species and the fuel. Presumably it
# relates species to their element (C, H, O, N) content; see chem_ai.utils to confirm,
# including the meaning of the last (True) argument.
A_element = get_molar_mass_atomic_matrix(gas.species_names, fuel, True)
print(A_element)

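In this method, *dt* is not an input of the network: the network models the time derivative of the state (Neural ODE), and the prediction at a given *dt* is obtained by integrating this derivative in time.

We first need to prepare the datasets so that we have the list of inputs and the corresponding outputs.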

### Model training


The integration times are here fixed for all samples, to ease the NeuralODE integration. We just select the first row of *dt_array_train* for instance.

In [None]:
# NOTE(review): dt_max is only referenced by the commented-out normalisation below.
dt_max = 2.0e-6

# First row of the training dt array: the same times are used for every sample.
integration_times = dt_array_train[0,:]
# integration_times = integration_times/dt_max

# NOTE(review): this overrides the selection made just above, so the ODE is only
# integrated from 0 to the database dt. torchdiffeq returns the state at every
# requested time, the first one being the initial condition, so the model output
# then has two time entries. Confirm that this matches the last axis of Y_train (nb_dt).
integration_times = [0,dt]

In [None]:
def _as_f64(values):
    """Copy array-like data into a double-precision torch tensor."""
    return torch.tensor(values, dtype=torch.float64)


# Databases
X_train, Y_train = _as_f64(X_train), _as_f64(Y_train)
X_val, Y_val = _as_f64(X_val), _as_f64(Y_val)

# Times at which the ODE solution is requested
integration_times = _as_f64(integration_times)

In [None]:
# Tensor views of the scaler statistics: torch.from_numpy shares memory with the
# NumPy arrays and keeps their dtype.
# NOTE(review): in the visible part of the notebook these tensors are only moved to
# the device and not used afterwards.
Xscaler_mean = torch.from_numpy(Xscaler.mean)
Xscaler_std = torch.from_numpy(Xscaler.std)

In [None]:
# Double-precision tensor copy of the matrix; it is converted back to a NumPy array
# when it is passed to the test simulations.
A_element = torch.tensor(A_element, dtype=torch.float64)

In [None]:
# Move the databases to the selected device.
X_train, Y_train = X_train.to(device), Y_train.to(device)
X_val, Y_val = X_val.to(device), Y_val.to(device)

# The dt arrays are left untouched (NumPy arrays on the host).

# Scaler statistics and element matrix follow the data.
Xscaler_mean, Xscaler_std = Xscaler_mean.to(device), Xscaler_std.to(device)
A_element = A_element.to(device)

We now can generate the model. 

In [None]:
class ANN(nn.Module):
    """Fully connected network with two hidden layers of 100 ReLU units.

    The input and output sizes are read from the notebook-level variables
    ``n_in`` and ``n_out`` when the network is instantiated.
    """

    def __init__(self):
        super().__init__()
        width = 100
        self.hidden1 = nn.Linear(n_in, width)
        self.act1 = nn.ReLU()
        self.hidden2 = nn.Linear(width, width)
        self.act2 = nn.ReLU()
        self.output = nn.Linear(width, n_out)

    def forward(self, x):
        """Return the network output for ``x`` (last dimension of size ``n_in``)."""
        first = self.act1(self.hidden1(x))
        second = self.act2(self.hidden2(first))
        return self.output(second)
    

class _ODEFunc(nn.Module):
    """Continuous surrogate dynamic system model.

    Adapts a state-only network to the ``f(t, x)`` signature expected by the
    torchdiffeq solvers. The time argument is ignored, i.e. the modelled
    system is autonomous.
    """
    def __init__(self, module):
        super().__init__()
        self.module = module

    def forward(self, t, x):
        # `t` is required by the solver interface but not used by the network.
        return self.module(x)


class NeuralODE(nn.Module):
    """Neural ODE Net: using torchdiffeq package (Chen et al. NeurIPS 2018).

    The right-hand side of the ODE is an ``ANN`` wrapped in ``_ODEFunc``.

    Parameters
    ----------
    solver : str
        Name of the torchdiffeq integration method used by default.
    integration_time : sequence of float or torch.Tensor
        Default times at which the solution is requested; stored as a
        double-precision tensor.
    """
    def __init__(self, solver: str = 'dopri5', integration_time=(0, 1)):

        super().__init__()

        self.odefunc = _ODEFunc(ANN())
        self.solver = solver
        # Plain attribute (not a buffer): it is moved to the input's device in forward().
        self.integration_time = torch.tensor(integration_time, dtype=torch.float64)

    def forward(self, x: torch.Tensor, adjoint: bool = True, integration_time=None, rtol=1e-7, atol=1e-9, inf_solver=None):
        """Integrate the learned ODE starting from ``x``.

        Parameters
        ----------
        x : torch.Tensor
            Initial state.
        adjoint : bool
            Use ``odeint_adjoint`` (True) or plain ``odeint`` (False).
        integration_time : torch.Tensor or sequence of float, optional
            Times at which the solution is returned; defaults to the times
            given at construction. Non-tensor inputs are converted to a
            double-precision tensor.
        rtol, atol : float
            Tolerances forwarded to the solver.
        inf_solver : str, optional
            Solver name overriding the one chosen at construction.

        Returns
        -------
        torch.Tensor
            Solver output; its first dimension indexes the requested times.
        """
        if integration_time is None:
            integration_time = self.integration_time
        elif not torch.is_tensor(integration_time):
            # Accept lists, tuples and NumPy arrays, consistently with __init__.
            integration_time = torch.tensor(integration_time, dtype=torch.float64)
        integration_time = integration_time.to(x.device)

        # method to solve the neuralODE by torchdiffeq
        ode_method = tdf.odeint_adjoint if adjoint else tdf.odeint
        solver = inf_solver if inf_solver else self.solver
        out = ode_method(
              self.odefunc, x, integration_time, rtol=rtol,
              atol=atol, method=solver)

        return out

The model is then instantiated and transferred to the GPU if present:

In [None]:
# The solver is set to "rk4" here instead of the class default ('dopri5').
model_node = NeuralODE(solver="rk4")
print(model_node)

# Move the model to the selected device (GPU when available)
model_node = model_node.to(device)

In [None]:
# Number of passes over the training set
n_epochs = 100
# Mini-batch size; main_training_loop reads this notebook-level variable
batch_size = 256

# Mean squared error between predicted and target states
loss_fn = nn.MSELoss()
# Adam optimizer acting on all parameters of the Neural ODE
optimizer = optim.Adam(model_node.parameters(), lr=0.001)

In [None]:
def main_training_loop(X_train, X_val, Y_train, Y_val, integration_times, loss_fn, optimizer, n_epochs, model, log_transform, batch_size=None):
    """Train ``model`` with mini-batches and monitor the validation loss.

    Parameters
    ----------
    X_train, X_val : torch.Tensor
        Training / validation inputs, indexed as ``X[sample, variable]``.
    Y_train, Y_val : torch.Tensor
        Training / validation targets, indexed as ``Y[sample, variable, time]``.
    integration_times : torch.Tensor
        Times forwarded to ``model`` through its ``integration_time`` argument.
    loss_fn : callable
        Loss applied to ``(prediction, target)``.
    optimizer : torch.optim.Optimizer
        Optimizer holding the parameters of ``model``.
    n_epochs : int
        Number of passes over the training set.
    model : torch.nn.Module
        Model to train. It is called as ``model(x, integration_time=...)`` and
        must return a tensor indexed as ``[time, sample, variable]``.
    log_transform : bool
        Not used by the loop; kept so that existing calls remain valid.
    batch_size : int, optional
        Mini-batch size. When omitted, the notebook-level ``batch_size``
        variable is used (256 if it is not defined).

    Returns
    -------
    epochs : np.ndarray
        Epoch indices ``0 .. n_epochs-1``.
    epochs_small : np.ndarray
        Epoch indices at which the validation loss was evaluated (every 10).
    loss_list : np.ndarray
        Loss of the last mini-batch of each epoch.
    val_loss_list : np.ndarray
        Validation loss for each entry of ``epochs_small``.
    """
    if batch_size is None:
        # Backward compatibility with the original notebook-level setting.
        batch_size = globals().get("batch_size", 256)

    val_every = 10

    epochs = np.arange(n_epochs)
    epochs_small = epochs[::val_every]

    # Arrays to store the loss and validation loss
    loss_list = np.empty(n_epochs)
    # One slot per validated epoch, also when n_epochs is not a multiple of 10.
    val_loss_list = np.empty(len(epochs_small))

    def _predict(inputs):
        # Solver output is [time, sample, variable]; targets are [sample, variable, time].
        return model(inputs, integration_time=integration_times).permute(1, 2, 0)

    for epoch in range(n_epochs):

        # Training parameters
        for i in range(0, len(X_train), batch_size):

            Xbatch = X_train[i:i+batch_size,:]
            Ybatch = Y_train[i:i+batch_size,:,:]

            loss = loss_fn(_predict(Xbatch), Ybatch)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        # Store a plain float: the loss tensor carries a graph and may live on the GPU.
        loss_value = loss.item()
        loss_list[epoch] = loss_value

        # Validation loss is only computed every 10 epochs as it is expensive
        validate = epoch % val_every == 0
        if validate:
            model.eval()  # evaluation mode
            with torch.no_grad():
                val_loss = loss_fn(_predict(X_val), Y_val).item()
            model.train()   # Back to training mode
            val_loss_list[epoch // val_every] = val_loss

        print(f"Finished epoch {epoch}")
        print(f"    >> Loss: {loss_value}")
        if validate:
            print(f"    >> Validation loss: {val_loss}")

    return epochs, epochs_small, loss_list, val_loss_list

In [None]:
# Wall-clock timing of the complete training run.
start_time = time.perf_counter()
epochs, epochs_small, loss_list, val_loss_list = main_training_loop(X_train, X_val, Y_train, Y_val, integration_times, loss_fn, optimizer, n_epochs, model_node, log_transform)
end_time = time.perf_counter()
print(f" TRAINING DURATION: {end_time-start_time} s")

We can define a function to analyze the training. We plot:

+ The training and validation losses

+ The evolution of $\sum_{k=1}^{N_S} Y_k$ (mean, min and max).

+ The elements conservation by plotting $100\times\delta Y_e$ for each element (C, H, O and N). The factor $100$ enables to get an error in \%.

In [None]:
# Sanity check of the trained model on one training sample (index 12).
x = X_train[12,:]
y = Y_train[12,:]

# Pure inference: no autograd graph is needed for this check.
with torch.no_grad():
    y_pred = model_node(x, integration_time=integration_times)
    # Solver output is [time, variable]; the target is [variable, time].
    y_pred = torch.transpose(y_pred,0,1)

    l = loss_fn(y,y_pred)
print(l)

In [None]:
# Target and prediction for the sample selected in the previous cell.
print(y)
print(y_pred)

In [None]:
def plot_losses(epochs, epochs_small, loss_list, val_loss_list):
    """Plot the training and validation losses against the epoch, on a log y-axis.

    ``epochs`` / ``loss_list`` give the training curve (black);
    ``epochs_small`` / ``val_loss_list`` give the validation curve (red).
    """
    fig, ax = plt.subplots()

    curves = (
        (epochs, loss_list, "k", "Training"),
        (epochs_small, val_loss_list, "r", "Validation"),
    )
    for abscissa, values, colour, name in curves:
        ax.plot(abscissa, values, color=colour, label=name)

    ax.set_yscale('log')
    ax.legend()

    ax.set_xlabel("Epoch")
    ax.set_ylabel("Loss")

    fig.tight_layout()

In [None]:
# Loss history returned by the training loop.
plot_losses(epochs, epochs_small, loss_list, val_loss_list)

The PyTorch model can be saved in the case folder for later use (the save call below is commented out by default):

In [None]:
# torch.save(model_node.state_dict(), os.path.join(folder,"pytorch_mlp.pt"))

### Model testing

We first load the test initial conditions:

In [None]:
# One row per test simulation; the 'Phi' and 'T0' columns are read when running the tests.
df_sim_test = pd.read_csv(os.path.join(folder, "sim_test.csv"))

n_sim = df_sim_test.shape[0]
print(f"There are {n_sim} test simulations")

In [None]:
def run_test_simulations(dt):
    """Run every test case listed in ``df_sim_test`` with the time step ``dt``.

    Each row provides the initial equivalence ratio ('Phi') and temperature ('T0').
    The reference and neural-network simulations are both produced by
    ``compute_nn_cantera_0D_homo``, using the notebook-level model, scaler,
    database parameters and element matrix.

    Returns a list with one ``(df_exact, df_nn)`` tuple per test case.
    """
    results = []
    n_failed = 0

    for _, case in df_sim_test.iterrows():

        phi_ini, temperature_ini = case['Phi'], case['T0']

        print(f"Performing test computation for phi={phi_ini}; T0={temperature_ini}")

        # The same scaler object is passed for both scaler arguments.
        df_exact, df_nn, fail = compute_nn_cantera_0D_homo(
            device, model_node, Xscaler, Xscaler,
            phi_ini, temperature_ini, dt, dtb_params,
            A_element.detach().cpu().numpy(), 1, None, True,
        )

        n_failed += fail
        results.append((df_exact, df_nn))

    print(f"dt={dt}:Total number of simulations which crashed: {n_failed}")

    return results

In [None]:
# dt_list = [0.1e-6, 0.2e-6, 0.4e-6, 0.6e-6, 0.8e-6, 1e-6, 1.2e-6, 1.4e-6, 1.6e-6, 1.8e-6, 2.0e-6]
dt_list = [1.0e-7, 2.0e-7, 3.0e-7]
dict_test_res = {}

# The loop variable is deliberately not called `dt`, so that the database time step
# read from dtb_params is not overwritten (earlier cells rely on it when re-run).
for dt_test in dt_list:

    print(f"RUNNING SIMULATIONS FOR DT={dt_test}")
    dict_test_res[dt_test] = run_test_simulations(dt_test)

We write a function to plot a given simulation for a given dt (which must be one of the values in *dt_list*):

In [None]:
def plot_results_sim(i_sim, dt, dict_test_res, spec_to_plot):
    """Plot one test simulation: exact solution (black) against the network (blue).

    Parameters
    ----------
    i_sim : int
        Index of the simulation in ``dict_test_res[dt]``.
    dt : float
        Time step; must be a key of ``dict_test_res``.
    dict_test_res : dict
        Maps each dt to a list of ``(df_exact, df_nn)`` tuples.
    spec_to_plot : str
        Name of the species column to plot.

    Five figures are created: temperature, species, log of the species, sum of
    the mass fractions and the four element mass fractions (C, H, O, N). The
    last two only show the network results.
    """
    df_exact = dict_test_res[dt][i_sim][0]
    df_nn = dict_test_res[dt][i_sim][1]

    def _compare(values_exact, values_nn, ylabel):
        # One figure with both solutions against time.
        fig, ax = plt.subplots()
        ax.plot(df_exact['Time'], values_exact, color='k', label="Exact")
        ax.plot(df_nn['Time'], values_nn, color='b', label="NN")
        ax.set_xlabel("Time [s]")
        ax.set_ylabel(ylabel)
        ax.legend()

    # Temperature
    _compare(df_exact['Temperature'], df_nn['Temperature'], "T [K]")

    # Species (normal)
    _compare(df_exact[spec_to_plot], df_nn[spec_to_plot], f"{spec_to_plot} [-]")

    # Species (log)
    _compare(np.log(df_exact[spec_to_plot]), np.log(df_nn[spec_to_plot]),
             f"log({spec_to_plot}) [-]")

    # Sum of Yk
    fig, ax = plt.subplots()
    ax.plot(df_nn['Time'], df_nn['SumYk'], color='b')
    ax.set_xlabel("Time [s]")
    # Raw string: "\s" is not a valid escape sequence in a normal string literal.
    ax.set_ylabel(r"$\sum Y_k$ [-]")

    # Elements
    fig, axes = plt.subplots(2, 2)
    for ax, element in zip(axes.flat, ("C", "H", "O", "N")):
        ax.plot(df_nn['Time'], df_nn[f"Y_{element}"], color='b')
        ax.set_ylabel(f"$Y_{element}$")
    for ax in axes[1, :]:
        ax.set_xlabel("Time [s]")
    fig.tight_layout()

In [None]:
# Result to display: dt_plot must be one of the values of dt_list and i_sim a valid
# test simulation index. A dedicated name is used so that the database time step `dt`
# is not overwritten.
dt_plot = 1e-7
i_sim = 50
spec_to_plot = "H2"
plot_results_sim(i_sim, dt_plot, dict_test_res, spec_to_plot)

Function to compute fitness between two simulations:

In [None]:
def compute_fitness(list_test_results):
    """Compute relative errors between exact and network simulations.

    Parameters
    ----------
    list_test_results : list of (df_exact, df_nn)
        One tuple of DataFrames per simulation, as returned by
        ``run_test_simulations``.

    Returns
    -------
    np.ndarray of shape ``(len(list_test_results), n_out + 2)``
        For each simulation: the time-averaged relative error of every variable
        (first column: temperature, then the species), followed in the last
        column by the mean of these errors. Errors are fractions, not percentages.

    Notes
    -----
    Both solutions are processed like the training data (thresholding and log of
    the species when ``log_transform`` is set, then scaling with ``Xscaler``)
    before the relative difference is taken. The notebook-level variables
    ``n_out``, ``log_transform``, ``threshold`` and ``Xscaler`` are used.
    """
    # Size the result from the argument itself rather than from the global n_sim.
    n_results = len(list_test_results)

    # Results will be stored in data_errors array.
    # The first column corresponds to errors on temperature
    # The next columns correspond to errors on species mass fractions
    # The last column corresponds to the mean error
    data_errors = np.empty([n_results, n_out+2])

    for i_sim, (df_exact, df_nn) in enumerate(list_test_results):

        # Removing undesired variables (drop returns new frames: inputs are not modified)
        df_exact = df_exact.drop('Time', axis=1)
        df_nn = df_nn.drop(["Time","SumYk", "Y_C", "Y_H", "Y_O", "Y_N"], axis=1)

        # Applying log
        if log_transform:

            # Clip small values so that the logarithm stays finite
            df_exact[df_exact < threshold] = threshold
            df_nn[df_nn < threshold] = threshold

            # The first column (temperature) is left untouched
            df_exact.iloc[:, 1:] = np.log(df_exact.iloc[:, 1:])
            df_nn.iloc[:, 1:] = np.log(df_nn.iloc[:, 1:])

        # Scaling
        data_exact_scaled = (df_exact-Xscaler.mean)/(Xscaler.std+1.0e-7)
        data_nn_scaled = (df_nn-Xscaler.mean)/(Xscaler.std+1.0e-7)

        # Relative error at every time, then averaged over time for each variable
        diff_exact_nn = np.abs((data_nn_scaled-data_exact_scaled)/data_exact_scaled)
        diff_exact_nn = diff_exact_nn.mean(axis=0)

        # Mean over all variables
        M = diff_exact_nn.mean()

        print(f"Simulation {i_sim} error M = {M}")

        data_errors[i_sim, :n_out+1] = diff_exact_nn
        data_errors[i_sim, n_out+1] = M

    return data_errors

In [None]:
# Error table for each tested time step. The comprehension keeps its loop variable
# local, so the database time step `dt` is not overwritten.
data_errors = {dt_test: compute_fitness(dict_test_res[dt_test]) for dt_test in dt_list}

We compute the mean and std error for each dt:

In [None]:
# Mean and standard deviation, over the test simulations, of the global error stored
# in the last column of each error table. Comprehensions keep the loop variable local,
# so the database time step `dt` is not overwritten.
data_errors_mean = np.array([data_errors[dt_test][:, -1].mean() for dt_test in dt_list])
data_errors_std = np.array([data_errors[dt_test][:, -1].std() for dt_test in dt_list])

In [None]:
fig, ax = plt.subplots()

# The mean errors are relative errors (fractions): convert them to percent so that
# the values match the axis label.
ax.plot(dt_list, 100.0 * data_errors_mean, color="k", marker="o")

ax.set_xlabel("dt [s]", fontsize=14)
ax.set_ylabel("Error [%]", fontsize=14)

fig.tight_layout()