# Data Collection

Make sure `PDEControlGym` is correctly installed by following the [installation guide](https://pdecontrolgym.readthedocs.io/en/latest/guide/install.html). This notebook is adapted from `PDEControlGym/examples/reactionDiffusionPDE/ParabolicPDEExample.ipynb` in the `PDEControlGym` repository.

In [None]:
import gymnasium as gym
import numpy as np
import math
import matplotlib.pyplot as plt
import stable_baselines3
import time

from stable_baselines3 import PPO
from stable_baselines3 import SAC
from stable_baselines3.common.env_checker import check_env
from stable_baselines3.common.callbacks import CheckpointCallback
import pde_control_gym
from pde_control_gym.src import TunedReward1D

In [2]:
# Print the versions of the main dependencies so a run can be reproduced.
for _label, _module in (
    ("Gym version", gym),
    ("Numpy version", np),
    ("Stable Baselines3 version", stable_baselines3),
):
    print(_label, _module.__version__)

Gym version 0.29.1
Numpy version 1.26.2
Stable Baselines3 version 2.2.1


Helper Functions for the Environment

In [4]:
# NO NOISE
def noiseFunc(state):
    """Noise-free sensing model: hand back ``state`` exactly as received.

    Args:
        state: The sensed value; it is neither copied nor modified.

    Returns:
        The very same object that was passed in.
    """
    return state

# Chebyshev Polynomial Beta Functions
def solveBetaFunction(x, gamma):
    """Evaluate ``50 * cos(gamma * acos(x_i))`` for every entry of ``x``.

    Args:
        x: Sized iterable of sample points. Each point is passed to
            ``math.acos``, so it must lie in ``[-1, 1]`` (``math.acos``
            raises ``ValueError`` otherwise).
        gamma: Multiplier applied to ``acos(x_i)`` before taking the cosine.

    Returns:
        A 1-D ``float32`` NumPy array with ``len(x)`` entries.
    """
    # The scalar math functions are kept on purpose so every value is computed
    # in double precision and only then stored as float32.
    samples = (50*math.cos(gamma*math.acos(point)) for point in x)
    return np.fromiter(samples, dtype=np.float32, count=len(x))

# Kernel function solver for backstepping
def solveKernelFunction(beta):
    """Fill the backstepping kernel matrix with a finite-difference recurrence.

    The step size is read from the module-level ``dx``, which must be defined
    before this function is called.

    Args:
        beta: 1-D sequence of coefficient samples. It needs at least two
            entries, because ``k[1][1]`` and ``a[1]`` are accessed
            unconditionally.

    Returns:
        A ``(len(beta), len(beta))`` float array. Only entries ``k[i][j]``
        with ``j <= i`` are written (row 0 is never written); everything else
        keeps the zero it was initialised with.
    """
    k = np.zeros((len(beta), len(beta)))
    # ``a`` is just another name for ``beta`` (no copy is made).
    a = beta

    # FD LOOP
    # Seed value on the diagonal; rows 2..len(beta)-1 are built from it.
    k[1][1] = -(a[1] + a[0]) * dx / 4
    for i in range(1, len(beta)-1):
        # Row i+1 is computed from rows i and i-1, so rows must be filled in
        # increasing order.
        k[i+1][0] = 0
        # Diagonal entry, advanced from the previous diagonal entry.
        k[i+1][i+1] = k[i][i]-dx/4.0*(a[i-1] + a[i])
        # Entry just left of the diagonal.
        k[i+1][i] = k[i][i] - dx/2 * a[i]
        # Remaining columns 1..i-1, from the neighbouring entries of row i and
        # the same column of row i-1.
        for j in range(1, i):
                k[i+1][j] = -k[i-1][j] + k[i][j+1] + k[i][j-1] + a[j]*(dx**2)*(k[i][j+1]+k[i][j-1])/2
    return k

# Control convolution solver
def solveControl(kernel, u):
    """Compute the control value from the last kernel row and the state.

    All but the final entry of ``u`` are multiplied element-wise with the
    leading entries of ``kernel[-1]``; the products are summed and scaled by
    the module-level step ``dx``.

    Args:
        kernel: 2-D kernel array; only its last row is used.
        u: 1-D state vector; its final entry is left out of the sum.

    Returns:
        The scalar ``sum(kernel[-1][:len(u)-1] * u[:len(u)-1]) * dx``.
    """
    upper = len(u) - 1
    gains = kernel[-1][0:upper]
    interior_state = u[0:upper]
    # Builtin ``sum`` (not ``np.sum``) keeps the left-to-right accumulation.
    return sum(gains * interior_state) * dx

# Set initial condition function here
def getInitialCondition(nx):
    """Build a spatially constant initial profile with a random level.

    Args:
        nx: The returned array has ``nx + 1`` entries.

    Returns:
        A 1-D array of length ``nx + 1`` whose entries all equal one draw
        from ``np.random.uniform(1, 10)``.
    """
    flat_profile = np.ones(nx+1)
    level = np.random.uniform(1, 10)
    return flat_profile * level

# Returns the beta function passed into the PDE environment. gamma is currently
# fixed to 8, but this can be modified for further problems.
def getBetaFunction(nx):
    """Sample the beta function on ``nx + 1`` evenly spaced points of [0, 1].

    Args:
        nx: The grid has ``nx + 1`` points, both endpoints included.

    Returns:
        The result of ``solveBetaFunction`` on that grid with ``gamma = 8``.
    """
    grid = np.linspace(0, 1, nx+1)
    return solveBetaFunction(grid, 8)

In [5]:
# Discretization used by the PDE solver.
# Space: domain length X, sampled with spatial step dx.
X = 1
dx = 5e-3
# Time: horizon T, advanced with timestep dt.
T = 1
dt = 1e-5

In [6]:
# Environment settings shared by every controller. Only "normalize" differs
# between controllers: backstepping does not need its actions rescaled to
# [-1, 1], so it uses False, while the RL policies use True. The base dict
# carries None as a placeholder that both variants below override.
parabolicParameters = {
    "T": T,
    "dt": dt,
    "X": X,
    "dx": dx,
    # First argument is the number of solver steps in one episode.
    "reward_class": TunedReward1D(int(round(T/dt)), -1e3, 3e2),
    "normalize": None,
    "sensing_loc": "full",
    # NOTE(review): the spelling "Dirchilet" is presumably what the
    # environment expects - confirm before "correcting" it.
    "control_type": "Dirchilet",
    "sensing_type": None,
    # Identity function, i.e. measurements are not perturbed.
    "sensing_noise_func": lambda state: state,
    "limit_pde_state_size": True,
    "max_state_value": 1e10,
    "max_control_value": 20,
    "reset_init_condition_func": getInitialCondition,
    "reset_recirculation_func": getBetaFunction,
    "control_sample_rate": 0.001,
}

# Shallow copies of the shared settings with "normalize" filled in.
parabolicParametersBackstepping = {**parabolicParameters, "normalize": False}
parabolicParametersRL = {**parabolicParameters, "normalize": True}

Replace `PPO_MODEL_PATH` and `SAC_MODEL_PATH` with the paths to the pretrained PPO and SAC nominal controllers from `PDEControlGym`.

In [None]:
# Load the pretrained RL policies. Replace the two placeholder strings with
# the paths of the saved PPO and SAC models.
ppoModelPath = "PPO_MODEL_PATH"
sacModelPath = "SAC_MODEL_PATH"

ppoModel = PPO.load(ppoModelPath)
sacModel = SAC.load(sacModelPath)

# For backstepping controller: sample beta on the grid dx, 2*dx, ..., X
# (gamma = 8) and precompute the kernel from it.
spatial = np.linspace(dx, X, int(round(X/dx)))
beta = solveBetaFunction(spatial, 8)
# `kernel` is the parameter passed to bcksController in the data-collection
# loop; without this line that loop fails with a NameError.
kernel = solveKernelFunction(beta)

In [None]:
# Runs a single episode calculation
def runSingleEpisode(model, env, parameter):
    """Roll out one episode of ``env`` under the controller ``model``.

    Args:
        model: Callable ``model(obs, parameter)`` returning the next action.
        env: Environment whose ``reset()`` returns ``(obs, info)`` and whose
            ``step(action)`` returns
            ``(obs, reward, terminated, truncated, info)``.
        parameter: Forwarded unchanged to ``model`` on every step. For SAC and
            PPO it is the model itself; for backstepping it is the kernel.

    Returns:
        ``(total_reward, observations)``: the sum of the step rewards and an
        array stacking the reset observation followed by every step
        observation.
    """
    observation, _ = env.reset()
    trajectory = [observation]
    total_reward = 0

    done = False
    while not done:
        action = model(observation, parameter)
        observation, reward, terminated, truncated, _ = env.step(action)
        trajectory.append(observation)
        total_reward += reward
        done = terminated or truncated
    return total_reward, np.array(trajectory)

In [None]:
# Define Controllers
def bcksController(obs, kernel):
    """Backstepping controller: feed the observation through ``solveControl``.

    Args:
        obs: Current observation of the environment.
        kernel: Precomputed kernel array handed on to ``solveControl``.

    Returns:
        Whatever ``solveControl(kernel, obs)`` returns.
    """
    control_value = solveControl(kernel, obs)
    return control_value

def RLController(obs, model):
    """Query an RL policy for the action to take at ``obs``.

    Args:
        obs: Current observation of the environment.
        model: Object whose ``predict(obs)`` returns a pair; the first item is
            the action.

    Returns:
        The first item returned by ``model.predict(obs)``.
    """
    chosen_action, _ = model.predict(obs)
    return chosen_action

def openLoopController(_, _a):
    """Open-loop controller: ignore both arguments and always return 0."""
    zero_action = 0
    return zero_action

Collect data for model training.

In [None]:
import scipy
# from tqdm import tqdm
from tqdm import trange, tqdm
def getInitialConditionRandom(nx):
    """Build a spatially constant initial profile with a random level.

    Args:
        nx: The returned array has ``nx + 1`` entries.

    Returns:
        A 1-D array of length ``nx + 1`` whose entries all equal
        ``1 + 9 * r``, where ``r`` is a single ``np.random.rand()`` draw.
    """
    flat_profile = np.ones(nx+1)
    level = 1 + np.random.rand() * 9
    return flat_profile * level



# Same settings as the backstepping / RL configurations, except that every
# reset draws its initial condition from getInitialConditionRandom.
parabolicParametersBacksteppingRandom = {
    **parabolicParametersBackstepping,
    "reset_init_condition_func": getInitialConditionRandom,
}
parabolicParametersRLRandom = {
    **parabolicParametersRL,
    "reset_init_condition_func": getInitialConditionRandom,
}

# Make environments: one for the backstepping controller and one shared by
# the two RL policies.
envBcksRandom = gym.make("PDEControlGym-ReactionDiffusionPDE1D", **parabolicParametersBacksteppingRandom)
envRLRandom = gym.make("PDEControlGym-ReactionDiffusionPDE1D", **parabolicParametersRLRandom)

# Per-controller accumulators: xs_* collects the inputs ("a"), ys_* the targets
# ("u") that are written to the .mat files afterwards. The *_dense lists are
# only filled by the commented-out dense variant at the end of the loop.
xs_bcks, ys_bcks = [], []
xs_ppo, ys_ppo = [], []
xs_sac, ys_sac = [], []
xs_bcks_dense, ys_bcks_dense = [], []
xs_ppo_dense, ys_ppo_dense = [], []
xs_sac_dense, ys_sac_dense = [], []

for i in range(15000):
    # For each rollout the trajectory is transposed and every 20th entry along
    # its second axis is kept. Row -1 of that view is stored as the input and
    # row 100 as the target.
    # NOTE(review): assumes each observation is 1-D, so that after the
    # transpose the rows index observation components - confirm.
    rewBcksRandom, uBcksRandom = runSingleEpisode(bcksController, envBcksRandom, kernel)
    sampledBcks = uBcksRandom.transpose()[:, ::20]
    xs_bcks.append(sampledBcks[-1])
    ys_bcks.append(sampledBcks[100])

    rewPPORandom, uPPORandom = runSingleEpisode(RLController, envRLRandom, ppoModel)
    sampledPPO = uPPORandom.transpose()[:, ::20]
    xs_ppo.append(sampledPPO[-1])
    ys_ppo.append(sampledPPO[100])

    rewSACRandom, uSACRandom = runSingleEpisode(RLController, envRLRandom, sacModel)
    sampledSAC = uSACRandom.transpose()[:, ::20]
    xs_sac.append(sampledSAC[-1])
    ys_sac.append(sampledSAC[100])

    # Coarse progress report.
    if i % 1000 == 0:
        print(i)

    # Dense variant (no sub-sampling), kept for reference:
    # xs_bcks_dense.append((uBcksRandom.transpose())[-1])
    # ys_bcks_dense.append((uBcksRandom.transpose())[100])

    # xs_ppo_dense.append((uPPORandom.transpose())[-1])
    # ys_ppo_dense.append((uPPORandom.transpose())[100])

    # xs_sac_dense.append((uSACRandom.transpose())[-1])
    # ys_sac_dense.append((uSACRandom.transpose())[100])
        
# Import the submodule explicitly. A bare `import scipy` only exposes
# `scipy.io` on SciPy releases that load submodules lazily; on older releases
# `scipy.io.savemat` would fail with an AttributeError.
import scipy.io

# Stack the per-episode samples into 2-D arrays (one row per episode) and
# write one MATLAB file per controller, with inputs under "a" and targets
# under "u".
data_bcks = {"a": np.stack(xs_bcks), "u": np.stack(ys_bcks)}
scipy.io.savemat("data_bcks_parabolic_train3.mat", data_bcks)

data_ppo = {"a": np.stack(xs_ppo), "u": np.stack(ys_ppo)}
scipy.io.savemat("data_ppo_parabolic_train3.mat", data_ppo)

data_sac = {"a": np.stack(xs_sac), "u": np.stack(ys_sac)}
scipy.io.savemat("data_sac_parabolic_train3.mat", data_sac)

# Dense variant (no sub-sampling), kept for reference:
# data_bcks_dense = {"a": np.stack(xs_bcks_dense), "u": np.stack(ys_bcks_dense)}
# scipy.io.savemat("data_bcks_parabolic_test_dense.mat", data_bcks_dense)

# data_ppo_dense = {"a": np.stack(xs_ppo_dense), "u": np.stack(ys_ppo_dense)}
# scipy.io.savemat("data_ppo_parabolic_test_dense.mat", data_ppo_dense)

# data_sac_dense = {"a": np.stack(xs_sac_dense), "u": np.stack(ys_sac_dense)}
# scipy.io.savemat("data_sac_parabolic_test_dense.mat", data_sac_dense)


# Safety filtering

In [None]:
def runSingleEpisodeQP(model, env, parameter):
    """Roll out one episode with a controller that also sees the step index.

    Args:
        model: Callable ``model(obs, parameter, step_index)`` returning the
            next action; ``step_index`` starts at 0 and grows by one per step.
        env: Environment whose ``reset()`` returns ``(obs, info)`` and whose
            ``step(action)`` returns
            ``(obs, reward, terminated, truncated, info)``.
        parameter: Forwarded unchanged to ``model`` on every step.

    Returns:
        ``(total_reward, observations)``: the sum of the step rewards and an
        array stacking the reset observation followed by every step
        observation.
    """
    observation, _ = env.reset()
    trajectory = [observation]
    total_reward = 0
    step_index = 0

    finished = False
    while not finished:
        action = model(observation, parameter, step_index)
        observation, reward, terminated, truncated, _ = env.step(action)
        trajectory.append(observation)
        total_reward += reward
        step_index += 1
        finished = terminated or truncated
    return total_reward, np.array(trajectory)

def QP_filter_Controller(obs, parameter,index):
    """Replay a stored input sequence: return the entry after ``index``.

    Args:
        obs: Current observation; not used.
        parameter: Indexable sequence of stored inputs.
        index: Index of the current step.

    Returns:
        ``parameter[index + 1]``.
    """
    next_position = index + 1
    return parameter[next_position]

def find_earliest_true(condition):
    """Locate where the trailing run of True values starts along the last axis.

    Args:
        condition: 3-D array of truth values.

    Returns:
        Integer array of shape ``condition.shape[:2]``. For each ``(i, j)``:

        * ``-1`` if the last entry of ``condition[i, j]`` is False;
        * ``0`` if every entry is True (or the last axis is empty);
        * otherwise the index of the first entry of the trailing True run.
    """
    depth = condition.shape[2]
    # 0 is the answer for an all-True slice, so it doubles as the default.
    starts = np.full(condition.shape[:2], 0)

    for row, col in np.ndindex(*condition.shape[:2]):
        # Count the True entries at the end of the slice, scanning backwards.
        trailing = 0
        while trailing < depth and condition[row, col, depth - 1 - trailing]:
            trailing += 1

        if trailing == depth:
            continue
        starts[row, col] = -1 if trailing == 0 else depth - trailing
    return starts

Replace `FILTER_RESULT_PATH` with the path to the filtered results saved by `test_cbf_parabolic.ipynb`.

In [None]:
def _replayInputSequence(initialValue, inputSequence):
    """Replay a stored input sequence in a fresh backstepping environment.

    Args:
        initialValue: Scalar level of the spatially constant initial
            condition used on reset.
        inputSequence: Stored inputs; step ``i`` applies
            ``inputSequence[i + 1]`` (see ``QP_filter_Controller``).

    Returns:
        The ``(total_reward, observations)`` pair of ``runSingleEpisodeQP``.
    """
    def getInitialConditionFixed(nx):
        return np.ones(nx+1) * initialValue

    parabolicParametersBacksteppingFixed = parabolicParametersBackstepping.copy()
    parabolicParametersBacksteppingFixed["reset_init_condition_func"] = getInitialConditionFixed
    envBcksFixed = gym.make("PDEControlGym-ReactionDiffusionPDE1D", **parabolicParametersBacksteppingFixed)
    try:
        return runSingleEpisodeQP(QP_filter_Controller, envBcksFixed, inputSequence)
    finally:
        # One environment is created per rollout, so release it once the
        # rollout is over instead of leaving it open.
        envBcksFixed.close()


RL_1000 = np.load("FILTER_RESULT_PATH")

# Read each stored array once. If the file is an .npz archive, every key
# access on the loaded object reads the array from disk again, so indexing it
# inside the loop would repeat that work for every trajectory.
U_nominal_all = RL_1000["U_nominal"]
U_safe_all = RL_1000["U_safe"]
num_trajectories = RL_1000["safe_label"].transpose().shape[0]

RL_reward_beforeQP = []
RL_reward_afterQP = []
uBcks_beforeQP_list = []
uBcks_afterQP_list = []
# Placeholders so both names exist even when there is no trajectory to replay.
uBcks_beforeQP, uBcks_afterQP = 0, 0

for i in range(num_trajectories):
    # Column i holds the input sequence of trajectory i.
    U_list = U_nominal_all[:, i]
    U_safe_list = U_safe_all[:, i]

    # Both rollouts start from the first entry of the nominal sequence.
    # NOTE(review): presumably U_safe[0] equals U_nominal[0] - confirm.
    reward_beforeQP, uBcks_beforeQP = _replayInputSequence(U_list[0], U_list)
    reward_afterQP, uBcks_afterQP = _replayInputSequence(U_list[0], U_safe_list)

    RL_reward_beforeQP.append(reward_beforeQP)
    uBcks_beforeQP_list.append(uBcks_beforeQP)

    RL_reward_afterQP.append(reward_afterQP)
    uBcks_afterQP_list.append(uBcks_afterQP)


# Stack the before-QP (index 0) and after-QP (index 1) trajectories; the third
# axis of the stacked array is the episode step.
result = np.array([uBcks_beforeQP_list, uBcks_afterQP_list])

# True wherever the observation component at index 100 is below 0.6.
condition = result[:, :, :, 100] < 0.6
earliest_index = find_earliest_true(condition)

# Keep only trajectories whose last step satisfies the condition
# (find_earliest_true marks the others with -1).
earliest_beforeQP = earliest_index[0]
earliest_afterQP = earliest_index[1]
valid_earliest_index_beforeQP = earliest_beforeQP[earliest_beforeQP >= 0]
valid_earliest_index_afterQP = earliest_afterQP[earliest_afterQP >= 0]

# Length of the trailing stretch that satisfies the condition.
num_steps = result.shape[2]
steps_beforeQP = num_steps - valid_earliest_index_beforeQP
steps_afterQP = num_steps - valid_earliest_index_afterQP

print(f"beforeQP PF steps among {valid_earliest_index_beforeQP.shape[0]} PF trajectories", np.mean(steps_beforeQP), np.std(steps_beforeQP))
print(f"afterQP PF steps among {valid_earliest_index_afterQP.shape[0]} PF trajectories", np.mean(steps_afterQP), np.std(steps_afterQP))


# Mean and standard deviation of the episode rewards; row 0 is before the QP
# filter, row 1 after it.
reward_result = np.array([RL_reward_beforeQP, RL_reward_afterQP])
print("reward: beforeQP and afterQP")
print(np.mean(reward_result, axis=1))
print(np.std(reward_result, axis=1))

