# Data Collection

Make sure `PDEControlGym` is correctly installed according to the [installation guide](https://pdecontrolgym.readthedocs.io/en/latest/guide/install.html). This notebook is adapted from `PDEControlGym/examples/transportPDE/HyperbolicPDEExample.ipynb` in the PDEControlGym repository.

In [None]:
import gymnasium as gym
import pde_control_gym
import numpy as np
import math
import matplotlib.pyplot as plt

import stable_baselines3
import time
from stable_baselines3 import PPO
from stable_baselines3 import SAC

In [2]:
# Print Versioning
# Record the library versions used for this run. `gym` is this notebook's
# alias for the `gymnasium` package, so the first line reports Gymnasium's version.
print("Gym version", gym.__version__)
print("Numpy version", np.__version__)
print("Stable Baselines3 version", stable_baselines3.__version__)

Gym version 0.29.1
Numpy version 1.26.2
Stable Baselines3 version 2.2.1


In [9]:
# NO NOISE
def noiseFunc(state):
    """Identity noise function: hand the given state back untouched.

    Args:
        state: Any object; it is neither copied nor modified.

    Returns:
        The very same object that was passed in.
    """
    return state

# Chebyshev Polynomial Beta Functions
def solveBetaFunction(x, gamma):
    """Evaluate ``5 * cos(gamma * acos(v))`` at every sample ``v`` of ``x``.

    Args:
        x: One-dimensional sequence of scalar sample points. Each value is
            fed to ``math.acos``, which raises ``ValueError`` outside [-1, 1].
        gamma: Scalar multiplier applied to ``acos(v)``.

    Returns:
        A ``float32`` NumPy array with one entry per element of ``x``.
    """
    samples = [5*math.cos(gamma*math.acos(point)) for point in x]
    return np.array(samples, dtype=np.float32)

# Kernel function solver for backstepping
def solveKernelFunction(theta, step=None):
    """Build the backstepping kernel from ``theta`` by forward recursion.

    The recursion is

        kappa[i] = step * sum_{j < i} kappa[i - j] * theta[j]  -  theta[i]

    and the finished array is returned reversed.

    Args:
        theta: One-dimensional sequence of coefficients.
        step: Spacing that multiplies every term of the sum. When omitted
            (the original call style), the notebook-level global ``dx`` is
            looked up at call time, so ``dx`` must be defined before the
            first call.

    Returns:
        A float64 NumPy array of ``len(theta)`` entries, flipped end to end.
    """
    if step is None:
        # Backward-compatible default: the spatial step defined at notebook level.
        step = dx
    kappa = np.zeros(len(theta))
    for i in range(len(theta)):
        # The j == 0 term reads kappa[i], which is still zero at this point,
        # so it contributes nothing to the sum.
        kernelIntegral = sum((kappa[i-j]*theta[j])*step for j in range(i))
        kappa[i] = kernelIntegral - theta[i]
    return np.flip(kappa)

# Control convolution solver
def solveControl(kernel, u):
    """Return the element-wise product sum of ``kernel`` and ``u`` times 1e-2.

    Args:
        kernel: Indexable sequence with at least ``len(u)`` entries.
        u: Indexable sequence; its length sets how many terms are summed.

    Returns:
        ``sum(kernel[k] * u[k] for k < len(u)) * 1e-2``.
    """
    weighted_sum = sum(kernel[k]*u[k] for k in range(len(u)))
    return weighted_sum*1e-2

# Set initial condition function here
def getInitialCondition(nx):
    """Return a constant profile whose level is drawn uniformly from [1, 10).

    Args:
        nx: Shape argument forwarded to ``np.ones``.

    Returns:
        An array of shape ``nx`` in which every entry holds the same draw
        from ``np.random.uniform(1, 10)``.
    """
    level = np.random.uniform(1, 10)
    flat_profile = np.ones(nx)
    return flat_profile*level

# Returns the beta function passed into the PDE environment. Currently gamma is
# always set to 7.35, but this can be modified for further problems
def getBetaFunction(nx):
    """Evaluate the beta function on ``nx`` evenly spaced points of [0, 1].

    Args:
        nx: Number of grid points handed to ``np.linspace``.

    Returns:
        The result of ``solveBetaFunction`` on that grid with gamma = 7.35.
    """
    grid = np.linspace(0, 1, nx)
    return solveBetaFunction(grid, 7.35)


In [10]:
# Timestep and spatial step for PDE Solver
# These four values are forwarded unchanged to the environment under the keys
# "T", "dt", "X" and "dx".
T = 5  # int(round(T/dt)) is the first argument given to TunedReward1D (presumably the time horizon — confirm)
dt = 1e-4  # temporal step
dx = 1e-2  # spatial step; also read as a global inside solveKernelFunction
X = 1  # upper end of the spatial grid built with np.linspace(dx, X, ...)

In [None]:
# Shared keyword arguments for the "PDEControlGym-TransportPDE1D" environment.
# Backstepping does not need to normalize actions to be between -1 and 1, so
# normalize is set to False for it. Otherwise, the parameters are the same as
# for the RL algorithms.
from pde_control_gym.src import TunedReward1D,NormReward
reward_class =  TunedReward1D(int(round(T/dt)), -1e3, 3e2) # with penalize
hyperbolicParameters = {
        "T": T, 
        "dt": dt, 
        "X": X,
        "dx": dx, 
        "reward_class": reward_class,
        # Placeholder only: overwritten in the two derived dicts below.
        "normalize":None, 
        "sensing_loc": "full", 
        # NOTE(review): this spelling is presumably the exact string the
        # library expects — confirm before "correcting" it.
        "control_type": "Dirchilet", 
        "sensing_type": None,
        # Identity function, i.e. the state is passed through unchanged.
        "sensing_noise_func": lambda state: state,
        "limit_pde_state_size": True,
        "max_state_value": 1e10,
        "max_control_value": 20,
        "reset_init_condition_func": getInitialCondition,
        "reset_recirculation_func": getBetaFunction,
        "control_sample_rate": 0.1
}

# dict.copy() is shallow: both derived dicts reference the same reward_class
# object and the same callables as the base dict; only "normalize" differs.
hyperbolicParametersBackstepping = hyperbolicParameters.copy()
hyperbolicParametersBackstepping["normalize"] = False

hyperbolicParametersRL = hyperbolicParameters.copy()
hyperbolicParametersRL["normalize"] = True

Replace `PPO_MODEL_PATH` and `SAC_MODEL_PATH` with the paths to the pretrained PPO and SAC nominal controllers from `PDEControlGym`.

In [None]:
# Placeholder paths: point these at the saved PPO and SAC models before running.
ppoModelPath = "PPO_MODEL_PATH"
sacModelPath = "SAC_MODEL_PATH"

# Load the saved Stable-Baselines3 policies from the paths above.
ppoModel = PPO.load(ppoModelPath)
sacModel = SAC.load(sacModelPath)

# For backstepping controller
# Grid of int(round(X/dx)) evenly spaced points from dx up to and including X,
# with beta evaluated at gamma = 7.35 (the same gamma as getBetaFunction).
# NOTE(review): getBetaFunction builds its grid with np.linspace(0, 1, nx),
# whereas this grid starts at dx — confirm the two are meant to differ.
spatial = np.linspace(dx, X, int(round(X/dx)))
beta = solveBetaFunction(spatial, 7.35)

In [12]:
# Runs a single episode calculation
# Parameter varies. For SAC and PPO it is the model itself
# For backstepping it is the beta function
def runSingleEpisode(model, env, parameter):
    """Roll out one episode of ``env`` driven by the controller ``model``.

    Args:
        model: Callable ``model(obs, parameter)`` returning the action for
            the current observation.
        env: Environment exposing ``reset()`` returning ``(obs, info)`` and
            ``step(action)`` returning
            ``(obs, reward, terminated, truncated, info)``.
        parameter: Opaque second argument forwarded to ``model`` on every step.

    Returns:
        ``(rew, u)``: the sum of all step rewards, and an array stacking the
        reset observation followed by the observation after every step.
    """
    # Reset Environment; the reset observation is the first stored entry
    obs, _ = env.reset()
    uStorage = [obs]

    rew = 0
    terminate = False
    truncate = False
    while not truncate and not terminate:
        # Ask the supplied controller for the next action
        action = model(obs, parameter)
        obs, rewards, terminate, truncate, _ = env.step(action)
        uStorage.append(obs)
        rew += rewards
    return rew, np.array(uStorage)

In [13]:
# Define Controllers
def bcksController(obs, beta):
    """Backstepping controller: map an observation to a control value.

    The kernel is rebuilt from ``beta`` on every call and then combined with
    ``obs`` through ``solveControl``.

    Args:
        obs: Current observation, passed to ``solveControl`` as ``u``.
        beta: Coefficients handed to ``solveKernelFunction``.

    Returns:
        Whatever ``solveControl`` returns for the computed kernel and ``obs``.
    """
    gain_kernel = solveKernelFunction(beta)
    control_value = solveControl(gain_kernel, obs)
    return control_value

def RLController(obs, model):
    """Query a learned policy for the action at the current observation.

    Args:
        obs: Current observation, forwarded to ``model.predict``.
        model: Object whose ``predict(obs)`` returns a two-element
            ``(action, state)`` pair.

    Returns:
        The first element of that pair; the second is discarded.
    """
    chosen_action, _ = model.predict(obs)
    return chosen_action

def openLoopController(_, _a):
    """Controller that ignores both of its arguments and always returns 0."""
    zero_action = 0
    return zero_action

Collect data for model training.

In [None]:
# collect dataset
import scipy
# from tqdm import tqdm
from tqdm import trange, tqdm
def getInitialConditionRandom(nx):
    """Return a constant profile at a random level in [1, 10).

    Args:
        nx: Shape argument forwarded to ``np.ones``.

    Returns:
        An array of shape ``nx`` whose entries all equal
        ``1 + 9 * np.random.rand()``.
    """
    level = 1 + np.random.rand() * 9
    flat_profile = np.ones(nx)
    return flat_profile * level


# Import the submodule explicitly: a bare `import scipy` does not guarantee
# that `scipy.io` is loaded on every SciPy version, and an AttributeError at
# save time would only surface after the whole collection loop has finished.
import scipy.io

# Number of episodes collected per controller.
NUM_EPISODES = 50000

# Give each parameter set its own fresh reward object.
hyperbolicParametersRL["reward_class"] = TunedReward1D(int(round(T/dt)), -1e3, 3e2)
hyperbolicParametersBackstepping["reward_class"] = TunedReward1D(int(round(T/dt)), -1e3, 3e2)

hyperbolicParametersBacksteppingRandom = hyperbolicParametersBackstepping.copy()
hyperbolicParametersBacksteppingRandom["reset_init_condition_func"] = getInitialConditionRandom

hyperbolicParametersRLRandom = hyperbolicParametersRL.copy()
hyperbolicParametersRLRandom["reset_init_condition_func"] = getInitialConditionRandom

# Make environments
envBcksRandom = gym.make("PDEControlGym-TransportPDE1D", **hyperbolicParametersBacksteppingRandom)
envRLRandom = gym.make("PDEControlGym-TransportPDE1D", **hyperbolicParametersRLRandom)


def _collectEpisode(controller, env, parameter, xs, ys):
    """Run one episode and append two of its time series to ``xs`` / ``ys``.

    After transposing the stacked observations, the last row goes to ``xs``
    and the first row to ``ys``. Returns the ``(reward, observations)`` pair
    from ``runSingleEpisode`` unchanged.
    """
    rew, u = runSingleEpisode(controller, env, parameter)
    transposed = u.transpose()
    # Plain slices are views that keep the whole episode array alive; copying
    # the two rows lets each full trajectory be freed after its iteration
    # instead of accumulating over every episode.
    xs.append(transposed[-1].copy())
    ys.append(transposed[0].copy())
    return rew, u


xs_bcks = []
ys_bcks = []
xs_ppo = []
ys_ppo = []
xs_sac = []
ys_sac = []
for i in range(NUM_EPISODES):
    rewBcksRandom, uBcksRandom = _collectEpisode(bcksController, envBcksRandom, beta, xs_bcks, ys_bcks)
    # PPO and SAC share one environment; each episode starts with its own reset.
    rewPPORandom, uPPORandom = _collectEpisode(RLController, envRLRandom, ppoModel, xs_ppo, ys_ppo)
    rewSACRandom, uSACRandom = _collectEpisode(RLController, envRLRandom, sacModel, xs_sac, ys_sac)
    print(rewPPORandom,rewSACRandom)
    print((uSACRandom)[-1])

# Each .mat file stores the stacked last-row series under "a" and the stacked
# first-row series under "u", one row per episode.
data_bcks = {"a": np.stack(xs_bcks), "u": np.stack(ys_bcks)}
scipy.io.savemat("data_bcks_hyperbolic.mat", data_bcks)

data_ppo = {"a": np.stack(xs_ppo), "u": np.stack(ys_ppo)}
scipy.io.savemat("data_ppo_hyperbolic.mat", data_ppo)

data_sac = {"a": np.stack(xs_sac), "u": np.stack(ys_sac)}
scipy.io.savemat("data_sac_hyperbolic.mat", data_sac)


# Safety filtering

In [None]:
def runSingleEpisodeQP(model, env, parameter):
    """Roll out one episode with a controller that also receives the step index.

    Args:
        model: Callable ``model(obs, parameter, step_index)`` returning the
            action; ``step_index`` starts at 0 and grows by one per step.
        env: Environment exposing ``reset()`` returning ``(obs, info)`` and
            ``step(action)`` returning
            ``(obs, reward, terminated, truncated, info)``.
        parameter: Opaque second argument forwarded to ``model`` on every step.

    Returns:
        ``(rew, u)``: the summed step rewards, and an array stacking the reset
        observation followed by the observation after every step.
    """
    # Start the episode; the reset observation opens the trajectory
    current_obs, _info = env.reset()
    trajectory = [current_obs]

    total_reward = 0
    step_index = 0
    finished = False
    while not finished:
        chosen_action = model(current_obs, parameter, step_index)
        current_obs, step_reward, terminated, truncated, _info = env.step(chosen_action)
        trajectory.append(current_obs)
        total_reward += step_reward
        step_index += 1
        finished = bool(truncated) or bool(terminated)
    return total_reward, np.array(trajectory)

def QP_filter_Controller(obs, parameter,index):
    """Replay a stored sequence: return the entry one past ``index``.

    Args:
        obs: Current observation; not used.
        parameter: Indexable sequence of stored values. In this notebook,
            element 0 is used by the caller as the initial-condition level,
            so the value for step ``index`` sits at ``index + 1``.
        index: Zero-based step counter.

    Returns:
        ``parameter[index + 1]``.
    """
    position = index+1
    return parameter[position]

def find_earliest_true(condition):
    """Find where the final unbroken run of truthy values starts on axis 2.

    For every ``(i, j)`` pair the last axis is scanned backwards from its end:

    * all entries truthy: the result is 0;
    * the final entry falsy: the result is -1 (no trailing truthy run);
    * otherwise: the index just after the last falsy entry.

    Args:
        condition: Three-dimensional array indexed as ``condition[i, j, k]``.

    Returns:
        An integer array of shape ``condition.shape[:2]``.
    """
    earliest_indices = np.full(condition.shape[:2], 0)  # 0 covers the all-truthy case

    for i in range(condition.shape[0]):
        for j in range(condition.shape[1]):
            depth = condition.shape[2]
            # Walk back from the end until the first falsy entry (or run out)
            cursor = depth - 1
            while cursor >= 0 and condition[i, j, cursor]:
                cursor -= 1
            if cursor < 0:
                # Never hit a falsy entry: keep the initial 0
                continue
            if cursor == depth - 1:
                earliest_indices[i, j] = -1
            else:
                earliest_indices[i, j] = cursor + 1
    return earliest_indices

# Reward object used by the replay episodes later in this notebook. It differs
# from the TunedReward1D instances created earlier only in its second argument
# (-1e-4 here instead of -1e3).
# NOTE(review): that argument is presumably the penalty term — confirm against
# the TunedReward1D signature.
reward_class_no_penalty = TunedReward1D(int(round(T/dt)), -1e-4, 3e2) # no penalize

Replace `FILTER_RESULT_PATH` with the path to the filtered results saved by `test_cbf_hyper.ipynb`.

In [None]:
filtered_result_path = "FILTER_RESULT_PATH"
RL_1000 = np.load(filtered_result_path)

# Pull each array out of the loaded file once. Indexing the np.load result
# inside the loop would repeat the lookup on every iteration (and, if the
# file is an .npz archive, re-read the array from disk each time).
U_nominal_all = RL_1000["U_nominal"]
U_safe_all = RL_1000["U_safe"]
num_samples = RL_1000["safe_label"].transpose().shape[0]


def _constantInitialCondition(level):
    """Return an initial-condition function yielding a flat profile at ``level``."""
    def initialCondition(nx):
        return np.ones(nx) * level
    return initialCondition


def _replayControlSequence(initial_level, control_sequence):
    """Replay a stored control sequence in a fresh environment.

    The environment uses the backstepping parameters with a fixed initial
    condition and the no-penalty reward. It is closed once the episode ends,
    so one environment per sample does not accumulate over the loop.
    Returns the ``(reward, observations)`` pair from ``runSingleEpisodeQP``.
    """
    params = hyperbolicParametersBackstepping.copy()
    params["reset_init_condition_func"] = _constantInitialCondition(initial_level)
    params["reward_class"] = reward_class_no_penalty
    env = gym.make("PDEControlGym-TransportPDE1D", **params)
    try:
        return runSingleEpisodeQP(QP_filter_Controller, env, control_sequence)
    finally:
        env.close()


RL_reward_beforeQP = []
RL_reward_afterQP = []
uBcks_beforeQP_list = []
uBcks_afterQP_list = []
for i in range(num_samples):
    U_list = U_nominal_all[:, i]
    U_safe_list = U_safe_all[:, i]
    # Both replays start from the same initial condition, taken from the
    # first entry of the nominal sequence (as in the original code).
    initial_level = U_list[0]

    reward_beforeQP, uBcks_beforeQP = _replayControlSequence(initial_level, U_list)
    uBcks_beforeQP_list.append(uBcks_beforeQP)
    RL_reward_beforeQP.append(reward_beforeQP)

    reward_afterQP, uBcks_afterQP = _replayControlSequence(initial_level, U_safe_list)
    uBcks_afterQP_list.append(uBcks_afterQP)
    RL_reward_afterQP.append(reward_afterQP)

# Axis 0: before / after QP; axis 1: sample; axis 2: stored observation per
# step (reset observation first); axis 3: observation component.
# NOTE(review): the original note gave the shape as (2, 100, 51, 100) — confirm.
result = np.array([uBcks_beforeQP_list, uBcks_afterQP_list])

# Per trajectory, locate the start of the final run in which the first
# observation component stays below 1 (-1 when the last step is not below 1).
condition = result[:, :,:, 0] < 1
earliest_index = find_earliest_true(condition)
valid_earliest_index_beforeQP = earliest_index[0,earliest_index[0,:]>=0]
valid_earliest_index_afterQP = earliest_index[1,earliest_index[1,:]>=0]

# Length of that final run, counted back from the end of the trajectory.
steps_beforeQP = result.shape[2] - valid_earliest_index_beforeQP
steps_afterQP = result.shape[2] - valid_earliest_index_afterQP

print(f"beforeQP PF steps among {valid_earliest_index_beforeQP.shape[0]} PF trajectories", np.mean(steps_beforeQP), np.std(steps_beforeQP))
print(f"afterQP PF steps among {valid_earliest_index_afterQP.shape[0]} PF trajectories", np.mean(steps_afterQP), np.std(steps_afterQP))


reward_result = np.array([RL_reward_beforeQP,RL_reward_afterQP])
print("reward: beforeQP and afterQP")
print(np.mean(reward_result, axis=1))
print(np.std(reward_result, axis=1))
