In [1]:
from Simulation.mpc import *
from Simulation.system_functions import PolymerCSTR
from utils.helpers import *

## Initialize the system

In [2]:
# Kinetic and thermal parameters of the polymerization CSTR model.
# NOTE(review): the A*/E* pairs look like Arrhenius pre-exponential factors and
# activation temperatures — confirm against PolymerCSTR.
Ad = 2.142e17           # h^-1
Ed = 14897              # K
Ap = 3.816e10           # L/(mol*h)
Ep = 3557               # K
At = 4.50e12            # L/(mol*h)
Et = 843                # K
fi = 0.6                # coefficient
m_delta_H_r = -6.99e4   # J/mol; NOTE(review): confirm the sign convention PolymerCSTR expects
hA = 1.05e6             # J/(K*h)
rhocp = 1506            # annotated J/(K*h); NOTE(review): a rho*cp product is usually J/(K*L) — confirm
rhoccpc = 4043          # annotated J/(K*h); NOTE(review): same unit question as rhocp — confirm
Mm = 104.14             # g/mol
# Packed positionally; this array is the first argument given to PolymerCSTR.
system_params = np.array([Ad, Ed, Ap, Ep, At, Et, fi, m_delta_H_r, hA, rhocp, rhoccpc, Mm])

In [3]:
# Design parameters of the reactor (feed conditions, flows, volumes).
CIf = 0.5888    # mol/L
CMf = 8.6981    # mol/L
Qi = 108.       # L/h
Qs = 459.       # L/h
Tf = 330.       # K
Tcf = 295.      # K
V = 3000.       # L
Vc = 3312.4     # L

# Packed positionally; this array is the second argument given to PolymerCSTR.
system_design_params = np.array([CIf, CMf, Qi, Qs, Tf, Tcf, V, Vc])

In [4]:
# Steady-state values of the two manipulated flows.
Qm_ss = 378.    # L/h
Qc_ss = 471.6   # L/h

# Note the order: Qc first, then Qm (not the order in which they are defined above).
system_steady_state_inputs = np.array([Qc_ss, Qm_ss])

In [5]:
# Sampling time of the system, expressed in hours (0.5 h = 30 min).
delta_t = 0.5 # 30 mins

In [6]:
# Build the CSTR model; the object exposes the steady-state inputs and outputs
# that are collected below for the data-loading step.
cstr = PolymerCSTR(
    system_params,
    system_design_params,
    system_steady_state_inputs,
    delta_t,
)
steady_states = {}
steady_states["ss_inputs"] = cstr.ss_inputs
steady_states["y_ss"] = cstr.y_ss

## Loading the system matrices, the min–max scaling bounds, and the state min/max values

In [7]:
# "Data" folder resolved against the current working directory (so it depends on
# where the kernel was started).
dir_path = os.path.join(os.getcwd(), "Data")

In [8]:
# Defining the range of setpoints for data generation
# (2x2 array; NOTE(review): presumably rows are lower/upper limits and columns the two outputs — confirm).
setpoint_y = np.array([[3.2, 321],
                       [4.5, 325]])
# Lower and upper bounds of the two inputs, handed to the loader as u_min / u_max.
u_min = np.array([71.6, 78])
u_max = np.array([870, 670])

# Returns a dict; the following cells read the model matrices, scaling bounds and
# setpoint data from it by key.
system_data = load_and_prepare_system_data(steady_states=steady_states, setpoint_y=setpoint_y, u_min=u_min, u_max=u_max)

In [9]:
# Pull the three augmented model matrices out of the loaded system data.
A_aug, B_aug, C_aug = (system_data[key] for key in ("A_aug", "B_aug", "C_aug"))

In [10]:
# Min-max scaling bounds; later cells slice them at the input count to separate
# the input entries from the output entries.
data_min, data_max = system_data["data_min"], system_data["data_max"]

In [11]:
# Hard-coded upper ('max_s') and lower ('min_s') bounds, nine values each.
min_max_states = {}
min_max_states['max_s'] = np.array([
    256.79686253, 256.01560603, 48.99447186, 144.79949103,
    2.82199733, 3.14014989, 2.78866348, 3.71691422,
    6.2029936,
])
min_max_states['min_s'] = np.array([
    -272.28060121, -1112.33972595, -76.63993491, -608.60327886,
    -3.94399122, -3.93115257, -2.9532091, -4.06547624,
    -28.25906582,
])

In [12]:
# Setpoint data provided by the loader under the "y_sp_scaled_deviation" key.
y_sp_scaled_deviation = system_data["y_sp_scaled_deviation"]

In [13]:
# Bounds stored by the loader under "b_min" / "b_max".
b_min, b_max = system_data["b_min"], system_data["b_max"]

In [14]:
# `min_max_dict` is the very dict stored in `system_data`, so the two entries
# added here are visible through `system_data["min_max_dict"]` as well.
min_max_dict = system_data["min_max_dict"]
# Reuse the bounds already defined in `min_max_states` instead of repeating the
# same nine-value literals (two hand-maintained copies can silently drift apart).
# Copies keep the two containers independent, as they were before.
min_max_dict["x_max"] = min_max_states['max_s'].copy()
min_max_dict["x_min"] = min_max_states['min_s'].copy()

In [15]:
# Setpoints in deviation form
# Input count = number of columns of B_aug; entries of data_min/data_max from this
# index onward are the output bounds.
inputs_number = int(B_aug.shape[1])
# Two setpoint pairs in physical units, one per row.
y_sp_scenario = np.array([[4.5, 324],
                          [3.4, 321]])

# Scale both the setpoints and the steady-state output with the output bounds and
# subtract, which yields the setpoints as scaled deviations from steady state.
y_sp_scenario = (apply_min_max(y_sp_scenario, data_min[inputs_number:], data_max[inputs_number:])
                 - apply_min_max(steady_states["y_ss"], data_min[inputs_number:], data_max[inputs_number:]))
# Experiment configuration. NOTE(review): the exact meaning of each value is
# defined by the training code that consumes them — confirm there.
n_tests = 200
set_points_len = 400
TEST_CYCLE = [False, False, False, False, False]
warm_start = 10
ACTOR_FREEZE = 10 * set_points_len
# With the values above: 10 * 2 * 400 + 4000 = 12000.
warm_start_plot = warm_start * 2 * set_points_len + ACTOR_FREEZE

In [16]:
# Observer Gain
# Nine desired observer poles (a single np.array call is enough; the previous
# nested np.array(np.array(...)) only made a redundant copy).
poles = np.array([0.44619852, 0.33547649, 0.36380595, 0.70467118, 0.3562966,
                  0.42900673, 0.4228262 , 0.96916776, 0.91230187])
# NOTE(review): the recorded output of this cell shows a place_poles convergence
# warning (requested tolerance 0.001, achieved ~1.0) — verify the closed-loop
# observer poles before trusting this gain.
L = compute_observer_gain(A_aug, C_aug, poles)

The system is observable.


You asked for a tolerance of 0.001, we got 0.9999999422182038.
  obs_gain_calc = signal.place_poles(A.T, C.T, desired_poles, method='KNV0')


In [17]:
def make_reward_fn_relative_QR(
        data_min, data_max, n_inputs,
        k_rel, band_floor_phys,
        Q_diag, R_diag,
        tau_frac=0.7,
        gamma_out=0.5, gamma_in=0.5,
        beta=5.0, gate="geom", lam_in=1.0,
        bonus_kind="exp", bonus_k=12.0, bonus_p=0.6, bonus_c=20.0,
):
    """
    Build a shaped tracking reward with setpoint-relative tolerance bands.

    Parameters
    ----------
    data_min, data_max : arrays for [u_min..., y_min...], [u_max..., y_max...]
    n_inputs           : number of inputs (so outputs start at index n_inputs)
    k_rel              : per-output relative tolerance factors (same length as outputs)
    band_floor_phys    : per-output minimum band in physical units
    Q_diag, R_diag     : diagonal quadratic weights on the error and on the input move
    tau_frac           : softness of the inside/outside gate, as a fraction of the band
    gamma_out, gamma_in: weights of the linear penalties outside / inside the band
    beta               : weight of the near-zero-error bonus
    gate               : how per-output gates are combined: 'prod' | 'mean' | 'geom'
    lam_in             : factor applied to the quadratic error cost inside the band
    bonus_kind         : bonus shape: 'linear' | 'quadratic' | 'exp' | 'power' | 'log'
    bonus_k, bonus_p, bonus_c : shape parameters of the 'exp', 'power' and 'log' bonus

    Returns
    -------
    (params, reward_fn) : ``params`` is a dict echoing the configuration (plus the
    scaled band floor); ``reward_fn(e_scaled, du_scaled, y_sp_phys=None)`` returns
    the scalar reward. ``reward_fn`` raises ValueError for an unknown ``gate`` or
    ``bonus_kind`` when it is called.
    """

    data_min = np.asarray(data_min, float)
    data_max = np.asarray(data_max, float)
    # Physical range of each output, clamped once so every division below is safe.
    dy = np.maximum(data_max[n_inputs:] - data_min[n_inputs:], 1e-12)

    k_rel = np.asarray(k_rel, float)
    band_floor_phys = np.asarray(band_floor_phys, float)
    Q_diag = np.asarray(Q_diag, float)
    R_diag = np.asarray(R_diag, float)

    # floor in *scaled* coordinates (used if y_sp_phys is not provided)
    band_floor_scaled = band_floor_phys / dy

    def _sigmoid(x):
        # Clip the argument so np.exp cannot overflow.
        x = np.clip(x, -60.0, 60.0)
        return 1.0 / (1.0 + np.exp(-x))

    def _phi(z, kind=bonus_kind, k=bonus_k, p=bonus_p, c=bonus_c):
        """Bonus shape on z in [0, 1]: equals 1 at z = 0 and 0 at z = 1."""
        z = np.clip(z, 0.0, 1.0)
        if kind == "linear":
            return 1.0 - z
        if kind == "quadratic":
            return (1.0 - z) ** 2
        if kind == "exp":
            denom = 1.0 - np.exp(-k)
            if denom == 0.0:
                # k -> 0 would be 0/0 (NaN reward); the limit of the expression is linear.
                return 1.0 - z
            return (np.exp(-k * z) - np.exp(-k)) / denom
        if kind == "power":
            return 1.0 - np.power(z, p)
        if kind == "log":
            denom = np.log1p(c)
            if denom == 0.0:
                # c -> 0 would be 0/0 (NaN reward); the limit of the expression is linear.
                return 1.0 - z
            return np.log1p(c * (1.0 - z)) / denom
        raise ValueError("unknown bonus kind")

    def reward_fn(e_scaled, du_scaled, y_sp_phys=None):
        """
        e_scaled : output error in scaled deviation space
        du_scaled: input move in scaled deviation space
        y_sp_phys: current setpoint in *physical* units (array len = n_outputs);
                   when omitted, the band falls back to the scaled floor
        """

        e_scaled = np.asarray(e_scaled, float)
        du_scaled = np.asarray(du_scaled, float)

        # ----- dynamic band based on setpoint -----
        if y_sp_phys is None:
            # fallback: just use the floor
            band_scaled = band_floor_scaled
        else:
            y_sp_phys_arr = np.asarray(y_sp_phys, float)
            # band_phys_i = max(k_rel_i * |y_sp_i|, band_floor_phys_i)
            band_phys = np.maximum(k_rel * np.abs(y_sp_phys_arr), band_floor_phys)
            band_scaled = band_phys / dy

        tau_scaled = tau_frac * band_scaled

        # ----- inside/outside gate -----
        # s_i is close to 1 well inside the band and close to 0 well outside it.
        abs_e = np.abs(e_scaled)
        s_i = _sigmoid((band_scaled - abs_e) / np.maximum(tau_scaled, 1e-12))

        if gate == "prod":
            w_in = float(np.prod(s_i, dtype=np.float64))
        elif gate == "mean":
            w_in = float(np.mean(s_i))
        elif gate == "geom":
            w_in = float(np.prod(s_i, dtype=np.float64) ** (1.0 / len(s_i)))
        else:
            raise ValueError("gate must be 'prod'|'mean'|'geom'")

        # ----- core quadratic costs -----
        err_quad = np.sum(Q_diag * (e_scaled ** 2))
        err_eff = (1.0 - w_in) * err_quad + w_in * (lam_in * err_quad)
        move = np.sum(R_diag * (du_scaled ** 2))

        # ----- linear penalties around band edge -----
        # Slope of the quadratic error cost evaluated at the band edge.
        slope_at_edge = 2.0 * Q_diag * band_scaled

        overflow = np.maximum(abs_e - band_scaled, 0.0)
        lin_out = (1.0 - w_in) * np.sum(gamma_out * slope_at_edge * overflow)

        inside_mag = np.minimum(abs_e, band_scaled)
        lin_in = w_in * np.sum(gamma_in * slope_at_edge * inside_mag)

        # ----- bonus near zero error -----
        qb2 = Q_diag * (band_scaled ** 2)
        z = abs_e / np.maximum(band_scaled, 1e-12)
        phi = _phi(z)
        bonus = w_in * beta * np.sum(qb2 * phi)

        # ----- total reward (scaled down by a fixed factor of 0.01) -----
        return (-(err_eff + move + lin_out + lin_in) + bonus)*0.01

    params = dict(
        k_rel=k_rel,
        band_floor_phys=band_floor_phys,
        band_floor_scaled=band_floor_scaled,
        Q_diag=Q_diag,
        R_diag=R_diag,
        tau_frac=tau_frac,
        gamma_out=gamma_out,
        gamma_in=gamma_in,
        beta=beta,
        gate=gate,
        lam_in=lam_in,
        bonus_kind=bonus_kind,
        bonus_k=bonus_k,
        bonus_p=bonus_p,
        bonus_c=bonus_c,
    )
    return params, reward_fn


## Reward configuration
# Number of inputs; output entries of data_min/data_max start at this index.
# (Assigned once here; the second identical assignment further down was redundant.)
n_inputs = 2

dy = data_max[n_inputs:] - data_min[n_inputs:]
y_sp_nom = 0.5 * (data_min[n_inputs:] + data_max[n_inputs:])  # midpoint of the output range

k_rel = np.array([0.003, 0.0003])
band_floor_phys = np.array([0.006, 0.07])

band_phys = np.maximum(k_rel * np.abs(y_sp_nom), band_floor_phys)

scale_factor = 1.0  # use 2.0 for [-1, 1] scaling, 1.0 for [0, 1]
band_scaled = scale_factor * band_phys / dy

# Band-based weight: q0 is the quadratic cost incurred exactly at the band edge.
q0 = 1.4
Q_diag = q0 / np.maximum(band_scaled ** 2, 1e-12)

print("dy:", dy)
print("y_sp_nom:", y_sp_nom)
print("band_phys:", band_phys)
print("band_scaled:", band_scaled)
print("Q_diag:", Q_diag)
# The printed Q_diag above is the computed value; the weights actually used are
# the hand-rounded ones assigned below.
Q_diag = np.array([518., 90.])  # rounded from the band-based calculation
R_diag = np.array([90., 90.])  # move cost for du_scaled ~ 0.02

print("Band scaled are:")

params, reward_fn = make_reward_fn_relative_QR(
    data_min, data_max, n_inputs,
    k_rel, band_floor_phys,
    Q_diag, R_diag,
    tau_frac=0.7,
    gamma_out=0.5, gamma_in=0.5,
    beta=7.0, gate="geom", lam_in=1.0,
    bonus_kind="exp", bonus_k=12.0, bonus_p=0.6, bonus_c=20.0,
)
print(params)

dy: [0.22165278 0.78153727]
y_sp_nom: [  3.83915067 323.21371982]
band_phys: [0.01151745 0.09696412]
band_scaled: [0.05196169 0.12406845]
Q_diag: [518.51529284  90.95055189]
Band scaled are:
{'k_rel': array([0.003 , 0.0003]), 'band_floor_phys': array([0.006, 0.07 ]), 'band_floor_scaled': array([0.02706937, 0.08956707]), 'Q_diag': array([518.,  90.]), 'R_diag': array([90., 90.]), 'tau_frac': 0.7, 'gamma_out': 0.5, 'gamma_in': 0.5, 'beta': 7.0, 'gate': 'geom', 'lam_in': 1.0, 'bonus_kind': 'exp', 'bonus_k': 12.0, 'bonus_p': 0.6, 'bonus_c': 20.0}


In [30]:
def make_reward_fn_relative_QR(
        data_min, data_max, n_inputs,
        k_rel, band_floor_phys,
        Q_diag, R_diag,
        tau_frac=0.7,
        gamma_out=0.5, gamma_in=0.5,
        beta=5.0, gate="geom", lam_in=1.0,
        bonus_kind="exp", bonus_k=12.0, bonus_p=0.6, bonus_c=20.0,
):
    """
    Build a quadratic (MPC-style) stage reward: -(weighted e'Qe + du'R du).

    NOTE: this definition reuses the name of the shaped band reward defined
    earlier in the notebook and therefore shadows it once this cell has run.
    Unlike that version, the returned reward contains only the quadratic error
    and move costs: the linear band penalties and the near-zero bonus are not
    part of it, so ``gamma_out``, ``gamma_in``, ``beta``, ``bonus_k``,
    ``bonus_p`` and ``bonus_c`` are accepted and echoed in ``params`` only.

    data_min, data_max : arrays for [u_min..., y_min...], [u_max..., y_max...]
    n_inputs           : number of inputs (so outputs start at index n_inputs)
    k_rel              : per-output relative tolerance factors (same length as outputs)
    band_floor_phys    : per-output minimum band in physical units
    Q_diag, R_diag     : diagonal quadratic weights on the error and on the input move
    tau_frac, gate     : shape the inside-band weight w_in, which only matters when
                         lam_in != 1 (error cost = (1 - w_in + w_in * lam_in) * e'Qe)

    Returns (params, reward_fn). ``reward_fn`` raises ValueError for an unknown
    ``gate`` or ``bonus_kind`` when it is called.
    """

    data_min = np.asarray(data_min, float)
    data_max = np.asarray(data_max, float)
    # Physical range of each output, clamped once so every division below is safe.
    dy = np.maximum(data_max[n_inputs:] - data_min[n_inputs:], 1e-12)

    k_rel = np.asarray(k_rel, float)
    band_floor_phys = np.asarray(band_floor_phys, float)
    Q_diag = np.asarray(Q_diag, float)
    R_diag = np.asarray(R_diag, float)

    # floor in *scaled* coordinates (used if y_sp_phys is not provided)
    band_floor_scaled = band_floor_phys / dy

    known_bonus_kinds = ("linear", "quadratic", "exp", "power", "log")

    def _sigmoid(x):
        # Clip the argument so np.exp cannot overflow.
        x = np.clip(x, -60.0, 60.0)
        return 1.0 / (1.0 + np.exp(-x))

    def reward_fn(e_scaled, du_scaled, y_sp_phys=None):
        """
        e_scaled : output error in scaled deviation space
        du_scaled: input move in scaled deviation space
        y_sp_phys: current setpoint in *physical* units (array len = n_outputs);
                   when omitted, the band falls back to the scaled floor
        """

        e_scaled = np.asarray(e_scaled, float)
        du_scaled = np.asarray(du_scaled, float)

        # ----- dynamic band based on setpoint -----
        if y_sp_phys is None:
            # fallback: just use the floor
            band_scaled = band_floor_scaled
        else:
            y_sp_phys_arr = np.asarray(y_sp_phys, float)
            # band_phys_i = max(k_rel_i * |y_sp_i|, band_floor_phys_i)
            band_phys = np.maximum(k_rel * np.abs(y_sp_phys_arr), band_floor_phys)
            band_scaled = band_phys / dy

        tau_scaled = tau_frac * band_scaled

        # ----- inside/outside gate -----
        abs_e = np.abs(e_scaled)
        s_i = _sigmoid((band_scaled - abs_e) / np.maximum(tau_scaled, 1e-12))

        if gate == "prod":
            w_in = float(np.prod(s_i, dtype=np.float64))
        elif gate == "mean":
            w_in = float(np.mean(s_i))
        elif gate == "geom":
            w_in = float(np.prod(s_i, dtype=np.float64) ** (1.0 / len(s_i)))
        else:
            raise ValueError("gate must be 'prod'|'mean'|'geom'")

        # ----- core quadratic costs -----
        err_quad = np.sum(Q_diag * (e_scaled ** 2))
        err_eff = (1.0 - w_in) * err_quad + w_in * (lam_in * err_quad)
        move = np.sum(R_diag * (du_scaled ** 2))

        # The band penalties and the bonus used to be computed here on every step
        # and then discarded; only the error they raised for a bad bonus_kind had
        # any effect, so that check is kept explicitly.
        if bonus_kind not in known_bonus_kinds:
            raise ValueError("unknown bonus kind")

        # ----- total reward -----
        return -(err_eff + move)

    params = dict(
        k_rel=k_rel,
        band_floor_phys=band_floor_phys,
        band_floor_scaled=band_floor_scaled,
        Q_diag=Q_diag,
        R_diag=R_diag,
        tau_frac=tau_frac,
        gamma_out=gamma_out,
        gamma_in=gamma_in,
        beta=beta,
        gate=gate,
        lam_in=lam_in,
        bonus_kind=bonus_kind,
        bonus_k=bonus_k,
        bonus_p=bonus_p,
        bonus_c=bonus_c,
    )
    return params, reward_fn


## Reward configuration
# Number of inputs; output entries of data_min/data_max start at this index.
# (Assigned once here; the second identical assignment further down was redundant.)
n_inputs = 2

dy = data_max[n_inputs:] - data_min[n_inputs:]
y_sp_nom = 0.5 * (data_min[n_inputs:] + data_max[n_inputs:])  # midpoint of the output range

k_rel = np.array([0.003, 0.0003])
band_floor_phys = np.array([0.006, 0.07])

band_phys = np.maximum(k_rel * np.abs(y_sp_nom), band_floor_phys)

scale_factor = 1.0  # use 2.0 for [-1, 1] scaling, 1.0 for [0, 1]
band_scaled = scale_factor * band_phys / dy

q0 = 1.4
Q_diag = q0 / np.maximum(band_scaled ** 2, 1e-12)

print("dy:", dy)
print("y_sp_nom:", y_sp_nom)
print("band_phys:", band_phys)
print("band_scaled:", band_scaled)
print("Q_diag:", Q_diag)
# The printed Q_diag above is the band-based value (about [518.5, 90.95]); it is NOT
# what is used below. NOTE(review): the weights below look like the RL weights
# ([518, 90] / [90, 90]) multiplied by the 0.01 reward scale and rounded — confirm.
Q_diag = np.array([5., 1.])  # MPC-style error weights (hand-set, see note above)
R_diag = np.array([1., 1.])  # MPC-style move weights (hand-set, see note above)

print("Band scaled are:")

params, reward_fn_mpc = make_reward_fn_relative_QR(
    data_min, data_max, n_inputs,
    k_rel, band_floor_phys,
    Q_diag, R_diag,
    tau_frac=0.7,
    gamma_out=0.5, gamma_in=0.5,
    beta=7.0, gate="geom", lam_in=1.0,
    bonus_kind="exp", bonus_k=12.0, bonus_p=0.6, bonus_c=20.0,
)
print(params)

dy: [0.22165278 0.78153727]
y_sp_nom: [  3.83915067 323.21371982]
band_phys: [0.01151745 0.09696412]
band_scaled: [0.05196169 0.12406845]
Q_diag: [518.51529284  90.95055189]
Band scaled are:
{'k_rel': array([0.003 , 0.0003]), 'band_floor_phys': array([0.006, 0.07 ]), 'band_floor_scaled': array([0.02706937, 0.08956707]), 'Q_diag': array([5., 1.]), 'R_diag': array([1., 1.]), 'tau_frac': 0.7, 'gamma_out': 0.5, 'gamma_in': 0.5, 'beta': 7.0, 'gate': 'geom', 'lam_in': 1.0, 'bonus_kind': 'exp', 'bonus_k': 12.0, 'bonus_p': 0.6, 'bonus_c': 20.0}


In [50]:
import os
import glob
import pickle
import numpy as np
import matplotlib.pyplot as plt
import matplotlib as mpl
import matplotlib.ticker as mtick
from datetime import datetime

from utils.helpers import apply_min_max, reverse_min_max


class CompatUnpickler(pickle.Unpickler):
    """Unpickler that tolerates pickles referring to ``numpy._core``.

    Pickles written with a NumPy that exposes ``numpy._core`` cannot be resolved
    by an installation that only has ``numpy.core``. The module recorded in the
    pickle is tried first; only if that import (or attribute lookup) fails is
    the ``numpy._core`` prefix rewritten to ``numpy.core``. Trying the recorded
    name first avoids routing through the deprecated ``numpy.core`` alias on
    installations where ``numpy._core`` exists.
    """

    def find_class(self, module, name):
        new_prefix = "numpy._core"
        is_new_core = module == new_prefix or module.startswith(new_prefix + ".")
        if not is_new_core:
            return super().find_class(module, name)
        try:
            return super().find_class(module, name)
        except (ImportError, AttributeError):
            # Installed NumPy lacks this module/name under numpy._core: use the legacy path.
            legacy_module = "numpy.core" + module[len(new_prefix):]
            return super().find_class(legacy_module, name)


def load_pkl(path):
    """Read one pickle file through CompatUnpickler and return the stored object.

    Unpickling can execute arbitrary code, so only load files from a trusted source.
    """
    with open(path, "rb") as handle:
        unpickler = CompatUnpickler(handle)
        return unpickler.load()


def _paper_style(font=16, label=18, tick=16, lw=2.8):
    """Apply the shared figure style by updating matplotlib's global rcParams.

    font  : base font size
    label : axis-label font size (labels are also set to bold)
    tick  : tick-label font size on both axes
    lw    : default line width
    A dashed, semi-transparent grid is switched on as well.
    """
    text_settings = {
        "font.size": font,
        "axes.labelsize": label,
        "axes.labelweight": "bold",
    }
    grid_settings = {
        "axes.grid": True,
        "grid.linestyle": "--",
        "grid.linewidth": 0.8,
        "grid.alpha": 0.35,
    }
    tick_and_line_settings = {
        "xtick.labelsize": tick,
        "ytick.labelsize": tick,
        "lines.linewidth": lw,
    }
    mpl.rcParams.update({**text_settings, **grid_settings, **tick_and_line_settings})


def _as_ysp_steps(y_sp):
    y_sp = np.asarray(y_sp, float)
    if y_sp.ndim != 2:
        raise ValueError("y_sp must be 2D.")
    if y_sp.shape[0] < y_sp.shape[1]:
        return y_sp.T
    return y_sp


def y_sp_phys_from_rl(rl_dict, n_inputs=2, align="tail", n_steps=None):
    """Convert the stored setpoint trajectory of a run to physical units.

    ``rl_dict["y_sp"]`` is read as scaled deviations from the steady-state
    output; the scaled steady state is added back and the min-max scaling is
    reversed using the output part of ``data_min`` / ``data_max``.

    n_steps : if given, keep only that many steps, taken from the end
              (align="tail") or from the start (align="head").
    Raises ValueError for any other ``align`` (checked only when ``n_steps``
    is given). Returns a float array.
    """
    sp_dev_all = _as_ysp_steps(rl_dict["y_sp"])
    lower = np.asarray(rl_dict["data_min"], float)
    upper = np.asarray(rl_dict["data_max"], float)
    y_lower, y_upper = lower[n_inputs:], upper[n_inputs:]

    y_ss = np.asarray(rl_dict["steady_states"]["y_ss"], float)
    y_ss_scaled = apply_min_max(y_ss, y_lower, y_upper)

    sp_dev = sp_dev_all
    if n_steps is not None:
        count = int(n_steps)
        if align == "tail":
            sp_dev = sp_dev_all[-count:, :]
        elif align == "head":
            sp_dev = sp_dev_all[:count, :]
        else:
            raise ValueError("align must be 'tail' or 'head'.")

    sp_phys = reverse_min_max(sp_dev + y_ss_scaled, y_lower, y_upper)
    return np.asarray(sp_phys, float)


def step_rewards_from_yu(y_phys_full, u_phys, rl_meta, reward_fn, n_inputs=2, align="tail"):
    """Recompute per-step rewards from physical output and input trajectories.

    y_phys_full : outputs in physical units; when it has exactly one more row
                  than ``u_phys``, the first row is dropped
    u_phys      : inputs in physical units
    rl_meta     : dict providing "data_min", "data_max", "y_sp" (scaled setpoint
                  deviations) and "steady_states"["y_ss"]
    reward_fn   : callable ``reward_fn(e_scaled, du_scaled, y_sp_phys=...)``
    align       : "tail" or "head" — which end to keep when lengths differ

    The input move of the first kept step is taken as zero. Returns a 1-D float
    array with one reward per step; raises ValueError if nothing can be aligned
    or ``align`` is invalid.
    """
    lower = np.asarray(rl_meta["data_min"], float)
    upper = np.asarray(rl_meta["data_max"], float)

    sp_dev_all = _as_ysp_steps(rl_meta["y_sp"])

    outputs = np.asarray(y_phys_full, float)
    inputs = np.asarray(u_phys, float)

    # One extra output row means the trajectory includes the initial sample.
    has_initial_sample = outputs.shape[0] >= 2 and outputs.shape[0] == inputs.shape[0] + 1
    if has_initial_sample:
        outputs = outputs[1:, :]

    n_steps = int(min(outputs.shape[0], inputs.shape[0], sp_dev_all.shape[0]))
    if n_steps <= 0:
        raise ValueError("Could not align lengths for reward recomputation.")

    if align == "tail":
        window = slice(-n_steps, None)
    elif align == "head":
        window = slice(None, n_steps)
    else:
        raise ValueError("align must be 'tail' or 'head'.")

    y_phys = outputs[window, :]
    u_use = inputs[window, :]
    sp_dev = sp_dev_all[window, :]

    y_scaled = apply_min_max(y_phys, lower[n_inputs:], upper[n_inputs:])
    u_scaled = apply_min_max(u_use, lower[:n_inputs], upper[:n_inputs])

    y_ss = np.asarray(rl_meta["steady_states"]["y_ss"], float)
    y_ss_scaled = apply_min_max(y_ss, lower[n_inputs:], upper[n_inputs:])
    sp_scaled = sp_dev + y_ss_scaled

    e_scaled = y_scaled - sp_scaled

    # First move is unknown, so it stays zero; the rest are consecutive differences.
    du_scaled = np.zeros_like(u_scaled)
    du_scaled[1:, :] = np.diff(u_scaled, axis=0)

    sp_phys = reverse_min_max(sp_scaled, lower[n_inputs:], upper[n_inputs:])

    rewards = [
        float(reward_fn(e_scaled[t], du_scaled[t], y_sp_phys=sp_phys[t]))
        for t in range(n_steps)
    ]
    return np.array(rewards, dtype=float)


def episode_avg_from_steps(r_step, ep_len):
    """Average a flat per-step reward series over consecutive episodes.

    The series is cut into blocks of ``ep_len`` steps; a trailing incomplete
    block is ignored. Returns a 1-D float array with one mean per full block.
    """
    flat = np.asarray(r_step, float).ravel()
    width = int(ep_len)
    n_full = int(len(flat) // width)
    means = [float(np.mean(flat[k * width:(k + 1) * width])) for k in range(n_full)]
    return np.asarray(means, float)


def _stack_1d(curves):
    T = int(max(len(c) for c in curves))
    X = np.full((len(curves), T), np.nan, dtype=float)
    for i, c in enumerate(curves):
        c = np.asarray(c, float).ravel()
        X[i, :len(c)] = c
    return X


def _band_stats(X, band="minmax"):
    mu = np.nanmean(X, axis=0)
    if band == "minmax":
        lo = np.nanmin(X, axis=0)
        hi = np.nanmax(X, axis=0)
        return mu, lo, hi
    if band == "p25p75":
        lo = np.nanpercentile(X, 25, axis=0)
        hi = np.nanpercentile(X, 75, axis=0)
        return mu, lo, hi
    raise ValueError("band must be 'minmax' or 'p25p75'.")


def _tail_last_episode(y_full, ep_len):
    y_full = np.asarray(y_full, float)
    ep_len = int(ep_len)
    if y_full.shape[0] >= ep_len + 1:
        return y_full[-(ep_len + 1):, :]
    return y_full


def _tail_last_episode_sp(y_sp_steps, ep_len):
    y_sp_steps = np.asarray(y_sp_steps, float)
    ep_len = int(ep_len)
    if y_sp_steps.shape[0] >= ep_len:
        return y_sp_steps[-ep_len:, :]
    return y_sp_steps


def plot_band_case_with_mpc(
    rl_input_pkls,
    mpc_results_pkl,
    reward_fn,
    out_dir,
    mode="auto",
    band="minmax",
    n_inputs=2,
    ylabels=(r"$\eta$ (L/g)", r"$T$ (K)"),
    start_episode=1,
    reward_ylim=(-30, 0.5),
):
    """Plot RL runs (mean line + band) against an MPC baseline and save two PNGs.

    Figure 1 (``fig_reward_band_mpc_<mode>.png``): average reward per episode
    across the RL runs, with the MPC reward recomputed via ``reward_fn``.
    Figure 2 (``fig_last_episode_outputs_band_mpc_<mode>.png``): the two outputs
    over the last episode for RL (band), MPC and the setpoint.

    rl_input_pkls   : paths of RL result pickles; the first one supplies the
                      episode length, sampling time and setpoint/scaling data
    mpc_results_pkl : path of a pickle holding "y_mpc" and "u_mpc"
    reward_fn       : reward used to score the MPC trajectory
    out_dir         : output folder (created if missing)
    mode            : "nominal" draws the last MPC episode reward as a horizontal
                      line, anything else draws the MPC per-episode curve;
                      "auto" picks "nominal" when MPC has at most two episodes
                      and fewer episodes than the RL curve
    band            : envelope type passed to ``_band_stats``
    start_episode   : first episode (1-based) shown in the reward figure; if it
                      lies beyond the available data the plot starts at episode 1
    reward_ylim     : y-limits of the reward figure, or None to autoscale

    Returns ``out_dir``.
    """
    os.makedirs(out_dir, exist_ok=True)
    _paper_style()

    C_RL = "tab:blue"
    C_MPC = "tab:green"
    C_SP = "tab:red"
    LW = mpl.rcParams["lines.linewidth"]
    A_BAND = 0.22

    rl_runs = [load_pkl(p) for p in rl_input_pkls]
    rl0 = rl_runs[0]

    ep_len = int(rl0["time_in_sub_episodes"])
    delta_t = float(rl0["delta_t"])

    # ----- figure 1: reward per episode -----
    rl_reward_curves = [np.asarray(d["avg_rewards"], float).ravel() for d in rl_runs]
    Xr = _stack_1d(rl_reward_curves)
    mu_r, lo_r, hi_r = _band_stats(Xr, band=band)

    start_episode = int(max(1, start_episode))
    if start_episode - 1 >= len(mu_r):
        # Requested start is past the data: show everything. Resetting
        # start_episode too keeps the x-axis labels matched to the plotted data
        # (previously only the slice offset was reset).
        start_episode = 1
    i0 = start_episode - 1

    mu_r = mu_r[i0:]
    lo_r = lo_r[i0:]
    hi_r = hi_r[i0:]
    xep = np.arange(start_episode, start_episode + len(mu_r))

    mpc = load_pkl(mpc_results_pkl)
    y_mpc = np.asarray(mpc["y_mpc"], float)
    u_mpc = np.asarray(mpc["u_mpc"], float)

    r_step_mpc = step_rewards_from_yu(y_mpc, u_mpc, rl0, reward_fn, n_inputs=n_inputs, align="tail")
    r_ep_mpc = episode_avg_from_steps(r_step_mpc, ep_len=ep_len)

    if mode == "auto":
        if len(r_ep_mpc) <= 2 and len(mu_r) > len(r_ep_mpc):
            mode_use = "nominal"
        else:
            mode_use = "disturbance"
    else:
        mode_use = mode

    plt.figure(figsize=(7.6, 4.8))
    plt.plot(xep, mu_r, "-", color=C_RL, lw=LW)
    plt.fill_between(xep, lo_r, hi_r, color=C_RL, alpha=A_BAND)

    if mode_use == "nominal":
        mpc_const = float(r_ep_mpc[-1]) if len(r_ep_mpc) else np.nan
        plt.hlines(mpc_const, xmin=xep[0], xmax=xep[-1], color=C_MPC, linestyle="--", lw=LW)
    else:
        r_mpc_plot = r_ep_mpc[i0:i0 + len(mu_r)]
        n = min(len(xep), len(r_mpc_plot))
        plt.plot(xep[:n], r_mpc_plot[:n], "--", color=C_MPC, lw=LW)

    plt.xlabel("Episode #")
    plt.ylabel("Avg. reward")
    if reward_ylim is not None:
        plt.ylim(reward_ylim)
    ax = plt.gca()
    ax.spines["top"].set_visible(False)
    ax.spines["right"].set_visible(False)
    ax.xaxis.set_major_locator(mtick.MaxNLocator(8, integer=True))
    ax.xaxis.set_minor_locator(mtick.AutoMinorLocator(2))
    plt.tight_layout()
    plt.savefig(os.path.join(out_dir, f"fig_reward_band_mpc_{mode_use}.png"), dpi=300, bbox_inches="tight")
    plt.close()

    # ----- figure 2: outputs over the last episode -----
    rl_y_last = []
    for d in rl_runs:
        y = np.asarray(d.get("y_rl", d.get("y_mpc")), float)
        rl_y_last.append(_tail_last_episode(y, ep_len))

    y_sp_phys = y_sp_phys_from_rl(rl0, n_inputs=n_inputs, align="tail")
    y_sp_last = _tail_last_episode_sp(y_sp_phys, ep_len)
    y_mpc_last = _tail_last_episode(y_mpc, ep_len)

    # W steps -> W + 1 output samples (the outputs include the sample before the first step).
    W = min(ep_len, y_sp_last.shape[0])
    t_line = np.linspace(0.0, W * delta_t, W + 1)
    t_step = t_line[:-1]

    Y0 = []
    Y1 = []
    for y in rl_y_last:
        if y.shape[0] >= W + 1:
            y = y[-(W + 1):, :]
        else:
            # Left-pad short runs with NaN so every run spans W + 1 samples.
            pad = (W + 1) - y.shape[0]
            y = np.vstack([np.full((pad, y.shape[1]), np.nan), y])
        Y0.append(y[:, 0])
        Y1.append(y[:, 1])

    X0 = _stack_1d(Y0)
    X1 = _stack_1d(Y1)
    mu0, lo0, hi0 = _band_stats(X0, band=band)
    mu1, lo1, hi1 = _band_stats(X1, band=band)

    if y_mpc_last.shape[0] >= W + 1:
        y_mpc_last = y_mpc_last[-(W + 1):, :]
    else:
        pad = (W + 1) - y_mpc_last.shape[0]
        y_mpc_last = np.vstack([np.full((pad, y_mpc_last.shape[1]), np.nan), y_mpc_last])

    plt.figure(figsize=(7.8, 5.8))

    # One panel per output; only the bottom panel carries the time-axis label.
    panels = ((mu0, lo0, hi0), (mu1, lo1, hi1))
    for k, (mu_k, lo_k, hi_k) in enumerate(panels):
        ax = plt.subplot(2, 1, k + 1)
        ax.plot(t_line, mu_k[:W + 1], "-", color=C_RL, lw=LW, zorder=2)
        ax.fill_between(t_line, lo_k[:W + 1], hi_k[:W + 1], color=C_RL, alpha=A_BAND, zorder=1)
        ax.plot(t_line, y_mpc_last[:W + 1, k], "--", color=C_MPC, lw=LW, zorder=2)
        ax.step(t_step, y_sp_last[-W:, k], where="post", linestyle="--", color=C_SP, lw=LW, zorder=3)
        ax.set_ylabel(ylabels[k])
        if k == len(panels) - 1:
            ax.set_xlabel("Time (h)")
        ax.spines["top"].set_visible(False)
        ax.spines["right"].set_visible(False)
        ax.xaxis.set_major_locator(mtick.MaxNLocator(6))
        ax.xaxis.set_minor_locator(mtick.AutoMinorLocator(2))
        ax.xaxis.set_major_formatter(mtick.FormatStrFormatter("%.1f"))

    plt.tight_layout()
    plt.savefig(os.path.join(out_dir, f"fig_last_episode_outputs_band_mpc_{mode_use}.png"),
                dpi=300, bbox_inches="tight")
    plt.close()

    return out_dir


def _get_eval_abs_error_scaled(rl_dict, n_inputs=2, eval_len=800, sub_len=400, settle_len=80):
    """Absolute scaled tracking error at the end of the two final setpoint segments.

    The last ``eval_len`` steps of the run are split into two consecutive
    segments of ``sub_len`` steps, and the last ``settle_len`` steps of each
    segment are returned. All three lengths are reduced automatically when the
    run is shorter than requested.

    Returns ``(a_ss, b_ss)``: absolute errors for the first and the second segment.
    """
    lower = np.asarray(rl_dict["data_min"], float)
    upper = np.asarray(rl_dict["data_max"], float)

    outputs = np.asarray(rl_dict.get("y_rl", rl_dict.get("y_mpc")), float)
    # Drop the initial sample when the outputs have exactly one more row than "u_mpc".
    has_initial_sample = (
        outputs.shape[0] >= 2
        and "u_mpc" in rl_dict
        and outputs.shape[0] == np.asarray(rl_dict["u_mpc"]).shape[0] + 1
    )
    if has_initial_sample:
        outputs = outputs[1:, :]

    sp_dev = _as_ysp_steps(rl_dict["y_sp"])

    # Align outputs and setpoints on their common tail.
    n_steps = int(min(outputs.shape[0], sp_dev.shape[0]))
    outputs = outputs[-n_steps:, :]
    sp_dev = sp_dev[-n_steps:, :]

    eval_len = int(min(eval_len, n_steps))
    y_eval = outputs[-eval_len:, :]
    sp_dev_eval = sp_dev[-eval_len:, :]

    y_scaled = apply_min_max(y_eval, lower[n_inputs:], upper[n_inputs:])
    y_ss = np.asarray(rl_dict["steady_states"]["y_ss"], float)
    y_ss_scaled = apply_min_max(y_ss, lower[n_inputs:], upper[n_inputs:])
    sp_scaled = sp_dev_eval + y_ss_scaled

    abs_err = np.abs(y_scaled - sp_scaled)

    sub_len = int(min(sub_len, eval_len // 2))
    settle_len = int(min(settle_len, sub_len))

    first_segment = abs_err[:sub_len, :]
    second_segment = abs_err[sub_len:sub_len + sub_len, :]

    return first_segment[-settle_len:, :], second_segment[-settle_len:, :]


def boxplot_abs_error_scaled_last_eval_two_rl(
    rl1_pkls,
    rl2_pkls,
    out_dir,
    n_inputs=2,
    eval_len=800,
    sub_len=400,
    settle_len=80,
    ylabels=(r"$\eta$", r"$T$")
):
    """Side-by-side box plots of settled |e| (scaled) for two groups of RL runs.

    For every run the settled absolute errors of the two final setpoint
    segments are taken from ``_get_eval_abs_error_scaled`` and pooled per
    group. Four regions are drawn (segment a/b for each of the two outputs),
    each with one box per group. The figure is written to
    ``fig_box_abs_error_scaled_eval_last.png`` in ``out_dir`` (created if
    missing), and ``out_dir`` is returned.
    """
    os.makedirs(out_dir, exist_ok=True)
    _paper_style()

    runs_first = [load_pkl(path) for path in rl1_pkls]
    runs_second = [load_pkl(path) for path in rl2_pkls]

    def collect(runs):
        # Pool the settled errors of all runs, separately for each setpoint segment.
        per_run = [
            _get_eval_abs_error_scaled(
                run,
                n_inputs=n_inputs,
                eval_len=eval_len,
                sub_len=sub_len,
                settle_len=settle_len
            )
            for run in runs
        ]
        seg_a = np.concatenate([pair[0] for pair in per_run], axis=0)
        seg_b = np.concatenate([pair[1] for pair in per_run], axis=0)
        return seg_a, seg_b

    a1, b1 = collect(runs_first)
    a2, b2 = collect(runs_second)

    groups = [
        (a1[:, 0], a2[:, 0], f"SP(a)\n{ylabels[0]}"),
        (b1[:, 0], b2[:, 0], f"SP(b)\n{ylabels[0]}"),
        (a1[:, 1], a2[:, 1], f"SP(a)\n{ylabels[1]}"),
        (b1[:, 1], b2[:, 1], f"SP(b)\n{ylabels[1]}")
    ]

    centers = np.arange(1, len(groups) + 1, dtype=float)
    dx = 0.18

    # Two boxes per region: first group left of the centre, second group right of it.
    data = []
    positions = []
    for center, (errs_first, errs_second, _) in zip(centers, groups):
        data += [np.asarray(errs_first, float).ravel(), np.asarray(errs_second, float).ravel()]
        positions += [center - dx, center + dx]

    plt.figure(figsize=(8.6, 4.9))
    bp = plt.boxplot(
        data,
        positions=positions,
        widths=0.28,
        patch_artist=True,
        showfliers=False,
        whis=1.5
    )

    c_rl1 = "#4C72B0"  # muted blue
    c_rl2 = "#DD8452"  # muted orange

    # Boxes alternate first group / second group, so even indices are the first group.
    for k, box in enumerate(bp["boxes"]):
        box.set_facecolor(c_rl2 if k % 2 else c_rl1)
        box.set_alpha(0.55)
        box.set_edgecolor("black")
        box.set_linewidth(2.0)
        box.set_hatch(None)  # colour alone distinguishes the groups

    for part in ["whiskers", "caps", "medians"]:
        for artist in bp[part]:
            artist.set_color("black")
            artist.set_linewidth(2.0)

    ax = plt.gca()
    ax.set_ylabel(r"$|e|$ (scaled)")
    ax.set_xticks(centers)
    ax.set_xticklabels([group[2] for group in groups], rotation=0, ha="center")

    # dashed separators halfway between neighbouring regions
    midpoints = (centers[:-1] + centers[1:]) / 2.0
    for x in midpoints:
        ax.axvline(x, linestyle="--", linewidth=1.2, alpha=0.35)

    ax.spines["top"].set_visible(False)
    ax.spines["right"].set_visible(False)
    ax.yaxis.set_major_locator(mtick.MaxNLocator(6))

    # fixed x-range with half a unit of margin on each side
    ax.set_xlim(0.5, len(groups) + 0.5)

    plt.tight_layout()
    target = os.path.join(out_dir, "fig_box_abs_error_scaled_eval_last.png")
    plt.savefig(
        target,
        dpi=300,
        bbox_inches="tight"
    )
    plt.close()

    return out_dir



def ss_error_last_eval_phys_stats(
    rl_pkls,
    n_inputs=2,
    eval_len=800,
    sub_len=400,
    settle_len=80
):
    """
    Mean/std (across runs) of the settled absolute tracking error in the final evaluation window.

    For each pickle in ``rl_pkls``:
      1. the output trajectory is read from ``"y_rl"`` (falling back to ``"y_mpc"``);
         when the file also has ``"u_mpc"`` and y is exactly one row longer, the first
         row is dropped so that y is per-step;
      2. the setpoint is rebuilt with ``y_sp_phys_from_rl(..., align="tail")`` and both
         series are tail-aligned to a common length;
      3. the last ``eval_len`` steps are split into two consecutive segments of
         ``sub_len`` steps (setpoint (a), then setpoint (b));
      4. ``|y - y_sp|`` is averaged over the last ``settle_len`` steps of each segment.

    The function applies no scaling itself; it assumes the stored outputs are in the same
    (physical) units as the reconstructed setpoint -- TODO confirm against the data files.

    Parameters
    ----------
    rl_pkls : iterable of path-like
        Pickle files, one per run. Must not be empty.
    n_inputs : int
        Forwarded to ``y_sp_phys_from_rl``.
    eval_len, sub_len, settle_len : int
        Window lengths in steps. They are clipped to the data actually available
        (``sub_len`` to half of the evaluation window, ``settle_len`` to ``sub_len``).

    Returns
    -------
    mu, sd : np.ndarray, shape (2, n_out)
        Row 0 is segment (a), row 1 is segment (b). ``sd`` is the sample standard
        deviation (``ddof=1``) across runs, or zeros when only one run is given.

    Raises
    ------
    ValueError
        If ``rl_pkls`` is empty, a trajectory is not a 2D array, or the settling
        window ends up empty after clipping.
    """
    runs = [load_pkl(p) for p in rl_pkls]
    if not runs:
        raise ValueError("rl_pkls must contain at least one pickle path.")

    vals = []
    for d in runs:
        y_full = np.asarray(d.get("y_rl", d.get("y_mpc")), float)
        if y_full.ndim != 2:
            raise ValueError("Could not read y trajectory. Expected a 2D array under 'y_rl' or 'y_mpc'.")

        # y stored as N+1 points (initial condition + N steps): drop the initial row.
        if y_full.shape[0] >= 2 and "u_mpc" in d and (y_full.shape[0] == np.asarray(d["u_mpc"]).shape[0] + 1):
            y_steps = y_full[1:, :]
        else:
            y_steps = y_full

        y_sp_phys_all = y_sp_phys_from_rl(d, n_inputs=n_inputs, align="tail", n_steps=y_steps.shape[0])

        # Tail-align outputs and setpoints to the same number of steps.
        n_steps = int(min(y_steps.shape[0], y_sp_phys_all.shape[0]))
        y_steps = y_steps[-n_steps:, :]
        y_sp_phys_all = y_sp_phys_all[-n_steps:, :]

        eval_len_use = int(min(eval_len, n_steps))
        y_eval = y_steps[-eval_len_use:, :]
        sp_eval = y_sp_phys_all[-eval_len_use:, :]

        sub_len_use = int(min(sub_len, eval_len_use // 2))
        settle_len_use = int(min(settle_len, sub_len_use))
        if settle_len_use < 1:
            # Guard the slices below: x[-0:] would silently select the WHOLE segment,
            # and an empty segment would average to NaN.
            raise ValueError(
                "Settling window is empty; need settle_len >= 1, sub_len >= 1 "
                "and at least 2 aligned steps per run."
            )

        ya = y_eval[:sub_len_use, :]
        yb = y_eval[sub_len_use:sub_len_use + sub_len_use, :]
        spa = sp_eval[:sub_len_use, :]
        spb = sp_eval[sub_len_use:sub_len_use + sub_len_use, :]

        ea = np.abs(ya - spa)[-settle_len_use:, :]
        eb = np.abs(yb - spb)[-settle_len_use:, :]

        vals.append([np.mean(ea, axis=0), np.mean(eb, axis=0)])

    vals = np.asarray(vals, float)  # (n_runs, 2 SP, n_out)
    mu = np.mean(vals, axis=0)
    sd = np.std(vals, axis=0, ddof=1) if vals.shape[0] > 1 else np.zeros_like(mu)
    return mu, sd


def print_table_block(mu1, sd1, mu2, sd2, name_out=("eta", "T")):
    """
    Print a plain-text comparison of RL1 and RL2 statistics, one block per output.

    ``mu*`` / ``sd*`` are indexed as ``[setpoint, output]`` with setpoint 0 = SP(a)
    and setpoint 1 = SP(b). For every name in ``name_out`` the block lists the RL1 and
    RL2 means and standard deviations (3 decimals) followed by the percent reduction
    of the RL2 mean relative to the RL1 mean (0 decimals), then an empty line.
    """

    def _pct_drop(before, after):
        # The floor on the denominator avoids dividing by zero when RL1's mean is 0.
        return 100.0 * (1.0 - after / max(before, 1e-12))

    for col, label in enumerate(name_out):
        rl1_mean_a, rl1_mean_b = mu1[0, col], mu1[1, col]
        rl2_mean_a, rl2_mean_b = mu2[0, col], mu2[1, col]

        drop_a = _pct_drop(rl1_mean_a, rl2_mean_a)
        drop_b = _pct_drop(rl1_mean_b, rl2_mean_b)

        rl1_std_a, rl1_std_b = sd1[0, col], sd1[1, col]
        rl2_std_a, rl2_std_b = sd2[0, col], sd2[1, col]

        print(f"{label}:")
        print(f"  RL1 mean: SP(a)={rl1_mean_a:.3f}, SP(b)={rl1_mean_b:.3f}")
        print(f"  RL1 std : SP(a)={rl1_std_a:.3f}, SP(b)={rl1_std_b:.3f}")
        print(f"  RL2 mean: SP(a)={rl2_mean_a:.3f}, SP(b)={rl2_mean_b:.3f}")
        print(f"  RL2 std : SP(a)={rl2_std_a:.3f}, SP(b)={rl2_std_b:.3f}")
        print(f"  Reduction (%): SP(a)={drop_a:.0f}, SP(b)={drop_b:.0f}")
        print("")
def compare_three_paths_outputs_only_polymer(
    rl1_path,
    rl2_path,
    mpc_path,
    directory,
    prefix_name,
    start_idx=-800,
    n_inputs=2,
    ylabels=(r"$\eta$ (L/g)", r"$T$ (K)"),
    save_pdf=True
):
    """
    Overlay the outputs of two RL runs and one MPC run on a common tail window.

    Only outputs are drawn (no inputs). Each output gets its own subplot showing
    RL1 (solid blue), RL2 (solid orange), MPC (dashed green) and the setpoint
    (dashed red, post-step), against time measured from the start of the window.

    Parameters
    ----------
    rl1_path, rl2_path, mpc_path : path-like
        Pickle files read with ``load_pkl``. The trajectory is taken from the first of
        the keys ``"y_rl"``, ``"y_mpc"``, ``"y"`` that is present and not ``None``.
    directory, prefix_name : str
        Figures are written to ``<directory>/<prefix_name>/<YYYYmmdd_HHMMSS>/``,
        which is created if needed.
    start_idx : int
        Must be negative; its absolute value is the requested window length in steps.
        The window is shortened to what the setpoint and all three trajectories allow.
    n_inputs : int
        Forwarded to ``y_sp_phys_from_rl``.
    ylabels : sequence of str
        One y-axis label per output column of the RL1 trajectory.
    save_pdf : bool
        Also write a PDF next to the PNG.

    Returns
    -------
    str
        The timestamped output directory containing
        ``outputs_compare_three_polymer.png`` (and ``.pdf`` when ``save_pdf``).

    Raises
    ------
    ValueError
        If a trajectory is not 2D, ``ylabels`` does not match the output count,
        ``start_idx`` is not negative, or the usable window is 5 steps or fewer.

    Notes
    -----
    The setpoint and ``delta_t`` are both read from the RL1 file only.
    """

    def _get_first(d, keys):
        # Value under the first key that exists and is not None; None if nothing matches.
        return next((d[k] for k in keys if k in d and d[k] is not None), None)

    def _mk_outdir(directory, prefix_name):
        # One sub-directory per call, named by the current timestamp.
        stamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        target = os.path.join(directory, prefix_name, stamp)
        os.makedirs(target, exist_ok=True)
        return target

    def _save_fig(fig, out_dir, stem, save_pdf=True):
        png_file = os.path.join(out_dir, f"{stem}.png")
        fig.savefig(png_file, dpi=300, bbox_inches="tight")
        if save_pdf:
            pdf_file = os.path.join(out_dir, f"{stem}.pdf")
            fig.savefig(pdf_file, bbox_inches="tight")
        plt.close(fig)

    def _make_axes_bold(ax, spine_lw=2.0, tick_w=2.0):
        for spine in ax.spines.values():
            spine.set_linewidth(spine_lw)
        ax.tick_params(axis="both", width=tick_w)

    def _slice_y_line(y, W):
        """
        Last W+1 rows of y as a (W+1, n_out) array.

        With exactly W rows the first row is duplicated at the front; with fewer rows
        the front is padded with NaN.
        """
        arr = np.asarray(y, float)
        if arr.ndim != 2:
            raise ValueError("y must be 2D (time, n_out).")

        n_rows = arr.shape[0]
        n_points = W + 1

        if n_rows >= n_points:
            return arr[-n_points:, :]

        if n_rows == W:
            return np.vstack([arr[:1, :], arr])

        filler = np.full((n_points - n_rows, arr.shape[1]), np.nan)
        return np.vstack([filler, arr])

    def _steps_available_from_y(y):
        # A trajectory of N+1 points spans N steps; fewer than 2 points span none.
        n_rows = np.asarray(y, float).shape[0]
        return n_rows - 1 if n_rows >= 2 else 0

    # ---- read the three result files ----
    d1 = load_pkl(rl1_path)
    d2 = load_pkl(rl2_path)
    dm = load_pkl(mpc_path)

    y_keys = ["y_rl", "y_mpc", "y"]
    y1, y2, ym = (np.asarray(_get_first(src, y_keys), float) for src in (d1, d2, dm))

    if any(traj.ndim != 2 for traj in (y1, y2, ym)):
        raise ValueError("Could not read y trajectories. Expected 2D arrays.")

    n_out = int(y1.shape[1])
    if len(ylabels) != n_out:
        raise ValueError("ylabels length must match number of outputs in y.")

    delta_t = float(d1["delta_t"])

    # ---- work out the tail window length W ----
    if start_idx >= 0:
        raise ValueError("This function currently expects start_idx < 0 (tail window), e.g., -800.")

    W_req = int(abs(start_idx))

    # Per-step setpoint reconstructed from the RL1 file.
    sp_phys_all = y_sp_phys_from_rl(d1, n_inputs=n_inputs, align="tail")
    n_sp = int(sp_phys_all.shape[0])

    # The window cannot exceed the request, the setpoint length, or any trajectory's step count.
    W_max = min(W_req, n_sp, *(_steps_available_from_y(traj) for traj in (y1, y2, ym)))
    if W_max <= 5:
        raise ValueError("Tail window W is too small after alignment. Check your files and start_idx.")

    W = int(W_max)

    # ---- time axes and setpoint on the window ----
    sp_tail = sp_phys_all[-W:, :]  # (W, n_out)
    t_line = np.linspace(0.0, W * delta_t, W + 1)  # W+1 sample instants
    t_step = t_line[:-1]                           # W step starts

    # ---- trajectories on the window, each (W+1, n_out) ----
    y1_tail, y2_tail, ym_tail = (_slice_y_line(traj, W) for traj in (y1, y2, ym))

    # ---- style ----
    _paper_style()  # plotting style helper defined elsewhere in the notebook

    c_rl1 = "tab:blue"
    c_rl2 = "tab:orange"
    c_mpc = "tab:green"
    c_sp = "tab:red"

    out_dir = _mk_outdir(directory, prefix_name)

    # ---- figure: one row per output ----
    fig, axs = plt.subplots(n_out, 1, figsize=(8.2, 5.6), sharex=True)
    if n_out == 1:
        axs = [axs]

    curves = (
        (y1_tail, "-", c_rl1),
        (y2_tail, "-", c_rl2),
        (ym_tail, "--", c_mpc),
    )
    for j, ax in enumerate(axs):
        for traj, fmt, colour in curves:
            ax.plot(t_line, traj[:, j], fmt, color=colour)
        ax.step(t_step, sp_tail[:, j], where="post", linestyle="--", color=c_sp)

        ax.set_ylabel(ylabels[j])
        for side in ("top", "right"):
            ax.spines[side].set_visible(False)
        _make_axes_bold(ax)

    axs[-1].set_xlabel("Time (h)")

    _save_fig(fig, out_dir, "outputs_compare_three_polymer", save_pdf=save_pdf)
    return out_dir

# Nominal

In [48]:
# Nominal case: five saved runs per agent plus the nominal MPC result.
# Paths are built from dir_path (the notebook's Data directory), exactly as mpc_pkl is,
# so the notebook is not tied to one user's absolute Windows paths.
rl1_pkls = [
    os.path.join(dir_path, "polymer_nominal_mpc", run_id, "input_data.pkl")
    for run_id in (
        "20260107_015906",
        "20260107_040655",
        "20260107_051048",
        "20260107_071832",
        "20260107_092639",
    )
]

rl2_pkls = [
    os.path.join(dir_path, "polymer_nominal", run_id, "input_data.pkl")
    for run_id in (
        "20260107_021207",
        "20260107_032442",
        "20260107_070316",
        "20260107_081551",
        "20260107_111718",
    )
]
mpc_pkl = os.path.join(dir_path, "mpc_results_nominal.pickle")

In [36]:
# Nominal case, RL1: the runs listed in rl1_pkls together with the nominal MPC result,
# with reward_fn_mpc passed as the reward function.
# The call returns the directory given as out_dir (printed below).
out_dir = plot_band_case_with_mpc(
    rl_input_pkls=rl1_pkls,
    mpc_results_pkl=mpc_pkl,
    reward_fn=reward_fn_mpc,
    out_dir=os.path.join(dir_path, "paper_plots_rl1_nominal"),
    mode="auto",
    band="minmax",
    ylabels=(r"$\eta$ (L/g)", r"$T$ (K)"),
    start_episode=5
)
print(out_dir)

C:\Users\HAMEDI\OneDrive - McMaster University\PythonProjects\Polymer_example\Data\paper_plots_rl1_nominal


In [37]:
# Nominal case, RL2: the runs listed in rl2_pkls together with the nominal MPC result,
# with reward_fn passed as the reward function (the RL1 cell passes reward_fn_mpc instead).
# The call returns the directory given as out_dir (printed below).
out_dir = plot_band_case_with_mpc(
    rl_input_pkls=rl2_pkls,
    mpc_results_pkl=mpc_pkl,
    reward_fn=reward_fn,
    out_dir=os.path.join(dir_path, "paper_plots_rl2_nominal"),
    mode="auto",
    band="minmax",
    ylabels=(r"$\eta$ (L/g)", r"$T$ (K)"),
    start_episode=5
)
print(out_dir)

C:\Users\HAMEDI\OneDrive - McMaster University\PythonProjects\Polymer_example\Data\paper_plots_rl2_nominal


In [38]:
# Nominal case: box-plot comparison of RL1 vs. RL2 (the helper is defined earlier in the
# notebook). The cell's displayed value is the output directory.
# NOTE(review): settle_len=10 here, while the statistics cell below uses settle_len=80 --
# confirm the two windows are meant to differ.
boxplot_abs_error_scaled_last_eval_two_rl(
    rl1_pkls=rl1_pkls,
    rl2_pkls=rl2_pkls,
    out_dir=os.path.join(dir_path, "box_plots_nominal"),
    eval_len=800,
    sub_len=400,
    settle_len=10
)

'C:\\Users\\HAMEDI\\OneDrive - McMaster University\\PythonProjects\\Polymer_example\\Data\\box_plots_nominal'

In [39]:
# Nominal case: for each agent, mean and std across its runs of the mean absolute tracking
# error over the last 80 steps of each 400-step setpoint segment in the final 800-step window.
# print_table_block then lists both agents and the percent reduction of RL2 relative to RL1.
mu1, sd1 = ss_error_last_eval_phys_stats(rl1_pkls, eval_len=800, sub_len=400, settle_len=80)
mu2, sd2 = ss_error_last_eval_phys_stats(rl2_pkls, eval_len=800, sub_len=400, settle_len=80)

print_table_block(mu1, sd1, mu2, sd2, name_out=("eta", "T"))

eta:
  RL1 mean: SP(a)=0.034, SP(b)=0.026
  RL1 std : SP(a)=0.024, SP(b)=0.020
  RL2 mean: SP(a)=0.001, SP(b)=0.002
  RL2 std : SP(a)=0.001, SP(b)=0.003
  Reduction (%): SP(a)=98, SP(b)=90

T:
  RL1 mean: SP(a)=0.093, SP(b)=0.058
  RL1 std : SP(a)=0.085, SP(b)=0.043
  RL2 mean: SP(a)=0.008, SP(b)=0.009
  RL2 std : SP(a)=0.009, SP(b)=0.005
  Reduction (%): SP(a)=92, SP(b)=84



In [49]:
# Nominal case: RL1 vs. RL2 vs. MPC outputs on the last 800 steps, using one selected run
# per agent. Run paths are built from dir_path instead of hard-coded absolute paths so the
# cell works wherever the Data directory lives.
out_dir = compare_three_paths_outputs_only_polymer(
    rl1_path=os.path.join(dir_path, "polymer_nominal_mpc", "20260107_051048", "input_data.pkl"),
    rl2_path=os.path.join(dir_path, "polymer_nominal", "20260107_070316", "input_data.pkl"),
    mpc_path=mpc_pkl,
    directory=dir_path,
    prefix_name="three_way_compare_nominal",
    start_idx=-800,
    ylabels=(r"$\eta$ (L/g)", r"$T$ (K)"),
    save_pdf=True
)
print(out_dir)

C:\Users\HAMEDI\OneDrive - McMaster University\PythonProjects\Polymer_example\Data\three_way_compare_nominal\20260107_235159


# Disturbance

In [51]:
# Disturbance case: five saved runs per agent plus the matching MPC result.
# Paths are built from dir_path (the notebook's Data directory), exactly as mpc_pkl is,
# so the notebook is not tied to one user's absolute Windows paths.
# NOTE: this rebinds rl1_pkls / rl2_pkls / mpc_pkl used by the nominal cells above.
rl1_pkls = [
    os.path.join(dir_path, "polymer_dist_mpc", run_id, "input_data.pkl")
    for run_id in (
        "20260107_020146",
        "20260107_030554",
        "20260107_051341",
        "20260107_072153",
        "20260107_082549",
    )
]

rl2_pkls = [
    os.path.join(dir_path, "polymer_dist", run_id, "input_data.pkl")
    for run_id in (
        "20260107_021502",
        "20260107_044044",
        "20260107_081911",
        "20260107_093159",
        "20260107_104200",
    )
]
mpc_pkl = os.path.join(dir_path, "mpc_results_dist.pickle")

In [52]:
# Disturbance case, RL1: the runs listed in rl1_pkls together with mpc_results_dist.pickle,
# with reward_fn_mpc passed as the reward function.
# The call returns the directory given as out_dir (printed below).
out_dir = plot_band_case_with_mpc(
    rl_input_pkls=rl1_pkls,
    mpc_results_pkl=mpc_pkl,
    reward_fn=reward_fn_mpc,
    out_dir=os.path.join(dir_path, "paper_plots_rl1_dist"),
    mode="auto",
    band="minmax",
    ylabels=(r"$\eta$ (L/g)", r"$T$ (K)"),
    start_episode=5
)
print(out_dir)

C:\Users\HAMEDI\OneDrive - McMaster University\PythonProjects\Polymer_example\Data\paper_plots_rl1_dist


In [44]:
# Disturbance case, RL2: the runs listed in rl2_pkls together with mpc_results_dist.pickle,
# with reward_fn passed as the reward function (the RL1 cell passes reward_fn_mpc instead).
# The call returns the directory given as out_dir (printed below).
out_dir = plot_band_case_with_mpc(
    rl_input_pkls=rl2_pkls,
    mpc_results_pkl=mpc_pkl,
    reward_fn=reward_fn,
    out_dir=os.path.join(dir_path, "paper_plots_rl2_dist"),
    mode="auto",
    band="minmax",
    ylabels=(r"$\eta$ (L/g)", r"$T$ (K)"),
    start_episode=5
)
print(out_dir)

C:\Users\HAMEDI\OneDrive - McMaster University\PythonProjects\Polymer_example\Data\paper_plots_rl2_dist


In [45]:
# Disturbance case: box-plot comparison of RL1 vs. RL2 (the helper is defined earlier in the
# notebook). The cell's displayed value is the output directory.
# NOTE(review): settle_len=10 here, while the statistics cell below uses settle_len=80 --
# confirm the two windows are meant to differ.
boxplot_abs_error_scaled_last_eval_two_rl(
    rl1_pkls=rl1_pkls,
    rl2_pkls=rl2_pkls,
    out_dir=os.path.join(dir_path, "box_plots_dist"),
    eval_len=800,
    sub_len=400,
    settle_len=10
)

'C:\\Users\\HAMEDI\\OneDrive - McMaster University\\PythonProjects\\Polymer_example\\Data\\box_plots_dist'

In [46]:
# Disturbance case: for each agent, mean and std across its runs of the mean absolute tracking
# error over the last 80 steps of each 400-step setpoint segment in the final 800-step window.
# print_table_block then lists both agents and the percent reduction of RL2 relative to RL1.
mu1, sd1 = ss_error_last_eval_phys_stats(rl1_pkls, eval_len=800, sub_len=400, settle_len=80)
mu2, sd2 = ss_error_last_eval_phys_stats(rl2_pkls, eval_len=800, sub_len=400, settle_len=80)

print_table_block(mu1, sd1, mu2, sd2, name_out=("eta", "T"))

eta:
  RL1 mean: SP(a)=0.032, SP(b)=0.007
  RL1 std : SP(a)=0.020, SP(b)=0.006
  RL2 mean: SP(a)=0.002, SP(b)=0.002
  RL2 std : SP(a)=0.003, SP(b)=0.002
  Reduction (%): SP(a)=92, SP(b)=76

T:
  RL1 mean: SP(a)=0.103, SP(b)=0.056
  RL1 std : SP(a)=0.133, SP(b)=0.038
  RL2 mean: SP(a)=0.016, SP(b)=0.005
  RL2 std : SP(a)=0.012, SP(b)=0.005
  Reduction (%): SP(a)=84, SP(b)=90



In [47]:
# Disturbance case: RL1 vs. RL2 vs. MPC outputs on the last 800 steps, using one selected run
# per agent. Run paths are built from dir_path instead of hard-coded absolute paths so the
# cell works wherever the Data directory lives.
out_dir = compare_three_paths_outputs_only_polymer(
    rl1_path=os.path.join(dir_path, "polymer_dist_mpc", "20260107_020146", "input_data.pkl"),
    rl2_path=os.path.join(dir_path, "polymer_dist", "20260107_044044", "input_data.pkl"),
    mpc_path=mpc_pkl,
    directory=dir_path,
    prefix_name="three_way_compare_dist",
    start_idx=-800,
    ylabels=(r"$\eta$ (L/g)", r"$T$ (K)"),
    save_pdf=True
)
print(out_dir)

C:\Users\HAMEDI\OneDrive - McMaster University\PythonProjects\Polymer_example\Data\three_way_compare_dist\20260107_232948
