Environment Setup for MountainCarContinuous-v0

In [5]:
import tensorflow as tf

# Report the TensorFlow build and which accelerators it can see.
print("TF version:", tf.__version__)
print("Built with CUDA:", tf.test.is_built_with_cuda())

# Query the device list once and reuse it for both the summary and the detail lines.
visible_gpus = tf.config.list_physical_devices("GPU")
print("GPUs visible to TF:", visible_gpus)  # [] here means CPU only

# Optional: one line per GPU
for device in visible_gpus:
    print(" -", device)


TF version: 2.20.0
Built with CUDA: False
GPUs visible to TF: []


In [6]:
import platform
import sys

# Record which interpreter and OS produced the outputs in this notebook.
print(sys.executable)
print(platform.platform())


C:\Users\wts28\anaconda3\python.exe
Windows-11-10.0.26200-SP0


In [7]:
# ============================
# Cell 1 — Imports + Config
# ============================
import os
import time
import math
import numpy as np

import gymnasium as gym

import matplotlib.pyplot as plt
from matplotlib import animation
from IPython.display import HTML, display

from PIL import Image

# One seed drives the notebook-level NumPy generator defined here.
SEED = 0
rng = np.random.default_rng(SEED)

# Compact array printing: 4 decimals, no scientific notation for small values.
np.set_printoptions(suppress=True, precision=4)


In [8]:
# ============================
# Cell 2 — Env Factory + Basics
# ============================

ENV_ID = "MountainCarContinuous-v0"

def make_env(render_mode=None, seed=0):
    """
    Create an ENV_ID environment and seed it.

    render_mode:
      - None: fastest (training)
      - "rgb_array": frames for notebook animation
      - "human": pops up window (not great for notebook)
    seed: used for one initial reset and for the action/observation space samplers.

    Returns the environment. It has already been reset once with `seed`; the
    result of that reset is discarded, so callers reset again to get an observation.
    """
    env = gym.make(ENV_ID, render_mode=render_mode)
    env.reset(seed=seed)
    env.action_space.seed(seed)
    env.observation_space.seed(seed)
    return env

# Quick sanity check
# Quick sanity check: build an env, look at one observation, and read the
# space metadata that later cells use as constants.
env = make_env(render_mode=None, seed=SEED)
try:
    obs, info = env.reset()
    print("obs:", obs, "shape:", obs.shape)
    print("action_space:", env.action_space)
    print("observation_space:", env.observation_space)

    # Useful constants
    OBS_DIM = env.observation_space.shape[0]   # 2: [position, velocity]
    ACT_DIM = env.action_space.shape[0]        # 1: [force]
    ACTION_LOW  = env.action_space.low.copy()
    ACTION_HIGH = env.action_space.high.copy()

    # Read the goal from the environment when it exposes one, since the value
    # varies by implementation; 0.45 is the fallback.
    GOAL_POS_DEFAULT = float(getattr(env.unwrapped, "goal_position", 0.45))
finally:
    # Always release the env, even if one of the prints above fails.
    env.close()


obs: [-0.546  0.   ] shape: (2,)
action_space: Box(-1.0, 1.0, (1,), float32)
observation_space: Box([-1.2  -0.07], [0.6  0.07], (2,), float32)


In [9]:
# ============================
# Cell 3 — Notebook Render Utilities
# ============================

def resize_frame(frame, size=(640, 400)):
    """
    Resize one rendered frame (HxWx3 uint8) with bilinear resampling.

    `size` is handed to PIL's Image.resize; passing None returns `frame` unchanged.
    """
    if size is not None:
        resized = Image.fromarray(frame).resize(size, resample=Image.BILINEAR)
        return np.asarray(resized)
    return frame

def rollout_and_render(env, policy_fn, steps=600, seed=0,
                       frame_stride=1, resize=(640, 400), fps=30):
    """
    Roll out `policy_fn` in `env` and display the rendered frames inline.

    env must be created with render_mode="rgb_array"
    policy_fn(obs) -> action in [-1,1] shape (1,)

    steps:        maximum number of env steps; stops early on termination/truncation.
    seed:         passed to env.reset.
    frame_stride: a frame is kept whenever t % frame_stride == 0.
    resize:       forwarded to resize_frame; None keeps the rendered size.
    fps:          playback rate of the animation.

    Returns traj, a list of (obs, action, reward, terminated, truncated) where
    obs is the observation the action was computed from (the pre-step state).
    """
    obs, info = env.reset(seed=seed)
    frames = []
    traj = []

    for t in range(steps):
        # Coerce whatever the policy returns to the env's action shape and bounds.
        action = np.asarray(policy_fn(obs), dtype=np.float32).reshape(ACT_DIM,)
        action = np.clip(action, ACTION_LOW, ACTION_HIGH)

        obs2, reward, terminated, truncated, info = env.step(action)
        traj.append((obs.copy(), action.copy(), reward, terminated, truncated))

        if t % frame_stride == 0:
            frame = env.render()
            frame = resize_frame(frame, size=resize)
            frames.append(frame)

        obs = obs2
        if terminated or truncated:
            break

    if not frames:
        # Nothing was rendered (e.g. steps <= 0), so there is nothing to animate.
        return traj

    # ---- animate ----
    # Size the figure from the frames themselves so resize=None also works.
    frame_h, frame_w = frames[0].shape[:2]
    fig = plt.figure(figsize=(frame_w / 100, frame_h / 100), dpi=100)
    plt.axis("off")
    im = plt.imshow(frames[0])

    def animate(i):
        im.set_data(frames[i])
        return [im]

    ani = animation.FuncAnimation(
        fig, animate, frames=len(frames),
        interval=1000 / fps, blit=True
    )
    # Close the static figure so only the JS animation is shown in the cell.
    plt.close(fig)
    display(HTML(ani.to_jshtml()))
    return traj

def random_policy(obs):
    """Ignore `obs` and draw one uniform action of shape (1,) from the notebook-level `rng`."""
    # MountainCarContinuous expects 1D action in [-1,1]
    action = rng.uniform(-1.0, 1.0, size=(1,))
    return action


In [None]:
# ============================
# Cell 4 — Demo: Random Rollout (rendered)
# ============================
env_vis = make_env(render_mode="rgb_array", seed=SEED)
try:
    traj = rollout_and_render(
        env_vis,
        policy_fn=random_policy,
        steps=600,
        seed=SEED,
        frame_stride=1,
        resize=(720, 450),
        fps=30
    )
finally:
    # Release the renderer even if the rollout or the animation raises.
    env_vis.close()

# Each traj entry stores the observation its action was taken FROM, so the
# last entry holds the state before the final step, not the post-step state.
final_obs = traj[-1][0]
print("Last pre-step obs:", final_obs)
print("Steps executed:", len(traj))


Initial Random data collection


In [None]:
# ============================
# Render the RANDOM collection path (and collect X,Y)
# ============================

import numpy as np
import matplotlib.pyplot as plt

def _show_frames_animation(frames, fps=30):
    """Display `frames` (list of HxWx3 arrays) inline as a JS animation; does nothing when the list is empty."""
    from matplotlib import animation
    from IPython.display import HTML, display

    if not frames:
        return

    # Size the figure from the frames themselves so any resize setting works.
    frame_h, frame_w = frames[0].shape[:2]
    fig = plt.figure(figsize=(frame_w / 100, frame_h / 100), dpi=100)
    plt.axis("off")
    im = plt.imshow(frames[0])

    def animate(i):
        im.set_data(frames[i])
        return [im]

    ani = animation.FuncAnimation(
        fig, animate, frames=len(frames),
        interval=1000 / fps, blit=True
    )
    # Close the static figure so only the JS animation is shown in the cell.
    plt.close(fig)
    display(HTML(ani.to_jshtml()))


def _plot_time_series(values, ylabel, title, figsize=(9, 4)):
    """Draw one value-vs-step line plot with the grid/line style used for the collection plots."""
    plt.figure(figsize=figsize)
    plt.plot(values, linewidth=2)
    plt.xlabel("t")
    plt.ylabel(ylabel)
    plt.title(title)
    plt.grid(True, alpha=0.25)
    plt.show()


def collect_random_transitions_rendered(n_steps=500, seed=0,
                                       frame_stride=1, resize=(720, 450), fps=30):
    """
    Runs random actions, collects (X,Y), AND renders the rollout.
    X = [p,v,u], Y = [dp,dv]

    n_steps:      total number of env steps; the env is reset (with a new seed)
                  whenever an episode terminates or is truncated.
    seed:         seeds both the local action generator and the environment.
    frame_stride: a frame is kept whenever t % frame_stride == 0.
    resize:       forwarded to resize_frame; None keeps the rendered size.
    fps:          playback rate of the animation.

    Returns (X, Y) as float64 arrays of shape (n_steps, 3) and (n_steps, 2).
    """
    # Local generator: deliberately independent of the notebook-level `rng`.
    action_rng = np.random.default_rng(seed)

    X_list, Y_list = [], []
    frames = []

    env_vis = make_env(render_mode="rgb_array", seed=seed)
    try:
        obs, info = env_vis.reset(seed=seed)

        for t in range(n_steps):
            u = action_rng.uniform(-1.0, 1.0, size=(1,)).astype(np.float64)
            obs2, reward, terminated, truncated, info = env_vis.step(u)

            p, v = obs
            p2, v2 = obs2

            # Regression pair: input (p, v, u) -> one-step state change (dp, dv).
            X_list.append([p, v, float(u[0])])
            Y_list.append([p2 - p, v2 - v])

            # render frame
            if (t % frame_stride) == 0:
                frame = env_vis.render()
                frame = resize_frame(frame, size=resize)
                frames.append(frame)

            obs = obs2
            if terminated or truncated:
                # The transition was recorded above; start a fresh episode.
                obs, info = env_vis.reset(seed=seed + 123 + t)
    finally:
        # Release the renderer even if stepping or rendering raises.
        env_vis.close()

    # reshape keeps the (N, 3) / (N, 2) contract even when nothing was collected.
    X = np.asarray(X_list, dtype=np.float64).reshape(-1, 3)
    Y = np.asarray(Y_list, dtype=np.float64).reshape(-1, 2)

    # ---- show animation ----
    _show_frames_animation(frames, fps=fps)

    # ---- show trajectory plots ----
    traj_p, traj_v, traj_u = X[:, 0], X[:, 1], X[:, 2]

    _plot_time_series(traj_p, "position p", "Random collection: position vs time")
    _plot_time_series(traj_v, "velocity v", "Random collection: velocity vs time")

    plt.figure(figsize=(7, 5))
    plt.scatter(traj_p, traj_v, s=8, alpha=0.6)
    plt.xlabel("position p")
    plt.ylabel("velocity v")
    plt.title("Random collection path in state space (p vs v)")
    plt.grid(True, alpha=0.25)
    plt.show()

    _plot_time_series(traj_u, "action u", "Random actions over time", figsize=(9, 3.5))

    print("Collected X shape:", X.shape, " (p,v,u)")
    print("Collected Y shape:", Y.shape, " (dp,dv)")
    return X, Y


# ---- run it ----
# Initial dataset for the GP models: X0 rows are (p, v, u), Y0 rows are (dp, dv).
X0, Y0 = collect_random_transitions_rendered(
    n_steps=500, seed=SEED,
    frame_stride=1, resize=(720, 450), fps=30,
)


In [None]:
# ============================
# Cell 7 — GPflow Setup (float64)
# ============================
import tensorflow as tf
import gpflow
from gpflow.inducing_variables import InducingPoints

# Use double precision everywhere (GPflow and Keras) and set GPflow's default jitter.
gpflow.config.set_default_float(np.float64)
gpflow.config.set_default_jitter(1e-6)
tf.keras.backend.set_floatx("float64")

# Record library versions next to the results.
print("TF:", tf.__version__)
print("GPflow:", gpflow.__version__)


OSGPR-VFE


In [None]:
# ===========================
# Cell 8 — Streaming OSGPR (VFE) — paper-style regression update (D-dim input)
#   Works for MountainCar input x=[p,v,u] (D=3)
#   Single-output GP (we will train two models: dp and dv)
# ===========================
import time
import numpy as np
import tensorflow as tf
import gpflow

from gpflow.inducing_variables import InducingPoints
from gpflow.models import GPModel, InternalDataTrainingLossMixin
from gpflow import covariances

# --- make numerics stable ---
# Same three settings as the "GPflow Setup" cell, repeated here so this cell
# does not depend on that one having been run.
gpflow.config.set_default_float(np.float64)
gpflow.config.set_default_jitter(1e-6)
tf.keras.backend.set_floatx("float64")


# ---------------------------
# helpers
# ---------------------------
def sym_jitter(A, jitter=1e-6):
    """Return a new float64 matrix (A + A^T)/2 + jitter*I for a square input `A`."""
    mat = np.asarray(A, dtype=np.float64)
    symmetric = 0.5 * (mat + mat.T)
    return symmetric + jitter * np.eye(mat.shape[0], dtype=np.float64)

def finite_mask(*arrs):
    """
    AND together the finiteness masks of all inputs.

    2-D inputs contribute one flag per row (True only if the whole row is
    finite); inputs of any other rank contribute their element-wise mask.
    Returns None when called with no arrays.
    """
    combined = None
    for arr in map(np.asarray, arrs):
        ok = np.isfinite(arr)
        if arr.ndim == 2:
            ok = ok.all(axis=1)
        combined = ok if combined is None else (combined & ok)
    return combined

def choose_inducing_keep_old(Z_old, X_new, M, keep_frac=0.8, rng=None):
    """
    D-dimensional inducing management:
      keep_frac of old inducing points + rest from current batch.
    Z_old: (M_old, D)
    X_new: (N_new, D)
    M:         number of inducing points to return.
    keep_frac: target fraction of M taken from Z_old (clamped to [0, 1]).
    rng:       numpy Generator; defaults to default_rng(0).

    Returns Z of shape (M, D), float64.

    Rows are drawn without replacement wherever possible, because repeated
    inducing points make the kernel matrix at Z singular up to jitter:
      1. up to round(M * keep_frac) rows from Z_old,
      2. the remaining budget from X_new,
      3. if the batch is too small, unused rows of Z_old,
      4. only if both sources are exhausted, repeats of already chosen rows.
    When both sources are large enough, the draws are the same two
    rng.choice calls (old first, then new) with the same sizes as before.

    Raises ValueError if M > 0 and both Z_old and X_new are empty.
    """
    if rng is None:
        rng = np.random.default_rng(0)
    Z_old = np.asarray(Z_old, dtype=np.float64)
    X_new = np.asarray(X_new, dtype=np.float64)

    assert Z_old.ndim == 2 and X_new.ndim == 2
    D = Z_old.shape[1]
    assert X_new.shape[1] == D, f"Dim mismatch: Z_old D={D}, X_new D={X_new.shape[1]}"

    n_old, n_new = Z_old.shape[0], X_new.shape[0]

    # Clamp keep_frac so the function can never return more than M rows.
    frac = min(max(keep_frac, 0.0), 1.0)
    M_keep = min(int(np.round(M * frac)), n_old)
    # The new-point budget is computed AFTER clamping M_keep, so a short Z_old
    # is made up with distinct batch rows instead of duplicated ones.
    M_new = min(M - M_keep, n_new)

    no_idx = np.array([], dtype=int)
    old_idx = rng.choice(n_old, size=M_keep, replace=False) if M_keep > 0 else no_idx
    new_idx = rng.choice(n_new, size=M_new, replace=False) if M_new > 0 else no_idx

    Z = np.vstack([Z_old[old_idx], X_new[new_idx]]).astype(np.float64)

    shortfall = M - Z.shape[0]
    if shortfall > 0:
        # The batch is used up; fall back to old points that were not selected.
        unused_old = np.setdiff1d(np.arange(n_old), old_idx)
        take = min(shortfall, unused_old.size)
        if take > 0:
            extra_old = rng.choice(unused_old, size=take, replace=False)
            Z = np.vstack([Z, Z_old[extra_old]])
            shortfall -= take

    if shortfall > 0:
        # Every available row is already in Z; repeats are the only option left.
        if Z.shape[0] == 0:
            raise ValueError("Cannot choose inducing points: Z_old and X_new are both empty.")
        repeat_idx = rng.choice(Z.shape[0], size=shortfall, replace=True)
        Z = np.vstack([Z, Z[repeat_idx]])
    return Z


# ============================================================
# OSGPR-VFE model (paper’s online regression VFE update)
# ============================================================
class OSGPR_VFE(GPModel, InternalDataTrainingLossMixin):
    """
    Online Sparse Variational GP Regression (VFE), regression-only.
    Matches Streaming Sparse GP Approximations (Bui et al., NIPS 2017) VFE case.

    NOTE: This is SINGLE-OUTPUT. Train two models for dp and dv.

    Subscript convention used in the variable names below:
      a = old inducing points (Z_old), b = current inducing points (Z),
      f = training inputs (X),         s = prediction inputs (Xnew).

    Constructor arguments:
      data:     (X, Y) training batch.
      kernel:   GPflow kernel (its hyperparameters are trainable).
      mu_old:   mean of the previous posterior at Z_old, reshaped to (Ma, 1).
      Su_old:   covariance of the previous posterior at Z_old.
      Kaa_old:  kernel matrix at Z_old as it was when the summary was made.
      Z_old:    old inducing inputs, (Ma, D).
      Z:        initial inducing inputs for this model, (Mb, D).
    The four "old" quantities are stored as non-trainable variables.
    """

    def __init__(self, data, kernel, mu_old, Su_old, Kaa_old, Z_old, Z, mean_function=None):
        X, Y = gpflow.models.util.data_input_to_tensor(data)
        self.X, self.Y = X, Y
        likelihood = gpflow.likelihoods.Gaussian()
        num_latent_gps = GPModel.calc_num_latent_gps_from_data(data, kernel, likelihood)
        super().__init__(kernel, likelihood, mean_function, num_latent_gps)

        Z = np.asarray(Z, dtype=np.float64)
        assert Z.ndim == 2, "Z must be (M, D)"
        self.inducing_variable = InducingPoints(Z)
        self.num_data = tf.shape(self.X)[0]

        # old summary (fixed)
        # Su_old and Kaa_old are symmetrized and get 1e-6 added to the diagonal
        # before being stored (see sym_jitter).
        mu_old  = np.asarray(mu_old, dtype=np.float64).reshape(-1, 1)
        Su_old  = sym_jitter(Su_old, 1e-6)
        Kaa_old = sym_jitter(Kaa_old, 1e-6)
        Z_old   = np.asarray(Z_old, dtype=np.float64)
        assert Z_old.ndim == 2, "Z_old must be (M_old, D)"

        self.mu_old  = tf.Variable(mu_old,  trainable=False, dtype=gpflow.default_float())
        self.Su_old  = tf.Variable(Su_old,  trainable=False, dtype=gpflow.default_float())
        self.Kaa_old = tf.Variable(Kaa_old, trainable=False, dtype=gpflow.default_float())
        self.Z_old   = tf.Variable(Z_old,   trainable=False, dtype=gpflow.default_float())

        # Defensive fallback to a zero mean function.
        # NOTE(review): GPModel may already install a default here — confirm whether this is reachable.
        if self.mean_function is None:
            self.mean_function = gpflow.mean_functions.Zero()

    def _common_terms(self):
        """
        Compute the matrices and Cholesky factors shared by the bound and by predict_f.

        Returns the tuple
          (Kbf, Kba, Kaa, Kaa_cur, La, Kbb, Lb, D, LD,
           Lbinv_Kba, LDinv_Lbinv_c, err, Qff_diag)
        in exactly that order; both callers unpack it positionally.
        """
        Mb = self.inducing_variable.num_inducing
        # Fixed jitter used in this method (independent of gpflow's default jitter setting).
        jitter = gpflow.utilities.to_default_float(1e-6)
        sigma2 = self.likelihood.variance

        Saa = self.Su_old
        ma  = self.mu_old

        # Kbf, Kbb, Kba, Kaa (old)
        Kbf = covariances.Kuf(self.inducing_variable, self.kernel, self.X)                # [Mb, N]
        Kbb = covariances.Kuu(self.inducing_variable, self.kernel, jitter=jitter)         # [Mb, Mb]
        Kba = covariances.Kuf(self.inducing_variable, self.kernel, self.Z_old)            # [Mb, Ma]

        # current kernel at Z_old (optional mismatch correction term)
        Kaa_cur = gpflow.utilities.add_noise_cov(self.kernel(self.Z_old), jitter)         # current kernel(Z_old,Z_old)
        Kaa = gpflow.utilities.add_noise_cov(self.Kaa_old, jitter)                        # stored old kernel(Z_old,Z_old)

        # Residual w.r.t. the mean function; returned for the data-fit term of the bound.
        err = self.Y - self.mean_function(self.X)

        # c = Kbf * (Y/sigma2) + Kba * (Saa^{-1} ma)
        # NOTE(review): c1 uses self.Y, not err, so a non-zero mean_function is
        # not subtracted here — confirm this is intended before using one.
        Sainv_ma = tf.linalg.solve(Saa, ma)
        c1 = tf.matmul(Kbf, self.Y / sigma2)                                              # [Mb,1]
        c2 = tf.matmul(Kba, Sainv_ma)                                                     # [Mb,1]
        c  = c1 + c2

        # Cholesky(Kbb)
        Lb = tf.linalg.cholesky(Kbb)
        Lbinv_c   = tf.linalg.triangular_solve(Lb, c, lower=True)
        Lbinv_Kba = tf.linalg.triangular_solve(Lb, Kba, lower=True)
        # Scaled by 1/sqrt(sigma2) so that squares of this matrix carry the 1/sigma2 factor.
        Lbinv_Kbf = tf.linalg.triangular_solve(Lb, Kbf, lower=True) / tf.sqrt(sigma2)

        d1 = tf.matmul(Lbinv_Kbf, Lbinv_Kbf, transpose_b=True)                            # [Mb,Mb]

        # d2 = (LSa^{-1} Kab Lb^{-1})^T (LSa^{-1} Kab Lb^{-1})
        LSa = tf.linalg.cholesky(Saa)
        Kab_Lbinv = tf.linalg.matrix_transpose(Lbinv_Kba)                                 # [Ma,Mb]
        LSainv_Kab_Lbinv = tf.linalg.triangular_solve(LSa, Kab_Lbinv, lower=True)
        d2 = tf.matmul(LSainv_Kab_Lbinv, LSainv_Kab_Lbinv, transpose_a=True)

        # d3 = (La^{-1} Kab Lb^{-1})^T (La^{-1} Kab Lb^{-1})
        La = tf.linalg.cholesky(Kaa)
        Lainv_Kab_Lbinv = tf.linalg.triangular_solve(La, Kab_Lbinv, lower=True)
        d3 = tf.matmul(Lainv_Kab_Lbinv, Lainv_Kab_Lbinv, transpose_a=True)

        # D = I + d1 + d2 - d3
        D = tf.eye(Mb, dtype=gpflow.default_float()) + d1 + d2 - d3
        D = gpflow.utilities.add_noise_cov(D, jitter)
        LD = tf.linalg.cholesky(D)

        LDinv_Lbinv_c = tf.linalg.triangular_solve(LD, Lbinv_c, lower=True)

        # Qff_diag term for trace: diag(Kfb Kbb^{-1} Kbf) / sigma2
        Qff_diag = tf.reduce_sum(tf.square(Lbinv_Kbf), axis=0)                             # [N]

        return (Kbf, Kba, Kaa, Kaa_cur, La, Kbb, Lb, D, LD,
                Lbinv_Kba, LDinv_Lbinv_c, err, Qff_diag)

    def maximum_log_likelihood_objective(self):
        """Return the scalar online VFE bound for the current batch (the quantity to maximize)."""
        # NOTE: `jitter` is not used in this method; the jittered matrices come from _common_terms.
        jitter = gpflow.utilities.to_default_float(1e-6)
        sigma2 = self.likelihood.variance
        N = tf.cast(self.num_data, gpflow.default_float())

        Saa = self.Su_old
        ma  = self.mu_old

        # diag(Kff)
        Kfdiag = self.kernel(self.X, full_cov=False)

        (Kbf, Kba, Kaa, Kaa_cur, La, Kbb, Lb, D, LD,
         Lbinv_Kba, LDinv_Lbinv_c, err, Qff_diag) = self._common_terms()

        # ma term
        # Despite its name, Lainv_ma is LSa^{-1} ma (solved with the Cholesky factor of Saa, not of Kaa).
        LSa = tf.linalg.cholesky(Saa)
        Lainv_ma = tf.linalg.triangular_solve(LSa, ma, lower=True)

        # bound terms (matches reference implementation structure)
        bound = -0.5 * N * np.log(2.0 * np.pi)
        bound += -0.5 * tf.reduce_sum(tf.square(err)) / sigma2
        bound += -0.5 * tf.reduce_sum(tf.square(Lainv_ma))
        bound +=  0.5 * tf.reduce_sum(tf.square(LDinv_Lbinv_c))

        bound += -0.5 * N * tf.math.log(sigma2)
        bound += -tf.reduce_sum(tf.math.log(tf.linalg.diag_part(LD)))

        # trace-like term: -0.5/sigma2 tr(Kff - Qff)
        bound += -0.5 * tf.reduce_sum(Kfdiag) / sigma2
        bound +=  0.5 * tf.reduce_sum(Qff_diag)  # already has /sigma2 inside

        # delta_a terms (old/new inducing mismatch)
        bound += tf.reduce_sum(tf.math.log(tf.linalg.diag_part(La)))
        bound += -tf.reduce_sum(tf.math.log(tf.linalg.diag_part(LSa)))

        # Kaadiff = Kaa_cur - Kab Kbb^{-1} Kba
        Kaadiff = Kaa_cur - tf.matmul(Lbinv_Kba, Lbinv_Kba, transpose_a=True)
        Sainv_Kaadiff = tf.linalg.solve(Saa, Kaadiff)
        Kainv_Kaadiff = tf.linalg.solve(Kaa, Kaadiff)

        # -0.5 * [tr(Saa^{-1} Kaadiff) - tr(Kaa^{-1} Kaadiff)]
        bound += -0.5 * tf.reduce_sum(
            tf.linalg.diag_part(Sainv_Kaadiff) - tf.linalg.diag_part(Kainv_Kaadiff)
        )
        return bound

    def predict_f(self, Xnew, full_cov=False):
        """
        Predict the latent function at Xnew.

        Returns (mean, var). mean has the mean function at Xnew added.
        With full_cov=True, var is the full covariance over Xnew (with jitter
        on the diagonal); otherwise it is the per-point variance, floored at 1e-12.
        Note that _common_terms is recomputed on every call.
        """
        jitter = gpflow.utilities.to_default_float(1e-6)

        Kbs = covariances.Kuf(self.inducing_variable, self.kernel, Xnew)
        (Kbf, Kba, Kaa, Kaa_cur, La, Kbb, Lb, D, LD,
         Lbinv_Kba, LDinv_Lbinv_c, err, Qff_diag) = self._common_terms()

        Lbinv_Kbs = tf.linalg.triangular_solve(Lb, Kbs, lower=True)
        LDinv_Lbinv_Kbs = tf.linalg.triangular_solve(LD, Lbinv_Kbs, lower=True)
        mean = tf.matmul(LDinv_Lbinv_Kbs, LDinv_Lbinv_c, transpose_a=True)

        if full_cov:
            # var = Kss - Ksb Kbb^{-1} Kbs + (LD^{-1} Lb^{-1} Kbs)^T (LD^{-1} Lb^{-1} Kbs)
            Kss = self.kernel(Xnew) + jitter * tf.eye(tf.shape(Xnew)[0], dtype=gpflow.default_float())
            var = (
                Kss
                - tf.matmul(Lbinv_Kbs, Lbinv_Kbs, transpose_a=True)
                + tf.matmul(LDinv_Lbinv_Kbs, LDinv_Lbinv_Kbs, transpose_a=True)
            )
            return mean + self.mean_function(Xnew), var
        else:
            # Diagonal of the same expression; the reductions run over the inducing axis (axis 0).
            var = (
                self.kernel(Xnew, full_cov=False)
                - tf.reduce_sum(tf.square(Lbinv_Kbs), axis=0)
                + tf.reduce_sum(tf.square(LDinv_Lbinv_Kbs), axis=0)
            )
            # Floor the variance so downstream sqrt() never sees a tiny negative value.
            var = tf.maximum(var, tf.cast(1e-12, var.dtype))
            return mean + self.mean_function(Xnew), var


def train_osgpr(model, iters=250, lr=0.01):
    """
    Fit `model` by minimizing the negative of its
    maximum_log_likelihood_objective with Adam.

    model: object exposing maximum_log_likelihood_objective() and trainable_variables.
    iters: number of Adam steps.
    lr:    Adam learning rate.

    Returns (elapsed_seconds, loss). `loss` is the value computed inside the
    last step, i.e. before that step's parameter update was applied. With
    iters <= 0 no update is made and the model's current loss is returned.
    """
    opt = tf.keras.optimizers.Adam(lr)

    @tf.function
    def step():
        with tf.GradientTape() as tape:
            loss = -model.maximum_log_likelihood_objective()
        grads = tape.gradient(loss, model.trainable_variables)
        # Skip variables that do not take part in the loss (their gradient is None).
        opt.apply_gradients([(g, v) for g, v in zip(grads, model.trainable_variables) if g is not None])
        return loss

    # perf_counter is monotonic, so the elapsed time cannot be skewed by clock adjustments.
    t0 = time.perf_counter()
    last = None
    for _ in range(iters):
        last = step()
    dt = time.perf_counter() - t0

    if last is None:
        # No step ran: report the loss of the untouched model rather than failing on None.
        last = -model.maximum_log_likelihood_objective()
    return dt, float(last.numpy())


def prior_summary(kernel, Z):
    """
    Build the "old posterior" summary for a model that has seen no data yet.

    Returns (mu, Su, Kaa, Z): a zero mean of shape (M, 1), and the jittered,
    symmetrized kernel matrix at Z used for both Su and Kaa (the same array
    object is returned in both positions).
    """
    inducing = np.asarray(Z, dtype=np.float64)
    prior_cov = sym_jitter(kernel.K(inducing).numpy(), 1e-6)
    zero_mean = np.zeros((inducing.shape[0], 1), dtype=np.float64)
    return zero_mean, prior_cov, prior_cov, inducing


def extract_summary_from_model(model):
    """
    Summarize a trained model at its own inducing inputs.

    Returns (mu, Sigma, Kaa, Z) as NumPy arrays: the predictive mean and full
    covariance at Z, the kernel matrix at Z under the current hyperparameters,
    and Z itself. Sigma and Kaa are symmetrized with 1e-6 added to the diagonal.
    """
    inducing = model.inducing_variable.Z.numpy()

    # u = f(Z): the posterior over the inducing outputs
    mean_tf, cov_tf = model.predict_f(inducing, full_cov=True)
    mean = mean_tf.numpy()

    cov = cov_tf.numpy()
    # Drop a leading axis of a 3-D covariance by keeping its first slice.
    cov = cov[0] if cov.ndim == 3 else cov
    cov = sym_jitter(cov, 1e-6)

    kernel_at_z = sym_jitter(model.kernel.K(inducing).numpy(), 1e-6)

    return mean, cov, kernel_at_z, inducing


In [None]:
# ===========================
# Cell 9 — Train initial streaming OSGPR-VFE GPs for dp and dv
# ===========================
# Rebinds the notebook-level `rng` (the same name random_policy reads) to a
# freshly seeded generator, so the inducing-point draw below is reproducible.
rng = np.random.default_rng(SEED)

# targets
# Column slices (0:1, 1:2) keep the targets 2-D, one column per model.
y_dp = Y0[:, 0:1].astype(np.float64)
y_dv = Y0[:, 1:2].astype(np.float64)

# inducing size
# Initial inducing inputs are a random subset of the collected inputs,
# capped at the number of available rows.
M = 64
idxZ = rng.choice(X0.shape[0], size=min(M, X0.shape[0]), replace=False)
Z0 = X0[idxZ].copy()

# kernels (3D input: p,v,u)
# One kernel object per model so their hyperparameters are trained independently.
kernel_dp = gpflow.kernels.SquaredExponential(lengthscales=[0.5, 0.2, 0.5], variance=1.0)
kernel_dv = gpflow.kernels.SquaredExponential(lengthscales=[0.5, 0.2, 0.5], variance=1.0)

# ===== dp model =====
# First batch: the "old" summary is the prior at Z0, and Z_old equals Z0.
mu_old, Su_old, Kaa_old, Z_old = prior_summary(kernel_dp, Z0)
m_dp = OSGPR_VFE(
    data=(X0, y_dp),
    kernel=kernel_dp,
    mu_old=mu_old, Su_old=Su_old, Kaa_old=Kaa_old, Z_old=Z_old,
    Z=Z0
)
# Initial observation-noise variance (still trainable afterwards).
m_dp.likelihood.variance.assign(1e-4)

print("Training dp model...")
t_dp, neg_dp = train_osgpr(m_dp, iters=300, lr=0.02)
# Posterior summary at the trained inducing inputs.
sum_dp = extract_summary_from_model(m_dp)
print(f"dp done | train={t_dp:.3f}s | neg_obj={neg_dp:.4f}")

# ===== dv model =====
# Same recipe as the dp model, with its own kernel and targets.
# Note: mu_old / Su_old / Kaa_old / Z_old are overwritten here.
mu_old, Su_old, Kaa_old, Z_old = prior_summary(kernel_dv, Z0)
m_dv = OSGPR_VFE(
    data=(X0, y_dv),
    kernel=kernel_dv,
    mu_old=mu_old, Su_old=Su_old, Kaa_old=Kaa_old, Z_old=Z_old,
    Z=Z0
)
m_dv.likelihood.variance.assign(1e-4)

print("\nTraining dv model...")
t_dv, neg_dv = train_osgpr(m_dv, iters=300, lr=0.02)
sum_dv = extract_summary_from_model(m_dv)
print(f"dv done | train={t_dv:.3f}s | neg_obj={neg_dv:.4f}")


In [None]:
# ============================
# GP Visualization (FULL axis control)
#   - Slice: v fixed, plot u=+1 and u=-1 together
#   - 3D Surface: z = mean, color = std
# Controls you can set per plot:
#   - x_min, x_max
#   - y_min, y_max
#   - x_tick_step, y_tick_step
#   - (3D) z_min, z_max
#   - (3D) std_min, std_max (colorbar range)
# ============================

import numpy as np
import matplotlib.pyplot as plt
from matplotlib.ticker import MultipleLocator
from mpl_toolkits.mplot3d import Axes3D  # noqa: F401

# MountainCar default bounds
# Used as the default axis limits of the plotting helpers in this cell; the
# values match the observation_space printed by the env sanity-check cell.
MC_P_MIN, MC_P_MAX = -1.2, 0.6      # position range
MC_V_MIN, MC_V_MAX = -0.07, 0.07    # velocity range


# ----------------------------
# Prediction helper
# ----------------------------
def gp_predict_mu_std(model, X):
    """
    Predictive mean and standard deviation as flat arrays.

    X: (N,3) -> mu (N,), std (N,)
    The variance is floored at 1e-12 before the square root.
    """
    mean_tf, variance_tf = model.predict_f(X, full_cov=False)
    mean = mean_tf.numpy().reshape(-1)
    floored_variance = np.maximum(variance_tf.numpy().reshape(-1), 1e-12)
    return mean, np.sqrt(floored_variance)


# ============================================================
# 1) Slice plot with FULL axis control
# ============================================================
def plot_slice_two_actions(
    model,
    X_train, y_train,
    title="Slice",
    y_label="Δy",
    v_fixed=0.0,
    a_list=(+1.0, -1.0),
    n_grid=280,
    # ---- axis controls ----
    x_min=MC_P_MIN, x_max=MC_P_MAX,
    y_min=None, y_max=None,
    x_tick_step=None,
    y_tick_step=None,
    # ---- data overlay controls ----
    data_tol_v=0.01,
    data_tol_a=0.2,
    show_data=True,
    show_minmax=True,
):
    """
    Plot the GP prediction along position p at a fixed velocity, with one
    mean curve and ±2σ band per action in `a_list`, all on the same axes.

    x_min/x_max set the position range (and the evaluation grid); y_min/y_max
    default to the extent of the ±2σ bands. x_tick_step / y_tick_step set the
    major tick spacing when given. Training points whose velocity is within
    data_tol_v of v_fixed and whose action is within data_tol_a of the curve's
    action are overlaid when show_data is True. show_minmax appends the
    mean / std / band ranges to the title.
    """
    positions = np.linspace(x_min, x_max, n_grid)

    def _predict_curve(action):
        # Query grid: p varies, v and u are held constant.
        query = np.column_stack([
            positions,
            np.full_like(positions, v_fixed),
            np.full_like(positions, action),
        ]).astype(np.float64)
        mean, sigma = gp_predict_mu_std(model, query)
        return (action, mean, sigma, mean - 2 * sigma, mean + 2 * sigma)

    # compute all action curves first
    curves = [_predict_curve(action) for action in a_list]

    # auto y if not specified: tightest range containing every ±2σ band
    if y_min is None:
        y_min = min([+np.inf] + [float(np.min(curve[3])) for curve in curves])
    if y_max is None:
        y_max = max([-np.inf] + [float(np.max(curve[4])) for curve in curves])

    # plot
    fig, ax = plt.subplots(figsize=(9, 5))

    for action, mean, sigma, band_lo, band_hi in curves:
        ax.plot(positions, mean, lw=2.5, label=f"mean (u={action:+.1f})")
        ax.fill_between(positions, band_lo, band_hi, alpha=0.18, label=f"±2σ (u={action:+.1f})")

        if not show_data:
            continue

        # overlay training data near this slice
        near_v = np.abs(X_train[:, 1] - v_fixed) < data_tol_v
        near_u = np.abs(X_train[:, 2] - action) < data_tol_a
        near = near_v & near_u
        n_near = np.sum(near)
        if n_near > 0:
            ax.scatter(
                X_train[near, 0], y_train[near],
                s=22, alpha=0.65,
                label=f"data (v≈{v_fixed:.2f}, u≈{action:+.1f}, n={n_near})"
            )

    # axis settings
    ax.set_xlim(x_min, x_max)
    ax.set_ylim(y_min, y_max)
    ax.set_xlabel("Position p")
    ax.set_ylabel(y_label)

    # ticks/grid units
    if x_tick_step is not None:
        ax.xaxis.set_major_locator(MultipleLocator(float(x_tick_step)))
    if y_tick_step is not None:
        ax.yaxis.set_major_locator(MultipleLocator(float(y_tick_step)))

    ax.grid(True, alpha=0.25)

    # min/max display
    full_title = title
    if show_minmax:
        # summarize from all curves
        mu_all = np.concatenate([curve[1] for curve in curves])
        std_all = np.concatenate([curve[2] for curve in curves])
        lo_all = np.concatenate([curve[3] for curve in curves])
        hi_all = np.concatenate([curve[4] for curve in curves])

        full_title = title + (
            f"\nmean[min,max]=({mu_all.min():+.3e},{mu_all.max():+.3e})"
            f"  std[min,max]=({std_all.min():+.3e},{std_all.max():+.3e})"
            f"  band[min,max]=({lo_all.min():+.3e},{hi_all.max():+.3e})"
        )
    ax.set_title(full_title)

    ax.legend(loc="best")
    fig.tight_layout()
    plt.show()


# ============================================================
# 2) 3D Surface plot with FULL axis control
#     - z = mean
#     - color = std
# ============================================================
def plot_surface_mean_colored_by_std(
    model,
    X_train, y_train,
    title="3D Surface",
    z_label="Δy",
    a_fixed=-1.0,
    n_grid=80,
    # ---- axis controls ----
    p_min=MC_P_MIN, p_max=MC_P_MAX,
    v_min=MC_V_MIN, v_max=MC_V_MAX,
    z_min=None, z_max=None,
    # ---- std colorbar controls ----
    std_min=None, std_max=None,
    # ---- overlay controls ----
    show_data=True,
    data_tol_a=0.2,
    show_minmax=True,
):
    """
    3D plot: height=mean, facecolor=std (uncertainty).
    Full manual control of p/v bounds, z bounds, and std color range.

    The GP is evaluated on an n_grid x n_grid grid over (p, v) with the action
    held at a_fixed. z_min/z_max default to the range of the predicted mean and
    std_min/std_max to the range of the predicted std. Training points whose
    action is within data_tol_a of a_fixed are overlaid when show_data is True.
    show_minmax appends the mean / std ranges to the title.
    """
    p_grid = np.linspace(p_min, p_max, n_grid)
    v_grid = np.linspace(v_min, v_max, n_grid)
    P, V = np.meshgrid(p_grid, v_grid)

    X_grid = np.column_stack([
        P.ravel(),
        V.ravel(),
        np.full_like(P.ravel(), a_fixed)
    ]).astype(np.float64)

    mu, std = gp_predict_mu_std(model, X_grid)
    Mean = mu.reshape(P.shape)
    Std  = std.reshape(P.shape)

    # z limits
    if z_min is None:
        z_min = float(np.min(Mean))
    if z_max is None:
        z_max = float(np.max(Mean))

    # std color range
    if std_min is None:
        std_min = float(np.min(Std))
    if std_max is None:
        std_max = float(np.max(Std))

    norm = plt.Normalize(vmin=std_min, vmax=std_max)
    colors = plt.cm.viridis(norm(Std))

    fig = plt.figure(figsize=(10, 7))
    ax = fig.add_subplot(111, projection="3d")

    # shade=False keeps the face colors exactly equal to the std colormap values.
    ax.plot_surface(
        P, V, Mean,
        facecolors=colors,
        linewidth=0,
        antialiased=False,
        shade=False
    )

    # colorbar = std
    # The surface is colored by hand, so a separate mappable feeds the colorbar.
    mappable = plt.cm.ScalarMappable(cmap="viridis", norm=norm)
    mappable.set_array(Std)
    cbar = fig.colorbar(mappable, ax=ax, shrink=0.65, aspect=12)
    cbar.set_label("GP Predictive Std (uncertainty)")

    # overlay training points near this action
    has_labelled_artist = False
    if show_data:
        act = X_train[:, 2]
        mask = np.abs(act - a_fixed) < data_tol_a
        if np.sum(mask) > 0:
            ax.scatter(
                X_train[mask, 0], X_train[mask, 1], y_train[mask],
                color="k", s=10, alpha=0.55, label=f"train (u≈{a_fixed:+.1f})"
            )
            has_labelled_artist = True

    ax.set_xlim(p_min, p_max)
    ax.set_ylim(v_min, v_max)
    ax.set_zlim(z_min, z_max)

    ax.set_xlabel("Position p")
    ax.set_ylabel("Velocity v")
    ax.set_zlabel(z_label)

    if show_minmax:
        extra = (f"\nmean[min,max]=({Mean.min():+.3e},{Mean.max():+.3e})"
                 f"  std[min,max]=({Std.min():+.3e},{Std.max():+.3e})")
        ax.set_title(f"{title} | u={a_fixed:+.1f}" + extra)
    else:
        ax.set_title(f"{title} | u={a_fixed:+.1f}")

    ax.view_init(elev=30, azim=230)
    # The scatter is the only labelled artist; without it a legend call would
    # only produce a "no artists with labels" warning and an empty box.
    if has_labelled_artist:
        ax.legend(loc="best")
    plt.tight_layout()
    plt.show()


# ============================================================
# Example usage (YOU can change these numbers easily)
# ============================================================

# ---- Δp slice plot ----
# Slice of the Δp model at v = 0 for both extreme actions, over the full
# position range with hand-picked y limits and tick spacing.
plot_slice_two_actions(
    m_dp, X0, Y0[:, 0],
    title="OSGPR-VFE slice: Δp",
    y_label="Δp",
    v_fixed=0.0,
    a_list=(+1.0, -1.0),
    n_grid=280,
    x_min=-1.2, x_max=0.6,
    y_min=-0.012, y_max=0.012,
    x_tick_step=0.1,
    y_tick_step=0.005
)

# Disabled example: uncomment to draw the same slice for the Δv model.
# # ---- Δv slice plot ----
# plot_slice_two_actions(
#     m_dv, X0, Y0[:, 1],
#     title="OSGPR-VFE slice: Δv",
#     y_label="Δv",
#     v_fixed=0.0,
#     a_list=(+1.0, -1.0),
#     n_grid=280,
#     x_min=-1.2, x_max=0.6,
#     y_min=-0.015, y_max=0.015,
#     x_tick_step=0.2,
#     y_tick_step=0.001
# )

# Disabled example: uncomment to draw the Δv surface for u = -1.
# ---- 3D surface Δv (u = -1) ----
# plot_surface_mean_colored_by_std(
#     m_dv, X0, Y0[:, 1],
#     title="OSGPR-VFE surface: Δv",
#     z_label="Δv",
#     a_fixed=-1.0,
#     n_grid=80,
#     p_min=-1.2, p_max=0.6,
#     v_min=-0.07, v_max=0.07,
#     z_min=-0.0035, z_max=0.0035,
#     std_min=0.0, std_max=None
# )

# ---- 3D surface Δv (u = +1) ----
# std_min=0.0 pins the bottom of the color scale; std_max=None uses the largest predicted std.
plot_surface_mean_colored_by_std(
    m_dv, X0, Y0[:, 1],
    title="OSGPR-VFE surface: Δv",
    z_label="Δv",
    a_fixed=+1.0,
    n_grid=80,
    p_min=-1.2, p_max=0.6,
    v_min=-0.07, v_max=0.07,
    z_min=-0.0035, z_max=0.0035,
    std_min=0.0, std_max=None
)


MPPI


In [None]:
# ===========================
# Cell 10 — MPPI planner using GP dynamics (m_dp, m_dv)
# ===========================

import numpy as np

# MountainCarContinuous bounds
# Used by gp_step_batch to clip predicted states and by mppi_plan_action to
# clip sampled actions. These repeat the values of MC_P_MIN/MC_P_MAX,
# MC_V_MIN/MC_V_MAX and GOAL_POS_DEFAULT defined in earlier cells.
P_MIN, P_MAX = -1.2, 0.6      # position
V_MIN, V_MAX = -0.07, 0.07    # velocity
U_MIN, U_MAX = -1.0, 1.0      # action (force)
GOAL_POS = 0.45   # goal position (typical for MountainCarContinuous)

def gp_predict_mu(model, X):
    """Predictive mean only, flattened: X (N,3) -> mu (N,). The variance is computed by the model but discarded."""
    mean_tf, _variance_tf = model.predict_f(X, full_cov=False)
    flat_mean = mean_tf.numpy().reshape(-1)
    return flat_mean

def gp_step_batch(m_dp, m_dv, states, actions):
    """
    GP dynamics one-step:
      state: [p,v]
      input: [p,v,u] -> predict dp,dv
      next = [p+dp, v+dv] (clipped)
    states:  (K,2), or a single state of shape (2,)
    actions: (K,) (anything that flattens to K values)

    Only the predictive means of m_dp and m_dv are used. The next state is
    clipped to [P_MIN, P_MAX] x [V_MIN, V_MAX].
    Returns the next states as a float64 array of shape (K,2).
    """
    # atleast_2d lets a single (2,) state be stepped as a batch of one.
    states = np.atleast_2d(np.asarray(states, dtype=np.float64))
    actions = np.asarray(actions, dtype=np.float64).reshape(-1)

    X = np.column_stack([states[:, 0], states[:, 1], actions]).astype(np.float64)

    dp = gp_predict_mu(m_dp, X)
    dv = gp_predict_mu(m_dv, X)

    p2 = np.clip(states[:, 0] + dp, P_MIN, P_MAX)
    v2 = np.clip(states[:, 1] + dv, V_MIN, V_MAX)
    return np.stack([p2, v2], axis=1).astype(np.float64)

def running_cost(states, actions,
                 w_goal=25.0, w_vel=0.05, w_u=0.02):
    """
    Per-sample stage cost (lower is better).

    Sum of three weighted squared terms: distance still missing to GOAL_POS
    (zero once p >= GOAL_POS), velocity, and action magnitude.
    states: (K,2) array, actions: array flattening to (K,). Returns float64 (K,).
    """
    position, velocity = states[:, 0], states[:, 1]
    effort = actions.reshape(-1)

    # only penalize below goal
    shortfall = np.maximum(0.0, GOAL_POS - position)
    cost = w_goal * shortfall**2 + w_vel * (velocity**2) + w_u * (effort**2)
    return cost.astype(np.float64)

def terminal_cost(states, w_terminal=250.0):
    """Terminal cost per sample: w_terminal times the squared distance still missing to GOAL_POS (zero once p >= GOAL_POS)."""
    shortfall = np.maximum(0.0, GOAL_POS - states[:, 0])
    weighted = w_terminal * shortfall**2
    return weighted.astype(np.float64)

def mppi_plan_action(
    m_dp, m_dv,
    state0,
    u_nominal,
    rng,
    horizon=30,
    num_samples=512,
    noise_sigma=0.35,
    lam=1.0,
    u_smooth=0.65,
):
    """
    One MPPI planning step with a receding horizon.

    Procedure:
      - sample `num_samples` control sequences around u_nominal
      - roll each one out with the GP dynamics (gp_step_batch)
      - weight the rollouts by exp(-(cost - min_cost) / lam)
      - move u_nominal toward the weighted average perturbation
      - return the first control and the shifted sequence as a warm start

    Args:
      m_dp, m_dv:   GP models predicting the position / velocity increments.
      state0:       current state [p, v] (anything reshapeable to (2,)).
      u_nominal:    nominal control sequence of length `horizon`, or None for
                    an all-zero sequence. The caller's array is not modified.
      rng:          NumPy random Generator used to draw the perturbations.
      horizon:      number of planning steps H (>= 1).
      num_samples:  number of sampled rollouts K (>= 1).
      noise_sigma:  standard deviation of the Gaussian control perturbation.
      lam:          temperature of the exponential weighting (floored at 1e-9).
      u_smooth:     weight kept on the previous nominal sequence when blending
                    it with the MPPI-updated sequence.

    Returns:
      (u0, u_nominal, mean_cost, min_cost) where
        u0         float, first control of the blended sequence
        u_nominal  (H,) sequence shifted by one step for the next call
        mean_cost  float, mean total cost over the sampled rollouts
        min_cost   float, lowest total cost over the sampled rollouts

    Raises:
      ValueError: if horizon < 1 or num_samples < 1.
    """
    state0 = np.asarray(state0, dtype=np.float64).reshape(2,)
    H = int(horizon)
    K = int(num_samples)
    if H < 1 or K < 1:
        raise ValueError("horizon and num_samples must both be >= 1")

    if u_nominal is None:
        u_nominal = np.zeros(H, dtype=np.float64)
    else:
        u_nominal = np.asarray(u_nominal, dtype=np.float64).reshape(H,)

    # sample perturbed control sequences and keep them inside the actuator limits
    noise = rng.normal(loc=0.0, scale=noise_sigma, size=(K, H)).astype(np.float64)
    U = np.clip(u_nominal[None, :] + noise, U_MIN, U_MAX)

    # The rollouts below are costed with the *clipped* controls, so the update
    # must use the perturbation that was actually applied. Using the raw noise
    # would credit a rollout for control effort it never exerted.
    noise = U - u_nominal[None, :]

    # rollout
    states = np.repeat(state0[None, :], K, axis=0)  # (K,2)
    total_cost = np.zeros(K, dtype=np.float64)

    for t in range(H):
        a_t = U[:, t]
        states = gp_step_batch(m_dp, m_dv, states, a_t)
        total_cost += running_cost(states, a_t)

    total_cost += terminal_cost(states)

    # weights (subtracting the minimum keeps the exponent <= 0, avoiding overflow)
    beta = np.min(total_cost)
    w = np.exp(-(total_cost - beta) / max(1e-9, lam))
    w = w / (np.sum(w) + 1e-12)

    # MPPI update: u_new = u_nom + Σ w_k * noise_k
    du = np.sum(w[:, None] * noise, axis=0)
    u_new = np.clip(u_nominal + du, U_MIN, U_MAX)

    # smooth for stability
    u_nominal = u_smooth * u_nominal + (1.0 - u_smooth) * u_new

    # receding horizon: take first action
    u0 = float(u_nominal[0])

    # shift nominal sequence (warm start next step); the freed last slot
    # repeats the previous last control. With H == 1 there is nothing to
    # shift, so the single control is simply kept.
    u_nominal = np.roll(u_nominal, -1)
    if H > 1:
        u_nominal[-1] = u_nominal[-2]

    return u0, u_nominal, float(np.mean(total_cost)), float(np.min(total_cost))


In [None]:
# ===========================
# Cell 12 — Episodic MPPI + Streaming OSGPR-VFE
#   Each episode:
#     - MPPI rollout until success OR max steps
#     - record wall-clock time + steps + success/fail
#     - SAVE ALL transitions of that episode as batch
#     - ONE streaming update using ONLY this batch (no replay buffer)
#     - render episode + show 2 plots:
#         (1) Δp slice plot (u=+1/-1)
#         (2) Δv surface plot (u=+1, mean height, std color)
# ===========================

import time
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import animation
from IPython.display import HTML, display
from PIL import Image

# ----------------------------
# Controls (you can change)
# ----------------------------
NUM_EPISODES = 10      # episodes run by the main loop
MAX_STEPS = 500        # cap on real environment steps per episode

# Rendering / playback
RENDER = True          # create the env with render_mode="rgb_array" and play frames back
FRAME_STRIDE = 1       # a frame is stored whenever t % FRAME_STRIDE == 0
RESIZE = (720, 450)    # size handed to resize_frame for every stored frame
FPS = 30               # playback rate handed to show_frames

# MPPI planner settings (forwarded to mppi_plan_action)
HORIZON = 35
NUM_SAMPLES = 256
NOISE_SIGMA = 0.35
LAM = 1.0
U_SMOOTH = 0.65

# Streaming update settings (forwarded to streaming_update_once, once per episode)
M_INDUCING = 32
KEEP_FRAC = 0.8
TRAIN_ITERS = 100
LR = 0.02

# MountainCar bounds/constants (same values as in the MPPI planner cell)
P_MIN = -1.2
P_MAX = 0.6
V_MIN = -0.07
V_MAX = 0.07
U_MIN = -1.0
U_MAX = 1.0
GOAL_POS = 0.45


# ----------------------------
# Render helpers
# ----------------------------
def resize_frame(frame, size=(720, 450)):
    """
    Resize one rendered frame with bilinear resampling.

    frame: image array accepted by PIL's Image.fromarray.
    size:  target size passed straight to Image.resize.
    Returns the resized image as a NumPy array.
    """
    resized = Image.fromarray(frame).resize(size, resample=Image.BILINEAR)
    return np.asarray(resized)

def show_frames(frames, fps=30):
    """
    Play a sequence of image frames inline as a JS/HTML animation.

    frames: sequence of image arrays; the first one sets the figure size
            (one figure pixel per image pixel at dpi=100).
    fps:    playback rate; the frame interval is 1000/fps milliseconds.
    Prints a notice and returns without displaying anything if `frames`
    is empty.
    """
    n_frames = len(frames)
    if n_frames == 0:
        print("[Render] No frames to display.")
        return

    first = frames[0]
    height_px, width_px = first.shape[0], first.shape[1]

    fig = plt.figure(figsize=(width_px/100, height_px/100), dpi=100)
    plt.axis("off")
    image_artist = plt.imshow(first)

    def _draw(idx):
        # Swap the pixel data in place; blitting needs the touched artists back.
        image_artist.set_data(frames[idx])
        return [image_artist]

    player = animation.FuncAnimation(
        fig, _draw, frames=n_frames, interval=1000/fps, blit=True
    )
    # Close the figure so only the animation widget is shown, not a static plot.
    plt.close(fig)
    display(HTML(player.to_jshtml()))


# ----------------------------
# Inducing update helper (3D input)
# ----------------------------
def choose_inducing_keep_old_nd(Z_old, X_new, M, keep_frac=0.8, rng=None):
    """
    Build a set of M inducing points from old inducing points and a new batch.

    Z_old: (M_old, D), X_new: (N, D)  -> Z: (M, D)

    About round(M * keep_frac) rows are drawn without replacement from Z_old
    and the remaining rows from X_new. If either source is too small to fill
    its quota, the gap is filled, in order, with:
      1. rows of X_new that were not picked yet,
      2. rows of Z_old that were not picked yet,
      3. only if both are exhausted, repeated rows (sampling with replacement).
    Duplicated inducing points are therefore produced only when fewer than M
    distinct rows exist in total.

    Args:
      Z_old:     previous inducing locations.
      X_new:     inputs of the new batch (may have zero rows).
      M:         number of inducing points to return.
      keep_frac: target fraction of the M points taken from Z_old; the
                 resulting count is limited to the range [0, M].
      rng:       NumPy random Generator; defaults to default_rng(0).

    Returns:
      (M, D) float64 array, old rows first.

    Raises:
      ValueError: if M > 0 but both Z_old and X_new have no rows.
    """
    if rng is None:
        rng = np.random.default_rng(0)

    Z_old = np.asarray(Z_old, dtype=np.float64)
    X_new = np.asarray(X_new, dtype=np.float64)
    M = int(M)
    n_old = Z_old.shape[0]
    n_new = X_new.shape[0]

    if M > 0 and n_old == 0 and n_new == 0:
        raise ValueError("cannot choose inducing points: Z_old and X_new are both empty")

    # Quotas; limiting to [0, M] guards against keep_frac outside [0, 1].
    M_keep = int(np.clip(np.round(M * keep_frac), 0, M))
    M_new = M - M_keep

    M_keep = min(M_keep, n_old)
    M_new = min(M_new, n_new)

    old_idx = rng.choice(n_old, size=M_keep, replace=False) if M_keep > 0 else np.array([], dtype=int)
    new_idx = rng.choice(n_new, size=M_new, replace=False) if M_new > 0 else np.array([], dtype=int)

    parts = [Z_old[old_idx], X_new[new_idx]]
    need = M - (M_keep + M_new)

    # 1) fill from batch rows that have not been used yet
    if need > 0:
        spare_new = np.setdiff1d(np.arange(n_new), new_idx)
        take = min(need, spare_new.size)
        if take > 0:
            parts.append(X_new[rng.choice(spare_new, size=take, replace=False)])
            need -= take

    # 2) then from old inducing points that have not been used yet
    if need > 0:
        spare_old = np.setdiff1d(np.arange(n_old), old_idx)
        take = min(need, spare_old.size)
        if take > 0:
            parts.append(Z_old[rng.choice(spare_old, size=take, replace=False)])
            need -= take

    # 3) fewer than M distinct rows exist: duplicates are unavoidable
    if need > 0:
        pool = X_new if n_new > 0 else Z_old
        parts.append(pool[rng.choice(pool.shape[0], size=need, replace=True)])

    # Drop empty pieces so an empty source cannot break the stacking.
    filled = [blk for blk in parts if blk.shape[0] > 0]
    return np.vstack(filled if filled else parts).astype(np.float64)


# ----------------------------
# ONE streaming update using ONLY the new episode batch
# ----------------------------
def streaming_update_once(m_dp, m_dv, kernel_dp, kernel_dv,
                          X_batch, ydp_batch, ydv_batch,
                          M=64, keep_frac=0.8, train_iters=250, lr=0.02,
                          rng=None):
    """
    Perform one streaming OSGPR-VFE update of both dynamics models.

    For each of the two models (Δp first, then Δv) the same steps are run:
      1. extract the summary (mu, Su, Kaa, Z) of the current model,
      2. pick M inducing points from the old Z and X_batch,
      3. build a new OSGPR_VFE on (X_batch, targets) with that summary,
      4. start its likelihood variance from the current model's value
         (floored at 1e-8),
      5. train it with train_osgpr.

    Args:
      m_dp, m_dv:           current Δp / Δv models.
      kernel_dp, kernel_dv: kernels handed to the new models.
      X_batch:              batch inputs, shared by both models.
      ydp_batch, ydv_batch: batch targets for Δp and Δv.
      M, keep_frac:         forwarded to choose_inducing_keep_old_nd.
      train_iters, lr:      forwarded to train_osgpr as iters / lr.
      rng:                  NumPy random Generator; defaults to default_rng(0).

    Returns:
      (m_dp_new, m_dv_new, (t_dp, neg_dp, t_dv, neg_dv)); the last tuple holds
      the two values returned by train_osgpr for each model (presumably the
      training time and the final negative objective — confirm against
      train_osgpr).
    """
    if rng is None:
        rng = np.random.default_rng(0)

    def _refit(prev_model, kernel, targets):
        # Summary of the previous posterior, needed by the streaming objective.
        mu_prev, Su_prev, Kaa_prev, Z_prev = extract_summary_from_model(prev_model)
        Z_next = choose_inducing_keep_old_nd(
            Z_prev, X_batch, M=M, keep_frac=keep_frac, rng=rng
        )

        fresh = OSGPR_VFE(
            data=(X_batch, targets),
            kernel=kernel,
            mu_old=mu_prev, Su_old=Su_prev, Kaa_old=Kaa_prev, Z_old=Z_prev,
            Z=Z_next
        )

        # Warm-start the noise variance from the previous model.
        prev_noise = float(prev_model.likelihood.variance.numpy())
        fresh.likelihood.variance.assign(np.maximum(1e-8, prev_noise))

        train_time, neg_obj = train_osgpr(fresh, iters=train_iters, lr=lr)
        return fresh, train_time, neg_obj

    # Order matters for reproducibility: both updates draw from the same rng.
    m_dp_new, t_dp, neg_dp = _refit(m_dp, kernel_dp, ydp_batch)
    m_dv_new, t_dv, neg_dv = _refit(m_dv, kernel_dv, ydv_batch)

    return m_dp_new, m_dv_new, (t_dp, neg_dp, t_dv, neg_dv)


# ----------------------------
# Main episodic loop
# ----------------------------
# Runs NUM_EPISODES episodes. Each episode:
#   1. rolls out MPPI on the real environment until success or MAX_STEPS,
#   2. collects every transition (p, v, u) -> (dp, dv) of that episode,
#   3. performs ONE streaming update of m_dp / m_dv on that batch only,
#   4. plays back the episode and draws the two evaluation plots,
#   5. appends a summary dict to episode_logs.
# m_dp and m_dv are rebound after every episode, so re-running this cell
# continues from the already-updated models.
episode_logs = []

# Not referenced elsewhere in this cell; kept so later code can still use it.
rng_master = np.random.default_rng(SEED)

for ep in range(NUM_EPISODES):
    # Per-episode seed: drives the env reset, the MPPI noise and the
    # inducing-point selection of the streaming update.
    ep_seed = SEED + 1000 * ep
    rng = np.random.default_rng(ep_seed)

    env = make_env(render_mode="rgb_array" if RENDER else None, seed=ep_seed)

    # MPPI warm start
    u_nom = np.zeros(HORIZON, dtype=np.float64)

    # Episode buffers (SAVE ALL transitions)
    X_ep  = []
    Ydp_ep = []
    Ydv_ep = []

    # Render frames
    frames = []

    success = False
    steps_taken = 0

    # try/finally guarantees the environment (and its renderer) is released
    # even if planning or stepping raises part-way through the episode.
    try:
        obs, info = env.reset(seed=ep_seed)

        start_t = time.perf_counter()

        for t in range(MAX_STEPS):
            p, v = float(obs[0]), float(obs[1])

            # MPPI plan (uses GP model dynamics)
            u, u_nom, avg_cost, min_cost = mppi_plan_action(
                m_dp, m_dv,
                state0=np.array([p, v], dtype=np.float64),
                u_nominal=u_nom,
                rng=rng,
                horizon=HORIZON,
                num_samples=NUM_SAMPLES,
                noise_sigma=NOISE_SIGMA,
                lam=LAM,
                u_smooth=U_SMOOTH,
            )

            # step real env
            obs2, reward, terminated, truncated, info = env.step(np.array([u], dtype=np.float64))
            p2, v2 = float(obs2[0]), float(obs2[1])

            # Training targets are the observed one-step increments.
            dp = p2 - p
            dv = v2 - v

            # SAVE ALL transitions from episode
            X_ep.append([p, v, float(u)])
            Ydp_ep.append([dp])
            Ydv_ep.append([dv])

            obs = obs2
            steps_taken = t + 1

            # render (after the step, so the final frame shows the last state)
            if RENDER and ((t % FRAME_STRIDE) == 0):
                frame = env.render()
                frame = resize_frame(frame, RESIZE)
                frames.append(frame)

            # success check
            if p2 >= GOAL_POS:
                success = True
                break

            # reset if environment terminates/truncates early; the episode
            # keeps going from the fresh state with a cleared MPPI warm start
            if terminated or truncated:
                obs, info = env.reset(seed=ep_seed + 777 + t)
                u_nom[:] = 0.0

        # Wall-clock time of the rollout (planning, env steps and rendering).
        end_t = time.perf_counter()
        wall_time = end_t - start_t
    finally:
        env.close()

    # Convert episode batch to numpy
    X_ep = np.asarray(X_ep, dtype=np.float64)          # (N,3)
    ydp_ep = np.asarray(Ydp_ep, dtype=np.float64)      # (N,1)
    ydv_ep = np.asarray(Ydv_ep, dtype=np.float64)      # (N,1)

    # Print episode summary
    status = "SUCCESS ✅" if success else "FAIL ❌"
    print("\n" + "="*80)
    print(f"[Episode {ep+1}/{NUM_EPISODES}] {status}")
    print(f"  steps_taken = {steps_taken} / {MAX_STEPS}")
    print(f"  wall_time   = {wall_time:.3f} sec")
    print(f"  batch_size  = {X_ep.shape[0]} transitions  (ALL episode samples)")
    print("="*80)

    # ONE streaming update using ONLY this batch
    t0_upd = time.perf_counter()
    m_dp, m_dv, stats = streaming_update_once(
        m_dp, m_dv, kernel_dp, kernel_dv,
        X_ep, ydp_ep, ydv_ep,
        M=M_INDUCING, keep_frac=KEEP_FRAC,
        train_iters=TRAIN_ITERS, lr=LR,
        rng=rng
    )
    t_upd = time.perf_counter() - t0_upd
    t_dp, neg_dp, t_dv, neg_dv = stats

    print(f"[Streaming Update] total={t_upd:.3f}s | "
          f"dp(train={t_dp:.3f}s, neg={neg_dp:.4f}) | "
          f"dv(train={t_dv:.3f}s, neg={neg_dv:.4f})")

    # ===== Render the episode =====
    if RENDER:
        print("[Render] Episode playback:")
        show_frames(frames, fps=FPS)

    # ===== Evaluation plots of the UPDATED models, overlaid with this episode's data =====
    # Δp slice plot
    plot_slice_two_actions(
        m_dp, X_ep, ydp_ep.reshape(-1),
        title="OSGPR-VFE slice: Δp",
        y_label="Δp",
        v_fixed=0.0,
        a_list=(+1.0, -1.0),
        n_grid=280,
        x_min=-1.2, x_max=0.6,
        y_min=-0.012, y_max=0.012,
        x_tick_step=0.1,
        y_tick_step=0.005
    )

    # Δv surface plot
    plot_surface_mean_colored_by_std(
        m_dv, X_ep, ydv_ep.reshape(-1),
        title="OSGPR-VFE surface: Δv",
        z_label="Δv",
        a_fixed=+1.0,
        n_grid=80,
        p_min=-1.2, p_max=0.6,
        v_min=-0.07, v_max=0.07,
        z_min=-0.0035, z_max=0.0035,
        std_min=0.0, std_max=None
    )

    # log it
    episode_logs.append({
        "episode": ep + 1,
        "success": bool(success),
        "steps": int(steps_taken),
        "wall_time_s": float(wall_time),
        "batch_size": int(X_ep.shape[0]),
        "update_time_s": float(t_upd),
        "dp_train_s": float(t_dp),
        "dv_train_s": float(t_dv),
        "dp_negobj": float(neg_dp),
        "dv_negobj": float(neg_dv),
    })


# ===========================
# Summary table at the end
# ===========================
# One line per logged episode: outcome, step count, rollout wall time,
# batch size and streaming-update time, framed by a 90-character rule.
banner = "#"*90
print("\n" + banner)
print("EPISODE SUMMARY")
print(banner)
for row in episode_logs:
    if row["success"]:
        status = "SUCCESS"
    else:
        status = "FAIL"
    line = (
        f"Ep {row['episode']:02d} | {status:7s} | steps={row['steps']:4d} | "
        f"wall={row['wall_time_s']:.2f}s | batch={row['batch_size']:4d} | upd={row['update_time_s']:.2f}s"
    )
    print(line)
