# MANUAL DATA COLLECTION

In [1]:
import matplotlib
matplotlib.use('TkAgg')  # <--- THIS IS THE FIX
import matplotlib.pyplot as plt

In [22]:
import time
import math
import numpy as np
import mujoco as mj
import mujoco.viewer as viewer
import matplotlib
import matplotlib.pyplot as plt
from scipy.signal import butter, filtfilt # <--- Added for offline filtering

# --- GUI Backend Setup ---
try:
    matplotlib.use('TkAgg')
except ImportError:
    pass

# ==========================================
# 1. CONFIGURATION
# ==========================================
class Config:
    XML_PATH   = "monstertruck.xml"
    DURATION   = 100.0
    CTRL_DT    = 0.1
    REFRESH_HZ = 10
    RTF        = 1.0

    # --- BINNING SETTINGS ---
    N_BINS_PHI   = 50
    N_BINS_OMEGA = 50
    MAX_SAMPLES  = 40
    OMEGA_RANGE  = 5.0

# ==========================================
# 2. MATH & HELPERS
# ==========================================
class MathUtils:
    @staticmethod
    def get_flip_pitch(qw, qx, qy, qz):
        r22 = 1 - 2 * (qx * qx + qy * qy)
        r02 = 2 * (qx * qz + qw * qy)
        return math.atan2(r02, -r22)

    @staticmethod
    def angdiff(a, b):
        return ((a - b + math.pi) % (2.0 * math.pi)) - math.pi

def lowpass_offline(acc, dt, cutoff_hz=5.0, order=2):
    """
    Applies Zero-Phase filtering (forward + backward).
    No time delay! Perfect for training data.
    """
    acc = np.asarray(acc, dtype=float)
    if len(acc) < 10: return acc # Safety check

    fs = 1.0 / dt
    wn = cutoff_hz / (fs / 2.0)
    wn = min(max(wn, 1e-6), 0.999999)

    b, a = butter(order, wn, btype='low')
    acc_filt = filtfilt(b, a, acc)
    return acc_filt

# ==========================================
# 3. VISUALIZATION (Live = Raw)
# ==========================================
class LivePlotter:
    def __init__(self):
        plt.ion()
        self.fig, (self.ax1, self.ax2, self.ax3, self.ax4) = plt.subplots(4, 1, figsize=(8, 10), sharex=True)
        self.fig.canvas.manager.set_window_title("Collecting Raw Data...")

        self.line_pitch, = self.ax1.plot([], [], lw=2, c='blue')
        self.line_u,     = self.ax2.plot([], [], lw=1.5, c='orange')
        self.line_rate,  = self.ax3.plot([], [], lw=1.5, c='green', label="Raw Rate")
        self.line_acc,   = self.ax4.plot([], [], lw=0.5, c='gray', alpha=0.5, label="Raw Acc")

        self._style_ax(self.ax1, "Pitch (rad)", -3.5, 3.5)
        self._style_ax(self.ax2, "Throttle", -1.1, 1.1)
        self._style_ax(self.ax3, "Rate (rad/s)", -10.0, 10.0)
        self._style_ax(self.ax4, "Acc (m/s^2)", -20.0, 20.0)

        self.ax1.axhline(math.pi, c='g', ls=':', alpha=0.6)
        self.ax1.axhline(-math.pi, c='g', ls=':', alpha=0.6)

        plt.show(block=False)
        self.last_time = time.perf_counter()

    def _style_ax(self, ax, label, ymin, ymax):
        ax.set_ylabel(label); ax.set_ylim(ymin, ymax); ax.grid(True, alpha=0.5)

    def update(self, t, pitch, u, rate, acc):
        now = time.perf_counter()
        if now - self.last_time < (1.0 / Config.REFRESH_HZ): return True
        try:
            self.line_pitch.set_data(t, pitch)
            self.line_u.set_data(t, u)
            self.line_rate.set_data(t, rate)
            self.line_acc.set_data(t, acc)
            curr_t = t[-1] if t else 0
            self.ax1.set_xlim(max(0, curr_t - 10), curr_t + 0.2)
            self.fig.canvas.draw(); self.fig.canvas.flush_events()
            self.last_time = now
            return True
        except: return False

    def close(self): plt.ioff(); plt.show()

# ==========================================
# 4. MAIN EXECUTION (Collect Raw -> Filter Later)
# ==========================================
m = mj.MjModel.from_xml_path(Config.XML_PATH)
data = mj.MjData(m)
mj.mj_resetData(m, data); mj.mj_forward(m, data)

gyro_id = mj.mj_name2id(m, mj.mjtObj.mjOBJ_SENSOR, "imu_gyro")
acc_id  = mj.mj_name2id(m, mj.mjtObj.mjOBJ_SENSOR, "imu_acc")
gyro_adr = m.sensor_adr[gyro_id] if gyro_id >= 0 else 0
acc_adr  = m.sensor_adr[acc_id] if acc_id >= 0 else 0
qadr     = m.jnt_qposadr[next(j for j in range(m.njnt) if m.jnt_type[j] == mj.mjtJoint.mjJNT_FREE)] + 3
sim_dt   = m.opt.timestep

plotter = LivePlotter()
logs = {'t':[], 'pitch':[], 'u':[], 'rate':[], 'acc':[]}

t0_sim = data.time
t0_wall = time.perf_counter()
next_cmd = t0_sim
prev_pitch = None

print(">>> Recording RAW data... (Offline filter applied after)")

with viewer.launch_passive(m, data) as v:
    while data.time - t0_sim < Config.DURATION:

        if data.time >= next_cmd:
            data.ctrl[:] = float(np.random.uniform(-1.0, 1.0))
            next_cmd += Config.CTRL_DT

        mj.mj_step(m, data)

        # 1. Sensing (RAW)
        qw, qx, qy, qz = data.qpos[qadr:qadr+4]
        pitch = MathUtils.get_flip_pitch(qw, qx, qy, qz)

        if gyro_id >= 0: raw_rate = float(data.sensordata[gyro_adr + 1])
        else: raw_rate = MathUtils.angdiff(pitch, prev_pitch)/sim_dt if prev_pitch else 0.0

        raw_acc = float(data.sensordata[acc_adr + 0]) if acc_id >= 0 else 0.0
        prev_pitch = pitch

        # 2. Log Raw
        t_rel = data.time - t0_sim
        logs['t'].append(t_rel)
        logs['pitch'].append(pitch)
        logs['u'].append(data.ctrl[0])
        logs['rate'].append(raw_rate)
        logs['acc'].append(raw_acc)

        # 3. Plot Raw (Just to monitor)
        if not plotter.update(logs['t'], logs['pitch'], logs['u'], logs['rate'], logs['acc']):
            break

        rt_target = t0_wall + (data.time - t0_sim) / Config.RTF
        sleep_needed = rt_target - time.perf_counter()
        if sleep_needed > 0: time.sleep(min(sleep_needed, 0.01))
        v.sync()

plotter.close()
print(f"\n>>> Collection Finished. Raw Samples: {len(logs['t'])}")

# ==========================================
# 5. OFFLINE FILTERING (The User's Logic)
# ==========================================
print("\n>>> Applying Offline Zero-Phase Filter...")

# Convert lists to arrays
t_arr = np.array(logs['t'])
acc_raw = np.array(logs['acc'])
rate_raw = np.array(logs['rate'])

# Calculate average dt from the logs
dt_avg = np.mean(np.diff(t_arr))

# --- APPLY YOUR FUNCTION HERE ---
acc_filtered = lowpass_offline(acc_raw, dt_avg, cutoff_hz=5.0, order=2)
# We also filter rate slightly to match the smoothness
rate_filtered = lowpass_offline(rate_raw, dt_avg, cutoff_hz=10.0, order=2)

# Update logs with clean data for binning
logs['acc']  = acc_filtered
logs['rate'] = rate_filtered

print(">>> Filtering Complete. No Phase Lag!")

# ==========================================
# 6. POST-PROCESSING (Binning with Clean Data)
# ==========================================
print("\n>>> Applying Binning / Rejection Sampling...")

occupancy = np.zeros((Config.N_BINS_PHI, Config.N_BINS_OMEGA), dtype=np.int32)
X_filtered, Y_filtered = [], []

phi_min, phi_max = -math.pi, math.pi
om_min, om_max   = -Config.OMEGA_RANGE, Config.OMEGA_RANGE

saved_count = 0
extreme_count = 0

for i in range(len(logs['t']) - 1):
    # Current State (Now using Zero-Phase Filtered Data)
    curr_phi = logs['pitch'][i]
    curr_om  = logs['rate'][i]
    curr_acc = logs['acc'][i]

    # Targets
    d_phi = MathUtils.angdiff(logs['pitch'][i+1], curr_phi)
    d_om  = logs['rate'][i+1] - curr_om

    # Outlier Check
    if (curr_om < om_min) or (curr_om > om_max):
        X_filtered.append([curr_phi, curr_om, curr_acc])
        Y_filtered.append([d_phi, d_om])
        extreme_count += 1
        continue

    # Binning
    p_norm = (curr_phi - phi_min) / (phi_max - phi_min)
    o_norm = (curr_om - om_min) / (om_max - om_min)
    idx_p = max(0, min(Config.N_BINS_PHI - 1, int(p_norm * (Config.N_BINS_PHI - 1))))
    idx_o = max(0, min(Config.N_BINS_OMEGA - 1, int(o_norm * (Config.N_BINS_OMEGA - 1))))

    if occupancy[idx_p, idx_o] < Config.MAX_SAMPLES:
        occupancy[idx_p, idx_o] += 1
        X_filtered.append([curr_phi, curr_om, curr_acc])
        Y_filtered.append([d_phi, d_om])
        saved_count += 1

X = np.array(X_filtered)
Y = np.array(Y_filtered)

# --- DETAILED STATS (Restored) ---
unique_bins = np.count_nonzero(occupancy)
total_bins = Config.N_BINS_PHI * Config.N_BINS_OMEGA
num_steps = len(logs['t'])

print(f"------------------------------------------------")
print(f"Raw Data Points:    {num_steps}")
print(f"Filtered Data (X):  {X.shape[0]} (Saved)")
print(f"Rejection Ratio:    {(1 - (X.shape[0]/num_steps))*100:.1f}% rejected")
print(f"Extreme Speed Pts:  {extreme_count} (Saved automatically)")
print(f"Unique Bins Filled: {unique_bins} / {total_bins}")
print(f"------------------------------------------------")


# --- ORIGINAL VISUALIZATION (Restored) ---
plt.ioff() # Turn off interactive mode so this plot stays open
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 6))
fig.canvas.manager.set_window_title("Data Distribution Analysis")

# Plot 1: Heatmap (The Bins)

img = ax1.imshow(occupancy.T, origin='lower', aspect='auto',
                 extent=[phi_min, phi_max, om_min, om_max], cmap='viridis')
ax1.set_title(f"Bin Occupancy (Max {Config.MAX_SAMPLES})")
ax1.set_xlabel("Pitch (rad)")
ax1.set_ylabel("Rate (rad/s)")
plt.colorbar(img, ax=ax1, label="Count")

# Plot 2: Scatter (The Saved Points)
ax2.scatter(X[:, 0], X[:, 1], s=2, alpha=0.3, c='blue', label='Saved Data')

# Draw the "Binning Box" to see where the grid ends
ax2.vlines([phi_min, phi_max], om_min, om_max, colors='red', linestyles='--')
ax2.hlines([om_min, om_max], phi_min, phi_max, colors='red', linestyles='--', label='Grid Limit')

ax2.set_title(f"Saved Points (Includes Extreme Speeds: {extreme_count})")
ax2.set_xlabel("Pitch (rad)")
ax2.set_ylabel("Rate (rad/s)")
ax2.legend(loc='upper right')
ax2.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

>>> Recording RAW data... (Offline filter applied after)

>>> Collection Finished. Raw Samples: 100000

>>> Applying Offline Zero-Phase Filter...
>>> Filtering Complete. No Phase Lag!

>>> Applying Binning / Rejection Sampling...
------------------------------------------------
Raw Data Points:    100000
Filtered Data (X):  14786 (Saved)
Rejection Ratio:    85.2% rejected
Extreme Speed Pts:  0 (Saved automatically)
Unique Bins Filled: 425 / 2500
------------------------------------------------


# GAUSSIAN PROCESS DYNAMICS

In [23]:
import torch
import gpytorch
import numpy as np

# ============================================================
# ---- Base Exact GP model -----------------------------------
# ============================================================

if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

class ExactGPModel(gpytorch.models.ExactGP):
    def __init__(self, train_x, train_y, likelihood, kernel='RBF', ard_dims=None):
        super().__init__(train_x, train_y, likelihood)
        self.mean_module = gpytorch.means.ConstantMean()

        # Kernel Choice
        if kernel == 'RBF':
            base_kernel = gpytorch.kernels.RBFKernel(ard_num_dims=ard_dims)
        elif kernel == 'Matern':
            base_kernel = gpytorch.kernels.MaternKernel(nu=2.5, ard_num_dims=ard_dims)
        elif kernel == 'RQ':
            base_kernel = gpytorch.kernels.RQKernel(ard_num_dims=ard_dims)
        else:
            raise ValueError(f"Unsupported kernel type: {kernel}")

        self.covar_module = gpytorch.kernels.ScaleKernel(base_kernel)

    def forward(self, x):
        mean_x = self.mean_module(x)
        covar_x = self.covar_module(x)
        return gpytorch.distributions.MultivariateNormal(mean_x, covar_x)


# ============================================================
# ---- Individual GP Manager: data + normalization -----------
# ============================================================

class GPManager:
    def __init__(self, kernel='RBF', lr=0.05, iters=300, device=device):
        self.kernel = kernel
        self.lr = lr
        self.iters = iters
        self.device = device

        self.trained = False
        self.X_train = None
        self.Y_train = None

        self.likelihood = None
        self.model = None

    # ----------------------------- #
    #        FIT / INITIAL TRAIN    #
    # ----------------------------- #
    def fit(self, X, Y):
        # --- FIX 1: USE FLOAT64 (Double Precision) ---
        X = torch.tensor(X, dtype=torch.float64, device=self.device)
        Y = torch.tensor(Y, dtype=torch.float64, device=self.device).flatten()

        self.X_train = X.clone()
        self.Y_train = Y.clone()

        self.retrain()

    def retrain(self):
        self._compute_normalization()
        self._train_model()

    def add_data(self, X_new, Y_new, retrain=True):
        # --- FIX 1: USE FLOAT64 ---
        X_new = torch.tensor(X_new, dtype=torch.float64, device=self.device)
        Y_new = torch.tensor(Y_new, dtype=torch.float64, device=self.device).flatten()

        if self.Y_train.ndim > 1:
            self.Y_train = self.Y_train.flatten()

        self.X_train = torch.cat([self.X_train, X_new], dim=0)
        self.Y_train = torch.cat([self.Y_train, Y_new], dim=0)

        if retrain:
            self.retrain()

    def _compute_normalization(self):
        self.X_mean = self.X_train.mean(0)
        self.X_std  = self.X_train.std(0)
        self.X_std[self.X_std < 1e-6] = 1.0

        self.Y_mean = self.Y_train.mean()
        self.Y_std  = self.Y_train.std()

        # Use tensor(1.0) with float64
        if self.Y_std < 1e-6:
            self.Y_std = torch.tensor(1.0, dtype=torch.float64, device=self.device)

        self.Xn = (self.X_train - self.X_mean) / self.X_std
        self.Yn = (self.Y_train - self.Y_mean) / self.Y_std

    def _train_model(self):
        self.likelihood = gpytorch.likelihoods.GaussianLikelihood().to(self.device)
        # Initialize noise slightly higher for stability
        self.likelihood.noise_covar.initialize(noise=1e-2)

        self.model = ExactGPModel(
            self.Xn, self.Yn, self.likelihood,
            kernel=self.kernel,
            ard_dims=self.X_train.shape[-1]
        ).to(self.device)

        # Move model to float64
        self.model.double()
        self.likelihood.double()

        self.train_gp(self.model, self.likelihood, self.Xn, self.Yn)
        self.trained = True

    def train_gp(self, model, likelihood, x, y):
        model.train(); likelihood.train()
        opt = torch.optim.Adam(model.parameters(), lr=self.lr)
        mll = gpytorch.mlls.ExactMarginalLogLikelihood(likelihood, model)

        # --- FIX 2: STABILITY SETTINGS ---
        # cholesky_jitter: Adds noise to diagonal to prevent crash
        # max_cg_iterations: Gives solver more attempts
        with gpytorch.settings.cholesky_jitter(1e-4), gpytorch.settings.max_cg_iterations(2000):
            for i in range(self.iters):
                opt.zero_grad()
                out = model(x)
                loss = -mll(out, y)
                loss.backward()
                opt.step()

        model.eval(); likelihood.eval()

    def predict_torch(self, X):
        if not self.trained:
            raise RuntimeError("GP has not been trained yet.")

        # Ensure input is float64
        X = torch.as_tensor(X, dtype=torch.float64, device=self.device)
        Xn = (X - self.X_mean) / self.X_std

        # Use fast prediction settings
        with torch.no_grad(), gpytorch.settings.fast_pred_var(), gpytorch.settings.cholesky_jitter(1e-4):
            pred = self.likelihood(self.model(Xn))
            mean = pred.mean * self.Y_std + self.Y_mean
            var  = pred.variance * (self.Y_std ** 2)

        # Return as float32 for compatibility with other parts of your code (MPPI usually likes float32)
        return mean.float(), var.float()

In [24]:
import numpy as np
import torch
from scipy.signal import butter, filtfilt

# ==============================================================
# 1. PREPARE DATA
# ==============================================================

def lowpass_offline(acc, dt, cutoff_hz=5.0, order=2):
    acc = np.asarray(acc, dtype=float)
    if len(acc) < 10: return acc
    fs = 1.0 / dt
    wn = cutoff_hz / (fs / 2.0)
    wn = min(max(wn, 1e-6), 0.999999)
    b, a = butter(order, wn, btype='low')
    return filtfilt(b, a, acc)

# Extract Logs
t_arr = np.array(logs['t'])
u_arr = np.array(logs['u'])
pitch_arr = np.array(logs['pitch'])
rate_raw = np.array(logs['rate'])
acc_raw  = np.array(logs['acc'])

# Filter
dt_avg = np.mean(np.diff(t_arr))
acc_clean  = lowpass_offline(acc_raw, dt_avg, cutoff_hz=5.0)
rate_clean = lowpass_offline(rate_raw, dt_avg, cutoff_hz=10.0)

# ==============================================================
# 2. BUILD DATASET (With Throttle!)
# ==============================================================
print("Building Dataset (X=[Pitch, Rate, Throttle] -> Y=[Rate, Acc])...")

N_BINS = 40
MAX_SAMPLES = 20
OMEGA_RANGE = 15.0
occupancy = np.zeros((N_BINS, N_BINS), dtype=np.int32)

X_list = []
Y_list = []

for i in range(len(t_arr) - 1):

    # State Variables
    p = pitch_arr[i]
    r = rate_clean[i]
    a = acc_clean[i]
    u = u_arr[i]     # <--- We use this now

    # --- DEFINITION OF X AND Y ---

    # Input X: [Pitch, Rate, Throttle]
    # This allows the model to learn f(state, action)
    x_sample = [p, r, u]

    # Target Y: [Rate (Velocity), Accel (Derivative of Rate)]
    y_sample = [r, a]

    # --- BINNING LOGIC ---

    # 1. Outliers
    if (r < -OMEGA_RANGE) or (r > OMEGA_RANGE):
        X_list.append(x_sample)
        Y_list.append(y_sample)
        continue

    # 2. Binning
    p_norm = (p - (-np.pi)) / (2 * np.pi)
    r_norm = (r - (-OMEGA_RANGE)) / (2 * OMEGA_RANGE)

    idx_p = max(0, min(N_BINS - 1, int(p_norm * (N_BINS - 1))))
    idx_r = max(0, min(N_BINS - 1, int(r_norm * (N_BINS - 1))))

    if occupancy[idx_p, idx_r] < MAX_SAMPLES:
        occupancy[idx_p, idx_r] += 1
        X_list.append(x_sample)
        Y_list.append(y_sample)

# Convert
X = np.array(X_list, dtype=np.float32)
Y = np.array(Y_list, dtype=np.float32)

print(f"Data Processing Complete.")
print(f"Selected Samples: {X.shape[0]}")
print(f"X Shape: {X.shape} -> [Pitch, Rate, Throttle]")
print(f"Y Shape: {Y.shape} -> [Rate, Accel]")

# ==============================================================
# 3. TRAIN GPs
# ==============================================================

CHOSEN_KERNEL = 'RQ'

print(f"\nTraining GPs with kernel: {CHOSEN_KERNEL}...")

# 1. Train Derivative 1 (Rate)
print("--- Training GP for Output 1: Pitch Rate ---")
gp_rate = GPManager(kernel=CHOSEN_KERNEL, iters=300)
gp_rate.fit(X, Y[:, 0]) # Target: Rate

# 2. Train Derivative 2 (Acceleration)
print("--- Training GP for Output 2: Pitch Acceleration ---")
gp_acc = GPManager(kernel=CHOSEN_KERNEL, iters=300)
gp_acc.fit(X, Y[:, 1]) # Target: Accel

print("\n>>> Models Trained.")

Building Dataset (X=[Pitch, Rate, Throttle] -> Y=[Rate, Acc])...
Data Processing Complete.
Selected Samples: 1962
X Shape: (1962, 3) -> [Pitch, Rate, Throttle]
Y Shape: (1962, 2) -> [Rate, Accel]

Training GPs with kernel: RQ...
--- Training GP for Output 1: Pitch Rate ---
--- Training GP for Output 2: Pitch Acceleration ---

>>> Models Trained.


# Visualize Collected Data

In [26]:
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D

# Use the Acceleration model (Torque dynamics)
target_gp = gp_acc
actions = [-1.0, 0.0, 1.0]

# ---------------------------------------------------------
# FIX: Access tensors directly instead of using .dataset()
# ---------------------------------------------------------
# 1. Get data from GPU/Torch -> CPU/Numpy
X_train = target_gp.X_train.detach().cpu().numpy()
Y_train = target_gp.Y_train.detach().cpu().numpy()

pitch_data = X_train[:, 0]
rate_data  = X_train[:, 1]
act_data   = X_train[:, 2]
acc_data   = Y_train

# Create Grid for plotting
p_grid = np.linspace(pitch_data.min(), pitch_data.max(), 60)
r_grid = np.linspace(rate_data.min(), rate_data.max(), 60)
P, R = np.meshgrid(p_grid, r_grid)

for a in actions:
    # Query: [Pitch, Rate, Fixed_Action]
    X_query = np.column_stack([
        P.ravel(),
        R.ravel(),
        np.full(P.size, a)
    ])

    # Predict
    # Note: predict_torch expects float64 now, so we ensure the input matches
    mean_t, var_t = target_gp.predict_torch(X_query)
    Mean = mean_t.detach().cpu().numpy().reshape(P.shape)

    # Plot
    fig = plt.figure(figsize=(10, 6))
    ax = fig.add_subplot(111, projection='3d')


    surf = ax.plot_surface(P, R, Mean, cmap='viridis', alpha=0.8, edgecolor='none')

    # Overlay data points near this action
    # We use a small threshold to find points where throttle was close to 'a'
    mask = np.abs(act_data - a) < 0.15

    if np.sum(mask) > 0:
        ax.scatter(
            pitch_data[mask], rate_data[mask], acc_data[mask],
            color='black', s=5, label=f'Data (u≈{a})'
        )

    ax.set_xlabel('Pitch')
    ax.set_ylabel('Rate')
    ax.set_zlabel('Predicted Accel')
    ax.set_title(f"GP Dynamics Surface: Throttle = {a}")
    fig.colorbar(surf, ax=ax, shrink=0.5, aspect=10)

    plt.show()

In [None]:
import numpy as np
import torch
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D  # noqa: F401

# --------------------------------------------------------
# Choose which GP to visualize
# --------------------------------------------------------
gp = gps[1]   # e.g. GP for Δpitch_rate (index 0 would be Δpitch)
a_fixed = 1.0 # fixed continuous action (maximum thrust)

# --------------------------------------------------------
# Get training data (NumPy from dataset())
# --------------------------------------------------------
X_train, Y_train = gp.dataset()
pitch, pitch_rate, act = X_train[:, 0], X_train[:, 1], X_train[:, 2]
d_pitch_rate = Y_train  # targets for this GP

# Select samples close to a = a_fixed for overlay
mask = np.abs(act - a_fixed) < 0.5
print(f"Values near action a={a_fixed} → n={np.sum(mask)}")

# --------------------------------------------------------
# Define pitch–pitch_rate grid
# --------------------------------------------------------
p_min, p_max = pitch.min(), pitch.max()
v_min, v_max = pitch_rate.min(), pitch_rate.max()

p_grid = np.linspace(p_min, p_max, 80)
v_grid = np.linspace(v_min, v_max, 80)
P, V = np.meshgrid(p_grid, v_grid)

# Query points for the fixed action: [pitch, pitch_rate, a_fixed]
X_grid = np.column_stack([
    P.ravel(),                     # pitch
    V.ravel(),                     # pitch_rate
    np.full_like(P.ravel(), a_fixed)  # fixed action
])

# --------------------------------------------------------
# GP predictions (torch -> numpy)
# --------------------------------------------------------
Mean_t, Var_t = gp.predict_torch(X_grid)   # torch tensors on GPU

# move to CPU and numpy for plotting
Mean = Mean_t.detach().cpu().numpy().reshape(P.shape)
Var  = Var_t.detach().cpu().numpy().reshape(P.shape)
Std  = np.sqrt(Var)

# Normalize Std for color mapping
norm = plt.Normalize(vmin=Std.min(), vmax=Std.max())
colors = plt.cm.viridis(norm(Std))

# --------------------------------------------------------
# Plot surface: Mean as height, Std as color
# --------------------------------------------------------
fig = plt.figure(figsize=(10, 7))
ax = fig.add_subplot(111, projection='3d')

surf = ax.plot_surface(
    P, V, Mean,
    facecolors=colors,
    linewidth=0, antialiased=False, shade=False
)

# Colorbar = predictive uncertainty
m = plt.cm.ScalarMappable(cmap='viridis', norm=norm)
m.set_array(Std)
cbar = fig.colorbar(m, ax=ax, shrink=0.6, aspect=10)
cbar.set_label('GP Predictive Std (uncertainty)')

# Overlay raw data (samples with similar a)
ax.scatter(
    pitch[mask], pitch_rate[mask], d_pitch_rate[mask],
    color='k', s=15, alpha=0.6, label=f'training data (a≈{a_fixed})'
)

# --------------------------------------------------------
# Labels and title
# --------------------------------------------------------
ax.set_xlabel('Pitch')
ax.set_ylabel('Pitch Rate')
ax.set_zlabel('ΔPitch Rate')
ax.set_title(f"GP Model for Action a={a_fixed:.1f} — Mean Surface (height), Std (color)")
ax.view_init(elev=30, azim=230)
ax.legend()
plt.tight_layout()
plt.show()


In [27]:
import numpy as np
import matplotlib.pyplot as plt

# --------------------------------------------------------
# 1. Choose GP Model & Slice
# --------------------------------------------------------
# In previous cell, we named them gp_rate and gp_acc
gp = gp_rate      # Predicting Pitch Rate (dPhi/dt)
model_name = "Pitch Rate"

v_fixed = 0.0     # Slice: Car is currently not rotating
a_fixed = 1.0     # Slice: Full Throttle (+1.0)

# --------------------------------------------------------
# 2. Define Query Grid
# --------------------------------------------------------
p_min, p_max = -3.14, 3.14
p_grid = np.linspace(p_min, p_max, 200)

# Construct query: [Pitch, Rate_Fixed, Action_Fixed]
X_query = np.column_stack([
    p_grid,
    np.full_like(p_grid, v_fixed),
    np.full_like(p_grid, a_fixed)
])

# --------------------------------------------------------
# 3. Predict (Torch -> Numpy)
# --------------------------------------------------------
# The predict_torch method handles the float64 conversion internally
Mean_t, Var_t = gp.predict_torch(X_query)

Mean = Mean_t.detach().cpu().numpy()
Var  = Var_t.detach().cpu().numpy()
Std  = np.sqrt(Var)

# --------------------------------------------------------
# 4. Get Training Data for Overlay
# --------------------------------------------------------
# FIX: Access data directly from tensors
X_train = gp.X_train.detach().cpu().numpy()
Y_train = gp.Y_train.detach().cpu().numpy()

# Extract columns for masking
data_pitch = X_train[:, 0]
data_rate  = X_train[:, 1]
data_act   = X_train[:, 2]
data_y     = Y_train

# Create mask to find data points "near" this slice
# Rate within 0.5 rad/s, Action within 0.2 units
mask = (np.abs(data_rate - v_fixed) < 0.5) & (np.abs(data_act - a_fixed) < 0.2)

# --------------------------------------------------------
# 5. Plot
# --------------------------------------------------------
plt.figure(figsize=(10, 6))

# Plot GP Confidence Region
plt.plot(p_grid, Mean, 'b-', lw=2, label=f'GP Mean {model_name}')
plt.fill_between(
    p_grid,
    Mean - 2 * Std,
    Mean + 2 * Std,
    color='blue',
    alpha=0.2,
    label='±2σ Uncertainty'
)

# Plot Raw Data
if np.sum(mask) > 0:
    plt.scatter(
        data_pitch[mask],
        data_y[mask],
        color='k', s=25, alpha=0.7,
        label=f'Data (v≈{v_fixed}, u≈{a_fixed})'
    )
else:
    print("No training data found near this slice to plot.")

plt.xlabel("Pitch Angle (rad)")
plt.ylabel(f"Predicted {model_name} (rad/s)")
plt.title(f"GP Slice: {model_name} vs Pitch\n(Fixed Rate={v_fixed}, Fixed Throttle={a_fixed})")
plt.grid(True, alpha=0.3)
plt.legend()
plt.tight_layout()
plt.show()

# MPPI CONTROLLER

#### GP step for the MuJoCo car (using your learned GP)