In [1]:
import torch
import numpy as np
import gymnasium as gym
import mujoco
from Diffusion import PoseDiffusion, QuadrupedMorphDiffusionDataset


def load_diffusion(
    weights_path="quadruped_morph_diffusion_weights.pt",
    traj_file="quadruped_morph_trajectories.npz",
    device=None,
):
    """Restore a trained PoseDiffusion model plus the metadata needed to sample from it.

    Args:
        weights_path: Checkpoint file; it must hold a "config" dict and a
            "state_dict" entry.
        traj_file: Trajectory archive used to rebuild the dataset so the
            model dimensions can be recovered.
        device: Torch device to load onto. When None, "cuda" is chosen if
            available, otherwise "cpu".

    Returns:
        A tuple ``(diff_model, meta, device)`` where ``diff_model`` is in eval
        mode and ``meta`` is a dict with the keys "past_len", "future_len",
        "obs_dim", "morph_dim", "D_target", "D_cond" and
        "imitation_obs_indices".
    """
    if device is None:
        device = "cuda" if torch.cuda.is_available() else "cpu"
    print("Using device:", device)

    # NOTE(review): torch.load deserialises the file; only load checkpoints
    # from a trusted source.
    checkpoint = torch.load(weights_path, map_location=device)
    config = checkpoint["config"]

    # Required hyper-parameters (KeyError if missing); the index list is optional.
    past_len, future_len, n_steps, hidden = (
        config[key] for key in ("past_len", "future_len", "n_steps", "hidden")
    )
    imitation_obs_indices = config.get("imitation_obs_indices", None)

    print("Loaded config from checkpoint:")
    for label, value in (
        ("  traj_file :", traj_file),
        ("  past_len  :", past_len),
        ("  future_len:", future_len),
        ("  n_steps   :", n_steps),
        ("  hidden    :", hidden),
        ("  imitation_obs_indices:", imitation_obs_indices),
    ):
        print(label, value)

    # The dataset is rebuilt only to read back the target / conditioning sizes.
    dataset_kwargs = dict(
        traj_path=traj_file,
        past_len=past_len,
        future_len=future_len,
        imitation_obs_indices=imitation_obs_indices,
        device=device,
    )
    dataset = QuadrupedMorphDiffusionDataset(**dataset_kwargs)

    target_dim = dataset.D_target   # per-time-step target width
    cond_dim = dataset.D_cond       # conditioning vector width

    print("Reconstructed diffusion dims:")
    print("  D_target (obs_dim) =", target_dim)
    print("  D_cond             =", cond_dim)

    diff_model = PoseDiffusion(
        D_target=target_dim,
        D_cond=cond_dim,
        future_len=future_len,
        n_steps=n_steps,
        hidden=hidden,
    )
    diff_model = diff_model.to(device)
    diff_model.load_state_dict(checkpoint["state_dict"])
    diff_model.eval()

    print("Loaded diffusion model.")

    # Everything a sampler needs to shape its inputs and outputs.
    meta = {
        "past_len": past_len,
        "future_len": future_len,
        "obs_dim": target_dim,
        "morph_dim": dataset.morph_dim,
        "D_target": target_dim,
        "D_cond": cond_dim,
        "imitation_obs_indices": imitation_obs_indices,
    }
    return diff_model, meta, device

def make_cond_from_past_and_morph(past_obs, morph_vec, ckpt, device):
    """Build the conditioning tensor from an observation history and a morphology vector.

    Args:
        past_obs: Array-like of shape (past_len, obs_dim).
        morph_vec: Array-like of shape (morph_dim,).
        ckpt: Mapping providing "past_len" and "obs_dim".
        device: Torch device for the returned tensor.

    Returns:
        A float32 tensor of shape (1, past_len * obs_dim + morph_dim): the
        flattened history followed by the morphology vector.

    Raises:
        AssertionError: If ``past_obs`` does not have shape (past_len, obs_dim).
    """
    past_len, obs_dim = ckpt["past_len"], ckpt["obs_dim"]

    history = np.asarray(past_obs, dtype=np.float32)
    morphology = np.asarray(morph_vec, dtype=np.float32)

    expected_shape = (past_len, obs_dim)
    assert history.shape == expected_shape, (
        f"past_obs shape {history.shape} != ({past_len}, {obs_dim})"
    )

    # Row-major flatten of the history, then the morphology appended at the end.
    stacked = np.concatenate((history.ravel(), morphology), axis=0)

    batch = torch.tensor(stacked, dtype=torch.float32, device=device)
    return batch.unsqueeze(0)  # leading batch axis of size 1

def sample_future_obs(diff_model, cond, meta):
    """Draw one future trajectory from the diffusion model as a NumPy array.

    Args:
        diff_model: Object exposing ``sample(cond)`` that returns a batched
            tensor; only the first batch element is used.
        cond: Conditioning tensor forwarded unchanged to ``diff_model.sample``.
        meta: Unused. Kept so existing call sites keep working; previously two
            keys were read from it and then discarded, which made the call
            fail on metadata it did not actually need.

    Returns:
        The first batch element of the sample, moved to the CPU and converted
        to a NumPy array (presumably (future_len, obs_dim) — confirm against
        ``PoseDiffusion.sample``).
    """
    # Sampling is inference only, so no autograd graph is recorded.
    with torch.no_grad():
        sampled = diff_model.sample(cond)

    # detach() keeps the conversion valid even if the sampler returns a tensor
    # that still requires grad.
    return sampled[0].detach().cpu().numpy()

def pred_to_qpos_from_diffusion(pred, env, obs_indices=None):
    """Turn one predicted pose into a full ``qpos`` vector for the simulator.

    The diffusion targets in this notebook are the observation entries 5..12
    (the joint angles). Observations omit the root x, y, so observation index
    ``i`` corresponds to ``qpos[i + 2]`` and the joint angles live in
    ``qpos[7:]``. The previous implementation wrote the prediction to
    ``qpos[2:2+L]``, which overwrote the torso height and orientation
    quaternion with joint angles.

    Args:
        pred: 1-D array-like of predicted values for a single time step.
        env: Environment whose ``unwrapped.data.qpos`` and
            ``unwrapped.model.nq`` describe the current simulator state.
        obs_indices: Optional observation indices that each entry of ``pred``
            corresponds to (for example the checkpoint's
            ``imitation_obs_indices``). When None, ``pred`` is taken to hold
            consecutive joint angles starting right after the 7 free-joint
            coordinates (assumes a free-joint root as in Ant — confirm for
            custom XML files).

    Returns:
        A copy of the current ``qpos`` with the predicted entries replaced; the
        environment state itself is not modified.
    """
    root_xy = 2          # x, y are excluded from the observation vector
    free_joint_nq = 7    # x, y, z + orientation quaternion

    pred = np.asarray(pred, dtype=np.float32)
    sim = env.unwrapped
    qpos = sim.data.qpos.copy()
    nq = sim.model.nq

    if obs_indices is None:
        # Fill as many joint slots as the prediction provides, never running
        # past the end of qpos.
        count = max(0, min(pred.shape[0], nq - free_joint_nq))
        qpos[free_joint_nq:free_joint_nq + count] = pred[:count]
        return qpos

    # Explicit mapping: observation index -> qpos slot, skipping anything that
    # would land outside the vector.
    for value, obs_idx in zip(pred, obs_indices):
        slot = int(obs_idx) + root_xy
        if 0 <= slot < nq:
            qpos[slot] = value
    return qpos

def obs_to_qpos_from_augmented(obs_aug, env, morph_dim=8):
    """Recover a ``qpos`` vector from an augmented observation.

    The trailing ``morph_dim + 2`` entries of ``obs_aug`` are dropped; the
    leading ``nq - 2`` values of what remains are written to ``qpos[2:]``
    (this relies on the base observation starting with ``qpos[2:]``). The
    first two ``qpos`` entries keep their current simulator values.

    Args:
        obs_aug: 1-D augmented observation.
        env: Environment exposing ``unwrapped.model.nq`` and
            ``unwrapped.data.qpos``.
        morph_dim: Number of morphology entries appended to the observation.

    Returns:
        A new ``qpos`` array; the environment state is left untouched.
    """
    flat = np.asarray(obs_aug, dtype=np.float32)
    sim = env.unwrapped

    tail_len = sim.model.nq - 2                      # qpos slots after the first two
    base_len = flat.shape[0] - morph_dim - 2         # strip the augmentation suffix

    result = sim.data.qpos.copy()
    result[2:] = flat[:base_len][:tail_len]
    return result

# freeze frame simulated gait cycle
def make_diffusion_video(full_cycle, xml_path, save_dir="diff_video", fps=30):
    """Render a sampled gait cycle to a video by posing the robot frame by frame.

    For every row of ``full_cycle`` the pose is written into the simulator's
    ``qpos``, kinematics are refreshed with ``mj_forward`` and one zero-action
    environment step is taken so that the ``RecordVideo`` wrapper captures a
    frame. Because a real step is taken, each recorded frame shows the state
    after that step rather than the exact pose that was written.

    Args:
        full_cycle: Array-like whose first axis indexes time; each row is
            passed to ``pred_to_qpos_from_diffusion``.
        xml_path: MuJoCo XML file handed to the "Ant-v5" environment.
        save_dir: Folder the video is written to.
        fps: Frame rate of the recorded video. This argument used to be
            accepted but ignored; it is now forwarded to ``RecordVideo``.
    """
    full_cycle = np.asarray(full_cycle)

    env = gym.make("Ant-v5", xml_file=xml_path, render_mode="rgb_array")
    frame_count = 0
    try:
        env = gym.wrappers.RecordVideo(
            env,
            video_folder=save_dir,
            name_prefix="diffusion_gait",
            episode_trigger=lambda ep: True,
            fps=fps,
        )
        env.reset()

        model = env.unwrapped.model
        data = env.unwrapped.data
        # The action never changes, so build it once.
        zero_action = np.zeros(env.action_space.shape, dtype=np.float32)

        for pred in full_cycle:
            data.qpos[:] = pred_to_qpos_from_diffusion(pred, env)
            mujoco.mj_forward(model, data)

            # Take a step just to trigger rendering and RecordVideo capture.
            env.step(zero_action)
            frame_count += 1
    finally:
        # Always release the renderer and finalise the video file, even when
        # a frame fails part-way through.
        env.close()

    print(f"Frames rendered: {frame_count}")
    print(f"Video saved to {save_dir}")

In [2]:
### Diff Video ###

import Diffusion as diff
from Diffusion import DiffusionPoseTemplate



# Morphology vector of the reference quadruped (one leg/ankle pair per limb).
quad_morph = [
    0.141421, 0.282843,  # FR leg, FR ankle
    0.141421, 0.282843,  # FL leg, FL ankle
    0.141421, 0.282843,  # BL leg, BL ankle
    0.141421, 0.282843,  # BR leg, BR ankle
]

diff_model, meta, device = load_diffusion(
    weights_path="quadruped_morph_diffusion_weights.pt",
    traj_file="quadruped_morph_trajectories.npz",
)

# Sample on the device the model was actually loaded onto; hard-coding "cuda"
# breaks on CPU-only machines even though load_diffusion falls back to "cpu".
full_cycle = diff.sample_full_cycle_from_morph(
    diff_model,
    meta,
    quad_morph,
    cycle_steps=200,
    device=device,
)

print(full_cycle.shape)
print(full_cycle[0])

make_diffusion_video(full_cycle, "./quadruped.xml")


Using device: cuda
Loaded config from checkpoint:
  traj_file : quadruped_morph_trajectories.npz
  past_len  : 10
  future_len: 200
  n_steps   : 100
  hidden    : 256
  imitation_obs_indices: [5, 6, 7, 8, 9, 10, 11, 12]
[QuadrupedDataset] obs_dim=97, morph_dim=8
[QuadrupedDataset] target_dim=8
[QuadrupedDataset] total samples possible=71087
Reconstructed diffusion dims:
  D_target (obs_dim) = 8
  D_cond             = 88
Loaded diffusion model.
(200, 8)
[ 0.20517375  0.6367862   0.34641522 -1.3149067   0.33477023 -0.52083486
 -0.31458032  0.7027811 ]


  logger.warn(


Frames rendered: 200
Video saved to diff_video


In [3]:
#Train the Cycle with Imitation

import Reinforcement as re

# Observation indices scored by the imitation term. Prefer the indices stored
# in the diffusion checkpoint so the reward targets exactly the entries the
# pose template was trained to predict; fall back to the standard Ant joint
# indices (5..12) when the checkpoint does not record them.
ckpt_obs_indices = meta.get("imitation_obs_indices")
imitation_obs_indices = (
    list(ckpt_obs_indices) if ckpt_obs_indices is not None else list(range(5, 13))
)

# Morph of interest (must match the morphology full_cycle was sampled for).
quad_morph = [
    0.141421, 0.282843,
    0.141421, 0.282843,
    0.141421, 0.282843,
    0.141421, 0.282843,
]


pose_template = DiffusionPoseTemplate(full_cycle)

re.train_ppo_with_pose_template(
    run_name="ppo_with_imitation",
    pose_generator=pose_template,
    morph_vec=quad_morph,
    xml_path="./quadruped.xml",
    timesteps=300_000,
    parallel_envs=1,
    initial_learning_rate=3e-4,
    imitation_w=4,
    imitation_obs_indices=imitation_obs_indices
)

Logging to ./logs_ppo_with_imitation
Using cuda device





🚀 Training PPO with pose templates for ppo_with_imitation ...
[env 0] ep_len=  20 | R_mean=-1.050 | fwd=-0.395 | vel=-1.284 | imitat=-0.816 | alive= 0.100 | energy_p= 0.027 | fail= 0.750 | var=unknown
[env 0] ep_len=  40 | R_mean=-0.105 | fwd=-0.111 | vel=-0.430 | imitat=-0.683 | alive= 0.100 | energy_p= 0.028 | fail= 0.375 | var=unknown
[env 0] ep_len=  41 | R_mean=-0.181 | fwd=-0.442 | vel=-1.386 | imitat=-0.890 | alive= 0.100 | energy_p= 0.031 | fail= 0.366 | var=unknown
[env 0] ep_len=  32 | R_mean=-0.133 | fwd=-0.369 | vel=-1.298 | imitat=-0.762 | alive= 0.100 | energy_p= 0.028 | fail= 0.469 | var=unknown
[env 0] ep_len=  20 | R_mean=-0.130 | fwd=-0.271 | vel=-0.797 | imitat=-1.098 | alive= 0.100 | energy_p= 0.029 | fail= 0.750 | var=unknown
[env 0] ep_len=  36 | R_mean=-0.097 | fwd=-0.194 | vel=-0.657 | imitat=-0.805 | alive= 0.100 | energy_p= 0.028 | fail= 0.417 | var=unknown
[env 0] ep_len=  25 | R_mean=-0.229 | fwd=-0.863 | vel=-2.886 | imitat=-0.698 | alive= 0.100 | energ

  logger.warn(


Video recording complete.
[env 0] ep_len=  38 | R_mean=-0.050 | fwd=-0.032 | vel=-0.201 | imitat=-0.486 | alive= 0.100 | energy_p= 0.026 | fail= 0.395 | var=unknown
[env 0] ep_len=  25 | R_mean=-0.100 | fwd=-0.182 | vel=-0.634 | imitat=-0.806 | alive= 0.100 | energy_p= 0.028 | fail= 0.600 | var=unknown
[env 0] ep_len=  60 | R_mean=-0.049 | fwd=-0.016 | vel=-0.130 | imitat=-0.698 | alive= 0.100 | energy_p= 0.028 | fail= 0.250 | var=unknown
[env 0] ep_len=  44 | R_mean=-0.094 | fwd=-0.225 | vel=-0.768 | imitat=-0.744 | alive= 0.100 | energy_p= 0.028 | fail= 0.341 | var=unknown
[env 0] ep_len=  63 | R_mean=-0.092 | fwd=-0.214 | vel=-0.715 | imitat=-0.853 | alive= 0.100 | energy_p= 0.029 | fail= 0.238 | var=unknown
[env 0] ep_len=  64 | R_mean=-0.065 | fwd=-0.149 | vel=-0.508 | imitat=-0.540 | alive= 0.100 | energy_p= 0.029 | fail= 0.234 | var=unknown
[env 0] ep_len=  59 | R_mean=-0.053 | fwd=-0.002 | vel=-0.201 | imitat=-0.727 | alive= 0.100 | energy_p= 0.031 | fail= 0.254 | var=unknown
[

<stable_baselines3.ppo.ppo.PPO at 0x1e6167ea3c0>

In [6]:
import numpy as np

# np.load on an .npz archive returns a lazily-read file object; the context
# manager closes the underlying file once the arrays have been pulled out.
with np.load("quadruped_morph_trajectories.npz") as data:
    obs = data["obs"]
    morph = data["morph"]

print("obs_dim:", obs.shape[1])
print("morph_dim:", morph.shape[1])

obs_dim: 97
morph_dim: 8


In [9]:
import Reinforcement as re
import numpy as np
from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import DummyVecEnv, SubprocVecEnv, VecNormalize, VecMonitor, VecVideoRecorder
from stable_baselines3.common.utils import LinearSchedule
from stable_baselines3.common.logger import configure

def evaluate_quadruped_ppo(
    ppo_path: str,
    xml_path: str,
    morph_vec,
    episodes: int = 10,
    max_steps_per_ep: int = 1000,
    deterministic: bool = True,
    pose_generator=None,
    imitation_w: float = 0.0,
    imitation_obs_indices=None ):
    """Roll out a saved PPO policy and report mean forward motion and energy.

    Args:
        ppo_path: Path of the saved stable-baselines3 PPO model.
        xml_path: MuJoCo XML used to build the evaluation environment.
        morph_vec: Morphology vector handed to ``MorphPhaseEnvWrapper``.
        episodes: Number of evaluation episodes.
        max_steps_per_ep: Step cap per episode.
        deterministic: Forwarded to ``model.predict``.
        pose_generator: Optional pose template forwarded to the wrapper.
        imitation_w: Imitation weight forwarded to the wrapper.
        imitation_obs_indices: Observation indices forwarded to the wrapper.

    Returns:
        ``(avg_forward, avg_energy)``: per-episode sums of ``info["delta_x"]``
        and ``info["energy_penalty"]`` averaged over all episodes (0.0 for
        each when no episode was run).
    """
    print(f"Loading PPO model from {ppo_path} ...")
    model = PPO.load(ppo_path)

    # Base env: same quadruped XML as in training
    base_env = re.make_quadruped_env(seed=0, xml_path=xml_path)

    # Wrap in MorphPhaseEnvWrapper so we get delta_x and energy_penalty in info
    eval_env = re.MorphPhaseEnvWrapper(
        base_env=base_env,
        morph_vec=morph_vec,
        cycle_steps=200,
        settle_steps=5,
        pose_generator=pose_generator,
        imitation_w=imitation_w,
        imitation_obs_indices=imitation_obs_indices,
        xml_path=xml_path,
    )

    episode_forward = []
    episode_energy = []

    try:
        for ep in range(episodes):
            obs, info = eval_env.reset()
            ep_forward = 0.0
            ep_energy = 0.0
            # Counted explicitly so the report is valid even when the step
            # cap is 0 and the inner loop never runs.
            steps = 0

            for steps in range(1, max_steps_per_ep + 1):
                # A single (non-vectorised) env observation is passed straight
                # to predict().
                action, _ = model.predict(obs, deterministic=deterministic)
                obs, reward, done, truncated, info = eval_env.step(action)

                # info fields are populated by MorphPhaseEnvWrapper.step(...)
                ep_forward += float(info.get("delta_x", 0.0))
                ep_energy  += float(info.get("energy_penalty", 0.0))

                if done or truncated:
                    break

            episode_forward.append(ep_forward)
            episode_energy.append(ep_energy)
            print(
                f"Episode {ep+1:2d} | "
                f"forward = {ep_forward:.3f} | "
                f"energy = {ep_energy:.3f} | "
                f"steps = {steps}"
            )
    finally:
        # Release simulator/renderer resources even if a rollout fails.
        # getattr guard: the wrapper class is defined outside this notebook,
        # so its having a close() method is assumed rather than known.
        close_env = getattr(eval_env, "close", None)
        if callable(close_env):
            close_env()

    avg_forward = float(np.mean(episode_forward)) if episode_forward else 0.0
    avg_energy  = float(np.mean(episode_energy)) if episode_energy else 0.0

    print("\n=== PPO evaluation over "
          f"{episodes} episodes ({xml_path}) ===")
    print(f"Average forward motion: {avg_forward:.3f}")
    print(f"Average energy spent:   {avg_energy:.3f}")

    return avg_forward, avg_energy
    

In [13]:
quad_morph = [
    0.141421, 0.282843,
    0.141421, 0.282843,
    0.141421, 0.282843,
    0.141421, 0.282843,
]

var1_morph = [
    0.2, 0.25,  # FR leg, FR ankle
    0.2, 0.25,  # FL leg, FL ankle
    0.141421, 0.3,  # BL leg, BL ankle
    0.141421, 0.3,  # BR leg, BR ankle
]

explore_var = "quadrl_m0_ppo.zip"
mlp = "ppo_with_mlp.zip"
generative = "ppo_with_imitation_ppo.zip"

diff_model, meta, device = load_diffusion(
    weights_path="quadruped_morph_diffusion_weights.pt",
    traj_file="quadruped_morph_trajectories.npz",
)

# Sample on the device load_diffusion selected instead of a hard-coded "cuda",
# so the cell also runs on CPU-only machines.
full_cycle = diff.sample_full_cycle_from_morph(
    diff_model,
    meta,
    quad_morph,
    cycle_steps=200,
    device=device,
)
pose_template = DiffusionPoseTemplate(full_cycle)

# One entry per policy: (label, checkpoint, xml file, pose generator, note).
# NOTE(review): the first run uses quadruped_var1.xml but still passes
# quad_morph, and var1_morph is never used. Either the morph vector or the XML
# file looks mismatched — confirm which was intended before comparing rows.
eval_runs = [
    ("Exploration-PPO", explore_var, "./quadruped_var1.xml", None,          "baseline (explore_var)"),
    ("MLP-PPO",         mlp,         "./quadruped.xml",      None,          "baseline (mlp)"),
    ("Generative-PPO",  generative,  "./quadruped.xml",      pose_template, "diffusion-guided"),
]

rows = []
for name, ppo_path, xml_path, pose_generator, note in eval_runs:
    avg_fwd, avg_energy = evaluate_quadruped_ppo(
        ppo_path=ppo_path,
        xml_path=xml_path,
        morph_vec=quad_morph,
        episodes=10,
        max_steps_per_ep=1000,
        deterministic=True,
        pose_generator=pose_generator,
        imitation_w=0.0,
        imitation_obs_indices=None,
    )
    rows.append((name, avg_fwd, avg_energy, note))

# Keep the per-model names available for any later cell that reads them.
avg_fwd1, avg_energy1 = rows[0][1:3]
avg_fwd2, avg_energy2 = rows[1][1:3]
avg_fwd3, avg_energy3 = rows[2][1:3]


print(" Quadruped PPO Evaluation Summary")
print("")

header = f"{'Model':<20} | {'Avg Forward':>12} | {'Avg Energy':>12} | {'Notes'}"
print(header)
print("-"*72)

for name, fwd, energy, note in rows:
    print(f"{name:<20} | {fwd:12.4f} | {energy:12.4f} | {note}")

Using device: cuda
Loaded config from checkpoint:
  traj_file : quadruped_morph_trajectories.npz
  past_len  : 10
  future_len: 200
  n_steps   : 100
  hidden    : 256
  imitation_obs_indices: [5, 6, 7, 8, 9, 10, 11, 12]
[QuadrupedDataset] obs_dim=97, morph_dim=8
[QuadrupedDataset] target_dim=8
[QuadrupedDataset] total samples possible=71087
Reconstructed diffusion dims:
  D_target (obs_dim) = 8
  D_cond             = 88
Loaded diffusion model.
Loading PPO model from quadrl_m0_ppo.zip ...
Episode  1 | forward = 8.794 | energy = 14.674 | steps = 309
Episode  2 | forward = 12.804 | energy = 13.413 | steps = 283
Episode  3 | forward = 9.673 | energy = 10.576 | steps = 225
Episode  4 | forward = 4.295 | energy = 19.749 | steps = 419
Episode  5 | forward = 11.587 | energy = 17.192 | steps = 370
Episode  6 | forward = 22.788 | energy = 42.143 | steps = 891
Episode  7 | forward = 8.500 | energy = 10.912 | steps = 230
Episode  8 | forward = 17.632 | energy = 30.501 | steps = 642
Episode  9 | f