In [1]:
import Reinforcement as re
import numpy as np

def evaluate_quadruped_ppo(
    ppo_path: str,
    xml_path: str,
    morph_vec,
    episodes: int = 10,
    max_steps_per_ep: int = 1000,
    deterministic: bool = True,
    pose_generator=None,
    imitation_w: float = 0.0,
    imitation_obs_indices=None,
):
    """Roll out a saved PPO policy on the quadruped and report average motion/energy.

    The policy is loaded with ``PPO.load``, a base environment is created with
    ``make_quadruped_env(seed=0, xml_path=xml_path)`` and wrapped in
    ``MorphPhaseEnvWrapper`` (``cycle_steps=200``, ``settle_steps=5``). For each
    episode the per-step ``info["delta_x"]`` and ``info["energy_penalty"]``
    values are summed; a one-line summary is printed per episode and the
    averages over all episodes are printed at the end.

    NOTE(review): ``PPO``, ``make_quadruped_env`` and ``MorphPhaseEnvWrapper``
    are not imported in the visible notebook cell - confirm they are in scope
    before this function is called.

    Args:
        ppo_path: Path passed to ``PPO.load``.
        xml_path: Model XML path, forwarded to both the base env factory and
            the wrapper.
        morph_vec: Morphology vector, forwarded unchanged to the wrapper.
        episodes: Number of evaluation episodes.
        max_steps_per_ep: Upper bound on steps per episode; an episode also
            ends as soon as the env reports ``done`` or ``truncated``.
        deterministic: Forwarded to ``model.predict``.
        pose_generator: Forwarded unchanged to the wrapper.
        imitation_w: Forwarded unchanged to the wrapper.
        imitation_obs_indices: Forwarded unchanged to the wrapper.

    Returns:
        ``(avg_forward, avg_energy)`` as Python floats; both are ``0.0`` when
        no episode was run.
    """
    print(f"Loading PPO model from {ppo_path} ...")
    model = PPO.load(ppo_path)

    # Base env: same quadruped XML as in training
    base_env = make_quadruped_env(seed=0, xml_path=xml_path)

    # Wrap in MorphPhaseEnvWrapper so we get delta_x and energy_penalty in info
    eval_env = MorphPhaseEnvWrapper(
        base_env=base_env,
        morph_vec=morph_vec,
        cycle_steps=200,
        settle_steps=5,
        pose_generator=pose_generator,
        imitation_w=imitation_w,
        imitation_obs_indices=imitation_obs_indices,
        xml_path=xml_path,
    )

    episode_forward = []
    episode_energy = []

    try:
        for ep in range(episodes):
            obs, info = eval_env.reset()
            ep_forward = 0.0
            ep_energy = 0.0
            # Tracked explicitly so the summary line is still valid when the
            # step loop never executes (max_steps_per_ep <= 0).
            steps_taken = 0

            for t in range(max_steps_per_ep):
                # The single-env observation is handed to predict() as-is;
                # no manual batching or reshaping is done here.
                action, _ = model.predict(obs, deterministic=deterministic)
                obs, reward, done, truncated, info = eval_env.step(action)
                steps_taken = t + 1

                # Keys missing from info contribute 0.0 to the episode totals.
                ep_forward += float(info.get("delta_x", 0.0))
                ep_energy += float(info.get("energy_penalty", 0.0))

                if done or truncated:
                    break

            episode_forward.append(ep_forward)
            episode_energy.append(ep_energy)
            print(
                f"Episode {ep+1:2d} | "
                f"forward = {ep_forward:.3f} | "
                f"energy = {ep_energy:.3f} | "
                f"steps = {steps_taken}"
            )
    finally:
        # The env is created inside this function, so release it here as well.
        # Guarded because the wrapper's interface is not visible from this cell.
        close_env = getattr(eval_env, "close", None)
        if callable(close_env):
            close_env()

    avg_forward = float(np.mean(episode_forward)) if episode_forward else 0.0
    avg_energy = float(np.mean(episode_energy)) if episode_energy else 0.0

    print("\n=== PPO evaluation over "
          f"{episodes} episodes ({xml_path}) ===")
    print(f"Average forward motion: {avg_forward:.3f}")
    print(f"Average energy spent:   {avg_energy:.3f}")

    return avg_forward, avg_energy
    

In [2]:
# Baseline morphology: the same two values repeated for each of the four legs
# (presumably in the same leg/ankle order annotated on var1_morph below).
quad_morph = [0.141421, 0.282843] * 4

# Variant morphology: front legs (FR, FL) and back legs (BL, BR) use different
# (leg, ankle) values.
var1_morph = (
    [0.2, 0.25] * 2          # FR then FL: leg, ankle
    + [0.141421, 0.3] * 2    # BL then BR: leg, ankle
)

# Saved PPO checkpoints compared in this notebook.
explore_var, mlp, generative = (
    "quadrl_m0_ppo.zip",
    "ppo_mlp.zip",
    "ppo_with_imitation.zip",
)

# NOTE(review): this cell currently fails with
# "NameError: name 'load_diffusion' is not defined". `load_diffusion`, `diff`
# and `DiffusionPoseTemplate` are not imported in any visible cell - add the
# import for the module that provides them to the notebook's import cell.
diff_model, meta, device = load_diffusion(
    weights_path="quadruped_morph_diffusion_weights.pt",
    traj_file="quadruped_morph_trajectories.npz",
)

# Sample one gait cycle for the baseline morphology. cycle_steps=200 matches
# the cycle_steps value evaluate_quadruped_ppo passes to MorphPhaseEnvWrapper.
# The device reported by load_diffusion is reused here in place of a hard-coded
# "cuda", so sampling also works on machines without a GPU.
full_cycle = diff.sample_full_cycle_from_morph(
    diff_model,
    meta,
    quad_morph,
    cycle_steps=200,
    device=device,
)
pose_template = DiffusionPoseTemplate(full_cycle)


# Settings shared by every evaluation run below; only the checkpoint and the
# pose generator differ between runs.
shared_eval_kwargs = dict(
    xml_path="./quadruped.xml",
    morph_vec=quad_morph,
    episodes=10,
    max_steps_per_ep=1000,
    deterministic=True,
    imitation_w=0.0,
    imitation_obs_indices=None,
)

# Run 1: exploration checkpoint, no pose generator.
avg_fwd1, avg_energy1 = evaluate_quadruped_ppo(
    ppo_path=explore_var,
    pose_generator=None,
    **shared_eval_kwargs,
)

# Run 2: MLP checkpoint, no pose generator.
avg_fwd2, avg_energy2 = evaluate_quadruped_ppo(
    ppo_path=mlp,
    pose_generator=None,
    **shared_eval_kwargs,
)

# Run 3: imitation checkpoint with the diffusion pose template supplied
# (imitation_w is still 0.0, as in the other runs).
avg_fwd3, avg_energy3 = evaluate_quadruped_ppo(
    ppo_path=generative,
    pose_generator=pose_template,
    **shared_eval_kwargs,
)


# Plain-text comparison table of the three runs.
print(" Quadruped PPO Evaluation Summary")
print("")

header = " | ".join(
    [f"{'Model':<20}", f"{'Avg Forward':>12}", f"{'Avg Energy':>12}", "Notes"]
)
print(header)
print("-" * 72)

rows = [
    ("Exploration-PPO", avg_fwd1, avg_energy1, "baseline (explore_var)"),
    ("MLP-PPO", avg_fwd2, avg_energy2, "baseline (mlp)"),
    ("Generative-PPO", avg_fwd3, avg_energy3, "diffusion-guided"),
]

row_template = "{:<20} | {:12.4f} | {:12.4f} | {}"
for label, forward, energy_used, remark in rows:
    print(row_template.format(label, forward, energy_used, remark))

NameError: name 'load_diffusion' is not defined