# Examples for Policy Evaluation

This notebook demonstrates the evaluation of policies via simulation rollouts for quantitative performance comparisons.

In [1]:
%load_ext autoreload
%autoreload 2

import os
import sys
from pathlib import Path
# Make the parent of the current working directory importable so the project packages
# used below (policy, robot, validate) can be resolved.
# NOTE(review): assumes the kernel's working directory is the folder containing this
# notebook — confirm when launching Jupyter from elsewhere.
project_root = str(Path.cwd().parent)
# Guard the append so re-running this cell does not pile duplicate entries onto sys.path.
if project_root not in sys.path:
  sys.path.append(project_root)

In [None]:
from policy.privileged_pass_block_policy import PrivilegedPassBlockPolicy
from robot.sim import BimanualAction, BimanualObs, BimanualSim, randomize_block_position
from validate.evaluation import PassBlockTaskEvaluator, evaluate_policy

# The privileged policy is constructed from a simulation's model and data (see create_sim),
# so it cannot exist before a sim does. create_sim() stores the instance bound to the sim
# it just built in this module-level slot, and policy() forwards observations to it.
privileged_policy: PrivilegedPassBlockPolicy | None = None

def policy(obs: BimanualObs) -> BimanualAction:
  """Forward an observation to the currently installed privileged policy.

  Args:
    obs: Observation to act on.

  Returns:
    The action the installed PrivilegedPassBlockPolicy returns for `obs`.

  Raises:
    AssertionError: If no privileged policy has been installed yet, i.e.
      create_sim() has not been called.
  """
  # Reading a module-level name needs no `global` declaration; only create_sim rebinds it.
  active_policy = privileged_policy
  assert active_policy is not None
  return active_policy(obs)

def create_sim() -> BimanualSim:
  """Build a new simulation and rebind the privileged policy to it.

  The sim is created with 'block.xml' passed as a file to merge and with
  randomize_block_position passed as the on_mujoco_init callback.

  Side effect: replaces the module-level `privileged_policy` with an instance
  constructed from the new sim's model and data, so policy() acts on this sim.

  Returns:
    The newly created BimanualSim.
  """
  global privileged_policy
  fresh_sim = BimanualSim(
    merge_xml_files=[Path('block.xml')],
    on_mujoco_init=randomize_block_position,
  )
  # The policy is handed the sim's own model/data objects, hence one policy per sim.
  privileged_policy = PrivilegedPassBlockPolicy(fresh_sim.model, fresh_sim.data)
  return fresh_sim

# Run the privileged policy through evaluate_policy to obtain its success rate over
# 100 rollouts. create_sim is passed as the sim factory; it also rebinds the policy
# to each sim it creates.
success_rate = evaluate_policy(
  policy=policy,
  create_sim=create_sim,
  create_task_evaluator=PassBlockTaskEvaluator,
  num_rollouts=100,
  verbose=True,
)

# success_rate is scaled by 100 and shown as a percentage with two decimals.
print(
  'The handcrafted privileged policy has a success rate of '
  f'{success_rate * 100:.2f}%.'
)

Rollouts of <function policy at 0x000001E06AB75900>: 100%|██████████| 100/100 [1:55:12<00:00, 69.13s/it]

The handcrafted privileged policy has a success rate of 73.00%.





In [None]:
from robot.sim import BimanualAction, BimanualSim, randomize_block_position
from validate.evaluation import PassBlockTaskEvaluator, evaluate_policy

# Baseline for comparison: a policy that ignores its observation and always returns a
# default-constructed BimanualAction, evaluated over 20 rollouts in sims configured with
# the same merge file and init callback as in create_sim above.
success_rate = evaluate_policy(
  policy=lambda _obs: BimanualAction(),
  create_sim=lambda: BimanualSim(
    merge_xml_files=[Path('block.xml')],
    on_mujoco_init=randomize_block_position,
  ),
  create_task_evaluator=PassBlockTaskEvaluator,
  num_rollouts=20,
  verbose=True,
)

# success_rate is scaled by 100 and shown as a percentage with two decimals.
print(
  'The zero action policy has a success rate of '
  f'{success_rate * 100:.2f}%.'
)

Rollouts of <function <lambda> at 0x000002177963E950>: 100%|██████████| 20/20 [03:59<00:00, 11.97s/it]

The zero action policy has a success rate of 0.00%.



