In [None]:
!pip install phyre --quiet

In [17]:
# ✅ Import required libraries
import phyre
import numpy as np
import matplotlib.pyplot as plt
import random
# Seed both random sources used in this notebook: Python's `random`
# (random.choice in solve_fn) and NumPy's global RNG (the Gaussian noise
# drawn in disturbed()). Seeding only `random` leaves the perturbation
# different on every run.
random.seed(0)
np.random.seed(0)

In [18]:
from typing import Sequence
from phyre.action_simulator import ActionSimulator

def disturbed(tasks: Sequence,
              pos_pct : float = 0.001,
              ang_pct : float = 0.001,
              pos_sigma: float = 2.0,
              ang_sigma: float = 0.0175,
              rng=None):
    """
    Add Gaussian noise, in place, to the initial pose of every body whose
    ``bodyType`` is not 1 in each task's scene.

    Bodies with ``bodyType == 1`` are left untouched (presumably the static
    bodies, per the original comment -- verify against the PHYRE scene schema).

    Args:
        tasks: Sequence of task objects exposing ``scene.width``,
            ``scene.height`` and ``scene.bodies``; each body exposes
            ``bodyType``, ``position.x``, ``position.y`` and ``angle``.
        pos_pct: Currently ignored. Kept only so existing callers keep working;
            the noise scale is controlled by ``pos_sigma``.
        ang_pct: Currently ignored. Kept only so existing callers keep working;
            the noise scale is controlled by ``ang_sigma``.
        pos_sigma: Standard deviation of the noise added independently to x
            and y, in scene units. Defaults to the previously hard-coded 2.
        ang_sigma: Standard deviation of the noise added to the angle.
            Defaults to the previously hard-coded 0.0175 (about one degree if
            angles are in radians).
        rng: Optional object with a ``normal(scale=...)`` method, e.g. a
            ``numpy.random.Generator``. When None, NumPy's global RNG is used,
            so ``np.random.seed`` controls reproducibility.

    Returns:
        The same ``tasks`` object that was passed in; bodies are mutated in place.
    """
    noise = np.random if rng is None else rng

    for task in tasks:
        width, height = task.scene.width, task.scene.height

        for body in task.scene.bodies:
            if body.bodyType == 1:
                continue

            # Draw order (x, y, angle) is kept stable so seeded runs stay comparable.
            x_new = body.position.x + noise.normal(scale=pos_sigma)
            y_new = body.position.y + noise.normal(scale=pos_sigma)
            theta_new = body.angle + noise.normal(scale=ang_sigma)

            # Keep the body inside the scene and wrap the angle into [-pi, pi).
            body.position.x = np.clip(x_new, 0, width)
            body.position.y = np.clip(y_new, 0, height)
            body.angle = (theta_new + np.pi) % (2 * np.pi) - np.pi

    return tasks


def initialize_disturbed_simulator(task_ids: Sequence[str],
                         action_tier: str) -> ActionSimulator:
    """Build an ActionSimulator over perturbed copies of the given tasks.

    The compiled tasks for ``task_ids`` are loaded, passed through
    ``disturbed`` with its default settings, and wrapped in a simulator for
    ``action_tier``.
    """
    perturbed_tasks = disturbed(phyre.loader.load_compiled_task_list(task_ids))
    return ActionSimulator(perturbed_tasks, action_tier)


In [None]:
import re
import json
def strip_markdown(text: str) -> str:
    """Remove common Markdown decoration from ``text`` and return the result.

    Handles code fences (with an optional ``json`` language tag), ATX
    headings, bold/italic markers, inline-code backticks and bullet markers,
    then collapses runs of blank lines and strips surrounding whitespace.

    Only the ``json`` tag directly attached to a code fence is removed, so the
    word "json" inside the payload is preserved. Underscore emphasis is only
    recognised when the underscores are not embedded in a word, so snake_case
    identifiers such as ``task_idx`` are left intact.
    """
    # Code fences, together with a directly attached "json" language tag.
    text = re.sub(r"```(?:json\b)?", "", text, flags=re.IGNORECASE)

    # Leading '#' heading markers.
    text = re.sub(r"^#+\s*", "", text, flags=re.MULTILINE)

    # Bold first, then italic, so '**x**' is not half-consumed by the '*' rule.
    text = re.sub(r"\*\*(.*?)\*\*", r"\1", text)
    text = re.sub(r"(?<!\w)__(.*?)__(?!\w)", r"\1", text)
    text = re.sub(r"\*(.*?)\*", r"\1", text)
    text = re.sub(r"(?<!\w)_(.*?)_(?!\w)", r"\1", text)

    # Inline code and bullet-list markers.
    text = re.sub(r"`(.*?)`", r"\1", text)
    text = re.sub(r"^[-*+]\s+", "", text, flags=re.MULTILINE)

    # Collapse consecutive newlines into one.
    text = re.sub(r"\n{2,}", "\n", text)
    return text.strip()


# Control Policy
def decode(clean):
    """Parse a JSON list of action objects into ``[x, y, r]`` float triplets.

    ``clean`` is first parsed as JSON as-is. If that fails, the widest
    ``[...]`` span found in the text is parsed instead.

    Returns:
        A list with one ``[x, y, r]`` list of floats per action object.

    Raises:
        ValueError: if no JSON array can be found or parsed, if the decoded
            value is not a list, or if an element lacks usable ``x``/``y``/``r``
            entries.
    """
    try:
        parsed = json.loads(clean)
    except json.JSONDecodeError:
        # Fall back to the outermost bracketed span (DOTALL: may cover several lines).
        bracketed = re.search(r'\[.*\]', clean, re.DOTALL)
        if bracketed is None:
            raise ValueError("No valid JSON array found in decision.")
        try:
            parsed = json.loads(bracketed.group())
        except Exception as e:
            raise ValueError(f"Failed to parse JSON array: {e}")

    if not isinstance(parsed, list):
        raise ValueError("Decoded JSON is not a list of actions.")

    def _as_triplet(entry):
        # Any lookup or conversion problem is reported as a ValueError.
        try:
            return [float(entry['x']), float(entry['y']), float(entry['r'])]
        except Exception as e:
            raise ValueError(f"Action object missing or invalid keys: {e}")

    return [_as_triplet(entry) for entry in parsed]

In [20]:
import RouterBridge
import time
import importlib
# Re-execute the already-imported RouterBridge module so that edits made to it
# after the first import take effect without restarting the kernel.
importlib.reload(RouterBridge)

def solve_fn(task_idx, simulator, actions, max_attempts):
    """Collect candidate actions that solve ``task_idx`` on ``simulator``.

    Candidates are simulated in order and every solving action is kept, until
    ``max_attempts`` solving actions have been found.

    Args:
        task_idx: Index of the task within the simulator.
        simulator: Object exposing ``simulate_action(task_idx, action,
            need_images=..., need_featurized_objects=...)`` whose result has a
            ``status.is_solved()`` method.
        actions: Candidate actions to try, in order.
        max_attempts: Maximum number of solving actions to return (int).

    Returns:
        The solving actions found (at most ``max_attempts``). If none solves
        the task, a single randomly chosen candidate is returned instead; if
        ``actions`` is empty, an empty list is returned.
    """
    solving_actions = []
    for action in actions:
        # Images and featurized objects are skipped: only the status is needed.
        simulation = simulator.simulate_action(
            task_idx, action, need_images=False, need_featurized_objects=False)
        if simulation.status.is_solved():
            solving_actions.append(action)
            # Honour the caller's limit instead of a hard-coded 100.
            if len(solving_actions) >= max_attempts:
                break

    if solving_actions:
        return solving_actions

    print("no action")
    # len() rather than truthiness: `actions` may be a NumPy array.
    if len(actions) == 0:
        return []
    return [random.choice(actions)]


In [21]:
from tqdm import tqdm
def evaluate_custom_agent(tasks, tier, solve_fn, max_attempts=100, save_path="./results/phyre_gpt4o.json"):
    """Score an agent that plans on perturbed tasks against the unperturbed ones.

    A simulator over perturbed copies of ``tasks`` is used to pick actions
    (through ``solve_fn``). Each proposed action is then replayed on the
    unperturbed simulator and logged to a ``phyre.Evaluator``.

    Args:
        tasks: Task ids to evaluate.
        tier: Action tier name passed to both simulators.
        solve_fn: Callable ``(task_idx, simulator, actions, max_attempts)``
            returning the actions to attempt for that task.
        max_attempts: Maximum number of actions replayed per task.
        save_path: Currently unused; per-task result saving is not implemented
            here. Kept for interface compatibility.

    Returns:
        The ``phyre.Evaluator`` holding the logged attempts.
    """
    simulator = phyre.initialize_simulator(tasks, tier)
    evaluator = phyre.Evaluator(tasks)
    disturbed_simulator = initialize_disturbed_simulator(tasks, tier)
    actions = disturbed_simulator.build_discrete_action_space(max_actions=10000)
    print('A random action:', actions[0])
    print(len(actions))

    for task_idx in tqdm(range(len(tasks)), desc="Evaluating custom agent"):
        # Plan on the perturbed simulator ...
        proposed_actions = solve_fn(task_idx, disturbed_simulator, actions, max_attempts)

        # ... then score on the unperturbed one.
        attempts = 0
        for action in proposed_actions:
            # Enforce the limit on every attempt, not only after a solved one.
            if attempts >= max_attempts:
                break
            status = simulator.simulate_action(task_idx, action, need_images=False).status
            evaluator.maybe_log_attempt(task_idx, status)
            attempts += 1

    return evaluator



In [None]:

eval_setup = 'two_balls_within_template'
fold_id = 0  # For simplicity, we will just use one fold for evaluation.
train_tasks, dev_tasks, test_tasks = phyre.get_fold(eval_setup, fold_id)
tasks = test_tasks
print('Size of resulting splits:\n train:', len(train_tasks), '\n dev:',
      len(dev_tasks), '\n test:', len(test_tasks))

# Look the tier up once and reuse it for both the log line and the evaluation.
action_tier = phyre.eval_setup_to_action_tier(eval_setup)
print('Action tier for', eval_setup, 'is', action_tier)

tire = action_tier  # misspelled legacy name, kept as an alias in case later cells use it
evaluator = evaluate_custom_agent(tasks, action_tier, solve_fn, max_attempts=100)

print('AUC:', 
      evaluator.get_aucess())

Size of resulting splits:
 train: 1600 
 dev: 400 
 test: 500
Action tier for two_balls_within_template is two_balls
A random action: [4.17022005e-01 7.20324493e-01 1.14374817e-04 3.02332573e-01
 1.46755891e-01 9.23385948e-02]
10000


Evaluating custom agent:   9%|▊         | 43/500 [05:22<51:02,  6.70s/it]  

no action


Evaluating custom agent:   9%|▉         | 45/500 [05:28<36:04,  4.76s/it]

no action


Evaluating custom agent:   9%|▉         | 47/500 [05:36<34:06,  4.52s/it]

no action


Evaluating custom agent:  10%|▉         | 48/500 [05:41<33:41,  4.47s/it]

no action


Evaluating custom agent:  11%|█         | 56/500 [06:24<37:31,  5.07s/it]

no action


Evaluating custom agent:  12%|█▏        | 60/500 [06:47<38:45,  5.29s/it]

no action


Evaluating custom agent:  20%|██        | 101/500 [18:30<2:21:06, 21.22s/it]

no action


Evaluating custom agent:  21%|██        | 103/500 [19:09<2:10:28, 19.72s/it]

no action


Evaluating custom agent:  21%|██        | 106/500 [19:52<1:43:50, 15.81s/it]

no action


Evaluating custom agent:  23%|██▎       | 116/500 [23:02<2:12:02, 20.63s/it]

no action


Evaluating custom agent:  28%|██▊       | 141/500 [32:03<1:43:53, 17.36s/it]

no action


Evaluating custom agent:  29%|██▊       | 143/500 [32:17<1:10:46, 11.90s/it]

no action


Evaluating custom agent:  30%|██▉       | 149/500 [32:55<40:31,  6.93s/it]  

no action


Evaluating custom agent:  30%|███       | 151/500 [33:09<39:21,  6.77s/it]

no action


Evaluating custom agent:  31%|███       | 154/500 [33:27<35:27,  6.15s/it]

no action


Evaluating custom agent:  34%|███▍      | 169/500 [35:07<33:20,  6.04s/it]

no action


Evaluating custom agent:  36%|███▌      | 181/500 [36:32<42:04,  7.91s/it]

no action


Evaluating custom agent:  36%|███▋      | 182/500 [36:41<43:28,  8.20s/it]

no action


Evaluating custom agent:  37%|███▋      | 185/500 [37:10<47:23,  9.03s/it]

no action


Evaluating custom agent:  38%|███▊      | 189/500 [37:45<46:56,  9.06s/it]

no action


Evaluating custom agent:  38%|███▊      | 192/500 [38:13<47:26,  9.24s/it]

no action


Evaluating custom agent:  39%|███▊      | 193/500 [38:22<46:48,  9.15s/it]

no action


Evaluating custom agent:  40%|████      | 201/500 [39:35<44:39,  8.96s/it]

no action


Evaluating custom agent:  42%|████▏     | 208/500 [40:36<42:33,  8.75s/it]

no action


Evaluating custom agent:  42%|████▏     | 209/500 [40:44<42:08,  8.69s/it]

no action


Evaluating custom agent:  42%|████▏     | 210/500 [40:52<40:34,  8.39s/it]

no action


Evaluating custom agent:  43%|████▎     | 215/500 [41:35<41:53,  8.82s/it]

no action


Evaluating custom agent:  44%|████▍     | 220/500 [42:22<43:10,  9.25s/it]

no action


Evaluating custom agent:  44%|████▍     | 222/500 [42:36<36:56,  7.97s/it]

no action


Evaluating custom agent:  45%|████▌     | 226/500 [42:53<24:09,  5.29s/it]

no action


Evaluating custom agent:  45%|████▌     | 227/500 [42:57<22:10,  4.87s/it]

no action


Evaluating custom agent:  46%|████▌     | 228/500 [43:02<21:42,  4.79s/it]

no action


Evaluating custom agent:  46%|████▌     | 229/500 [43:06<21:04,  4.67s/it]

no action


Evaluating custom agent:  46%|████▌     | 230/500 [43:10<20:06,  4.47s/it]

no action


Evaluating custom agent:  47%|████▋     | 235/500 [43:40<23:45,  5.38s/it]

no action


Evaluating custom agent:  48%|████▊     | 238/500 [43:55<20:46,  4.76s/it]

no action


Evaluating custom agent:  49%|████▊     | 243/500 [44:53<55:22, 12.93s/it]

no action


Evaluating custom agent:  50%|████▉     | 248/500 [46:27<1:14:38, 17.77s/it]

no action


Evaluating custom agent:  50%|█████     | 251/500 [47:21<1:14:37, 17.98s/it]

no action


Evaluating custom agent:  51%|█████     | 253/500 [47:57<1:13:37, 17.89s/it]

no action


Evaluating custom agent:  52%|█████▏    | 260/500 [50:05<1:11:24, 17.85s/it]

no action


Evaluating custom agent:  53%|█████▎    | 263/500 [51:07<1:15:57, 19.23s/it]

no action


Evaluating custom agent:  53%|█████▎    | 267/500 [52:41<1:28:22, 22.76s/it]

no action


Evaluating custom agent:  54%|█████▎    | 268/500 [53:03<1:26:58, 22.49s/it]

no action


Evaluating custom agent:  54%|█████▍    | 269/500 [53:29<1:31:19, 23.72s/it]

no action


Evaluating custom agent:  54%|█████▍    | 270/500 [53:49<1:26:54, 22.67s/it]

no action


Evaluating custom agent:  56%|█████▋    | 282/500 [58:07<59:56, 16.50s/it]  

no action


Evaluating custom agent:  57%|█████▋    | 283/500 [58:17<52:19, 14.47s/it]

no action


Evaluating custom agent:  57%|█████▋    | 286/500 [58:44<37:42, 10.57s/it]

no action


Evaluating custom agent:  58%|█████▊    | 290/500 [59:26<35:44, 10.21s/it]

no action


Evaluating custom agent:  59%|█████▉    | 294/500 [1:00:08<38:09, 11.11s/it]

no action


Evaluating custom agent:  59%|█████▉    | 296/500 [1:00:25<33:08,  9.75s/it]

no action


Evaluating custom agent:  60%|██████    | 302/500 [1:01:15<27:12,  8.25s/it]

no action


Evaluating custom agent:  61%|██████    | 306/500 [1:01:51<27:54,  8.63s/it]

no action


Evaluating custom agent:  62%|██████▏   | 309/500 [1:02:19<28:20,  8.90s/it]

no action


Evaluating custom agent:  62%|██████▏   | 312/500 [1:02:41<25:15,  8.06s/it]

no action


Evaluating custom agent:  63%|██████▎   | 313/500 [1:02:49<24:44,  7.94s/it]

no action


Evaluating custom agent:  64%|██████▍   | 321/500 [1:03:53<29:12,  9.79s/it]

no action


Evaluating custom agent:  64%|██████▍   | 322/500 [1:04:08<32:55, 11.10s/it]

no action


Evaluating custom agent:  65%|██████▍   | 323/500 [1:04:21<34:50, 11.81s/it]

no action


Evaluating custom agent:  65%|██████▍   | 324/500 [1:04:33<34:50, 11.88s/it]

no action


Evaluating custom agent:  65%|██████▌   | 325/500 [1:04:47<36:34, 12.54s/it]

no action


Evaluating custom agent:  66%|██████▌   | 328/500 [1:05:24<37:36, 13.12s/it]

no action


Evaluating custom agent:  66%|██████▌   | 329/500 [1:05:40<39:57, 14.02s/it]

no action


Evaluating custom agent:  66%|██████▌   | 330/500 [1:05:53<38:03, 13.43s/it]

no action


Evaluating custom agent:  66%|██████▌   | 331/500 [1:06:03<35:00, 12.43s/it]

no action


Evaluating custom agent:  66%|██████▋   | 332/500 [1:06:13<33:22, 11.92s/it]

no action


Evaluating custom agent:  67%|██████▋   | 334/500 [1:06:37<33:20, 12.05s/it]

no action


Evaluating custom agent:  67%|██████▋   | 336/500 [1:06:58<30:58, 11.33s/it]

no action


Evaluating custom agent:  68%|██████▊   | 339/500 [1:07:31<29:35, 11.03s/it]

no action


Evaluating custom agent:  68%|██████▊   | 341/500 [1:08:04<37:37, 14.20s/it]

no action


Evaluating custom agent:  68%|██████▊   | 342/500 [1:08:22<40:07, 15.24s/it]

no action


Evaluating custom agent:  69%|██████▊   | 343/500 [1:08:41<42:58, 16.43s/it]

no action


Evaluating custom agent:  69%|██████▉   | 344/500 [1:09:03<46:42, 17.96s/it]

no action


Evaluating custom agent:  69%|██████▉   | 345/500 [1:09:25<49:55, 19.32s/it]

no action


Evaluating custom agent:  69%|██████▉   | 347/500 [1:10:07<51:36, 20.24s/it]

no action


Evaluating custom agent:  70%|██████▉   | 348/500 [1:10:30<53:12, 21.01s/it]

no action


Evaluating custom agent:  70%|██████▉   | 349/500 [1:10:51<52:59, 21.06s/it]

no action


Evaluating custom agent:  70%|███████   | 351/500 [1:11:30<49:46, 20.04s/it]

no action


Evaluating custom agent:  70%|███████   | 352/500 [1:11:50<49:59, 20.27s/it]

no action


Evaluating custom agent:  71%|███████   | 354/500 [1:12:29<47:44, 19.62s/it]

no action


Evaluating custom agent:  71%|███████   | 355/500 [1:12:50<48:39, 20.14s/it]

no action


Evaluating custom agent:  71%|███████▏  | 357/500 [1:13:33<49:36, 20.81s/it]

no action


Evaluating custom agent:  72%|███████▏  | 359/500 [1:14:15<49:02, 20.87s/it]

no action


Evaluating custom agent:  73%|███████▎  | 365/500 [1:15:12<24:39, 10.96s/it]

no action


Evaluating custom agent:  74%|███████▍  | 372/500 [1:16:06<21:09,  9.92s/it]

no action


Evaluating custom agent:  75%|███████▍  | 373/500 [1:16:20<24:01, 11.35s/it]

no action


Evaluating custom agent:  75%|███████▌  | 376/500 [1:16:45<19:51,  9.61s/it]

no action


Evaluating custom agent:  76%|███████▌  | 381/500 [1:17:42<23:00, 11.60s/it]

no action


Evaluating custom agent:  76%|███████▋  | 382/500 [1:17:58<25:38, 13.04s/it]

no action


Evaluating custom agent:  77%|███████▋  | 383/500 [1:18:13<26:05, 13.38s/it]

no action


Evaluating custom agent:  77%|███████▋  | 385/500 [1:18:48<30:34, 15.95s/it]

no action


Evaluating custom agent:  77%|███████▋  | 386/500 [1:19:08<32:08, 16.91s/it]

no action


Evaluating custom agent:  77%|███████▋  | 387/500 [1:19:25<32:12, 17.10s/it]

no action


Evaluating custom agent:  78%|███████▊  | 388/500 [1:19:41<31:09, 16.69s/it]

no action


Evaluating custom agent:  78%|███████▊  | 389/500 [1:19:56<29:50, 16.13s/it]

no action


Evaluating custom agent:  78%|███████▊  | 390/500 [1:20:11<29:03, 15.85s/it]

no action


Evaluating custom agent:  78%|███████▊  | 392/500 [1:20:51<32:13, 17.90s/it]

no action


Evaluating custom agent:  79%|███████▊  | 393/500 [1:21:09<31:59, 17.94s/it]

no action


Evaluating custom agent:  79%|███████▉  | 394/500 [1:21:25<30:34, 17.31s/it]

no action


Evaluating custom agent:  79%|███████▉  | 395/500 [1:21:45<31:50, 18.19s/it]

no action


Evaluating custom agent:  79%|███████▉  | 396/500 [1:22:06<32:49, 18.94s/it]

no action


Evaluating custom agent:  79%|███████▉  | 397/500 [1:22:27<33:25, 19.48s/it]

no action


Evaluating custom agent:  80%|███████▉  | 399/500 [1:23:06<32:53, 19.54s/it]

no action


Evaluating custom agent:  80%|████████  | 400/500 [1:23:27<33:16, 19.97s/it]

no action


Evaluating custom agent:  80%|████████  | 401/500 [1:23:33<26:10, 15.87s/it]

no action


Evaluating custom agent:  80%|████████  | 402/500 [1:23:38<20:47, 12.73s/it]

no action


Evaluating custom agent:  81%|████████  | 404/500 [1:23:50<14:48,  9.26s/it]

no action


Evaluating custom agent:  81%|████████  | 405/500 [1:23:55<12:44,  8.04s/it]

no action


Evaluating custom agent:  81%|████████▏ | 407/500 [1:24:07<10:33,  6.81s/it]

no action


Evaluating custom agent:  82%|████████▏ | 409/500 [1:24:17<08:51,  5.84s/it]

no action


Evaluating custom agent:  82%|████████▏ | 410/500 [1:24:23<09:02,  6.03s/it]

no action


Evaluating custom agent:  83%|████████▎ | 414/500 [1:24:45<07:59,  5.58s/it]

no action


Evaluating custom agent:  83%|████████▎ | 416/500 [1:24:55<07:15,  5.18s/it]

no action


Evaluating custom agent:  83%|████████▎ | 417/500 [1:25:00<07:06,  5.13s/it]

no action


Evaluating custom agent:  84%|████████▎ | 418/500 [1:25:06<07:13,  5.28s/it]

no action


Evaluating custom agent:  84%|████████▍ | 419/500 [1:25:13<07:39,  5.67s/it]

no action


Evaluating custom agent:  84%|████████▍ | 422/500 [1:25:35<08:58,  6.90s/it]

no action


Evaluating custom agent:  85%|████████▍ | 423/500 [1:25:44<09:46,  7.61s/it]

no action


Evaluating custom agent:  85%|████████▍ | 424/500 [1:25:54<10:37,  8.39s/it]

no action


Evaluating custom agent:  85%|████████▌ | 427/500 [1:26:17<09:28,  7.79s/it]

no action


Evaluating custom agent:  86%|████████▌ | 428/500 [1:26:27<10:03,  8.39s/it]

no action


Evaluating custom agent:  86%|████████▌ | 431/500 [1:26:52<10:03,  8.74s/it]

no action


Evaluating custom agent:  87%|████████▋ | 433/500 [1:27:16<11:49, 10.59s/it]

no action


Evaluating custom agent:  87%|████████▋ | 434/500 [1:27:27<11:57, 10.88s/it]

no action


Evaluating custom agent:  87%|████████▋ | 437/500 [1:27:51<09:27,  9.01s/it]

no action


Evaluating custom agent:  88%|████████▊ | 438/500 [1:28:01<09:22,  9.07s/it]

no action


Evaluating custom agent:  88%|████████▊ | 441/500 [1:28:24<08:18,  8.44s/it]

no action


Evaluating custom agent:  89%|████████▊ | 443/500 [1:28:39<07:26,  7.83s/it]

no action


Evaluating custom agent:  89%|████████▉ | 444/500 [1:28:46<07:02,  7.54s/it]

no action


Evaluating custom agent:  89%|████████▉ | 445/500 [1:28:54<07:01,  7.67s/it]

no action


Evaluating custom agent:  89%|████████▉ | 446/500 [1:29:02<07:08,  7.93s/it]

no action


Evaluating custom agent:  89%|████████▉ | 447/500 [1:29:12<07:30,  8.51s/it]

no action


Evaluating custom agent:  90%|████████▉ | 449/500 [1:29:30<07:21,  8.65s/it]

no action


Evaluating custom agent:  90%|█████████ | 451/500 [1:29:48<07:13,  8.85s/it]

no action


Evaluating custom agent:  90%|█████████ | 452/500 [1:29:56<06:51,  8.57s/it]

no action


Evaluating custom agent:  91%|█████████ | 453/500 [1:30:04<06:28,  8.28s/it]

no action


Evaluating custom agent:  91%|█████████ | 454/500 [1:30:14<06:46,  8.84s/it]

no action


Evaluating custom agent:  91%|█████████ | 455/500 [1:30:22<06:22,  8.49s/it]

no action


Evaluating custom agent:  91%|█████████ | 456/500 [1:30:31<06:20,  8.64s/it]

no action


Evaluating custom agent:  91%|█████████▏| 457/500 [1:30:41<06:31,  9.10s/it]

no action


Evaluating custom agent:  92%|█████████▏| 458/500 [1:30:48<05:57,  8.51s/it]

no action


Evaluating custom agent:  92%|█████████▏| 460/500 [1:31:07<06:05,  9.15s/it]

no action


Evaluating custom agent:  92%|█████████▏| 461/500 [1:31:16<05:49,  8.97s/it]

no action


Evaluating custom agent:  92%|█████████▏| 462/500 [1:31:25<05:50,  9.22s/it]

no action


Evaluating custom agent:  93%|█████████▎| 463/500 [1:31:35<05:44,  9.31s/it]

no action


Evaluating custom agent:  93%|█████████▎| 464/500 [1:31:44<05:27,  9.09s/it]

no action


Evaluating custom agent:  93%|█████████▎| 465/500 [1:31:52<05:09,  8.85s/it]

no action


Evaluating custom agent:  93%|█████████▎| 466/500 [1:32:00<04:58,  8.78s/it]

no action


Evaluating custom agent:  93%|█████████▎| 467/500 [1:32:11<05:05,  9.24s/it]

no action


Evaluating custom agent:  94%|█████████▎| 468/500 [1:32:21<05:09,  9.67s/it]

no action


Evaluating custom agent:  94%|█████████▍| 469/500 [1:32:33<05:22, 10.39s/it]

no action


Evaluating custom agent:  94%|█████████▍| 471/500 [1:32:50<04:34,  9.45s/it]

no action


Evaluating custom agent:  94%|█████████▍| 472/500 [1:33:01<04:38,  9.94s/it]

no action


Evaluating custom agent:  95%|█████████▍| 473/500 [1:33:12<04:29,  9.99s/it]

no action


Evaluating custom agent:  95%|█████████▌| 475/500 [1:33:29<03:53,  9.34s/it]

no action


Evaluating custom agent:  95%|█████████▌| 476/500 [1:33:38<03:38,  9.11s/it]

no action


Evaluating custom agent:  95%|█████████▌| 477/500 [1:33:46<03:25,  8.95s/it]

no action


Evaluating custom agent:  96%|█████████▌| 479/500 [1:34:02<02:57,  8.44s/it]

no action


Evaluating custom agent:  96%|█████████▌| 480/500 [1:34:12<02:58,  8.91s/it]

no action


Evaluating custom agent:  96%|█████████▋| 482/500 [1:35:08<05:33, 18.52s/it]

no action


Evaluating custom agent:  97%|█████████▋| 483/500 [1:35:30<05:29, 19.41s/it]

no action


Evaluating custom agent:  97%|█████████▋| 484/500 [1:35:54<05:32, 20.79s/it]

no action


Evaluating custom agent:  97%|█████████▋| 485/500 [1:36:09<04:46, 19.07s/it]

no action


Evaluating custom agent:  97%|█████████▋| 486/500 [1:36:31<04:42, 20.17s/it]

no action


Evaluating custom agent:  97%|█████████▋| 487/500 [1:36:50<04:17, 19.83s/it]

no action


Evaluating custom agent:  98%|█████████▊| 488/500 [1:37:14<04:11, 20.96s/it]

no action


Evaluating custom agent:  98%|█████████▊| 489/500 [1:37:33<03:42, 20.25s/it]

no action


Evaluating custom agent:  98%|█████████▊| 490/500 [1:37:48<03:06, 18.68s/it]

no action


Evaluating custom agent:  98%|█████████▊| 492/500 [1:38:26<02:34, 19.26s/it]

no action


Evaluating custom agent:  99%|█████████▊| 493/500 [1:38:58<02:42, 23.22s/it]

no action


Evaluating custom agent:  99%|█████████▉| 495/500 [1:39:54<02:10, 26.13s/it]

no action


Evaluating custom agent:  99%|█████████▉| 496/500 [1:40:10<01:31, 22.98s/it]

no action


Evaluating custom agent:  99%|█████████▉| 497/500 [1:40:24<01:00, 20.11s/it]

no action


Evaluating custom agent: 100%|█████████▉| 498/500 [1:40:46<00:41, 20.78s/it]

no action


Evaluating custom agent: 100%|█████████▉| 499/500 [1:41:06<00:20, 20.63s/it]

no action


Evaluating custom agent: 100%|██████████| 500/500 [1:41:33<00:00, 12.19s/it]

no action



Used 12.046000 attempts per task instead of maximum allowed 100.000000. That probably indicate a bug in evaluation loop.


AUC: 0.42357294356005815
