
# Simple DQN

In [8]:
!uv venv
!uv pip install -r ClusterEnv/pyproject.toml

Using CPython 3.10.12 interpreter at: [36m/usr/bin/python3[39m
Creating virtual environment at: [36m.venv[39m
Activate with: [32msource .venv/bin/activate[39m
[2K  [31m×[0m No solution found when resolving dependencies:
[31m  ╰─▶ [0mBecause the current Python version (3.10.12) does not satisfy
[31m      [0mPython>=3.11,<4.0 and tianshou>=1.0.0 depends on Python>=3.11,<4.0, we
[31m      [0mcan conclude that tianshou>=1.0.0 cannot be used.
[31m      [0mAnd because only the following versions of tianshou are available:
[31m      [0m    tianshou<=1.0.0
[31m      [0m    tianshou==1.1.0
[31m      [0mand clusterenv depends on tianshou>=1.0.0, we can conclude that your
[31m      [0mrequirements are unsatisfiable.


### Imports

In [1]:
from tianshou.utils.logger.tensorboard import SummaryWriter
import gymnasium as gym
import tianshou as ts
import numpy as np
import torch as th
import typing

ModuleNotFoundError: No module named 'tianshou'

In [None]:
# TODO:
#  Run For The Following sizes:
#  Machines: 2 Jobs: 2 Resource: 1 time: 1
#  Machines: 2 Jobs: 20 Resource: 1 time: 1
#  Machines: 2 Jobs: 20 Resource: 3 time: 5
#  Machines: 2 Jobs: 20 Resource: 3 time: 10

In [None]:
from envs.cluster import ClusterWrapper

def scenario_0(q_size: int = 2):
    return ClusterWrapper(
        gym.make(
            "Cluster-v0",
            n_machines=2,
            n_jobs=2,
            n_resource=1,
            max_ticks=1,
            max_episode_steps=10
        ),
        queue_size=q_size,
    )

def scenario_1(q_size: int = 2):
    return ClusterWrapper(
        gym.make(
            "Cluster-v0",
            n_machines=2,
            n_jobs=20,
            n_resource=1,
            max_ticks=1,
            max_episode_steps=100
        ),

        queue_size=q_size,
    )

def scenario_2(q_size: int = 2):
    return ClusterWrapper(
        gym.make(
            "Cluster-v0",
            n_machines=2,
            n_jobs=20,
            n_resource=3,
            max_ticks=5,
            max_episode_steps=1_000
        ),
        queue_size=q_size,
    )

def scenario_3(q_size: int = 2):
    return ClusterWrapper(
        gym.make(
            "Cluster-v0",
            n_machines=2,
            n_jobs=20,
            n_resource=3,
            max_ticks=10,
            max_episode_steps=3_000
        ),
        queue_size=q_size,
    )

In [None]:
def run(
    model_cls,
    gen_env: typing.Callable[[],gym.Env],
    *,
    lr: float = 1e-3,
    max_epoch: int = 100,
    episode_per_collect: int = 10,
    step_per_epoch: int = 200,
    batch_size: int = 32,
    episode_per_test=1,
    repeat_per_collect=1
):
    env = gen_env()
    model = model_cls(env.observation_space, env.action_space)
    train_envs = ts.env.DummyVectorEnv([gen_env for _ in range(1)])
    test_envs = ts.env.DummyVectorEnv([gen_env for _ in range(1)])
    optimizer = th.optim.Adam(model.parameters(), lr=lr)
    logger = ts.utils.TensorboardLogger(writer=SummaryWriter(f"assets/log/dqn/{type(model).__name__}_{gen_env.__name__}"))
    policy = ts.policy.DQNPolicy(
        model=model,
        optim=optimizer,
        action_space=env.action_space,
    )
    train_collector = ts.data.Collector(policy, train_envs, ts.data.VectorReplayBuffer(10_000, 10), exploration_noise=True)
    test_collector = ts.data.Collector(policy, test_envs, exploration_noise=True)
    val =  ts.trainer.OffpolicyTrainer(
        policy=policy,
        train_collector=train_collector,
        test_collector=test_collector,
        max_epoch=max_epoch,
        episode_per_collect=episode_per_collect,
        step_per_epoch=step_per_epoch,
        batch_size=batch_size,
        episode_per_test=episode_per_test,
        logger=logger,
        repeat_per_collect=repeat_per_collect,
    ).run()
    th.save(policy.state_dict(), f"assets/policy/{type(model).__name__}_{gen_env.__name__}.pth")
    return val

## Models

In [None]:
class SimpleNN(th.nn.Module):
    def __init__(self, state: dict, action: dict):
        super().__init__()
        machines = state["machinesAvailability"]
        jobs = state["jobsUsage"]
        combined: int = np.prod(machines.shape) + np.prod(jobs.shape)
        self.model = th.nn.Sequential(
            th.nn.Linear(combined, 64),
            th.nn.ReLU(),
            th.nn.Linear(64, 64),
            th.nn.ReLU(),
            th.nn.Linear(64, action.n),
            th.nn.Softmax(dim=-1),
        )

    def forward(self, obs, state=None, **kwargs):
        if not isinstance(obs, dict):
            obs = {k: th.tensor(v, dtype=th.float) for k, v in obs.items()}
        batch_size = obs["machinesAvailability"].shape[0]
        machines = obs["machinesAvailability"].view(batch_size, -1)
        jobs = obs["jobsUsage"].view(batch_size, -1)
        x = th.cat([machines, jobs], dim=1)
        logits = self.model(x)
        return logits, state

In [None]:
class SimpleConv(th.nn.Module):
    def __init__(self, state: dict, action: gym.Space, *args, **kwargs):
        super().__init__()
        machines = state["machinesAvailability"]
        jobs = state["jobsUsage"]
        self.in_channels: int = max(jobs.shape[0], machines.shape[0])
        self.embeder = th.nn.Sequential(
            th.nn.Conv2d(in_channels=self.in_channels, out_channels=64, kernel_size=(1, machines.shape[2])),
            th.nn.ReLU(),
            th.nn.Conv2d(in_channels=64, out_channels=32, kernel_size=(jobs.shape[1], 1)),
            th.nn.ReLU(),
            th.nn.Conv2d(in_channels=32, out_channels=16, kernel_size=(1, 1)),
            th.nn.Flatten(),
            th.nn.ReLU(),
            th.nn.Linear(in_features=16, out_features=16),
            th.nn.ReLU(),
        )
        self.clf = th.nn.Sequential(
            th.nn.Linear(in_features=32, out_features=64),
            th.nn.ReLU(),
            th.nn.AvgPool1d(2),
            th.nn.Linear(in_features=32, out_features=action.n),
            th.nn.Softmax()
        )


    def forward(self, obs, state=None, **kwargs):
        if not isinstance(obs, dict):
            obs = {k: th.tensor(v, dtype=th.float) for k, v in obs.items()}
        machines = obs["machinesAvailability"]
        jobs = obs["jobsUsage"]
        machines_emb = self.embeder(machines)
        jobs_emb = self.embeder(jobs)
        x = th.cat([machines_emb, jobs_emb], dim=1)
        logits = self.clf(x)
        return logits, state

## Runs

### Simple NN

In [None]:
run(SimpleNN, scenario_0)

Epoch #1: 260it [00:00, 585.35it/s, env_step=260, gradient_step=260, len=10, n/ep=10, n/st=100, rew=-2.83]                         

Epoch #1: test_reward: -2.828968 ± 0.000000, best_reward: -2.000000 ± 0.000000 in #0



Epoch #2: 201it [00:00, 623.46it/s, env_step=460, gradient_step=460, len=10, n/ep=10, n/st=100, rew=-2.83]                         

Epoch #2: test_reward: -2.000000 ± 0.000000, best_reward: -2.000000 ± 0.000000 in #0



Epoch #3: 210it [00:00, 539.57it/s, env_step=670, gradient_step=670, len=2, n/ep=10, n/st=20, rew=0.00]                         

Epoch #3: test_reward: 0.000000 ± 0.000000, best_reward: 0.000000 ± 0.000000 in #3



Epoch #4: 201it [00:00, 535.12it/s, env_step=870, gradient_step=870, len=2, n/ep=10, n/st=20, rew=0.00]                         

Epoch #4: test_reward: 0.000000 ± 0.000000, best_reward: 0.000000 ± 0.000000 in #3



Epoch #5: 201it [00:00, 543.57it/s, env_step=1070, gradient_step=1070, len=2, n/ep=10, n/st=20, rew=0.00]                         

Epoch #5: test_reward: 0.000000 ± 0.000000, best_reward: 0.000000 ± 0.000000 in #3



Epoch #6: 201it [00:00, 532.76it/s, env_step=1270, gradient_step=1270, len=2, n/ep=10, n/st=20, rew=0.00]                         

Epoch #6: test_reward: 0.000000 ± 0.000000, best_reward: 0.000000 ± 0.000000 in #3



Epoch #7: 201it [00:00, 518.82it/s, env_step=1470, gradient_step=1470, len=2, n/ep=10, n/st=20, rew=0.00]                         

Epoch #7: test_reward: 0.000000 ± 0.000000, best_reward: 0.000000 ± 0.000000 in #3



Epoch #8: 201it [00:00, 549.50it/s, env_step=1670, gradient_step=1670, len=2, n/ep=10, n/st=20, rew=0.00]                         

Epoch #8: test_reward: 0.000000 ± 0.000000, best_reward: 0.000000 ± 0.000000 in #3



Epoch #9: 201it [00:00, 544.19it/s, env_step=1870, gradient_step=1870, len=2, n/ep=10, n/st=20, rew=0.00]                         

Epoch #9: test_reward: 0.000000 ± 0.000000, best_reward: 0.000000 ± 0.000000 in #3



Epoch #10: 201it [00:00, 558.12it/s, env_step=2070, gradient_step=2070, len=2, n/ep=10, n/st=20, rew=0.00]                         

Epoch #10: test_reward: 0.000000 ± 0.000000, best_reward: 0.000000 ± 0.000000 in #3



Epoch #11: 201it [00:00, 541.66it/s, env_step=2270, gradient_step=2270, len=2, n/ep=10, n/st=20, rew=0.00]                         

Epoch #11: test_reward: 0.000000 ± 0.000000, best_reward: 0.000000 ± 0.000000 in #3



Epoch #12: 201it [00:00, 540.66it/s, env_step=2470, gradient_step=2470, len=2, n/ep=10, n/st=20, rew=0.00]                         

Epoch #12: test_reward: 0.000000 ± 0.000000, best_reward: 0.000000 ± 0.000000 in #3



Epoch #13: 201it [00:00, 568.73it/s, env_step=2670, gradient_step=2670, len=2, n/ep=10, n/st=20, rew=0.00]                         

Epoch #13: test_reward: 0.000000 ± 0.000000, best_reward: 0.000000 ± 0.000000 in #3



Epoch #14: 201it [00:00, 538.59it/s, env_step=2870, gradient_step=2870, len=2, n/ep=10, n/st=20, rew=0.00]                         

Epoch #14: test_reward: 0.000000 ± 0.000000, best_reward: 0.000000 ± 0.000000 in #3



Epoch #15: 201it [00:00, 532.68it/s, env_step=3070, gradient_step=3070, len=2, n/ep=10, n/st=20, rew=0.00]                         

Epoch #15: test_reward: 0.000000 ± 0.000000, best_reward: 0.000000 ± 0.000000 in #3



Epoch #16: 201it [00:00, 545.85it/s, env_step=3270, gradient_step=3270, len=2, n/ep=10, n/st=20, rew=0.00]                         

Epoch #16: test_reward: 0.000000 ± 0.000000, best_reward: 0.000000 ± 0.000000 in #3



Epoch #17: 201it [00:00, 550.23it/s, env_step=3470, gradient_step=3470, len=2, n/ep=10, n/st=20, rew=0.00]                         

Epoch #17: test_reward: 0.000000 ± 0.000000, best_reward: 0.000000 ± 0.000000 in #3



Epoch #18: 201it [00:00, 555.90it/s, env_step=3670, gradient_step=3670, len=2, n/ep=10, n/st=20, rew=0.00]                         

Epoch #18: test_reward: 0.000000 ± 0.000000, best_reward: 0.000000 ± 0.000000 in #3



Epoch #19: 201it [00:00, 561.87it/s, env_step=3870, gradient_step=3870, len=2, n/ep=10, n/st=20, rew=0.00]                         

Epoch #19: test_reward: 0.000000 ± 0.000000, best_reward: 0.000000 ± 0.000000 in #3



Epoch #20: 201it [00:00, 518.94it/s, env_step=4070, gradient_step=4070, len=2, n/ep=10, n/st=20, rew=0.00]                         

Epoch #20: test_reward: 0.000000 ± 0.000000, best_reward: 0.000000 ± 0.000000 in #3



Epoch #21: 201it [00:00, 553.61it/s, env_step=4270, gradient_step=4270, len=2, n/ep=10, n/st=20, rew=0.00]                         

Epoch #21: test_reward: 0.000000 ± 0.000000, best_reward: 0.000000 ± 0.000000 in #3



Epoch #22: 201it [00:00, 544.61it/s, env_step=4470, gradient_step=4470, len=2, n/ep=10, n/st=20, rew=0.00]                         

Epoch #22: test_reward: 0.000000 ± 0.000000, best_reward: 0.000000 ± 0.000000 in #3



Epoch #23: 201it [00:00, 555.97it/s, env_step=4670, gradient_step=4670, len=2, n/ep=10, n/st=20, rew=0.00]                         

Epoch #23: test_reward: 0.000000 ± 0.000000, best_reward: 0.000000 ± 0.000000 in #3



Epoch #24: 201it [00:00, 559.71it/s, env_step=4870, gradient_step=4870, len=2, n/ep=10, n/st=20, rew=0.00]                         

Epoch #24: test_reward: 0.000000 ± 0.000000, best_reward: 0.000000 ± 0.000000 in #3



Epoch #25: 201it [00:00, 540.05it/s, env_step=5070, gradient_step=5070, len=2, n/ep=10, n/st=20, rew=0.00]                         

Epoch #25: test_reward: 0.000000 ± 0.000000, best_reward: 0.000000 ± 0.000000 in #3



Epoch #26: 201it [00:00, 559.95it/s, env_step=5270, gradient_step=5270, len=2, n/ep=10, n/st=20, rew=0.00]                         

Epoch #26: test_reward: 0.000000 ± 0.000000, best_reward: 0.000000 ± 0.000000 in #3



Epoch #27: 201it [00:00, 535.90it/s, env_step=5470, gradient_step=5470, len=2, n/ep=10, n/st=20, rew=0.00]                         

Epoch #27: test_reward: 0.000000 ± 0.000000, best_reward: 0.000000 ± 0.000000 in #3



Epoch #28: 201it [00:00, 545.83it/s, env_step=5670, gradient_step=5670, len=2, n/ep=10, n/st=20, rew=0.00]                         

Epoch #28: test_reward: 0.000000 ± 0.000000, best_reward: 0.000000 ± 0.000000 in #3



Epoch #29: 201it [00:00, 556.31it/s, env_step=5870, gradient_step=5870, len=2, n/ep=10, n/st=20, rew=0.00]                         

Epoch #29: test_reward: 0.000000 ± 0.000000, best_reward: 0.000000 ± 0.000000 in #3



Epoch #30: 201it [00:00, 496.38it/s, env_step=6070, gradient_step=6070, len=2, n/ep=10, n/st=20, rew=0.00]                         

Epoch #30: test_reward: 0.000000 ± 0.000000, best_reward: 0.000000 ± 0.000000 in #3



Epoch #31: 201it [00:00, 523.39it/s, env_step=6270, gradient_step=6270, len=2, n/ep=10, n/st=20, rew=0.00]                         

Epoch #31: test_reward: 0.000000 ± 0.000000, best_reward: 0.000000 ± 0.000000 in #3



Epoch #32: 201it [00:00, 482.47it/s, env_step=6470, gradient_step=6470, len=2, n/ep=10, n/st=20, rew=0.00]                         

Epoch #32: test_reward: 0.000000 ± 0.000000, best_reward: 0.000000 ± 0.000000 in #3



Epoch #33: 201it [00:00, 548.66it/s, env_step=6670, gradient_step=6670, len=2, n/ep=10, n/st=20, rew=0.00]                         

Epoch #33: test_reward: 0.000000 ± 0.000000, best_reward: 0.000000 ± 0.000000 in #3



Epoch #34: 201it [00:00, 547.55it/s, env_step=6870, gradient_step=6870, len=2, n/ep=10, n/st=20, rew=0.00]                         

Epoch #34: test_reward: 0.000000 ± 0.000000, best_reward: 0.000000 ± 0.000000 in #3



Epoch #35: 201it [00:00, 553.49it/s, env_step=7070, gradient_step=7070, len=2, n/ep=10, n/st=20, rew=0.00]                         

Epoch #35: test_reward: 0.000000 ± 0.000000, best_reward: 0.000000 ± 0.000000 in #3



Epoch #36: 201it [00:00, 557.76it/s, env_step=7270, gradient_step=7270, len=2, n/ep=10, n/st=20, rew=0.00]                         

Epoch #36: test_reward: 0.000000 ± 0.000000, best_reward: 0.000000 ± 0.000000 in #3



Epoch #37: 201it [00:00, 538.32it/s, env_step=7470, gradient_step=7470, len=2, n/ep=10, n/st=20, rew=0.00]                         

Epoch #37: test_reward: 0.000000 ± 0.000000, best_reward: 0.000000 ± 0.000000 in #3



Epoch #38: 201it [00:00, 547.20it/s, env_step=7670, gradient_step=7670, len=2, n/ep=10, n/st=20, rew=0.00]                         

Epoch #38: test_reward: 0.000000 ± 0.000000, best_reward: 0.000000 ± 0.000000 in #3



Epoch #39: 201it [00:00, 564.52it/s, env_step=7870, gradient_step=7870, len=2, n/ep=10, n/st=20, rew=0.00]                         

Epoch #39: test_reward: 0.000000 ± 0.000000, best_reward: 0.000000 ± 0.000000 in #3



Epoch #40: 201it [00:00, 564.11it/s, env_step=8070, gradient_step=8070, len=2, n/ep=10, n/st=20, rew=0.00]                         

Epoch #40: test_reward: 0.000000 ± 0.000000, best_reward: 0.000000 ± 0.000000 in #3



Epoch #41: 201it [00:00, 558.75it/s, env_step=8270, gradient_step=8270, len=2, n/ep=10, n/st=20, rew=0.00]                         

Epoch #41: test_reward: 0.000000 ± 0.000000, best_reward: 0.000000 ± 0.000000 in #3



Epoch #42: 201it [00:00, 562.15it/s, env_step=8470, gradient_step=8470, len=2, n/ep=10, n/st=20, rew=0.00]                         

Epoch #42: test_reward: 0.000000 ± 0.000000, best_reward: 0.000000 ± 0.000000 in #3



Epoch #43: 201it [00:00, 563.59it/s, env_step=8670, gradient_step=8670, len=2, n/ep=10, n/st=20, rew=0.00]                         

Epoch #43: test_reward: 0.000000 ± 0.000000, best_reward: 0.000000 ± 0.000000 in #3



Epoch #44: 201it [00:00, 550.63it/s, env_step=8870, gradient_step=8870, len=2, n/ep=10, n/st=20, rew=0.00]                         

Epoch #44: test_reward: 0.000000 ± 0.000000, best_reward: 0.000000 ± 0.000000 in #3



Epoch #45: 201it [00:00, 548.56it/s, env_step=9070, gradient_step=9070, len=2, n/ep=10, n/st=20, rew=0.00]                         

Epoch #45: test_reward: 0.000000 ± 0.000000, best_reward: 0.000000 ± 0.000000 in #3



Epoch #46: 201it [00:00, 554.17it/s, env_step=9270, gradient_step=9270, len=2, n/ep=10, n/st=20, rew=0.00]                         

Epoch #46: test_reward: 0.000000 ± 0.000000, best_reward: 0.000000 ± 0.000000 in #3



Epoch #47: 201it [00:00, 538.19it/s, env_step=9470, gradient_step=9470, len=2, n/ep=10, n/st=20, rew=0.00]                         

Epoch #47: test_reward: 0.000000 ± 0.000000, best_reward: 0.000000 ± 0.000000 in #3



Epoch #48: 201it [00:00, 571.65it/s, env_step=9670, gradient_step=9670, len=2, n/ep=10, n/st=20, rew=0.00]                         

Epoch #48: test_reward: 0.000000 ± 0.000000, best_reward: 0.000000 ± 0.000000 in #3



Epoch #49: 201it [00:00, 539.41it/s, env_step=9870, gradient_step=9870, len=2, n/ep=10, n/st=20, rew=0.00]                         

Epoch #49: test_reward: 0.000000 ± 0.000000, best_reward: 0.000000 ± 0.000000 in #3



Epoch #50: 201it [00:00, 534.36it/s, env_step=10070, gradient_step=10070, len=2, n/ep=10, n/st=20, rew=0.00]                         


Epoch #50: test_reward: 0.000000 ± 0.000000, best_reward: 0.000000 ± 0.000000 in #3


Epoch #51: 201it [00:00, 538.91it/s, env_step=10270, gradient_step=10270, len=2, n/ep=10, n/st=20, rew=0.00]                         

Epoch #51: test_reward: 0.000000 ± 0.000000, best_reward: 0.000000 ± 0.000000 in #3



Epoch #52: 201it [00:00, 559.08it/s, env_step=10470, gradient_step=10470, len=2, n/ep=10, n/st=20, rew=0.00]                         

Epoch #52: test_reward: 0.000000 ± 0.000000, best_reward: 0.000000 ± 0.000000 in #3



Epoch #53: 201it [00:00, 519.00it/s, env_step=10670, gradient_step=10670, len=2, n/ep=10, n/st=20, rew=0.00]                         


Epoch #53: test_reward: 0.000000 ± 0.000000, best_reward: 0.000000 ± 0.000000 in #3


Epoch #54: 201it [00:00, 557.76it/s, env_step=10870, gradient_step=10870, len=2, n/ep=10, n/st=20, rew=0.00]                         

Epoch #54: test_reward: 0.000000 ± 0.000000, best_reward: 0.000000 ± 0.000000 in #3



Epoch #55: 201it [00:00, 528.65it/s, env_step=11070, gradient_step=11070, len=2, n/ep=10, n/st=20, rew=0.00]                         

Epoch #55: test_reward: 0.000000 ± 0.000000, best_reward: 0.000000 ± 0.000000 in #3



Epoch #56: 201it [00:00, 570.75it/s, env_step=11270, gradient_step=11270, len=2, n/ep=10, n/st=20, rew=0.00]                         

Epoch #56: test_reward: 0.000000 ± 0.000000, best_reward: 0.000000 ± 0.000000 in #3



Epoch #57: 201it [00:00, 532.35it/s, env_step=11470, gradient_step=11470, len=2, n/ep=10, n/st=20, rew=0.00]                         

Epoch #57: test_reward: 0.000000 ± 0.000000, best_reward: 0.000000 ± 0.000000 in #3



Epoch #58: 201it [00:00, 554.87it/s, env_step=11670, gradient_step=11670, len=2, n/ep=10, n/st=20, rew=0.00]                         

Epoch #58: test_reward: 0.000000 ± 0.000000, best_reward: 0.000000 ± 0.000000 in #3



Epoch #59: 201it [00:00, 545.86it/s, env_step=11870, gradient_step=11870, len=2, n/ep=10, n/st=20, rew=0.00]                         

Epoch #59: test_reward: 0.000000 ± 0.000000, best_reward: 0.000000 ± 0.000000 in #3



Epoch #60: 201it [00:00, 534.35it/s, env_step=12070, gradient_step=12070, len=2, n/ep=10, n/st=20, rew=0.00]                         

Epoch #60: test_reward: 0.000000 ± 0.000000, best_reward: 0.000000 ± 0.000000 in #3



Epoch #61: 201it [00:00, 524.39it/s, env_step=12270, gradient_step=12270, len=2, n/ep=10, n/st=20, rew=0.00]                         

Epoch #61: test_reward: 0.000000 ± 0.000000, best_reward: 0.000000 ± 0.000000 in #3



Epoch #62: 201it [00:00, 557.49it/s, env_step=12470, gradient_step=12470, len=2, n/ep=10, n/st=20, rew=0.00]                         

Epoch #62: test_reward: 0.000000 ± 0.000000, best_reward: 0.000000 ± 0.000000 in #3



Epoch #63: 201it [00:00, 358.08it/s, env_step=12670, gradient_step=12670, len=2, n/ep=10, n/st=20, rew=0.00]                         

Epoch #63: test_reward: 0.000000 ± 0.000000, best_reward: 0.000000 ± 0.000000 in #3



Epoch #64: 201it [00:00, 330.28it/s, env_step=12870, gradient_step=12870, len=2, n/ep=10, n/st=20, rew=0.00]                         


Epoch #64: test_reward: 0.000000 ± 0.000000, best_reward: 0.000000 ± 0.000000 in #3


Epoch #65: 201it [00:00, 477.02it/s, env_step=13070, gradient_step=13070, len=2, n/ep=10, n/st=20, rew=0.00]                         

Epoch #65: test_reward: 0.000000 ± 0.000000, best_reward: 0.000000 ± 0.000000 in #3



Epoch #66: 201it [00:00, 356.53it/s, env_step=13270, gradient_step=13270, len=2, n/ep=10, n/st=20, rew=0.00]                         

Epoch #66: test_reward: 0.000000 ± 0.000000, best_reward: 0.000000 ± 0.000000 in #3



Epoch #67: 201it [00:00, 456.02it/s, env_step=13470, gradient_step=13470, len=2, n/ep=10, n/st=20, rew=0.00]                         

Epoch #67: test_reward: 0.000000 ± 0.000000, best_reward: 0.000000 ± 0.000000 in #3



Epoch #68: 201it [00:00, 472.64it/s, env_step=13670, gradient_step=13670, len=2, n/ep=10, n/st=20, rew=0.00]                         

Epoch #68: test_reward: 0.000000 ± 0.000000, best_reward: 0.000000 ± 0.000000 in #3



Epoch #69: 201it [00:00, 403.16it/s, env_step=13870, gradient_step=13870, len=2, n/ep=10, n/st=20, rew=0.00]                         

Epoch #69: test_reward: 0.000000 ± 0.000000, best_reward: 0.000000 ± 0.000000 in #3



Epoch #70: 201it [00:00, 496.98it/s, env_step=14070, gradient_step=14070, len=2, n/ep=10, n/st=20, rew=0.00]                         

Epoch #70: test_reward: 0.000000 ± 0.000000, best_reward: 0.000000 ± 0.000000 in #3



Epoch #71: 201it [00:00, 478.77it/s, env_step=14270, gradient_step=14270, len=2, n/ep=10, n/st=20, rew=0.00]                         

Epoch #71: test_reward: 0.000000 ± 0.000000, best_reward: 0.000000 ± 0.000000 in #3



Epoch #72: 201it [00:00, 367.05it/s, env_step=14470, gradient_step=14470, len=2, n/ep=10, n/st=20, rew=0.00]                         

Epoch #72: test_reward: 0.000000 ± 0.000000, best_reward: 0.000000 ± 0.000000 in #3



Epoch #73: 201it [00:00, 294.19it/s, env_step=14670, gradient_step=14670, len=2, n/ep=10, n/st=20, rew=0.00]                         

Epoch #73: test_reward: 0.000000 ± 0.000000, best_reward: 0.000000 ± 0.000000 in #3



Epoch #74: 201it [00:00, 413.41it/s, env_step=14870, gradient_step=14870, len=2, n/ep=10, n/st=20, rew=0.00]                         


Epoch #74: test_reward: 0.000000 ± 0.000000, best_reward: 0.000000 ± 0.000000 in #3


Epoch #75: 201it [00:00, 497.77it/s, env_step=15070, gradient_step=15070, len=2, n/ep=10, n/st=20, rew=0.00]                         

Epoch #75: test_reward: 0.000000 ± 0.000000, best_reward: 0.000000 ± 0.000000 in #3



Epoch #76: 201it [00:00, 477.13it/s, env_step=15270, gradient_step=15270, len=2, n/ep=10, n/st=20, rew=0.00]                         

Epoch #76: test_reward: 0.000000 ± 0.000000, best_reward: 0.000000 ± 0.000000 in #3



Epoch #77: 201it [00:00, 506.80it/s, env_step=15470, gradient_step=15470, len=2, n/ep=10, n/st=20, rew=0.00]                         

Epoch #77: test_reward: 0.000000 ± 0.000000, best_reward: 0.000000 ± 0.000000 in #3



Epoch #78: 201it [00:00, 453.33it/s, env_step=15670, gradient_step=15670, len=2, n/ep=10, n/st=20, rew=0.00]                         

Epoch #78: test_reward: 0.000000 ± 0.000000, best_reward: 0.000000 ± 0.000000 in #3



Epoch #79: 201it [00:00, 410.31it/s, env_step=15870, gradient_step=15870, len=2, n/ep=10, n/st=20, rew=0.00]                         

Epoch #79: test_reward: 0.000000 ± 0.000000, best_reward: 0.000000 ± 0.000000 in #3



Epoch #80: 201it [00:00, 488.68it/s, env_step=16070, gradient_step=16070, len=2, n/ep=10, n/st=20, rew=0.00]                         

Epoch #80: test_reward: 0.000000 ± 0.000000, best_reward: 0.000000 ± 0.000000 in #3



Epoch #81: 201it [00:00, 502.70it/s, env_step=16270, gradient_step=16270, len=2, n/ep=10, n/st=20, rew=0.00]                         

Epoch #81: test_reward: 0.000000 ± 0.000000, best_reward: 0.000000 ± 0.000000 in #3



Epoch #82: 201it [00:00, 505.92it/s, env_step=16470, gradient_step=16470, len=2, n/ep=10, n/st=20, rew=0.00]                         

Epoch #82: test_reward: 0.000000 ± 0.000000, best_reward: 0.000000 ± 0.000000 in #3



Epoch #83: 201it [00:00, 515.67it/s, env_step=16670, gradient_step=16670, len=2, n/ep=10, n/st=20, rew=0.00]                         

Epoch #83: test_reward: 0.000000 ± 0.000000, best_reward: 0.000000 ± 0.000000 in #3



Epoch #84: 201it [00:00, 489.40it/s, env_step=16870, gradient_step=16870, len=2, n/ep=10, n/st=20, rew=0.00]                         


Epoch #84: test_reward: 0.000000 ± 0.000000, best_reward: 0.000000 ± 0.000000 in #3


Epoch #85: 201it [00:00, 389.54it/s, env_step=17070, gradient_step=17070, len=2, n/ep=10, n/st=20, rew=0.00]                         

Epoch #85: test_reward: 0.000000 ± 0.000000, best_reward: 0.000000 ± 0.000000 in #3



Epoch #86: 201it [00:00, 401.60it/s, env_step=17270, gradient_step=17270, len=2, n/ep=10, n/st=20, rew=0.00]                         

Epoch #86: test_reward: 0.000000 ± 0.000000, best_reward: 0.000000 ± 0.000000 in #3



Epoch #87: 201it [00:00, 492.51it/s, env_step=17470, gradient_step=17470, len=2, n/ep=10, n/st=20, rew=0.00]                         


Epoch #87: test_reward: 0.000000 ± 0.000000, best_reward: 0.000000 ± 0.000000 in #3


Epoch #88: 201it [00:00, 441.84it/s, env_step=17670, gradient_step=17670, len=2, n/ep=10, n/st=20, rew=0.00]                         

Epoch #88: test_reward: 0.000000 ± 0.000000, best_reward: 0.000000 ± 0.000000 in #3



Epoch #89: 201it [00:00, 303.65it/s, env_step=17870, gradient_step=17870, len=2, n/ep=10, n/st=20, rew=0.00]                         


Epoch #89: test_reward: 0.000000 ± 0.000000, best_reward: 0.000000 ± 0.000000 in #3


Epoch #90: 201it [00:00, 375.67it/s, env_step=18070, gradient_step=18070, len=2, n/ep=10, n/st=20, rew=0.00]                         

Epoch #90: test_reward: 0.000000 ± 0.000000, best_reward: 0.000000 ± 0.000000 in #3



Epoch #91: 201it [00:00, 534.84it/s, env_step=18270, gradient_step=18270, len=2, n/ep=10, n/st=20, rew=0.00]                         

Epoch #91: test_reward: 0.000000 ± 0.000000, best_reward: 0.000000 ± 0.000000 in #3



Epoch #92: 201it [00:00, 505.41it/s, env_step=18470, gradient_step=18470, len=2, n/ep=10, n/st=20, rew=0.00]                         

Epoch #92: test_reward: 0.000000 ± 0.000000, best_reward: 0.000000 ± 0.000000 in #3



Epoch #93: 201it [00:00, 496.41it/s, env_step=18670, gradient_step=18670, len=2, n/ep=10, n/st=20, rew=0.00]                         

Epoch #93: test_reward: 0.000000 ± 0.000000, best_reward: 0.000000 ± 0.000000 in #3



Epoch #94: 201it [00:00, 452.62it/s, env_step=18870, gradient_step=18870, len=2, n/ep=10, n/st=20, rew=0.00]                         

Epoch #94: test_reward: 0.000000 ± 0.000000, best_reward: 0.000000 ± 0.000000 in #3



Epoch #95: 201it [00:00, 443.87it/s, env_step=19070, gradient_step=19070, len=2, n/ep=10, n/st=20, rew=0.00]                         

Epoch #95: test_reward: 0.000000 ± 0.000000, best_reward: 0.000000 ± 0.000000 in #3



Epoch #96: 201it [00:00, 375.36it/s, env_step=19270, gradient_step=19270, len=2, n/ep=10, n/st=20, rew=0.00]                         

Epoch #96: test_reward: 0.000000 ± 0.000000, best_reward: 0.000000 ± 0.000000 in #3



Epoch #97: 201it [00:00, 453.70it/s, env_step=19470, gradient_step=19470, len=2, n/ep=10, n/st=20, rew=0.00]                         

Epoch #97: test_reward: 0.000000 ± 0.000000, best_reward: 0.000000 ± 0.000000 in #3



Epoch #98: 201it [00:00, 395.23it/s, env_step=19670, gradient_step=19670, len=2, n/ep=10, n/st=20, rew=0.00]                         

Epoch #98: test_reward: 0.000000 ± 0.000000, best_reward: 0.000000 ± 0.000000 in #3



Epoch #99: 201it [00:00, 474.38it/s, env_step=19870, gradient_step=19870, len=2, n/ep=10, n/st=20, rew=0.00]                         

Epoch #99: test_reward: 0.000000 ± 0.000000, best_reward: 0.000000 ± 0.000000 in #3



Epoch #100: 201it [00:00, 337.14it/s, env_step=20070, gradient_step=20070, len=2, n/ep=10, n/st=20, rew=0.00]                         

Epoch #100: test_reward: 0.000000 ± 0.000000, best_reward: 0.000000 ± 0.000000 in #3





InfoStats(gradient_step=20070, best_reward=0.0, best_reward_std=0.0, train_step=20070, train_episode=9860, test_step=212, test_episode=101, timing=TimingStats(total_time=41.64589500427246, train_time=41.3753387928009, train_time_collect=16.080097675323486, train_time_update=22.51021146774292, test_time=0.2705562114715576, update_speed=485.07155676733885))

In [None]:
run(SimpleNN, scenario_1)

Epoch #1: 1000it [00:02, 380.99it/s, env_step=1000, gradient_step=1000, len=100, n/ep=10, n/st=1000, rew=-415.30]

Epoch #1: test_reward: -200.723693 ± 0.000000, best_reward: -200.723693 ± 0.000000 in #1



Epoch #2: 390it [00:00, 561.47it/s, env_step=1390, gradient_step=1390, len=39, n/ep=10, n/st=390, rew=-200.72] 

Epoch #2: test_reward: -415.302740 ± 0.000000, best_reward: -200.723693 ± 0.000000 in #1



Epoch #3: 1000it [00:01, 583.32it/s, env_step=2390, gradient_step=2390, len=100, n/ep=10, n/st=1000, rew=-415.30] 

Epoch #3: test_reward: -200.723693 ± 0.000000, best_reward: -200.723693 ± 0.000000 in #1



Epoch #4: 390it [00:00, 644.52it/s, env_step=2780, gradient_step=2780, len=39, n/ep=10, n/st=390, rew=-200.72] 

Epoch #4: test_reward: -103.747550 ± 0.000000, best_reward: -103.747550 ± 0.000000 in #4



Epoch #5: 1000it [00:01, 641.23it/s, env_step=3780, gradient_step=3780, len=100, n/ep=10, n/st=1000, rew=-103.75] 

Epoch #5: test_reward: -200.723693 ± 0.000000, best_reward: -103.747550 ± 0.000000 in #4



Epoch #6: 390it [00:00, 565.20it/s, env_step=4170, gradient_step=4170, len=39, n/ep=10, n/st=390, rew=-200.72] 

Epoch #6: test_reward: -415.302740 ± 0.000000, best_reward: -103.747550 ± 0.000000 in #4



Epoch #7: 1000it [00:01, 591.79it/s, env_step=5170, gradient_step=5170, len=100, n/ep=10, n/st=1000, rew=-415.30] 

Epoch #7: test_reward: -103.747550 ± 0.000000, best_reward: -103.747550 ± 0.000000 in #4



Epoch #8: 1000it [00:01, 651.99it/s, env_step=6170, gradient_step=6170, len=100, n/ep=10, n/st=1000, rew=-103.75] 

Epoch #8: test_reward: -200.723693 ± 0.000000, best_reward: -103.747550 ± 0.000000 in #4



Epoch #9: 390it [00:00, 420.28it/s, env_step=6560, gradient_step=6560, len=39, n/ep=10, n/st=390, rew=-200.72] 

Epoch #9: test_reward: -415.302740 ± 0.000000, best_reward: -103.747550 ± 0.000000 in #4



Epoch #10: 1000it [00:01, 525.03it/s, env_step=7560, gradient_step=7560, len=100, n/ep=10, n/st=1000, rew=-415.30] 

Epoch #10: test_reward: -415.302740 ± 0.000000, best_reward: -103.747550 ± 0.000000 in #4



Epoch #11: 1000it [00:01, 525.15it/s, env_step=8560, gradient_step=8560, len=100, n/ep=10, n/st=1000, rew=-415.30] 

Epoch #11: test_reward: -200.723693 ± 0.000000, best_reward: -103.747550 ± 0.000000 in #4



Epoch #12: 390it [00:00, 621.11it/s, env_step=8950, gradient_step=8950, len=39, n/ep=10, n/st=390, rew=-200.72] 

Epoch #12: test_reward: -200.723693 ± 0.000000, best_reward: -103.747550 ± 0.000000 in #4



Epoch #13: 390it [00:00, 569.11it/s, env_step=9340, gradient_step=9340, len=39, n/ep=10, n/st=390, rew=-200.72] 

Epoch #13: test_reward: -200.723693 ± 0.000000, best_reward: -103.747550 ± 0.000000 in #4



Epoch #14: 390it [00:00, 567.06it/s, env_step=9730, gradient_step=9730, len=39, n/ep=10, n/st=390, rew=-200.72] 

Epoch #14: test_reward: -103.747550 ± 0.000000, best_reward: -103.747550 ± 0.000000 in #4



Epoch #15: 1000it [00:01, 570.46it/s, env_step=10730, gradient_step=10730, len=100, n/ep=10, n/st=1000, rew=-103.75] 

Epoch #15: test_reward: -214.502381 ± 0.000000, best_reward: -103.747550 ± 0.000000 in #4



Epoch #16: 290it [00:00, 481.27it/s, env_step=11020, gradient_step=11020, len=29, n/ep=10, n/st=290, rew=-214.50] 

Epoch #16: test_reward: -103.747550 ± 0.000000, best_reward: -103.747550 ± 0.000000 in #4



Epoch #17: 1000it [00:01, 601.62it/s, env_step=12020, gradient_step=12020, len=100, n/ep=10, n/st=1000, rew=-103.75] 

Epoch #17: test_reward: -415.302740 ± 0.000000, best_reward: -103.747550 ± 0.000000 in #4



Epoch #18: 1000it [00:01, 644.36it/s, env_step=13020, gradient_step=13020, len=100, n/ep=10, n/st=1000, rew=-415.30] 

Epoch #18: test_reward: -103.747550 ± 0.000000, best_reward: -103.747550 ± 0.000000 in #4



Epoch #19: 1000it [00:01, 603.97it/s, env_step=14020, gradient_step=14020, len=100, n/ep=10, n/st=1000, rew=-103.75] 

Epoch #19: test_reward: -200.723693 ± 0.000000, best_reward: -103.747550 ± 0.000000 in #4



Epoch #20: 390it [00:00, 430.26it/s, env_step=14410, gradient_step=14410, len=39, n/ep=10, n/st=390, rew=-200.72] 

Epoch #20: test_reward: -103.747550 ± 0.000000, best_reward: -103.747550 ± 0.000000 in #4



Epoch #21: 1000it [00:01, 518.82it/s, env_step=15410, gradient_step=15410, len=100, n/ep=10, n/st=1000, rew=-103.75] 

Epoch #21: test_reward: -415.302740 ± 0.000000, best_reward: -103.747550 ± 0.000000 in #4



Epoch #22: 1000it [00:01, 604.49it/s, env_step=16410, gradient_step=16410, len=100, n/ep=10, n/st=1000, rew=-415.30] 

Epoch #22: test_reward: -214.502381 ± 0.000000, best_reward: -103.747550 ± 0.000000 in #4



Epoch #23: 290it [00:00, 557.87it/s, env_step=16700, gradient_step=16700, len=29, n/ep=10, n/st=290, rew=-214.50] 

Epoch #23: test_reward: -200.723693 ± 0.000000, best_reward: -103.747550 ± 0.000000 in #4



Epoch #24: 390it [00:00, 627.84it/s, env_step=17090, gradient_step=17090, len=39, n/ep=10, n/st=390, rew=-200.72] 

Epoch #24: test_reward: -200.723693 ± 0.000000, best_reward: -103.747550 ± 0.000000 in #4



Epoch #25: 390it [00:00, 576.63it/s, env_step=17480, gradient_step=17480, len=39, n/ep=10, n/st=390, rew=-200.72] 

Epoch #25: test_reward: -200.723693 ± 0.000000, best_reward: -103.747550 ± 0.000000 in #4



Epoch #26: 390it [00:00, 622.94it/s, env_step=17870, gradient_step=17870, len=39, n/ep=10, n/st=390, rew=-200.72] 

Epoch #26: test_reward: -103.747550 ± 0.000000, best_reward: -103.747550 ± 0.000000 in #4



Epoch #27: 1000it [00:01, 609.20it/s, env_step=18870, gradient_step=18870, len=100, n/ep=10, n/st=1000, rew=-103.75] 

Epoch #27: test_reward: -200.723693 ± 0.000000, best_reward: -103.747550 ± 0.000000 in #4



Epoch #28: 390it [00:00, 648.35it/s, env_step=19260, gradient_step=19260, len=39, n/ep=10, n/st=390, rew=-200.72] 

Epoch #28: test_reward: -200.723693 ± 0.000000, best_reward: -103.747550 ± 0.000000 in #4



Epoch #29: 390it [00:00, 627.32it/s, env_step=19650, gradient_step=19650, len=39, n/ep=10, n/st=390, rew=-200.72] 

Epoch #29: test_reward: -415.302740 ± 0.000000, best_reward: -103.747550 ± 0.000000 in #4



Epoch #30: 1000it [00:01, 605.80it/s, env_step=20650, gradient_step=20650, len=100, n/ep=10, n/st=1000, rew=-415.30] 

Epoch #30: test_reward: -200.723693 ± 0.000000, best_reward: -103.747550 ± 0.000000 in #4



Epoch #31: 390it [00:00, 611.37it/s, env_step=21040, gradient_step=21040, len=39, n/ep=10, n/st=390, rew=-200.72] 

Epoch #31: test_reward: -415.302740 ± 0.000000, best_reward: -103.747550 ± 0.000000 in #4



Epoch #32: 1000it [00:01, 614.89it/s, env_step=22040, gradient_step=22040, len=100, n/ep=10, n/st=1000, rew=-415.30] 

Epoch #32: test_reward: -200.723693 ± 0.000000, best_reward: -103.747550 ± 0.000000 in #4



Epoch #33: 390it [00:00, 567.83it/s, env_step=22430, gradient_step=22430, len=39, n/ep=10, n/st=390, rew=-200.72] 

Epoch #33: test_reward: -103.747550 ± 0.000000, best_reward: -103.747550 ± 0.000000 in #4



Epoch #34: 1000it [00:02, 454.75it/s, env_step=23430, gradient_step=23430, len=100, n/ep=10, n/st=1000, rew=-103.75] 

Epoch #34: test_reward: -214.502381 ± 0.000000, best_reward: -103.747550 ± 0.000000 in #4



Epoch #35: 290it [00:00, 596.49it/s, env_step=23720, gradient_step=23720, len=29, n/ep=10, n/st=290, rew=-214.50] 

Epoch #35: test_reward: -214.502381 ± 0.000000, best_reward: -103.747550 ± 0.000000 in #4



Epoch #36: 290it [00:00, 641.87it/s, env_step=24010, gradient_step=24010, len=29, n/ep=10, n/st=290, rew=-214.50] 

Epoch #36: test_reward: -415.302740 ± 0.000000, best_reward: -103.747550 ± 0.000000 in #4



Epoch #37: 1000it [00:01, 657.15it/s, env_step=25010, gradient_step=25010, len=100, n/ep=10, n/st=1000, rew=-415.30] 

Epoch #37: test_reward: -103.747550 ± 0.000000, best_reward: -103.747550 ± 0.000000 in #4



Epoch #38: 1000it [00:01, 632.62it/s, env_step=26010, gradient_step=26010, len=100, n/ep=10, n/st=1000, rew=-103.75] 

Epoch #38: test_reward: -103.747550 ± 0.000000, best_reward: -103.747550 ± 0.000000 in #4



Epoch #39: 1000it [00:01, 650.94it/s, env_step=27010, gradient_step=27010, len=100, n/ep=10, n/st=1000, rew=-103.75] 

Epoch #39: test_reward: -103.747550 ± 0.000000, best_reward: -103.747550 ± 0.000000 in #4



Epoch #40: 1000it [00:01, 654.68it/s, env_step=28010, gradient_step=28010, len=100, n/ep=10, n/st=1000, rew=-103.75] 

Epoch #40: test_reward: -200.723693 ± 0.000000, best_reward: -103.747550 ± 0.000000 in #4



Epoch #41: 390it [00:00, 582.98it/s, env_step=28400, gradient_step=28400, len=39, n/ep=10, n/st=390, rew=-200.72] 

Epoch #41: test_reward: -220.138095 ± 0.000000, best_reward: -103.747550 ± 0.000000 in #4



Epoch #42: 300it [00:00, 591.53it/s, env_step=28700, gradient_step=28700, len=30, n/ep=10, n/st=300, rew=-220.14] 

Epoch #42: test_reward: -415.302740 ± 0.000000, best_reward: -103.747550 ± 0.000000 in #4



Epoch #43: 1000it [00:01, 615.21it/s, env_step=29700, gradient_step=29700, len=100, n/ep=10, n/st=1000, rew=-415.30] 

Epoch #43: test_reward: -103.747550 ± 0.000000, best_reward: -103.747550 ± 0.000000 in #4



Epoch #44: 1000it [00:01, 643.90it/s, env_step=30700, gradient_step=30700, len=100, n/ep=10, n/st=1000, rew=-103.75] 

Epoch #44: test_reward: -200.723693 ± 0.000000, best_reward: -103.747550 ± 0.000000 in #4



Epoch #45: 390it [00:00, 653.42it/s, env_step=31090, gradient_step=31090, len=39, n/ep=10, n/st=390, rew=-200.72] 

Epoch #45: test_reward: -200.723693 ± 0.000000, best_reward: -103.747550 ± 0.000000 in #4



Epoch #46: 390it [00:00, 647.66it/s, env_step=31480, gradient_step=31480, len=39, n/ep=10, n/st=390, rew=-200.72] 

Epoch #46: test_reward: -415.302740 ± 0.000000, best_reward: -103.747550 ± 0.000000 in #4



Epoch #47: 1000it [00:01, 531.19it/s, env_step=32480, gradient_step=32480, len=100, n/ep=10, n/st=1000, rew=-415.30] 

Epoch #47: test_reward: -200.723693 ± 0.000000, best_reward: -103.747550 ± 0.000000 in #4



Epoch #48: 390it [00:00, 628.26it/s, env_step=32870, gradient_step=32870, len=39, n/ep=10, n/st=390, rew=-200.72] 

Epoch #48: test_reward: -103.747550 ± 0.000000, best_reward: -103.747550 ± 0.000000 in #4



Epoch #49: 1000it [00:01, 655.58it/s, env_step=33870, gradient_step=33870, len=100, n/ep=10, n/st=1000, rew=-103.75] 

Epoch #49: test_reward: -610.364730 ± 0.000000, best_reward: -103.747550 ± 0.000000 in #4



Epoch #50: 1000it [00:01, 663.33it/s, env_step=34870, gradient_step=34870, len=100, n/ep=10, n/st=1000, rew=-610.36] 

Epoch #50: test_reward: -415.302740 ± 0.000000, best_reward: -103.747550 ± 0.000000 in #4



Epoch #51: 1000it [00:01, 672.29it/s, env_step=35870, gradient_step=35870, len=100, n/ep=10, n/st=1000, rew=-415.30] 

Epoch #51: test_reward: -200.723693 ± 0.000000, best_reward: -103.747550 ± 0.000000 in #4



Epoch #52: 390it [00:00, 666.70it/s, env_step=36260, gradient_step=36260, len=39, n/ep=10, n/st=390, rew=-200.72] 

Epoch #52: test_reward: -200.723693 ± 0.000000, best_reward: -103.747550 ± 0.000000 in #4



Epoch #53: 390it [00:00, 648.70it/s, env_step=36650, gradient_step=36650, len=39, n/ep=10, n/st=390, rew=-200.72] 

Epoch #53: test_reward: -103.747550 ± 0.000000, best_reward: -103.747550 ± 0.000000 in #4



Epoch #54: 1000it [00:01, 691.98it/s, env_step=37650, gradient_step=37650, len=100, n/ep=10, n/st=1000, rew=-103.75] 

Epoch #54: test_reward: -200.723693 ± 0.000000, best_reward: -103.747550 ± 0.000000 in #4



Epoch #55: 390it [00:00, 686.88it/s, env_step=38040, gradient_step=38040, len=39, n/ep=10, n/st=390, rew=-200.72] 

Epoch #55: test_reward: -214.502381 ± 0.000000, best_reward: -103.747550 ± 0.000000 in #4



Epoch #56: 290it [00:00, 643.48it/s, env_step=38330, gradient_step=38330, len=29, n/ep=10, n/st=290, rew=-214.50] 

Epoch #56: test_reward: -415.302740 ± 0.000000, best_reward: -103.747550 ± 0.000000 in #4



Epoch #57: 1000it [00:01, 673.14it/s, env_step=39330, gradient_step=39330, len=100, n/ep=10, n/st=1000, rew=-415.30] 

Epoch #57: test_reward: -103.747550 ± 0.000000, best_reward: -103.747550 ± 0.000000 in #4



Epoch #58: 1000it [00:01, 652.24it/s, env_step=40330, gradient_step=40330, len=100, n/ep=10, n/st=1000, rew=-103.75] 

Epoch #58: test_reward: -103.747550 ± 0.000000, best_reward: -103.747550 ± 0.000000 in #4



Epoch #59: 1000it [00:01, 545.45it/s, env_step=41330, gradient_step=41330, len=100, n/ep=10, n/st=1000, rew=-103.75] 

Epoch #59: test_reward: -200.723693 ± 0.000000, best_reward: -103.747550 ± 0.000000 in #4



Epoch #60: 390it [00:00, 602.29it/s, env_step=41720, gradient_step=41720, len=39, n/ep=10, n/st=390, rew=-200.72] 

Epoch #60: test_reward: -415.302740 ± 0.000000, best_reward: -103.747550 ± 0.000000 in #4



Epoch #61: 1000it [00:01, 639.31it/s, env_step=42720, gradient_step=42720, len=100, n/ep=10, n/st=1000, rew=-415.30] 

Epoch #61: test_reward: -415.302740 ± 0.000000, best_reward: -103.747550 ± 0.000000 in #4



Epoch #62: 1000it [00:01, 663.63it/s, env_step=43720, gradient_step=43720, len=100, n/ep=10, n/st=1000, rew=-415.30] 

Epoch #62: test_reward: -200.723693 ± 0.000000, best_reward: -103.747550 ± 0.000000 in #4



Epoch #63: 390it [00:00, 663.01it/s, env_step=44110, gradient_step=44110, len=39, n/ep=10, n/st=390, rew=-200.72] 

Epoch #63: test_reward: -103.747550 ± 0.000000, best_reward: -103.747550 ± 0.000000 in #4



Epoch #64: 1000it [00:01, 671.33it/s, env_step=45110, gradient_step=45110, len=100, n/ep=10, n/st=1000, rew=-103.75] 

Epoch #64: test_reward: -415.302740 ± 0.000000, best_reward: -103.747550 ± 0.000000 in #4



Epoch #65: 1000it [00:01, 676.19it/s, env_step=46110, gradient_step=46110, len=100, n/ep=10, n/st=1000, rew=-415.30] 

Epoch #65: test_reward: -103.747550 ± 0.000000, best_reward: -103.747550 ± 0.000000 in #4



Epoch #66: 1000it [00:01, 685.24it/s, env_step=47110, gradient_step=47110, len=100, n/ep=10, n/st=1000, rew=-103.75] 

Epoch #66: test_reward: -200.723693 ± 0.000000, best_reward: -103.747550 ± 0.000000 in #4



Epoch #67: 390it [00:00, 667.06it/s, env_step=47500, gradient_step=47500, len=39, n/ep=10, n/st=390, rew=-200.72] 

Epoch #67: test_reward: -214.502381 ± 0.000000, best_reward: -103.747550 ± 0.000000 in #4



Epoch #68: 290it [00:00, 656.76it/s, env_step=47790, gradient_step=47790, len=29, n/ep=10, n/st=290, rew=-214.50] 

Epoch #68: test_reward: -200.723693 ± 0.000000, best_reward: -103.747550 ± 0.000000 in #4



Epoch #69: 390it [00:00, 667.39it/s, env_step=48180, gradient_step=48180, len=39, n/ep=10, n/st=390, rew=-200.72] 

Epoch #69: test_reward: -415.302740 ± 0.000000, best_reward: -103.747550 ± 0.000000 in #4



Epoch #70: 1000it [00:01, 682.94it/s, env_step=49180, gradient_step=49180, len=100, n/ep=10, n/st=1000, rew=-415.30] 

Epoch #70: test_reward: -200.723693 ± 0.000000, best_reward: -103.747550 ± 0.000000 in #4



Epoch #71: 390it [00:00, 676.87it/s, env_step=49570, gradient_step=49570, len=39, n/ep=10, n/st=390, rew=-200.72] 

Epoch #71: test_reward: -610.364730 ± 0.000000, best_reward: -103.747550 ± 0.000000 in #4



Epoch #72: 1000it [00:01, 532.72it/s, env_step=50570, gradient_step=50570, len=100, n/ep=10, n/st=1000, rew=-610.36] 

Epoch #72: test_reward: -415.302740 ± 0.000000, best_reward: -103.747550 ± 0.000000 in #4



Epoch #73: 1000it [00:01, 574.76it/s, env_step=51570, gradient_step=51570, len=100, n/ep=10, n/st=1000, rew=-415.30] 

Epoch #73: test_reward: -220.138095 ± 0.000000, best_reward: -103.747550 ± 0.000000 in #4



Epoch #74: 300it [00:00, 644.69it/s, env_step=51870, gradient_step=51870, len=30, n/ep=10, n/st=300, rew=-220.14] 

Epoch #74: test_reward: -415.302740 ± 0.000000, best_reward: -103.747550 ± 0.000000 in #4



Epoch #75: 1000it [00:01, 678.73it/s, env_step=52870, gradient_step=52870, len=100, n/ep=10, n/st=1000, rew=-415.30] 

Epoch #75: test_reward: -200.723693 ± 0.000000, best_reward: -103.747550 ± 0.000000 in #4



Epoch #76: 390it [00:00, 652.53it/s, env_step=53260, gradient_step=53260, len=39, n/ep=10, n/st=390, rew=-200.72] 

Epoch #76: test_reward: -103.747550 ± 0.000000, best_reward: -103.747550 ± 0.000000 in #4



Epoch #77: 1000it [00:01, 678.36it/s, env_step=54260, gradient_step=54260, len=100, n/ep=10, n/st=1000, rew=-103.75] 

Epoch #77: test_reward: -200.723693 ± 0.000000, best_reward: -103.747550 ± 0.000000 in #4



Epoch #78: 390it [00:00, 657.37it/s, env_step=54650, gradient_step=54650, len=39, n/ep=10, n/st=390, rew=-200.72] 

Epoch #78: test_reward: -200.723693 ± 0.000000, best_reward: -103.747550 ± 0.000000 in #4



Epoch #79: 390it [00:00, 659.42it/s, env_step=55040, gradient_step=55040, len=39, n/ep=10, n/st=390, rew=-200.72] 

Epoch #79: test_reward: -415.302740 ± 0.000000, best_reward: -103.747550 ± 0.000000 in #4



Epoch #80: 1000it [00:01, 693.40it/s, env_step=56040, gradient_step=56040, len=100, n/ep=10, n/st=1000, rew=-415.30] 

Epoch #80: test_reward: -200.723693 ± 0.000000, best_reward: -103.747550 ± 0.000000 in #4



Epoch #81: 390it [00:00, 662.73it/s, env_step=56430, gradient_step=56430, len=39, n/ep=10, n/st=390, rew=-200.72] 

Epoch #81: test_reward: -214.502381 ± 0.000000, best_reward: -103.747550 ± 0.000000 in #4



Epoch #82: 290it [00:00, 585.76it/s, env_step=56720, gradient_step=56720, len=29, n/ep=10, n/st=290, rew=-214.50] 

Epoch #82: test_reward: -200.723693 ± 0.000000, best_reward: -103.747550 ± 0.000000 in #4



Epoch #83: 390it [00:00, 655.10it/s, env_step=57110, gradient_step=57110, len=39, n/ep=10, n/st=390, rew=-200.72] 

Epoch #83: test_reward: -415.302740 ± 0.000000, best_reward: -103.747550 ± 0.000000 in #4



Epoch #84: 1000it [00:01, 678.10it/s, env_step=58110, gradient_step=58110, len=100, n/ep=10, n/st=1000, rew=-415.30] 

Epoch #84: test_reward: -415.302740 ± 0.000000, best_reward: -103.747550 ± 0.000000 in #4



Epoch #85: 1000it [00:01, 608.12it/s, env_step=59110, gradient_step=59110, len=100, n/ep=10, n/st=1000, rew=-415.30] 

Epoch #85: test_reward: -415.302740 ± 0.000000, best_reward: -103.747550 ± 0.000000 in #4



Epoch #86: 1000it [00:01, 570.37it/s, env_step=60110, gradient_step=60110, len=100, n/ep=10, n/st=1000, rew=-415.30] 

Epoch #86: test_reward: -200.723693 ± 0.000000, best_reward: -103.747550 ± 0.000000 in #4



Epoch #87: 390it [00:00, 672.54it/s, env_step=60500, gradient_step=60500, len=39, n/ep=10, n/st=390, rew=-200.72] 

Epoch #87: test_reward: -214.502381 ± 0.000000, best_reward: -103.747550 ± 0.000000 in #4



Epoch #88: 290it [00:00, 582.69it/s, env_step=60790, gradient_step=60790, len=29, n/ep=10, n/st=290, rew=-214.50] 

Epoch #88: test_reward: -200.723693 ± 0.000000, best_reward: -103.747550 ± 0.000000 in #4



Epoch #89: 390it [00:00, 586.97it/s, env_step=61180, gradient_step=61180, len=39, n/ep=10, n/st=390, rew=-200.72] 

Epoch #89: test_reward: -200.723693 ± 0.000000, best_reward: -103.747550 ± 0.000000 in #4



Epoch #90: 390it [00:00, 608.45it/s, env_step=61570, gradient_step=61570, len=39, n/ep=10, n/st=390, rew=-200.72] 

Epoch #90: test_reward: -103.747550 ± 0.000000, best_reward: -103.747550 ± 0.000000 in #4



Epoch #91: 1000it [00:01, 675.34it/s, env_step=62570, gradient_step=62570, len=100, n/ep=10, n/st=1000, rew=-103.75] 

Epoch #91: test_reward: -103.747550 ± 0.000000, best_reward: -103.747550 ± 0.000000 in #4



Epoch #92: 1000it [00:01, 697.74it/s, env_step=63570, gradient_step=63570, len=100, n/ep=10, n/st=1000, rew=-103.75] 

Epoch #92: test_reward: -103.747550 ± 0.000000, best_reward: -103.747550 ± 0.000000 in #4



Epoch #93: 1000it [00:01, 647.09it/s, env_step=64570, gradient_step=64570, len=100, n/ep=10, n/st=1000, rew=-103.75] 

Epoch #93: test_reward: -103.747550 ± 0.000000, best_reward: -103.747550 ± 0.000000 in #4



Epoch #94: 1000it [00:01, 619.67it/s, env_step=65570, gradient_step=65570, len=100, n/ep=10, n/st=1000, rew=-103.75] 

Epoch #94: test_reward: -103.747550 ± 0.000000, best_reward: -103.747550 ± 0.000000 in #4



Epoch #95: 1000it [00:01, 658.66it/s, env_step=66570, gradient_step=66570, len=100, n/ep=10, n/st=1000, rew=-103.75] 

Epoch #95: test_reward: -415.302740 ± 0.000000, best_reward: -103.747550 ± 0.000000 in #4



Epoch #96: 1000it [00:01, 673.99it/s, env_step=67570, gradient_step=67570, len=100, n/ep=10, n/st=1000, rew=-415.30] 

Epoch #96: test_reward: -200.723693 ± 0.000000, best_reward: -103.747550 ± 0.000000 in #4



Epoch #97: 390it [00:00, 617.49it/s, env_step=67960, gradient_step=67960, len=39, n/ep=10, n/st=390, rew=-200.72] 

Epoch #97: test_reward: -200.723693 ± 0.000000, best_reward: -103.747550 ± 0.000000 in #4



Epoch #98: 390it [00:00, 672.95it/s, env_step=68350, gradient_step=68350, len=39, n/ep=10, n/st=390, rew=-200.72] 

Epoch #98: test_reward: -214.502381 ± 0.000000, best_reward: -103.747550 ± 0.000000 in #4



Epoch #99: 290it [00:00, 639.48it/s, env_step=68640, gradient_step=68640, len=29, n/ep=10, n/st=290, rew=-214.50] 

Epoch #99: test_reward: -415.302740 ± 0.000000, best_reward: -103.747550 ± 0.000000 in #4



Epoch #100: 1000it [00:01, 518.25it/s, env_step=69640, gradient_step=69640, len=100, n/ep=10, n/st=1000, rew=-415.30] 

Epoch #100: test_reward: -220.138095 ± 0.000000, best_reward: -103.747550 ± 0.000000 in #4





InfoStats(gradient_step=69640, best_reward=-103.7475503527924, best_reward_std=0.0, train_step=69640, train_episode=1000, test_step=6994, test_episode=101, timing=TimingStats(total_time=119.08180403709412, train_time=114.66745591163635, train_time_collect=41.5474009513855, train_time_update=69.21594715118408, test_time=4.414348125457764, update_speed=607.3214012323177))

In [None]:
run(SimpleNN, scenario_2)

Epoch #1: 650it [00:01, 599.16it/s, env_step=650, gradient_step=650, len=65, n/ep=10, n/st=650, rew=-127.04] 

Epoch #1: test_reward: -149.715201 ± 0.000000, best_reward: -127.040722 ± 0.000000 in #0



Epoch #2: 1150it [00:01, 630.56it/s, env_step=1800, gradient_step=1800, len=115, n/ep=10, n/st=1150, rew=-149.72] 


Epoch #2: test_reward: -149.709417 ± 0.000000, best_reward: -127.040722 ± 0.000000 in #0


Epoch #3: 10000it [00:15, 654.65it/s, env_step=11800, gradient_step=11800, len=1000, n/ep=10, n/st=10000, rew=-149.71] 

Epoch #3: test_reward: -153.828118 ± 0.000000, best_reward: -127.040722 ± 0.000000 in #0



Epoch #4: 700it [00:01, 667.05it/s, env_step=12500, gradient_step=12500, len=70, n/ep=10, n/st=700, rew=-153.83] 

Epoch #4: test_reward: -149.715201 ± 0.000000, best_reward: -127.040722 ± 0.000000 in #0



Epoch #5: 1150it [00:01, 681.19it/s, env_step=13650, gradient_step=13650, len=115, n/ep=10, n/st=1150, rew=-149.72] 


Epoch #5: test_reward: -149.709417 ± 0.000000, best_reward: -127.040722 ± 0.000000 in #0


Epoch #6: 10000it [00:15, 663.66it/s, env_step=23650, gradient_step=23650, len=1000, n/ep=10, n/st=10000, rew=-149.71] 

Epoch #6: test_reward: -149.715201 ± 0.000000, best_reward: -127.040722 ± 0.000000 in #0



Epoch #7: 1150it [00:01, 658.92it/s, env_step=24800, gradient_step=24800, len=115, n/ep=10, n/st=1150, rew=-149.72] 


Epoch #7: test_reward: -770.668756 ± 0.000000, best_reward: -127.040722 ± 0.000000 in #0


Epoch #8: 10000it [00:14, 682.95it/s, env_step=34800, gradient_step=34800, len=1000, n/ep=10, n/st=10000, rew=-770.67] 


Epoch #8: test_reward: -149.709417 ± 0.000000, best_reward: -127.040722 ± 0.000000 in #0


Epoch #9: 10000it [00:15, 662.26it/s, env_step=44800, gradient_step=44800, len=1000, n/ep=10, n/st=10000, rew=-149.71] 

Epoch #9: test_reward: -149.715201 ± 0.000000, best_reward: -127.040722 ± 0.000000 in #0



Epoch #10: 1150it [00:01, 637.08it/s, env_step=45950, gradient_step=45950, len=115, n/ep=10, n/st=1150, rew=-149.72] 


Epoch #10: test_reward: -770.668756 ± 0.000000, best_reward: -127.040722 ± 0.000000 in #0


Epoch #11: 10000it [00:15, 640.22it/s, env_step=55950, gradient_step=55950, len=1000, n/ep=10, n/st=10000, rew=-770.67] 


Epoch #11: test_reward: -770.668756 ± 0.000000, best_reward: -127.040722 ± 0.000000 in #0


Epoch #12: 10000it [00:15, 658.11it/s, env_step=65950, gradient_step=65950, len=1000, n/ep=10, n/st=10000, rew=-770.67] 


Epoch #12: test_reward: -149.709417 ± 0.000000, best_reward: -127.040722 ± 0.000000 in #0


Epoch #13: 10000it [00:15, 636.68it/s, env_step=75950, gradient_step=75950, len=1000, n/ep=10, n/st=10000, rew=-149.71] 


Epoch #13: test_reward: -770.668756 ± 0.000000, best_reward: -127.040722 ± 0.000000 in #0


Epoch #14: 10000it [00:15, 648.17it/s, env_step=85950, gradient_step=85950, len=1000, n/ep=10, n/st=10000, rew=-770.67] 


Epoch #14: test_reward: -149.709417 ± 0.000000, best_reward: -127.040722 ± 0.000000 in #0


Epoch #15: 10000it [00:15, 628.70it/s, env_step=95950, gradient_step=95950, len=1000, n/ep=10, n/st=10000, rew=-149.71] 


Epoch #15: test_reward: -770.668756 ± 0.000000, best_reward: -127.040722 ± 0.000000 in #0


Epoch #16: 10000it [00:15, 658.06it/s, env_step=105950, gradient_step=105950, len=1000, n/ep=10, n/st=10000, rew=-770.67] 

Epoch #16: test_reward: -149.715201 ± 0.000000, best_reward: -127.040722 ± 0.000000 in #0



Epoch #17: 1150it [00:01, 656.31it/s, env_step=107100, gradient_step=107100, len=115, n/ep=10, n/st=1150, rew=-149.72] 


Epoch #17: test_reward: -149.709417 ± 0.000000, best_reward: -127.040722 ± 0.000000 in #0


Epoch #18: 10000it [00:15, 650.88it/s, env_step=117100, gradient_step=117100, len=1000, n/ep=10, n/st=10000, rew=-149.71] 


Epoch #18: test_reward: -770.668756 ± 0.000000, best_reward: -127.040722 ± 0.000000 in #0


Epoch #19: 10000it [00:17, 585.05it/s, env_step=127100, gradient_step=127100, len=1000, n/ep=10, n/st=10000, rew=-770.67] 


Epoch #19: test_reward: -149.709417 ± 0.000000, best_reward: -127.040722 ± 0.000000 in #0


Epoch #20: 10000it [00:15, 648.91it/s, env_step=137100, gradient_step=137100, len=1000, n/ep=10, n/st=10000, rew=-149.71] 


Epoch #20: test_reward: -770.668756 ± 0.000000, best_reward: -127.040722 ± 0.000000 in #0


Epoch #21: 10000it [00:16, 600.37it/s, env_step=147100, gradient_step=147100, len=1000, n/ep=10, n/st=10000, rew=-770.67] 

Epoch #21: test_reward: -149.715201 ± 0.000000, best_reward: -127.040722 ± 0.000000 in #0



Epoch #22: 1150it [00:01, 614.25it/s, env_step=148250, gradient_step=148250, len=115, n/ep=10, n/st=1150, rew=-149.72] 


Epoch #22: test_reward: -770.668756 ± 0.000000, best_reward: -127.040722 ± 0.000000 in #0


Epoch #23: 10000it [00:16, 595.25it/s, env_step=158250, gradient_step=158250, len=1000, n/ep=10, n/st=10000, rew=-770.67] 


Epoch #23: test_reward: -149.709417 ± 0.000000, best_reward: -127.040722 ± 0.000000 in #0


Epoch #24: 10000it [00:15, 648.28it/s, env_step=168250, gradient_step=168250, len=1000, n/ep=10, n/st=10000, rew=-149.71] 

Epoch #24: test_reward: -149.715201 ± 0.000000, best_reward: -127.040722 ± 0.000000 in #0



Epoch #25: 1150it [00:01, 589.43it/s, env_step=169400, gradient_step=169400, len=115, n/ep=10, n/st=1150, rew=-149.72] 


Epoch #25: test_reward: -149.709417 ± 0.000000, best_reward: -127.040722 ± 0.000000 in #0


Epoch #26: 10000it [00:15, 645.45it/s, env_step=179400, gradient_step=179400, len=1000, n/ep=10, n/st=10000, rew=-149.71] 

Epoch #26: test_reward: -149.715201 ± 0.000000, best_reward: -127.040722 ± 0.000000 in #0



Epoch #27: 1150it [00:01, 617.11it/s, env_step=180550, gradient_step=180550, len=115, n/ep=10, n/st=1150, rew=-149.72] 


Epoch #27: test_reward: -770.668756 ± 0.000000, best_reward: -127.040722 ± 0.000000 in #0


Epoch #28: 10000it [00:16, 597.53it/s, env_step=190550, gradient_step=190550, len=1000, n/ep=10, n/st=10000, rew=-770.67] 


Epoch #28: test_reward: -770.668756 ± 0.000000, best_reward: -127.040722 ± 0.000000 in #0


Epoch #29: 10000it [00:16, 601.88it/s, env_step=200550, gradient_step=200550, len=1000, n/ep=10, n/st=10000, rew=-770.67] 


Epoch #29: test_reward: -149.709417 ± 0.000000, best_reward: -127.040722 ± 0.000000 in #0


Epoch #30: 10000it [00:14, 678.67it/s, env_step=210550, gradient_step=210550, len=1000, n/ep=10, n/st=10000, rew=-149.71] 


Epoch #30: test_reward: -770.668756 ± 0.000000, best_reward: -127.040722 ± 0.000000 in #0


Epoch #31: 10000it [00:14, 681.68it/s, env_step=220550, gradient_step=220550, len=1000, n/ep=10, n/st=10000, rew=-770.67] 

Epoch #31: test_reward: -149.715201 ± 0.000000, best_reward: -127.040722 ± 0.000000 in #0



Epoch #32: 1150it [00:01, 719.50it/s, env_step=221700, gradient_step=221700, len=115, n/ep=10, n/st=1150, rew=-149.72] 

Epoch #32: test_reward: -149.715201 ± 0.000000, best_reward: -127.040722 ± 0.000000 in #0



Epoch #33: 1150it [00:01, 702.37it/s, env_step=222850, gradient_step=222850, len=115, n/ep=10, n/st=1150, rew=-149.72] 


Epoch #33: test_reward: -149.709417 ± 0.000000, best_reward: -127.040722 ± 0.000000 in #0


Epoch #34: 10000it [00:14, 710.46it/s, env_step=232850, gradient_step=232850, len=1000, n/ep=10, n/st=10000, rew=-149.71] 


Epoch #34: test_reward: -770.668756 ± 0.000000, best_reward: -127.040722 ± 0.000000 in #0


Epoch #35: 10000it [00:14, 710.06it/s, env_step=242850, gradient_step=242850, len=1000, n/ep=10, n/st=10000, rew=-770.67] 


Epoch #35: test_reward: -149.709417 ± 0.000000, best_reward: -127.040722 ± 0.000000 in #0


Epoch #36: 10000it [00:13, 724.77it/s, env_step=252850, gradient_step=252850, len=1000, n/ep=10, n/st=10000, rew=-149.71] 


Epoch #36: test_reward: -770.668756 ± 0.000000, best_reward: -127.040722 ± 0.000000 in #0


Epoch #37: 10000it [00:13, 715.17it/s, env_step=262850, gradient_step=262850, len=1000, n/ep=10, n/st=10000, rew=-770.67] 

Epoch #37: test_reward: -149.715201 ± 0.000000, best_reward: -127.040722 ± 0.000000 in #0



Epoch #38: 1150it [00:01, 709.74it/s, env_step=264000, gradient_step=264000, len=115, n/ep=10, n/st=1150, rew=-149.72] 

Epoch #38: test_reward: -149.715201 ± 0.000000, best_reward: -127.040722 ± 0.000000 in #0



Epoch #39: 1150it [00:01, 716.70it/s, env_step=265150, gradient_step=265150, len=115, n/ep=10, n/st=1150, rew=-149.72] 


Epoch #39: test_reward: -149.709417 ± 0.000000, best_reward: -127.040722 ± 0.000000 in #0


Epoch #40: 10000it [00:14, 713.66it/s, env_step=275150, gradient_step=275150, len=1000, n/ep=10, n/st=10000, rew=-149.71] 


Epoch #40: test_reward: -770.668756 ± 0.000000, best_reward: -127.040722 ± 0.000000 in #0


Epoch #41: 10000it [00:13, 719.28it/s, env_step=285150, gradient_step=285150, len=1000, n/ep=10, n/st=10000, rew=-770.67] 


Epoch #41: test_reward: -149.709417 ± 0.000000, best_reward: -127.040722 ± 0.000000 in #0


Epoch #42: 10000it [00:13, 718.64it/s, env_step=295150, gradient_step=295150, len=1000, n/ep=10, n/st=10000, rew=-149.71] 


Epoch #42: test_reward: -770.668756 ± 0.000000, best_reward: -127.040722 ± 0.000000 in #0


Epoch #43: 10000it [00:13, 719.37it/s, env_step=305150, gradient_step=305150, len=1000, n/ep=10, n/st=10000, rew=-770.67] 

Epoch #43: test_reward: -149.715201 ± 0.000000, best_reward: -127.040722 ± 0.000000 in #0



Epoch #44: 1150it [00:01, 719.67it/s, env_step=306300, gradient_step=306300, len=115, n/ep=10, n/st=1150, rew=-149.72] 

Epoch #44: test_reward: -149.715201 ± 0.000000, best_reward: -127.040722 ± 0.000000 in #0



Epoch #45: 1150it [00:01, 702.21it/s, env_step=307450, gradient_step=307450, len=115, n/ep=10, n/st=1150, rew=-149.72] 


Epoch #45: test_reward: -149.709417 ± 0.000000, best_reward: -127.040722 ± 0.000000 in #0


Epoch #46: 10000it [00:13, 722.06it/s, env_step=317450, gradient_step=317450, len=1000, n/ep=10, n/st=10000, rew=-149.71] 


Epoch #46: test_reward: -770.668756 ± 0.000000, best_reward: -127.040722 ± 0.000000 in #0


Epoch #47: 10000it [00:14, 709.70it/s, env_step=327450, gradient_step=327450, len=1000, n/ep=10, n/st=10000, rew=-770.67] 


Epoch #47: test_reward: -149.709417 ± 0.000000, best_reward: -127.040722 ± 0.000000 in #0


Epoch #48: 10000it [00:14, 712.70it/s, env_step=337450, gradient_step=337450, len=1000, n/ep=10, n/st=10000, rew=-149.71] 


Epoch #48: test_reward: -770.668756 ± 0.000000, best_reward: -127.040722 ± 0.000000 in #0


Epoch #49: 10000it [00:13, 714.34it/s, env_step=347450, gradient_step=347450, len=1000, n/ep=10, n/st=10000, rew=-770.67] 

Epoch #49: test_reward: -149.715201 ± 0.000000, best_reward: -127.040722 ± 0.000000 in #0



Epoch #50: 1150it [00:01, 720.99it/s, env_step=348600, gradient_step=348600, len=115, n/ep=10, n/st=1150, rew=-149.72] 


Epoch #50: test_reward: -149.709417 ± 0.000000, best_reward: -127.040722 ± 0.000000 in #0


Epoch #51: 10000it [00:13, 715.40it/s, env_step=358600, gradient_step=358600, len=1000, n/ep=10, n/st=10000, rew=-149.71] 

Epoch #51: test_reward: -149.715201 ± 0.000000, best_reward: -127.040722 ± 0.000000 in #0



Epoch #52: 1150it [00:01, 687.14it/s, env_step=359750, gradient_step=359750, len=115, n/ep=10, n/st=1150, rew=-149.72] 


Epoch #52: test_reward: -770.668756 ± 0.000000, best_reward: -127.040722 ± 0.000000 in #0


Epoch #53: 10000it [00:14, 706.21it/s, env_step=369750, gradient_step=369750, len=1000, n/ep=10, n/st=10000, rew=-770.67] 


Epoch #53: test_reward: -149.709417 ± 0.000000, best_reward: -127.040722 ± 0.000000 in #0


Epoch #54: 10000it [00:14, 702.03it/s, env_step=379750, gradient_step=379750, len=1000, n/ep=10, n/st=10000, rew=-149.71] 


Epoch #54: test_reward: -770.668756 ± 0.000000, best_reward: -127.040722 ± 0.000000 in #0


Epoch #55: 10000it [00:21, 469.94it/s, env_step=389750, gradient_step=389750, len=1000, n/ep=10, n/st=10000, rew=-770.67] 

Epoch #55: test_reward: -149.715201 ± 0.000000, best_reward: -127.040722 ± 0.000000 in #0



Epoch #56: 1150it [00:01, 652.54it/s, env_step=390900, gradient_step=390900, len=115, n/ep=10, n/st=1150, rew=-149.72] 


Epoch #56: test_reward: -770.668756 ± 0.000000, best_reward: -127.040722 ± 0.000000 in #0


Epoch #57: 10000it [00:14, 688.36it/s, env_step=400900, gradient_step=400900, len=1000, n/ep=10, n/st=10000, rew=-770.67] 

Epoch #57: test_reward: -149.715201 ± 0.000000, best_reward: -127.040722 ± 0.000000 in #0



Epoch #58: 1150it [00:01, 680.28it/s, env_step=402050, gradient_step=402050, len=115, n/ep=10, n/st=1150, rew=-149.72] 


Epoch #58: test_reward: -149.709417 ± 0.000000, best_reward: -127.040722 ± 0.000000 in #0


Epoch #59: 10000it [00:14, 693.47it/s, env_step=412050, gradient_step=412050, len=1000, n/ep=10, n/st=10000, rew=-149.71] 


Epoch #59: test_reward: -770.668756 ± 0.000000, best_reward: -127.040722 ± 0.000000 in #0


Epoch #60: 10000it [00:14, 706.20it/s, env_step=422050, gradient_step=422050, len=1000, n/ep=10, n/st=10000, rew=-770.67] 


Epoch #60: test_reward: -149.709417 ± 0.000000, best_reward: -127.040722 ± 0.000000 in #0


Epoch #61: 10000it [00:14, 707.67it/s, env_step=432050, gradient_step=432050, len=1000, n/ep=10, n/st=10000, rew=-149.71] 


Epoch #61: test_reward: -770.668756 ± 0.000000, best_reward: -127.040722 ± 0.000000 in #0


Epoch #62: 10000it [00:14, 704.18it/s, env_step=442050, gradient_step=442050, len=1000, n/ep=10, n/st=10000, rew=-770.67] 

Epoch #62: test_reward: -149.715201 ± 0.000000, best_reward: -127.040722 ± 0.000000 in #0



Epoch #63: 1150it [00:01, 704.88it/s, env_step=443200, gradient_step=443200, len=115, n/ep=10, n/st=1150, rew=-149.72] 


Epoch #63: test_reward: -770.668756 ± 0.000000, best_reward: -127.040722 ± 0.000000 in #0


Epoch #64: 10000it [00:14, 711.11it/s, env_step=453200, gradient_step=453200, len=1000, n/ep=10, n/st=10000, rew=-770.67] 

Epoch #64: test_reward: -149.715201 ± 0.000000, best_reward: -127.040722 ± 0.000000 in #0



Epoch #65: 1150it [00:01, 702.54it/s, env_step=454350, gradient_step=454350, len=115, n/ep=10, n/st=1150, rew=-149.72] 


Epoch #65: test_reward: -770.668756 ± 0.000000, best_reward: -127.040722 ± 0.000000 in #0


Epoch #66: 10000it [00:14, 711.85it/s, env_step=464350, gradient_step=464350, len=1000, n/ep=10, n/st=10000, rew=-770.67] 


Epoch #66: test_reward: -149.709417 ± 0.000000, best_reward: -127.040722 ± 0.000000 in #0


Epoch #67: 10000it [00:13, 720.31it/s, env_step=474350, gradient_step=474350, len=1000, n/ep=10, n/st=10000, rew=-149.71] 


Epoch #67: test_reward: -770.668756 ± 0.000000, best_reward: -127.040722 ± 0.000000 in #0


Epoch #68: 10000it [00:14, 705.84it/s, env_step=484350, gradient_step=484350, len=1000, n/ep=10, n/st=10000, rew=-770.67] 

Epoch #68: test_reward: -149.715201 ± 0.000000, best_reward: -127.040722 ± 0.000000 in #0



Epoch #69: 1150it [00:01, 717.14it/s, env_step=485500, gradient_step=485500, len=115, n/ep=10, n/st=1150, rew=-149.72] 


Epoch #69: test_reward: -770.668756 ± 0.000000, best_reward: -127.040722 ± 0.000000 in #0


Epoch #70: 10000it [00:13, 716.80it/s, env_step=495500, gradient_step=495500, len=1000, n/ep=10, n/st=10000, rew=-770.67] 


Epoch #70: test_reward: -149.709417 ± 0.000000, best_reward: -127.040722 ± 0.000000 in #0


Epoch #71: 10000it [00:13, 724.14it/s, env_step=505500, gradient_step=505500, len=1000, n/ep=10, n/st=10000, rew=-149.71] 

Epoch #71: test_reward: -149.715201 ± 0.000000, best_reward: -127.040722 ± 0.000000 in #0



Epoch #72: 1150it [00:01, 720.01it/s, env_step=506650, gradient_step=506650, len=115, n/ep=10, n/st=1150, rew=-149.72] 

Epoch #72: test_reward: -149.715201 ± 0.000000, best_reward: -127.040722 ± 0.000000 in #0



Epoch #73: 1150it [00:01, 693.57it/s, env_step=507800, gradient_step=507800, len=115, n/ep=10, n/st=1150, rew=-149.72] 


Epoch #73: test_reward: -770.668756 ± 0.000000, best_reward: -127.040722 ± 0.000000 in #0


Epoch #74: 10000it [00:14, 712.25it/s, env_step=517800, gradient_step=517800, len=1000, n/ep=10, n/st=10000, rew=-770.67] 


Epoch #74: test_reward: -149.709417 ± 0.000000, best_reward: -127.040722 ± 0.000000 in #0


Epoch #75: 10000it [00:13, 723.43it/s, env_step=527800, gradient_step=527800, len=1000, n/ep=10, n/st=10000, rew=-149.71] 


Epoch #75: test_reward: -770.668756 ± 0.000000, best_reward: -127.040722 ± 0.000000 in #0


Epoch #76: 10000it [00:14, 712.50it/s, env_step=537800, gradient_step=537800, len=1000, n/ep=10, n/st=10000, rew=-770.67] 

Epoch #76: test_reward: -149.715201 ± 0.000000, best_reward: -127.040722 ± 0.000000 in #0



Epoch #77: 1150it [00:01, 657.28it/s, env_step=538950, gradient_step=538950, len=115, n/ep=10, n/st=1150, rew=-149.72] 


Epoch #77: test_reward: -770.668756 ± 0.000000, best_reward: -127.040722 ± 0.000000 in #0


Epoch #78: 10000it [00:13, 721.25it/s, env_step=548950, gradient_step=548950, len=1000, n/ep=10, n/st=10000, rew=-770.67] 


Epoch #78: test_reward: -149.709417 ± 0.000000, best_reward: -127.040722 ± 0.000000 in #0


Epoch #79: 10000it [00:14, 697.22it/s, env_step=558950, gradient_step=558950, len=1000, n/ep=10, n/st=10000, rew=-149.71] 

Epoch #79: test_reward: -149.715201 ± 0.000000, best_reward: -127.040722 ± 0.000000 in #0



Epoch #80: 1150it [00:02, 525.65it/s, env_step=560100, gradient_step=560100, len=115, n/ep=10, n/st=1150, rew=-149.72] 


Epoch #80: test_reward: -770.668756 ± 0.000000, best_reward: -127.040722 ± 0.000000 in #0


Epoch #81: 10000it [00:14, 702.48it/s, env_step=570100, gradient_step=570100, len=1000, n/ep=10, n/st=10000, rew=-770.67] 

Epoch #81: test_reward: -149.715201 ± 0.000000, best_reward: -127.040722 ± 0.000000 in #0



Epoch #82: 1150it [00:01, 720.55it/s, env_step=571250, gradient_step=571250, len=115, n/ep=10, n/st=1150, rew=-149.72] 


Epoch #82: test_reward: -770.668756 ± 0.000000, best_reward: -127.040722 ± 0.000000 in #0


Epoch #83: 10000it [00:14, 705.55it/s, env_step=581250, gradient_step=581250, len=1000, n/ep=10, n/st=10000, rew=-770.67] 


Epoch #83: test_reward: -149.709417 ± 0.000000, best_reward: -127.040722 ± 0.000000 in #0


Epoch #84: 10000it [00:14, 708.92it/s, env_step=591250, gradient_step=591250, len=1000, n/ep=10, n/st=10000, rew=-149.71] 


Epoch #84: test_reward: -770.668756 ± 0.000000, best_reward: -127.040722 ± 0.000000 in #0


Epoch #85: 10000it [00:14, 708.09it/s, env_step=601250, gradient_step=601250, len=1000, n/ep=10, n/st=10000, rew=-770.67] 

Epoch #85: test_reward: -149.715201 ± 0.000000, best_reward: -127.040722 ± 0.000000 in #0



Epoch #86: 1150it [00:01, 692.95it/s, env_step=602400, gradient_step=602400, len=115, n/ep=10, n/st=1150, rew=-149.72] 


Epoch #86: test_reward: -770.668756 ± 0.000000, best_reward: -127.040722 ± 0.000000 in #0


Epoch #87: 10000it [00:14, 709.88it/s, env_step=612400, gradient_step=612400, len=1000, n/ep=10, n/st=10000, rew=-770.67] 


Epoch #87: test_reward: -149.709417 ± 0.000000, best_reward: -127.040722 ± 0.000000 in #0


Epoch #88: 10000it [00:14, 708.50it/s, env_step=622400, gradient_step=622400, len=1000, n/ep=10, n/st=10000, rew=-149.71] 

Epoch #88: test_reward: -149.715201 ± 0.000000, best_reward: -127.040722 ± 0.000000 in #0



Epoch #89: 1150it [00:01, 674.13it/s, env_step=623550, gradient_step=623550, len=115, n/ep=10, n/st=1150, rew=-149.72] 


Epoch #89: test_reward: -770.668756 ± 0.000000, best_reward: -127.040722 ± 0.000000 in #0


Epoch #90: 10000it [00:14, 681.23it/s, env_step=633550, gradient_step=633550, len=1000, n/ep=10, n/st=10000, rew=-770.67] 

Epoch #90: test_reward: -149.715201 ± 0.000000, best_reward: -127.040722 ± 0.000000 in #0



Epoch #91: 1150it [00:01, 703.05it/s, env_step=634700, gradient_step=634700, len=115, n/ep=10, n/st=1150, rew=-149.72] 


Epoch #91: test_reward: -770.668756 ± 0.000000, best_reward: -127.040722 ± 0.000000 in #0


Epoch #92: 10000it [00:14, 705.12it/s, env_step=644700, gradient_step=644700, len=1000, n/ep=10, n/st=10000, rew=-770.67] 


Epoch #92: test_reward: -149.709417 ± 0.000000, best_reward: -127.040722 ± 0.000000 in #0


Epoch #93: 10000it [00:14, 704.84it/s, env_step=654700, gradient_step=654700, len=1000, n/ep=10, n/st=10000, rew=-149.71] 


Epoch #93: test_reward: -770.668756 ± 0.000000, best_reward: -127.040722 ± 0.000000 in #0


Epoch #94: 10000it [00:14, 669.53it/s, env_step=664700, gradient_step=664700, len=1000, n/ep=10, n/st=10000, rew=-770.67] 

Epoch #94: test_reward: -149.715201 ± 0.000000, best_reward: -127.040722 ± 0.000000 in #0



Epoch #95: 1150it [00:02, 535.35it/s, env_step=665850, gradient_step=665850, len=115, n/ep=10, n/st=1150, rew=-149.72] 


Epoch #95: test_reward: -149.709417 ± 0.000000, best_reward: -127.040722 ± 0.000000 in #0


Epoch #96: 10000it [00:16, 622.67it/s, env_step=675850, gradient_step=675850, len=1000, n/ep=10, n/st=10000, rew=-149.71] 


Epoch #96: test_reward: -770.668756 ± 0.000000, best_reward: -127.040722 ± 0.000000 in #0


Epoch #97: 10000it [00:14, 687.07it/s, env_step=685850, gradient_step=685850, len=1000, n/ep=10, n/st=10000, rew=-770.67] 


Epoch #97: test_reward: -149.709417 ± 0.000000, best_reward: -127.040722 ± 0.000000 in #0


Epoch #98: 10000it [00:14, 670.41it/s, env_step=695850, gradient_step=695850, len=1000, n/ep=10, n/st=10000, rew=-149.71] 

Epoch #98: test_reward: -149.715201 ± 0.000000, best_reward: -127.040722 ± 0.000000 in #0



Epoch #99: 1150it [00:01, 663.99it/s, env_step=697000, gradient_step=697000, len=115, n/ep=10, n/st=1150, rew=-149.72] 


Epoch #99: test_reward: -770.668756 ± 0.000000, best_reward: -127.040722 ± 0.000000 in #0


Epoch #100: 10000it [00:14, 681.61it/s, env_step=707000, gradient_step=707000, len=1000, n/ep=10, n/st=10000, rew=-770.67] 


Epoch #100: test_reward: -770.668756 ± 0.000000, best_reward: -127.040722 ± 0.000000 in #0


InfoStats(gradient_step=707000, best_reward=-127.04072227145149, best_reward_std=0.0, train_step=707000, train_episode=1000, test_step=71700, test_episode=101, timing=TimingStats(total_time=1084.2739489078522, train_time=1044.073079109192, train_time_collect=390.3086099624634, train_time_update=624.7311413288116, test_time=40.20086979866028, update_speed=677.1556648153554))

In [None]:
run(SimpleNN, scenario_3)

Epoch #1: 30000it [00:49, 611.32it/s, env_step=30000, gradient_step=30000, len=3000, n/ep=10, n/st=30000, rew=-171.67] 

Epoch #1: test_reward: -152.675397 ± 0.000000, best_reward: -152.675397 ± 0.000000 in #1



Epoch #2: 2100it [00:03, 684.46it/s, env_step=32100, gradient_step=32100, len=210, n/ep=10, n/st=2100, rew=-152.68] 

Epoch #2: test_reward: -152.675397 ± 0.000000, best_reward: -152.675397 ± 0.000000 in #1



Epoch #3: 2100it [00:03, 670.87it/s, env_step=34200, gradient_step=34200, len=210, n/ep=10, n/st=2100, rew=-152.68] 


Epoch #3: test_reward: -1130.555816 ± 0.000000, best_reward: -152.675397 ± 0.000000 in #1


Epoch #4: 30000it [00:49, 605.11it/s, env_step=64200, gradient_step=64200, len=3000, n/ep=10, n/st=30000, rew=-1130.56] 


Epoch #4: test_reward: -171.674998 ± 0.000000, best_reward: -152.675397 ± 0.000000 in #1


Epoch #5: 30000it [00:42, 699.80it/s, env_step=94200, gradient_step=94200, len=3000, n/ep=10, n/st=30000, rew=-171.67] 

Epoch #5: test_reward: -152.675397 ± 0.000000, best_reward: -152.675397 ± 0.000000 in #1



Epoch #6: 2100it [00:03, 651.77it/s, env_step=96300, gradient_step=96300, len=210, n/ep=10, n/st=2100, rew=-152.68] 

Epoch #6: test_reward: -152.675397 ± 0.000000, best_reward: -152.675397 ± 0.000000 in #1



Epoch #7: 2100it [00:03, 690.81it/s, env_step=98400, gradient_step=98400, len=210, n/ep=10, n/st=2100, rew=-152.68] 


Epoch #7: test_reward: -1130.555816 ± 0.000000, best_reward: -152.675397 ± 0.000000 in #1


Epoch #8: 30000it [00:43, 695.97it/s, env_step=128400, gradient_step=128400, len=3000, n/ep=10, n/st=30000, rew=-1130.56] 


Epoch #8: test_reward: -171.674998 ± 0.000000, best_reward: -152.675397 ± 0.000000 in #1


Epoch #9: 30000it [00:42, 699.07it/s, env_step=158400, gradient_step=158400, len=3000, n/ep=10, n/st=30000, rew=-171.67] 

Epoch #9: test_reward: -152.675397 ± 0.000000, best_reward: -152.675397 ± 0.000000 in #1



Epoch #10: 2100it [00:03, 642.83it/s, env_step=160500, gradient_step=160500, len=210, n/ep=10, n/st=2100, rew=-152.68] 


Epoch #10: test_reward: -171.674998 ± 0.000000, best_reward: -152.675397 ± 0.000000 in #1


Epoch #11: 30000it [00:43, 693.76it/s, env_step=190500, gradient_step=190500, len=3000, n/ep=10, n/st=30000, rew=-171.67] 


Epoch #11: test_reward: -1130.555816 ± 0.000000, best_reward: -152.675397 ± 0.000000 in #1


Epoch #12: 30000it [00:43, 691.64it/s, env_step=220500, gradient_step=220500, len=3000, n/ep=10, n/st=30000, rew=-1130.56] 


Epoch #12: test_reward: -171.674998 ± 0.000000, best_reward: -152.675397 ± 0.000000 in #1


Epoch #13: 30000it [00:43, 691.47it/s, env_step=250500, gradient_step=250500, len=3000, n/ep=10, n/st=30000, rew=-171.67] 

Epoch #13: test_reward: -152.675397 ± 0.000000, best_reward: -152.675397 ± 0.000000 in #1



Epoch #14: 2100it [00:03, 672.69it/s, env_step=252600, gradient_step=252600, len=210, n/ep=10, n/st=2100, rew=-152.68] 


Epoch #14: test_reward: -1130.555816 ± 0.000000, best_reward: -152.675397 ± 0.000000 in #1


Epoch #15: 30000it [00:43, 691.74it/s, env_step=282600, gradient_step=282600, len=3000, n/ep=10, n/st=30000, rew=-1130.56] 

Epoch #15: test_reward: -152.675397 ± 0.000000, best_reward: -152.675397 ± 0.000000 in #1



Epoch #16: 2100it [00:03, 679.80it/s, env_step=284700, gradient_step=284700, len=210, n/ep=10, n/st=2100, rew=-152.68] 


Epoch #16: test_reward: -1130.555816 ± 0.000000, best_reward: -152.675397 ± 0.000000 in #1


Epoch #17: 30000it [00:43, 690.02it/s, env_step=314700, gradient_step=314700, len=3000, n/ep=10, n/st=30000, rew=-1130.56] 


Epoch #17: test_reward: -171.674998 ± 0.000000, best_reward: -152.675397 ± 0.000000 in #1


Epoch #18: 30000it [00:43, 695.71it/s, env_step=344700, gradient_step=344700, len=3000, n/ep=10, n/st=30000, rew=-171.67] 

Epoch #18: test_reward: -152.675397 ± 0.000000, best_reward: -152.675397 ± 0.000000 in #1



Epoch #19: 2100it [00:03, 678.19it/s, env_step=346800, gradient_step=346800, len=210, n/ep=10, n/st=2100, rew=-152.68] 


Epoch #19: test_reward: -171.674998 ± 0.000000, best_reward: -152.675397 ± 0.000000 in #1


Epoch #20: 30000it [00:44, 680.94it/s, env_step=376800, gradient_step=376800, len=3000, n/ep=10, n/st=30000, rew=-171.67] 


Epoch #20: test_reward: -1130.555816 ± 0.000000, best_reward: -152.675397 ± 0.000000 in #1


Epoch #21: 30000it [00:43, 690.38it/s, env_step=406800, gradient_step=406800, len=3000, n/ep=10, n/st=30000, rew=-1130.56] 


Epoch #21: test_reward: -171.674998 ± 0.000000, best_reward: -152.675397 ± 0.000000 in #1


Epoch #22: 30000it [00:42, 697.85it/s, env_step=436800, gradient_step=436800, len=3000, n/ep=10, n/st=30000, rew=-171.67] 

Epoch #22: test_reward: -152.675397 ± 0.000000, best_reward: -152.675397 ± 0.000000 in #1



Epoch #23: 2100it [00:03, 683.74it/s, env_step=438900, gradient_step=438900, len=210, n/ep=10, n/st=2100, rew=-152.68] 


Epoch #23: test_reward: -171.674998 ± 0.000000, best_reward: -152.675397 ± 0.000000 in #1


Epoch #24: 30000it [00:43, 692.37it/s, env_step=468900, gradient_step=468900, len=3000, n/ep=10, n/st=30000, rew=-171.67] 


Epoch #24: test_reward: -1130.555816 ± 0.000000, best_reward: -152.675397 ± 0.000000 in #1


Epoch #25: 30000it [00:43, 687.33it/s, env_step=498900, gradient_step=498900, len=3000, n/ep=10, n/st=30000, rew=-1130.56] 


Epoch #25: test_reward: -171.674998 ± 0.000000, best_reward: -152.675397 ± 0.000000 in #1


Epoch #26: 30000it [00:43, 691.65it/s, env_step=528900, gradient_step=528900, len=3000, n/ep=10, n/st=30000, rew=-171.67] 

Epoch #26: test_reward: -152.675397 ± 0.000000, best_reward: -152.675397 ± 0.000000 in #1



Epoch #27: 2100it [00:02, 703.32it/s, env_step=531000, gradient_step=531000, len=210, n/ep=10, n/st=2100, rew=-152.68] 


Epoch #27: test_reward: -171.674998 ± 0.000000, best_reward: -152.675397 ± 0.000000 in #1


Epoch #28: 30000it [00:43, 695.23it/s, env_step=561000, gradient_step=561000, len=3000, n/ep=10, n/st=30000, rew=-171.67] 


Epoch #28: test_reward: -1130.555816 ± 0.000000, best_reward: -152.675397 ± 0.000000 in #1


Epoch #29: 30000it [00:43, 691.92it/s, env_step=591000, gradient_step=591000, len=3000, n/ep=10, n/st=30000, rew=-1130.56] 


Epoch #29: test_reward: -171.674998 ± 0.000000, best_reward: -152.675397 ± 0.000000 in #1


Epoch #30: 30000it [00:43, 696.70it/s, env_step=621000, gradient_step=621000, len=3000, n/ep=10, n/st=30000, rew=-171.67] 


Epoch #30: test_reward: -1130.555816 ± 0.000000, best_reward: -152.675397 ± 0.000000 in #1


Epoch #31: 30000it [00:43, 695.32it/s, env_step=651000, gradient_step=651000, len=3000, n/ep=10, n/st=30000, rew=-1130.56] 


Epoch #31: test_reward: -171.674998 ± 0.000000, best_reward: -152.675397 ± 0.000000 in #1


Epoch #32: 30000it [00:42, 699.14it/s, env_step=681000, gradient_step=681000, len=3000, n/ep=10, n/st=30000, rew=-171.67] 

Epoch #32: test_reward: -152.675397 ± 0.000000, best_reward: -152.675397 ± 0.000000 in #1



Epoch #33: 2100it [00:02, 713.40it/s, env_step=683100, gradient_step=683100, len=210, n/ep=10, n/st=2100, rew=-152.68] 


Epoch #33: test_reward: -171.674998 ± 0.000000, best_reward: -152.675397 ± 0.000000 in #1


Epoch #34: 30000it [00:42, 699.72it/s, env_step=713100, gradient_step=713100, len=3000, n/ep=10, n/st=30000, rew=-171.67] 

Epoch #34: test_reward: -152.675397 ± 0.000000, best_reward: -152.675397 ± 0.000000 in #1



Epoch #35: 2100it [00:03, 632.60it/s, env_step=715200, gradient_step=715200, len=210, n/ep=10, n/st=2100, rew=-152.68] 


Epoch #35: test_reward: -171.674998 ± 0.000000, best_reward: -152.675397 ± 0.000000 in #1


Epoch #36: 30000it [00:42, 697.82it/s, env_step=745200, gradient_step=745200, len=3000, n/ep=10, n/st=30000, rew=-171.67] 


Epoch #36: test_reward: -1130.555816 ± 0.000000, best_reward: -152.675397 ± 0.000000 in #1


Epoch #37: 30000it [00:43, 697.28it/s, env_step=775200, gradient_step=775200, len=3000, n/ep=10, n/st=30000, rew=-1130.56] 


Epoch #37: test_reward: -171.674998 ± 0.000000, best_reward: -152.675397 ± 0.000000 in #1


Epoch #38: 30000it [00:42, 703.73it/s, env_step=805200, gradient_step=805200, len=3000, n/ep=10, n/st=30000, rew=-171.67] 


Epoch #38: test_reward: -1130.555816 ± 0.000000, best_reward: -152.675397 ± 0.000000 in #1


Epoch #39: 30000it [00:42, 698.16it/s, env_step=835200, gradient_step=835200, len=3000, n/ep=10, n/st=30000, rew=-1130.56] 


Epoch #39: test_reward: -171.674998 ± 0.000000, best_reward: -152.675397 ± 0.000000 in #1


Epoch #40: 30000it [00:42, 702.37it/s, env_step=865200, gradient_step=865200, len=3000, n/ep=10, n/st=30000, rew=-171.67] 


Epoch #40: test_reward: -1130.555816 ± 0.000000, best_reward: -152.675397 ± 0.000000 in #1


Epoch #41: 30000it [00:43, 688.48it/s, env_step=895200, gradient_step=895200, len=3000, n/ep=10, n/st=30000, rew=-1130.56] 


Epoch #41: test_reward: -171.674998 ± 0.000000, best_reward: -152.675397 ± 0.000000 in #1


Epoch #42: 30000it [00:42, 699.58it/s, env_step=925200, gradient_step=925200, len=3000, n/ep=10, n/st=30000, rew=-171.67] 

Epoch #42: test_reward: -152.675397 ± 0.000000, best_reward: -152.675397 ± 0.000000 in #1



Epoch #43: 2100it [00:03, 647.17it/s, env_step=927300, gradient_step=927300, len=210, n/ep=10, n/st=2100, rew=-152.68] 


Epoch #43: test_reward: -171.674998 ± 0.000000, best_reward: -152.675397 ± 0.000000 in #1


Epoch #44: 30000it [00:46, 651.77it/s, env_step=957300, gradient_step=957300, len=3000, n/ep=10, n/st=30000, rew=-171.67] 


Epoch #44: test_reward: -1130.555816 ± 0.000000, best_reward: -152.675397 ± 0.000000 in #1


Epoch #45: 30000it [00:44, 680.32it/s, env_step=987300, gradient_step=987300, len=3000, n/ep=10, n/st=30000, rew=-1130.56] 


Epoch #45: test_reward: -171.674998 ± 0.000000, best_reward: -152.675397 ± 0.000000 in #1


Epoch #46: 30000it [00:44, 680.97it/s, env_step=1017300, gradient_step=1017300, len=3000, n/ep=10, n/st=30000, rew=-171.67] 

Epoch #46: test_reward: -152.675397 ± 0.000000, best_reward: -152.675397 ± 0.000000 in #1



Epoch #47: 2100it [00:03, 558.13it/s, env_step=1019400, gradient_step=1019400, len=210, n/ep=10, n/st=2100, rew=-152.68] 


Epoch #47: test_reward: -171.674998 ± 0.000000, best_reward: -152.675397 ± 0.000000 in #1


Epoch #48: 30000it [00:49, 601.31it/s, env_step=1049400, gradient_step=1049400, len=3000, n/ep=10, n/st=30000, rew=-171.67] 


Epoch #48: test_reward: -1130.555816 ± 0.000000, best_reward: -152.675397 ± 0.000000 in #1


Epoch #49: 30000it [00:45, 665.39it/s, env_step=1079400, gradient_step=1079400, len=3000, n/ep=10, n/st=30000, rew=-1130.56] 


Epoch #49: test_reward: -171.674998 ± 0.000000, best_reward: -152.675397 ± 0.000000 in #1


Epoch #50: 30000it [00:49, 607.47it/s, env_step=1109400, gradient_step=1109400, len=3000, n/ep=10, n/st=30000, rew=-171.67] 


Epoch #50: test_reward: -1130.555816 ± 0.000000, best_reward: -152.675397 ± 0.000000 in #1


Epoch #51: 30000it [00:47, 626.85it/s, env_step=1139400, gradient_step=1139400, len=3000, n/ep=10, n/st=30000, rew=-1130.56] 


Epoch #51: test_reward: -171.674998 ± 0.000000, best_reward: -152.675397 ± 0.000000 in #1


Epoch #52: 30000it [00:42, 710.09it/s, env_step=1169400, gradient_step=1169400, len=3000, n/ep=10, n/st=30000, rew=-171.67] 

Epoch #52: test_reward: -152.675397 ± 0.000000, best_reward: -152.675397 ± 0.000000 in #1



Epoch #53: 2100it [00:03, 699.34it/s, env_step=1171500, gradient_step=1171500, len=210, n/ep=10, n/st=2100, rew=-152.68] 


Epoch #53: test_reward: -171.674998 ± 0.000000, best_reward: -152.675397 ± 0.000000 in #1


Epoch #54: 30000it [00:42, 710.59it/s, env_step=1201500, gradient_step=1201500, len=3000, n/ep=10, n/st=30000, rew=-171.67] 

Epoch #54: test_reward: -152.675397 ± 0.000000, best_reward: -152.675397 ± 0.000000 in #1



Epoch #55: 2100it [00:02, 707.49it/s, env_step=1203600, gradient_step=1203600, len=210, n/ep=10, n/st=2100, rew=-152.68] 


Epoch #55: test_reward: -171.674998 ± 0.000000, best_reward: -152.675397 ± 0.000000 in #1


Epoch #56: 30000it [00:42, 712.30it/s, env_step=1233600, gradient_step=1233600, len=3000, n/ep=10, n/st=30000, rew=-171.67] 


Epoch #56: test_reward: -1130.555816 ± 0.000000, best_reward: -152.675397 ± 0.000000 in #1


Epoch #57: 30000it [00:42, 710.75it/s, env_step=1263600, gradient_step=1263600, len=3000, n/ep=10, n/st=30000, rew=-1130.56] 


Epoch #57: test_reward: -171.674998 ± 0.000000, best_reward: -152.675397 ± 0.000000 in #1


Epoch #58: 30000it [00:45, 661.03it/s, env_step=1293600, gradient_step=1293600, len=3000, n/ep=10, n/st=30000, rew=-171.67] 


Epoch #58: test_reward: -1130.555816 ± 0.000000, best_reward: -152.675397 ± 0.000000 in #1


Epoch #59: 30000it [00:42, 703.41it/s, env_step=1323600, gradient_step=1323600, len=3000, n/ep=10, n/st=30000, rew=-1130.56] 

Epoch #59: test_reward: -152.675397 ± 0.000000, best_reward: -152.675397 ± 0.000000 in #1



Epoch #60: 2100it [00:02, 702.24it/s, env_step=1325700, gradient_step=1325700, len=210, n/ep=10, n/st=2100, rew=-152.68] 


Epoch #60: test_reward: -171.674998 ± 0.000000, best_reward: -152.675397 ± 0.000000 in #1


Epoch #61: 30000it [00:42, 708.25it/s, env_step=1355700, gradient_step=1355700, len=3000, n/ep=10, n/st=30000, rew=-171.67] 

Epoch #61: test_reward: -152.675397 ± 0.000000, best_reward: -152.675397 ± 0.000000 in #1



Epoch #62: 2100it [00:02, 706.12it/s, env_step=1357800, gradient_step=1357800, len=210, n/ep=10, n/st=2100, rew=-152.68] 


Epoch #62: test_reward: -171.674998 ± 0.000000, best_reward: -152.675397 ± 0.000000 in #1


Epoch #63: 30000it [00:42, 710.75it/s, env_step=1387800, gradient_step=1387800, len=3000, n/ep=10, n/st=30000, rew=-171.67] 


Epoch #63: test_reward: -1130.555816 ± 0.000000, best_reward: -152.675397 ± 0.000000 in #1


Epoch #64: 30000it [00:42, 706.75it/s, env_step=1417800, gradient_step=1417800, len=3000, n/ep=10, n/st=30000, rew=-1130.56] 


Epoch #64: test_reward: -171.674998 ± 0.000000, best_reward: -152.675397 ± 0.000000 in #1


Epoch #65: 30000it [00:42, 711.94it/s, env_step=1447800, gradient_step=1447800, len=3000, n/ep=10, n/st=30000, rew=-171.67] 


Epoch #65: test_reward: -1130.555816 ± 0.000000, best_reward: -152.675397 ± 0.000000 in #1


Epoch #66: 30000it [00:43, 693.82it/s, env_step=1477800, gradient_step=1477800, len=3000, n/ep=10, n/st=30000, rew=-1130.56] 


Epoch #66: test_reward: -171.674998 ± 0.000000, best_reward: -152.675397 ± 0.000000 in #1


Epoch #67: 30000it [00:42, 706.54it/s, env_step=1507800, gradient_step=1507800, len=3000, n/ep=10, n/st=30000, rew=-171.67] 


Epoch #67: test_reward: -1130.555816 ± 0.000000, best_reward: -152.675397 ± 0.000000 in #1


Epoch #68: 30000it [00:42, 705.12it/s, env_step=1537800, gradient_step=1537800, len=3000, n/ep=10, n/st=30000, rew=-1130.56] 


Epoch #68: test_reward: -171.674998 ± 0.000000, best_reward: -152.675397 ± 0.000000 in #1


Epoch #69: 30000it [00:43, 694.18it/s, env_step=1567800, gradient_step=1567800, len=3000, n/ep=10, n/st=30000, rew=-171.67] 

Epoch #69: test_reward: -152.675397 ± 0.000000, best_reward: -152.675397 ± 0.000000 in #1



Epoch #70: 2100it [00:02, 709.27it/s, env_step=1569900, gradient_step=1569900, len=210, n/ep=10, n/st=2100, rew=-152.68] 


Epoch #70: test_reward: -171.674998 ± 0.000000, best_reward: -152.675397 ± 0.000000 in #1


Epoch #71: 30000it [00:47, 634.87it/s, env_step=1599900, gradient_step=1599900, len=3000, n/ep=10, n/st=30000, rew=-171.67] 


Epoch #71: test_reward: -1130.555816 ± 0.000000, best_reward: -152.675397 ± 0.000000 in #1


Epoch #72: 30000it [00:44, 675.19it/s, env_step=1629900, gradient_step=1629900, len=3000, n/ep=10, n/st=30000, rew=-1130.56] 


Epoch #72: test_reward: -171.674998 ± 0.000000, best_reward: -152.675397 ± 0.000000 in #1


Epoch #73: 30000it [00:44, 675.50it/s, env_step=1659900, gradient_step=1659900, len=3000, n/ep=10, n/st=30000, rew=-171.67] 

Epoch #73: test_reward: -152.675397 ± 0.000000, best_reward: -152.675397 ± 0.000000 in #1



Epoch #74: 2100it [00:03, 625.02it/s, env_step=1662000, gradient_step=1662000, len=210, n/ep=10, n/st=2100, rew=-152.68] 


Epoch #74: test_reward: -171.674998 ± 0.000000, best_reward: -152.675397 ± 0.000000 in #1


Epoch #75: 30000it [00:43, 689.96it/s, env_step=1692000, gradient_step=1692000, len=3000, n/ep=10, n/st=30000, rew=-171.67] 


Epoch #75: test_reward: -1130.555816 ± 0.000000, best_reward: -152.675397 ± 0.000000 in #1


Epoch #76: 30000it [00:43, 685.75it/s, env_step=1722000, gradient_step=1722000, len=3000, n/ep=10, n/st=30000, rew=-1130.56] 


Epoch #76: test_reward: -171.674998 ± 0.000000, best_reward: -152.675397 ± 0.000000 in #1


Epoch #77: 30000it [00:44, 669.59it/s, env_step=1752000, gradient_step=1752000, len=3000, n/ep=10, n/st=30000, rew=-171.67] 


Epoch #77: test_reward: -1130.555816 ± 0.000000, best_reward: -152.675397 ± 0.000000 in #1


Epoch #78: 30000it [00:45, 663.40it/s, env_step=1782000, gradient_step=1782000, len=3000, n/ep=10, n/st=30000, rew=-1130.56] 


Epoch #78: test_reward: -171.674998 ± 0.000000, best_reward: -152.675397 ± 0.000000 in #1


Epoch #79: 30000it [00:45, 664.08it/s, env_step=1812000, gradient_step=1812000, len=3000, n/ep=10, n/st=30000, rew=-171.67] 

Epoch #79: test_reward: -152.675397 ± 0.000000, best_reward: -152.675397 ± 0.000000 in #1



Epoch #80: 2100it [00:03, 679.86it/s, env_step=1814100, gradient_step=1814100, len=210, n/ep=10, n/st=2100, rew=-152.68] 


Epoch #80: test_reward: -171.674998 ± 0.000000, best_reward: -152.675397 ± 0.000000 in #1


Epoch #81: 30000it [00:44, 672.58it/s, env_step=1844100, gradient_step=1844100, len=3000, n/ep=10, n/st=30000, rew=-171.67] 

Epoch #81: test_reward: -152.675397 ± 0.000000, best_reward: -152.675397 ± 0.000000 in #1



Epoch #82: 2100it [00:03, 693.73it/s, env_step=1846200, gradient_step=1846200, len=210, n/ep=10, n/st=2100, rew=-152.68] 


Epoch #82: test_reward: -1130.555816 ± 0.000000, best_reward: -152.675397 ± 0.000000 in #1


Epoch #83: 30000it [00:43, 686.57it/s, env_step=1876200, gradient_step=1876200, len=3000, n/ep=10, n/st=30000, rew=-1130.56] 


Epoch #83: test_reward: -171.674998 ± 0.000000, best_reward: -152.675397 ± 0.000000 in #1


Epoch #84: 30000it [00:43, 694.81it/s, env_step=1906200, gradient_step=1906200, len=3000, n/ep=10, n/st=30000, rew=-171.67] 


Epoch #84: test_reward: -1130.555816 ± 0.000000, best_reward: -152.675397 ± 0.000000 in #1


Epoch #85: 30000it [00:43, 685.41it/s, env_step=1936200, gradient_step=1936200, len=3000, n/ep=10, n/st=30000, rew=-1130.56] 


Epoch #85: test_reward: -171.674998 ± 0.000000, best_reward: -152.675397 ± 0.000000 in #1


Epoch #86: 30000it [00:43, 693.51it/s, env_step=1966200, gradient_step=1966200, len=3000, n/ep=10, n/st=30000, rew=-171.67] 


Epoch #86: test_reward: -1130.555816 ± 0.000000, best_reward: -152.675397 ± 0.000000 in #1


Epoch #87: 30000it [00:43, 684.92it/s, env_step=1996200, gradient_step=1996200, len=3000, n/ep=10, n/st=30000, rew=-1130.56] 


Epoch #87: test_reward: -171.674998 ± 0.000000, best_reward: -152.675397 ± 0.000000 in #1


Epoch #88: 30000it [00:43, 693.35it/s, env_step=2026200, gradient_step=2026200, len=3000, n/ep=10, n/st=30000, rew=-171.67] 

Epoch #88: test_reward: -152.675397 ± 0.000000, best_reward: -152.675397 ± 0.000000 in #1



Epoch #89: 2100it [00:02, 711.40it/s, env_step=2028300, gradient_step=2028300, len=210, n/ep=10, n/st=2100, rew=-152.68] 


Epoch #89: test_reward: -171.674998 ± 0.000000, best_reward: -152.675397 ± 0.000000 in #1


Epoch #90: 30000it [00:43, 691.56it/s, env_step=2058300, gradient_step=2058300, len=3000, n/ep=10, n/st=30000, rew=-171.67] 


Epoch #90: test_reward: -1130.555816 ± 0.000000, best_reward: -152.675397 ± 0.000000 in #1


Epoch #91: 30000it [00:44, 668.65it/s, env_step=2088300, gradient_step=2088300, len=3000, n/ep=10, n/st=30000, rew=-1130.56] 


Epoch #91: test_reward: -171.674998 ± 0.000000, best_reward: -152.675397 ± 0.000000 in #1


Epoch #92: 30000it [00:43, 684.73it/s, env_step=2118300, gradient_step=2118300, len=3000, n/ep=10, n/st=30000, rew=-171.67] 

Epoch #92: test_reward: -152.675397 ± 0.000000, best_reward: -152.675397 ± 0.000000 in #1



Epoch #93: 2100it [00:03, 681.07it/s, env_step=2120400, gradient_step=2120400, len=210, n/ep=10, n/st=2100, rew=-152.68] 


Epoch #93: test_reward: -171.674998 ± 0.000000, best_reward: -152.675397 ± 0.000000 in #1


Epoch #94: 30000it [00:43, 684.78it/s, env_step=2150400, gradient_step=2150400, len=3000, n/ep=10, n/st=30000, rew=-171.67] 


Epoch #94: test_reward: -1130.555816 ± 0.000000, best_reward: -152.675397 ± 0.000000 in #1


Epoch #95: 30000it [00:43, 685.79it/s, env_step=2180400, gradient_step=2180400, len=3000, n/ep=10, n/st=30000, rew=-1130.56] 


Epoch #95: test_reward: -171.674998 ± 0.000000, best_reward: -152.675397 ± 0.000000 in #1


Epoch #96: 30000it [00:43, 689.68it/s, env_step=2210400, gradient_step=2210400, len=3000, n/ep=10, n/st=30000, rew=-171.67] 

Epoch #96: test_reward: -152.675397 ± 0.000000, best_reward: -152.675397 ± 0.000000 in #1



Epoch #97: 2100it [00:03, 681.00it/s, env_step=2212500, gradient_step=2212500, len=210, n/ep=10, n/st=2100, rew=-152.68] 


Epoch #97: test_reward: -171.674998 ± 0.000000, best_reward: -152.675397 ± 0.000000 in #1


Epoch #98: 30000it [00:43, 684.05it/s, env_step=2242500, gradient_step=2242500, len=3000, n/ep=10, n/st=30000, rew=-171.67] 


Epoch #98: test_reward: -1130.555816 ± 0.000000, best_reward: -152.675397 ± 0.000000 in #1


Epoch #99: 30000it [00:43, 682.75it/s, env_step=2272500, gradient_step=2272500, len=3000, n/ep=10, n/st=30000, rew=-1130.56] 


Epoch #99: test_reward: -171.674998 ± 0.000000, best_reward: -152.675397 ± 0.000000 in #1


Epoch #100: 30000it [00:44, 670.44it/s, env_step=2302500, gradient_step=2302500, len=3000, n/ep=10, n/st=30000, rew=-171.67] 

Epoch #100: test_reward: -152.675397 ± 0.000000, best_reward: -152.675397 ± 0.000000 in #1





InfoStats(gradient_step=2302500, best_reward=-152.6753965638624, best_reward_std=0.0, train_step=2302500, train_episode=1000, test_step=230460, test_episode=101, timing=TimingStats(total_time=3495.8948669433594, train_time=3369.6520574092865, train_time_collect=1254.5685031414032, train_time_update=2024.8962972164154, test_time=126.24280953407288, update_speed=683.304970593981))

### Conv NN

In [None]:
run(SimpleConv, scenario_0)

Epoch #1: 201it [00:01, 101.52it/s, env_step=200, gradient_step=200, len=10, n/ep=10, n/st=100, rew=-5.86]                         

Epoch #1: test_reward: -2.828968 ± 0.000000, best_reward: -2.828968 ± 0.000000 in #0



Epoch #2: 201it [00:02, 96.27it/s, env_step=400, gradient_step=400, len=2, n/ep=10, n/st=20, rew=0.00]                             

Epoch #2: test_reward: 0.000000 ± 0.000000, best_reward: 0.000000 ± 0.000000 in #2



Epoch #3: 250it [00:02, 85.51it/s, env_step=650, gradient_step=650, len=10, n/ep=10, n/st=100, rew=-2.83]                       

Epoch #3: test_reward: 0.000000 ± 0.000000, best_reward: 0.000000 ± 0.000000 in #2



Epoch #4: 201it [00:02, 78.13it/s, env_step=850, gradient_step=850, len=2, n/ep=10, n/st=20, rew=0.00]                          

Epoch #4: test_reward: 0.000000 ± 0.000000, best_reward: 0.000000 ± 0.000000 in #2



Epoch #5: 201it [00:02, 84.00it/s, env_step=1050, gradient_step=1050, len=2, n/ep=10, n/st=20, rew=0.00]                         

Epoch #5: test_reward: -2.000000 ± 0.000000, best_reward: 0.000000 ± 0.000000 in #2



Epoch #6: 201it [00:02, 89.03it/s, env_step=1250, gradient_step=1250, len=2, n/ep=10, n/st=20, rew=0.00]                          

Epoch #6: test_reward: 0.000000 ± 0.000000, best_reward: 0.000000 ± 0.000000 in #2



Epoch #7: 210it [00:02, 102.51it/s, env_step=1460, gradient_step=1460, len=3, n/ep=10, n/st=30, rew=-2.00]                        

Epoch #7: test_reward: 0.000000 ± 0.000000, best_reward: 0.000000 ± 0.000000 in #2



Epoch #8: 210it [00:02, 102.42it/s, env_step=1670, gradient_step=1670, len=3, n/ep=10, n/st=30, rew=-2.00]                           

Epoch #8: test_reward: 0.000000 ± 0.000000, best_reward: 0.000000 ± 0.000000 in #2



Epoch #9: 290it [00:04, 69.19it/s, env_step=1960, gradient_step=1960, len=10, n/ep=10, n/st=100, rew=-2.83]                        

Epoch #9: test_reward: 0.000000 ± 0.000000, best_reward: 0.000000 ± 0.000000 in #2



Epoch #10: 220it [00:02, 91.06it/s, env_step=2180, gradient_step=2180, len=10, n/ep=10, n/st=100, rew=-2.83]                      

Epoch #10: test_reward: 0.000000 ± 0.000000, best_reward: 0.000000 ± 0.000000 in #2



Epoch #11: 201it [00:02, 99.51it/s, env_step=2380, gradient_step=2380, len=10, n/ep=10, n/st=100, rew=-5.86]                          

Epoch #11: test_reward: -2.000000 ± 0.000000, best_reward: 0.000000 ± 0.000000 in #2



Epoch #12: 220it [00:03, 71.95it/s, env_step=2600, gradient_step=2600, len=3, n/ep=10, n/st=30, rew=-2.00]                        

Epoch #12: test_reward: 0.000000 ± 0.000000, best_reward: 0.000000 ± 0.000000 in #2



Epoch #13: 260it [00:02, 104.23it/s, env_step=2860, gradient_step=2860, len=10, n/ep=10, n/st=100, rew=-2.83]                         

Epoch #13: test_reward: 0.000000 ± 0.000000, best_reward: 0.000000 ± 0.000000 in #2



Epoch #14: 201it [00:02, 98.52it/s, env_step=3060, gradient_step=3060, len=2, n/ep=10, n/st=20, rew=0.00]                          

Epoch #14: test_reward: 0.000000 ± 0.000000, best_reward: 0.000000 ± 0.000000 in #2



Epoch #15: 201it [00:01, 102.57it/s, env_step=3260, gradient_step=3260, len=2, n/ep=10, n/st=20, rew=0.00]                         


Epoch #15: test_reward: 0.000000 ± 0.000000, best_reward: 0.000000 ± 0.000000 in #2


Epoch #16: 201it [00:02, 98.15it/s, env_step=3460, gradient_step=3460, len=2, n/ep=10, n/st=20, rew=0.00]                          

Epoch #16: test_reward: 0.000000 ± 0.000000, best_reward: 0.000000 ± 0.000000 in #2



Epoch #17: 201it [00:01, 103.79it/s, env_step=3660, gradient_step=3660, len=2, n/ep=10, n/st=20, rew=0.00]                         

Epoch #17: test_reward: 0.000000 ± 0.000000, best_reward: 0.000000 ± 0.000000 in #2



Epoch #18: 201it [00:01, 103.08it/s, env_step=3860, gradient_step=3860, len=2, n/ep=10, n/st=20, rew=0.00]                         

Epoch #18: test_reward: 0.000000 ± 0.000000, best_reward: 0.000000 ± 0.000000 in #2



Epoch #19: 201it [00:02, 96.49it/s, env_step=4060, gradient_step=4060, len=2, n/ep=10, n/st=20, rew=0.00]                          

Epoch #19: test_reward: 0.000000 ± 0.000000, best_reward: 0.000000 ± 0.000000 in #2



Epoch #20: 201it [00:01, 104.51it/s, env_step=4260, gradient_step=4260, len=2, n/ep=10, n/st=20, rew=0.00]                         

Epoch #20: test_reward: 0.000000 ± 0.000000, best_reward: 0.000000 ± 0.000000 in #2



Epoch #21: 201it [00:01, 103.91it/s, env_step=4460, gradient_step=4460, len=2, n/ep=10, n/st=20, rew=0.00]                         

Epoch #21: test_reward: 0.000000 ± 0.000000, best_reward: 0.000000 ± 0.000000 in #2



Epoch #22: 201it [00:02, 97.19it/s, env_step=4660, gradient_step=4660, len=2, n/ep=10, n/st=20, rew=0.00]                          

Epoch #22: test_reward: 0.000000 ± 0.000000, best_reward: 0.000000 ± 0.000000 in #2



Epoch #23: 201it [00:01, 105.75it/s, env_step=4860, gradient_step=4860, len=2, n/ep=10, n/st=20, rew=0.00]                         

Epoch #23: test_reward: 0.000000 ± 0.000000, best_reward: 0.000000 ± 0.000000 in #2



Epoch #24: 201it [00:02, 95.95it/s, env_step=5060, gradient_step=5060, len=2, n/ep=10, n/st=20, rew=0.00]                          


Epoch #24: test_reward: 0.000000 ± 0.000000, best_reward: 0.000000 ± 0.000000 in #2


Epoch #25: 201it [00:02, 93.36it/s, env_step=5260, gradient_step=5260, len=2, n/ep=10, n/st=20, rew=0.00]                          

Epoch #25: test_reward: 0.000000 ± 0.000000, best_reward: 0.000000 ± 0.000000 in #2



Epoch #26: 201it [00:01, 102.42it/s, env_step=5460, gradient_step=5460, len=2, n/ep=10, n/st=20, rew=0.00]                         

Epoch #26: test_reward: 0.000000 ± 0.000000, best_reward: 0.000000 ± 0.000000 in #2



Epoch #27: 201it [00:01, 101.53it/s, env_step=5660, gradient_step=5660, len=2, n/ep=10, n/st=20, rew=0.00]                         


Epoch #27: test_reward: 0.000000 ± 0.000000, best_reward: 0.000000 ± 0.000000 in #2


Epoch #28: 201it [00:01, 101.61it/s, env_step=5860, gradient_step=5860, len=2, n/ep=10, n/st=20, rew=0.00]                         

Epoch #28: test_reward: 0.000000 ± 0.000000, best_reward: 0.000000 ± 0.000000 in #2



Epoch #29: 201it [00:01, 101.29it/s, env_step=6060, gradient_step=6060, len=2, n/ep=10, n/st=20, rew=0.00]                         

Epoch #29: test_reward: 0.000000 ± 0.000000, best_reward: 0.000000 ± 0.000000 in #2



Epoch #30: 201it [00:02, 100.14it/s, env_step=6260, gradient_step=6260, len=2, n/ep=10, n/st=20, rew=0.00]                         


Epoch #30: test_reward: 0.000000 ± 0.000000, best_reward: 0.000000 ± 0.000000 in #2


Epoch #31: 201it [00:02, 98.22it/s, env_step=6460, gradient_step=6460, len=2, n/ep=10, n/st=20, rew=0.00]                          

Epoch #31: test_reward: 0.000000 ± 0.000000, best_reward: 0.000000 ± 0.000000 in #2



Epoch #32: 201it [00:02, 99.34it/s, env_step=6660, gradient_step=6660, len=2, n/ep=10, n/st=20, rew=0.00]                          


Epoch #32: test_reward: 0.000000 ± 0.000000, best_reward: 0.000000 ± 0.000000 in #2


Epoch #33: 201it [00:01, 102.06it/s, env_step=6860, gradient_step=6860, len=2, n/ep=10, n/st=20, rew=0.00]                         


Epoch #33: test_reward: 0.000000 ± 0.000000, best_reward: 0.000000 ± 0.000000 in #2


Epoch #34: 201it [00:02, 97.79it/s, env_step=7060, gradient_step=7060, len=2, n/ep=10, n/st=20, rew=0.00]                          


Epoch #34: test_reward: 0.000000 ± 0.000000, best_reward: 0.000000 ± 0.000000 in #2


Epoch #35: 201it [00:02, 98.49it/s, env_step=7260, gradient_step=7260, len=2, n/ep=10, n/st=20, rew=0.00]                          

Epoch #35: test_reward: 0.000000 ± 0.000000, best_reward: 0.000000 ± 0.000000 in #2



Epoch #36: 201it [00:01, 101.25it/s, env_step=7460, gradient_step=7460, len=2, n/ep=10, n/st=20, rew=0.00]                         

Epoch #36: test_reward: 0.000000 ± 0.000000, best_reward: 0.000000 ± 0.000000 in #2



Epoch #37: 201it [00:02, 78.72it/s, env_step=7660, gradient_step=7660, len=2, n/ep=10, n/st=20, rew=0.00]                         

Epoch #37: test_reward: 0.000000 ± 0.000000, best_reward: 0.000000 ± 0.000000 in #2



Epoch #38: 201it [00:01, 105.72it/s, env_step=7860, gradient_step=7860, len=2, n/ep=10, n/st=20, rew=0.00]                         

Epoch #38: test_reward: 0.000000 ± 0.000000, best_reward: 0.000000 ± 0.000000 in #2



Epoch #39: 201it [00:01, 102.65it/s, env_step=8060, gradient_step=8060, len=2, n/ep=10, n/st=20, rew=0.00]                         

Epoch #39: test_reward: 0.000000 ± 0.000000, best_reward: 0.000000 ± 0.000000 in #2



Epoch #40: 201it [00:01, 100.81it/s, env_step=8260, gradient_step=8260, len=2, n/ep=10, n/st=20, rew=0.00]                         

Epoch #40: test_reward: 0.000000 ± 0.000000, best_reward: 0.000000 ± 0.000000 in #2



Epoch #41: 201it [00:01, 102.45it/s, env_step=8460, gradient_step=8460, len=2, n/ep=10, n/st=20, rew=0.00]                         

Epoch #41: test_reward: 0.000000 ± 0.000000, best_reward: 0.000000 ± 0.000000 in #2



Epoch #42: 201it [00:01, 102.15it/s, env_step=8660, gradient_step=8660, len=2, n/ep=10, n/st=20, rew=0.00]                         

Epoch #42: test_reward: 0.000000 ± 0.000000, best_reward: 0.000000 ± 0.000000 in #2



Epoch #43: 201it [00:01, 101.85it/s, env_step=8860, gradient_step=8860, len=2, n/ep=10, n/st=20, rew=0.00]                         


Epoch #43: test_reward: 0.000000 ± 0.000000, best_reward: 0.000000 ± 0.000000 in #2


Epoch #44: 201it [00:03, 61.43it/s, env_step=9060, gradient_step=9060, len=2, n/ep=10, n/st=20, rew=0.00]                         

Epoch #44: test_reward: 0.000000 ± 0.000000, best_reward: 0.000000 ± 0.000000 in #2



Epoch #45: 201it [00:02, 95.54it/s, env_step=9260, gradient_step=9260, len=2, n/ep=10, n/st=20, rew=0.00]                          

Epoch #45: test_reward: 0.000000 ± 0.000000, best_reward: 0.000000 ± 0.000000 in #2



Epoch #46: 201it [00:02, 100.14it/s, env_step=9460, gradient_step=9460, len=2, n/ep=10, n/st=20, rew=0.00]                         

Epoch #46: test_reward: 0.000000 ± 0.000000, best_reward: 0.000000 ± 0.000000 in #2



Epoch #47: 201it [00:01, 104.81it/s, env_step=9660, gradient_step=9660, len=2, n/ep=10, n/st=20, rew=0.00]                         

Epoch #47: test_reward: 0.000000 ± 0.000000, best_reward: 0.000000 ± 0.000000 in #2



Epoch #48: 201it [00:01, 101.38it/s, env_step=9860, gradient_step=9860, len=2, n/ep=10, n/st=20, rew=0.00]                         


Epoch #48: test_reward: 0.000000 ± 0.000000, best_reward: 0.000000 ± 0.000000 in #2


Epoch #49: 201it [00:01, 101.74it/s, env_step=10060, gradient_step=10060, len=2, n/ep=10, n/st=20, rew=0.00]                         

Epoch #49: test_reward: 0.000000 ± 0.000000, best_reward: 0.000000 ± 0.000000 in #2



Epoch #50: 201it [00:02, 100.06it/s, env_step=10260, gradient_step=10260, len=2, n/ep=10, n/st=20, rew=0.00]                         

Epoch #50: test_reward: 0.000000 ± 0.000000, best_reward: 0.000000 ± 0.000000 in #2



Epoch #51: 201it [00:02, 98.73it/s, env_step=10460, gradient_step=10460, len=2, n/ep=10, n/st=20, rew=0.00]                          


Epoch #51: test_reward: 0.000000 ± 0.000000, best_reward: 0.000000 ± 0.000000 in #2


Epoch #52: 201it [00:01, 104.31it/s, env_step=10660, gradient_step=10660, len=2, n/ep=10, n/st=20, rew=0.00]                         

Epoch #52: test_reward: 0.000000 ± 0.000000, best_reward: 0.000000 ± 0.000000 in #2



Epoch #53: 201it [00:02, 100.35it/s, env_step=10860, gradient_step=10860, len=2, n/ep=10, n/st=20, rew=0.00]                         

Epoch #53: test_reward: 0.000000 ± 0.000000, best_reward: 0.000000 ± 0.000000 in #2



Epoch #54: 201it [00:02, 97.66it/s, env_step=11060, gradient_step=11060, len=2, n/ep=10, n/st=20, rew=0.00]                          

Epoch #54: test_reward: 0.000000 ± 0.000000, best_reward: 0.000000 ± 0.000000 in #2



Epoch #55: 201it [00:01, 102.31it/s, env_step=11260, gradient_step=11260, len=2, n/ep=10, n/st=20, rew=0.00]                         

Epoch #55: test_reward: 0.000000 ± 0.000000, best_reward: 0.000000 ± 0.000000 in #2



Epoch #56: 201it [00:01, 102.61it/s, env_step=11460, gradient_step=11460, len=2, n/ep=10, n/st=20, rew=0.00]                         

Epoch #56: test_reward: 0.000000 ± 0.000000, best_reward: 0.000000 ± 0.000000 in #2



Epoch #57: 201it [00:02, 97.30it/s, env_step=11660, gradient_step=11660, len=2, n/ep=10, n/st=20, rew=0.00]                          

Epoch #57: test_reward: 0.000000 ± 0.000000, best_reward: 0.000000 ± 0.000000 in #2



Epoch #58: 201it [00:03, 52.33it/s, env_step=11860, gradient_step=11860, len=2, n/ep=10, n/st=20, rew=0.00]                         

Epoch #58: test_reward: 0.000000 ± 0.000000, best_reward: 0.000000 ± 0.000000 in #2



Epoch #59: 201it [00:02, 98.25it/s, env_step=12060, gradient_step=12060, len=2, n/ep=10, n/st=20, rew=0.00]                          


Epoch #59: test_reward: 0.000000 ± 0.000000, best_reward: 0.000000 ± 0.000000 in #2


Epoch #60: 201it [00:01, 102.84it/s, env_step=12260, gradient_step=12260, len=2, n/ep=10, n/st=20, rew=0.00]                         

Epoch #60: test_reward: 0.000000 ± 0.000000, best_reward: 0.000000 ± 0.000000 in #2



Epoch #61: 201it [00:01, 102.80it/s, env_step=12460, gradient_step=12460, len=2, n/ep=10, n/st=20, rew=0.00]                         

Epoch #61: test_reward: 0.000000 ± 0.000000, best_reward: 0.000000 ± 0.000000 in #2



Epoch #62: 201it [00:01, 101.24it/s, env_step=12660, gradient_step=12660, len=2, n/ep=10, n/st=20, rew=0.00]                         


Epoch #62: test_reward: 0.000000 ± 0.000000, best_reward: 0.000000 ± 0.000000 in #2


Epoch #63: 201it [00:01, 100.59it/s, env_step=12860, gradient_step=12860, len=2, n/ep=10, n/st=20, rew=0.00]                         

Epoch #63: test_reward: 0.000000 ± 0.000000, best_reward: 0.000000 ± 0.000000 in #2



Epoch #64: 201it [00:01, 101.48it/s, env_step=13060, gradient_step=13060, len=2, n/ep=10, n/st=20, rew=0.00]                         

Epoch #64: test_reward: 0.000000 ± 0.000000, best_reward: 0.000000 ± 0.000000 in #2



Epoch #65: 201it [00:02, 95.23it/s, env_step=13260, gradient_step=13260, len=2, n/ep=10, n/st=20, rew=0.00]                          

Epoch #65: test_reward: 0.000000 ± 0.000000, best_reward: 0.000000 ± 0.000000 in #2



Epoch #66: 201it [00:02, 100.45it/s, env_step=13460, gradient_step=13460, len=2, n/ep=10, n/st=20, rew=0.00]                         

Epoch #66: test_reward: 0.000000 ± 0.000000, best_reward: 0.000000 ± 0.000000 in #2



Epoch #67: 201it [00:01, 104.96it/s, env_step=13660, gradient_step=13660, len=2, n/ep=10, n/st=20, rew=0.00]                         

Epoch #67: test_reward: 0.000000 ± 0.000000, best_reward: 0.000000 ± 0.000000 in #2



Epoch #68: 201it [00:02, 96.92it/s, env_step=13860, gradient_step=13860, len=2, n/ep=10, n/st=20, rew=0.00]                          


Epoch #68: test_reward: 0.000000 ± 0.000000, best_reward: 0.000000 ± 0.000000 in #2


Epoch #69: 201it [00:02, 100.46it/s, env_step=14060, gradient_step=14060, len=2, n/ep=10, n/st=20, rew=0.00]                         


Epoch #69: test_reward: 0.000000 ± 0.000000, best_reward: 0.000000 ± 0.000000 in #2


Epoch #70: 201it [00:01, 102.65it/s, env_step=14260, gradient_step=14260, len=2, n/ep=10, n/st=20, rew=0.00]                         


Epoch #70: test_reward: 0.000000 ± 0.000000, best_reward: 0.000000 ± 0.000000 in #2


Epoch #71: 201it [00:01, 101.56it/s, env_step=14460, gradient_step=14460, len=2, n/ep=10, n/st=20, rew=0.00]                         

Epoch #71: test_reward: 0.000000 ± 0.000000, best_reward: 0.000000 ± 0.000000 in #2



Epoch #72: 201it [00:01, 100.64it/s, env_step=14660, gradient_step=14660, len=2, n/ep=10, n/st=20, rew=0.00]                         

Epoch #72: test_reward: 0.000000 ± 0.000000, best_reward: 0.000000 ± 0.000000 in #2



Epoch #73: 201it [00:02, 98.71it/s, env_step=14860, gradient_step=14860, len=2, n/ep=10, n/st=20, rew=0.00]                          

Epoch #73: test_reward: 0.000000 ± 0.000000, best_reward: 0.000000 ± 0.000000 in #2



Epoch #74: 201it [00:01, 103.80it/s, env_step=15060, gradient_step=15060, len=2, n/ep=10, n/st=20, rew=0.00]                         

Epoch #74: test_reward: 0.000000 ± 0.000000, best_reward: 0.000000 ± 0.000000 in #2



Epoch #75: 201it [00:02, 96.18it/s, env_step=15260, gradient_step=15260, len=2, n/ep=10, n/st=20, rew=0.00]                          

Epoch #75: test_reward: 0.000000 ± 0.000000, best_reward: 0.000000 ± 0.000000 in #2



Epoch #76: 201it [00:01, 101.67it/s, env_step=15460, gradient_step=15460, len=2, n/ep=10, n/st=20, rew=0.00]                         

Epoch #76: test_reward: 0.000000 ± 0.000000, best_reward: 0.000000 ± 0.000000 in #2



Epoch #77: 201it [00:02, 92.67it/s, env_step=15660, gradient_step=15660, len=2, n/ep=10, n/st=20, rew=0.00]                          

Epoch #77: test_reward: 0.000000 ± 0.000000, best_reward: 0.000000 ± 0.000000 in #2



Epoch #78: 201it [00:01, 102.48it/s, env_step=15860, gradient_step=15860, len=2, n/ep=10, n/st=20, rew=0.00]                         

Epoch #78: test_reward: 0.000000 ± 0.000000, best_reward: 0.000000 ± 0.000000 in #2



Epoch #79: 201it [00:01, 105.19it/s, env_step=16060, gradient_step=16060, len=2, n/ep=10, n/st=20, rew=0.00]                         

Epoch #79: test_reward: 0.000000 ± 0.000000, best_reward: 0.000000 ± 0.000000 in #2



Epoch #80: 201it [00:01, 100.60it/s, env_step=16260, gradient_step=16260, len=2, n/ep=10, n/st=20, rew=0.00]                         


Epoch #80: test_reward: 0.000000 ± 0.000000, best_reward: 0.000000 ± 0.000000 in #2


Epoch #81: 201it [00:01, 103.49it/s, env_step=16460, gradient_step=16460, len=2, n/ep=10, n/st=20, rew=0.00]                         

Epoch #81: test_reward: 0.000000 ± 0.000000, best_reward: 0.000000 ± 0.000000 in #2



Epoch #82: 201it [00:01, 104.14it/s, env_step=16660, gradient_step=16660, len=2, n/ep=10, n/st=20, rew=0.00]                         

Epoch #82: test_reward: 0.000000 ± 0.000000, best_reward: 0.000000 ± 0.000000 in #2



Epoch #83: 201it [00:01, 100.93it/s, env_step=16860, gradient_step=16860, len=2, n/ep=10, n/st=20, rew=0.00]                         

Epoch #83: test_reward: 0.000000 ± 0.000000, best_reward: 0.000000 ± 0.000000 in #2



Epoch #84: 201it [00:01, 103.23it/s, env_step=17060, gradient_step=17060, len=2, n/ep=10, n/st=20, rew=0.00]                         

Epoch #84: test_reward: 0.000000 ± 0.000000, best_reward: 0.000000 ± 0.000000 in #2



Epoch #85: 201it [00:01, 104.33it/s, env_step=17260, gradient_step=17260, len=2, n/ep=10, n/st=20, rew=0.00]                         

Epoch #85: test_reward: 0.000000 ± 0.000000, best_reward: 0.000000 ± 0.000000 in #2



Epoch #86: 201it [00:01, 103.50it/s, env_step=17460, gradient_step=17460, len=2, n/ep=10, n/st=20, rew=0.00]                         

Epoch #86: test_reward: 0.000000 ± 0.000000, best_reward: 0.000000 ± 0.000000 in #2



Epoch #87: 201it [00:01, 102.71it/s, env_step=17660, gradient_step=17660, len=2, n/ep=10, n/st=20, rew=0.00]                         


Epoch #87: test_reward: 0.000000 ± 0.000000, best_reward: 0.000000 ± 0.000000 in #2


Epoch #88: 201it [00:01, 100.58it/s, env_step=17860, gradient_step=17860, len=2, n/ep=10, n/st=20, rew=0.00]                         

Epoch #88: test_reward: 0.000000 ± 0.000000, best_reward: 0.000000 ± 0.000000 in #2



Epoch #89: 201it [00:01, 102.36it/s, env_step=18060, gradient_step=18060, len=2, n/ep=10, n/st=20, rew=0.00]                         

Epoch #89: test_reward: 0.000000 ± 0.000000, best_reward: 0.000000 ± 0.000000 in #2



Epoch #90: 201it [00:01, 101.91it/s, env_step=18260, gradient_step=18260, len=2, n/ep=10, n/st=20, rew=0.00]                         


Epoch #90: test_reward: 0.000000 ± 0.000000, best_reward: 0.000000 ± 0.000000 in #2


Epoch #91: 201it [00:02, 99.46it/s, env_step=18460, gradient_step=18460, len=2, n/ep=10, n/st=20, rew=0.00]                          


Epoch #91: test_reward: 0.000000 ± 0.000000, best_reward: 0.000000 ± 0.000000 in #2


Epoch #92: 201it [00:02, 99.49it/s, env_step=18660, gradient_step=18660, len=2, n/ep=10, n/st=20, rew=0.00]                          

Epoch #92: test_reward: 0.000000 ± 0.000000, best_reward: 0.000000 ± 0.000000 in #2



Epoch #93: 201it [00:01, 102.14it/s, env_step=18860, gradient_step=18860, len=2, n/ep=10, n/st=20, rew=0.00]                         

Epoch #93: test_reward: 0.000000 ± 0.000000, best_reward: 0.000000 ± 0.000000 in #2



Epoch #94: 201it [00:01, 102.53it/s, env_step=19060, gradient_step=19060, len=2, n/ep=10, n/st=20, rew=0.00]                         

Epoch #94: test_reward: 0.000000 ± 0.000000, best_reward: 0.000000 ± 0.000000 in #2



Epoch #95: 201it [00:02, 99.64it/s, env_step=19260, gradient_step=19260, len=2, n/ep=10, n/st=20, rew=0.00]                          

Epoch #95: test_reward: 0.000000 ± 0.000000, best_reward: 0.000000 ± 0.000000 in #2



Epoch #96: 201it [00:03, 59.51it/s, env_step=19460, gradient_step=19460, len=2, n/ep=10, n/st=20, rew=0.00]                         

Epoch #96: test_reward: 0.000000 ± 0.000000, best_reward: 0.000000 ± 0.000000 in #2



Epoch #97: 201it [00:01, 101.46it/s, env_step=19660, gradient_step=19660, len=2, n/ep=10, n/st=20, rew=0.00]                         


Epoch #97: test_reward: 0.000000 ± 0.000000, best_reward: 0.000000 ± 0.000000 in #2


Epoch #98: 201it [00:01, 101.49it/s, env_step=19860, gradient_step=19860, len=2, n/ep=10, n/st=20, rew=0.00]                         


Epoch #98: test_reward: 0.000000 ± 0.000000, best_reward: 0.000000 ± 0.000000 in #2


Epoch #99: 201it [00:01, 102.92it/s, env_step=20060, gradient_step=20060, len=2, n/ep=10, n/st=20, rew=0.00]                         

Epoch #99: test_reward: 0.000000 ± 0.000000, best_reward: 0.000000 ± 0.000000 in #2



Epoch #100: 201it [00:01, 100.55it/s, env_step=20260, gradient_step=20260, len=2, n/ep=10, n/st=20, rew=0.00]                         


Epoch #100: test_reward: 0.000000 ± 0.000000, best_reward: 0.000000 ± 0.000000 in #2


InfoStats(gradient_step=20260, best_reward=0.0, best_reward_std=0.0, train_step=20260, train_episode=9660, test_step=220, test_episode=101, timing=TimingStats(total_time=211.7680230140686, train_time=211.44733023643494, train_time_collect=20.043417930603027, train_time_update=188.40189242362976, test_time=0.320692777633667, update_speed=95.81582315248811))

In [None]:
run(SimpleConv, scenario_1)

Epoch #1: 1000it [00:09, 105.83it/s, env_step=1000, gradient_step=1000, len=100, n/ep=10, n/st=1000, rew=-103.75] 

Epoch #1: test_reward: -200.723693 ± 0.000000, best_reward: -103.747550 ± 0.000000 in #0



Epoch #2: 390it [00:03, 100.70it/s, env_step=1390, gradient_step=1390, len=39, n/ep=10, n/st=390, rew=-200.72]

Epoch #2: test_reward: -415.302740 ± 0.000000, best_reward: -103.747550 ± 0.000000 in #0



Epoch #3: 1000it [00:10, 91.56it/s, env_step=2390, gradient_step=2390, len=100, n/ep=10, n/st=1000, rew=-415.30]  

Epoch #3: test_reward: -200.723693 ± 0.000000, best_reward: -103.747550 ± 0.000000 in #0



Epoch #4: 390it [00:03, 105.87it/s, env_step=2780, gradient_step=2780, len=39, n/ep=10, n/st=390, rew=-200.72] 

Epoch #4: test_reward: -415.302740 ± 0.000000, best_reward: -103.747550 ± 0.000000 in #0



Epoch #5: 1000it [00:09, 104.79it/s, env_step=3780, gradient_step=3780, len=100, n/ep=10, n/st=1000, rew=-415.30] 

Epoch #5: test_reward: -415.302740 ± 0.000000, best_reward: -103.747550 ± 0.000000 in #0



Epoch #6: 1000it [00:09, 104.93it/s, env_step=4780, gradient_step=4780, len=100, n/ep=10, n/st=1000, rew=-415.30] 

Epoch #6: test_reward: -415.302740 ± 0.000000, best_reward: -103.747550 ± 0.000000 in #0



Epoch #7: 1000it [00:09, 106.69it/s, env_step=5780, gradient_step=5780, len=100, n/ep=10, n/st=1000, rew=-415.30] 

Epoch #7: test_reward: -415.302740 ± 0.000000, best_reward: -103.747550 ± 0.000000 in #0



Epoch #8: 1000it [00:11, 88.44it/s, env_step=6780, gradient_step=6780, len=100, n/ep=10, n/st=1000, rew=-415.30]  

Epoch #8: test_reward: -415.302740 ± 0.000000, best_reward: -103.747550 ± 0.000000 in #0



Epoch #9: 1000it [00:09, 104.25it/s, env_step=7780, gradient_step=7780, len=100, n/ep=10, n/st=1000, rew=-415.30] 

Epoch #9: test_reward: -415.302740 ± 0.000000, best_reward: -103.747550 ± 0.000000 in #0



Epoch #10: 1000it [00:09, 106.29it/s, env_step=8780, gradient_step=8780, len=100, n/ep=10, n/st=1000, rew=-415.30] 

Epoch #10: test_reward: -415.302740 ± 0.000000, best_reward: -103.747550 ± 0.000000 in #0



Epoch #11: 1000it [00:09, 103.36it/s, env_step=9780, gradient_step=9780, len=100, n/ep=10, n/st=1000, rew=-415.30] 

Epoch #11: test_reward: -415.302740 ± 0.000000, best_reward: -103.747550 ± 0.000000 in #0



Epoch #12: 1000it [00:09, 105.46it/s, env_step=10780, gradient_step=10780, len=100, n/ep=10, n/st=1000, rew=-415.30] 

Epoch #12: test_reward: -415.302740 ± 0.000000, best_reward: -103.747550 ± 0.000000 in #0



Epoch #13: 1000it [00:09, 105.10it/s, env_step=11780, gradient_step=11780, len=100, n/ep=10, n/st=1000, rew=-415.30] 

Epoch #13: test_reward: -415.302740 ± 0.000000, best_reward: -103.747550 ± 0.000000 in #0



Epoch #14: 1000it [00:10, 91.80it/s, env_step=12780, gradient_step=12780, len=100, n/ep=10, n/st=1000, rew=-415.30]  

Epoch #14: test_reward: -415.302740 ± 0.000000, best_reward: -103.747550 ± 0.000000 in #0



Epoch #15: 1000it [00:09, 104.50it/s, env_step=13780, gradient_step=13780, len=100, n/ep=10, n/st=1000, rew=-415.30] 

Epoch #15: test_reward: -415.302740 ± 0.000000, best_reward: -103.747550 ± 0.000000 in #0



Epoch #16: 1000it [00:09, 105.44it/s, env_step=14780, gradient_step=14780, len=100, n/ep=10, n/st=1000, rew=-415.30] 

Epoch #16: test_reward: -415.302740 ± 0.000000, best_reward: -103.747550 ± 0.000000 in #0



Epoch #17: 1000it [00:09, 105.79it/s, env_step=15780, gradient_step=15780, len=100, n/ep=10, n/st=1000, rew=-415.30] 

Epoch #17: test_reward: -415.302740 ± 0.000000, best_reward: -103.747550 ± 0.000000 in #0



Epoch #18: 1000it [00:09, 108.84it/s, env_step=16780, gradient_step=16780, len=100, n/ep=10, n/st=1000, rew=-415.30] 

Epoch #18: test_reward: -415.302740 ± 0.000000, best_reward: -103.747550 ± 0.000000 in #0



Epoch #19: 1000it [00:11, 89.36it/s, env_step=17780, gradient_step=17780, len=100, n/ep=10, n/st=1000, rew=-415.30]  

Epoch #19: test_reward: -415.302740 ± 0.000000, best_reward: -103.747550 ± 0.000000 in #0



Epoch #20: 1000it [00:09, 106.21it/s, env_step=18780, gradient_step=18780, len=100, n/ep=10, n/st=1000, rew=-415.30] 

Epoch #20: test_reward: -415.302740 ± 0.000000, best_reward: -103.747550 ± 0.000000 in #0



Epoch #21: 1000it [00:09, 107.17it/s, env_step=19780, gradient_step=19780, len=100, n/ep=10, n/st=1000, rew=-415.30] 

Epoch #21: test_reward: -415.302740 ± 0.000000, best_reward: -103.747550 ± 0.000000 in #0



Epoch #22: 1000it [00:09, 106.98it/s, env_step=20780, gradient_step=20780, len=100, n/ep=10, n/st=1000, rew=-415.30] 

Epoch #22: test_reward: -415.302740 ± 0.000000, best_reward: -103.747550 ± 0.000000 in #0



Epoch #23: 1000it [00:10, 93.53it/s, env_step=21780, gradient_step=21780, len=100, n/ep=10, n/st=1000, rew=-415.30]  

Epoch #23: test_reward: -415.302740 ± 0.000000, best_reward: -103.747550 ± 0.000000 in #0



Epoch #24: 1000it [00:09, 108.08it/s, env_step=22780, gradient_step=22780, len=100, n/ep=10, n/st=1000, rew=-415.30] 

Epoch #24: test_reward: -415.302740 ± 0.000000, best_reward: -103.747550 ± 0.000000 in #0



Epoch #25: 1000it [00:09, 107.80it/s, env_step=23780, gradient_step=23780, len=100, n/ep=10, n/st=1000, rew=-415.30] 

Epoch #25: test_reward: -415.302740 ± 0.000000, best_reward: -103.747550 ± 0.000000 in #0



Epoch #26: 1000it [00:09, 107.10it/s, env_step=24780, gradient_step=24780, len=100, n/ep=10, n/st=1000, rew=-415.30] 

Epoch #26: test_reward: -415.302740 ± 0.000000, best_reward: -103.747550 ± 0.000000 in #0



Epoch #27: 1000it [00:09, 108.59it/s, env_step=25780, gradient_step=25780, len=100, n/ep=10, n/st=1000, rew=-415.30] 

Epoch #27: test_reward: -415.302740 ± 0.000000, best_reward: -103.747550 ± 0.000000 in #0



Epoch #28: 1000it [00:11, 89.66it/s, env_step=26780, gradient_step=26780, len=100, n/ep=10, n/st=1000, rew=-415.30]  

Epoch #28: test_reward: -415.302740 ± 0.000000, best_reward: -103.747550 ± 0.000000 in #0



Epoch #29: 1000it [00:09, 105.34it/s, env_step=27780, gradient_step=27780, len=100, n/ep=10, n/st=1000, rew=-415.30] 

Epoch #29: test_reward: -415.302740 ± 0.000000, best_reward: -103.747550 ± 0.000000 in #0



Epoch #30: 1000it [00:09, 108.77it/s, env_step=28780, gradient_step=28780, len=100, n/ep=10, n/st=1000, rew=-415.30] 

Epoch #30: test_reward: -415.302740 ± 0.000000, best_reward: -103.747550 ± 0.000000 in #0



Epoch #31: 1000it [00:09, 105.89it/s, env_step=29780, gradient_step=29780, len=100, n/ep=10, n/st=1000, rew=-415.30] 

Epoch #31: test_reward: -415.302740 ± 0.000000, best_reward: -103.747550 ± 0.000000 in #0



Epoch #32: 1000it [00:09, 109.49it/s, env_step=30780, gradient_step=30780, len=100, n/ep=10, n/st=1000, rew=-415.30] 

Epoch #32: test_reward: -415.302740 ± 0.000000, best_reward: -103.747550 ± 0.000000 in #0



Epoch #33: 1000it [00:11, 90.05it/s, env_step=31780, gradient_step=31780, len=100, n/ep=10, n/st=1000, rew=-415.30]  

Epoch #33: test_reward: -415.302740 ± 0.000000, best_reward: -103.747550 ± 0.000000 in #0



Epoch #34: 1000it [00:09, 106.76it/s, env_step=32780, gradient_step=32780, len=100, n/ep=10, n/st=1000, rew=-415.30] 

Epoch #34: test_reward: -415.302740 ± 0.000000, best_reward: -103.747550 ± 0.000000 in #0



Epoch #35: 1000it [00:09, 108.88it/s, env_step=33780, gradient_step=33780, len=100, n/ep=10, n/st=1000, rew=-415.30] 

Epoch #35: test_reward: -415.302740 ± 0.000000, best_reward: -103.747550 ± 0.000000 in #0



Epoch #36: 1000it [00:09, 108.33it/s, env_step=34780, gradient_step=34780, len=100, n/ep=10, n/st=1000, rew=-415.30] 

Epoch #36: test_reward: -415.302740 ± 0.000000, best_reward: -103.747550 ± 0.000000 in #0



Epoch #37: 1000it [00:09, 102.91it/s, env_step=35780, gradient_step=35780, len=100, n/ep=10, n/st=1000, rew=-415.30] 

Epoch #37: test_reward: -415.302740 ± 0.000000, best_reward: -103.747550 ± 0.000000 in #0



Epoch #38: 1000it [00:09, 108.89it/s, env_step=36780, gradient_step=36780, len=100, n/ep=10, n/st=1000, rew=-415.30] 

Epoch #38: test_reward: -415.302740 ± 0.000000, best_reward: -103.747550 ± 0.000000 in #0



Epoch #39: 1000it [00:10, 92.11it/s, env_step=37780, gradient_step=37780, len=100, n/ep=10, n/st=1000, rew=-415.30]  

Epoch #39: test_reward: -415.302740 ± 0.000000, best_reward: -103.747550 ± 0.000000 in #0



Epoch #40: 1000it [00:09, 105.79it/s, env_step=38780, gradient_step=38780, len=100, n/ep=10, n/st=1000, rew=-415.30] 

Epoch #40: test_reward: -415.302740 ± 0.000000, best_reward: -103.747550 ± 0.000000 in #0



Epoch #41: 1000it [00:10, 95.26it/s, env_step=39780, gradient_step=39780, len=100, n/ep=10, n/st=1000, rew=-415.30]  

Epoch #41: test_reward: -415.302740 ± 0.000000, best_reward: -103.747550 ± 0.000000 in #0



Epoch #42: 1000it [00:09, 103.65it/s, env_step=40780, gradient_step=40780, len=100, n/ep=10, n/st=1000, rew=-415.30] 

Epoch #42: test_reward: -415.302740 ± 0.000000, best_reward: -103.747550 ± 0.000000 in #0



Epoch #43: 1000it [00:11, 86.74it/s, env_step=41780, gradient_step=41780, len=100, n/ep=10, n/st=1000, rew=-415.30]  

Epoch #43: test_reward: -415.302740 ± 0.000000, best_reward: -103.747550 ± 0.000000 in #0



Epoch #44: 1000it [00:09, 105.83it/s, env_step=42780, gradient_step=42780, len=100, n/ep=10, n/st=1000, rew=-415.30] 

Epoch #44: test_reward: -415.302740 ± 0.000000, best_reward: -103.747550 ± 0.000000 in #0



Epoch #45: 1000it [00:14, 71.29it/s, env_step=43780, gradient_step=43780, len=100, n/ep=10, n/st=1000, rew=-415.30]  

Epoch #45: test_reward: -415.302740 ± 0.000000, best_reward: -103.747550 ± 0.000000 in #0



Epoch #46: 1000it [00:11, 89.49it/s, env_step=44780, gradient_step=44780, len=100, n/ep=10, n/st=1000, rew=-415.30]  

Epoch #46: test_reward: -415.302740 ± 0.000000, best_reward: -103.747550 ± 0.000000 in #0



Epoch #47: 1000it [00:12, 78.73it/s, env_step=45780, gradient_step=45780, len=100, n/ep=10, n/st=1000, rew=-415.30]  

Epoch #47: test_reward: -415.302740 ± 0.000000, best_reward: -103.747550 ± 0.000000 in #0



Epoch #48: 1000it [00:10, 91.97it/s, env_step=46780, gradient_step=46780, len=100, n/ep=10, n/st=1000, rew=-415.30]  

Epoch #48: test_reward: -415.302740 ± 0.000000, best_reward: -103.747550 ± 0.000000 in #0



Epoch #49: 1000it [00:11, 84.33it/s, env_step=47780, gradient_step=47780, len=100, n/ep=10, n/st=1000, rew=-415.30]  

Epoch #49: test_reward: -415.302740 ± 0.000000, best_reward: -103.747550 ± 0.000000 in #0



Epoch #50: 1000it [00:11, 86.25it/s, env_step=48780, gradient_step=48780, len=100, n/ep=10, n/st=1000, rew=-415.30]  

Epoch #50: test_reward: -200.723693 ± 0.000000, best_reward: -103.747550 ± 0.000000 in #0



Epoch #51: 390it [00:06, 57.81it/s, env_step=49170, gradient_step=49170, len=39, n/ep=10, n/st=390, rew=-200.72]  

Epoch #51: test_reward: -200.723693 ± 0.000000, best_reward: -103.747550 ± 0.000000 in #0



Epoch #52: 390it [00:05, 75.80it/s, env_step=49560, gradient_step=49560, len=39, n/ep=10, n/st=390, rew=-200.72] 

Epoch #52: test_reward: -200.723693 ± 0.000000, best_reward: -103.747550 ± 0.000000 in #0



Epoch #53: 390it [00:04, 84.23it/s, env_step=49950, gradient_step=49950, len=39, n/ep=10, n/st=390, rew=-200.72]  

Epoch #53: test_reward: -200.723693 ± 0.000000, best_reward: -103.747550 ± 0.000000 in #0



Epoch #54: 390it [00:04, 82.31it/s, env_step=50340, gradient_step=50340, len=39, n/ep=10, n/st=390, rew=-200.72]  

Epoch #54: test_reward: -200.723693 ± 0.000000, best_reward: -103.747550 ± 0.000000 in #0



Epoch #55: 390it [00:04, 80.74it/s, env_step=50730, gradient_step=50730, len=39, n/ep=10, n/st=390, rew=-200.72]  

Epoch #55: test_reward: -200.723693 ± 0.000000, best_reward: -103.747550 ± 0.000000 in #0



Epoch #56: 390it [00:04, 79.28it/s, env_step=51120, gradient_step=51120, len=39, n/ep=10, n/st=390, rew=-200.72]  

Epoch #56: test_reward: -200.723693 ± 0.000000, best_reward: -103.747550 ± 0.000000 in #0



Epoch #57: 390it [00:05, 73.25it/s, env_step=51510, gradient_step=51510, len=39, n/ep=10, n/st=390, rew=-200.72]  

Epoch #57: test_reward: -200.723693 ± 0.000000, best_reward: -103.747550 ± 0.000000 in #0



Epoch #58: 390it [00:04, 96.20it/s, env_step=51900, gradient_step=51900, len=39, n/ep=10, n/st=390, rew=-200.72]  

Epoch #58: test_reward: -200.723693 ± 0.000000, best_reward: -103.747550 ± 0.000000 in #0



Epoch #59: 390it [00:04, 93.53it/s, env_step=52290, gradient_step=52290, len=39, n/ep=10, n/st=390, rew=-200.72]  

Epoch #59: test_reward: -200.723693 ± 0.000000, best_reward: -103.747550 ± 0.000000 in #0



Epoch #60: 390it [00:05, 76.98it/s, env_step=52680, gradient_step=52680, len=39, n/ep=10, n/st=390, rew=-200.72]  

Epoch #60: test_reward: -200.723693 ± 0.000000, best_reward: -103.747550 ± 0.000000 in #0



Epoch #61: 390it [00:04, 86.87it/s, env_step=53070, gradient_step=53070, len=39, n/ep=10, n/st=390, rew=-200.72]  

Epoch #61: test_reward: -200.723693 ± 0.000000, best_reward: -103.747550 ± 0.000000 in #0



Epoch #62: 390it [00:04, 85.06it/s, env_step=53460, gradient_step=53460, len=39, n/ep=10, n/st=390, rew=-200.72]  

Epoch #62: test_reward: -200.723693 ± 0.000000, best_reward: -103.747550 ± 0.000000 in #0



Epoch #63: 390it [00:04, 90.41it/s, env_step=53850, gradient_step=53850, len=39, n/ep=10, n/st=390, rew=-200.72]  

Epoch #63: test_reward: -200.723693 ± 0.000000, best_reward: -103.747550 ± 0.000000 in #0



Epoch #64: 390it [00:04, 94.20it/s, env_step=54240, gradient_step=54240, len=39, n/ep=10, n/st=390, rew=-200.72]  

Epoch #64: test_reward: -200.723693 ± 0.000000, best_reward: -103.747550 ± 0.000000 in #0



Epoch #65: 390it [00:04, 94.17it/s, env_step=54630, gradient_step=54630, len=39, n/ep=10, n/st=390, rew=-200.72]  

Epoch #65: test_reward: -200.723693 ± 0.000000, best_reward: -103.747550 ± 0.000000 in #0



Epoch #66: 390it [00:04, 96.90it/s, env_step=55020, gradient_step=55020, len=39, n/ep=10, n/st=390, rew=-200.72]  

Epoch #66: test_reward: -200.723693 ± 0.000000, best_reward: -103.747550 ± 0.000000 in #0



Epoch #67: 390it [00:04, 87.47it/s, env_step=55410, gradient_step=55410, len=39, n/ep=10, n/st=390, rew=-200.72]  

Epoch #67: test_reward: -200.723693 ± 0.000000, best_reward: -103.747550 ± 0.000000 in #0



Epoch #68: 390it [00:04, 96.07it/s, env_step=55800, gradient_step=55800, len=39, n/ep=10, n/st=390, rew=-200.72]  

Epoch #68: test_reward: -200.723693 ± 0.000000, best_reward: -103.747550 ± 0.000000 in #0



Epoch #69: 390it [00:04, 90.89it/s, env_step=56190, gradient_step=56190, len=39, n/ep=10, n/st=390, rew=-200.72]  

Epoch #69: test_reward: -200.723693 ± 0.000000, best_reward: -103.747550 ± 0.000000 in #0



Epoch #70: 390it [00:04, 86.36it/s, env_step=56580, gradient_step=56580, len=39, n/ep=10, n/st=390, rew=-200.72]  

Epoch #70: test_reward: -200.723693 ± 0.000000, best_reward: -103.747550 ± 0.000000 in #0



Epoch #71: 390it [00:04, 85.65it/s, env_step=56970, gradient_step=56970, len=39, n/ep=10, n/st=390, rew=-200.72] 

Epoch #71: test_reward: -200.723693 ± 0.000000, best_reward: -103.747550 ± 0.000000 in #0



Epoch #72: 390it [00:03, 108.36it/s, env_step=57360, gradient_step=57360, len=39, n/ep=10, n/st=390, rew=-200.72] 

Epoch #72: test_reward: -200.723693 ± 0.000000, best_reward: -103.747550 ± 0.000000 in #0



Epoch #73: 390it [00:03, 106.83it/s, env_step=57750, gradient_step=57750, len=39, n/ep=10, n/st=390, rew=-200.72] 

Epoch #73: test_reward: -200.723693 ± 0.000000, best_reward: -103.747550 ± 0.000000 in #0



Epoch #74: 390it [00:03, 104.36it/s, env_step=58140, gradient_step=58140, len=39, n/ep=10, n/st=390, rew=-200.72] 

Epoch #74: test_reward: -200.723693 ± 0.000000, best_reward: -103.747550 ± 0.000000 in #0



Epoch #75: 390it [00:03, 108.41it/s, env_step=58530, gradient_step=58530, len=39, n/ep=10, n/st=390, rew=-200.72] 

Epoch #75: test_reward: -200.723693 ± 0.000000, best_reward: -103.747550 ± 0.000000 in #0



Epoch #76: 390it [00:03, 109.81it/s, env_step=58920, gradient_step=58920, len=39, n/ep=10, n/st=390, rew=-200.72] 

Epoch #76: test_reward: -200.723693 ± 0.000000, best_reward: -103.747550 ± 0.000000 in #0



Epoch #77: 390it [00:03, 107.75it/s, env_step=59310, gradient_step=59310, len=39, n/ep=10, n/st=390, rew=-200.72] 

Epoch #77: test_reward: -200.723693 ± 0.000000, best_reward: -103.747550 ± 0.000000 in #0



Epoch #78: 390it [00:03, 104.55it/s, env_step=59700, gradient_step=59700, len=39, n/ep=10, n/st=390, rew=-200.72] 

Epoch #78: test_reward: -200.723693 ± 0.000000, best_reward: -103.747550 ± 0.000000 in #0



Epoch #79: 390it [00:03, 98.15it/s, env_step=60090, gradient_step=60090, len=39, n/ep=10, n/st=390, rew=-200.72]  

Epoch #79: test_reward: -200.723693 ± 0.000000, best_reward: -103.747550 ± 0.000000 in #0



Epoch #80: 390it [00:03, 104.55it/s, env_step=60480, gradient_step=60480, len=39, n/ep=10, n/st=390, rew=-200.72] 

Epoch #80: test_reward: -200.723693 ± 0.000000, best_reward: -103.747550 ± 0.000000 in #0



Epoch #81: 390it [00:03, 109.78it/s, env_step=60870, gradient_step=60870, len=39, n/ep=10, n/st=390, rew=-200.72] 

Epoch #81: test_reward: -200.723693 ± 0.000000, best_reward: -103.747550 ± 0.000000 in #0



Epoch #82: 390it [00:05, 74.63it/s, env_step=61260, gradient_step=61260, len=39, n/ep=10, n/st=390, rew=-200.72] 

Epoch #82: test_reward: -200.723693 ± 0.000000, best_reward: -103.747550 ± 0.000000 in #0



Epoch #83: 390it [00:03, 108.29it/s, env_step=61650, gradient_step=61650, len=39, n/ep=10, n/st=390, rew=-200.72] 

Epoch #83: test_reward: -200.723693 ± 0.000000, best_reward: -103.747550 ± 0.000000 in #0



Epoch #84: 390it [00:03, 108.50it/s, env_step=62040, gradient_step=62040, len=39, n/ep=10, n/st=390, rew=-200.72] 

Epoch #84: test_reward: -200.723693 ± 0.000000, best_reward: -103.747550 ± 0.000000 in #0



Epoch #85: 390it [00:03, 108.82it/s, env_step=62430, gradient_step=62430, len=39, n/ep=10, n/st=390, rew=-200.72] 

Epoch #85: test_reward: -200.723693 ± 0.000000, best_reward: -103.747550 ± 0.000000 in #0



Epoch #86: 390it [00:03, 104.36it/s, env_step=62820, gradient_step=62820, len=39, n/ep=10, n/st=390, rew=-200.72] 

Epoch #86: test_reward: -200.723693 ± 0.000000, best_reward: -103.747550 ± 0.000000 in #0



Epoch #87: 390it [00:03, 110.82it/s, env_step=63210, gradient_step=63210, len=39, n/ep=10, n/st=390, rew=-200.72] 

Epoch #87: test_reward: -200.723693 ± 0.000000, best_reward: -103.747550 ± 0.000000 in #0



Epoch #88: 390it [00:03, 108.83it/s, env_step=63600, gradient_step=63600, len=39, n/ep=10, n/st=390, rew=-200.72] 

Epoch #88: test_reward: -200.723693 ± 0.000000, best_reward: -103.747550 ± 0.000000 in #0



Epoch #89: 390it [00:03, 108.20it/s, env_step=63990, gradient_step=63990, len=39, n/ep=10, n/st=390, rew=-200.72] 

Epoch #89: test_reward: -200.723693 ± 0.000000, best_reward: -103.747550 ± 0.000000 in #0



Epoch #90: 390it [00:03, 104.09it/s, env_step=64380, gradient_step=64380, len=39, n/ep=10, n/st=390, rew=-200.72] 

Epoch #90: test_reward: -200.723693 ± 0.000000, best_reward: -103.747550 ± 0.000000 in #0



Epoch #91: 390it [00:03, 109.59it/s, env_step=64770, gradient_step=64770, len=39, n/ep=10, n/st=390, rew=-200.72] 

Epoch #91: test_reward: -200.723693 ± 0.000000, best_reward: -103.747550 ± 0.000000 in #0



Epoch #92: 390it [00:03, 101.86it/s, env_step=65160, gradient_step=65160, len=39, n/ep=10, n/st=390, rew=-200.72] 

Epoch #92: test_reward: -200.723693 ± 0.000000, best_reward: -103.747550 ± 0.000000 in #0



Epoch #93: 390it [00:03, 109.22it/s, env_step=65550, gradient_step=65550, len=39, n/ep=10, n/st=390, rew=-200.72] 

Epoch #93: test_reward: -200.723693 ± 0.000000, best_reward: -103.747550 ± 0.000000 in #0



Epoch #94: 390it [00:05, 68.63it/s, env_step=65940, gradient_step=65940, len=39, n/ep=10, n/st=390, rew=-200.72]  

Epoch #94: test_reward: -200.723693 ± 0.000000, best_reward: -103.747550 ± 0.000000 in #0



Epoch #95: 390it [00:03, 106.22it/s, env_step=66330, gradient_step=66330, len=39, n/ep=10, n/st=390, rew=-200.72] 

Epoch #95: test_reward: -200.723693 ± 0.000000, best_reward: -103.747550 ± 0.000000 in #0



Epoch #96: 390it [00:03, 105.67it/s, env_step=66720, gradient_step=66720, len=39, n/ep=10, n/st=390, rew=-200.72] 

Epoch #96: test_reward: -200.723693 ± 0.000000, best_reward: -103.747550 ± 0.000000 in #0



Epoch #97: 390it [00:03, 107.58it/s, env_step=67110, gradient_step=67110, len=39, n/ep=10, n/st=390, rew=-200.72] 

Epoch #97: test_reward: -200.723693 ± 0.000000, best_reward: -103.747550 ± 0.000000 in #0



Epoch #98: 390it [00:03, 103.41it/s, env_step=67500, gradient_step=67500, len=39, n/ep=10, n/st=390, rew=-200.72] 

Epoch #98: test_reward: -200.723693 ± 0.000000, best_reward: -103.747550 ± 0.000000 in #0



Epoch #99: 390it [00:03, 110.31it/s, env_step=67890, gradient_step=67890, len=39, n/ep=10, n/st=390, rew=-200.72] 

Epoch #99: test_reward: -200.723693 ± 0.000000, best_reward: -103.747550 ± 0.000000 in #0



Epoch #100: 390it [00:03, 107.67it/s, env_step=68280, gradient_step=68280, len=39, n/ep=10, n/st=390, rew=-200.72] 

Epoch #100: test_reward: -200.723693 ± 0.000000, best_reward: -103.747550 ± 0.000000 in #0





InfoStats(gradient_step=68280, best_reward=-103.7475503527924, best_reward_std=0.0, train_step=68280, train_episode=1000, test_step=6867, test_episode=101, timing=TimingStats(total_time=703.9502420425415, train_time=698.1019577980042, train_time_collect=51.14694666862488, train_time_update=642.0736031532288, test_time=5.8482842445373535, update_speed=97.80806261505546))

In [None]:
run(SimpleConv, scenario_2)

Epoch #1: 10000it [01:39, 100.84it/s, env_step=10000, gradient_step=10000, len=1000, n/ep=10, n/st=10000, rew=-770.67] 

Epoch #1: test_reward: -149.715201 ± 0.000000, best_reward: -149.715201 ± 0.000000 in #1



Epoch #2: 1150it [00:12, 92.57it/s, env_step=11150, gradient_step=11150, len=115, n/ep=10, n/st=1150, rew=-149.72]  

Epoch #2: test_reward: -149.715201 ± 0.000000, best_reward: -149.715201 ± 0.000000 in #1



Epoch #3: 1150it [00:10, 106.20it/s, env_step=12300, gradient_step=12300, len=115, n/ep=10, n/st=1150, rew=-149.72] 


Epoch #3: test_reward: -149.709417 ± 0.000000, best_reward: -149.709417 ± 0.000000 in #3


Epoch #4: 10000it [01:43, 97.08it/s, env_step=22300, gradient_step=22300, len=1000, n/ep=10, n/st=10000, rew=-149.71]  


Epoch #4: test_reward: -770.668756 ± 0.000000, best_reward: -149.709417 ± 0.000000 in #3


Epoch #5: 10000it [01:39, 100.69it/s, env_step=32300, gradient_step=32300, len=1000, n/ep=10, n/st=10000, rew=-770.67] 


Epoch #5: test_reward: -770.668756 ± 0.000000, best_reward: -149.709417 ± 0.000000 in #3


Epoch #6: 10000it [01:38, 101.35it/s, env_step=42300, gradient_step=42300, len=1000, n/ep=10, n/st=10000, rew=-770.67] 

Epoch #6: test_reward: -149.715201 ± 0.000000, best_reward: -149.709417 ± 0.000000 in #3



Epoch #7: 1150it [00:10, 105.90it/s, env_step=43450, gradient_step=43450, len=115, n/ep=10, n/st=1150, rew=-149.72] 

Epoch #7: test_reward: -149.715201 ± 0.000000, best_reward: -149.709417 ± 0.000000 in #3



Epoch #8: 1150it [00:11, 103.67it/s, env_step=44600, gradient_step=44600, len=115, n/ep=10, n/st=1150, rew=-149.72] 


Epoch #8: test_reward: -149.709417 ± 0.000000, best_reward: -149.709417 ± 0.000000 in #3


Epoch #9: 10000it [01:38, 101.95it/s, env_step=54600, gradient_step=54600, len=1000, n/ep=10, n/st=10000, rew=-149.71] 


Epoch #9: test_reward: -770.668756 ± 0.000000, best_reward: -149.709417 ± 0.000000 in #3


Epoch #10: 10000it [01:40, 99.72it/s, env_step=64600, gradient_step=64600, len=1000, n/ep=10, n/st=10000, rew=-770.67]  


Epoch #10: test_reward: -770.668756 ± 0.000000, best_reward: -149.709417 ± 0.000000 in #3


Epoch #11: 10000it [01:37, 102.12it/s, env_step=74600, gradient_step=74600, len=1000, n/ep=10, n/st=10000, rew=-770.67] 


Epoch #11: test_reward: -149.709417 ± 0.000000, best_reward: -149.709417 ± 0.000000 in #3


Epoch #12: 10000it [01:41, 98.41it/s, env_step=84600, gradient_step=84600, len=1000, n/ep=10, n/st=10000, rew=-149.71]  

Epoch #12: test_reward: -149.715201 ± 0.000000, best_reward: -149.709417 ± 0.000000 in #3



Epoch #13: 1150it [00:11, 96.61it/s, env_step=85750, gradient_step=85750, len=115, n/ep=10, n/st=1150, rew=-149.72]  


Epoch #13: test_reward: -770.668756 ± 0.000000, best_reward: -149.709417 ± 0.000000 in #3


Epoch #14: 10000it [01:51, 89.35it/s, env_step=95750, gradient_step=95750, len=1000, n/ep=10, n/st=10000, rew=-770.67]  


Epoch #14: test_reward: -149.709417 ± 0.000000, best_reward: -149.709417 ± 0.000000 in #3


Epoch #15: 10000it [01:40, 100.00it/s, env_step=105750, gradient_step=105750, len=1000, n/ep=10, n/st=10000, rew=-149.71] 

Epoch #15: test_reward: -149.715201 ± 0.000000, best_reward: -149.709417 ± 0.000000 in #3



Epoch #16: 1150it [00:12, 91.02it/s, env_step=106900, gradient_step=106900, len=115, n/ep=10, n/st=1150, rew=-149.72]  

Epoch #16: test_reward: -149.715201 ± 0.000000, best_reward: -149.709417 ± 0.000000 in #3



Epoch #17: 1150it [00:10, 105.50it/s, env_step=108050, gradient_step=108050, len=115, n/ep=10, n/st=1150, rew=-149.72] 


Epoch #17: test_reward: -770.668756 ± 0.000000, best_reward: -149.709417 ± 0.000000 in #3


Epoch #18: 10000it [01:38, 101.30it/s, env_step=118050, gradient_step=118050, len=1000, n/ep=10, n/st=10000, rew=-770.67] 


Epoch #18: test_reward: -770.668756 ± 0.000000, best_reward: -149.709417 ± 0.000000 in #3


Epoch #19: 10000it [01:41, 98.21it/s, env_step=128050, gradient_step=128050, len=1000, n/ep=10, n/st=10000, rew=-770.67]  

Epoch #19: test_reward: -149.715201 ± 0.000000, best_reward: -149.709417 ± 0.000000 in #3



Epoch #20: 1150it [00:12, 91.97it/s, env_step=129200, gradient_step=129200, len=115, n/ep=10, n/st=1150, rew=-149.72]  

Epoch #20: test_reward: -149.715201 ± 0.000000, best_reward: -149.709417 ± 0.000000 in #3



Epoch #21: 1150it [00:10, 106.20it/s, env_step=130350, gradient_step=130350, len=115, n/ep=10, n/st=1150, rew=-149.72] 


Epoch #21: test_reward: -149.709417 ± 0.000000, best_reward: -149.709417 ± 0.000000 in #3


Epoch #22: 10000it [01:40, 99.74it/s, env_step=140350, gradient_step=140350, len=1000, n/ep=10, n/st=10000, rew=-149.71]  


Epoch #22: test_reward: -770.668756 ± 0.000000, best_reward: -149.709417 ± 0.000000 in #3


Epoch #23: 10000it [01:39, 100.93it/s, env_step=150350, gradient_step=150350, len=1000, n/ep=10, n/st=10000, rew=-770.67] 

Epoch #23: test_reward: -132.689583 ± 0.000000, best_reward: -132.689583 ± 0.000000 in #23



Epoch #24: 700it [00:06, 102.24it/s, env_step=151050, gradient_step=151050, len=70, n/ep=10, n/st=700, rew=-132.69] 

Epoch #24: test_reward: -149.715201 ± 0.000000, best_reward: -132.689583 ± 0.000000 in #23



Epoch #25: 1150it [00:11, 100.53it/s, env_step=152200, gradient_step=152200, len=115, n/ep=10, n/st=1150, rew=-149.72] 


Epoch #25: test_reward: -770.668756 ± 0.000000, best_reward: -132.689583 ± 0.000000 in #23


Epoch #26: 10000it [01:38, 101.29it/s, env_step=162200, gradient_step=162200, len=1000, n/ep=10, n/st=10000, rew=-770.67] 


Epoch #26: test_reward: -770.668756 ± 0.000000, best_reward: -132.689583 ± 0.000000 in #23


Epoch #27: 10000it [01:44, 95.69it/s, env_step=172200, gradient_step=172200, len=1000, n/ep=10, n/st=10000, rew=-770.67]  


Epoch #27: test_reward: -770.668756 ± 0.000000, best_reward: -132.689583 ± 0.000000 in #23


Epoch #28: 10000it [01:39, 100.35it/s, env_step=182200, gradient_step=182200, len=1000, n/ep=10, n/st=10000, rew=-770.67] 


Epoch #28: test_reward: -770.668756 ± 0.000000, best_reward: -132.689583 ± 0.000000 in #23


Epoch #29: 10000it [01:39, 100.97it/s, env_step=192200, gradient_step=192200, len=1000, n/ep=10, n/st=10000, rew=-770.67] 


Epoch #29: test_reward: -770.668756 ± 0.000000, best_reward: -132.689583 ± 0.000000 in #23


Epoch #30: 10000it [01:39, 100.88it/s, env_step=202200, gradient_step=202200, len=1000, n/ep=10, n/st=10000, rew=-770.67] 


Epoch #30: test_reward: -770.668756 ± 0.000000, best_reward: -132.689583 ± 0.000000 in #23


Epoch #31: 10000it [01:38, 101.62it/s, env_step=212200, gradient_step=212200, len=1000, n/ep=10, n/st=10000, rew=-770.67] 


Epoch #31: test_reward: -770.668756 ± 0.000000, best_reward: -132.689583 ± 0.000000 in #23


Epoch #32: 10000it [01:38, 101.24it/s, env_step=222200, gradient_step=222200, len=1000, n/ep=10, n/st=10000, rew=-770.67] 


Epoch #32: test_reward: -770.668756 ± 0.000000, best_reward: -132.689583 ± 0.000000 in #23


Epoch #33: 10000it [01:38, 101.06it/s, env_step=232200, gradient_step=232200, len=1000, n/ep=10, n/st=10000, rew=-770.67] 


Epoch #33: test_reward: -770.668756 ± 0.000000, best_reward: -132.689583 ± 0.000000 in #23


Epoch #34: 10000it [01:39, 100.69it/s, env_step=242200, gradient_step=242200, len=1000, n/ep=10, n/st=10000, rew=-770.67] 


Epoch #34: test_reward: -770.668756 ± 0.000000, best_reward: -132.689583 ± 0.000000 in #23


Epoch #35: 10000it [01:38, 101.62it/s, env_step=252200, gradient_step=252200, len=1000, n/ep=10, n/st=10000, rew=-770.67] 


Epoch #35: test_reward: -770.668756 ± 0.000000, best_reward: -132.689583 ± 0.000000 in #23


Epoch #36: 10000it [01:42, 97.60it/s, env_step=262200, gradient_step=262200, len=1000, n/ep=10, n/st=10000, rew=-770.67]  


Epoch #36: test_reward: -770.668756 ± 0.000000, best_reward: -132.689583 ± 0.000000 in #23


Epoch #37: 10000it [01:54, 87.09it/s, env_step=272200, gradient_step=272200, len=1000, n/ep=10, n/st=10000, rew=-770.67]  


Epoch #37: test_reward: -770.668756 ± 0.000000, best_reward: -132.689583 ± 0.000000 in #23


Epoch #38: 10000it [01:40, 99.93it/s, env_step=282200, gradient_step=282200, len=1000, n/ep=10, n/st=10000, rew=-770.67]  


Epoch #38: test_reward: -770.668756 ± 0.000000, best_reward: -132.689583 ± 0.000000 in #23


Epoch #39: 10000it [01:59, 83.93it/s, env_step=292200, gradient_step=292200, len=1000, n/ep=10, n/st=10000, rew=-770.67]  


Epoch #39: test_reward: -770.668756 ± 0.000000, best_reward: -132.689583 ± 0.000000 in #23


Epoch #40: 10000it [02:11, 76.25it/s, env_step=302200, gradient_step=302200, len=1000, n/ep=10, n/st=10000, rew=-770.67] 


Epoch #40: test_reward: -770.668756 ± 0.000000, best_reward: -132.689583 ± 0.000000 in #23


Epoch #41: 10000it [01:47, 93.00it/s, env_step=312200, gradient_step=312200, len=1000, n/ep=10, n/st=10000, rew=-770.67]  


Epoch #41: test_reward: -770.668756 ± 0.000000, best_reward: -132.689583 ± 0.000000 in #23


Epoch #42: 10000it [01:47, 93.14it/s, env_step=322200, gradient_step=322200, len=1000, n/ep=10, n/st=10000, rew=-770.67]  


Epoch #42: test_reward: -770.668756 ± 0.000000, best_reward: -132.689583 ± 0.000000 in #23


Epoch #43: 10000it [01:43, 96.23it/s, env_step=332200, gradient_step=332200, len=1000, n/ep=10, n/st=10000, rew=-770.67]  


Epoch #43: test_reward: -770.668756 ± 0.000000, best_reward: -132.689583 ± 0.000000 in #23


Epoch #44: 10000it [01:36, 103.48it/s, env_step=342200, gradient_step=342200, len=1000, n/ep=10, n/st=10000, rew=-770.67] 


Epoch #44: test_reward: -770.668756 ± 0.000000, best_reward: -132.689583 ± 0.000000 in #23


Epoch #45: 10000it [01:37, 102.68it/s, env_step=352200, gradient_step=352200, len=1000, n/ep=10, n/st=10000, rew=-770.67] 


Epoch #45: test_reward: -770.668756 ± 0.000000, best_reward: -132.689583 ± 0.000000 in #23


Epoch #46: 10000it [01:38, 101.43it/s, env_step=362200, gradient_step=362200, len=1000, n/ep=10, n/st=10000, rew=-770.67] 


Epoch #46: test_reward: -770.668756 ± 0.000000, best_reward: -132.689583 ± 0.000000 in #23


Epoch #47: 10000it [01:42, 97.68it/s, env_step=372200, gradient_step=372200, len=1000, n/ep=10, n/st=10000, rew=-770.67]  


Epoch #47: test_reward: -770.668756 ± 0.000000, best_reward: -132.689583 ± 0.000000 in #23


Epoch #48: 10000it [01:39, 100.71it/s, env_step=382200, gradient_step=382200, len=1000, n/ep=10, n/st=10000, rew=-770.67] 


Epoch #48: test_reward: -770.668756 ± 0.000000, best_reward: -132.689583 ± 0.000000 in #23


Epoch #49: 10000it [01:43, 96.88it/s, env_step=392200, gradient_step=392200, len=1000, n/ep=10, n/st=10000, rew=-770.67]  


Epoch #49: test_reward: -770.668756 ± 0.000000, best_reward: -132.689583 ± 0.000000 in #23


Epoch #50: 10000it [01:37, 103.07it/s, env_step=402200, gradient_step=402200, len=1000, n/ep=10, n/st=10000, rew=-770.67] 


Epoch #50: test_reward: -770.668756 ± 0.000000, best_reward: -132.689583 ± 0.000000 in #23


Epoch #51: 10000it [01:37, 103.01it/s, env_step=412200, gradient_step=412200, len=1000, n/ep=10, n/st=10000, rew=-770.67] 


Epoch #51: test_reward: -770.668756 ± 0.000000, best_reward: -132.689583 ± 0.000000 in #23


Epoch #52: 10000it [01:46, 94.24it/s, env_step=422200, gradient_step=422200, len=1000, n/ep=10, n/st=10000, rew=-770.67]  


Epoch #52: test_reward: -770.668756 ± 0.000000, best_reward: -132.689583 ± 0.000000 in #23


Epoch #53: 10000it [01:47, 93.00it/s, env_step=432200, gradient_step=432200, len=1000, n/ep=10, n/st=10000, rew=-770.67]  


Epoch #53: test_reward: -770.668756 ± 0.000000, best_reward: -132.689583 ± 0.000000 in #23


Epoch #54: 10000it [01:44, 95.33it/s, env_step=442200, gradient_step=442200, len=1000, n/ep=10, n/st=10000, rew=-770.67]  


Epoch #54: test_reward: -770.668756 ± 0.000000, best_reward: -132.689583 ± 0.000000 in #23


Epoch #55: 10000it [01:44, 95.92it/s, env_step=452200, gradient_step=452200, len=1000, n/ep=10, n/st=10000, rew=-770.67]  


Epoch #55: test_reward: -770.668756 ± 0.000000, best_reward: -132.689583 ± 0.000000 in #23


Epoch #56: 10000it [01:45, 95.13it/s, env_step=462200, gradient_step=462200, len=1000, n/ep=10, n/st=10000, rew=-770.67]  


Epoch #56: test_reward: -770.668756 ± 0.000000, best_reward: -132.689583 ± 0.000000 in #23


Epoch #57: 10000it [01:43, 96.46it/s, env_step=472200, gradient_step=472200, len=1000, n/ep=10, n/st=10000, rew=-770.67]  


Epoch #57: test_reward: -770.668756 ± 0.000000, best_reward: -132.689583 ± 0.000000 in #23


Epoch #58: 10000it [01:42, 97.82it/s, env_step=482200, gradient_step=482200, len=1000, n/ep=10, n/st=10000, rew=-770.67]  


Epoch #58: test_reward: -770.668756 ± 0.000000, best_reward: -132.689583 ± 0.000000 in #23


Epoch #59: 10000it [01:45, 94.54it/s, env_step=492200, gradient_step=492200, len=1000, n/ep=10, n/st=10000, rew=-770.67] 


Epoch #59: test_reward: -770.668756 ± 0.000000, best_reward: -132.689583 ± 0.000000 in #23


Epoch #60: 10000it [01:42, 97.76it/s, env_step=502200, gradient_step=502200, len=1000, n/ep=10, n/st=10000, rew=-770.67]  


Epoch #60: test_reward: -770.668756 ± 0.000000, best_reward: -132.689583 ± 0.000000 in #23


Epoch #61: 10000it [01:43, 96.69it/s, env_step=512200, gradient_step=512200, len=1000, n/ep=10, n/st=10000, rew=-770.67]  


Epoch #61: test_reward: -770.668756 ± 0.000000, best_reward: -132.689583 ± 0.000000 in #23


Epoch #62: 10000it [01:43, 96.31it/s, env_step=522200, gradient_step=522200, len=1000, n/ep=10, n/st=10000, rew=-770.67]  


Epoch #62: test_reward: -770.668756 ± 0.000000, best_reward: -132.689583 ± 0.000000 in #23


Epoch #63: 10000it [01:43, 96.51it/s, env_step=532200, gradient_step=532200, len=1000, n/ep=10, n/st=10000, rew=-770.67]  


Epoch #63: test_reward: -770.668756 ± 0.000000, best_reward: -132.689583 ± 0.000000 in #23


Epoch #64: 10000it [01:41, 98.99it/s, env_step=542200, gradient_step=542200, len=1000, n/ep=10, n/st=10000, rew=-770.67]  


Epoch #64: test_reward: -770.668756 ± 0.000000, best_reward: -132.689583 ± 0.000000 in #23


Epoch #65: 10000it [01:41, 98.78it/s, env_step=552200, gradient_step=552200, len=1000, n/ep=10, n/st=10000, rew=-770.67]  


Epoch #65: test_reward: -770.668756 ± 0.000000, best_reward: -132.689583 ± 0.000000 in #23


Epoch #66: 10000it [01:35, 104.95it/s, env_step=562200, gradient_step=562200, len=1000, n/ep=10, n/st=10000, rew=-770.67] 


Epoch #66: test_reward: -770.668756 ± 0.000000, best_reward: -132.689583 ± 0.000000 in #23


Epoch #67: 10000it [01:35, 104.94it/s, env_step=572200, gradient_step=572200, len=1000, n/ep=10, n/st=10000, rew=-770.67] 


Epoch #67: test_reward: -770.668756 ± 0.000000, best_reward: -132.689583 ± 0.000000 in #23


Epoch #68: 10000it [01:35, 104.80it/s, env_step=582200, gradient_step=582200, len=1000, n/ep=10, n/st=10000, rew=-770.67] 


Epoch #68: test_reward: -770.668756 ± 0.000000, best_reward: -132.689583 ± 0.000000 in #23


Epoch #69: 10000it [01:34, 106.20it/s, env_step=592200, gradient_step=592200, len=1000, n/ep=10, n/st=10000, rew=-770.67] 


Epoch #69: test_reward: -770.668756 ± 0.000000, best_reward: -132.689583 ± 0.000000 in #23


Epoch #70: 10000it [01:35, 104.69it/s, env_step=602200, gradient_step=602200, len=1000, n/ep=10, n/st=10000, rew=-770.67] 


Epoch #70: test_reward: -770.668756 ± 0.000000, best_reward: -132.689583 ± 0.000000 in #23


Epoch #71: 10000it [01:38, 101.59it/s, env_step=612200, gradient_step=612200, len=1000, n/ep=10, n/st=10000, rew=-770.67] 


Epoch #71: test_reward: -770.668756 ± 0.000000, best_reward: -132.689583 ± 0.000000 in #23


Epoch #72: 10000it [01:34, 105.58it/s, env_step=622200, gradient_step=622200, len=1000, n/ep=10, n/st=10000, rew=-770.67] 


Epoch #72: test_reward: -770.668756 ± 0.000000, best_reward: -132.689583 ± 0.000000 in #23


Epoch #73: 10000it [01:35, 105.21it/s, env_step=632200, gradient_step=632200, len=1000, n/ep=10, n/st=10000, rew=-770.67] 


Epoch #73: test_reward: -770.668756 ± 0.000000, best_reward: -132.689583 ± 0.000000 in #23


Epoch #74: 10000it [01:35, 104.60it/s, env_step=642200, gradient_step=642200, len=1000, n/ep=10, n/st=10000, rew=-770.67] 


Epoch #74: test_reward: -770.668756 ± 0.000000, best_reward: -132.689583 ± 0.000000 in #23


Epoch #75: 10000it [01:35, 104.65it/s, env_step=652200, gradient_step=652200, len=1000, n/ep=10, n/st=10000, rew=-770.67] 


Epoch #75: test_reward: -770.668756 ± 0.000000, best_reward: -132.689583 ± 0.000000 in #23


Epoch #76: 10000it [01:34, 105.76it/s, env_step=662200, gradient_step=662200, len=1000, n/ep=10, n/st=10000, rew=-770.67] 


Epoch #76: test_reward: -770.668756 ± 0.000000, best_reward: -132.689583 ± 0.000000 in #23


Epoch #77: 10000it [01:34, 105.84it/s, env_step=672200, gradient_step=672200, len=1000, n/ep=10, n/st=10000, rew=-770.67] 


Epoch #77: test_reward: -770.668756 ± 0.000000, best_reward: -132.689583 ± 0.000000 in #23


Epoch #78: 10000it [01:35, 104.27it/s, env_step=682200, gradient_step=682200, len=1000, n/ep=10, n/st=10000, rew=-770.67] 


Epoch #78: test_reward: -770.668756 ± 0.000000, best_reward: -132.689583 ± 0.000000 in #23


Epoch #79: 10000it [01:34, 105.56it/s, env_step=692200, gradient_step=692200, len=1000, n/ep=10, n/st=10000, rew=-770.67] 


Epoch #79: test_reward: -770.668756 ± 0.000000, best_reward: -132.689583 ± 0.000000 in #23


Epoch #80: 10000it [01:33, 106.42it/s, env_step=702200, gradient_step=702200, len=1000, n/ep=10, n/st=10000, rew=-770.67] 


Epoch #80: test_reward: -770.668756 ± 0.000000, best_reward: -132.689583 ± 0.000000 in #23


Epoch #81: 10000it [01:35, 104.39it/s, env_step=712200, gradient_step=712200, len=1000, n/ep=10, n/st=10000, rew=-770.67] 


Epoch #81: test_reward: -770.668756 ± 0.000000, best_reward: -132.689583 ± 0.000000 in #23


Epoch #82: 10000it [01:36, 103.22it/s, env_step=722200, gradient_step=722200, len=1000, n/ep=10, n/st=10000, rew=-770.67] 


Epoch #82: test_reward: -770.668756 ± 0.000000, best_reward: -132.689583 ± 0.000000 in #23


Epoch #83: 10000it [01:35, 104.35it/s, env_step=732200, gradient_step=732200, len=1000, n/ep=10, n/st=10000, rew=-770.67] 


Epoch #83: test_reward: -770.668756 ± 0.000000, best_reward: -132.689583 ± 0.000000 in #23


Epoch #84: 10000it [01:35, 105.03it/s, env_step=742200, gradient_step=742200, len=1000, n/ep=10, n/st=10000, rew=-770.67] 


Epoch #84: test_reward: -770.668756 ± 0.000000, best_reward: -132.689583 ± 0.000000 in #23


Epoch #85: 10000it [01:35, 104.85it/s, env_step=752200, gradient_step=752200, len=1000, n/ep=10, n/st=10000, rew=-770.67] 


Epoch #85: test_reward: -770.668756 ± 0.000000, best_reward: -132.689583 ± 0.000000 in #23


Epoch #86: 10000it [01:34, 105.72it/s, env_step=762200, gradient_step=762200, len=1000, n/ep=10, n/st=10000, rew=-770.67] 


Epoch #86: test_reward: -770.668756 ± 0.000000, best_reward: -132.689583 ± 0.000000 in #23


Epoch #87: 10000it [01:35, 105.07it/s, env_step=772200, gradient_step=772200, len=1000, n/ep=10, n/st=10000, rew=-770.67] 


Epoch #87: test_reward: -770.668756 ± 0.000000, best_reward: -132.689583 ± 0.000000 in #23


Epoch #88: 10000it [01:36, 103.19it/s, env_step=782200, gradient_step=782200, len=1000, n/ep=10, n/st=10000, rew=-770.67] 


Epoch #88: test_reward: -770.668756 ± 0.000000, best_reward: -132.689583 ± 0.000000 in #23


Epoch #89: 10000it [01:36, 103.74it/s, env_step=792200, gradient_step=792200, len=1000, n/ep=10, n/st=10000, rew=-770.67] 


Epoch #89: test_reward: -770.668756 ± 0.000000, best_reward: -132.689583 ± 0.000000 in #23


Epoch #90: 10000it [01:40, 99.54it/s, env_step=802200, gradient_step=802200, len=1000, n/ep=10, n/st=10000, rew=-770.67]  


Epoch #90: test_reward: -770.668756 ± 0.000000, best_reward: -132.689583 ± 0.000000 in #23


Epoch #91: 10000it [01:34, 105.85it/s, env_step=812200, gradient_step=812200, len=1000, n/ep=10, n/st=10000, rew=-770.67] 


Epoch #91: test_reward: -770.668756 ± 0.000000, best_reward: -132.689583 ± 0.000000 in #23


Epoch #92: 10000it [01:34, 105.36it/s, env_step=822200, gradient_step=822200, len=1000, n/ep=10, n/st=10000, rew=-770.67] 


Epoch #92: test_reward: -770.668756 ± 0.000000, best_reward: -132.689583 ± 0.000000 in #23


Epoch #93: 10000it [01:35, 104.75it/s, env_step=832200, gradient_step=832200, len=1000, n/ep=10, n/st=10000, rew=-770.67] 


Epoch #93: test_reward: -770.668756 ± 0.000000, best_reward: -132.689583 ± 0.000000 in #23


Epoch #94: 10000it [01:35, 104.67it/s, env_step=842200, gradient_step=842200, len=1000, n/ep=10, n/st=10000, rew=-770.67] 


Epoch #94: test_reward: -770.668756 ± 0.000000, best_reward: -132.689583 ± 0.000000 in #23


Epoch #95: 10000it [01:35, 104.68it/s, env_step=852200, gradient_step=852200, len=1000, n/ep=10, n/st=10000, rew=-770.67] 


Epoch #95: test_reward: -770.668756 ± 0.000000, best_reward: -132.689583 ± 0.000000 in #23


Epoch #96: 10000it [01:34, 105.40it/s, env_step=862200, gradient_step=862200, len=1000, n/ep=10, n/st=10000, rew=-770.67] 


Epoch #96: test_reward: -770.668756 ± 0.000000, best_reward: -132.689583 ± 0.000000 in #23


Epoch #97: 10000it [01:35, 104.93it/s, env_step=872200, gradient_step=872200, len=1000, n/ep=10, n/st=10000, rew=-770.67] 


Epoch #97: test_reward: -770.668756 ± 0.000000, best_reward: -132.689583 ± 0.000000 in #23


Epoch #98: 10000it [01:39, 100.85it/s, env_step=882200, gradient_step=882200, len=1000, n/ep=10, n/st=10000, rew=-770.67] 


Epoch #98: test_reward: -770.668756 ± 0.000000, best_reward: -132.689583 ± 0.000000 in #23


Epoch #99: 10000it [01:35, 104.47it/s, env_step=892200, gradient_step=892200, len=1000, n/ep=10, n/st=10000, rew=-770.67] 


Epoch #99: test_reward: -770.668756 ± 0.000000, best_reward: -132.689583 ± 0.000000 in #23


Epoch #100: 10000it [01:36, 103.44it/s, env_step=902200, gradient_step=902200, len=1000, n/ep=10, n/st=10000, rew=-770.67] 


Epoch #100: test_reward: -770.668756 ± 0.000000, best_reward: -132.689583 ± 0.000000 in #23


InfoStats(gradient_step=902200, best_reward=-132.68958266883274, best_reward_std=0.0, train_step=902200, train_episode=1000, test_step=91220, test_episode=101, timing=TimingStats(total_time=9085.453593969345, train_time=9009.720768928528, train_time_collect=728.376672744751, train_time_update=8226.125409126282, test_time=75.73282504081726, update_speed=100.13628869735696))

In [None]:
run(SimpleConv, scenario_3)

Epoch #1: 30000it [05:21, 93.20it/s, env_step=30000, gradient_step=30000, len=3000, n/ep=10, n/st=30000, rew=-1130.56]  


Epoch #1: test_reward: -1130.555816 ± 0.000000, best_reward: -1130.555816 ± 0.000000 in #0


Epoch #2: 30000it [05:45, 86.95it/s, env_step=60000, gradient_step=60000, len=3000, n/ep=10, n/st=30000, rew=-1130.56]  


Epoch #2: test_reward: -171.674998 ± 0.000000, best_reward: -171.674998 ± 0.000000 in #2


Epoch #3: 30000it [06:05, 81.97it/s, env_step=90000, gradient_step=90000, len=3000, n/ep=10, n/st=30000, rew=-171.67]  

Epoch #3: test_reward: -152.675397 ± 0.000000, best_reward: -152.675397 ± 0.000000 in #3



Epoch #4: 2100it [00:23, 88.84it/s, env_step=92100, gradient_step=92100, len=210, n/ep=10, n/st=2100, rew=-152.68]  

Epoch #4: test_reward: -152.675397 ± 0.000000, best_reward: -152.675397 ± 0.000000 in #3



Epoch #5: 2100it [00:25, 81.50it/s, env_step=94200, gradient_step=94200, len=210, n/ep=10, n/st=2100, rew=-152.68]  


Epoch #5: test_reward: -171.674998 ± 0.000000, best_reward: -152.675397 ± 0.000000 in #3


Epoch #6: 30000it [05:53, 84.95it/s, env_step=124200, gradient_step=124200, len=3000, n/ep=10, n/st=30000, rew=-171.67]  


Epoch #6: test_reward: -1130.555816 ± 0.000000, best_reward: -152.675397 ± 0.000000 in #3


Epoch #7: 30000it [05:48, 85.97it/s, env_step=154200, gradient_step=154200, len=3000, n/ep=10, n/st=30000, rew=-1130.56]  


Epoch #7: test_reward: -171.674998 ± 0.000000, best_reward: -152.675397 ± 0.000000 in #3


Epoch #8: 30000it [05:45, 86.82it/s, env_step=184200, gradient_step=184200, len=3000, n/ep=10, n/st=30000, rew=-171.67]  


Epoch #8: test_reward: -1130.555816 ± 0.000000, best_reward: -152.675397 ± 0.000000 in #3


Epoch #9: 30000it [05:51, 85.39it/s, env_step=214200, gradient_step=214200, len=3000, n/ep=10, n/st=30000, rew=-1130.56]  


Epoch #9: test_reward: -1130.555816 ± 0.000000, best_reward: -152.675397 ± 0.000000 in #3


Epoch #10: 30000it [05:58, 83.79it/s, env_step=244200, gradient_step=244200, len=3000, n/ep=10, n/st=30000, rew=-1130.56]  

Epoch #10: test_reward: -152.675397 ± 0.000000, best_reward: -152.675397 ± 0.000000 in #3



Epoch #11: 2100it [00:23, 88.34it/s, env_step=246300, gradient_step=246300, len=210, n/ep=10, n/st=2100, rew=-152.68]  

Epoch #11: test_reward: -152.675397 ± 0.000000, best_reward: -152.675397 ± 0.000000 in #3



Epoch #12: 2100it [00:25, 82.33it/s, env_step=248400, gradient_step=248400, len=210, n/ep=10, n/st=2100, rew=-152.68] 


Epoch #12: test_reward: -171.674998 ± 0.000000, best_reward: -152.675397 ± 0.000000 in #3


Epoch #13: 30000it [05:55, 84.28it/s, env_step=278400, gradient_step=278400, len=3000, n/ep=10, n/st=30000, rew=-171.67]  


Epoch #13: test_reward: -152.675397 ± 0.000000, best_reward: -152.675397 ± 0.000000 in #3


Epoch #14: 2100it [00:25, 83.37it/s, env_step=280500, gradient_step=280500, len=210, n/ep=10, n/st=2100, rew=-152.68]  

Epoch #14: test_reward: -124.941988 ± 0.000000, best_reward: -124.941988 ± 0.000000 in #14



Epoch #15: 1100it [00:12, 90.22it/s, env_step=281600, gradient_step=281600, len=110, n/ep=10, n/st=1100, rew=-124.94]  


Epoch #15: test_reward: -171.674998 ± 0.000000, best_reward: -124.941988 ± 0.000000 in #14


Epoch #16: 30000it [05:47, 86.45it/s, env_step=311600, gradient_step=311600, len=3000, n/ep=10, n/st=30000, rew=-171.67]  

Epoch #16: test_reward: -152.675397 ± 0.000000, best_reward: -124.941988 ± 0.000000 in #14



Epoch #17: 2100it [00:23, 90.04it/s, env_step=313700, gradient_step=313700, len=210, n/ep=10, n/st=2100, rew=-152.68]  

Epoch #17: test_reward: -130.592509 ± 0.000000, best_reward: -124.941988 ± 0.000000 in #14



Epoch #18: 1200it [00:12, 95.64it/s, env_step=314900, gradient_step=314900, len=120, n/ep=10, n/st=1200, rew=-130.59]  


Epoch #18: test_reward: -171.674998 ± 0.000000, best_reward: -124.941988 ± 0.000000 in #14


Epoch #19: 30000it [05:21, 93.17it/s, env_step=344900, gradient_step=344900, len=3000, n/ep=10, n/st=30000, rew=-171.67]  

Epoch #19: test_reward: -152.675397 ± 0.000000, best_reward: -124.941988 ± 0.000000 in #14



Epoch #20: 2100it [00:22, 92.06it/s, env_step=347000, gradient_step=347000, len=210, n/ep=10, n/st=2100, rew=-152.68]  

Epoch #20: test_reward: -152.675397 ± 0.000000, best_reward: -124.941988 ± 0.000000 in #14



Epoch #21: 2100it [00:22, 93.20it/s, env_step=349100, gradient_step=349100, len=210, n/ep=10, n/st=2100, rew=-152.68]  

Epoch #21: test_reward: -152.675397 ± 0.000000, best_reward: -124.941988 ± 0.000000 in #14



Epoch #22: 2100it [00:22, 92.50it/s, env_step=351200, gradient_step=351200, len=210, n/ep=10, n/st=2100, rew=-152.68]  

Epoch #22: test_reward: -152.675397 ± 0.000000, best_reward: -124.941988 ± 0.000000 in #14



Epoch #23: 2100it [00:23, 90.89it/s, env_step=353300, gradient_step=353300, len=210, n/ep=10, n/st=2100, rew=-152.68]  

Epoch #23: test_reward: -152.675397 ± 0.000000, best_reward: -124.941988 ± 0.000000 in #14



Epoch #24: 2100it [00:22, 94.24it/s, env_step=355400, gradient_step=355400, len=210, n/ep=10, n/st=2100, rew=-152.68]  

Epoch #24: test_reward: -152.675397 ± 0.000000, best_reward: -124.941988 ± 0.000000 in #14



Epoch #25: 2100it [00:22, 92.13it/s, env_step=357500, gradient_step=357500, len=210, n/ep=10, n/st=2100, rew=-152.68]  

Epoch #25: test_reward: -152.675397 ± 0.000000, best_reward: -124.941988 ± 0.000000 in #14



Epoch #26: 2100it [00:22, 93.53it/s, env_step=359600, gradient_step=359600, len=210, n/ep=10, n/st=2100, rew=-152.68]  


Epoch #26: test_reward: -152.675397 ± 0.000000, best_reward: -124.941988 ± 0.000000 in #14


Epoch #27: 2100it [00:22, 94.18it/s, env_step=361700, gradient_step=361700, len=210, n/ep=10, n/st=2100, rew=-152.68]  

Epoch #27: test_reward: -152.675397 ± 0.000000, best_reward: -124.941988 ± 0.000000 in #14



Epoch #28: 2100it [00:22, 91.54it/s, env_step=363800, gradient_step=363800, len=210, n/ep=10, n/st=2100, rew=-152.68]  

Epoch #28: test_reward: -152.675397 ± 0.000000, best_reward: -124.941988 ± 0.000000 in #14



Epoch #29: 2100it [00:22, 93.81it/s, env_step=365900, gradient_step=365900, len=210, n/ep=10, n/st=2100, rew=-152.68]  

Epoch #29: test_reward: -152.675397 ± 0.000000, best_reward: -124.941988 ± 0.000000 in #14



Epoch #30: 2100it [00:22, 91.80it/s, env_step=368000, gradient_step=368000, len=210, n/ep=10, n/st=2100, rew=-152.68]  


Epoch #30: test_reward: -152.675397 ± 0.000000, best_reward: -124.941988 ± 0.000000 in #14


Epoch #31: 2100it [00:24, 87.23it/s, env_step=370100, gradient_step=370100, len=210, n/ep=10, n/st=2100, rew=-152.68]  

Epoch #31: test_reward: -152.675397 ± 0.000000, best_reward: -124.941988 ± 0.000000 in #14



Epoch #32: 2100it [00:22, 92.21it/s, env_step=372200, gradient_step=372200, len=210, n/ep=10, n/st=2100, rew=-152.68]  


Epoch #32: test_reward: -152.675397 ± 0.000000, best_reward: -124.941988 ± 0.000000 in #14


Epoch #33: 2100it [00:23, 90.13it/s, env_step=374300, gradient_step=374300, len=210, n/ep=10, n/st=2100, rew=-152.68]  

Epoch #33: test_reward: -152.675397 ± 0.000000, best_reward: -124.941988 ± 0.000000 in #14



Epoch #34: 2100it [00:23, 88.14it/s, env_step=376400, gradient_step=376400, len=210, n/ep=10, n/st=2100, rew=-152.68]  

Epoch #34: test_reward: -152.675397 ± 0.000000, best_reward: -124.941988 ± 0.000000 in #14



Epoch #35: 2100it [00:24, 85.36it/s, env_step=378500, gradient_step=378500, len=210, n/ep=10, n/st=2100, rew=-152.68]  

Epoch #35: test_reward: -152.675397 ± 0.000000, best_reward: -124.941988 ± 0.000000 in #14



Epoch #36: 2100it [00:22, 91.57it/s, env_step=380600, gradient_step=380600, len=210, n/ep=10, n/st=2100, rew=-152.68]  

Epoch #36: test_reward: -152.675397 ± 0.000000, best_reward: -124.941988 ± 0.000000 in #14



Epoch #37: 2100it [00:23, 89.27it/s, env_step=382700, gradient_step=382700, len=210, n/ep=10, n/st=2100, rew=-152.68]  

Epoch #37: test_reward: -152.675397 ± 0.000000, best_reward: -124.941988 ± 0.000000 in #14



Epoch #38: 2100it [00:22, 94.90it/s, env_step=384800, gradient_step=384800, len=210, n/ep=10, n/st=2100, rew=-152.68]  

Epoch #38: test_reward: -152.675397 ± 0.000000, best_reward: -124.941988 ± 0.000000 in #14



Epoch #39: 2100it [00:22, 91.52it/s, env_step=386900, gradient_step=386900, len=210, n/ep=10, n/st=2100, rew=-152.68]  

Epoch #39: test_reward: -152.675397 ± 0.000000, best_reward: -124.941988 ± 0.000000 in #14



Epoch #40: 2100it [00:22, 93.79it/s, env_step=389000, gradient_step=389000, len=210, n/ep=10, n/st=2100, rew=-152.68]  

Epoch #40: test_reward: -152.675397 ± 0.000000, best_reward: -124.941988 ± 0.000000 in #14



Epoch #41: 2100it [00:22, 91.72it/s, env_step=391100, gradient_step=391100, len=210, n/ep=10, n/st=2100, rew=-152.68]  

Epoch #41: test_reward: -152.675397 ± 0.000000, best_reward: -124.941988 ± 0.000000 in #14



Epoch #42: 2100it [00:22, 92.01it/s, env_step=393200, gradient_step=393200, len=210, n/ep=10, n/st=2100, rew=-152.68]  

Epoch #42: test_reward: -152.675397 ± 0.000000, best_reward: -124.941988 ± 0.000000 in #14



Epoch #43: 2100it [00:22, 93.83it/s, env_step=395300, gradient_step=395300, len=210, n/ep=10, n/st=2100, rew=-152.68]  

Epoch #43: test_reward: -152.675397 ± 0.000000, best_reward: -124.941988 ± 0.000000 in #14



Epoch #44: 2100it [00:22, 93.15it/s, env_step=397400, gradient_step=397400, len=210, n/ep=10, n/st=2100, rew=-152.68]  

Epoch #44: test_reward: -152.675397 ± 0.000000, best_reward: -124.941988 ± 0.000000 in #14



Epoch #45: 2100it [00:22, 91.94it/s, env_step=399500, gradient_step=399500, len=210, n/ep=10, n/st=2100, rew=-152.68]  

Epoch #45: test_reward: -152.675397 ± 0.000000, best_reward: -124.941988 ± 0.000000 in #14



Epoch #46: 2100it [00:21, 95.83it/s, env_step=401600, gradient_step=401600, len=210, n/ep=10, n/st=2100, rew=-152.68]  

Epoch #46: test_reward: -152.675397 ± 0.000000, best_reward: -124.941988 ± 0.000000 in #14



Epoch #47: 2100it [00:23, 90.40it/s, env_step=403700, gradient_step=403700, len=210, n/ep=10, n/st=2100, rew=-152.68]  

Epoch #47: test_reward: -152.675397 ± 0.000000, best_reward: -124.941988 ± 0.000000 in #14



Epoch #48: 2100it [00:22, 94.20it/s, env_step=405800, gradient_step=405800, len=210, n/ep=10, n/st=2100, rew=-152.68]  

Epoch #48: test_reward: -152.675397 ± 0.000000, best_reward: -124.941988 ± 0.000000 in #14



Epoch #49: 2100it [00:23, 90.48it/s, env_step=407900, gradient_step=407900, len=210, n/ep=10, n/st=2100, rew=-152.68]  

Epoch #49: test_reward: -152.675397 ± 0.000000, best_reward: -124.941988 ± 0.000000 in #14



Epoch #50: 2100it [00:22, 92.22it/s, env_step=410000, gradient_step=410000, len=210, n/ep=10, n/st=2100, rew=-152.68]  

Epoch #50: test_reward: -152.675397 ± 0.000000, best_reward: -124.941988 ± 0.000000 in #14



Epoch #51: 2100it [00:23, 90.73it/s, env_step=412100, gradient_step=412100, len=210, n/ep=10, n/st=2100, rew=-152.68]  

Epoch #51: test_reward: -152.675397 ± 0.000000, best_reward: -124.941988 ± 0.000000 in #14



Epoch #52: 2100it [00:22, 93.33it/s, env_step=414200, gradient_step=414200, len=210, n/ep=10, n/st=2100, rew=-152.68]  

Epoch #52: test_reward: -152.675397 ± 0.000000, best_reward: -124.941988 ± 0.000000 in #14



Epoch #53: 2100it [00:23, 90.05it/s, env_step=416300, gradient_step=416300, len=210, n/ep=10, n/st=2100, rew=-152.68]  

Epoch #53: test_reward: -152.675397 ± 0.000000, best_reward: -124.941988 ± 0.000000 in #14



Epoch #54: 2100it [00:22, 91.81it/s, env_step=418400, gradient_step=418400, len=210, n/ep=10, n/st=2100, rew=-152.68]  

Epoch #54: test_reward: -152.675397 ± 0.000000, best_reward: -124.941988 ± 0.000000 in #14



Epoch #55: 2100it [00:22, 91.89it/s, env_step=420500, gradient_step=420500, len=210, n/ep=10, n/st=2100, rew=-152.68]  

Epoch #55: test_reward: -152.675397 ± 0.000000, best_reward: -124.941988 ± 0.000000 in #14



Epoch #56: 2100it [00:22, 95.32it/s, env_step=422600, gradient_step=422600, len=210, n/ep=10, n/st=2100, rew=-152.68]  

Epoch #56: test_reward: -152.675397 ± 0.000000, best_reward: -124.941988 ± 0.000000 in #14



Epoch #57: 2100it [00:22, 92.88it/s, env_step=424700, gradient_step=424700, len=210, n/ep=10, n/st=2100, rew=-152.68]  

Epoch #57: test_reward: -152.675397 ± 0.000000, best_reward: -124.941988 ± 0.000000 in #14



Epoch #58: 2100it [00:22, 93.96it/s, env_step=426800, gradient_step=426800, len=210, n/ep=10, n/st=2100, rew=-152.68]  

Epoch #58: test_reward: -152.675397 ± 0.000000, best_reward: -124.941988 ± 0.000000 in #14



Epoch #59: 2100it [00:22, 91.68it/s, env_step=428900, gradient_step=428900, len=210, n/ep=10, n/st=2100, rew=-152.68]  

Epoch #59: test_reward: -152.675397 ± 0.000000, best_reward: -124.941988 ± 0.000000 in #14



Epoch #60: 2100it [00:22, 91.90it/s, env_step=431000, gradient_step=431000, len=210, n/ep=10, n/st=2100, rew=-152.68]  

Epoch #60: test_reward: -152.675397 ± 0.000000, best_reward: -124.941988 ± 0.000000 in #14



Epoch #61: 2100it [00:22, 94.88it/s, env_step=433100, gradient_step=433100, len=210, n/ep=10, n/st=2100, rew=-152.68]  

Epoch #61: test_reward: -152.675397 ± 0.000000, best_reward: -124.941988 ± 0.000000 in #14



Epoch #62: 2100it [00:23, 91.11it/s, env_step=435200, gradient_step=435200, len=210, n/ep=10, n/st=2100, rew=-152.68]  

Epoch #62: test_reward: -152.675397 ± 0.000000, best_reward: -124.941988 ± 0.000000 in #14



Epoch #63: 2100it [00:23, 90.97it/s, env_step=437300, gradient_step=437300, len=210, n/ep=10, n/st=2100, rew=-152.68]  


Epoch #63: test_reward: -152.675397 ± 0.000000, best_reward: -124.941988 ± 0.000000 in #14


Epoch #64: 2100it [00:22, 92.33it/s, env_step=439400, gradient_step=439400, len=210, n/ep=10, n/st=2100, rew=-152.68]  


Epoch #64: test_reward: -152.675397 ± 0.000000, best_reward: -124.941988 ± 0.000000 in #14


Epoch #65: 2100it [00:22, 91.41it/s, env_step=441500, gradient_step=441500, len=210, n/ep=10, n/st=2100, rew=-152.68]  

Epoch #65: test_reward: -152.675397 ± 0.000000, best_reward: -124.941988 ± 0.000000 in #14



Epoch #66: 2100it [00:23, 91.06it/s, env_step=443600, gradient_step=443600, len=210, n/ep=10, n/st=2100, rew=-152.68]  

Epoch #66: test_reward: -152.675397 ± 0.000000, best_reward: -124.941988 ± 0.000000 in #14



Epoch #67: 2100it [00:22, 95.18it/s, env_step=445700, gradient_step=445700, len=210, n/ep=10, n/st=2100, rew=-152.68]  

Epoch #67: test_reward: -152.675397 ± 0.000000, best_reward: -124.941988 ± 0.000000 in #14



Epoch #68: 2100it [00:23, 90.84it/s, env_step=447800, gradient_step=447800, len=210, n/ep=10, n/st=2100, rew=-152.68]  


Epoch #68: test_reward: -152.675397 ± 0.000000, best_reward: -124.941988 ± 0.000000 in #14


Epoch #69: 2100it [00:22, 94.04it/s, env_step=449900, gradient_step=449900, len=210, n/ep=10, n/st=2100, rew=-152.68]  

Epoch #69: test_reward: -152.675397 ± 0.000000, best_reward: -124.941988 ± 0.000000 in #14



Epoch #70: 2100it [00:23, 90.34it/s, env_step=452000, gradient_step=452000, len=210, n/ep=10, n/st=2100, rew=-152.68]  

Epoch #70: test_reward: -152.675397 ± 0.000000, best_reward: -124.941988 ± 0.000000 in #14



Epoch #71: 2100it [00:22, 92.31it/s, env_step=454100, gradient_step=454100, len=210, n/ep=10, n/st=2100, rew=-152.68]  

Epoch #71: test_reward: -152.675397 ± 0.000000, best_reward: -124.941988 ± 0.000000 in #14



Epoch #72: 2100it [00:22, 94.31it/s, env_step=456200, gradient_step=456200, len=210, n/ep=10, n/st=2100, rew=-152.68]  

Epoch #72: test_reward: -152.675397 ± 0.000000, best_reward: -124.941988 ± 0.000000 in #14



Epoch #73: 2100it [00:22, 92.93it/s, env_step=458300, gradient_step=458300, len=210, n/ep=10, n/st=2100, rew=-152.68]  

Epoch #73: test_reward: -152.675397 ± 0.000000, best_reward: -124.941988 ± 0.000000 in #14



Epoch #74: 2100it [00:22, 92.50it/s, env_step=460400, gradient_step=460400, len=210, n/ep=10, n/st=2100, rew=-152.68]  

Epoch #74: test_reward: -152.675397 ± 0.000000, best_reward: -124.941988 ± 0.000000 in #14



Epoch #75: 2100it [00:22, 93.82it/s, env_step=462500, gradient_step=462500, len=210, n/ep=10, n/st=2100, rew=-152.68]  

Epoch #75: test_reward: -152.675397 ± 0.000000, best_reward: -124.941988 ± 0.000000 in #14



Epoch #76: 2100it [00:23, 91.29it/s, env_step=464600, gradient_step=464600, len=210, n/ep=10, n/st=2100, rew=-152.68]  

Epoch #76: test_reward: -152.675397 ± 0.000000, best_reward: -124.941988 ± 0.000000 in #14



Epoch #77: 2100it [00:22, 94.59it/s, env_step=466700, gradient_step=466700, len=210, n/ep=10, n/st=2100, rew=-152.68]  

Epoch #77: test_reward: -152.675397 ± 0.000000, best_reward: -124.941988 ± 0.000000 in #14



Epoch #78: 2100it [00:22, 93.83it/s, env_step=468800, gradient_step=468800, len=210, n/ep=10, n/st=2100, rew=-152.68]  

Epoch #78: test_reward: -152.675397 ± 0.000000, best_reward: -124.941988 ± 0.000000 in #14



Epoch #79: 2100it [00:22, 91.51it/s, env_step=470900, gradient_step=470900, len=210, n/ep=10, n/st=2100, rew=-152.68]  

Epoch #79: test_reward: -152.675397 ± 0.000000, best_reward: -124.941988 ± 0.000000 in #14



Epoch #80: 2100it [00:23, 90.49it/s, env_step=473000, gradient_step=473000, len=210, n/ep=10, n/st=2100, rew=-152.68]  


Epoch #80: test_reward: -152.675397 ± 0.000000, best_reward: -124.941988 ± 0.000000 in #14


Epoch #81: 2100it [00:28, 74.51it/s, env_step=475100, gradient_step=475100, len=210, n/ep=10, n/st=2100, rew=-152.68] 


Epoch #81: test_reward: -152.675397 ± 0.000000, best_reward: -124.941988 ± 0.000000 in #14


Epoch #82: 2100it [00:29, 71.79it/s, env_step=477200, gradient_step=477200, len=210, n/ep=10, n/st=2100, rew=-152.68] 


Epoch #82: test_reward: -152.675397 ± 0.000000, best_reward: -124.941988 ± 0.000000 in #14


Epoch #83: 2100it [00:28, 74.06it/s, env_step=479300, gradient_step=479300, len=210, n/ep=10, n/st=2100, rew=-152.68] 


Epoch #83: test_reward: -152.675397 ± 0.000000, best_reward: -124.941988 ± 0.000000 in #14


Epoch #84: 2100it [00:28, 72.88it/s, env_step=481400, gradient_step=481400, len=210, n/ep=10, n/st=2100, rew=-152.68] 


Epoch #84: test_reward: -152.675397 ± 0.000000, best_reward: -124.941988 ± 0.000000 in #14


Epoch #85: 2100it [00:28, 74.11it/s, env_step=483500, gradient_step=483500, len=210, n/ep=10, n/st=2100, rew=-152.68]  


Epoch #85: test_reward: -152.675397 ± 0.000000, best_reward: -124.941988 ± 0.000000 in #14


Epoch #86: 2100it [00:27, 76.27it/s, env_step=485600, gradient_step=485600, len=210, n/ep=10, n/st=2100, rew=-152.68] 


Epoch #86: test_reward: -152.675397 ± 0.000000, best_reward: -124.941988 ± 0.000000 in #14


Epoch #87: 2100it [00:28, 72.43it/s, env_step=487700, gradient_step=487700, len=210, n/ep=10, n/st=2100, rew=-152.68] 


Epoch #87: test_reward: -152.675397 ± 0.000000, best_reward: -124.941988 ± 0.000000 in #14


Epoch #88: 2100it [00:28, 74.45it/s, env_step=489800, gradient_step=489800, len=210, n/ep=10, n/st=2100, rew=-152.68] 


Epoch #88: test_reward: -152.675397 ± 0.000000, best_reward: -124.941988 ± 0.000000 in #14


Epoch #89: 2100it [00:28, 72.64it/s, env_step=491900, gradient_step=491900, len=210, n/ep=10, n/st=2100, rew=-152.68]  


Epoch #89: test_reward: -152.675397 ± 0.000000, best_reward: -124.941988 ± 0.000000 in #14


Epoch #90: 2100it [00:28, 74.50it/s, env_step=494000, gradient_step=494000, len=210, n/ep=10, n/st=2100, rew=-152.68] 


Epoch #90: test_reward: -152.675397 ± 0.000000, best_reward: -124.941988 ± 0.000000 in #14


Epoch #91: 2100it [00:28, 72.79it/s, env_step=496100, gradient_step=496100, len=210, n/ep=10, n/st=2100, rew=-152.68] 


Epoch #91: test_reward: -152.675397 ± 0.000000, best_reward: -124.941988 ± 0.000000 in #14


Epoch #92: 2100it [00:28, 73.69it/s, env_step=498200, gradient_step=498200, len=210, n/ep=10, n/st=2100, rew=-152.68] 


Epoch #92: test_reward: -152.675397 ± 0.000000, best_reward: -124.941988 ± 0.000000 in #14


Epoch #93: 2100it [00:28, 72.99it/s, env_step=500300, gradient_step=500300, len=210, n/ep=10, n/st=2100, rew=-152.68] 


Epoch #93: test_reward: -152.675397 ± 0.000000, best_reward: -124.941988 ± 0.000000 in #14


Epoch #94: 2100it [00:28, 74.57it/s, env_step=502400, gradient_step=502400, len=210, n/ep=10, n/st=2100, rew=-152.68] 


Epoch #94: test_reward: -152.675397 ± 0.000000, best_reward: -124.941988 ± 0.000000 in #14


Epoch #95: 2100it [00:28, 72.79it/s, env_step=504500, gradient_step=504500, len=210, n/ep=10, n/st=2100, rew=-152.68]  


Epoch #95: test_reward: -152.675397 ± 0.000000, best_reward: -124.941988 ± 0.000000 in #14


Epoch #96: 2100it [00:28, 73.99it/s, env_step=506600, gradient_step=506600, len=210, n/ep=10, n/st=2100, rew=-152.68] 


Epoch #96: test_reward: -152.675397 ± 0.000000, best_reward: -124.941988 ± 0.000000 in #14


Epoch #97: 2100it [00:28, 73.48it/s, env_step=508700, gradient_step=508700, len=210, n/ep=10, n/st=2100, rew=-152.68] 


Epoch #97: test_reward: -152.675397 ± 0.000000, best_reward: -124.941988 ± 0.000000 in #14


Epoch #98: 2100it [00:27, 75.42it/s, env_step=510800, gradient_step=510800, len=210, n/ep=10, n/st=2100, rew=-152.68] 


Epoch #98: test_reward: -152.675397 ± 0.000000, best_reward: -124.941988 ± 0.000000 in #14


Epoch #99: 2100it [16:17:10, 27.92s/it, env_step=512900, gradient_step=512900, len=210, n/ep=10, n/st=2100, rew=-152.68] 


Epoch #99: test_reward: -152.675397 ± 0.000000, best_reward: -124.941988 ± 0.000000 in #14


Epoch #100: 2100it [00:29, 70.07it/s, env_step=515000, gradient_step=515000, len=210, n/ep=10, n/st=2100, rew=-152.68] 

Epoch #100: test_reward: -152.675397 ± 0.000000, best_reward: -124.941988 ± 0.000000 in #14





InfoStats(gradient_step=515000, best_reward=-124.9419882449513, best_reward_std=0.0, train_step=515000, train_episode=1000, test_step=51710, test_episode=101, timing=TimingStats(total_time=64592.63916897774, train_time=64550.37857961655, train_time_collect=432.4592430591583, train_time_update=64085.09772801399, test_time=42.260589361190796, update_speed=7.978264594758311))