In [1]:
import gym
import wandb
import torch
import numpy as np
from tqdm import tqdm
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F

# Define Policy and REINFORCE classes

class Policy(nn.Module):
    def __init__(self, state_dim, hidden_dim, action_dim):
        super(Policy, self).__init__()
        self.fc1 = torch.nn.Linear(state_dim, hidden_dim)
        self.fc2 = torch.nn.Linear(hidden_dim, action_dim)

    def forward(self, x):
        x = F.relu(self.fc1(x))
        return F.softmax(self.fc2(x), dim=1)

class REINFORCE:
    def __init__(self, state_dim, hidden_dim, action_dim, learning_rate, gamma, device):
        self.policy_net = Policy(state_dim, hidden_dim, action_dim).to(device)
        self.optimizer = torch.optim.Adam(self.policy_net.parameters(), lr=learning_rate)
        self.gamma = gamma
        self.device = device

    def take_action(self, state):
        state = torch.tensor(state.reshape(1, -1), dtype=torch.float).to(self.device) 
        probs = self.policy_net(state)
        action_dist = torch.distributions.Categorical(probs) 
        action = action_dist.sample()
        return action.item()  

    def update(self, transition_dict):
        reward_list = transition_dict['rewards']
        state_list = transition_dict['states']
        action_list = transition_dict['actions']

        G = 0
        self.optimizer.zero_grad()
        for i in reversed(range(len(reward_list))):
            reward = reward_list[i]
            state = torch.tensor(state_list[i].reshape(1, -1), dtype=torch.float).to(self.device)
            action = torch.tensor(action_list[i]).view(-1, 1).to(self.device)
            log_prob = torch.log(self.policy_net(state).gather(1, action))
            G = self.gamma * G + reward
            loss = -log_prob * G
            loss.backward()
        self.optimizer.step()

# Hyperparameters
learning_rate = 1e-3
gamma = 0.99
hidden_dim = 128
num_pbar = 10
num_episodes = 500
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
env_name = "CartPole-v1"

# Define the sweep configuration
sweep_config = {
    "method": "bayes",
    "metric": {"name": "regret", "goal": "minimize"},
    "parameters": {
        "learning_rate": {"min": 1e-5, "max": 1e-2},
        "hidden_dim": {"values": [64, 128, 256]}
    },
    "early_terminate": {
        "type": "hyperband",
        "min_iter": 3,
        "max_iter": 100
}
}
# Initialize the sweep
sweep_id = wandb.sweep(sweep_config)

def train(learning_rate, hidden_dim):
    env = gym.make(env_name)
    env.reset(seed=0)
    state_dim = env.observation_space.shape[0]
    action_dim = env.action_space.n
    agent = REINFORCE(state_dim, hidden_dim, action_dim, learning_rate, gamma, device)
    return_list = []

    for i in range(num_pbar):
        with tqdm(total=int(num_episodes/num_pbar), desc='Iteration %d' % i) as pbar:
            for i_episode in range(int(num_episodes/num_pbar)):
                episode_return = 0
                transition_dict = {
                    "states": [],
                    "actions": [],
                    "next_states": [],
                    "rewards": [],
                    "dones": []
                }
                state, _ = env.reset()
                terminated, truncated = False, False
                while not terminated and not truncated:
                    action = agent.take_action(state)
                    next_state, reward, terminated, truncated, _ = env.step(action)
                    transition_dict["states"].append(state)
                    transition_dict["actions"].append(action)
                    transition_dict["next_states"].append(next_state)
                    transition_dict["rewards"].append(reward)
                    transition_dict["dones"].append(terminated)
                    state = next_state
                    episode_return += reward
                return_list.append(episode_return)
                agent.update(transition_dict)
                if (i_episode+1)%10 == 0:
                    pbar.set_postfix({
                        'episode_return': '%d' % (num_episodes/num_pbar*i+i_episode+1),
                        'return': '%.3f' % np.mean(return_list[-10:])
                    })
                pbar.update(1)

    avg = []
    for i in range(num_episodes):
        if i < 100:
            avg_return = np.mean(return_list[:i+1])
        else:
            avg_return = np.mean(return_list[i-100:i+1])
        avg.append(avg_return)

    regret = 0
    for i in avg:
        if i < 195:
            regret += 195 - i

    return regret

def run_training():
    config_defaults = {
        "learning_rate": 1e-3,
        "hidden_dim": 128
    }
    config = wandb.init(config=config_defaults)
    learning_rate = config.config["learning_rate"]
    hidden_dim = config.config["hidden_dim"]
    regret = train(learning_rate, hidden_dim)
    wandb.log({"regret": regret})

# Run the sweep
wandb.agent(sweep_id, function=run_training)


Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Create sweep with ID: ebygp3ca
Sweep URL: https://wandb.ai/rl_shobhith/uncategorized/sweeps/ebygp3ca


[34m[1mwandb[0m: Agent Starting Run: knroq4ij with config:
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.0008014309956249786
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mshobhith-v[0m ([33mrl_shobhith[0m). Use [1m`wandb login --relogin`[0m to force relogin


  if not isinstance(terminated, (bool, np.bool8)):
Iteration 0: 100%|██████████| 50/50 [00:01<00:00, 41.72it/s, episode_return=50, return=32.300]
Iteration 1: 100%|██████████| 50/50 [00:02<00:00, 19.50it/s, episode_return=100, return=78.500]
Iteration 2: 100%|██████████| 50/50 [00:03<00:00, 12.62it/s, episode_return=150, return=115.000]
Iteration 3: 100%|██████████| 50/50 [00:06<00:00,  7.75it/s, episode_return=200, return=132.200]
Iteration 4: 100%|██████████| 50/50 [00:09<00:00,  5.13it/s, episode_return=250, return=154.400]
Iteration 5: 100%|██████████| 50/50 [00:09<00:00,  5.38it/s, episode_return=300, return=174.100]
Iteration 6: 100%|██████████| 50/50 [00:09<00:00,  5.36it/s, episode_return=350, return=246.200]
Iteration 7: 100%|██████████| 50/50 [00:11<00:00,  4.25it/s, episode_return=400, return=312.100]
Iteration 8: 100%|██████████| 50/50 [00:10<00:00,  4.72it/s, episode_return=450, return=241.100]
Iteration 9: 100%|██████████| 50/50 [00:11<00:00,  4.53it/s, episode_return=500

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,30714.25507


[34m[1mwandb[0m: Agent Starting Run: xlv3s8jo with config:
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.0006463588968790339
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011288888888925108, max=1.0…

Iteration 0: 100%|██████████| 50/50 [00:00<00:00, 50.13it/s, episode_return=50, return=22.800]
Iteration 1: 100%|██████████| 50/50 [00:01<00:00, 30.93it/s, episode_return=100, return=48.600]
Iteration 2: 100%|██████████| 50/50 [00:01<00:00, 25.89it/s, episode_return=150, return=48.900]
Iteration 3: 100%|██████████| 50/50 [00:03<00:00, 15.11it/s, episode_return=200, return=71.500]
Iteration 4: 100%|██████████| 50/50 [00:04<00:00, 12.40it/s, episode_return=250, return=107.800]
Iteration 5: 100%|██████████| 50/50 [00:07<00:00,  6.81it/s, episode_return=300, return=144.800]
Iteration 6: 100%|██████████| 50/50 [00:08<00:00,  5.99it/s, episode_return=350, return=142.300]
Iteration 7: 100%|██████████| 50/50 [00:07<00:00,  6.87it/s, episode_return=400, return=194.700]
Iteration 8: 100%|██████████| 50/50 [00:10<00:00,  4.78it/s, episode_return=450, return=272.900]
Iteration 9: 100%|██████████| 50/50 [00:10<00:00,  4.83it/s, episode_return=500, return=297.200]


VBox(children=(Label(value='0.001 MB of 0.007 MB uploaded\r'), FloatProgress(value=0.1703319218685972, max=1.0…

0,1
regret,▁

0,1
regret,48126.64405


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: nj8r8s07 with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.0009234512744067692
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:01<00:00, 41.66it/s, episode_return=50, return=28.700]
Iteration 1: 100%|██████████| 50/50 [00:01<00:00, 35.23it/s, episode_return=100, return=39.800]
Iteration 2: 100%|██████████| 50/50 [00:01<00:00, 29.15it/s, episode_return=150, return=54.800]
Iteration 3: 100%|██████████| 50/50 [00:01<00:00, 31.75it/s, episode_return=200, return=47.200]
Iteration 4: 100%|██████████| 50/50 [00:01<00:00, 25.65it/s, episode_return=250, return=48.200]
Iteration 5: 100%|██████████| 50/50 [00:02<00:00, 22.15it/s, episode_return=300, return=67.600]
Iteration 6: 100%|██████████| 50/50 [00:03<00:00, 16.15it/s, episode_return=350, return=106.800]
Iteration 7: 100%|██████████| 50/50 [00:04<00:00, 10.63it/s, episode_return=400, return=116.000]
Iteration 8: 100%|██████████| 50/50 [00:06<00:00,  7.39it/s, episode_return=450, return=262.700]
Iteration 9: 100%|██████████| 50/50 [00:09<00:00,  5.34it/s, episode_return=500, return=227.300]


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,65614.86782


[34m[1mwandb[0m: Agent Starting Run: bfs66mzk with config:
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.0008186001347911228
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:02<00:00, 22.72it/s, episode_return=50, return=48.800]
Iteration 1: 100%|██████████| 50/50 [00:02<00:00, 18.64it/s, episode_return=100, return=59.300]
Iteration 2: 100%|██████████| 50/50 [00:03<00:00, 15.28it/s, episode_return=150, return=82.800]
Iteration 3: 100%|██████████| 50/50 [00:05<00:00,  8.35it/s, episode_return=200, return=129.900]
Iteration 4: 100%|██████████| 50/50 [00:07<00:00,  6.98it/s, episode_return=250, return=145.100]
Iteration 5: 100%|██████████| 50/50 [00:08<00:00,  5.60it/s, episode_return=300, return=230.500]
Iteration 6: 100%|██████████| 50/50 [00:07<00:00,  6.85it/s, episode_return=350, return=215.600]
Iteration 7: 100%|██████████| 50/50 [00:07<00:00,  6.34it/s, episode_return=400, return=280.200]
Iteration 8: 100%|██████████| 50/50 [00:11<00:00,  4.28it/s, episode_return=450, return=184.000]
Iteration 9: 100%|██████████| 50/50 [00:18<00:00,  2.75it/s, episode_return=500, return=427.800]


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,40372.91023


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 9k3k0o8h with config:
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.000259914132902407
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:01<00:00, 36.47it/s, episode_return=50, return=21.500]
Iteration 1: 100%|██████████| 50/50 [00:01<00:00, 32.21it/s, episode_return=100, return=41.000]
Iteration 2: 100%|██████████| 50/50 [00:01<00:00, 29.40it/s, episode_return=150, return=36.600]
Iteration 3: 100%|██████████| 50/50 [00:02<00:00, 24.17it/s, episode_return=200, return=34.400]
Iteration 4: 100%|██████████| 50/50 [00:02<00:00, 21.34it/s, episode_return=250, return=48.300]
Iteration 5: 100%|██████████| 50/50 [00:02<00:00, 18.52it/s, episode_return=300, return=64.800]
Iteration 6: 100%|██████████| 50/50 [00:03<00:00, 13.92it/s, episode_return=350, return=52.300]
Iteration 7: 100%|██████████| 50/50 [00:02<00:00, 17.32it/s, episode_return=400, return=59.600]
Iteration 8: 100%|██████████| 50/50 [00:03<00:00, 13.73it/s, episode_return=450, return=53.800]
Iteration 9: 100%|██████████| 50/50 [00:03<00:00, 12.65it/s, episode_return=500, return=86.100]


VBox(children=(Label(value='0.001 MB of 0.007 MB uploaded\r'), FloatProgress(value=0.1677653706444235, max=1.0…

0,1
regret,▁

0,1
regret,75244.85249


[34m[1mwandb[0m: Agent Starting Run: jrcezunp with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.00037329323262457766
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:00<00:00, 53.28it/s, episode_return=50, return=17.000]
Iteration 1: 100%|██████████| 50/50 [00:00<00:00, 56.21it/s, episode_return=100, return=23.500]
Iteration 2: 100%|██████████| 50/50 [00:01<00:00, 45.71it/s, episode_return=150, return=25.500]
Iteration 3: 100%|██████████| 50/50 [00:00<00:00, 50.42it/s, episode_return=200, return=30.400]
Iteration 4: 100%|██████████| 50/50 [00:01<00:00, 46.82it/s, episode_return=250, return=30.400]
Iteration 5: 100%|██████████| 50/50 [00:01<00:00, 38.90it/s, episode_return=300, return=32.900]
Iteration 6: 100%|██████████| 50/50 [00:01<00:00, 36.94it/s, episode_return=350, return=28.900]
Iteration 7: 100%|██████████| 50/50 [00:01<00:00, 37.12it/s, episode_return=400, return=26.600]
Iteration 8: 100%|██████████| 50/50 [00:01<00:00, 30.30it/s, episode_return=450, return=33.800]
Iteration 9: 100%|██████████| 50/50 [00:01<00:00, 32.16it/s, episode_return=500, return=30.800]


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,84312.81358


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 8s40bwxh with config:
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.000795968701273675
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:01<00:00, 31.47it/s, episode_return=50, return=30.300]
Iteration 1: 100%|██████████| 50/50 [00:02<00:00, 19.68it/s, episode_return=100, return=53.200]
Iteration 2: 100%|██████████| 50/50 [00:04<00:00, 10.86it/s, episode_return=150, return=127.500]
Iteration 3: 100%|██████████| 50/50 [00:06<00:00,  7.87it/s, episode_return=200, return=168.500]
Iteration 4: 100%|██████████| 50/50 [00:08<00:00,  6.18it/s, episode_return=250, return=193.000]
Iteration 5: 100%|██████████| 50/50 [00:08<00:00,  5.62it/s, episode_return=300, return=299.000]
Iteration 6: 100%|██████████| 50/50 [00:11<00:00,  4.27it/s, episode_return=350, return=327.700]
Iteration 7: 100%|██████████| 50/50 [00:09<00:00,  5.51it/s, episode_return=400, return=137.200]
Iteration 8: 100%|██████████| 50/50 [00:09<00:00,  5.44it/s, episode_return=450, return=260.200]
Iteration 9: 100%|██████████| 50/50 [00:11<00:00,  4.54it/s, episode_return=500, return=374.600]


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,34093.30635


[34m[1mwandb[0m: Agent Starting Run: pcuueutx with config:
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.000743784965576459
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:02<00:00, 20.62it/s, episode_return=50, return=26.900]
Iteration 1: 100%|██████████| 50/50 [00:02<00:00, 20.12it/s, episode_return=100, return=50.700]
Iteration 2: 100%|██████████| 50/50 [00:02<00:00, 21.07it/s, episode_return=150, return=53.200]
Iteration 3: 100%|██████████| 50/50 [00:02<00:00, 18.80it/s, episode_return=200, return=67.300]
Iteration 4: 100%|██████████| 50/50 [00:03<00:00, 14.94it/s, episode_return=250, return=84.000]
Iteration 5: 100%|██████████| 50/50 [00:04<00:00, 12.23it/s, episode_return=300, return=99.500] 
Iteration 6: 100%|██████████| 50/50 [00:06<00:00,  7.54it/s, episode_return=350, return=158.100]
Iteration 7: 100%|██████████| 50/50 [00:07<00:00,  6.88it/s, episode_return=400, return=163.400]
Iteration 8: 100%|██████████| 50/50 [00:09<00:00,  5.38it/s, episode_return=450, return=253.300]
Iteration 9: 100%|██████████| 50/50 [00:11<00:00,  4.38it/s, episode_return=500, return=339.700]


VBox(children=(Label(value='0.001 MB of 0.007 MB uploaded\r'), FloatProgress(value=0.17044833242208857, max=1.…

0,1
regret,▁

0,1
regret,51844.9123


[34m[1mwandb[0m: Agent Starting Run: n6fmqbjq with config:
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.00012546741234751018
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:01<00:00, 40.66it/s, episode_return=50, return=23.400]
Iteration 1: 100%|██████████| 50/50 [00:01<00:00, 43.32it/s, episode_return=100, return=17.500]
Iteration 2: 100%|██████████| 50/50 [00:01<00:00, 42.18it/s, episode_return=150, return=19.800]
Iteration 3: 100%|██████████| 50/50 [00:01<00:00, 39.40it/s, episode_return=200, return=31.600]
Iteration 4: 100%|██████████| 50/50 [00:01<00:00, 31.85it/s, episode_return=250, return=40.000]
Iteration 5: 100%|██████████| 50/50 [00:01<00:00, 29.55it/s, episode_return=300, return=33.300]
Iteration 6: 100%|██████████| 50/50 [00:01<00:00, 27.97it/s, episode_return=350, return=42.200]
Iteration 7: 100%|██████████| 50/50 [00:02<00:00, 22.43it/s, episode_return=400, return=52.200]
Iteration 8: 100%|██████████| 50/50 [00:01<00:00, 25.08it/s, episode_return=450, return=43.700]
Iteration 9: 100%|██████████| 50/50 [00:02<00:00, 20.37it/s, episode_return=500, return=47.300]


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,81886.44244


[34m[1mwandb[0m: Agent Starting Run: 8beo1nlz with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.0009753580355680552
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:01<00:00, 40.82it/s, episode_return=50, return=37.700]
Iteration 1: 100%|██████████| 50/50 [00:01<00:00, 39.53it/s, episode_return=100, return=36.800]
Iteration 2: 100%|██████████| 50/50 [00:01<00:00, 31.03it/s, episode_return=150, return=43.900]
Iteration 3: 100%|██████████| 50/50 [00:01<00:00, 27.05it/s, episode_return=200, return=37.700]
Iteration 4: 100%|██████████| 50/50 [00:02<00:00, 22.69it/s, episode_return=250, return=59.300]
Iteration 5: 100%|██████████| 50/50 [00:03<00:00, 16.34it/s, episode_return=300, return=89.000]
Iteration 6: 100%|██████████| 50/50 [00:04<00:00, 10.44it/s, episode_return=350, return=184.200]
Iteration 7: 100%|██████████| 50/50 [00:06<00:00,  8.09it/s, episode_return=400, return=240.400]
Iteration 8: 100%|██████████| 50/50 [00:06<00:00,  7.32it/s, episode_return=450, return=188.600]
Iteration 9: 100%|██████████| 50/50 [00:07<00:00,  7.02it/s, episode_return=500, return=172.500]


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,55365.14919


[34m[1mwandb[0m: Agent Starting Run: tu9584oc with config:
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.00054459266979986
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:01<00:00, 43.30it/s, episode_return=50, return=32.700]
Iteration 1: 100%|██████████| 50/50 [00:01<00:00, 40.83it/s, episode_return=100, return=33.300]
Iteration 2: 100%|██████████| 50/50 [00:01<00:00, 35.39it/s, episode_return=150, return=33.200]
Iteration 3: 100%|██████████| 50/50 [00:01<00:00, 28.85it/s, episode_return=200, return=44.700]
Iteration 4: 100%|██████████| 50/50 [00:02<00:00, 24.99it/s, episode_return=250, return=48.000]
Iteration 5: 100%|██████████| 50/50 [00:02<00:00, 18.33it/s, episode_return=300, return=92.300]
Iteration 6: 100%|██████████| 50/50 [00:04<00:00, 11.27it/s, episode_return=350, return=137.000]
Iteration 7: 100%|██████████| 50/50 [00:05<00:00,  8.48it/s, episode_return=400, return=157.200]
Iteration 8: 100%|██████████| 50/50 [00:06<00:00,  7.44it/s, episode_return=450, return=234.900]
Iteration 9: 100%|██████████| 50/50 [00:06<00:00,  8.04it/s, episode_return=500, return=155.700]


VBox(children=(Label(value='0.001 MB of 0.007 MB uploaded\r'), FloatProgress(value=0.17065827288901053, max=1.…

0,1
regret,▁

0,1
regret,60999.89139


[34m[1mwandb[0m: Agent Starting Run: lxr88e29 with config:
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.0002257789115525846
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011288888888925108, max=1.0…

Iteration 0: 100%|██████████| 50/50 [00:01<00:00, 45.96it/s, episode_return=50, return=23.100]
Iteration 1: 100%|██████████| 50/50 [00:00<00:00, 51.85it/s, episode_return=100, return=25.200]
Iteration 2: 100%|██████████| 50/50 [00:00<00:00, 54.74it/s, episode_return=150, return=23.000]
Iteration 3: 100%|██████████| 50/50 [00:01<00:00, 43.78it/s, episode_return=200, return=26.700]
Iteration 4: 100%|██████████| 50/50 [00:01<00:00, 46.60it/s, episode_return=250, return=27.600]
Iteration 5: 100%|██████████| 50/50 [00:01<00:00, 43.82it/s, episode_return=300, return=27.600]
Iteration 6: 100%|██████████| 50/50 [00:01<00:00, 40.17it/s, episode_return=350, return=29.600]
Iteration 7: 100%|██████████| 50/50 [00:01<00:00, 36.41it/s, episode_return=400, return=51.500]
Iteration 8: 100%|██████████| 50/50 [00:01<00:00, 30.73it/s, episode_return=450, return=37.900]
Iteration 9: 100%|██████████| 50/50 [00:01<00:00, 28.16it/s, episode_return=500, return=45.300]


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,83205.57987


[34m[1mwandb[0m: Agent Starting Run: j7e08wzd with config:
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.0008488933458338834
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:01<00:00, 28.96it/s, episode_return=50, return=42.700]
Iteration 1: 100%|██████████| 50/50 [00:02<00:00, 20.46it/s, episode_return=100, return=70.800]
Iteration 2: 100%|██████████| 50/50 [00:03<00:00, 15.52it/s, episode_return=150, return=81.400]
Iteration 3: 100%|██████████| 50/50 [00:05<00:00,  8.51it/s, episode_return=200, return=146.700]
Iteration 4: 100%|██████████| 50/50 [00:07<00:00,  6.52it/s, episode_return=250, return=202.200]
Iteration 5: 100%|██████████| 50/50 [00:11<00:00,  4.45it/s, episode_return=300, return=275.900]
Iteration 6: 100%|██████████| 50/50 [00:10<00:00,  4.62it/s, episode_return=350, return=245.200]
Iteration 7: 100%|██████████| 50/50 [00:07<00:00,  6.60it/s, episode_return=400, return=127.300]
Iteration 8: 100%|██████████| 50/50 [00:06<00:00,  7.45it/s, episode_return=450, return=187.000]
Iteration 9: 100%|██████████| 50/50 [00:12<00:00,  3.86it/s, episode_return=500, return=274.800]


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,35361.51433


[34m[1mwandb[0m: Agent Starting Run: s3fklq73 with config:
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.0005827739253038395
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:01<00:00, 39.57it/s, episode_return=50, return=23.000]
Iteration 1: 100%|██████████| 50/50 [00:01<00:00, 29.76it/s, episode_return=100, return=48.000]
Iteration 2: 100%|██████████| 50/50 [00:02<00:00, 23.21it/s, episode_return=150, return=56.600]
Iteration 3: 100%|██████████| 50/50 [00:03<00:00, 13.24it/s, episode_return=200, return=104.700]
Iteration 4: 100%|██████████| 50/50 [00:04<00:00, 10.53it/s, episode_return=250, return=101.600]
Iteration 5: 100%|██████████| 50/50 [00:05<00:00,  8.65it/s, episode_return=300, return=139.200]
Iteration 6: 100%|██████████| 50/50 [00:09<00:00,  5.37it/s, episode_return=350, return=254.400]
Iteration 7: 100%|██████████| 50/50 [00:11<00:00,  4.53it/s, episode_return=400, return=315.900]
Iteration 8: 100%|██████████| 50/50 [00:11<00:00,  4.36it/s, episode_return=450, return=238.600]
Iteration 9: 100%|██████████| 50/50 [00:12<00:00,  3.85it/s, episode_return=500, return=269.600]


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,46887.04423


[34m[1mwandb[0m: Agent Starting Run: t0497yol with config:
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.0007238868973418295
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:01<00:00, 44.10it/s, episode_return=50, return=22.600]
Iteration 1: 100%|██████████| 50/50 [00:01<00:00, 32.70it/s, episode_return=100, return=36.500]
Iteration 2: 100%|██████████| 50/50 [00:01<00:00, 28.93it/s, episode_return=150, return=49.000]
Iteration 3: 100%|██████████| 50/50 [00:02<00:00, 24.16it/s, episode_return=200, return=62.600]
Iteration 4: 100%|██████████| 50/50 [00:03<00:00, 16.66it/s, episode_return=250, return=69.400] 
Iteration 5: 100%|██████████| 50/50 [00:03<00:00, 13.32it/s, episode_return=300, return=100.600]
Iteration 6: 100%|██████████| 50/50 [00:03<00:00, 15.37it/s, episode_return=350, return=94.600] 
Iteration 7: 100%|██████████| 50/50 [00:06<00:00,  7.32it/s, episode_return=400, return=174.400]
Iteration 8: 100%|██████████| 50/50 [00:06<00:00,  7.87it/s, episode_return=450, return=138.500]
Iteration 9: 100%|██████████| 50/50 [00:08<00:00,  6.18it/s, episode_return=500, return=246.300]


VBox(children=(Label(value='0.001 MB of 0.007 MB uploaded\r'), FloatProgress(value=0.17058823529411765, max=1.…

0,1
regret,▁

0,1
regret,56697.30606


[34m[1mwandb[0m: Agent Starting Run: k2icy44q with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.000565778024360261
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:00<00:00, 54.00it/s, episode_return=50, return=17.800]
Iteration 1: 100%|██████████| 50/50 [00:01<00:00, 46.15it/s, episode_return=100, return=27.700]
Iteration 2: 100%|██████████| 50/50 [00:01<00:00, 43.21it/s, episode_return=150, return=31.200]
Iteration 3: 100%|██████████| 50/50 [00:01<00:00, 34.74it/s, episode_return=200, return=40.700]
Iteration 4: 100%|██████████| 50/50 [00:01<00:00, 36.51it/s, episode_return=250, return=41.400]
Iteration 5: 100%|██████████| 50/50 [00:01<00:00, 33.69it/s, episode_return=300, return=54.500]
Iteration 6: 100%|██████████| 50/50 [00:01<00:00, 33.04it/s, episode_return=350, return=42.300]
Iteration 7: 100%|██████████| 50/50 [00:01<00:00, 27.66it/s, episode_return=400, return=45.400]
Iteration 8: 100%|██████████| 50/50 [00:02<00:00, 24.24it/s, episode_return=450, return=66.500]
Iteration 9: 100%|██████████| 50/50 [00:02<00:00, 22.98it/s, episode_return=500, return=65.900]


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,79454.67377


[34m[1mwandb[0m: Agent Starting Run: 20wauuig with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.000160468934919586
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:00<00:00, 61.57it/s, episode_return=50, return=19.500]
Iteration 1: 100%|██████████| 50/50 [00:00<00:00, 64.30it/s, episode_return=100, return=22.400]
Iteration 2: 100%|██████████| 50/50 [00:00<00:00, 54.72it/s, episode_return=150, return=20.100]
Iteration 3: 100%|██████████| 50/50 [00:00<00:00, 56.18it/s, episode_return=200, return=19.500]
Iteration 4: 100%|██████████| 50/50 [00:00<00:00, 56.35it/s, episode_return=250, return=21.900]
Iteration 5: 100%|██████████| 50/50 [00:00<00:00, 56.13it/s, episode_return=300, return=25.700]
Iteration 6: 100%|██████████| 50/50 [00:00<00:00, 52.13it/s, episode_return=350, return=21.700]
Iteration 7: 100%|██████████| 50/50 [00:00<00:00, 50.18it/s, episode_return=400, return=24.200]
Iteration 8: 100%|██████████| 50/50 [00:00<00:00, 54.71it/s, episode_return=450, return=26.700]
Iteration 9: 100%|██████████| 50/50 [00:00<00:00, 52.44it/s, episode_return=500, return=21.400]


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,86677.98236


[34m[1mwandb[0m: Agent Starting Run: ben7icc9 with config:
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.0003252119712808914
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:01<00:00, 48.68it/s, episode_return=50, return=19.900]
Iteration 1: 100%|██████████| 50/50 [00:00<00:00, 51.21it/s, episode_return=100, return=21.400]
Iteration 2: 100%|██████████| 50/50 [00:01<00:00, 47.74it/s, episode_return=150, return=22.900]
Iteration 3: 100%|██████████| 50/50 [00:01<00:00, 41.00it/s, episode_return=200, return=25.200]
Iteration 4: 100%|██████████| 50/50 [00:01<00:00, 36.99it/s, episode_return=250, return=30.900]
Iteration 5: 100%|██████████| 50/50 [00:01<00:00, 27.07it/s, episode_return=300, return=40.900]
Iteration 6: 100%|██████████| 50/50 [00:02<00:00, 23.03it/s, episode_return=350, return=46.200]
Iteration 7: 100%|██████████| 50/50 [00:02<00:00, 21.85it/s, episode_return=400, return=49.200]
Iteration 8: 100%|██████████| 50/50 [00:02<00:00, 20.79it/s, episode_return=450, return=66.500]
Iteration 9: 100%|██████████| 50/50 [00:02<00:00, 16.70it/s, episode_return=500, return=70.800]


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,80778.42395


[34m[1mwandb[0m: Agent Starting Run: skowrans with config:
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.000922745219447915
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:01<00:00, 37.74it/s, episode_return=50, return=28.900]
Iteration 1: 100%|██████████| 50/50 [00:02<00:00, 17.72it/s, episode_return=100, return=69.300]
Iteration 2: 100%|██████████| 50/50 [00:04<00:00, 11.36it/s, episode_return=150, return=111.900]
Iteration 3: 100%|██████████| 50/50 [00:06<00:00,  8.07it/s, episode_return=200, return=133.200]
Iteration 4: 100%|██████████| 50/50 [00:08<00:00,  5.93it/s, episode_return=250, return=262.300]
Iteration 5: 100%|██████████| 50/50 [00:06<00:00,  7.57it/s, episode_return=300, return=252.700]
Iteration 6: 100%|██████████| 50/50 [00:12<00:00,  4.06it/s, episode_return=350, return=215.500]
Iteration 7: 100%|██████████| 50/50 [00:18<00:00,  2.67it/s, episode_return=400, return=273.800]
Iteration 8: 100%|██████████| 50/50 [00:15<00:00,  3.20it/s, episode_return=450, return=359.500]
Iteration 9: 100%|██████████| 50/50 [00:12<00:00,  3.94it/s, episode_return=500, return=197.100]


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,34858.8562


[34m[1mwandb[0m: Agent Starting Run: 9mx36kiy with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.00041474995736258673
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:00<00:00, 62.13it/s, episode_return=50, return=24.000]
Iteration 1: 100%|██████████| 50/50 [00:00<00:00, 57.25it/s, episode_return=100, return=27.000]
Iteration 2: 100%|██████████| 50/50 [00:01<00:00, 42.40it/s, episode_return=150, return=37.000]
Iteration 3: 100%|██████████| 50/50 [00:01<00:00, 46.32it/s, episode_return=200, return=27.200]
Iteration 4: 100%|██████████| 50/50 [00:01<00:00, 42.25it/s, episode_return=250, return=26.800]
Iteration 5: 100%|██████████| 50/50 [00:02<00:00, 24.24it/s, episode_return=300, return=53.800]
Iteration 6: 100%|██████████| 50/50 [00:02<00:00, 19.71it/s, episode_return=350, return=39.600]
Iteration 7: 100%|██████████| 50/50 [00:02<00:00, 20.49it/s, episode_return=400, return=52.600]
Iteration 8: 100%|██████████| 50/50 [00:03<00:00, 16.03it/s, episode_return=450, return=69.300]
Iteration 9: 100%|██████████| 50/50 [00:02<00:00, 17.47it/s, episode_return=500, return=56.800]


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,79973.59985


[34m[1mwandb[0m: Agent Starting Run: ctbnxzwz with config:
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.00045291969938564063
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:01<00:00, 33.09it/s, episode_return=50, return=30.000]
Iteration 1: 100%|██████████| 50/50 [00:01<00:00, 30.56it/s, episode_return=100, return=32.800]
Iteration 2: 100%|██████████| 50/50 [00:02<00:00, 22.45it/s, episode_return=150, return=39.800]
Iteration 3: 100%|██████████| 50/50 [00:02<00:00, 18.49it/s, episode_return=200, return=67.000]
Iteration 4: 100%|██████████| 50/50 [00:03<00:00, 13.67it/s, episode_return=250, return=144.100]
Iteration 5: 100%|██████████| 50/50 [00:06<00:00,  8.08it/s, episode_return=300, return=132.300]
Iteration 6: 100%|██████████| 50/50 [00:10<00:00,  4.84it/s, episode_return=350, return=128.200]
Iteration 7: 100%|██████████| 50/50 [00:11<00:00,  4.49it/s, episode_return=400, return=217.700]
Iteration 8: 100%|██████████| 50/50 [00:13<00:00,  3.71it/s, episode_return=450, return=317.800]
Iteration 9: 100%|██████████| 50/50 [00:13<00:00,  3.67it/s, episode_return=500, return=238.600]


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,49579.24403


[34m[1mwandb[0m: Agent Starting Run: 7r2tidtm with config:
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.00027441370524926946
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:01<00:00, 43.16it/s, episode_return=50, return=34.100]
Iteration 1: 100%|██████████| 50/50 [00:01<00:00, 43.89it/s, episode_return=100, return=26.300]
Iteration 2: 100%|██████████| 50/50 [00:01<00:00, 34.03it/s, episode_return=150, return=30.600]
Iteration 3: 100%|██████████| 50/50 [00:01<00:00, 32.28it/s, episode_return=200, return=42.100]
Iteration 4: 100%|██████████| 50/50 [00:01<00:00, 25.97it/s, episode_return=250, return=45.900]
Iteration 5: 100%|██████████| 50/50 [00:01<00:00, 29.95it/s, episode_return=300, return=40.100]
Iteration 6: 100%|██████████| 50/50 [00:01<00:00, 27.13it/s, episode_return=350, return=48.100]
Iteration 7: 100%|██████████| 50/50 [00:02<00:00, 23.12it/s, episode_return=400, return=49.200]
Iteration 8: 100%|██████████| 50/50 [00:02<00:00, 22.17it/s, episode_return=450, return=65.600]
Iteration 9: 100%|██████████| 50/50 [00:02<00:00, 19.00it/s, episode_return=500, return=47.100]


VBox(children=(Label(value='0.001 MB of 0.007 MB uploaded\r'), FloatProgress(value=0.17086873115922172, max=1.…

0,1
regret,▁

0,1
regret,77226.48174


[34m[1mwandb[0m: Agent Starting Run: fdf013gv with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 9.602056874479348e-05
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:00<00:00, 55.95it/s, episode_return=50, return=21.300]
Iteration 1: 100%|██████████| 50/50 [00:00<00:00, 54.56it/s, episode_return=100, return=18.700]
Iteration 2: 100%|██████████| 50/50 [00:01<00:00, 47.63it/s, episode_return=150, return=28.000]
Iteration 3: 100%|██████████| 50/50 [00:00<00:00, 52.73it/s, episode_return=200, return=19.100]
Iteration 4: 100%|██████████| 50/50 [00:00<00:00, 52.81it/s, episode_return=250, return=24.100]
Iteration 5: 100%|██████████| 50/50 [00:01<00:00, 46.44it/s, episode_return=300, return=24.600]
Iteration 6: 100%|██████████| 50/50 [00:00<00:00, 54.62it/s, episode_return=350, return=26.500]
Iteration 7: 100%|██████████| 50/50 [00:01<00:00, 49.20it/s, episode_return=400, return=25.300]
Iteration 8: 100%|██████████| 50/50 [00:00<00:00, 58.53it/s, episode_return=450, return=21.400]
Iteration 9: 100%|██████████| 50/50 [00:00<00:00, 51.83it/s, episode_return=500, return=26.000]


VBox(children=(Label(value='0.001 MB of 0.007 MB uploaded\r'), FloatProgress(value=0.16776625824693686, max=1.…

0,1
regret,▁

0,1
regret,86044.34056


[34m[1mwandb[0m: Agent Starting Run: q6vwl5mc with config:
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.0008969253209904033
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:02<00:00, 23.92it/s, episode_return=50, return=40.900]
Iteration 1: 100%|██████████| 50/50 [00:03<00:00, 14.22it/s, episode_return=100, return=70.300]
Iteration 2: 100%|██████████| 50/50 [00:04<00:00, 10.08it/s, episode_return=150, return=115.200]
Iteration 3: 100%|██████████| 50/50 [00:07<00:00,  6.34it/s, episode_return=200, return=192.000]
Iteration 4: 100%|██████████| 50/50 [00:08<00:00,  5.62it/s, episode_return=250, return=189.400]
Iteration 5: 100%|██████████| 50/50 [00:12<00:00,  3.98it/s, episode_return=300, return=179.400]
Iteration 6: 100%|██████████| 50/50 [00:13<00:00,  3.81it/s, episode_return=350, return=169.900]
Iteration 7: 100%|██████████| 50/50 [00:12<00:00,  4.05it/s, episode_return=400, return=336.000]
Iteration 8: 100%|██████████| 50/50 [00:13<00:00,  3.78it/s, episode_return=450, return=310.400]
Iteration 9: 100%|██████████| 50/50 [00:12<00:00,  4.09it/s, episode_return=500, return=245.500]


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,35604.88822


[34m[1mwandb[0m: Agent Starting Run: m0c192xc with config:
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.0005030828957003448
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:00<00:00, 59.28it/s, episode_return=50, return=27.600]
Iteration 1: 100%|██████████| 50/50 [00:01<00:00, 38.25it/s, episode_return=100, return=46.000]
Iteration 2: 100%|██████████| 50/50 [00:02<00:00, 23.11it/s, episode_return=150, return=41.400]
Iteration 3: 100%|██████████| 50/50 [00:02<00:00, 17.30it/s, episode_return=200, return=49.500]
Iteration 4: 100%|██████████| 50/50 [00:04<00:00, 11.25it/s, episode_return=250, return=63.600]
Iteration 5: 100%|██████████| 50/50 [00:05<00:00,  8.47it/s, episode_return=300, return=84.100] 
Iteration 6: 100%|██████████| 50/50 [00:10<00:00,  4.87it/s, episode_return=350, return=209.300]
Iteration 7: 100%|██████████| 50/50 [00:11<00:00,  4.52it/s, episode_return=400, return=168.400]
Iteration 8: 100%|██████████| 50/50 [00:12<00:00,  3.91it/s, episode_return=450, return=105.200]
Iteration 9: 100%|██████████| 50/50 [00:12<00:00,  3.92it/s, episode_return=500, return=252.500]


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,53014.15151


[34m[1mwandb[0m: Agent Starting Run: nxf4eqks with config:
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.00013540778055369323
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:01<00:00, 49.18it/s, episode_return=50, return=17.100]
Iteration 1: 100%|██████████| 50/50 [00:01<00:00, 35.36it/s, episode_return=100, return=29.200]
Iteration 2: 100%|██████████| 50/50 [00:01<00:00, 37.20it/s, episode_return=150, return=24.600]
Iteration 3: 100%|██████████| 50/50 [00:01<00:00, 32.09it/s, episode_return=200, return=21.500]
Iteration 4: 100%|██████████| 50/50 [00:01<00:00, 35.96it/s, episode_return=250, return=29.600]
Iteration 5: 100%|██████████| 50/50 [00:01<00:00, 28.20it/s, episode_return=300, return=27.400]
Iteration 6: 100%|██████████| 50/50 [00:01<00:00, 31.02it/s, episode_return=350, return=21.500]
Iteration 7: 100%|██████████| 50/50 [00:01<00:00, 35.82it/s, episode_return=400, return=25.400]
Iteration 8: 100%|██████████| 50/50 [00:01<00:00, 30.40it/s, episode_return=450, return=38.200]
Iteration 9: 100%|██████████| 50/50 [00:01<00:00, 27.69it/s, episode_return=500, return=32.700]


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,85513.15783


[34m[1mwandb[0m: Agent Starting Run: tncc3r6v with config:
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.0008203137498887878
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:01<00:00, 26.67it/s, episode_return=50, return=33.800]
Iteration 1: 100%|██████████| 50/50 [00:02<00:00, 19.41it/s, episode_return=100, return=57.300]
Iteration 2: 100%|██████████| 50/50 [00:04<00:00, 10.22it/s, episode_return=150, return=125.100]
Iteration 3: 100%|██████████| 50/50 [00:06<00:00,  8.06it/s, episode_return=200, return=116.300]
Iteration 4: 100%|██████████| 50/50 [00:08<00:00,  5.66it/s, episode_return=250, return=261.800]
Iteration 5: 100%|██████████| 50/50 [00:14<00:00,  3.57it/s, episode_return=300, return=361.600]
Iteration 6: 100%|██████████| 50/50 [00:12<00:00,  3.85it/s, episode_return=350, return=344.000]
Iteration 7: 100%|██████████| 50/50 [00:12<00:00,  4.12it/s, episode_return=400, return=303.800]
Iteration 8: 100%|██████████| 50/50 [00:18<00:00,  2.73it/s, episode_return=450, return=293.400]
Iteration 9: 100%|██████████| 50/50 [00:16<00:00,  3.02it/s, episode_return=500, return=168.600]


VBox(children=(Label(value='0.001 MB of 0.007 MB uploaded\r'), FloatProgress(value=0.16675581706338594, max=1.…

0,1
regret,▁

0,1
regret,32531.05118


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: nzt6wyns with config:
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.00024157632586018545
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:01<00:00, 28.75it/s, episode_return=50, return=20.000]
Iteration 1: 100%|██████████| 50/50 [00:02<00:00, 22.97it/s, episode_return=100, return=23.900]
Iteration 2: 100%|██████████| 50/50 [00:02<00:00, 24.39it/s, episode_return=150, return=30.400]
Iteration 3: 100%|██████████| 50/50 [00:02<00:00, 18.75it/s, episode_return=200, return=52.100]
Iteration 4: 100%|██████████| 50/50 [00:02<00:00, 16.99it/s, episode_return=250, return=32.500]
Iteration 5: 100%|██████████| 50/50 [00:03<00:00, 13.37it/s, episode_return=300, return=64.000]
Iteration 6: 100%|██████████| 50/50 [00:02<00:00, 20.31it/s, episode_return=350, return=46.900]
Iteration 7: 100%|██████████| 50/50 [00:03<00:00, 16.14it/s, episode_return=400, return=63.100]
Iteration 8: 100%|██████████| 50/50 [00:04<00:00, 11.06it/s, episode_return=450, return=140.700]
Iteration 9: 100%|██████████| 50/50 [00:06<00:00,  8.28it/s, episode_return=500, return=92.800] 


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,74440.9717


[34m[1mwandb[0m: Agent Starting Run: difie02c with config:
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.00015327181644829357
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:01<00:00, 43.84it/s, episode_return=50, return=22.500]
Iteration 1: 100%|██████████| 50/50 [00:00<00:00, 53.01it/s, episode_return=100, return=19.900]
Iteration 2: 100%|██████████| 50/50 [00:01<00:00, 48.53it/s, episode_return=150, return=25.000]
Iteration 3: 100%|██████████| 50/50 [00:00<00:00, 51.26it/s, episode_return=200, return=20.000]
Iteration 4: 100%|██████████| 50/50 [00:01<00:00, 45.94it/s, episode_return=250, return=29.500]
Iteration 5: 100%|██████████| 50/50 [00:01<00:00, 42.70it/s, episode_return=300, return=29.700]
Iteration 6: 100%|██████████| 50/50 [00:01<00:00, 40.10it/s, episode_return=350, return=27.400]
Iteration 7: 100%|██████████| 50/50 [00:01<00:00, 45.08it/s, episode_return=400, return=30.400]
Iteration 8: 100%|██████████| 50/50 [00:01<00:00, 41.89it/s, episode_return=450, return=26.100]
Iteration 9: 100%|██████████| 50/50 [00:01<00:00, 41.39it/s, episode_return=500, return=48.200]


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,84360.95496


[34m[1mwandb[0m: Agent Starting Run: rpfau8th with config:
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.0009769980339656365
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:00<00:00, 62.87it/s, episode_return=50, return=23.300]
Iteration 1: 100%|██████████| 50/50 [00:00<00:00, 52.00it/s, episode_return=100, return=42.300]
Iteration 2: 100%|██████████| 50/50 [00:01<00:00, 34.21it/s, episode_return=150, return=65.300]
Iteration 3: 100%|██████████| 50/50 [00:01<00:00, 27.76it/s, episode_return=200, return=89.200]
Iteration 4: 100%|██████████| 50/50 [00:03<00:00, 13.32it/s, episode_return=250, return=149.700]
Iteration 5: 100%|██████████| 50/50 [00:06<00:00,  7.35it/s, episode_return=300, return=240.500]
Iteration 6: 100%|██████████| 50/50 [00:07<00:00,  6.33it/s, episode_return=350, return=221.400]
Iteration 7: 100%|██████████| 50/50 [00:08<00:00,  6.11it/s, episode_return=400, return=258.200]
Iteration 8: 100%|██████████| 50/50 [00:28<00:00,  1.73it/s, episode_return=450, return=311.000]
Iteration 9: 100%|██████████| 50/50 [00:25<00:00,  1.96it/s, episode_return=500, return=352.000]


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,42108.18203


[34m[1mwandb[0m: Agent Starting Run: g9raimeo with config:
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.0008667231988266972
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:02<00:00, 16.89it/s, episode_return=50, return=41.400]
Iteration 1: 100%|██████████| 50/50 [00:05<00:00,  9.90it/s, episode_return=100, return=75.500]
Iteration 2: 100%|██████████| 50/50 [00:09<00:00,  5.24it/s, episode_return=150, return=181.300]
Iteration 3: 100%|██████████| 50/50 [00:13<00:00,  3.72it/s, episode_return=200, return=115.300]
Iteration 4: 100%|██████████| 50/50 [00:11<00:00,  4.38it/s, episode_return=250, return=229.300]
Iteration 5: 100%|██████████| 50/50 [00:15<00:00,  3.19it/s, episode_return=300, return=404.300]
Iteration 6: 100%|██████████| 50/50 [00:13<00:00,  3.68it/s, episode_return=350, return=284.700]
Iteration 7: 100%|██████████| 50/50 [00:11<00:00,  4.53it/s, episode_return=400, return=250.900]
Iteration 8: 100%|██████████| 50/50 [00:13<00:00,  3.67it/s, episode_return=450, return=286.400]
Iteration 9: 100%|██████████| 50/50 [00:22<00:00,  2.26it/s, episode_return=500, return=446.500]


VBox(children=(Label(value='0.001 MB of 0.007 MB uploaded\r'), FloatProgress(value=0.16956758226815338, max=1.…

0,1
regret,▁

0,1
regret,28166.89796


[34m[1mwandb[0m: Agent Starting Run: wbo7s0p9 with config:
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.0003476623218232946
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:00<00:00, 62.16it/s, episode_return=50, return=22.700]
Iteration 1: 100%|██████████| 50/50 [00:00<00:00, 65.98it/s, episode_return=100, return=25.600]
Iteration 2: 100%|██████████| 50/50 [00:00<00:00, 62.50it/s, episode_return=150, return=25.800]
Iteration 3: 100%|██████████| 50/50 [00:01<00:00, 48.68it/s, episode_return=200, return=38.100]
Iteration 4: 100%|██████████| 50/50 [00:01<00:00, 38.55it/s, episode_return=250, return=60.500]
Iteration 5: 100%|██████████| 50/50 [00:01<00:00, 39.50it/s, episode_return=300, return=31.700]
Iteration 6: 100%|██████████| 50/50 [00:01<00:00, 27.58it/s, episode_return=350, return=49.000]
Iteration 7: 100%|██████████| 50/50 [00:02<00:00, 21.25it/s, episode_return=400, return=87.800]
Iteration 8: 100%|██████████| 50/50 [00:02<00:00, 19.25it/s, episode_return=450, return=93.600]
Iteration 9: 100%|██████████| 50/50 [00:03<00:00, 14.51it/s, episode_return=500, return=88.200] 


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,78687.56778


[34m[1mwandb[0m: Agent Starting Run: u3dexga3 with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.000842162062218298
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:00<00:00, 67.91it/s, episode_return=50, return=21.500]
Iteration 1: 100%|██████████| 50/50 [00:00<00:00, 55.85it/s, episode_return=100, return=24.800]
Iteration 2: 100%|██████████| 50/50 [00:01<00:00, 43.17it/s, episode_return=150, return=32.500]
Iteration 3: 100%|██████████| 50/50 [00:01<00:00, 33.45it/s, episode_return=200, return=50.100]
Iteration 4: 100%|██████████| 50/50 [00:01<00:00, 29.90it/s, episode_return=250, return=52.700]
Iteration 5: 100%|██████████| 50/50 [00:01<00:00, 30.31it/s, episode_return=300, return=46.500]
Iteration 6: 100%|██████████| 50/50 [00:02<00:00, 24.80it/s, episode_return=350, return=46.200]
Iteration 7: 100%|██████████| 50/50 [00:02<00:00, 23.73it/s, episode_return=400, return=58.600]
Iteration 8: 100%|██████████| 50/50 [00:02<00:00, 19.96it/s, episode_return=450, return=60.000]
Iteration 9: 100%|██████████| 50/50 [00:03<00:00, 15.68it/s, episode_return=500, return=87.400] 


VBox(children=(Label(value='0.001 MB of 0.007 MB uploaded\r'), FloatProgress(value=0.1709155701754386, max=1.0…

0,1
regret,▁

0,1
regret,75587.03603


[34m[1mwandb[0m: Agent Starting Run: pf8kakni with config:
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.0002499132335192946
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:00<00:00, 61.14it/s, episode_return=50, return=31.100]
Iteration 1: 100%|██████████| 50/50 [00:00<00:00, 71.99it/s, episode_return=100, return=26.200]
Iteration 2: 100%|██████████| 50/50 [00:00<00:00, 60.63it/s, episode_return=150, return=31.700]
Iteration 3: 100%|██████████| 50/50 [00:00<00:00, 57.94it/s, episode_return=200, return=34.900]
Iteration 4: 100%|██████████| 50/50 [00:00<00:00, 56.11it/s, episode_return=250, return=31.900]
Iteration 5: 100%|██████████| 50/50 [00:00<00:00, 55.56it/s, episode_return=300, return=32.400]
Iteration 6: 100%|██████████| 50/50 [00:01<00:00, 45.52it/s, episode_return=350, return=45.300]
Iteration 7: 100%|██████████| 50/50 [00:01<00:00, 39.12it/s, episode_return=400, return=46.800]
Iteration 8: 100%|██████████| 50/50 [00:01<00:00, 30.23it/s, episode_return=450, return=49.100]
Iteration 9: 100%|██████████| 50/50 [00:03<00:00, 15.27it/s, episode_return=500, return=37.600]


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,82779.31304


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 4167ys1c with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.0005698233494317861
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:01<00:00, 46.07it/s, episode_return=50, return=30.900]
Iteration 1: 100%|██████████| 50/50 [00:01<00:00, 44.82it/s, episode_return=100, return=26.800]
Iteration 2: 100%|██████████| 50/50 [00:01<00:00, 35.30it/s, episode_return=150, return=29.700]
Iteration 3: 100%|██████████| 50/50 [00:01<00:00, 33.65it/s, episode_return=200, return=34.800]
Iteration 4: 100%|██████████| 50/50 [00:01<00:00, 25.28it/s, episode_return=250, return=46.100]
Iteration 5: 100%|██████████| 50/50 [00:01<00:00, 25.63it/s, episode_return=300, return=45.500]
Iteration 6: 100%|██████████| 50/50 [00:02<00:00, 23.51it/s, episode_return=350, return=50.500]
Iteration 7: 100%|██████████| 50/50 [00:03<00:00, 13.30it/s, episode_return=400, return=105.900]
Iteration 8: 100%|██████████| 50/50 [00:05<00:00,  9.70it/s, episode_return=450, return=129.200]
Iteration 9: 100%|██████████| 50/50 [00:06<00:00,  8.15it/s, episode_return=500, return=192.000]


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,71852.72512


[34m[1mwandb[0m: Agent Starting Run: mv6l589a with config:
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.0007665944392165715
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011111111111111112, max=1.0…

Iteration 0: 100%|██████████| 50/50 [00:01<00:00, 45.80it/s, episode_return=50, return=36.800]
Iteration 1: 100%|██████████| 50/50 [00:01<00:00, 31.39it/s, episode_return=100, return=38.700]
Iteration 2: 100%|██████████| 50/50 [00:02<00:00, 24.94it/s, episode_return=150, return=41.800]
Iteration 3: 100%|██████████| 50/50 [00:03<00:00, 15.31it/s, episode_return=200, return=101.100]
Iteration 4: 100%|██████████| 50/50 [00:04<00:00, 11.07it/s, episode_return=250, return=143.300]
Iteration 5: 100%|██████████| 50/50 [00:06<00:00,  7.45it/s, episode_return=300, return=218.800]
Iteration 6: 100%|██████████| 50/50 [00:07<00:00,  6.65it/s, episode_return=350, return=185.600]
Iteration 7: 100%|██████████| 50/50 [00:09<00:00,  5.54it/s, episode_return=400, return=254.000]
Iteration 8: 100%|██████████| 50/50 [00:09<00:00,  5.55it/s, episode_return=450, return=220.700]
Iteration 9: 100%|██████████| 50/50 [00:10<00:00,  4.80it/s, episode_return=500, return=231.500]


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,43705.17636


[34m[1mwandb[0m: Agent Starting Run: ukt4fxbe with config:
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.0001156262221082664
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:00<00:00, 51.27it/s, episode_return=50, return=20.000]
Iteration 1: 100%|██████████| 50/50 [00:01<00:00, 46.35it/s, episode_return=100, return=24.600]
Iteration 2: 100%|██████████| 50/50 [00:01<00:00, 38.09it/s, episode_return=150, return=39.100]
Iteration 3: 100%|██████████| 50/50 [00:01<00:00, 28.20it/s, episode_return=200, return=54.400]
Iteration 4: 100%|██████████| 50/50 [00:01<00:00, 28.60it/s, episode_return=250, return=45.200]
Iteration 5: 100%|██████████| 50/50 [00:01<00:00, 28.05it/s, episode_return=300, return=33.700]
Iteration 6: 100%|██████████| 50/50 [00:02<00:00, 23.25it/s, episode_return=350, return=56.500]
Iteration 7: 100%|██████████| 50/50 [00:02<00:00, 19.99it/s, episode_return=400, return=56.600]
Iteration 8: 100%|██████████| 50/50 [00:02<00:00, 18.28it/s, episode_return=450, return=69.500]
Iteration 9: 100%|██████████| 50/50 [00:02<00:00, 18.79it/s, episode_return=500, return=73.700]


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,78385.79164


[34m[1mwandb[0m: Agent Starting Run: 5azwksh7 with config:
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.0007989650755710843
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:01<00:00, 32.82it/s, episode_return=50, return=29.300]
Iteration 1: 100%|██████████| 50/50 [00:01<00:00, 40.19it/s, episode_return=100, return=35.900]
Iteration 2: 100%|██████████| 50/50 [00:02<00:00, 24.75it/s, episode_return=150, return=52.600]
Iteration 3: 100%|██████████| 50/50 [00:02<00:00, 19.34it/s, episode_return=200, return=80.500]
Iteration 4: 100%|██████████| 50/50 [00:02<00:00, 16.82it/s, episode_return=250, return=78.200]
Iteration 5: 100%|██████████| 50/50 [00:04<00:00, 11.23it/s, episode_return=300, return=155.000]
Iteration 6: 100%|██████████| 50/50 [00:07<00:00,  6.50it/s, episode_return=350, return=266.400]
Iteration 7: 100%|██████████| 50/50 [00:09<00:00,  5.33it/s, episode_return=400, return=242.900]
Iteration 8: 100%|██████████| 50/50 [00:07<00:00,  7.06it/s, episode_return=450, return=176.400]
Iteration 9: 100%|██████████| 50/50 [00:10<00:00,  4.86it/s, episode_return=500, return=268.300]


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,48811.49137


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: o2x6inee with config:
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.0007009836134463743
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:01<00:00, 39.22it/s, episode_return=50, return=22.900]
Iteration 1: 100%|██████████| 50/50 [00:01<00:00, 29.51it/s, episode_return=100, return=44.500]
Iteration 2: 100%|██████████| 50/50 [00:01<00:00, 26.60it/s, episode_return=150, return=42.100]
Iteration 3: 100%|██████████| 50/50 [00:02<00:00, 20.46it/s, episode_return=200, return=80.500]
Iteration 4: 100%|██████████| 50/50 [00:04<00:00, 11.15it/s, episode_return=250, return=137.000]
Iteration 5: 100%|██████████| 50/50 [00:05<00:00,  9.04it/s, episode_return=300, return=158.500]
Iteration 6: 100%|██████████| 50/50 [45:23<00:00, 54.47s/it, episode_return=350, return=207.200]   
Iteration 7: 100%|██████████| 50/50 [00:13<00:00,  3.78it/s, episode_return=400, return=182.500]
Iteration 8: 100%|██████████| 50/50 [00:10<00:00,  4.68it/s, episode_return=450, return=180.300]
Iteration 9: 100%|██████████| 50/50 [00:16<00:00,  3.12it/s, episode_return=500, return=321.400]


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,46519.3484


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 84x2mrde with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 5.786427635600682e-05
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:00<00:00, 52.83it/s, episode_return=50, return=24.300]
Iteration 1: 100%|██████████| 50/50 [00:00<00:00, 61.62it/s, episode_return=100, return=15.900]
Iteration 2: 100%|██████████| 50/50 [00:00<00:00, 64.10it/s, episode_return=150, return=19.600]
Iteration 3: 100%|██████████| 50/50 [00:00<00:00, 58.46it/s, episode_return=200, return=26.000]
Iteration 4: 100%|██████████| 50/50 [00:00<00:00, 58.27it/s, episode_return=250, return=24.600]
Iteration 5: 100%|██████████| 50/50 [00:00<00:00, 55.39it/s, episode_return=300, return=22.400]
Iteration 6: 100%|██████████| 50/50 [00:00<00:00, 52.88it/s, episode_return=350, return=25.900]
Iteration 7: 100%|██████████| 50/50 [00:00<00:00, 51.69it/s, episode_return=400, return=25.300]
Iteration 8: 100%|██████████| 50/50 [00:01<00:00, 44.77it/s, episode_return=450, return=23.700]
Iteration 9: 100%|██████████| 50/50 [00:00<00:00, 50.59it/s, episode_return=500, return=30.300]


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,86307.38839


[34m[1mwandb[0m: Agent Starting Run: cw2yrk3k with config:
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.0009737693000205388
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:01<00:00, 41.79it/s, episode_return=50, return=44.300]
Iteration 1: 100%|██████████| 50/50 [00:01<00:00, 37.03it/s, episode_return=100, return=40.700]
Iteration 2: 100%|██████████| 50/50 [00:02<00:00, 24.34it/s, episode_return=150, return=56.000]
Iteration 3: 100%|██████████| 50/50 [00:02<00:00, 16.71it/s, episode_return=200, return=80.200]
Iteration 4: 100%|██████████| 50/50 [00:04<00:00, 12.30it/s, episode_return=250, return=105.700]
Iteration 5: 100%|██████████| 50/50 [00:06<00:00,  7.60it/s, episode_return=300, return=171.200]
Iteration 6: 100%|██████████| 50/50 [00:04<00:00, 11.06it/s, episode_return=350, return=137.600]
Iteration 7: 100%|██████████| 50/50 [00:08<00:00,  5.62it/s, episode_return=400, return=242.800]
Iteration 8: 100%|██████████| 50/50 [00:07<00:00,  6.28it/s, episode_return=450, return=224.300]
Iteration 9: 100%|██████████| 50/50 [00:11<00:00,  4.23it/s, episode_return=500, return=366.700]


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,44092.28735


[34m[1mwandb[0m: Agent Starting Run: mhx3lrht with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.000303494541560603
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:00<00:00, 60.81it/s, episode_return=50, return=16.100]
Iteration 1: 100%|██████████| 50/50 [00:01<00:00, 41.89it/s, episode_return=100, return=28.900]
Iteration 2: 100%|██████████| 50/50 [00:01<00:00, 44.13it/s, episode_return=150, return=16.500]
Iteration 3: 100%|██████████| 50/50 [00:01<00:00, 46.82it/s, episode_return=200, return=16.900]
Iteration 4: 100%|██████████| 50/50 [00:01<00:00, 32.78it/s, episode_return=250, return=19.100]
Iteration 5: 100%|██████████| 50/50 [00:01<00:00, 38.81it/s, episode_return=300, return=27.300]
Iteration 6: 100%|██████████| 50/50 [00:01<00:00, 29.02it/s, episode_return=350, return=26.600]
Iteration 7: 100%|██████████| 50/50 [00:02<00:00, 21.54it/s, episode_return=400, return=42.700]
Iteration 8: 100%|██████████| 50/50 [00:02<00:00, 19.91it/s, episode_return=450, return=39.900]
Iteration 9: 100%|██████████| 50/50 [00:02<00:00, 17.08it/s, episode_return=500, return=38.300]


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,83707.97629


[34m[1mwandb[0m: Agent Starting Run: mroqa4v4 with config:
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.0006665415413698562
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:01<00:00, 49.22it/s, episode_return=50, return=24.600]
Iteration 1: 100%|██████████| 50/50 [00:01<00:00, 39.81it/s, episode_return=100, return=36.500]
Iteration 2: 100%|██████████| 50/50 [00:01<00:00, 28.21it/s, episode_return=150, return=53.200]
Iteration 3: 100%|██████████| 50/50 [00:02<00:00, 23.51it/s, episode_return=200, return=60.300]
Iteration 4: 100%|██████████| 50/50 [00:02<00:00, 17.95it/s, episode_return=250, return=85.400]
Iteration 5: 100%|██████████| 50/50 [00:04<00:00, 11.42it/s, episode_return=300, return=162.600]
Iteration 6: 100%|██████████| 50/50 [00:05<00:00,  8.91it/s, episode_return=350, return=179.000]
Iteration 7: 100%|██████████| 50/50 [00:08<00:00,  6.14it/s, episode_return=400, return=282.800]
Iteration 8: 100%|██████████| 50/50 [00:08<00:00,  6.02it/s, episode_return=450, return=274.700]
Iteration 9: 100%|██████████| 50/50 [00:07<00:00,  6.76it/s, episode_return=500, return=145.900]


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,50176.06157


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 1al9nh8r with config:
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.0005734199155920707
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:01<00:00, 40.57it/s, episode_return=50, return=31.100]
Iteration 1: 100%|██████████| 50/50 [00:01<00:00, 31.57it/s, episode_return=100, return=52.500]
Iteration 2: 100%|██████████| 50/50 [00:02<00:00, 22.17it/s, episode_return=150, return=83.300]
Iteration 3: 100%|██████████| 50/50 [00:02<00:00, 21.94it/s, episode_return=200, return=62.400]
Iteration 4: 100%|██████████| 50/50 [00:02<00:00, 20.82it/s, episode_return=250, return=57.900]
Iteration 5: 100%|██████████| 50/50 [00:03<00:00, 13.20it/s, episode_return=300, return=137.300]
Iteration 6: 100%|██████████| 50/50 [00:07<00:00,  7.03it/s, episode_return=350, return=245.400]
Iteration 7: 100%|██████████| 50/50 [00:06<00:00,  7.36it/s, episode_return=400, return=152.600]
Iteration 8: 100%|██████████| 50/50 [00:08<00:00,  5.63it/s, episode_return=450, return=266.200]
Iteration 9: 100%|██████████| 50/50 [00:12<00:00,  4.01it/s, episode_return=500, return=382.000]


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,48736.28978


[34m[1mwandb[0m: Agent Starting Run: r6oj6htg with config:
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.000647190002060226
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:01<00:00, 40.89it/s, episode_return=50, return=25.000]
Iteration 1: 100%|██████████| 50/50 [00:01<00:00, 32.12it/s, episode_return=100, return=33.300]
Iteration 2: 100%|██████████| 50/50 [00:02<00:00, 23.65it/s, episode_return=150, return=51.500]
Iteration 3: 100%|██████████| 50/50 [00:01<00:00, 26.25it/s, episode_return=200, return=43.100]
Iteration 4: 100%|██████████| 50/50 [00:02<00:00, 18.83it/s, episode_return=250, return=64.900] 
Iteration 5: 100%|██████████| 50/50 [00:03<00:00, 13.19it/s, episode_return=300, return=144.800]
Iteration 6: 100%|██████████| 50/50 [00:05<00:00,  9.83it/s, episode_return=350, return=135.200]
Iteration 7: 100%|██████████| 50/50 [00:05<00:00,  8.91it/s, episode_return=400, return=191.000]
Iteration 8: 100%|██████████| 50/50 [00:07<00:00,  6.52it/s, episode_return=450, return=270.300]
Iteration 9: 100%|██████████| 50/50 [00:05<00:00,  9.07it/s, episode_return=500, return=239.200]


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,53998.08746


[34m[1mwandb[0m: Agent Starting Run: 36o6uz34 with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.0005442547450610207
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011288888888925108, max=1.0…

Iteration 0: 100%|██████████| 50/50 [00:01<00:00, 49.51it/s, episode_return=50, return=21.600]
Iteration 1: 100%|██████████| 50/50 [00:01<00:00, 32.71it/s, episode_return=100, return=37.100]
Iteration 2: 100%|██████████| 50/50 [00:01<00:00, 27.93it/s, episode_return=150, return=30.000]
Iteration 3: 100%|██████████| 50/50 [00:02<00:00, 23.11it/s, episode_return=200, return=62.500]
Iteration 4: 100%|██████████| 50/50 [00:03<00:00, 16.24it/s, episode_return=250, return=48.600]
Iteration 5: 100%|██████████| 50/50 [00:02<00:00, 18.41it/s, episode_return=300, return=40.900]
Iteration 6: 100%|██████████| 50/50 [00:03<00:00, 16.10it/s, episode_return=350, return=53.400]
Iteration 7: 100%|██████████| 50/50 [00:02<00:00, 20.28it/s, episode_return=400, return=63.200]
Iteration 8: 100%|██████████| 50/50 [00:02<00:00, 17.78it/s, episode_return=450, return=66.800]
Iteration 9: 100%|██████████| 50/50 [00:03<00:00, 13.39it/s, episode_return=500, return=82.400]


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,77220.14465


[34m[1mwandb[0m: Agent Starting Run: yxehm5x5 with config:
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.00044920952879637816
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:00<00:00, 56.52it/s, episode_return=50, return=23.100]
Iteration 1: 100%|██████████| 50/50 [00:01<00:00, 42.63it/s, episode_return=100, return=31.900]
Iteration 2: 100%|██████████| 50/50 [00:01<00:00, 38.95it/s, episode_return=150, return=41.900]
Iteration 3: 100%|██████████| 50/50 [00:01<00:00, 25.16it/s, episode_return=200, return=73.900]
Iteration 4: 100%|██████████| 50/50 [00:02<00:00, 18.24it/s, episode_return=250, return=99.900]
Iteration 5: 100%|██████████| 50/50 [00:04<00:00, 11.90it/s, episode_return=300, return=103.200]
Iteration 6: 100%|██████████| 50/50 [00:05<00:00,  9.37it/s, episode_return=350, return=139.000]
Iteration 7: 100%|██████████| 50/50 [00:05<00:00,  8.57it/s, episode_return=400, return=135.200]
Iteration 8: 100%|██████████| 50/50 [00:07<00:00,  6.63it/s, episode_return=450, return=281.400]
Iteration 9: 100%|██████████| 50/50 [00:09<00:00,  5.11it/s, episode_return=500, return=282.100]


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,55290.63462


[34m[1mwandb[0m: Agent Starting Run: jxtbhjk1 with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.000790414073571001
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:01<00:00, 44.53it/s, episode_return=50, return=24.800]
Iteration 1: 100%|██████████| 50/50 [00:01<00:00, 37.26it/s, episode_return=100, return=28.000]
Iteration 2: 100%|██████████| 50/50 [00:02<00:00, 24.62it/s, episode_return=150, return=56.200]
Iteration 3: 100%|██████████| 50/50 [00:02<00:00, 22.76it/s, episode_return=200, return=56.600]
Iteration 4: 100%|██████████| 50/50 [00:02<00:00, 17.01it/s, episode_return=250, return=60.500]
Iteration 5: 100%|██████████| 50/50 [00:03<00:00, 16.06it/s, episode_return=300, return=64.600]
Iteration 6: 100%|██████████| 50/50 [00:03<00:00, 13.49it/s, episode_return=350, return=100.200]
Iteration 7: 100%|██████████| 50/50 [00:05<00:00,  9.30it/s, episode_return=400, return=128.300]
Iteration 8: 100%|██████████| 50/50 [00:08<00:00,  6.22it/s, episode_return=450, return=176.000]
Iteration 9: 100%|██████████| 50/50 [00:10<00:00,  4.76it/s, episode_return=500, return=246.300]


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,66104.58968


[34m[1mwandb[0m: Agent Starting Run: mpkt22z1 with config:
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.00028224837093361783
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:01<00:00, 43.59it/s, episode_return=50, return=19.300]
Iteration 1: 100%|██████████| 50/50 [00:01<00:00, 30.78it/s, episode_return=100, return=40.800]
Iteration 2: 100%|██████████| 50/50 [00:01<00:00, 29.28it/s, episode_return=150, return=39.300]
Iteration 3: 100%|██████████| 50/50 [00:01<00:00, 26.86it/s, episode_return=200, return=60.900]
Iteration 4: 100%|██████████| 50/50 [00:02<00:00, 24.86it/s, episode_return=250, return=51.300]
Iteration 5: 100%|██████████| 50/50 [00:02<00:00, 21.30it/s, episode_return=300, return=62.300]
Iteration 6: 100%|██████████| 50/50 [00:02<00:00, 19.20it/s, episode_return=350, return=73.700]
Iteration 7: 100%|██████████| 50/50 [00:02<00:00, 18.91it/s, episode_return=400, return=87.100]
Iteration 8: 100%|██████████| 50/50 [00:03<00:00, 13.02it/s, episode_return=450, return=102.400]
Iteration 9: 100%|██████████| 50/50 [00:05<00:00,  9.47it/s, episode_return=500, return=131.300]


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,70607.07945


[34m[1mwandb[0m: Agent Starting Run: lk8cf0t5 with config:
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.0003569810649013352
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011277777777932999, max=1.0…

Iteration 0: 100%|██████████| 50/50 [00:00<00:00, 63.22it/s, episode_return=50, return=20.500]
Iteration 1: 100%|██████████| 50/50 [00:00<00:00, 63.15it/s, episode_return=100, return=19.100]
Iteration 2: 100%|██████████| 50/50 [00:00<00:00, 53.25it/s, episode_return=150, return=28.900]
Iteration 3: 100%|██████████| 50/50 [00:00<00:00, 50.01it/s, episode_return=200, return=26.000]
Iteration 4: 100%|██████████| 50/50 [00:01<00:00, 45.51it/s, episode_return=250, return=35.400]
Iteration 5: 100%|██████████| 50/50 [00:01<00:00, 37.13it/s, episode_return=300, return=44.500]
Iteration 6: 100%|██████████| 50/50 [00:01<00:00, 31.33it/s, episode_return=350, return=52.300]
Iteration 7: 100%|██████████| 50/50 [00:01<00:00, 27.78it/s, episode_return=400, return=52.000]
Iteration 8: 100%|██████████| 50/50 [00:01<00:00, 25.23it/s, episode_return=450, return=62.400]
Iteration 9: 100%|██████████| 50/50 [00:02<00:00, 21.53it/s, episode_return=500, return=64.000]


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,81732.684


[34m[1mwandb[0m: Agent Starting Run: sgqzkjkb with config:
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.00022767699180984693
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:01<00:00, 47.75it/s, episode_return=50, return=27.300]
Iteration 1: 100%|██████████| 50/50 [00:01<00:00, 43.50it/s, episode_return=100, return=29.800]
Iteration 2: 100%|██████████| 50/50 [00:01<00:00, 34.11it/s, episode_return=150, return=40.000]
Iteration 3: 100%|██████████| 50/50 [00:01<00:00, 33.43it/s, episode_return=200, return=48.200]
Iteration 4: 100%|██████████| 50/50 [00:01<00:00, 29.57it/s, episode_return=250, return=39.800]
Iteration 5: 100%|██████████| 50/50 [00:01<00:00, 29.51it/s, episode_return=300, return=48.500]
Iteration 6: 100%|██████████| 50/50 [00:02<00:00, 24.01it/s, episode_return=350, return=60.800]
Iteration 7: 100%|██████████| 50/50 [00:02<00:00, 21.04it/s, episode_return=400, return=58.600]
Iteration 8: 100%|██████████| 50/50 [00:02<00:00, 21.95it/s, episode_return=450, return=53.300]
Iteration 9: 100%|██████████| 50/50 [00:02<00:00, 19.45it/s, episode_return=500, return=52.500]


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,76796.49671


[34m[1mwandb[0m: Agent Starting Run: 9rgju97o with config:
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.000399269029028001
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:00<00:00, 52.68it/s, episode_return=50, return=34.800]
Iteration 1: 100%|██████████| 50/50 [00:01<00:00, 41.40it/s, episode_return=100, return=36.600]
Iteration 2: 100%|██████████| 50/50 [00:01<00:00, 36.18it/s, episode_return=150, return=31.600]
Iteration 3: 100%|██████████| 50/50 [00:01<00:00, 35.30it/s, episode_return=200, return=34.800]
Iteration 4: 100%|██████████| 50/50 [00:01<00:00, 31.28it/s, episode_return=250, return=54.000]
Iteration 5: 100%|██████████| 50/50 [00:01<00:00, 27.99it/s, episode_return=300, return=58.400]
Iteration 6: 100%|██████████| 50/50 [00:02<00:00, 23.04it/s, episode_return=350, return=49.900]
Iteration 7: 100%|██████████| 50/50 [00:02<00:00, 18.99it/s, episode_return=400, return=86.800]
Iteration 8: 100%|██████████| 50/50 [00:03<00:00, 15.58it/s, episode_return=450, return=109.800]
Iteration 9: 100%|██████████| 50/50 [00:03<00:00, 12.88it/s, episode_return=500, return=92.800] 


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,74666.3367


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: hhy53q8p with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.00020968880257820024
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:00<00:00, 52.26it/s, episode_return=50, return=20.400]
Iteration 1: 100%|██████████| 50/50 [00:01<00:00, 47.58it/s, episode_return=100, return=21.500]
Iteration 2: 100%|██████████| 50/50 [00:01<00:00, 44.95it/s, episode_return=150, return=31.500]
Iteration 3: 100%|██████████| 50/50 [00:00<00:00, 60.09it/s, episode_return=200, return=22.800]
Iteration 4: 100%|██████████| 50/50 [00:01<00:00, 44.02it/s, episode_return=250, return=21.800]
Iteration 5: 100%|██████████| 50/50 [00:01<00:00, 36.54it/s, episode_return=300, return=26.300]
Iteration 6: 100%|██████████| 50/50 [00:01<00:00, 29.38it/s, episode_return=350, return=30.700]
Iteration 7: 100%|██████████| 50/50 [00:01<00:00, 31.68it/s, episode_return=400, return=29.600]
Iteration 8: 100%|██████████| 50/50 [00:01<00:00, 31.48it/s, episode_return=450, return=40.900]
Iteration 9: 100%|██████████| 50/50 [00:01<00:00, 35.61it/s, episode_return=500, return=30.500]


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,84616.19343


[34m[1mwandb[0m: Agent Starting Run: fjsipqfe with config:
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.0006424632834525206
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:00<00:00, 66.12it/s, episode_return=50, return=17.000]
Iteration 1: 100%|██████████| 50/50 [00:01<00:00, 49.37it/s, episode_return=100, return=24.700]
Iteration 2: 100%|██████████| 50/50 [00:01<00:00, 29.79it/s, episode_return=150, return=44.100]
Iteration 3: 100%|██████████| 50/50 [00:02<00:00, 24.27it/s, episode_return=200, return=57.900]
Iteration 4: 100%|██████████| 50/50 [00:02<00:00, 17.57it/s, episode_return=250, return=101.400]
Iteration 5: 100%|██████████| 50/50 [00:05<00:00,  8.94it/s, episode_return=300, return=139.200]
Iteration 6: 100%|██████████| 50/50 [00:06<00:00,  7.35it/s, episode_return=350, return=146.200]
Iteration 7: 100%|██████████| 50/50 [00:08<00:00,  5.81it/s, episode_return=400, return=245.500]
Iteration 8: 100%|██████████| 50/50 [00:09<00:00,  5.25it/s, episode_return=450, return=241.700]
Iteration 9: 100%|██████████| 50/50 [00:12<00:00,  3.94it/s, episode_return=500, return=363.700]


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,50900.41505


[34m[1mwandb[0m: Agent Starting Run: s43thgri with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 7.920520385736629e-05
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:00<00:00, 60.46it/s, episode_return=50, return=19.300]
Iteration 1: 100%|██████████| 50/50 [00:00<00:00, 50.10it/s, episode_return=100, return=26.400]
Iteration 2: 100%|██████████| 50/50 [00:00<00:00, 53.87it/s, episode_return=150, return=17.600]
Iteration 3: 100%|██████████| 50/50 [00:01<00:00, 42.11it/s, episode_return=200, return=21.500]
Iteration 4: 100%|██████████| 50/50 [00:00<00:00, 53.21it/s, episode_return=250, return=19.400]
Iteration 5: 100%|██████████| 50/50 [00:00<00:00, 61.42it/s, episode_return=300, return=15.500]
Iteration 6: 100%|██████████| 50/50 [00:00<00:00, 53.39it/s, episode_return=350, return=22.600]
Iteration 7: 100%|██████████| 50/50 [00:01<00:00, 44.82it/s, episode_return=400, return=20.300]
Iteration 8: 100%|██████████| 50/50 [00:01<00:00, 47.07it/s, episode_return=450, return=22.800]
Iteration 9: 100%|██████████| 50/50 [00:01<00:00, 44.67it/s, episode_return=500, return=23.200]


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,87192.74501


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: lrf62fff with config:
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 1.092852075356035e-05
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:00<00:00, 71.37it/s, episode_return=50, return=18.100]
Iteration 1: 100%|██████████| 50/50 [00:00<00:00, 64.93it/s, episode_return=100, return=20.300]
Iteration 2: 100%|██████████| 50/50 [00:00<00:00, 65.94it/s, episode_return=150, return=19.100]
Iteration 3: 100%|██████████| 50/50 [00:00<00:00, 75.16it/s, episode_return=200, return=15.700]
Iteration 4: 100%|██████████| 50/50 [00:00<00:00, 71.99it/s, episode_return=250, return=18.500]
Iteration 5: 100%|██████████| 50/50 [00:00<00:00, 66.64it/s, episode_return=300, return=17.300]
Iteration 6: 100%|██████████| 50/50 [00:00<00:00, 57.70it/s, episode_return=350, return=21.500]
Iteration 7: 100%|██████████| 50/50 [00:00<00:00, 70.59it/s, episode_return=400, return=17.600]
Iteration 8: 100%|██████████| 50/50 [00:00<00:00, 65.80it/s, episode_return=450, return=22.600]
Iteration 9: 100%|██████████| 50/50 [00:00<00:00, 56.27it/s, episode_return=500, return=17.600]


VBox(children=(Label(value='0.001 MB of 0.007 MB uploaded\r'), FloatProgress(value=0.16787829833064083, max=1.…

0,1
regret,▁

0,1
regret,88118.40459


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: vv2opzkm with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.0005207594563288727
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:01<00:00, 46.27it/s, episode_return=50, return=26.000]
Iteration 1: 100%|██████████| 50/50 [00:01<00:00, 40.16it/s, episode_return=100, return=26.000]
Iteration 2: 100%|██████████| 50/50 [00:01<00:00, 38.69it/s, episode_return=150, return=28.900]
Iteration 3: 100%|██████████| 50/50 [00:01<00:00, 35.47it/s, episode_return=200, return=30.000]
Iteration 4: 100%|██████████| 50/50 [00:01<00:00, 28.72it/s, episode_return=250, return=37.200]
Iteration 5: 100%|██████████| 50/50 [00:01<00:00, 26.59it/s, episode_return=300, return=50.600]
Iteration 6: 100%|██████████| 50/50 [00:02<00:00, 23.03it/s, episode_return=350, return=40.400]
Iteration 7: 100%|██████████| 50/50 [00:01<00:00, 29.32it/s, episode_return=400, return=46.100]
Iteration 8: 100%|██████████| 50/50 [00:02<00:00, 21.75it/s, episode_return=450, return=49.400]
Iteration 9: 100%|██████████| 50/50 [00:02<00:00, 20.57it/s, episode_return=500, return=60.100]


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,79636.99294


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: w7pumf0y with config:
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.0008562245716175199
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:00<00:00, 60.50it/s, episode_return=50, return=20.500]
Iteration 1: 100%|██████████| 50/50 [00:01<00:00, 46.15it/s, episode_return=100, return=31.700]
Iteration 2: 100%|██████████| 50/50 [00:02<00:00, 23.69it/s, episode_return=150, return=65.300]
Iteration 3: 100%|██████████| 50/50 [00:03<00:00, 13.93it/s, episode_return=200, return=101.800]
Iteration 4: 100%|██████████| 50/50 [00:07<00:00,  6.59it/s, episode_return=250, return=186.200]
Iteration 5: 100%|██████████| 50/50 [00:08<00:00,  5.93it/s, episode_return=300, return=282.300]
Iteration 6: 100%|██████████| 50/50 [00:10<00:00,  4.72it/s, episode_return=350, return=301.100]
Iteration 7: 100%|██████████| 50/50 [00:09<00:00,  5.03it/s, episode_return=400, return=255.900]
Iteration 8: 100%|██████████| 50/50 [00:07<00:00,  6.43it/s, episode_return=450, return=118.000]
Iteration 9: 100%|██████████| 50/50 [00:07<00:00,  6.99it/s, episode_return=500, return=222.400]


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,39093.15271


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: ff2ed42n with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.0003498610688006179
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:00<00:00, 59.25it/s, episode_return=50, return=18.800]
Iteration 1: 100%|██████████| 50/50 [00:01<00:00, 43.61it/s, episode_return=100, return=22.700]
Iteration 2: 100%|██████████| 50/50 [00:01<00:00, 48.64it/s, episode_return=150, return=26.500]
Iteration 3: 100%|██████████| 50/50 [00:01<00:00, 40.21it/s, episode_return=200, return=22.900]
Iteration 4: 100%|██████████| 50/50 [00:01<00:00, 35.92it/s, episode_return=250, return=33.400]
Iteration 5: 100%|██████████| 50/50 [00:01<00:00, 32.12it/s, episode_return=300, return=34.300]
Iteration 6: 100%|██████████| 50/50 [00:01<00:00, 26.70it/s, episode_return=350, return=56.600]
Iteration 7: 100%|██████████| 50/50 [00:01<00:00, 29.36it/s, episode_return=400, return=37.700]
Iteration 8: 100%|██████████| 50/50 [00:01<00:00, 32.51it/s, episode_return=450, return=43.400]
Iteration 9: 100%|██████████| 50/50 [00:01<00:00, 26.60it/s, episode_return=500, return=45.900]


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,83056.01538


[34m[1mwandb[0m: Agent Starting Run: n9jiyjhb with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.0007368112815092432
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:01<00:00, 29.34it/s, episode_return=50, return=29.000]
Iteration 1: 100%|██████████| 50/50 [00:01<00:00, 36.10it/s, episode_return=100, return=39.300]
Iteration 2: 100%|██████████| 50/50 [00:01<00:00, 27.75it/s, episode_return=150, return=36.000]
Iteration 3: 100%|██████████| 50/50 [00:01<00:00, 30.01it/s, episode_return=200, return=39.400]
Iteration 4: 100%|██████████| 50/50 [00:01<00:00, 29.70it/s, episode_return=250, return=27.800]
Iteration 5: 100%|██████████| 50/50 [00:01<00:00, 25.63it/s, episode_return=300, return=51.200]
Iteration 6: 100%|██████████| 50/50 [00:02<00:00, 21.88it/s, episode_return=350, return=45.700]
Iteration 7: 100%|██████████| 50/50 [00:02<00:00, 20.04it/s, episode_return=400, return=51.800]
Iteration 8: 100%|██████████| 50/50 [00:02<00:00, 18.53it/s, episode_return=450, return=67.600]
Iteration 9: 100%|██████████| 50/50 [00:02<00:00, 18.00it/s, episode_return=500, return=60.300]


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,76214.87903


[34m[1mwandb[0m: Agent Starting Run: c936aond with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.0008880120717828362
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:00<00:00, 51.55it/s, episode_return=50, return=23.900]
Iteration 1: 100%|██████████| 50/50 [00:01<00:00, 40.32it/s, episode_return=100, return=27.300]
Iteration 2: 100%|██████████| 50/50 [00:01<00:00, 27.49it/s, episode_return=150, return=43.300]
Iteration 3: 100%|██████████| 50/50 [00:01<00:00, 27.24it/s, episode_return=200, return=46.600]
Iteration 4: 100%|██████████| 50/50 [00:02<00:00, 22.02it/s, episode_return=250, return=54.500]
Iteration 5: 100%|██████████| 50/50 [00:02<00:00, 20.50it/s, episode_return=300, return=75.300]
Iteration 6: 100%|██████████| 50/50 [00:04<00:00, 11.61it/s, episode_return=350, return=97.800] 
Iteration 7: 100%|██████████| 50/50 [00:05<00:00,  9.86it/s, episode_return=400, return=123.000]
Iteration 8: 100%|██████████| 50/50 [00:06<00:00,  7.52it/s, episode_return=450, return=183.800]
Iteration 9: 100%|██████████| 50/50 [00:07<00:00,  7.07it/s, episode_return=500, return=159.900]


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,66111.83573


[34m[1mwandb[0m: Agent Starting Run: 3l1mcsow with config:
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.0004400341753377112
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:01<00:00, 42.38it/s, episode_return=50, return=26.400]
Iteration 1: 100%|██████████| 50/50 [00:01<00:00, 31.93it/s, episode_return=100, return=44.500]
Iteration 2: 100%|██████████| 50/50 [00:01<00:00, 25.32it/s, episode_return=150, return=39.200]
Iteration 3: 100%|██████████| 50/50 [00:02<00:00, 21.93it/s, episode_return=200, return=64.100]
Iteration 4: 100%|██████████| 50/50 [00:02<00:00, 18.09it/s, episode_return=250, return=69.300]
Iteration 5: 100%|██████████| 50/50 [00:03<00:00, 14.08it/s, episode_return=300, return=102.600]
Iteration 6: 100%|██████████| 50/50 [00:04<00:00, 11.34it/s, episode_return=350, return=115.700]
Iteration 7: 100%|██████████| 50/50 [00:06<00:00,  7.59it/s, episode_return=400, return=212.800]
Iteration 8: 100%|██████████| 50/50 [00:08<00:00,  6.09it/s, episode_return=450, return=218.900]
Iteration 9: 100%|██████████| 50/50 [00:09<00:00,  5.54it/s, episode_return=500, return=206.800]


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,55358.41381


[34m[1mwandb[0m: Agent Starting Run: xus4ye32 with config:
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.00010449996384871948
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:00<00:00, 57.00it/s, episode_return=50, return=17.100]
Iteration 1: 100%|██████████| 50/50 [00:00<00:00, 53.67it/s, episode_return=100, return=22.000]
Iteration 2: 100%|██████████| 50/50 [00:01<00:00, 41.24it/s, episode_return=150, return=30.700]
Iteration 3: 100%|██████████| 50/50 [00:01<00:00, 48.35it/s, episode_return=200, return=27.600]
Iteration 4: 100%|██████████| 50/50 [00:01<00:00, 38.93it/s, episode_return=250, return=33.500]
Iteration 5: 100%|██████████| 50/50 [00:01<00:00, 39.05it/s, episode_return=300, return=31.200]
Iteration 6: 100%|██████████| 50/50 [00:01<00:00, 38.50it/s, episode_return=350, return=35.300]
Iteration 7: 100%|██████████| 50/50 [00:01<00:00, 32.74it/s, episode_return=400, return=38.500]
Iteration 8: 100%|██████████| 50/50 [00:01<00:00, 33.89it/s, episode_return=450, return=24.500]
Iteration 9: 100%|██████████| 50/50 [00:01<00:00, 30.39it/s, episode_return=500, return=41.800]


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,82842.4627


[34m[1mwandb[0m: Agent Starting Run: yxcikcae with config:
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.0004690008177784307
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:01<00:00, 39.22it/s, episode_return=50, return=35.300]
Iteration 1: 100%|██████████| 50/50 [00:01<00:00, 33.77it/s, episode_return=100, return=46.800]
Iteration 2: 100%|██████████| 50/50 [00:02<00:00, 24.05it/s, episode_return=150, return=48.600]
Iteration 3: 100%|██████████| 50/50 [00:01<00:00, 25.10it/s, episode_return=200, return=45.300]
Iteration 4: 100%|██████████| 50/50 [00:02<00:00, 21.73it/s, episode_return=250, return=77.600]
Iteration 5: 100%|██████████| 50/50 [00:03<00:00, 14.90it/s, episode_return=300, return=77.600] 
Iteration 6: 100%|██████████| 50/50 [00:04<00:00, 11.62it/s, episode_return=350, return=109.200]
Iteration 7: 100%|██████████| 50/50 [00:05<00:00,  8.41it/s, episode_return=400, return=171.700]
Iteration 8: 100%|██████████| 50/50 [00:06<00:00,  7.32it/s, episode_return=450, return=214.600]
Iteration 9: 100%|██████████| 50/50 [00:08<00:00,  6.05it/s, episode_return=500, return=222.200]


VBox(children=(Label(value='0.001 MB of 0.007 MB uploaded\r'), FloatProgress(value=0.17058823529411765, max=1.…

0,1
regret,▁

0,1
regret,57640.47571


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: oe3gimz0 with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.00010262345380182156
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:00<00:00, 53.72it/s, episode_return=50, return=24.600]
Iteration 1: 100%|██████████| 50/50 [00:01<00:00, 49.45it/s, episode_return=100, return=19.700]
Iteration 2: 100%|██████████| 50/50 [00:01<00:00, 46.11it/s, episode_return=150, return=25.800]
Iteration 3: 100%|██████████| 50/50 [00:00<00:00, 51.10it/s, episode_return=200, return=18.700]
Iteration 4: 100%|██████████| 50/50 [00:01<00:00, 43.49it/s, episode_return=250, return=25.300]
Iteration 5: 100%|██████████| 50/50 [00:01<00:00, 44.47it/s, episode_return=300, return=24.000]
Iteration 6: 100%|██████████| 50/50 [00:01<00:00, 45.52it/s, episode_return=350, return=23.800]
Iteration 7: 100%|██████████| 50/50 [00:01<00:00, 40.67it/s, episode_return=400, return=29.700]
Iteration 8: 100%|██████████| 50/50 [00:01<00:00, 42.83it/s, episode_return=450, return=25.600]
Iteration 9: 100%|██████████| 50/50 [00:01<00:00, 43.21it/s, episode_return=500, return=22.000]


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,86054.28657


[34m[1mwandb[0m: Agent Starting Run: rb1vk7ih with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.0007539909497477961
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:01<00:00, 42.47it/s, episode_return=50, return=24.600]
Iteration 1: 100%|██████████| 50/50 [00:01<00:00, 39.20it/s, episode_return=100, return=29.500]
Iteration 2: 100%|██████████| 50/50 [00:01<00:00, 27.96it/s, episode_return=150, return=49.100]
Iteration 3: 100%|██████████| 50/50 [00:01<00:00, 27.44it/s, episode_return=200, return=33.900]
Iteration 4: 100%|██████████| 50/50 [00:02<00:00, 22.46it/s, episode_return=250, return=66.700]
Iteration 5: 100%|██████████| 50/50 [00:02<00:00, 17.47it/s, episode_return=300, return=86.600]
Iteration 6: 100%|██████████| 50/50 [00:03<00:00, 13.41it/s, episode_return=350, return=89.400] 
Iteration 7: 100%|██████████| 50/50 [00:04<00:00, 10.33it/s, episode_return=400, return=125.500]
Iteration 8: 100%|██████████| 50/50 [00:07<00:00,  6.91it/s, episode_return=450, return=175.200]
Iteration 9: 100%|██████████| 50/50 [00:08<00:00,  5.56it/s, episode_return=500, return=212.300]


VBox(children=(Label(value='0.001 MB of 0.007 MB uploaded\r'), FloatProgress(value=0.17054475773336983, max=1.…

0,1
regret,▁

0,1
regret,64506.65149


[34m[1mwandb[0m: Agent Starting Run: js0nhzxq with config:
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.00033067403860493634
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:00<00:00, 50.49it/s, episode_return=50, return=29.600]
Iteration 1: 100%|██████████| 50/50 [00:01<00:00, 37.02it/s, episode_return=100, return=31.100]
Iteration 2: 100%|██████████| 50/50 [00:01<00:00, 40.58it/s, episode_return=150, return=35.100]
Iteration 3: 100%|██████████| 50/50 [00:01<00:00, 39.64it/s, episode_return=200, return=42.500]
Iteration 4: 100%|██████████| 50/50 [00:01<00:00, 36.05it/s, episode_return=250, return=38.800]
Iteration 5: 100%|██████████| 50/50 [00:01<00:00, 30.03it/s, episode_return=300, return=47.900]
Iteration 6: 100%|██████████| 50/50 [00:01<00:00, 28.87it/s, episode_return=350, return=46.000]
Iteration 7: 100%|██████████| 50/50 [00:01<00:00, 26.51it/s, episode_return=400, return=69.600]
Iteration 8: 100%|██████████| 50/50 [00:02<00:00, 23.04it/s, episode_return=450, return=53.800]
Iteration 9: 100%|██████████| 50/50 [00:02<00:00, 18.00it/s, episode_return=500, return=74.800]


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,78887.0459


[34m[1mwandb[0m: Agent Starting Run: z3qm2i9m with config:
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.0009140919351741104
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:01<00:00, 36.46it/s, episode_return=50, return=38.400]
Iteration 1: 100%|██████████| 50/50 [00:02<00:00, 23.73it/s, episode_return=100, return=50.200]
Iteration 2: 100%|██████████| 50/50 [00:03<00:00, 15.39it/s, episode_return=150, return=114.200]
Iteration 3: 100%|██████████| 50/50 [00:07<00:00,  6.70it/s, episode_return=200, return=232.700]
Iteration 4: 100%|██████████| 50/50 [00:07<00:00,  6.77it/s, episode_return=250, return=159.800]
Iteration 5: 100%|██████████| 50/50 [00:08<00:00,  5.99it/s, episode_return=300, return=203.200]
Iteration 6: 100%|██████████| 50/50 [00:09<00:00,  5.11it/s, episode_return=350, return=310.000]
Iteration 7: 100%|██████████| 50/50 [00:14<00:00,  3.37it/s, episode_return=400, return=436.100]
Iteration 8: 100%|██████████| 50/50 [00:14<00:00,  3.55it/s, episode_return=450, return=258.400]
Iteration 9: 100%|██████████| 50/50 [00:14<00:00,  3.52it/s, episode_return=500, return=343.000]


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,28525.93294


[34m[1mwandb[0m: Agent Starting Run: jxr87o6u with config:
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.00036876341011009674
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:00<00:00, 52.48it/s, episode_return=50, return=23.600]
Iteration 1: 100%|██████████| 50/50 [00:00<00:00, 53.93it/s, episode_return=100, return=22.900]
Iteration 2: 100%|██████████| 50/50 [00:01<00:00, 48.28it/s, episode_return=150, return=29.300]
Iteration 3: 100%|██████████| 50/50 [00:01<00:00, 42.15it/s, episode_return=200, return=28.200]
Iteration 4: 100%|██████████| 50/50 [00:01<00:00, 30.50it/s, episode_return=250, return=47.700]
Iteration 5: 100%|██████████| 50/50 [00:01<00:00, 30.32it/s, episode_return=300, return=46.900]
Iteration 6: 100%|██████████| 50/50 [00:01<00:00, 27.16it/s, episode_return=350, return=49.700]
Iteration 7: 100%|██████████| 50/50 [00:02<00:00, 23.83it/s, episode_return=400, return=67.900]
Iteration 8: 100%|██████████| 50/50 [00:02<00:00, 21.73it/s, episode_return=450, return=61.400]
Iteration 9: 100%|██████████| 50/50 [00:02<00:00, 19.27it/s, episode_return=500, return=71.600]


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,78503.62099


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: t9y833m7 with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.0009922857810341034
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:00<00:00, 57.28it/s, episode_return=50, return=23.000]
Iteration 1: 100%|██████████| 50/50 [00:01<00:00, 32.77it/s, episode_return=100, return=38.800]
Iteration 2: 100%|██████████| 50/50 [00:01<00:00, 25.17it/s, episode_return=150, return=48.600]
Iteration 3: 100%|██████████| 50/50 [00:02<00:00, 19.10it/s, episode_return=200, return=70.500]
Iteration 4: 100%|██████████| 50/50 [00:02<00:00, 19.79it/s, episode_return=250, return=47.400]
Iteration 5: 100%|██████████| 50/50 [00:04<00:00, 11.74it/s, episode_return=300, return=124.100]
Iteration 6: 100%|██████████| 50/50 [00:06<00:00,  7.66it/s, episode_return=350, return=165.300]
Iteration 7: 100%|██████████| 50/50 [00:08<00:00,  6.15it/s, episode_return=400, return=317.000]
Iteration 8: 100%|██████████| 50/50 [00:09<00:00,  5.15it/s, episode_return=450, return=236.800]
Iteration 9: 100%|██████████| 50/50 [00:09<00:00,  5.21it/s, episode_return=500, return=231.800]


VBox(children=(Label(value='0.001 MB of 0.007 MB uploaded\r'), FloatProgress(value=0.16720386784850927, max=1.…

0,1
regret,▁

0,1
regret,52123.51002


[34m[1mwandb[0m: Agent Starting Run: on2i5xqr with config:
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.0005539997783668911
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011111111111111112, max=1.0…

Iteration 0: 100%|██████████| 50/50 [00:00<00:00, 51.43it/s, episode_return=50, return=37.100]
Iteration 1: 100%|██████████| 50/50 [00:01<00:00, 32.20it/s, episode_return=100, return=47.200]
Iteration 2: 100%|██████████| 50/50 [00:02<00:00, 21.02it/s, episode_return=150, return=74.100]
Iteration 3: 100%|██████████| 50/50 [00:03<00:00, 12.53it/s, episode_return=200, return=121.700]
Iteration 4: 100%|██████████| 50/50 [00:05<00:00,  8.41it/s, episode_return=250, return=222.900]
Iteration 5: 100%|██████████| 50/50 [00:05<00:00,  9.01it/s, episode_return=300, return=193.300]
Iteration 6: 100%|██████████| 50/50 [00:06<00:00,  7.68it/s, episode_return=350, return=158.400]
Iteration 7: 100%|██████████| 50/50 [00:06<00:00,  7.35it/s, episode_return=400, return=257.000]
Iteration 8: 100%|██████████| 50/50 [00:08<00:00,  6.25it/s, episode_return=450, return=173.300]
Iteration 9: 100%|██████████| 50/50 [00:05<00:00,  9.36it/s, episode_return=500, return=185.000]


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,34295.26396


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: xtbwoj77 with config:
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.0004228905565675447
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:01<00:00, 47.02it/s, episode_return=50, return=37.700]
Iteration 1: 100%|██████████| 50/50 [00:01<00:00, 31.51it/s, episode_return=100, return=58.700]
Iteration 2: 100%|██████████| 50/50 [00:01<00:00, 27.86it/s, episode_return=150, return=63.200]
Iteration 3: 100%|██████████| 50/50 [00:02<00:00, 23.11it/s, episode_return=200, return=67.000]
Iteration 4: 100%|██████████| 50/50 [00:02<00:00, 17.79it/s, episode_return=250, return=82.100] 
Iteration 5: 100%|██████████| 50/50 [00:03<00:00, 12.94it/s, episode_return=300, return=142.800]
Iteration 6: 100%|██████████| 50/50 [00:04<00:00, 10.53it/s, episode_return=350, return=165.700]
Iteration 7: 100%|██████████| 50/50 [00:05<00:00,  8.35it/s, episode_return=400, return=169.300]
Iteration 8: 100%|██████████| 50/50 [00:05<00:00,  8.59it/s, episode_return=450, return=185.700]
Iteration 9: 100%|██████████| 50/50 [00:06<00:00,  7.74it/s, episode_return=500, return=248.100]


VBox(children=(Label(value='0.001 MB of 0.007 MB uploaded\r'), FloatProgress(value=0.16753997044202606, max=1.…

0,1
regret,▁

0,1
regret,50364.87272


[34m[1mwandb[0m: Agent Starting Run: ton63k0q with config:
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 4.7371209838605094e-05
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:00<00:00, 54.88it/s, episode_return=50, return=25.200]
Iteration 1: 100%|██████████| 50/50 [00:00<00:00, 54.82it/s, episode_return=100, return=21.900]
Iteration 2: 100%|██████████| 50/50 [00:01<00:00, 47.46it/s, episode_return=150, return=33.200]
Iteration 3: 100%|██████████| 50/50 [00:00<00:00, 53.14it/s, episode_return=200, return=28.200]
Iteration 4: 100%|██████████| 50/50 [00:00<00:00, 59.80it/s, episode_return=250, return=27.900]
Iteration 5: 100%|██████████| 50/50 [00:01<00:00, 44.94it/s, episode_return=300, return=32.000]
Iteration 6: 100%|██████████| 50/50 [00:00<00:00, 50.27it/s, episode_return=350, return=27.600]
Iteration 7: 100%|██████████| 50/50 [00:01<00:00, 48.83it/s, episode_return=400, return=24.900]
Iteration 8: 100%|██████████| 50/50 [00:00<00:00, 51.60it/s, episode_return=450, return=26.900]
Iteration 9: 100%|██████████| 50/50 [00:01<00:00, 42.59it/s, episode_return=500, return=26.700]


VBox(children=(Label(value='0.001 MB of 0.007 MB uploaded\r'), FloatProgress(value=0.17077850877192982, max=1.…

0,1
regret,▁

0,1
regret,83484.76843


[34m[1mwandb[0m: Agent Starting Run: rjr15ous with config:
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.0005521262461258088
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011288888888925108, max=1.0…

Iteration 0: 100%|██████████| 50/50 [00:00<00:00, 63.36it/s, episode_return=50, return=21.900]
Iteration 1: 100%|██████████| 50/50 [00:00<00:00, 63.94it/s, episode_return=100, return=28.300]
Iteration 2: 100%|██████████| 50/50 [00:01<00:00, 47.80it/s, episode_return=150, return=50.300]
Iteration 3: 100%|██████████| 50/50 [00:01<00:00, 41.47it/s, episode_return=200, return=29.300]
Iteration 4: 100%|██████████| 50/50 [00:01<00:00, 32.40it/s, episode_return=250, return=52.700]
Iteration 5: 100%|██████████| 50/50 [00:01<00:00, 25.32it/s, episode_return=300, return=69.900]
Iteration 6: 100%|██████████| 50/50 [00:02<00:00, 18.39it/s, episode_return=350, return=102.900]
Iteration 7: 100%|██████████| 50/50 [00:04<00:00, 11.34it/s, episode_return=400, return=176.700]
Iteration 8: 100%|██████████| 50/50 [00:03<00:00, 13.13it/s, episode_return=450, return=147.100]
Iteration 9: 100%|██████████| 50/50 [00:04<00:00, 10.28it/s, episode_return=500, return=125.800]


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,64338.65179


[34m[1mwandb[0m: Agent Starting Run: 0nfj9wpf with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.0003758993048066655
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:00<00:00, 65.49it/s, episode_return=50, return=21.300]
Iteration 1: 100%|██████████| 50/50 [00:00<00:00, 53.73it/s, episode_return=100, return=34.800]
Iteration 2: 100%|██████████| 50/50 [00:00<00:00, 58.59it/s, episode_return=150, return=28.500]
Iteration 3: 100%|██████████| 50/50 [00:01<00:00, 47.91it/s, episode_return=200, return=25.700]
Iteration 4: 100%|██████████| 50/50 [00:01<00:00, 39.99it/s, episode_return=250, return=21.400]
Iteration 5: 100%|██████████| 50/50 [00:01<00:00, 43.70it/s, episode_return=300, return=23.800]
Iteration 6: 100%|██████████| 50/50 [00:01<00:00, 43.80it/s, episode_return=350, return=38.800]
Iteration 7: 100%|██████████| 50/50 [00:01<00:00, 43.69it/s, episode_return=400, return=33.600]
Iteration 8: 100%|██████████| 50/50 [00:01<00:00, 31.85it/s, episode_return=450, return=37.300]
Iteration 9: 100%|██████████| 50/50 [00:01<00:00, 31.50it/s, episode_return=500, return=40.200]


VBox(children=(Label(value='0.001 MB of 0.007 MB uploaded\r'), FloatProgress(value=0.1678557006326558, max=1.0…

0,1
regret,▁

0,1
regret,83420.44312


[34m[1mwandb[0m: Agent Starting Run: rm3p2uxd with config:
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.0006779948678443814
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:01<00:00, 46.36it/s, episode_return=50, return=30.600]
Iteration 1: 100%|██████████| 50/50 [00:01<00:00, 29.18it/s, episode_return=100, return=55.900]
Iteration 2: 100%|██████████| 50/50 [00:01<00:00, 28.78it/s, episode_return=150, return=67.000]
Iteration 3: 100%|██████████| 50/50 [00:02<00:00, 19.68it/s, episode_return=200, return=113.100]
Iteration 4: 100%|██████████| 50/50 [00:04<00:00, 10.85it/s, episode_return=250, return=111.800]
Iteration 5: 100%|██████████| 50/50 [00:04<00:00, 10.09it/s, episode_return=300, return=180.700]
Iteration 6: 100%|██████████| 50/50 [00:07<00:00,  6.77it/s, episode_return=350, return=252.400]
Iteration 7: 100%|██████████| 50/50 [00:10<00:00,  4.71it/s, episode_return=400, return=354.400]
Iteration 8: 100%|██████████| 50/50 [00:12<00:00,  3.86it/s, episode_return=450, return=393.200]
Iteration 9: 100%|██████████| 50/50 [00:12<00:00,  3.92it/s, episode_return=500, return=405.900]


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,42827.44166


[34m[1mwandb[0m: Agent Starting Run: 1kwctn1k with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.0005610687965770663
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:01<00:00, 40.57it/s, episode_return=50, return=25.500]
Iteration 1: 100%|██████████| 50/50 [00:01<00:00, 40.33it/s, episode_return=100, return=23.000]
Iteration 2: 100%|██████████| 50/50 [00:01<00:00, 34.07it/s, episode_return=150, return=32.700]
Iteration 3: 100%|██████████| 50/50 [00:01<00:00, 27.85it/s, episode_return=200, return=35.500]
Iteration 4: 100%|██████████| 50/50 [00:02<00:00, 23.30it/s, episode_return=250, return=36.600]
Iteration 5: 100%|██████████| 50/50 [00:02<00:00, 18.77it/s, episode_return=300, return=60.300]
Iteration 6: 100%|██████████| 50/50 [00:02<00:00, 19.57it/s, episode_return=350, return=49.500]
Iteration 7: 100%|██████████| 50/50 [00:03<00:00, 15.22it/s, episode_return=400, return=72.900]
Iteration 8: 100%|██████████| 50/50 [00:03<00:00, 13.40it/s, episode_return=450, return=98.200]
Iteration 9: 100%|██████████| 50/50 [00:04<00:00, 10.19it/s, episode_return=500, return=88.800] 


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,74444.86624


[34m[1mwandb[0m: Agent Starting Run: cp93kozo with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.00019863995510722925
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:00<00:00, 56.97it/s, episode_return=50, return=22.300]
Iteration 1: 100%|██████████| 50/50 [00:00<00:00, 54.66it/s, episode_return=100, return=20.500]
Iteration 2: 100%|██████████| 50/50 [00:00<00:00, 55.46it/s, episode_return=150, return=25.400]
Iteration 3: 100%|██████████| 50/50 [00:00<00:00, 55.44it/s, episode_return=200, return=25.200]
Iteration 4: 100%|██████████| 50/50 [00:01<00:00, 47.94it/s, episode_return=250, return=24.600]
Iteration 5: 100%|██████████| 50/50 [00:01<00:00, 46.43it/s, episode_return=300, return=23.600]
Iteration 6: 100%|██████████| 50/50 [00:01<00:00, 41.44it/s, episode_return=350, return=29.300]
Iteration 7: 100%|██████████| 50/50 [00:01<00:00, 31.69it/s, episode_return=400, return=25.700]
Iteration 8: 100%|██████████| 50/50 [00:01<00:00, 35.75it/s, episode_return=450, return=32.100]
Iteration 9: 100%|██████████| 50/50 [00:01<00:00, 34.43it/s, episode_return=500, return=30.400]


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,85590.80846


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: cnb5tzjc with config:
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.000994124907048311
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:00<00:00, 62.84it/s, episode_return=50, return=28.400]
Iteration 1: 100%|██████████| 50/50 [00:01<00:00, 33.51it/s, episode_return=100, return=51.100]
Iteration 2: 100%|██████████| 50/50 [00:03<00:00, 14.98it/s, episode_return=150, return=71.400]
Iteration 3: 100%|██████████| 50/50 [00:03<00:00, 13.19it/s, episode_return=200, return=97.200] 
Iteration 4: 100%|██████████| 50/50 [00:03<00:00, 12.63it/s, episode_return=250, return=87.100] 
Iteration 5: 100%|██████████| 50/50 [00:07<00:00,  6.58it/s, episode_return=300, return=349.900]
Iteration 6: 100%|██████████| 50/50 [00:07<00:00,  6.31it/s, episode_return=350, return=176.300]
Iteration 7: 100%|██████████| 50/50 [00:09<00:00,  5.50it/s, episode_return=400, return=328.100]
Iteration 8: 100%|██████████| 50/50 [00:07<00:00,  6.39it/s, episode_return=450, return=416.100]
Iteration 9: 100%|██████████| 50/50 [00:11<00:00,  4.33it/s, episode_return=500, return=457.800]


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,37607.87621


[34m[1mwandb[0m: Agent Starting Run: bgvh757o with config:
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.0007903617128769745
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:00<00:00, 60.39it/s, episode_return=50, return=29.300]
Iteration 1: 100%|██████████| 50/50 [00:01<00:00, 48.05it/s, episode_return=100, return=32.900]
Iteration 2: 100%|██████████| 50/50 [00:01<00:00, 39.10it/s, episode_return=150, return=42.000]
Iteration 3: 100%|██████████| 50/50 [00:01<00:00, 35.08it/s, episode_return=200, return=54.800]
Iteration 4: 100%|██████████| 50/50 [00:01<00:00, 26.69it/s, episode_return=250, return=69.900]
Iteration 5: 100%|██████████| 50/50 [00:01<00:00, 26.38it/s, episode_return=300, return=61.900]
Iteration 6: 100%|██████████| 50/50 [00:02<00:00, 20.84it/s, episode_return=350, return=92.700]
Iteration 7: 100%|██████████| 50/50 [00:04<00:00, 12.03it/s, episode_return=400, return=201.900]
Iteration 8: 100%|██████████| 50/50 [00:05<00:00,  8.84it/s, episode_return=450, return=168.500]
Iteration 9: 100%|██████████| 50/50 [00:08<00:00,  5.67it/s, episode_return=500, return=298.200]


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,61020.52917


[34m[1mwandb[0m: Agent Starting Run: sngm592n with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.0002796433615944547
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:00<00:00, 68.46it/s, episode_return=50, return=18.800] 
Iteration 1: 100%|██████████| 50/50 [00:00<00:00, 56.57it/s, episode_return=100, return=25.500]
Iteration 2: 100%|██████████| 50/50 [00:00<00:00, 56.74it/s, episode_return=150, return=32.400]
Iteration 3: 100%|██████████| 50/50 [00:00<00:00, 55.67it/s, episode_return=200, return=20.900]
Iteration 4: 100%|██████████| 50/50 [00:01<00:00, 47.15it/s, episode_return=250, return=28.200]
Iteration 5: 100%|██████████| 50/50 [00:00<00:00, 51.55it/s, episode_return=300, return=32.200]
Iteration 6: 100%|██████████| 50/50 [00:01<00:00, 44.58it/s, episode_return=350, return=36.200]
Iteration 7: 100%|██████████| 50/50 [00:01<00:00, 42.91it/s, episode_return=400, return=32.300]
Iteration 8: 100%|██████████| 50/50 [00:01<00:00, 37.71it/s, episode_return=450, return=35.600]
Iteration 9: 100%|██████████| 50/50 [00:01<00:00, 39.40it/s, episode_return=500, return=34.100]


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,83689.4252


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: wpqkqb9w with config:
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.0008187386093650488
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:00<00:00, 56.36it/s, episode_return=50, return=40.700]
Iteration 1: 100%|██████████| 50/50 [00:01<00:00, 41.82it/s, episode_return=100, return=44.200]
Iteration 2: 100%|██████████| 50/50 [00:01<00:00, 30.89it/s, episode_return=150, return=57.300]
Iteration 3: 100%|██████████| 50/50 [00:01<00:00, 27.09it/s, episode_return=200, return=70.400]
Iteration 4: 100%|██████████| 50/50 [00:02<00:00, 16.75it/s, episode_return=250, return=131.300]
Iteration 5: 100%|██████████| 50/50 [00:04<00:00, 11.02it/s, episode_return=300, return=104.300]
Iteration 6: 100%|██████████| 50/50 [00:05<00:00,  8.43it/s, episode_return=350, return=245.900]
Iteration 7: 100%|██████████| 50/50 [00:07<00:00,  7.01it/s, episode_return=400, return=186.700]
Iteration 8: 100%|██████████| 50/50 [00:05<00:00,  9.30it/s, episode_return=450, return=270.900]
Iteration 9: 100%|██████████| 50/50 [00:10<00:00,  4.96it/s, episode_return=500, return=287.000]


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,44387.24604


[34m[1mwandb[0m: Agent Starting Run: 52umiorp with config:
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.00048440663789336394
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:00<00:00, 76.17it/s, episode_return=50, return=25.700]
Iteration 1: 100%|██████████| 50/50 [00:00<00:00, 63.58it/s, episode_return=100, return=26.000]
Iteration 2: 100%|██████████| 50/50 [00:00<00:00, 55.34it/s, episode_return=150, return=33.800]
Iteration 3: 100%|██████████| 50/50 [00:01<00:00, 42.59it/s, episode_return=200, return=33.300]
Iteration 4: 100%|██████████| 50/50 [00:01<00:00, 39.68it/s, episode_return=250, return=46.800]
Iteration 5: 100%|██████████| 50/50 [00:01<00:00, 37.27it/s, episode_return=300, return=46.200]
Iteration 6: 100%|██████████| 50/50 [00:01<00:00, 33.84it/s, episode_return=350, return=62.600]
Iteration 7: 100%|██████████| 50/50 [00:02<00:00, 23.80it/s, episode_return=400, return=93.300]
Iteration 8: 100%|██████████| 50/50 [00:03<00:00, 16.55it/s, episode_return=450, return=143.700]
Iteration 9: 100%|██████████| 50/50 [00:05<00:00,  9.07it/s, episode_return=500, return=214.600]


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,74405.59605


[34m[1mwandb[0m: Agent Starting Run: pdafgtib with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.00022392644032029997
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:00<00:00, 63.29it/s, episode_return=50, return=23.900]
Iteration 1: 100%|██████████| 50/50 [00:00<00:00, 58.10it/s, episode_return=100, return=26.400]
Iteration 2: 100%|██████████| 50/50 [00:01<00:00, 42.60it/s, episode_return=150, return=19.000]
Iteration 3: 100%|██████████| 50/50 [00:01<00:00, 44.69it/s, episode_return=200, return=25.600]
Iteration 4: 100%|██████████| 50/50 [07:23<00:00,  8.88s/it, episode_return=250, return=31.900]
Iteration 5: 100%|██████████| 50/50 [00:01<00:00, 36.92it/s, episode_return=300, return=28.700]
Iteration 6: 100%|██████████| 50/50 [00:01<00:00, 37.23it/s, episode_return=350, return=26.000]
Iteration 7:  22%|██▏       | 11/50 [00:00<00:01, 24.88it/s, episode_return=360, return=36.200][34m[1mwandb[0m: Network error (ConnectionError), entering retry loop.
Iteration 7: 100%|██████████| 50/50 [00:02<00:00, 20.39it/s, episode_return=400, return=34.300]
Iteration 8: 100%|██████████| 50/50 [00:03<00:00, 16.42it/s, epi

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,83043.61792


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: r859xdtx with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.00041372457207685905
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:01<00:00, 46.12it/s, episode_return=50, return=24.600]
Iteration 1: 100%|██████████| 50/50 [00:01<00:00, 39.89it/s, episode_return=100, return=30.800]
Iteration 2: 100%|██████████| 50/50 [00:01<00:00, 30.72it/s, episode_return=150, return=34.600]
Iteration 3: 100%|██████████| 50/50 [00:01<00:00, 32.13it/s, episode_return=200, return=31.900]
Iteration 4: 100%|██████████| 50/50 [00:01<00:00, 45.19it/s, episode_return=250, return=39.000]
Iteration 5: 100%|██████████| 50/50 [00:01<00:00, 35.67it/s, episode_return=300, return=54.700]
Iteration 6: 100%|██████████| 50/50 [00:01<00:00, 34.68it/s, episode_return=350, return=54.900]
Iteration 7: 100%|██████████| 50/50 [00:01<00:00, 34.15it/s, episode_return=400, return=46.500]
Iteration 8: 100%|██████████| 50/50 [00:01<00:00, 29.98it/s, episode_return=450, return=55.500]
Iteration 9: 100%|██████████| 50/50 [00:02<00:00, 23.91it/s, episode_return=500, return=46.400]


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,79195.4267


[34m[1mwandb[0m: Agent Starting Run: hq1516a1 with config:
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 3.3521696456586216e-05
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:00<00:00, 67.73it/s, episode_return=50, return=20.000]
Iteration 1: 100%|██████████| 50/50 [00:00<00:00, 65.74it/s, episode_return=100, return=20.000]
Iteration 2: 100%|██████████| 50/50 [00:00<00:00, 57.82it/s, episode_return=150, return=18.700]
Iteration 3: 100%|██████████| 50/50 [00:00<00:00, 62.91it/s, episode_return=200, return=15.700]
Iteration 4: 100%|██████████| 50/50 [00:00<00:00, 54.24it/s, episode_return=250, return=22.800]
Iteration 5: 100%|██████████| 50/50 [00:00<00:00, 63.43it/s, episode_return=300, return=18.900]
Iteration 6: 100%|██████████| 50/50 [00:00<00:00, 56.09it/s, episode_return=350, return=20.200]
Iteration 7: 100%|██████████| 50/50 [00:00<00:00, 63.57it/s, episode_return=400, return=16.400]
Iteration 8: 100%|██████████| 50/50 [00:01<00:00, 47.99it/s, episode_return=450, return=27.600]
Iteration 9: 100%|██████████| 50/50 [00:01<00:00, 46.84it/s, episode_return=500, return=23.900]


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,87493.74691


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 07yerxh9 with config:
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.0008785199999144352
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:01<00:00, 33.10it/s, episode_return=50, return=29.600]
Iteration 1: 100%|██████████| 50/50 [00:02<00:00, 18.77it/s, episode_return=100, return=67.000]
Iteration 2: 100%|██████████| 50/50 [00:04<00:00, 11.06it/s, episode_return=150, return=161.400]
Iteration 3: 100%|██████████| 50/50 [00:06<00:00,  8.32it/s, episode_return=200, return=144.800]
Iteration 4: 100%|██████████| 50/50 [00:09<00:00,  5.25it/s, episode_return=250, return=256.800]
Iteration 5: 100%|██████████| 50/50 [00:10<00:00,  4.58it/s, episode_return=300, return=180.200]
Iteration 6: 100%|██████████| 50/50 [00:08<00:00,  5.95it/s, episode_return=350, return=241.000]
Iteration 7: 100%|██████████| 50/50 [00:14<00:00,  3.57it/s, episode_return=400, return=391.600]
Iteration 8: 100%|██████████| 50/50 [00:13<00:00,  3.76it/s, episode_return=450, return=317.800]
Iteration 9: 100%|██████████| 50/50 [00:14<00:00,  3.57it/s, episode_return=500, return=446.700]


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,30139.75663


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: jxnndask with config:
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.0003394658878817113
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011277777777932999, max=1.0…

Iteration 0: 100%|██████████| 50/50 [00:01<00:00, 48.20it/s, episode_return=50, return=23.900]
Iteration 1: 100%|██████████| 50/50 [00:01<00:00, 42.03it/s, episode_return=100, return=31.300]
Iteration 2: 100%|██████████| 50/50 [00:01<00:00, 35.24it/s, episode_return=150, return=39.300]
Iteration 3: 100%|██████████| 50/50 [00:01<00:00, 32.08it/s, episode_return=200, return=40.700]
Iteration 4: 100%|██████████| 50/50 [00:01<00:00, 29.37it/s, episode_return=250, return=49.200]
Iteration 5: 100%|██████████| 50/50 [00:01<00:00, 25.51it/s, episode_return=300, return=49.000]
Iteration 6: 100%|██████████| 50/50 [00:01<00:00, 26.85it/s, episode_return=350, return=38.800]
Iteration 7: 100%|██████████| 50/50 [00:02<00:00, 21.72it/s, episode_return=400, return=56.900]
Iteration 8: 100%|██████████| 50/50 [00:02<00:00, 18.94it/s, episode_return=450, return=69.000]
Iteration 9: 100%|██████████| 50/50 [00:03<00:00, 15.92it/s, episode_return=500, return=92.800]


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,76487.61959


[34m[1mwandb[0m: Agent Starting Run: 05bgads2 with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.00011250256671919614
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:00<00:00, 58.82it/s, episode_return=50, return=19.100]
Iteration 1: 100%|██████████| 50/50 [00:00<00:00, 51.00it/s, episode_return=100, return=18.900]
Iteration 2: 100%|██████████| 50/50 [00:01<00:00, 40.50it/s, episode_return=150, return=27.500]
Iteration 3: 100%|██████████| 50/50 [00:01<00:00, 43.06it/s, episode_return=200, return=27.300]
Iteration 4: 100%|██████████| 50/50 [00:01<00:00, 45.15it/s, episode_return=250, return=33.800]
Iteration 5: 100%|██████████| 50/50 [00:01<00:00, 46.91it/s, episode_return=300, return=31.800]
Iteration 6: 100%|██████████| 50/50 [00:01<00:00, 36.73it/s, episode_return=350, return=26.300]
Iteration 7: 100%|██████████| 50/50 [00:01<00:00, 46.65it/s, episode_return=400, return=23.400]
Iteration 8: 100%|██████████| 50/50 [00:01<00:00, 41.63it/s, episode_return=450, return=28.900]
Iteration 9: 100%|██████████| 50/50 [00:01<00:00, 37.38it/s, episode_return=500, return=28.700]


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,85083.86847


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: f2pbmg0h with config:
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.0001367270032390846
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:00<00:00, 56.82it/s, episode_return=50, return=19.600]
Iteration 1: 100%|██████████| 50/50 [00:00<00:00, 60.87it/s, episode_return=100, return=20.300]
Iteration 2: 100%|██████████| 50/50 [00:00<00:00, 58.23it/s, episode_return=150, return=21.800]
Iteration 3: 100%|██████████| 50/50 [00:01<00:00, 46.60it/s, episode_return=200, return=28.700]
Iteration 4: 100%|██████████| 50/50 [00:01<00:00, 47.87it/s, episode_return=250, return=22.500]
Iteration 5: 100%|██████████| 50/50 [00:01<00:00, 34.46it/s, episode_return=300, return=25.200]
Iteration 6: 100%|██████████| 50/50 [00:01<00:00, 38.46it/s, episode_return=350, return=32.100]
Iteration 7: 100%|██████████| 50/50 [00:01<00:00, 41.06it/s, episode_return=400, return=23.900]
Iteration 8: 100%|██████████| 50/50 [00:01<00:00, 36.46it/s, episode_return=450, return=33.400]
Iteration 9: 100%|██████████| 50/50 [00:01<00:00, 35.05it/s, episode_return=500, return=30.600]


VBox(children=(Label(value='0.001 MB of 0.007 MB uploaded\r'), FloatProgress(value=0.17077850877192982, max=1.…

0,1
regret,▁

0,1
regret,85146.39338


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: cmw2iivc with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.0003451563163537657
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:00<00:00, 62.11it/s, episode_return=50, return=26.800]
Iteration 1: 100%|██████████| 50/50 [00:01<00:00, 49.84it/s, episode_return=100, return=31.300]
Iteration 2: 100%|██████████| 50/50 [00:01<00:00, 47.35it/s, episode_return=150, return=21.500]
Iteration 3: 100%|██████████| 50/50 [00:01<00:00, 45.96it/s, episode_return=200, return=23.300]
Iteration 4: 100%|██████████| 50/50 [00:01<00:00, 39.96it/s, episode_return=250, return=22.400]
Iteration 5: 100%|██████████| 50/50 [00:01<00:00, 43.81it/s, episode_return=300, return=27.800]
Iteration 6: 100%|██████████| 50/50 [00:01<00:00, 40.56it/s, episode_return=350, return=29.600]
Iteration 7: 100%|██████████| 50/50 [00:01<00:00, 32.46it/s, episode_return=400, return=40.300]
Iteration 8: 100%|██████████| 50/50 [00:01<00:00, 32.64it/s, episode_return=450, return=47.700]
Iteration 9: 100%|██████████| 50/50 [00:02<00:00, 23.29it/s, episode_return=500, return=49.700]


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,84116.61916


[34m[1mwandb[0m: Agent Starting Run: ofj068ar with config:
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.0008144035696588986
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:01<00:00, 38.75it/s, episode_return=50, return=30.600]
Iteration 1: 100%|██████████| 50/50 [00:02<00:00, 23.56it/s, episode_return=100, return=75.000]
Iteration 2: 100%|██████████| 50/50 [00:04<00:00, 12.06it/s, episode_return=150, return=132.800]
Iteration 3: 100%|██████████| 50/50 [00:05<00:00,  9.48it/s, episode_return=200, return=156.600]
Iteration 4: 100%|██████████| 50/50 [00:08<00:00,  6.12it/s, episode_return=250, return=218.900]
Iteration 5: 100%|██████████| 50/50 [00:10<00:00,  4.83it/s, episode_return=300, return=299.400]
Iteration 6: 100%|██████████| 50/50 [00:09<00:00,  5.21it/s, episode_return=350, return=249.100]
Iteration 7: 100%|██████████| 50/50 [00:08<00:00,  5.84it/s, episode_return=400, return=274.400]
Iteration 8: 100%|██████████| 50/50 [00:08<00:00,  5.98it/s, episode_return=450, return=215.100]
Iteration 9: 100%|██████████| 50/50 [00:12<00:00,  4.05it/s, episode_return=500, return=340.100]


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,29771.94286


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: wt4blw0d with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.00043282841993360634
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:01<00:00, 46.07it/s, episode_return=50, return=30.700]
Iteration 1: 100%|██████████| 50/50 [00:01<00:00, 46.72it/s, episode_return=100, return=26.000]
Iteration 2: 100%|██████████| 50/50 [00:01<00:00, 39.49it/s, episode_return=150, return=32.800]
Iteration 3: 100%|██████████| 50/50 [00:01<00:00, 31.66it/s, episode_return=200, return=40.900]
Iteration 4: 100%|██████████| 50/50 [00:01<00:00, 26.17it/s, episode_return=250, return=38.100]
Iteration 5: 100%|██████████| 50/50 [00:02<00:00, 24.62it/s, episode_return=300, return=52.800]
Iteration 6: 100%|██████████| 50/50 [00:01<00:00, 26.22it/s, episode_return=350, return=59.700]
Iteration 7: 100%|██████████| 50/50 [00:02<00:00, 22.92it/s, episode_return=400, return=78.800]
Iteration 8: 100%|██████████| 50/50 [00:02<00:00, 20.21it/s, episode_return=450, return=51.600]
Iteration 9: 100%|██████████| 50/50 [00:02<00:00, 18.55it/s, episode_return=500, return=90.500]


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,77313.97602


[34m[1mwandb[0m: Agent Starting Run: soc2urjg with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.0008317969798843433
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:00<00:00, 56.18it/s, episode_return=50, return=15.200]
Iteration 1: 100%|██████████| 50/50 [00:01<00:00, 49.16it/s, episode_return=100, return=23.900]
Iteration 2: 100%|██████████| 50/50 [00:01<00:00, 43.45it/s, episode_return=150, return=32.900]
Iteration 3: 100%|██████████| 50/50 [00:01<00:00, 33.99it/s, episode_return=200, return=44.700]
Iteration 4: 100%|██████████| 50/50 [00:02<00:00, 23.96it/s, episode_return=250, return=77.400]
Iteration 5: 100%|██████████| 50/50 [00:02<00:00, 22.96it/s, episode_return=300, return=60.200]
Iteration 6: 100%|██████████| 50/50 [00:02<00:00, 22.05it/s, episode_return=350, return=60.100]
Iteration 7: 100%|██████████| 50/50 [00:02<00:00, 16.85it/s, episode_return=400, return=93.300]
Iteration 8: 100%|██████████| 50/50 [00:04<00:00, 12.42it/s, episode_return=450, return=123.300]
Iteration 9: 100%|██████████| 50/50 [00:05<00:00,  8.53it/s, episode_return=500, return=105.200]


VBox(children=(Label(value='0.001 MB of 0.007 MB uploaded\r'), FloatProgress(value=0.1676308354634737, max=1.0…

0,1
regret,▁

0,1
regret,73604.64842


[34m[1mwandb[0m: Agent Starting Run: gqtfuvrg with config:
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.000999918888408162
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011111111111111112, max=1.0…

Iteration 0: 100%|██████████| 50/50 [00:01<00:00, 45.99it/s, episode_return=50, return=22.100]
Iteration 1: 100%|██████████| 50/50 [00:01<00:00, 31.98it/s, episode_return=100, return=53.900]
Iteration 2: 100%|██████████| 50/50 [00:02<00:00, 22.95it/s, episode_return=150, return=54.300]
Iteration 3: 100%|██████████| 50/50 [00:02<00:00, 18.75it/s, episode_return=200, return=94.200]
Iteration 4: 100%|██████████| 50/50 [00:05<00:00, 10.00it/s, episode_return=250, return=167.300]
Iteration 5: 100%|██████████| 50/50 [00:08<00:00,  5.81it/s, episode_return=300, return=181.700]
Iteration 6: 100%|██████████| 50/50 [00:08<00:00,  6.05it/s, episode_return=350, return=169.000]
Iteration 7: 100%|██████████| 50/50 [00:07<00:00,  6.87it/s, episode_return=400, return=219.200]
Iteration 8: 100%|██████████| 50/50 [00:08<00:00,  5.79it/s, episode_return=450, return=255.200]
Iteration 9: 100%|██████████| 50/50 [00:07<00:00,  6.56it/s, episode_return=500, return=223.900]


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,40479.46238


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: im1x9nee with config:
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.00020260105858550584
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:01<00:00, 48.79it/s, episode_return=50, return=23.300]
Iteration 1: 100%|██████████| 50/50 [00:01<00:00, 44.70it/s, episode_return=100, return=31.000]
Iteration 2: 100%|██████████| 50/50 [00:01<00:00, 43.56it/s, episode_return=150, return=23.800]
Iteration 3: 100%|██████████| 50/50 [00:01<00:00, 34.36it/s, episode_return=200, return=41.500]
Iteration 4: 100%|██████████| 50/50 [00:01<00:00, 31.50it/s, episode_return=250, return=32.600]
Iteration 5: 100%|██████████| 50/50 [00:01<00:00, 25.24it/s, episode_return=300, return=53.500]
Iteration 6: 100%|██████████| 50/50 [00:02<00:00, 22.63it/s, episode_return=350, return=64.900]
Iteration 7: 100%|██████████| 50/50 [00:02<00:00, 22.11it/s, episode_return=400, return=44.300]
Iteration 8: 100%|██████████| 50/50 [00:03<00:00, 14.65it/s, episode_return=450, return=81.100]
Iteration 9: 100%|██████████| 50/50 [00:03<00:00, 13.36it/s, episode_return=500, return=123.300]


VBox(children=(Label(value='0.001 MB of 0.007 MB uploaded\r'), FloatProgress(value=0.1677653706444235, max=1.0…

0,1
regret,▁

0,1
regret,77646.44681


[34m[1mwandb[0m: Agent Starting Run: ygqca02v with config:
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.0003404815939241913
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:01<00:00, 41.39it/s, episode_return=50, return=27.300]
Iteration 1: 100%|██████████| 50/50 [00:01<00:00, 38.93it/s, episode_return=100, return=24.900]
Iteration 2: 100%|██████████| 50/50 [00:01<00:00, 27.16it/s, episode_return=150, return=48.000]
Iteration 3: 100%|██████████| 50/50 [00:02<00:00, 23.00it/s, episode_return=200, return=61.100]
Iteration 4: 100%|██████████| 50/50 [00:02<00:00, 20.48it/s, episode_return=250, return=63.200]
Iteration 5: 100%|██████████| 50/50 [00:02<00:00, 17.39it/s, episode_return=300, return=78.400]
Iteration 6: 100%|██████████| 50/50 [00:03<00:00, 15.71it/s, episode_return=350, return=99.300]
Iteration 7: 100%|██████████| 50/50 [00:03<00:00, 14.66it/s, episode_return=400, return=113.900]
Iteration 8: 100%|██████████| 50/50 [00:04<00:00, 10.55it/s, episode_return=450, return=154.200]
Iteration 9: 100%|██████████| 50/50 [00:06<00:00,  7.20it/s, episode_return=500, return=185.600]


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,67850.39638


[34m[1mwandb[0m: Agent Starting Run: yl9e0y3w with config:
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.0006964707760552388
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:01<00:00, 41.68it/s, episode_return=50, return=34.900]
Iteration 1: 100%|██████████| 50/50 [00:01<00:00, 25.96it/s, episode_return=100, return=70.600]
Iteration 2: 100%|██████████| 50/50 [00:03<00:00, 12.98it/s, episode_return=150, return=95.600] 
Iteration 3: 100%|██████████| 50/50 [00:05<00:00,  9.14it/s, episode_return=200, return=146.600]
Iteration 4: 100%|██████████| 50/50 [00:05<00:00,  9.16it/s, episode_return=250, return=151.900]
Iteration 5: 100%|██████████| 50/50 [00:06<00:00,  7.39it/s, episode_return=300, return=253.900]
Iteration 6: 100%|██████████| 50/50 [00:10<00:00,  4.58it/s, episode_return=350, return=328.400]
Iteration 7: 100%|██████████| 50/50 [00:11<00:00,  4.32it/s, episode_return=400, return=313.000]
Iteration 8: 100%|██████████| 50/50 [00:08<00:00,  5.69it/s, episode_return=450, return=181.700]
Iteration 9: 100%|██████████| 50/50 [00:12<00:00,  4.01it/s, episode_return=500, return=335.300]


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,34140.9027


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: zn9yhs0h with config:
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.0008734096138995956
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:00<00:00, 58.32it/s, episode_return=50, return=21.800]
Iteration 1: 100%|██████████| 50/50 [00:01<00:00, 26.71it/s, episode_return=100, return=57.800]
Iteration 2: 100%|██████████| 50/50 [00:02<00:00, 23.03it/s, episode_return=150, return=64.600]
Iteration 3: 100%|██████████| 50/50 [00:03<00:00, 13.09it/s, episode_return=200, return=129.500]
Iteration 4: 100%|██████████| 50/50 [00:08<00:00,  5.71it/s, episode_return=250, return=328.700]
Iteration 5: 100%|██████████| 50/50 [00:10<00:00,  4.78it/s, episode_return=300, return=260.900]
Iteration 6: 100%|██████████| 50/50 [00:10<00:00,  4.60it/s, episode_return=350, return=377.400]
Iteration 7: 100%|██████████| 50/50 [00:13<00:00,  3.64it/s, episode_return=400, return=256.000]
Iteration 8: 100%|██████████| 50/50 [00:07<00:00,  6.90it/s, episode_return=450, return=149.500]
Iteration 9: 100%|██████████| 50/50 [00:12<00:00,  4.01it/s, episode_return=500, return=361.300]


VBox(children=(Label(value='0.001 MB of 0.007 MB uploaded\r'), FloatProgress(value=0.1669793621013133, max=1.0…

0,1
regret,▁

0,1
regret,35644.72479


[34m[1mwandb[0m: Agent Starting Run: oohkjoee with config:
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.000989460832389099
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:01<00:00, 44.24it/s, episode_return=50, return=27.000]
Iteration 1: 100%|██████████| 50/50 [00:01<00:00, 33.76it/s, episode_return=100, return=51.000]
Iteration 2: 100%|██████████| 50/50 [00:01<00:00, 27.17it/s, episode_return=150, return=61.800]
Iteration 3: 100%|██████████| 50/50 [00:02<00:00, 21.11it/s, episode_return=200, return=60.400]
Iteration 4: 100%|██████████| 50/50 [00:02<00:00, 18.70it/s, episode_return=250, return=88.800]
Iteration 5: 100%|██████████| 50/50 [00:05<00:00,  8.99it/s, episode_return=300, return=197.100]
Iteration 6: 100%|██████████| 50/50 [00:08<00:00,  5.68it/s, episode_return=350, return=258.400]
Iteration 7: 100%|██████████| 50/50 [00:10<00:00,  4.61it/s, episode_return=400, return=299.300]
Iteration 8: 100%|██████████| 50/50 [00:12<00:00,  4.03it/s, episode_return=450, return=300.500]
Iteration 9: 100%|██████████| 50/50 [00:10<00:00,  4.55it/s, episode_return=500, return=324.600]


VBox(children=(Label(value='0.001 MB of 0.007 MB uploaded\r'), FloatProgress(value=0.167337627482555, max=1.0)…

0,1
regret,▁

0,1
regret,46583.6703


[34m[1mwandb[0m: Agent Starting Run: 7xeyew5r with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.00021061831063992425
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:00<00:00, 53.93it/s, episode_return=50, return=17.100]
Iteration 1: 100%|██████████| 50/50 [00:01<00:00, 42.61it/s, episode_return=100, return=18.200]
Iteration 2: 100%|██████████| 50/50 [00:01<00:00, 46.29it/s, episode_return=150, return=24.800]
Iteration 3: 100%|██████████| 50/50 [00:01<00:00, 43.73it/s, episode_return=200, return=24.300]
Iteration 4: 100%|██████████| 50/50 [00:01<00:00, 35.48it/s, episode_return=250, return=27.000]
Iteration 5: 100%|██████████| 50/50 [00:01<00:00, 42.29it/s, episode_return=300, return=27.700]
Iteration 6: 100%|██████████| 50/50 [00:01<00:00, 34.25it/s, episode_return=350, return=29.500]
Iteration 7: 100%|██████████| 50/50 [00:01<00:00, 41.19it/s, episode_return=400, return=27.800]
Iteration 8: 100%|██████████| 50/50 [00:01<00:00, 34.41it/s, episode_return=450, return=29.200]
Iteration 9: 100%|██████████| 50/50 [00:01<00:00, 43.36it/s, episode_return=500, return=34.600]


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,85039.55288


[34m[1mwandb[0m: Agent Starting Run: tu7in6ad with config:
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.0007386979151970491
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:00<00:00, 65.00it/s, episode_return=50, return=22.100]
Iteration 1: 100%|██████████| 50/50 [00:01<00:00, 47.31it/s, episode_return=100, return=29.300]
Iteration 2: 100%|██████████| 50/50 [00:01<00:00, 27.13it/s, episode_return=150, return=48.500]
Iteration 3: 100%|██████████| 50/50 [00:02<00:00, 21.36it/s, episode_return=200, return=60.600]
Iteration 4: 100%|██████████| 50/50 [00:02<00:00, 17.43it/s, episode_return=250, return=66.800]
Iteration 5: 100%|██████████| 50/50 [00:04<00:00, 11.54it/s, episode_return=300, return=134.800]
Iteration 6: 100%|██████████| 50/50 [00:05<00:00,  8.76it/s, episode_return=350, return=172.800]
Iteration 7: 100%|██████████| 50/50 [00:07<00:00,  6.40it/s, episode_return=400, return=323.100]
Iteration 8: 100%|██████████| 50/50 [00:11<00:00,  4.32it/s, episode_return=450, return=315.700]
Iteration 9: 100%|██████████| 50/50 [00:09<00:00,  5.04it/s, episode_return=500, return=177.300]


VBox(children=(Label(value='0.001 MB of 0.007 MB uploaded\r'), FloatProgress(value=0.17042503758370917, max=1.…

0,1
regret,▁

0,1
regret,51873.33006


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: zq7630sd with config:
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.0007347853748850174
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:01<00:00, 47.28it/s, episode_return=50, return=24.000]
Iteration 1: 100%|██████████| 50/50 [00:01<00:00, 33.75it/s, episode_return=100, return=41.000]
Iteration 2: 100%|██████████| 50/50 [00:02<00:00, 22.11it/s, episode_return=150, return=60.400]
Iteration 3: 100%|██████████| 50/50 [00:03<00:00, 14.46it/s, episode_return=200, return=97.400] 
Iteration 4: 100%|██████████| 50/50 [00:04<00:00, 11.18it/s, episode_return=250, return=143.900]
Iteration 5: 100%|██████████| 50/50 [00:06<00:00,  7.72it/s, episode_return=300, return=128.700]
Iteration 6: 100%|██████████| 50/50 [00:08<00:00,  6.15it/s, episode_return=350, return=220.600]
Iteration 7: 100%|██████████| 50/50 [00:07<00:00,  6.59it/s, episode_return=400, return=252.800]
Iteration 8: 100%|██████████| 50/50 [00:05<00:00,  9.27it/s, episode_return=450, return=180.900]
Iteration 9: 100%|██████████| 50/50 [00:13<00:00,  3.69it/s, episode_return=500, return=344.900]


VBox(children=(Label(value='0.001 MB of 0.007 MB uploaded\r'), FloatProgress(value=0.16731517509727625, max=1.…

0,1
regret,▁

0,1
regret,42382.35643


[34m[1mwandb[0m: Agent Starting Run: 1l7uvnf3 with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.0003881242724422109
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:01<00:00, 41.81it/s, episode_return=50, return=24.500]
Iteration 1: 100%|██████████| 50/50 [00:01<00:00, 30.84it/s, episode_return=100, return=21.400]
Iteration 2: 100%|██████████| 50/50 [00:01<00:00, 35.86it/s, episode_return=150, return=35.400]
Iteration 3: 100%|██████████| 50/50 [00:01<00:00, 32.59it/s, episode_return=200, return=29.100]
Iteration 4: 100%|██████████| 50/50 [00:01<00:00, 25.41it/s, episode_return=250, return=43.700]
Iteration 5: 100%|██████████| 50/50 [00:01<00:00, 29.44it/s, episode_return=300, return=53.300]
Iteration 6: 100%|██████████| 50/50 [00:02<00:00, 24.62it/s, episode_return=350, return=53.200]
Iteration 7: 100%|██████████| 50/50 [00:02<00:00, 17.88it/s, episode_return=400, return=69.200]
Iteration 8: 100%|██████████| 50/50 [00:03<00:00, 15.55it/s, episode_return=450, return=69.600]
Iteration 9: 100%|██████████| 50/50 [00:03<00:00, 12.56it/s, episode_return=500, return=74.500] 


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,76984.17717


[34m[1mwandb[0m: Agent Starting Run: wa89fsg5 with config:
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.00040583606346654536
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:00<00:00, 50.54it/s, episode_return=50, return=26.600]
Iteration 1: 100%|██████████| 50/50 [00:01<00:00, 40.43it/s, episode_return=100, return=26.400]
Iteration 2: 100%|██████████| 50/50 [00:01<00:00, 37.94it/s, episode_return=150, return=38.500]
Iteration 3: 100%|██████████| 50/50 [00:01<00:00, 32.27it/s, episode_return=200, return=45.500]
Iteration 4: 100%|██████████| 50/50 [00:01<00:00, 29.80it/s, episode_return=250, return=36.100]
Iteration 5: 100%|██████████| 50/50 [00:01<00:00, 28.42it/s, episode_return=300, return=43.200]
Iteration 6: 100%|██████████| 50/50 [00:01<00:00, 28.49it/s, episode_return=350, return=46.200]
Iteration 7: 100%|██████████| 50/50 [00:37<00:00,  1.34it/s, episode_return=400, return=64.200]
Iteration 8: 100%|██████████| 50/50 [07:21<00:00,  8.83s/it, episode_return=450, return=51.700]
Iteration 9: 100%|██████████| 50/50 [00:05<00:00,  8.44it/s, episode_return=500, return=69.100]


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,76324.1828


[34m[1mwandb[0m: Agent Starting Run: x5r9eu7o with config:
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.0007710132821228806
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:01<00:00, 48.88it/s, episode_return=50, return=26.000]
Iteration 1: 100%|██████████| 50/50 [00:01<00:00, 32.77it/s, episode_return=100, return=39.700]
Iteration 2: 100%|██████████| 50/50 [00:01<00:00, 25.91it/s, episode_return=150, return=46.200]
Iteration 3: 100%|██████████| 50/50 [00:02<00:00, 21.15it/s, episode_return=200, return=72.800]
Iteration 4: 100%|██████████| 50/50 [00:03<00:00, 16.08it/s, episode_return=250, return=96.500] 
Iteration 5: 100%|██████████| 50/50 [00:04<00:00, 11.73it/s, episode_return=300, return=177.800]
Iteration 6: 100%|██████████| 50/50 [00:04<00:00, 10.02it/s, episode_return=350, return=173.400]
Iteration 7: 100%|██████████| 50/50 [00:06<00:00,  7.33it/s, episode_return=400, return=197.900]
Iteration 8: 100%|██████████| 50/50 [00:05<00:00,  8.36it/s, episode_return=450, return=229.200]
Iteration 9: 100%|██████████| 50/50 [00:06<00:00,  7.80it/s, episode_return=500, return=278.300]


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,51443.81694


[34m[1mwandb[0m: Agent Starting Run: p2yoexvy with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.0009732551982512104
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:00<00:00, 82.89it/s, episode_return=50, return=21.600]
Iteration 1: 100%|██████████| 50/50 [00:00<00:00, 62.01it/s, episode_return=100, return=45.500]
Iteration 2: 100%|██████████| 50/50 [00:01<00:00, 42.89it/s, episode_return=150, return=51.700]
Iteration 3: 100%|██████████| 50/50 [00:01<00:00, 40.47it/s, episode_return=200, return=55.500]
Iteration 4: 100%|██████████| 50/50 [00:01<00:00, 33.26it/s, episode_return=250, return=56.900]
Iteration 5: 100%|██████████| 50/50 [00:02<00:00, 21.49it/s, episode_return=300, return=102.100]
Iteration 6: 100%|██████████| 50/50 [00:03<00:00, 12.97it/s, episode_return=350, return=155.100]
Iteration 7: 100%|██████████| 50/50 [00:04<00:00, 10.61it/s, episode_return=400, return=184.800]
Iteration 8: 100%|██████████| 50/50 [00:04<00:00, 11.16it/s, episode_return=450, return=111.700]
Iteration 9: 100%|██████████| 50/50 [00:04<00:00, 10.26it/s, episode_return=500, return=212.700]


VBox(children=(Label(value='0.001 MB of 0.007 MB uploaded\r'), FloatProgress(value=0.17068163153572405, max=1.…

0,1
regret,▁

0,1
regret,58605.57767


[34m[1mwandb[0m: Agent Starting Run: 8hnl61a8 with config:
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.0005116864981548327
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011111111111111112, max=1.0…

Iteration 0: 100%|██████████| 50/50 [00:00<00:00, 68.72it/s, episode_return=50, return=25.600]
Iteration 1: 100%|██████████| 50/50 [00:01<00:00, 45.00it/s, episode_return=100, return=37.800]
Iteration 2: 100%|██████████| 50/50 [00:01<00:00, 34.89it/s, episode_return=150, return=41.900]
Iteration 3: 100%|██████████| 50/50 [00:02<00:00, 22.78it/s, episode_return=200, return=71.800]
Iteration 4: 100%|██████████| 50/50 [00:03<00:00, 13.25it/s, episode_return=250, return=157.400]
Iteration 5: 100%|██████████| 50/50 [00:05<00:00,  9.85it/s, episode_return=300, return=167.000]
Iteration 6: 100%|██████████| 50/50 [00:05<00:00,  9.70it/s, episode_return=350, return=99.400] 
Iteration 7: 100%|██████████| 50/50 [00:06<00:00,  7.42it/s, episode_return=400, return=302.200]
Iteration 8: 100%|██████████| 50/50 [00:06<00:00,  7.50it/s, episode_return=450, return=199.200]
Iteration 9: 100%|██████████| 50/50 [00:08<00:00,  6.15it/s, episode_return=500, return=342.500]


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,46434.12308


[34m[1mwandb[0m: Agent Starting Run: whne96ko with config:
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.000387731200938501
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:00<00:00, 69.91it/s, episode_return=50, return=19.900]
Iteration 1: 100%|██████████| 50/50 [00:00<00:00, 61.19it/s, episode_return=100, return=22.300]
Iteration 2: 100%|██████████| 50/50 [00:00<00:00, 51.27it/s, episode_return=150, return=32.100]
Iteration 3: 100%|██████████| 50/50 [00:01<00:00, 40.51it/s, episode_return=200, return=39.300]
Iteration 4: 100%|██████████| 50/50 [00:01<00:00, 34.03it/s, episode_return=250, return=42.900]
Iteration 5: 100%|██████████| 50/50 [00:01<00:00, 30.27it/s, episode_return=300, return=62.200]
Iteration 6: 100%|██████████| 50/50 [00:01<00:00, 28.34it/s, episode_return=350, return=67.000]
Iteration 7: 100%|██████████| 50/50 [00:02<00:00, 16.80it/s, episode_return=400, return=113.200]
Iteration 8: 100%|██████████| 50/50 [00:03<00:00, 13.09it/s, episode_return=450, return=133.800]
Iteration 9: 100%|██████████| 50/50 [00:04<00:00, 11.41it/s, episode_return=500, return=155.900]


VBox(children=(Label(value='0.001 MB of 0.007 MB uploaded\r'), FloatProgress(value=0.16772024209818426, max=1.…

0,1
regret,▁

0,1
regret,71053.48625


[34m[1mwandb[0m: Agent Starting Run: qghgzs1h with config:
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.0006421772972739218
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:00<00:00, 54.12it/s, episode_return=50, return=32.800]
Iteration 1: 100%|██████████| 50/50 [00:01<00:00, 29.63it/s, episode_return=100, return=63.900]
Iteration 2: 100%|██████████| 50/50 [00:01<00:00, 25.24it/s, episode_return=150, return=67.600]
Iteration 3: 100%|██████████| 50/50 [00:03<00:00, 15.55it/s, episode_return=200, return=82.200] 
Iteration 4: 100%|██████████| 50/50 [00:04<00:00, 10.84it/s, episode_return=250, return=148.000]
Iteration 5: 100%|██████████| 50/50 [00:05<00:00,  9.62it/s, episode_return=300, return=136.000]
Iteration 6: 100%|██████████| 50/50 [00:03<00:00, 12.61it/s, episode_return=350, return=174.100]
Iteration 7: 100%|██████████| 50/50 [00:04<00:00, 10.36it/s, episode_return=400, return=119.700]
Iteration 8: 100%|██████████| 50/50 [00:06<00:00,  7.68it/s, episode_return=450, return=306.900]
Iteration 9: 100%|██████████| 50/50 [00:08<00:00,  5.76it/s, episode_return=500, return=309.600]


VBox(children=(Label(value='0.001 MB of 0.007 MB uploaded\r'), FloatProgress(value=0.1674499798576608, max=1.0…

0,1
regret,▁

0,1
regret,45421.54523


[34m[1mwandb[0m: Agent Starting Run: q9yo57lc with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.0009372794352295476
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:00<00:00, 71.95it/s, episode_return=50, return=31.500]
Iteration 1: 100%|██████████| 50/50 [00:01<00:00, 42.66it/s, episode_return=100, return=30.400]
Iteration 2: 100%|██████████| 50/50 [00:01<00:00, 33.79it/s, episode_return=150, return=59.600]
Iteration 3: 100%|██████████| 50/50 [00:01<00:00, 29.17it/s, episode_return=200, return=52.700]
Iteration 4: 100%|██████████| 50/50 [00:02<00:00, 23.94it/s, episode_return=250, return=86.500]
Iteration 5: 100%|██████████| 50/50 [00:03<00:00, 15.48it/s, episode_return=300, return=133.800]
Iteration 6: 100%|██████████| 50/50 [00:05<00:00,  9.57it/s, episode_return=350, return=199.200]
Iteration 7: 100%|██████████| 50/50 [00:07<00:00,  6.39it/s, episode_return=400, return=366.100]
Iteration 8: 100%|██████████| 50/50 [00:08<00:00,  5.87it/s, episode_return=450, return=310.500]
Iteration 9: 100%|██████████| 50/50 [00:07<00:00,  6.51it/s, episode_return=500, return=322.800]


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,48610.63051


[34m[1mwandb[0m: Agent Starting Run: zhmj8i26 with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 9.11575921636037e-05
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:00<00:00, 61.84it/s, episode_return=50, return=25.000]
Iteration 1: 100%|██████████| 50/50 [00:00<00:00, 67.58it/s, episode_return=100, return=31.600]
Iteration 2: 100%|██████████| 50/50 [00:00<00:00, 69.64it/s, episode_return=150, return=27.600]
Iteration 3: 100%|██████████| 50/50 [00:00<00:00, 62.75it/s, episode_return=200, return=23.600]
Iteration 4: 100%|██████████| 50/50 [00:00<00:00, 65.29it/s, episode_return=250, return=27.200]
Iteration 5: 100%|██████████| 50/50 [00:00<00:00, 55.99it/s, episode_return=300, return=28.500]
Iteration 6: 100%|██████████| 50/50 [00:00<00:00, 61.72it/s, episode_return=350, return=27.900]
Iteration 7: 100%|██████████| 50/50 [00:00<00:00, 50.42it/s, episode_return=400, return=32.500]
Iteration 8: 100%|██████████| 50/50 [00:01<00:00, 48.55it/s, episode_return=450, return=24.900]
Iteration 9: 100%|██████████| 50/50 [00:01<00:00, 46.53it/s, episode_return=500, return=30.500]


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,83802.69387


[34m[1mwandb[0m: Agent Starting Run: 6490o570 with config:
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.0007993416868792987
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:00<00:00, 67.12it/s, episode_return=50, return=21.500]
Iteration 1: 100%|██████████| 50/50 [00:01<00:00, 45.76it/s, episode_return=100, return=39.800]
Iteration 2: 100%|██████████| 50/50 [00:02<00:00, 16.68it/s, episode_return=150, return=64.400]
Iteration 3: 100%|██████████| 50/50 [00:06<00:00,  7.90it/s, episode_return=200, return=193.000]
Iteration 4: 100%|██████████| 50/50 [00:09<00:00,  5.14it/s, episode_return=250, return=358.800]
Iteration 5: 100%|██████████| 50/50 [00:16<00:00,  3.12it/s, episode_return=300, return=405.300]
Iteration 6: 100%|██████████| 50/50 [00:16<00:00,  2.97it/s, episode_return=350, return=398.200]
Iteration 7: 100%|██████████| 50/50 [00:18<00:00,  2.74it/s, episode_return=400, return=406.600]
Iteration 8: 100%|██████████| 50/50 [00:14<00:00,  3.43it/s, episode_return=450, return=324.500]
Iteration 9: 100%|██████████| 50/50 [00:14<00:00,  3.52it/s, episode_return=500, return=187.400]


VBox(children=(Label(value='0.001 MB of 0.007 MB uploaded\r'), FloatProgress(value=0.16977535738597685, max=1.…

0,1
regret,▁

0,1
regret,36722.32486


[34m[1mwandb[0m: Agent Starting Run: pb1502ho with config:
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 7.960329591947738e-05
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:00<00:00, 76.89it/s, episode_return=50, return=19.900]
Iteration 1: 100%|██████████| 50/50 [00:00<00:00, 79.56it/s, episode_return=100, return=21.300]
Iteration 2: 100%|██████████| 50/50 [00:00<00:00, 64.50it/s, episode_return=150, return=20.900]
Iteration 3: 100%|██████████| 50/50 [00:00<00:00, 62.07it/s, episode_return=200, return=14.200]
Iteration 4: 100%|██████████| 50/50 [00:01<00:00, 41.08it/s, episode_return=250, return=28.700]
Iteration 5: 100%|██████████| 50/50 [00:01<00:00, 37.97it/s, episode_return=300, return=26.100]
Iteration 6: 100%|██████████| 50/50 [00:01<00:00, 46.02it/s, episode_return=350, return=25.300]
Iteration 7: 100%|██████████| 50/50 [00:01<00:00, 42.10it/s, episode_return=400, return=25.900]
Iteration 8: 100%|██████████| 50/50 [00:01<00:00, 46.09it/s, episode_return=450, return=28.400]
Iteration 9: 100%|██████████| 50/50 [00:01<00:00, 46.91it/s, episode_return=500, return=17.800]


VBox(children=(Label(value='0.001 MB of 0.007 MB uploaded\r'), FloatProgress(value=0.1678557006326558, max=1.0…

0,1
regret,▁

0,1
regret,86382.16921


[34m[1mwandb[0m: Agent Starting Run: aqvbk0zw with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.00015364921784328834
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:00<00:00, 56.39it/s, episode_return=50, return=22.800]
Iteration 1: 100%|██████████| 50/50 [00:01<00:00, 47.43it/s, episode_return=100, return=28.300]
Iteration 2: 100%|██████████| 50/50 [00:01<00:00, 44.76it/s, episode_return=150, return=17.500]
Iteration 3: 100%|██████████| 50/50 [00:00<00:00, 60.82it/s, episode_return=200, return=19.600]
Iteration 4: 100%|██████████| 50/50 [00:01<00:00, 48.57it/s, episode_return=250, return=22.600]
Iteration 5: 100%|██████████| 50/50 [00:00<00:00, 50.57it/s, episode_return=300, return=30.900]
Iteration 6: 100%|██████████| 50/50 [00:00<00:00, 58.27it/s, episode_return=350, return=21.000]
Iteration 7: 100%|██████████| 50/50 [00:01<00:00, 46.60it/s, episode_return=400, return=26.400]
Iteration 8: 100%|██████████| 50/50 [00:00<00:00, 58.56it/s, episode_return=450, return=22.400]
Iteration 9: 100%|██████████| 50/50 [00:01<00:00, 49.16it/s, episode_return=500, return=34.900]


VBox(children=(Label(value='0.001 MB of 0.007 MB uploaded\r'), FloatProgress(value=0.1678557006326558, max=1.0…

0,1
regret,▁

0,1
regret,84524.96678


[34m[1mwandb[0m: Agent Starting Run: g49rdr2t with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.0008043122851557036
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:00<00:00, 86.11it/s, episode_return=50, return=19.200]
Iteration 1: 100%|██████████| 50/50 [00:00<00:00, 77.86it/s, episode_return=100, return=26.300]
Iteration 2: 100%|██████████| 50/50 [00:00<00:00, 60.00it/s, episode_return=150, return=39.300]
Iteration 3: 100%|██████████| 50/50 [00:01<00:00, 49.87it/s, episode_return=200, return=38.700]
Iteration 4: 100%|██████████| 50/50 [00:01<00:00, 29.82it/s, episode_return=250, return=55.100]
Iteration 5: 100%|██████████| 50/50 [00:02<00:00, 22.63it/s, episode_return=300, return=98.000] 
Iteration 6: 100%|██████████| 50/50 [00:03<00:00, 14.59it/s, episode_return=350, return=155.700]
Iteration 7: 100%|██████████| 50/50 [00:05<00:00,  8.76it/s, episode_return=400, return=221.100]
Iteration 8: 100%|██████████| 50/50 [00:08<00:00,  6.20it/s, episode_return=450, return=236.300]
Iteration 9: 100%|██████████| 50/50 [00:08<00:00,  5.69it/s, episode_return=500, return=252.700]


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,58804.76644


[34m[1mwandb[0m: Agent Starting Run: 7jaw8w2d with config:
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.000825104898610036
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:00<00:00, 69.33it/s, episode_return=50, return=36.300]
Iteration 1: 100%|██████████| 50/50 [00:01<00:00, 45.74it/s, episode_return=100, return=33.100]
Iteration 2: 100%|██████████| 50/50 [00:01<00:00, 31.59it/s, episode_return=150, return=49.400]
Iteration 3: 100%|██████████| 50/50 [00:03<00:00, 14.92it/s, episode_return=200, return=96.200]
Iteration 4: 100%|██████████| 50/50 [00:04<00:00, 10.31it/s, episode_return=250, return=148.100]
Iteration 5: 100%|██████████| 50/50 [00:07<00:00,  6.49it/s, episode_return=300, return=208.200]
Iteration 6: 100%|██████████| 50/50 [00:06<00:00,  7.79it/s, episode_return=350, return=174.400]
Iteration 7: 100%|██████████| 50/50 [00:07<00:00,  7.01it/s, episode_return=400, return=140.000]
Iteration 8: 100%|██████████| 50/50 [00:08<00:00,  5.96it/s, episode_return=450, return=244.800]
Iteration 9: 100%|██████████| 50/50 [00:11<00:00,  4.30it/s, episode_return=500, return=258.900]


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,45140.7145


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: dy6zllot with config:
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 8.744437425408702e-05
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011111111111111112, max=1.0…

Iteration 0: 100%|██████████| 50/50 [00:00<00:00, 70.29it/s, episode_return=50, return=20.900]
Iteration 1: 100%|██████████| 50/50 [00:00<00:00, 67.35it/s, episode_return=100, return=22.800]
Iteration 2: 100%|██████████| 50/50 [00:00<00:00, 68.37it/s, episode_return=150, return=23.700]
Iteration 3: 100%|██████████| 50/50 [00:00<00:00, 52.19it/s, episode_return=200, return=22.100]
Iteration 4: 100%|██████████| 50/50 [00:00<00:00, 57.84it/s, episode_return=250, return=30.100]
Iteration 5: 100%|██████████| 50/50 [00:00<00:00, 53.00it/s, episode_return=300, return=27.500]
Iteration 6: 100%|██████████| 50/50 [00:01<00:00, 36.94it/s, episode_return=350, return=35.000]
Iteration 7: 100%|██████████| 50/50 [00:01<00:00, 31.28it/s, episode_return=400, return=23.400]
Iteration 8: 100%|██████████| 50/50 [00:01<00:00, 32.92it/s, episode_return=450, return=31.900]
Iteration 9: 100%|██████████| 50/50 [00:01<00:00, 27.00it/s, episode_return=500, return=29.000]


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,84629.53327


[34m[1mwandb[0m: Agent Starting Run: 02ej5dws with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.0009045283690436432
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:00<00:00, 74.33it/s, episode_return=50, return=27.600]
Iteration 1: 100%|██████████| 50/50 [00:00<00:00, 62.96it/s, episode_return=100, return=31.100]
Iteration 2: 100%|██████████| 50/50 [00:01<00:00, 35.77it/s, episode_return=150, return=49.400]
Iteration 3: 100%|██████████| 50/50 [00:02<00:00, 22.68it/s, episode_return=200, return=49.000]
Iteration 4: 100%|██████████| 50/50 [00:02<00:00, 22.71it/s, episode_return=250, return=51.500]
Iteration 5: 100%|██████████| 50/50 [00:02<00:00, 21.40it/s, episode_return=300, return=71.000] 
Iteration 6: 100%|██████████| 50/50 [00:03<00:00, 13.48it/s, episode_return=350, return=92.500] 
Iteration 7: 100%|██████████| 50/50 [00:06<00:00,  8.31it/s, episode_return=400, return=202.500]
Iteration 8: 100%|██████████| 50/50 [00:07<00:00,  6.44it/s, episode_return=450, return=250.500]
Iteration 9: 100%|██████████| 50/50 [00:08<00:00,  5.94it/s, episode_return=500, return=261.700]


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,57063.76273


[34m[1mwandb[0m: Agent Starting Run: vdyt60lg with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.00011396151619388896
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:00<00:00, 71.29it/s, episode_return=50, return=27.400]
Iteration 1: 100%|██████████| 50/50 [00:00<00:00, 76.23it/s, episode_return=100, return=26.100]
Iteration 2: 100%|██████████| 50/50 [00:00<00:00, 68.96it/s, episode_return=150, return=20.300]
Iteration 3: 100%|██████████| 50/50 [00:00<00:00, 71.93it/s, episode_return=200, return=23.300]
Iteration 4: 100%|██████████| 50/50 [00:00<00:00, 55.66it/s, episode_return=250, return=22.500]
Iteration 5: 100%|██████████| 50/50 [00:01<00:00, 42.24it/s, episode_return=300, return=36.900]
Iteration 6: 100%|██████████| 50/50 [00:01<00:00, 40.89it/s, episode_return=350, return=29.300]
Iteration 7: 100%|██████████| 50/50 [00:01<00:00, 41.27it/s, episode_return=400, return=28.700]
Iteration 8: 100%|██████████| 50/50 [00:01<00:00, 48.32it/s, episode_return=450, return=29.700]
Iteration 9: 100%|██████████| 50/50 [00:01<00:00, 45.76it/s, episode_return=500, return=31.100]


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,83839.84559


[34m[1mwandb[0m: Agent Starting Run: d1v5q2p6 with config:
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.000346614463183325
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:00<00:00, 63.80it/s, episode_return=50, return=24.300]
Iteration 1: 100%|██████████| 50/50 [00:00<00:00, 63.39it/s, episode_return=100, return=24.800]
Iteration 2: 100%|██████████| 50/50 [00:01<00:00, 33.57it/s, episode_return=150, return=30.000]
Iteration 3: 100%|██████████| 50/50 [00:01<00:00, 29.60it/s, episode_return=200, return=41.100]
Iteration 4: 100%|██████████| 50/50 [00:02<00:00, 23.83it/s, episode_return=250, return=62.500]
Iteration 5: 100%|██████████| 50/50 [00:02<00:00, 21.22it/s, episode_return=300, return=56.000]
Iteration 6: 100%|██████████| 50/50 [00:02<00:00, 20.24it/s, episode_return=350, return=50.400]
Iteration 7: 100%|██████████| 50/50 [00:02<00:00, 19.71it/s, episode_return=400, return=58.700]
Iteration 8: 100%|██████████| 50/50 [00:02<00:00, 18.89it/s, episode_return=450, return=71.600]
Iteration 9: 100%|██████████| 50/50 [00:03<00:00, 15.79it/s, episode_return=500, return=57.900] 


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,76636.28884


[34m[1mwandb[0m: Agent Starting Run: r2uoyrnb with config:
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.0009498373940206764
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:00<00:00, 53.85it/s, episode_return=50, return=42.300]
Iteration 1: 100%|██████████| 50/50 [00:02<00:00, 17.21it/s, episode_return=100, return=73.800]
Iteration 2: 100%|██████████| 50/50 [00:05<00:00,  9.29it/s, episode_return=150, return=130.800]
Iteration 3: 100%|██████████| 50/50 [00:10<00:00,  4.97it/s, episode_return=200, return=260.400]
Iteration 4: 100%|██████████| 50/50 [00:13<00:00,  3.83it/s, episode_return=250, return=338.600]
Iteration 5: 100%|██████████| 50/50 [00:15<00:00,  3.15it/s, episode_return=300, return=371.800]
Iteration 6: 100%|██████████| 50/50 [00:14<00:00,  3.48it/s, episode_return=350, return=391.200]
Iteration 7: 100%|██████████| 50/50 [00:17<00:00,  2.86it/s, episode_return=400, return=205.100]
Iteration 8: 100%|██████████| 50/50 [00:13<00:00,  3.76it/s, episode_return=450, return=429.700]
Iteration 9: 100%|██████████| 50/50 [00:18<00:00,  2.70it/s, episode_return=500, return=404.200]


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,27160.24163


[34m[1mwandb[0m: Agent Starting Run: nanls002 with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.0004477112013392691
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:00<00:00, 63.37it/s, episode_return=50, return=37.000]
Iteration 1: 100%|██████████| 50/50 [00:00<00:00, 56.82it/s, episode_return=100, return=43.200]
Iteration 2: 100%|██████████| 50/50 [00:01<00:00, 47.33it/s, episode_return=150, return=35.700]
Iteration 3: 100%|██████████| 50/50 [00:01<00:00, 39.81it/s, episode_return=200, return=39.100]
Iteration 4: 100%|██████████| 50/50 [00:01<00:00, 32.37it/s, episode_return=250, return=47.900]
Iteration 5: 100%|██████████| 50/50 [00:01<00:00, 26.50it/s, episode_return=300, return=46.900]
Iteration 6: 100%|██████████| 50/50 [00:01<00:00, 27.43it/s, episode_return=350, return=57.200]
Iteration 7: 100%|██████████| 50/50 [00:02<00:00, 22.18it/s, episode_return=400, return=77.500]
Iteration 8: 100%|██████████| 50/50 [00:02<00:00, 23.24it/s, episode_return=450, return=71.300]
Iteration 9: 100%|██████████| 50/50 [00:02<00:00, 18.87it/s, episode_return=500, return=64.800]


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,75448.11978


[34m[1mwandb[0m: Agent Starting Run: pbplcki1 with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.0002160681849909195
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:00<00:00, 71.54it/s, episode_return=50, return=23.600]
Iteration 1: 100%|██████████| 50/50 [00:00<00:00, 62.22it/s, episode_return=100, return=20.700]
Iteration 2: 100%|██████████| 50/50 [00:01<00:00, 47.11it/s, episode_return=150, return=23.600]
Iteration 3: 100%|██████████| 50/50 [00:01<00:00, 44.09it/s, episode_return=200, return=18.200]
Iteration 4: 100%|██████████| 50/50 [00:01<00:00, 38.46it/s, episode_return=250, return=22.300]
Iteration 5: 100%|██████████| 50/50 [00:01<00:00, 43.39it/s, episode_return=300, return=33.900]
Iteration 6: 100%|██████████| 50/50 [00:01<00:00, 48.51it/s, episode_return=350, return=19.700]
Iteration 7: 100%|██████████| 50/50 [00:01<00:00, 49.55it/s, episode_return=400, return=34.400]
Iteration 8: 100%|██████████| 50/50 [00:01<00:00, 40.90it/s, episode_return=450, return=37.300]
Iteration 9: 100%|██████████| 50/50 [00:01<00:00, 43.00it/s, episode_return=500, return=37.300]


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,84634.80973


[34m[1mwandb[0m: Agent Starting Run: 9qch9x7y with config:
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.0007364313480407701
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:01<00:00, 29.61it/s, episode_return=50, return=33.900]
Iteration 1: 100%|██████████| 50/50 [00:02<00:00, 19.55it/s, episode_return=100, return=43.100]
Iteration 2: 100%|██████████| 50/50 [00:03<00:00, 13.21it/s, episode_return=150, return=78.600]
Iteration 3: 100%|██████████| 50/50 [00:06<00:00,  7.55it/s, episode_return=200, return=216.800]
Iteration 4: 100%|██████████| 50/50 [00:10<00:00,  4.91it/s, episode_return=250, return=256.300]
Iteration 5: 100%|██████████| 50/50 [00:13<00:00,  3.78it/s, episode_return=300, return=304.500]
Iteration 6: 100%|██████████| 50/50 [00:10<00:00,  4.84it/s, episode_return=350, return=163.800]
Iteration 7: 100%|██████████| 50/50 [00:11<00:00,  4.27it/s, episode_return=400, return=241.500]
Iteration 8: 100%|██████████| 50/50 [00:11<00:00,  4.24it/s, episode_return=450, return=261.300]
Iteration 9: 100%|██████████| 50/50 [00:11<00:00,  4.47it/s, episode_return=500, return=325.500]


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,33309.47028


[34m[1mwandb[0m: Agent Starting Run: c2iref0z with config:
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.0003421371634237181
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:00<00:00, 82.85it/s, episode_return=50, return=17.000]
Iteration 1: 100%|██████████| 50/50 [00:00<00:00, 59.24it/s, episode_return=100, return=23.500]
Iteration 2: 100%|██████████| 50/50 [00:01<00:00, 45.77it/s, episode_return=150, return=36.700]
Iteration 3: 100%|██████████| 50/50 [00:01<00:00, 32.84it/s, episode_return=200, return=30.500]
Iteration 4: 100%|██████████| 50/50 [00:01<00:00, 29.16it/s, episode_return=250, return=36.700]
Iteration 5: 100%|██████████| 50/50 [00:01<00:00, 26.95it/s, episode_return=300, return=43.400]
Iteration 6: 100%|██████████| 50/50 [00:01<00:00, 28.75it/s, episode_return=350, return=35.800]
Iteration 7: 100%|██████████| 50/50 [00:02<00:00, 20.10it/s, episode_return=400, return=52.300]
Iteration 8: 100%|██████████| 50/50 [00:02<00:00, 20.87it/s, episode_return=450, return=56.600]
Iteration 9: 100%|██████████| 50/50 [00:03<00:00, 16.46it/s, episode_return=500, return=80.100]


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,79804.68605


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: lezhot9p with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.0006436144772444968
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:00<00:00, 93.54it/s, episode_return=50, return=19.200]
Iteration 1: 100%|██████████| 50/50 [00:00<00:00, 65.07it/s, episode_return=100, return=30.700]
Iteration 2: 100%|██████████| 50/50 [00:00<00:00, 58.89it/s, episode_return=150, return=29.100]
Iteration 3: 100%|██████████| 50/50 [00:01<00:00, 39.68it/s, episode_return=200, return=47.000]
Iteration 4: 100%|██████████| 50/50 [00:01<00:00, 40.23it/s, episode_return=250, return=52.900]
Iteration 5: 100%|██████████| 50/50 [00:01<00:00, 32.97it/s, episode_return=300, return=75.500]
Iteration 6: 100%|██████████| 50/50 [00:01<00:00, 28.40it/s, episode_return=350, return=90.000]
Iteration 7: 100%|██████████| 50/50 [00:02<00:00, 23.14it/s, episode_return=400, return=74.700]
Iteration 8: 100%|██████████| 50/50 [00:03<00:00, 12.79it/s, episode_return=450, return=111.200]
Iteration 9: 100%|██████████| 50/50 [00:04<00:00, 12.08it/s, episode_return=500, return=148.700]


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,73796.24526


[34m[1mwandb[0m: Agent Starting Run: 6ldwilzh with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.0009581482078165218
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011288888888925108, max=1.0…

Iteration 0: 100%|██████████| 50/50 [00:00<00:00, 66.77it/s, episode_return=50, return=19.100]
Iteration 1: 100%|██████████| 50/50 [00:00<00:00, 61.00it/s, episode_return=100, return=35.000]
Iteration 2: 100%|██████████| 50/50 [00:00<00:00, 55.71it/s, episode_return=150, return=47.600]
Iteration 3: 100%|██████████| 50/50 [00:00<00:00, 55.97it/s, episode_return=200, return=33.900]
Iteration 4: 100%|██████████| 50/50 [00:01<00:00, 44.10it/s, episode_return=250, return=50.800]
Iteration 5: 100%|██████████| 50/50 [00:01<00:00, 32.34it/s, episode_return=300, return=81.100]
Iteration 6: 100%|██████████| 50/50 [00:02<00:00, 20.12it/s, episode_return=350, return=86.500]
Iteration 7: 100%|██████████| 50/50 [00:03<00:00, 15.72it/s, episode_return=400, return=112.200]
Iteration 8: 100%|██████████| 50/50 [00:04<00:00, 11.22it/s, episode_return=450, return=120.600]
Iteration 9: 100%|██████████| 50/50 [00:05<00:00,  9.53it/s, episode_return=500, return=136.700]


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,69599.74514


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: b95c1usu with config:
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.0004538603913727649
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:00<00:00, 62.70it/s, episode_return=50, return=33.600]
Iteration 1: 100%|██████████| 50/50 [00:01<00:00, 41.60it/s, episode_return=100, return=36.100]
Iteration 2: 100%|██████████| 50/50 [00:01<00:00, 25.04it/s, episode_return=150, return=33.500]
Iteration 3: 100%|██████████| 50/50 [00:02<00:00, 23.11it/s, episode_return=200, return=48.600]
Iteration 4: 100%|██████████| 50/50 [00:02<00:00, 18.54it/s, episode_return=250, return=64.600]
Iteration 5: 100%|██████████| 50/50 [00:03<00:00, 15.94it/s, episode_return=300, return=77.900]
Iteration 6: 100%|██████████| 50/50 [00:03<00:00, 13.07it/s, episode_return=350, return=76.700] 
Iteration 7: 100%|██████████| 50/50 [00:06<00:00,  8.05it/s, episode_return=400, return=143.800]
Iteration 8: 100%|██████████| 50/50 [00:07<00:00,  6.98it/s, episode_return=450, return=146.500]
Iteration 9: 100%|██████████| 50/50 [00:08<00:00,  6.21it/s, episode_return=500, return=148.600]


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,64963.74734


[34m[1mwandb[0m: Agent Starting Run: qax8s9ix with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.00032148010236146676
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:00<00:00, 81.62it/s, episode_return=50, return=19.700]
Iteration 1: 100%|██████████| 50/50 [00:00<00:00, 81.81it/s, episode_return=100, return=25.700]
Iteration 2: 100%|██████████| 50/50 [00:00<00:00, 74.82it/s, episode_return=150, return=21.200]
Iteration 3: 100%|██████████| 50/50 [00:00<00:00, 72.93it/s, episode_return=200, return=17.600]
Iteration 4: 100%|██████████| 50/50 [00:00<00:00, 62.07it/s, episode_return=250, return=30.200]
Iteration 5: 100%|██████████| 50/50 [00:00<00:00, 61.35it/s, episode_return=300, return=25.200]
Iteration 6: 100%|██████████| 50/50 [00:01<00:00, 44.53it/s, episode_return=350, return=39.700]
Iteration 7: 100%|██████████| 50/50 [00:00<00:00, 52.38it/s, episode_return=400, return=34.600]
Iteration 8: 100%|██████████| 50/50 [00:00<00:00, 54.90it/s, episode_return=450, return=28.700]
Iteration 9: 100%|██████████| 50/50 [00:01<00:00, 44.92it/s, episode_return=500, return=27.300]


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,84792.67296


[34m[1mwandb[0m: Agent Starting Run: 7cswgag4 with config:
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.0007241965049362192
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:00<00:00, 53.96it/s, episode_return=50, return=25.000]
Iteration 1: 100%|██████████| 50/50 [00:01<00:00, 39.48it/s, episode_return=100, return=34.600]
Iteration 2: 100%|██████████| 50/50 [00:02<00:00, 24.26it/s, episode_return=150, return=51.200]
Iteration 3: 100%|██████████| 50/50 [00:02<00:00, 17.15it/s, episode_return=200, return=54.500]
Iteration 4: 100%|██████████| 50/50 [00:03<00:00, 16.62it/s, episode_return=250, return=85.100]
Iteration 5: 100%|██████████| 50/50 [00:03<00:00, 12.61it/s, episode_return=300, return=110.900]
Iteration 6: 100%|██████████| 50/50 [00:05<00:00,  8.39it/s, episode_return=350, return=127.100]
Iteration 7: 100%|██████████| 50/50 [00:07<00:00,  7.08it/s, episode_return=400, return=123.400]
Iteration 8: 100%|██████████| 50/50 [00:08<00:00,  5.85it/s, episode_return=450, return=209.000]
Iteration 9: 100%|██████████| 50/50 [00:09<00:00,  5.55it/s, episode_return=500, return=218.100]


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,54993.14813


[34m[1mwandb[0m: Agent Starting Run: uusjnxrq with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.00018090922486336015
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:00<00:00, 72.93it/s, episode_return=50, return=28.600]
Iteration 1: 100%|██████████| 50/50 [00:00<00:00, 76.46it/s, episode_return=100, return=33.200]
Iteration 2: 100%|██████████| 50/50 [00:01<00:00, 45.68it/s, episode_return=150, return=32.500]
Iteration 3: 100%|██████████| 50/50 [00:00<00:00, 53.42it/s, episode_return=200, return=26.400]
Iteration 4: 100%|██████████| 50/50 [00:00<00:00, 62.97it/s, episode_return=250, return=19.300]
Iteration 5: 100%|██████████| 50/50 [00:01<00:00, 49.82it/s, episode_return=300, return=31.500]
Iteration 6: 100%|██████████| 50/50 [00:01<00:00, 48.26it/s, episode_return=350, return=24.800]
Iteration 7: 100%|██████████| 50/50 [00:01<00:00, 43.64it/s, episode_return=400, return=38.700]
Iteration 8: 100%|██████████| 50/50 [00:01<00:00, 45.58it/s, episode_return=450, return=40.000]
Iteration 9: 100%|██████████| 50/50 [00:01<00:00, 44.82it/s, episode_return=500, return=27.400]


VBox(children=(Label(value='0.001 MB of 0.007 MB uploaded\r'), FloatProgress(value=0.16787829833064083, max=1.…

0,1
regret,▁

0,1
regret,84019.41143


[34m[1mwandb[0m: Agent Starting Run: k2p9n8kq with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.0009306908725765296
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:00<00:00, 69.67it/s, episode_return=50, return=24.000]
Iteration 1: 100%|██████████| 50/50 [00:00<00:00, 61.68it/s, episode_return=100, return=35.600]
Iteration 2: 100%|██████████| 50/50 [00:01<00:00, 43.01it/s, episode_return=150, return=36.300]
Iteration 3: 100%|██████████| 50/50 [00:01<00:00, 41.57it/s, episode_return=200, return=56.100]
Iteration 4: 100%|██████████| 50/50 [00:01<00:00, 30.03it/s, episode_return=250, return=48.900]
Iteration 5: 100%|██████████| 50/50 [00:02<00:00, 21.73it/s, episode_return=300, return=56.500]
Iteration 6: 100%|██████████| 50/50 [00:03<00:00, 12.85it/s, episode_return=350, return=152.600]
Iteration 7: 100%|██████████| 50/50 [00:04<00:00, 10.07it/s, episode_return=400, return=181.500]
Iteration 8: 100%|██████████| 50/50 [00:05<00:00,  9.81it/s, episode_return=450, return=213.800]
Iteration 9: 100%|██████████| 50/50 [00:06<00:00,  7.17it/s, episode_return=500, return=232.500]


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,60867.52149


[34m[1mwandb[0m: Agent Starting Run: c2i7qhj1 with config:
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.0001854549847407491
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:00<00:00, 80.01it/s, episode_return=50, return=24.100]
Iteration 1: 100%|██████████| 50/50 [00:00<00:00, 54.07it/s, episode_return=100, return=20.200]
Iteration 2: 100%|██████████| 50/50 [00:01<00:00, 41.45it/s, episode_return=150, return=32.000]
Iteration 3: 100%|██████████| 50/50 [00:01<00:00, 43.53it/s, episode_return=200, return=22.500]
Iteration 4: 100%|██████████| 50/50 [00:01<00:00, 35.90it/s, episode_return=250, return=26.700]
Iteration 5: 100%|██████████| 50/50 [00:01<00:00, 30.86it/s, episode_return=300, return=32.600]
Iteration 6: 100%|██████████| 50/50 [00:01<00:00, 37.45it/s, episode_return=350, return=28.000]
Iteration 7: 100%|██████████| 50/50 [00:01<00:00, 33.10it/s, episode_return=400, return=35.400]
Iteration 8: 100%|██████████| 50/50 [00:01<00:00, 32.54it/s, episode_return=450, return=30.900]
Iteration 9: 100%|██████████| 50/50 [00:01<00:00, 27.17it/s, episode_return=500, return=45.800]


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,83893.31684


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: jl7a0riu with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.0008873949142736087
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:00<00:00, 69.34it/s, episode_return=50, return=27.600]
Iteration 1: 100%|██████████| 50/50 [00:00<00:00, 65.59it/s, episode_return=100, return=28.300]
Iteration 2: 100%|██████████| 50/50 [00:01<00:00, 43.84it/s, episode_return=150, return=56.400]
Iteration 3: 100%|██████████| 50/50 [00:01<00:00, 37.62it/s, episode_return=200, return=46.000]
Iteration 4: 100%|██████████| 50/50 [00:01<00:00, 30.89it/s, episode_return=250, return=52.500]
Iteration 5: 100%|██████████| 50/50 [00:02<00:00, 22.92it/s, episode_return=300, return=48.900]
Iteration 6: 100%|██████████| 50/50 [00:02<00:00, 23.52it/s, episode_return=350, return=65.900]
Iteration 7: 100%|██████████| 50/50 [00:02<00:00, 19.61it/s, episode_return=400, return=75.900]
Iteration 8: 100%|██████████| 50/50 [00:03<00:00, 14.07it/s, episode_return=450, return=98.700] 
Iteration 9: 100%|██████████| 50/50 [00:04<00:00, 11.47it/s, episode_return=500, return=141.200]


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,71187.99352


[34m[1mwandb[0m: Agent Starting Run: 90ygjh04 with config:
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.0008448782460285859
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:00<00:00, 58.26it/s, episode_return=50, return=27.200]
Iteration 1: 100%|██████████| 50/50 [00:01<00:00, 27.17it/s, episode_return=100, return=37.100]
Iteration 2: 100%|██████████| 50/50 [00:02<00:00, 19.08it/s, episode_return=150, return=43.900]
Iteration 3: 100%|██████████| 50/50 [00:03<00:00, 14.07it/s, episode_return=200, return=80.500] 
Iteration 4: 100%|██████████| 50/50 [00:05<00:00,  8.70it/s, episode_return=250, return=120.800]
Iteration 5: 100%|██████████| 50/50 [00:11<00:00,  4.53it/s, episode_return=300, return=326.900]
Iteration 6: 100%|██████████| 50/50 [00:12<00:00,  4.02it/s, episode_return=350, return=359.300]
Iteration 7: 100%|██████████| 50/50 [00:08<00:00,  6.04it/s, episode_return=400, return=167.400]
Iteration 8: 100%|██████████| 50/50 [00:11<00:00,  4.27it/s, episode_return=450, return=394.400]
Iteration 9: 100%|██████████| 50/50 [00:16<00:00,  2.99it/s, episode_return=500, return=331.200]


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,40843.62161


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: hlquzs7y with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 8.75814830428384e-05
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011111111111111112, max=1.0…

Iteration 0: 100%|██████████| 50/50 [00:00<00:00, 77.47it/s, episode_return=50, return=21.200]
Iteration 1: 100%|██████████| 50/50 [00:00<00:00, 80.28it/s, episode_return=100, return=20.100]
Iteration 2: 100%|██████████| 50/50 [00:00<00:00, 72.38it/s, episode_return=150, return=24.200]
Iteration 3: 100%|██████████| 50/50 [00:00<00:00, 69.64it/s, episode_return=200, return=21.300]
Iteration 4: 100%|██████████| 50/50 [00:00<00:00, 77.06it/s, episode_return=250, return=19.700]
Iteration 5: 100%|██████████| 50/50 [00:00<00:00, 72.86it/s, episode_return=300, return=23.300]
Iteration 6: 100%|██████████| 50/50 [00:00<00:00, 67.14it/s, episode_return=350, return=21.100]
Iteration 7: 100%|██████████| 50/50 [00:00<00:00, 74.32it/s, episode_return=400, return=23.300]
Iteration 8: 100%|██████████| 50/50 [00:00<00:00, 57.38it/s, episode_return=450, return=32.100]
Iteration 9: 100%|██████████| 50/50 [00:00<00:00, 60.50it/s, episode_return=500, return=23.000]


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,85505.47322


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: i127ioku with config:
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 1.355118314802394e-05
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:01<00:00, 44.34it/s, episode_return=50, return=19.800]
Iteration 1: 100%|██████████| 50/50 [00:01<00:00, 47.48it/s, episode_return=100, return=19.500]
Iteration 2: 100%|██████████| 50/50 [00:01<00:00, 49.86it/s, episode_return=150, return=21.000]
Iteration 3: 100%|██████████| 50/50 [00:01<00:00, 48.00it/s, episode_return=200, return=19.800]
Iteration 4: 100%|██████████| 50/50 [00:01<00:00, 46.09it/s, episode_return=250, return=19.000]
Iteration 5: 100%|██████████| 50/50 [00:01<00:00, 45.20it/s, episode_return=300, return=22.300]
Iteration 6: 100%|██████████| 50/50 [00:01<00:00, 45.83it/s, episode_return=350, return=17.200]
Iteration 7: 100%|██████████| 50/50 [00:01<00:00, 45.22it/s, episode_return=400, return=20.400]
Iteration 8: 100%|██████████| 50/50 [00:01<00:00, 40.54it/s, episode_return=450, return=24.300]
Iteration 9: 100%|██████████| 50/50 [00:01<00:00, 45.28it/s, episode_return=500, return=18.900]


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,87394.96842


[34m[1mwandb[0m: Agent Starting Run: 36erks4a with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.0009481003521372822
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:00<00:00, 77.38it/s, episode_return=50, return=27.500]
Iteration 1: 100%|██████████| 50/50 [00:00<00:00, 59.37it/s, episode_return=100, return=30.500]
Iteration 2: 100%|██████████| 50/50 [00:01<00:00, 38.05it/s, episode_return=150, return=40.000]
Iteration 3: 100%|██████████| 50/50 [00:01<00:00, 33.11it/s, episode_return=200, return=41.300]
Iteration 4: 100%|██████████| 50/50 [00:02<00:00, 24.11it/s, episode_return=250, return=68.300]
Iteration 5: 100%|██████████| 50/50 [00:02<00:00, 24.34it/s, episode_return=300, return=67.400]
Iteration 6: 100%|██████████| 50/50 [00:02<00:00, 20.37it/s, episode_return=350, return=80.300]
Iteration 7: 100%|██████████| 50/50 [00:04<00:00, 12.16it/s, episode_return=400, return=167.800]
Iteration 8: 100%|██████████| 50/50 [00:04<00:00, 11.62it/s, episode_return=450, return=134.600]
Iteration 9: 100%|██████████| 50/50 [00:07<00:00,  7.11it/s, episode_return=500, return=200.700]


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,66275.35296


[34m[1mwandb[0m: Agent Starting Run: ecadepm8 with config:
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.00028543644634662326
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:00<00:00, 63.90it/s, episode_return=50, return=26.900]
Iteration 1: 100%|██████████| 50/50 [00:01<00:00, 33.03it/s, episode_return=100, return=33.400]
Iteration 2: 100%|██████████| 50/50 [00:01<00:00, 26.95it/s, episode_return=150, return=28.700]
Iteration 3: 100%|██████████| 50/50 [00:02<00:00, 23.18it/s, episode_return=200, return=40.200]
Iteration 4: 100%|██████████| 50/50 [00:02<00:00, 18.62it/s, episode_return=250, return=46.400]
Iteration 5: 100%|██████████| 50/50 [00:03<00:00, 14.50it/s, episode_return=300, return=103.400]
Iteration 6: 100%|██████████| 50/50 [00:04<00:00, 11.30it/s, episode_return=350, return=116.100]
Iteration 7: 100%|██████████| 50/50 [00:06<00:00,  7.90it/s, episode_return=400, return=160.300]
Iteration 8: 100%|██████████| 50/50 [00:07<00:00,  7.07it/s, episode_return=450, return=147.800]
Iteration 9: 100%|██████████| 50/50 [00:08<00:00,  6.08it/s, episode_return=500, return=201.100]


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,65323.16973


[34m[1mwandb[0m: Agent Starting Run: zor6d7bn with config:
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.0004257653517820208
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:01<00:00, 48.93it/s, episode_return=50, return=25.000]
Iteration 1: 100%|██████████| 50/50 [00:01<00:00, 28.99it/s, episode_return=100, return=38.900]
Iteration 2: 100%|██████████| 50/50 [00:02<00:00, 23.84it/s, episode_return=150, return=42.400]
Iteration 3: 100%|██████████| 50/50 [00:03<00:00, 15.32it/s, episode_return=200, return=86.400]
Iteration 4: 100%|██████████| 50/50 [00:04<00:00, 11.31it/s, episode_return=250, return=117.000]
Iteration 5: 100%|██████████| 50/50 [00:06<00:00,  8.14it/s, episode_return=300, return=101.500]
Iteration 6: 100%|██████████| 50/50 [00:07<00:00,  6.98it/s, episode_return=350, return=157.300]
Iteration 7: 100%|██████████| 50/50 [00:10<00:00,  4.85it/s, episode_return=400, return=217.100]
Iteration 8: 100%|██████████| 50/50 [00:09<00:00,  5.34it/s, episode_return=450, return=145.600]
Iteration 9: 100%|██████████| 50/50 [00:10<00:00,  4.96it/s, episode_return=500, return=240.900]


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,50475.01299


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 6fhullc0 with config:
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.00015371504370424057
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:01<00:00, 38.98it/s, episode_return=50, return=21.800]
Iteration 1: 100%|██████████| 50/50 [00:01<00:00, 39.87it/s, episode_return=100, return=23.700]
Iteration 2: 100%|██████████| 50/50 [00:01<00:00, 27.89it/s, episode_return=150, return=24.900]
Iteration 3: 100%|██████████| 50/50 [00:01<00:00, 32.55it/s, episode_return=200, return=32.100]
Iteration 4: 100%|██████████| 50/50 [00:01<00:00, 27.29it/s, episode_return=250, return=45.600]
Iteration 5: 100%|██████████| 50/50 [00:01<00:00, 28.88it/s, episode_return=300, return=40.300]
Iteration 6: 100%|██████████| 50/50 [00:02<00:00, 23.09it/s, episode_return=350, return=42.800]
Iteration 7: 100%|██████████| 50/50 [00:02<00:00, 18.47it/s, episode_return=400, return=55.500]
Iteration 8: 100%|██████████| 50/50 [00:02<00:00, 23.19it/s, episode_return=450, return=44.900]
Iteration 9: 100%|██████████| 50/50 [00:02<00:00, 17.82it/s, episode_return=500, return=59.000]


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,79903.57609


[34m[1mwandb[0m: Agent Starting Run: nbf53hy6 with config:
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.00032989940572168317
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:00<00:00, 79.17it/s, episode_return=50, return=20.800]
Iteration 1: 100%|██████████| 50/50 [00:00<00:00, 64.72it/s, episode_return=100, return=27.700]
Iteration 2: 100%|██████████| 50/50 [00:01<00:00, 34.08it/s, episode_return=150, return=28.400]
Iteration 3: 100%|██████████| 50/50 [00:01<00:00, 31.92it/s, episode_return=200, return=29.600]
Iteration 4: 100%|██████████| 50/50 [00:02<00:00, 23.20it/s, episode_return=250, return=49.600]
Iteration 5: 100%|██████████| 50/50 [00:02<00:00, 24.02it/s, episode_return=300, return=37.200]
Iteration 6: 100%|██████████| 50/50 [00:02<00:00, 23.58it/s, episode_return=350, return=63.100]
Iteration 7: 100%|██████████| 50/50 [00:02<00:00, 22.09it/s, episode_return=400, return=34.100]
Iteration 8: 100%|██████████| 50/50 [00:03<00:00, 16.10it/s, episode_return=450, return=79.400]
Iteration 9: 100%|██████████| 50/50 [00:03<00:00, 14.34it/s, episode_return=500, return=100.600]


VBox(children=(Label(value='0.001 MB of 0.007 MB uploaded\r'), FloatProgress(value=0.17082191780821918, max=1.…

0,1
regret,▁

0,1
regret,77552.19242


[34m[1mwandb[0m: Agent Starting Run: dskg1q38 with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.0009328820453203786
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011111111111111112, max=1.0…

Iteration 0: 100%|██████████| 50/50 [00:00<00:00, 77.99it/s, episode_return=50, return=24.300] 
Iteration 1: 100%|██████████| 50/50 [00:00<00:00, 50.75it/s, episode_return=100, return=48.600]
Iteration 2: 100%|██████████| 50/50 [00:01<00:00, 48.49it/s, episode_return=150, return=38.500]
Iteration 3: 100%|██████████| 50/50 [00:01<00:00, 36.89it/s, episode_return=200, return=44.700]
Iteration 4: 100%|██████████| 50/50 [00:01<00:00, 28.65it/s, episode_return=250, return=39.900]
Iteration 5: 100%|██████████| 50/50 [00:01<00:00, 26.92it/s, episode_return=300, return=57.100]
Iteration 6: 100%|██████████| 50/50 [00:02<00:00, 20.55it/s, episode_return=350, return=88.200]
Iteration 7: 100%|██████████| 50/50 [00:03<00:00, 14.84it/s, episode_return=400, return=111.600]
Iteration 8: 100%|██████████| 50/50 [00:06<00:00,  7.85it/s, episode_return=450, return=273.600]
Iteration 9: 100%|██████████| 50/50 [00:07<00:00,  6.54it/s, episode_return=500, return=163.500]


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,64924.25409


[34m[1mwandb[0m: Agent Starting Run: n6ru054u with config:
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.0003644426344101778
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:00<00:00, 62.30it/s, episode_return=50, return=24.500]
Iteration 1: 100%|██████████| 50/50 [00:01<00:00, 34.10it/s, episode_return=100, return=37.300]
Iteration 2: 100%|██████████| 50/50 [00:02<00:00, 21.83it/s, episode_return=150, return=43.700]
Iteration 3: 100%|██████████| 50/50 [00:03<00:00, 15.79it/s, episode_return=200, return=59.400]
Iteration 4: 100%|██████████| 50/50 [00:02<00:00, 21.49it/s, episode_return=250, return=59.300]
Iteration 5: 100%|██████████| 50/50 [00:04<00:00, 12.31it/s, episode_return=300, return=66.600]
Iteration 6: 100%|██████████| 50/50 [00:04<00:00, 11.47it/s, episode_return=350, return=101.800]
Iteration 7: 100%|██████████| 50/50 [00:05<00:00,  8.76it/s, episode_return=400, return=123.400]
Iteration 8: 100%|██████████| 50/50 [00:09<00:00,  5.19it/s, episode_return=450, return=139.600]
Iteration 9: 100%|██████████| 50/50 [00:10<00:00,  4.90it/s, episode_return=500, return=171.800]


VBox(children=(Label(value='0.001 MB of 0.007 MB uploaded\r'), FloatProgress(value=0.1705182551620402, max=1.0…

0,1
regret,▁

0,1
regret,63373.97396


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 5qhfxs62 with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.0003614697364546937
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:00<00:00, 82.82it/s, episode_return=50, return=21.000]
Iteration 1: 100%|██████████| 50/50 [00:00<00:00, 73.63it/s, episode_return=100, return=28.100]
Iteration 2: 100%|██████████| 50/50 [00:00<00:00, 69.98it/s, episode_return=150, return=26.200]
Iteration 3: 100%|██████████| 50/50 [00:00<00:00, 58.23it/s, episode_return=200, return=29.200]
Iteration 4: 100%|██████████| 50/50 [00:01<00:00, 49.20it/s, episode_return=250, return=52.300]
Iteration 5: 100%|██████████| 50/50 [00:01<00:00, 45.38it/s, episode_return=300, return=41.700]
Iteration 6: 100%|██████████| 50/50 [00:01<00:00, 36.81it/s, episode_return=350, return=48.100]
Iteration 7: 100%|██████████| 50/50 [00:01<00:00, 31.53it/s, episode_return=400, return=38.800]
Iteration 8: 100%|██████████| 50/50 [00:01<00:00, 30.11it/s, episode_return=450, return=51.600]
Iteration 9: 100%|██████████| 50/50 [00:02<00:00, 24.74it/s, episode_return=500, return=75.800]


VBox(children=(Label(value='0.001 MB of 0.007 MB uploaded\r'), FloatProgress(value=0.1709155701754386, max=1.0…

0,1
regret,▁

0,1
regret,81179.37326


[34m[1mwandb[0m: Agent Starting Run: yql6f1qz with config:
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 7.680522552093799e-05
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:00<00:00, 69.55it/s, episode_return=50, return=14.300]
Iteration 1: 100%|██████████| 50/50 [00:01<00:00, 47.55it/s, episode_return=100, return=23.000]
Iteration 2: 100%|██████████| 50/50 [00:01<00:00, 35.87it/s, episode_return=150, return=29.200]
Iteration 3: 100%|██████████| 50/50 [00:01<00:00, 39.75it/s, episode_return=200, return=20.300]
Iteration 4: 100%|██████████| 50/50 [00:01<00:00, 36.74it/s, episode_return=250, return=26.700]
Iteration 5: 100%|██████████| 50/50 [00:01<00:00, 26.80it/s, episode_return=300, return=31.300]
Iteration 6: 100%|██████████| 50/50 [00:01<00:00, 33.90it/s, episode_return=350, return=21.200]
Iteration 7: 100%|██████████| 50/50 [00:01<00:00, 38.11it/s, episode_return=400, return=26.000]
Iteration 8: 100%|██████████| 50/50 [00:01<00:00, 29.34it/s, episode_return=450, return=43.800]
Iteration 9: 100%|██████████| 50/50 [00:01<00:00, 25.31it/s, episode_return=500, return=36.400]


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,84421.01728


[34m[1mwandb[0m: Agent Starting Run: v2d62zq9 with config:
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.0007039488090614589
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:00<00:00, 81.67it/s, episode_return=50, return=18.700]
Iteration 1: 100%|██████████| 50/50 [00:01<00:00, 47.81it/s, episode_return=100, return=38.500]
Iteration 2: 100%|██████████| 50/50 [00:01<00:00, 26.91it/s, episode_return=150, return=57.200]
Iteration 3: 100%|██████████| 50/50 [00:01<00:00, 28.83it/s, episode_return=200, return=42.800]
Iteration 4: 100%|██████████| 50/50 [00:03<00:00, 15.26it/s, episode_return=250, return=63.900]
Iteration 5: 100%|██████████| 50/50 [00:04<00:00, 11.90it/s, episode_return=300, return=106.300]
Iteration 6: 100%|██████████| 50/50 [00:07<00:00,  6.82it/s, episode_return=350, return=173.700]
Iteration 7: 100%|██████████| 50/50 [00:09<00:00,  5.50it/s, episode_return=400, return=188.300]
Iteration 8: 100%|██████████| 50/50 [00:11<00:00,  4.45it/s, episode_return=450, return=276.000]
Iteration 9: 100%|██████████| 50/50 [00:14<00:00,  3.53it/s, episode_return=500, return=308.800]


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,54020.15092


[34m[1mwandb[0m: Agent Starting Run: ost89n41 with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.0004416632381863911
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:01<00:00, 40.82it/s, episode_return=50, return=33.700]
Iteration 1: 100%|██████████| 50/50 [00:01<00:00, 33.17it/s, episode_return=100, return=27.800]
Iteration 2: 100%|██████████| 50/50 [00:01<00:00, 31.29it/s, episode_return=150, return=33.700]
Iteration 3: 100%|██████████| 50/50 [00:01<00:00, 27.55it/s, episode_return=200, return=37.700]
Iteration 4: 100%|██████████| 50/50 [00:02<00:00, 24.09it/s, episode_return=250, return=39.400]
Iteration 5: 100%|██████████| 50/50 [00:01<00:00, 26.59it/s, episode_return=300, return=27.200]
Iteration 6: 100%|██████████| 50/50 [00:02<00:00, 22.64it/s, episode_return=350, return=41.800]
Iteration 7: 100%|██████████| 50/50 [00:02<00:00, 23.49it/s, episode_return=400, return=40.600]
Iteration 8: 100%|██████████| 50/50 [00:02<00:00, 22.13it/s, episode_return=450, return=53.400]
Iteration 9: 100%|██████████| 50/50 [00:02<00:00, 19.42it/s, episode_return=500, return=47.300]


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,80015.0667


[34m[1mwandb[0m: Agent Starting Run: i62gxns0 with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.0005070639803495613
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:01<00:00, 38.77it/s, episode_return=50, return=30.300]
Iteration 1: 100%|██████████| 50/50 [00:01<00:00, 49.06it/s, episode_return=100, return=30.600]
Iteration 2: 100%|██████████| 50/50 [00:01<00:00, 33.93it/s, episode_return=150, return=29.200]
Iteration 3: 100%|██████████| 50/50 [00:01<00:00, 33.16it/s, episode_return=200, return=45.900]
Iteration 4: 100%|██████████| 50/50 [00:01<00:00, 28.96it/s, episode_return=250, return=42.000]
Iteration 5: 100%|██████████| 50/50 [00:01<00:00, 25.97it/s, episode_return=300, return=47.500]
Iteration 6: 100%|██████████| 50/50 [00:02<00:00, 21.64it/s, episode_return=350, return=37.100]
Iteration 7: 100%|██████████| 50/50 [00:01<00:00, 25.25it/s, episode_return=400, return=52.600]
Iteration 8: 100%|██████████| 50/50 [00:02<00:00, 17.57it/s, episode_return=450, return=57.100]
Iteration 9: 100%|██████████| 50/50 [00:03<00:00, 16.62it/s, episode_return=500, return=51.800]


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,78320.5953


[34m[1mwandb[0m: Agent Starting Run: z8gse3vi with config:
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.00026598581892854433
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:01<00:00, 47.67it/s, episode_return=50, return=24.900]
Iteration 1: 100%|██████████| 50/50 [00:01<00:00, 43.19it/s, episode_return=100, return=25.300]
Iteration 2: 100%|██████████| 50/50 [00:01<00:00, 35.47it/s, episode_return=150, return=33.800]
Iteration 3: 100%|██████████| 50/50 [00:01<00:00, 42.06it/s, episode_return=200, return=33.600]
Iteration 4: 100%|██████████| 50/50 [00:01<00:00, 34.34it/s, episode_return=250, return=38.100]
Iteration 5: 100%|██████████| 50/50 [00:01<00:00, 29.17it/s, episode_return=300, return=38.500]
Iteration 6: 100%|██████████| 50/50 [00:01<00:00, 32.18it/s, episode_return=350, return=40.400]
Iteration 7: 100%|██████████| 50/50 [00:01<00:00, 27.59it/s, episode_return=400, return=46.200]
Iteration 8: 100%|██████████| 50/50 [00:01<00:00, 28.24it/s, episode_return=450, return=54.500]
Iteration 9: 100%|██████████| 50/50 [00:01<00:00, 26.57it/s, episode_return=500, return=50.600]


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,79507.81694


[34m[1mwandb[0m: Agent Starting Run: elsym6s2 with config:
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.0007668197938719152
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:01<00:00, 26.87it/s, episode_return=50, return=59.900]
Iteration 1: 100%|██████████| 50/50 [00:02<00:00, 22.44it/s, episode_return=100, return=55.500]
Iteration 2: 100%|██████████| 50/50 [00:03<00:00, 14.26it/s, episode_return=150, return=95.200]
Iteration 3: 100%|██████████| 50/50 [00:06<00:00,  7.63it/s, episode_return=200, return=166.200]
Iteration 4: 100%|██████████| 50/50 [00:08<00:00,  5.82it/s, episode_return=250, return=182.700]
Iteration 5: 100%|██████████| 50/50 [00:07<00:00,  6.74it/s, episode_return=300, return=169.300]
Iteration 6: 100%|██████████| 50/50 [00:11<00:00,  4.53it/s, episode_return=350, return=289.400]
Iteration 7: 100%|██████████| 50/50 [00:06<00:00,  8.19it/s, episode_return=400, return=93.900] 
Iteration 8: 100%|██████████| 50/50 [00:05<00:00,  9.14it/s, episode_return=450, return=148.200]
Iteration 9: 100%|██████████| 50/50 [00:11<00:00,  4.43it/s, episode_return=500, return=360.200]


VBox(children=(Label(value='0.001 MB of 0.007 MB uploaded\r'), FloatProgress(value=0.17023890784982934, max=1.…

0,1
regret,▁

0,1
regret,37445.43302


[34m[1mwandb[0m: Agent Starting Run: ikbb4g52 with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.000531331974149722
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:00<00:00, 79.84it/s, episode_return=50, return=20.800]
Iteration 1: 100%|██████████| 50/50 [00:00<00:00, 55.02it/s, episode_return=100, return=21.600]
Iteration 2: 100%|██████████| 50/50 [00:00<00:00, 53.39it/s, episode_return=150, return=32.600]
Iteration 3: 100%|██████████| 50/50 [00:01<00:00, 46.90it/s, episode_return=200, return=44.400]
Iteration 4: 100%|██████████| 50/50 [00:01<00:00, 48.45it/s, episode_return=250, return=33.600]
Iteration 5: 100%|██████████| 50/50 [00:01<00:00, 42.89it/s, episode_return=300, return=59.300]
Iteration 6: 100%|██████████| 50/50 [00:01<00:00, 38.35it/s, episode_return=350, return=59.100]
Iteration 7: 100%|██████████| 50/50 [00:01<00:00, 36.65it/s, episode_return=400, return=60.300]
Iteration 8: 100%|██████████| 50/50 [00:01<00:00, 34.02it/s, episode_return=450, return=69.600]
Iteration 9: 100%|██████████| 50/50 [00:01<00:00, 27.49it/s, episode_return=500, return=72.100]


VBox(children=(Label(value='0.001 MB of 0.007 MB uploaded\r'), FloatProgress(value=0.17093899931459905, max=1.…

0,1
regret,▁

0,1
regret,78640.69602


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: p5qr5hqd with config:
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.0008842844680140334
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:00<00:00, 50.48it/s, episode_return=50, return=35.600]
Iteration 1: 100%|██████████| 50/50 [00:03<00:00, 14.68it/s, episode_return=100, return=68.800]
Iteration 2: 100%|██████████| 50/50 [00:05<00:00,  9.85it/s, episode_return=150, return=105.600]
Iteration 3: 100%|██████████| 50/50 [00:09<00:00,  5.42it/s, episode_return=200, return=226.100]
Iteration 4: 100%|██████████| 50/50 [00:09<00:00,  5.26it/s, episode_return=250, return=203.200]
Iteration 5: 100%|██████████| 50/50 [00:09<00:00,  5.38it/s, episode_return=300, return=221.700]
Iteration 6: 100%|██████████| 50/50 [00:11<00:00,  4.52it/s, episode_return=350, return=355.600]
Iteration 7: 100%|██████████| 50/50 [00:17<00:00,  2.85it/s, episode_return=400, return=337.900]
Iteration 8: 100%|██████████| 50/50 [00:16<00:00,  3.10it/s, episode_return=450, return=212.000]
Iteration 9: 100%|██████████| 50/50 [00:16<00:00,  3.01it/s, episode_return=500, return=460.600]


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))