In [2]:
import gym
import wandb
import torch
import numpy as np
from tqdm import tqdm
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F

# Define Policy and REINFORCE classes

class Policy(nn.Module):
    def __init__(self, state_dim, hidden_dim, action_dim):
        super(Policy, self).__init__()
        self.fc1 = torch.nn.Linear(state_dim, hidden_dim)
        self.fc2 = torch.nn.Linear(hidden_dim, action_dim)

    def forward(self, x):
        x = F.relu(self.fc1(x))
        return F.softmax(self.fc2(x), dim=1)

class REINFORCE:
    def __init__(self, state_dim, hidden_dim, action_dim, learning_rate, gamma, device):
        self.policy_net = Policy(state_dim, hidden_dim, action_dim).to(device)
        self.optimizer = torch.optim.Adam(self.policy_net.parameters(), lr=learning_rate)
        self.gamma = gamma
        self.device = device

    def take_action(self, state):
        state = torch.tensor(state.reshape(1, -1), dtype=torch.float).to(self.device) 
        probs = self.policy_net(state)
        action_dist = torch.distributions.Categorical(probs) 
        action = action_dist.sample()
        return action.item()  

    def update(self, transition_dict):
        reward_list = transition_dict['rewards']
        state_list = transition_dict['states']
        action_list = transition_dict['actions']

        G = 0
        self.optimizer.zero_grad()
        for i in reversed(range(len(reward_list))):
            reward = reward_list[i]
            state = torch.tensor(state_list[i].reshape(1, -1), dtype=torch.float).to(self.device)
            action = torch.tensor(action_list[i]).view(-1, 1).to(self.device)
            log_prob = torch.log(self.policy_net(state).gather(1, action))
            G = self.gamma * G + reward
            loss = -log_prob * G
            loss.backward()
        self.optimizer.step()

# Hyperparameters
learning_rate = 1e-3
gamma = 0.99
hidden_dim = 128
num_pbar = 10
num_episodes = 500
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
env_name = "Acrobot-v1"

# Define the sweep configuration
sweep_config = {
    "method": "bayes",
    "metric": {"name": "regret", "goal": "minimize"},
    "parameters": {
        "learning_rate": {"min": 1e-5, "max": 1e-2},
        "hidden_dim": {"values": [64, 128, 256]}
    },
    "early_terminate": {
        "type": "hyperband",
        "min_iter": 3,
        "max_iter": 100
}
}
# Initialize the sweep
sweep_id = wandb.sweep(sweep_config)

def train(learning_rate, hidden_dim):
    env = gym.make(env_name)
    env.reset(seed=0)
    state_dim = env.observation_space.shape[0]
    action_dim = env.action_space.n
    agent = REINFORCE(state_dim, hidden_dim, action_dim, learning_rate, gamma, device)
    return_list = []

    for i in range(num_pbar):
        with tqdm(total=int(num_episodes/num_pbar), desc='Iteration %d' % i) as pbar:
            for i_episode in range(int(num_episodes/num_pbar)):
                episode_return = 0
                transition_dict = {
                    "states": [],
                    "actions": [],
                    "next_states": [],
                    "rewards": [],
                    "dones": []
                }
                state, _ = env.reset()
                terminated, truncated = False, False
                while not terminated and not truncated:
                    action = agent.take_action(state)
                    next_state, reward, terminated, truncated, _ = env.step(action)
                    transition_dict["states"].append(state)
                    transition_dict["actions"].append(action)
                    transition_dict["next_states"].append(next_state)
                    transition_dict["rewards"].append(reward)
                    transition_dict["dones"].append(terminated)
                    state = next_state
                    episode_return += reward
                return_list.append(episode_return)
                agent.update(transition_dict)
                if (i_episode+1)%10 == 0:
                    pbar.set_postfix({
                        'episode_return': '%d' % (num_episodes/num_pbar*i+i_episode+1),
                        'return': '%.3f' % np.mean(return_list[-10:])
                    })
                pbar.update(1)

    avg = []
    for i in range(num_episodes):
        if i < 100:
            avg_return = np.mean(return_list[:i+1])
        else:
            avg_return = np.mean(return_list[i-100:i+1])
        avg.append(avg_return)

    regret=np.sum(avg)

    return -(regret)

def run_training():
    config_defaults = {
        "learning_rate": 1e-3,
        "hidden_dim": 128
    }
    config = wandb.init(config=config_defaults,project="Acrobot_reinforce_without_baseline")
    learning_rate = config.config["learning_rate"]
    hidden_dim = config.config["hidden_dim"]
    regret = train(learning_rate, hidden_dim)
    wandb.log({"regret": regret})

# Run the sweep
wandb.agent(sweep_id, function=run_training)


Create sweep with ID: yivd2k4v
Sweep URL: https://wandb.ai/rl_shobhith/uncategorized/sweeps/yivd2k4v


[34m[1mwandb[0m: Agent Starting Run: x9tzpmyb with config:
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.0034544734589122225
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


  if not isinstance(terminated, (bool, np.bool8)):
Iteration 0: 100%|██████████| 50/50 [00:21<00:00,  2.28it/s, episode_return=50, return=-262.200]
Iteration 1: 100%|██████████| 50/50 [00:07<00:00,  6.72it/s, episode_return=100, return=-138.500]
Iteration 2: 100%|██████████| 50/50 [00:05<00:00,  8.47it/s, episode_return=150, return=-104.600]
Iteration 3: 100%|██████████| 50/50 [00:04<00:00, 10.11it/s, episode_return=200, return=-98.700] 
Iteration 4: 100%|██████████| 50/50 [00:06<00:00,  8.12it/s, episode_return=250, return=-104.500]
Iteration 5: 100%|██████████| 50/50 [00:05<00:00,  8.93it/s, episode_return=300, return=-92.100] 
Iteration 6: 100%|██████████| 50/50 [00:05<00:00,  8.79it/s, episode_return=350, return=-84.900] 
Iteration 7: 100%|██████████| 50/50 [00:05<00:00,  8.90it/s, episode_return=400, return=-89.200] 
Iteration 8: 100%|██████████| 50/50 [00:05<00:00,  8.44it/s, episode_return=450, return=-116.000]
Iteration 9: 100%|██████████| 50/50 [00:05<00:00,  9.28it/s, episode

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,88398.84219


[34m[1mwandb[0m: Agent Starting Run: 3i1ptoje with config:
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.006770331299173917
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:07<00:00,  6.28it/s, episode_return=50, return=-189.000]
Iteration 1: 100%|██████████| 50/50 [00:04<00:00, 11.09it/s, episode_return=100, return=-105.800]
Iteration 2: 100%|██████████| 50/50 [00:03<00:00, 12.57it/s, episode_return=150, return=-97.800] 
Iteration 3: 100%|██████████| 50/50 [00:03<00:00, 12.56it/s, episode_return=200, return=-104.900]
Iteration 4: 100%|██████████| 50/50 [00:04<00:00, 12.12it/s, episode_return=250, return=-90.700] 
Iteration 5: 100%|██████████| 50/50 [00:05<00:00,  8.83it/s, episode_return=300, return=-129.300]
Iteration 6: 100%|██████████| 50/50 [00:04<00:00, 11.10it/s, episode_return=350, return=-103.800]
Iteration 7: 100%|██████████| 50/50 [00:04<00:00, 11.74it/s, episode_return=400, return=-98.900] 
Iteration 8: 100%|██████████| 50/50 [00:04<00:00, 11.17it/s, episode_return=450, return=-80.600] 
Iteration 9: 100%|██████████| 50/50 [00:04<00:00, 11.02it/s, episode_return=500, return=-97.700]


VBox(children=(Label(value='0.001 MB of 0.007 MB uploaded\r'), FloatProgress(value=0.16722542577444013, max=1.…

0,1
regret,▁

0,1
regret,61016.15872


[34m[1mwandb[0m: Agent Starting Run: qq8dt6g1 with config:
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.006817300816763001
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:21<00:00,  2.36it/s, episode_return=50, return=-419.200]
Iteration 1: 100%|██████████| 50/50 [00:17<00:00,  2.80it/s, episode_return=100, return=-172.000]
Iteration 2: 100%|██████████| 50/50 [00:05<00:00,  9.98it/s, episode_return=150, return=-102.600]
Iteration 3: 100%|██████████| 50/50 [00:05<00:00,  8.61it/s, episode_return=200, return=-94.200] 
Iteration 4: 100%|██████████| 50/50 [00:06<00:00,  8.30it/s, episode_return=250, return=-113.100]
Iteration 5: 100%|██████████| 50/50 [00:06<00:00,  7.77it/s, episode_return=300, return=-90.400] 
Iteration 6: 100%|██████████| 50/50 [00:06<00:00,  7.63it/s, episode_return=350, return=-97.100] 
Iteration 7: 100%|██████████| 50/50 [00:06<00:00,  8.21it/s, episode_return=400, return=-111.500]
Iteration 8: 100%|██████████| 50/50 [00:06<00:00,  8.15it/s, episode_return=450, return=-90.100] 
Iteration 9: 100%|██████████| 50/50 [00:04<00:00, 10.35it/s, episode_return=500, return=-94.400]


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,102919.22348


[34m[1mwandb[0m: Agent Starting Run: b31wcyd9 with config:
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.005495405683931503
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:17<00:00,  2.78it/s, episode_return=50, return=-500.000]
Iteration 1: 100%|██████████| 50/50 [00:25<00:00,  1.97it/s, episode_return=100, return=-500.000]
Iteration 2: 100%|██████████| 50/50 [00:26<00:00,  1.86it/s, episode_return=150, return=-500.000]
Iteration 3: 100%|██████████| 50/50 [00:26<00:00,  1.88it/s, episode_return=200, return=-500.000]
Iteration 4: 100%|██████████| 50/50 [00:25<00:00,  1.93it/s, episode_return=250, return=-500.000]
Iteration 5: 100%|██████████| 50/50 [00:25<00:00,  1.95it/s, episode_return=300, return=-500.000]
Iteration 6: 100%|██████████| 50/50 [00:27<00:00,  1.84it/s, episode_return=350, return=-500.000]
Iteration 7: 100%|██████████| 50/50 [00:26<00:00,  1.89it/s, episode_return=400, return=-500.000]
Iteration 8: 100%|██████████| 50/50 [00:26<00:00,  1.87it/s, episode_return=450, return=-500.000]
Iteration 9: 100%|██████████| 50/50 [00:26<00:00,  1.88it/s, episode_return=500, return=-500.000]


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,235592.34531


[34m[1mwandb[0m: Agent Starting Run: i35d0ktp with config:
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.008128049410257755
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:22<00:00,  2.27it/s, episode_return=50, return=-500.000]
Iteration 1: 100%|██████████| 50/50 [00:21<00:00,  2.30it/s, episode_return=100, return=-500.000]
Iteration 2: 100%|██████████| 50/50 [00:21<00:00,  2.31it/s, episode_return=150, return=-500.000]
Iteration 3: 100%|██████████| 50/50 [00:22<00:00,  2.24it/s, episode_return=200, return=-500.000]
Iteration 4: 100%|██████████| 50/50 [00:21<00:00,  2.35it/s, episode_return=250, return=-500.000]
Iteration 5: 100%|██████████| 50/50 [00:22<00:00,  2.26it/s, episode_return=300, return=-500.000]
Iteration 6: 100%|██████████| 50/50 [00:22<00:00,  2.24it/s, episode_return=350, return=-500.000]
Iteration 7: 100%|██████████| 50/50 [00:21<00:00,  2.28it/s, episode_return=400, return=-500.000]
Iteration 8: 100%|██████████| 50/50 [00:21<00:00,  2.36it/s, episode_return=450, return=-500.000]
Iteration 9: 100%|██████████| 50/50 [00:20<00:00,  2.47it/s, episode_return=500, return=-500.000]


VBox(children=(Label(value='0.001 MB of 0.007 MB uploaded\r'), FloatProgress(value=0.1680830972615675, max=1.0…

0,1
regret,▁

0,1
regret,246751.44709


[34m[1mwandb[0m: Agent Starting Run: irb4c94c with config:
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.00900683959142062
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011277777777286247, max=1.0…

Iteration 0: 100%|██████████| 50/50 [00:14<00:00,  3.56it/s, episode_return=50, return=-217.500]
Iteration 1: 100%|██████████| 50/50 [00:10<00:00,  4.66it/s, episode_return=100, return=-171.200]
Iteration 2: 100%|██████████| 50/50 [00:08<00:00,  5.60it/s, episode_return=150, return=-154.900]
Iteration 3: 100%|██████████| 50/50 [00:09<00:00,  5.14it/s, episode_return=200, return=-201.000]
Iteration 4: 100%|██████████| 50/50 [00:11<00:00,  4.45it/s, episode_return=250, return=-174.400]
Iteration 5: 100%|██████████| 50/50 [00:09<00:00,  5.20it/s, episode_return=300, return=-185.800]
Iteration 6: 100%|██████████| 50/50 [00:09<00:00,  5.13it/s, episode_return=350, return=-159.600]
Iteration 7: 100%|██████████| 50/50 [00:10<00:00,  4.99it/s, episode_return=400, return=-182.100]
Iteration 8: 100%|██████████| 50/50 [00:09<00:00,  5.15it/s, episode_return=450, return=-166.400]
Iteration 9: 100%|██████████| 50/50 [00:09<00:00,  5.14it/s, episode_return=500, return=-167.800]


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,101931.32348


[34m[1mwandb[0m: Agent Starting Run: vt4724ru with config:
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.005593329198501046
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:24<00:00,  2.02it/s, episode_return=50, return=-500.000]
Iteration 1: 100%|██████████| 50/50 [00:26<00:00,  1.90it/s, episode_return=100, return=-500.000]
Iteration 2: 100%|██████████| 50/50 [00:25<00:00,  1.99it/s, episode_return=150, return=-500.000]
Iteration 3: 100%|██████████| 50/50 [00:25<00:00,  1.96it/s, episode_return=200, return=-500.000]
Iteration 4: 100%|██████████| 50/50 [00:24<00:00,  2.06it/s, episode_return=250, return=-500.000]
Iteration 5: 100%|██████████| 50/50 [00:25<00:00,  1.97it/s, episode_return=300, return=-500.000]
Iteration 6: 100%|██████████| 50/50 [00:24<00:00,  2.05it/s, episode_return=350, return=-500.000]
Iteration 7: 100%|██████████| 50/50 [00:24<00:00,  2.02it/s, episode_return=400, return=-500.000]
Iteration 8: 100%|██████████| 50/50 [00:24<00:00,  2.01it/s, episode_return=450, return=-500.000]
Iteration 9: 100%|██████████| 50/50 [00:24<00:00,  2.00it/s, episode_return=500, return=-500.000]


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,240845.3702


[34m[1mwandb[0m: Agent Starting Run: rj7666g7 with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.005279820512891518
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:13<00:00,  3.82it/s, episode_return=50, return=-221.400]
Iteration 1: 100%|██████████| 50/50 [00:06<00:00,  8.33it/s, episode_return=100, return=-177.000]
Iteration 2: 100%|██████████| 50/50 [00:05<00:00,  8.40it/s, episode_return=150, return=-141.000]
Iteration 3: 100%|██████████| 50/50 [00:04<00:00, 10.81it/s, episode_return=200, return=-106.100]
Iteration 4: 100%|██████████| 50/50 [00:04<00:00, 11.65it/s, episode_return=250, return=-105.700]
Iteration 5: 100%|██████████| 50/50 [00:03<00:00, 12.65it/s, episode_return=300, return=-97.400] 
Iteration 6: 100%|██████████| 50/50 [00:04<00:00, 11.71it/s, episode_return=350, return=-88.400] 
Iteration 7: 100%|██████████| 50/50 [00:04<00:00, 11.20it/s, episode_return=400, return=-105.000]
Iteration 8: 100%|██████████| 50/50 [00:03<00:00, 13.09it/s, episode_return=450, return=-113.100]
Iteration 9: 100%|██████████| 50/50 [00:03<00:00, 13.01it/s, episode_return=500, return=-86.100] 


VBox(children=(Label(value='0.007 MB of 0.007 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,84519.69847


[34m[1mwandb[0m: Agent Starting Run: ji9ald4o with config:
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.0026849068411551697
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:13<00:00,  3.64it/s, episode_return=50, return=-159.200]
Iteration 1: 100%|██████████| 50/50 [00:08<00:00,  5.57it/s, episode_return=100, return=-196.100]
Iteration 2: 100%|██████████| 50/50 [00:08<00:00,  5.64it/s, episode_return=150, return=-213.300]
Iteration 3: 100%|██████████| 50/50 [00:08<00:00,  6.16it/s, episode_return=200, return=-184.400]
Iteration 4: 100%|██████████| 50/50 [00:08<00:00,  5.94it/s, episode_return=250, return=-220.400]
Iteration 5: 100%|██████████| 50/50 [00:13<00:00,  3.77it/s, episode_return=300, return=-310.400]
Iteration 6: 100%|██████████| 50/50 [00:20<00:00,  2.47it/s, episode_return=350, return=-500.000]
Iteration 7: 100%|██████████| 50/50 [00:21<00:00,  2.31it/s, episode_return=400, return=-500.000]
Iteration 8: 100%|██████████| 50/50 [00:22<00:00,  2.24it/s, episode_return=450, return=-500.000]
Iteration 9: 100%|██████████| 50/50 [00:21<00:00,  2.30it/s, episode_return=500, return=-500.000]


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,157785.51669


[34m[1mwandb[0m: Agent Starting Run: cvve4286 with config:
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.006778444897275646
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:09<00:00,  5.00it/s, episode_return=50, return=-109.500]
Iteration 1: 100%|██████████| 50/50 [00:09<00:00,  5.29it/s, episode_return=100, return=-108.600]
Iteration 2: 100%|██████████| 50/50 [00:06<00:00,  8.31it/s, episode_return=150, return=-95.800] 
Iteration 3: 100%|██████████| 50/50 [00:11<00:00,  4.48it/s, episode_return=200, return=-423.700]
Iteration 4: 100%|██████████| 50/50 [00:28<00:00,  1.76it/s, episode_return=250, return=-500.000]
Iteration 5: 100%|██████████| 50/50 [00:33<00:00,  1.51it/s, episode_return=300, return=-500.000]
Iteration 6: 100%|██████████| 50/50 [00:31<00:00,  1.57it/s, episode_return=350, return=-500.000]
Iteration 7: 100%|██████████| 50/50 [00:31<00:00,  1.59it/s, episode_return=400, return=-500.000]
Iteration 8: 100%|██████████| 50/50 [00:32<00:00,  1.56it/s, episode_return=450, return=-500.000]
Iteration 9: 100%|██████████| 50/50 [00:26<00:00,  1.86it/s, episode_return=500, return=-500.000]


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,160930.36785


[34m[1mwandb[0m: Agent Starting Run: 521q2i00 with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.006795384419294438
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:16<00:00,  2.99it/s, episode_return=50, return=-500.000]
Iteration 1: 100%|██████████| 50/50 [00:15<00:00,  3.19it/s, episode_return=100, return=-500.000]
Iteration 2: 100%|██████████| 50/50 [00:15<00:00,  3.13it/s, episode_return=150, return=-500.000]
Iteration 3: 100%|██████████| 50/50 [00:16<00:00,  3.10it/s, episode_return=200, return=-500.000]
Iteration 4: 100%|██████████| 50/50 [00:16<00:00,  2.98it/s, episode_return=250, return=-500.000]
Iteration 5: 100%|██████████| 50/50 [00:14<00:00,  3.33it/s, episode_return=300, return=-407.400]
Iteration 6: 100%|██████████| 50/50 [00:15<00:00,  3.20it/s, episode_return=350, return=-500.000]
Iteration 7: 100%|██████████| 50/50 [00:15<00:00,  3.32it/s, episode_return=400, return=-500.000]
Iteration 8: 100%|██████████| 50/50 [00:16<00:00,  3.09it/s, episode_return=450, return=-500.000]
Iteration 9: 100%|██████████| 50/50 [00:17<00:00,  2.81it/s, episode_return=500, return=-500.000]


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,247666.0


[34m[1mwandb[0m: Agent Starting Run: tf6n0y2i with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.008108650677199437
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:17<00:00,  2.83it/s, episode_return=50, return=-500.000]
Iteration 1: 100%|██████████| 50/50 [00:20<00:00,  2.42it/s, episode_return=100, return=-500.000]
Iteration 2: 100%|██████████| 50/50 [00:17<00:00,  2.88it/s, episode_return=150, return=-500.000]
Iteration 3: 100%|██████████| 50/50 [00:17<00:00,  2.80it/s, episode_return=200, return=-500.000]
Iteration 4: 100%|██████████| 50/50 [00:16<00:00,  2.96it/s, episode_return=250, return=-500.000]
Iteration 5: 100%|██████████| 50/50 [00:17<00:00,  2.91it/s, episode_return=300, return=-500.000]
Iteration 6: 100%|██████████| 50/50 [00:17<00:00,  2.81it/s, episode_return=350, return=-500.000]
Iteration 7: 100%|██████████| 50/50 [00:16<00:00,  3.08it/s, episode_return=400, return=-500.000]
Iteration 8: 100%|██████████| 50/50 [00:16<00:00,  3.09it/s, episode_return=450, return=-500.000]
Iteration 9: 100%|██████████| 50/50 [00:16<00:00,  2.97it/s, episode_return=500, return=-500.000]


VBox(children=(Label(value='0.001 MB of 0.007 MB uploaded\r'), FloatProgress(value=0.16869588744588745, max=1.…

0,1
regret,▁

0,1
regret,249487.44526


[34m[1mwandb[0m: Agent Starting Run: 38t1e2g5 with config:
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.005605650705137361
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:14<00:00,  3.57it/s, episode_return=50, return=-181.700]
Iteration 1: 100%|██████████| 50/50 [00:12<00:00,  4.05it/s, episode_return=100, return=-415.500]
Iteration 2: 100%|██████████| 50/50 [00:24<00:00,  2.07it/s, episode_return=150, return=-500.000]
Iteration 3: 100%|██████████| 50/50 [00:29<00:00,  1.67it/s, episode_return=200, return=-500.000]
Iteration 4: 100%|██████████| 50/50 [00:30<00:00,  1.63it/s, episode_return=250, return=-500.000]
Iteration 5: 100%|██████████| 50/50 [00:28<00:00,  1.77it/s, episode_return=300, return=-500.000]
Iteration 6: 100%|██████████| 50/50 [00:25<00:00,  1.96it/s, episode_return=350, return=-500.000]
Iteration 7: 100%|██████████| 50/50 [00:26<00:00,  1.90it/s, episode_return=400, return=-500.000]
Iteration 8: 100%|██████████| 50/50 [00:24<00:00,  2.02it/s, episode_return=450, return=-500.000]
Iteration 9: 100%|██████████| 50/50 [00:24<00:00,  2.01it/s, episode_return=500, return=-500.000]


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,212881.15568


[34m[1mwandb[0m: Agent Starting Run: 40qcwzos with config:
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.005309639738870366
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:31<00:00,  1.59it/s, episode_return=50, return=-500.000]
Iteration 1: 100%|██████████| 50/50 [00:23<00:00,  2.15it/s, episode_return=100, return=-500.000]
Iteration 2: 100%|██████████| 50/50 [00:24<00:00,  2.05it/s, episode_return=150, return=-500.000]
Iteration 3: 100%|██████████| 50/50 [00:20<00:00,  2.39it/s, episode_return=200, return=-500.000]
Iteration 4: 100%|██████████| 50/50 [00:22<00:00,  2.19it/s, episode_return=250, return=-500.000]
Iteration 5: 100%|██████████| 50/50 [00:28<00:00,  1.76it/s, episode_return=300, return=-500.000]
Iteration 6: 100%|██████████| 50/50 [00:23<00:00,  2.11it/s, episode_return=350, return=-500.000]
Iteration 7: 100%|██████████| 50/50 [00:23<00:00,  2.16it/s, episode_return=400, return=-500.000]
Iteration 8: 100%|██████████| 50/50 [00:21<00:00,  2.33it/s, episode_return=450, return=-500.000]
Iteration 9: 100%|██████████| 50/50 [00:23<00:00,  2.14it/s, episode_return=500, return=-500.000]


VBox(children=(Label(value='0.001 MB of 0.007 MB uploaded\r'), FloatProgress(value=0.16509996028068316, max=1.…

0,1
regret,▁

0,1
regret,250000.0


[34m[1mwandb[0m: Agent Starting Run: n9a81ggi with config:
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.009307740234868842
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:31<00:00,  1.58it/s, episode_return=50, return=-500.000]
Iteration 1: 100%|██████████| 50/50 [00:32<00:00,  1.55it/s, episode_return=100, return=-500.000]
Iteration 2: 100%|██████████| 50/50 [00:38<00:00,  1.31it/s, episode_return=150, return=-500.000]
Iteration 3: 100%|██████████| 50/50 [00:34<00:00,  1.44it/s, episode_return=200, return=-500.000]
Iteration 4: 100%|██████████| 50/50 [00:31<00:00,  1.60it/s, episode_return=250, return=-500.000]
Iteration 5: 100%|██████████| 50/50 [00:35<00:00,  1.40it/s, episode_return=300, return=-500.000]
Iteration 6: 100%|██████████| 50/50 [00:34<00:00,  1.44it/s, episode_return=350, return=-500.000]
Iteration 7: 100%|██████████| 50/50 [00:33<00:00,  1.50it/s, episode_return=400, return=-500.000]
Iteration 8: 100%|██████████| 50/50 [00:30<00:00,  1.64it/s, episode_return=450, return=-500.000]
Iteration 9: 100%|██████████| 50/50 [00:31<00:00,  1.60it/s, episode_return=500, return=-500.000]


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,242305.12533


[34m[1mwandb[0m: Agent Starting Run: py91qe95 with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.004997434735497943
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:14<00:00,  3.50it/s, episode_return=50, return=-139.000]
Iteration 1: 100%|██████████| 50/50 [00:08<00:00,  6.12it/s, episode_return=100, return=-126.400]
Iteration 2: 100%|██████████| 50/50 [00:09<00:00,  5.24it/s, episode_return=150, return=-101.800]
Iteration 3: 100%|██████████| 50/50 [00:08<00:00,  5.82it/s, episode_return=200, return=-123.900]
Iteration 4: 100%|██████████| 50/50 [00:07<00:00,  6.90it/s, episode_return=250, return=-106.200]
Iteration 5: 100%|██████████| 50/50 [00:07<00:00,  6.76it/s, episode_return=300, return=-110.600]
Iteration 6: 100%|██████████| 50/50 [00:06<00:00,  7.25it/s, episode_return=350, return=-123.200]
Iteration 7: 100%|██████████| 50/50 [00:06<00:00,  7.86it/s, episode_return=400, return=-85.400] 
Iteration 8: 100%|██████████| 50/50 [00:06<00:00,  8.23it/s, episode_return=450, return=-83.100]
Iteration 9: 100%|██████████| 50/50 [00:07<00:00,  7.01it/s, episode_return=500, return=-89.200] 


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,69076.4516


[34m[1mwandb[0m: Agent Starting Run: bmco371e with config:
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.002954331250975507
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:31<00:00,  1.56it/s, episode_return=50, return=-494.100]
Iteration 1: 100%|██████████| 50/50 [00:38<00:00,  1.30it/s, episode_return=100, return=-500.000]
Iteration 2: 100%|██████████| 50/50 [00:39<00:00,  1.26it/s, episode_return=150, return=-500.000]
Iteration 3: 100%|██████████| 50/50 [00:25<00:00,  1.93it/s, episode_return=200, return=-183.400]
Iteration 4: 100%|██████████| 50/50 [00:13<00:00,  3.78it/s, episode_return=250, return=-160.400]
Iteration 5: 100%|██████████| 50/50 [00:11<00:00,  4.36it/s, episode_return=300, return=-173.500]
Iteration 6: 100%|██████████| 50/50 [00:12<00:00,  3.91it/s, episode_return=350, return=-161.600]
Iteration 7: 100%|██████████| 50/50 [00:12<00:00,  3.87it/s, episode_return=400, return=-171.500]
Iteration 8: 100%|██████████| 50/50 [00:13<00:00,  3.69it/s, episode_return=450, return=-156.900]
Iteration 9: 100%|██████████| 50/50 [00:14<00:00,  3.55it/s, episode_return=500, return=-159.800]


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,160072.4772


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 34bnjktg with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.009812246117752057
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:31<00:00,  1.59it/s, episode_return=50, return=-500.000]
Iteration 1: 100%|██████████| 50/50 [00:37<00:00,  1.34it/s, episode_return=100, return=-500.000]
Iteration 2: 100%|██████████| 50/50 [00:35<00:00,  1.40it/s, episode_return=150, return=-500.000]
Iteration 3: 100%|██████████| 50/50 [00:31<00:00,  1.59it/s, episode_return=200, return=-500.000]
Iteration 4: 100%|██████████| 50/50 [00:30<00:00,  1.65it/s, episode_return=250, return=-500.000]
Iteration 5: 100%|██████████| 50/50 [00:31<00:00,  1.60it/s, episode_return=300, return=-500.000]
Iteration 6: 100%|██████████| 50/50 [00:29<00:00,  1.67it/s, episode_return=350, return=-500.000]
Iteration 7: 100%|██████████| 50/50 [00:30<00:00,  1.64it/s, episode_return=400, return=-500.000]
Iteration 8: 100%|██████████| 50/50 [00:29<00:00,  1.71it/s, episode_return=450, return=-500.000]
Iteration 9: 100%|██████████| 50/50 [00:31<00:00,  1.60it/s, episode_return=500, return=-500.000]


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,249459.48304


[34m[1mwandb[0m: Agent Starting Run: e0mvwyiu with config:
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.0007469597628320514
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011277777777932999, max=1.0…

Iteration 0: 100%|██████████| 50/50 [00:35<00:00,  1.39it/s, episode_return=50, return=-500.000]
Iteration 1: 100%|██████████| 50/50 [00:37<00:00,  1.32it/s, episode_return=100, return=-500.000]
Iteration 2: 100%|██████████| 50/50 [00:34<00:00,  1.46it/s, episode_return=150, return=-423.400]
Iteration 3: 100%|██████████| 50/50 [00:18<00:00,  2.67it/s, episode_return=200, return=-187.500]
Iteration 4: 100%|██████████| 50/50 [00:15<00:00,  3.30it/s, episode_return=250, return=-189.300]
Iteration 5: 100%|██████████| 50/50 [00:12<00:00,  4.14it/s, episode_return=300, return=-106.700]
Iteration 6: 100%|██████████| 50/50 [00:09<00:00,  5.51it/s, episode_return=350, return=-112.100]
Iteration 7: 100%|██████████| 50/50 [00:08<00:00,  5.75it/s, episode_return=400, return=-125.700]
Iteration 8: 100%|██████████| 50/50 [00:08<00:00,  6.03it/s, episode_return=450, return=-121.600]
Iteration 9: 100%|██████████| 50/50 [00:08<00:00,  6.16it/s, episode_return=500, return=-122.500]


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,144391.31683


[34m[1mwandb[0m: Agent Starting Run: lca4ropj with config:
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.006588492510516708
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:11<00:00,  4.52it/s, episode_return=50, return=-164.400]
Iteration 1: 100%|██████████| 50/50 [00:09<00:00,  5.03it/s, episode_return=100, return=-113.800]
Iteration 2: 100%|██████████| 50/50 [00:06<00:00,  7.52it/s, episode_return=150, return=-85.900]
Iteration 3: 100%|██████████| 50/50 [00:07<00:00,  7.04it/s, episode_return=200, return=-84.900]
Iteration 4: 100%|██████████| 50/50 [00:08<00:00,  5.96it/s, episode_return=250, return=-140.500]
Iteration 5: 100%|██████████| 50/50 [00:31<00:00,  1.58it/s, episode_return=300, return=-500.000]
Iteration 6: 100%|██████████| 50/50 [00:35<00:00,  1.39it/s, episode_return=350, return=-500.000]
Iteration 7: 100%|██████████| 50/50 [00:35<00:00,  1.43it/s, episode_return=400, return=-500.000]
Iteration 8: 100%|██████████| 50/50 [00:35<00:00,  1.40it/s, episode_return=450, return=-500.000]
Iteration 9: 100%|██████████| 50/50 [00:35<00:00,  1.41it/s, episode_return=500, return=-500.000]


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,134860.45175


[34m[1mwandb[0m: Agent Starting Run: r0elrjcj with config:
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.002146067857693506
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:15<00:00,  3.28it/s, episode_return=50, return=-136.600]
Iteration 1: 100%|██████████| 50/50 [00:09<00:00,  5.19it/s, episode_return=100, return=-147.700]
Iteration 2: 100%|██████████| 50/50 [00:09<00:00,  5.04it/s, episode_return=150, return=-141.600]
Iteration 3: 100%|██████████| 50/50 [00:09<00:00,  5.13it/s, episode_return=200, return=-134.500]
Iteration 4: 100%|██████████| 50/50 [00:11<00:00,  4.51it/s, episode_return=250, return=-140.200]
Iteration 5: 100%|██████████| 50/50 [00:10<00:00,  4.76it/s, episode_return=300, return=-149.600]
Iteration 6: 100%|██████████| 50/50 [00:09<00:00,  5.33it/s, episode_return=350, return=-123.100]
Iteration 7: 100%|██████████| 50/50 [00:08<00:00,  5.92it/s, episode_return=400, return=-129.400]
Iteration 8: 100%|██████████| 50/50 [00:07<00:00,  6.61it/s, episode_return=450, return=-107.300]
Iteration 9: 100%|██████████| 50/50 [00:08<00:00,  5.80it/s, episode_return=500, return=-118.300]


VBox(children=(Label(value='0.001 MB of 0.007 MB uploaded\r'), FloatProgress(value=0.1664886515353805, max=1.0…

0,1
regret,▁

0,1
regret,81940.60268


[34m[1mwandb[0m: Agent Starting Run: engyj8ce with config:
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.0011635270672414912
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:30<00:00,  1.62it/s, episode_return=50, return=-454.500]
Iteration 1: 100%|██████████| 50/50 [00:29<00:00,  1.70it/s, episode_return=100, return=-428.100]
Iteration 2: 100%|██████████| 50/50 [00:18<00:00,  2.76it/s, episode_return=150, return=-214.200]
Iteration 3: 100%|██████████| 50/50 [00:14<00:00,  3.52it/s, episode_return=200, return=-254.000]
Iteration 4: 100%|██████████| 50/50 [00:11<00:00,  4.43it/s, episode_return=250, return=-164.800]
Iteration 5: 100%|██████████| 50/50 [00:10<00:00,  4.87it/s, episode_return=300, return=-139.900]
Iteration 6: 100%|██████████| 50/50 [00:09<00:00,  5.24it/s, episode_return=350, return=-129.200]
Iteration 7: 100%|██████████| 50/50 [00:08<00:00,  5.61it/s, episode_return=400, return=-115.800]
Iteration 8: 100%|██████████| 50/50 [00:08<00:00,  5.81it/s, episode_return=450, return=-140.300]
Iteration 9: 100%|██████████| 50/50 [00:09<00:00,  5.27it/s, episode_return=500, return=-163.500]


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,124594.80881


[34m[1mwandb[0m: Agent Starting Run: i4hdtwng with config:
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.008039208063719692
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:32<00:00,  1.56it/s, episode_return=50, return=-500.000]
Iteration 1: 100%|██████████| 50/50 [00:32<00:00,  1.55it/s, episode_return=100, return=-500.000]
Iteration 2: 100%|██████████| 50/50 [00:32<00:00,  1.52it/s, episode_return=150, return=-500.000]
Iteration 3: 100%|██████████| 50/50 [00:32<00:00,  1.55it/s, episode_return=200, return=-500.000]
Iteration 4: 100%|██████████| 50/50 [00:32<00:00,  1.52it/s, episode_return=250, return=-500.000]
Iteration 5: 100%|██████████| 50/50 [00:32<00:00,  1.52it/s, episode_return=300, return=-500.000]
Iteration 6: 100%|██████████| 50/50 [00:35<00:00,  1.39it/s, episode_return=350, return=-500.000]
Iteration 7: 100%|██████████| 50/50 [00:34<00:00,  1.46it/s, episode_return=400, return=-500.000]
Iteration 8: 100%|██████████| 50/50 [00:32<00:00,  1.53it/s, episode_return=450, return=-500.000]
Iteration 9: 100%|██████████| 50/50 [00:32<00:00,  1.53it/s, episode_return=500, return=-500.000]


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,249813.34935


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: ghh5k0jh with config:
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.0019979553144285236
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011288888888925108, max=1.0…

Iteration 0: 100%|██████████| 50/50 [00:33<00:00,  1.49it/s, episode_return=50, return=-405.100]
Iteration 1: 100%|██████████| 50/50 [00:34<00:00,  1.47it/s, episode_return=100, return=-500.000]
Iteration 2: 100%|██████████| 50/50 [00:35<00:00,  1.40it/s, episode_return=150, return=-500.000]
Iteration 3: 100%|██████████| 50/50 [00:36<00:00,  1.37it/s, episode_return=200, return=-500.000]
Iteration 4: 100%|██████████| 50/50 [00:36<00:00,  1.36it/s, episode_return=250, return=-500.000]
Iteration 5: 100%|██████████| 50/50 [00:36<00:00,  1.35it/s, episode_return=300, return=-500.000]
Iteration 6: 100%|██████████| 50/50 [00:36<00:00,  1.35it/s, episode_return=350, return=-500.000]
Iteration 7: 100%|██████████| 50/50 [00:35<00:00,  1.42it/s, episode_return=400, return=-500.000]
Iteration 8: 100%|██████████| 50/50 [00:36<00:00,  1.37it/s, episode_return=450, return=-500.000]
Iteration 9: 100%|██████████| 50/50 [00:36<00:00,  1.35it/s, episode_return=500, return=-500.000]


VBox(children=(Label(value='0.001 MB of 0.007 MB uploaded\r'), FloatProgress(value=0.16353852211576322, max=1.…

0,1
regret,▁

0,1
regret,246178.33368


[34m[1mwandb[0m: Agent Starting Run: l1df0ytm with config:
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.005636964692421278
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:20<00:00,  2.43it/s, episode_return=50, return=-196.900]
Iteration 1: 100%|██████████| 50/50 [00:13<00:00,  3.68it/s, episode_return=100, return=-181.000]
Iteration 2: 100%|██████████| 50/50 [00:14<00:00,  3.43it/s, episode_return=150, return=-178.900]
Iteration 3: 100%|██████████| 50/50 [00:12<00:00,  3.86it/s, episode_return=200, return=-165.800]
Iteration 4: 100%|██████████| 50/50 [00:11<00:00,  4.49it/s, episode_return=250, return=-123.300]
Iteration 5: 100%|██████████| 50/50 [00:09<00:00,  5.41it/s, episode_return=300, return=-105.300]
Iteration 6: 100%|██████████| 50/50 [00:06<00:00,  7.56it/s, episode_return=350, return=-92.100]
Iteration 7: 100%|██████████| 50/50 [00:07<00:00,  6.95it/s, episode_return=400, return=-83.400] 
Iteration 8: 100%|██████████| 50/50 [00:06<00:00,  7.31it/s, episode_return=450, return=-89.200] 
Iteration 9: 100%|██████████| 50/50 [00:06<00:00,  7.35it/s, episode_return=500, return=-80.900]


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,84335.99915


[34m[1mwandb[0m: Agent Starting Run: rn5ccw1e with config:
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.0010339203945478608
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:24<00:00,  2.02it/s, episode_return=50, return=-190.300]
Iteration 1: 100%|██████████| 50/50 [00:15<00:00,  3.16it/s, episode_return=100, return=-194.700]
Iteration 2: 100%|██████████| 50/50 [00:11<00:00,  4.45it/s, episode_return=150, return=-136.500]
Iteration 3: 100%|██████████| 50/50 [00:11<00:00,  4.50it/s, episode_return=200, return=-134.500]
Iteration 4: 100%|██████████| 50/50 [00:10<00:00,  4.76it/s, episode_return=250, return=-127.300]
Iteration 5: 100%|██████████| 50/50 [00:08<00:00,  6.05it/s, episode_return=300, return=-114.900]
Iteration 6: 100%|██████████| 50/50 [00:07<00:00,  6.95it/s, episode_return=350, return=-93.200] 
Iteration 7: 100%|██████████| 50/50 [00:08<00:00,  6.20it/s, episode_return=400, return=-97.000] 
Iteration 8: 100%|██████████| 50/50 [00:09<00:00,  5.25it/s, episode_return=450, return=-95.400] 
Iteration 9: 100%|██████████| 50/50 [00:09<00:00,  5.47it/s, episode_return=500, return=-96.500] 


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,90388.08451


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: wrl6j9qc with config:
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.00785925265509163
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:36<00:00,  1.38it/s, episode_return=50, return=-500.000]
Iteration 1: 100%|██████████| 50/50 [00:36<00:00,  1.38it/s, episode_return=100, return=-500.000]
Iteration 2: 100%|██████████| 50/50 [00:36<00:00,  1.36it/s, episode_return=150, return=-500.000]
Iteration 3: 100%|██████████| 50/50 [00:38<00:00,  1.28it/s, episode_return=200, return=-500.000]
Iteration 4: 100%|██████████| 50/50 [00:39<00:00,  1.28it/s, episode_return=250, return=-500.000]
Iteration 5: 100%|██████████| 50/50 [00:37<00:00,  1.34it/s, episode_return=300, return=-500.000]
Iteration 6: 100%|██████████| 50/50 [00:39<00:00,  1.28it/s, episode_return=350, return=-500.000]
Iteration 7: 100%|██████████| 50/50 [00:38<00:00,  1.29it/s, episode_return=400, return=-500.000]
Iteration 8: 100%|██████████| 50/50 [00:35<00:00,  1.40it/s, episode_return=450, return=-500.000]
Iteration 9: 100%|██████████| 50/50 [00:35<00:00,  1.40it/s, episode_return=500, return=-500.000]


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,250000.0


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: bkj9pb5a with config:
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.004746319411626931
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:38<00:00,  1.30it/s, episode_return=50, return=-500.000]
Iteration 1: 100%|██████████| 50/50 [00:39<00:00,  1.28it/s, episode_return=100, return=-500.000]
Iteration 2: 100%|██████████| 50/50 [00:34<00:00,  1.46it/s, episode_return=150, return=-500.000]
Iteration 3: 100%|██████████| 50/50 [00:36<00:00,  1.37it/s, episode_return=200, return=-500.000]
Iteration 4: 100%|██████████| 50/50 [00:36<00:00,  1.36it/s, episode_return=250, return=-500.000]
Iteration 5: 100%|██████████| 50/50 [00:37<00:00,  1.35it/s, episode_return=300, return=-500.000]
Iteration 6: 100%|██████████| 50/50 [00:41<00:00,  1.22it/s, episode_return=350, return=-500.000]
Iteration 7: 100%|██████████| 50/50 [00:37<00:00,  1.34it/s, episode_return=400, return=-500.000]
Iteration 8: 100%|██████████| 50/50 [00:39<00:00,  1.28it/s, episode_return=450, return=-500.000]
Iteration 9: 100%|██████████| 50/50 [00:41<00:00,  1.20it/s, episode_return=500, return=-500.000]


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,249431.30901


[34m[1mwandb[0m: Agent Starting Run: 5k29hepy with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.007459034272849588
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:28<00:00,  1.74it/s, episode_return=50, return=-500.000]
Iteration 1: 100%|██████████| 50/50 [00:30<00:00,  1.64it/s, episode_return=100, return=-500.000]
Iteration 2: 100%|██████████| 50/50 [00:30<00:00,  1.64it/s, episode_return=150, return=-500.000]
Iteration 3: 100%|██████████| 50/50 [00:14<00:00,  3.47it/s, episode_return=200, return=-161.500]
Iteration 4: 100%|██████████| 50/50 [00:10<00:00,  4.74it/s, episode_return=250, return=-167.900]
Iteration 5: 100%|██████████| 50/50 [00:09<00:00,  5.47it/s, episode_return=300, return=-146.300]
Iteration 6: 100%|██████████| 50/50 [00:08<00:00,  5.90it/s, episode_return=350, return=-136.500]
Iteration 7: 100%|██████████| 50/50 [00:07<00:00,  6.27it/s, episode_return=400, return=-122.300]
Iteration 8: 100%|██████████| 50/50 [00:06<00:00,  7.48it/s, episode_return=450, return=-83.000]
Iteration 9: 100%|██████████| 50/50 [00:06<00:00,  7.16it/s, episode_return=500, return=-105.400]


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,139784.95669


[34m[1mwandb[0m: Agent Starting Run: en2e2365 with config:
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.008557207810542073
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:19<00:00,  2.63it/s, episode_return=50, return=-219.700]
Iteration 1: 100%|██████████| 50/50 [00:13<00:00,  3.82it/s, episode_return=100, return=-155.300]
Iteration 2: 100%|██████████| 50/50 [00:13<00:00,  3.71it/s, episode_return=150, return=-194.100]
Iteration 3: 100%|██████████| 50/50 [00:16<00:00,  2.97it/s, episode_return=200, return=-263.000]
Iteration 4: 100%|██████████| 50/50 [00:16<00:00,  3.04it/s, episode_return=250, return=-215.900]
Iteration 5: 100%|██████████| 50/50 [00:14<00:00,  3.53it/s, episode_return=300, return=-194.200]
Iteration 6: 100%|██████████| 50/50 [00:24<00:00,  2.03it/s, episode_return=350, return=-500.000]
Iteration 7: 100%|██████████| 50/50 [00:36<00:00,  1.38it/s, episode_return=400, return=-500.000]
Iteration 8: 100%|██████████| 50/50 [00:33<00:00,  1.48it/s, episode_return=450, return=-500.000]
Iteration 9: 100%|██████████| 50/50 [00:38<00:00,  1.30it/s, episode_return=500, return=-500.000]


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,146517.63213


[34m[1mwandb[0m: Agent Starting Run: 0g3iwav5 with config:
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.007095391417298069
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:34<00:00,  1.44it/s, episode_return=50, return=-500.000]
Iteration 1: 100%|██████████| 50/50 [00:35<00:00,  1.40it/s, episode_return=100, return=-500.000]
Iteration 2: 100%|██████████| 50/50 [00:33<00:00,  1.50it/s, episode_return=150, return=-500.000]
Iteration 3: 100%|██████████| 50/50 [00:32<00:00,  1.52it/s, episode_return=200, return=-500.000]
Iteration 4: 100%|██████████| 50/50 [00:33<00:00,  1.51it/s, episode_return=250, return=-500.000]
Iteration 5: 100%|██████████| 50/50 [00:33<00:00,  1.50it/s, episode_return=300, return=-500.000]
Iteration 6: 100%|██████████| 50/50 [00:33<00:00,  1.51it/s, episode_return=350, return=-500.000]
Iteration 7: 100%|██████████| 50/50 [00:30<00:00,  1.62it/s, episode_return=400, return=-500.000]
Iteration 8: 100%|██████████| 50/50 [00:34<00:00,  1.43it/s, episode_return=450, return=-500.000]
Iteration 9: 100%|██████████| 50/50 [00:37<00:00,  1.34it/s, episode_return=500, return=-500.000]


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,248939.75518


[34m[1mwandb[0m: Agent Starting Run: 3k6d4ez7 with config:
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.001160996133202727
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011111111111111112, max=1.0…

Iteration 0: 100%|██████████| 50/50 [00:39<00:00,  1.28it/s, episode_return=50, return=-500.000]
Iteration 1: 100%|██████████| 50/50 [00:36<00:00,  1.37it/s, episode_return=100, return=-500.000]
Iteration 2: 100%|██████████| 50/50 [00:39<00:00,  1.28it/s, episode_return=150, return=-500.000]
Iteration 3: 100%|██████████| 50/50 [00:38<00:00,  1.29it/s, episode_return=200, return=-500.000]
Iteration 4: 100%|██████████| 50/50 [00:38<00:00,  1.31it/s, episode_return=250, return=-500.000]
Iteration 5: 100%|██████████| 50/50 [00:38<00:00,  1.29it/s, episode_return=300, return=-500.000]
Iteration 6: 100%|██████████| 50/50 [00:37<00:00,  1.32it/s, episode_return=350, return=-500.000]
Iteration 7: 100%|██████████| 50/50 [00:39<00:00,  1.27it/s, episode_return=400, return=-500.000]
Iteration 8: 100%|██████████| 50/50 [00:38<00:00,  1.29it/s, episode_return=450, return=-500.000]
Iteration 9: 100%|██████████| 50/50 [00:39<00:00,  1.27it/s, episode_return=500, return=-500.000]


VBox(children=(Label(value='0.001 MB of 0.007 MB uploaded\r'), FloatProgress(value=0.16339098532494759, max=1.…

0,1
regret,▁

0,1
regret,248441.08833


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: va4ju0wx with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.0018845522842206365
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:32<00:00,  1.55it/s, episode_return=50, return=-500.000]
Iteration 1: 100%|██████████| 50/50 [00:30<00:00,  1.66it/s, episode_return=100, return=-500.000]
Iteration 2: 100%|██████████| 50/50 [00:30<00:00,  1.64it/s, episode_return=150, return=-500.000]
Iteration 3: 100%|██████████| 50/50 [00:30<00:00,  1.66it/s, episode_return=200, return=-500.000]
Iteration 4: 100%|██████████| 50/50 [00:32<00:00,  1.52it/s, episode_return=250, return=-500.000]
Iteration 5: 100%|██████████| 50/50 [00:34<00:00,  1.43it/s, episode_return=300, return=-500.000]
Iteration 6: 100%|██████████| 50/50 [00:34<00:00,  1.46it/s, episode_return=350, return=-500.000]
Iteration 7: 100%|██████████| 50/50 [00:31<00:00,  1.60it/s, episode_return=400, return=-500.000]
Iteration 8: 100%|██████████| 50/50 [00:29<00:00,  1.68it/s, episode_return=450, return=-500.000]
Iteration 9: 100%|██████████| 50/50 [00:34<00:00,  1.47it/s, episode_return=500, return=-500.000]


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,250000.0


[34m[1mwandb[0m: Agent Starting Run: 5xiooh66 with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.005564348523833343
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:14<00:00,  3.56it/s, episode_return=50, return=-114.200]
Iteration 1: 100%|██████████| 50/50 [00:07<00:00,  6.43it/s, episode_return=100, return=-93.600]
Iteration 2: 100%|██████████| 50/50 [00:07<00:00,  6.63it/s, episode_return=150, return=-99.700] 
Iteration 3: 100%|██████████| 50/50 [00:07<00:00,  6.71it/s, episode_return=200, return=-99.400] 
Iteration 4: 100%|██████████| 50/50 [00:06<00:00,  7.70it/s, episode_return=250, return=-105.400]
Iteration 5: 100%|██████████| 50/50 [00:07<00:00,  6.63it/s, episode_return=300, return=-99.500] 
Iteration 6: 100%|██████████| 50/50 [00:07<00:00,  6.57it/s, episode_return=350, return=-105.400]
Iteration 7: 100%|██████████| 50/50 [00:06<00:00,  7.56it/s, episode_return=400, return=-89.000]
Iteration 8: 100%|██████████| 50/50 [00:05<00:00,  8.50it/s, episode_return=450, return=-84.000] 
Iteration 9: 100%|██████████| 50/50 [00:04<00:00, 10.10it/s, episode_return=500, return=-90.600]


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,65104.86107


[34m[1mwandb[0m: Agent Starting Run: z6te66fz with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.007221126963879197
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:34<00:00,  1.45it/s, episode_return=50, return=-500.000]
Iteration 1: 100%|██████████| 50/50 [00:34<00:00,  1.44it/s, episode_return=100, return=-500.000]
Iteration 2: 100%|██████████| 50/50 [00:33<00:00,  1.50it/s, episode_return=150, return=-500.000]
Iteration 3: 100%|██████████| 50/50 [00:33<00:00,  1.50it/s, episode_return=200, return=-500.000]
Iteration 4: 100%|██████████| 50/50 [00:34<00:00,  1.46it/s, episode_return=250, return=-500.000]
Iteration 5: 100%|██████████| 50/50 [00:34<00:00,  1.44it/s, episode_return=300, return=-500.000]
Iteration 6: 100%|██████████| 50/50 [00:33<00:00,  1.51it/s, episode_return=350, return=-500.000]
Iteration 7: 100%|██████████| 50/50 [00:27<00:00,  1.80it/s, episode_return=400, return=-500.000]
Iteration 8: 100%|██████████| 50/50 [00:28<00:00,  1.73it/s, episode_return=450, return=-362.200]
Iteration 9: 100%|██████████| 50/50 [00:06<00:00,  7.77it/s, episode_return=500, return=-100.200]


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,243757.08202


[34m[1mwandb[0m: Agent Starting Run: 1ugdzqqc with config:
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.002234795891937935
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:15<00:00,  3.24it/s, episode_return=50, return=-158.700]
Iteration 1: 100%|██████████| 50/50 [00:09<00:00,  5.21it/s, episode_return=100, return=-117.800]
Iteration 2: 100%|██████████| 50/50 [00:09<00:00,  5.26it/s, episode_return=150, return=-131.900]
Iteration 3: 100%|██████████| 50/50 [00:09<00:00,  5.18it/s, episode_return=200, return=-109.300]
Iteration 4: 100%|██████████| 50/50 [00:08<00:00,  6.24it/s, episode_return=250, return=-108.100]
Iteration 5: 100%|██████████| 50/50 [00:07<00:00,  6.65it/s, episode_return=300, return=-89.900] 
Iteration 6: 100%|██████████| 50/50 [00:07<00:00,  6.83it/s, episode_return=350, return=-94.900] 
Iteration 7: 100%|██████████| 50/50 [00:08<00:00,  5.98it/s, episode_return=400, return=-106.400]
Iteration 8: 100%|██████████| 50/50 [00:07<00:00,  6.95it/s, episode_return=450, return=-89.200] 
Iteration 9: 100%|██████████| 50/50 [00:08<00:00,  6.19it/s, episode_return=500, return=-100.100]


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,63227.82171


[34m[1mwandb[0m: Agent Starting Run: 4r6arfml with config:
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.004942302837605629
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:12<00:00,  4.10it/s, episode_return=50, return=-114.000]
Iteration 1: 100%|██████████| 50/50 [00:07<00:00,  6.41it/s, episode_return=100, return=-92.700]
Iteration 2: 100%|██████████| 50/50 [00:07<00:00,  6.58it/s, episode_return=150, return=-116.800]
Iteration 3: 100%|██████████| 50/50 [00:11<00:00,  4.42it/s, episode_return=200, return=-161.600]
Iteration 4: 100%|██████████| 50/50 [00:08<00:00,  5.69it/s, episode_return=250, return=-103.100]
Iteration 5: 100%|██████████| 50/50 [00:07<00:00,  6.33it/s, episode_return=300, return=-104.000]
Iteration 6: 100%|██████████| 50/50 [00:08<00:00,  6.20it/s, episode_return=350, return=-109.600]
Iteration 7: 100%|██████████| 50/50 [00:11<00:00,  4.34it/s, episode_return=400, return=-154.200]
Iteration 8: 100%|██████████| 50/50 [00:15<00:00,  3.21it/s, episode_return=450, return=-173.300]
Iteration 9: 100%|██████████| 50/50 [00:14<00:00,  3.46it/s, episode_return=500, return=-200.100]


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,71701.35817


[34m[1mwandb[0m: Agent Starting Run: byomy441 with config:
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.005107696770897987
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:16<00:00,  3.02it/s, episode_return=50, return=-188.200]
Iteration 1: 100%|██████████| 50/50 [00:14<00:00,  3.49it/s, episode_return=100, return=-173.000]
Iteration 2: 100%|██████████| 50/50 [00:21<00:00,  2.37it/s, episode_return=150, return=-500.000]
Iteration 3: 100%|██████████| 50/50 [00:37<00:00,  1.35it/s, episode_return=200, return=-500.000]
Iteration 4: 100%|██████████| 50/50 [00:34<00:00,  1.44it/s, episode_return=250, return=-500.000]
Iteration 5: 100%|██████████| 50/50 [00:36<00:00,  1.38it/s, episode_return=300, return=-500.000]
Iteration 6: 100%|██████████| 50/50 [00:35<00:00,  1.41it/s, episode_return=350, return=-500.000]
Iteration 7: 100%|██████████| 50/50 [00:35<00:00,  1.40it/s, episode_return=400, return=-500.000]
Iteration 8: 100%|██████████| 50/50 [00:35<00:00,  1.42it/s, episode_return=450, return=-500.000]
Iteration 9: 100%|██████████| 50/50 [00:35<00:00,  1.40it/s, episode_return=500, return=-500.000]


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,197963.32554


[34m[1mwandb[0m: Agent Starting Run: b8n5k72m with config:
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.009590788859355872
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:09<00:00,  5.09it/s, episode_return=50, return=-93.000] 
Iteration 1: 100%|██████████| 50/50 [00:07<00:00,  6.40it/s, episode_return=100, return=-97.900]
Iteration 2: 100%|██████████| 50/50 [00:05<00:00,  8.62it/s, episode_return=150, return=-74.300] 
Iteration 3: 100%|██████████| 50/50 [00:06<00:00,  7.37it/s, episode_return=200, return=-84.300] 
Iteration 4: 100%|██████████| 50/50 [00:06<00:00,  7.70it/s, episode_return=250, return=-86.400] 
Iteration 5: 100%|██████████| 50/50 [00:06<00:00,  8.16it/s, episode_return=300, return=-96.400]
Iteration 6: 100%|██████████| 50/50 [00:06<00:00,  7.19it/s, episode_return=350, return=-99.300] 
Iteration 7: 100%|██████████| 50/50 [00:05<00:00,  8.53it/s, episode_return=400, return=-86.800]
Iteration 8: 100%|██████████| 50/50 [00:06<00:00,  8.27it/s, episode_return=450, return=-95.100]
Iteration 9: 100%|██████████| 50/50 [00:06<00:00,  7.79it/s, episode_return=500, return=-85.500] 


VBox(children=(Label(value='0.001 MB of 0.007 MB uploaded\r'), FloatProgress(value=0.1669793621013133, max=1.0…

0,1
regret,▁

0,1
regret,52537.1875


[34m[1mwandb[0m: Agent Starting Run: x11xkmnm with config:
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.0035482895128003795
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:36<00:00,  1.36it/s, episode_return=50, return=-500.000]
Iteration 1: 100%|██████████| 50/50 [00:34<00:00,  1.43it/s, episode_return=100, return=-500.000]
Iteration 2: 100%|██████████| 50/50 [00:36<00:00,  1.38it/s, episode_return=150, return=-500.000]
Iteration 3: 100%|██████████| 50/50 [00:34<00:00,  1.44it/s, episode_return=200, return=-500.000]
Iteration 4: 100%|██████████| 50/50 [00:37<00:00,  1.34it/s, episode_return=250, return=-500.000]
Iteration 5: 100%|██████████| 50/50 [00:36<00:00,  1.38it/s, episode_return=300, return=-500.000]
Iteration 6: 100%|██████████| 50/50 [00:36<00:00,  1.36it/s, episode_return=350, return=-500.000]
Iteration 7: 100%|██████████| 50/50 [00:35<00:00,  1.41it/s, episode_return=400, return=-500.000]
Iteration 8: 100%|██████████| 50/50 [00:36<00:00,  1.37it/s, episode_return=450, return=-500.000]
Iteration 9: 100%|██████████| 50/50 [00:35<00:00,  1.41it/s, episode_return=500, return=-500.000]


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,250000.0


[34m[1mwandb[0m: Agent Starting Run: 7g0iyzv2 with config:
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.0035070758767692225
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:36<00:00,  1.38it/s, episode_return=50, return=-478.500]
Iteration 1: 100%|██████████| 50/50 [00:20<00:00,  2.47it/s, episode_return=100, return=-267.400]
Iteration 2: 100%|██████████| 50/50 [00:17<00:00,  2.92it/s, episode_return=150, return=-193.500]
Iteration 3: 100%|██████████| 50/50 [00:14<00:00,  3.42it/s, episode_return=200, return=-180.400]
Iteration 4: 100%|██████████| 50/50 [00:14<00:00,  3.44it/s, episode_return=250, return=-195.400]
Iteration 5: 100%|██████████| 50/50 [00:12<00:00,  3.87it/s, episode_return=300, return=-156.300]
Iteration 6: 100%|██████████| 50/50 [00:12<00:00,  4.07it/s, episode_return=350, return=-150.300]
Iteration 7: 100%|██████████| 50/50 [00:12<00:00,  3.87it/s, episode_return=400, return=-193.400]
Iteration 8: 100%|██████████| 50/50 [00:11<00:00,  4.42it/s, episode_return=450, return=-129.000]
Iteration 9: 100%|██████████| 50/50 [00:07<00:00,  6.26it/s, episode_return=500, return=-97.800] 


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,125478.65883


[34m[1mwandb[0m: Agent Starting Run: ajori2c5 with config:
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.0034853049669278952
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:16<00:00,  3.04it/s, episode_return=50, return=-114.100]
Iteration 1: 100%|██████████| 50/50 [00:06<00:00,  7.21it/s, episode_return=100, return=-95.200]
Iteration 2: 100%|██████████| 50/50 [00:07<00:00,  7.00it/s, episode_return=150, return=-100.400]
Iteration 3: 100%|██████████| 50/50 [00:07<00:00,  6.90it/s, episode_return=200, return=-105.100]
Iteration 4: 100%|██████████| 50/50 [00:08<00:00,  5.75it/s, episode_return=250, return=-107.900]
Iteration 5: 100%|██████████| 50/50 [00:07<00:00,  6.96it/s, episode_return=300, return=-95.300] 
Iteration 6: 100%|██████████| 50/50 [00:07<00:00,  6.96it/s, episode_return=350, return=-87.600] 
Iteration 7: 100%|██████████| 50/50 [00:06<00:00,  7.16it/s, episode_return=400, return=-108.300]
Iteration 8: 100%|██████████| 50/50 [00:09<00:00,  5.49it/s, episode_return=450, return=-91.400] 
Iteration 9: 100%|██████████| 50/50 [00:08<00:00,  5.98it/s, episode_return=500, return=-133.100]


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,69249.68917


[34m[1mwandb[0m: Agent Starting Run: 7xaymvef with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.009868992082953653
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:30<00:00,  1.62it/s, episode_return=50, return=-500.000]
Iteration 1: 100%|██████████| 50/50 [00:31<00:00,  1.61it/s, episode_return=100, return=-500.000]
Iteration 2: 100%|██████████| 50/50 [00:23<00:00,  2.13it/s, episode_return=150, return=-189.500]
Iteration 3: 100%|██████████| 50/50 [00:11<00:00,  4.52it/s, episode_return=200, return=-176.700]
Iteration 4: 100%|██████████| 50/50 [00:11<00:00,  4.45it/s, episode_return=250, return=-200.200]
Iteration 5: 100%|██████████| 50/50 [00:13<00:00,  3.77it/s, episode_return=300, return=-245.700]
Iteration 6: 100%|██████████| 50/50 [00:24<00:00,  2.04it/s, episode_return=350, return=-452.100]
Iteration 7: 100%|██████████| 50/50 [00:27<00:00,  1.82it/s, episode_return=400, return=-500.000]
Iteration 8: 100%|██████████| 50/50 [00:29<00:00,  1.69it/s, episode_return=450, return=-500.000]
Iteration 9: 100%|██████████| 50/50 [00:29<00:00,  1.68it/s, episode_return=500, return=-500.000]


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,191553.35379


[34m[1mwandb[0m: Agent Starting Run: jxkm6gt6 with config:
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.008721276114999232
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:15<00:00,  3.14it/s, episode_return=50, return=-175.400]
Iteration 1: 100%|██████████| 50/50 [00:15<00:00,  3.13it/s, episode_return=100, return=-242.000]
Iteration 2: 100%|██████████| 50/50 [00:32<00:00,  1.53it/s, episode_return=150, return=-500.000]
Iteration 3: 100%|██████████| 50/50 [00:38<00:00,  1.31it/s, episode_return=200, return=-500.000]
Iteration 4: 100%|██████████| 50/50 [00:40<00:00,  1.25it/s, episode_return=250, return=-500.000]
Iteration 5: 100%|██████████| 50/50 [00:43<00:00,  1.15it/s, episode_return=300, return=-500.000]
Iteration 6: 100%|██████████| 50/50 [00:37<00:00,  1.32it/s, episode_return=350, return=-500.000]
Iteration 7: 100%|██████████| 50/50 [00:35<00:00,  1.39it/s, episode_return=400, return=-500.000]
Iteration 8: 100%|██████████| 50/50 [00:40<00:00,  1.23it/s, episode_return=450, return=-500.000]
Iteration 9: 100%|██████████| 50/50 [00:39<00:00,  1.25it/s, episode_return=500, return=-500.000]


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,203687.83392


[34m[1mwandb[0m: Agent Starting Run: a80b7jbg with config:
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.004355007747860562
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:37<00:00,  1.32it/s, episode_return=50, return=-500.000]
Iteration 1: 100%|██████████| 50/50 [00:39<00:00,  1.27it/s, episode_return=100, return=-500.000]
Iteration 2: 100%|██████████| 50/50 [00:38<00:00,  1.28it/s, episode_return=150, return=-500.000]
Iteration 3: 100%|██████████| 50/50 [00:38<00:00,  1.28it/s, episode_return=200, return=-500.000]
Iteration 4: 100%|██████████| 50/50 [00:35<00:00,  1.39it/s, episode_return=250, return=-500.000]
Iteration 5: 100%|██████████| 50/50 [00:38<00:00,  1.29it/s, episode_return=300, return=-500.000]
Iteration 6: 100%|██████████| 50/50 [00:41<00:00,  1.19it/s, episode_return=350, return=-500.000]
Iteration 7: 100%|██████████| 50/50 [00:37<00:00,  1.32it/s, episode_return=400, return=-500.000]
Iteration 8: 100%|██████████| 50/50 [00:38<00:00,  1.31it/s, episode_return=450, return=-500.000]
Iteration 9: 100%|██████████| 50/50 [00:38<00:00,  1.30it/s, episode_return=500, return=-500.000]


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,246884.75468


[34m[1mwandb[0m: Agent Starting Run: 0tm5hmud with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.0039397295305185435
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:10<00:00,  4.91it/s, episode_return=50, return=-143.300]
Iteration 1: 100%|██████████| 50/50 [00:08<00:00,  6.18it/s, episode_return=100, return=-130.200]
Iteration 2: 100%|██████████| 50/50 [00:07<00:00,  6.34it/s, episode_return=150, return=-114.800]
Iteration 3: 100%|██████████| 50/50 [00:08<00:00,  5.96it/s, episode_return=200, return=-140.600]
Iteration 4: 100%|██████████| 50/50 [00:07<00:00,  6.85it/s, episode_return=250, return=-92.800] 
Iteration 5: 100%|██████████| 50/50 [00:06<00:00,  7.49it/s, episode_return=300, return=-101.700]
Iteration 6: 100%|██████████| 50/50 [00:06<00:00,  8.26it/s, episode_return=350, return=-114.700]
Iteration 7: 100%|██████████| 50/50 [00:06<00:00,  7.78it/s, episode_return=400, return=-107.200]
Iteration 8: 100%|██████████| 50/50 [00:06<00:00,  7.36it/s, episode_return=450, return=-108.300]
Iteration 9: 100%|██████████| 50/50 [00:08<00:00,  5.89it/s, episode_return=500, return=-119.200]


VBox(children=(Label(value='0.001 MB of 0.007 MB uploaded\r'), FloatProgress(value=0.16684506288466686, max=1.…

0,1
regret,▁

0,1
regret,63958.25603


[34m[1mwandb[0m: Agent Starting Run: t46zrlp6 with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.00458412908033848
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011288888888925108, max=1.0…

Iteration 0: 100%|██████████| 50/50 [00:34<00:00,  1.43it/s, episode_return=50, return=-500.000]
Iteration 1: 100%|██████████| 50/50 [00:31<00:00,  1.60it/s, episode_return=100, return=-500.000]
Iteration 2: 100%|██████████| 50/50 [00:30<00:00,  1.62it/s, episode_return=150, return=-500.000]
Iteration 3: 100%|██████████| 50/50 [00:30<00:00,  1.66it/s, episode_return=200, return=-500.000]
Iteration 4: 100%|██████████| 50/50 [00:29<00:00,  1.67it/s, episode_return=250, return=-500.000]
Iteration 5: 100%|██████████| 50/50 [00:30<00:00,  1.64it/s, episode_return=300, return=-500.000]
Iteration 6: 100%|██████████| 50/50 [00:30<00:00,  1.63it/s, episode_return=350, return=-500.000]
Iteration 7: 100%|██████████| 50/50 [00:28<00:00,  1.75it/s, episode_return=400, return=-500.000]
Iteration 8: 100%|██████████| 50/50 [00:31<00:00,  1.61it/s, episode_return=450, return=-500.000]
Iteration 9: 100%|██████████| 50/50 [00:33<00:00,  1.50it/s, episode_return=500, return=-500.000]


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,249651.78234


[34m[1mwandb[0m: Agent Starting Run: vj0hozp6 with config:
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.008456904690631909
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:38<00:00,  1.29it/s, episode_return=50, return=-500.000]
Iteration 1: 100%|██████████| 50/50 [00:39<00:00,  1.27it/s, episode_return=100, return=-500.000]
Iteration 2: 100%|██████████| 50/50 [00:37<00:00,  1.32it/s, episode_return=150, return=-500.000]
Iteration 3: 100%|██████████| 50/50 [00:35<00:00,  1.42it/s, episode_return=200, return=-500.000]
Iteration 4: 100%|██████████| 50/50 [00:36<00:00,  1.38it/s, episode_return=250, return=-500.000]
Iteration 5: 100%|██████████| 50/50 [00:39<00:00,  1.27it/s, episode_return=300, return=-500.000]
Iteration 6: 100%|██████████| 50/50 [00:35<00:00,  1.41it/s, episode_return=350, return=-500.000]
Iteration 7: 100%|██████████| 50/50 [00:36<00:00,  1.39it/s, episode_return=400, return=-500.000]
Iteration 8: 100%|██████████| 50/50 [00:34<00:00,  1.46it/s, episode_return=450, return=-500.000]
Iteration 9: 100%|██████████| 50/50 [00:28<00:00,  1.75it/s, episode_return=500, return=-250.900]


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,248665.26733


[34m[1mwandb[0m: Agent Starting Run: et26m10g with config:
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.008386448878760175
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:35<00:00,  1.40it/s, episode_return=50, return=-500.000]
Iteration 1: 100%|██████████| 50/50 [00:38<00:00,  1.29it/s, episode_return=100, return=-500.000]
Iteration 2: 100%|██████████| 50/50 [00:37<00:00,  1.33it/s, episode_return=150, return=-500.000]
Iteration 3: 100%|██████████| 50/50 [00:32<00:00,  1.55it/s, episode_return=200, return=-500.000]
Iteration 4: 100%|██████████| 50/50 [00:39<00:00,  1.25it/s, episode_return=250, return=-500.000]
Iteration 5: 100%|██████████| 50/50 [00:34<00:00,  1.44it/s, episode_return=300, return=-500.000]
Iteration 6: 100%|██████████| 50/50 [00:35<00:00,  1.40it/s, episode_return=350, return=-500.000]
Iteration 7: 100%|██████████| 50/50 [00:36<00:00,  1.38it/s, episode_return=400, return=-500.000]
Iteration 8: 100%|██████████| 50/50 [00:35<00:00,  1.39it/s, episode_return=450, return=-500.000]
Iteration 9: 100%|██████████| 50/50 [00:35<00:00,  1.40it/s, episode_return=500, return=-500.000]


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,250000.0


[34m[1mwandb[0m: Agent Starting Run: y4w7hl7r with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.005090024139067689
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:22<00:00,  2.19it/s, episode_return=50, return=-257.200]
Iteration 1: 100%|██████████| 50/50 [00:18<00:00,  2.77it/s, episode_return=100, return=-328.400]
Iteration 2: 100%|██████████| 50/50 [00:25<00:00,  1.99it/s, episode_return=150, return=-400.400]
Iteration 3: 100%|██████████| 50/50 [00:30<00:00,  1.63it/s, episode_return=200, return=-500.000]
Iteration 4: 100%|██████████| 50/50 [00:30<00:00,  1.65it/s, episode_return=250, return=-312.400]
Iteration 5: 100%|██████████| 50/50 [00:12<00:00,  3.88it/s, episode_return=300, return=-198.700]
Iteration 6: 100%|██████████| 50/50 [00:11<00:00,  4.40it/s, episode_return=350, return=-175.000]
Iteration 7: 100%|██████████| 50/50 [00:12<00:00,  4.01it/s, episode_return=400, return=-171.500]
Iteration 8: 100%|██████████| 50/50 [00:13<00:00,  3.64it/s, episode_return=450, return=-203.700]
Iteration 9: 100%|██████████| 50/50 [00:13<00:00,  3.66it/s, episode_return=500, return=-196.600]


VBox(children=(Label(value='0.001 MB of 0.007 MB uploaded\r'), FloatProgress(value=0.1655382981547856, max=1.0…

0,1
regret,▁

0,1
regret,158478.1254


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 5ru4l881 with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.00538123052898101
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:35<00:00,  1.43it/s, episode_return=50, return=-500.000]
Iteration 1: 100%|██████████| 50/50 [00:37<00:00,  1.34it/s, episode_return=100, return=-500.000]
Iteration 2: 100%|██████████| 50/50 [00:39<00:00,  1.28it/s, episode_return=150, return=-500.000]
Iteration 3: 100%|██████████| 50/50 [00:35<00:00,  1.39it/s, episode_return=200, return=-500.000]
Iteration 4: 100%|██████████| 50/50 [00:36<00:00,  1.37it/s, episode_return=250, return=-500.000]
Iteration 5: 100%|██████████| 50/50 [00:35<00:00,  1.42it/s, episode_return=300, return=-500.000]
Iteration 6: 100%|██████████| 50/50 [00:33<00:00,  1.50it/s, episode_return=350, return=-500.000]
Iteration 7: 100%|██████████| 50/50 [00:31<00:00,  1.57it/s, episode_return=400, return=-500.000]
Iteration 8: 100%|██████████| 50/50 [00:35<00:00,  1.40it/s, episode_return=450, return=-500.000]
Iteration 9: 100%|██████████| 50/50 [00:33<00:00,  1.51it/s, episode_return=500, return=-500.000]


VBox(children=(Label(value='0.001 MB of 0.007 MB uploaded\r'), FloatProgress(value=0.16397107166337935, max=1.…

0,1
regret,▁

0,1
regret,250000.0


[34m[1mwandb[0m: Agent Starting Run: qnx9tsir with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.002511406575845394
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:15<00:00,  3.15it/s, episode_return=50, return=-156.300]
Iteration 1: 100%|██████████| 50/50 [00:09<00:00,  5.55it/s, episode_return=100, return=-161.800]
Iteration 2: 100%|██████████| 50/50 [00:10<00:00,  4.90it/s, episode_return=150, return=-129.500]
Iteration 3: 100%|██████████| 50/50 [00:09<00:00,  5.12it/s, episode_return=200, return=-131.700]
Iteration 4: 100%|██████████| 50/50 [00:08<00:00,  6.22it/s, episode_return=250, return=-106.600]
Iteration 5: 100%|██████████| 50/50 [00:07<00:00,  6.35it/s, episode_return=300, return=-128.800]
Iteration 6: 100%|██████████| 50/50 [00:07<00:00,  6.43it/s, episode_return=350, return=-100.900]
Iteration 7: 100%|██████████| 50/50 [00:07<00:00,  6.31it/s, episode_return=400, return=-110.100]
Iteration 8: 100%|██████████| 50/50 [00:07<00:00,  6.32it/s, episode_return=450, return=-98.700] 
Iteration 9: 100%|██████████| 50/50 [00:07<00:00,  6.82it/s, episode_return=500, return=-102.100]


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,76525.20085


[34m[1mwandb[0m: Agent Starting Run: mtjzaew7 with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.0069505772760839095
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:30<00:00,  1.62it/s, episode_return=50, return=-455.000]
Iteration 1: 100%|██████████| 50/50 [00:36<00:00,  1.37it/s, episode_return=100, return=-500.000]
Iteration 2: 100%|██████████| 50/50 [00:37<00:00,  1.35it/s, episode_return=150, return=-500.000]
Iteration 3: 100%|██████████| 50/50 [00:36<00:00,  1.36it/s, episode_return=200, return=-500.000]
Iteration 4: 100%|██████████| 50/50 [00:35<00:00,  1.39it/s, episode_return=250, return=-500.000]
Iteration 5: 100%|██████████| 50/50 [00:36<00:00,  1.39it/s, episode_return=300, return=-500.000]
Iteration 6: 100%|██████████| 50/50 [00:36<00:00,  1.36it/s, episode_return=350, return=-500.000]
Iteration 7: 100%|██████████| 50/50 [00:36<00:00,  1.36it/s, episode_return=400, return=-500.000]
Iteration 8: 100%|██████████| 50/50 [00:36<00:00,  1.37it/s, episode_return=450, return=-500.000]
Iteration 9: 100%|██████████| 50/50 [00:35<00:00,  1.43it/s, episode_return=500, return=-428.000]


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,248026.38509


[34m[1mwandb[0m: Agent Starting Run: 6svjsnuk with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.0004403672012827457
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:35<00:00,  1.40it/s, episode_return=50, return=-500.000]
Iteration 1: 100%|██████████| 50/50 [00:36<00:00,  1.39it/s, episode_return=100, return=-500.000]
Iteration 2: 100%|██████████| 50/50 [00:34<00:00,  1.44it/s, episode_return=150, return=-500.000]
Iteration 3: 100%|██████████| 50/50 [00:36<00:00,  1.37it/s, episode_return=200, return=-500.000]
Iteration 4: 100%|██████████| 50/50 [00:36<00:00,  1.37it/s, episode_return=250, return=-500.000]
Iteration 5: 100%|██████████| 50/50 [00:32<00:00,  1.52it/s, episode_return=300, return=-500.000]
Iteration 6: 100%|██████████| 50/50 [00:37<00:00,  1.35it/s, episode_return=350, return=-500.000]
Iteration 7: 100%|██████████| 50/50 [00:35<00:00,  1.42it/s, episode_return=400, return=-500.000]
Iteration 8: 100%|██████████| 50/50 [00:37<00:00,  1.33it/s, episode_return=450, return=-500.000]
Iteration 9: 100%|██████████| 50/50 [00:36<00:00,  1.35it/s, episode_return=500, return=-500.000]


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,250000.0


[34m[1mwandb[0m: Agent Starting Run: mhsrrqs4 with config:
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.0029712813838715523
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:22<00:00,  2.19it/s, episode_return=50, return=-167.800]
Iteration 1: 100%|██████████| 50/50 [00:10<00:00,  4.61it/s, episode_return=100, return=-134.000]
Iteration 2: 100%|██████████| 50/50 [00:09<00:00,  5.54it/s, episode_return=150, return=-104.500]
Iteration 3: 100%|██████████| 50/50 [00:07<00:00,  6.42it/s, episode_return=200, return=-116.400]
Iteration 4: 100%|██████████| 50/50 [00:09<00:00,  5.10it/s, episode_return=250, return=-133.300]
Iteration 5: 100%|██████████| 50/50 [00:22<00:00,  2.19it/s, episode_return=300, return=-500.000]
Iteration 6: 100%|██████████| 50/50 [00:32<00:00,  1.52it/s, episode_return=350, return=-500.000]
Iteration 7: 100%|██████████| 50/50 [00:31<00:00,  1.59it/s, episode_return=400, return=-500.000]
Iteration 8: 100%|██████████| 50/50 [00:32<00:00,  1.55it/s, episode_return=450, return=-500.000]
Iteration 9: 100%|██████████| 50/50 [00:32<00:00,  1.52it/s, episode_return=500, return=-500.000]


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,144654.06616


[34m[1mwandb[0m: Agent Starting Run: 1qrarj4p with config:
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.0018545121313780076
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:35<00:00,  1.40it/s, episode_return=50, return=-500.000]
Iteration 1: 100%|██████████| 50/50 [00:37<00:00,  1.33it/s, episode_return=100, return=-500.000]
Iteration 2: 100%|██████████| 50/50 [00:37<00:00,  1.34it/s, episode_return=150, return=-500.000]
Iteration 3: 100%|██████████| 50/50 [00:35<00:00,  1.39it/s, episode_return=200, return=-500.000]
Iteration 4: 100%|██████████| 50/50 [00:37<00:00,  1.34it/s, episode_return=250, return=-500.000]
Iteration 5: 100%|██████████| 50/50 [00:36<00:00,  1.38it/s, episode_return=300, return=-500.000]
Iteration 6: 100%|██████████| 50/50 [00:35<00:00,  1.40it/s, episode_return=350, return=-500.000]
Iteration 7: 100%|██████████| 50/50 [00:34<00:00,  1.44it/s, episode_return=400, return=-500.000]
Iteration 8: 100%|██████████| 50/50 [00:35<00:00,  1.40it/s, episode_return=450, return=-500.000]
Iteration 9: 100%|██████████| 50/50 [00:34<00:00,  1.44it/s, episode_return=500, return=-500.000]


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,248254.23171


[34m[1mwandb[0m: Agent Starting Run: ui4i2hs5 with config:
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.006538251267216049
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:33<00:00,  1.49it/s, episode_return=50, return=-500.000]
Iteration 1: 100%|██████████| 50/50 [00:33<00:00,  1.48it/s, episode_return=100, return=-500.000]
Iteration 2: 100%|██████████| 50/50 [00:33<00:00,  1.50it/s, episode_return=150, return=-500.000]
Iteration 3: 100%|██████████| 50/50 [00:31<00:00,  1.57it/s, episode_return=200, return=-500.000]
Iteration 4: 100%|██████████| 50/50 [00:33<00:00,  1.51it/s, episode_return=250, return=-500.000]
Iteration 5: 100%|██████████| 50/50 [00:32<00:00,  1.52it/s, episode_return=300, return=-500.000]
Iteration 6: 100%|██████████| 50/50 [00:32<00:00,  1.54it/s, episode_return=350, return=-500.000]
Iteration 7: 100%|██████████| 50/50 [00:33<00:00,  1.50it/s, episode_return=400, return=-500.000]
Iteration 8: 100%|██████████| 50/50 [00:33<00:00,  1.51it/s, episode_return=450, return=-500.000]
Iteration 9: 100%|██████████| 50/50 [00:35<00:00,  1.42it/s, episode_return=500, return=-500.000]


VBox(children=(Label(value='0.001 MB of 0.007 MB uploaded\r'), FloatProgress(value=0.16416535018430753, max=1.…

0,1
regret,▁

0,1
regret,250000.0


[34m[1mwandb[0m: Agent Starting Run: thkiumat with config:
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.005581385272204105
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:36<00:00,  1.38it/s, episode_return=50, return=-500.000]
Iteration 1: 100%|██████████| 50/50 [00:36<00:00,  1.37it/s, episode_return=100, return=-500.000]
Iteration 2: 100%|██████████| 50/50 [00:35<00:00,  1.40it/s, episode_return=150, return=-500.000]
Iteration 3: 100%|██████████| 50/50 [00:40<00:00,  1.23it/s, episode_return=200, return=-500.000]
Iteration 4: 100%|██████████| 50/50 [00:40<00:00,  1.25it/s, episode_return=250, return=-500.000]
Iteration 5: 100%|██████████| 50/50 [00:41<00:00,  1.22it/s, episode_return=300, return=-500.000]
Iteration 6: 100%|██████████| 50/50 [00:35<00:00,  1.42it/s, episode_return=350, return=-311.800]
Iteration 7: 100%|██████████| 50/50 [00:16<00:00,  3.10it/s, episode_return=400, return=-208.300]
Iteration 8: 100%|██████████| 50/50 [00:33<00:00,  1.50it/s, episode_return=450, return=-500.000]
Iteration 9: 100%|██████████| 50/50 [00:38<00:00,  1.30it/s, episode_return=500, return=-500.000]


VBox(children=(Label(value='0.001 MB of 0.007 MB uploaded\r'), FloatProgress(value=0.16375574523965858, max=1.…

0,1
regret,▁

0,1
regret,229243.89109


[34m[1mwandb[0m: Agent Starting Run: x9pfjb4l with config:
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.006539633275929722
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:34<00:00,  1.43it/s, episode_return=50, return=-500.000]
Iteration 1: 100%|██████████| 50/50 [00:36<00:00,  1.37it/s, episode_return=100, return=-500.000]
Iteration 2: 100%|██████████| 50/50 [00:34<00:00,  1.43it/s, episode_return=150, return=-500.000]
Iteration 3: 100%|██████████| 50/50 [00:34<00:00,  1.43it/s, episode_return=200, return=-500.000]
Iteration 4: 100%|██████████| 50/50 [00:36<00:00,  1.38it/s, episode_return=250, return=-500.000]
Iteration 5: 100%|██████████| 50/50 [00:34<00:00,  1.45it/s, episode_return=300, return=-500.000]
Iteration 6: 100%|██████████| 50/50 [00:35<00:00,  1.40it/s, episode_return=350, return=-500.000]
Iteration 7: 100%|██████████| 50/50 [00:36<00:00,  1.38it/s, episode_return=400, return=-500.000]
Iteration 8: 100%|██████████| 50/50 [00:35<00:00,  1.40it/s, episode_return=450, return=-500.000]
Iteration 9: 100%|██████████| 50/50 [00:35<00:00,  1.41it/s, episode_return=500, return=-500.000]


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,247135.03518


[34m[1mwandb[0m: Agent Starting Run: d4oq6bbw with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.006968137366172682
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:16<00:00,  3.00it/s, episode_return=50, return=-204.200]
Iteration 1: 100%|██████████| 50/50 [00:11<00:00,  4.38it/s, episode_return=100, return=-127.400]
Iteration 2: 100%|██████████| 50/50 [00:07<00:00,  6.49it/s, episode_return=150, return=-118.000]
Iteration 3: 100%|██████████| 50/50 [00:08<00:00,  6.23it/s, episode_return=200, return=-170.300]
Iteration 4: 100%|██████████| 50/50 [00:09<00:00,  5.35it/s, episode_return=250, return=-118.600]
Iteration 5: 100%|██████████| 50/50 [00:06<00:00,  8.04it/s, episode_return=300, return=-88.500] 
Iteration 6: 100%|██████████| 50/50 [00:05<00:00,  8.48it/s, episode_return=350, return=-84.700]
Iteration 7: 100%|██████████| 50/50 [00:06<00:00,  8.32it/s, episode_return=400, return=-97.700]
Iteration 8: 100%|██████████| 50/50 [00:06<00:00,  7.55it/s, episode_return=450, return=-96.400]
Iteration 9: 100%|██████████| 50/50 [00:07<00:00,  6.78it/s, episode_return=500, return=-147.600]


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,74317.35528


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: ahf641sv with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.005234326530443733
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:29<00:00,  1.67it/s, episode_return=50, return=-474.200]
Iteration 1: 100%|██████████| 50/50 [00:22<00:00,  2.22it/s, episode_return=100, return=-263.800]
Iteration 2: 100%|██████████| 50/50 [00:12<00:00,  3.88it/s, episode_return=150, return=-235.300]
Iteration 3: 100%|██████████| 50/50 [00:16<00:00,  3.08it/s, episode_return=200, return=-230.100]
Iteration 4: 100%|██████████| 50/50 [00:14<00:00,  3.44it/s, episode_return=250, return=-208.100]
Iteration 5: 100%|██████████| 50/50 [00:13<00:00,  3.74it/s, episode_return=300, return=-201.800]
Iteration 6: 100%|██████████| 50/50 [00:11<00:00,  4.42it/s, episode_return=350, return=-189.500]
Iteration 7: 100%|██████████| 50/50 [00:10<00:00,  4.66it/s, episode_return=400, return=-177.200]
Iteration 8: 100%|██████████| 50/50 [00:11<00:00,  4.50it/s, episode_return=450, return=-180.800]
Iteration 9: 100%|██████████| 50/50 [00:10<00:00,  4.94it/s, episode_return=500, return=-182.800]


VBox(children=(Label(value='0.001 MB of 0.007 MB uploaded\r'), FloatProgress(value=0.16582379558158106, max=1.…

0,1
regret,▁

0,1
regret,137738.19157


[34m[1mwandb[0m: Agent Starting Run: d7fw7j3i with config:
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.005721659235281456
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:39<00:00,  1.28it/s, episode_return=50, return=-500.000]
Iteration 1: 100%|██████████| 50/50 [00:38<00:00,  1.31it/s, episode_return=100, return=-500.000]
Iteration 2: 100%|██████████| 50/50 [00:38<00:00,  1.29it/s, episode_return=150, return=-500.000]
Iteration 3: 100%|██████████| 50/50 [00:34<00:00,  1.45it/s, episode_return=200, return=-500.000]
Iteration 4: 100%|██████████| 50/50 [00:35<00:00,  1.40it/s, episode_return=250, return=-500.000]
Iteration 5: 100%|██████████| 50/50 [00:35<00:00,  1.42it/s, episode_return=300, return=-500.000]
Iteration 6: 100%|██████████| 50/50 [00:34<00:00,  1.44it/s, episode_return=350, return=-500.000]
Iteration 7: 100%|██████████| 50/50 [00:35<00:00,  1.41it/s, episode_return=400, return=-500.000]
Iteration 8: 100%|██████████| 50/50 [00:35<00:00,  1.43it/s, episode_return=450, return=-500.000]
Iteration 9: 100%|██████████| 50/50 [00:35<00:00,  1.40it/s, episode_return=500, return=-500.000]


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,250000.0


[34m[1mwandb[0m: Agent Starting Run: dxnth9jk with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.004774891091182593
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:17<00:00,  2.88it/s, episode_return=50, return=-201.000]
Iteration 1: 100%|██████████| 50/50 [00:08<00:00,  5.75it/s, episode_return=100, return=-146.100]
Iteration 2: 100%|██████████| 50/50 [00:10<00:00,  4.71it/s, episode_return=150, return=-151.500]
Iteration 3: 100%|██████████| 50/50 [00:08<00:00,  5.96it/s, episode_return=200, return=-99.400] 
Iteration 4: 100%|██████████| 50/50 [00:06<00:00,  7.46it/s, episode_return=250, return=-92.500] 
Iteration 5: 100%|██████████| 50/50 [00:06<00:00,  7.19it/s, episode_return=300, return=-106.100]
Iteration 6: 100%|██████████| 50/50 [00:06<00:00,  7.48it/s, episode_return=350, return=-92.200] 
Iteration 7: 100%|██████████| 50/50 [00:06<00:00,  7.21it/s, episode_return=400, return=-136.800]
Iteration 8: 100%|██████████| 50/50 [00:06<00:00,  8.03it/s, episode_return=450, return=-93.800]
Iteration 9: 100%|██████████| 50/50 [00:05<00:00,  8.44it/s, episode_return=500, return=-85.500]


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,73464.01304


[34m[1mwandb[0m: Agent Starting Run: v0v6jc11 with config:
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.006268304148626981
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:38<00:00,  1.30it/s, episode_return=50, return=-500.000]
Iteration 1: 100%|██████████| 50/50 [00:39<00:00,  1.28it/s, episode_return=100, return=-500.000]
Iteration 2: 100%|██████████| 50/50 [00:37<00:00,  1.34it/s, episode_return=150, return=-500.000]
Iteration 3: 100%|██████████| 50/50 [00:39<00:00,  1.25it/s, episode_return=200, return=-500.000]
Iteration 4: 100%|██████████| 50/50 [00:38<00:00,  1.30it/s, episode_return=250, return=-500.000]
Iteration 5: 100%|██████████| 50/50 [00:41<00:00,  1.20it/s, episode_return=300, return=-500.000]
Iteration 6: 100%|██████████| 50/50 [00:41<00:00,  1.20it/s, episode_return=350, return=-500.000]
Iteration 7: 100%|██████████| 50/50 [00:43<00:00,  1.14it/s, episode_return=400, return=-500.000]
Iteration 8: 100%|██████████| 50/50 [00:41<00:00,  1.20it/s, episode_return=450, return=-500.000]
Iteration 9: 100%|██████████| 50/50 [00:39<00:00,  1.26it/s, episode_return=500, return=-500.000]


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,248274.01494


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 5uk7p4ny with config:
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.0019752108842442983
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:22<00:00,  2.22it/s, episode_return=50, return=-319.200]
Iteration 1: 100%|██████████| 50/50 [00:13<00:00,  3.59it/s, episode_return=100, return=-191.200]
Iteration 2: 100%|██████████| 50/50 [00:09<00:00,  5.49it/s, episode_return=150, return=-133.700]
Iteration 3: 100%|██████████| 50/50 [00:08<00:00,  5.87it/s, episode_return=200, return=-125.000]
Iteration 4: 100%|██████████| 50/50 [00:09<00:00,  5.47it/s, episode_return=250, return=-123.400]
Iteration 5: 100%|██████████| 50/50 [00:07<00:00,  6.73it/s, episode_return=300, return=-96.300] 
Iteration 6: 100%|██████████| 50/50 [00:07<00:00,  7.00it/s, episode_return=350, return=-120.800]
Iteration 7: 100%|██████████| 50/50 [00:07<00:00,  6.95it/s, episode_return=400, return=-101.100]
Iteration 8: 100%|██████████| 50/50 [00:07<00:00,  6.95it/s, episode_return=450, return=-99.500] 
Iteration 9: 100%|██████████| 50/50 [00:06<00:00,  7.40it/s, episode_return=500, return=-94.200] 


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,86153.53311


[34m[1mwandb[0m: Agent Starting Run: c8dn5wsl with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.0024537016804231657
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:26<00:00,  1.88it/s, episode_return=50, return=-401.700]
Iteration 1: 100%|██████████| 50/50 [00:15<00:00,  3.25it/s, episode_return=100, return=-166.500]
Iteration 2: 100%|██████████| 50/50 [00:11<00:00,  4.41it/s, episode_return=150, return=-216.400]
Iteration 3: 100%|██████████| 50/50 [00:13<00:00,  3.65it/s, episode_return=200, return=-224.400]
Iteration 4: 100%|██████████| 50/50 [00:11<00:00,  4.46it/s, episode_return=250, return=-156.700]
Iteration 5: 100%|██████████| 50/50 [00:09<00:00,  5.31it/s, episode_return=300, return=-132.600]
Iteration 6: 100%|██████████| 50/50 [00:08<00:00,  5.99it/s, episode_return=350, return=-147.400]
Iteration 7: 100%|██████████| 50/50 [00:07<00:00,  6.80it/s, episode_return=400, return=-115.600]
Iteration 8: 100%|██████████| 50/50 [00:07<00:00,  6.55it/s, episode_return=450, return=-118.400]
Iteration 9: 100%|██████████| 50/50 [00:07<00:00,  6.31it/s, episode_return=500, return=-117.700]


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,109920.02848


[34m[1mwandb[0m: Agent Starting Run: rpeawzqm with config:
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.0053169973015974115
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:16<00:00,  3.06it/s, episode_return=50, return=-102.200]
Iteration 1: 100%|██████████| 50/50 [00:08<00:00,  5.87it/s, episode_return=100, return=-95.000]
Iteration 2: 100%|██████████| 50/50 [00:06<00:00,  7.19it/s, episode_return=150, return=-93.800]
Iteration 3: 100%|██████████| 50/50 [00:07<00:00,  6.87it/s, episode_return=200, return=-97.900]
Iteration 4: 100%|██████████| 50/50 [00:11<00:00,  4.42it/s, episode_return=250, return=-359.600]
Iteration 5: 100%|██████████| 50/50 [00:36<00:00,  1.37it/s, episode_return=300, return=-500.000]
Iteration 6: 100%|██████████| 50/50 [00:35<00:00,  1.40it/s, episode_return=350, return=-500.000]
Iteration 7: 100%|██████████| 50/50 [00:35<00:00,  1.40it/s, episode_return=400, return=-500.000]
Iteration 8: 100%|██████████| 50/50 [00:34<00:00,  1.46it/s, episode_return=450, return=-500.000]
Iteration 9: 100%|██████████| 50/50 [00:36<00:00,  1.38it/s, episode_return=500, return=-500.000]


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,148145.3834


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: f86k2piv with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.0022952648227196077
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:31<00:00,  1.57it/s, episode_return=50, return=-327.600]
Iteration 1: 100%|██████████| 50/50 [00:13<00:00,  3.74it/s, episode_return=100, return=-143.700]
Iteration 2: 100%|██████████| 50/50 [00:08<00:00,  5.70it/s, episode_return=150, return=-129.400]
Iteration 3: 100%|██████████| 50/50 [00:08<00:00,  5.73it/s, episode_return=200, return=-115.500]
Iteration 4: 100%|██████████| 50/50 [00:08<00:00,  5.91it/s, episode_return=250, return=-124.100]
Iteration 5: 100%|██████████| 50/50 [00:09<00:00,  5.28it/s, episode_return=300, return=-137.400]
Iteration 6: 100%|██████████| 50/50 [00:08<00:00,  5.62it/s, episode_return=350, return=-137.900]
Iteration 7: 100%|██████████| 50/50 [00:09<00:00,  5.44it/s, episode_return=400, return=-132.500]
Iteration 8: 100%|██████████| 50/50 [00:07<00:00,  6.36it/s, episode_return=450, return=-107.500]
Iteration 9: 100%|██████████| 50/50 [00:07<00:00,  6.54it/s, episode_return=500, return=-111.700]


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,101143.99448


[34m[1mwandb[0m: Agent Starting Run: 9wn45rpx with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.008352872950800158
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:17<00:00,  2.81it/s, episode_return=50, return=-131.500]
Iteration 1: 100%|██████████| 50/50 [00:09<00:00,  5.50it/s, episode_return=100, return=-121.300]
Iteration 2: 100%|██████████| 50/50 [00:08<00:00,  5.57it/s, episode_return=150, return=-163.900]
Iteration 3: 100%|██████████| 50/50 [00:09<00:00,  5.54it/s, episode_return=200, return=-141.500]
Iteration 4: 100%|██████████| 50/50 [00:08<00:00,  5.94it/s, episode_return=250, return=-137.700]
Iteration 5: 100%|██████████| 50/50 [00:09<00:00,  5.51it/s, episode_return=300, return=-111.800]
Iteration 6: 100%|██████████| 50/50 [00:07<00:00,  6.70it/s, episode_return=350, return=-111.500]
Iteration 7: 100%|██████████| 50/50 [00:08<00:00,  6.04it/s, episode_return=400, return=-122.000]
Iteration 8: 100%|██████████| 50/50 [00:07<00:00,  6.92it/s, episode_return=450, return=-119.300]
Iteration 9: 100%|██████████| 50/50 [00:06<00:00,  8.15it/s, episode_return=500, return=-81.800] 


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,79782.5166


[34m[1mwandb[0m: Agent Starting Run: aax7m2sn with config:
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.0060649122841024685
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:32<00:00,  1.56it/s, episode_return=50, return=-500.000]
Iteration 1: 100%|██████████| 50/50 [00:36<00:00,  1.37it/s, episode_return=100, return=-500.000]
Iteration 2: 100%|██████████| 50/50 [00:40<00:00,  1.24it/s, episode_return=150, return=-500.000]
Iteration 3: 100%|██████████| 50/50 [00:36<00:00,  1.36it/s, episode_return=200, return=-500.000]
Iteration 4: 100%|██████████| 50/50 [00:36<00:00,  1.35it/s, episode_return=250, return=-500.000]
Iteration 5: 100%|██████████| 50/50 [00:36<00:00,  1.36it/s, episode_return=300, return=-500.000]
Iteration 6: 100%|██████████| 50/50 [00:36<00:00,  1.36it/s, episode_return=350, return=-500.000]
Iteration 7: 100%|██████████| 50/50 [00:38<00:00,  1.31it/s, episode_return=400, return=-500.000]
Iteration 8: 100%|██████████| 50/50 [00:36<00:00,  1.37it/s, episode_return=450, return=-500.000]
Iteration 9: 100%|██████████| 50/50 [00:32<00:00,  1.54it/s, episode_return=500, return=-500.000]


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,249922.04082


[34m[1mwandb[0m: Agent Starting Run: 2r29yod4 with config:
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.005653038609250452
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:20<00:00,  2.47it/s, episode_return=50, return=-183.300]
Iteration 1: 100%|██████████| 50/50 [00:14<00:00,  3.51it/s, episode_return=100, return=-174.300]
Iteration 2: 100%|██████████| 50/50 [00:18<00:00,  2.77it/s, episode_return=150, return=-240.100]
Iteration 3: 100%|██████████| 50/50 [00:19<00:00,  2.50it/s, episode_return=200, return=-500.000]
Iteration 4: 100%|██████████| 50/50 [00:31<00:00,  1.58it/s, episode_return=250, return=-357.100]
Iteration 5: 100%|██████████| 50/50 [00:14<00:00,  3.42it/s, episode_return=300, return=-183.600]
Iteration 6: 100%|██████████| 50/50 [00:12<00:00,  4.01it/s, episode_return=350, return=-174.900]
Iteration 7: 100%|██████████| 50/50 [00:10<00:00,  4.73it/s, episode_return=400, return=-149.400]
Iteration 8: 100%|██████████| 50/50 [00:12<00:00,  4.10it/s, episode_return=450, return=-150.300]
Iteration 9: 100%|██████████| 50/50 [00:13<00:00,  3.77it/s, episode_return=500, return=-191.800]


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,118657.52485


[34m[1mwandb[0m: Agent Starting Run: jx6wk0so with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.009364066292156652
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:13<00:00,  3.66it/s, episode_return=50, return=-174.300]
Iteration 1: 100%|██████████| 50/50 [00:11<00:00,  4.38it/s, episode_return=100, return=-177.200]
Iteration 2: 100%|██████████| 50/50 [00:12<00:00,  4.14it/s, episode_return=150, return=-232.800]
Iteration 3: 100%|██████████| 50/50 [00:17<00:00,  2.91it/s, episode_return=200, return=-227.200]
Iteration 4: 100%|██████████| 50/50 [00:11<00:00,  4.21it/s, episode_return=250, return=-177.300]
Iteration 5: 100%|██████████| 50/50 [00:24<00:00,  2.06it/s, episode_return=300, return=-500.000]
Iteration 6: 100%|██████████| 50/50 [00:33<00:00,  1.51it/s, episode_return=350, return=-500.000]
Iteration 7: 100%|██████████| 50/50 [00:32<00:00,  1.52it/s, episode_return=400, return=-500.000]
Iteration 8: 100%|██████████| 50/50 [00:32<00:00,  1.53it/s, episode_return=450, return=-500.000]
Iteration 9: 100%|██████████| 50/50 [00:32<00:00,  1.54it/s, episode_return=500, return=-500.000]


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,162788.52414


[34m[1mwandb[0m: Agent Starting Run: fbkb40ku with config:
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.0005252569024944184
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:21<00:00,  2.29it/s, episode_return=50, return=-259.200]
Iteration 1: 100%|██████████| 50/50 [00:16<00:00,  3.06it/s, episode_return=100, return=-198.900]
Iteration 2: 100%|██████████| 50/50 [00:13<00:00,  3.58it/s, episode_return=150, return=-169.500]
Iteration 3: 100%|██████████| 50/50 [00:13<00:00,  3.83it/s, episode_return=200, return=-133.700]
Iteration 4: 100%|██████████| 50/50 [00:11<00:00,  4.31it/s, episode_return=250, return=-138.300]
Iteration 5: 100%|██████████| 50/50 [00:10<00:00,  4.56it/s, episode_return=300, return=-125.900]
Iteration 6: 100%|██████████| 50/50 [00:13<00:00,  3.76it/s, episode_return=350, return=-193.300]
Iteration 7: 100%|██████████| 50/50 [00:13<00:00,  3.61it/s, episode_return=400, return=-171.700]
Iteration 8: 100%|██████████| 50/50 [00:13<00:00,  3.80it/s, episode_return=450, return=-160.200]
Iteration 9: 100%|██████████| 50/50 [00:11<00:00,  4.25it/s, episode_return=500, return=-152.000]


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,95162.7828


[34m[1mwandb[0m: Agent Starting Run: y07e8mcy with config:
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.006228690989681561
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:34<00:00,  1.44it/s, episode_return=50, return=-481.300]
Iteration 1: 100%|██████████| 50/50 [00:38<00:00,  1.30it/s, episode_return=100, return=-500.000]
Iteration 2: 100%|██████████| 50/50 [00:36<00:00,  1.38it/s, episode_return=150, return=-500.000]
Iteration 3: 100%|██████████| 50/50 [00:37<00:00,  1.32it/s, episode_return=200, return=-500.000]
Iteration 4: 100%|██████████| 50/50 [00:35<00:00,  1.43it/s, episode_return=250, return=-500.000]
Iteration 5: 100%|██████████| 50/50 [00:36<00:00,  1.35it/s, episode_return=300, return=-500.000]
Iteration 6: 100%|██████████| 50/50 [00:36<00:00,  1.36it/s, episode_return=350, return=-500.000]
Iteration 7: 100%|██████████| 50/50 [00:33<00:00,  1.49it/s, episode_return=400, return=-500.000]
Iteration 8: 100%|██████████| 50/50 [00:33<00:00,  1.48it/s, episode_return=450, return=-500.000]
Iteration 9: 100%|██████████| 50/50 [00:33<00:00,  1.50it/s, episode_return=500, return=-500.000]


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,247853.1017


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: nor79rau with config:
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.0017055618270306258
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:25<00:00,  1.97it/s, episode_return=50, return=-258.900]
Iteration 1: 100%|██████████| 50/50 [00:11<00:00,  4.19it/s, episode_return=100, return=-168.000]
Iteration 2: 100%|██████████| 50/50 [00:09<00:00,  5.34it/s, episode_return=150, return=-130.400]
Iteration 3: 100%|██████████| 50/50 [00:08<00:00,  5.62it/s, episode_return=200, return=-144.300]
Iteration 4: 100%|██████████| 50/50 [00:08<00:00,  5.56it/s, episode_return=250, return=-106.600]
Iteration 5: 100%|██████████| 50/50 [00:07<00:00,  6.61it/s, episode_return=300, return=-112.800]
Iteration 6: 100%|██████████| 50/50 [00:07<00:00,  6.33it/s, episode_return=350, return=-109.600]
Iteration 7: 100%|██████████| 50/50 [00:09<00:00,  5.45it/s, episode_return=400, return=-125.600]
Iteration 8: 100%|██████████| 50/50 [00:08<00:00,  6.08it/s, episode_return=450, return=-104.100]
Iteration 9: 100%|██████████| 50/50 [00:08<00:00,  5.91it/s, episode_return=500, return=-137.500]


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,92018.24004


[34m[1mwandb[0m: Agent Starting Run: cyaq07kk with config:
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.005650796645328939
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:33<00:00,  1.50it/s, episode_return=50, return=-500.000]
Iteration 1: 100%|██████████| 50/50 [00:35<00:00,  1.40it/s, episode_return=100, return=-500.000]
Iteration 2: 100%|██████████| 50/50 [00:34<00:00,  1.45it/s, episode_return=150, return=-500.000]
Iteration 3: 100%|██████████| 50/50 [00:35<00:00,  1.42it/s, episode_return=200, return=-485.900]
Iteration 4: 100%|██████████| 50/50 [00:16<00:00,  2.97it/s, episode_return=250, return=-213.800]
Iteration 5: 100%|██████████| 50/50 [00:14<00:00,  3.41it/s, episode_return=300, return=-163.200]
Iteration 6: 100%|██████████| 50/50 [00:16<00:00,  2.94it/s, episode_return=350, return=-217.400]
Iteration 7: 100%|██████████| 50/50 [00:14<00:00,  3.56it/s, episode_return=400, return=-170.100]
Iteration 8: 100%|██████████| 50/50 [00:14<00:00,  3.34it/s, episode_return=450, return=-208.700]
Iteration 9: 100%|██████████| 50/50 [00:15<00:00,  3.13it/s, episode_return=500, return=-227.900]


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,171249.41183


[34m[1mwandb[0m: Agent Starting Run: jp2y65sk with config:
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.0037270041563469062
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011288888888925108, max=1.0…

Iteration 0: 100%|██████████| 50/50 [00:20<00:00,  2.48it/s, episode_return=50, return=-201.600]
Iteration 1: 100%|██████████| 50/50 [00:29<00:00,  1.69it/s, episode_return=100, return=-500.000]
Iteration 2: 100%|██████████| 50/50 [00:35<00:00,  1.40it/s, episode_return=150, return=-500.000]
Iteration 3: 100%|██████████| 50/50 [00:35<00:00,  1.40it/s, episode_return=200, return=-500.000]
Iteration 4: 100%|██████████| 50/50 [00:31<00:00,  1.57it/s, episode_return=250, return=-500.000]
Iteration 5: 100%|██████████| 50/50 [00:36<00:00,  1.38it/s, episode_return=300, return=-500.000]
Iteration 6: 100%|██████████| 50/50 [00:38<00:00,  1.29it/s, episode_return=350, return=-500.000]
Iteration 7: 100%|██████████| 50/50 [00:29<00:00,  1.70it/s, episode_return=400, return=-500.000]
Iteration 8: 100%|██████████| 50/50 [00:32<00:00,  1.52it/s, episode_return=450, return=-500.000]
Iteration 9: 100%|██████████| 50/50 [00:32<00:00,  1.52it/s, episode_return=500, return=-500.000]


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,226628.30528


[34m[1mwandb[0m: Agent Starting Run: kv35kve5 with config:
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.000998591269745745
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:32<00:00,  1.55it/s, episode_return=50, return=-500.000]
Iteration 1: 100%|██████████| 50/50 [00:33<00:00,  1.50it/s, episode_return=100, return=-500.000]
Iteration 2: 100%|██████████| 50/50 [00:32<00:00,  1.55it/s, episode_return=150, return=-500.000]
Iteration 3: 100%|██████████| 50/50 [00:32<00:00,  1.54it/s, episode_return=200, return=-500.000]
Iteration 4: 100%|██████████| 50/50 [00:32<00:00,  1.52it/s, episode_return=250, return=-500.000]
Iteration 5: 100%|██████████| 50/50 [00:32<00:00,  1.55it/s, episode_return=300, return=-500.000]
Iteration 6: 100%|██████████| 50/50 [00:32<00:00,  1.52it/s, episode_return=350, return=-500.000]
Iteration 7: 100%|██████████| 50/50 [00:31<00:00,  1.60it/s, episode_return=400, return=-500.000]
Iteration 8: 100%|██████████| 50/50 [00:36<00:00,  1.38it/s, episode_return=450, return=-500.000]
Iteration 9: 100%|██████████| 50/50 [00:35<00:00,  1.41it/s, episode_return=500, return=-500.000]


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,250000.0


[34m[1mwandb[0m: Agent Starting Run: zls18zvn with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.009267136838663197
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:32<00:00,  1.52it/s, episode_return=50, return=-500.000]
Iteration 1: 100%|██████████| 50/50 [00:32<00:00,  1.54it/s, episode_return=100, return=-500.000]
Iteration 2: 100%|██████████| 50/50 [00:32<00:00,  1.53it/s, episode_return=150, return=-500.000]
Iteration 3: 100%|██████████| 50/50 [00:34<00:00,  1.46it/s, episode_return=200, return=-500.000]
Iteration 4: 100%|██████████| 50/50 [00:31<00:00,  1.58it/s, episode_return=250, return=-500.000]
Iteration 5: 100%|██████████| 50/50 [00:35<00:00,  1.41it/s, episode_return=300, return=-500.000]
Iteration 6: 100%|██████████| 50/50 [00:24<00:00,  2.07it/s, episode_return=350, return=-285.700]
Iteration 7: 100%|██████████| 50/50 [00:14<00:00,  3.34it/s, episode_return=400, return=-197.900]
Iteration 8: 100%|██████████| 50/50 [00:13<00:00,  3.70it/s, episode_return=450, return=-157.300]
Iteration 9: 100%|██████████| 50/50 [00:11<00:00,  4.17it/s, episode_return=500, return=-166.500]


VBox(children=(Label(value='0.001 MB of 0.007 MB uploaded\r'), FloatProgress(value=0.16769768692845616, max=1.…

0,1
regret,▁

0,1
regret,211719.52475


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: vjizjtg7 with config:
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.00700986505181529
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:10<00:00,  4.97it/s, episode_return=50, return=-148.300]
Iteration 1: 100%|██████████| 50/50 [00:17<00:00,  2.83it/s, episode_return=100, return=-440.900]
Iteration 2: 100%|██████████| 50/50 [00:20<00:00,  2.38it/s, episode_return=150, return=-329.700]
Iteration 3: 100%|██████████| 50/50 [00:22<00:00,  2.19it/s, episode_return=200, return=-500.000]
Iteration 4: 100%|██████████| 50/50 [00:35<00:00,  1.42it/s, episode_return=250, return=-500.000]
Iteration 5: 100%|██████████| 50/50 [00:35<00:00,  1.41it/s, episode_return=300, return=-500.000]
Iteration 6: 100%|██████████| 50/50 [00:38<00:00,  1.31it/s, episode_return=350, return=-500.000]
Iteration 7: 100%|██████████| 50/50 [00:34<00:00,  1.45it/s, episode_return=400, return=-500.000]
Iteration 8: 100%|██████████| 50/50 [00:35<00:00,  1.42it/s, episode_return=450, return=-500.000]
Iteration 9: 100%|██████████| 50/50 [00:35<00:00,  1.42it/s, episode_return=500, return=-500.000]


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,184315.1365


[34m[1mwandb[0m: Agent Starting Run: pm3xp20y with config:
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.0008588033760084197
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:36<00:00,  1.38it/s, episode_return=50, return=-500.000]
Iteration 1: 100%|██████████| 50/50 [00:34<00:00,  1.44it/s, episode_return=100, return=-485.600]
Iteration 2: 100%|██████████| 50/50 [00:34<00:00,  1.43it/s, episode_return=150, return=-500.000]
Iteration 3: 100%|██████████| 50/50 [00:35<00:00,  1.41it/s, episode_return=200, return=-500.000]
Iteration 4: 100%|██████████| 50/50 [00:33<00:00,  1.48it/s, episode_return=250, return=-500.000]
Iteration 5: 100%|██████████| 50/50 [00:31<00:00,  1.60it/s, episode_return=300, return=-366.000]
Iteration 6: 100%|██████████| 50/50 [00:22<00:00,  2.20it/s, episode_return=350, return=-306.600]
Iteration 7: 100%|██████████| 50/50 [00:19<00:00,  2.60it/s, episode_return=400, return=-244.000]
Iteration 8: 100%|██████████| 50/50 [00:18<00:00,  2.74it/s, episode_return=450, return=-254.100]
Iteration 9: 100%|██████████| 50/50 [00:16<00:00,  3.01it/s, episode_return=500, return=-211.800]


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,213013.95541


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: cb2jm26n with config:
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.0007848919440091931
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:24<00:00,  2.04it/s, episode_return=50, return=-359.000]
Iteration 1: 100%|██████████| 50/50 [00:21<00:00,  2.29it/s, episode_return=100, return=-272.100]
Iteration 2: 100%|██████████| 50/50 [00:16<00:00,  3.02it/s, episode_return=150, return=-208.800]
Iteration 3: 100%|██████████| 50/50 [00:14<00:00,  3.44it/s, episode_return=200, return=-197.400]
Iteration 4: 100%|██████████| 50/50 [00:12<00:00,  4.08it/s, episode_return=250, return=-134.300]
Iteration 5: 100%|██████████| 50/50 [00:10<00:00,  4.93it/s, episode_return=300, return=-121.600]
Iteration 6: 100%|██████████| 50/50 [00:09<00:00,  5.23it/s, episode_return=350, return=-125.200]
Iteration 7: 100%|██████████| 50/50 [00:10<00:00,  4.66it/s, episode_return=400, return=-154.800]
Iteration 8: 100%|██████████| 50/50 [00:10<00:00,  4.98it/s, episode_return=450, return=-113.900]
Iteration 9: 100%|██████████| 50/50 [00:10<00:00,  4.72it/s, episode_return=500, return=-116.800]


VBox(children=(Label(value='0.001 MB of 0.007 MB uploaded\r'), FloatProgress(value=0.16903890470380914, max=1.…

0,1
regret,▁

0,1
regret,108953.6786


[34m[1mwandb[0m: Agent Starting Run: lbfy36f9 with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.002598979855056288
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011277777777286247, max=1.0…

Iteration 0: 100%|██████████| 50/50 [00:28<00:00,  1.75it/s, episode_return=50, return=-270.000]
Iteration 1: 100%|██████████| 50/50 [00:14<00:00,  3.56it/s, episode_return=100, return=-196.700]
Iteration 2: 100%|██████████| 50/50 [00:14<00:00,  3.51it/s, episode_return=150, return=-204.200]
Iteration 3: 100%|██████████| 50/50 [00:14<00:00,  3.55it/s, episode_return=200, return=-217.100]
Iteration 4: 100%|██████████| 50/50 [00:15<00:00,  3.13it/s, episode_return=250, return=-207.900]
Iteration 5: 100%|██████████| 50/50 [00:12<00:00,  4.10it/s, episode_return=300, return=-170.300]
Iteration 6: 100%|██████████| 50/50 [00:11<00:00,  4.32it/s, episode_return=350, return=-151.500]
Iteration 7: 100%|██████████| 50/50 [00:10<00:00,  4.73it/s, episode_return=400, return=-140.200]
Iteration 8: 100%|██████████| 50/50 [00:09<00:00,  5.27it/s, episode_return=450, return=-133.200]
Iteration 9: 100%|██████████| 50/50 [00:06<00:00,  7.48it/s, episode_return=500, return=-110.000]


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,119584.55148


[34m[1mwandb[0m: Agent Starting Run: qac1tk2w with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.006598143288978789
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:27<00:00,  1.83it/s, episode_return=50, return=-174.700]
Iteration 1: 100%|██████████| 50/50 [00:09<00:00,  5.29it/s, episode_return=100, return=-118.100]
Iteration 2: 100%|██████████| 50/50 [00:08<00:00,  6.03it/s, episode_return=150, return=-107.100]
Iteration 3: 100%|██████████| 50/50 [00:07<00:00,  6.93it/s, episode_return=200, return=-103.700]
Iteration 4: 100%|██████████| 50/50 [00:07<00:00,  7.03it/s, episode_return=250, return=-98.700] 
Iteration 5: 100%|██████████| 50/50 [00:07<00:00,  6.88it/s, episode_return=300, return=-95.300] 
Iteration 6: 100%|██████████| 50/50 [00:06<00:00,  7.21it/s, episode_return=350, return=-113.100]
Iteration 7: 100%|██████████| 50/50 [00:06<00:00,  7.55it/s, episode_return=400, return=-88.800]
Iteration 8: 100%|██████████| 50/50 [00:06<00:00,  8.07it/s, episode_return=450, return=-83.500]
Iteration 9: 100%|██████████| 50/50 [00:06<00:00,  7.43it/s, episode_return=500, return=-93.000] 


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,80568.15521


[34m[1mwandb[0m: Agent Starting Run: s25pu8e2 with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 4.153869589212678e-05
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:31<00:00,  1.57it/s, episode_return=50, return=-500.000]
Iteration 1: 100%|██████████| 50/50 [00:31<00:00,  1.58it/s, episode_return=100, return=-433.100]
Iteration 2: 100%|██████████| 50/50 [00:29<00:00,  1.70it/s, episode_return=150, return=-471.700]
Iteration 3: 100%|██████████| 50/50 [00:28<00:00,  1.73it/s, episode_return=200, return=-424.300]
Iteration 4: 100%|██████████| 50/50 [00:27<00:00,  1.81it/s, episode_return=250, return=-432.300]
Iteration 5: 100%|██████████| 50/50 [00:28<00:00,  1.73it/s, episode_return=300, return=-465.400]
Iteration 6: 100%|██████████| 50/50 [00:26<00:00,  1.91it/s, episode_return=350, return=-422.400]
Iteration 7: 100%|██████████| 50/50 [00:24<00:00,  2.04it/s, episode_return=400, return=-398.600]
Iteration 8: 100%|██████████| 50/50 [00:25<00:00,  1.94it/s, episode_return=450, return=-379.100]
Iteration 9: 100%|██████████| 50/50 [00:25<00:00,  1.93it/s, episode_return=500, return=-400.500]


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,219901.35005


[34m[1mwandb[0m: Agent Starting Run: bhz4b1le with config:
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.009078073270325476
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:20<00:00,  2.43it/s, episode_return=50, return=-446.200]
Iteration 1: 100%|██████████| 50/50 [00:36<00:00,  1.38it/s, episode_return=100, return=-500.000]
Iteration 2: 100%|██████████| 50/50 [00:35<00:00,  1.40it/s, episode_return=150, return=-500.000]
Iteration 3: 100%|██████████| 50/50 [00:36<00:00,  1.39it/s, episode_return=200, return=-500.000]
Iteration 4: 100%|██████████| 50/50 [00:35<00:00,  1.41it/s, episode_return=250, return=-500.000]
Iteration 5: 100%|██████████| 50/50 [00:36<00:00,  1.36it/s, episode_return=300, return=-500.000]
Iteration 6: 100%|██████████| 50/50 [00:36<00:00,  1.37it/s, episode_return=350, return=-500.000]
Iteration 7: 100%|██████████| 50/50 [00:35<00:00,  1.40it/s, episode_return=400, return=-500.000]
Iteration 8: 100%|██████████| 50/50 [00:36<00:00,  1.38it/s, episode_return=450, return=-500.000]
Iteration 9: 100%|██████████| 50/50 [00:37<00:00,  1.32it/s, episode_return=500, return=-500.000]


VBox(children=(Label(value='0.001 MB of 0.007 MB uploaded\r'), FloatProgress(value=0.16382028376248028, max=1.…

0,1
regret,▁

0,1
regret,226774.32024


[34m[1mwandb[0m: Agent Starting Run: qzocn35v with config:
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.009174127161485955
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:32<00:00,  1.55it/s, episode_return=50, return=-500.000]
Iteration 1: 100%|██████████| 50/50 [00:35<00:00,  1.39it/s, episode_return=100, return=-500.000]
Iteration 2: 100%|██████████| 50/50 [00:36<00:00,  1.38it/s, episode_return=150, return=-500.000]
Iteration 3: 100%|██████████| 50/50 [00:35<00:00,  1.39it/s, episode_return=200, return=-500.000]
Iteration 4: 100%|██████████| 50/50 [00:32<00:00,  1.55it/s, episode_return=250, return=-500.000]
Iteration 5: 100%|██████████| 50/50 [00:34<00:00,  1.45it/s, episode_return=300, return=-500.000]
Iteration 6: 100%|██████████| 50/50 [00:33<00:00,  1.49it/s, episode_return=350, return=-500.000]
Iteration 7: 100%|██████████| 50/50 [00:34<00:00,  1.45it/s, episode_return=400, return=-500.000]
Iteration 8: 100%|██████████| 50/50 [00:37<00:00,  1.35it/s, episode_return=450, return=-500.000]
Iteration 9: 100%|██████████| 50/50 [00:38<00:00,  1.31it/s, episode_return=500, return=-500.000]


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,243172.34785


[34m[1mwandb[0m: Agent Starting Run: vg3wx7ej with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.004900475264415265
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:36<00:00,  1.37it/s, episode_return=50, return=-500.000]
Iteration 1: 100%|██████████| 50/50 [00:34<00:00,  1.43it/s, episode_return=100, return=-500.000]
Iteration 2: 100%|██████████| 50/50 [00:34<00:00,  1.46it/s, episode_return=150, return=-500.000]
Iteration 3: 100%|██████████| 50/50 [00:32<00:00,  1.55it/s, episode_return=200, return=-500.000]
Iteration 4: 100%|██████████| 50/50 [00:33<00:00,  1.49it/s, episode_return=250, return=-500.000]
Iteration 5: 100%|██████████| 50/50 [00:33<00:00,  1.50it/s, episode_return=300, return=-500.000]
Iteration 6: 100%|██████████| 50/50 [00:33<00:00,  1.50it/s, episode_return=350, return=-500.000]
Iteration 7: 100%|██████████| 50/50 [00:31<00:00,  1.58it/s, episode_return=400, return=-500.000]
Iteration 8: 100%|██████████| 50/50 [00:33<00:00,  1.49it/s, episode_return=450, return=-500.000]
Iteration 9: 100%|██████████| 50/50 [00:33<00:00,  1.51it/s, episode_return=500, return=-500.000]


VBox(children=(Label(value='0.001 MB of 0.007 MB uploaded\r'), FloatProgress(value=0.166889721627409, max=1.0)…

0,1
regret,▁

0,1
regret,250000.0


[34m[1mwandb[0m: Agent Starting Run: oih3pnjc with config:
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.008262066925753696
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:20<00:00,  2.44it/s, episode_return=50, return=-203.400]
Iteration 1: 100%|██████████| 50/50 [00:27<00:00,  1.83it/s, episode_return=100, return=-329.200]
Iteration 2: 100%|██████████| 50/50 [00:16<00:00,  2.95it/s, episode_return=150, return=-180.800]
Iteration 3: 100%|██████████| 50/50 [00:14<00:00,  3.53it/s, episode_return=200, return=-184.200]
Iteration 4: 100%|██████████| 50/50 [00:13<00:00,  3.76it/s, episode_return=250, return=-313.700]
Iteration 5: 100%|██████████| 50/50 [00:12<00:00,  4.07it/s, episode_return=300, return=-158.300]
Iteration 6: 100%|██████████| 50/50 [00:12<00:00,  4.05it/s, episode_return=350, return=-166.500]
Iteration 7: 100%|██████████| 50/50 [00:12<00:00,  3.92it/s, episode_return=400, return=-155.600]
Iteration 8: 100%|██████████| 50/50 [00:13<00:00,  3.75it/s, episode_return=450, return=-166.200]
Iteration 9: 100%|██████████| 50/50 [00:12<00:00,  3.98it/s, episode_return=500, return=-148.800]


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,113728.74354


[34m[1mwandb[0m: Agent Starting Run: fpratbof with config:
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.00015685484914512628
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:36<00:00,  1.38it/s, episode_return=50, return=-500.000]
Iteration 1: 100%|██████████| 50/50 [00:36<00:00,  1.36it/s, episode_return=100, return=-500.000]
Iteration 2: 100%|██████████| 50/50 [00:35<00:00,  1.42it/s, episode_return=150, return=-500.000]
Iteration 3: 100%|██████████| 50/50 [00:33<00:00,  1.50it/s, episode_return=200, return=-494.700]
Iteration 4: 100%|██████████| 50/50 [00:34<00:00,  1.46it/s, episode_return=250, return=-468.000]
Iteration 5: 100%|██████████| 50/50 [00:30<00:00,  1.65it/s, episode_return=300, return=-430.500]
Iteration 6: 100%|██████████| 50/50 [00:29<00:00,  1.68it/s, episode_return=350, return=-411.300]
Iteration 7: 100%|██████████| 50/50 [00:31<00:00,  1.58it/s, episode_return=400, return=-398.200]
Iteration 8: 100%|██████████| 50/50 [00:22<00:00,  2.22it/s, episode_return=450, return=-233.700]
Iteration 9: 100%|██████████| 50/50 [00:19<00:00,  2.57it/s, episode_return=500, return=-233.900]


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,224553.95804


[34m[1mwandb[0m: Agent Starting Run: m4st524b with config:
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.005050367815164579
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:11<00:00,  4.33it/s, episode_return=50, return=-85.900] 
Iteration 1: 100%|██████████| 50/50 [00:08<00:00,  5.98it/s, episode_return=100, return=-82.000]
Iteration 2: 100%|██████████| 50/50 [00:07<00:00,  6.81it/s, episode_return=150, return=-113.600]
Iteration 3: 100%|██████████| 50/50 [00:33<00:00,  1.48it/s, episode_return=200, return=-500.000]
Iteration 4: 100%|██████████| 50/50 [00:36<00:00,  1.38it/s, episode_return=250, return=-500.000]
Iteration 5: 100%|██████████| 50/50 [00:37<00:00,  1.33it/s, episode_return=300, return=-500.000]
Iteration 6: 100%|██████████| 50/50 [00:37<00:00,  1.32it/s, episode_return=350, return=-500.000]
Iteration 7: 100%|██████████| 50/50 [00:37<00:00,  1.34it/s, episode_return=400, return=-500.000]
Iteration 8: 100%|██████████| 50/50 [00:37<00:00,  1.35it/s, episode_return=450, return=-500.000]
Iteration 9: 100%|██████████| 50/50 [00:37<00:00,  1.33it/s, episode_return=500, return=-500.000]


VBox(children=(Label(value='0.001 MB of 0.007 MB uploaded\r'), FloatProgress(value=0.1645118733509235, max=1.0…

0,1
regret,▁

0,1
regret,170226.49522


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: om47mkwd with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.0017698122390377071
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011111111111111112, max=1.0…

Iteration 0: 100%|██████████| 50/50 [00:26<00:00,  1.87it/s, episode_return=50, return=-440.700]
Iteration 1: 100%|██████████| 50/50 [00:22<00:00,  2.23it/s, episode_return=100, return=-254.300]
Iteration 2: 100%|██████████| 50/50 [00:12<00:00,  4.00it/s, episode_return=150, return=-176.700]
Iteration 3: 100%|██████████| 50/50 [00:11<00:00,  4.34it/s, episode_return=200, return=-162.700]
Iteration 4: 100%|██████████| 50/50 [00:09<00:00,  5.55it/s, episode_return=250, return=-150.300]
Iteration 5: 100%|██████████| 50/50 [00:07<00:00,  6.95it/s, episode_return=300, return=-116.500]
Iteration 6: 100%|██████████| 50/50 [00:08<00:00,  6.03it/s, episode_return=350, return=-129.200]
Iteration 7: 100%|██████████| 50/50 [00:07<00:00,  6.34it/s, episode_return=400, return=-118.200]
Iteration 8: 100%|██████████| 50/50 [00:07<00:00,  6.78it/s, episode_return=450, return=-127.200]
Iteration 9: 100%|██████████| 50/50 [00:07<00:00,  6.40it/s, episode_return=500, return=-129.300]


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,109912.02846


[34m[1mwandb[0m: Agent Starting Run: 7vs81syr with config:
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.005398766987862276
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:31<00:00,  1.59it/s, episode_return=50, return=-500.000]
Iteration 1: 100%|██████████| 50/50 [00:32<00:00,  1.55it/s, episode_return=100, return=-500.000]
Iteration 2: 100%|██████████| 50/50 [00:32<00:00,  1.53it/s, episode_return=150, return=-500.000]
Iteration 3: 100%|██████████| 50/50 [00:33<00:00,  1.51it/s, episode_return=200, return=-500.000]
Iteration 4: 100%|██████████| 50/50 [00:32<00:00,  1.54it/s, episode_return=250, return=-500.000]
Iteration 5: 100%|██████████| 50/50 [00:35<00:00,  1.42it/s, episode_return=300, return=-500.000]
Iteration 6: 100%|██████████| 50/50 [00:36<00:00,  1.38it/s, episode_return=350, return=-500.000]
Iteration 7: 100%|██████████| 50/50 [00:36<00:00,  1.36it/s, episode_return=400, return=-500.000]
Iteration 8: 100%|██████████| 50/50 [00:36<00:00,  1.37it/s, episode_return=450, return=-500.000]
Iteration 9: 100%|██████████| 50/50 [00:34<00:00,  1.43it/s, episode_return=500, return=-500.000]


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,250000.0


[34m[1mwandb[0m: Agent Starting Run: qyygsz5j with config:
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.00657812952206139
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:33<00:00,  1.50it/s, episode_return=50, return=-500.000]
Iteration 1: 100%|██████████| 50/50 [00:33<00:00,  1.48it/s, episode_return=100, return=-500.000]
Iteration 2: 100%|██████████| 50/50 [00:33<00:00,  1.51it/s, episode_return=150, return=-495.400]
Iteration 3: 100%|██████████| 50/50 [00:16<00:00,  3.10it/s, episode_return=200, return=-172.600]
Iteration 4: 100%|██████████| 50/50 [00:14<00:00,  3.37it/s, episode_return=250, return=-167.300]
Iteration 5: 100%|██████████| 50/50 [00:11<00:00,  4.42it/s, episode_return=300, return=-154.800]
Iteration 6: 100%|██████████| 50/50 [00:10<00:00,  4.79it/s, episode_return=350, return=-162.200]
Iteration 7: 100%|██████████| 50/50 [00:11<00:00,  4.53it/s, episode_return=400, return=-179.400]
Iteration 8: 100%|██████████| 50/50 [00:10<00:00,  4.63it/s, episode_return=450, return=-157.300]
Iteration 9: 100%|██████████| 50/50 [00:19<00:00,  2.61it/s, episode_return=500, return=-340.500]


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,152758.89109


[34m[1mwandb[0m: Agent Starting Run: hdy51hnm with config:
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.008183392879374871
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:14<00:00,  3.43it/s, episode_return=50, return=-182.500]
Iteration 1: 100%|██████████| 50/50 [00:14<00:00,  3.45it/s, episode_return=100, return=-223.900]
Iteration 2: 100%|██████████| 50/50 [00:14<00:00,  3.43it/s, episode_return=150, return=-165.500]
Iteration 3: 100%|██████████| 50/50 [00:15<00:00,  3.30it/s, episode_return=200, return=-198.700]
Iteration 4: 100%|██████████| 50/50 [00:12<00:00,  3.98it/s, episode_return=250, return=-180.500]
Iteration 5: 100%|██████████| 50/50 [00:12<00:00,  4.07it/s, episode_return=300, return=-164.500]
Iteration 6: 100%|██████████| 50/50 [00:10<00:00,  4.59it/s, episode_return=350, return=-153.900]
Iteration 7: 100%|██████████| 50/50 [00:17<00:00,  2.84it/s, episode_return=400, return=-500.000]
Iteration 8: 100%|██████████| 50/50 [00:31<00:00,  1.58it/s, episode_return=450, return=-500.000]
Iteration 9: 100%|██████████| 50/50 [00:32<00:00,  1.52it/s, episode_return=500, return=-500.000]


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,121924.18079


[34m[1mwandb[0m: Agent Starting Run: wn5ziglj with config:
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.005682247712221909
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:32<00:00,  1.52it/s, episode_return=50, return=-500.000]
Iteration 1: 100%|██████████| 50/50 [00:33<00:00,  1.51it/s, episode_return=100, return=-500.000]
Iteration 2: 100%|██████████| 50/50 [00:32<00:00,  1.52it/s, episode_return=150, return=-500.000]
Iteration 3: 100%|██████████| 50/50 [00:36<00:00,  1.36it/s, episode_return=200, return=-500.000]
Iteration 4: 100%|██████████| 50/50 [00:33<00:00,  1.48it/s, episode_return=250, return=-500.000]
Iteration 5: 100%|██████████| 50/50 [00:31<00:00,  1.59it/s, episode_return=300, return=-500.000]
Iteration 6: 100%|██████████| 50/50 [00:31<00:00,  1.57it/s, episode_return=350, return=-500.000]
Iteration 7: 100%|██████████| 50/50 [00:33<00:00,  1.51it/s, episode_return=400, return=-500.000]
Iteration 8: 100%|██████████| 50/50 [00:29<00:00,  1.69it/s, episode_return=450, return=-491.100]
Iteration 9: 100%|██████████| 50/50 [00:31<00:00,  1.57it/s, episode_return=500, return=-500.000]


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,248130.86139


[34m[1mwandb[0m: Agent Starting Run: ujvqf8uu with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.007412033057506342
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011277777777932999, max=1.0…

Iteration 0: 100%|██████████| 50/50 [00:10<00:00,  4.64it/s, episode_return=50, return=-102.600]
Iteration 1: 100%|██████████| 50/50 [00:06<00:00,  7.80it/s, episode_return=100, return=-97.500]
Iteration 2: 100%|██████████| 50/50 [00:06<00:00,  8.06it/s, episode_return=150, return=-107.000]
Iteration 3: 100%|██████████| 50/50 [00:05<00:00,  9.14it/s, episode_return=200, return=-86.100]
Iteration 4: 100%|██████████| 50/50 [00:06<00:00,  7.87it/s, episode_return=250, return=-90.200] 
Iteration 5: 100%|██████████| 50/50 [00:05<00:00,  8.89it/s, episode_return=300, return=-95.500]
Iteration 6: 100%|██████████| 50/50 [00:05<00:00,  8.69it/s, episode_return=350, return=-86.900] 
Iteration 7: 100%|██████████| 50/50 [00:05<00:00,  8.93it/s, episode_return=400, return=-93.000]
Iteration 8: 100%|██████████| 50/50 [00:05<00:00,  8.90it/s, episode_return=450, return=-90.700]
Iteration 9: 100%|██████████| 50/50 [00:05<00:00,  9.45it/s, episode_return=500, return=-93.600]


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,57805.13381


[34m[1mwandb[0m: Agent Starting Run: rfc4nr1g with config:
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.00963174200047685
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:14<00:00,  3.48it/s, episode_return=50, return=-155.100]
Iteration 1: 100%|██████████| 50/50 [00:09<00:00,  5.25it/s, episode_return=100, return=-125.700]
Iteration 2: 100%|██████████| 50/50 [00:09<00:00,  5.11it/s, episode_return=150, return=-135.200]
Iteration 3: 100%|██████████| 50/50 [00:12<00:00,  4.06it/s, episode_return=200, return=-151.500]
Iteration 4: 100%|██████████| 50/50 [00:10<00:00,  4.77it/s, episode_return=250, return=-228.100]
Iteration 5: 100%|██████████| 50/50 [00:09<00:00,  5.54it/s, episode_return=300, return=-97.600] 
Iteration 6: 100%|██████████| 50/50 [00:07<00:00,  6.84it/s, episode_return=350, return=-95.500] 
Iteration 7: 100%|██████████| 50/50 [00:06<00:00,  8.02it/s, episode_return=400, return=-83.600]
Iteration 8: 100%|██████████| 50/50 [00:06<00:00,  7.98it/s, episode_return=450, return=-103.500]
Iteration 9: 100%|██████████| 50/50 [00:06<00:00,  8.20it/s, episode_return=500, return=-100.000]


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,73960.82831


[34m[1mwandb[0m: Agent Starting Run: pluecfbs with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.005780642591248327
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:25<00:00,  1.96it/s, episode_return=50, return=-348.400]
Iteration 1: 100%|██████████| 50/50 [00:11<00:00,  4.28it/s, episode_return=100, return=-180.400]
Iteration 2: 100%|██████████| 50/50 [00:08<00:00,  5.97it/s, episode_return=150, return=-116.100]
Iteration 3: 100%|██████████| 50/50 [00:08<00:00,  6.10it/s, episode_return=200, return=-120.300]
Iteration 4: 100%|██████████| 50/50 [00:08<00:00,  6.22it/s, episode_return=250, return=-131.600]
Iteration 5: 100%|██████████| 50/50 [00:07<00:00,  7.05it/s, episode_return=300, return=-120.600]
Iteration 6: 100%|██████████| 50/50 [00:06<00:00,  7.41it/s, episode_return=350, return=-110.500]
Iteration 7: 100%|██████████| 50/50 [00:06<00:00,  8.04it/s, episode_return=400, return=-96.800] 
Iteration 8: 100%|██████████| 50/50 [00:07<00:00,  6.39it/s, episode_return=450, return=-126.800]
Iteration 9: 100%|██████████| 50/50 [00:08<00:00,  5.77it/s, episode_return=500, return=-150.500]


VBox(children=(Label(value='0.001 MB of 0.007 MB uploaded\r'), FloatProgress(value=0.16646626586506347, max=1.…

0,1
regret,▁

0,1
regret,94235.63576


[34m[1mwandb[0m: Agent Starting Run: f8uo39zb with config:
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.007932829529272746
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:33<00:00,  1.47it/s, episode_return=50, return=-500.000]
Iteration 1: 100%|██████████| 50/50 [00:33<00:00,  1.49it/s, episode_return=100, return=-500.000]
Iteration 2: 100%|██████████| 50/50 [00:32<00:00,  1.56it/s, episode_return=150, return=-500.000]
Iteration 3: 100%|██████████| 50/50 [00:33<00:00,  1.49it/s, episode_return=200, return=-500.000]
Iteration 4: 100%|██████████| 50/50 [00:33<00:00,  1.51it/s, episode_return=250, return=-500.000]
Iteration 5: 100%|██████████| 50/50 [00:32<00:00,  1.52it/s, episode_return=300, return=-500.000]
Iteration 6: 100%|██████████| 50/50 [00:33<00:00,  1.48it/s, episode_return=350, return=-500.000]
Iteration 7: 100%|██████████| 50/50 [00:33<00:00,  1.49it/s, episode_return=400, return=-500.000]
Iteration 8: 100%|██████████| 50/50 [00:33<00:00,  1.49it/s, episode_return=450, return=-500.000]
Iteration 9: 100%|██████████| 50/50 [00:32<00:00,  1.53it/s, episode_return=500, return=-500.000]


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,249148.83844


[34m[1mwandb[0m: Agent Starting Run: gp9bik0t with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.0030537001511856563
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:26<00:00,  1.85it/s, episode_return=50, return=-412.700]
Iteration 1: 100%|██████████| 50/50 [00:13<00:00,  3.64it/s, episode_return=100, return=-219.900]
Iteration 2: 100%|██████████| 50/50 [00:13<00:00,  3.57it/s, episode_return=150, return=-218.600]
Iteration 3: 100%|██████████| 50/50 [00:13<00:00,  3.80it/s, episode_return=200, return=-222.400]
Iteration 4: 100%|██████████| 50/50 [00:11<00:00,  4.19it/s, episode_return=250, return=-166.800]
Iteration 5: 100%|██████████| 50/50 [00:09<00:00,  5.10it/s, episode_return=300, return=-162.400]
Iteration 6: 100%|██████████| 50/50 [00:09<00:00,  5.08it/s, episode_return=350, return=-186.500]
Iteration 7: 100%|██████████| 50/50 [00:13<00:00,  3.58it/s, episode_return=400, return=-194.500]
Iteration 8: 100%|██████████| 50/50 [00:10<00:00,  4.93it/s, episode_return=450, return=-131.900]
Iteration 9: 100%|██████████| 50/50 [00:08<00:00,  5.95it/s, episode_return=500, return=-160.700]


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,121750.979


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 4twkqxx8 with config:
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.007044952263274834
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:36<00:00,  1.39it/s, episode_return=50, return=-494.800]
Iteration 1: 100%|██████████| 50/50 [00:36<00:00,  1.37it/s, episode_return=100, return=-492.300]
Iteration 2: 100%|██████████| 50/50 [00:34<00:00,  1.46it/s, episode_return=150, return=-500.000]
Iteration 3: 100%|██████████| 50/50 [00:35<00:00,  1.39it/s, episode_return=200, return=-500.000]
Iteration 4: 100%|██████████| 50/50 [00:34<00:00,  1.44it/s, episode_return=250, return=-500.000]
Iteration 5: 100%|██████████| 50/50 [00:35<00:00,  1.39it/s, episode_return=300, return=-500.000]
Iteration 6: 100%|██████████| 50/50 [00:32<00:00,  1.56it/s, episode_return=350, return=-500.000]
Iteration 7: 100%|██████████| 50/50 [00:34<00:00,  1.46it/s, episode_return=400, return=-500.000]
Iteration 8: 100%|██████████| 50/50 [00:35<00:00,  1.42it/s, episode_return=450, return=-500.000]
Iteration 9: 100%|██████████| 50/50 [00:35<00:00,  1.42it/s, episode_return=500, return=-500.000]


VBox(children=(Label(value='0.001 MB of 0.007 MB uploaded\r'), FloatProgress(value=0.16375574523965858, max=1.…

0,1
regret,▁

0,1
regret,248291.0523


[34m[1mwandb[0m: Agent Starting Run: yg37dwrt with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.0019328801955960757
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:32<00:00,  1.53it/s, episode_return=50, return=-500.000]
Iteration 1: 100%|██████████| 50/50 [00:35<00:00,  1.42it/s, episode_return=100, return=-500.000]
Iteration 2: 100%|██████████| 50/50 [00:33<00:00,  1.51it/s, episode_return=150, return=-500.000]
Iteration 3: 100%|██████████| 50/50 [00:28<00:00,  1.75it/s, episode_return=200, return=-500.000]
Iteration 4: 100%|██████████| 50/50 [00:30<00:00,  1.61it/s, episode_return=250, return=-500.000]
Iteration 5: 100%|██████████| 50/50 [00:27<00:00,  1.83it/s, episode_return=300, return=-500.000]
Iteration 6: 100%|██████████| 50/50 [00:28<00:00,  1.75it/s, episode_return=350, return=-500.000]
Iteration 7: 100%|██████████| 50/50 [00:30<00:00,  1.66it/s, episode_return=400, return=-500.000]
Iteration 8: 100%|██████████| 50/50 [00:30<00:00,  1.66it/s, episode_return=450, return=-500.000]
Iteration 9: 100%|██████████| 50/50 [00:30<00:00,  1.64it/s, episode_return=500, return=-500.000]


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,250000.0


[34m[1mwandb[0m: Agent Starting Run: 7rlpi1qu with config:
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.0031691514437060396
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:36<00:00,  1.39it/s, episode_return=50, return=-368.500]
Iteration 1: 100%|██████████| 50/50 [00:34<00:00,  1.44it/s, episode_return=100, return=-500.000]
Iteration 2: 100%|██████████| 50/50 [00:35<00:00,  1.42it/s, episode_return=150, return=-500.000]
Iteration 3: 100%|██████████| 50/50 [00:39<00:00,  1.25it/s, episode_return=200, return=-500.000]
Iteration 4: 100%|██████████| 50/50 [00:41<00:00,  1.21it/s, episode_return=250, return=-500.000]
Iteration 5: 100%|██████████| 50/50 [00:43<00:00,  1.15it/s, episode_return=300, return=-500.000]
Iteration 6: 100%|██████████| 50/50 [00:43<00:00,  1.16it/s, episode_return=350, return=-500.000]
Iteration 7: 100%|██████████| 50/50 [00:41<00:00,  1.19it/s, episode_return=400, return=-500.000]
Iteration 8: 100%|██████████| 50/50 [00:45<00:00,  1.10it/s, episode_return=450, return=-500.000]
Iteration 9: 100%|██████████| 50/50 [00:46<00:00,  1.07it/s, episode_return=500, return=-500.000]


VBox(children=(Label(value='0.001 MB of 0.007 MB uploaded\r'), FloatProgress(value=0.16604527296937416, max=1.…

0,1
regret,▁

0,1
regret,246905.00931


[34m[1mwandb[0m: Agent Starting Run: ejokls9e with config:
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.00896646041345243
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:43<00:00,  1.15it/s, episode_return=50, return=-500.000]
Iteration 1: 100%|██████████| 50/50 [00:42<00:00,  1.18it/s, episode_return=100, return=-500.000]
Iteration 2: 100%|██████████| 50/50 [00:42<00:00,  1.18it/s, episode_return=150, return=-500.000]
Iteration 3: 100%|██████████| 50/50 [00:42<00:00,  1.17it/s, episode_return=200, return=-500.000]
Iteration 4: 100%|██████████| 50/50 [00:44<00:00,  1.12it/s, episode_return=250, return=-500.000]
Iteration 5: 100%|██████████| 50/50 [00:41<00:00,  1.21it/s, episode_return=300, return=-500.000]
Iteration 6: 100%|██████████| 50/50 [00:42<00:00,  1.18it/s, episode_return=350, return=-500.000]
Iteration 7: 100%|██████████| 50/50 [00:41<00:00,  1.19it/s, episode_return=400, return=-500.000]
Iteration 8: 100%|██████████| 50/50 [00:42<00:00,  1.19it/s, episode_return=450, return=-500.000]
Iteration 9: 100%|██████████| 50/50 [00:40<00:00,  1.24it/s, episode_return=500, return=-500.000]


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,250000.0


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 04qs4mfx with config:
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.003437400914326904
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:43<00:00,  1.14it/s, episode_return=50, return=-500.000]
Iteration 1: 100%|██████████| 50/50 [00:44<00:00,  1.13it/s, episode_return=100, return=-500.000]
Iteration 2: 100%|██████████| 50/50 [00:44<00:00,  1.13it/s, episode_return=150, return=-500.000]
Iteration 3: 100%|██████████| 50/50 [00:42<00:00,  1.16it/s, episode_return=200, return=-461.600]
Iteration 4: 100%|██████████| 50/50 [00:44<00:00,  1.14it/s, episode_return=250, return=-500.000]
Iteration 5: 100%|██████████| 50/50 [00:44<00:00,  1.13it/s, episode_return=300, return=-500.000]
Iteration 6: 100%|██████████| 50/50 [00:43<00:00,  1.15it/s, episode_return=350, return=-500.000]
Iteration 7: 100%|██████████| 50/50 [00:44<00:00,  1.11it/s, episode_return=400, return=-500.000]
Iteration 8: 100%|██████████| 50/50 [00:29<00:00,  1.69it/s, episode_return=450, return=-243.400]
Iteration 9: 100%|██████████| 50/50 [00:17<00:00,  2.80it/s, episode_return=500, return=-203.300]


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,240249.91888


[34m[1mwandb[0m: Agent Starting Run: 6hq9xata with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.0003575459304779966
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:40<00:00,  1.25it/s, episode_return=50, return=-500.000]
Iteration 1: 100%|██████████| 50/50 [00:39<00:00,  1.25it/s, episode_return=100, return=-500.000]
Iteration 2: 100%|██████████| 50/50 [00:39<00:00,  1.26it/s, episode_return=150, return=-500.000]
Iteration 3: 100%|██████████| 50/50 [00:39<00:00,  1.28it/s, episode_return=200, return=-500.000]
Iteration 4: 100%|██████████| 50/50 [00:39<00:00,  1.26it/s, episode_return=250, return=-500.000]
Iteration 5: 100%|██████████| 50/50 [00:38<00:00,  1.29it/s, episode_return=300, return=-500.000]
Iteration 6: 100%|██████████| 50/50 [00:41<00:00,  1.19it/s, episode_return=350, return=-500.000]
Iteration 7: 100%|██████████| 50/50 [00:39<00:00,  1.27it/s, episode_return=400, return=-500.000]
Iteration 8: 100%|██████████| 50/50 [00:39<00:00,  1.28it/s, episode_return=450, return=-500.000]
Iteration 9: 100%|██████████| 50/50 [00:38<00:00,  1.30it/s, episode_return=500, return=-500.000]


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,250000.0


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: at61zqo9 with config:
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.007036780395029252
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:24<00:00,  2.06it/s, episode_return=50, return=-201.100]
Iteration 1: 100%|██████████| 50/50 [00:14<00:00,  3.36it/s, episode_return=100, return=-209.300]
Iteration 2: 100%|██████████| 50/50 [00:13<00:00,  3.73it/s, episode_return=150, return=-139.800]
Iteration 3: 100%|██████████| 50/50 [00:14<00:00,  3.55it/s, episode_return=200, return=-182.900]
Iteration 4: 100%|██████████| 50/50 [00:14<00:00,  3.35it/s, episode_return=250, return=-189.700]
Iteration 5: 100%|██████████| 50/50 [00:15<00:00,  3.30it/s, episode_return=300, return=-201.600]
Iteration 6: 100%|██████████| 50/50 [00:16<00:00,  2.98it/s, episode_return=350, return=-145.700]
Iteration 7: 100%|██████████| 50/50 [00:10<00:00,  4.96it/s, episode_return=400, return=-89.900] 
Iteration 8: 100%|██████████| 50/50 [00:07<00:00,  6.31it/s, episode_return=450, return=-101.400]
Iteration 9: 100%|██████████| 50/50 [00:07<00:00,  6.31it/s, episode_return=500, return=-124.800]


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,99980.54628


[34m[1mwandb[0m: Agent Starting Run: p1n3k53l with config:
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.002440908112126932
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:43<00:00,  1.16it/s, episode_return=50, return=-500.000]
Iteration 1: 100%|██████████| 50/50 [00:42<00:00,  1.18it/s, episode_return=100, return=-500.000]
Iteration 2: 100%|██████████| 50/50 [00:42<00:00,  1.18it/s, episode_return=150, return=-500.000]
Iteration 3: 100%|██████████| 50/50 [00:44<00:00,  1.11it/s, episode_return=200, return=-500.000]
Iteration 4: 100%|██████████| 50/50 [00:45<00:00,  1.10it/s, episode_return=250, return=-500.000]
Iteration 5: 100%|██████████| 50/50 [00:42<00:00,  1.17it/s, episode_return=300, return=-500.000]
Iteration 6: 100%|██████████| 50/50 [00:41<00:00,  1.20it/s, episode_return=350, return=-500.000]
Iteration 7: 100%|██████████| 50/50 [00:42<00:00,  1.19it/s, episode_return=400, return=-500.000]
Iteration 8: 100%|██████████| 50/50 [00:42<00:00,  1.16it/s, episode_return=450, return=-500.000]
Iteration 9: 100%|██████████| 50/50 [00:42<00:00,  1.19it/s, episode_return=500, return=-500.000]


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,250000.0


[34m[1mwandb[0m: Agent Starting Run: qs9aa8o5 with config:
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.006051788592537706
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:43<00:00,  1.15it/s, episode_return=50, return=-500.000]
Iteration 1: 100%|██████████| 50/50 [00:42<00:00,  1.18it/s, episode_return=100, return=-500.000]
Iteration 2: 100%|██████████| 50/50 [00:37<00:00,  1.32it/s, episode_return=150, return=-500.000]
Iteration 3: 100%|██████████| 50/50 [00:41<00:00,  1.21it/s, episode_return=200, return=-500.000]
Iteration 4: 100%|██████████| 50/50 [00:48<00:00,  1.04it/s, episode_return=250, return=-500.000]
Iteration 5: 100%|██████████| 50/50 [00:42<00:00,  1.17it/s, episode_return=300, return=-500.000]
Iteration 6: 100%|██████████| 50/50 [00:35<00:00,  1.41it/s, episode_return=350, return=-500.000]
Iteration 7: 100%|██████████| 50/50 [00:34<00:00,  1.45it/s, episode_return=400, return=-500.000]
Iteration 8: 100%|██████████| 50/50 [00:36<00:00,  1.37it/s, episode_return=450, return=-500.000]
Iteration 9: 100%|██████████| 50/50 [00:35<00:00,  1.42it/s, episode_return=500, return=-500.000]


VBox(children=(Label(value='0.001 MB of 0.007 MB uploaded\r'), FloatProgress(value=0.16291251964379255, max=1.…

0,1
regret,▁

0,1
regret,248007.01344


[34m[1mwandb[0m: Agent Starting Run: fwcz2nue with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.005824004038464582
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:29<00:00,  1.71it/s, episode_return=50, return=-500.000]
Iteration 1: 100%|██████████| 50/50 [00:22<00:00,  2.18it/s, episode_return=100, return=-208.100]
Iteration 2: 100%|██████████| 50/50 [00:14<00:00,  3.39it/s, episode_return=150, return=-229.800]
Iteration 3: 100%|██████████| 50/50 [00:12<00:00,  3.94it/s, episode_return=200, return=-196.900]
Iteration 4: 100%|██████████| 50/50 [00:14<00:00,  3.38it/s, episode_return=250, return=-240.100]
Iteration 5: 100%|██████████| 50/50 [00:17<00:00,  2.85it/s, episode_return=300, return=-230.500]
Iteration 6: 100%|██████████| 50/50 [00:13<00:00,  3.72it/s, episode_return=350, return=-189.400]
Iteration 7: 100%|██████████| 50/50 [00:12<00:00,  3.91it/s, episode_return=400, return=-190.700]
Iteration 8: 100%|██████████| 50/50 [00:12<00:00,  3.86it/s, episode_return=450, return=-206.800]
Iteration 9: 100%|██████████| 50/50 [00:15<00:00,  3.31it/s, episode_return=500, return=-191.800]


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,140360.44745


[34m[1mwandb[0m: Agent Starting Run: ljmelv7f with config:
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.0027571697945401887
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:25<00:00,  1.93it/s, episode_return=50, return=-172.700]
Iteration 1: 100%|██████████| 50/50 [00:14<00:00,  3.44it/s, episode_return=100, return=-186.900]
Iteration 2: 100%|██████████| 50/50 [00:14<00:00,  3.39it/s, episode_return=150, return=-160.300]
Iteration 3: 100%|██████████| 50/50 [00:14<00:00,  3.35it/s, episode_return=200, return=-164.300]
Iteration 4: 100%|██████████| 50/50 [00:14<00:00,  3.38it/s, episode_return=250, return=-160.000]
Iteration 5: 100%|██████████| 50/50 [00:13<00:00,  3.64it/s, episode_return=300, return=-171.900]
Iteration 6: 100%|██████████| 50/50 [00:26<00:00,  1.85it/s, episode_return=350, return=-490.700]
Iteration 7: 100%|██████████| 50/50 [00:40<00:00,  1.24it/s, episode_return=400, return=-500.000]
Iteration 8: 100%|██████████| 50/50 [00:38<00:00,  1.31it/s, episode_return=450, return=-500.000]
Iteration 9: 100%|██████████| 50/50 [00:38<00:00,  1.31it/s, episode_return=500, return=-500.000]


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,143925.80257


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: ildgd6sa with config:
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.0077597048160387505
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:34<00:00,  1.46it/s, episode_return=50, return=-500.000]
Iteration 1: 100%|██████████| 50/50 [00:38<00:00,  1.28it/s, episode_return=100, return=-500.000]
Iteration 2: 100%|██████████| 50/50 [00:35<00:00,  1.43it/s, episode_return=150, return=-500.000]
Iteration 3: 100%|██████████| 50/50 [00:17<00:00,  2.89it/s, episode_return=200, return=-203.500]
Iteration 4: 100%|██████████| 50/50 [00:12<00:00,  3.87it/s, episode_return=250, return=-159.700]
Iteration 5: 100%|██████████| 50/50 [00:10<00:00,  4.82it/s, episode_return=300, return=-155.300]
Iteration 6: 100%|██████████| 50/50 [00:09<00:00,  5.44it/s, episode_return=350, return=-144.500]
Iteration 7: 100%|██████████| 50/50 [00:11<00:00,  4.35it/s, episode_return=400, return=-166.100]
Iteration 8: 100%|██████████| 50/50 [00:12<00:00,  4.13it/s, episode_return=450, return=-172.700]
Iteration 9: 100%|██████████| 50/50 [00:11<00:00,  4.23it/s, episode_return=500, return=-150.200]


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,152994.19215


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 8dqg2e1d with config:
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.0012014058173007186
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:35<00:00,  1.41it/s, episode_return=50, return=-489.200]
Iteration 1: 100%|██████████| 50/50 [00:34<00:00,  1.47it/s, episode_return=100, return=-500.000]
Iteration 2: 100%|██████████| 50/50 [00:34<00:00,  1.45it/s, episode_return=150, return=-500.000]
Iteration 3: 100%|██████████| 50/50 [00:36<00:00,  1.37it/s, episode_return=200, return=-500.000]
Iteration 4: 100%|██████████| 50/50 [00:39<00:00,  1.28it/s, episode_return=250, return=-500.000]
Iteration 5: 100%|██████████| 50/50 [00:39<00:00,  1.27it/s, episode_return=300, return=-500.000]
Iteration 6: 100%|██████████| 50/50 [00:41<00:00,  1.22it/s, episode_return=350, return=-500.000]
Iteration 7: 100%|██████████| 50/50 [00:45<00:00,  1.11it/s, episode_return=400, return=-500.000]
Iteration 8: 100%|██████████| 50/50 [00:47<00:00,  1.06it/s, episode_return=450, return=-500.000]
Iteration 9: 100%|██████████| 50/50 [00:42<00:00,  1.17it/s, episode_return=500, return=-500.000]


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,246171.55059


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: pfw5zehe with config:
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.005611430050181849
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011288888888925108, max=1.0…

Iteration 0: 100%|██████████| 50/50 [00:25<00:00,  2.00it/s, episode_return=50, return=-210.800]
Iteration 1: 100%|██████████| 50/50 [00:12<00:00,  4.14it/s, episode_return=100, return=-105.800]
Iteration 2: 100%|██████████| 50/50 [00:10<00:00,  4.92it/s, episode_return=150, return=-152.000]
Iteration 3: 100%|██████████| 50/50 [00:11<00:00,  4.40it/s, episode_return=200, return=-175.200]
Iteration 4: 100%|██████████| 50/50 [00:10<00:00,  4.90it/s, episode_return=250, return=-87.700] 
Iteration 5: 100%|██████████| 50/50 [00:07<00:00,  6.33it/s, episode_return=300, return=-79.700]
Iteration 6: 100%|██████████| 50/50 [00:08<00:00,  6.04it/s, episode_return=350, return=-100.300]
Iteration 7: 100%|██████████| 50/50 [00:07<00:00,  6.42it/s, episode_return=400, return=-88.600]
Iteration 8: 100%|██████████| 50/50 [00:07<00:00,  6.46it/s, episode_return=450, return=-84.900] 
Iteration 9: 100%|██████████| 50/50 [00:08<00:00,  6.08it/s, episode_return=500, return=-84.600] 


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,81591.19398


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: fpkj32ia with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.001440694426448926
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:38<00:00,  1.29it/s, episode_return=50, return=-454.900]
Iteration 1: 100%|██████████| 50/50 [00:36<00:00,  1.36it/s, episode_return=100, return=-468.900]
Iteration 2: 100%|██████████| 50/50 [00:32<00:00,  1.54it/s, episode_return=150, return=-387.400]
Iteration 3: 100%|██████████| 50/50 [00:21<00:00,  2.35it/s, episode_return=200, return=-242.800]
Iteration 4: 100%|██████████| 50/50 [00:13<00:00,  3.58it/s, episode_return=250, return=-165.000]
Iteration 5: 100%|██████████| 50/50 [00:12<00:00,  4.09it/s, episode_return=300, return=-164.200]
Iteration 6: 100%|██████████| 50/50 [00:13<00:00,  3.80it/s, episode_return=350, return=-199.600]
Iteration 7: 100%|██████████| 50/50 [00:10<00:00,  4.82it/s, episode_return=400, return=-166.300]
Iteration 8: 100%|██████████| 50/50 [00:12<00:00,  4.04it/s, episode_return=450, return=-183.000]
Iteration 9: 100%|██████████| 50/50 [00:11<00:00,  4.21it/s, episode_return=500, return=-155.600]


VBox(children=(Label(value='0.001 MB of 0.007 MB uploaded\r'), FloatProgress(value=0.1683769916284094, max=1.0…

0,1
regret,▁

0,1
regret,149522.45614


[34m[1mwandb[0m: Agent Starting Run: 86eyjl6g with config:
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.004662381646000616
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:37<00:00,  1.33it/s, episode_return=50, return=-500.000]
Iteration 1: 100%|██████████| 50/50 [00:37<00:00,  1.34it/s, episode_return=100, return=-500.000]
Iteration 2: 100%|██████████| 50/50 [00:37<00:00,  1.33it/s, episode_return=150, return=-500.000]
Iteration 3: 100%|██████████| 50/50 [00:40<00:00,  1.23it/s, episode_return=200, return=-500.000]
Iteration 4: 100%|██████████| 50/50 [00:41<00:00,  1.20it/s, episode_return=250, return=-500.000]
Iteration 5: 100%|██████████| 50/50 [00:37<00:00,  1.32it/s, episode_return=300, return=-500.000]
Iteration 6: 100%|██████████| 50/50 [00:37<00:00,  1.33it/s, episode_return=350, return=-500.000]
Iteration 7: 100%|██████████| 50/50 [00:37<00:00,  1.33it/s, episode_return=400, return=-500.000]
Iteration 8: 100%|██████████| 50/50 [00:37<00:00,  1.33it/s, episode_return=450, return=-500.000]
Iteration 9: 100%|██████████| 50/50 [00:37<00:00,  1.34it/s, episode_return=500, return=-500.000]


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,250000.0


[34m[1mwandb[0m: Agent Starting Run: hxm7rtpw with config:
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.005172705586780423
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:10<00:00,  4.65it/s, episode_return=50, return=-138.500]
Iteration 1: 100%|██████████| 50/50 [00:09<00:00,  5.41it/s, episode_return=100, return=-155.500]
Iteration 2: 100%|██████████| 50/50 [00:15<00:00,  3.29it/s, episode_return=150, return=-210.400]
Iteration 3: 100%|██████████| 50/50 [00:13<00:00,  3.61it/s, episode_return=200, return=-155.200]
Iteration 4: 100%|██████████| 50/50 [00:13<00:00,  3.71it/s, episode_return=250, return=-161.300]
Iteration 5: 100%|██████████| 50/50 [00:13<00:00,  3.66it/s, episode_return=300, return=-184.200]
Iteration 6: 100%|██████████| 50/50 [00:11<00:00,  4.18it/s, episode_return=350, return=-193.300]
Iteration 7: 100%|██████████| 50/50 [00:17<00:00,  2.81it/s, episode_return=400, return=-296.800]
Iteration 8: 100%|██████████| 50/50 [00:16<00:00,  2.98it/s, episode_return=450, return=-164.200]
Iteration 9: 100%|██████████| 50/50 [00:14<00:00,  3.50it/s, episode_return=500, return=-162.400]


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,90855.61961


[34m[1mwandb[0m: Agent Starting Run: tswyx7vs with config:
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.0020245105128374255
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:19<00:00,  2.62it/s, episode_return=50, return=-113.500]
Iteration 1: 100%|██████████| 50/50 [00:08<00:00,  6.11it/s, episode_return=100, return=-114.600]
Iteration 2: 100%|██████████| 50/50 [00:10<00:00,  4.91it/s, episode_return=150, return=-110.600]
Iteration 3: 100%|██████████| 50/50 [00:09<00:00,  5.21it/s, episode_return=200, return=-121.700]
Iteration 4: 100%|██████████| 50/50 [00:09<00:00,  5.54it/s, episode_return=250, return=-92.400] 
Iteration 5: 100%|██████████| 50/50 [00:08<00:00,  6.05it/s, episode_return=300, return=-100.400]
Iteration 6: 100%|██████████| 50/50 [00:08<00:00,  5.74it/s, episode_return=350, return=-119.200]
Iteration 7: 100%|██████████| 50/50 [00:11<00:00,  4.31it/s, episode_return=400, return=-157.800]
Iteration 8: 100%|██████████| 50/50 [00:09<00:00,  5.09it/s, episode_return=450, return=-102.900]
Iteration 9: 100%|██████████| 50/50 [00:07<00:00,  6.45it/s, episode_return=500, return=-115.900]


VBox(children=(Label(value='0.001 MB of 0.007 MB uploaded\r'), FloatProgress(value=0.16644420715429792, max=1.…

0,1
regret,▁

0,1
regret,72163.28861


[34m[1mwandb[0m: Agent Starting Run: yniqvas2 with config:
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.0006542810307502152
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:34<00:00,  1.47it/s, episode_return=50, return=-500.000]
Iteration 1: 100%|██████████| 50/50 [00:34<00:00,  1.46it/s, episode_return=100, return=-482.000]
Iteration 2: 100%|██████████| 50/50 [00:31<00:00,  1.57it/s, episode_return=150, return=-379.300]
Iteration 3: 100%|██████████| 50/50 [00:22<00:00,  2.23it/s, episode_return=200, return=-237.300]
Iteration 4: 100%|██████████| 50/50 [00:15<00:00,  3.19it/s, episode_return=250, return=-219.200]
Iteration 5: 100%|██████████| 50/50 [00:15<00:00,  3.25it/s, episode_return=300, return=-212.900]
Iteration 6: 100%|██████████| 50/50 [00:14<00:00,  3.47it/s, episode_return=350, return=-188.100]
Iteration 7: 100%|██████████| 50/50 [00:11<00:00,  4.43it/s, episode_return=400, return=-167.100]
Iteration 8: 100%|██████████| 50/50 [00:13<00:00,  3.73it/s, episode_return=450, return=-164.200]
Iteration 9: 100%|██████████| 50/50 [00:12<00:00,  4.00it/s, episode_return=500, return=-173.000]


VBox(children=(Label(value='0.001 MB of 0.007 MB uploaded\r'), FloatProgress(value=0.1683088136050749, max=1.0…

0,1
regret,▁

0,1
regret,163102.72


[34m[1mwandb[0m: Agent Starting Run: 0yx2hu6g with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.004354142319070269
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:33<00:00,  1.51it/s, episode_return=50, return=-484.300]
Iteration 1: 100%|██████████| 50/50 [00:21<00:00,  2.34it/s, episode_return=100, return=-241.700]
Iteration 2: 100%|██████████| 50/50 [00:17<00:00,  2.92it/s, episode_return=150, return=-214.000]
Iteration 3: 100%|██████████| 50/50 [00:17<00:00,  2.78it/s, episode_return=200, return=-349.000]
Iteration 4: 100%|██████████| 50/50 [00:16<00:00,  3.00it/s, episode_return=250, return=-221.300]
Iteration 5: 100%|██████████| 50/50 [00:09<00:00,  5.05it/s, episode_return=300, return=-110.600]
Iteration 6: 100%|██████████| 50/50 [00:09<00:00,  5.51it/s, episode_return=350, return=-124.700]
Iteration 7: 100%|██████████| 50/50 [00:09<00:00,  5.46it/s, episode_return=400, return=-120.500]
Iteration 8: 100%|██████████| 50/50 [00:07<00:00,  6.41it/s, episode_return=450, return=-106.000]
Iteration 9: 100%|██████████| 50/50 [00:07<00:00,  6.46it/s, episode_return=500, return=-113.400]


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,131389.78627


[34m[1mwandb[0m: Agent Starting Run: zp3neo7o with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.007760473741982262
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:40<00:00,  1.25it/s, episode_return=50, return=-500.000]
Iteration 1: 100%|██████████| 50/50 [00:34<00:00,  1.44it/s, episode_return=100, return=-500.000]
Iteration 2: 100%|██████████| 50/50 [00:37<00:00,  1.34it/s, episode_return=150, return=-500.000]
Iteration 3: 100%|██████████| 50/50 [00:35<00:00,  1.41it/s, episode_return=200, return=-500.000]
Iteration 4: 100%|██████████| 50/50 [00:36<00:00,  1.37it/s, episode_return=250, return=-500.000]
Iteration 5: 100%|██████████| 50/50 [00:36<00:00,  1.38it/s, episode_return=300, return=-500.000]
Iteration 6: 100%|██████████| 50/50 [00:33<00:00,  1.50it/s, episode_return=350, return=-500.000]
Iteration 7: 100%|██████████| 50/50 [00:32<00:00,  1.53it/s, episode_return=400, return=-500.000]
Iteration 8: 100%|██████████| 50/50 [00:32<00:00,  1.54it/s, episode_return=450, return=-500.000]
Iteration 9: 100%|██████████| 50/50 [00:31<00:00,  1.57it/s, episode_return=500, return=-500.000]


VBox(children=(Label(value='0.001 MB of 0.007 MB uploaded\r'), FloatProgress(value=0.1637772524297347, max=1.0…

0,1
regret,▁

0,1
regret,248638.09881


[34m[1mwandb[0m: Agent Starting Run: nzu1i4a9 with config:
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.008828357767950575
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011111111111111112, max=1.0…

Iteration 0: 100%|██████████| 50/50 [00:36<00:00,  1.37it/s, episode_return=50, return=-500.000]
Iteration 1: 100%|██████████| 50/50 [00:36<00:00,  1.39it/s, episode_return=100, return=-500.000]
Iteration 2: 100%|██████████| 50/50 [00:38<00:00,  1.28it/s, episode_return=150, return=-500.000]
Iteration 3: 100%|██████████| 50/50 [00:37<00:00,  1.32it/s, episode_return=200, return=-500.000]
Iteration 4: 100%|██████████| 50/50 [00:29<00:00,  1.69it/s, episode_return=250, return=-223.600]
Iteration 5: 100%|██████████| 50/50 [00:14<00:00,  3.53it/s, episode_return=300, return=-190.500]
Iteration 6: 100%|██████████| 50/50 [00:24<00:00,  2.04it/s, episode_return=350, return=-490.300]
Iteration 7: 100%|██████████| 50/50 [00:38<00:00,  1.30it/s, episode_return=400, return=-500.000]
Iteration 8: 100%|██████████| 50/50 [00:37<00:00,  1.32it/s, episode_return=450, return=-500.000]
Iteration 9: 100%|██████████| 50/50 [00:37<00:00,  1.33it/s, episode_return=500, return=-500.000]


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,219120.02727


[34m[1mwandb[0m: Agent Starting Run: u9crp692 with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.009511341900193402
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:26<00:00,  1.86it/s, episode_return=50, return=-481.700]
Iteration 1: 100%|██████████| 50/50 [00:20<00:00,  2.47it/s, episode_return=100, return=-183.900]
Iteration 2: 100%|██████████| 50/50 [00:11<00:00,  4.50it/s, episode_return=150, return=-210.500]
Iteration 3: 100%|██████████| 50/50 [00:09<00:00,  5.18it/s, episode_return=200, return=-146.000]
Iteration 4: 100%|██████████| 50/50 [00:07<00:00,  7.01it/s, episode_return=250, return=-103.400]
Iteration 5: 100%|██████████| 50/50 [00:06<00:00,  7.88it/s, episode_return=300, return=-90.800] 
Iteration 6: 100%|██████████| 50/50 [00:09<00:00,  5.06it/s, episode_return=350, return=-122.900]
Iteration 7: 100%|██████████| 50/50 [00:07<00:00,  6.53it/s, episode_return=400, return=-106.100]
Iteration 8: 100%|██████████| 50/50 [00:07<00:00,  6.64it/s, episode_return=450, return=-92.100] 
Iteration 9: 100%|██████████| 50/50 [00:06<00:00,  7.30it/s, episode_return=500, return=-98.000] 


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,101205.39139


[34m[1mwandb[0m: Agent Starting Run: pan9i5lj with config:
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.0038045982880449353
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:43<00:00,  1.16it/s, episode_return=50, return=-500.000]
Iteration 1: 100%|██████████| 50/50 [00:43<00:00,  1.15it/s, episode_return=100, return=-500.000]
Iteration 2: 100%|██████████| 50/50 [00:45<00:00,  1.11it/s, episode_return=150, return=-500.000]
Iteration 3: 100%|██████████| 50/50 [00:37<00:00,  1.33it/s, episode_return=200, return=-500.000]
Iteration 4: 100%|██████████| 50/50 [00:38<00:00,  1.29it/s, episode_return=250, return=-500.000]
Iteration 5: 100%|██████████| 50/50 [00:39<00:00,  1.26it/s, episode_return=300, return=-500.000]
Iteration 6: 100%|██████████| 50/50 [00:38<00:00,  1.31it/s, episode_return=350, return=-500.000]
Iteration 7: 100%|██████████| 50/50 [00:36<00:00,  1.35it/s, episode_return=400, return=-500.000]
Iteration 8: 100%|██████████| 50/50 [00:37<00:00,  1.33it/s, episode_return=450, return=-500.000]
Iteration 9: 100%|██████████| 50/50 [00:38<00:00,  1.30it/s, episode_return=500, return=-500.000]


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,250000.0


[34m[1mwandb[0m: Agent Starting Run: sxdol2vp with config:
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.004822133781436487
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:16<00:00,  2.97it/s, episode_return=50, return=-403.500]
Iteration 1: 100%|██████████| 50/50 [00:33<00:00,  1.48it/s, episode_return=100, return=-500.000]
Iteration 2: 100%|██████████| 50/50 [00:29<00:00,  1.72it/s, episode_return=150, return=-318.800]
Iteration 3: 100%|██████████| 50/50 [00:09<00:00,  5.18it/s, episode_return=200, return=-91.000] 
Iteration 4: 100%|██████████| 50/50 [00:07<00:00,  6.97it/s, episode_return=250, return=-82.300] 
Iteration 5: 100%|██████████| 50/50 [00:06<00:00,  8.01it/s, episode_return=300, return=-86.800]
Iteration 6: 100%|██████████| 50/50 [00:06<00:00,  7.74it/s, episode_return=350, return=-81.900]
Iteration 7: 100%|██████████| 50/50 [00:06<00:00,  7.44it/s, episode_return=400, return=-85.100]
Iteration 8: 100%|██████████| 50/50 [00:07<00:00,  7.14it/s, episode_return=450, return=-108.600]
Iteration 9: 100%|██████████| 50/50 [00:06<00:00,  7.49it/s, episode_return=500, return=-91.600]


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,97158.13971


[34m[1mwandb[0m: Agent Starting Run: j0o7o8aa with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.003322958929401467
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:11<00:00,  4.34it/s, episode_return=50, return=-119.100]
Iteration 1: 100%|██████████| 50/50 [00:08<00:00,  5.63it/s, episode_return=100, return=-112.000]
Iteration 2: 100%|██████████| 50/50 [00:08<00:00,  5.65it/s, episode_return=150, return=-113.700]
Iteration 3: 100%|██████████| 50/50 [00:08<00:00,  5.98it/s, episode_return=200, return=-110.900]
Iteration 4: 100%|██████████| 50/50 [00:07<00:00,  6.93it/s, episode_return=250, return=-97.700] 
Iteration 5: 100%|██████████| 50/50 [00:08<00:00,  6.05it/s, episode_return=300, return=-136.000]
Iteration 6: 100%|██████████| 50/50 [00:08<00:00,  6.04it/s, episode_return=350, return=-86.300] 
Iteration 7: 100%|██████████| 50/50 [00:08<00:00,  6.03it/s, episode_return=400, return=-117.000]
Iteration 8: 100%|██████████| 50/50 [00:08<00:00,  6.06it/s, episode_return=450, return=-110.200]
Iteration 9: 100%|██████████| 50/50 [00:07<00:00,  6.47it/s, episode_return=500, return=-94.300] 


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,65510.28285


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: dgfzr1c7 with config:
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.009769967007327912
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:10<00:00,  4.80it/s, episode_return=50, return=-87.500] 
Iteration 1: 100%|██████████| 50/50 [00:09<00:00,  5.29it/s, episode_return=100, return=-78.000]
Iteration 2: 100%|██████████| 50/50 [00:07<00:00,  6.90it/s, episode_return=150, return=-85.100]
Iteration 3: 100%|██████████| 50/50 [00:07<00:00,  7.12it/s, episode_return=200, return=-86.500]
Iteration 4: 100%|██████████| 50/50 [00:07<00:00,  6.64it/s, episode_return=250, return=-87.600]
Iteration 5: 100%|██████████| 50/50 [00:07<00:00,  6.38it/s, episode_return=300, return=-89.100] 
Iteration 6: 100%|██████████| 50/50 [00:07<00:00,  6.26it/s, episode_return=350, return=-103.900]
Iteration 7: 100%|██████████| 50/50 [00:07<00:00,  6.66it/s, episode_return=400, return=-95.200]
Iteration 8: 100%|██████████| 50/50 [00:07<00:00,  6.74it/s, episode_return=450, return=-84.500]
Iteration 9: 100%|██████████| 50/50 [00:06<00:00,  7.49it/s, episode_return=500, return=-79.200]


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,49014.0805


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: o3dudohq with config:
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.00950924659224788
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:21<00:00,  2.31it/s, episode_return=50, return=-255.200]
Iteration 1: 100%|██████████| 50/50 [00:14<00:00,  3.48it/s, episode_return=100, return=-212.300]
Iteration 2: 100%|██████████| 50/50 [00:22<00:00,  2.23it/s, episode_return=150, return=-183.100]
Iteration 3: 100%|██████████| 50/50 [00:26<00:00,  1.88it/s, episode_return=200, return=-278.400]
Iteration 4: 100%|██████████| 50/50 [00:15<00:00,  3.30it/s, episode_return=250, return=-203.200]
Iteration 5: 100%|██████████| 50/50 [00:18<00:00,  2.65it/s, episode_return=300, return=-168.000]
Iteration 6: 100%|██████████| 50/50 [00:32<00:00,  1.54it/s, episode_return=350, return=-430.900]
Iteration 7: 100%|██████████| 50/50 [00:16<00:00,  3.06it/s, episode_return=400, return=-168.000]
Iteration 8: 100%|██████████| 50/50 [00:15<00:00,  3.25it/s, episode_return=450, return=-165.700]
Iteration 9: 100%|██████████| 50/50 [00:16<00:00,  3.03it/s, episode_return=500, return=-195.400]


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,143043.13779


[34m[1mwandb[0m: Agent Starting Run: pqtsv7rm with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.00759120509521487
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Iteration 0: 100%|██████████| 50/50 [00:17<00:00,  2.94it/s, episode_return=50, return=-139.200]
Iteration 1: 100%|██████████| 50/50 [00:07<00:00,  6.30it/s, episode_return=100, return=-109.400]
Iteration 2: 100%|██████████| 50/50 [00:07<00:00,  6.65it/s, episode_return=150, return=-176.500]
Iteration 3: 100%|██████████| 50/50 [00:09<00:00,  5.30it/s, episode_return=200, return=-163.200]
Iteration 4: 100%|██████████| 50/50 [00:15<00:00,  3.23it/s, episode_return=250, return=-259.300]
Iteration 5: 100%|██████████| 50/50 [00:14<00:00,  3.53it/s, episode_return=300, return=-224.700]
Iteration 6: 100%|██████████| 50/50 [00:15<00:00,  3.19it/s, episode_return=350, return=-157.300]
Iteration 7: 100%|██████████| 50/50 [00:12<00:00,  4.01it/s, episode_return=400, return=-178.900]
Iteration 8: 100%|██████████| 50/50 [00:11<00:00,  4.22it/s, episode_return=450, return=-155.500]
Iteration 9:  82%|████████▏ | 41/50 [00:10<00:01,  5.23it/s, episode_return=490, return=-191.900]