In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import gym
import wandb
import numpy as np
from tqdm import tqdm

class Policy(nn.Module):
    def __init__(self, state_dim, hidden_dim, action_dim):
        super(Policy, self).__init__()
        self.fc1 = torch.nn.Linear(state_dim, hidden_dim)
        self.fc2 = torch.nn.Linear(hidden_dim, action_dim)

    def forward(self, x):
        x = F.relu(self.fc1(x))
        return F.softmax(self.fc2(x), dim=1)

class Value(nn.Module):
    
    #Takes in state
    def __init__(self, observation_space):
        super(Value, self).__init__()
        
        self.input_layer = nn.Linear(observation_space, 128)
        self.output_layer = nn.Linear(128, 1)
        
    def forward(self, x):
        #input layer
        x = self.input_layer(x)
        
        #activiation relu
        x = F.relu(x)
        
        #get state value
        state_value = self.output_layer(x)
        
        return state_value
    
class REINFORCE:
    def __init__(self, state_dim, hidden_dim, action_dim, learning_rate, gamma,
                 device,v_lr):
        self.policy_net = Policy(state_dim, hidden_dim, action_dim).to(device)
        self.value_net=Value(state_dim).to(device)
        self.optimizer = torch.optim.Adam(self.policy_net.parameters(),
                                          lr=learning_rate)
        self.voptimizer=torch.optim.Adam(self.value_net.parameters(),lr=v_lr)
        self.gamma = gamma
        self.device = device

    def take_action(self, state):  # randomly sample an action according to the probability distribution
        state = torch.tensor(state.reshape(1, -1), dtype=torch.float).to(self.device) 
                                                                      
        probs = self.policy_net(state)
        action_dist = torch.distributions.Categorical(probs) 
        action = action_dist.sample()
        return action.item()
    

    def update(self, transition_dict):
        reward_list = transition_dict['rewards']
        state_list = transition_dict['states']
        action_list = transition_dict['actions']

        G = 0
        self.optimizer.zero_grad()
        self.voptimizer.zero_grad()
        for i in reversed(range(len(reward_list))):  # Back to Front, calc G
            reward = reward_list[i]
            state = torch.tensor(state_list[i].reshape(1, -1),
                                 dtype=torch.float).to(self.device)
            action = torch.tensor(action_list[i]).view(-1, 1).to(self.device)
            log_prob = torch.log(self.policy_net(state).gather(1, action))  # calc log probability of action
            G = self.gamma * G + reward  # calculate return starting from this step
            state_value=self.value_net(state)
            loss = -log_prob * (G - state_value)    # loss of each step
            loss.backward(retain_graph=True) # The gradient will be accumulated in each step
            
        self.optimizer.step()
        target_value = torch.tensor([[G]], dtype=torch.float).to(self.device)  # Target value is the calculated return G

        # Calculate the Mean Squared Error loss
        value_loss = F.mse_loss(state_value, target_value)

        # Backpropagate and update the value network
        value_loss.backward(retain_graph=True)
        self.voptimizer.step()




In [2]:

# agent params
learning_rate = 1e-3
gamma = 0.99
hidden_dim = 128
v_lr=0.005
# training params
num_pbar = 10
num_episodes = 500
num_seeds = 5
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# environment params
env_name = "CartPole-v1"


# Define the sweep configuration
sweep_config = {
    "method": "bayes",
    "metric": {"name": "regret", "goal": "minimize"},
    "parameters": {
        "learning_rate": {"min": 1e-5, "max": 1e-2},
        "v_lr": {"min": 1e-5, "max": 1e-2},
        "hidden_dim": {"values": [64, 128, 256]}
    },
    "project": "Cartpole_with_baseline",
    "early_terminate": {
        "type": "hyperband",
        "min_iter": 3,
        "max_iter": 100
}
}
# Initialize the sweep
sweep_id = wandb.sweep(sweep_config)

def train(learning_rate,v_lr,hidden_dim):
    # create environment
    env = gym.make(env_name)
    env.reset(seed=0)
    # create agent
    state_dim = env.observation_space.shape[0]
    action_dim = env.action_space.n
    agent = REINFORCE(state_dim, hidden_dim, action_dim, learning_rate, gamma, device,v_lr)
    # record episode return, for plotting
    return_list = []
    # train agent
    for i_episode in range(num_episodes):
        episode_return = 0 # return of the whole episode
        transition_dict = { # store transitions of the episode
            "states": [],
            "actions": [],
            "next_states": [],
            "rewards": [],
            "dones": []
        }
        state, _ = env.reset() # reset environment
        terminated, truncated = False, False
        while not terminated and not truncated:
            action = agent.take_action(state) # select an action
            next_state, reward, terminated, truncated, _ = env.step(action) # take action and get next state, reward, termination signal
            # store transition
            transition_dict["states"].append(state)
            transition_dict["actions"].append(action)
            transition_dict["next_states"].append(next_state)
            transition_dict["rewards"].append(reward)
            transition_dict["dones"].append(terminated)
            state = next_state
            episode_return += reward
        # add episode return to return list
        return_list.append(episode_return)
        # update agent per episode(MC)
        agent.update(transition_dict)

    avg = []
    for i in range(num_episodes):
        if i < 100:
            avg_return = np.mean(return_list[:i+1])
        else:
            avg_return = np.mean(return_list[i-100:i+1])
        avg.append(avg_return)

    regret = 0
    for i in avg:
        if i < 195:
            regret += 195 - i

    return regret    
   
def run_training():
    config_defaults = {
        "learning_rate": 1e-3,
        "v_lr": 0.005,
        "hidden_dim": 128
    }
    config = wandb.init(config=config_defaults,project="baseline_cartpole")
    learning_rate = config.config["learning_rate"]
    v_lr=config.config['v_lr']
    hidden_dim = config.config["hidden_dim"]
    regret = train(learning_rate,v_lr, hidden_dim)
    wandb.log({"regret": regret})

# Run the sweep
wandb.agent(sweep_id, function=run_training)




Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Create sweep with ID: 6w1s5fx9
Sweep URL: https://wandb.ai/rl_shobhith/Cartpole_with_baseline/sweeps/6w1s5fx9


[34m[1mwandb[0m: Agent Starting Run: 5fevi73q with config:
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.003196211463694309
[34m[1mwandb[0m: 	v_lr: 0.0072537655215280045
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mshobhith-v[0m ([33mrl_shobhith[0m). Use [1m`wandb login --relogin`[0m to force relogin


  if not isinstance(terminated, (bool, np.bool8)):


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,61274.67678


[34m[1mwandb[0m: Agent Starting Run: rgwep4x8 with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.008193310558720048
[34m[1mwandb[0m: 	v_lr: 0.0020787577459535573
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.20148650832121506, max=1.…

0,1
regret,▁

0,1
regret,19828.08517


[34m[1mwandb[0m: Agent Starting Run: lmoskoj0 with config:
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.0016940074613871672
[34m[1mwandb[0m: 	v_lr: 0.004132900194440651
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.19702719797596457, max=1.…

0,1
regret,▁

0,1
regret,28901.28975


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: mef2mbs8 with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.009492366558173936
[34m[1mwandb[0m: 	v_lr: 0.0014791277498162835
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.20148650832121506, max=1.…

0,1
regret,▁

0,1
regret,45187.71524


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: r0h6ujzm with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.004312886099117493
[34m[1mwandb[0m: 	v_lr: 0.003514934183896174
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.201519069166128, max=1.0)…

0,1
regret,▁

0,1
regret,51738.93593


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: gm0iy7ei with config:
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.009702559944361542
[34m[1mwandb[0m: 	v_lr: 0.002775408859018511
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.002 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.30892122745966466, max=1.…

0,1
regret,▁

0,1
regret,67431.32653


[34m[1mwandb[0m: Agent Starting Run: 7tjrzmuj with config:
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.008614315920148085
[34m[1mwandb[0m: 	v_lr: 0.0024425193583762096
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.19721651114977068, max=1.…

0,1
regret,▁

0,1
regret,73423.12696


[34m[1mwandb[0m: Agent Starting Run: 8hnnxazs with config:
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.0095933851515262
[34m[1mwandb[0m: 	v_lr: 0.0013716447323284097
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.002 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.3086536940357538, max=1.0…

0,1
regret,▁

0,1
regret,66284.43275


[34m[1mwandb[0m: Agent Starting Run: myotvwqg with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.0075232950842570225
[34m[1mwandb[0m: 	v_lr: 0.0017433654020303166
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.201453957996769, max=1.0)…

0,1
regret,▁

0,1
regret,25302.89291


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: c52q4i07 with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.008185209752761638
[34m[1mwandb[0m: 	v_lr: 0.002778662486834597
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.1971853257432005, max=1.0…

0,1
regret,▁

0,1
regret,30075.59242


[34m[1mwandb[0m: Agent Starting Run: 2nfoj9zr with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.007731381847428392
[34m[1mwandb[0m: 	v_lr: 0.0013760857380143057
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.20148650832121506, max=1.…

0,1
regret,▁

0,1
regret,23462.28288


[34m[1mwandb[0m: Agent Starting Run: e6pklt24 with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.00790202379097215
[34m[1mwandb[0m: 	v_lr: 0.0016035006083961131
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.19724770642201836, max=1.…

0,1
regret,▁

0,1
regret,20564.57965


[34m[1mwandb[0m: Agent Starting Run: f5tf1s2u with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.007616815864633686
[34m[1mwandb[0m: 	v_lr: 0.00138404798557111
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.19721651114977068, max=1.…

0,1
regret,▁

0,1
regret,25469.88949


[34m[1mwandb[0m: Agent Starting Run: utz07a30 with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.008226010083749853
[34m[1mwandb[0m: 	v_lr: 0.0018133972565810065
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.19721651114977068, max=1.…

0,1
regret,▁

0,1
regret,24470.43813


[34m[1mwandb[0m: Agent Starting Run: 2pfs4ji2 with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.0080520843643134
[34m[1mwandb[0m: 	v_lr: 0.001949088436801959
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.19727891156462585, max=1.…

0,1
regret,▁

0,1
regret,30817.39599


[34m[1mwandb[0m: Agent Starting Run: aaevyqcd with config:
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.006717294903098458
[34m[1mwandb[0m: 	v_lr: 0.000888019047955073
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.19702719797596457, max=1.…

0,1
regret,▁

0,1
regret,40197.03202


[34m[1mwandb[0m: Agent Starting Run: zuffhj5i with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.006187200510676412
[34m[1mwandb[0m: 	v_lr: 0.007197030513597605
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.19718309859154928, max=1.…

0,1
regret,▁

0,1
regret,62339.44026


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 30lqpkxs with config:
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.003748903333905096
[34m[1mwandb[0m: 	v_lr: 0.0029732226972282597
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.19712298450837812, max=1.…

0,1
regret,▁

0,1
regret,25928.61091


[34m[1mwandb[0m: Agent Starting Run: f8dr2xy2 with config:
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.004171214521469041
[34m[1mwandb[0m: 	v_lr: 0.0060609903453196145
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.19724770642201836, max=1.…

0,1
regret,▁

0,1
regret,57976.4922


[34m[1mwandb[0m: Agent Starting Run: v4yjfhi9 with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.001760187749045601
[34m[1mwandb[0m: 	v_lr: 0.007008720513907725
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.19727891156462585, max=1.…

0,1
regret,▁

0,1
regret,67673.70955


[34m[1mwandb[0m: Agent Starting Run: gxujhqia with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.006514508962261634
[34m[1mwandb[0m: 	v_lr: 0.005717471141349879
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.19724770642201836, max=1.…

0,1
regret,▁

0,1
regret,13510.79663


[34m[1mwandb[0m: Agent Starting Run: 4f40yn12 with config:
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.0034279625558166996
[34m[1mwandb[0m: 	v_lr: 0.001863764041012464
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.002 MB uploaded\r'), FloatProgress(value=0.6381780962128967, max=1.0…

0,1
regret,▁

0,1
regret,17495.16832


[34m[1mwandb[0m: Agent Starting Run: lqmllf3m with config:
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.008098421065343437
[34m[1mwandb[0m: 	v_lr: 0.006406761628757529
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011288888888925108, max=1.0…

VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.19721651114977068, max=1.…

0,1
regret,▁

0,1
regret,85324.64105


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: o9g81hfx with config:
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.008805620391841887
[34m[1mwandb[0m: 	v_lr: 0.003275379738431894
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.005 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.8670145477545858, max=1.0…

0,1
regret,▁

0,1
regret,58652.06361


[34m[1mwandb[0m: Agent Starting Run: 1hrk06q6 with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.0018255043001727687
[34m[1mwandb[0m: 	v_lr: 0.001560877987692538
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,36001.49648


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 5v3wrjuj with config:
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.006444901081234543
[34m[1mwandb[0m: 	v_lr: 0.00786617335855408
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.19731012658227848, max=1.…

0,1
regret,▁

0,1
regret,63131.46199


[34m[1mwandb[0m: Agent Starting Run: da73ytqh with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.009584594271018316
[34m[1mwandb[0m: 	v_lr: 0.001810517891001573
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.19727891156462585, max=1.…

0,1
regret,▁

0,1
regret,20167.54952


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: qst64gcn with config:
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.009307967407995964
[34m[1mwandb[0m: 	v_lr: 0.0003961805571350235
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.201453957996769, max=1.0)…

0,1
regret,▁

0,1
regret,28811.99202


[34m[1mwandb[0m: Agent Starting Run: jtbmf5c4 with config:
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.005570325557507209
[34m[1mwandb[0m: 	v_lr: 0.0006764060169916651
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.005 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.8671306548560582, max=1.0…

0,1
regret,▁

0,1
regret,54960.64704


[34m[1mwandb[0m: Agent Starting Run: dotjbvjo with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.0008083744209659978
[34m[1mwandb[0m: 	v_lr: 0.0010736743252951074
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.002 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.3298528713811106, max=1.0…

0,1
regret,▁

0,1
regret,50073.76735


[34m[1mwandb[0m: Agent Starting Run: a2pa0lyo with config:
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.006421017506848242
[34m[1mwandb[0m: 	v_lr: 0.0045290630544633234
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.201453957996769, max=1.0)…

0,1
regret,▁

0,1
regret,90907.73065


[34m[1mwandb[0m: Agent Starting Run: 3mub83i0 with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.006269753148420573
[34m[1mwandb[0m: 	v_lr: 0.001591294633726172
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.19727891156462585, max=1.…

0,1
regret,▁

0,1
regret,17438.57032


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: vv60js9y with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.0036276031892888594
[34m[1mwandb[0m: 	v_lr: 0.003241476808147729
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.19721651114977068, max=1.…

0,1
regret,▁

0,1
regret,50863.73566


[34m[1mwandb[0m: Agent Starting Run: 1rorgi5n with config:
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.00019955081780793847
[34m[1mwandb[0m: 	v_lr: 0.005844782338734999
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011111111111111112, max=1.0…

VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.19715415019762847, max=1.…

0,1
regret,▁

0,1
regret,86770.78573


[34m[1mwandb[0m: Agent Starting Run: daubyxnv with config:
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.00102457355115203
[34m[1mwandb[0m: 	v_lr: 0.009973273316768285
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.20139001131404557, max=1.…

0,1
regret,▁

0,1
regret,31587.14686


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: eh5ckpmr with config:
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.0038618969245588905
[34m[1mwandb[0m: 	v_lr: 0.009237320884391964
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.19721651114977068, max=1.…

0,1
regret,▁

0,1
regret,61788.08358


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: iziqfmd2 with config:
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.004260470577757686
[34m[1mwandb[0m: 	v_lr: 0.009547139510844797
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.19721651114977068, max=1.…

0,1
regret,▁

0,1
regret,55651.75629


[34m[1mwandb[0m: Agent Starting Run: h7vwitoo with config:
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.004800640278703991
[34m[1mwandb[0m: 	v_lr: 0.00981292282401688
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.19731012658227848, max=1.…

0,1
regret,▁

0,1
regret,74830.23199


[34m[1mwandb[0m: Agent Starting Run: kp0kdtwg with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.006407160003023444
[34m[1mwandb[0m: 	v_lr: 0.009712132699548936
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.201519069166128, max=1.0)…

0,1
regret,▁

0,1
regret,72012.96914


[34m[1mwandb[0m: Agent Starting Run: fqj1i75b with config:
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.003536634091967979
[34m[1mwandb[0m: 	v_lr: 0.007418086997782071
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.20148650832121506, max=1.…

0,1
regret,▁

0,1
regret,35818.48964


[34m[1mwandb[0m: Agent Starting Run: mqqsckhm with config:
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.003294154416189431
[34m[1mwandb[0m: 	v_lr: 0.0009867445224567553
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.002 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.3088839709136895, max=1.0…

0,1
regret,▁

0,1
regret,18815.49294


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: srt429mn with config:
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.0030001426227591692
[34m[1mwandb[0m: 	v_lr: 0.009520842880523073
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.201453957996769, max=1.0)…

0,1
regret,▁

0,1
regret,23914.2423


[34m[1mwandb[0m: Agent Starting Run: kmmyg0xj with config:
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.007771081703497852
[34m[1mwandb[0m: 	v_lr: 0.009639470384751436
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.20148650832121506, max=1.…

0,1
regret,▁

0,1
regret,92750.38174


[34m[1mwandb[0m: Agent Starting Run: mgrpc3t1 with config:
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.0060913473014728835
[34m[1mwandb[0m: 	v_lr: 0.00474862322051566
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.20148650832121506, max=1.…

0,1
regret,▁

0,1
regret,78879.99699


[34m[1mwandb[0m: Agent Starting Run: i6a0h3nv with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.00918420082505233
[34m[1mwandb[0m: 	v_lr: 0.00950165225787118
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.2015842224377627, max=1.0…

0,1
regret,▁

0,1
regret,38130.19855


[34m[1mwandb[0m: Agent Starting Run: 1ir4eee9 with config:
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.0032471402558027452
[34m[1mwandb[0m: 	v_lr: 0.007362709076309244
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.005 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.8671516685117824, max=1.0…

0,1
regret,▁

0,1
regret,65206.18871


[34m[1mwandb[0m: Agent Starting Run: kstj8frk with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.0072625169830512195
[34m[1mwandb[0m: 	v_lr: 0.008649649964145934
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.19724770642201836, max=1.…

0,1
regret,▁

0,1
regret,28676.25725


[34m[1mwandb[0m: Agent Starting Run: yyyaj73c with config:
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.006078897952760423
[34m[1mwandb[0m: 	v_lr: 0.007239324289418517
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,71126.58963


[34m[1mwandb[0m: Agent Starting Run: lrzwdy5b with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.0012210695957012348
[34m[1mwandb[0m: 	v_lr: 0.009574620943348666
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.19721651114977068, max=1.…

0,1
regret,▁

0,1
regret,82978.15595


[34m[1mwandb[0m: Agent Starting Run: 67rk3shx with config:
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.0046100945209625556
[34m[1mwandb[0m: 	v_lr: 0.0006185410111480682
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.2014214181876918, max=1.0…

0,1
regret,▁

0,1
regret,25967.43526


[34m[1mwandb[0m: Agent Starting Run: 74xujf5j with config:
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.0057937858457544275
[34m[1mwandb[0m: 	v_lr: 0.005612638322065818
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.201453957996769, max=1.0)…

0,1
regret,▁

0,1
regret,15663.83552


[34m[1mwandb[0m: Agent Starting Run: 39i0plx7 with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.0009224329403863106
[34m[1mwandb[0m: 	v_lr: 0.009619944622501791
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.19721651114977068, max=1.…

0,1
regret,▁

0,1
regret,86517.30845


[34m[1mwandb[0m: Agent Starting Run: wrggo64q with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.00575481616002478
[34m[1mwandb[0m: 	v_lr: 0.005187052493964843
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.19724770642201836, max=1.…

0,1
regret,▁

0,1
regret,31635.07378


[34m[1mwandb[0m: Agent Starting Run: qh2115fn with config:
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.009858017278454098
[34m[1mwandb[0m: 	v_lr: 0.0050577950411670935
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.002 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.3300648426379883, max=1.0…

0,1
regret,▁

0,1
regret,73706.7844


[34m[1mwandb[0m: Agent Starting Run: 3h63a19d with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.008035983311458706
[34m[1mwandb[0m: 	v_lr: 0.005641338098115769
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.19727891156462585, max=1.…

0,1
regret,▁

0,1
regret,61310.00179


[34m[1mwandb[0m: Agent Starting Run: q1jvndvg with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.0005781324070903852
[34m[1mwandb[0m: 	v_lr: 0.008905137789395712
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,85206.78538


[34m[1mwandb[0m: Agent Starting Run: yaoonz1b with config:
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.003266233641473591
[34m[1mwandb[0m: 	v_lr: 0.0017672779097739606
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.201453957996769, max=1.0)…

0,1
regret,▁

0,1
regret,22078.31193


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 4akq0v58 with config:
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.0033716315238457133
[34m[1mwandb[0m: 	v_lr: 0.0095343783000308
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.201519069166128, max=1.0)…

0,1
regret,▁

0,1
regret,42235.0599


[34m[1mwandb[0m: Agent Starting Run: sf06h6ap with config:
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.0055919836522290555
[34m[1mwandb[0m: 	v_lr: 0.005274473632819419
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.9786561264822135, max=1.0…

0,1
regret,▁

0,1
regret,50342.92627


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: xc593krs with config:
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.009251227500100791
[34m[1mwandb[0m: 	v_lr: 0.009963508430273696
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.002 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.30892122745966466, max=1.…

0,1
regret,▁

0,1
regret,75805.02787


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 58wuce12 with config:
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.0010736265060880172
[34m[1mwandb[0m: 	v_lr: 0.00848502343828687
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.20148650832121506, max=1.…

0,1
regret,▁

0,1
regret,70570.08218


[34m[1mwandb[0m: Agent Starting Run: nmvdg9f9 with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.0019148909828635365
[34m[1mwandb[0m: 	v_lr: 0.004820208977054469
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.9789623536855425, max=1.0…

0,1
regret,▁

0,1
regret,47182.19976


[34m[1mwandb[0m: Agent Starting Run: l3nujm9k with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.005820059410650369
[34m[1mwandb[0m: 	v_lr: 0.009203274809862072
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.201519069166128, max=1.0)…

0,1
regret,▁

0,1
regret,33662.78521


[34m[1mwandb[0m: Agent Starting Run: hnb0pnac with config:
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.0041906733049143045
[34m[1mwandb[0m: 	v_lr: 0.0031657116081994837
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.002 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.3301707779886148, max=1.0…

0,1
regret,▁

0,1
regret,54776.66918


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 4w3qg604 with config:
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.006764948996110072
[34m[1mwandb[0m: 	v_lr: 0.004730367778127971
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.1971853257432005, max=1.0…

0,1
regret,▁

0,1
regret,27608.75317


[34m[1mwandb[0m: Agent Starting Run: a1849o2l with config:
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.007240942800075236
[34m[1mwandb[0m: 	v_lr: 0.004401657827646549
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.20148650832121506, max=1.…

0,1
regret,▁

0,1
regret,80141.08006


[34m[1mwandb[0m: Agent Starting Run: ivu5ur14 with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.004694003642343758
[34m[1mwandb[0m: 	v_lr: 0.009117453929043982
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.9788041758937045, max=1.0…

0,1
regret,▁

0,1
regret,45927.7377


[34m[1mwandb[0m: Agent Starting Run: 16xb1x1i with config:
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.0005741472707419281
[34m[1mwandb[0m: 	v_lr: 0.005657865833873333
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.19721651114977068, max=1.…

0,1
regret,▁

0,1
regret,68756.67173


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 1v8d9t4a with config:
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.006624388277909098
[34m[1mwandb[0m: 	v_lr: 0.007644771396644852
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.19724770642201836, max=1.…

0,1
regret,▁

0,1
regret,61846.25652


[34m[1mwandb[0m: Agent Starting Run: 8t9jo26x with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.0009060850784587056
[34m[1mwandb[0m: 	v_lr: 0.008880519551338463
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.20148650832121506, max=1.…

0,1
regret,▁

0,1
regret,81502.52635


[34m[1mwandb[0m: Agent Starting Run: jcvcd7z5 with config:
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.008855360359122624
[34m[1mwandb[0m: 	v_lr: 0.0006907737822162993
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.19721651114977068, max=1.…

0,1
regret,▁

0,1
regret,75075.9114


[34m[1mwandb[0m: Agent Starting Run: lplxnrck with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.005494304220157649
[34m[1mwandb[0m: 	v_lr: 0.0008788125146564288
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011288888888925108, max=1.0…

VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.9786527514231499, max=1.0…

0,1
regret,▁

0,1
regret,19540.12594


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: sjqpzmk5 with config:
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.0006223901974171789
[34m[1mwandb[0m: 	v_lr: 0.008814053739736469
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.201453957996769, max=1.0)…

0,1
regret,▁

0,1
regret,82320.16869


[34m[1mwandb[0m: Agent Starting Run: mp2k9kf9 with config:
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.000935330009528746
[34m[1mwandb[0m: 	v_lr: 0.004895332873662527
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.19721651114977068, max=1.…

0,1
regret,▁

0,1
regret,73152.63996


[34m[1mwandb[0m: Agent Starting Run: sjt7ax4m with config:
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.0053219318293653735
[34m[1mwandb[0m: 	v_lr: 0.004364143523185625
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.201453957996769, max=1.0)…

0,1
regret,▁

0,1
regret,61478.41418


[34m[1mwandb[0m: Agent Starting Run: 15qcixwi with config:
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.007661813801493352
[34m[1mwandb[0m: 	v_lr: 0.0015947517016781172
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.19721651114977068, max=1.…

0,1
regret,▁

0,1
regret,60972.87568


[34m[1mwandb[0m: Agent Starting Run: 8mlyi0c3 with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.008858749069646628
[34m[1mwandb[0m: 	v_lr: 0.003481523788364931
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.1971853257432005, max=1.0…

0,1
regret,▁

0,1
regret,44332.22141


[34m[1mwandb[0m: Agent Starting Run: qwjln7gz with config:
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.0022632509842560105
[34m[1mwandb[0m: 	v_lr: 0.003558651582824944
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011111111111111112, max=1.0…

VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.201453957996769, max=1.0)…

0,1
regret,▁

0,1
regret,40116.16206


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: u8xnuif6 with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.00914828689582891
[34m[1mwandb[0m: 	v_lr: 1.3102116218689351e-05
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.19724770642201836, max=1.…

0,1
regret,▁

0,1
regret,66889.25412


[34m[1mwandb[0m: Agent Starting Run: 5tgqoh66 with config:
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.005795906627138031
[34m[1mwandb[0m: 	v_lr: 0.007016232165524918
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.20148650832121506, max=1.…

0,1
regret,▁

0,1
regret,74643.20159


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: kbvxeag6 with config:
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.004094417317829461
[34m[1mwandb[0m: 	v_lr: 0.00476857584017284
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.002 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.30881189685176397, max=1.…

0,1
regret,▁

0,1
regret,59727.42025


[34m[1mwandb[0m: Agent Starting Run: p06i3vv3 with config:
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.001889679969284903
[34m[1mwandb[0m: 	v_lr: 0.007269191922081266
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.20148650832121506, max=1.…

0,1
regret,▁

0,1
regret,76823.17557


[34m[1mwandb[0m: Agent Starting Run: b7v6fehf with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.000604236839764764
[34m[1mwandb[0m: 	v_lr: 0.00692190204115752
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.19727891156462585, max=1.…

0,1
regret,▁

0,1
regret,85870.56316


[34m[1mwandb[0m: Agent Starting Run: ikk78m21 with config:
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.003010523058201064
[34m[1mwandb[0m: 	v_lr: 0.0008662867102441853
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.1971853257432005, max=1.0…

0,1
regret,▁

0,1
regret,21585.63589


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 6lu561pc with config:
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.006567189081631993
[34m[1mwandb[0m: 	v_lr: 0.009558609302953016
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.002 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.3298528713811106, max=1.0…

0,1
regret,▁

0,1
regret,41163.76505


[34m[1mwandb[0m: Agent Starting Run: jnoq01fz with config:
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.009729582177955637
[34m[1mwandb[0m: 	v_lr: 0.003585724905151431
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011277777777932999, max=1.0…

VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.97894903450459, max=1.0))…

0,1
regret,▁

0,1
regret,72535.98154


[34m[1mwandb[0m: Agent Starting Run: npms8mit with config:
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.00425233595511544
[34m[1mwandb[0m: 	v_lr: 0.0005837079892252444
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.19715415019762847, max=1.…

0,1
regret,▁

0,1
regret,37619.68308


[34m[1mwandb[0m: Agent Starting Run: 5yp71svp with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.005247750191510521
[34m[1mwandb[0m: 	v_lr: 0.0032947951262629477
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.19727891156462585, max=1.…

0,1
regret,▁

0,1
regret,46338.24221


[34m[1mwandb[0m: Agent Starting Run: l6foze3r with config:
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.003312075352964164
[34m[1mwandb[0m: 	v_lr: 0.008245368786871184
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011277777777932999, max=1.0…

VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,34759.33122


[34m[1mwandb[0m: Agent Starting Run: uu1v4k5r with config:
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.00743263926178257
[34m[1mwandb[0m: 	v_lr: 0.0035720318715992154
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.19712070874861573, max=1.…

0,1
regret,▁

0,1
regret,74443.70363


[34m[1mwandb[0m: Agent Starting Run: oju5ns6x with config:
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.003679766215689928
[34m[1mwandb[0m: 	v_lr: 0.00039605924015108786
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.9786595004742333, max=1.0…

0,1
regret,▁

0,1
regret,30547.13649


[34m[1mwandb[0m: Agent Starting Run: 6jazcb5s with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.009359423653139808
[34m[1mwandb[0m: 	v_lr: 0.0004904133219325409
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.002 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.3299588737741221, max=1.0…

0,1
regret,▁

0,1
regret,72745.01201


[34m[1mwandb[0m: Agent Starting Run: ii9ny3f3 with config:
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.0008587772182559348
[34m[1mwandb[0m: 	v_lr: 0.00956646783769912
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.20148650832121506, max=1.…

0,1
regret,▁

0,1
regret,79055.06023


[34m[1mwandb[0m: Agent Starting Run: 19abbtpt with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.0028600407528827088
[34m[1mwandb[0m: 	v_lr: 0.0019855572910320173
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.9788108791903858, max=1.0…

0,1
regret,▁

0,1
regret,36124.15934


[34m[1mwandb[0m: Agent Starting Run: l6ba6tuk with config:
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.0019273414453705136
[34m[1mwandb[0m: 	v_lr: 0.0069514613338866734
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.005 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.8671516685117824, max=1.0…

0,1
regret,▁

0,1
regret,45652.32054


[34m[1mwandb[0m: Agent Starting Run: touy8j39 with config:
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.002035362299408834
[34m[1mwandb[0m: 	v_lr: 0.006203206118155992
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.9789623536855425, max=1.0…

0,1
regret,▁

0,1
regret,46185.83222


[34m[1mwandb[0m: Agent Starting Run: 39dovisw with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.009072416349163151
[34m[1mwandb[0m: 	v_lr: 0.006761211970425596
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.9788041758937045, max=1.0…

0,1
regret,▁

0,1
regret,54345.7357


[34m[1mwandb[0m: Agent Starting Run: e9tbhcrv with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.006864623794429231
[34m[1mwandb[0m: 	v_lr: 0.00969079758283384
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.19731012658227848, max=1.…

0,1
regret,▁

0,1
regret,71358.19337


[34m[1mwandb[0m: Agent Starting Run: 6i9gmzl0 with config:
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.0028262781500414834
[34m[1mwandb[0m: 	v_lr: 0.00018859368654178143
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.19709182867077604, max=1.…

0,1
regret,▁

0,1
regret,29973.68011


[34m[1mwandb[0m: Agent Starting Run: 7bnh4aop with config:
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.00765149610178904
[34m[1mwandb[0m: 	v_lr: 0.008466958572150633
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.19724770642201836, max=1.…

0,1
regret,▁

0,1
regret,79438.73188


[34m[1mwandb[0m: Agent Starting Run: jnh52mpr with config:
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.003379776782838487
[34m[1mwandb[0m: 	v_lr: 0.009423467637894846
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.9789590254706534, max=1.0…

0,1
regret,▁

0,1
regret,46186.29992


[34m[1mwandb[0m: Agent Starting Run: vk0dbwq6 with config:
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.003541772773124032
[34m[1mwandb[0m: 	v_lr: 0.007708929203933635
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.20148650832121506, max=1.…

0,1
regret,▁

0,1
regret,66677.74729


[34m[1mwandb[0m: Agent Starting Run: eoqq4auy with config:
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.008200131519188915
[34m[1mwandb[0m: 	v_lr: 0.007520198553372358
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.005 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.8672888326478962, max=1.0…

0,1
regret,▁

0,1
regret,84175.89325


[34m[1mwandb[0m: Agent Starting Run: uigdq8up with config:
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.0017013406504770338
[34m[1mwandb[0m: 	v_lr: 0.0010422776147582906
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.19709182867077604, max=1.…

0,1
regret,▁

0,1
regret,29572.14909


[34m[1mwandb[0m: Agent Starting Run: iqa4djq4 with config:
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.002197410439654137
[34m[1mwandb[0m: 	v_lr: 0.002976685818125042
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.19721651114977068, max=1.…

0,1
regret,▁

0,1
regret,19948.85915


[34m[1mwandb[0m: Agent Starting Run: vdepjuru with config:
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.003776114005797848
[34m[1mwandb[0m: 	v_lr: 0.007278743739414403
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011111111111111112, max=1.0…

VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.19721651114977068, max=1.…

0,1
regret,▁

0,1
regret,27329.92504


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: nx6qvfbp with config:
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.004440067762941364
[34m[1mwandb[0m: 	v_lr: 0.005250977574057755
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.19727891156462585, max=1.…

0,1
regret,▁

0,1
regret,44164.25264


[34m[1mwandb[0m: Agent Starting Run: iofgjuh3 with config:
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.008056102960434545
[34m[1mwandb[0m: 	v_lr: 0.009385164053692776
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.9791172282866635, max=1.0…

0,1
regret,▁

0,1
regret,74107.7299


[34m[1mwandb[0m: Agent Starting Run: tc29n6gp with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.0018470123167700096
[34m[1mwandb[0m: 	v_lr: 0.007384478217445721
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.9789623536855425, max=1.0…

0,1
regret,▁

0,1
regret,65715.22102


[34m[1mwandb[0m: Agent Starting Run: inx6f29p with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.00173715367060235
[34m[1mwandb[0m: 	v_lr: 0.0019675398444287248
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.19727891156462585, max=1.…

0,1
regret,▁

0,1
regret,31969.39826


[34m[1mwandb[0m: Agent Starting Run: sdglrbbb with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.007758355898742391
[34m[1mwandb[0m: 	v_lr: 0.008785291983152097
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.19737258626147516, max=1.…

0,1
regret,▁

0,1
regret,88089.34801


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: peazbbeh with config:
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.008978877680140013
[34m[1mwandb[0m: 	v_lr: 0.005801782867711588
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.005 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.8674260401835152, max=1.0…

0,1
regret,▁

0,1
regret,64124.53974


[34m[1mwandb[0m: Agent Starting Run: 0l7qi3jw with config:
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.006131087826244493
[34m[1mwandb[0m: 	v_lr: 0.0014536410497238016
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.19715415019762847, max=1.…

0,1
regret,▁

0,1
regret,24168.96756


[34m[1mwandb[0m: Agent Starting Run: 6lm8wwhe with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.0025702750488148696
[34m[1mwandb[0m: 	v_lr: 0.005110462626785689
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.19715415019762847, max=1.…

0,1
regret,▁

0,1
regret,36012.08725


[34m[1mwandb[0m: Agent Starting Run: q0ywmt07 with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.0017211987132690842
[34m[1mwandb[0m: 	v_lr: 0.006310423241159124
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.19727891156462585, max=1.…

0,1
regret,▁

0,1
regret,49477.79253


[34m[1mwandb[0m: Agent Starting Run: lvwrjmlb with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.0057710250758721575
[34m[1mwandb[0m: 	v_lr: 0.002084262059294926
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.20148650832121506, max=1.…

0,1
regret,▁

0,1
regret,11148.17355


[34m[1mwandb[0m: Agent Starting Run: c1oa9m7f with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.007603098100076294
[34m[1mwandb[0m: 	v_lr: 0.00985664637902155
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.201551640536609, max=1.0)…

0,1
regret,▁

0,1
regret,65650.26794


[34m[1mwandb[0m: Agent Starting Run: 9isezasm with config:
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.007118446792563163
[34m[1mwandb[0m: 	v_lr: 0.002875762480740833
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.9786493752965365, max=1.0…

0,1
regret,▁

0,1
regret,27002.76286


[34m[1mwandb[0m: Agent Starting Run: u17lhjxb with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.005127646473909546
[34m[1mwandb[0m: 	v_lr: 0.006986694742057172
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.201519069166128, max=1.0)…

0,1
regret,▁

0,1
regret,66709.55049


[34m[1mwandb[0m: Agent Starting Run: 5mh9nr9j with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.001035575651774157
[34m[1mwandb[0m: 	v_lr: 0.000827071406985174
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.002 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.30871421793452475, max=1.…

0,1
regret,▁

0,1
regret,50943.36097


[34m[1mwandb[0m: Agent Starting Run: w1yd6fw5 with config:
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.0026179803345700342
[34m[1mwandb[0m: 	v_lr: 0.0005102837733741959
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.1970895286301803, max=1.0…

0,1
regret,▁

0,1
regret,25192.38138


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: gu11z5yd with config:
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.005204034078169158
[34m[1mwandb[0m: 	v_lr: 0.005978545412875206
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.19724770642201836, max=1.…

0,1
regret,▁

0,1
regret,71376.28955


[34m[1mwandb[0m: Agent Starting Run: 0wplxqt4 with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.001576646282883701
[34m[1mwandb[0m: 	v_lr: 0.004870287869987376
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.005 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.8677005855356861, max=1.0…

0,1
regret,▁

0,1
regret,45136.18644


[34m[1mwandb[0m: Agent Starting Run: 12y1k6he with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.007818570326676134
[34m[1mwandb[0m: 	v_lr: 0.0024018487686236546
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.005 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.8670145477545858, max=1.0…

0,1
regret,▁

0,1
regret,17942.81056


[34m[1mwandb[0m: Agent Starting Run: rcveg07x with config:
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.008819938839443441
[34m[1mwandb[0m: 	v_lr: 0.0018451201324888376
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.1970895286301803, max=1.0…

0,1
regret,▁

0,1
regret,55808.38439


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 8o1mmlcj with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.005941491172601306
[34m[1mwandb[0m: 	v_lr: 0.0012638458790517608
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.19721651114977068, max=1.…

0,1
regret,▁

0,1
regret,29252.56729


[34m[1mwandb[0m: Agent Starting Run: v5mux4ho with config:
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.0062384645743556845
[34m[1mwandb[0m: 	v_lr: 0.0004707533529941857
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.2012924071082391, max=1.0…

0,1
regret,▁

0,1
regret,56757.71506


[34m[1mwandb[0m: Agent Starting Run: 6cahteyl with config:
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.009367262552821193
[34m[1mwandb[0m: 	v_lr: 0.00877800618960647
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.19731012658227848, max=1.…

0,1
regret,▁

0,1
regret,62960.65693


[34m[1mwandb[0m: Agent Starting Run: cw7zvytq with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.001835525062207836
[34m[1mwandb[0m: 	v_lr: 0.004191749474278101
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.19727891156462585, max=1.…

0,1
regret,▁

0,1
regret,43758.72504


[34m[1mwandb[0m: Agent Starting Run: qh00eava with config:
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.004623884933003958
[34m[1mwandb[0m: 	v_lr: 0.006313567580840004
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.20148650832121506, max=1.…

0,1
regret,▁

0,1
regret,30372.52118


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: qsnbu6ji with config:
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.009729426330612946
[34m[1mwandb[0m: 	v_lr: 0.007089827854436117
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.002 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.30876304966782664, max=1.…

0,1
regret,▁

0,1
regret,84137.79659


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 1pngjyqa with config:
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.0035104056294645667
[34m[1mwandb[0m: 	v_lr: 0.00976718583288658
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.19727891156462585, max=1.…

0,1
regret,▁

0,1
regret,65744.92257


[34m[1mwandb[0m: Agent Starting Run: n6k0ut2u with config:
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.005004063546085508
[34m[1mwandb[0m: 	v_lr: 0.0009398482885890752
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.201453957996769, max=1.0)…

0,1
regret,▁

0,1
regret,22214.85061


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: xc0jj5vg with config:
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.008027225972646417
[34m[1mwandb[0m: 	v_lr: 0.009830302251387346
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.19727891156462585, max=1.…

0,1
regret,▁

0,1
regret,51731.47247


[34m[1mwandb[0m: Agent Starting Run: 0kl4cgmv with config:
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.00976451245937906
[34m[1mwandb[0m: 	v_lr: 0.004921992994350016
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.9786493752965365, max=1.0…

0,1
regret,▁

0,1
regret,50833.83707


[34m[1mwandb[0m: Agent Starting Run: b0liy2wj with config:
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.0035489059139384447
[34m[1mwandb[0m: 	v_lr: 0.008440716474014292
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.002 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.3301707779886148, max=1.0…

0,1
regret,▁

0,1
regret,57402.87943


[34m[1mwandb[0m: Agent Starting Run: ay6qmvot with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.005053230715791259
[34m[1mwandb[0m: 	v_lr: 0.0040087131560048285
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.19715415019762847, max=1.…

0,1
regret,▁

0,1
regret,40964.49766


[34m[1mwandb[0m: Agent Starting Run: mcvfdlbf with config:
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.0017307967522445585
[34m[1mwandb[0m: 	v_lr: 0.0040550677638356136
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.002 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.3304883831199621, max=1.0…

0,1
regret,▁

0,1
regret,39569.52337


[34m[1mwandb[0m: Agent Starting Run: o1f0d3tm with config:
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.007051669384140377
[34m[1mwandb[0m: 	v_lr: 0.0025060008747694166
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.201453957996769, max=1.0)…

0,1
regret,▁

0,1
regret,55512.63877


[34m[1mwandb[0m: Agent Starting Run: qf4dw53c with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.00792108952270942
[34m[1mwandb[0m: 	v_lr: 0.0001063957294514858
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.19731012658227848, max=1.…

0,1
regret,▁

0,1
regret,43666.96179


[34m[1mwandb[0m: Agent Starting Run: i722sv3r with config:
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.005862970738034911
[34m[1mwandb[0m: 	v_lr: 0.001770690714531028
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.20148650832121506, max=1.…

0,1
regret,▁

0,1
regret,42165.84184


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 4paqr5ae with config:
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.0001654183366033859
[34m[1mwandb[0m: 	v_lr: 0.0022792709148168255
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.1971853257432005, max=1.0…

0,1
regret,▁

0,1
regret,86071.06356


[34m[1mwandb[0m: Agent Starting Run: 5aprl416 with config:
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.003482468791128855
[34m[1mwandb[0m: 	v_lr: 0.00395848918223686
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.19721651114977068, max=1.…

0,1
regret,▁

0,1
regret,22136.3856


[34m[1mwandb[0m: Agent Starting Run: vw7wlz60 with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.006119357229425976
[34m[1mwandb[0m: 	v_lr: 0.0019851568648899527
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.19721651114977068, max=1.…

0,1
regret,▁

0,1
regret,38562.46562


[34m[1mwandb[0m: Agent Starting Run: f0iii9ih with config:
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.00855198429311557
[34m[1mwandb[0m: 	v_lr: 0.0011176698772314552
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.19724770642201836, max=1.…

0,1
regret,▁

0,1
regret,20169.18316


[34m[1mwandb[0m: Agent Starting Run: fuig4wse with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.009906249787189776
[34m[1mwandb[0m: 	v_lr: 0.003434616316096249
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.19721651114977068, max=1.…

0,1
regret,▁

0,1
regret,26118.1132


[34m[1mwandb[0m: Agent Starting Run: lf40qdph with config:
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.00908223042658744
[34m[1mwandb[0m: 	v_lr: 6.611625599070096e-05
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,40612.65475


[34m[1mwandb[0m: Agent Starting Run: 33ok97l5 with config:
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.009999288122191698
[34m[1mwandb[0m: 	v_lr: 0.006143127993009625
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.1970895286301803, max=1.0…

0,1
regret,▁

0,1
regret,92617.34217


[34m[1mwandb[0m: Agent Starting Run: hv4ld3km with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.0006926594243654839
[34m[1mwandb[0m: 	v_lr: 0.008427170858269795
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.19731012658227848, max=1.…

0,1
regret,▁

0,1
regret,85140.20341


[34m[1mwandb[0m: Agent Starting Run: 22aisjsg with config:
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.005064820112491419
[34m[1mwandb[0m: 	v_lr: 0.0070891394120798906
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,89543.22672


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 7tfi25zp with config:
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.00422421151458421
[34m[1mwandb[0m: 	v_lr: 0.007515205401570205
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.19731012658227848, max=1.…

0,1
regret,▁

0,1
regret,60876.49594


[34m[1mwandb[0m: Agent Starting Run: kxmm30a9 with config:
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.0005718944699730492
[34m[1mwandb[0m: 	v_lr: 0.004634027262510338
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.201453957996769, max=1.0)…

0,1
regret,▁

0,1
regret,77314.26388


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 5xey7mj3 with config:
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.00306125533460317
[34m[1mwandb[0m: 	v_lr: 0.0017634525646850923
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.002 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.30866540164452877, max=1.…

0,1
regret,▁

0,1
regret,24733.16203


[34m[1mwandb[0m: Agent Starting Run: mh3facw7 with config:
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.005291666776189685
[34m[1mwandb[0m: 	v_lr: 0.0004108455603154003
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.1971853257432005, max=1.0…

0,1
regret,▁

0,1
regret,39119.31404


[34m[1mwandb[0m: Agent Starting Run: ihc7likp with config:
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.003227641820756161
[34m[1mwandb[0m: 	v_lr: 0.0025727382349589436
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.201453957996769, max=1.0)…

0,1
regret,▁

0,1
regret,18314.04507


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: addwd9er with config:
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.006256871439036766
[34m[1mwandb[0m: 	v_lr: 0.005440767983378704
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,82894.94172


[34m[1mwandb[0m: Agent Starting Run: ehk1yzh7 with config:
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.00444738151275657
[34m[1mwandb[0m: 	v_lr: 0.007662022101507007
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.005 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.8674260401835152, max=1.0…

0,1
regret,▁

0,1
regret,62110.24785


[34m[1mwandb[0m: Agent Starting Run: ybwjr895 with config:
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.000208070664384561
[34m[1mwandb[0m: 	v_lr: 0.007797296927828945
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.19670833992720368, max=1.…

0,1
regret,▁

0,1
regret,83762.17178


[34m[1mwandb[0m: Agent Starting Run: 089h2zut with config:
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.006912277077400382
[34m[1mwandb[0m: 	v_lr: 0.0058237668172652435
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.19721651114977068, max=1.…

0,1
regret,▁

0,1
regret,82373.1567


[34m[1mwandb[0m: Agent Starting Run: 83w618ss with config:
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.0031118591146732035
[34m[1mwandb[0m: 	v_lr: 0.000996484505007249
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.201453957996769, max=1.0)…

0,1
regret,▁

0,1
regret,41145.78801


[34m[1mwandb[0m: Agent Starting Run: ir83y2ue with config:
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.00923797118173501
[34m[1mwandb[0m: 	v_lr: 0.0068621612704327165
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.19724770642201836, max=1.…

0,1
regret,▁

0,1
regret,62798.05498


[34m[1mwandb[0m: Agent Starting Run: i3fdgcdx with config:
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.0029345654939186357
[34m[1mwandb[0m: 	v_lr: 0.007958380616237971
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.19724770642201836, max=1.…

0,1
regret,▁

0,1
regret,86701.14172


[34m[1mwandb[0m: Agent Starting Run: 9v1hq4pt with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.005598515983054646
[34m[1mwandb[0m: 	v_lr: 0.003679393163500731
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.19727891156462585, max=1.…

0,1
regret,▁

0,1
regret,46781.7058


[34m[1mwandb[0m: Agent Starting Run: 1xliwt4t with config:
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.0013895097248672668
[34m[1mwandb[0m: 	v_lr: 0.001268236177177905
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011277777777932999, max=1.0…

VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.201453957996769, max=1.0)…

0,1
regret,▁

0,1
regret,30176.4614


[34m[1mwandb[0m: Agent Starting Run: rb0oedk3 with config:
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.006305960401957159
[34m[1mwandb[0m: 	v_lr: 0.007633410476900988
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.19727891156462585, max=1.…

0,1
regret,▁

0,1
regret,68977.63642


[34m[1mwandb[0m: Agent Starting Run: gwq7zxvv with config:
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.007781597728460114
[34m[1mwandb[0m: 	v_lr: 0.006276011088777145
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011288888888925108, max=1.0…

VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.19721651114977068, max=1.…

0,1
regret,▁

0,1
regret,72271.14668


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: eiettjph with config:
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.006850561575640914
[34m[1mwandb[0m: 	v_lr: 0.009572433175423825
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.20148650832121506, max=1.…

0,1
regret,▁

0,1
regret,87673.06776


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: sur7oe4z with config:
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.009403037930068973
[34m[1mwandb[0m: 	v_lr: 0.008928971409450951
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.19721651114977068, max=1.…

0,1
regret,▁

0,1
regret,82161.37761


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: hpgmumqm with config:
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.002890711397239081
[34m[1mwandb[0m: 	v_lr: 0.0027099074467464134
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.002 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.30903052348568716, max=1.…

0,1
regret,▁

0,1
regret,21944.03611


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: k7vcygkd with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.0004607450893944088
[34m[1mwandb[0m: 	v_lr: 0.005583766074954187
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.20148650832121506, max=1.…

0,1
regret,▁

0,1
regret,85608.42355


[34m[1mwandb[0m: Agent Starting Run: ytgzlu5m with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.0035135824250152503
[34m[1mwandb[0m: 	v_lr: 0.003968432937448588
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.9788075280721177, max=1.0…

0,1
regret,▁

0,1
regret,25157.83921


[34m[1mwandb[0m: Agent Starting Run: jn0s0tlt with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.0062337707390701476
[34m[1mwandb[0m: 	v_lr: 0.006141647029086669
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.19715415019762847, max=1.…

0,1
regret,▁

0,1
regret,35267.82873


[34m[1mwandb[0m: Agent Starting Run: vw1sp40c with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.008321667773054063
[34m[1mwandb[0m: 	v_lr: 0.005974264036866227
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.1971853257432005, max=1.0…

0,1
regret,▁

0,1
regret,31162.36775


[34m[1mwandb[0m: Agent Starting Run: 7tohj36i with config:
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.0026587784910655347
[34m[1mwandb[0m: 	v_lr: 3.115989157921167e-05
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.19712298450837812, max=1.…

0,1
regret,▁

0,1
regret,34918.55026


[34m[1mwandb[0m: Agent Starting Run: zr5mohjj with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.00840340018267674
[34m[1mwandb[0m: 	v_lr: 0.004437464761096814
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.201551640536609, max=1.0)…

0,1
regret,▁

0,1
regret,29577.83872


[34m[1mwandb[0m: Agent Starting Run: n0xt3qa1 with config:
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.0032773801633387686
[34m[1mwandb[0m: 	v_lr: 0.000619344577962316
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.201453957996769, max=1.0)…

0,1
regret,▁

0,1
regret,21152.5436


[34m[1mwandb[0m: Agent Starting Run: kpy389z4 with config:
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.004444240845605471
[34m[1mwandb[0m: 	v_lr: 0.00013380884095352051
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.19699604743083005, max=1.…

0,1
regret,▁

0,1
regret,36636.85445


[34m[1mwandb[0m: Agent Starting Run: iz7zz3mk with config:
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.0008266455153662652
[34m[1mwandb[0m: 	v_lr: 0.007252743740547303
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011111111111111112, max=1.0…

VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.201453957996769, max=1.0)…

0,1
regret,▁

0,1
regret,65873.86799


[34m[1mwandb[0m: Agent Starting Run: onvyvc0a with config:
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.009251170609565129
[34m[1mwandb[0m: 	v_lr: 0.005850563316578419
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.9788075280721177, max=1.0…

0,1
regret,▁

0,1
regret,87240.08286


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: h0lw0rw7 with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.003671961229773766
[34m[1mwandb[0m: 	v_lr: 0.004520801609594761
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.19721651114977068, max=1.…

0,1
regret,▁

0,1
regret,37193.51288


[34m[1mwandb[0m: Agent Starting Run: lwiq6iup with config:
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.006005171074734203
[34m[1mwandb[0m: 	v_lr: 0.008636417924478924
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011277777777932999, max=1.0…

VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.19715415019762847, max=1.…

0,1
regret,▁

0,1
regret,27671.37022


[34m[1mwandb[0m: Agent Starting Run: 33c0d650 with config:
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.004316547885203413
[34m[1mwandb[0m: 	v_lr: 0.002074411709907197
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.1971853257432005, max=1.0…

0,1
regret,▁

0,1
regret,34422.08056


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: z8wcy2of with config:
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.009473636673780294
[34m[1mwandb[0m: 	v_lr: 0.0018440978950411745
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.19715415019762847, max=1.…

0,1
regret,▁

0,1
regret,36661.52996


[34m[1mwandb[0m: Agent Starting Run: frqleczx with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.004410845975056446
[34m[1mwandb[0m: 	v_lr: 0.005822142292926987
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011111111111111112, max=1.0…

VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.19727891156462585, max=1.…

0,1
regret,▁

0,1
regret,43459.42411


[34m[1mwandb[0m: Agent Starting Run: 7j49eq0o with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.0009234934761326038
[34m[1mwandb[0m: 	v_lr: 0.0018960518832335383
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.201453957996769, max=1.0)…

0,1
regret,▁

0,1
regret,70681.07537


[34m[1mwandb[0m: Agent Starting Run: z48bk35u with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.005971794696355969
[34m[1mwandb[0m: 	v_lr: 0.00477299017580429
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.201551640536609, max=1.0)…

0,1
regret,▁

0,1
regret,22495.69991


[34m[1mwandb[0m: Agent Starting Run: 22cadvgp with config:
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.004357780963225896
[34m[1mwandb[0m: 	v_lr: 0.00973422308495233
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.19727891156462585, max=1.…

0,1
regret,▁

0,1
regret,59676.64617


[34m[1mwandb[0m: Agent Starting Run: zztmf7q4 with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.0013480192285096717
[34m[1mwandb[0m: 	v_lr: 0.00894429686511846
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.19727891156462585, max=1.…

0,1
regret,▁

0,1
regret,82997.16232


[34m[1mwandb[0m: Agent Starting Run: hjdl9pw0 with config:
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.006209599639807507
[34m[1mwandb[0m: 	v_lr: 0.008130205879368656
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.19727891156462585, max=1.…

0,1
regret,▁

0,1
regret,68843.01158


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: rzjwzr00 with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.0013946169254015815
[34m[1mwandb[0m: 	v_lr: 0.007227191330582024
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.19721651114977068, max=1.…

0,1
regret,▁

0,1
regret,88494.33407


[34m[1mwandb[0m: Agent Starting Run: vownknc6 with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.001409243045936953
[34m[1mwandb[0m: 	v_lr: 0.009042072286852871
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,77028.55279


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: j24zf407 with config:
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.001674556421180192
[34m[1mwandb[0m: 	v_lr: 0.008442961923085508
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.20148650832121506, max=1.…

0,1
regret,▁

0,1
regret,68523.39763


[34m[1mwandb[0m: Agent Starting Run: sqj4uxud with config:
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.007350670382656516
[34m[1mwandb[0m: 	v_lr: 0.006363203738550439
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.20148650832121506, max=1.…

0,1
regret,▁

0,1
regret,75851.29589


[34m[1mwandb[0m: Agent Starting Run: 09is6riv with config:
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.004074568739485072
[34m[1mwandb[0m: 	v_lr: 0.004349599329435929
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,21461.34075


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: ikc9i7s4 with config:
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.00336844706662755
[34m[1mwandb[0m: 	v_lr: 0.005218537389815134
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.201519069166128, max=1.0)…

0,1
regret,▁

0,1
regret,59152.03173


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: kjv34285 with config:
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.009046590545287888
[34m[1mwandb[0m: 	v_lr: 0.005163803644826543
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.19712070874861573, max=1.…

0,1
regret,▁

0,1
regret,92774.26648


[34m[1mwandb[0m: Agent Starting Run: wth5kcuy with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.008847169098638129
[34m[1mwandb[0m: 	v_lr: 0.006448833813775376
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.19727891156462585, max=1.…

0,1
regret,▁

0,1
regret,51899.74189


[34m[1mwandb[0m: Agent Starting Run: 3wejo6iw with config:
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.008024641676987389
[34m[1mwandb[0m: 	v_lr: 0.001641968765093885
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.19721651114977068, max=1.…

0,1
regret,▁

0,1
regret,59144.0742


[34m[1mwandb[0m: Agent Starting Run: uveszpnc with config:
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.007051414206793045
[34m[1mwandb[0m: 	v_lr: 0.005739678555419597
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.005 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.8674260401835152, max=1.0…

0,1
regret,▁

0,1
regret,60302.41305


[34m[1mwandb[0m: Agent Starting Run: f116okmh with config:
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.007748141364732554
[34m[1mwandb[0m: 	v_lr: 0.006659677281571885
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.19721651114977068, max=1.…

0,1
regret,▁

0,1
regret,92759.89849


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: qjyulct8 with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.0016069164367820312
[34m[1mwandb[0m: 	v_lr: 0.001900655201294976
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.1971853257432005, max=1.0…

0,1
regret,▁

0,1
regret,42069.22423


[34m[1mwandb[0m: Agent Starting Run: p9rex2kg with config:
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.002649716026484871
[34m[1mwandb[0m: 	v_lr: 0.009748728581764012
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.19727891156462585, max=1.…

0,1
regret,▁

0,1
regret,67104.7961


[34m[1mwandb[0m: Agent Starting Run: k0j54tiv with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.006429175605001996
[34m[1mwandb[0m: 	v_lr: 0.009389928062880047
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.19727891156462585, max=1.…

0,1
regret,▁

0,1
regret,84763.56092


[34m[1mwandb[0m: Agent Starting Run: a50ypobu with config:
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.008978244489332413
[34m[1mwandb[0m: 	v_lr: 0.000961621311370199
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,47545.68917


[34m[1mwandb[0m: Agent Starting Run: aapm5vzm with config:
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.009083675745345856
[34m[1mwandb[0m: 	v_lr: 0.0033141588422680246
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.19712298450837812, max=1.…

0,1
regret,▁

0,1
regret,36175.02813


[34m[1mwandb[0m: Agent Starting Run: itplsdq5 with config:
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.0018084751229429125
[34m[1mwandb[0m: 	v_lr: 0.00996880170918153
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,79737.02795


[34m[1mwandb[0m: Agent Starting Run: d5ipcuqw with config:
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.007968844640034629
[34m[1mwandb[0m: 	v_lr: 0.008409228364735344
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.20148650832121506, max=1.…

0,1
regret,▁

0,1
regret,71581.10814


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 9wbakrvf with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.005703950999742537
[34m[1mwandb[0m: 	v_lr: 0.003375844018633159
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.9788041758937045, max=1.0…

0,1
regret,▁

0,1
regret,21784.82587


[34m[1mwandb[0m: Agent Starting Run: zbbelvjg with config:
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.004286919248005626
[34m[1mwandb[0m: 	v_lr: 0.00991893669826537
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.002 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.30881189685176397, max=1.…

0,1
regret,▁

0,1
regret,53050.15953


[34m[1mwandb[0m: Agent Starting Run: sfymd3ye with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.007993293431822574
[34m[1mwandb[0m: 	v_lr: 0.0049483195468286055
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.19721651114977068, max=1.…

0,1
regret,▁

0,1
regret,39668.90657


[34m[1mwandb[0m: Agent Starting Run: pk2gwce9 with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.005467766284838708
[34m[1mwandb[0m: 	v_lr: 0.00952828900703655
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.201551640536609, max=1.0)…

0,1
regret,▁

0,1
regret,59014.38892


[34m[1mwandb[0m: Agent Starting Run: gx9ttc3e with config:
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.002726897833332585
[34m[1mwandb[0m: 	v_lr: 0.0005626546249484074
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.201453957996769, max=1.0)…

0,1
regret,▁

0,1
regret,28627.84086


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: neew355a with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.0043357841589108225
[34m[1mwandb[0m: 	v_lr: 0.008563323262047418
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.19712070874861573, max=1.…

0,1
regret,▁

0,1
regret,73282.25705


[34m[1mwandb[0m: Agent Starting Run: 10s74rix with config:
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.003727010276230646
[34m[1mwandb[0m: 	v_lr: 0.005126948876568439
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.20148650832121506, max=1.…

0,1
regret,▁

0,1
regret,67450.75432


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: t0ik51n5 with config:
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.004167246334354071
[34m[1mwandb[0m: 	v_lr: 0.008258861906580286
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,39786.64911


[34m[1mwandb[0m: Agent Starting Run: kr67cqax with config:
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.008795746291083428
[34m[1mwandb[0m: 	v_lr: 0.009766701757377236
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.19712070874861573, max=1.…

0,1
regret,▁

0,1
regret,55029.02124


[34m[1mwandb[0m: Agent Starting Run: 6e0hjqij with config:
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.0014419137829604215
[34m[1mwandb[0m: 	v_lr: 0.008560452988009308
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.19721651114977068, max=1.…

0,1
regret,▁

0,1
regret,77508.84352


[34m[1mwandb[0m: Agent Starting Run: rrl38n6w with config:
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.0015623786041139658
[34m[1mwandb[0m: 	v_lr: 0.004120691624926441
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.201453957996769, max=1.0)…

0,1
regret,▁

0,1
regret,39463.11096


[34m[1mwandb[0m: Agent Starting Run: p29gn4x8 with config:
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.00593791655251906
[34m[1mwandb[0m: 	v_lr: 0.008502035096875967
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011277777777286247, max=1.0…

VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.19721651114977068, max=1.…

0,1
regret,▁

0,1
regret,48844.66355


[34m[1mwandb[0m: Agent Starting Run: r33mf6ws with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.0023354935172932084
[34m[1mwandb[0m: 	v_lr: 0.006149399799087908
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.19712070874861573, max=1.…

0,1
regret,▁

0,1
regret,76198.05116


[34m[1mwandb[0m: Agent Starting Run: tzk0kez9 with config:
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.008065348790188234
[34m[1mwandb[0m: 	v_lr: 0.00897850137821249
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,31614.86529


[34m[1mwandb[0m: Agent Starting Run: auutam29 with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.0014304515012504195
[34m[1mwandb[0m: 	v_lr: 0.009536284233826804
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,83597.8869


[34m[1mwandb[0m: Agent Starting Run: z1zrpnqr with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.00233865807081393
[34m[1mwandb[0m: 	v_lr: 0.002274149406316568
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011288888888925108, max=1.0…

VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.201551640536609, max=1.0)…

0,1
regret,▁

0,1
regret,30251.68326


[34m[1mwandb[0m: Agent Starting Run: joyk4zmx with config:
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.002307380812335941
[34m[1mwandb[0m: 	v_lr: 0.005610217266218829
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,52272.87908


[34m[1mwandb[0m: Agent Starting Run: 9ohy0s6a with config:
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.009488452449211437
[34m[1mwandb[0m: 	v_lr: 0.005670391120363442
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.20148650832121506, max=1.…

0,1
regret,▁

0,1
regret,63306.29685


[34m[1mwandb[0m: Agent Starting Run: 3g79690k with config:
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.003854199423386531
[34m[1mwandb[0m: 	v_lr: 0.0037865858966512336
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.1971853257432005, max=1.0…

0,1
regret,▁

0,1
regret,40354.25759


[34m[1mwandb[0m: Agent Starting Run: w46hyjvx with config:
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.0014871608300002052
[34m[1mwandb[0m: 	v_lr: 0.003494385538324875
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.9789656808476989, max=1.0…

0,1
regret,▁

0,1
regret,36127.91605


[34m[1mwandb[0m: Agent Starting Run: jsjhfm65 with config:
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.005577011675349446
[34m[1mwandb[0m: 	v_lr: 0.009198746251596526
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.005 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.8674260401835152, max=1.0…

0,1
regret,▁

0,1
regret,53621.92185


[34m[1mwandb[0m: Agent Starting Run: qzd0x5d8 with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.0012889179051428948
[34m[1mwandb[0m: 	v_lr: 0.0055984235294951595
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011288888888925108, max=1.0…

VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.201453957996769, max=1.0)…

0,1
regret,▁

0,1
regret,78626.51553


[34m[1mwandb[0m: Agent Starting Run: b813jyen with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.0025188524465702884
[34m[1mwandb[0m: 	v_lr: 0.002935453853834095
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.20148650832121506, max=1.…

0,1
regret,▁

0,1
regret,33727.31454


[34m[1mwandb[0m: Agent Starting Run: 1ni3utv7 with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.006255499084041381
[34m[1mwandb[0m: 	v_lr: 0.003972533431322997
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.19721651114977068, max=1.…

0,1
regret,▁

0,1
regret,43878.91861


[34m[1mwandb[0m: Agent Starting Run: 6x2udh2n with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.00427114955093558
[34m[1mwandb[0m: 	v_lr: 0.008489767412001744
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,90161.35276


[34m[1mwandb[0m: Agent Starting Run: fuhtj1ib with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.004607317546142112
[34m[1mwandb[0m: 	v_lr: 0.004179078234876977
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.19721651114977068, max=1.…

0,1
regret,▁

0,1
regret,38151.19737


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 0ipikv8l with config:
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.0038762400117539984
[34m[1mwandb[0m: 	v_lr: 0.001116420124236624
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.19715415019762847, max=1.…

0,1
regret,▁

0,1
regret,24571.81908


[34m[1mwandb[0m: Agent Starting Run: h0gpyaph with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.006748225757086695
[34m[1mwandb[0m: 	v_lr: 0.005026319882307229
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.005 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.8674260401835152, max=1.0…

0,1
regret,▁

0,1
regret,57760.40411


[34m[1mwandb[0m: Agent Starting Run: unl8tkda with config:
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.0037807162293898777
[34m[1mwandb[0m: 	v_lr: 0.008407117200954045
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.005 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.8670145477545858, max=1.0…

0,1
regret,▁

0,1
regret,64722.86315


[34m[1mwandb[0m: Agent Starting Run: 3v3zzjn6 with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.009324152830656143
[34m[1mwandb[0m: 	v_lr: 0.00433363156413143
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.19724770642201836, max=1.…

0,1
regret,▁

0,1
regret,42321.38353


[34m[1mwandb[0m: Agent Starting Run: m5d44ipc with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.0011999618897520178
[34m[1mwandb[0m: 	v_lr: 0.0026864979720481
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.1973413514796645, max=1.0…

0,1
regret,▁

0,1
regret,67476.4265


[34m[1mwandb[0m: Agent Starting Run: e4cp5q1t with config:
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.0006386510760824062
[34m[1mwandb[0m: 	v_lr: 0.004607932003358942
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.1970895286301803, max=1.0…

0,1
regret,▁

0,1
regret,70089.56727


[34m[1mwandb[0m: Agent Starting Run: 8kr08z6p with config:
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.0058566280945864975
[34m[1mwandb[0m: 	v_lr: 0.00976160846567064
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.005 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.8675632911392405, max=1.0…

0,1
regret,▁

0,1
regret,68208.1164


[34m[1mwandb[0m: Agent Starting Run: zxr38hv4 with config:
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.00872920194291942
[34m[1mwandb[0m: 	v_lr: 0.0015618853005120016
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.20148650832121506, max=1.…

0,1
regret,▁

0,1
regret,92350.69227


[34m[1mwandb[0m: Agent Starting Run: ehfg18r6 with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.004113984809770889
[34m[1mwandb[0m: 	v_lr: 0.002193600363278942
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.19724770642201836, max=1.…

0,1
regret,▁

0,1
regret,24278.708


[34m[1mwandb[0m: Agent Starting Run: mg9twdm3 with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.004625433777016524
[34m[1mwandb[0m: 	v_lr: 0.001979897280732906
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.19721651114977068, max=1.…

0,1
regret,▁

0,1
regret,20399.3544


[34m[1mwandb[0m: Agent Starting Run: pg80ytxs with config:
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.0004105119487147762
[34m[1mwandb[0m: 	v_lr: 0.004621425523609646
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011111111111111112, max=1.0…

VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.19724770642201836, max=1.…

0,1
regret,▁

0,1
regret,68110.33502


[34m[1mwandb[0m: Agent Starting Run: 1cizjqji with config:
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.005991933833812511
[34m[1mwandb[0m: 	v_lr: 0.00018764387269670375
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.19712298450837812, max=1.…

0,1
regret,▁

0,1
regret,37597.37758


[34m[1mwandb[0m: Agent Starting Run: z7qm72n5 with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.0020837448979475165
[34m[1mwandb[0m: 	v_lr: 0.007301670442777245
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.19727891156462585, max=1.…

0,1
regret,▁

0,1
regret,68947.31515


[34m[1mwandb[0m: Agent Starting Run: vyhcfefz with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.0032986716578151903
[34m[1mwandb[0m: 	v_lr: 0.008567484158038591
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.9789623536855425, max=1.0…

0,1
regret,▁

0,1
regret,66132.8565


[34m[1mwandb[0m: Agent Starting Run: xllfzv51 with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.0003373311400914942
[34m[1mwandb[0m: 	v_lr: 0.009537603172434931
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.9788075280721177, max=1.0…

0,1
regret,▁

0,1
regret,86144.56301


[34m[1mwandb[0m: Agent Starting Run: qyxkugp6 with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.008062445394577664
[34m[1mwandb[0m: 	v_lr: 0.005356621017486772
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.19731012658227848, max=1.…

0,1
regret,▁

0,1
regret,79054.73785


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: aa0ul7p0 with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.0028894880792444807
[34m[1mwandb[0m: 	v_lr: 0.003847366847767371
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.20148650832121506, max=1.…

0,1
regret,▁

0,1
regret,34333.7282


[34m[1mwandb[0m: Agent Starting Run: jc4uceey with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.0098865988887666
[34m[1mwandb[0m: 	v_lr: 0.007586421522432205
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,58467.63173


[34m[1mwandb[0m: Agent Starting Run: zi7emho3 with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.004432891307707468
[34m[1mwandb[0m: 	v_lr: 0.008013119969696692
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.201519069166128, max=1.0)…

0,1
regret,▁

0,1
regret,25898.08981


[34m[1mwandb[0m: Agent Starting Run: tw3muzi0 with config:
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.00947445811369391
[34m[1mwandb[0m: 	v_lr: 0.007013745265203832
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.19727891156462585, max=1.…

0,1
regret,▁

0,1
regret,73908.80923


[34m[1mwandb[0m: Agent Starting Run: 4iti6r04 with config:
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.006707625817213161
[34m[1mwandb[0m: 	v_lr: 0.005736843954865877
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.19721651114977068, max=1.…

0,1
regret,▁

0,1
regret,42596.8833


[34m[1mwandb[0m: Agent Starting Run: pvtcjygp with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.0002102185977353526
[34m[1mwandb[0m: 	v_lr: 0.009408956972895064
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.19731012658227848, max=1.…

0,1
regret,▁

0,1
regret,85138.42618


[34m[1mwandb[0m: Agent Starting Run: 6qvi7ltc with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.00022175004146179393
[34m[1mwandb[0m: 	v_lr: 0.006210817741808741
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.002 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.30903052348568716, max=1.…

0,1
regret,▁

0,1
regret,86490.82484


[34m[1mwandb[0m: Agent Starting Run: 4xiawafv with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.00021669588483387537
[34m[1mwandb[0m: 	v_lr: 0.007113476853085916
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.005 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.8671516685117824, max=1.0…

0,1
regret,▁

0,1
regret,86842.48572


[34m[1mwandb[0m: Agent Starting Run: xd6l24yj with config:
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.006500781346067027
[34m[1mwandb[0m: 	v_lr: 0.006459629469768535
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.19727891156462585, max=1.…

0,1
regret,▁

0,1
regret,61139.52031


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: oaqbyqlp with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.0040755427823022635
[34m[1mwandb[0m: 	v_lr: 0.009722459259722769
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.20148650832121506, max=1.…

0,1
regret,▁

0,1
regret,69416.86894


[34m[1mwandb[0m: Agent Starting Run: k9he091k with config:
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.007532784441169557
[34m[1mwandb[0m: 	v_lr: 0.009340780572769476
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,87008.18843


[34m[1mwandb[0m: Agent Starting Run: vyuf7fua with config:
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.005394747376554499
[34m[1mwandb[0m: 	v_lr: 0.0032220466528321973
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.201453957996769, max=1.0)…

0,1
regret,▁

0,1
regret,37843.51715


[34m[1mwandb[0m: Agent Starting Run: bvwo6gql with config:
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.0056260450012436975
[34m[1mwandb[0m: 	v_lr: 0.008248881219500431
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.19712298450837812, max=1.…

0,1
regret,▁

0,1
regret,22689.05838


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: evzlewji with config:
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.008771838552127407
[34m[1mwandb[0m: 	v_lr: 0.008631744342087144
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.20148650832121506, max=1.…

0,1
regret,▁

0,1
regret,58082.16369


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 8l5x4zey with config:
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.0008612891740197415
[34m[1mwandb[0m: 	v_lr: 0.0002778469510031475
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.19712298450837812, max=1.…

0,1
regret,▁

0,1
regret,31782.68827


[34m[1mwandb[0m: Agent Starting Run: w46blhlc with config:
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.009570288492339507
[34m[1mwandb[0m: 	v_lr: 0.0016400429113518735
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.005 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.8668774703557313, max=1.0…

0,1
regret,▁

0,1
regret,13411.07642


[34m[1mwandb[0m: Agent Starting Run: 7k2y39t5 with config:
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.006834130295614496
[34m[1mwandb[0m: 	v_lr: 0.0034358621577920243
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.19721651114977068, max=1.…

0,1
regret,▁

0,1
regret,69423.54112


[34m[1mwandb[0m: Agent Starting Run: wa93fcdd with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.007218868063863884
[34m[1mwandb[0m: 	v_lr: 0.0004898065893319532
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.005 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.8670145477545858, max=1.0…

0,1
regret,▁

0,1
regret,29577.09201


[34m[1mwandb[0m: Agent Starting Run: qqn02qa0 with config:
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.0012707675996378625
[34m[1mwandb[0m: 	v_lr: 0.0013400570927232765
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


: 