In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import gym
import wandb
import numpy as np
from tqdm import tqdm

class Policy(nn.Module):
    def __init__(self, state_dim, hidden_dim, action_dim):
        super(Policy, self).__init__()
        self.fc1 = torch.nn.Linear(state_dim, hidden_dim)
        self.fc2 = torch.nn.Linear(hidden_dim, action_dim)

    def forward(self, x):
        x = F.relu(self.fc1(x))
        return F.softmax(self.fc2(x), dim=1)

class Value(nn.Module):
    
    #Takes in state
    def __init__(self, observation_space):
        super(Value, self).__init__()
        
        self.input_layer = nn.Linear(observation_space, 128)
        self.output_layer = nn.Linear(128, 1)
        
    def forward(self, x):
        #input layer
        x = self.input_layer(x)
        
        #activiation relu
        x = F.relu(x)
        
        #get state value
        state_value = self.output_layer(x)
        
        return state_value
    
class REINFORCE:
    def __init__(self, state_dim, hidden_dim, action_dim, learning_rate, gamma,
                 device,v_lr):
        self.policy_net = Policy(state_dim, hidden_dim, action_dim).to(device)
        self.value_net=Value(state_dim).to(device)
        self.optimizer = torch.optim.Adam(self.policy_net.parameters(),
                                          lr=learning_rate)
        self.voptimizer=torch.optim.Adam(self.value_net.parameters(),lr=v_lr)
        self.gamma = gamma
        self.device = device

    def take_action(self, state):  # randomly sample an action according to the probability distribution
        state = torch.tensor(state.reshape(1, -1), dtype=torch.float).to(self.device) 
                                                                      
        probs = self.policy_net(state)
        action_dist = torch.distributions.Categorical(probs) 
        action = action_dist.sample()
        return action.item()
    

    def update(self, transition_dict):
        reward_list = transition_dict['rewards']
        state_list = transition_dict['states']
        action_list = transition_dict['actions']

        G = 0
        self.optimizer.zero_grad()
        self.voptimizer.zero_grad()
        for i in reversed(range(len(reward_list))):  # Back to Front, calc G
            reward = reward_list[i]
            state = torch.tensor(state_list[i].reshape(1, -1),
                                 dtype=torch.float).to(self.device)
            action = torch.tensor(action_list[i]).view(-1, 1).to(self.device)
            log_prob = torch.log(self.policy_net(state).gather(1, action))  # calc log probability of action
            G = self.gamma * G + reward  # calculate return starting from this step
            state_value=self.value_net(state)
            loss = -log_prob * (G - state_value)    # loss of each step
            loss.backward(retain_graph=True) # The gradient will be accumulated in each step
            
        self.optimizer.step()
        target_value = torch.tensor([[G]], dtype=torch.float).to(self.device)  # Target value is the calculated return G

        # Calculate the Mean Squared Error loss
        value_loss = F.mse_loss(state_value, target_value)

        # Backpropagate and update the value network
        value_loss.backward(retain_graph=True)
        self.voptimizer.step()



# agent params
learning_rate = 1e-3
gamma = 0.99
hidden_dim = 128
v_lr=0.005
# training params
num_pbar = 10
num_episodes = 500
num_seeds = 5
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# environment params
env_name = "Acrobot-v1"


# Define the sweep configuration
sweep_config = {
    "method": "bayes",
    "metric": {"name": "regret", "goal": "minimize"},
    "parameters": {
        "learning_rate": {"min": 1e-5, "max": 1e-2},
        "v_lr": {"min": 1e-5, "max": 1e-2},
        "hidden_dim": {"values": [64, 128, 256]}
    },
    "project": "acrobot_with_baseline",
    "early_terminate": {
        "type": "hyperband",
        "min_iter": 3,
        "max_iter": 100
}
}
# Initialize the sweep
sweep_id = wandb.sweep(sweep_config)

def train(learning_rate,v_lr,hidden_dim):
    # create environment
    env = gym.make(env_name)
    env.reset(seed=0)
    # create agent
    state_dim = env.observation_space.shape[0]
    action_dim = env.action_space.n
    agent = REINFORCE(state_dim, hidden_dim, action_dim, learning_rate, gamma, device,v_lr)
    # record episode return, for plotting
    return_list = []
    # train agent
    for i_episode in range(num_episodes):
        episode_return = 0 # return of the whole episode
        transition_dict = { # store transitions of the episode
            "states": [],
            "actions": [],
            "next_states": [],
            "rewards": [],
            "dones": []
        }
        state, _ = env.reset() # reset environment
        terminated, truncated = False, False
        while not terminated and not truncated:
            action = agent.take_action(state) # select an action
            next_state, reward, terminated, truncated, _ = env.step(action) # take action and get next state, reward, termination signal
            # store transition
            transition_dict["states"].append(state)
            transition_dict["actions"].append(action)
            transition_dict["next_states"].append(next_state)
            transition_dict["rewards"].append(reward)
            transition_dict["dones"].append(terminated)
            state = next_state
            episode_return += reward
        # add episode return to return list
        return_list.append(episode_return)
        # update agent per episode(MC)
        agent.update(transition_dict)

    avg = []
    for i in range(num_episodes):
        if i < 100:
            avg_return = np.mean(return_list[:i+1])
        else:
            avg_return = np.mean(return_list[i-100:i+1])
        avg.append(avg_return)

    regret = np.sum(avg)

    return -regret    
   
def run_training():
    config_defaults = {
        "learning_rate": 1e-3,
        "v_lr": 0.005,
        "hidden_dim": 128
    }
    config = wandb.init(config=config_defaults,project="acrobot_with_baseline")
    learning_rate = config.config["learning_rate"]
    v_lr=config.config['v_lr']
    hidden_dim = config.config["hidden_dim"]
    regret = train(learning_rate,v_lr, hidden_dim)
    wandb.log({"regret": regret})

# Run the sweep
wandb.agent(sweep_id, function=run_training)




Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Create sweep with ID: 53qxj61d
Sweep URL: https://wandb.ai/rl_shobhith/acrobot_with_baseline/sweeps/53qxj61d


[34m[1mwandb[0m: Agent Starting Run: nmh9t4hp with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.004552749592920699
[34m[1mwandb[0m: 	v_lr: 0.005221009021681113
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mshobhith-v[0m ([33mrl_shobhith[0m). Use [1m`wandb login --relogin`[0m to force relogin


  if not isinstance(terminated, (bool, np.bool8)):


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,171015.00045


[34m[1mwandb[0m: Agent Starting Run: x43uj54d with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.006993533519125113
[34m[1mwandb[0m: 	v_lr: 0.006223217429513339
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011277777777932999, max=1.0…

VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.20139001131404557, max=1.…

0,1
regret,▁

0,1
regret,90272.22743


[34m[1mwandb[0m: Agent Starting Run: uhao8tw6 with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.009915387995815338
[34m[1mwandb[0m: 	v_lr: 0.0040275302243207075
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,61757.35529


[34m[1mwandb[0m: Agent Starting Run: a7qgxjl6 with config:
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.0034264422076670633
[34m[1mwandb[0m: 	v_lr: 0.005960273137984552
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.19715415019762847, max=1.…

0,1
regret,▁

0,1
regret,162214.76796


[34m[1mwandb[0m: Agent Starting Run: b313vt14 with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.0004807892868057421
[34m[1mwandb[0m: 	v_lr: 0.0047852569585864445
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011288888888925108, max=1.0…

VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.19721651114977068, max=1.…

0,1
regret,▁

0,1
regret,232339.18098


[34m[1mwandb[0m: Agent Starting Run: wdkymimw with config:
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.009520441587881168
[34m[1mwandb[0m: 	v_lr: 0.007086775105697127
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.1974976243268926, max=1.0…

0,1
regret,▁

0,1
regret,250000.0


[34m[1mwandb[0m: Agent Starting Run: lddqcd4q with config:
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.0026434624367717586
[34m[1mwandb[0m: 	v_lr: 0.0054616189981258265
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.19709182867077604, max=1.…

0,1
regret,▁

0,1
regret,106121.12288


[34m[1mwandb[0m: Agent Starting Run: eu8buf1o with config:
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.000363877299464914
[34m[1mwandb[0m: 	v_lr: 0.005623285865032072
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.002 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.30926365795724464, max=1.…

0,1
regret,▁

0,1
regret,248670.0


[34m[1mwandb[0m: Agent Starting Run: a69jdplr with config:
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.0029436336400712527
[34m[1mwandb[0m: 	v_lr: 0.005994316562497773
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.19715415019762847, max=1.…

0,1
regret,▁

0,1
regret,210647.9553


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 8wu2s5hp with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.009653246030747912
[34m[1mwandb[0m: 	v_lr: 0.004922427686908353
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.1970895286301803, max=1.0…

0,1
regret,▁

0,1
regret,93727.57733


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 58ys3dur with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.008441010405858845
[34m[1mwandb[0m: 	v_lr: 0.008410562767219958
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.201519069166128, max=1.0)…

0,1
regret,▁

0,1
regret,179065.98332


[34m[1mwandb[0m: Agent Starting Run: ht16wck7 with config:
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.00688595491582596
[34m[1mwandb[0m: 	v_lr: 0.007820018215013987
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.19721651114977068, max=1.…

0,1
regret,▁

0,1
regret,153558.05215


[34m[1mwandb[0m: Agent Starting Run: sw4hj73v with config:
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.006594538768113569
[34m[1mwandb[0m: 	v_lr: 0.0013342160305042723
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.19702719797596457, max=1.…

0,1
regret,▁

0,1
regret,110464.33241


[34m[1mwandb[0m: Agent Starting Run: m4xfj8t6 with config:
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.006786620929350094
[34m[1mwandb[0m: 	v_lr: 0.0005532560255747596
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,83455.51064


[34m[1mwandb[0m: Agent Starting Run: 2qizad25 with config:
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.0033086448392532423
[34m[1mwandb[0m: 	v_lr: 0.005220467434830157
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.19712298450837812, max=1.…

0,1
regret,▁

0,1
regret,207005.55446


[34m[1mwandb[0m: Agent Starting Run: pya7h1xb with config:
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.0028140080216991726
[34m[1mwandb[0m: 	v_lr: 0.002946392982993929
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.201453957996769, max=1.0)…

0,1
regret,▁

0,1
regret,250000.0


[34m[1mwandb[0m: Agent Starting Run: voq0u6dy with config:
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.0019277724513011823
[34m[1mwandb[0m: 	v_lr: 0.0038021581254067895
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.2014214181876918, max=1.0…

0,1
regret,▁

0,1
regret,117556.39378


[34m[1mwandb[0m: Agent Starting Run: w23bn6rv with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.008666389807923582
[34m[1mwandb[0m: 	v_lr: 0.006154276753527447
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.005 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.8671516685117824, max=1.0…

0,1
regret,▁

0,1
regret,204629.31901


[34m[1mwandb[0m: Agent Starting Run: b4dtmosl with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.002958335933396122
[34m[1mwandb[0m: 	v_lr: 0.007387649756190798
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.201519069166128, max=1.0)…

0,1
regret,▁

0,1
regret,188232.61426


[34m[1mwandb[0m: Agent Starting Run: 1y6vpbb7 with config:
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.004641417293842725
[34m[1mwandb[0m: 	v_lr: 0.008070843458325191
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.1971853257432005, max=1.0…

0,1
regret,▁

0,1
regret,249802.0087


[34m[1mwandb[0m: Agent Starting Run: u5kut4tr with config:
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.004124191953114002
[34m[1mwandb[0m: 	v_lr: 0.0036966233724668143
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.19715415019762847, max=1.…

0,1
regret,▁

0,1
regret,244591.31865


[34m[1mwandb[0m: Agent Starting Run: gs4wrfbm with config:
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.0026236614506980685
[34m[1mwandb[0m: 	v_lr: 0.007299781634985243
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.201453957996769, max=1.0)…

0,1
regret,▁

0,1
regret,108028.09944


[34m[1mwandb[0m: Agent Starting Run: 71nehrl1 with config:
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.001620793149271947
[34m[1mwandb[0m: 	v_lr: 0.004650030505075499
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.19715415019762847, max=1.…

0,1
regret,▁

0,1
regret,127754.9119


[34m[1mwandb[0m: Agent Starting Run: 3hz2r3a1 with config:
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.008049850245887308
[34m[1mwandb[0m: 	v_lr: 0.00975305921632539
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.19699367088607594, max=1.…

0,1
regret,▁

0,1
regret,194866.42437


[34m[1mwandb[0m: Agent Starting Run: skrrvuvt with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.004024285593683214
[34m[1mwandb[0m: 	v_lr: 0.00986219540667774
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.201551640536609, max=1.0)…

0,1
regret,▁

0,1
regret,248834.57151


[34m[1mwandb[0m: Agent Starting Run: k5q9gnzf with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.006995365027061597
[34m[1mwandb[0m: 	v_lr: 0.00845677298612605
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.201551640536609, max=1.0)…

0,1
regret,▁

0,1
regret,183812.26849


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: ytlz5591 with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.009731722938083947
[34m[1mwandb[0m: 	v_lr: 0.00291760677506834
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.201551640536609, max=1.0)…

0,1
regret,▁

0,1
regret,249946.38966


[34m[1mwandb[0m: Agent Starting Run: tca5qyk4 with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.008192732095575351
[34m[1mwandb[0m: 	v_lr: 7.715437416481317e-05
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.20148650832121506, max=1.…

0,1
regret,▁

0,1
regret,72901.81153


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: qxztmhf7 with config:
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.005409626188008731
[34m[1mwandb[0m: 	v_lr: 0.00405183546762517
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.005 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.8883433639531201, max=1.0…

0,1
regret,▁

0,1
regret,249892.0


[34m[1mwandb[0m: Agent Starting Run: jpw22uc2 with config:
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.005917548401438269
[34m[1mwandb[0m: 	v_lr: 0.006577499775539307
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.20148650832121506, max=1.…

0,1
regret,▁

0,1
regret,246738.96977


[34m[1mwandb[0m: Agent Starting Run: i2a86fox with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.002224665463502999
[34m[1mwandb[0m: 	v_lr: 0.00343428671463182
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.201551640536609, max=1.0)…

0,1
regret,▁

0,1
regret,162061.64793


[34m[1mwandb[0m: Agent Starting Run: y1l23qd5 with config:
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.0014049923338307637
[34m[1mwandb[0m: 	v_lr: 0.002344372984448013
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.19724770642201836, max=1.…

0,1
regret,▁

0,1
regret,210563.87648


[34m[1mwandb[0m: Agent Starting Run: kgbw13b9 with config:
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.0051459264606062245
[34m[1mwandb[0m: 	v_lr: 0.0024649644719118924
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.005 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.888045540796964, max=1.0)…

0,1
regret,▁

0,1
regret,248368.65021


[34m[1mwandb[0m: Agent Starting Run: ppmh01mr with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.002249148352079774
[34m[1mwandb[0m: 	v_lr: 0.007637728930201383
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.9801995881514336, max=1.0…

0,1
regret,▁

0,1
regret,250000.0


[34m[1mwandb[0m: Agent Starting Run: dzcx3egw with config:
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.006439136847071147
[34m[1mwandb[0m: 	v_lr: 0.00600364009164271
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.19724770642201836, max=1.…

0,1
regret,▁

0,1
regret,248230.84495


[34m[1mwandb[0m: Agent Starting Run: 6yut1ljs with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 1.2027681902585649e-05
[34m[1mwandb[0m: 	v_lr: 0.002676414355123283
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.005 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.8681127295756809, max=1.0…

0,1
regret,▁

0,1
regret,249814.0


[34m[1mwandb[0m: Agent Starting Run: f4fubbdy with config:
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.0032453030331921076
[34m[1mwandb[0m: 	v_lr: 0.002370931644506988
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.9788108791903858, max=1.0…

0,1
regret,▁

0,1
regret,73240.19469


[34m[1mwandb[0m: Agent Starting Run: aqk6guv8 with config:
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.0006162133943340826
[34m[1mwandb[0m: 	v_lr: 0.009547599566943224
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.19721651114977068, max=1.…

0,1
regret,▁

0,1
regret,245777.30767


[34m[1mwandb[0m: Agent Starting Run: 75uy3tej with config:
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.0032974419184364973
[34m[1mwandb[0m: 	v_lr: 0.005888541749797346
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.9788108791903858, max=1.0…

0,1
regret,▁

0,1
regret,109228.28346


[34m[1mwandb[0m: Agent Starting Run: qk93kvi1 with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.0022362561045882047
[34m[1mwandb[0m: 	v_lr: 0.006457478066163553
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.19721651114977068, max=1.…

0,1
regret,▁

0,1
regret,114309.69119


[34m[1mwandb[0m: Agent Starting Run: 3z0eg3as with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.0015789815381252637
[34m[1mwandb[0m: 	v_lr: 0.00464588766930388
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.1975289086012989, max=1.0…

0,1
regret,▁

0,1
regret,250000.0


[34m[1mwandb[0m: Agent Starting Run: yfv6ha65 with config:
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.00500544981175537
[34m[1mwandb[0m: 	v_lr: 0.0044396890198457415
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011111111111111112, max=1.0…

VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.1971853257432005, max=1.0…

0,1
regret,▁

0,1
regret,63039.48472


[34m[1mwandb[0m: Agent Starting Run: 1323fl5s with config:
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.005718605645379005
[34m[1mwandb[0m: 	v_lr: 3.1744575483296155e-05
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.005 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.8880809358204237, max=1.0…

0,1
regret,▁

0,1
regret,249438.69392


[34m[1mwandb[0m: Agent Starting Run: 4k7sazf3 with config:
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.004670846244387543
[34m[1mwandb[0m: 	v_lr: 0.005662908685517788
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.20148650832121506, max=1.…

0,1
regret,▁

0,1
regret,250000.0


[34m[1mwandb[0m: Agent Starting Run: vii3ybjh with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.008269199642048747
[34m[1mwandb[0m: 	v_lr: 0.008651182662398909
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.201519069166128, max=1.0)…

0,1
regret,▁

0,1
regret,250000.0


[34m[1mwandb[0m: Agent Starting Run: tpg4gna3 with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.006241215538136675
[34m[1mwandb[0m: 	v_lr: 0.009383673981139684
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.19721651114977068, max=1.…

0,1
regret,▁

0,1
regret,219852.88007


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: p2u04tep with config:
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.002553709615043021
[34m[1mwandb[0m: 	v_lr: 0.003906586416338536
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.1971853257432005, max=1.0…

0,1
regret,▁

0,1
regret,221770.31012


[34m[1mwandb[0m: Agent Starting Run: 3t8kbo37 with config:
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.004305501123296899
[34m[1mwandb[0m: 	v_lr: 0.00830650008374024
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011277777777932999, max=1.0…

VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.1971853257432005, max=1.0…

0,1
regret,▁

0,1
regret,191462.34265


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: ks6ksjry with config:
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.00454979146722319
[34m[1mwandb[0m: 	v_lr: 0.009566887212634717
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.1975289086012989, max=1.0…

0,1
regret,▁

0,1
regret,250000.0


[34m[1mwandb[0m: Agent Starting Run: xjt1e9ia with config:
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.008787724411334557
[34m[1mwandb[0m: 	v_lr: 0.0046900148632690935
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,249611.26901


[34m[1mwandb[0m: Agent Starting Run: vgypssge with config:
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.0010932593943539077
[34m[1mwandb[0m: 	v_lr: 0.0004743751026707321
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.2014214181876918, max=1.0…

0,1
regret,▁

0,1
regret,250000.0


[34m[1mwandb[0m: Agent Starting Run: qb05nweh with config:
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.006374525101566919
[34m[1mwandb[0m: 	v_lr: 0.003126927582766189
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.20148650832121506, max=1.…

0,1
regret,▁

0,1
regret,179206.51273


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: pa3wzu68 with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.00951769178103584
[34m[1mwandb[0m: 	v_lr: 0.00362396277581948
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.2015842224377627, max=1.0…

0,1
regret,▁

0,1
regret,169450.42355


[34m[1mwandb[0m: Agent Starting Run: 66y79e7i with config:
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.008658610967279398
[34m[1mwandb[0m: 	v_lr: 0.003491690026289621
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.002 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.30871421793452475, max=1.…

0,1
regret,▁

0,1
regret,216974.13791


[34m[1mwandb[0m: Agent Starting Run: tazvlzd1 with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.008180861022694753
[34m[1mwandb[0m: 	v_lr: 0.007158826349160108
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011277777777286247, max=1.0…

VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.201519069166128, max=1.0)…

0,1
regret,▁

0,1
regret,58948.19583


[34m[1mwandb[0m: Agent Starting Run: wwafcbab with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.003769575645057763
[34m[1mwandb[0m: 	v_lr: 0.006887094117974426
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.201519069166128, max=1.0)…

0,1
regret,▁

0,1
regret,173565.96139


[34m[1mwandb[0m: Agent Starting Run: rhy9l2gs with config:
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.004016130308369227
[34m[1mwandb[0m: 	v_lr: 0.005556746609103137
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.20148650832121506, max=1.…

0,1
regret,▁

0,1
regret,250000.0


[34m[1mwandb[0m: Agent Starting Run: auczozmz with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.00020319078802529967
[34m[1mwandb[0m: 	v_lr: 0.0011120774901872868
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,243488.14539


[34m[1mwandb[0m: Agent Starting Run: a9setynr with config:
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.0022761492294126654
[34m[1mwandb[0m: 	v_lr: 0.00031919344558178
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.20135746606334842, max=1.…

0,1
regret,▁

0,1
regret,120799.46921


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: nij8fc8a with config:
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.0010165053270797405
[34m[1mwandb[0m: 	v_lr: 0.008264562088513942
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.19715415019762847, max=1.…

0,1
regret,▁

0,1
regret,223301.95451


[34m[1mwandb[0m: Agent Starting Run: aas4uhxm with config:
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.0011050843411845133
[34m[1mwandb[0m: 	v_lr: 0.009063049097909606
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.19702719797596457, max=1.…

0,1
regret,▁

0,1
regret,144614.86437


[34m[1mwandb[0m: Agent Starting Run: ci72cfve with config:
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.000849974092428038
[34m[1mwandb[0m: 	v_lr: 0.004411204069142399
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.19715415019762847, max=1.…

0,1
regret,▁

0,1
regret,192401.42839


[34m[1mwandb[0m: Agent Starting Run: xj7f6ouy with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.004153985314700898
[34m[1mwandb[0m: 	v_lr: 0.005538108310036105
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.9786493752965365, max=1.0…

0,1
regret,▁

0,1
regret,163483.94059


[34m[1mwandb[0m: Agent Starting Run: pkbm4om7 with config:
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.00287464264434013
[34m[1mwandb[0m: 	v_lr: 0.006794772464333587
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,79317.52567


[34m[1mwandb[0m: Agent Starting Run: 1r9x1ph6 with config:
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.0004690692396031828
[34m[1mwandb[0m: 	v_lr: 0.006504315202034522
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.19715415019762847, max=1.…

0,1
regret,▁

0,1
regret,158578.41099


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: x14c4yuz with config:
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.005281301014898837
[34m[1mwandb[0m: 	v_lr: 0.0008255993155588499
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.19702719797596457, max=1.…

0,1
regret,▁

0,1
regret,206368.85809


[34m[1mwandb[0m: Agent Starting Run: 44o6reh8 with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.0024142593059008686
[34m[1mwandb[0m: 	v_lr: 0.003355838798789213
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.19721651114977068, max=1.…

0,1
regret,▁

0,1
regret,249948.08645


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 5t9fpjqw with config:
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.003936082361262376
[34m[1mwandb[0m: 	v_lr: 0.00717073367555635
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.19724770642201836, max=1.…

0,1
regret,▁

0,1
regret,130984.55096


[34m[1mwandb[0m: Agent Starting Run: qkt945ao with config:
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.00527906084017553
[34m[1mwandb[0m: 	v_lr: 0.0023285113518967293
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.1974976243268926, max=1.0…

0,1
regret,▁

0,1
regret,249835.0


[34m[1mwandb[0m: Agent Starting Run: sczcb6cg with config:
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.007765960101513424
[34m[1mwandb[0m: 	v_lr: 0.0058274122900522154
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.201453957996769, max=1.0)…

0,1
regret,▁

0,1
regret,195128.10558


[34m[1mwandb[0m: Agent Starting Run: 7prodco4 with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.0014015015443666922
[34m[1mwandb[0m: 	v_lr: 0.005582803607128459
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011111111111111112, max=1.0…

VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.20148650832121506, max=1.…

0,1
regret,▁

0,1
regret,249862.18896


[34m[1mwandb[0m: Agent Starting Run: j4wds55k with config:
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.0001456533318255059
[34m[1mwandb[0m: 	v_lr: 0.001999261557213679
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.201453957996769, max=1.0)…

0,1
regret,▁

0,1
regret,132452.52265


[34m[1mwandb[0m: Agent Starting Run: t71n7lcm with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.008975175303610472
[34m[1mwandb[0m: 	v_lr: 0.0002528425575955386
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011288888888925108, max=1.0…

VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.20148650832121506, max=1.…

0,1
regret,▁

0,1
regret,249805.76388


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: cacd2lct with config:
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.009036387083634396
[34m[1mwandb[0m: 	v_lr: 0.009030053345039169
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.19724770642201836, max=1.…

0,1
regret,▁

0,1
regret,153085.78005


[34m[1mwandb[0m: Agent Starting Run: 8slbt92g with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.007994880071206035
[34m[1mwandb[0m: 	v_lr: 0.009458258815956688
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.201519069166128, max=1.0)…

0,1
regret,▁

0,1
regret,172831.11913


[34m[1mwandb[0m: Agent Starting Run: 0axfdsin with config:
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.0077961240034350385
[34m[1mwandb[0m: 	v_lr: 0.00628060051221422
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.005 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.8683877098511245, max=1.0…

0,1
regret,▁

0,1
regret,250000.0


[34m[1mwandb[0m: Agent Starting Run: u3ig1lw8 with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.007848821174199138
[34m[1mwandb[0m: 	v_lr: 0.005881006892409426
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.005 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.8883433639531201, max=1.0…

0,1
regret,▁

0,1
regret,249290.0


[34m[1mwandb[0m: Agent Starting Run: bj37yp7a with config:
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.003346772193574294
[34m[1mwandb[0m: 	v_lr: 0.005626449801181129
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.1971853257432005, max=1.0…

0,1
regret,▁

0,1
regret,70289.61818


[34m[1mwandb[0m: Agent Starting Run: ceehwxxl with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.006204764814148433
[34m[1mwandb[0m: 	v_lr: 0.00657042247575442
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011277777777932999, max=1.0…

VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.1970895286301803, max=1.0…

0,1
regret,▁

0,1
regret,167702.50495


[34m[1mwandb[0m: Agent Starting Run: oezsefzl with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.009288427922792698
[34m[1mwandb[0m: 	v_lr: 0.001875946688163321
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.9789590254706534, max=1.0…

0,1
regret,▁

0,1
regret,68210.30178


[34m[1mwandb[0m: Agent Starting Run: vwsd0zuz with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.006884182528972078
[34m[1mwandb[0m: 	v_lr: 0.009167605000479728
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.005 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.8670145477545858, max=1.0…

0,1
regret,▁

0,1
regret,141132.36687


[34m[1mwandb[0m: Agent Starting Run: 6xchdf4k with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.007899736009561224
[34m[1mwandb[0m: 	v_lr: 0.008403231601770297
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.19724770642201836, max=1.…

0,1
regret,▁

0,1
regret,222361.83444


[34m[1mwandb[0m: Agent Starting Run: 5celqqca with config:
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.003196676265312951
[34m[1mwandb[0m: 	v_lr: 0.00799763804905553
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.201519069166128, max=1.0)…

0,1
regret,▁

0,1
regret,151815.0945


[34m[1mwandb[0m: Agent Starting Run: oveidcdz with config:
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.004591988936684267
[34m[1mwandb[0m: 	v_lr: 0.0038491367647305566
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.19715415019762847, max=1.…

0,1
regret,▁

0,1
regret,67063.22516


[34m[1mwandb[0m: Agent Starting Run: jtaoew7c with config:
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.002095525629236658
[34m[1mwandb[0m: 	v_lr: 0.0030934743710742223
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.201453957996769, max=1.0)…

0,1
regret,▁

0,1
regret,249533.64851


[34m[1mwandb[0m: Agent Starting Run: sd8nbuyw with config:
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.0014219689885369986
[34m[1mwandb[0m: 	v_lr: 0.0005631023089314361
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.2012924071082391, max=1.0…

0,1
regret,▁

0,1
regret,93193.17749


[34m[1mwandb[0m: Agent Starting Run: 16yxif2d with config:
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.006283470606512318
[34m[1mwandb[0m: 	v_lr: 0.0016174777709150507
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.19702719797596457, max=1.…

0,1
regret,▁

0,1
regret,83910.35694


[34m[1mwandb[0m: Agent Starting Run: sz5zqq6e with config:
[34m[1mwandb[0m: 	hidden_dim: 256
[34m[1mwandb[0m: 	learning_rate: 0.0005307635659979079
[34m[1mwandb[0m: 	v_lr: 0.006409452837224794
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.201453957996769, max=1.0)…

0,1
regret,▁

0,1
regret,204704.33279


[34m[1mwandb[0m: Agent Starting Run: 5z8asv0p with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.006275969479302181
[34m[1mwandb[0m: 	v_lr: 0.003025896664484424
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.005 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.8674260401835152, max=1.0…

0,1
regret,▁

0,1
regret,87011.27203


[34m[1mwandb[0m: Agent Starting Run: ydmm7evw with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.0011872038547543764
[34m[1mwandb[0m: 	v_lr: 0.001032023163314809
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.20148650832121506, max=1.…

0,1
regret,▁

0,1
regret,142868.2593


[34m[1mwandb[0m: Agent Starting Run: cn0rj3cc with config:
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.008982804720944062
[34m[1mwandb[0m: 	v_lr: 0.0011417441143419095
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
