In [2]:
'''
A bunch of imports, you don't have to worry about these
'''

import numpy as np
import random
import torch
import torch.nn as nn
import torch.nn.functional as F
from collections import namedtuple, deque
import torch.optim as optim
import datetime
import gym
from gym.wrappers.record_video import RecordVideo
import glob
import io
import base64
import matplotlib.pyplot as plt
from IPython.display import HTML
from pyvirtualdisplay import Display
import tensorflow as tf
from IPython import display as ipythondisplay
from PIL import Image
import tensorflow_probability as tfp
import wandb







In [3]:
'''
Bunch of Hyper parameters (Which you might have to tune later)
'''
BUFFER_SIZE = int(1e5)  # replay buffer size
BATCH_SIZE = 64         # minibatch size
GAMMA = 0.99            # discount factor
LR = 5e-4               # learning rate
UPDATE_EVERY = 50   # how often to update the network (When Q target is present)
env = gym.make('CartPole-v1')

class QNetwork1(nn.Module):

    def __init__(self, state_size, action_size, seed, fc1_units=128, fc2_units=64, fc3_units=64, fc4_units=128):
        """Initialize parameters and build model.
        Params
        ======
            state_size (int): Dimension of each state
            action_size (int): Dimension of each action
            seed (int): Random seed
            fc1_units (int): Number of nodes in first hidden layer
            fc2_units (int): Number of nodes in second hidden layer
        """
        super(QNetwork1, self).__init__()
        self.seed = torch.manual_seed(seed)
        self.fc1 = nn.Linear(state_size, fc1_units)
        self.fc2 = nn.Linear(fc1_units, fc2_units)
        # Advantage stream
        self.fc3 = nn.Linear(state_size, fc3_units)
        self.fc4 = nn.Linear(fc3_units, fc4_units)

        self.fc_advantage = nn.Linear(fc4_units, action_size)
        # Value stream
        self.fc_value = nn.Linear(fc2_units, 1)

    def forward(self, state):
        """Build a network that maps state -> action values."""
        x = F.relu(self.fc1(state))
        x = F.relu(self.fc2(x))
        value = self.fc_value(x)
        x = F.relu(self.fc3(state))
        x = F.relu(self.fc4(x))
        advantage = self.fc_advantage(x)

        # Combine value and advantage to get Q-values
        Q_values = value + (advantage - advantage.mean(dim=1, keepdim=True))

        return Q_values
    
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

class ReplayBuffer:
    """Fixed-size buffer to store experience tuples."""

    def __init__(self, action_size, buffer_size, batch_size, seed):
        """Initialize a ReplayBuffer object.

        Params
        ======
            action_size (int): dimension of each action
            buffer_size (int): maximum size of buffer
            batch_size (int): size of each training batch
            seed (int): random seed
        """
        self.action_size = action_size
        self.memory = deque(maxlen=buffer_size)
        self.batch_size = batch_size
        self.experience = namedtuple("Experience", field_names=["state", "action", "reward", "next_state", "done"])
        self.seed = random.seed(seed)

    def add(self, state, action, reward, next_state, done):
        """Add a new experience to memory."""
        e = self.experience(state, action, reward, next_state, done)
        self.memory.append(e)

    def sample(self):
        """Randomly sample a batch of experiences from memory."""
        experiences = random.sample(self.memory, k=self.batch_size)

        states = torch.from_numpy(np.vstack([e.state for e in experiences if e is not None])).float().to(device)
        actions = torch.from_numpy(np.vstack([e.action for e in experiences if e is not None])).long().to(device)
        rewards = torch.from_numpy(np.vstack([e.reward for e in experiences if e is not None])).float().to(device)
        next_states = torch.from_numpy(np.vstack([e.next_state for e in experiences if e is not None])).float().to(device)
        dones = torch.from_numpy(np.vstack([e.done for e in experiences if e is not None]).astype(np.uint8)).float().to(device)

        return (states, actions, rewards, next_states, dones)

    def __len__(self):
        """Return the current size of internal memory."""
        return len(self.memory)
class TutorialAgent1():

    def __init__(self, state_size, action_size, seed):

        ''' Agent Environment Interaction '''
        self.state_size = state_size
        self.action_size = action_size
        self.seed = random.seed(seed)

        ''' Q-Network '''
        self.qnetwork_local = QNetwork1(state_size, action_size, seed).to(device)
        self.qnetwork_target = QNetwork1(state_size, action_size, seed).to(device)
        self.optimizer = optim.Adam(self.qnetwork_local.parameters(), lr=LR)

        ''' Replay memory '''
        self.memory = ReplayBuffer(action_size, BUFFER_SIZE, BATCH_SIZE, seed)

        ''' Initialize time step (for updating every UPDATE_EVERY steps)           -Needed for Q Targets '''
        self.t_step = 0

    def step(self, state, action, reward, next_state, done):

        ''' Save experience in replay memory '''
        self.memory.add(state, action, reward, next_state, done)

        ''' If enough samples are available in memory, get random subset and learn '''
        if len(self.memory) >= BATCH_SIZE:
            experiences = self.memory.sample()
            self.learn(experiences, GAMMA)

        """ +Q TARGETS PRESENT """
        ''' Updating the Network every 'UPDATE_EVERY' steps taken '''
        self.t_step = (self.t_step + 1) % UPDATE_EVERY
        if self.t_step == 0:

            self.qnetwork_target.load_state_dict(self.qnetwork_local.state_dict())

    def act(self, state, eps=0.):
        
        state = torch.from_numpy(state).float().unsqueeze(0).to(device)
        self.qnetwork_local.eval()
        with torch.no_grad():
            action_values = self.qnetwork_local(state)
        self.qnetwork_local.train()

        ''' Epsilon-greedy action selection (Already Present) '''
        if random.random() > eps:
            return np.argmax(action_values.cpu().data.numpy())
        else:
            return random.choice(np.arange(self.action_size))

    def learn(self, experiences, gamma):
        """ +E EXPERIENCE REPLAY PRESENT """
        states, actions, rewards, next_states, dones = experiences

        Q_targets_next = self.qnetwork_target(next_states)

        # Compute value and advantage streams
        next_state_values = self.qnetwork_target(next_states).detach().max(1)[0].unsqueeze(1)
        Q_targets = rewards + (gamma * next_state_values * (1 - dones))

        # Compute Q-values for current states using local network
        Q_expected = self.qnetwork_local(states).gather(1, actions)

        # Compute loss
        loss = F.mse_loss(Q_expected, Q_targets)

        # Minimize the loss
        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()


        ''' Defining DQN Algorithm '''

def dqn(agent,n_episodes=10000, max_t=500, eps_start=1.0, eps_end=0.01, eps_decay=0.995):

    scores_window = deque(maxlen=100)
    all_scores=np.array([])
    moving_avg_scores=np.array([])
    done,truncated=False,False
    ''' last 100 scores for checking if the avg is more than 195 '''

    eps = eps_start
    ''' initialize epsilon '''

    for i_episode in range(1, n_episodes+1):
        state,_ = env.reset()
        score = 0
        for t in range(max_t):
            action = agent.act(state, eps)
            next_state,reward, done, truncated,_ = env.step(action)
            agent.step(state, action, reward, next_state, done)
            state = next_state
            score += reward
            if done or truncated:
                break
        scores_window.append(score)
        all_scores=np.append(all_scores,score)

        eps = max(eps_end, eps_decay*eps)
        ''' decrease epsilon '''

        moving_avg_scores=np.append(moving_avg_scores,np.mean(scores_window))

  
        if i_episode==250:
           break
    return moving_avg_scores,True


sweep_config = {
"method": "bayes",
"metric": {"name": "regret", "goal": "minimize"},
"parameters": {
    "LR": {"min": 1e-5, "max": 1e-2},
    "UPDATE_EVERY": {"values":[20,50,75,100]},
    "BUFFER_SIZE": {"values":[1e2,1e3,1e5]},
    "BATCH_SIZE": {"values":[64,128,256]}
},
"project": "dueling_mean_cartpole",
"early_terminate": {
    "type": "hyperband",
    "min_iter": 3,
    "max_iter": 100
}
}
# Initialize the sweep
sweep_id = wandb.sweep(sweep_config)

def train(LR, UPDATE_EVERY, BUFFER_SIZE, BATCH_SIZE):
    UPDATE_EVERY = UPDATE_EVERY
    BUFFER_SIZE = BUFFER_SIZE
    LR = LR
    BATCH_SIZE = BATCH_SIZE
    env = gym.make('CartPole-v1')
    state_shape = env.observation_space.shape[0]
    action_shape = env.action_space.n
    no_of_actions = env.action_space.n
    state,_ = env.reset()
    state=np.array(state)
    agent = TutorialAgent1(state_size=state_shape,action_size = action_shape,seed = 0)
    all_scores_1=dqn(agent)
    all1=(all_scores_1[0])
    regret=0
    for i in all1:
     if i>500:
        break
     else:
      regret+=500-i
    return regret

def run_training():
    config_defaults = {
        "LR": 5e-4,
        "UPDATE_EVERY": 50,
        "BUFFER_SIZE": 1e5,
        "BATCH_SIZE": 64
    }
    config = wandb.init(config=config_defaults,project="dueling_mean_cartpole_2")
    LR = config.config["LR"]
    UPDATE_EVERY=config.config['UPDATE_EVERY']
    BATCH_SIZE = config.config["BATCH_SIZE"]
    BUFFER_SIZE = config.config["BUFFER_SIZE"]
    regret = train(LR, UPDATE_EVERY, BUFFER_SIZE, BATCH_SIZE)
    wandb.log({"regret": regret})

# Run the sweep
wandb.agent(sweep_id, function=run_training)

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Create sweep with ID: z1j3kgw0
Sweep URL: https://wandb.ai/rl_shobhith/dueling_mean_cartpole/sweeps/z1j3kgw0


[34m[1mwandb[0m: Agent Starting Run: gitsc04i with config:
[34m[1mwandb[0m: 	BATCH_SIZE: 128
[34m[1mwandb[0m: 	BUFFER_SIZE: 100
[34m[1mwandb[0m: 	LR: 0.005925820816633021
[34m[1mwandb[0m: 	UPDATE_EVERY: 50
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mshobhith-v[0m ([33mrl_shobhith[0m). Use [1m`wandb login --relogin`[0m to force relogin


  if not isinstance(terminated, (bool, np.bool8)):


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,108070.58624


[34m[1mwandb[0m: Agent Starting Run: 17m3ez1e with config:
[34m[1mwandb[0m: 	BATCH_SIZE: 128
[34m[1mwandb[0m: 	BUFFER_SIZE: 1000
[34m[1mwandb[0m: 	LR: 0.00877271270805817
[34m[1mwandb[0m: 	UPDATE_EVERY: 100
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.22669873722188816, max=1.…

0,1
regret,▁

0,1
regret,108658.72771


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: vzs74fvy with config:
[34m[1mwandb[0m: 	BATCH_SIZE: 128
[34m[1mwandb[0m: 	BUFFER_SIZE: 1000
[34m[1mwandb[0m: 	LR: 0.005368416877864208
[34m[1mwandb[0m: 	UPDATE_EVERY: 75
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.23143032535297728, max=1.…

0,1
regret,▁

0,1
regret,108584.09621


[34m[1mwandb[0m: Agent Starting Run: c9tduyxq with config:
[34m[1mwandb[0m: 	BATCH_SIZE: 256
[34m[1mwandb[0m: 	BUFFER_SIZE: 100000
[34m[1mwandb[0m: 	LR: 0.002227597318140824
[34m[1mwandb[0m: 	UPDATE_EVERY: 75
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.23135931267259896, max=1.…

0,1
regret,▁

0,1
regret,103489.14224


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 64xdr827 with config:
[34m[1mwandb[0m: 	BATCH_SIZE: 256
[34m[1mwandb[0m: 	BUFFER_SIZE: 100000
[34m[1mwandb[0m: 	LR: 0.0017625958467459516
[34m[1mwandb[0m: 	UPDATE_EVERY: 100
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.2312883435582822, max=1.0…

0,1
regret,▁

0,1
regret,109379.35325


[34m[1mwandb[0m: Agent Starting Run: jn1b34hk with config:
[34m[1mwandb[0m: 	BATCH_SIZE: 256
[34m[1mwandb[0m: 	BUFFER_SIZE: 100
[34m[1mwandb[0m: 	LR: 0.005272016199433554
[34m[1mwandb[0m: 	UPDATE_EVERY: 50
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: W&B API key is configured. Use [1m`wandb login --relogin`[0m to force relogin


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.01128888888888721, max=1.0)…

VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.22673282213201024, max=1.…

0,1
regret,▁

0,1
regret,106765.57583


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: fnhp42zq with config:
[34m[1mwandb[0m: 	BATCH_SIZE: 256
[34m[1mwandb[0m: 	BUFFER_SIZE: 100
[34m[1mwandb[0m: 	LR: 0.008783613492379706
[34m[1mwandb[0m: 	UPDATE_EVERY: 100
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.002 MB uploaded\r'), FloatProgress(value=0.6614035087719298, max=1.0…

0,1
regret,▁

0,1
regret,105089.56351


[34m[1mwandb[0m: Agent Starting Run: dgo4zsnc with config:
[34m[1mwandb[0m: 	BATCH_SIZE: 256
[34m[1mwandb[0m: 	BUFFER_SIZE: 100000
[34m[1mwandb[0m: 	LR: 0.008122043144283643
[34m[1mwandb[0m: 	UPDATE_EVERY: 50
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.22663059813645928, max=1.…

0,1
regret,▁

0,1
regret,108836.81753


[34m[1mwandb[0m: Agent Starting Run: dkwz4q9m with config:
[34m[1mwandb[0m: 	BATCH_SIZE: 256
[34m[1mwandb[0m: 	BUFFER_SIZE: 100
[34m[1mwandb[0m: 	LR: 0.001254244196429291
[34m[1mwandb[0m: 	UPDATE_EVERY: 50
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.23146584804297773, max=1.…

0,1
regret,▁

0,1
regret,106879.68708


[34m[1mwandb[0m: Agent Starting Run: c4nxd3es with config:
[34m[1mwandb[0m: 	BATCH_SIZE: 128
[34m[1mwandb[0m: 	BUFFER_SIZE: 100
[34m[1mwandb[0m: 	LR: 0.008317778728827241
[34m[1mwandb[0m: 	UPDATE_EVERY: 20
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.005 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.8637389081064821, max=1.0…

0,1
regret,▁

0,1
regret,107481.75982


[34m[1mwandb[0m: Agent Starting Run: b461ms1m with config:
[34m[1mwandb[0m: 	BATCH_SIZE: 256
[34m[1mwandb[0m: 	BUFFER_SIZE: 100
[34m[1mwandb[0m: 	LR: 0.0019563911336465496
[34m[1mwandb[0m: 	UPDATE_EVERY: 20
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,104279.44783


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 4hlv7sl4 with config:
[34m[1mwandb[0m: 	BATCH_SIZE: 128
[34m[1mwandb[0m: 	BUFFER_SIZE: 100000
[34m[1mwandb[0m: 	LR: 0.008777431955447947
[34m[1mwandb[0m: 	UPDATE_EVERY: 50
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011111111111111112, max=1.0…

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,104692.71123


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 2aocljnk with config:
[34m[1mwandb[0m: 	BATCH_SIZE: 128
[34m[1mwandb[0m: 	BUFFER_SIZE: 1000
[34m[1mwandb[0m: 	LR: 0.0053090079343705715
[34m[1mwandb[0m: 	UPDATE_EVERY: 75
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011277777777776363, max=1.0…

VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.2266646625582444, max=1.0…

0,1
regret,▁

0,1
regret,108736.10643


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 57m866nl with config:
[34m[1mwandb[0m: 	BATCH_SIZE: 64
[34m[1mwandb[0m: 	BUFFER_SIZE: 100000
[34m[1mwandb[0m: 	LR: 0.009692984323068522
[34m[1mwandb[0m: 	UPDATE_EVERY: 100
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.23135931267259896, max=1.…

0,1
regret,▁

0,1
regret,109019.78866


[34m[1mwandb[0m: Agent Starting Run: qvkf0mc5 with config:
[34m[1mwandb[0m: 	BATCH_SIZE: 128
[34m[1mwandb[0m: 	BUFFER_SIZE: 100
[34m[1mwandb[0m: 	LR: 0.002463943305471198
[34m[1mwandb[0m: 	UPDATE_EVERY: 75
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011288888888889738, max=1.0…

VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.2267669172932331, max=1.0…

0,1
regret,▁

0,1
regret,109871.98332


[34m[1mwandb[0m: Agent Starting Run: u0925a2c with config:
[34m[1mwandb[0m: 	BATCH_SIZE: 256
[34m[1mwandb[0m: 	BUFFER_SIZE: 100000
[34m[1mwandb[0m: 	LR: 0.002239517819285286
[34m[1mwandb[0m: 	UPDATE_EVERY: 75
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.22663059813645928, max=1.…

0,1
regret,▁

0,1
regret,105109.63392


[34m[1mwandb[0m: Agent Starting Run: bfbvl1mn with config:
[34m[1mwandb[0m: 	BATCH_SIZE: 128
[34m[1mwandb[0m: 	BUFFER_SIZE: 100
[34m[1mwandb[0m: 	LR: 0.006004233291183193
[34m[1mwandb[0m: 	UPDATE_EVERY: 50
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011111111111111112, max=1.0…

VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.22665062415400813, max=1.…

0,1
regret,▁

0,1
regret,103299.64352


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: zrpby83p with config:
[34m[1mwandb[0m: 	BATCH_SIZE: 128
[34m[1mwandb[0m: 	BUFFER_SIZE: 100
[34m[1mwandb[0m: 	LR: 0.006637400472200915
[34m[1mwandb[0m: 	UPDATE_EVERY: 75
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011111111111111112, max=1.0…

VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.23146584804297773, max=1.…

0,1
regret,▁

0,1
regret,105692.26445


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: rxk5mzoc with config:
[34m[1mwandb[0m: 	BATCH_SIZE: 64
[34m[1mwandb[0m: 	BUFFER_SIZE: 100000
[34m[1mwandb[0m: 	LR: 0.007902647446516113
[34m[1mwandb[0m: 	UPDATE_EVERY: 75
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011288888888889738, max=1.0…

VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.23139481356452354, max=1.…

0,1
regret,▁

0,1
regret,108523.3223


[34m[1mwandb[0m: Agent Starting Run: tourylvz with config:
[34m[1mwandb[0m: 	BATCH_SIZE: 256
[34m[1mwandb[0m: 	BUFFER_SIZE: 100000
[34m[1mwandb[0m: 	LR: 0.004341478345097492
[34m[1mwandb[0m: 	UPDATE_EVERY: 100
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,104862.54964


[34m[1mwandb[0m: Agent Starting Run: 22gox1rj with config:
[34m[1mwandb[0m: 	BATCH_SIZE: 256
[34m[1mwandb[0m: 	BUFFER_SIZE: 100
[34m[1mwandb[0m: 	LR: 0.0018323705898922868
[34m[1mwandb[0m: 	UPDATE_EVERY: 50
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011111111111111112, max=1.0…

VBox(children=(Label(value='0.002 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.34280559314388814, max=1.…

0,1
regret,▁

0,1
regret,106187.16144


[34m[1mwandb[0m: Agent Starting Run: dkv2zw9m with config:
[34m[1mwandb[0m: 	BATCH_SIZE: 128
[34m[1mwandb[0m: 	BUFFER_SIZE: 100
[34m[1mwandb[0m: 	LR: 0.00968223343390822
[34m[1mwandb[0m: 	UPDATE_EVERY: 50
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011288888888884685, max=1.0…

VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.226801022710182, max=1.0)…

0,1
regret,▁

0,1
regret,106078.89762


[34m[1mwandb[0m: Agent Starting Run: vj1sj9dt with config:
[34m[1mwandb[0m: 	BATCH_SIZE: 256
[34m[1mwandb[0m: 	BUFFER_SIZE: 1000
[34m[1mwandb[0m: 	LR: 8.14960887398546e-05
[34m[1mwandb[0m: 	UPDATE_EVERY: 20
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.23143032535297728, max=1.…

0,1
regret,▁

0,1
regret,108522.8041


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 5emwfv2h with config:
[34m[1mwandb[0m: 	BATCH_SIZE: 128
[34m[1mwandb[0m: 	BUFFER_SIZE: 1000
[34m[1mwandb[0m: 	LR: 0.002479033959054063
[34m[1mwandb[0m: 	UPDATE_EVERY: 100
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.2266646625582444, max=1.0…

0,1
regret,▁

0,1
regret,109128.66979


[34m[1mwandb[0m: Agent Starting Run: imia95d7 with config:
[34m[1mwandb[0m: 	BATCH_SIZE: 256
[34m[1mwandb[0m: 	BUFFER_SIZE: 1000
[34m[1mwandb[0m: 	LR: 0.005860215399347001
[34m[1mwandb[0m: 	UPDATE_EVERY: 100
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.22669873722188816, max=1.…

0,1
regret,▁

0,1
regret,105635.06322


[34m[1mwandb[0m: Agent Starting Run: 6blee931 with config:
[34m[1mwandb[0m: 	BATCH_SIZE: 64
[34m[1mwandb[0m: 	BUFFER_SIZE: 1000
[34m[1mwandb[0m: 	LR: 0.006140900380871656
[34m[1mwandb[0m: 	UPDATE_EVERY: 50
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.23134786613447958, max=1.…

0,1
regret,▁

0,1
regret,107549.50078


[34m[1mwandb[0m: Agent Starting Run: zkybxm85 with config:
[34m[1mwandb[0m: 	BATCH_SIZE: 64
[34m[1mwandb[0m: 	BUFFER_SIZE: 1000
[34m[1mwandb[0m: 	LR: 0.002453399385144804
[34m[1mwandb[0m: 	UPDATE_EVERY: 75
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.23146584804297773, max=1.…

0,1
regret,▁

0,1
regret,106699.34545


[34m[1mwandb[0m: Agent Starting Run: ksa3a7d8 with config:
[34m[1mwandb[0m: 	BATCH_SIZE: 64
[34m[1mwandb[0m: 	BUFFER_SIZE: 100000
[34m[1mwandb[0m: 	LR: 0.0031816831779253466
[34m[1mwandb[0m: 	UPDATE_EVERY: 75
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.23135931267259896, max=1.…

0,1
regret,▁

0,1
regret,103758.72593


[34m[1mwandb[0m: Agent Starting Run: tjeiabqr with config:
[34m[1mwandb[0m: 	BATCH_SIZE: 128
[34m[1mwandb[0m: 	BUFFER_SIZE: 1000
[34m[1mwandb[0m: 	LR: 0.0066790432184388295
[34m[1mwandb[0m: 	UPDATE_EVERY: 100
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011111111111111112, max=1.0…

VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.2312413687279423, max=1.0…

0,1
regret,▁

0,1
regret,108177.07956


[34m[1mwandb[0m: Agent Starting Run: 91v42z1i with config:
[34m[1mwandb[0m: 	BATCH_SIZE: 64
[34m[1mwandb[0m: 	BUFFER_SIZE: 100
[34m[1mwandb[0m: 	LR: 0.004022471343646539
[34m[1mwandb[0m: 	UPDATE_EVERY: 100
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011277777777781415, max=1.0…

VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.23146584804297773, max=1.…

0,1
regret,▁

0,1
regret,108121.03545


[34m[1mwandb[0m: Agent Starting Run: qknz99lq with config:
[34m[1mwandb[0m: 	BATCH_SIZE: 64
[34m[1mwandb[0m: 	BUFFER_SIZE: 100000
[34m[1mwandb[0m: 	LR: 0.0030278058092276473
[34m[1mwandb[0m: 	UPDATE_EVERY: 50
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.22663059813645928, max=1.…

0,1
regret,▁

0,1
regret,109800.57547


[34m[1mwandb[0m: Agent Starting Run: hl9251l3 with config:
[34m[1mwandb[0m: 	BATCH_SIZE: 128
[34m[1mwandb[0m: 	BUFFER_SIZE: 1000
[34m[1mwandb[0m: 	LR: 0.007617223619247081
[34m[1mwandb[0m: 	UPDATE_EVERY: 20
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011288888888884685, max=1.0…

VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.23143032535297728, max=1.…

0,1
regret,▁

0,1
regret,106577.47174


[34m[1mwandb[0m: Agent Starting Run: myvaiqel with config:
[34m[1mwandb[0m: 	BATCH_SIZE: 256
[34m[1mwandb[0m: 	BUFFER_SIZE: 100000
[34m[1mwandb[0m: 	LR: 0.007513033344952125
[34m[1mwandb[0m: 	UPDATE_EVERY: 100
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011288888888884685, max=1.0…

VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.22663059813645928, max=1.…

0,1
regret,▁

0,1
regret,104593.06227


[34m[1mwandb[0m: Agent Starting Run: ex9lb5ul with config:
[34m[1mwandb[0m: 	BATCH_SIZE: 64
[34m[1mwandb[0m: 	BUFFER_SIZE: 1000
[34m[1mwandb[0m: 	LR: 0.0025688371635228407
[34m[1mwandb[0m: 	UPDATE_EVERY: 100
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011111111111111112, max=1.0…

VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.23139481356452354, max=1.…

0,1
regret,▁

0,1
regret,105106.36449


[34m[1mwandb[0m: Agent Starting Run: 51v9ubui with config:
[34m[1mwandb[0m: 	BATCH_SIZE: 256
[34m[1mwandb[0m: 	BUFFER_SIZE: 1000
[34m[1mwandb[0m: 	LR: 0.008257296239171381
[34m[1mwandb[0m: 	UPDATE_EVERY: 50
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.01127777777777131, max=1.0)…

VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.23143032535297728, max=1.…

0,1
regret,▁

0,1
regret,105118.6979


[34m[1mwandb[0m: Agent Starting Run: 10iiw8z8 with config:
[34m[1mwandb[0m: 	BATCH_SIZE: 64
[34m[1mwandb[0m: 	BUFFER_SIZE: 100
[34m[1mwandb[0m: 	LR: 0.004335208782319596
[34m[1mwandb[0m: 	UPDATE_EVERY: 20
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011111111111111112, max=1.0…

VBox(children=(Label(value='0.005 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.8640192539109507, max=1.0…

0,1
regret,▁

0,1
regret,108264.11395


[34m[1mwandb[0m: Agent Starting Run: woublny1 with config:
[34m[1mwandb[0m: 	BATCH_SIZE: 256
[34m[1mwandb[0m: 	BUFFER_SIZE: 1000
[34m[1mwandb[0m: 	LR: 0.0033603373699309
[34m[1mwandb[0m: 	UPDATE_EVERY: 75
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011111111111111112, max=1.0…

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,105984.26632


[34m[1mwandb[0m: Agent Starting Run: tbfepanx with config:
[34m[1mwandb[0m: 	BATCH_SIZE: 256
[34m[1mwandb[0m: 	BUFFER_SIZE: 1000
[34m[1mwandb[0m: 	LR: 0.0011293264935312115
[34m[1mwandb[0m: 	UPDATE_EVERY: 50
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011111111111111112, max=1.0…

VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.23139481356452354, max=1.…

0,1
regret,▁

0,1
regret,106034.7251


[34m[1mwandb[0m: Agent Starting Run: mhsz9u6u with config:
[34m[1mwandb[0m: 	BATCH_SIZE: 256
[34m[1mwandb[0m: 	BUFFER_SIZE: 1000
[34m[1mwandb[0m: 	LR: 0.0007126306489986804
[34m[1mwandb[0m: 	UPDATE_EVERY: 100
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011111111111111112, max=1.0…

VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.23135931267259896, max=1.…

0,1
regret,▁

0,1
regret,107484.17838


[34m[1mwandb[0m: Agent Starting Run: jr4z5pbh with config:
[34m[1mwandb[0m: 	BATCH_SIZE: 64
[34m[1mwandb[0m: 	BUFFER_SIZE: 100000
[34m[1mwandb[0m: 	LR: 0.0013193528598378452
[34m[1mwandb[0m: 	UPDATE_EVERY: 50
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011111111111111112, max=1.0…

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,104598.5089


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 3blezqj5 with config:
[34m[1mwandb[0m: 	BATCH_SIZE: 128
[34m[1mwandb[0m: 	BUFFER_SIZE: 100
[34m[1mwandb[0m: 	LR: 0.004107993136355776
[34m[1mwandb[0m: 	UPDATE_EVERY: 75
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011111111111111112, max=1.0…

VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.2267669172932331, max=1.0…

0,1
regret,▁

0,1
regret,107731.69779


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: ptvb482q with config:
[34m[1mwandb[0m: 	BATCH_SIZE: 128
[34m[1mwandb[0m: 	BUFFER_SIZE: 1000
[34m[1mwandb[0m: 	LR: 0.003342666160216847
[34m[1mwandb[0m: 	UPDATE_EVERY: 100
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011288888888884685, max=1.0…

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,108815.59857


[34m[1mwandb[0m: Agent Starting Run: 3ds7o4zw with config:
[34m[1mwandb[0m: 	BATCH_SIZE: 256
[34m[1mwandb[0m: 	BUFFER_SIZE: 1000
[34m[1mwandb[0m: 	LR: 0.006633229389758773
[34m[1mwandb[0m: 	UPDATE_EVERY: 20
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.01127777777777131, max=1.0)…

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,107066.43494


[34m[1mwandb[0m: Agent Starting Run: qzjfy7cu with config:
[34m[1mwandb[0m: 	BATCH_SIZE: 64
[34m[1mwandb[0m: 	BUFFER_SIZE: 100
[34m[1mwandb[0m: 	LR: 0.0021581575496006857
[34m[1mwandb[0m: 	UPDATE_EVERY: 50
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,109116.9129


[34m[1mwandb[0m: Agent Starting Run: m8grxc9k with config:
[34m[1mwandb[0m: 	BATCH_SIZE: 128
[34m[1mwandb[0m: 	BUFFER_SIZE: 100
[34m[1mwandb[0m: 	LR: 0.00714770157597706
[34m[1mwandb[0m: 	UPDATE_EVERY: 20
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011111111111111112, max=1.0…

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,105679.12363


[34m[1mwandb[0m: Agent Starting Run: j7go93i3 with config:
[34m[1mwandb[0m: 	BATCH_SIZE: 64
[34m[1mwandb[0m: 	BUFFER_SIZE: 100000
[34m[1mwandb[0m: 	LR: 0.004179659563561004
[34m[1mwandb[0m: 	UPDATE_EVERY: 75
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011288888888884685, max=1.0…

VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.23139481356452354, max=1.…

0,1
regret,▁

0,1
regret,107612.50798


[34m[1mwandb[0m: Agent Starting Run: 0muvxb0x with config:
[34m[1mwandb[0m: 	BATCH_SIZE: 64
[34m[1mwandb[0m: 	BUFFER_SIZE: 100000
[34m[1mwandb[0m: 	LR: 0.006396956168904927
[34m[1mwandb[0m: 	UPDATE_EVERY: 20
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.005 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.8634996993385448, max=1.0…

0,1
regret,▁

0,1
regret,108201.18322


[34m[1mwandb[0m: Agent Starting Run: 837f8xx4 with config:
[34m[1mwandb[0m: 	BATCH_SIZE: 128
[34m[1mwandb[0m: 	BUFFER_SIZE: 100000
[34m[1mwandb[0m: 	LR: 0.00011713796362472271
[34m[1mwandb[0m: 	UPDATE_EVERY: 100
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,107326.60592


[34m[1mwandb[0m: Agent Starting Run: hbn8qmrr with config:
[34m[1mwandb[0m: 	BATCH_SIZE: 256
[34m[1mwandb[0m: 	BUFFER_SIZE: 1000
[34m[1mwandb[0m: 	LR: 0.009557078902329546
[34m[1mwandb[0m: 	UPDATE_EVERY: 50
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.22673282213201024, max=1.…

0,1
regret,▁

0,1
regret,105002.31519


[34m[1mwandb[0m: Agent Starting Run: 1d50vcr0 with config:
[34m[1mwandb[0m: 	BATCH_SIZE: 64
[34m[1mwandb[0m: 	BUFFER_SIZE: 100
[34m[1mwandb[0m: 	LR: 0.0034546058941255725
[34m[1mwandb[0m: 	UPDATE_EVERY: 50
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.23146584804297773, max=1.…

0,1
regret,▁

0,1
regret,107231.628


[34m[1mwandb[0m: Agent Starting Run: wyummn5d with config:
[34m[1mwandb[0m: 	BATCH_SIZE: 64
[34m[1mwandb[0m: 	BUFFER_SIZE: 1000
[34m[1mwandb[0m: 	LR: 0.005365907544108153
[34m[1mwandb[0m: 	UPDATE_EVERY: 75
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.01127777777777131, max=1.0)…

VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.2267669172932331, max=1.0…

0,1
regret,▁

0,1
regret,107623.17162


[34m[1mwandb[0m: Agent Starting Run: dbbo4w0h with config:
[34m[1mwandb[0m: 	BATCH_SIZE: 256
[34m[1mwandb[0m: 	BUFFER_SIZE: 100
[34m[1mwandb[0m: 	LR: 0.008501557582620422
[34m[1mwandb[0m: 	UPDATE_EVERY: 75
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.22673282213201024, max=1.…

0,1
regret,▁

0,1
regret,104170.69783


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: lvx58zsn with config:
[34m[1mwandb[0m: 	BATCH_SIZE: 256
[34m[1mwandb[0m: 	BUFFER_SIZE: 100000
[34m[1mwandb[0m: 	LR: 0.008996594426705695
[34m[1mwandb[0m: 	UPDATE_EVERY: 20
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.23135931267259896, max=1.…

0,1
regret,▁

0,1
regret,106450.75444


[34m[1mwandb[0m: Agent Starting Run: spox27gt with config:
[34m[1mwandb[0m: 	BATCH_SIZE: 128
[34m[1mwandb[0m: 	BUFFER_SIZE: 1000
[34m[1mwandb[0m: 	LR: 0.002362658468119571
[34m[1mwandb[0m: 	UPDATE_EVERY: 50
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011111111111111112, max=1.0…

VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.23143032535297728, max=1.…

0,1
regret,▁

0,1
regret,103155.92891


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 9fmp4wmc with config:
[34m[1mwandb[0m: 	BATCH_SIZE: 256
[34m[1mwandb[0m: 	BUFFER_SIZE: 100
[34m[1mwandb[0m: 	LR: 0.006030524718143593
[34m[1mwandb[0m: 	UPDATE_EVERY: 20
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.23134786613447958, max=1.…

0,1
regret,▁

0,1
regret,108376.97764


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: xp3d011o with config:
[34m[1mwandb[0m: 	BATCH_SIZE: 64
[34m[1mwandb[0m: 	BUFFER_SIZE: 100
[34m[1mwandb[0m: 	LR: 0.005544114515373319
[34m[1mwandb[0m: 	UPDATE_EVERY: 20
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.2267669172932331, max=1.0…

0,1
regret,▁

0,1
regret,105291.31118


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: w63wpx1c with config:
[34m[1mwandb[0m: 	BATCH_SIZE: 128
[34m[1mwandb[0m: 	BUFFER_SIZE: 100000
[34m[1mwandb[0m: 	LR: 0.0018810050115725496
[34m[1mwandb[0m: 	UPDATE_EVERY: 20
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.22663059813645928, max=1.…

0,1
regret,▁

0,1
regret,105756.79592


[34m[1mwandb[0m: Agent Starting Run: k8y5x341 with config:
[34m[1mwandb[0m: 	BATCH_SIZE: 128
[34m[1mwandb[0m: 	BUFFER_SIZE: 100000
[34m[1mwandb[0m: 	LR: 0.0022162400398794547
[34m[1mwandb[0m: 	UPDATE_EVERY: 75
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011111111111111112, max=1.0…

VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.23132382267218898, max=1.…

0,1
regret,▁

0,1
regret,105979.48684


[34m[1mwandb[0m: Agent Starting Run: gwdocza2 with config:
[34m[1mwandb[0m: 	BATCH_SIZE: 64
[34m[1mwandb[0m: 	BUFFER_SIZE: 1000
[34m[1mwandb[0m: 	LR: 0.0045505646118833955
[34m[1mwandb[0m: 	UPDATE_EVERY: 75
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.22669873722188816, max=1.…

0,1
regret,▁

0,1
regret,106996.27062


[34m[1mwandb[0m: Agent Starting Run: 9jijdqte with config:
[34m[1mwandb[0m: 	BATCH_SIZE: 64
[34m[1mwandb[0m: 	BUFFER_SIZE: 100
[34m[1mwandb[0m: 	LR: 0.005905936807366493
[34m[1mwandb[0m: 	UPDATE_EVERY: 100
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011288888888904896, max=1.0…

VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.22673282213201024, max=1.…

0,1
regret,▁

0,1
regret,106642.11253


[34m[1mwandb[0m: Agent Starting Run: i0niu2r1 with config:
[34m[1mwandb[0m: 	BATCH_SIZE: 128
[34m[1mwandb[0m: 	BUFFER_SIZE: 1000
[34m[1mwandb[0m: 	LR: 0.001306430335510571
[34m[1mwandb[0m: 	UPDATE_EVERY: 75
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.23143032535297728, max=1.…

0,1
regret,▁

0,1
regret,106795.40219


[34m[1mwandb[0m: Agent Starting Run: 31zbg5qr with config:
[34m[1mwandb[0m: 	BATCH_SIZE: 128
[34m[1mwandb[0m: 	BUFFER_SIZE: 1000
[34m[1mwandb[0m: 	LR: 0.006752489618228726
[34m[1mwandb[0m: 	UPDATE_EVERY: 75
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011288888888884685, max=1.0…

VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.23143032535297728, max=1.…

0,1
regret,▁

0,1
regret,108863.49125


[34m[1mwandb[0m: Agent Starting Run: fnr4sox8 with config:
[34m[1mwandb[0m: 	BATCH_SIZE: 256
[34m[1mwandb[0m: 	BUFFER_SIZE: 100000
[34m[1mwandb[0m: 	LR: 0.0026610817316302317
[34m[1mwandb[0m: 	UPDATE_EVERY: 100
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.2312883435582822, max=1.0…

0,1
regret,▁

0,1
regret,106763.5134


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: m09gzbhu with config:
[34m[1mwandb[0m: 	BATCH_SIZE: 256
[34m[1mwandb[0m: 	BUFFER_SIZE: 100000
[34m[1mwandb[0m: 	LR: 0.005343125817463579
[34m[1mwandb[0m: 	UPDATE_EVERY: 100
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.01127777777777131, max=1.0)…

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,105910.48286


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 6z6iu3wo with config:
[34m[1mwandb[0m: 	BATCH_SIZE: 128
[34m[1mwandb[0m: 	BUFFER_SIZE: 1000
[34m[1mwandb[0m: 	LR: 0.00685244606542687
[34m[1mwandb[0m: 	UPDATE_EVERY: 75
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.01127777777777131, max=1.0)…

VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.22673282213201024, max=1.…

0,1
regret,▁

0,1
regret,106719.00507


[34m[1mwandb[0m: Agent Starting Run: uz1xy0y9 with config:
[34m[1mwandb[0m: 	BATCH_SIZE: 256
[34m[1mwandb[0m: 	BUFFER_SIZE: 100000
[34m[1mwandb[0m: 	LR: 0.0006318741449040235
[34m[1mwandb[0m: 	UPDATE_EVERY: 75
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011111111111111112, max=1.0…

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,107134.5564


[34m[1mwandb[0m: Agent Starting Run: 0b6hrrt0 with config:
[34m[1mwandb[0m: 	BATCH_SIZE: 128
[34m[1mwandb[0m: 	BUFFER_SIZE: 100000
[34m[1mwandb[0m: 	LR: 0.006227053452341986
[34m[1mwandb[0m: 	UPDATE_EVERY: 20
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.2266646625582444, max=1.0…

0,1
regret,▁

0,1
regret,106186.39799


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: z537fztv with config:
[34m[1mwandb[0m: 	BATCH_SIZE: 256
[34m[1mwandb[0m: 	BUFFER_SIZE: 100000
[34m[1mwandb[0m: 	LR: 0.006132304785980072
[34m[1mwandb[0m: 	UPDATE_EVERY: 75
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011111111111111112, max=1.0…

VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.23135931267259896, max=1.…

0,1
regret,▁

0,1
regret,109249.94736


[34m[1mwandb[0m: Agent Starting Run: zk7wm14v with config:
[34m[1mwandb[0m: 	BATCH_SIZE: 128
[34m[1mwandb[0m: 	BUFFER_SIZE: 100000
[34m[1mwandb[0m: 	LR: 0.005042172422381508
[34m[1mwandb[0m: 	UPDATE_EVERY: 50
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011111111111111112, max=1.0…

VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.22669873722188816, max=1.…

0,1
regret,▁

0,1
regret,108625.98181


[34m[1mwandb[0m: Agent Starting Run: 8gsgcw46 with config:
[34m[1mwandb[0m: 	BATCH_SIZE: 256
[34m[1mwandb[0m: 	BUFFER_SIZE: 100
[34m[1mwandb[0m: 	LR: 0.00914251293604901
[34m[1mwandb[0m: 	UPDATE_EVERY: 75
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011111111111111112, max=1.0…

VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.226801022710182, max=1.0)…

0,1
regret,▁

0,1
regret,107575.69652


[34m[1mwandb[0m: Agent Starting Run: sl3w04ap with config:
[34m[1mwandb[0m: 	BATCH_SIZE: 64
[34m[1mwandb[0m: 	BUFFER_SIZE: 100
[34m[1mwandb[0m: 	LR: 0.0070080529870892696
[34m[1mwandb[0m: 	UPDATE_EVERY: 20
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011288888888884685, max=1.0…

VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.22673282213201024, max=1.…

0,1
regret,▁

0,1
regret,107239.24685


[34m[1mwandb[0m: Agent Starting Run: gyh5vggv with config:
[34m[1mwandb[0m: 	BATCH_SIZE: 128
[34m[1mwandb[0m: 	BUFFER_SIZE: 1000
[34m[1mwandb[0m: 	LR: 0.0016600374155511135
[34m[1mwandb[0m: 	UPDATE_EVERY: 20
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.2266646625582444, max=1.0…

0,1
regret,▁

0,1
regret,107645.08758


[34m[1mwandb[0m: Agent Starting Run: l9d5531s with config:
[34m[1mwandb[0m: 	BATCH_SIZE: 256
[34m[1mwandb[0m: 	BUFFER_SIZE: 100000
[34m[1mwandb[0m: 	LR: 0.008198648675236902
[34m[1mwandb[0m: 	UPDATE_EVERY: 50
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,107101.81441


[34m[1mwandb[0m: Agent Starting Run: 00apaljf with config:
[34m[1mwandb[0m: 	BATCH_SIZE: 64
[34m[1mwandb[0m: 	BUFFER_SIZE: 1000
[34m[1mwandb[0m: 	LR: 0.00903548696030916
[34m[1mwandb[0m: 	UPDATE_EVERY: 100
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.01127777777777131, max=1.0)…

VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.23146584804297773, max=1.…

0,1
regret,▁

0,1
regret,108763.59347


[34m[1mwandb[0m: Agent Starting Run: nki8of8d with config:
[34m[1mwandb[0m: 	BATCH_SIZE: 128
[34m[1mwandb[0m: 	BUFFER_SIZE: 1000
[34m[1mwandb[0m: 	LR: 0.0002188595532042879
[34m[1mwandb[0m: 	UPDATE_EVERY: 50
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011111111111111112, max=1.0…

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,104920.9281


[34m[1mwandb[0m: Agent Starting Run: 2zbiwlpk with config:
[34m[1mwandb[0m: 	BATCH_SIZE: 128
[34m[1mwandb[0m: 	BUFFER_SIZE: 100
[34m[1mwandb[0m: 	LR: 0.0007408761298322896
[34m[1mwandb[0m: 	UPDATE_EVERY: 75
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.01127777777777131, max=1.0)…

VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,105158.90301


[34m[1mwandb[0m: Agent Starting Run: p45mc35g with config:
[34m[1mwandb[0m: 	BATCH_SIZE: 64
[34m[1mwandb[0m: 	BUFFER_SIZE: 100
[34m[1mwandb[0m: 	LR: 0.004460811965044861
[34m[1mwandb[0m: 	UPDATE_EVERY: 75
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.2315013816395456, max=1.0…

0,1
regret,▁

0,1
regret,106564.05185


[34m[1mwandb[0m: Agent Starting Run: 9n9lmzf1 with config:
[34m[1mwandb[0m: 	BATCH_SIZE: 64
[34m[1mwandb[0m: 	BUFFER_SIZE: 1000
[34m[1mwandb[0m: 	LR: 0.008569036990457272
[34m[1mwandb[0m: 	UPDATE_EVERY: 50
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011111111111111112, max=1.0…

VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.22673282213201024, max=1.…

0,1
regret,▁

0,1
regret,107606.56679


[34m[1mwandb[0m: Agent Starting Run: jl9ljeku with config:
[34m[1mwandb[0m: 	BATCH_SIZE: 128
[34m[1mwandb[0m: 	BUFFER_SIZE: 100
[34m[1mwandb[0m: 	LR: 0.003371846868346681
[34m[1mwandb[0m: 	UPDATE_EVERY: 50
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011111111111111112, max=1.0…

VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.23146584804297773, max=1.…

0,1
regret,▁

0,1
regret,105589.04997


[34m[1mwandb[0m: Agent Starting Run: qbf73haa with config:
[34m[1mwandb[0m: 	BATCH_SIZE: 64
[34m[1mwandb[0m: 	BUFFER_SIZE: 100000
[34m[1mwandb[0m: 	LR: 0.0007085934460260936
[34m[1mwandb[0m: 	UPDATE_EVERY: 75
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011288888888884685, max=1.0…

VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.23135931267259896, max=1.…

0,1
regret,▁

0,1
regret,107628.79693


[34m[1mwandb[0m: Agent Starting Run: 94tgk47z with config:
[34m[1mwandb[0m: 	BATCH_SIZE: 256
[34m[1mwandb[0m: 	BUFFER_SIZE: 1000
[34m[1mwandb[0m: 	LR: 0.00018242235662762105
[34m[1mwandb[0m: 	UPDATE_EVERY: 50
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011288888888884685, max=1.0…

VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.2312413687279423, max=1.0…

0,1
regret,▁

0,1
regret,108985.36459


[34m[1mwandb[0m: Agent Starting Run: uf06w99h with config:
[34m[1mwandb[0m: 	BATCH_SIZE: 64
[34m[1mwandb[0m: 	BUFFER_SIZE: 1000
[34m[1mwandb[0m: 	LR: 0.0017640806244823895
[34m[1mwandb[0m: 	UPDATE_EVERY: 75
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.22673282213201024, max=1.…

0,1
regret,▁

0,1
regret,107493.06285


[34m[1mwandb[0m: Agent Starting Run: 9gxbibnl with config:
[34m[1mwandb[0m: 	BATCH_SIZE: 256
[34m[1mwandb[0m: 	BUFFER_SIZE: 100
[34m[1mwandb[0m: 	LR: 0.0058195180913665065
[34m[1mwandb[0m: 	UPDATE_EVERY: 50
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011111111111111112, max=1.0…

VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.22669873722188816, max=1.…

0,1
regret,▁

0,1
regret,107746.207


[34m[1mwandb[0m: Agent Starting Run: yqdrgg5s with config:
[34m[1mwandb[0m: 	BATCH_SIZE: 128
[34m[1mwandb[0m: 	BUFFER_SIZE: 1000
[34m[1mwandb[0m: 	LR: 0.0005863709125912581
[34m[1mwandb[0m: 	UPDATE_EVERY: 100
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011111111111111112, max=1.0…

[34m[1mwandb[0m: Network error (ConnectTimeout), entering retry loop.


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
regret,▁

0,1
regret,106176.79468


[34m[1mwandb[0m: Agent Starting Run: nc71pl4s with config:
[34m[1mwandb[0m: 	BATCH_SIZE: 256
[34m[1mwandb[0m: 	BUFFER_SIZE: 100
[34m[1mwandb[0m: 	LR: 0.005091421120091309
[34m[1mwandb[0m: 	UPDATE_EVERY: 75
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.226801022710182, max=1.0)…

0,1
regret,▁

0,1
regret,109319.05455


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 76yfm2np with config:
[34m[1mwandb[0m: 	BATCH_SIZE: 64
[34m[1mwandb[0m: 	BUFFER_SIZE: 100000
[34m[1mwandb[0m: 	LR: 0.009299501575177336
[34m[1mwandb[0m: 	UPDATE_EVERY: 100
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.22663059813645928, max=1.…

0,1
regret,▁

0,1
regret,108069.38876


[34m[1mwandb[0m: Agent Starting Run: ihdnsuhz with config:
[34m[1mwandb[0m: 	BATCH_SIZE: 128
[34m[1mwandb[0m: 	BUFFER_SIZE: 1000
[34m[1mwandb[0m: 	LR: 0.00683301998433565
[34m[1mwandb[0m: 	UPDATE_EVERY: 75
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.9796992481203007, max=1.0…

0,1
regret,▁

0,1
regret,105921.88994


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 4al0d6mi with config:
[34m[1mwandb[0m: 	BATCH_SIZE: 256
[34m[1mwandb[0m: 	BUFFER_SIZE: 100
[34m[1mwandb[0m: 	LR: 0.002825495414849948
[34m[1mwandb[0m: 	UPDATE_EVERY: 50
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011111111111111112, max=1.0…

VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.23146584804297773, max=1.…

0,1
regret,▁

0,1
regret,106305.80675


[34m[1mwandb[0m: Agent Starting Run: ac2cd15c with config:
[34m[1mwandb[0m: 	BATCH_SIZE: 256
[34m[1mwandb[0m: 	BUFFER_SIZE: 100
[34m[1mwandb[0m: 	LR: 0.00959616137574884
[34m[1mwandb[0m: 	UPDATE_EVERY: 100
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.001 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.23134786613447958, max=1.…

0,1
regret,▁

0,1
regret,105855.21771


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: nzh4281k with config:
[34m[1mwandb[0m: 	BATCH_SIZE: 64
[34m[1mwandb[0m: 	BUFFER_SIZE: 100000
[34m[1mwandb[0m: 	LR: 0.00832924119321163
[34m[1mwandb[0m: 	UPDATE_EVERY: 75
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.002 MB of 0.006 MB uploaded\r'), FloatProgress(value=0.3427540589296452, max=1.0…

0,1
regret,▁

0,1
regret,107174.43264


[34m[1mwandb[0m: Agent Starting Run: l5wyviw3 with config:
[34m[1mwandb[0m: 	BATCH_SIZE: 256
[34m[1mwandb[0m: 	BUFFER_SIZE: 100
[34m[1mwandb[0m: 	LR: 0.007259721462189885
[34m[1mwandb[0m: 	UPDATE_EVERY: 100
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
