In [1]:
import os
import json

import torch
import torch.nn as nn
from torch import optim
import numpy as np
import pandas as pd

import torch_utils
from torch import distributions

import gymnasium as gym
import gymnasium_robotics as gym_robo
import models
import cnn_models
import rl_agents
import rl_callbacks
import helper
import gym_helper
import wandb_support
import wandb
import gym_helper

# from mpi4py import MPI

In [None]:
import mujoco

In [None]:
mujoco.MjModel

In [None]:
gym_robo.__version__

In [None]:
def get_default_device():
    """Pick the torch device used for computations.

    Returns:
        torch.device: ``cuda`` when CUDA is available, ``cpu`` otherwise.
    """
    backend = 'cuda' if torch.cuda.is_available() else 'cpu'
    return torch.device(backend)

device = get_default_device()
print(f"Using device: {device}")

# TEST

In [None]:
gym_robo.register_robotics_envs()

In [None]:
gym.envs.registration.registry

In [None]:
# Read the W&B API key from the environment instead of committing it to the notebook.
# When WANDB_API_KEY is unset, key=None lets wandb fall back to its own credential
# lookup (stored login / interactive prompt).
# NOTE(review): the key that was previously embedded in this cell should be revoked.
wandb.login(key=os.environ.get("WANDB_API_KEY"))

In [None]:
# Smoke test: drive FetchPush with random actions and record a video of every episode in 'test/'.
env = gym.make('FetchPush-v2', max_episode_steps=100, render_mode='rgb_array')
# The trigger returns True for every episode id, i.e. all episodes are recorded.
env = gym.wrappers.RecordVideo(env, 'test/', episode_trigger=lambda episode_id: True)

episodes = 10

for episode in range(episodes):
    done = False
    obs, _ = env.reset()
    while not done:
        # The step info is bound to `info` (not `dict`) so the builtin `dict`
        # stays usable in the rest of the kernel session.
        obs, r, term, trunc, info = env.step(env.action_space.sample())
        # An episode ends on either termination or truncation.
        done = term or trunc
env.close()

In [None]:
env = gym.make("FetchReach-v2")
env.reset()
obs, reward, terminated, truncated, info = env.step(env.action_space.sample())

# gym.make returns a stack of wrappers; the compute_* helpers are defined on the
# base environment, so they are reached through `env.unwrapped` instead of
# relying on the wrappers forwarding unknown attributes.
# The following always has to hold:
assert reward == env.unwrapped.compute_reward(obs["achieved_goal"], obs["desired_goal"], info)
assert truncated == env.unwrapped.compute_truncated(obs["achieved_goal"], obs["desired_goal"], info)
assert terminated == env.unwrapped.compute_terminated(obs["achieved_goal"], obs["desired_goal"], info)

In [None]:
# compute_reward takes the achieved goal, the desired goal and the step info
# (same call shape as the assertions in the previous cell); reuse the values
# returned by that cell's env.step call.
env.unwrapped.compute_reward(obs["achieved_goal"], obs["desired_goal"], info)

In [None]:
env = gym.make('FetchPush-v2', render_mode='rgb_array')

In [None]:
# Report whether the (wrapped) env object exposes `distance_threshold` as a direct attribute.
print('true' if hasattr(env, "distance_threshold") else 'false')

In [None]:
if env.get_wrapper_attr("distance_threshold"):
    print('true')

In [None]:
print(dir(env))


# DDPG

In [None]:
env = gym.make('Pendulum-v1')

In [None]:
# build actor

# Two hidden layers (400 then 300 units) with ReLU; every layer gets its own
# freshly built variance-scaling initializer config.
# Tuple layout is presumably (units, activation, initializer) — confirm against models.ActorModel.
dense_layers = [
    (
        units,
        "relu",
        {"variance scaling": {"scale": 1.0, "mode": "fan_in", "distribution": "uniform"}},
    )
    for units in (400, 300)
]

actor = models.ActorModel(
    env,
    cnn_model=None,
    dense_layers=dense_layers,
    optimizer='Adam',
    optimizer_params={'weight_decay': 0.01},
    learning_rate=0.001,
    normalize_layers=True,
)

In [None]:
actor

In [None]:
# NOTE(review): `ddpg_agent` is first assigned in a later cell (the rl_agents.DDPG(...)
# call), so this cell raises NameError when the notebook is run top to bottom on a fresh kernel.
ddpg_agent.actor_model

In [None]:
# NOTE(review): `ddpg_agent` is first assigned in a later cell (the rl_agents.DDPG(...)
# call), so this cell raises NameError when the notebook is run top to bottom on a fresh kernel.
ddpg_agent.target_actor_model

In [None]:
# build critic

# One 400-unit ReLU layer passed as `state_layers` and one 300-unit ReLU layer
# passed as `merged_layers`, both with a variance-scaling initializer config.
# Tuple layout is presumably (units, activation, initializer) — confirm against models.CriticModel.
state_layers = [
    (400, "relu", {"variance scaling": {"scale": 1.0, "mode": "fan_in", "distribution": "uniform"}}),
]

merged_layers = [
    (300, "relu", {"variance scaling": {"scale": 1.0, "mode": "fan_in", "distribution": "uniform"}}),
]

critic = models.CriticModel(
    env=env,
    cnn_model=None,
    state_layers=state_layers,
    merged_layers=merged_layers,
    optimizer='Adam',
    optimizer_params={'weight_decay': 0.01},
    learning_rate=0.002,
    normalize_layers=True,
)

In [None]:
critic

In [None]:
replay_buffer = helper.ReplayBuffer(env, 100000)
# Use the device detected by get_default_device() near the top of the notebook
# instead of hardcoding 'cuda', so this cell also works on CPU-only machines.
# `device.type` keeps the argument a plain string ('cuda' or 'cpu').
noise = helper.OUNoise(shape=env.action_space.shape, dt=1.0, device=device.type)

In [None]:
# Assemble the DDPG agent from the actor, critic, replay buffer and noise objects
# built in the cells above; a WandbCallback named 'Pendulum-v1' is attached.
# `tau` is presumably the target-network soft-update rate — confirm against rl_agents.DDPG.
ddpg_agent = rl_agents.DDPG(env=env,
                            actor_model=actor,
                            critic_model=critic,
                            discount=0.99,
                            tau=0.005,
                            replay_buffer=replay_buffer,
                            noise=noise,
                            callbacks=[rl_callbacks.WandbCallback('Pendulum-v1')])

In [None]:
ddpg_agent.critic_model

In [None]:
ddpg_agent.target_critic_model

In [None]:
ddpg_agent.train(100, True, 10)

In [None]:
ddpg_agent.test(10, True, 1)

# Actor Critic

In [None]:
env = gym.make("CartPole-v1")

In [None]:
# Two ReLU layers (128 then 256 units), both tagged with the "kaiming normal" initializer name.
dense_layers = [(width, 'relu', "kaiming normal") for width in (128, 256)]



In [None]:
policy_model = models.PolicyModel(env=env, dense_layers=dense_layers, optimizer='Adam', learning_rate=0.001,)

In [None]:
for param in policy_model.parameters():
    print(param)

In [None]:
value_model = models.ValueModel(env, dense_layers=dense_layers, optimizer='Adam', learning_rate=0.001)

In [None]:
value_model

In [None]:
for params in value_model.parameters():
    print(params)

In [None]:
actor_critic = rl_agents.ActorCritic(env,
                                     policy_model,
                                     value_model,
                                     discount=0.99,
                                     policy_trace_decay=0.5,
                                     value_trace_decay=0.5,
                                     callbacks=[rl_callbacks.WandbCallback('CartPole-v1-Actor-Critic')])

In [None]:
actor_critic.train(200)

In [None]:
actor_critic.test(10, True, 1)

# REINFORCE

In [None]:
env = gym.make("CartPole-v1")

In [None]:
# Single 128-unit ReLU layer with an explicit "kaiming normal" config (a=1.0, mode='fan_in').
# NOTE(review): the next cell reassigns `dense_layers` before any model is built,
# so on a top-to-bottom run this value is never used.
dense_layers = [
    (128, 'relu', {
                    "kaiming normal": {
                        "a":1.0,
                        "mode":'fan_in'
                    }
                },
    ),
    # (256, 'relu', {
    #                 "kaiming_normal": {
    #                     "a":0.0,
    #                     "mode":'fan_in'
    #                 }
    #             },
    # )
    ]

In [None]:
dense_layers = [(128, 'relu', "kaiming normal")]

In [None]:
value_model = models.ValueModel(env, dense_layers, 'Adam', 0.001)

In [None]:
for param in value_model.parameters():
    print(param)

In [None]:
policy_model = models.PolicyModel(env, dense_layers, 'Adam', 0.001)

In [None]:
for param in policy_model.parameters():
    print(param)

In [None]:
# Build the REINFORCE agent from the policy and value models created above.
# NOTE(review): the W&B name says 'CartPole-v0' while `env` in this section is
# created as "CartPole-v1" — confirm the intended label.
# The positional 0.99 and the callback list are presumably discount and callbacks —
# confirm against rl_agents.Reinforce.
reinforce = rl_agents.Reinforce(env, policy_model, value_model, 0.99, [rl_callbacks.WandbCallback('CartPole-v0_REINFORCE', chkpt_freq=100)])

In [None]:
reinforce.train(200, True, 50)

In [None]:
reinforce.test(10, True, 1)

# DDPG w/CNN

In [None]:
env = gym.make('CarRacing-v2')

In [None]:
# Convolutional stack description: three bias-free convolutions with 'valid'
# padding and stride 3 (32 output channels each; kernel sizes 7, 5, 3), with a
# relu + batchnorm(32) pair between consecutive convolutions.
# (A leading batchnorm over 3 features is intentionally not part of the list.)
cnn_layers = [
    {"conv": {"out_channels": 32, "kernel_size": 7, "stride": 3, "padding": 'valid', "bias": False}},
    {"relu": {}},
    {"batchnorm": {"num_features": 32}},
    {"conv": {"out_channels": 32, "kernel_size": 5, "stride": 3, "padding": 'valid', "bias": False}},
    {"relu": {}},
    {"batchnorm": {"num_features": 32}},
    {"conv": {"out_channels": 32, "kernel_size": 3, "stride": 3, "padding": 'valid', "bias": False}},
]

In [None]:
cnn = cnn_models.CNN(cnn_layers, env)

In [None]:
cnn

In [None]:
# build actor

# Three 64-unit ReLU layers, each with its own variance-scaling initializer config.
# Tuple layout is presumably (units, activation, initializer) — confirm against models.ActorModel.
dense_layers = [
    (
        64,
        "relu",
        {"variance scaling": {"scale": 1.0, "mode": "fan_in", "distribution": "uniform"}},
    )
    for _ in range(3)
]

actor = models.ActorModel(
    env,
    cnn_model=cnn,
    dense_layers=dense_layers,
    optimizer="Adam",
    optimizer_params={'weight_decay': 0.0},
    learning_rate=0.0001,
    normalize=False,
)

In [None]:
actor

In [None]:
# build critic

# No state-only layers; three 64-unit ReLU layers are passed as `merged_layers`,
# each with its own variance-scaling initializer config.
state_layers = []

merged_layers = [
    (
        64,
        "relu",
        {"variance scaling": {"scale": 1.0, "mode": "fan_in", "distribution": "uniform"}},
    )
    for _ in range(3)
]

critic = models.CriticModel(
    env=env,
    cnn_model=cnn,
    state_layers=state_layers,
    merged_layers=merged_layers,
    optimizer="Adam",
    optimizer_params={'weight_decay': 0.0},
    learning_rate=0.0001,
    normalize=False,
)

In [None]:
critic

In [None]:
replay_buffer = helper.ReplayBuffer(env, 1000000, goal_shape=(1,))
# Use the device detected by get_default_device() near the top of the notebook
# instead of hardcoding 'cuda', so this cell also works on CPU-only machines.
# `device.type` keeps the argument a plain string ('cuda' or 'cpu').
noise = helper.OUNoise(shape=env.action_space.shape, mean=0.0, theta=0.15, sigma=0.01, dt=1.0, device=device.type)

In [None]:
ddpg_agent = rl_agents.DDPG(
    env,
    actor,
    critic,
    discount=0.98,
    tau=0.05,
    action_epsilon=0.2,
    replay_buffer=replay_buffer,
    batch_size=128,
    noise=noise,
    callbacks=[rl_callbacks.WandbCallback("CarRacing-v2")]
)

In [None]:
ddpg_agent.train(1000, True, 10)

In [None]:
wandb.finish()

In [None]:
wandb.login()

# HER

In [None]:
env = gym.make("Reacher-v4")

In [None]:
_,_ = env.reset()

In [None]:
achieved_goal = gym_helper.reacher_achieved_goal(env)
action = env.action_space.sample()
env.step(action)
print(f'observation: {env.get_wrapper_attr("_get_obs")()}')
print(f'distance to goal: {env.get_wrapper_attr("_get_obs")()[8::]}')
print(f'fingertip: {env.get_wrapper_attr("get_body_com")("fingertip")}')
print(f'target: {env.get_wrapper_attr("get_body_com")("target")}')

In [None]:
next_achieved_goal = env.get_wrapper_attr("_get_obs")()[8::]
desired_goal = [0.0, 0.0, 0.0]

In [None]:
# NOTE(review): `reward_func` is first assigned in the next cell
# (gym_helper.get_her_goal_functions), so this cell raises NameError when the
# notebook is run top to bottom on a fresh kernel.
# The trailing 0.05 is presumably a tolerance — confirm against gym_helper.
reward_func(env, action, achieved_goal, next_achieved_goal, desired_goal, 0.05)

In [None]:
desired_goal_func, achieved_goal_func, reward_func = gym_helper.get_her_goal_functions(env)

In [None]:
desired_goal_func(env).shape

In [None]:
# build actor

# Three 256-unit ReLU layers, each with its own variance-scaling initializer config.
# Tuple layout is presumably (units, activation, initializer) — confirm against models.ActorModel.
dense_layers = [
    (
        256,
        "relu",
        {"variance scaling": {"scale": 1.0, "mode": "fan_in", "distribution": "uniform"}},
    )
    for _ in range(3)
]

actor = models.ActorModel(
    env,
    cnn_model=None,
    dense_layers=dense_layers,
    goal_shape=(3,),
    optimizer="Adam",
    optimizer_params={'weight_decay': 0.0},
    learning_rate=0.0001,
    normalize=False,
)

In [None]:
# build critic

# No state-only layers; three 256-unit ReLU layers are passed as `merged_layers`,
# each with its own variance-scaling initializer config.
state_layers = []

merged_layers = [
    (
        256,
        "relu",
        {"variance scaling": {"scale": 1.0, "mode": "fan_in", "distribution": "uniform"}},
    )
    for _ in range(3)
]

critic = models.CriticModel(
    env=env,
    cnn_model=None,
    state_layers=state_layers,
    merged_layers=merged_layers,
    goal_shape=(3,),
    optimizer="Adam",
    optimizer_params={'weight_decay': 0.0},
    learning_rate=0.0001,
    normalize=False,
)

In [None]:
goal_shape = desired_goal_func(env).shape
replay_buffer = helper.ReplayBuffer(env, 100000, goal_shape)
# noise = helper.OUNoise(shape=env.action_space.shape,
#                        mean=0.0,
#                        theta=0.05,
#                        sigma=0.15,
#                        dt=1.0, device='cuda')

noise=helper.NormalNoise(shape=env.action_space.shape,
                         mean = 0.0,
                         stddev=0.05,
                         )

In [None]:
ddpg_agent = rl_agents.DDPG(env=env,
                            actor_model=actor,
                            critic_model=critic,
                            discount=0.98,
                            tau=0.05,
                            action_epsilon=0.2,
                            replay_buffer=replay_buffer,
                            batch_size=256,
                            noise=noise,
                            callbacks=[rl_callbacks.WandbCallback('Reacher-v4')])

In [None]:
her = rl_agents.HER(ddpg_agent,
                    strategy='future',
                    num_goals=4,
                    tolerance=0.001,
                    desired_goal=desired_goal_func,
                    achieved_goal=achieved_goal_func,
                    reward_fn=reward_func)

In [None]:
her.train(10, 50, 16, 40, True, 1000)

In [None]:
wandb.finish()

In [None]:
her.test(10, True, 1)

In [None]:
her.save()

In [None]:
her.agent.goal_normalizer.running_std

In [None]:
loaded_her = rl_agents.HER.load("/workspaces/RL_Agents/pytorch/src/app/assets/models/her")

In [None]:
loaded_her.agent.replay_buffer.sample(10)

In [None]:
loaded_her.agent.state_normalizer.running_cnt

In [None]:
loaded_her.get_config()

In [None]:
loaded_her.test(10, True, 1)

In [None]:
10e4

# HER w/CNN

In [None]:
env = gym.make('CarRacing-v2')

In [None]:
_,_ = env.reset()

In [None]:
desired_goal_func, achieved_goal_func, reward_func = gym_helper.get_her_goal_functions(env)

In [None]:
# Inspect the shape of the desired goal. The callable returned by
# gym_helper.get_her_goal_functions in the previous cell is `desired_goal_func`;
# `desired_goal` is a plain goal value elsewhere in this notebook and is not callable.
desired_goal_func(env).shape

In [None]:
# Convolutional stack description: three bias-free convolutions with 'valid'
# padding and stride 3 (32 output channels each; kernel sizes 7, 5, 3), with a
# relu + batchnorm(32) pair between consecutive convolutions.
# (A leading batchnorm over 3 features is intentionally not part of the list.)
cnn_layers = [
    {"conv": {"out_channels": 32, "kernel_size": 7, "stride": 3, "padding": 'valid', "bias": False}},
    {"relu": {}},
    {"batchnorm": {"num_features": 32}},
    {"conv": {"out_channels": 32, "kernel_size": 5, "stride": 3, "padding": 'valid', "bias": False}},
    {"relu": {}},
    {"batchnorm": {"num_features": 32}},
    {"conv": {"out_channels": 32, "kernel_size": 3, "stride": 3, "padding": 'valid', "bias": False}},
]

# Build the CNN from the layer description and the current env.
cnn = cnn_models.CNN(cnn_layers, env)

In [None]:
# build actor

# Three 256-unit ReLU layers, each with its own variance-scaling initializer config.
# Tuple layout is presumably (units, activation, initializer) — confirm against models.ActorModel.
dense_layers = [
    (
        256,
        "relu",
        {"variance scaling": {"scale": 1.0, "mode": "fan_in", "distribution": "uniform"}},
    )
    for _ in range(3)
]

actor = models.ActorModel(
    env,
    cnn_model=cnn,
    dense_layers=dense_layers,
    goal_shape=(1,),
    optimizer="Adam",
    optimizer_params={'weight_decay': 0.0},
    learning_rate=0.001,
    normalize=False,
)

In [None]:
actor

In [None]:
# build critic

# No state-only layers; three 256-unit ReLU layers are passed as `merged_layers`,
# each with its own variance-scaling initializer config.
state_layers = []

merged_layers = [
    (
        256,
        "relu",
        {"variance scaling": {"scale": 1.0, "mode": "fan_in", "distribution": "uniform"}},
    )
    for _ in range(3)
]

critic = models.CriticModel(
    env=env,
    cnn_model=cnn,
    state_layers=state_layers,
    merged_layers=merged_layers,
    goal_shape=(1,),
    optimizer="Adam",
    optimizer_params={'weight_decay': 0.0},
    learning_rate=0.001,
    normalize=False,
)

In [None]:
critic

In [None]:
goal_shape = desired_goal_func(env).shape
replay_buffer = helper.ReplayBuffer(env, 100000, goal_shape)
# noise = helper.OUNoise(shape=env.action_space.shape,
#                        mean=0.0,
#                        theta=0.05,
#                        sigma=0.15,
#                        dt=1.0, device='cuda')

noise=helper.NormalNoise(shape=env.action_space.shape,
                         mean = 0.0,
                         stddev=0.05,
                         )

In [None]:
ddpg_agent = rl_agents.DDPG(env=env,
                            actor_model=actor,
                            critic_model=critic,
                            discount=0.98,
                            tau=0.05,
                            action_epsilon=0.2,
                            replay_buffer=replay_buffer,
                            batch_size=256,
                            noise=noise,
                            callbacks=[rl_callbacks.WandbCallback('CarRacing-v2')])

In [None]:
ddpg_agent.actor_model

In [None]:
her = rl_agents.HER(ddpg_agent,
                    strategy='future',
                    num_goals=4,
                    tolerance=1,
                    desired_goal=desired_goal_func,
                    achieved_goal=achieved_goal_func,
                    reward_fn=reward_func)

In [None]:
her.agent.actor_model

In [None]:
her.train(num_epochs=20,
          num_cycles=50,
          num_episodes=16,
          num_updates=40,
          render=True,
          render_freq=20
        )

In [None]:
her = rl_agents.HER.load("/workspaces/RL_Agents/pytorch/src/app/models/her")

In [None]:
wandb.finish()

In [None]:
# Manually roll out one episode with the HER agent's env, storing every
# transition both in the agent's replay buffer and in local lists so the
# trajectory can be inspected and relabelled in the following cells.
# NOTE(review): this cell reads `her.state_normalizer` / `her.goal_normalizer`,
# while other cells in this notebook use `her.agent.state_normalizer` /
# `her.agent.goal_normalizer` — confirm which attribute path is current.

# reset environment
state, _ = her.agent.env.reset()
# instantiate empty lists to store current episode trajectory
states, actions, next_states, dones, state_achieved_goals, \
next_state_achieved_goals, desired_goals = [], [], [], [], [], [], []
# set desired goal
desired_goal = her.desired_goal_func(her.agent.env)
# set achieved goal
state_achieved_goal = her.achieved_goal_func(her.agent.env)
# add initial state and goals to local normalizer stats
her.state_normalizer.update_local_stats(state)
her.goal_normalizer.update_local_stats(desired_goal)
her.goal_normalizer.update_local_stats(state_achieved_goal)
# set done flag
done = False
# reset episode reward to 0
episode_reward = 0
# reset steps counter for the episode
episode_steps = 0

while not done:
    # get normalized values for state and desired goal
    state_norm = her.state_normalizer.normalize(state)
    desired_goal_norm = her.goal_normalizer.normalize(desired_goal)
    # get action
    action = her.agent.get_action(state_norm, desired_goal_norm, grad=False)
    # take action
    next_state, reward, term, trunc, _ = her.agent.env.step(action)
    # resolve the done flag for THIS transition before it is stored; updating it
    # only at the end of the loop body would record done=False for every step,
    # including the last step of the episode
    done = bool(term or trunc)
    # get next state achieved goal
    next_state_achieved_goal = her.achieved_goal_func(her.agent.env)
    # add next state and next state achieved goal to normalizers
    her.state_normalizer.update_local_stats(next_state)
    her.goal_normalizer.update_local_stats(next_state_achieved_goal)
    # store trajectory in replay buffer (non normalized!)
    her.agent.replay_buffer.add(state, action, reward, next_state, done,
                                state_achieved_goal, next_state_achieved_goal, desired_goal)

    # append step state, action, next state, and goals to respective lists
    states.append(state)
    actions.append(action)
    next_states.append(next_state)
    dones.append(done)
    state_achieved_goals.append(state_achieved_goal)
    next_state_achieved_goals.append(next_state_achieved_goal)
    desired_goals.append(desired_goal)

    # add to episode reward and increment steps counter
    episode_reward += reward
    episode_steps += 1
    # update state and state achieved goal
    state = next_state
    state_achieved_goal = next_state_achieved_goal

In [None]:
# package episode states, actions, next states, and goals into trajectory tuple
trajectory = (states, actions, next_states, dones, state_achieved_goals, next_state_achieved_goals, desired_goals)

In [None]:
states, actions, next_states, dones, state_achieved_goals, next_state_achieved_goals, desired_goals = trajectory

In [None]:
for idx, (s, a, ns, d, sag, nsag, dg) in enumerate(zip(states, actions, next_states, dones, state_achieved_goals, next_state_achieved_goals, desired_goals)):
    print(f'a={a}, d={d}, sag={sag}, nsag={nsag}, dg={dg}')

In [None]:
# Hindsight relabelling of the trajectory collected above: for every step,
# substitute a goal that was actually achieved later in the episode, recompute
# the reward with her.reward_fn, print the relabelled transition and add it to
# the replay buffer.
# NOTE: the loop variables below rebind `state`, `action`, `next_state`, `done`,
# `state_achieved_goal`, `next_state_achieved_goal` and `desired_goal` at
# notebook scope, overwriting the values left by the rollout cell.
strategy = "future"
num_goals = 4

# loop over each step in the trajectory to set new achieved goals, calculate new reward, and save to replay buffer
for idx, (state, action, next_state, done, state_achieved_goal, next_state_achieved_goal, desired_goal) in enumerate(zip(states, actions, next_states, dones, state_achieved_goals, next_state_achieved_goals, desired_goals)):

    # "final": the substitute goal is the achieved goal of the last step in the trajectory
    if strategy == "final":
        new_desired_goal = next_state_achieved_goals[-1]
        new_reward = her.reward_fn(state_achieved_goal, next_state_achieved_goal, new_desired_goal)
        print(f'transition: action={action}, reward={new_reward}, done={done}, state_achieved_goal={state_achieved_goal}, next_state_achieved_goal={next_state_achieved_goal}, desired_goal={new_desired_goal}')
        her.agent.replay_buffer.add(state, action, new_reward, next_state, done, state_achieved_goal, next_state_achieved_goal, new_desired_goal)

    # "future": add up to num_goals relabelled copies of this step
    if strategy == 'future':
        for i in range(num_goals):
            # stop once fewer than i + 1 steps remain after idx (also guarantees
            # the randint range below is non-empty)
            if idx + i + 1 >= len(states):
                break
            # draw a later step index from [idx + 1, len(states)); the upper bound is exclusive
            goal_idx = np.random.randint(idx + 1, len(states))
            new_desired_goal = next_state_achieved_goals[goal_idx]
            new_reward = her.reward_fn(state_achieved_goal, next_state_achieved_goal, new_desired_goal)
            print(f'transition: action={action}, reward={new_reward}, done={done}, state_achieved_goal={state_achieved_goal}, next_state_achieved_goal={next_state_achieved_goal}, desired_goal={new_desired_goal}')
            her.agent.replay_buffer.add(state, action, new_reward, next_state, done, state_achieved_goal, next_state_achieved_goal, new_desired_goal)
    

    


In [None]:
s, a, r, ns, d, sag, nsag, dg = her.agent.replay_buffer.sample(100)

In [None]:
for i in range(100):
    print(f'{i}: a={a[i]}, r={r[i]}, d={d[i]}, sag={sag[i]}, nsag={nsag[i]}, dg={dg[i]} ')

# HER Pendulum

In [None]:
env = gym.make('Pendulum-v1')

In [None]:
# build actor

# Two hidden layers (400 then 300 units) with ReLU; every layer gets its own
# freshly built variance-scaling initializer config.
# Tuple layout is presumably (units, activation, initializer) — confirm against models.ActorModel.
dense_layers = [
    (
        units,
        "relu",
        {"variance scaling": {"scale": 1.0, "mode": "fan_in", "distribution": "uniform"}},
    )
    for units in (400, 300)
]

actor = models.ActorModel(
    env,
    cnn_model=None,
    dense_layers=dense_layers,
    optimizer='Adam',
    optimizer_params={'weight_decay': 0.01},
    learning_rate=0.001,
    normalize=False,
)

In [None]:
# build critic

# No state-only layers; three 64-unit ReLU layers are passed as `merged_layers`,
# each with an empty "kaiming uniform" config (a fresh dict per layer).
state_layers = []

merged_layers = [(64, "relu", {"kaiming uniform": {}}) for _ in range(3)]

critic = models.CriticModel(
    env=env,
    cnn_model=None,
    state_layers=state_layers,
    merged_layers=merged_layers,
    optimizer="Adam",
    optimizer_params={'weight_decay': 0.0},
    learning_rate=0.001,
    normalize=False,
)

In [None]:
replay_buffer = helper.ReplayBuffer(env, 100000, (3,))
# Use the device detected by get_default_device() near the top of the notebook
# instead of hardcoding 'cuda', so this cell also works on CPU-only machines.
# `device.type` keeps the argument a plain string ('cuda' or 'cpu').
noise = helper.OUNoise(shape=env.action_space.shape, dt=1.0, device=device.type)

In [None]:
ddpg_agent = rl_agents.DDPG(env=env,
                            actor_model=actor,
                            critic_model=critic,
                            discount=0.99,
                            tau=0.005,
                            replay_buffer=replay_buffer,
                            noise=noise,
                            callbacks=[rl_callbacks.WandbCallback('Pendulum-v1')])

In [None]:
def desired_goal_func(env):
    """Return the fixed desired goal: a length-3 float array of zeros.

    `env` is accepted for signature compatibility and is not read.
    """
    return np.zeros(3)

def achieved_goal_func(env):
    """Return the env's current observation as the achieved goal.

    Looks up `_get_obs` through `env.get_wrapper_attr` and calls it with no arguments.
    """
    get_obs = env.get_wrapper_attr('_get_obs')
    return get_obs()

def reward_func(env):
    """Placeholder reward function: performs no computation and returns None."""
    return None

In [None]:
# Wrap the Pendulum DDPG agent in HER using the goal functions defined in the
# previous cell.
# NOTE(review): `reward_func` from the previous cell is a placeholder that returns
# None; with strategy='none' it is presumably never called — confirm against rl_agents.HER.
her = rl_agents.HER(
    agent=ddpg_agent,
    strategy='none',
    desired_goal=desired_goal_func,
    achieved_goal=achieved_goal_func,
    reward_fn=reward_func,
    normalizer_clip=10.0
)

In [None]:
her.agent.critic_model

In [None]:
her.agent.target_critic_model

In [None]:
her.train(1,1,100,1)

In [None]:
wandb.finish()

In [None]:
state = env.observation_space.sample()
state

In [None]:
her.agent.state_normalizer.normalize(state)

In [None]:
goal = her.desired_goal_func(her.agent.env)
goal

In [None]:
her.agent.goal_normalizer.normalize(goal)

In [None]:
def remove_renders(folder_path):
    """Delete rendered episode artifacts from a folder.

    Every directory entry whose name ends with ".mp4" or ".meta.json" is
    removed; all other entries are left untouched. Subfolders are not searched.

    Args:
        folder_path: Directory to clean.
    """
    render_suffixes = (".mp4", ".meta.json")
    # Snapshot the matching names first, then remove them one by one.
    stale_names = [name for name in os.listdir(folder_path) if name.endswith(render_suffixes)]
    for name in stale_names:
        os.remove(os.path.join(folder_path, name))

In [None]:
remove_renders("/workspaces/RL_Agents/pytorch/src/app/assets/models/ddpg/renders/training")

# HER Fetch-Reach (Robotics)

In [None]:
env = gym.make("FetchReach-v2", max_episode_steps=50)

In [None]:
desired_goal_func, achieved_goal_func, reward_func = gym_helper.get_her_goal_functions(env)

In [None]:
achieved_goal_func(env)

In [None]:
env.get_wrapper_attr("_get_obs")()

In [None]:
# reset env state
env.reset()

In [None]:
goal_shape = desired_goal_func(env).shape

In [None]:
goal_shape

In [None]:
# build actor

# Three 64-unit ReLU layers, each with an empty "kaiming uniform" config
# (a fresh dict per layer).
# Tuple layout is presumably (units, activation, initializer) — confirm against models.ActorModel.
dense_layers = [(64, "relu", {"kaiming uniform": {}}) for _ in range(3)]

actor = models.ActorModel(
    env,
    cnn_model=None,
    dense_layers=dense_layers,
    goal_shape=goal_shape,
    optimizer='Adam',
    optimizer_params={'weight_decay': 0.0},
    learning_rate=0.00001,
    normalize_layers=False,
)

In [None]:
actor

In [None]:
# build critic

# No state-only layers; three 64-unit ReLU layers are passed as `merged_layers`,
# each with an empty "kaiming uniform" config (a fresh dict per layer).
state_layers = []

merged_layers = [(64, "relu", {"kaiming uniform": {}}) for _ in range(3)]

critic = models.CriticModel(
    env=env,
    cnn_model=None,
    state_layers=state_layers,
    merged_layers=merged_layers,
    goal_shape=goal_shape,
    optimizer="Adam",
    optimizer_params={'weight_decay': 0.0},
    learning_rate=0.00001,
    normalize_layers=False,
)

In [None]:
critic

In [None]:
replay_buffer = helper.ReplayBuffer(env, 1000000, goal_shape)
# noise = helper.OUNoise(shape=env.action_space.shape, dt=1.0, device='cuda')
noise = helper.NormalNoise(shape=env.action_space.shape, mean=0.0, stddev=0.05)

In [None]:
ddpg_agent = rl_agents.DDPG(env=env,
                            actor_model=actor,
                            critic_model=critic,
                            discount=0.98,
                            tau=0.05,
                            action_epsilon=0.2,
                            replay_buffer=replay_buffer,
                            batch_size=256,
                            noise=noise,
                            callbacks=[rl_callbacks.WandbCallback("FetchReach-v2")])

In [None]:
ddpg_agent.critic_model

In [None]:
her = rl_agents.HER(
    agent=ddpg_agent,
    strategy='future',
    tolerance=0.05,
    num_goals=4,
    desired_goal=desired_goal_func,
    achieved_goal=achieved_goal_func,
    reward_fn=reward_func,
    normalizer_clip=5.0
)

In [None]:
her.train(num_epochs=50,
          num_cycles=50,
          num_episodes=16,
          num_updates=40,
          render=True,
          render_freq=1000)

In [None]:
states, action, rewards, next_states, dones, achieved_goals, next_achieved_goals, desired_goals = her.agent.replay_buffer.sample(2)

In [None]:
desired_goals

In [None]:
her.agent.env.get_wrapper_attr("distance_threshold")

In [None]:
# get success
her.agent.env.get_wrapper_attr("_is_success")(achieved_goal_func(her.agent.env), desired_goal_func(her.agent.env))

In [None]:
# NOTE(review): `next_state_achieved_goal` and `desired_goal` are not assigned in
# this section; they are leftovers from the manual rollout / relabelling cells
# in the "HER w/CNN" section, so this cell depends on prior kernel state.
her.agent.env.get_wrapper_attr("goal_distance")(next_state_achieved_goal, desired_goal, None)

In [None]:
pusher_her = rl_agents.HER.load("/workspaces/RL_Agents/pytorch/src/app/assets/models/her")

In [None]:
pusher_her.agent.env.reset()

In [None]:
pusher_her.get_config()

In [None]:
wandb.finish()

In [None]:
np.linalg.norm(pusher_her.agent.env.get_wrapper_attr("get_body_com")("goal") - pusher_her.agent.env.get_wrapper_attr("get_body_com")("object"))

In [None]:
pusher_her.agent.replay_buffer.get_config()

In [None]:

pusher_her.agent.replay_buffer.desired_goals

In [None]:
## TEST ENV
env = gym.make("Pusher-v5", render_mode="rgb_array")

In [None]:
# Wrap the env in a video recorder; the trigger returns True for every episode index.
# NOTE(review): "/renders/training" is an absolute path at the filesystem root — confirm
# this is intended rather than a path relative to the notebook.
env = gym.wrappers.RecordVideo(env, "/renders/training", episode_trigger=lambda _episode_id: True)


In [None]:
# Drive the (video-recorded) env with 1000 random actions as a smoke test.
state, _ = env.reset()

try:
    for i in range(1000):
        # take a random action
        next_state, reward, term, trunc, _ = env.step(env.action_space.sample())
        if term or trunc:
            # The episode is over: Gymnasium requires reset() before the next step(),
            # otherwise behaviour is undefined.
            state, _ = env.reset()
        else:
            state = next_state
finally:
    # Always release the env, even if a step raises.
    env.close()

# HER Fetch Push (Robotics)

In [None]:
# Create the FetchPush-v2 environment used by the actor, critic, buffer and agent cells below.
env = gym.make('FetchPush-v2')

In [None]:
# Get the three HER callables for this env: desired-goal getter, achieved-goal getter, reward function.
desired_goal_func, achieved_goal_func, reward_func = gym_helper.get_her_goal_functions(env)

In [None]:
# Reset the env state before the goal function is called on it in the next cell.
env.reset()

In [None]:
# Shape of the desired goal; passed as `goal_shape` to the actor, critic and replay buffer below.
goal_shape = desired_goal_func(env).shape

In [None]:
# build actor

# Three identical layer specs, each with its own (empty) "kaiming uniform" options dict.
# Tuple fields are presumably (units, activation, initializer config) — confirm against models.ActorModel.
dense_layers = [(64, "relu", {"kaiming uniform": {}}) for _ in range(3)]

# Actor model for this env: dense layers only (no CNN), Adam without weight decay.
actor = models.ActorModel(
    env,
    cnn_model=None,
    dense_layers=dense_layers,
    goal_shape=goal_shape,
    normalize_layers=False,
    optimizer='Adam',
    optimizer_params={'weight_decay': 0.0},
    learning_rate=1e-5,
)

In [None]:
# build critic

# No state-only layers are configured.
state_layers = []

# Three identical merged-layer specs, each with its own (empty) "kaiming uniform" options dict.
merged_layers = [(64, "relu", {"kaiming uniform": {}}) for _ in range(3)]


# Critic model for this env: no CNN, the state/merged layer specs defined above, Adam without weight decay.
critic = models.CriticModel(
    env=env,
    cnn_model=None,
    state_layers=state_layers,
    merged_layers=merged_layers,
    goal_shape=goal_shape,
    normalize_layers=False,
    optimizer="Adam",
    optimizer_params={'weight_decay': 0.0},
    learning_rate=1e-5,
)

In [None]:
# Goal-aware replay buffer; the second argument (1,000,000) is presumably its capacity — confirm.
replay_buffer = helper.ReplayBuffer(env, 1_000_000, goal_shape)

# Normal noise shaped like the action space.
# Alternative kept for reference:
# noise = helper.OUNoise(shape=env.action_space.shape, dt=1.0, device='cuda')
noise = helper.NormalNoise(
    shape=env.action_space.shape,
    mean=0.0,
    stddev=0.05,
)

In [None]:
# DDPG agent for FetchPush-v2, logging through a W&B callback and saving under fetch_push/models/ddpg/.
ddpg_agent = rl_agents.DDPG(
    # env + models
    env=env,
    actor_model=actor,
    critic_model=critic,
    # numeric hyperparameters
    discount=0.98,
    tau=0.05,
    action_epsilon=0.3,
    batch_size=128,
    # buffer + noise built in the previous cell
    replay_buffer=replay_buffer,
    noise=noise,
    # logging + output location
    callbacks=[rl_callbacks.WandbCallback("FetchPush-v2")],
    save_dir="fetch_push/models/ddpg/",
)

In [None]:
# Wrap the DDPG agent in HER using the 'final' strategy; saves under fetch_push/models/her/.
her = rl_agents.HER(
    agent=ddpg_agent,
    # goal accessors / reward function obtained for this env
    desired_goal=desired_goal_func,
    achieved_goal=achieved_goal_func,
    reward_fn=reward_func,
    # HER settings
    strategy='final',
    num_goals=4,
    tolerance=0.05,
    normalizer_clip=5.0,
    # output location
    save_dir="fetch_push/models/her/",
)

In [None]:
# Run HER training with explicit epoch/cycle/episode/update counts and rendering enabled.
her.train(
    num_epochs=50,
    num_cycles=50,
    num_episodes=16,
    num_updates=40,
    render_freq=1000,
    render=True,
)

# TESTING MULTITHREADING

In [None]:
# Create a fresh FetchPush-v2 environment for the multi-worker HER run below.
env = gym.make('FetchPush-v2')

In [None]:
# Get the three HER callables for this env: desired-goal getter, achieved-goal getter, reward function.
desired_goal_func, achieved_goal_func, reward_func = gym_helper.get_her_goal_functions(env)

In [None]:
# Reset the env state before the goal function is called on it in the next cell.
env.reset()

In [None]:
# Shape of the desired goal; passed as `goal_shape` to the actor, critic and replay buffer below.
goal_shape = desired_goal_func(env).shape

In [None]:
# build actor

# Three identical layer specs, each with its own (empty) "kaiming uniform" options dict.
# Tuple fields are presumably (units, activation, initializer config) — confirm against models.ActorModel.
dense_layers = [(64, "relu", {"kaiming uniform": {}}) for _ in range(3)]

# Actor model for this env: dense layers only (no CNN), Adam without weight decay.
actor = models.ActorModel(
    env,
    cnn_model=None,
    dense_layers=dense_layers,
    goal_shape=goal_shape,
    normalize_layers=False,
    optimizer='Adam',
    optimizer_params={'weight_decay': 0.0},
    learning_rate=1e-5,
)

In [None]:
# build critic

# No state-only layers are configured.
state_layers = []

# Three identical merged-layer specs, each with its own (empty) "kaiming uniform" options dict.
merged_layers = [(64, "relu", {"kaiming uniform": {}}) for _ in range(3)]


# Critic model for this env: no CNN, the state/merged layer specs defined above, Adam without weight decay.
critic = models.CriticModel(
    env=env,
    cnn_model=None,
    state_layers=state_layers,
    merged_layers=merged_layers,
    goal_shape=goal_shape,
    normalize_layers=False,
    optimizer="Adam",
    optimizer_params={'weight_decay': 0.0},
    learning_rate=1e-5,
)

In [None]:
# Goal-aware replay buffer; the second argument (1,000,000) is presumably its capacity — confirm.
replay_buffer = helper.ReplayBuffer(env, 1_000_000, goal_shape)

# Normal noise shaped like the action space.
# Alternative kept for reference:
# noise = helper.OUNoise(shape=env.action_space.shape, dt=1.0, device='cuda')
noise = helper.NormalNoise(
    shape=env.action_space.shape,
    mean=0.0,
    stddev=0.05,
)

In [None]:
# DDPG agent for FetchPush-v2, logging through a W&B callback and saving under fetch_push/models/ddpg/.
ddpg_agent = rl_agents.DDPG(
    # env + models
    env=env,
    actor_model=actor,
    critic_model=critic,
    # numeric hyperparameters
    discount=0.98,
    tau=0.05,
    action_epsilon=0.3,
    batch_size=128,
    # buffer + noise built in the previous cell
    replay_buffer=replay_buffer,
    noise=noise,
    # logging + output location
    callbacks=[rl_callbacks.WandbCallback("FetchPush-v2")],
    save_dir="fetch_push/models/ddpg/",
)

In [None]:
# Wrap the DDPG agent in HER with num_workers=4 and the 'final' strategy; saves under fetch_push/models/her/.
her = rl_agents.HER(
    agent=ddpg_agent,
    num_workers=4,
    # goal accessors / reward function obtained for this env
    desired_goal=desired_goal_func,
    achieved_goal=achieved_goal_func,
    reward_fn=reward_func,
    # HER settings
    strategy='final',
    num_goals=4,
    tolerance=0.05,
    normalizer_clip=5.0,
    # output location
    save_dir="fetch_push/models/her/",
)

In [None]:
# Start training with no arguments.
# NOTE(review): the other train() calls in this notebook pass num_epochs/num_cycles/num_episodes/
# num_updates explicitly — confirm train() has defaults for them.
her.train()

# TESTING

In [None]:
# Read the saved HER config JSON into a dict.
# NOTE(review): absolute, machine-specific path — consider a configurable base directory.
config_path = "/workspaces/RL_Agents/pytorch/src/app/HER_Test/her/config.json"
with open(config_path, 'r') as config_fh:
    config = json.load(config_fh)

In [None]:
# Display the config dict loaded from JSON.
config

In [None]:
# Rebuild the HER agent from the loaded config dict.
# NOTE(review): another cell in this notebook calls HER.load with a path string instead of a
# dict — confirm load() accepts both forms.
agent = rl_agents.HER.load(config)

In [None]:
# Print the private `_sweep` attribute of every callback attached to the wrapped agent.
attached_callbacks = agent.agent.callbacks
for cb in attached_callbacks:
    print(cb._sweep)

# Co-Occurrence

In [2]:
import subprocess

In [3]:
# Location of the W&B sweep definition (JSON), relative to the working directory
config_file_path = 'assets/wandb_config.json'

# Parse the JSON file into a dict
with open(config_file_path, 'r') as config_fh:
    wandb_config = json.load(config_fh)

# Echo the parsed dict as a sanity check
print(wandb_config)

{'method': 'bayes', 'project': 'FetchReach-v2', 'name': 'Test', 'metric': {'name': 'episode_reward', 'goal': 'maximize'}, 'parameters': {'env': {'parameters': {'id': {'value': 'FetchReach-v2'}, 'max_episode_steps': {'value': 50}}}, 'model_type': {'values': ['HER_DDPG']}, 'HER_DDPG': {'parameters': {'HER_DDPG_actor_learning_rate': {'values': [1e-05, 0.0001]}, 'HER_DDPG_critic_learning_rate': {'values': [1e-05, 0.0001]}, 'HER_DDPG_goal_strategy': {'values': ['future']}, 'HER_DDPG_num_goals': {'min': 4, 'max': 8}, 'HER_DDPG_goal_tolerance': {'values': [0.05]}, 'HER_DDPG_discount': {'values': [0.99, 0.9]}, 'HER_DDPG_tau': {'values': [0.05]}, 'HER_DDPG_epsilon_greedy': {'values': [0.2, 0.3]}, 'HER_DDPG_normalizer_clip': {'values': [5]}, 'HER_DDPG_device': {'value': 'cuda'}, 'HER_DDPG_actor_num_cnn_layers': {'value': 0}, 'HER_DDPG_actor_num_layers': {'value': 2}, 'HER_DDPG_actor_activation': {'values': ['relu']}, 'HER_DDPG_actor_hidden_kernel_initializer': {'values': ['kaiming_uniform']}, 'H

In [4]:
# Location of the sweep/training run settings (JSON), relative to the working directory
config_file_path = 'assets/sweep_config.json'

# Parse the JSON file into a dict
with open(config_file_path, 'r') as config_fh:
    sweep_config = json.load(config_fh)

# Echo the parsed dict as a sanity check
print(sweep_config)

{'num_sweeps': 5, 'num_episodes': 10, 'seed': 42, 'use_mpi': False, 'num_workers': 1, 'num_agents': 1, 'num_epochs': 1, 'num_cycles': 1, 'num_updates': 1}


In [5]:
# Ensure the output folder exists, then write both JSON files into it.
# Note the naming: `sweep_config` is written to train_config.json and
# `wandb_config` is written to sweep_config.json.
os.makedirs('sweep', exist_ok=True)
working_dir = os.getcwd()
train_config_path = os.path.join(working_dir, 'sweep/train_config.json')
sweep_config_path = os.path.join(working_dir, 'sweep/sweep_config.json')

for target_path, payload in (
    (train_config_path, sweep_config),
    (sweep_config_path, wandb_config),
):
    with open(target_path, 'w') as f:
        json.dump(payload, f)

In [6]:
import sys

# Launch sweep.py with the same interpreter that runs this kernel; a bare 'python'
# is resolved via PATH and may point at a different environment.
command = [sys.executable, 'sweep.py']

# Set the environment variable in this process; the child inherits it because
# Popen is called without an explicit `env`.
os.environ['WANDB_DISABLE_SERVICE'] = 'true'

# Keep the handle so the background process can be polled, waited on or terminated
# later (e.g. sweep_process.poll(), sweep_process.terminate()).
sweep_process = subprocess.Popen(command)
sweep_process

<Popen: returncode: None args: ['python', 'sweep.py']>

DEBUG:git.cmd:Popen(['git', 'rev-parse', '--show-toplevel'], cwd=/workspaces/RL_Agents, stdin=None, shell=False, universal_newlines=False)
DEBUG:git.cmd:Popen(['git', 'rev-parse', '--show-toplevel'], cwd=/workspaces/RL_Agents, stdin=None, shell=False, universal_newlines=False)
DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): api.wandb.ai:443
DEBUG:urllib3.connectionpool:https://api.wandb.ai:443 "POST /graphql HTTP/1.1" 200 189
INFO:wandb.agents.pyagent:Starting sweep agent: entity=None, project=FetchReach-v2, count=5
DEBUG:wandb.agents.pyagent:Agent._setup()
DEBUG:wandb.agents.pyagent:Agent._register()
DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): api.wandb.ai:443
DEBUG:urllib3.connectionpool:https://api.wandb.ai:443 "POST /graphql HTTP/1.1" 200 68
DEBUG:wandb.agents.pyagent:agent_id = QWdlbnQ6dHM5ajhwZ3Q=


Create sweep with ID: z0diu9m3
Sweep URL: https://wandb.ai/jasonhayes1987/FetchReach-v2/sweeps/z0diu9m3


DEBUG:urllib3.connectionpool:https://api.wandb.ai:443 "POST /graphql HTTP/1.1" 200 811
DEBUG:wandb.agents.pyagent:Job received: Job(bir8f7df,{'HER_DDPG': {'value': {'HER_DDPG_actor_activation': 'relu', 'HER_DDPG_actor_clamp_output': 0.05, 'HER_DDPG_actor_hidden_kernel_initializer': 'kaiming_uniform', 'HER_DDPG_actor_hidden_kernel_kaiming_uniform': {'kaiming_uniform_mode': 'fan_in'}, 'HER_DDPG_actor_learning_rate': 0.0001, 'HER_DDPG_actor_normalize_layers': False, 'HER_DDPG_actor_num_cnn_layers': 0, 'HER_DDPG_actor_num_layers': 2, 'HER_DDPG_actor_optimizer': 'Adam', 'HER_DDPG_actor_optimizer_Adam_options': {'Adam_weight_decay': 0}, 'HER_DDPG_actor_output_kernel_constant': {'constant_value': 0.003}, 'HER_DDPG_actor_output_kernel_initializer': 'constant', 'HER_DDPG_batch_size': 256, 'HER_DDPG_critic_activation': 'relu', 'HER_DDPG_critic_hidden_kernel_initializer': 'kaiming_uniform', 'HER_DDPG_critic_hidden_kernel_kaiming_uniform': {'kaiming_uniform_mode': 'fan_in'}, 'HER_DDPG_critic_learn

Loaded train config: {'num_sweeps': 5, 'num_episodes': 10, 'seed': 42, 'use_mpi': False, 'num_workers': 1, 'num_agents': 1, 'num_epochs': 1, 'num_cycles': 1, 'num_updates': 1}
Loaded sweep config: {'method': 'bayes', 'project': 'FetchReach-v2', 'name': 'Test', 'metric': {'name': 'episode_reward', 'goal': 'maximize'}, 'parameters': {'env': {'parameters': {'id': {'value': 'FetchReach-v2'}, 'max_episode_steps': {'value': 50}}}, 'model_type': {'values': ['HER_DDPG']}, 'HER_DDPG': {'parameters': {'HER_DDPG_actor_learning_rate': {'values': [1e-05, 0.0001]}, 'HER_DDPG_critic_learning_rate': {'values': [1e-05, 0.0001]}, 'HER_DDPG_goal_strategy': {'values': ['future']}, 'HER_DDPG_num_goals': {'min': 4, 'max': 8}, 'HER_DDPG_goal_tolerance': {'values': [0.05]}, 'HER_DDPG_discount': {'values': [0.99, 0.9]}, 'HER_DDPG_tau': {'values': [0.05]}, 'HER_DDPG_epsilon_greedy': {'values': [0.2, 0.3]}, 'HER_DDPG_normalizer_clip': {'values': [5]}, 'HER_DDPG_device': {'value': 'cuda'}, 'HER_DDPG_actor_num_cnn

DEBUG:urllib3.connectionpool:https://api.wandb.ai:443 "POST /graphql HTTP/1.1" 200 1879
DEBUG:urllib3.connectionpool:https://api.wandb.ai:443 "POST /graphql HTTP/1.1" 200 371
wandb: Currently logged in as: jasonhayes1987. Use `wandb login --relogin` to force relogin
DEBUG:git.cmd:Popen(['git', 'cat-file', '--batch-check'], cwd=/workspaces/RL_Agents, stdin=<valid stream>, shell=False, universal_newlines=False)
DEBUG:urllib3.connectionpool:https://api.wandb.ai:443 "POST /graphql HTTP/1.1" 200 57
wandb: wandb version 0.17.1 is available!  To upgrade, please run:
wandb:  $ pip install wandb --upgrade
wandb: Tracking run with wandb version 0.17.0
wandb: Run data is saved locally in /workspaces/RL_Agents/pytorch/src/app/wandb/run-20240608_181946-bir8f7df
wandb: Run `wandb offline` to turn off syncing.
wandb: Syncing run warm-sweep-1
wandb: ⭐️ View project at https://wandb.ai/jasonhayes1987/FetchReach-v2
wandb: 🧹 View sweep at https://wandb.ai/jasonhayes1987/FetchReach-v2/sweeps/z0diu9m3
wand

signal only works in main thread of the main interpreter
sweep wandb config:{'HER_DDPG': {'HER_DDPG_actor_activation': 'relu', 'HER_DDPG_actor_clamp_output': 0.05, 'HER_DDPG_actor_hidden_kernel_initializer': 'kaiming_uniform', 'HER_DDPG_actor_hidden_kernel_kaiming_uniform': {'kaiming_uniform_mode': 'fan_in'}, 'HER_DDPG_actor_learning_rate': 0.0001, 'HER_DDPG_actor_normalize_layers': False, 'HER_DDPG_actor_num_cnn_layers': 0, 'HER_DDPG_actor_num_layers': 2, 'HER_DDPG_actor_optimizer': 'Adam', 'HER_DDPG_actor_optimizer_Adam_options': {'Adam_weight_decay': 0}, 'HER_DDPG_actor_output_kernel_constant': {'constant_value': 0.003}, 'HER_DDPG_actor_output_kernel_initializer': 'constant', 'HER_DDPG_batch_size': 256, 'HER_DDPG_critic_activation': 'relu', 'HER_DDPG_critic_hidden_kernel_initializer': 'kaiming_uniform', 'HER_DDPG_critic_hidden_kernel_kaiming_uniform': {'kaiming_uniform_mode': 'fan_in'}, 'HER_DDPG_critic_learning_rate': 0.0001, 'HER_DDPG_critic_merged_num_layers': 2, 'HER_DDPG_critic

DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): api.wandb.ai:443
DEBUG:urllib3.connectionpool:https://api.wandb.ai:443 "POST /graphql HTTP/1.1" 200 57
DEBUG:urllib3.connectionpool:https://api.wandb.ai:443 "POST /graphql HTTP/1.1" 200 None
DEBUG:urllib3.connectionpool:https://api.wandb.ai:443 "POST /graphql HTTP/1.1" 200 786
DEBUG:urllib3.connectionpool:https://api.wandb.ai:443 "POST /graphql HTTP/1.1" 200 785
DEBUG:urllib3.connectionpool:https://api.wandb.ai:443 "POST /graphql HTTP/1.1" 200 785
DEBUG:urllib3.connectionpool:https://api.wandb.ai:443 "POST /graphql HTTP/1.1" 200 785
wandb: logging graph, to disable use `wandb.watch(log_graph=False)`
wandb: logging graph, to disable use `wandb.watch(log_graph=False)`

Mean of empty slice.


invalid value encountered in scalar divide

wandb: Adding directory to artifact (./HER_Test/her/ddpg)... Done. 0.0s
DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): api.wandb.ai:443
DEBUG:urllib3.connectionpool:https://api

epoch 0 cycle 1 episode 10, success percentage 0.0, reward -50.0, avg reward -49.5, avg episode time 0.21s


DEBUG:urllib3.connectionpool:https://api.wandb.ai:443 "POST /graphql HTTP/1.1" 200 57
wandb:                                                                                
wandb: 
wandb: Run history:
wandb:                  action_0 ▅▆▅▄▅▅▅▅▅▅▆▅▅▁▅▅▄▄▅▇▇▅▇▅▅▁▅▅▅▅▅▅▄▅▅█▅▅▅▇
wandb:                  action_1 ▄▅▅▄▅▅▅▅▅▅█▅▅▁▅▅▅▄▅▂▃▂▆▅▄▅▅▅▅▅▄▄▅▅▅▃▅▅▅▁
wandb:                  action_2 ▄▂▄▄▄▄▄▄▄▄▃▄▄▆▄▄▄▄▄▂▄▅▁▄▄█▄▄▄▄▄▄▄▄▄▅▄▄▄▅
wandb:                  action_3 ▄▇▅▄▅▅▅▅▄▅▂▅▄▃▅▄▅▅▄▅▆▃█▄▅▃▅▅▅▅▄▄▅▅▅▆▄▅▅▁
wandb:                actor_loss ▁▂▃▄▅▆▆▇█
wandb:         actor_predictions █████▇▇▇▇▇▅▅▅▅▅▅▅▅▅▅▄▄▄▄▄▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁
wandb:                avg_reward █▄▃▂▂▂▁▁▁▁
wandb:                      best █▁▁▁▁▁▁▁▁▁
wandb:               critic_loss █▄▂▁▃▃▂▃▃
wandb:        critic_predictions █████▇▇▇▇▇▆▆▆▆▆▅▅▅▅▅▄▄▄▄▄▃▃▃▃▃▂▂▂▂▂▁▁▁▁▁
wandb:                   episode ▁▂▃▃▄▅▆▆▇█
wandb:            episode_reward █▁▁▁▁▁▁▁▁▁
wandb:              episode_time ▄█▂▃▁▂▁▁▂▂
wandb:             goal_distance ▁▄▅▄▆▆▆▇▄▄▅▅▅▆▆▆

Loaded train config: {'num_sweeps': 5, 'num_episodes': 10, 'seed': 42, 'use_mpi': False, 'num_workers': 1, 'num_agents': 1, 'num_epochs': 1, 'num_cycles': 1, 'num_updates': 1}
Loaded sweep config: {'method': 'bayes', 'project': 'FetchReach-v2', 'name': 'Test', 'metric': {'name': 'episode_reward', 'goal': 'maximize'}, 'parameters': {'env': {'parameters': {'id': {'value': 'FetchReach-v2'}, 'max_episode_steps': {'value': 50}}}, 'model_type': {'values': ['HER_DDPG']}, 'HER_DDPG': {'parameters': {'HER_DDPG_actor_learning_rate': {'values': [1e-05, 0.0001]}, 'HER_DDPG_critic_learning_rate': {'values': [1e-05, 0.0001]}, 'HER_DDPG_goal_strategy': {'values': ['future']}, 'HER_DDPG_num_goals': {'min': 4, 'max': 8}, 'HER_DDPG_goal_tolerance': {'values': [0.05]}, 'HER_DDPG_discount': {'values': [0.99, 0.9]}, 'HER_DDPG_tau': {'values': [0.05]}, 'HER_DDPG_epsilon_greedy': {'values': [0.2, 0.3]}, 'HER_DDPG_normalizer_clip': {'values': [5]}, 'HER_DDPG_device': {'value': 'cuda'}, 'HER_DDPG_actor_num_cnn

DEBUG:urllib3.connectionpool:https://api.wandb.ai:443 "POST /graphql HTTP/1.1" 200 1879
DEBUG:urllib3.connectionpool:https://api.wandb.ai:443 "POST /graphql HTTP/1.1" 200 371
DEBUG:git.cmd:Popen(['git', 'cat-file', '--batch-check'], cwd=/workspaces/RL_Agents, stdin=<valid stream>, shell=False, universal_newlines=False)
DEBUG:urllib3.connectionpool:https://api.wandb.ai:443 "POST /graphql HTTP/1.1" 200 57
wandb: wandb version 0.17.1 is available!  To upgrade, please run:
wandb:  $ pip install wandb --upgrade
wandb: Tracking run with wandb version 0.17.0
wandb: Run data is saved locally in /workspaces/RL_Agents/pytorch/src/app/wandb/run-20240608_182007-mjmpydj2
wandb: Run `wandb offline` to turn off syncing.
wandb: Syncing run hardy-sweep-2
wandb: ⭐️ View project at https://wandb.ai/jasonhayes1987/FetchReach-v2
wandb: 🧹 View sweep at https://wandb.ai/jasonhayes1987/FetchReach-v2/sweeps/z0diu9m3
wandb: 🚀 View run at https://wandb.ai/jasonhayes1987/FetchReach-v2/runs/mjmpydj2


signal only works in main thread of the main interpreter
sweep wandb config:{'HER_DDPG': {'HER_DDPG_actor_activation': 'relu', 'HER_DDPG_actor_clamp_output': 0.05, 'HER_DDPG_actor_hidden_kernel_initializer': 'kaiming_uniform', 'HER_DDPG_actor_hidden_kernel_kaiming_uniform': {'kaiming_uniform_mode': 'fan_in'}, 'HER_DDPG_actor_learning_rate': 0.0001, 'HER_DDPG_actor_normalize_layers': False, 'HER_DDPG_actor_num_cnn_layers': 0, 'HER_DDPG_actor_num_layers': 2, 'HER_DDPG_actor_optimizer': 'Adam', 'HER_DDPG_actor_optimizer_Adam_options': {'Adam_weight_decay': 0}, 'HER_DDPG_actor_output_kernel_constant': {'constant_value': 0.003}, 'HER_DDPG_actor_output_kernel_initializer': 'constant', 'HER_DDPG_batch_size': 128, 'HER_DDPG_critic_activation': 'relu', 'HER_DDPG_critic_hidden_kernel_initializer': 'kaiming_uniform', 'HER_DDPG_critic_hidden_kernel_kaiming_uniform': {'kaiming_uniform_mode': 'fan_in'}, 'HER_DDPG_critic_learning_rate': 0.0001, 'HER_DDPG_critic_merged_num_layers': 2, 'HER_DDPG_critic

DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): api.wandb.ai:443
DEBUG:urllib3.connectionpool:https://api.wandb.ai:443 "POST /graphql HTTP/1.1" 200 None
DEBUG:urllib3.connectionpool:https://api.wandb.ai:443 "POST /graphql HTTP/1.1" 200 789
DEBUG:urllib3.connectionpool:https://api.wandb.ai:443 "POST /graphql HTTP/1.1" 200 785
DEBUG:urllib3.connectionpool:https://api.wandb.ai:443 "POST /graphql HTTP/1.1" 200 785
DEBUG:urllib3.connectionpool:https://api.wandb.ai:443 "POST /graphql HTTP/1.1" 200 785
wandb: logging graph, to disable use `wandb.watch(log_graph=False)`
wandb: logging graph, to disable use `wandb.watch(log_graph=False)`
wandb: Adding directory to artifact (./HER_Test/her/ddpg)... Done. 0.0s
DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): api.wandb.ai:443
DEBUG:urllib3.connectionpool:https://api.wandb.ai:443 "POST /graphql HTTP/1.1" 200 42
DEBUG:urllib3.connectionpool:https://api.wandb.ai:443 "POST /graphql HTTP/1.1" 200 57


epoch 0 cycle 1 episode 10, success percentage 0.0, reward -50.0, avg reward -50.0, avg episode time 0.25s


DEBUG:urllib3.connectionpool:https://api.wandb.ai:443 "POST /graphql HTTP/1.1" 200 57
wandb:                                                                                
wandb: 
wandb: Run history:
wandb:                  action_0 ▄▄▄▄▅█▃▄▄▅▅▁▄▂▄▄▄▇▄▄▃▃▅▄▄▄▄▁▄▄▄▄▅▅▆▄▃▄▅▄
wandb:                  action_1 ▄▂▅▄▅▄▃▇▅▅▇▂▅▃▅▅▅█▅▅▅▅▃▅▅▄▅▁▅▅▅▄▅▅▅▅█▅▅▅
wandb:                  action_2 ▅▅▅▅▅▄▄▁▅▅▆▄▅▅▅▅▅▅▅▅▄▁▁▅▅▄▅▄▄▄▅▄▅▅▅▅█▅▅▅
wandb:                  action_3 ▄▇▄▄▅▁▂█▅▅▅▁▅▅▄▄▅▁▄▄▂▄▆▄▄▄▅▅▄▅▅▄▄▄▁▅▅▅▅▄
wandb:                actor_loss ▁▂▂▃▄▅▆▆▇█
wandb:         actor_predictions █████▇▇▇▇▆▆▆▆▅▅▅▅▅▄▄▄▄▄▄▄▄▄▂▂▂▂▁▁▁▁▁▁▁▁▁
wandb:                avg_reward ▁▁▁▁▁▁▁▁▁▁
wandb:                      best █▁▁▁▁▁▁▁▁▁
wandb:               critic_loss ▁█▅▅▂▂▃▇▄▇
wandb:        critic_predictions █████▇▇▇▇▆▆▆▆▆▆▆▆▆▅▅▅▅▄▄▄▄▄▃▃▃▃▂▂▂▂▂▁▁▁▁
wandb:                   episode ▁▂▃▃▄▅▆▆▇█
wandb:            episode_reward ▁▁▁▁▁▁▁▁▁▁
wandb:              episode_time ▅▅▄▅█▅▁▁▃▁
wandb:             goal_distance ▄▃▃▂▄▅▅▆▂▄▄▃▃▄

Loaded train config: {'num_sweeps': 5, 'num_episodes': 10, 'seed': 42, 'use_mpi': False, 'num_workers': 1, 'num_agents': 1, 'num_epochs': 1, 'num_cycles': 1, 'num_updates': 1}
Loaded sweep config: {'method': 'bayes', 'project': 'FetchReach-v2', 'name': 'Test', 'metric': {'name': 'episode_reward', 'goal': 'maximize'}, 'parameters': {'env': {'parameters': {'id': {'value': 'FetchReach-v2'}, 'max_episode_steps': {'value': 50}}}, 'model_type': {'values': ['HER_DDPG']}, 'HER_DDPG': {'parameters': {'HER_DDPG_actor_learning_rate': {'values': [1e-05, 0.0001]}, 'HER_DDPG_critic_learning_rate': {'values': [1e-05, 0.0001]}, 'HER_DDPG_goal_strategy': {'values': ['future']}, 'HER_DDPG_num_goals': {'min': 4, 'max': 8}, 'HER_DDPG_goal_tolerance': {'values': [0.05]}, 'HER_DDPG_discount': {'values': [0.99, 0.9]}, 'HER_DDPG_tau': {'values': [0.05]}, 'HER_DDPG_epsilon_greedy': {'values': [0.2, 0.3]}, 'HER_DDPG_normalizer_clip': {'values': [5]}, 'HER_DDPG_device': {'value': 'cuda'}, 'HER_DDPG_actor_num_cnn

DEBUG:urllib3.connectionpool:https://api.wandb.ai:443 "POST /graphql HTTP/1.1" 200 1879
DEBUG:urllib3.connectionpool:https://api.wandb.ai:443 "POST /graphql HTTP/1.1" 200 371
DEBUG:git.cmd:Popen(['git', 'cat-file', '--batch-check'], cwd=/workspaces/RL_Agents, stdin=<valid stream>, shell=False, universal_newlines=False)
DEBUG:urllib3.connectionpool:https://api.wandb.ai:443 "POST /graphql HTTP/1.1" 200 57
DEBUG:urllib3.connectionpool:https://api.wandb.ai:443 "POST /graphql HTTP/1.1" 200 57
wandb: wandb version 0.17.1 is available!  To upgrade, please run:
wandb:  $ pip install wandb --upgrade
wandb: Tracking run with wandb version 0.17.0
wandb: Run data is saved locally in /workspaces/RL_Agents/pytorch/src/app/wandb/run-20240608_182034-t14sop1t
wandb: Run `wandb offline` to turn off syncing.
wandb: Syncing run rare-sweep-3
wandb: ⭐️ View project at https://wandb.ai/jasonhayes1987/FetchReach-v2
wandb: 🧹 View sweep at https://wandb.ai/jasonhayes1987/FetchReach-v2/sweeps/z0diu9m3
wandb: 🚀 V

signal only works in main thread of the main interpreter
sweep wandb config:{'HER_DDPG': {'HER_DDPG_actor_activation': 'relu', 'HER_DDPG_actor_clamp_output': 0.05, 'HER_DDPG_actor_hidden_kernel_initializer': 'kaiming_uniform', 'HER_DDPG_actor_hidden_kernel_kaiming_uniform': {'kaiming_uniform_mode': 'fan_in'}, 'HER_DDPG_actor_learning_rate': 0.0001, 'HER_DDPG_actor_normalize_layers': False, 'HER_DDPG_actor_num_cnn_layers': 0, 'HER_DDPG_actor_num_layers': 2, 'HER_DDPG_actor_optimizer': 'Adam', 'HER_DDPG_actor_optimizer_Adam_options': {'Adam_weight_decay': 0}, 'HER_DDPG_actor_output_kernel_constant': {'constant_value': 0.003}, 'HER_DDPG_actor_output_kernel_initializer': 'constant', 'HER_DDPG_batch_size': 256, 'HER_DDPG_critic_activation': 'relu', 'HER_DDPG_critic_hidden_kernel_initializer': 'kaiming_uniform', 'HER_DDPG_critic_hidden_kernel_kaiming_uniform': {'kaiming_uniform_mode': 'fan_in'}, 'HER_DDPG_critic_learning_rate': 1e-05, 'HER_DDPG_critic_merged_num_layers': 2, 'HER_DDPG_critic_

DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): api.wandb.ai:443
DEBUG:urllib3.connectionpool:https://api.wandb.ai:443 "POST /graphql HTTP/1.1" 200 None
DEBUG:urllib3.connectionpool:https://api.wandb.ai:443 "POST /graphql HTTP/1.1" 200 789
DEBUG:urllib3.connectionpool:https://api.wandb.ai:443 "POST /graphql HTTP/1.1" 200 785
DEBUG:urllib3.connectionpool:https://api.wandb.ai:443 "POST /graphql HTTP/1.1" 200 785
DEBUG:urllib3.connectionpool:https://api.wandb.ai:443 "POST /graphql HTTP/1.1" 200 785
wandb: logging graph, to disable use `wandb.watch(log_graph=False)`
wandb: logging graph, to disable use `wandb.watch(log_graph=False)`
wandb: Adding directory to artifact (./HER_Test/her/ddpg)... Done. 0.0s
DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): api.wandb.ai:443
DEBUG:urllib3.connectionpool:https://api.wandb.ai:443 "POST /graphql HTTP/1.1" 200 42
DEBUG:urllib3.connectionpool:https://api.wandb.ai:443 "POST /graphql HTTP/1.1" 200 57


epoch 0 cycle 1 episode 10, success percentage 0.0, reward -50.0, avg reward -49.6, avg episode time 0.32s


DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): o151352.ingest.sentry.io:443
DEBUG:urllib3.connectionpool:https://o151352.ingest.sentry.io:443 "POST /api/4504800232407040/envelope/ HTTP/1.1" 200 0
DEBUG:urllib3.connectionpool:https://api.wandb.ai:443 "POST /graphql HTTP/1.1" 200 57
wandb:                                                                                
wandb: 
wandb: Run history:
wandb:                  action_0 ▃▇▄▄▄▄▄▄▆▃▁█▄▃▄▄▄▃▄▅▄▄▄▄▁▄▄▄▄▄▄▄▄▃▄▄▃▄▄▃
wandb:                  action_1 ▅▆▅▅▅▅▅▅▂▅█▁▅▆▆▅▅▅▅▆▆▆▅▇█▅▅▆▅▅▅▅▅▅▆▅▅▁▅▅
wandb:                  action_2 ▅▁▅▄▅▅▅▅▇▅▇█▅▅▅▅▅▅▅▃▅▅▅▇█▄▅▅▃▅▄▅▅▅▅▅▅▂▅▅
wandb:                  action_3 ▅▄▅▅▆▅▅▅▄▅▁▃▅▅▅▅▅▅▅█▅▅▅▄▄▅▅▆▁▅▅▅▅▅▅▅▅▆▅▅
wandb:                actor_loss ▃▃▁▃▃▅▆▅▆█
wandb:         actor_predictions █████████▆▆▆▆▆▆▆▆▆▄▄▄▄▃▃▃▃▃▃▃▃▃▂▂▂▂▂▁▁▁▁
wandb:                avg_reward █▄▃▂▂▂▁▁▁▁
wandb:                      best █▁▁▁▁▁▁▁▁▁
wandb:               critic_loss ▅▃▇▆█▅▁▃▄▂
wandb:        critic_predictions ▆▆▆▆▆▆▆

Loaded train config: {'num_sweeps': 5, 'num_episodes': 10, 'seed': 42, 'use_mpi': False, 'num_workers': 1, 'num_agents': 1, 'num_epochs': 1, 'num_cycles': 1, 'num_updates': 1}
Loaded sweep config: {'method': 'bayes', 'project': 'FetchReach-v2', 'name': 'Test', 'metric': {'name': 'episode_reward', 'goal': 'maximize'}, 'parameters': {'env': {'parameters': {'id': {'value': 'FetchReach-v2'}, 'max_episode_steps': {'value': 50}}}, 'model_type': {'values': ['HER_DDPG']}, 'HER_DDPG': {'parameters': {'HER_DDPG_actor_learning_rate': {'values': [1e-05, 0.0001]}, 'HER_DDPG_critic_learning_rate': {'values': [1e-05, 0.0001]}, 'HER_DDPG_goal_strategy': {'values': ['future']}, 'HER_DDPG_num_goals': {'min': 4, 'max': 8}, 'HER_DDPG_goal_tolerance': {'values': [0.05]}, 'HER_DDPG_discount': {'values': [0.99, 0.9]}, 'HER_DDPG_tau': {'values': [0.05]}, 'HER_DDPG_epsilon_greedy': {'values': [0.2, 0.3]}, 'HER_DDPG_normalizer_clip': {'values': [5]}, 'HER_DDPG_device': {'value': 'cuda'}, 'HER_DDPG_actor_num_cnn

DEBUG:urllib3.connectionpool:https://api.wandb.ai:443 "POST /graphql HTTP/1.1" 200 1879
DEBUG:urllib3.connectionpool:https://api.wandb.ai:443 "POST /graphql HTTP/1.1" 200 371
DEBUG:git.cmd:Popen(['git', 'cat-file', '--batch-check'], cwd=/workspaces/RL_Agents, stdin=<valid stream>, shell=False, universal_newlines=False)
DEBUG:urllib3.connectionpool:https://api.wandb.ai:443 "POST /graphql HTTP/1.1" 200 57
wandb: wandb version 0.17.1 is available!  To upgrade, please run:
wandb:  $ pip install wandb --upgrade
wandb: Tracking run with wandb version 0.17.0
wandb: Run data is saved locally in /workspaces/RL_Agents/pytorch/src/app/wandb/run-20240608_182055-3ogk1zzp
wandb: Run `wandb offline` to turn off syncing.
wandb: Syncing run astral-sweep-4
wandb: ⭐️ View project at https://wandb.ai/jasonhayes1987/FetchReach-v2
wandb: 🧹 View sweep at https://wandb.ai/jasonhayes1987/FetchReach-v2/sweeps/z0diu9m3
wandb: 🚀 View run at https://wandb.ai/jasonhayes1987/FetchReach-v2/runs/3ogk1zzp


signal only works in main thread of the main interpreter
sweep wandb config:{'HER_DDPG': {'HER_DDPG_actor_activation': 'relu', 'HER_DDPG_actor_clamp_output': 0.05, 'HER_DDPG_actor_hidden_kernel_initializer': 'kaiming_uniform', 'HER_DDPG_actor_hidden_kernel_kaiming_uniform': {'kaiming_uniform_mode': 'fan_in'}, 'HER_DDPG_actor_learning_rate': 0.0001, 'HER_DDPG_actor_normalize_layers': False, 'HER_DDPG_actor_num_cnn_layers': 0, 'HER_DDPG_actor_num_layers': 2, 'HER_DDPG_actor_optimizer': 'Adam', 'HER_DDPG_actor_optimizer_Adam_options': {'Adam_weight_decay': 0}, 'HER_DDPG_actor_output_kernel_constant': {'constant_value': 0.003}, 'HER_DDPG_actor_output_kernel_initializer': 'constant', 'HER_DDPG_batch_size': 256, 'HER_DDPG_critic_activation': 'relu', 'HER_DDPG_critic_hidden_kernel_initializer': 'kaiming_uniform', 'HER_DDPG_critic_hidden_kernel_kaiming_uniform': {'kaiming_uniform_mode': 'fan_in'}, 'HER_DDPG_critic_learning_rate': 0.0001, 'HER_DDPG_critic_merged_num_layers': 2, 'HER_DDPG_critic

DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): api.wandb.ai:443
DEBUG:urllib3.connectionpool:https://api.wandb.ai:443 "POST /graphql HTTP/1.1" 200 None
DEBUG:urllib3.connectionpool:https://api.wandb.ai:443 "POST /graphql HTTP/1.1" 200 789
DEBUG:urllib3.connectionpool:https://api.wandb.ai:443 "POST /graphql HTTP/1.1" 200 785
DEBUG:urllib3.connectionpool:https://api.wandb.ai:443 "POST /graphql HTTP/1.1" 200 785
DEBUG:urllib3.connectionpool:https://api.wandb.ai:443 "POST /graphql HTTP/1.1" 200 785
wandb: logging graph, to disable use `wandb.watch(log_graph=False)`
wandb: logging graph, to disable use `wandb.watch(log_graph=False)`
wandb: Adding directory to artifact (./HER_Test/her/ddpg)... Done. 0.0s
DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): api.wandb.ai:443
DEBUG:urllib3.connectionpool:https://api.wandb.ai:443 "POST /graphql HTTP/1.1" 200 42
DEBUG:urllib3.connectionpool:https://api.wandb.ai:443 "POST /graphql HTTP/1.1" 200 57


epoch 0 cycle 1 episode 10, success percentage 0.0, reward -50.0, avg reward -47.7, avg episode time 0.30s


DEBUG:urllib3.connectionpool:https://api.wandb.ai:443 "POST /graphql HTTP/1.1" 200 57
wandb:                                                                                
wandb: 
wandb: Run history:
wandb:                  action_0 ▅▁▅▅▅▅▅█▄▅▄▅▅▅▅▆▅▅▅▅▆▅▅▅▅▅▅▅▅▅▅▅▅▇▅▅▅▅▅▆
wandb:                  action_1 ▅▄▅▅▅▅▅█▄▄▅▆▇▅▅▅▅▅▅▅▅▅█▅▄▅▅▅▅▅▅▅▅▃▅▅▅▅▅▁
wandb:                  action_2 ▅▅▅▅▅▅▅▄▆▅▅█▆▅▅▅▆▅▅▅▅▅▃▅▅▅▅▅▅▅▅▅▅▁▅▅▅▅▅▇
wandb:                  action_3 ▄█▄▄▅▄▅▇▄▄▄▂▁▄▄▄▇▄▄▄▄▄█▄▄▅▅▄▄▄▄▄▄▃▄▅▄▄▅▄
wandb:                actor_loss ▁▂▃▃▄▅▆▆▇█
wandb:         actor_predictions █████▇▇▇▇▆▆▆▆▅▅▅▅▅▄▄▄▄▃▃▃▃▃▃▃▃▃▂▂▂▂▂▁▁▁▁
wandb:                avg_reward ▁▁█▆▅▄▄▄▃▃
wandb:                      best █▁█▁▁▁▁▁▁▁
wandb:               critic_loss ▅█▃▁▄▇▅▅▃▁
wandb:        critic_predictions █████▇▇▇▇▆▆▆▆▅▅▅▅▅▄▄▄▄▃▃▃▃▃▂▂▂▂▂▂▂▂▂▁▁▁▁
wandb:                   episode ▁▂▃▃▄▅▆▆▇█
wandb:            episode_reward ▁▁█▁▁▁▁▁▁▁
wandb:              episode_time ▂▁▂▄█▂▅▄▄▃
wandb:             goal_distance ▄▄▄▄▃▃▃▂▂▁▁▂▄▃

Loaded train config: {'num_sweeps': 5, 'num_episodes': 10, 'seed': 42, 'use_mpi': False, 'num_workers': 1, 'num_agents': 1, 'num_epochs': 1, 'num_cycles': 1, 'num_updates': 1}
Loaded sweep config: {'method': 'bayes', 'project': 'FetchReach-v2', 'name': 'Test', 'metric': {'name': 'episode_reward', 'goal': 'maximize'}, 'parameters': {'env': {'parameters': {'id': {'value': 'FetchReach-v2'}, 'max_episode_steps': {'value': 50}}}, 'model_type': {'values': ['HER_DDPG']}, 'HER_DDPG': {'parameters': {'HER_DDPG_actor_learning_rate': {'values': [1e-05, 0.0001]}, 'HER_DDPG_critic_learning_rate': {'values': [1e-05, 0.0001]}, 'HER_DDPG_goal_strategy': {'values': ['future']}, 'HER_DDPG_num_goals': {'min': 4, 'max': 8}, 'HER_DDPG_goal_tolerance': {'values': [0.05]}, 'HER_DDPG_discount': {'values': [0.99, 0.9]}, 'HER_DDPG_tau': {'values': [0.05]}, 'HER_DDPG_epsilon_greedy': {'values': [0.2, 0.3]}, 'HER_DDPG_normalizer_clip': {'values': [5]}, 'HER_DDPG_device': {'value': 'cuda'}, 'HER_DDPG_actor_num_cnn

DEBUG:urllib3.connectionpool:https://api.wandb.ai:443 "POST /graphql HTTP/1.1" 200 1879
DEBUG:urllib3.connectionpool:https://api.wandb.ai:443 "POST /graphql HTTP/1.1" 200 371
DEBUG:git.cmd:Popen(['git', 'cat-file', '--batch-check'], cwd=/workspaces/RL_Agents, stdin=<valid stream>, shell=False, universal_newlines=False)
DEBUG:urllib3.connectionpool:https://api.wandb.ai:443 "POST /graphql HTTP/1.1" 200 57
wandb: wandb version 0.17.1 is available!  To upgrade, please run:
wandb:  $ pip install wandb --upgrade
wandb: Tracking run with wandb version 0.17.0
wandb: Run data is saved locally in /workspaces/RL_Agents/pytorch/src/app/wandb/run-20240608_182117-mfqzar0i
wandb: Run `wandb offline` to turn off syncing.
wandb: Syncing run light-sweep-5
wandb: ⭐️ View project at https://wandb.ai/jasonhayes1987/FetchReach-v2
wandb: 🧹 View sweep at https://wandb.ai/jasonhayes1987/FetchReach-v2/sweeps/z0diu9m3
wandb: 🚀 View run at https://wandb.ai/jasonhayes1987/FetchReach-v2/runs/mfqzar0i


signal only works in main thread of the main interpreter
sweep wandb config:{'HER_DDPG': {'HER_DDPG_actor_activation': 'relu', 'HER_DDPG_actor_clamp_output': 0.05, 'HER_DDPG_actor_hidden_kernel_initializer': 'kaiming_uniform', 'HER_DDPG_actor_hidden_kernel_kaiming_uniform': {'kaiming_uniform_mode': 'fan_in'}, 'HER_DDPG_actor_learning_rate': 0.0001, 'HER_DDPG_actor_normalize_layers': False, 'HER_DDPG_actor_num_cnn_layers': 0, 'HER_DDPG_actor_num_layers': 2, 'HER_DDPG_actor_optimizer': 'Adam', 'HER_DDPG_actor_optimizer_Adam_options': {'Adam_weight_decay': 0}, 'HER_DDPG_actor_output_kernel_constant': {'constant_value': 0.003}, 'HER_DDPG_actor_output_kernel_initializer': 'constant', 'HER_DDPG_batch_size': 256, 'HER_DDPG_critic_activation': 'relu', 'HER_DDPG_critic_hidden_kernel_initializer': 'kaiming_uniform', 'HER_DDPG_critic_hidden_kernel_kaiming_uniform': {'kaiming_uniform_mode': 'fan_in'}, 'HER_DDPG_critic_learning_rate': 1e-05, 'HER_DDPG_critic_merged_num_layers': 2, 'HER_DDPG_critic_

DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): api.wandb.ai:443
DEBUG:urllib3.connectionpool:https://api.wandb.ai:443 "POST /graphql HTTP/1.1" 200 None
DEBUG:urllib3.connectionpool:https://api.wandb.ai:443 "POST /graphql HTTP/1.1" 200 789
DEBUG:urllib3.connectionpool:https://api.wandb.ai:443 "POST /graphql HTTP/1.1" 200 785
DEBUG:urllib3.connectionpool:https://api.wandb.ai:443 "POST /graphql HTTP/1.1" 200 785
DEBUG:urllib3.connectionpool:https://api.wandb.ai:443 "POST /graphql HTTP/1.1" 200 785
wandb: logging graph, to disable use `wandb.watch(log_graph=False)`
wandb: logging graph, to disable use `wandb.watch(log_graph=False)`
wandb: Adding directory to artifact (./HER_Test/her/ddpg)... Done. 0.0s
DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): api.wandb.ai:443
DEBUG:urllib3.connectionpool:https://api.wandb.ai:443 "POST /graphql HTTP/1.1" 200 42
DEBUG:urllib3.connectionpool:https://api.wandb.ai:443 "POST /graphql HTTP/1.1" 200 57


epoch 0 cycle 1 episode 10, success percentage 0.1, reward -50.0, avg reward -45.9, avg episode time 0.26s


DEBUG:urllib3.connectionpool:https://api.wandb.ai:443 "POST /graphql HTTP/1.1" 200 57
wandb:                                                                                
wandb: 
wandb: Run history:
wandb:                  action_0 ▅▃▅▅▅▅▅█▅▆▆▅▁▅▅▄▅▅▅▅▅▅▅▁▅▅▅▅▅▅▅▆▆▅▅▂▅▅▅▅
wandb:                  action_1 ▄█▅▅▅▅▅▆▅▅▄▅▅▅▅█▅▅▅▅▅▅▄▁▅▅▅▅▅▅▅▅▅▅▄▇▅▅▅▅
wandb:                  action_2 ▇▂▇▇▇▇▇▃▇▇▇▇▅▇▇▁▇▇▇▇█▇▇▆▇█▇▇▇▇██▇▆█▄▇▇▇█
wandb:                  action_3 ▄█▅▅▄▅▅█▄▄▄▄▂▄▅▃▄▅▄▄▄▄▄▁▅▄▄▄▅▅▅▄▄▅▅▄▅▅▄▅
wandb:                actor_loss ▆█▆▄▄▂▁▁▂▁
wandb:         actor_predictions █████▇▇▇▇▆▆▆▆▅▅▅▅▅▄▄▄▄▃▃▃▃▃▃▃▃▃▂▂▂▂▂▁▁▁▁
wandb:                avg_reward ▁▁▁▁▁▁█▇▆▆
wandb:                      best █▁▁▁▁██▁▁▁
wandb:               critic_loss █▆▅▅▄▃▂▁▂▃
wandb:        critic_predictions █████▆▆▆▆▇▇▇▇▇▇▇▇▇▅▅▅▅▆▆▆▆▆▄▄▄▄▂▂▂▂▂▁▁▁▁
wandb:                   episode ▁▂▃▃▄▅▆▆▇█
wandb:            episode_reward ▁▁▁▁▁▁█▁▁▁
wandb:              episode_time █▄▄▁▆▄▁▄▅▅
wandb:             goal_distance ▄▂▂▄▅▆▄▃▂▃▂▃▅▆