In [1]:
import gym
import custom_robotics
import wrappers
from custom_policies import custom_feature_extractor

from stable_baselines3 import SAC#, HerReplayBuffer
from stable_baselines3.common.monitor import Monitor
from stable_baselines3.common.vec_env import DummyVecEnv, VecVideoRecorder
import wandb
from wandb.integration.sb3 import WandbCallback

In [5]:
# Run metadata and hyper-parameters shared by the cells below.
# The whole mapping is also handed to wandb.init(config=...) so it is
# recorded with the experiment.
config = dict(
    policy_type="MultiInputPolicy",
    total_timesteps=2_000_000,
    env_name="CustomFetchReach-v0",
    name="SAC_frame_stack_3dConv",
    learning_starts=10_000,
    buffer_size=100_000,
    gamma=0.95,
    learning_rate=1e-3,
    normalize=True,
)

In [6]:
def make_env():
    """Create a single wrapped environment for ``config["env_name"]``.

    Wrapper order, innermost first: ``Monitor`` -> ``wrappers.ImageAndRobot``
    -> ``wrappers.FrameStack``.

    Returns:
        The fully wrapped environment (a stack of 4 frames is requested).
    """
    # Monitor sits directly on the raw env so episode statistics such as
    # returns are recorded.
    monitored_env = Monitor(gym.make(config["env_name"]))
    # NOTE(review): the two 80s are presumably the image width/height --
    # confirm against wrappers.ImageAndRobot.
    image_env = wrappers.ImageAndRobot(monitored_env, 80, 80)
    return wrappers.FrameStack(image_env, stack_size=4)


# SB3 expects a vectorized env; wrap the factory in a one-element DummyVecEnv.
env = DummyVecEnv([make_env])

In [4]:
# Untracked training run (no wandb callback, no tensorboard log).
# All hyper-parameters, including learning_starts, are read from `config`
# so this run uses the same settings as the tracked training cell.
model = SAC(
    config["policy_type"],
    env,
    learning_starts=config["learning_starts"],
    buffer_size=config["buffer_size"],
    gamma=config["gamma"],
    learning_rate=config["learning_rate"],
    policy_kwargs=dict(
        features_extractor_class=custom_feature_extractor.CustomFeatureExtractor,
        features_extractor_kwargs=dict(custom_cnn=False),
        net_arch=[512, 512],
    ),
    verbose=1,
)
model.learn(
    total_timesteps=config["total_timesteps"],
)

Using cuda device
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 50       |
|    ep_rew_mean     | -50      |
|    success_rate    | 0.0      |
| time/              |          |
|    episodes        | 4        |
|    fps             | 537      |
|    time_elapsed    | 0        |
|    total_timesteps | 200      |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 50       |
|    ep_rew_mean     | -48.2    |
|    success_rate    | 0.0      |
| time/              |          |
|    episodes        | 8        |
|    fps             | 579      |
|    time_elapsed    | 0        |
|    total_timesteps | 400      |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 50       |
|    ep_rew_mean     | -47.2    |
|    success_rate    | 0.0      |
| time/              |          |
|    episodes        | 12     

KeyboardInterrupt: 

In [7]:
# Start a wandb run named after the experiment and attach the full config.
name = config["name"]
run = wandb.init(
    entity="f-krone",
    project="fetch-reach",
    name=name,
    config=config,
    save_code=True,  # optional: keep a copy of the code with the run
    sync_tensorboard=True,  # mirror SB3's tensorboard metrics to wandb
    monitor_gym=True,  # upload the recorded agent videos automatically
)

# Wrap the vectorized env so a 200-step clip is recorded whenever the
# step counter is a multiple of 2000; clips go to a per-run folder.
env = VecVideoRecorder(
    venv=env,
    video_folder=f"videos/fetch-reach/{name}_{run.id}",
    record_video_trigger=lambda step: step % 2000 == 0,
    video_length=200,
)

[34m[1mwandb[0m: Currently logged in as: [33mf-krone[0m (use `wandb login --relogin` to force relogin)
[34m[1mwandb[0m: wandb version 0.12.10 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade


In [None]:
# Tracked training run: SAC on the video-recording env, with tensorboard
# logs written to a per-run directory and the wandb callback attached.
model = SAC(
    policy=config["policy_type"],
    env=env,
    learning_rate=config["learning_rate"],
    buffer_size=config["buffer_size"],
    learning_starts=config["learning_starts"],
    gamma=config["gamma"],
    tensorboard_log=f"runs/fetch-reach/{name}_{run.id}",
    policy_kwargs={
        "features_extractor_class": custom_feature_extractor.CustomFeatureExtractor,
        "features_extractor_kwargs": {"custom_cnn": False},
        "net_arch": [512, 512],
    },
    verbose=1,
)

model.learn(
    callback=WandbCallback(
        verbose=2,
        model_save_path=f"models/fetch-reach/{name}_{run.id}",
        gradient_save_freq=100,
    ),
    total_timesteps=config["total_timesteps"],
)

# Keep an additional copy of the trained model in the working directory.
model.save(f"{name}_fetch-reach")

Using cuda device
Logging to runs/fetch-reach/SAC_frame_stack_3dConv_p8pqnpfl/SAC_1
Saving video to /home/ws/upecj/master/models/src/videos/fetch-reach/SAC_frame_stack_3dConv_p8pqnpfl/rl-video-step-0-to-step-200.mp4
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 50       |
|    ep_rew_mean     | -49      |
|    success_rate    | 0.25     |
| time/              |          |
|    episodes        | 4        |
|    fps             | 102      |
|    time_elapsed    | 1        |
|    total_timesteps | 200      |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 50       |
|    ep_rew_mean     | -49.5    |
|    success_rate    | 0.125    |
| time/              |          |
|    episodes        | 8        |
|    fps             | 173      |
|    time_elapsed    | 2        |
|    total_timesteps | 400      |
---------------------------------
------------------------------------

In [9]:
# Finish the wandb run that was created by the wandb.init(...) call.
run.finish()

VBox(children=(Label(value=' 13.88MB of 13.88MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.…

0,1
global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
rollout/ep_len_mean,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
rollout/ep_rew_mean,▃▂▆▆█▇▇▅▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁
rollout/success_rate,█▃▂▁▁▁▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
time/fps,▆▇███▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train/actor_loss,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▂█
train/critic_loss,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁█
train/ent_coef,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▂█
train/ent_coef_loss,██▇▇████▇▇▇▇▇▆▆▆▆▆▅▅▅▅▅▄▄▄▄▄▃▃▃▃▂▂▂▂▂▁▁▁
train/learning_rate,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
global_step,79600.0
rollout/ep_len_mean,50.0
rollout/ep_rew_mean,-49.93
rollout/success_rate,0.0
time/fps,0.0
train/actor_loss,1.1390430967029916e+26
train/critic_loss,inf
train/ent_coef,2.74221745763911e+24
train/ent_coef_loss,-2112.25439
train/learning_rate,0.001


In [15]:
# Roll out a previously saved SAC model and record the episode(s) to video.
video_folder = 'logs/videos/'
video_length = 500
env_id=config["env_name"]
# NOTE: this rebinds the notebook-level `name` (set from config["name"] in
# the wandb cell); re-run that cell before training again.
name = "SAC_multiInput_custom"

test_env = DummyVecEnv([make_env])

# Record the video starting at the first step
test_env = VecVideoRecorder(test_env, video_folder,
                       record_video_trigger=lambda x: x == 0, video_length=video_length,
                       name_prefix=f"{name}-{env_id}")
try:
    obs = test_env.reset()
    test_model = SAC.load(f"{name}_fetch", env=test_env)

    # One extra step beyond video_length, as in the original loop.
    for _ in range(video_length + 1):
        action, _states = test_model.predict(obs, deterministic=True)
        obs, _, _, _ = test_env.step(action)
finally:
    # Save the video. Done in `finally` so the recorder is closed even when
    # loading the model or stepping the environment raises.
    test_env.close()

Saving video to /home/ws/upecj/master/models/logs/videos/SAC_multiInput_custom-CustomFetchPickAndPlaceDense-v0-step-0-to-step-500.mp4


Error: You must call wandb.init() before wandb.log()