In [None]:
# Mount Google Drive into the Colab filesystem at /content/drive
# (only works inside Google Colab, where the `google.colab` package exists).
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# Display the kernel's current working directory (the bare last expression is echoed as cell output).
import os
os.getcwd()

In [None]:
# Project folder on the mounted Drive; adjust this if the notebook lives elsewhere.
path="/content/drive/MyDrive/Colab_Notebooks/panda_grasp/"
# Make the project folder the working directory, then echo it to confirm the change.
os.chdir(path)
os.getcwd()

In [None]:
# List the contents of the current working directory via the shell.
!ls

In [None]:
cd Environment/

In [None]:
pip install stable-baselines3[extra]

In [None]:
pip install gym

In [None]:
pip install pybullet

In [1]:
import argparse
import difflib
import os
import uuid

import gym
import numpy as np
import seaborn
import torch as th
from stable_baselines3.common.utils import set_random_seed
import task
from utils.exp_manager import ExperimentManager
from utils.utils import ALGOS, StoreDict

# Apply seaborn's default styling to subsequent matplotlib plots.
seaborn.set()


In [2]:
def train(args=None):
    """Run one training (or hyperparameter-optimization) experiment.

    Validates the requested environment id against the gym registry, seeds the
    random generators, optionally limits the PyTorch thread count, then hands
    everything to ``ExperimentManager``.

    Args:
        args: Parsed argument namespace (e.g. from ``argparse``). The attributes
            read here are ``env``, ``seed``, ``num_threads``, ``verbose``,
            ``trained_agent``, ``uuid`` and every option forwarded to
            ``ExperimentManager`` below. ``args.seed`` is overwritten in place
            when it is negative.

    Raises:
        ValueError: If ``args`` is ``None`` or ``args.env`` is not a registered
            gym environment id.
        AssertionError: If ``args.trained_agent`` is non-empty but is not an
            existing ``.zip`` file.
    """
    # The signature defaults to None, but nothing below can work without a namespace;
    # fail with an explicit message instead of an AttributeError on ``None.env``.
    if args is None:
        raise ValueError("train() requires a parsed argument namespace, got None")

    # Check if the selected environment is valid
    # If it could not be found, suggest the closest match
    registry = gym.envs.registry
    # Older gym releases expose the specs as ``registry.env_specs``; newer ones make
    # ``registry`` itself the id -> spec mapping. Accept both layouts.
    env_specs = getattr(registry, "env_specs", registry)
    registered_envs = set(env_specs.keys())
    if args.env not in registered_envs:
        try:
            closest_match = difflib.get_close_matches(
                args.env, registered_envs, n=1)[0]
        except IndexError:
            closest_match = "'no close match found...'"
        raise ValueError(
            f"{args.env} not found in gym registry, you maybe meant {closest_match}?")

    # If no specific seed is selected, choose a random one
    if args.seed < 0:
        args.seed = np.random.randint(2 ** 32 - 1, dtype="int64").item()

    # Set the random seed across platforms
    set_random_seed(args.seed)

    # Setting num threads to 1 makes things run faster on cpu
    if args.num_threads > 0:
        if args.verbose > 1:
            print(f"Setting torch.num_threads to {args.num_threads}")
        th.set_num_threads(args.num_threads)

    # Verify that pre-trained agent exists before continuing to train it
    if args.trained_agent != "":
        assert args.trained_agent.endswith(".zip") and os.path.isfile(
            args.trained_agent
        ), "The trained_agent must be a valid path to a .zip file"

    # If enabled, ensure that the run has a unique ID
    uuid_str = f"_{uuid.uuid4()}" if args.uuid else ""

    print("=" * 10, args.env, "=" * 10)
    print(f"Seed: {args.seed}")

    # NOTE(review): ``args.preload_replay_buffer`` is defined by this notebook's
    # argument parser but is not forwarded to ExperimentManager here - confirm
    # whether that is intentional.
    exp_manager = ExperimentManager(
        args,
        args.algo,
        args.env,
        args.log_folder,
        args.tensorboard_log,
        args.n_timesteps,
        args.eval_freq,
        args.eval_episodes,
        args.save_freq,
        args.hyperparams,
        args.env_kwargs,
        args.trained_agent,
        args.optimize_hyperparameters,
        args.storage,
        args.study_name,
        args.n_trials,
        args.n_jobs,
        args.sampler,
        args.pruner,
        n_startup_trials=args.n_startup_trials,
        n_evaluations=args.n_evaluations,
        truncate_last_trajectory=args.truncate_last_trajectory,
        uuid_str=uuid_str,
        seed=args.seed,
        log_interval=args.log_interval,
        save_replay_buffer=args.save_replay_buffer,
        verbose=args.verbose,
        vec_env_type=args.vec_env,
    )

    # Prepare experiment and launch hyperparameter optimization if needed
    model = exp_manager.setup_experiment()

    if args.optimize_hyperparameters:
        exp_manager.hyperparameters_optimization()
    else:
        # Regular run: train the model, then persist it
        exp_manager.learn(model)
        exp_manager.save_trained_model(model)

In [3]:
def _str2bool(value):
    """Convert a command-line string ("true", "0", "no", ...) into a real bool.

    ``type=bool`` must not be used with argparse: ``bool("False")`` is ``True``
    because any non-empty string is truthy.
    """
    if isinstance(value, bool):
        return value
    lowered = value.strip().lower()
    if lowered in ("yes", "true", "t", "y", "1"):
        return True
    if lowered in ("no", "false", "f", "n", "0"):
        return False
    raise argparse.ArgumentTypeError(f"Boolean value expected, got {value!r}")


# Arguments to parse, written exactly as they would appear on a command line,
# e.g. ["--algo", "sac", "-n", "100000"]. An explicit list is used instead of
# sys.argv because a Jupyter kernel is typically launched with its own
# "-f <connection file>" argument, which would otherwise be consumed by the
# "-f/--log-folder" option defined below.
CLI_ARGS = []

parser = argparse.ArgumentParser()

# Environment and its parameters
parser.add_argument("--env", type=str,
                    default="PandaTouchEnv_color-v0",
                    help="environment ID")
parser.add_argument("--env-kwargs", type=str, nargs="+", action=StoreDict,
                    help="Optional keyword argument to pass to the env constructor")
parser.add_argument("--vec-env", type=str, choices=["dummy", "subproc"],
                    default="dummy",
                    help="VecEnv type")

# Algorithm
parser.add_argument("--algo", type=str, choices=list(ALGOS.keys()), required=False,
                    default="sac", help="RL Algorithm")
parser.add_argument("-params", "--hyperparams", type=str, nargs="+", action=StoreDict,
                    help="Overwrite hyperparameter (e.g. learning_rate:0.01 train_freq:10)")
parser.add_argument("--num-threads", type=int,
                    default=-1,
                    help="Number of threads for PyTorch (-1 to use default)")

# Training duration
parser.add_argument("-n", "--n-timesteps", type=int,
                    default=50000,
                    help="Overwrite the number of timesteps")

# Continue training an already trained agent
parser.add_argument("-i", "--trained-agent", type=str,
                    default="",
                    help="Path to a pretrained agent to continue training")

# Random seed
parser.add_argument("--seed", type=int,
                    default=42,
                    help="Random generator seed")

# Saving of model
parser.add_argument("--save-freq", type=int,
                    default=1000,
                    help="Save the model every n steps (if negative, no checkpoint)")
parser.add_argument("--save-replay-buffer", action="store_true",
                    default=False,
                    help="Save the replay buffer too (when applicable)")

# Pre-load a replay buffer and start training on it
parser.add_argument("--preload-replay-buffer", type=str,
                    default="model",
                    help="Path to a replay buffer that should be preloaded before starting the training process")

# Logging
parser.add_argument("-f", "--log-folder", type=str,
                    default="logs",
                    help="Log folder")
parser.add_argument("-tb", "--tensorboard-log", type=str,
                    default="tensorboard_logs",
                    help="Tensorboard log dir")
parser.add_argument("--log-interval", type=int,
                    default=-1,
                    help="Override log interval (default: -1, no change)")
parser.add_argument("-uuid", "--uuid", action="store_true",
                    default=False,
                    help="Ensure that the run has a unique ID")

# Hyperparameter optimization
parser.add_argument("-optimize", "--optimize-hyperparameters", action="store_true",
                    default=False,
                    help="Run hyperparameters search")
parser.add_argument("--sampler", type=str, choices=["random", "tpe", "skopt"],
                    default="tpe",
                    help="Sampler to use when optimizing hyperparameters")
parser.add_argument("--pruner", type=str, choices=["halving", "median", "none"],
                    default="median",
                    help="Pruner to use when optimizing hyperparameters")
parser.add_argument("--n-trials", type=int,
                    default=10,
                    help="Number of trials for optimizing hyperparameters")
parser.add_argument("--n-startup-trials", type=int,
                    default=5,
                    help="Number of trials before using optuna sampler")
parser.add_argument("--n-evaluations", type=int,
                    default=2,
                    help="Number of evaluations for hyperparameter optimization")
parser.add_argument("--n-jobs", type=int,
                    default=1,
                    help="Number of parallel jobs when optimizing hyperparameters")
parser.add_argument("--storage", type=str,
                    default=None,
                    help="Database storage path if distributed optimization should be used")
parser.add_argument("--study-name", type=str,
                    default=None,
                    help="Study name for distributed optimization")

# Evaluation
parser.add_argument("--eval-freq", type=int,
                    default=-1,
                    help="Evaluate the agent every n steps (if negative, no evaluation)")
parser.add_argument("--eval-episodes", type=int,
                    default=10,
                    help="Number of episodes to use for evaluation")

# Verbosity
parser.add_argument("--verbose", type=int,
                    default=1,
                    help="Verbose mode (0: no output, 1: INFO)")

# HER specifics
parser.add_argument(
    "--truncate-last-trajectory",
    help="When using HER with online sampling the last trajectory "
    "in the replay buffer will be truncated after reloading the replay buffer.",
    default=True,
    # A real string -> bool converter, so "--truncate-last-trajectory False" yields False.
    type=_str2bool,
)

# parse_known_args tolerates unrecognised options; they are returned in `unknown`.
args, unknown = parser.parse_known_args(CLI_ARGS)

In [4]:
# Sanity check: echo the replay-buffer path and the chosen algorithm, one per line.
print(args.preload_replay_buffer, args.algo, sep="\n")

model
sac


In [5]:
# Kick off the experiment with the namespace parsed above.
train(args)

Seed: 42
OrderedDict([('batch_size', 32),
             ('buffer_size', 25000),
             ('ent_coef', 'auto_0.1'),
             ('env_wrapper',
              ['task.wrapper.ProcessFrame84',
               'task.wrapper.MoveConstraint',
               {'task.wrapper.TimeLimit': {'max_episode_steps': 400}}]),
             ('gamma', 0.95),
             ('gradient_steps', 1),
             ('learning_rate', 'lin_0.0003'),
             ('learning_starts', 0),
             ('n_timesteps', 50000),
             ('noise_std', 0.025),
             ('noise_type', 'normal'),
             ('optimize_memory_usage', True),
             ('policy', 'CnnPolicy'),
             ('policy_kwargs', {'n_critics': 2, 'net_arch': [128, 128]}),
             ('target_entropy', 'auto'),
             ('tau', 0.01),
             ('train_freq', 1)])
Using 1 environments
Overwriting n_timesteps with n=50000


ic| self.object_object1_id: 3


Wrapping into a VecTransposeImage
Applying normal noise with std 0.025
Using cuda device




Log path: logs/sac/PandaTouchEnv_color-v0_16
Logging to tensorboard_logs/PandaTouchEnv_color-v0/SAC_13


  return array(a, dtype, copy=False, order=order)
ic| reward: 1.0, done: True
ic| reward: 1.0, done: True
ic| reward: 1.0, done: True
ic| reward: 1.0, done: True


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 120      |
|    ep_rew_mean     | 1        |
| time/              |          |
|    episodes        | 4        |
|    fps             | 4        |
|    time_elapsed    | 106      |
|    total timesteps | 482      |
| train/             |          |
|    actor_loss      | -1.18    |
|    critic_loss     | 0.00236  |
|    ent_coef        | 0.0866   |
|    ent_coef_loss   | -16.5    |
|    learning_rate   | 0.000297 |
|    n_updates       | 481      |
---------------------------------


ic| reward: 1.0, done: True
ic| reward: 1.0, done: True
ic| reward: 1.0, done: True
ic| reward: 1.0, done: True


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 69.9     |
|    ep_rew_mean     | 1        |
| time/              |          |
|    episodes        | 8        |
|    fps             | 4        |
|    time_elapsed    | 125      |
|    total timesteps | 559      |
| train/             |          |
|    actor_loss      | -1.33    |
|    critic_loss     | 0.00144  |
|    ent_coef        | 0.0847   |
|    ent_coef_loss   | -16.6    |
|    learning_rate   | 0.000297 |
|    n_updates       | 558      |
---------------------------------


ic| reward: 1.0, done: True
ic| reward: 1.0, done: True
ic| reward: 1.0, done: True


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 89       |
|    ep_rew_mean     | 1        |
| time/              |          |
|    episodes        | 12       |
|    fps             | 4        |
|    time_elapsed    | 316      |
|    total timesteps | 1379     |
| train/             |          |
|    actor_loss      | -2.08    |
|    critic_loss     | 0.00356  |
|    ent_coef        | 0.0665   |
|    ent_coef_loss   | -18.1    |
|    learning_rate   | 0.000292 |
|    n_updates       | 1378     |
---------------------------------


ic| reward: 1.0, done: True
ic| reward: 1.0, done: True


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 94.8     |
|    ep_rew_mean     | 1        |
| time/              |          |
|    episodes        | 16       |
|    fps             | 4        |
|    time_elapsed    | 569      |
|    total timesteps | 2433     |
| train/             |          |
|    actor_loss      | -2.09    |
|    critic_loss     | 0.00099  |
|    ent_coef        | 0.0491   |
|    ent_coef_loss   | -20.2    |
|    learning_rate   | 0.000285 |
|    n_updates       | 2432     |
---------------------------------


ic| reward: 1.0, done: True
ic| reward: 1.0, done: True
ic| reward: 1.0, done: True


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 94.9     |
|    ep_rew_mean     | 1        |
| time/              |          |
|    episodes        | 20       |
|    fps             | 4        |
|    time_elapsed    | 742      |
|    total timesteps | 3119     |
| train/             |          |
|    actor_loss      | -1.76    |
|    critic_loss     | 0.000375 |
|    ent_coef        | 0.0404   |
|    ent_coef_loss   | -21.5    |
|    learning_rate   | 0.000281 |
|    n_updates       | 3118     |
---------------------------------


ic| reward: 1.0, done: True


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 108      |
|    ep_rew_mean     | 1        |
| time/              |          |
|    episodes        | 24       |
|    fps             | 4        |
|    time_elapsed    | 1123     |
|    total timesteps | 4641     |
| train/             |          |
|    actor_loss      | -1.43    |
|    critic_loss     | 0.0561   |
|    ent_coef        | 0.0265   |
|    ent_coef_loss   | -24.4    |
|    learning_rate   | 0.000272 |
|    n_updates       | 4640     |
---------------------------------


ic| reward: 1.0, done: True
ic| reward: 1.0, done: True


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 105      |
|    ep_rew_mean     | 1        |
| time/              |          |
|    episodes        | 28       |
|    fps             | 4        |
|    time_elapsed    | 1371     |
|    total timesteps | 5604     |
| train/             |          |
|    actor_loss      | -1.06    |
|    critic_loss     | 0.0104   |
|    ent_coef        | 0.0205   |
|    ent_coef_loss   | -26      |
|    learning_rate   | 0.000266 |
|    n_updates       | 5603     |
---------------------------------




---------------------------------
| rollout/           |          |
|    ep_len_mean     | 105      |
|    ep_rew_mean     | 1        |
| time/              |          |
|    episodes        | 32       |
|    fps             | 4        |
|    time_elapsed    | 1774     |
|    total timesteps | 7204     |
| train/             |          |
|    actor_loss      | -0.94    |
|    critic_loss     | 0.000287 |
|    ent_coef        | 0.0135   |
|    ent_coef_loss   | -28.8    |
|    learning_rate   | 0.000257 |
|    n_updates       | 7203     |
---------------------------------




---------------------------------
| rollout/           |          |
|    ep_len_mean     | 105      |
|    ep_rew_mean     | 1        |
| time/              |          |
|    episodes        | 36       |
|    fps             | 4        |
|    time_elapsed    | 2173     |
|    total timesteps | 8804     |
| train/             |          |
|    actor_loss      | -0.72    |
|    critic_loss     | 2.87e-05 |
|    ent_coef        | 0.009    |
|    ent_coef_loss   | -31.7    |
|    learning_rate   | 0.000247 |
|    n_updates       | 8803     |
---------------------------------




---------------------------------
| rollout/           |          |
|    ep_len_mean     | 105      |
|    ep_rew_mean     | 1        |
| time/              |          |
|    episodes        | 40       |
|    fps             | 4        |
|    time_elapsed    | 2576     |
|    total timesteps | 10404    |
| train/             |          |
|    actor_loss      | -0.507   |
|    critic_loss     | 0.000136 |
|    ent_coef        | 0.00611  |
|    ent_coef_loss   | -34      |
|    learning_rate   | 0.000238 |
|    n_updates       | 10403    |
---------------------------------


ic| reward: 1.0, done: True
ic| reward: 1.0, done: True
ic| reward: 1.0, done: True


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 99.6     |
|    ep_rew_mean     | 1        |
| time/              |          |
|    episodes        | 44       |
|    fps             | 4        |
|    time_elapsed    | 2711     |
|    total timesteps | 10992    |
| train/             |          |
|    actor_loss      | -0.446   |
|    critic_loss     | 1.96e-05 |
|    ent_coef        | 0.00532  |
|    ent_coef_loss   | -35.1    |
|    learning_rate   | 0.000234 |
|    n_updates       | 10991    |
---------------------------------




---------------------------------
| rollout/           |          |
|    ep_len_mean     | 99.6     |
|    ep_rew_mean     | 1        |
| time/              |          |
|    episodes        | 48       |
|    fps             | 4        |
|    time_elapsed    | 3099     |
|    total timesteps | 12592    |
| train/             |          |
|    actor_loss      | -0.368   |
|    critic_loss     | 0.000147 |
|    ent_coef        | 0.00369  |
|    ent_coef_loss   | -36.6    |
|    learning_rate   | 0.000224 |
|    n_updates       | 12591    |
---------------------------------




---------------------------------
| rollout/           |          |
|    ep_len_mean     | 99.6     |
|    ep_rew_mean     | 1        |
| time/              |          |
|    episodes        | 52       |
|    fps             | 4        |
|    time_elapsed    | 3506     |
|    total timesteps | 14192    |
| train/             |          |
|    actor_loss      | -0.262   |
|    critic_loss     | 5.22e-06 |
|    ent_coef        | 0.0026   |
|    ent_coef_loss   | -39.9    |
|    learning_rate   | 0.000215 |
|    n_updates       | 14191    |
---------------------------------




---------------------------------
| rollout/           |          |
|    ep_len_mean     | 99.6     |
|    ep_rew_mean     | 1        |
| time/              |          |
|    episodes        | 56       |
|    fps             | 4        |
|    time_elapsed    | 3908     |
|    total timesteps | 15792    |
| train/             |          |
|    actor_loss      | -0.186   |
|    critic_loss     | 0.000359 |
|    ent_coef        | 0.00186  |
|    ent_coef_loss   | -40.7    |
|    learning_rate   | 0.000205 |
|    n_updates       | 15791    |
---------------------------------


ic| reward: 1.0, done: True
ic| reward: 1.0, done: True


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 93.6     |
|    ep_rew_mean     | 1        |
| time/              |          |
|    episodes        | 60       |
|    fps             | 4        |
|    time_elapsed    | 4120     |
|    total timesteps | 16647    |
| train/             |          |
|    actor_loss      | -0.141   |
|    critic_loss     | 1.1e-05  |
|    ent_coef        | 0.00156  |
|    ent_coef_loss   | -43.3    |
|    learning_rate   | 0.0002   |
|    n_updates       | 16646    |
---------------------------------




---------------------------------
| rollout/           |          |
|    ep_len_mean     | 93.6     |
|    ep_rew_mean     | 1        |
| time/              |          |
|    episodes        | 64       |
|    fps             | 4        |
|    time_elapsed    | 4528     |
|    total timesteps | 18247    |
| train/             |          |
|    actor_loss      | -0.139   |
|    critic_loss     | 5.07e-06 |
|    ent_coef        | 0.00114  |
|    ent_coef_loss   | -44.4    |
|    learning_rate   | 0.000191 |
|    n_updates       | 18246    |
---------------------------------




---------------------------------
| rollout/           |          |
|    ep_len_mean     | 93.6     |
|    ep_rew_mean     | 1        |
| time/              |          |
|    episodes        | 68       |
|    fps             | 4        |
|    time_elapsed    | 4895     |
|    total timesteps | 19847    |
| train/             |          |
|    actor_loss      | -0.13    |
|    critic_loss     | 2.9e-05  |
|    ent_coef        | 0.000854 |
|    ent_coef_loss   | -46      |
|    learning_rate   | 0.000181 |
|    n_updates       | 19846    |
---------------------------------


ic| reward: 1.0, done: True


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 91       |
|    ep_rew_mean     | 1        |
| time/              |          |
|    episodes        | 72       |
|    fps             | 4        |
|    time_elapsed    | 5181     |
|    total timesteps | 21074    |
| train/             |          |
|    actor_loss      | -0.0992  |
|    critic_loss     | 8.66e-05 |
|    ent_coef        | 0.000689 |
|    ent_coef_loss   | -42.4    |
|    learning_rate   | 0.000174 |
|    n_updates       | 21073    |
---------------------------------




---------------------------------
| rollout/           |          |
|    ep_len_mean     | 91       |
|    ep_rew_mean     | 1        |
| time/              |          |
|    episodes        | 76       |
|    fps             | 4        |
|    time_elapsed    | 5586     |
|    total timesteps | 22674    |
| train/             |          |
|    actor_loss      | -0.0862  |
|    critic_loss     | 1.1e-05  |
|    ent_coef        | 0.000528 |
|    ent_coef_loss   | -44.1    |
|    learning_rate   | 0.000164 |
|    n_updates       | 22673    |
---------------------------------


ic| reward: 1.0, done: True
ic| reward: 1.0, done: True


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 85.7     |
|    ep_rew_mean     | 1        |
| time/              |          |
|    episodes        | 80       |
|    fps             | 4        |
|    time_elapsed    | 5800     |
|    total timesteps | 23514    |
| train/             |          |
|    actor_loss      | -0.086   |
|    critic_loss     | 1.12e-06 |
|    ent_coef        | 0.000462 |
|    ent_coef_loss   | -50      |
|    learning_rate   | 0.000159 |
|    n_updates       | 23513    |
---------------------------------




---------------------------------
| rollout/           |          |
|    ep_len_mean     | 85.7     |
|    ep_rew_mean     | 1        |
| time/              |          |
|    episodes        | 84       |
|    fps             | 4        |
|    time_elapsed    | 6202     |
|    total timesteps | 25114    |
| train/             |          |
|    actor_loss      | -0.0795  |
|    critic_loss     | 3.11e-06 |
|    ent_coef        | 0.000363 |
|    ent_coef_loss   | -35.9    |
|    learning_rate   | 0.000149 |
|    n_updates       | 25113    |
---------------------------------




---------------------------------
| rollout/           |          |
|    ep_len_mean     | 85.7     |
|    ep_rew_mean     | 1        |
| time/              |          |
|    episodes        | 88       |
|    fps             | 4        |
|    time_elapsed    | 6604     |
|    total timesteps | 26714    |
| train/             |          |
|    actor_loss      | -0.0845  |
|    critic_loss     | 1.67e-06 |
|    ent_coef        | 0.000288 |
|    ent_coef_loss   | -52.6    |
|    learning_rate   | 0.00014  |
|    n_updates       | 26713    |
---------------------------------


ic| reward: 1.0, done: True


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 95.5     |
|    ep_rew_mean     | 1        |
| time/              |          |
|    episodes        | 92       |
|    fps             | 4        |
|    time_elapsed    | 7000     |
|    total timesteps | 28273    |
| train/             |          |
|    actor_loss      | -0.0561  |
|    critic_loss     | 0.000167 |
|    ent_coef        | 0.000234 |
|    ent_coef_loss   | -54      |
|    learning_rate   | 0.00013  |
|    n_updates       | 28272    |
---------------------------------


ic| reward: 1.0, done: True


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 92.6     |
|    ep_rew_mean     | 1        |
| time/              |          |
|    episodes        | 96       |
|    fps             | 4        |
|    time_elapsed    | 7302     |
|    total timesteps | 29485    |
| train/             |          |
|    actor_loss      | -0.0526  |
|    critic_loss     | 1.36e-06 |
|    ent_coef        | 0.000201 |
|    ent_coef_loss   | -53.9    |
|    learning_rate   | 0.000123 |
|    n_updates       | 29484    |
---------------------------------


ic| reward: 1.0, done: True


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 91.9     |
|    ep_rew_mean     | 1        |
| time/              |          |
|    episodes        | 100      |
|    fps             | 4        |
|    time_elapsed    | 7625     |
|    total timesteps | 30757    |
| train/             |          |
|    actor_loss      | -0.0494  |
|    critic_loss     | 0.000108 |
|    ent_coef        | 0.000174 |
|    ent_coef_loss   | -42.6    |
|    learning_rate   | 0.000115 |
|    n_updates       | 30756    |
---------------------------------




---------------------------------
| rollout/           |          |
|    ep_len_mean     | 91.9     |
|    ep_rew_mean     | 1        |
| time/              |          |
|    episodes        | 104      |
|    fps             | 4        |
|    time_elapsed    | 8030     |
|    total timesteps | 32357    |
| train/             |          |
|    actor_loss      | -0.0253  |
|    critic_loss     | 4.9e-05  |
|    ent_coef        | 0.000147 |
|    ent_coef_loss   | -23.4    |
|    learning_rate   | 0.000106 |
|    n_updates       | 32356    |
---------------------------------




---------------------------------
| rollout/           |          |
|    ep_len_mean     | 91.9     |
|    ep_rew_mean     | 1        |
| time/              |          |
|    episodes        | 108      |
|    fps             | 4        |
|    time_elapsed    | 8428     |
|    total timesteps | 33957    |
| train/             |          |
|    actor_loss      | 0.00276  |
|    critic_loss     | 9.12e-06 |
|    ent_coef        | 0.000126 |
|    ent_coef_loss   | -48.6    |
|    learning_rate   | 9.63e-05 |
|    n_updates       | 33956    |
---------------------------------




---------------------------------
| rollout/           |          |
|    ep_len_mean     | 91.9     |
|    ep_rew_mean     | 1        |
| time/              |          |
|    episodes        | 112      |
|    fps             | 4        |
|    time_elapsed    | 8849     |
|    total timesteps | 35557    |
| train/             |          |
|    actor_loss      | -0.00828 |
|    critic_loss     | 2.74e-06 |
|    ent_coef        | 0.000109 |
|    ent_coef_loss   | -22.8    |
|    learning_rate   | 8.67e-05 |
|    n_updates       | 35556    |
---------------------------------


ic| reward: 1.0, done: True


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 89.6     |
|    ep_rew_mean     | 1        |
| time/              |          |
|    episodes        | 116      |
|    fps             | 4        |
|    time_elapsed    | 9158     |
|    total timesteps | 36778    |
| train/             |          |
|    actor_loss      | -0.0128  |
|    critic_loss     | 5.93e-08 |
|    ent_coef        | 9.82e-05 |
|    ent_coef_loss   | -57.5    |
|    learning_rate   | 7.93e-05 |
|    n_updates       | 36777    |
---------------------------------


ic| reward: 1.0, done: True


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 87.5     |
|    ep_rew_mean     | 1        |
| time/              |          |
|    episodes        | 120      |
|    fps             | 4        |
|    time_elapsed    | 9464     |
|    total timesteps | 37999    |
| train/             |          |
|    actor_loss      | -0.0139  |
|    critic_loss     | 1.61e-07 |
|    ent_coef        | 9e-05    |
|    ent_coef_loss   | -52.2    |
|    learning_rate   | 7.2e-05  |
|    n_updates       | 37998    |
---------------------------------


ic| reward: 1.0, done: True


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 85.2     |
|    ep_rew_mean     | 1        |
| time/              |          |
|    episodes        | 124      |
|    fps             | 4        |
|    time_elapsed    | 9786     |
|    total timesteps | 39212    |
| train/             |          |
|    actor_loss      | -0.0139  |
|    critic_loss     | 1.72e-07 |
|    ent_coef        | 8.24e-05 |
|    ent_coef_loss   | -59.7    |
|    learning_rate   | 6.47e-05 |
|    n_updates       | 39211    |
---------------------------------


ic| reward: 1.0, done: True
ic| reward: 1.0, done: True


---------------------------------
| rollout/           |          |
|    ep_len_mean     | 81.9     |
|    ep_rew_mean     | 1        |
| time/              |          |
|    episodes        | 128      |
|    fps             | 3        |
|    time_elapsed    | 10037    |
|    total timesteps | 40067    |
| train/             |          |
|    actor_loss      | -0.0161  |
|    critic_loss     | 1.57e-06 |
|    ent_coef        | 7.81e-05 |
|    ent_coef_loss   | -42.3    |
|    learning_rate   | 5.96e-05 |
|    n_updates       | 40066    |
---------------------------------




---------------------------------
| rollout/           |          |
|    ep_len_mean     | 81.9     |
|    ep_rew_mean     | 1        |
| time/              |          |
|    episodes        | 132      |
|    fps             | 3        |
|    time_elapsed    | 10498    |
|    total timesteps | 41667    |
| train/             |          |
|    actor_loss      | -0.0143  |
|    critic_loss     | 2.05e-06 |
|    ent_coef        | 7.2e-05  |
|    ent_coef_loss   | -56.4    |
|    learning_rate   | 5e-05    |
|    n_updates       | 41666    |
---------------------------------




---------------------------------
| rollout/           |          |
|    ep_len_mean     | 81.9     |
|    ep_rew_mean     | 1        |
| time/              |          |
|    episodes        | 136      |
|    fps             | 3        |
|    time_elapsed    | 10962    |
|    total timesteps | 43267    |
| train/             |          |
|    actor_loss      | -0.0141  |
|    critic_loss     | 2.12e-08 |
|    ent_coef        | 6.7e-05  |
|    ent_coef_loss   | -58.5    |
|    learning_rate   | 4.04e-05 |
|    n_updates       | 43266    |
---------------------------------




Saving to logs/sac/PandaTouchEnv_color-v0_16


In [None]:
%tensorboard --logdir ./tensorboard_logs/PandaGraspEnv_color-v0/

SyntaxError: invalid syntax (<ipython-input-4-f56d0c02a80c>, line 1)