## Training

In [1]:
import gym
import numpy as np

from stable_baselines import TD3
from stable_baselines.td3.policies import LnMlpPolicy
from stable_baselines.common.vec_env import DummyVecEnv
from stable_baselines.ddpg.noise import NormalActionNoise, OrnsteinUhlenbeckActionNoise

from stable_baselines.common.callbacks import CheckpointCallback, EvalCallback
from stable_baselines.bench import Monitor

from modules import CustomTD3Policy

import tensorflow as tf
tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)

from modules import KukaBulletGymRandomBallRandomTarget

import os

# Reloading any code written in external .py files.
%load_ext autoreload
%autoreload 2

The TensorFlow contrib module will not be included in TensorFlow 2.0.
For more information, please see:
  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
  * https://github.com/tensorflow/addons
  * https://github.com/tensorflow/io (for I/O related ops)
If you depend on functionality not listed there, please file an issue.



In [2]:
# Set the logging and saving directories
parent = "./TD3_kuka_random_ball_random_target_shallow/"
checkpoint_path = parent + "checkpoints/"
best_model_path = parent + "best_model/"
eval_log_path = parent + "eval_logs/"
monitor_log_path = parent + "monitor_logs/"

In [3]:
# Make environment with monitor wrapper
env = KukaBulletGymRandomBallRandomTarget.KukaBulletGym(render=False)
wrapped_env = Monitor(env, monitor_log_path)

In [4]:
# Make callbacks
checkpoint_callback = CheckpointCallback(save_freq=50000, 
                                         save_path=checkpoint_path,
                                         name_prefix="model")
eval_callback = EvalCallback(env,
                             best_model_save_path=best_model_path, 
                             log_path=eval_log_path,
                             eval_freq=1000, 
                             deterministic=True, 
                             render=False,
                             verbose=0,
                             n_eval_episodes=50)

In [5]:
# The noise objects for TD3
n_actions = env.action_space.shape[-1]
action_noise = NormalActionNoise(mean=np.zeros(n_actions), sigma=0.1 * np.ones(n_actions))

In [6]:
model = TD3.load("./TD3_kuka_random_ball_shallow/best_model/best_model")
model.set_env(wrapped_env)

Loading a model without an environment, this model cannot be trained until it has a valid environment.


In [7]:
model.learn(total_timesteps=int(1e6), log_interval=5000, callback=[checkpoint_callback,eval_callback])

  "{} != {}".format(self.training_env, self.eval_env))


---------------------------------------
| current_lr              | 0.0003    |
| ep_rewmean              | 12        |
| episodes                | 5000      |
| eplenmean               | 3.05      |
| fps                     | 9         |
| mean 100 episode reward | 12        |
| n_updates               | 14000     |
| qf1_loss                | 175.9384  |
| qf2_loss                | 176.35236 |
| time_elapsed            | 1490      |
| total timesteps         | 14183     |
---------------------------------------
---------------------------------------
| current_lr              | 0.0003    |
| ep_rewmean              | 19        |
| episodes                | 10000     |
| eplenmean               | 4.3       |
| fps                     | 10        |
| mean 100 episode reward | 19        |
| n_updates               | 31900     |
| qf1_loss                | 222.17229 |
| qf2_loss                | 227.36511 |
| time_elapsed            | 3129      |
| total timesteps         | 32025     |


<stable_baselines.td3.td3.TD3 at 0x206d6b289c8>

In [9]:
model.save(checkpoint_path+"end_manual")