## Training

In [1]:
import gym
import numpy as np

from stable_baselines import TD3
from stable_baselines.td3.policies import LnMlpPolicy
from stable_baselines.common.vec_env import DummyVecEnv
from stable_baselines.ddpg.noise import NormalActionNoise, OrnsteinUhlenbeckActionNoise

from stable_baselines.common.callbacks import CheckpointCallback, EvalCallback
from stable_baselines.bench import Monitor

import tensorflow as tf
tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)

from modules import KukaBulletGymFixedBall

import os

# Enable IPython's autoreload extension in mode 2: imported modules (such as
# the project-local `modules` package) are re-imported before each cell runs,
# so edits to external .py files take effect without restarting the kernel.
%load_ext autoreload
%autoreload 2

The TensorFlow contrib module will not be included in TensorFlow 2.0.
For more information, please see:
  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
  * https://github.com/tensorflow/addons
  * https://github.com/tensorflow/io (for I/O related ops)
If you depend on functionality not listed there, please file an issue.



In [2]:
# Set the logging and saving directories.
# Every artefact of this run (checkpoints, best model, evaluation logs and
# monitor logs) is stored below `parent`.
parent = "./TD3_kuka_fixed_ball_shallow/"
checkpoint_path = parent + "checkpoints/"
best_model_path = parent + "best_model/"
eval_log_path = parent + "eval_logs/"
monitor_log_path = parent + "monitor_logs/"

# Create the directories up front so the notebook survives Restart & Run All
# on a fresh checkout. The Monitor wrapper is handed `monitor_log_path` and
# (in Stable Baselines 2.x) opens its CSV there without creating missing
# directories, so a missing folder would fail with FileNotFoundError.
# `exist_ok=True` makes re-running this cell harmless.
for _run_dir in (checkpoint_path, best_model_path, eval_log_path, monitor_log_path):
    os.makedirs(_run_dir, exist_ok=True)

In [3]:
# Training environment: the Kuka fixed-ball task with rendering switched off,
# wrapped in a Monitor whose log output goes to `monitor_log_path`.
env = KukaBulletGymFixedBall.KukaBulletGym(render=False)
wrapped_env = Monitor(env, filename=monitor_log_path)

In [4]:
# Make callbacks

# Save a snapshot of the model every 50k calls into `checkpoint_path`,
# with file names prefixed by "model".
checkpoint_callback = CheckpointCallback(save_freq=50000,
                                         save_path=checkpoint_path,
                                         name_prefix="model")

# Evaluate on a dedicated environment instance. Passing `env` here would share
# the very object that `wrapped_env` uses for training: each evaluation run
# would reset and step the training environment in the middle of a rollout,
# corrupting the transitions collected for training and the Monitor statistics.
# NOTE(review): assumes KukaBulletGym can be instantiated twice in one process
# (i.e. each instance owns its own physics client) - confirm in
# modules/KukaBulletGymFixedBall.py.
eval_env = KukaBulletGymFixedBall.KukaBulletGym(render=False)

# Every 1000 calls, run 50 deterministic evaluation episodes; the best model
# so far is written to `best_model_path` and results to `eval_log_path`.
eval_callback = EvalCallback(eval_env,
                             best_model_save_path=best_model_path,
                             log_path=eval_log_path,
                             eval_freq=1000,
                             deterministic=True,
                             render=False,
                             verbose=0,
                             n_eval_episodes=50)

In [5]:
# Exploration noise for TD3: one independent Gaussian per action dimension,
# centred at zero with a standard deviation of 0.1.
n_actions = env.action_space.shape[-1]
action_noise = NormalActionNoise(
    mean=np.zeros(n_actions),
    sigma=np.full(n_actions, 0.1),
)

In [6]:
# Fix the random seed so repeated runs of this notebook are comparable.
# Per the Stable Baselines docs, fully deterministic results additionally
# require n_cpu_tf_sess=1; that is left at its default here.
SEED = 0

# TD3 agent with a layer-normalised MLP policy, trained on the monitored
# environment with the Gaussian action noise defined above.
model = TD3(LnMlpPolicy,
            wrapped_env,
            action_noise=action_noise,
            verbose=1,
            seed=SEED)

In [None]:
# Train for 500k timesteps, running both the checkpoint and the evaluation
# callback during learning.
model.learn(
    total_timesteps=500000,
    log_interval=5000,
    callback=[checkpoint_callback, eval_callback],
)

In [None]:
# Store the final model alongside the periodic checkpoints.
final_model_path = checkpoint_path + "end_manual"
model.save(final_model_path)