## Training

In [None]:
import gym
import numpy as np

from stable_baselines.ddpg.policies import MlpPolicy, LnMlpPolicy
from modules import CustomDDPGPolicy
from stable_baselines.common.noise import NormalActionNoise, OrnsteinUhlenbeckActionNoise, AdaptiveParamNoiseSpec
from stable_baselines import DDPG
from stable_baselines.common.callbacks import CheckpointCallback, EvalCallback
from stable_baselines.bench import Monitor

from stable_baselines.common.callbacks import CheckpointCallback, EvalCallback
from stable_baselines.bench import Monitor

import tensorflow as tf
tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)

from modules import KukaBulletGymFixedBall

import os

# Reloading any code written in external .py files.
%load_ext autoreload
%autoreload 2

In [None]:
# Root folder of this experiment; every artefact below is written beneath it.
parent = "./DDPG_kuka_fixed_ball_shallow/"

# Sub-folders, one per kind of output. Each keeps its trailing slash so that
# later code can append a file name by plain string concatenation.
checkpoint_path = f"{parent}checkpoints/"
best_model_path = f"{parent}best_model/"
eval_log_path = f"{parent}eval_logs/"
monitor_log_path = f"{parent}monitor_logs/"

In [None]:
# Make environment with monitor wrapper.
# Monitor opens its log file as soon as it is constructed, so the target folder
# must already exist; on a fresh run it does not, and the open would raise
# FileNotFoundError. exist_ok=True keeps re-running this cell harmless.
os.makedirs(monitor_log_path, exist_ok=True)
env = KukaBulletGymFixedBall.KukaBulletGym(render=False)
wrapped_env = Monitor(env, monitor_log_path)

In [None]:
# Make callbacks

# Periodic snapshots of the model, written under checkpoint_path with the
# file-name prefix "model".
checkpoint_callback = CheckpointCallback(
    name_prefix="model",
    save_path=checkpoint_path,
    save_freq=50000,
)

# Periodic evaluation: 50 deterministic episodes every 1000 steps, without
# rendering or console output. The best model goes to best_model_path and the
# evaluation logs to eval_log_path.
# NOTE(review): `env` is the same instance that is wrapped by Monitor and used
# for training, so evaluation presumably steps/resets the simulator the learner
# is in the middle of using -- consider a separate evaluation environment
# (confirm that KukaBulletGym supports more than one instance first).
eval_callback = EvalCallback(
    env,
    n_eval_episodes=50,
    eval_freq=1000,
    deterministic=True,
    render=False,
    verbose=0,
    best_model_save_path=best_model_path,
    log_path=eval_log_path,
)

In [None]:
# Make DDPG noise objects.
# Only a parameter-noise spec is configured; the action-noise slot is left
# empty (None) and passed to the model as such.
param_noise = AdaptiveParamNoiseSpec(
    desired_action_stddev=0.5,
    initial_stddev=0.5,
)
action_noise = None

In [None]:
# Make DDPG model on the Monitor-wrapped environment.
# observation_range is widened to (-inf, inf) because the default clips
# observations to [-5, 5], which is bad here since our target position can be
# bigger than that.
model = DDPG(
    LnMlpPolicy,
    wrapped_env,
    action_noise=action_noise,
    param_noise=param_noise,
    observation_range=(-np.inf, np.inf),
    render=False,
    verbose=1,
)

In [None]:
# Train for 500k timesteps with both callbacks attached (checkpointing first,
# then evaluation -- same order as before).
training_callbacks = [checkpoint_callback, eval_callback]
model.learn(
    total_timesteps=int(5e5),
    log_interval=5000,
    callback=training_callbacks,
)

In [None]:
# Save the final model next to the automatic checkpoints, under the name
# "manual".
manual_save_path = checkpoint_path + "manual"
model.save(manual_save_path)