Monitor and plot results
erdnaxe committed Jul 21, 2020
1 parent 9a5ddea commit e80b971
Showing 7 changed files with 367 additions and 256 deletions.
1 change: 1 addition & 0 deletions .dockerignore
@@ -16,4 +16,5 @@ dist
# Training logs
notebooks/*/tensorboard_log
notebooks/*/trained_models
notebooks/*/training_logs
notebooks/*/video
1 change: 1 addition & 0 deletions .gitignore
@@ -16,4 +16,5 @@ dist
# Training logs
tensorboard_log
trained_models
training_logs
video
4 changes: 2 additions & 2 deletions Dockerfile.stablebaselines
@@ -6,7 +6,7 @@ FROM tensorflow/tensorflow:1.15.2-gpu-py3
RUN apt-get update && \
    apt-get install -y libsm6 libxext6 libxrender-dev cmake zlib1g-dev ffmpeg libglvnd0 libgl1 libglx0 libegl1 mesa-utils && \
    rm -rf /var/lib/apt/lists/*
RUN pip install --no-cache-dir gym stable-baselines imageio jupyter jupyterlab matplotlib optuna
RUN pip install --no-cache-dir gym stable-baselines imageio jupyter jupyterlab matplotlib optuna seaborn
WORKDIR /tf/kraby
COPY . /tf/kraby
RUN pip install --no-cache-dir -e .
@@ -23,4 +23,4 @@ ENV NVIDIA_DRIVER_CAPABILITIES ${NVIDIA_DRIVER_CAPABILITIES},display
RUN mkdir /.local /.jupyter && \
    chmod a+rwx /.local /.jupyter
EXPOSE 8888
CMD jupyter lab --ip 0.0.0.0 --no-browser --notebook-dir=/tf/kraby/notebooks/stablebaselines
CMD jupyter lab --ip 0.0.0.0 --no-browser --notebook-dir=/tf/kraby
102 changes: 102 additions & 0 deletions gym_kraby/train.py
@@ -0,0 +1,102 @@
import tensorflow as tf
from stable_baselines.common.policies import MlpPolicy
from stable_baselines.common import set_global_seeds
from stable_baselines import PPO2, logger
from stable_baselines.common.vec_env import SubprocVecEnv
from stable_baselines.bench.monitor import Monitor
import gym
from gym.wrappers import TimeLimit
import argparse
import os

# Disable Tensorflow deprecation warnings
tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)


def make_env(log_folder, env_name, rank, seed=0):
    """
    Init an environment.

    :param log_folder: (str) folder in which Monitor logs are written
    :param env_name: (str) Gym environment id
    :param rank: (int) index of the subprocess
    :param seed: (int) the initial seed for RNG
    """
    timestep_limit = 32

    def _init():
        env = gym.make(env_name)
        env = TimeLimit(env, timestep_limit)
        # Monitor records per-episode reward, length and time to <log_folder>/seed_<seed+rank>.monitor.csv
        env = Monitor(env, log_folder + 'seed_' + str(seed + rank))
        env.seed(seed + rank)
        return env
    set_global_seeds(seed)
    return _init


def train(exp_name, env_name, n_envs, **kwargs):
    # Train 10 runs
    for n in range(1, 11):  # PPO2_n
        # Configure logger
        log_folder = 'training_logs/' + exp_name + '_' + str(n) + '/'
        logger.configure(log_folder, ['csv'])

        print("[+] Starting training", n)
        env = SubprocVecEnv([make_env(log_folder, env_name, i, (n-1)*32)
                             for i in range(n_envs)])

        model = PPO2(
            policy=MlpPolicy,
            env=env,
            verbose=True,

            # Make it deterministic
            seed=32*n,  # Fixed seed
            n_cpu_tf_sess=1,  # force deterministic results

            # Pass arguments
            **kwargs
        )
        model.learn(
            total_timesteps=int(250e3),
            log_interval=1,  # log each update
        )

        # Saving model
        os.makedirs("trained_models", exist_ok=True)
        model.save("trained_models/" + exp_name + "_" + str(n))

        env.close()
        del env
        del model


if __name__ == "__main__":
    # Some command-line settings
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--one-leg',
        action='store_true',
        help='simulate or command only one leg',
    )
    args = parser.parse_args()

    # Select the corresponding environment
    model = 'OneLeg' if args.one_leg else 'Hexapod'
    env_name = 'gym_kraby:' + model + 'BulletEnv-v0'

    # Use `tensorboard --logdir notebooks/stablebaselines/tensorboard_log/exp_name` to inspect learning
    # See https://github.com/araffin/rl-baselines-zoo/blob/master/utils/hyperparams_opt.py#L148
    # minibatch size = n_steps * n_envs / nminibatches = 128 * 32 / 64 = 64
    train(
        exp_name="test",
        env_name=env_name,
        n_envs=32,
        gamma=0.90,  # Discount factor
        n_steps=128,  # rollout batch size per update = n_steps * n_envs = 4096
        ent_coef=0.01,  # Entropy coefficient for the loss calculation
        learning_rate=10e-4,  # i.e. 1e-3
        lam=0.95,  # Trade-off of bias vs variance for the Generalized Advantage Estimator
        nminibatches=64,  # Number of training minibatches per update
        noptepochs=30,  # Number of epochs when optimizing the surrogate
        cliprange=0.2,  # Clipping parameter; depends on the reward scaling
        tensorboard_log="./tensorboard_log/test/",  # TensorBoard integration
    )
127 changes: 27 additions & 100 deletions notebooks/stablebaselines/one_leg_training.ipynb
@@ -1,124 +1,51 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"# Disable Tensorflow deprecation warnings\n",
"import tensorflow as tf\n",
"tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# One Leg training using PPO2 from StableBaselines"
"# Training using PPO2"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"scrolled": false
},
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Starting training 1\n",
"Starting training 2\n",
"Starting training 3\n",
"Starting training 4\n",
"Starting training 5\n",
"Starting training 6\n",
"Starting training 7\n",
"Starting training 8\n",
"Starting training 9\n",
"Starting training 10\n"
"WARNING:tensorflow:\n",
"The TensorFlow contrib module will not be included in TensorFlow 2.0.\n",
"For more information, please see:\n",
" * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md\n",
" * https://github.com/tensorflow/addons\n",
" * https://github.com/tensorflow/io (for I/O related ops)\n",
"If you depend on functionality not listed there, please file an issue.\n",
"\n",
"[+] Starting training 1\n",
"[+] Starting training 2\n",
"[+] Starting training 3\n"
]
}
],
"source": [
"from stable_baselines.common.policies import MlpPolicy\n",
"from stable_baselines.common import set_global_seeds\n",
"from stable_baselines import PPO2\n",
"from stable_baselines.common.vec_env import SubprocVecEnv\n",
"import gym\n",
"from gym.wrappers import TimeLimit\n",
"\n",
"\n",
"def make_env(rank, seed=0):\n",
" \"\"\"\n",
" Init an environment\n",
"\n",
" :param rank: (int) index of the subprocess\n",
" :param seed: (int) the inital seed for RNG\n",
" \"\"\"\n",
" timestep_limit = 128\n",
"\n",
" def _init():\n",
" env = gym.make(\"gym_kraby:OneLegBulletEnv-v0\")\n",
" env = TimeLimit(env, timestep_limit)\n",
" env.seed(seed + rank)\n",
" return env\n",
" set_global_seeds(seed)\n",
" return _init\n",
"\n",
"# Train 10 runs\n",
"for n in range(1, 11): # PPO2_n\n",
" print(\"Starting training\", n)\n",
" num_cpu = 32\n",
" env = SubprocVecEnv([make_env(i, n*32) for i in range(num_cpu)])\n",
"\n",
" # Use `tensorboard --logdir notebooks/stablebaselines/tensorboard_log/one_leg_doc1` to inspect learning\n",
" model = PPO2(\n",
" policy=MlpPolicy,\n",
" env=env,\n",
" gamma=0.99, # Discount factor\n",
" n_steps=512, # batchsize = n_steps * n_envs\n",
" ent_coef=0.01, # Entropy coefficient for the loss calculation\n",
" learning_rate=2.5e-4,\n",
" lam=0.95, # Factor for trade-off of bias vs variance for Generalized Advantage Estimator\n",
" nminibatches=64, # Number of training minibatches per update.\n",
" # For recurrent policies, the nb of env run in parallel should be a multiple of it.\n",
" noptepochs=30, # Number of epoch when optimizing the surrogate\n",
" cliprange=0.2, # Clipping parameter, this clipping depends on the reward scaling\n",
" verbose=False,\n",
" tensorboard_log=\"./tensorboard_log/one_leg_doc1/\",\n",
"\n",
" seed=32*n, # Fixed seed\n",
" n_cpu_tf_sess=1, # force deterministic results\n",
" )\n",
" model.learn(total_timesteps=int(1e6))\n",
"\n",
" # Saving model\n",
" model.save(\"trained_models/one_leg_doc1_\" + str(n))\n",
"\n",
" env.close()\n",
" del env\n",
" del model"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Training with PPO2\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"doc1: Learning to go to a random target TODO\n",
"\n",
"doc2: without torque\n",
"from gym_kraby.train import train\n",
"\n",
"doc3: cos/sin"
"train(\n",
" exp_name=\"test\",\n",
" env_name=\"gym_kraby:OneLegBulletEnv-v0\",\n",
" n_envs=32,\n",
" gamma=0.90, # Discount factor\n",
" n_steps=128, # batchsize = n_steps * n_envs\n",
" ent_coef=0.01, # Entropy coefficient for the loss calculation\n",
" learning_rate=10e-4,\n",
" lam=0.95, # Factor for trade-off of bias vs variance for Generalized Advantage Estimator\n",
" nminibatches=64, # Number of training minibatches per update.\n",
" noptepochs=30, # Number of epoch when optimizing the surrogate\n",
" cliprange=0.2, # Clipping parameter, this clipping depends on the reward scaling, -1 desactivate\n",
")"
]
},
{