Full hexapod training notebook

erdnaxe · Aug 11, 2020 · dd2a7ca · dd2a7ca
1 parent c004977
commit dd2a7ca
Show file tree

Hide file tree

Showing 2 changed files with 97 additions and 0 deletions.
diff --git a/notebooks/stablebaselines/results_hexapod.ipynb b/notebooks/stablebaselines/results_hexapod.ipynb
@@ -0,0 +1,97 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Some tests on the full hexapod"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "WARNING:tensorflow:\n",
+      "The TensorFlow contrib module will not be included in TensorFlow 2.0.\n",
+      "For more information, please see:\n",
+      "  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md\n",
+      "  * https://github.com/tensorflow/addons\n",
+      "  * https://github.com/tensorflow/io (for I/O related ops)\n",
+      "If you depend on functionality not listed there, please file an issue.\n",
+      "\n",
+      "[+] Starting training 1\n"
+     ]
+    }
+   ],
+   "source": [
+    "from gym_kraby.train import train\n",
+    "\n",
+    "n_envs = 32\n",
+    "nminibatches = 1\n",
+    "noptepochs = 20\n",
+    "n_steps = 256\n",
+    "gamma = 0.95\n",
+    "cliprange = 0.2\n",
+    "learning_rate = 0.009\n",
+    "lam = 0.95\n",
+    "ent_coef = 0.01\n",
+    "\n",
+    "train(\n",
+    "    exp_name=\"hexapod_test1\",\n",
+    "    env_name=\"gym_kraby:HexapodBulletEnv-v0\",\n",
+    "    n_envs=n_envs,\n",
+    "    gamma=gamma,  # Discount factor\n",
+    "    n_steps=n_steps,  # batchsize = n_steps * n_envs\n",
+    "    ent_coef=ent_coef,  # Entropy coefficient for the loss calculation\n",
+    "    learning_rate=learning_rate,\n",
+    "    lam=lam,  # Factor for trade-off of bias vs variance for Generalized Advantage Estimator\n",
+    "    nminibatches=nminibatches,  # Number of training minibatches per update.\n",
+    "    noptepochs=noptepochs,  # Number of epoch when optimizing the surrogate\n",
+    "    cliprange=cliprange,  # Clipping parameter, this clipping depends on the reward scaling\n",
+    "    tensorboard_log=\"./tensorboard_log/hexapod_test/\",  # DEBUG\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "noptepochs = 30 -> 20 better\n",
+    "0.001 < learning_rate < 0.01\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.6.9"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
diff --git a/notebooks/stablebaselines/results.ipynb → ...oks/stablebaselines/results_one_leg.ipynb b/notebooks/stablebaselines/results.ipynb → ...oks/stablebaselines/results_one_leg.ipynb