Supporting multirollout for the simapp released as of reInvent 2019 (#…
sunil19m authored and saurabh3949 committed Jan 17, 2020
1 parent 9e38c5c commit 1cef53b
Showing 79 changed files with 8,565 additions and 1,557 deletions.
@@ -5,6 +5,8 @@ ARG CPU_OR_GPU
ARG AWS_REGION
FROM 520713654638.dkr.ecr.$AWS_REGION.amazonaws.com/sagemaker-tensorflow-scriptmode:1.12.0-$CPU_OR_GPU-py3

COPY ./src/markov /opt/amazon/markov

RUN apt-get update && apt-get install -y --no-install-recommends \
build-essential \
jq \
@@ -38,29 +40,32 @@ RUN pip install \
PyOpenGL==3.1.0 \
scipy==1.3.0 \
scikit-image==0.15.0 \
gym==0.10.5 \
retrying \
eventlet \
boto3 \
minio==4.0.5 \
futures==3.1.1 \
redis==3.3.8
boto3==1.9.23 \
minio==4.0.5 \
kubernetes==7.0.0 \
opencv-python==4.1.1.26 \
rl-coach-slim==1.0.0 \
retrying \
eventlet

# Install rl coach
RUN pip install rl-coach-slim==0.11.1
RUN pip install "mxnet-mkl>=1.3.0"

RUN pip install --no-cache-dir --upgrade sagemaker-containers

# Patch Intel coach
COPY ./src/rl_coach.patch /opt/amazon/rl_coach.patch
RUN patch -p1 -N --directory=/usr/local/lib/python3.6/dist-packages/ < /opt/amazon/rl_coach.patch


# Patch Intel coach so that discrete distributions cannot produce NaNs
COPY src/lib/ppo_head.py /usr/local/lib/python3.6/dist-packages/rl_coach/architectures/tensorflow_components/heads/ppo_head.py

# Copy in all the code and make it available on the path
COPY src/lib/redis.conf /etc/redis/redis.conf
COPY ./src/lib/redis.conf /etc/redis/redis.conf
ENV PYTHONPATH /opt/amazon/:$PYTHONPATH
ENV PATH /opt/ml/code/:$PATH
WORKDIR /opt/ml/code

# Tell sagemaker-containers where the launch point is for training job.
ENV NODE_TYPE SAGEMAKER_TRAINING_WORKER

ENV PYTHONUNBUFFERED 1
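The NODE_TYPE variable set above is how a container signals which role it plays. A minimal sketch of the kind of dispatch this implies, assuming a hypothetical entry point (this is not the actual markov launch code):

import os

def start_training_worker():
    # Placeholder: the real worker would launch the SageMaker-side trainer.
    print("starting SageMaker training worker")

def start_rollout_worker():
    # Placeholder: the real worker would launch a RoboMaker rollout worker.
    print("starting rollout worker")

# NODE_TYPE comes from the Dockerfile's ENV instruction above.
if os.environ.get("NODE_TYPE") == "SAGEMAKER_TRAINING_WORKER":
    start_training_worker()
else:
    start_rollout_worker()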
@@ -60,7 +60,7 @@
"# !python3 sim_app_bundler.py --clean\n",
"\n",
"# # Download Robomaker simApp from the deepracer public s3 bucket\n",
"# simulation_application_bundle_location = \"s3://deepracer-managed-resources-us-east-1/deepracer-simapp-notebook.tar.gz\"\n",
"# simulation_application_bundle_location = \"s3://deepracer-managed-resources-us-east-1/deepracer-simapp.tar.gz\"\n",
"# !aws s3 cp {simulation_application_bundle_location} ./\n",
"\n",
"# # Untar the simapp bundle\n",
@@ -73,7 +73,7 @@
"# # bundle/opt/install/deepracer_simulation_environment/share/deepracer_simulation_environment/\n",
"# # bundle/opt/install/deepracer_simulation_environment/lib/deepracer_simulation_environment/\n",
"\n",
"# # Copying the notebook src/markov changes to the simapp (For sagemaker container)\n",
"# # # Copying the notebook src/markov changes to the simapp (For sagemaker container)\n",
"# !rsync -av ./src/markov/ ./build/simapp/bundle/opt/install/sagemaker_rl_agent/lib/python3.5/site-packages/markov\n",
"\n",
"# !python3 sim_app_bundler.py --tar"
@@ -449,14 +449,11 @@
"source": [
"# Uncomment the pygmentize code lines to see the code\n",
"\n",
"# Environmental File\n",
"#!pygmentize src/markov/environments/deepracer_racetrack_env.py\n",
"\n",
"# Reward function\n",
"#!pygmentize src/markov/rewards/default.py\n",
"\n",
"# Action space\n",
"#!pygmentize src/markov/actions/model_metadata_10_state.json\n",
"#!pygmentize src/markov/actions/single_speed_stereo_shallow.json\n",
"\n",
"# Preset File\n",
"#!pygmentize src/markov/presets/default.py\n",
@@ -482,14 +479,11 @@
"# Clean up the previously uploaded files\n",
"!aws s3 rm --recursive {s3_location}\n",
"\n",
"# Make any changes to the environment and preset files below and upload these files\n",
"!aws s3 cp src/markov/environments/deepracer_racetrack_env.py {s3_location}/environments/deepracer_racetrack_env.py\n",
"\n",
"!aws s3 cp src/markov/rewards/default.py {s3_location}/rewards/reward_function.py\n",
"!aws s3 cp src/markov/rewards/default.py {s3_location}/customer_reward_function.py\n",
"\n",
"!aws s3 cp src/markov/actions/model_metadata_10_state.json {s3_location}/model_metadata.json\n",
"!aws s3 cp src/markov/actions/default.json {s3_location}/model/model_metadata.json\n",
"\n",
"!aws s3 cp src/markov/presets/default.py {s3_location}/presets/preset.py\n",
"#!aws s3 cp src/markov/presets/default.py {s3_location}/presets/preset.py\n",
"#!aws s3 cp src/markov/presets/preset_attention_layer.py {s3_location}/presets/preset.py"
]
},
@@ -560,9 +554,19 @@
" \"s3_bucket\": s3_bucket,\n",
" \"s3_prefix\": s3_prefix,\n",
" \"aws_region\": aws_region,\n",
" \"preset_s3_key\": \"%s/presets/preset.py\"% s3_prefix,\n",
" \"model_metadata_s3_key\": \"%s/model_metadata.json\" % s3_prefix,\n",
" \"environment_s3_key\": \"%s/environments/deepracer_racetrack_env.py\" % s3_prefix,\n",
" \"model_metadata_s3_key\": \"%s/model/model_metadata.json\" % s3_prefix,\n",
" \"reward_function_s3_source\": \"%s/customer_reward_function.py\" % s3_prefix,\n",
" \"batch_size\": \"64\",\n",
" \"num_epochs\": \"10\",\n",
" \"stack_size\": \"1\",\n",
" \"lr\": \"0.0003\",\n",
" \"exploration_type\": \"Categorical\",\n",
" \"e_greedy_value\": \"1\",\n",
" \"epsilon_steps\": \"10000\",\n",
" \"beta_entropy\": \"0.01\",\n",
" \"discount_factor\": \"0.999\",\n",
" \"loss_type\": \"Huber\",\n",
" \"num_episodes_between_training\": \"20\"\n",
" },\n",
" subnets=deepracer_subnets,\n",
" security_group_ids=deepracer_security_groups,\n",
@@ -573,6 +577,15 @@
"print(\"Training job: %s\" % job_name)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"training_job_arn = estimator.latest_training_job.describe()['TrainingJobArn']"
]
},
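The training job ARN captured here is written into training_params.yaml below (TRAINING_JOB_ARN), so the RoboMaker simulation workers can be associated with this SageMaker training job.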
{
"cell_type": "markdown",
"metadata": {},
@@ -699,6 +712,61 @@
"We create [AWS RoboMaker](https://console.aws.amazon.com/robomaker/home#welcome) Simulation Jobs that simulates the environment and shares this data with SageMaker for training. "
]
},
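For orientation, a hedged sketch of what the simulation-job launch boils down to with boto3 (the launch-config values here are assumptions for illustration; the notebook cells that follow build the actual request):

import boto3

robomaker = boto3.client("robomaker", region_name=aws_region)

# envriron_vars, simulation_app_arn, deepracer_subnets,
# deepracer_security_groups, and role are defined elsewhere in the notebook.
response = robomaker.create_simulation_job(
    iamRole=role,
    maxJobDurationInSeconds=3600,
    failureBehavior="Fail",
    simulationApplications=[{
        "application": simulation_app_arn,
        "launchConfig": {
            "packageName": "deepracer_simulation_environment",  # assumed
            "launchFile": "distributed_training.launch",        # assumed
            "environmentVariables": envriron_vars,
        },
    }],
    vpcConfig={
        "subnets": deepracer_subnets,
        "securityGroups": deepracer_security_groups,
    },
)
print("Simulation job ARN:", response["arn"])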
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"s3_yaml_name=\"training_params.yaml\"\n",
"world_name = \"reInvent2019_track\"\n",
"\n",
"!touch {s3_yaml_name}\n",
"!echo \"WORLD_NAME: \\\"{world_name}\\\"\" | tee -a {s3_yaml_name}\n",
"!echo \"SAGEMAKER_SHARED_S3_BUCKET: \\\"{s3_bucket}\\\"\" | tee -a {s3_yaml_name}\n",
"!echo \"SAGEMAKER_SHARED_S3_PREFIX: \\\"{s3_prefix}\\\"\" | tee -a {s3_yaml_name}\n",
"!echo \"TRAINING_JOB_ARN: \\\"{training_job_arn}\\\"\" | tee -a {s3_yaml_name}\n",
"!echo \"METRICS_S3_BUCKET: \\\"{s3_bucket}\\\"\" | tee -a {s3_yaml_name}\n",
"!echo \"METRICS_S3_OBJECT_KEY: \\\"{s3_prefix}/training_metrics.json\\\"\" | tee -a {s3_yaml_name}\n",
"!echo \"AWS_REGION: \\\"{aws_region}\\\"\" | tee -a {s3_yaml_name}\n",
"!echo \"TARGET_REWARD_SCORE: \\\"None\\\"\" | tee -a {s3_yaml_name}\n",
"!echo \"NUMBER_OF_EPISODES: \\\"0\\\"\" | tee -a {s3_yaml_name}\n",
"!echo \"ROBOMAKER_SIMULATION_JOB_ACCOUNT_ID: \\\"{account_id}\\\"\" | tee -a {s3_yaml_name}\n",
"!echo \"JOB_TYPE: \\\"TRAINING\\\"\" | tee -a {s3_yaml_name}\n",
"!echo \"CHANGE_START_POSITION: \\\"true\\\"\" | tee -a {s3_yaml_name}\n",
"!echo \"ALTERNATE_DRIVING_DIRECTION: \\\"false\\\"\" | tee -a {s3_yaml_name}\n",
"!echo \"KINESIS_VIDEO_STREAM_NAME: \\\"{kvs_stream_name}\\\"\" | tee -a {s3_yaml_name}\n",
"!echo \"REWARD_FILE_S3_KEY: \\\"{s3_prefix}/customer_reward_function.py\\\"\" | tee -a {s3_yaml_name}\n",
"!echo \"MODEL_METADATA_FILE_S3_KEY: \\\"{s3_prefix}/model/model_metadata.json\\\"\" | tee -a {s3_yaml_name}\n",
"\n",
"!echo \"NUMBER_OF_OBSTACLES: \\\"0\\\"\" | tee -a {s3_yaml_name}\n",
"!echo \"MIN_DISTANCE_BETWEEN_OBSTACLES: \\\"2.0\\\"\" | tee -a {s3_yaml_name}\n",
"!echo \"RANDOMIZE_OBSTACLE_LOCATIONS: \\\"false\\\"\" | tee -a {s3_yaml_name}\n",
"!echo \"PSEUDO_RANDOMIZE_OBSTACLE_LOCATIONS: \\\"false\\\"\" | tee -a {s3_yaml_name}\n",
"!echo \"NUMBER_OF_PSEUDO_RANDOM_PLACEMENTS: \\\"2\\\"\" | tee -a {s3_yaml_name}\n",
"!echo \"IS_OBSTACLE_BOT_CAR: \\\"false\\\"\" | tee -a {s3_yaml_name} \n",
"\n",
"!echo \"IS_LANE_CHANGE: \\\"false\\\"\" | tee -a {s3_yaml_name}\n",
"!echo \"LOWER_LANE_CHANGE_TIME: \\\"3.0\\\"\" | tee -a {s3_yaml_name}\n",
"!echo \"UPPER_LANE_CHANGE_TIME: \\\"5.0\\\"\" | tee -a {s3_yaml_name}\n",
"!echo \"LANE_CHANGE_DISTANCE: \\\"1.0\\\"\" | tee -a {s3_yaml_name}\n",
"\n",
"!echo \"NUMBER_OF_BOT_CARS: \\\"6\\\"\" | tee -a {s3_yaml_name}\n",
"!echo \"MIN_DISTANCE_BETWEEN_BOT_CARS: \\\"2.0\\\"\" | tee -a {s3_yaml_name}\n",
"!echo \"RANDOMIZE_BOT_CAR_LOCATIONS: \\\"false\\\"\" | tee -a {s3_yaml_name}\n",
"!echo \"BOT_CAR_SPEED: \\\"0.2\\\"\" | tee -a {s3_yaml_name}\n",
"\n",
"!echo \"CAR_COLOR: \\\"LightBlue\\\"\" | tee -a {s3_yaml_name}\n",
"!echo \"FIRST_PERSON_VIEW: \\\"false\\\"\" | tee -a {s3_yaml_name}\n",
"!echo \"CHANGE_START_POSITION: \\\"true\\\"\" | tee -a {s3_yaml_name}\n",
"\n",
"print(\"Upload yaml settings to S3\")\n",
"!aws s3 cp ./training_params.yaml {s3_location}/training_params.yaml\n",
"!rm training_params.yaml"
]
},
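The cell above assembles training_params.yaml one echo/tee line at a time. An equivalent, arguably less error-prone approach is to dump a dict with PyYAML; a sketch assuming PyYAML is installed (the notebook itself does not do this):

import yaml

# Mirror of the echo/tee lines above; every value is written as a string,
# matching the quoting in the cell.
training_params = {
    "WORLD_NAME": world_name,
    "SAGEMAKER_SHARED_S3_BUCKET": s3_bucket,
    "SAGEMAKER_SHARED_S3_PREFIX": s3_prefix,
    "TRAINING_JOB_ARN": training_job_arn,
    "AWS_REGION": aws_region,
    "JOB_TYPE": "TRAINING",
    # ... remaining keys exactly as in the cell above
}

with open("training_params.yaml", "w") as f:
    yaml.dump({k: str(v) for k, v in training_params.items()},
              f, default_flow_style=False)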
{
"cell_type": "code",
"execution_count": null,
@@ -708,21 +776,13 @@
"num_simulation_workers = 1\n",
"\n",
"envriron_vars = {\n",
" \"WORLD_NAME\": \"reinvent_base\",\n",
" \"KINESIS_VIDEO_STREAM_NAME\": kvs_stream_name,\n",
" \"SAGEMAKER_SHARED_S3_BUCKET\": s3_bucket,\n",
" \"S3_YAML_NAME\": s3_yaml_name,\n",
" \"SAGEMAKER_SHARED_S3_PREFIX\": s3_prefix,\n",
" \"TRAINING_JOB_ARN\": job_name,\n",
" \"SAGEMAKER_SHARED_S3_BUCKET\": s3_bucket,\n",
" \"WORLD_NAME\": world_name,\n",
" \"KINESIS_VIDEO_STREAM_NAME\": kvs_stream_name,\n",
" \"APP_REGION\": aws_region,\n",
" \"METRIC_NAME\": \"TrainingRewardScore\",\n",
" \"METRIC_NAMESPACE\": \"AWSDeepRacer\",\n",
" \"REWARD_FILE_S3_KEY\": \"%s/rewards/reward_function.py\" % s3_prefix,\n",
" \"MODEL_METADATA_FILE_S3_KEY\": \"%s/model_metadata.json\" % s3_prefix,\n",
" \"METRICS_S3_BUCKET\": s3_bucket,\n",
" \"METRICS_S3_OBJECT_KEY\": s3_bucket + \"/training_metrics.json\",\n",
" \"TARGET_REWARD_SCORE\": \"None\",\n",
" \"NUMBER_OF_EPISODES\": \"0\",\n",
" \"ROBOMAKER_SIMULATION_JOB_ACCOUNT_ID\": account_id\n",
" \"MODEL_METADATA_FILE_S3_KEY\": \"%s/model/model_metadata.json\" % s3_prefix\n",
"}\n",
"\n",
"simulation_application = {\"application\":simulation_app_arn,\n",
@@ -853,6 +913,52 @@
"### Evaluation - ReInvent Track"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"s3_yaml_name=\"evaluation_params.yaml\"\n",
"world_name = \"reInvent2019_track\"\n",
"\n",
"!touch {s3_yaml_name}\n",
"echo \"WORLD_NAME: \\\"{world_name}\\\"\" | tee {s3_yaml_name}\n",
"echo \"MODEL_S3_BUCKET: \\\"{s3_bucket}\\\"\" | tee -a {s3_yaml_name}\n",
"echo \"MODEL_S3_PREFIX: \\\"{s3_prefix}\\\"\" | tee -a {s3_yaml_name}\n",
"echo \"AWS_REGION: \\\"{aws_region}\\\"\" | tee -a {s3_yaml_name}\n",
"echo \"METRICS_S3_BUCKET: \\\"{s3_bucket}\\\"\" | tee -a {s3_yaml_name}\n",
"echo \"METRICS_S3_OBJECT_KEY: \\\"{s3_prefix}/evaluation_metrics.json\\\"\" | tee -a {s3_yaml_name}\n",
"echo \"NUMBER_OF_TRIALS: \\\"10\\\"\" | tee -a {s3_yaml_name}\n",
"echo \"ROBOMAKER_SIMULATION_JOB_ACCOUNT_ID: \\\"{account_id}\\\"\" | tee -a {s3_yaml_name}\n",
"echo \"JOB_TYPE: \\\"EVALUATION\\\"\" | tee -a {s3_yaml_name}\n",
"\n",
"echo \"NUMBER_OF_OBSTACLES: \\\"0\\\"\" | tee -a {s3_yaml_name}\n",
"echo \"MIN_DISTANCE_BETWEEN_OBSTACLES: \\\"2.0\\\"\" | tee -a {s3_yaml_name}\n",
"echo \"RANDOMIZE_OBSTACLE_LOCATIONS: \\\"false\\\"\" | tee -a {s3_yaml_name}\n",
"echo \"PSEUDO_RANDOMIZE_OBSTACLE_LOCATIONS: \\\"false\\\"\" | tee -a {s3_yaml_name}\n",
"echo \"NUMBER_OF_PSEUDO_RANDOM_PLACEMENTS: \\\"2\\\"\" | tee -a {s3_yaml_name}\n",
"echo \"IS_OBSTACLE_BOT_CAR: \\\"false\\\"\" | tee -a {s3_yaml_name} \n",
"\n",
"echo \"IS_LANE_CHANGE: \\\"false\\\"\" | tee -a {s3_yaml_name}\n",
"echo \"LOWER_LANE_CHANGE_TIME: \\\"3.0\\\"\" | tee -a {s3_yaml_name}\n",
"echo \"UPPER_LANE_CHANGE_TIME: \\\"5.0\\\"\" | tee -a {s3_yaml_name}\n",
"echo \"LANE_CHANGE_DISTANCE: \\\"1.0\\\"\" | tee -a {s3_yaml_name}\n",
"\n",
"echo \"NUMBER_OF_BOT_CARS: \\\"6\\\"\" | tee -a {s3_yaml_name}\n",
"echo \"MIN_DISTANCE_BETWEEN_BOT_CARS: \\\"2.0\\\"\" | tee -a {s3_yaml_name}\n",
"echo \"RANDOMIZE_BOT_CAR_LOCATIONS: \\\"true\\\"\" | tee -a {s3_yaml_name}\n",
"echo \"BOT_CAR_SPEED: \\\"0.2\\\"\" | tee -a {s3_yaml_name}\n",
"echo \"CAR_COLOR: \\\"LightBlue\\\"\" | tee -a {s3_yaml_name}\n",
"\n",
"!echo \"CAR_COLOR: \\\"LightBlue\\\"\" | tee -a {s3_yaml_name}\n",
"!echo \"FIRST_PERSON_VIEW: \\\"false\\\"\" | tee -a {s3_yaml_name}\n",
"!echo \"CHANGE_START_POSITION: \\\"true\\\"\" | tee -a {s3_yaml_name}\n",
"\n",
"print(\"Upload yaml settings to S3\")\n",
"!aws s3 cp ./evaluation_params.yaml {s3_location}/evaluation_params.json"
]
},
{
"cell_type": "code",
"execution_count": null,
@@ -864,16 +970,13 @@
"num_simulation_workers = 1\n",
"\n",
"envriron_vars = {\n",
" \"WORLD_NAME\": \"reinvent_base\",\n",
" \"KINESIS_VIDEO_STREAM_NAME\": \"SilverstoneStream\",\n",
" \"MODEL_S3_BUCKET\": s3_bucket,\n",
" \"S3_YAML_NAME\": s3_yaml_name,\n",
" \"MODEL_S3_PREFIX\": s3_prefix,\n",
" \"MODEL_S3_BUCKET\": s3_bucket,\n",
" \"WORLD_NAME\": world_name,\n",
" \"KINESIS_VIDEO_STREAM_NAME\": kvs_stream_name,\n",
" \"APP_REGION\": aws_region,\n",
" \"MODEL_METADATA_FILE_S3_KEY\": \"%s/model_metadata.json\" % s3_prefix,\n",
" \"METRICS_S3_BUCKET\": s3_bucket,\n",
" \"METRICS_S3_OBJECT_KEY\": s3_bucket + \"/evaluation_metrics.json\",\n",
" \"NUMBER_OF_TRIALS\": \"5\",\n",
" \"ROBOMAKER_SIMULATION_JOB_ACCOUNT_ID\": account_id\n",
" \"MODEL_METADATA_FILE_S3_KEY\": \"%s/model/model_metadata.json\" % s3_prefix\n",
"}\n",
"\n",
"simulation_application = {\n",
@@ -1002,5 +1105,5 @@
}
},
"nbformat": 4,
"nbformat_minor": 2
"nbformat_minor": 4
}
@@ -0,0 +1,26 @@
{
"action_space": [
{
"steering_angle": -30,
"speed": 0.6
},
{
"steering_angle": -15,
"speed": 0.6
},
{
"steering_angle": 0,
"speed": 0.6
},
{
"steering_angle": 15,
"speed": 0.6
},
{
"steering_angle": 30,
"speed": 0.6
}
],
"sensor": ["FRONT_FACING_CAMERA"],
"neural_network": "DEEP_CONVOLUTIONAL_NETWORK_SHALLOW"
}
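The agent-control interface below consumes actions as integer indices into this action_space list. A minimal sketch of that mapping (the local file name is illustrative):

import json

# Load the action space shown above (path is illustrative).
with open("model_metadata.json") as f:
    model_metadata = json.load(f)

def action_to_command(action_index):
    """Map a discrete action index to (steering_angle, speed)."""
    action = model_metadata["action_space"][action_index]
    return action["steering_angle"], action["speed"]

print(action_to_command(2))  # -> (0, 0.6), i.e. straight ahead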
@@ -0,0 +1,46 @@
'''This class defines an interface for how agents interact with the
environment; all concrete classes should abstract away the agent's
communication with Gazebo.
'''
import abc

class AgentCtrlInterface(object, metaclass=abc.ABCMeta):
    @property
    @abc.abstractmethod
    def action_space(self):
        '''Returns a read-only version of the action space so that it can be passed to coach'''
        raise NotImplementedError('Agent control must be able to retrieve action space')

    @abc.abstractmethod
    def reset_agent(self):
        '''Reset the agent to a desired start position'''
        raise NotImplementedError('Agent control must be able to reset agent')

    @abc.abstractmethod
    def send_action(self, action):
        '''Send the desired action to the agent
        action - Integer with the desired action to take
        '''
        raise NotImplementedError('Agent control must be able to send action')

    @abc.abstractmethod
    def judge_action(self, action):
        '''Returns the reward and done flag after an agent takes the action prescribed by a
        given policy
        action - Integer with the desired action to take
        '''
        raise NotImplementedError('Agent control must be able to judge action')

    @abc.abstractmethod
    def finish_episode(self):
        '''Runs all behavior required at the end of the episode, such as uploading
        debug data to S3.
        '''
        raise NotImplementedError('Agent control must be able to properly handle the end of '
                                  'an episode')

    @abc.abstractmethod
    def clear_data(self):
        '''Clears the agent data'''
        raise NotImplementedError('Agent control must be able to clear data')

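For illustration, a hedged sketch of a trivial concrete implementation of this interface (names and behavior are assumptions, not one of the repo's actual agent controllers):

class NoOpAgentCtrl(AgentCtrlInterface):
    '''Minimal concrete example; a real controller talks to Gazebo instead.'''

    def __init__(self, action_space):
        self._action_space = tuple(action_space)  # read-only copy for coach

    @property
    def action_space(self):
        return self._action_space

    def reset_agent(self):
        pass  # a real controller would move the car to a start position

    def send_action(self, action):
        print("sending action index", action)

    def judge_action(self, action):
        return 0.0, False  # (reward, done); a real controller computes these

    def finish_episode(self):
        pass  # e.g. upload debug data to S3

    def clear_data(self):
        pass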