Supporting multirollout for the simapp released as of reInvent 2019 (#…
sunil19m authored and saurabh3949 committed Jan 17, 2020
1 parent 9e38c5c commit 1cef53b
Showing 79 changed files with 8,565 additions and 1,557 deletions.
@@ -5,6 +5,8 @@ ARG CPU_OR_GPU
ARG AWS_REGION
FROM 520713654638.dkr.ecr.$AWS_REGION.amazonaws.com/sagemaker-tensorflow-scriptmode:1.12.0-$CPU_OR_GPU-py3

COPY ./src/markov /opt/amazon/markov

RUN apt-get update && apt-get install -y --no-install-recommends \
build-essential \
jq \
@@ -38,29 +40,32 @@ RUN pip install \
PyOpenGL==3.1.0 \
scipy==1.3.0 \
scikit-image==0.15.0 \
gym==0.10.5 \
retrying \
eventlet \
boto3 \
minio==4.0.5 \
futures==3.1.1 \
redis==3.3.8
boto3==1.9.23 \
minio==4.0.5 \
kubernetes==7.0.0 \
opencv-python==4.1.1.26 \
rl-coach-slim==1.0.0 \
retrying \
eventlet

# Install rl coach
RUN pip install rl-coach-slim==0.11.1
RUN pip install "mxnet-mkl>=1.3.0"

RUN pip install --no-cache-dir --upgrade sagemaker-containers

# Patch Intel coach
COPY ./src/rl_coach.patch /opt/amazon/rl_coach.patch
RUN patch -p1 -N --directory=/usr/local/lib/python3.6/dist-packages/ < /opt/amazon/rl_coach.patch


# Patch Intel coach so that discrete distributions cannot produce NaNs
COPY src/lib/ppo_head.py /usr/local/lib/python3.6/dist-packages/rl_coach/architectures/tensorflow_components/heads/ppo_head.py

# Copy in all the code and make it available on the path
COPY src/lib/redis.conf /etc/redis/redis.conf
COPY ./src/lib/redis.conf /etc/redis/redis.conf
ENV PYTHONPATH /opt/amazon/:$PYTHONPATH
ENV PATH /opt/ml/code/:$PATH
WORKDIR /opt/ml/code

# Tell sagemaker-containers where the launch point is for training job.
ENV NODE_TYPE SAGEMAKER_TRAINING_WORKER

ENV PYTHONUNBUFFERED 1
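The NODE_TYPE variable set above is how a container signals which role it plays. A minimal sketch of the kind of dispatch this implies, assuming a hypothetical entry point (this is not the actual markov launch code):

import os

def start_training_worker():
    # Placeholder: the real worker would launch the SageMaker-side trainer.
    print("starting SageMaker training worker")

def start_rollout_worker():
    # Placeholder: the real worker would launch a RoboMaker rollout worker.
    print("starting rollout worker")

# NODE_TYPE comes from the Dockerfile's ENV instruction above.
if os.environ.get("NODE_TYPE") == "SAGEMAKER_TRAINING_WORKER":
    start_training_worker()
else:
    start_rollout_worker()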
@@ -60,7 +60,7 @@
"# !python3 sim_app_bundler.py --clean\n",
"\n",
"# # Download Robomaker simApp from the deepracer public s3 bucket\n",
"# simulation_application_bundle_location = \"s3://deepracer-managed-resources-us-east-1/deepracer-simapp-notebook.tar.gz\"\n",
"# simulation_application_bundle_location = \"s3://deepracer-managed-resources-us-east-1/deepracer-simapp.tar.gz\"\n",
"# !aws s3 cp {simulation_application_bundle_location} ./\n",
"\n",
"# # Untar the simapp bundle\n",
@@ -73,7 +73,7 @@
"# # bundle/opt/install/deepracer_simulation_environment/share/deepracer_simulation_environment/\n",
"# # bundle/opt/install/deepracer_simulation_environment/lib/deepracer_simulation_environment/\n",
"\n",
"# # Copying the notebook src/markov changes to the simapp (For sagemaker container)\n",
"# # # Copying the notebook src/markov changes to the simapp (For sagemaker container)\n",
"# !rsync -av ./src/markov/ ./build/simapp/bundle/opt/install/sagemaker_rl_agent/lib/python3.5/site-packages/markov\n",
"\n",
"# !python3 sim_app_bundler.py --tar"
@@ -449,14 +449,11 @@
"source": [
"# Uncomment the pygmentize code lines to see the code\n",
"\n",
"# Environmental File\n",
"#!pygmentize src/markov/environments/deepracer_racetrack_env.py\n",
"\n",
"# Reward function\n",
"#!pygmentize src/markov/rewards/default.py\n",
"\n",
"# Action space\n",
"#!pygmentize src/markov/actions/model_metadata_10_state.json\n",
"#!pygmentize src/markov/actions/single_speed_stereo_shallow.json\n",
"\n",
"# Preset File\n",
"#!pygmentize src/markov/presets/default.py\n",
@@ -482,14 +479,11 @@
"# Clean up the previously uploaded files\n",
"!aws s3 rm --recursive {s3_location}\n",
"\n",
"# Make any changes to the environment and preset files below and upload these files\n",
"!aws s3 cp src/markov/environments/deepracer_racetrack_env.py {s3_location}/environments/deepracer_racetrack_env.py\n",
"\n",
"!aws s3 cp src/markov/rewards/default.py {s3_location}/rewards/reward_function.py\n",
"!aws s3 cp src/markov/rewards/default.py {s3_location}/customer_reward_function.py\n",
"\n",
"!aws s3 cp src/markov/actions/model_metadata_10_state.json {s3_location}/model_metadata.json\n",
"!aws s3 cp src/markov/actions/default.json {s3_location}/model/model_metadata.json\n",
"\n",
"!aws s3 cp src/markov/presets/default.py {s3_location}/presets/preset.py\n",
"#!aws s3 cp src/markov/presets/default.py {s3_location}/presets/preset.py\n",
"#!aws s3 cp src/markov/presets/preset_attention_layer.py {s3_location}/presets/preset.py"
]
},
@@ -560,9 +554,19 @@
" \"s3_bucket\": s3_bucket,\n",
" \"s3_prefix\": s3_prefix,\n",
" \"aws_region\": aws_region,\n",
" \"preset_s3_key\": \"%s/presets/preset.py\"% s3_prefix,\n",
" \"model_metadata_s3_key\": \"%s/model_metadata.json\" % s3_prefix,\n",
" \"environment_s3_key\": \"%s/environments/deepracer_racetrack_env.py\" % s3_prefix,\n",
" \"model_metadata_s3_key\": \"%s/model/model_metadata.json\" % s3_prefix,\n",
" \"reward_function_s3_source\": \"%s/customer_reward_function.py\" % s3_prefix,\n",
" \"batch_size\": \"64\",\n",
" \"num_epochs\": \"10\",\n",
" \"stack_size\": \"1\",\n",
" \"lr\": \"0.0003\",\n",
" \"exploration_type\": \"Categorical\",\n",
" \"e_greedy_value\": \"1\",\n",
" \"epsilon_steps\": \"10000\",\n",
" \"beta_entropy\": \"0.01\",\n",
" \"discount_factor\": \"0.999\",\n",
" \"loss_type\": \"Huber\",\n",
" \"num_episodes_between_training\": \"20\"\n",
" },\n",
" subnets=deepracer_subnets,\n",
" security_group_ids=deepracer_security_groups,\n",
@@ -573,6 +577,15 @@
"print(\"Training job: %s\" % job_name)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"training_job_arn = estimator.latest_training_job.describe()['TrainingJobArn']"
]
},
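The training job ARN captured here is written into training_params.yaml below (TRAINING_JOB_ARN), so the RoboMaker simulation workers can be associated with this SageMaker training job.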
{
"cell_type": "markdown",
"metadata": {},
@@ -699,6 +712,61 @@
"We create [AWS RoboMaker](https://console.aws.amazon.com/robomaker/home#welcome) Simulation Jobs that simulates the environment and shares this data with SageMaker for training. "
]
},
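For orientation, a hedged sketch of what the simulation-job launch boils down to with boto3 (the launch-config values here are assumptions for illustration; the notebook cells that follow build the actual request):

import boto3

robomaker = boto3.client("robomaker", region_name=aws_region)

# envriron_vars, simulation_app_arn, deepracer_subnets,
# deepracer_security_groups, and role are defined elsewhere in the notebook.
response = robomaker.create_simulation_job(
    iamRole=role,
    maxJobDurationInSeconds=3600,
    failureBehavior="Fail",
    simulationApplications=[{
        "application": simulation_app_arn,
        "launchConfig": {
            "packageName": "deepracer_simulation_environment",  # assumed
            "launchFile": "distributed_training.launch",        # assumed
            "environmentVariables": envriron_vars,
        },
    }],
    vpcConfig={
        "subnets": deepracer_subnets,
        "securityGroups": deepracer_security_groups,
    },
)
print("Simulation job ARN:", response["arn"])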
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"s3_yaml_name=\"training_params.yaml\"\n",
"world_name = \"reInvent2019_track\"\n",
"\n",
"!touch {s3_yaml_name}\n",
"!echo \"WORLD_NAME: \\\"{world_name}\\\"\" | tee -a {s3_yaml_name}\n",
"!echo \"SAGEMAKER_SHARED_S3_BUCKET: \\\"{s3_bucket}\\\"\" | tee -a {s3_yaml_name}\n",
"!echo \"SAGEMAKER_SHARED_S3_PREFIX: \\\"{s3_prefix}\\\"\" | tee -a {s3_yaml_name}\n",
"!echo \"TRAINING_JOB_ARN: \\\"{training_job_arn}\\\"\" | tee -a {s3_yaml_name}\n",
"!echo \"METRICS_S3_BUCKET: \\\"{s3_bucket}\\\"\" | tee -a {s3_yaml_name}\n",
"!echo \"METRICS_S3_OBJECT_KEY: \\\"{s3_prefix}/training_metrics.json\\\"\" | tee -a {s3_yaml_name}\n",
"!echo \"AWS_REGION: \\\"{aws_region}\\\"\" | tee -a {s3_yaml_name}\n",
"!echo \"TARGET_REWARD_SCORE: \\\"None\\\"\" | tee -a {s3_yaml_name}\n",
"!echo \"NUMBER_OF_EPISODES: \\\"0\\\"\" | tee -a {s3_yaml_name}\n",
"!echo \"ROBOMAKER_SIMULATION_JOB_ACCOUNT_ID: \\\"{account_id}\\\"\" | tee -a {s3_yaml_name}\n",
"!echo \"JOB_TYPE: \\\"TRAINING\\\"\" | tee -a {s3_yaml_name}\n",
"!echo \"CHANGE_START_POSITION: \\\"true\\\"\" | tee -a {s3_yaml_name}\n",
"!echo \"ALTERNATE_DRIVING_DIRECTION: \\\"false\\\"\" | tee -a {s3_yaml_name}\n",
"!echo \"KINESIS_VIDEO_STREAM_NAME: \\\"{kvs_stream_name}\\\"\" | tee -a {s3_yaml_name}\n",
"!echo \"REWARD_FILE_S3_KEY: \\\"{s3_prefix}/customer_reward_function.py\\\"\" | tee -a {s3_yaml_name}\n",
"!echo \"MODEL_METADATA_FILE_S3_KEY: \\\"{s3_prefix}/model/model_metadata.json\\\"\" | tee -a {s3_yaml_name}\n",
"\n",
"!echo \"NUMBER_OF_OBSTACLES: \\\"0\\\"\" | tee -a {s3_yaml_name}\n",
"!echo \"MIN_DISTANCE_BETWEEN_OBSTACLES: \\\"2.0\\\"\" | tee -a {s3_yaml_name}\n",
"!echo \"RANDOMIZE_OBSTACLE_LOCATIONS: \\\"false\\\"\" | tee -a {s3_yaml_name}\n",
"!echo \"PSEUDO_RANDOMIZE_OBSTACLE_LOCATIONS: \\\"false\\\"\" | tee -a {s3_yaml_name}\n",
"!echo \"NUMBER_OF_PSEUDO_RANDOM_PLACEMENTS: \\\"2\\\"\" | tee -a {s3_yaml_name}\n",
"!echo \"IS_OBSTACLE_BOT_CAR: \\\"false\\\"\" | tee -a {s3_yaml_name} \n",
"\n",
"!echo \"IS_LANE_CHANGE: \\\"false\\\"\" | tee -a {s3_yaml_name}\n",
"!echo \"LOWER_LANE_CHANGE_TIME: \\\"3.0\\\"\" | tee -a {s3_yaml_name}\n",
"!echo \"UPPER_LANE_CHANGE_TIME: \\\"5.0\\\"\" | tee -a {s3_yaml_name}\n",
"!echo \"LANE_CHANGE_DISTANCE: \\\"1.0\\\"\" | tee -a {s3_yaml_name}\n",
"\n",
"!echo \"NUMBER_OF_BOT_CARS: \\\"6\\\"\" | tee -a {s3_yaml_name}\n",
"!echo \"MIN_DISTANCE_BETWEEN_BOT_CARS: \\\"2.0\\\"\" | tee -a {s3_yaml_name}\n",
"!echo \"RANDOMIZE_BOT_CAR_LOCATIONS: \\\"false\\\"\" | tee -a {s3_yaml_name}\n",
"!echo \"BOT_CAR_SPEED: \\\"0.2\\\"\" | tee -a {s3_yaml_name}\n",
"\n",
"!echo \"CAR_COLOR: \\\"LightBlue\\\"\" | tee -a {s3_yaml_name}\n",
"!echo \"FIRST_PERSON_VIEW: \\\"false\\\"\" | tee -a {s3_yaml_name}\n",
"!echo \"CHANGE_START_POSITION: \\\"true\\\"\" | tee -a {s3_yaml_name}\n",
"\n",
"print(\"Upload yaml settings to S3\")\n",
"!aws s3 cp ./training_params.yaml {s3_location}/training_params.yaml\n",
"!rm training_params.yaml"
]
},
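The cell above assembles training_params.yaml one echo/tee line at a time. An equivalent, arguably less error-prone approach is to dump a dict with PyYAML; a sketch assuming PyYAML is installed (the notebook itself does not do this):

import yaml

# Mirror of the echo/tee lines above; every value is written as a string,
# matching the quoting in the cell.
training_params = {
    "WORLD_NAME": world_name,
    "SAGEMAKER_SHARED_S3_BUCKET": s3_bucket,
    "SAGEMAKER_SHARED_S3_PREFIX": s3_prefix,
    "TRAINING_JOB_ARN": training_job_arn,
    "AWS_REGION": aws_region,
    "JOB_TYPE": "TRAINING",
    # ... remaining keys exactly as in the cell above
}

with open("training_params.yaml", "w") as f:
    yaml.dump({k: str(v) for k, v in training_params.items()},
              f, default_flow_style=False)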
{
"cell_type": "code",
"execution_count": null,
@@ -708,21 +776,13 @@
"num_simulation_workers = 1\n",
"\n",
"envriron_vars = {\n",
" \"WORLD_NAME\": \"reinvent_base\",\n",
" \"KINESIS_VIDEO_STREAM_NAME\": kvs_stream_name,\n",
" \"SAGEMAKER_SHARED_S3_BUCKET\": s3_bucket,\n",
" \"S3_YAML_NAME\": s3_yaml_name,\n",
" \"SAGEMAKER_SHARED_S3_PREFIX\": s3_prefix,\n",
" \"TRAINING_JOB_ARN\": job_name,\n",
" \"SAGEMAKER_SHARED_S3_BUCKET\": s3_bucket,\n",
" \"WORLD_NAME\": world_name,\n",
" \"KINESIS_VIDEO_STREAM_NAME\": kvs_stream_name,\n",
" \"APP_REGION\": aws_region,\n",
" \"METRIC_NAME\": \"TrainingRewardScore\",\n",
" \"METRIC_NAMESPACE\": \"AWSDeepRacer\",\n",
" \"REWARD_FILE_S3_KEY\": \"%s/rewards/reward_function.py\" % s3_prefix,\n",
" \"MODEL_METADATA_FILE_S3_KEY\": \"%s/model_metadata.json\" % s3_prefix,\n",
" \"METRICS_S3_BUCKET\": s3_bucket,\n",
" \"METRICS_S3_OBJECT_KEY\": s3_bucket + \"/training_metrics.json\",\n",
" \"TARGET_REWARD_SCORE\": \"None\",\n",
" \"NUMBER_OF_EPISODES\": \"0\",\n",
" \"ROBOMAKER_SIMULATION_JOB_ACCOUNT_ID\": account_id\n",
" \"MODEL_METADATA_FILE_S3_KEY\": \"%s/model/model_metadata.json\" % s3_prefix\n",
"}\n",
"\n",
"simulation_application = {\"application\":simulation_app_arn,\n",
@@ -853,6 +913,52 @@
"### Evaluation - ReInvent Track"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"s3_yaml_name=\"evaluation_params.yaml\"\n",
"world_name = \"reInvent2019_track\"\n",
"\n",
"!touch {s3_yaml_name}\n",
"echo \"WORLD_NAME: \\\"{world_name}\\\"\" | tee {s3_yaml_name}\n",
"echo \"MODEL_S3_BUCKET: \\\"{s3_bucket}\\\"\" | tee -a {s3_yaml_name}\n",
"echo \"MODEL_S3_PREFIX: \\\"{s3_prefix}\\\"\" | tee -a {s3_yaml_name}\n",
"echo \"AWS_REGION: \\\"{aws_region}\\\"\" | tee -a {s3_yaml_name}\n",
"echo \"METRICS_S3_BUCKET: \\\"{s3_bucket}\\\"\" | tee -a {s3_yaml_name}\n",
"echo \"METRICS_S3_OBJECT_KEY: \\\"{s3_prefix}/evaluation_metrics.json\\\"\" | tee -a {s3_yaml_name}\n",
"echo \"NUMBER_OF_TRIALS: \\\"10\\\"\" | tee -a {s3_yaml_name}\n",
"echo \"ROBOMAKER_SIMULATION_JOB_ACCOUNT_ID: \\\"{account_id}\\\"\" | tee -a {s3_yaml_name}\n",
"echo \"JOB_TYPE: \\\"EVALUATION\\\"\" | tee -a {s3_yaml_name}\n",
"\n",
"echo \"NUMBER_OF_OBSTACLES: \\\"0\\\"\" | tee -a {s3_yaml_name}\n",
"echo \"MIN_DISTANCE_BETWEEN_OBSTACLES: \\\"2.0\\\"\" | tee -a {s3_yaml_name}\n",
"echo \"RANDOMIZE_OBSTACLE_LOCATIONS: \\\"false\\\"\" | tee -a {s3_yaml_name}\n",
"echo \"PSEUDO_RANDOMIZE_OBSTACLE_LOCATIONS: \\\"false\\\"\" | tee -a {s3_yaml_name}\n",
"echo \"NUMBER_OF_PSEUDO_RANDOM_PLACEMENTS: \\\"2\\\"\" | tee -a {s3_yaml_name}\n",
"echo \"IS_OBSTACLE_BOT_CAR: \\\"false\\\"\" | tee -a {s3_yaml_name} \n",
"\n",
"echo \"IS_LANE_CHANGE: \\\"false\\\"\" | tee -a {s3_yaml_name}\n",
"echo \"LOWER_LANE_CHANGE_TIME: \\\"3.0\\\"\" | tee -a {s3_yaml_name}\n",
"echo \"UPPER_LANE_CHANGE_TIME: \\\"5.0\\\"\" | tee -a {s3_yaml_name}\n",
"echo \"LANE_CHANGE_DISTANCE: \\\"1.0\\\"\" | tee -a {s3_yaml_name}\n",
"\n",
"echo \"NUMBER_OF_BOT_CARS: \\\"6\\\"\" | tee -a {s3_yaml_name}\n",
"echo \"MIN_DISTANCE_BETWEEN_BOT_CARS: \\\"2.0\\\"\" | tee -a {s3_yaml_name}\n",
"echo \"RANDOMIZE_BOT_CAR_LOCATIONS: \\\"true\\\"\" | tee -a {s3_yaml_name}\n",
"echo \"BOT_CAR_SPEED: \\\"0.2\\\"\" | tee -a {s3_yaml_name}\n",
"echo \"CAR_COLOR: \\\"LightBlue\\\"\" | tee -a {s3_yaml_name}\n",
"\n",
"!echo \"CAR_COLOR: \\\"LightBlue\\\"\" | tee -a {s3_yaml_name}\n",
"!echo \"FIRST_PERSON_VIEW: \\\"false\\\"\" | tee -a {s3_yaml_name}\n",
"!echo \"CHANGE_START_POSITION: \\\"true\\\"\" | tee -a {s3_yaml_name}\n",
"\n",
"print(\"Upload yaml settings to S3\")\n",
"!aws s3 cp ./evaluation_params.yaml {s3_location}/evaluation_params.json"
]
},
{
"cell_type": "code",
"execution_count": null,
@@ -864,16 +970,13 @@
"num_simulation_workers = 1\n",
"\n",
"envriron_vars = {\n",
" \"WORLD_NAME\": \"reinvent_base\",\n",
" \"KINESIS_VIDEO_STREAM_NAME\": \"SilverstoneStream\",\n",
" \"MODEL_S3_BUCKET\": s3_bucket,\n",
" \"S3_YAML_NAME\": s3_yaml_name,\n",
" \"MODEL_S3_PREFIX\": s3_prefix,\n",
" \"MODEL_S3_BUCKET\": s3_bucket,\n",
" \"WORLD_NAME\": world_name,\n",
" \"KINESIS_VIDEO_STREAM_NAME\": kvs_stream_name,\n",
" \"APP_REGION\": aws_region,\n",
" \"MODEL_METADATA_FILE_S3_KEY\": \"%s/model_metadata.json\" % s3_prefix,\n",
" \"METRICS_S3_BUCKET\": s3_bucket,\n",
" \"METRICS_S3_OBJECT_KEY\": s3_bucket + \"/evaluation_metrics.json\",\n",
" \"NUMBER_OF_TRIALS\": \"5\",\n",
" \"ROBOMAKER_SIMULATION_JOB_ACCOUNT_ID\": account_id\n",
" \"MODEL_METADATA_FILE_S3_KEY\": \"%s/model/model_metadata.json\" % s3_prefix\n",
"}\n",
"\n",
"simulation_application = {\n",
@@ -1002,5 +1105,5 @@
}
},
"nbformat": 4,
"nbformat_minor": 2
"nbformat_minor": 4
}
@@ -0,0 +1,26 @@
{
"action_space": [
{
"steering_angle": -30,
"speed": 0.6
},
{
"steering_angle": -15,
"speed": 0.6
},
{
"steering_angle": 0,
"speed": 0.6
},
{
"steering_angle": 15,
"speed": 0.6
},
{
"steering_angle": 30,
"speed": 0.6
}
],
"sensor": ["FRONT_FACING_CAMERA"],
"neural_network": "DEEP_CONVOLUTIONAL_NETWORK_SHALLOW"
}
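The agent-control interface below consumes actions as integer indices into this action_space list. A minimal sketch of that mapping (the local file name is illustrative):

import json

# Load the action space shown above (path is illustrative).
with open("model_metadata.json") as f:
    model_metadata = json.load(f)

def action_to_command(action_index):
    """Map a discrete action index to (steering_angle, speed)."""
    action = model_metadata["action_space"][action_index]
    return action["steering_angle"], action["speed"]

print(action_to_command(2))  # -> (0, 0.6), i.e. straight ahead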
@@ -0,0 +1,46 @@
'''This class defines an interface for how agents interact with the
environment; all concrete classes should abstract away the agent's
communication with Gazebo.
'''
import abc

class AgentCtrlInterface(object, metaclass=abc.ABCMeta):
    @property
    @abc.abstractmethod
    def action_space(self):
        '''Returns a read-only version of the action space so that it can be passed to coach'''
        raise NotImplementedError('Agent control must be able to retrieve action space')

    @abc.abstractmethod
    def reset_agent(self):
        '''Reset the agent to a desired start position'''
        raise NotImplementedError('Agent control must be able to reset agent')

    @abc.abstractmethod
    def send_action(self, action):
        '''Send the desired action to the agent
        action - Integer with the desired action to take
        '''
        raise NotImplementedError('Agent control must be able to send action')

    @abc.abstractmethod
    def judge_action(self, action):
        '''Returns the reward and done flag after an agent takes the action prescribed by a
        given policy
        action - Integer with the desired action to take
        '''
        raise NotImplementedError('Agent control must be able to judge action')

    @abc.abstractmethod
    def finish_episode(self):
        '''Runs all behavior required at the end of the episode, such as uploading
        debug data to S3.
        '''
        raise NotImplementedError('Agent control must be able to properly handle the end of '
                                  'an episode')

    @abc.abstractmethod
    def clear_data(self):
        '''Clears the agent data'''
        raise NotImplementedError('Agent control must be able to clear data')

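For illustration, a hedged sketch of a trivial concrete implementation of this interface (names and behavior are assumptions, not one of the repo's actual agent controllers):

class NoOpAgentCtrl(AgentCtrlInterface):
    '''Minimal concrete example; a real controller talks to Gazebo instead.'''

    def __init__(self, action_space):
        self._action_space = tuple(action_space)  # read-only copy for coach

    @property
    def action_space(self):
        return self._action_space

    def reset_agent(self):
        pass  # a real controller would move the car to a start position

    def send_action(self, action):
        print("sending action index", action)

    def judge_action(self, action):
        return 0.0, False  # (reward, done); a real controller computes these

    def finish_episode(self):
        pass  # e.g. upload debug data to S3

    def clear_data(self):
        pass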