# Introduction

This notebook outlines the steps involved in building and deploying a Battlesnake model using Ray RLlib and TensorFlow on Amazon SageMaker.

Library versions currently in use:  TensorFlow 2.1, Ray RLlib 0.8.2

The model is first trained using multi-agent PPO, and then deployed to a managed _TensorFlow Serving_ SageMaker endpoint that can be used for inference.

In [1]:
import sagemaker
from sagemaker.rl import RLEstimator, RLToolkit
import boto3

## Initialise SageMaker
We need to define several parameters prior to running the training job. 

In [2]:
# Open a SageMaker session and look up its default S3 bucket.
sm_session = sagemaker.session.Session()
s3_bucket = sm_session.default_bucket()
# Root URI of that bucket; used below as the training job's output path.
s3_output_path = 's3://{}/'.format(s3_bucket)

bucket_message = "S3 bucket path: {}".format(s3_output_path)
print(bucket_message)

S3 bucket path: s3://sagemaker-us-west-2-216604823851/


In [3]:
# Execution role passed to the training estimator and to the hosted model below.
role = sagemaker.get_execution_role()
print(role)

# Prefix from which SageMaker derives the training job name.
job_name_prefix = 'Battlesnake-job-rllib'

arn:aws:iam::216604823851:role/BattlesnakeEnvironment-NotebookInstanceExecutionRo-YMEAYYBNHRKI


In [7]:
# Change local_mode to True if you want to do local training within this Notebook instance
# Otherwise, we'll spin-up a SageMaker training instance to handle the training

local_mode = False

if local_mode:
    instance_type = 'local'
else:
    instance_type = "ml.m5.xlarge"
    
# If training locally, do some Docker housekeeping..
if local_mode:
    !/bin/bash ./common/setup.sh

# Train your model here

In [8]:
# Training container image, resolved for the session's region and the chosen device.
device = "cpu"
region = sm_session.boto_region_name

image_template = '462105765813.dkr.ecr.{region}.amazonaws.com/sagemaker-rl-ray-container:ray-0.8.2-tf-{device}-py36'
image_name = image_template.format(device=device, region=region)

In [9]:
%%time

# Define and execute our training job
# Adjust hyperparameters and train_instance_count accordingly

metric_definitions = RLEstimator.default_metric_definitions(RLToolkit.RAY)
    
estimator = RLEstimator(entry_point="train-mabs.py",
                        source_dir='rllib_src',
                        dependencies=["rllib_common/sagemaker_rl", "battlesnake_gym/"],
                        image_name=image_name,
                        role=role,
                        train_instance_type=instance_type,
                        train_instance_count=1,
                        output_path=s3_output_path,
                        base_job_name=job_name_prefix,
                        metric_definitions=metric_definitions,
                        hyperparameters={
                            # See train-mabs.py to add additional hyperparameters
                            # Also see ray_launcher.py for the rl.training.* hyperparameters
                            #
                            # number of training iterations
                            "num_iters": 30,
                            # number of snakes in the gym
                            "num_agents": 5,
                        }
                    )

estimator.fit()

job_name = estimator.latest_training_job.job_name
print("Training job: %s" % job_name)



2020-07-16 21:17:19 Starting - Starting the training job...
2020-07-16 21:17:21 Starting - Launching requested ML instances......
2020-07-16 21:18:26 Starting - Preparing the instances for training...
2020-07-16 21:19:13 Downloading - Downloading input data
2020-07-16 21:19:13 Training - Downloading the training image......
2020-07-16 21:20:19 Training - Training image download completed. Training in progress..[34mbash: cannot set terminal process group (-1): Inappropriate ioctl for device[0m
[34mbash: no job control in this shell[0m
[34m2020-07-16 21:20:23,114 sagemaker-containers INFO     Imported framework sagemaker_tensorflow_container.training[0m
[34m2020-07-16 21:20:23,120 sagemaker-containers INFO     No GPUs detected (normal if no gpus installed)[0m
[34m2020-07-16 21:20:23,233 sagemaker-containers INFO     Installing module with the following command:[0m
[34m/usr/bin/python3 -m pip install . -r requirements.txt[0m
[34mProcessing /opt/ml/code[0m
[34mCollecting arr

[34m#033[2m#033[36m(pid=114)#033[0m   obj = yaml.load(type_)[0m
[34m#033[2m#033[36m(pid=114)#033[0m   obj = yaml.load(type_)[0m
[34m#033[2m#033[36m(pid=114)#033[0m   obj = yaml.load(type_)[0m
[34m#033[2m#033[36m(pid=116)#033[0m   obj = yaml.load(type_)[0m
[34m#033[2m#033[36m(pid=115)#033[0m   obj = yaml.load(type_)[0m
[34m#033[2m#033[36m(pid=113)#033[0m   obj = yaml.load(type_)[0m
[34m#033[2m#033[36m(pid=116)#033[0m   obj = yaml.load(type_)[0m
[34m#033[2m#033[36m(pid=115)#033[0m   obj = yaml.load(type_)[0m
[34m#033[2m#033[36m(pid=113)#033[0m   obj = yaml.load(type_)[0m
[34m#033[2m#033[36m(pid=116)#033[0m   obj = yaml.load(type_)[0m
[34m#033[2m#033[36m(pid=115)#033[0m   obj = yaml.load(type_)[0m
[34m#033[2m#033[36m(pid=113)#033[0m   obj = yaml.load(type_)[0m
[34m#033[2m#033[36m(pid=116)#033[0m   obj = yaml.load(type_)[0m
[34m#033[2m#033[36m(pid=115)#033[0m   obj = yaml.load(type_)[0m
[34m#033[2m#033[36m(pid=113)#033[0m   obj = yaml.load(type_)[0m
[34m#033[

[34mResult for PPO_MultiAgentBattlesnake-v1_2c7616ac:
  custom_metrics: {}
  date: 2020-07-16_21-23-17
  done: false
  episode_len_mean: 3.748082357690755
  episode_reward_max: 2.25
  episode_reward_mean: -1.5036536132418248
  episode_reward_min: -9.75
  episodes_this_iter: 2477
  episodes_total: 5034
  experiment_id: 04b4be94a18f47938b20d396475d97f6
  experiment_tag: '0'
  hostname: ip-10-0-157-251.us-west-2.compute.internal
  info:
    grad_time_ms: 14568.92
    learner:
      policy_0:
        cur_kl_coeff: 0.20000000298023224
        cur_lr: 0.0005000000237487257
        entropy: 1.341519832611084
        entropy_coeff: 0.0
        kl: 0.017999494448304176
        policy_loss: -0.04186198487877846
        total_loss: 1.5443516969680786
        vf_explained_var: 0.21959862112998962
        vf_loss: 1.5826138257980347
      policy_1:
        cur_kl_coeff: 0.20000000298023224
        cur_lr: 0.0005000000237487257
        entropy: 1.3273491859436035
        entropy_coeff: 0.0
        

[34mResult for PPO_MultiAgentBattlesnake-v1_2c7616ac:
  custom_metrics: {}
  date: 2020-07-16_21-24-45
  done: false
  episode_len_mean: 4.8559411146161935
  episode_reward_max: 2.05
  episode_reward_mean: -1.3419821240799161
  episode_reward_min: -9.55
  episodes_this_iter: 1902
  episodes_total: 9159
  experiment_id: 04b4be94a18f47938b20d396475d97f6
  experiment_tag: '0'
  hostname: ip-10-0-157-251.us-west-2.compute.internal
  info:
    grad_time_ms: 12424.458
    learner:
      policy_0:
        cur_kl_coeff: 0.30000001192092896
        cur_lr: 0.0005000000237487257
        entropy: 1.2259438037872314
        entropy_coeff: 0.0
        kl: 0.02468319796025753
        policy_loss: -0.04852496087551117
        total_loss: 2.172515630722046
        vf_explained_var: 0.26035478711128235
        vf_loss: 2.2136354446411133
      policy_1:
        cur_kl_coeff: 0.44999998807907104
        cur_lr: 0.0005000000237487257
        entropy: 1.154374361038208
        entropy_coeff: 0.0
        

[34mResult for PPO_MultiAgentBattlesnake-v1_2c7616ac:
  custom_metrics: {}
  date: 2020-07-16_21-26-11
  done: false
  episode_len_mean: 6.382434301521439
  episode_reward_max: 3.1000000000000005
  episode_reward_mean: -0.9187067773167359
  episode_reward_min: -9.75
  episodes_this_iter: 1446
  episodes_total: 12291
  experiment_id: 04b4be94a18f47938b20d396475d97f6
  experiment_tag: '0'
  hostname: ip-10-0-157-251.us-west-2.compute.internal
  info:
    grad_time_ms: 11724.611
    learner:
      policy_0:
        cur_kl_coeff: 0.44999998807907104
        cur_lr: 0.0005000000237487257
        entropy: 1.073108196258545
        entropy_coeff: 0.0
        kl: 0.021152475848793983
        policy_loss: -0.04647110402584076
        total_loss: 3.4153318405151367
        vf_explained_var: 0.352887898683548
        vf_loss: 3.452284574508667
      policy_1:
        cur_kl_coeff: 0.675000011920929
        cur_lr: 0.0005000000237487257
        entropy: 1.0479066371917725
        entropy_coeff: 0

[34mResult for PPO_MultiAgentBattlesnake-v1_2c7616ac:
  custom_metrics: {}
  date: 2020-07-16_21-27-36
  done: false
  episode_len_mean: 8.63932898415657
  episode_reward_max: 2.950000000000001
  episode_reward_mean: -0.19920782851817329
  episode_reward_min: -9.0
  episodes_this_iter: 1073
  episodes_total: 14634
  experiment_id: 04b4be94a18f47938b20d396475d97f6
  experiment_tag: '0'
  hostname: ip-10-0-157-251.us-west-2.compute.internal
  info:
    grad_time_ms: 11373.576
    learner:
      policy_0:
        cur_kl_coeff: 0.675000011920929
        cur_lr: 0.0005000000237487257
        entropy: 0.967411994934082
        entropy_coeff: 0.0
        kl: 0.016751006245613098
        policy_loss: -0.03736807405948639
        total_loss: 4.6821699142456055
        vf_explained_var: 0.42836979031562805
        vf_loss: 4.708231449127197
      policy_1:
        cur_kl_coeff: 0.675000011920929
        cur_lr: 0.0005000000237487257
        entropy: 0.9365869164466858
        entropy_coeff: 0.0

[34mResult for PPO_MultiAgentBattlesnake-v1_2c7616ac:
  custom_metrics: {}
  date: 2020-07-16_21-29-01
  done: false
  episode_len_mean: 13.29243937232525
  episode_reward_max: 5.549999999999986
  episode_reward_mean: 0.7788159771754636
  episode_reward_min: -8.25
  episodes_this_iter: 701
  episodes_total: 16229
  experiment_id: 04b4be94a18f47938b20d396475d97f6
  experiment_tag: '0'
  hostname: ip-10-0-157-251.us-west-2.compute.internal
  info:
    grad_time_ms: 11173.836
    learner:
      policy_0:
        cur_kl_coeff: 0.675000011920929
        cur_lr: 0.0005000000237487257
        entropy: 0.9273703098297119
        entropy_coeff: 0.0
        kl: 0.010631438344717026
        policy_loss: -0.03340252861380577
        total_loss: 7.196388244628906
        vf_explained_var: 0.5234967470169067
        vf_loss: 7.22261381149292
      policy_1:
        cur_kl_coeff: 0.675000011920929
        cur_lr: 0.0005000000237487257
        entropy: 0.8444918990135193
        entropy_coeff: 0.0
  

[34mResult for PPO_MultiAgentBattlesnake-v1_2c7616ac:
  custom_metrics: {}
  date: 2020-07-16_21-30-25
  done: false
  episode_len_mean: 14.099547511312217
  episode_reward_max: 5.149999999999995
  episode_reward_mean: 0.8270739064856709
  episode_reward_min: -7.9
  episodes_this_iter: 663
  episodes_total: 17486
  experiment_id: 04b4be94a18f47938b20d396475d97f6
  experiment_tag: '0'
  hostname: ip-10-0-157-251.us-west-2.compute.internal
  info:
    grad_time_ms: 10354.807
    learner:
      policy_0:
        cur_kl_coeff: 0.675000011920929
        cur_lr: 0.0005000000237487257
        entropy: 0.8958932161331177
        entropy_coeff: 0.0
        kl: 0.011049339547753334
        policy_loss: -0.02916673570871353
        total_loss: 10.629154205322266
        vf_explained_var: 0.5167029500007629
        vf_loss: 10.650861740112305
      policy_1:
        cur_kl_coeff: 0.675000011920929
        cur_lr: 0.0005000000237487257
        entropy: 0.8117117881774902
        entropy_coeff: 0.0

[34mResult for PPO_MultiAgentBattlesnake-v1_2c7616ac:
  custom_metrics: {}
  date: 2020-07-16_21-31-48
  done: false
  episode_len_mean: 26.213483146067414
  episode_reward_max: 13.400000000000041
  episode_reward_mean: 2.567977528089885
  episode_reward_min: -1.75
  episodes_this_iter: 356
  episodes_total: 18284
  experiment_id: 04b4be94a18f47938b20d396475d97f6
  experiment_tag: '0'
  hostname: ip-10-0-157-251.us-west-2.compute.internal
  info:
    grad_time_ms: 10378.734
    learner:
      policy_0:
        cur_kl_coeff: 0.675000011920929
        cur_lr: 0.0005000000237487257
        entropy: 0.8424747586250305
        entropy_coeff: 0.0
        kl: 0.007956977933645248
        policy_loss: -0.02095990628004074
        total_loss: 14.818283081054688
        vf_explained_var: 0.695953905582428
        vf_loss: 14.833873748779297
      policy_1:
        cur_kl_coeff: 0.675000011920929
        cur_lr: 0.0005000000237487257
        entropy: 0.7374107241630554
        entropy_coeff: 0.0

[34mResult for PPO_MultiAgentBattlesnake-v1_2c7616ac:
  custom_metrics: {}
  date: 2020-07-16_21-33-11
  done: false
  episode_len_mean: 43.43255813953488
  episode_reward_max: 16.15000000000008
  episode_reward_mean: 4.679767441860462
  episode_reward_min: -1.1000000000000085
  episodes_this_iter: 215
  episodes_total: 18769
  experiment_id: 04b4be94a18f47938b20d396475d97f6
  experiment_tag: '0'
  hostname: ip-10-0-157-251.us-west-2.compute.internal
  info:
    grad_time_ms: 10417.989
    learner:
      policy_0:
        cur_kl_coeff: 0.675000011920929
        cur_lr: 0.0005000000237487257
        entropy: 0.8709126710891724
        entropy_coeff: 0.0
        kl: 0.009532378055155277
        policy_loss: -0.018561633303761482
        total_loss: 17.694494247436523
        vf_explained_var: 0.8017085790634155
        vf_loss: 17.706621170043945
      policy_1:
        cur_kl_coeff: 0.675000011920929
        cur_lr: 0.0005000000237487257
        entropy: 0.7223637104034424
        entr

[34mResult for PPO_MultiAgentBattlesnake-v1_2c7616ac:
  custom_metrics: {}
  date: 2020-07-16_21-34-32
  done: false
  episode_len_mean: 58.78616352201258
  episode_reward_max: 19.60000000000013
  episode_reward_mean: 7.133962264150946
  episode_reward_min: -0.4500000000000002
  episodes_this_iter: 159
  episodes_total: 19097
  experiment_id: 04b4be94a18f47938b20d396475d97f6
  experiment_tag: '0'
  hostname: ip-10-0-157-251.us-west-2.compute.internal
  info:
    grad_time_ms: 10422.51
    learner:
      policy_0:
        cur_kl_coeff: 0.675000011920929
        cur_lr: 0.0005000000237487257
        entropy: 0.8186542987823486
        entropy_coeff: 0.0
        kl: 0.007952268235385418
        policy_loss: -0.013599900528788567
        total_loss: 32.67927169799805
        vf_explained_var: 0.8337600231170654
        vf_loss: 32.687503814697266
      policy_1:
        cur_kl_coeff: 0.675000011920929
        cur_lr: 0.0005000000237487257
        entropy: 0.6299331784248352
        entrop

[34mResult for PPO_MultiAgentBattlesnake-v1_2c7616ac:
  custom_metrics: {}
  date: 2020-07-16_21-35-53
  done: false
  episode_len_mean: 72.36434108527132
  episode_reward_max: 25.400000000000226
  episode_reward_mean: 8.8356589147287
  episode_reward_min: 0.7000000000000002
  episodes_this_iter: 129
  episodes_total: 19355
  experiment_id: 04b4be94a18f47938b20d396475d97f6
  experiment_tag: '0'
  hostname: ip-10-0-157-251.us-west-2.compute.internal
  info:
    grad_time_ms: 10420.261
    learner:
      policy_0:
        cur_kl_coeff: 0.675000011920929
        cur_lr: 0.0005000000237487257
        entropy: 0.8303779363632202
        entropy_coeff: 0.0
        kl: 0.006389666348695755
        policy_loss: -0.01331762783229351
        total_loss: 35.85108947753906
        vf_explained_var: 0.8801181316375732
        vf_loss: 35.86009216308594
      policy_1:
        cur_kl_coeff: 0.675000011920929
        cur_lr: 0.0005000000237487257
        entropy: 0.599680483341217
        entropy_co

[34mResult for PPO_MultiAgentBattlesnake-v1_2c7616ac:
  custom_metrics: {}
  date: 2020-07-16_21-37-17
  done: false
  episode_len_mean: 51.36756756756757
  episode_reward_max: 24.050000000000193
  episode_reward_mean: 5.398918918918919
  episode_reward_min: -2.6000000000000005
  episodes_this_iter: 185
  episodes_total: 19659
  experiment_id: 04b4be94a18f47938b20d396475d97f6
  experiment_tag: '0'
  hostname: ip-10-0-157-251.us-west-2.compute.internal
  info:
    grad_time_ms: 10483.899
    learner:
      policy_0:
        cur_kl_coeff: 0.675000011920929
        cur_lr: 0.0005000000237487257
        entropy: 0.5263407230377197
        entropy_coeff: 0.0
        kl: 0.005408211145550013
        policy_loss: -0.013989780098199844
        total_loss: 55.546669006347656
        vf_explained_var: 0.8285323977470398
        vf_loss: 55.557010650634766
      policy_1:
        cur_kl_coeff: 0.675000011920929
        cur_lr: 0.0005000000237487257
        entropy: 0.37036287784576416
        en

[34mResult for PPO_MultiAgentBattlesnake-v1_2c7616ac:
  custom_metrics: {}
  date: 2020-07-16_21-38-39
  done: false
  episode_len_mean: 75.25984251968504
  episode_reward_max: 25.25000000000021
  episode_reward_mean: 8.418897637795286
  episode_reward_min: -0.20000000000000018
  episodes_this_iter: 127
  episodes_total: 19924
  experiment_id: 04b4be94a18f47938b20d396475d97f6
  experiment_tag: '0'
  hostname: ip-10-0-157-251.us-west-2.compute.internal
  info:
    grad_time_ms: 10536.049
    learner:
      policy_0:
        cur_kl_coeff: 0.675000011920929
        cur_lr: 0.0005000000237487257
        entropy: 0.5352717041969299
        entropy_coeff: 0.0
        kl: 0.006840194575488567
        policy_loss: -0.013530354015529156
        total_loss: 51.761959075927734
        vf_explained_var: 0.8942326903343201
        vf_loss: 51.770870208740234
      policy_1:
        cur_kl_coeff: 0.675000011920929
        cur_lr: 0.0005000000237487257
        entropy: 0.4840400815010071
        ent

[34mResult for PPO_MultiAgentBattlesnake-v1_2c7616ac:
  custom_metrics: {}
  date: 2020-07-16_21-40-01
  done: false
  episode_len_mean: 88.68571428571428
  episode_reward_max: 26.500000000000227
  episode_reward_mean: 10.693333333333362
  episode_reward_min: 0.4500000000000002
  episodes_this_iter: 105
  episodes_total: 20148
  experiment_id: 04b4be94a18f47938b20d396475d97f6
  experiment_tag: '0'
  hostname: ip-10-0-157-251.us-west-2.compute.internal
  info:
    grad_time_ms: 10540.003
    learner:
      policy_0:
        cur_kl_coeff: 0.3375000059604645
        cur_lr: 0.0005000000237487257
        entropy: 0.64852374792099
        entropy_coeff: 0.0
        kl: 0.008418659679591656
        policy_loss: -0.01378058735281229
        total_loss: 43.863800048828125
        vf_explained_var: 0.9030368328094482
        vf_loss: 43.87474060058594
      policy_1:
        cur_kl_coeff: 0.675000011920929
        cur_lr: 0.0005000000237487257
        entropy: 0.4114105701446533
        entrop

[34mResult for PPO_MultiAgentBattlesnake-v1_2c7616ac:
  custom_metrics: {}
  date: 2020-07-16_21-41-23
  done: false
  episode_len_mean: 95.68
  episode_reward_max: 25.70000000000023
  episode_reward_mean: 11.500000000000039
  episode_reward_min: 0.04999999999999982
  episodes_this_iter: 98
  episodes_total: 20351
  experiment_id: 04b4be94a18f47938b20d396475d97f6
  experiment_tag: '0'
  hostname: ip-10-0-157-251.us-west-2.compute.internal
  info:
    grad_time_ms: 10563.886
    learner:
      policy_0:
        cur_kl_coeff: 0.3375000059604645
        cur_lr: 0.0005000000237487257
        entropy: 0.6502629518508911
        entropy_coeff: 0.0
        kl: 0.008623506873846054
        policy_loss: -0.010077928192913532
        total_loss: 68.04769897460938
        vf_explained_var: 0.8951800465583801
        vf_loss: 68.05487823486328
      policy_1:
        cur_kl_coeff: 0.3375000059604645
        cur_lr: 0.0005000000237487257
        entropy: 0.4346596598625183
        entropy_coeff: 0

[34mResult for PPO_MultiAgentBattlesnake-v1_2c7616ac:
  custom_metrics: {}
  date: 2020-07-16_21-42-45
  done: true
  episode_len_mean: 95.51
  episode_reward_max: 28.650000000000258
  episode_reward_mean: 11.933500000000038
  episode_reward_min: -0.7999999999999998
  episodes_this_iter: 98
  episodes_total: 20550
  experiment_id: 04b4be94a18f47938b20d396475d97f6
  experiment_tag: '0'
  hostname: ip-10-0-157-251.us-west-2.compute.internal
  info:
    grad_time_ms: 10601.319
    learner:
      policy_0:
        cur_kl_coeff: 0.3375000059604645
        cur_lr: 0.0005000000237487257
        entropy: 0.6823322772979736
        entropy_coeff: 0.0
        kl: 0.007734912913292646
        policy_loss: -0.016199372708797455
        total_loss: 59.76768112182617
        vf_explained_var: 0.910664975643158
        vf_loss: 59.78126907348633
      policy_1:
        cur_kl_coeff: 0.3375000059604645
        cur_lr: 0.0005000000237487257
        entropy: 0.46004927158355713
        entropy_coeff: 0

[34m#033[2m#033[36m(pid=7247)#033[0m   obj = yaml.load(type_)[0m
[34m#033[2m#033[36m(pid=7247)#033[0m   obj = yaml.load(type_)[0m
[34m2020-07-16 21:43:20,985#011INFO trainable.py:178 -- _setup took 29.871 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.[0m
[34m2020-07-16 21:43:22,782#011INFO trainable.py:416 -- Restored on 10.0.157.251 from checkpoint: /opt/ml/model/checkpoint[0m
[34m2020-07-16 21:43:22,782#011INFO trainable.py:423 -- Current state after restoring: {'_iteration': 30, '_timesteps_total': 279987, '_time_total': 1291.9189867973328, '_episodes_total': 20550}[0m

2020-07-16 21:43:40 Uploading - Uploading generated training model
2020-07-16 21:43:40 Completed - Training job completed
[34mSaved TensorFlow serving model!
[0m
[34m2020-07-16 21:43:27,910 sagemaker-containers INFO     Reporting training SUCCESS[0m
Training seconds: 1473
Billable seconds: 1473
Training job: Battlesnake-job-rllib-

In [11]:
# S3 location of the model artifact produced by the training job
estimator.model_data

's3://sagemaker-us-west-2-216604823851/Battlesnake-job-rllib-2020-07-16-21-17-19-254/output/model.tar.gz'

# Create an endpoint to host the policy
First, we delete the endpoint, endpoint configuration and model left over from any previous run.

In [None]:
# Remove the endpoint, endpoint configuration and model left over from a previous
# run so that their fixed names can be reused by the deployment.
sm_client = boto3.client(service_name='sagemaker')


def _delete_if_present(description, delete_call, **kwargs):
    """Run one SageMaker delete call, tolerating a resource that does not exist.

    Args:
        description: Human-readable resource name used in the skip message.
        delete_call: Bound ``sm_client.delete_*`` method to invoke.
        **kwargs: Keyword arguments forwarded unchanged to ``delete_call``.

    Raises:
        ClientError: re-raised for any error code other than ``ValidationException``.
    """
    try:
        delete_call(**kwargs)
    except sm_client.exceptions.ClientError as err:
        error = err.response["Error"]
        # SageMaker reports a missing resource as a ValidationException
        # ("Could not find ..."); any other code is a genuine failure.
        if error["Code"] != "ValidationException":
            raise
        print("Skipping {}: {}".format(description, error["Message"]))


# Each delete is attempted independently, so a missing endpoint (e.g. on the very
# first run) no longer prevents the endpoint config and model from being removed.
_delete_if_present("endpoint", sm_client.delete_endpoint,
                   EndpointName='battlesnake-endpoint')
_delete_if_present("endpoint config", sm_client.delete_endpoint_config,
                   EndpointConfigName='battlesnake-endpoint')
_delete_if_present("model", sm_client.delete_model,
                   ModelName="battlesnake-rllib")

# Endpoint deletion is asynchronous: wait for it to complete so the endpoint name
# is free again before a new endpoint is deployed under the same name.
sm_client.get_waiter('endpoint_deleted').wait(EndpointName='battlesnake-endpoint')

In [None]:
# Copy the endpoint to a central location
model_data = "s3://{}/battlesnake-aws/pretrainedmodels/model.tar.gz".format(s3_bucket)
!aws s3 cp {estimator.model_data} {model_data}

from sagemaker.tensorflow.serving import Model

model = Model(model_data=model_data,
              role=role,
              entry_point="inference.py",
              source_dir='rllib_inference/src',
              framework_version='2.1.0',
              name="battlesnake-rllib",
             )

if local_mode:
    inf_instance_type = 'local'
else:
    inf_instance_type = "ml.t2.medium"

# Deploy an inference endpoint
predictor = model.deploy(initial_instance_count=1, instance_type=inf_instance_type,#instance_type="local", #
                         endpoint_name='battlesnake-endpoint')

# Test the endpoint

This example uses a single observation for a 5-agent environment.  
The last axis is 12 because the current MultiAgentEnv concatenates 2 frames:  
5 agent maps + 1 food map = 6 maps in total, and 6 maps * 2 frames = 12.

In [None]:
import numpy as np
from time import time

# One all-zero observation, converted to nested Python lists for the request.
# NOTE(review): the text above describes a last axis of 12, but this array has
# 6 channels - confirm which shape the inference script expects.
data1 = np.zeros(shape=(1, 21, 21, 6), dtype=np.float32).tolist()

health_dict = {0: 50, 1: 50}

# Minimal game state sent alongside the observation.
# NOTE(review): the name `json` shadows the stdlib module of the same name if it
# is ever imported in this kernel.
board_state = {
    "height": 15,
    "width": 15,
    "food": [],
    "snakes": [],
}
own_snake = {
    "id": "snake-id-string",
    "name": "Sneky Snek",
    "health": 90,
    "body": [{"x": 1, "y": 3}],
}
json = {"turn": 4, "board": board_state, "you": own_snake}

request_payload = {
    "state": data1,
    "prev_action": -1,
    "prev_reward": -1,
    "seq_lens": -1,
    "all_health": health_dict,
    "json": json,
}

# Time a single round trip to the endpoint
before = time()
action = predictor.predict(request_payload)
elapsed = time() - before

action_to_take = action["outputs"]["heuristisc_action"]
elapsed_ms = elapsed * 1000
print("Action to take {}".format(action_to_take))
print("Inference took %.2f ms" % elapsed_ms)