Initialisations to change the base directory of the entry scripts and to update mxnet to the newest mxnet-mkl

In [1]:
import boto3
import sagemaker
import numpy as np

from sagemaker.tuner import IntegerParameter, CategoricalParameter, ContinuousParameter, HyperparameterTuner
from sagemaker.mxnet.estimator import MXNet

Set the parameters of the notebook.
`run_hpo = False` will only train the network once within this notebook. 
If `run_hpo = True` will run the hyperparameter optimization (run the training multiple times with different parameters to find the best parameters) (see Hyperparameter tuning jobs in SageMaker)

In [2]:
run_hpo = False
map_size = (11, 11)

## Initialise sagemaker
We need to define several parameters prior to running the training job. 

In [31]:
sage_session = sagemaker.session.Session()
s3_bucket = sage_session.default_bucket()
s3_output_path = 's3://{}/'.format(s3_bucket)
print("S3 bucket path: {}".format(s3_output_path))

# Run on local mode if no HPO is required
if run_hpo == False:
    local_mode = True
else:
    local_mode = False
    
if local_mode:
    train_instance_type = 'local'
else:
    train_instance_type = "SAGEMAKER_TRAINING_INSTANCE_TYPE"
endpoint_instance_type = "SAGEMAKER_INFERENCE_INSTANCE_TYPE"
    
role = sagemaker.get_execution_role()
print("Using IAM role arn: {}".format(role))

S3 bucket path: s3://sagemaker-us-west-2-681627153266/
Using IAM role arn: arn:aws:iam::681627153266:role/sagemaker-soln-bs-us-west-2-nb-role


## Define the attributes of the training job
Use `job_name_prefix` to identify the sagemaker training job for this.

In [4]:
job_name_prefix = 'Battlesnake-job-mxnet'

## Define the metrics to evaluate your training job
The regex for this metric was defined based on what is printed in the training script `examples/train.py`

In [5]:
metric_definitions = [
    {'Name': 'timesteps', 'Regex': '.*Mean timesteps ([-+]?[0-9]*[.]?[0-9]+([eE][-+]?[0-9]+)?)'},
]

## Define the hyperparameters of your job

In [6]:
map_size_string = "[{}, {}]".format(map_size[0], map_size[1])
static_hyperparameters = {
    'qnetwork_type': "attention",
    'seed': 111,
    'number_of_snakes': 4,
    'episodes': 3500,
    'print_score_steps': 10,
    'activation_type': "softrelu",
    'state_type': 'one_versus_all',
    'sequence_length': 2,
    'repeat_size': 3,
    'kernel_size': 3,
    'starting_channels': 6,
    'map_size': map_size_string,
    'snake_representation': 'bordered-51s',
    'save_model_every': 700,
    'eps_start': 0.99,
    'models_to_save': 'local'
}

# Train your model here
Defines the estimator.
If `run_hpo == False`, this training job will run. Please note that this will take a couple of hours.

In [7]:
estimator = MXNet(entry_point="train.py",
                  source_dir='training/training_src',
                  dependencies=["../BattlesnakeGym/"],
                  role=role,
                  train_instance_type=train_instance_type,
                  train_instance_count=1,
                  output_path=s3_output_path,
                  framework_version="1.6.0",
                  py_version='py3',
                  base_job_name=job_name_prefix,
                  metric_definitions=metric_definitions,
                  hyperparameters=static_hyperparameters
                 )
if local_mode:
    estimator.fit()

train_instance_type has been renamed in sagemaker>=2.
See: https://sagemaker.readthedocs.io/en/stable/v2.html for details.
train_instance_count has been renamed in sagemaker>=2.
See: https://sagemaker.readthedocs.io/en/stable/v2.html for details.
train_instance_type has been renamed in sagemaker>=2.
See: https://sagemaker.readthedocs.io/en/stable/v2.html for details.


Creating 6wn8d4f6c5-algo-1-r8807 ... 
Creating 6wn8d4f6c5-algo-1-r8807 ... done
Attaching to 6wn8d4f6c5-algo-1-r8807
[36m6wn8d4f6c5-algo-1-r8807 |[0m 2021-08-07 18:48:49,930 sagemaker-training-toolkit INFO     Imported framework sagemaker_mxnet_container.training
[36m6wn8d4f6c5-algo-1-r8807 |[0m 2021-08-07 18:48:49,933 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gpus installed)
[36m6wn8d4f6c5-algo-1-r8807 |[0m 2021-08-07 18:48:49,949 sagemaker_mxnet_container.training INFO     MXNet training environment: {'SM_HOSTS': '["algo-1-r8807"]', 'SM_NETWORK_INTERFACE_NAME': 'eth0', 'SM_HPS': '{"activation_type":"softrelu","episodes":3500,"eps_start":0.99,"kernel_size":3,"map_size":"[11, 11]","models_to_save":"local","number_of_snakes":4,"print_score_steps":10,"qnetwork_type":"attention","repeat_size":3,"save_model_every":700,"seed":111,"sequence_length":2,"snake_representation":"bordered-51s","starting_channels":6,"state_type":"one_versus_all"}', 'SM_USER_ENTRY_POIN

[36m6wn8d4f6c5-algo-1-r8807 |[0m Running with seed = 111
[36m6wn8d4f6c5-algo-1-r8807 |[0m Episode 10	Average Score: 	2.00	1.40	1.90	1.10	Mean timesteps 3.00
[36m6wn8d4f6c5-algo-1-r8807 |[0m Episode 20	Average Score: 	1.40	1.50	1.75	1.45	Mean timesteps 2.90
[36m6wn8d4f6c5-algo-1-r8807 |[0m Episode 30	Average Score: 	1.57	1.77	1.70	1.43	Mean timesteps 2.97
[36m6wn8d4f6c5-algo-1-r8807 |[0m Episode 40	Average Score: 	1.77	1.93	1.82	1.55	Mean timesteps 3.23
[36m6wn8d4f6c5-algo-1-r8807 |[0m Episode 50	Average Score: 	1.76	1.88	1.80	1.68	Mean timesteps 3.22
[36m6wn8d4f6c5-algo-1-r8807 |[0m Episode 60	Average Score: 	1.62	1.93	1.80	1.68	Mean timesteps 3.17
[36m6wn8d4f6c5-algo-1-r8807 |[0m Episode 70	Average Score: 	1.74	1.89	1.93	1.80	Mean timesteps 3.36
[36m6wn8d4f6c5-algo-1-r8807 |[0m Episode 80	Average Score: 	1.66	1.88	1.93	1.75	Mean timesteps 3.31
[36m6wn8d4f6c5-algo-1-r8807 |[0m Episode 90	Average Score: 	1.68	1.93	1.93	1.80	Mean timesteps 3.34
[36m6wn8d4f6c5-algo-1-

[36m6wn8d4f6c5-algo-1-r8807 |[0m Episode 810	Average Score: 	6.60	6.07	5.87	6.27	Mean timesteps 9.90
[36m6wn8d4f6c5-algo-1-r8807 |[0m Episode 820	Average Score: 	6.81	6.19	6.08	6.76	Mean timesteps 10.19
[36m6wn8d4f6c5-algo-1-r8807 |[0m Episode 830	Average Score: 	7.00	6.20	6.20	6.63	Mean timesteps 10.21
[36m6wn8d4f6c5-algo-1-r8807 |[0m Episode 840	Average Score: 	7.21	6.35	6.29	6.81	Mean timesteps 10.43
[36m6wn8d4f6c5-algo-1-r8807 |[0m Episode 850	Average Score: 	7.47	6.31	6.47	6.96	Mean timesteps 10.77
[36m6wn8d4f6c5-algo-1-r8807 |[0m Episode 860	Average Score: 	7.58	6.57	6.52	7.19	Mean timesteps 10.90
[36m6wn8d4f6c5-algo-1-r8807 |[0m Episode 870	Average Score: 	7.58	6.52	6.58	7.05	Mean timesteps 10.72
[36m6wn8d4f6c5-algo-1-r8807 |[0m Episode 880	Average Score: 	7.84	6.68	6.66	6.92	Mean timesteps 10.85
[36m6wn8d4f6c5-algo-1-r8807 |[0m Episode 890	Average Score: 	7.51	6.71	6.69	6.82	Mean timesteps 10.62
[36m6wn8d4f6c5-algo-1-r8807 |[0m Episode 900	Average Score: 	7.

[36m6wn8d4f6c5-algo-1-r8807 |[0m Episode 1580	Average Score: 	12.63	14.39	13.34	13.25	Mean timesteps 21.51
[36m6wn8d4f6c5-algo-1-r8807 |[0m Episode 1590	Average Score: 	13.91	14.18	13.96	13.95	Mean timesteps 22.29
[36m6wn8d4f6c5-algo-1-r8807 |[0m Episode 1600	Average Score: 	14.07	15.57	13.55	14.96	Mean timesteps 23.39
[36m6wn8d4f6c5-algo-1-r8807 |[0m Episode 1610	Average Score: 	13.53	14.49	12.65	14.30	Mean timesteps 22.37
[36m6wn8d4f6c5-algo-1-r8807 |[0m Episode 1620	Average Score: 	14.51	14.09	13.32	14.89	Mean timesteps 22.93
[36m6wn8d4f6c5-algo-1-r8807 |[0m Episode 1630	Average Score: 	14.68	13.39	13.48	15.05	Mean timesteps 22.53
[36m6wn8d4f6c5-algo-1-r8807 |[0m Episode 1640	Average Score: 	15.40	12.74	14.14	14.90	Mean timesteps 22.57
[36m6wn8d4f6c5-algo-1-r8807 |[0m Episode 1650	Average Score: 	15.59	13.07	14.27	14.54	Mean timesteps 22.79
[36m6wn8d4f6c5-algo-1-r8807 |[0m Episode 1660	Average Score: 	15.99	13.37	14.61	14.57	Mean timesteps 23.07
[36m6wn8d4f6c5-alg

[36m6wn8d4f6c5-algo-1-r8807 |[0m Episode 2340	Average Score: 	23.52	23.47	23.18	23.60	Mean timesteps 37.33
[36m6wn8d4f6c5-algo-1-r8807 |[0m Episode 2350	Average Score: 	26.19	23.68	22.51	22.79	Mean timesteps 38.30
[36m6wn8d4f6c5-algo-1-r8807 |[0m Episode 2360	Average Score: 	28.32	24.37	22.07	24.20	Mean timesteps 39.58
[36m6wn8d4f6c5-algo-1-r8807 |[0m Episode 2370	Average Score: 	28.85	24.01	23.58	25.33	Mean timesteps 40.77
[36m6wn8d4f6c5-algo-1-r8807 |[0m Episode 2380	Average Score: 	31.08	26.25	24.92	26.27	Mean timesteps 43.53
[36m6wn8d4f6c5-algo-1-r8807 |[0m Episode 2390	Average Score: 	30.84	29.10	26.54	29.56	Mean timesteps 46.56
[36m6wn8d4f6c5-algo-1-r8807 |[0m Episode 2400	Average Score: 	32.69	32.90	27.33	32.86	Mean timesteps 50.55
[36m6wn8d4f6c5-algo-1-r8807 |[0m Episode 2410	Average Score: 	31.85	33.92	29.11	35.03	Mean timesteps 51.60
[36m6wn8d4f6c5-algo-1-r8807 |[0m Episode 2420	Average Score: 	33.66	34.90	29.03	35.09	Mean timesteps 53.14
[36m6wn8d4f6c5-alg

[36m6wn8d4f6c5-algo-1-r8807 |[0m Episode 3100	Average Score: 	65.10	65.44	69.18	65.81	Mean timesteps 101.49
[36m6wn8d4f6c5-algo-1-r8807 |[0m Episode 3110	Average Score: 	68.31	66.68	71.64	67.69	Mean timesteps 104.50
[36m6wn8d4f6c5-algo-1-r8807 |[0m Episode 3120	Average Score: 	69.53	61.38	65.69	68.01	Mean timesteps 101.20
[36m6wn8d4f6c5-algo-1-r8807 |[0m Episode 3130	Average Score: 	70.51	63.21	64.46	66.40	Mean timesteps 101.90
[36m6wn8d4f6c5-algo-1-r8807 |[0m Episode 3140	Average Score: 	71.50	63.68	62.84	65.27	Mean timesteps 100.71
[36m6wn8d4f6c5-algo-1-r8807 |[0m Episode 3150	Average Score: 	72.24	71.49	59.29	66.96	Mean timesteps 103.22
[36m6wn8d4f6c5-algo-1-r8807 |[0m Episode 3160	Average Score: 	71.50	69.50	60.82	67.03	Mean timesteps 103.68
[36m6wn8d4f6c5-algo-1-r8807 |[0m Episode 3170	Average Score: 	70.12	67.40	64.58	68.17	Mean timesteps 103.71
[36m6wn8d4f6c5-algo-1-r8807 |[0m Episode 3180	Average Score: 	72.53	65.46	63.09	64.18	Mean timesteps 102.26
[36m6wn8d

# Running hyperparameter optimisation
Start the Hyperparameter optimisation sagemaker jobs!
HPO will run if `run_hpo == True`. You can view the training progress in SageMaker > Training > Hyperparameter tuning jobs. Please note that this runs a couple of instances and could be costly.

In [13]:
hyperparameter_ranges = {
#     'buffer_size': IntegerParameter(1000, 6000),
#     'update_every': IntegerParameter(10, 20),
    'batch_size': IntegerParameter(128,128),

    'lr_start': ContinuousParameter(1e-5, 1e-3),
    'lr_factor': ContinuousParameter(0.5, 1.0),
    'lr_step': IntegerParameter(5000, 30000),
    
    'tau': ContinuousParameter(1e-4, 1e-3),
    'gamma': ContinuousParameter(0.90, 0.99),
    
    'depth': IntegerParameter(64, 256),
    'depthS': IntegerParameter(64, 256),
}
run_hpo=False

In [14]:
max_jobs = 3
max_parallel_jobs = 3

tuner = HyperparameterTuner(estimator,
                            objective_metric_name='timesteps',
                            objective_type='Maximize',
                            hyperparameter_ranges=hyperparameter_ranges,
                            metric_definitions=metric_definitions,
                            max_jobs=max_jobs,
                            max_parallel_jobs=max_parallel_jobs,
                            base_tuning_job_name=job_name_prefix)
if run_hpo:
    tuner.fit()

AttributeError: 'LocalSagemakerClient' object has no attribute 'create_hyper_parameter_tuning_job'

Now wait for the hyper parameter turner to complete. If you are running HPO, please check SageMaker > Training > Hyperparameter tuning jobs for the progress.

# Updating your SageMaker endpoint

## Collect the target model

Once you have retrained your models, we will copy the model artifacts into your SageMaker notebook then package it for a SageMaker endpoint. 

Firstly, we will obtain an s3 URL of the best model.

In [44]:
if run_hpo:
    best_training_job = tuner.best_training_job()
    best_model_path = "{}/{}/output/model.tar.gz".format(s3_output_path, best_training_job)
else:
    best_model_path = estimator.model_data
model_path_key = best_model_path.replace(s3_output_path, "")
print("Best model location {}".format(best_model_path))

Best model location s3://sagemaker-us-west-2-681627153266/Battlesnake-job-mxnet-2021-08-07-18-47-47-200/model.tar.gz


Download the best model and put it into LocalEnv/pretrained_models/

Note that your new models will override the old models and you can keep version control of all the models

In [36]:
s3 = boto3.resource('s3')
s3.Bucket(s3_bucket).download_file(model_path_key, 'inference/pretrained_models/model.tar.gz')

model_dir = "Model-{}x{}".format(map_size[0], map_size[1])
!rm -r inference/pretrained_models/{model_dir}

!mkdir inference/pretrained_models/{model_dir}
!tar -xf inference/pretrained_models/model.tar.gz -C inference/pretrained_models/{model_dir}
# !rm mxnet_inference/pretrained_models/model.tar.gz

Package pretrained_models to endpoint

In [37]:
!mv inference/pretrained_models Models
!tar -czf Models.tar.gz Models
!mv Models inference/pretrained_models

s3_client = boto3.client('s3')
s3_client.upload_file("Models.tar.gz", s3_bucket, 
                      "battlesnake-aws/pretrainedmodels/Models.tar.gz")
!rm Models.tar.gz

## Update the SageMaker endpoint with your new model

In [38]:
model_data = "s3://{}/battlesnake-aws/pretrainedmodels/Models.tar.gz".format(s3_bucket)
print("Make an endpoint with {}".format(model_data))

Make an endpoint with s3://sagemaker-us-west-2-681627153266/battlesnake-aws/pretrainedmodels/Models.tar.gz


Delete the existing endpoint, model, and endpoint configuration files

In [45]:
sm_client = boto3.client(service_name='sagemaker')
sm_client.delete_endpoint(EndpointName='battlesnake-endpoint')
sm_client.delete_endpoint_config(EndpointConfigName='battlesnake-endpoint')
sm_client.delete_model(ModelName="battlesnake-mxnet")

{'ResponseMetadata': {'RequestId': '0208e1c8-b026-4687-b05d-2da0f6ab7a14',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': '0208e1c8-b026-4687-b05d-2da0f6ab7a14',
   'content-type': 'application/x-amz-json-1.1',
   'content-length': '0',
   'date': 'Sat, 07 Aug 2021 21:35:40 GMT'},
  'RetryAttempts': 0}}

Create a new endpoint with the new model

In [42]:
from sagemaker.mxnet import MXNetModel

endpoint_instance_type = info["SagemakerInferenceInstanceType"]
mxnet_model = MXNetModel(model_data=model_data,
                             entry_point='predict.py',
                             role=role,
                             framework_version='1.6.0',
                             source_dir='inference/inference_src',
                             name=info['SagemakerEndPointName'],
                             code_location='s3://{}//code'.format(s3_bucket),
                             py_version='py3')
predictor = mxnet_model.deploy(initial_instance_count=1,
                               instance_type=endpoint_instance_type,
                               endpoint_name=info['SagemakerEndPointName'])



-------------!

## Testing that your endpoint works.
You should see `Action to take is X`

In [43]:
data1 = np.zeros(shape=(1, 2, 3, map_size[0]+2, map_size[1]+2))
data2 = np.zeros(shape=(1, 2))
data3 = np.zeros(shape=(1, 2))
data4 = np.zeros(shape=(1, 2))
health_dict = {0: 50, 1: 50}
json = {"board": {
            "height": 15,
            "width": 15,
            "food": [],
            "snakes": []
            },
        "you": {
            "id": "snake-id-string",
            "name": "Sneky Snek",
            "health": 90,
            "body": [{"x": 1, "y": 3}]
            }
        }
action = predictor.predict({"state": data1, "snake_id": data2, 
                           "turn_count": data3, "health": data4,  
                           "all_health": health_dict, "map_width": map_size[0], "json": json})
print("Action to take is {}".format(action))

Action to take is 3
