# Optimizing building HVAC with Amazon SageMaker RL

In [None]:
import sagemaker
import boto3

from sagemaker.rl import RLEstimator

from source.common.docker_utils import build_and_push_docker_image

## Initialize Amazon SageMaker

In [None]:
# Open a SageMaker session and look up the execution role that is passed to
# the estimator later in this notebook.
sm_session = sagemaker.session.Session()
role = sagemaker.get_execution_role()

# The SageMaker SDK creates a default bucket for the session; replace it with
# the name of your own bucket if needed.
s3_bucket = sm_session.default_bucket()
s3_output_path = 's3://{}'.format(s3_bucket)

print('S3 bucket path: {}'.format(s3_output_path))
print('Role: {}'.format(role))

## Set additional training parameters

### Set instance type

Set `cpu_or_gpu` to either `'cpu'` or `'gpu'` for using CPU or GPU instances.

### Configure the framework you want to use

Set `framework` to `'tf'` or `'torch'` for TensorFlow or PyTorch, respectively.

You will also have to edit your entry point (i.e., `train-sagemaker-distributed.py`) so that its `"use_pytorch"` configuration parameter matches the framework that you have selected.

In [None]:
# Prefix used as the base name of the SageMaker training jobs launched below.
job_name_prefix = 'energyplus-hvac-ray'

# Hardware flavour; it is also forwarded to the Docker build as CPU_OR_GPU.
cpu_or_gpu = 'gpu'  # has to be either cpu or gpu
if cpu_or_gpu not in ('cpu', 'gpu'):
    raise ValueError('cpu_or_gpu has to be either cpu or gpu')

# Deep-learning framework; it is also forwarded to the Docker build as
# FRAMEWORK. Validated here so that a typo fails fast instead of surfacing
# during the image build.
framework = 'tf'  # has to be either tf or torch
if framework not in ('tf', 'torch'):
    raise ValueError('framework has to be either tf or torch')

# NOTE: this is a GPU instance type and is not derived from cpu_or_gpu;
# choose a CPU instance type yourself when cpu_or_gpu is 'cpu'.
instance_type = 'ml.g4dn.16xlarge'  # g4dn.16xlarge has 1 GPU and 64 vCPUs

# Train your homogeneous scaling job here

### Edit the training code

The training code is in the file `train-sagemaker-distributed.py`, which is located in the `source/` directory.

*Note that Ray will automatically set `"ray_num_cpus"` and `"ray_num_gpus"` in `_get_ray_config`.*

In [None]:
# Display the training script with syntax highlighting, using the Pygments
# command-line tool through an IPython shell escape.
# NOTE(review): the RLEstimator cell in this notebook uses
# entry_point='train-sagemaker-hvac.py' — confirm that the file shown here is
# the script you actually intend to inspect.
!pygmentize source/train-sagemaker-distributed.py

### Train the RL model using the Python SDK Script mode

When using SageMaker for distributed training, you can select a GPU or CPU instance. The RLEstimator is used for training RL jobs.

1. Specify the source directory where the environment, presets and training code are uploaded.
2. Specify the entry point as the training code.
3. Specify the image (CPU or GPU) to be used for the training environment.
4. Define the training parameters such as the instance count, job name, and S3 path for output.
5. Define the metrics definitions that you are interested in capturing in your logs. These can also be visualized in CloudWatch and SageMaker Notebooks.

#### GPU docker image

In [None]:
# Build the training container image and push it, keeping the returned image
# name for the estimator.
repository_short_name = 'sagemaker-hvac-ray-{}'.format(cpu_or_gpu)

# Build arguments handed to the Docker build.
docker_build_args = dict(
    CPU_OR_GPU=cpu_or_gpu,
    AWS_REGION=boto3.Session().region_name,
    FRAMEWORK=framework,
)

image_name = build_and_push_docker_image(
    repository_short_name,
    build_args=docker_build_args,
)
print(f"Using ECR image {image_name}")

In [None]:
# Regex fragment that captures a signed integer or decimal number with an
# optional exponent; shared by every metric below.
_NUMBER_PATTERN = '([-+]?[0-9]*[.]?[0-9]+([eE][-+]?[0-9]+)?)'

# Names of the metrics to capture. Each one is matched in the logs as
# "<name>: <number>". Every name appears exactly once (the original list
# contained 'training_iteration' twice).
_metric_names = (
    'training_iteration',
    'episodes_total',
    'num_steps_trained',
    'timesteps_total',
    'episode_reward_max',
    'episode_reward_mean',
    'episode_reward_min',
)

# Metric definitions passed to the estimator: one Name/Regex pair per metric.
metric_definitions = [
    {'Name': name, 'Regex': f'{name}: {_NUMBER_PATTERN}'}
    for name in _metric_names
]

### Ray homogeneous scaling - Specify `train_instance_count` > 1

Homogeneous scaling allows us to use multiple instances of the same type.

Spot instances are unused EC2 instances that can be used at up to a 90% discount compared to On-Demand prices. More information about spot instances can be found [here](https://aws.amazon.com/ec2/spot/?cards.sort-by=item.additionalFields.startDateTime&cards.sort-order=asc) and [here](https://docs.aws.amazon.com/sagemaker/latest/dg/model-managed-spot-training.html).

To use spot instances, set `train_use_spot_instances = True`. To use On-Demand instances, set `train_use_spot_instances = False`.

In [None]:
# Hyperparameters passed to the training entry point.
hyperparameters = {
    # Number of days to simulate. Remember to adjust the dates in RunPeriod of
    # 'source/eplus/envs/buildings/MediumOffice/RefBldgMediumOfficeNew2004_Chicago.idf'
    # to match the simulation days.
    'n_days': 365,
    'n_iter': 50,  # number of training iterations
    'algorithm': 'APEX_DDPG',  # only APEX_DDPG and PPO are tested
    # Whether each zone temperature set point has to be independently controlled.
    'multi_zone_control': True,
    'energy_temp_penalty_ratio': 10,
}

# Additional training parameters forwarded to the estimator as keyword
# arguments.
training_params = {
    'base_job_name': job_name_prefix,
    'instance_count': 1,
    # Record every hyperparameter as a tag on the training job.
    'tags': [
        {'Key': key, 'Value': str(value)}
        for key, value in hyperparameters.items()
    ],
}

# Define the RLEstimator. The SageMaker SDK v2 argument names
# (`instance_type`, `instance_count`) are used for consistency with
# `image_uri`, which is itself a v2-only name.
estimator = RLEstimator(
    entry_point='train-sagemaker-hvac.py',
    source_dir='source',
    dependencies=['source/common/'],
    image_uri=image_name,
    role=role,
    # Set instance_type to 'local' instead to run the job in local mode.
    instance_type=instance_type,
    output_path=s3_output_path,
    metric_definitions=metric_definitions,
    hyperparameters=hyperparameters,
    **training_params,
)

# Start the training job without blocking the notebook on its completion.
estimator.fit(wait=False)

print(' ')
print(estimator.latest_training_job.job_name)
print('type=', instance_type, 'count=', training_params['instance_count'])
print(' ')