In [None]:
import sagemaker
import boto3
import sys
import os
import glob
import re
import subprocess
from IPython.display import HTML
import time
from time import gmtime, strftime
sys.path.append("common")
from misc import get_execution_role, wait_for_s3_object
from docker_utils import build_and_push_docker_image
from sagemaker.rl import RLEstimator, RLToolkit, RLFramework

In [None]:
# Required variables and objects
sage_session = sagemaker.session.Session()
# Root S3 location handed to the estimator as its output path.
# Plain literal: nothing is interpolated, so no f-string is needed.
s3_output_path = 's3://sagemaker-cmcollander/'

# Training settings
# instance_type = "ml.m4.xlarge"
instance_type = "ml.c5.2xlarge"
# max_jobs = 50
# max_parallel_jobs = 5

# IAM role
# Ask the SageMaker SDK for the execution role first; if that lookup fails,
# fall back to the `get_execution_role` helper imported from `misc`.
# `except Exception` (rather than a bare `except:`) keeps KeyboardInterrupt
# and SystemExit propagating, so interrupting the cell still works.
try:
    role = sagemaker.get_execution_role()
except Exception:
    role = get_execution_role()

print("Using IAM role arn: {}".format(role))

In [None]:
%%time

# Choose the image flavour from the instance family. Both the `ml.p*` and
# `ml.g*` families are GPU-backed; everything else gets the CPU image.
cpu_or_gpu = 'gpu' if instance_type.startswith(('ml.p', 'ml.g')) else 'cpu'
repository_short_name = f"sagemaker-roboschool-ray-{cpu_or_gpu}"

# Build arguments passed through to the Docker build.
docker_build_args = {
    'CPU_OR_GPU': cpu_or_gpu,
    'AWS_REGION': boto3.Session().region_name,
}

# Build the custom image and push it; the helper returns the image name that
# the estimator is configured with later on.
custom_image_name = build_and_push_docker_image(repository_short_name, build_args=docker_build_args)
print(f"Using ECR image {custom_image_name}")

In [None]:
%%time

# Metric definitions that RLEstimator provides by default for the Ray toolkit.
metric_definitions = RLEstimator.default_metric_definitions(RLToolkit.RAY)

# Configure a single-instance training job that runs the entry script from
# `src` inside the custom image built earlier in the notebook.
estimator = RLEstimator(
    entry_point="train-reacher.py",
    source_dir='src',
    dependencies=["common/sagemaker_rl"],
    image_name=custom_image_name,
    role=role,
    train_instance_type=instance_type,
    train_instance_count=1,
    output_path=s3_output_path,
    base_job_name="reacher",
    metric_definitions=metric_definitions,
    hyperparameters={},
)

# Launch the job without blocking the notebook; later cells use the job name.
estimator.fit(wait=False)
job_name = estimator.latest_training_job.job_name
print("Training job: {}".format(job_name))

In [None]:
# The job's artifacts live under the same S3 location that was given to the
# estimator as its output path, so derive the URLs from `s3_output_path`
# instead of repeating the bucket name.
s3_url = f"{s3_output_path.rstrip('/')}/{job_name}"

# Key (relative to the bucket) and full URL of the job's intermediate output.
intermediate_folder_key = f"{job_name}/output/intermediate/"
intermediate_url = f"{s3_url}/output/intermediate/"

print(f"S3 job path: {s3_url}")
print(f"Intermediate folder path: {intermediate_url}")

# Local scratch directory for files downloaded from the job.
# os.makedirs(..., exist_ok=True) makes the cell safe to re-run and raises on
# a real failure instead of silently ignoring a non-zero `mkdir` exit status.
tmp_dir = f"/tmp/{job_name}"
os.makedirs(tmp_dir, exist_ok=True)
print(f"Create local folder {tmp_dir}")

In [None]:
# Bucket name = first component of the output path after the "s3://" scheme.
# Defined here because no earlier cell in the notebook binds `s3_bucket`.
s3_bucket = s3_output_path[len("s3://"):].split("/", 1)[0]

# Wait for non-empty .mp4 objects under the job's intermediate folder and
# fetch up to `limit` of them into the local temp directory.
recent_videos = wait_for_s3_object(
    s3_bucket, intermediate_folder_key, tmp_dir,
    fetch_only=(lambda obj: obj.key.endswith(".mp4") and obj.size > 0),
    limit=10, training_job_name=job_name)

# Fail with a clear message rather than an IndexError on an empty result.
if not recent_videos:
    raise RuntimeError(
        "No .mp4 videos were found under {} for job {}".format(
            intermediate_folder_key, job_name))

last_video = sorted(recent_videos)[-1]  # Pick which video to watch

# Copy the chosen video next to the notebook so the <video> tag can load it.
# An argument list (no shell) keeps paths with spaces intact, and check=True
# surfaces a failed copy instead of rendering a broken player.
render_dir = "./src/tmp_render"
os.makedirs(render_dir, exist_ok=True)
subprocess.run(["cp", last_video, render_dir + "/last_video.mp4"], check=True)
HTML('<video src="./src/tmp_render/last_video.mp4" controls autoplay></video>')

In [None]:
%matplotlib inline
from sagemaker.analytics import TrainingJobAnalytics

# Load the 'episode_reward_mean' metric recorded for this training job.
df = TrainingJobAnalytics(job_name, ['episode_reward_mean']).dataframe()
num_metrics = len(df)
if num_metrics == 0:
    print("No algorithm metrics found in CloudWatch")
else:
    # DataFrame.plot returns a matplotlib Axes object; bind it to `ax` so it
    # does not shadow the conventional `plt` alias for matplotlib.pyplot.
    ax = df.plot(x='timestamp', y='value', figsize=(12,5), legend=True, style='b-')
    ax.set_ylabel('Mean reward per episode')
    ax.set_xlabel('Training time (s)')