# Checkpoint Test

In the hyperparameter tuning notebook we will be using spot instances.  Because these instances could be stopped at any time, we need the ability to read checkpoints and resume training from them.

#### Import necessary modules

In [2]:
%matplotlib inline
import math
from time import gmtime, strftime
from time import sleep

import boto3
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sagemaker import get_execution_role
from sagemaker.estimator import Estimator
from sagemaker.session import Session
import seaborn as sns; sns.set()

#### Set the local parameters

In [5]:
# --- Compute resources -------------------------------------------------------
image_type = 'cpu'              # selects which "portfolio-optimization-*" image is used
instance_type = 'ml.m5.large'
n_instances = 1

# --- Hyperparameters forwarded to the training container ---------------------
hyperparameters = dict(start_day=2915,
                       days_per_epoch=90,
                       fc1=512,
                       fc2=256,
                       lr_actor=0.0005,
                       lr_critic=0.004)

# --- Spot-instance settings --------------------------------------------------
train_use_spot_instances = True
train_max_run = 14400
# A maximum wait only applies when spot instances are requested.
if train_use_spot_instances:
    train_max_wait = 14400
else:
    train_max_wait = None

# --- AWS session, identity and storage locations -----------------------------
sagemaker_session = Session()
bucket_name = sagemaker_session.default_bucket()
role = get_execution_role()
account = boto3.client('sts').get_caller_identity()['Account']
region = boto3.Session().region_name

# ECR URI of the training image, built from the account, region and image type.
image_name = f'{account}.dkr.ecr.{region}.amazonaws.com/portfolio-optimization-{image_type}:latest'
# S3 prefix under which per-job folders are created.
output_path = 's3://{}/jobs'.format(bucket_name)


#### Compile the environment

In [6]:
# Display the full ECR image URI so the account, region and image type can be checked by eye.
print(f'{image_name}')

031118886020.dkr.ecr.us-east-1.amazonaws.com/portfolio-optimization-cpu:latest


#### Submit the job

In [4]:
# Give the job a unique, timestamped name.  The same name is used for the
# checkpoint prefix and for the training job itself, so the checkpoints are
# written under "<output_path>/<job_name>/checkpoints".
job_name = 'checkpoint-test-' + strftime('%Y-%m-%d-%H-%M-%S', gmtime())

# Pass the spot settings from the parameter cell; without them the job runs
# on-demand and the spot/checkpoint behaviour under test is never exercised.
estimator = Estimator(role=role,
                      train_instance_count=n_instances,
                      train_instance_type=instance_type,
                      image_name=image_name,
                      train_use_spot_instances=train_use_spot_instances,
                      train_max_run=train_max_run,
                      train_max_wait=train_max_wait,
                      checkpoint_s3_uri=f'{output_path}/{job_name}/checkpoints',
                      hyperparameters=hyperparameters)
estimator.fit(job_name=job_name)




#### Start a new job from the previous checkpoint

In [None]:
# Read the checkpoint location from the previous estimator before the name
# `estimator` is rebound to the new job below.
checkpoint_s3_uri = estimator.checkpoint_s3_uri

# The new job points at the same checkpoint prefix as the previous one and
# uses the same spot settings from the parameter cell.
estimator = Estimator(role=role,
                      train_instance_count=n_instances,
                      train_instance_type=instance_type,
                      image_name=image_name,
                      train_use_spot_instances=train_use_spot_instances,
                      train_max_run=train_max_run,
                      train_max_wait=train_max_wait,
                      checkpoint_s3_uri=checkpoint_s3_uri,
                      hyperparameters=hyperparameters)
estimator.fit()

# Reference
- [Tennis Sensitivity](https://github.com/daniel-fudge/sagemaker-tennis/blob/master/sensitivity.ipynb)
- [Tennis Repo](https://github.com/daniel-fudge/sagemaker-tennis)
#### SageMaker
- [SageMaker Instance types](https://aws.amazon.com/sagemaker/pricing/instance-types/)
- [SageMaker Instance prices](https://aws.amazon.com/sagemaker/pricing/)
- [SageMaker Estimator SDK](https://sagemaker.readthedocs.io/en/stable/api/training/estimators.html)