In [21]:
import sagemaker
import boto3
from sagemaker.amazon.amazon_estimator import get_image_uri 
from sagemaker.session import s3_input, Session

## Create bucket & Validation Region for S3

In [22]:
bucket_name = 'aps360project' # <--- CHANGE THIS VARIABLE TO A UNIQUE NAME FOR YOUR BUCKET
my_region = boto3.session.Session().region_name # set the region of the instance
print(my_region)

us-east-2


## Create Paths to S3 Buckets for storage of model data

In [23]:
# Prefix for files in bucket
prefix = 'chest_x_ray_classification'

# Dataset directory
dataset = 'Xray_Dataset'

# Model output folder name
output_dir_name = 'trial_1'

# S3 Path bucket to get the data for training (Train, Test, Validation)
dataset_dir = 's3://{}/{}/{}'.format(bucket_name, prefix, dataset)

# output path for SageMaker to dump all model artifacts and graphs etc
output_dir = 's3://{}/{}/{}'.format(bucket_name, prefix,output_dir_name)

# sanity check for output path for model data
print('Dataset directory <dataset_dir>: ', dataset_dir)
print('Model Output directory <output_dir>: ', output_dir)

Dataset directory <dataset_dir>:  s3://aps360project/chest_x_ray_classification/Xray_Dataset
Model Output directory <output_dir>:  s3://aps360project/chest_x_ray_classification/trial_1


In [24]:
# initialize hyperparamters
hyperparameters = {
    'epochs': 4,
    'batch-size': 256,
    'learning-rate': 0.0001
}

# Training instance
training_instance = 'ml.g4dn.2xlarge'

# Create the current role to use sagemaker 
role = sagemaker.get_execution_role()

In [25]:
from sagemaker.pytorch import PyTorch

# Create a Pytorch estimator to run the training script on AWS Sagemaker
estimator = PyTorch(
                entry_point='xray1.py',
                role=role,
                framework_version='1.8.0',
                py_version='py3',
                output_path=output_dir,
                train_instance_count=1,
                script_mode=True,
                train_instance_type=training_instance,
                hyperparameters= hyperparameters,
            )

train_instance_count has been renamed in sagemaker>=2.
See: https://sagemaker.readthedocs.io/en/stable/v2.html for details.
train_instance_type has been renamed in sagemaker>=2.
See: https://sagemaker.readthedocs.io/en/stable/v2.html for details.


In [26]:
estimator.fit({'training': dataset_dir})

2021-03-28 21:57:14 Starting - Starting the training job...
2021-03-28 21:57:17 Starting - Launching requested ML instancesProfilerReport-1616968634: InProgress
......
2021-03-28 21:58:30 Starting - Preparing the instances for training......
2021-03-28 21:59:30 Downloading - Downloading input data...
2021-03-28 22:00:12 Training - Downloading the training image.................[34mbash: cannot set terminal process group (-1): Inappropriate ioctl for device[0m
[34mbash: no job control in this shell[0m
[34m2021-03-28 22:02:53,593 sagemaker-training-toolkit INFO     Imported framework sagemaker_pytorch_container.training[0m
[34m2021-03-28 22:02:53,616 sagemaker_pytorch_container.training INFO     Block until all host DNS lookups succeed.[0m
[34m2021-03-28 22:02:56,650 sagemaker_pytorch_container.training INFO     Invoking user training script.[0m
[34m2021-03-28 22:02:56,984 sagemaker-training-toolkit INFO     Invoking user script
[0m
[34mTraining Env:
[0m
[34m{
    "additio