In [1]:
import time
import boto3
from sagemaker import get_execution_role
from sagemaker.amazon.amazon_estimator import get_image_uri

In [2]:
num_classes=2

num_training_samples=194266

num_layers=18

mini_batch_size=128

image_shape= '3,50,50'

augmentation_type = 'crop_color_transform'

epochs=5

learning_rate=0.01

use_pretrained_model=1



In [3]:
job_name_prefix='breast-cancer-detection'
timestamp=time.strftime('-%Y-%m-%d-%H-%M-%S', time.gmtime())
job_name=job_name_prefix+timestamp

In [4]:
bucket='sagemaker-data-iamrahulinc'
input_prefix='breast-cancer-detection/input/recordio'
input_train='s3://{}/{}/train'.format(bucket,input_prefix)
input_test='s3://{}/{}/test'.format(bucket,input_prefix)

In [5]:
output_prefix='breast-cancer-detection/output'
output_path='s3://{}/{}/'.format(bucket,output_prefix)


In [6]:
instance_count=1
instance_type='ml.m4.xlarge'
volume_size_gb=50

In [8]:
role=get_execution_role()
training_image=get_image_uri(boto3.Session().region_name,'image-classification')

In [9]:
train_timeout=360000

In [11]:
training_params = {
    "TrainingJobName": job_name,
    # specify the training docker image
    "AlgorithmSpecification": {
        "TrainingImage": training_image,
        "TrainingInputMode": "Pipe"
    },
    "RoleArn": role,
    "OutputDataConfig": {
        "S3OutputPath": output_path
    },
    "ResourceConfig": {
        "InstanceCount": instance_count,
        "InstanceType": instance_type,
        "VolumeSizeInGB": volume_size_gb
    },
    "HyperParameters": {
        #"image_shape": image_shape,
        "num_layers": str(num_layers),
        "num_training_samples": str(num_training_samples),
        "num_classes": str(num_classes),
        "mini_batch_size": str(mini_batch_size),
        "epochs": str(epochs),
        "learning_rate": str(learning_rate),
        #"lr_scheduler_step": str(lr_scheduler_step),
        #"lr_scheduler_factor": str(lr_scheduler_factor),
        #"augmentation_type": str(augmentation_type),
        #"checkpoint_frequency": str(checkpoint_frequency),
        "augmentation_type" : str(augmentation_type)
    },
    "StoppingCondition": {
        "MaxRuntimeInSeconds": train_timeout
    },
#Training data should be inside a subdirectory called "train"
#Validation data should be inside a subdirectory called "validation"
#The algorithm currently only supports fullyreplicated model (where data is copied onto each machine)
    "InputDataConfig": [
        {
            "ChannelName": "train",
            "DataSource": {
                "S3DataSource": {
                    "S3DataType": "S3Prefix",
                    "S3Uri": input_test,
                    "S3DataDistributionType": "FullyReplicated"
                }
            },
            "ContentType": "application/x-recordio",
            "CompressionType": "None"
        },
        {
            "ChannelName": "validation",
            "DataSource": {
                "S3DataSource": {
                    "S3DataType": "S3Prefix",
                    "S3Uri": 's3://{}/validation/'.format(bucket),
                    "S3DataDistributionType": "FullyReplicated"
                }
            },
            "ContentType": "application/x-recordio",
            "CompressionType": "None"
        }
    ]
}
print('Training job name: {}'.format(job_name))
print('\nInput Data Location: {}'.format(training_params['InputDataConfig'][0]['DataSource']['S3DataSource']))

Training job name: breast-cancer-detection-2020-04-03-05-58-14

Input Data Location: {'S3DataType': 'S3Prefix', 'S3Uri': 's3://sagemaker-data-iamrahulinc/breast-cancer-detection/input/recordio/test', 'S3DataDistributionType': 'FullyReplicated'}
