### Set up

#### 1. Set up accounts and role

In [None]:
import sagemaker
import boto3

# SageMaker session handle, plus the AWS account id and region that the
# image URI below is built from.
sagemaker_session = sagemaker.Session()
account_id = boto3.client("sts").get_caller_identity().get("Account")
region = boto3.session.Session().region_name

# IAM role handed to the training job, taken from the current execution role.
# Alternative kept for reference: spell the role ARN out explicitly.
# role = "arn:aws:iam::{}:role/service-role/AmazonSageMaker-ExecutionRole-20190118T115449".format(account_id)
role = sagemaker.get_execution_role()


#### 2. Set up image and instance type

In [None]:
# Instance type the training job runs on, and the name:tag of the custom
# training image that is appended to the ECR registry host.
instance_type = "ml.p3.2xlarge"
pytorch_custom_image_name = "character-embedding:gpu-1.0.0-201908270722"

In [None]:
# Full ECR image URI: <account>.dkr.ecr.<region>.amazonaws.com/<image>:<tag>
docker_repo = "{}.dkr.ecr.{}.amazonaws.com/{}".format(
    account_id,
    region,
    pytorch_custom_image_name,
)

#### 3. Configure train, test and validation datasets

In [None]:
# Bucket that holds every dataset prefix and the model output location below.
bucket = sagemaker_session.default_bucket()

In [None]:
# S3 locations for the three dataset splits and for the trained model output,
# all under the same bucket.
# NOTE(review): `test` is defined here but is not passed as a training channel
# in `inputs` — confirm whether that is intentional.
train, test, val, s3_output_path = (
    template.format(bucket)
    for template in (
        "s3://{}/email_mock_train/",
        "s3://{}/email_mock_test/",
        "s3://{}/email_mock_val/",
        "s3://{}/email_mock_model/",
    )
)

### Start training

In [None]:
# Channel name -> S3 location, as passed to estimator.fit().
inputs = {"train": train, "val": val}

In [None]:
# Hyperparameters forwarded to the training entry point; all values are
# given as strings.
hyperparameters = {
    "batchsize": "32",
    "epochs": "1000",
    "log-level": "INFO",
}

In [None]:
# Metrics extracted from the training job's log output. Each regex captures
# the number that follows the matching "###score: <metric>###" marker.
# The patterns are raw strings so that "\d" reaches the regex engine as-is
# instead of being treated as an (invalid) string escape sequence, which
# raises a SyntaxWarning on recent Python versions.
metric_definitions = [
    {
        "Name": "TrainLoss",
        "Regex": r"###score: train_loss### (\d*[.]?\d*)",
    },
    {
        "Name": "ValidationLoss",
        "Regex": r"###score: val_loss### (\d*[.]?\d*)",
    },
    {
        "Name": "TrainAccuracy",
        "Regex": r"###score: train_accuracy### (\d*[.]?\d*)",
    },
    {
        "Name": "ValidationAccuracy",
        "Regex": r"###score: val_accuracy### (\d*[.]?\d*)",
    },
]

In [None]:
# Git source for the training code, pinned to an exact commit on master.
git_config = {
    "repo": "https://github.com/elangovana/character-embedding.git",
    "branch": "master",
    "commit": "cb5b7d11a87b47f536367d49d2a4e89a68cc949d",
}

In [None]:
from sagemaker.pytorch import PyTorch

# Estimator describing the training job: code location, container image,
# compute, hyperparameters, output location and metrics.
# NOTE(review): `image_name`, `train_instance_count` and `train_instance_type`
# look like SageMaker Python SDK v1 parameter names — confirm the installed
# sagemaker version before upgrading.
estimator = PyTorch(
    # Training code, fetched from the git repository described by git_config.
    entry_point="experiment_email.py",
    source_dir="src",
    dependencies=["src"],
    git_config=git_config,
    # Container image and framework settings.
    image_name=docker_repo,
    framework_version="1.0.0",
    py_version="py3",
    # Permissions and compute.
    role=role,
    train_instance_count=1,
    train_instance_type=instance_type,
    # train_use_spot_instances=True,
    # Job configuration and outputs.
    hyperparameters=hyperparameters,
    metric_definitions=metric_definitions,
    output_path=s3_output_path,
    base_job_name="Character-embedding",
)

In [None]:
# Launch the training job using the channels defined in `inputs`.
estimator.fit(inputs=inputs)