### Set up

#### 1. Set up accounts and role

In [1]:
#!pip install sagemaker==1.39.0

In [2]:
import sagemaker
import boto3

# SageMaker session, plus the AWS account id and region the notebook is running against.
sagemaker_session = sagemaker.Session()
sts_client = boto3.client("sts")
account_id = sts_client.get_caller_identity().get("Account")
region = boto3.session.Session().region_name

# Execution role ARN built from the caller's account id and a fixed role name.
# Alternative: role = sagemaker.get_execution_role()
role_arn_template = "arn:aws:iam::{}:role/service-role/AmazonSageMaker-ExecutionRole-20190118T115449"
role = role_arn_template.format(account_id)


#### 2. Set up image and instance type

In [3]:
# Tag of the custom training image (combined with the ECR registry host in the next cell)
# and the instance type requested for training jobs.
pytorch_custom_image_name = 'ppi-extractor:gpu-1.0.0-201910130520'
instance_type = 'ml.p3.8xlarge'

In [4]:
# Full image URI: <account>.dkr.ecr.<region>.amazonaws.com/<image name:tag>.
ecr_image_uri_template = "{}.dkr.ecr.{}.amazonaws.com/{}"
docker_repo = ecr_image_uri_template.format(account_id, region, pytorch_custom_image_name)

#### 3. Configure train, test and validation datasets

In [5]:
# S3 bucket that holds the datasets, the embeddings, the results and the uploaded code.
bucket = 'aegovan-data'

In [6]:
# Pre-trained word embedding file and the embedding dimension passed to the training script.
# embed_dim presumably has to match the vector size of the embedding file - verify when switching.
# Alternatives that were used previously:
#   s3://<bucket>/embeddings/wikipedia-pubmed-and-PMC-w2v.bin.txt
#   s3://<bucket>/embeddings/collobert/words_vocab_collabert.txt   (with embed_dim=50)
embeddingfile = "s3://{}/embeddings/bio_nlp_vec/PubMed-shuffle-win-2.bin.txt".format(bucket)
embed_dim = 200

# Multiclass train / test / validation splits.
# The earlier {train,test,val}_unique_pub_v6_less_negative.json splits under the same
# processed_dataset/ prefix used to be assigned first and then immediately overwritten;
# only the effective assignments are kept here.
trainfile = "s3://{}/processed_dataset/train_multiclass.json".format(bucket)
testfile = "s3://{}/processed_dataset/test_multiclass.json".format(bucket)
valfile = "s3://{}/processed_dataset/val_multiclass.json".format(bucket)

# S3 prefixes for the job output and for the uploaded training source code.
s3_output_path = "s3://{}/results/".format(bucket)
s3_code_path = "s3://{}/ppi_code/".format(bucket)

### Start training

In [7]:
# Input data locations handed to estimator.fit(), keyed by channel name.
inputs = dict(
    train=trainfile,
    val=valfile,
    test=testfile,
    embedding=embeddingfile,
)

In [8]:
# Hyperparameter set with CNN-related settings for the PpiDatasetFactory dataset.
# NOTE(review): `hyperparameters` is reassigned in the job-selection cell further down before
# the estimator is created, so this set is not used by the training job in this notebook.
# An earlier variant of this set used the keys lstmhiddensize=8, fclayersize=10, numlayers=3,
# cnn_output=100 and poolingkernelsize=3 in place of the CNN-specific keys below.
hyperparameters = {
    "dataset": "PpiDatasetFactory",
    "earlystoppingpatience": 20,
    # Only the file names (last path segment of the S3 URIs) are passed to the script.
    "trainfile": trainfile.rsplit("/", 1)[-1],
    "valfile": valfile.rsplit("/", 1)[-1],
    "embeddingfile": embeddingfile.rsplit("/", 1)[-1],
    "embeddim": embed_dim,
    "batchsize": "32",
    "epochs": "1000",
    "log-level": "INFO",
    "dropout_rate_cnn": 0.5,
    "pooling_kernel_size": 3,
    "pool_stride": 2,
    "cnn_kernel_size": 3,
    "cnn_num_layers": 3,
    "cnn_output": 64,
    "learningrate": 0.001,
}

In [9]:

# Hyperparameters for the BiLSTM network (no-position variant) on the multiclass dataset.
BilstmNetworkFactoryhyperparametersNoPos = {
    "dataset": "PpiMulticlassDatasetFactory",
    "network": "RelationExtractorBiLstmNetworkFactoryNoPos",
    # Only the file names (last path segment of the S3 URIs) are passed to the script.
    "trainfile": trainfile.rsplit("/", 1)[-1],
    "valfile": valfile.rsplit("/", 1)[-1],
    "testfile": testfile.rsplit("/", 1)[-1],
    "embeddingfile": embeddingfile.rsplit("/", 1)[-1],
    "embeddim": embed_dim,
    "batchsize": "64",
    "epochs": "1000",
    "earlystoppingpatience": 20,
    "log-level": "INFO",
    "learningrate": 0.001,
    "lstm_dropout": 0.5,
    "lstm_num_layers": 1,
    "lstm_hidden_size": 400,
    "fc_drop_out_rate": 0.5,
    "train_val_vocab_merge": 1,
}

In [10]:
# Metrics extracted from the training log. The training script emits lines such as
# "###score: val_loss### 12.75115191936493" and each regex captures the number that follows.
# Raw strings are used so that "\d" reaches the regex engine as written instead of being
# an invalid Python string escape (which triggers a Deprecation/SyntaxWarning).
# NOTE(review): the pattern does not cover exponent notation (for "1e-05" it captures only
# "1") - confirm the training script never logs scores in that form.
metric_definitions = [
    {"Name": "TrainLoss",
     "Regex": r"###score: train_loss### (\d*[.]?\d*)"},
    {"Name": "ValidationLoss",
     "Regex": r"###score: val_loss### (\d*[.]?\d*)"},
    {"Name": "TrainFScore",
     "Regex": r"###score: train_fscore### (\d*[.]?\d*)"},
    {"Name": "ValidationFScore",
     "Regex": r"###score: val_fscore### (\d*[.]?\d*)"},
]

In [11]:
# Record which commit of the local checkout this run corresponds to:
# the first command prints the first line of the latest log entry (the commit id),
# the second prints its fifth line (the commit subject in the output captured below).
!git log -1 | head -1
!git log -1 | head -5 | tail -1

commit 7e6e36a89f9d7bd04a37eeb853c024879eb581f1
    Fix confusion matric


In [12]:
# Git repository and branch passed to the estimator as the source of the training code.
git_config = dict(
    repo='https://github.com/elangovana/PPI-typed-relation-extractor.git',
    branch='master',
)

In [13]:
# Job name prefix and the hyperparameter set used by the training job launched below.
base_job_name = "ppimulticlass-bilstm"
# Assign a different hyperparameter dictionary here to train another configuration.
# `inputs` is used unchanged from the cell that defines it.
hyperparameters = BilstmNetworkFactoryhyperparametersNoPos



In [14]:
# Display the hyperparameters that will be passed to the estimator, as a check before launch.
hyperparameters

{'batchsize': '64',
 'dataset': 'PpiMulticlassDatasetFactory',
 'earlystoppingpatience': 20,
 'embeddim': 200,
 'embeddingfile': 'PubMed-shuffle-win-2.bin.txt',
 'epochs': '1000',
 'fc_drop_out_rate': 0.5,
 'learningrate': 0.001,
 'log-level': 'INFO',
 'lstm_dropout': 0.5,
 'lstm_hidden_size': 400,
 'lstm_num_layers': 1,
 'network': 'RelationExtractorBiLstmNetworkFactoryNoPos',
 'testfile': 'test_multiclass.json',
 'train_val_vocab_merge': 1,
 'trainfile': 'train_multiclass.json',
 'valfile': 'val_multiclass.json'}

In [15]:
# Display the S3 input locations that will be passed to estimator.fit().
inputs

{'embedding': 's3://aegovan-data/embeddings/bio_nlp_vec/PubMed-shuffle-win-2.bin.txt',
 'test': 's3://aegovan-data/processed_dataset/test_multiclass.json',
 'train': 's3://aegovan-data/processed_dataset/train_multiclass.json',
 'val': 's3://aegovan-data/processed_dataset/val_multiclass.json'}

In [16]:
from sagemaker.pytorch import PyTorch

# Training job definition: main_train.py from source/algorithms in the git repository,
# run with the custom image on a single instance.
estimator = PyTorch(
    # Source code
    entry_point="main_train.py",
    source_dir="source/algorithms",
    dependencies=[
        "source/algorithms",
        "source/datasets",
        "source/preprocessor",
        "source/modelnetworks",
    ],
    git_config=git_config,
    code_location=s3_code_path,
    # Runtime
    role=role,
    image_name=docker_repo,
    framework_version="1.0.0",
    py_version="py3",
    # Infrastructure
    train_instance_count=1,
    train_instance_type=instance_type,
    train_volume_size=30,
    # train_use_spot_instances=True,
    # Job configuration
    hyperparameters=hyperparameters,
    metric_definitions=metric_definitions,
    output_path=s3_output_path,
    base_job_name=base_job_name,
)

In [17]:
# Launch the training job with the S3 inputs; the captured output below is the job's log.
estimator.fit(inputs)

2019-11-02 08:09:53 Starting - Starting the training job...
2019-11-02 08:09:54 Starting - Launching requested ML instances...
2019-11-02 08:10:50 Starting - Preparing the instances for training......
2019-11-02 08:11:48 Downloading - Downloading input data...
2019-11-02 08:12:50 Training - Downloading the training image.........
2019-11-02 08:14:43 Training - Training image download completed. Training in progress..[31mbash: cannot set terminal process group (-1): Inappropriate ioctl for device[0m
[31mbash: no job control in this shell[0m
[31m2019-11-02 08:14:44,578 sagemaker-containers INFO     Imported framework sagemaker_pytorch_container.training[0m
[31m2019-11-02 08:14:44,621 sagemaker_pytorch_container.training INFO     Block until all host DNS lookups succeed.[0m
[31m2019-11-02 08:14:44,622 sagemaker_pytorch_container.training INFO     Invoking user training script.[0m
[31m2019-11-02 08:14:44,942 sagemaker-containers INFO     Module main_train does not provide a setu

[31m2019-11-02 08:17:20,319 - algorithms.PretrainedEmbedderLoaderMinimum - INFO - Total words in original embedding handle is 2231686[0m
[31m2019-11-02 08:17:20,319 - algorithms.PretrainedEmbedderLoaderMinimum - INFO - Total words in final embedding is 5306[0m
[31m2019-11-02 08:17:20,319 - algorithms.PretrainedEmbedderLoaderMinimum - INFO - Total words randomly initialized is 990[0m
[31m2019-11-02 08:17:20,324 - algorithms.transform_sentence_tokeniser - INFO - Running sentence tokenisor [0m
[31m2019-11-02 08:19:06,273 - algorithms.transform_sentence_tokeniser - INFO - Completed  sentence tokenisor [0m
[31m2019-11-02 08:19:06,279 - algorithms.transform_text_index - INFO - Transforming TransformTextToIndex[0m
[31m2019-11-02 08:19:06,843 - algorithms.transform_text_index - INFO - Total number of unknown occurances 34574[0m
[31m2019-11-02 08:19:06,843 - algorithms.transform_text_index - INFO - Completed TransformTextToIndex[0m
[31m2019-11-02 08:19:06,843 - algorithms.trans

[31m2019-11-02 08:19:43,957 - algorithms.Train - INFO - Train set result details:[0m
[31m2019-11-02 08:19:43,962 - algorithms.result_writer - INFO - Confusion matrix, full output in /opt/ml/output/data/predictedvsactual_e80e1e20-d0f4-4c1c-9ce9-d95e399c2ec2_20191102_081943.csv: [0m
[31m[[  25    0    0    0    0    4    0    0]
 [   0    4    0    0    0    0    0    0]
 [   0    0   92    0    0   16    0    0]
 [   0    0    0   10    0    1    1    0]
 [   0    0    0    0   45    3    0    0]
 [  12    1   40    4    5 2180  142    0]
 [   0    0    5    0    1  175  448    0]
 [   0    0    0    0    0    5    0    4]][0m
[31m2019-11-02 08:19:43,967 - algorithms.Train - INFO - Train set result details: 0.7925058885054681[0m
[31m2019-11-02 08:19:43,967 - algorithms.Train - INFO - Validation set result details:[0m
[31m2019-11-02 08:19:44,190 - algorithms.result_writer - INFO - Confusion matrix, full output in /opt/ml/output/data/predictedvsactual_73fb70ec-eb4b-4bcd-8044-08

[31m2019-11-02 08:20:12,691 - algorithms.Train - INFO - Train set result details:[0m
[31m2019-11-02 08:20:12,696 - algorithms.result_writer - INFO - Confusion matrix, full output in /opt/ml/output/data/predictedvsactual_2eda36a1-bcf3-435d-aa8a-ac0fca9e15fb_20191102_082012.csv: [0m
[31m[[  29    0    0    0    0    0    0    0]
 [   0    4    0    0    0    0    0    0]
 [   0    0  107    0    0    1    0    0]
 [   0    0    0   11    0    0    1    0]
 [   0    0    0    0   47    1    0    0]
 [  16    0   38    2    3 2261   64    0]
 [   0    0    6    0    1  125  497    0]
 [   0    0    0    0    0    0    0    9]][0m
[31m2019-11-02 08:20:12,701 - algorithms.Train - INFO - Train set result details: 0.9027171754067362[0m
[31m2019-11-02 08:20:12,701 - algorithms.Train - INFO - Validation set result details:[0m
[31m2019-11-02 08:20:12,922 - algorithms.result_writer - INFO - Confusion matrix, full output in /opt/ml/output/data/predictedvsactual_8051e034-b147-40d2-a042-10

[31m2019-11-02 08:20:37,224 - algorithms.Train - INFO - Train set result details:[0m
[31m2019-11-02 08:20:37,229 - algorithms.result_writer - INFO - Confusion matrix, full output in /opt/ml/output/data/predictedvsactual_f852ada0-596d-4275-838b-6d02da515bbf_20191102_082037.csv: [0m
[31m[[  27    0    0    0    0    2    0    0]
 [   0    4    0    0    0    0    0    0]
 [   0    0  104    0    0    3    1    0]
 [   0    0    0   11    0    0    1    0]
 [   0    0    0    0   45    3    0    0]
 [   4    0   11    0    1 2345   23    0]
 [   0    0    4    0    1  182  442    0]
 [   0    0    0    0    0    0    0    9]][0m
[31m2019-11-02 08:20:37,233 - algorithms.Train - INFO - Train set result details: 0.9350256106302169[0m
[31m2019-11-02 08:20:37,233 - algorithms.Train - INFO - Validation set result details:[0m
[31m2019-11-02 08:20:37,460 - algorithms.result_writer - INFO - Confusion matrix, full output in /opt/ml/output/data/predictedvsactual_a6bb8355-ecc4-4201-b438-80

[31m2019-11-02 08:21:01,780 - algorithms.Train - INFO - Train set result details:[0m
[31m2019-11-02 08:21:01,785 - algorithms.result_writer - INFO - Confusion matrix, full output in /opt/ml/output/data/predictedvsactual_d2ce80c1-9a60-484a-bbb0-6313864ef7d3_20191102_082101.csv: [0m
[31m[[  29    0    0    0    0    0    0    0]
 [   0    4    0    0    0    0    0    0]
 [   0    0  105    0    0    2    1    0]
 [   0    0    0   11    0    0    1    0]
 [   0    0    0    0   47    1    0    0]
 [   7    0    6    0    2 2356   12    1]
 [   0    0    3    0    1  146  479    0]
 [   0    0    0    0    0    0    0    9]][0m
[31m2019-11-02 08:21:01,789 - algorithms.Train - INFO - Train set result details: 0.9398695239163951[0m
[31m2019-11-02 08:21:01,789 - algorithms.Train - INFO - Validation set result details:[0m
[31m2019-11-02 08:21:02,012 - algorithms.result_writer - INFO - Confusion matrix, full output in /opt/ml/output/data/predictedvsactual_704d6868-5dc2-47ec-97b2-99

[31m2019-11-02 08:21:22,257 - algorithms.Train - INFO - Train set result details:[0m
[31m2019-11-02 08:21:22,262 - algorithms.result_writer - INFO - Confusion matrix, full output in /opt/ml/output/data/predictedvsactual_19d9296c-84b6-4621-9587-c39cc2221a14_20191102_082122.csv: [0m
[31m[[  29    0    0    0    0    0    0    0]
 [   0    4    0    0    0    0    0    0]
 [   0    0  106    0    0    2    0    0]
 [   0    0    0   12    0    0    0    0]
 [   0    0    0    0   48    0    0    0]
 [   4    0    5    0    3 2355   16    1]
 [   0    0    5    1    1  150  472    0]
 [   0    0    0    0    0    0    0    9]][0m
[31m2019-11-02 08:21:22,267 - algorithms.Train - INFO - Train set result details: 0.9446743719759252[0m
[31m2019-11-02 08:21:22,267 - algorithms.Train - INFO - Validation set result details:[0m
[31m2019-11-02 08:21:22,492 - algorithms.result_writer - INFO - Confusion matrix, full output in /opt/ml/output/data/predictedvsactual_04119654-e1e9-436f-802c-bb

[31m2019-11-02 08:24:11,631 - algorithms.PretrainedEmbedderLoaderMinimum - INFO - Total words in original embedding handle is 2231686[0m
[31m2019-11-02 08:24:11,632 - algorithms.PretrainedEmbedderLoaderMinimum - INFO - Total words in final embedding is 5306[0m
[31m2019-11-02 08:24:11,632 - algorithms.PretrainedEmbedderLoaderMinimum - INFO - Total words randomly initialized is 990[0m
[31m2019-11-02 08:24:11,635 - algorithms.transform_sentence_tokeniser - INFO - Running sentence tokenisor [0m
[31m2019-11-02 08:25:57,689 - algorithms.transform_sentence_tokeniser - INFO - Completed  sentence tokenisor [0m
[31m2019-11-02 08:25:57,696 - algorithms.transform_text_index - INFO - Transforming TransformTextToIndex[0m
[31m2019-11-02 08:25:58,255 - algorithms.transform_text_index - INFO - Total number of unknown occurances 34574[0m
[31m2019-11-02 08:25:58,255 - algorithms.transform_text_index - INFO - Completed TransformTextToIndex[0m
[31m2019-11-02 08:25:58,256 - algorithms.trans

[31m2019-11-02 08:26:28,619 - algorithms.Train - INFO - Train set result details:[0m
[31m2019-11-02 08:26:28,624 - algorithms.result_writer - INFO - Confusion matrix, full output in /opt/ml/output/data/predictedvsactual_6a02c272-e20b-4d46-993e-fbedc302d213_20191102_082628.csv: [0m
[31m[[  22    0    0    0    0    7    0    0]
 [   0    4    0    0    0    0    0    0]
 [   0    0   90    0    0   16    2    0]
 [   0    0    0   10    0    1    1    0]
 [   0    0    0    0   42    4    2    0]
 [   0    0    1    0    0 2360   23    0]
 [   0    0    1    0    0  149  479    0]
 [   0    0    0    0    0    0    0    9]][0m
[31m2019-11-02 08:26:28,628 - algorithms.Train - INFO - Train set result details: 0.9259542303729447[0m
[31m2019-11-02 08:26:28,628 - algorithms.Train - INFO - Validation set result details:[0m
[31m2019-11-02 08:26:28,854 - algorithms.result_writer - INFO - Confusion matrix, full output in /opt/ml/output/data/predictedvsactual_380c0396-79ec-44d8-bd01-8e

[31m2019-11-02 08:26:53,202 - algorithms.Train - INFO - Train set result details:[0m
[31m2019-11-02 08:26:53,207 - algorithms.result_writer - INFO - Confusion matrix, full output in /opt/ml/output/data/predictedvsactual_6913ef82-e39f-454d-a0ae-2e7f2456ccd2_20191102_082653.csv: [0m
[31m[[  26    0    0    0    0    3    0    0]
 [   0    4    0    0    0    0    0    0]
 [   0    0  102    0    0    3    3    0]
 [   0    0    0    9    0    2    1    0]
 [   0    0    0    0   35   11    2    0]
 [   0    0    0    0    0 2349   35    0]
 [   0    0    0    0    0   68  561    0]
 [   0    0    0    0    0    0    0    9]][0m
[31m2019-11-02 08:26:53,211 - algorithms.Train - INFO - Train set result details: 0.9379427958797729[0m
[31m2019-11-02 08:26:53,211 - algorithms.Train - INFO - Validation set result details:[0m
[31m2019-11-02 08:26:53,437 - algorithms.result_writer - INFO - Confusion matrix, full output in /opt/ml/output/data/predictedvsactual_2d97454d-f944-4dfb-aa1c-46

[31m2019-11-02 08:27:22,166 - algorithms.result_writer - INFO - Confusion matrix, full output in /opt/ml/output/data/predictedvsactual_b3d837bd-1925-4957-8d7f-3c61be0c094f_20191102_082722.csv: [0m
[31m[[  0   0   0   0   0   4   0]
 [  0   0   0   0   8   3   0]
 [  0   0   0   0   1   0   0]
 [  0   0   0   0   6   3   0]
 [  0   6   0   0 210 100   0]
 [  0   2   0   0  50  27   0]
 [  0   0   0   0   0   1   0]][0m
[31m2019-11-02 08:27:22,167 - algorithms.Train - INFO - Validation set result details: 0.13707254639206262 [0m
[31m2019-11-02 08:27:22,167 - algorithms.Train - INFO - Run     69    16       867     4/51          8% 4.266659 12.751152       0.9716       0.1371[0m
[31m###score: train_loss### 4.266659105196595[0m
[31m###score: val_loss### 12.75115191936493[0m
[31m###score: train_fscore### 0.9716086859723365[0m
[31m###score: val_fscore### 0.13707254639206262[0m
[31m2019-11-02 08:27:26,028 - algorithms.Train - INFO - Train set result details:[0m
[31m2019-11-

[31m2019-11-02 08:27:50,600 - algorithms.Train - INFO - Train set result details:[0m
[31m2019-11-02 08:27:50,605 - algorithms.result_writer - INFO - Confusion matrix, full output in /opt/ml/output/data/predictedvsactual_1cd791c9-905b-4aa2-af54-4a8a867d9438_20191102_082750.csv: [0m
[31m[[  29    0    0    0    0    0    0    0]
 [   0    4    0    0    0    0    0    0]
 [   0    0  103    0    0    3    2    0]
 [   0    0    0   11    0    0    1    0]
 [   0    0    0    0   44    2    2    0]
 [   0    0    0    0    0 2302   82    0]
 [   0    0    1    0    0   20  608    0]
 [   0    0    0    0    0    0    0    9]][0m
[31m2019-11-02 08:27:50,610 - algorithms.Train - INFO - Train set result details: 0.9725572243324225[0m
[31m2019-11-02 08:27:50,610 - algorithms.Train - INFO - Validation set result details:[0m
[31m2019-11-02 08:27:50,834 - algorithms.result_writer - INFO - Confusion matrix, full output in /opt/ml/output/data/predictedvsactual_6b6ff0dc-88f0-4bf8-b9e1-15

[31m2019-11-02 08:28:19,496 - algorithms.Train - INFO - Train set result details:[0m
[31m2019-11-02 08:28:19,502 - algorithms.result_writer - INFO - Confusion matrix, full output in /opt/ml/output/data/predictedvsactual_83560fe3-2520-42e2-9707-c2ab5637820b_20191102_082819.csv: [0m
[31m[[  29    0    0    0    0    0    0    0]
 [   0    4    0    0    0    0    0    0]
 [   0    0  104    0    0    2    2    0]
 [   0    0    0   11    0    0    1    0]
 [   0    0    0    0   48    0    0    0]
 [   1    0    3    0    4 2249  127    0]
 [   0    0    1    0    1    8  619    0]
 [   0    0    0    0    0    0    0    9]][0m
[31m2019-11-02 08:28:19,506 - algorithms.Train - INFO - Train set result details: 0.9650255342564411[0m
[31m2019-11-02 08:28:19,506 - algorithms.Train - INFO - Validation set result details:[0m
[31m2019-11-02 08:28:19,735 - algorithms.result_writer - INFO - Confusion matrix, full output in /opt/ml/output/data/predictedvsactual_a58c20b8-926f-4b36-ad5a-8e

[31m2019-11-02 08:28:40,215 - algorithms.Train - INFO - Train set result details:[0m
[31m2019-11-02 08:28:40,220 - algorithms.result_writer - INFO - Confusion matrix, full output in /opt/ml/output/data/predictedvsactual_5bd4b0d4-0acf-46b7-906a-a37d8b02671d_20191102_082840.csv: [0m
[31m[[  28    0    0    0    0    1    0    0]
 [   0    4    0    0    0    0    0    0]
 [   0    0  104    0    0    0    4    0]
 [   0    0    0   11    0    0    1    0]
 [   0    0    0    0   44    2    2    0]
 [   0    0    0    0    0 2327   57    0]
 [   0    0    0    0    0   10  619    0]
 [   0    0    0    0    0    0    0    9]][0m
[31m2019-11-02 08:28:40,224 - algorithms.Train - INFO - Train set result details: 0.9756764130212472[0m
[31m2019-11-02 08:28:40,225 - algorithms.Train - INFO - Validation set result details:[0m
[31m2019-11-02 08:28:40,454 - algorithms.result_writer - INFO - Confusion matrix, full output in /opt/ml/output/data/predictedvsactual_4f2e58dc-872e-44c6-8eb8-d7

### HPO

In [18]:
# Metric the tuning job optimises; the name matches the "ValidationFScore" entry
# in metric_definitions.
objective_metric_name = 'ValidationFScore'

In [19]:
from sagemaker.tuner import HyperparameterTuner, IntegerParameter, CategoricalParameter, ContinuousParameter
# Integer search ranges explored by the tuning job.
# NOTE(review): these key names match the older, commented-out hyperparameter set
# (lstmhiddensize / fclayersize / numlayers / poolingkernelsize), not the lstm_* / cnn_* keys
# used by the active sets above - confirm the training script still accepts them.
hyperparameter_ranges = {
    "lstmhiddensize": IntegerParameter(2, 200),  # alt: ContinuousParameter(0.01, 0.2)
    "fclayersize": IntegerParameter(2, 50),
    "numlayers": IntegerParameter(1, 10),
    "poolingkernelsize": IntegerParameter(2, 10),
}

In [20]:
# Static hyperparameters shared by every job of the tuning run; the tuned values come
# from hyperparameter_ranges.
# NOTE(review): "dataset" is PpiDatasetFactory while trainfile/valfile currently point at
# the *_multiclass.json splits - confirm this combination is intended.
hyperparameters = {
    # Only the file names (last path segment of the S3 URIs) are passed to the script.
    "trainfile": trainfile.split("/")[-1],
    "valfile": valfile.split("/")[-1],
    "embeddingfile": embeddingfile.split("/")[-1],
    # Derived from embed_dim (kept as a string, i.e. "200" today) so the value cannot
    # drift from the configured embedding dimension when the embedding file is changed.
    "embeddim": str(embed_dim),
    "dataset": "PpiDatasetFactory",
    "earlystoppingpatience": 20,
    "epochs": 100,
}

In [21]:

# Estimator used as the template for the hyperparameter tuning jobs.
# NOTE(review): unlike the training estimator earlier in the notebook, git_config is disabled
# here and 'source/modelnetworks' is not listed in dependencies - confirm both are intentional.
estimator = PyTorch(
    # Source code
    entry_point="main_train.py",
    source_dir="source/algorithms",
    dependencies=[
        "source/algorithms",
        "source/datasets",
        "source/preprocessor",
    ],
    # git_config=git_config,
    code_location=s3_code_path,
    # Runtime
    role=role,
    image_name=docker_repo,
    framework_version="1.0.0",
    py_version="py3",
    # Infrastructure
    train_instance_count=1,
    train_instance_type=instance_type,
    train_volume_size=30,
    # train_use_spot_instances=True,
    # Job configuration
    hyperparameters=hyperparameters,
    metric_definitions=metric_definitions,
    output_path=s3_output_path,
    base_job_name="hpo-ppi-extractor",
)

In [22]:
# Random-search tuning run: up to 50 training jobs, at most 4 running in parallel,
# optimising the objective metric over hyperparameter_ranges.
tuner = HyperparameterTuner(
    estimator=estimator,
    objective_metric_name=objective_metric_name,
    hyperparameter_ranges=hyperparameter_ranges,
    metric_definitions=metric_definitions,
    strategy="Random",
    max_jobs=50,
    max_parallel_jobs=4,
    base_tuning_job_name="hpo-ppi-extractor",
)
# Start the tuning run with the same S3 inputs as the single training job.
tuner.fit(inputs)