### Set up

#### 1. Set up account and role

In [1]:
#!pip install sagemaker==1.39.0

In [2]:
import sagemaker
import boto3

# Default SageMaker session for this notebook.
sagemaker_session = sagemaker.Session()

# Account id of the calling identity (via STS) and the region of the default boto3 session.
caller_identity = boto3.client('sts').get_caller_identity()
account_id = caller_identity.get('Account')
region = boto3.session.Session().region_name

# The execution role ARN is assembled from the account id instead of being looked up.
# Alternative lookup:
# role = sagemaker.get_execution_role()
role = "arn:aws:iam::{account}:role/service-role/AmazonSageMaker-ExecutionRole-20190118T115449".format(
    account=account_id
)


#### 2. Set up image and instance type

In [3]:
# Name and tag of the custom training image (combined with the ECR registry host in the next cell).
pytorch_custom_image_name = "ppi-extractor:gpu-1.0.0-201910130520"

# Instance type requested for training jobs.
instance_type = "ml.p3.8xlarge"

In [4]:
# Full ECR image URI: <account>.dkr.ecr.<region>.amazonaws.com/<name:tag>
docker_repo = "{account}.dkr.ecr.{region}.amazonaws.com/{image}".format(
    account=account_id,
    region=region,
    image=pytorch_custom_image_name,
)

#### 3. Configure train, test and validation datasets

In [5]:
# S3 bucket that the dataset, embedding, result and code paths below are built from.
bucket = "aegovan-data"

In [6]:
# --- AIMed datasets -------------------------------------------------------
# Plain and preprocessed variants of the full AIMed training file.
plain_trainfile = "s3://{bucket}/aimed/AIMedFull.json".format(bucket=bucket)
trainfile = "s3://{bucket}/aimed/AIMedFull_preprocessed.json".format(bucket=bucket)
valfile = "s3://{bucket}/aimed/AIMedval_preprocessed.json".format(bucket=bucket)

# Alternative dataset files (currently disabled):
# trainfile = "s3://{}/aimed/AIMedFull_Ylhsieh.json".format(bucket)
# trainfile = "s3://{}/aimed/AIMedtrain_pubmedoverlap.json".format(bucket)
# valfile = "s3://{}/aimed/AIMedval_pubmedoverlap.json".format(bucket)

# --- PubMed word embeddings -----------------------------------------------
# Alternative embedding files (currently disabled):
# embeddingfile = "s3://{}/embeddings/PubMed-and-PMC-w2v.bin.txt".format(bucket)
# embeddingfile = "s3://{}/embeddings/bio_nlp_vec/PubMed-shuffle-win-30.bin.txt".format(bucket)
embeddingfile = "s3://{bucket}/embeddings/bio_nlp_vec/PubMed-shuffle-win-2.bin.txt".format(bucket=bucket)
embed_dim = 200

# --- Collobert embeddings -------------------------------------------------
coll_embeddingfile = "s3://{bucket}/embeddings/collobert/words_vocab_collabert.txt".format(bucket=bucket)
coll_embed_dim = 50

# --- Job output and uploaded-code locations -------------------------------
s3_output_path = "s3://{bucket}/results/".format(bucket=bucket)
s3_code_path = "s3://{bucket}/aimed_code".format(bucket=bucket)

### Start training

In [7]:
# Input channels: preprocessed train file + PubMed embedding.
# The "val" channel (valfile) is intentionally left out.
pub_inputs = dict(
    train=trainfile,
    embedding=embeddingfile,
)

In [8]:
# Input channels: plain (non-preprocessed) train file + PubMed embedding.
# The "val" channel (valfile) is intentionally left out.
plain_pub_inputs = dict(
    train=plain_trainfile,
    embedding=embeddingfile,
)

In [9]:
# Input channels: preprocessed train file + Collobert embedding.
# The "val" channel (valfile) is intentionally left out.
coll_inputs = dict(
    train=trainfile,
    embedding=coll_embeddingfile,
)

In [10]:
# hyperparameters = {
#     "dataset":"PpiAimedDatasetFactory",
#     "trainfile":trainfile.split("/")[-1],
#     "valfile":valfile.split("/")[-1],
#     "embeddingfile":embeddingfile.split("/")[-1],
#     "embeddim":embed_dim,
#     "batchsize": "32",
#     "epochs" : "1000",   
#     "log-level" : "INFO",
#     "lstmhiddensize": 100,
#     "fclayersize": 15,
#     "numlayers":7,
#     "poolingkernelsize":10,
#     "learningrate":.001,
#     "cnn_output":100,
#     "earlystoppingpatience":20
# }

In [11]:
# CNN network without position features, Collobert embeddings.
# File hyperparameters carry only the base name of the corresponding S3 object.
choi_CnnNetworkNoPosFactoryhyperparameters = {
    # data / model selection
    "dataset": "PpiAimedDatasetPreprocessedFactory",
    "network": "RelationExtractorCnnNetworkNoPosFactory",
    "trainfile": trainfile.rsplit("/", 1)[-1],
    "embeddingfile": coll_embeddingfile.rsplit("/", 1)[-1],
    "embeddim": coll_embed_dim,
    # training loop
    "batchsize": "32",
    "epochs": "1000",
    "log-level": "INFO",
    "learningrate": 0.001,
    # architecture / regularisation
    "cnn_output": 100,
    "earlystoppingpatience": 20,
    "dropout_rate_cnn": 0.2,
    "fc_drop_out_rate": 0.5,
    "train_val_vocab_merge": 1,
}

In [12]:
# CNN network with position features, Collobert embeddings.
# File hyperparameters carry only the base name of the corresponding S3 object.
choi_CnnPosNetworkFactory = {
    # data / model selection
    "dataset": "PpiAimedDatasetPreprocessedFactory",
    "network": "RelationExtractorCnnPosNetworkFactory",
    "trainfile": trainfile.rsplit("/", 1)[-1],
    "embeddingfile": coll_embeddingfile.rsplit("/", 1)[-1],
    "embeddim": coll_embed_dim,
    # training loop
    "batchsize": "32",
    "epochs": "1000",
    "log-level": "INFO",
    "learningrate": 0.001,
    # architecture / regularisation
    "cnn_output": 100,
    "earlystoppingpatience": 20,
    "dropout_rate_cnn": 0.2,
    "fc_drop_out_rate": 0.5,
    "train_val_vocab_merge": 1,
}

In [13]:
# BiLSTM network, PubMed embeddings.
# NOTE(review): this config passes "valfile", but every input-channel dict in this
# notebook has its "val" entry commented out — confirm a validation channel is
# supplied before using this config.
BilstmNetworkFactoryhyperparameters = {
    # data / model selection
    "dataset": "PpiAimedDatasetPreprocessedFactory",
    "network": "RelationExtractorBiLstmNetworkFactory",
    "trainfile": trainfile.rsplit("/", 1)[-1],
    "valfile": valfile.rsplit("/", 1)[-1],
    "embeddingfile": embeddingfile.rsplit("/", 1)[-1],
    "embeddim": embed_dim,
    # training loop
    "batchsize": "50",
    "epochs": "1000",
    "earlystoppingpatience": 20,
    "log-level": "INFO",
    "learningrate": 0.001,
    # architecture / regularisation
    "lstm_dropout": 0.5,
    "pooling_kernel_size": 3,
    "lstm_num_layers": 3,
    "lstm_hidden_size": 64,
    "fc_layer_size": 64,
    "fc_drop_out_rate": 0.5,
}

In [14]:
# Baseline BiLSTM experiment on the plain (non-preprocessed) AIMed file.
# Its "trainfile" is the base name of plain_trainfile, which is the file staged
# by the plain_pub_inputs channel dict.
base_experiment_bilstm_pos = {
    # data / model selection
    "dataset": "PpiAimedDatasetFactory",
    "network": "RelationExtractorDynamicEntityBiLstmNetworkFactory",
    "trainfile": plain_trainfile.rsplit("/", 1)[-1],
    "embeddingfile": embeddingfile.rsplit("/", 1)[-1],
    "embeddim": embed_dim,
    # training loop
    "batchsize": "50",
    "epochs": "1000",
    "earlystoppingpatience": 20,
    "log-level": "INFO",
    "learningrate": 0.001,
    # architecture / regularisation
    "lstm_dropout": 0.5,
    "pooling_kernel_size": 3,
    "lstm_num_layers": 3,
    "lstm_hidden_size": 64,
    "fc_layer_size": 64,
    "fc_drop_out_rate": 0.5,
    # vocabulary / embedding handling flags (all disabled)
    "train_val_vocab_merge": 0,
    "use_min_dict": 0,
    "fine_tune_embeddings": 0,
}

#### BiLSTM network: F-scores obtained with the following hyperparameters

```bash
/usr/bin/python -m main_train_k_fold --batchsize 64 --dataset PpiAimedDatasetPreprocessedFactory --earlystoppingpatience 20 --embeddim 200 --embeddingfile PubMed-shuffle-win-2.bin.txt --epochs 1000 --fc_drop_out_rate 0.5 --learningrate 0.001 --log-level INFO --lstm_dropout 0.5 --lstm_hidden_size 400 --lstm_num_layers 1 --network RelationExtractorBiLstmNetworkFactoryNoPos --train_val_vocab_merge 1 --trainfile AIMedFull_preprocessed.json
```

**precision, recall, fscore, support** for 10 folds

```python
[(0.7528089887640449, 0.7528089887640449, 0.752808988764045, None),
 (0.6915887850467289, 0.7628865979381443, 0.7254901960784315, None),
 (0.7586206896551724, 0.6226415094339622, 0.6839378238341969, None),
 (0.7727272727272727, 0.6296296296296297, 0.6938775510204083, None),
 (0.75, 0.6990291262135923, 0.7236180904522612, None),
 (0.7525773195876289, 0.7525773195876289, 0.752577319587629, None),
 (0.6851851851851852, 0.74, 0.7115384615384615, None),
 (0.7142857142857143, 0.7476635514018691, 0.730593607305936, None),
 (0.7560975609756098, 0.6326530612244898, 0.6888888888888889, None),
 (0.6788990825688074, 0.7789473684210526, 0.7254901960784315, None)]
```

**tn, fp, fn, tp** for 10 folds


```python
[(473, 22, 22, 67), 
 (454, 33, 23, 74), 
 (457, 21, 40, 66), 
 (456, 20, 40, 68), 
 (456, 24, 31, 72), 
 (462, 24, 24, 73),
 (449, 34, 26, 74), 
 (444, 32, 27, 80), 
 (465, 20, 36, 62),
 (453, 35, 21, 74)]
```

See aimed-ppi-extractor-2019-10-20-04-33-12-355-copy-10-20


The same run with `--use_min_dict 1` added:

```bash
main_train_k_fold --batchsize 64 --dataset PpiAimedDatasetPreprocessedFactory --earlystoppingpatience 20 --embeddim 200 --embeddingfile PubMed-shuffle-win-2.bin.txt --epochs 1000 --fc_drop_out_rate 0.5 --learningrate 0.001 --log-level INFO --lstm_dropout 0.5 --lstm_hidden_size 400 --lstm_num_layers 1 --network RelationExtractorBiLstmNetworkFactoryNoPos --train_val_vocab_merge 1 --trainfile AIMedFull_preprocessed.json --use_min_dict 1
```


In [15]:
# BiLSTM network without position features, PubMed embeddings.
# No "valfile" is passed (disabled: "valfile": valfile.split("/")[-1]).
BilstmNetworkFactoryhyperparametersNoPos = {
    # data / model selection
    "dataset": "PpiAimedDatasetPreprocessedFactory",
    "network": "RelationExtractorBiLstmNetworkFactoryNoPos",
    "trainfile": trainfile.rsplit("/", 1)[-1],
    "embeddingfile": embeddingfile.rsplit("/", 1)[-1],
    "embeddim": embed_dim,
    # training loop
    "batchsize": "64",
    "epochs": "1000",
    "earlystoppingpatience": 20,
    "log-level": "INFO",
    "learningrate": 0.001,
    # architecture / regularisation
    "lstm_dropout": 0.5,
    "lstm_num_layers": 1,
    "lstm_hidden_size": 400,
    "fc_drop_out_rate": 0.5,
    # vocabulary handling flags
    "use_min_dict": 1,
    "train_val_vocab_merge": 1,
}

In [16]:


# BiLSTM (no position features) on the "Ylhsieh" dataset factory.
# No "valfile" is passed (disabled: "valfile": valfile.split("/")[-1]).
# NOTE(review): "trainfile" is derived from `trainfile`, which currently points at
# AIMedFull_preprocessed.json; the AIMedFull_Ylhsieh.json assignment is commented
# out in the dataset cell — confirm this factory is meant to read the active file.
PpiAimedDatasetFactoryYlhsiehBiLstmNetwork = {
    # data / model selection
    "dataset": "PpiAimedDatasetFactoryYlhsieh",
    "network": "RelationExtractorBiLstmNetworkFactoryNoPos",
    "trainfile": trainfile.rsplit("/", 1)[-1],
    "embeddingfile": embeddingfile.rsplit("/", 1)[-1],
    "embeddim": embed_dim,
    # training loop
    "batchsize": "16",
    "epochs": "1000",
    "earlystoppingpatience": 20,
    "log-level": "INFO",
    "learningrate": 0.001,
    # architecture / regularisation
    "lstm_dropout": 0.5,
    "lstm_num_layers": 1,
    "lstm_hidden_size": 400,
    "fc_drop_out_rate": 0.5,
    "train_val_vocab_merge": 1,
}

In [17]:
# ResNet-style CNN with position features, PubMed embeddings.
# No "valfile" is passed (disabled: "valfile": valfile.split("/")[-1]).
ResnetCnnPosNetworkFactoryhyperparameters = {
    # data / model selection
    "dataset": "PpiAimedDatasetPreprocessedFactory",
    "network": "RelationExtractorResnetCnnPosNetworkFactory",
    "earlystoppingpatience": 20,
    "trainfile": trainfile.rsplit("/", 1)[-1],
    "embeddingfile": embeddingfile.rsplit("/", 1)[-1],
    "embeddim": embed_dim,
    # training loop
    "batchsize": "8",
    "epochs": "1000",
    "log-level": "INFO",
    # convolution / pooling
    "dropout_rate_cnn": 0.5,
    "pooling_kernel_size": 3,
    "pool_stride": 2,
    "cnn_kernel_size": 3,
    "cnn_num_layers": 3,
    "cnn_output": 64,
    # optimiser
    "learningrate": 0.00001,
    "weight_decay": 0.00001,
    # fully connected head / dropout
    "fc_layer_size": 256,
    "fc_drop_out_rate": 0.5,
    "input_drop_out_rate": 0.2,
}

In [18]:
# Simple ResNet-style CNN with position features, PubMed embeddings.
# No "valfile" is passed (disabled: "valfile": valfile.split("/")[-1]).
# NOTE(review): pairs "PpiAimedDatasetFactory" with the *preprocessed* train file,
# whereas base_experiment_bilstm_pos pairs that factory with plain_trainfile —
# confirm which file this dataset factory expects.
SimpleResnetCnnPosNetworkFactoryhyperparameters = {
    # data / model selection
    "dataset": "PpiAimedDatasetFactory",
    "network": "RelationExtractorSimpleResnetCnnPosNetworkFactory",
    "earlystoppingpatience": 50,
    "trainfile": trainfile.rsplit("/", 1)[-1],
    "embeddingfile": embeddingfile.rsplit("/", 1)[-1],
    "embeddim": embed_dim,
    # training loop
    "batchsize": "32",
    "epochs": "1000",
    "log-level": "INFO",
    # convolution / pooling
    "dropout_rate_cnn": 0.5,
    "pooling_kernel_size": 3,
    "pool_stride": 2,
    "cnn_kernel_size": 3,
    "cnn_num_layers": 2,
    "cnn_output": 64,
    # optimiser
    "learningrate": 0.001,
    "weight_decay": 0.00001,
    # fully connected head / dropout
    "fc_layer_size": 256,
    "fc_drop_out_rate": 0.5,
    "input_drop_out_rate": 0.2,
    "train_val_vocab_merge": 1,
}

In [19]:
# Simple ResNet-style CNN with position features, Collobert embeddings.
# "embeddim" references coll_embed_dim (rather than a literal 50) so the dimension
# stays in sync with coll_embeddingfile, as in the other Collobert configs.
# No "valfile" is passed (disabled: "valfile": valfile.split("/")[-1]).
# NOTE(review): pairs "PpiAimedDatasetFactory" with the *preprocessed* train file,
# whereas base_experiment_bilstm_pos pairs that factory with plain_trainfile —
# confirm which file this dataset factory expects.
SimpleResnetCnnPosNetworkFactoryhyperparameters_coll = {
    # data / model selection
    "dataset": "PpiAimedDatasetFactory",
    "network": "RelationExtractorSimpleResnetCnnPosNetworkFactory",
    "earlystoppingpatience": 50,
    "trainfile": trainfile.split("/")[-1],
    "embeddingfile": coll_embeddingfile.split("/")[-1],
    "embeddim": coll_embed_dim,
    # training loop
    "batchsize": "32",
    "epochs": "1000",
    "log-level": "INFO",
    # convolution / pooling
    "dropout_rate_cnn": 0.5,
    "pooling_kernel_size": 3,
    "pool_stride": 2,
    "cnn_kernel_size": 3,
    "cnn_num_layers": 2,
    "cnn_output": 64,
    # optimiser
    "learningrate": .001,
    "weight_decay": .00001,
    # fully connected head / dropout
    "fc_layer_size": 256,
    "fc_drop_out_rate": 0.5,
    "input_drop_out_rate": 0.2,
    "train_val_vocab_merge": 1,
}

In [20]:
# Smaller variant (v2) of the simple ResNet-style CNN config, PubMed embeddings.
# NOTE(review): this config passes "valfile", but every input-channel dict in this
# notebook has its "val" entry commented out — confirm a validation channel is
# supplied before using this config.
SimpleResnetCnnPosNetworkFactoryhyperparametersv2 = {
    # data / model selection
    "dataset": "PpiAimedDatasetFactory",
    "network": "RelationExtractorSimpleResnetCnnPosNetworkFactory",
    "earlystoppingpatience": 50,
    "trainfile": trainfile.rsplit("/", 1)[-1],
    "valfile": valfile.rsplit("/", 1)[-1],
    "embeddingfile": embeddingfile.rsplit("/", 1)[-1],
    "embeddim": embed_dim,
    # training loop
    "batchsize": "8",
    "epochs": "1000",
    "log-level": "INFO",
    # convolution / pooling
    "dropout_rate_cnn": 0.5,
    "pooling_kernel_size": 3,
    "pool_stride": 2,
    "cnn_kernel_size": 3,
    "cnn_num_layers": 3,
    "cnn_output": 32,
    # optimiser
    "learningrate": 0.001,
    "weight_decay": 0.00001,
    # fully connected head / dropout
    "fc_layer_size": 128,
    "fc_drop_out_rate": 0.5,
    "input_drop_out_rate": 0.2,
    "train_val_vocab_merge": 1,
}

In [21]:
# Metrics scraped from the training log. Each regex captures the number that
# follows a "###score: <name>###" marker.
# Raw strings are used so that "\d" reaches the regex engine unchanged instead of
# relying on Python passing through an invalid string escape (which emits a
# DeprecationWarning/SyntaxWarning on newer interpreters).
metric_definitions = [
    {"Name": "TrainLoss",
     "Regex": r"###score: train_loss### (\d*[.]?\d*)"},
    {"Name": "ValidationLoss",
     "Regex": r"###score: val_loss### (\d*[.]?\d*)"},
    {"Name": "TrainFScore",
     "Regex": r"###score: train_fscore### (\d*[.]?\d*)"},
    {"Name": "ValidationFScore",
     "Regex": r"###score: val_fscore### (\d*[.]?\d*)"},
]

In [22]:
# Print the commit hash line and the subject line of the local checkout's latest commit.
!git log -1 | head -1
!git log -1 | head -5 | tail -1

commit 7e6e36a89f9d7bd04a37eeb853c024879eb581f1
    Fix confusion matric


In [23]:
# Git source for the training code passed to the estimator.
# NOTE(review): the 'commit' pin is commented out, so the job uses whatever is at the
# head of 'master' when it is launched; that is not guaranteed to be the commit
# printed by the `git log` cell. Consider pinning the commit for reproducibility.
git_config = {'repo': 'https://github.com/elangovana/PPI-typed-relation-extractor.git',
              'branch': 'master',
            #  'commit': '58a09e154935248667062a36fdae7d86b86b477c'
             }

In [24]:
# Select the experiment to run: job-name prefix, hyperparameter set and input channels.
# NOTE(review): the job name says "bilstm-pos" while the selected config is the
# NoPos BiLSTM with pub_inputs; base_experiment_bilstm_pos / plain_pub_inputs also
# exist — confirm the name matches the intended experiment.
base_job_name="aimed-base-bilstm-pos"
# Other configs tried here: PpiAimedDatasetFactoryYlhsiehBiLstmNetwork,
# SimpleResnetCnnPosNetworkFactoryhyperparameters
hyperparameters = BilstmNetworkFactoryhyperparametersNoPos
inputs = pub_inputs 


In [25]:
# Echo the selected hyperparameters so they are recorded in the cell output.
hyperparameters

{'batchsize': '64',
 'dataset': 'PpiAimedDatasetPreprocessedFactory',
 'earlystoppingpatience': 20,
 'embeddim': 200,
 'embeddingfile': 'PubMed-shuffle-win-2.bin.txt',
 'epochs': '1000',
 'fc_drop_out_rate': 0.5,
 'learningrate': 0.001,
 'log-level': 'INFO',
 'lstm_dropout': 0.5,
 'lstm_hidden_size': 400,
 'lstm_num_layers': 1,
 'network': 'RelationExtractorBiLstmNetworkFactoryNoPos',
 'train_val_vocab_merge': 1,
 'trainfile': 'AIMedFull_preprocessed.json',
 'use_min_dict': 1}

In [26]:
# Echo the selected input channels so they are recorded in the cell output.
inputs

{'embedding': 's3://aegovan-data/embeddings/bio_nlp_vec/PubMed-shuffle-win-2.bin.txt',
 'train': 's3://aegovan-data/aimed/AIMedFull_preprocessed.json'}

In [27]:
from sagemaker.pytorch import PyTorch

# Training job definition for the k-fold entry point, using the custom ECR image.
estimator = PyTorch(
    # --- code -------------------------------------------------------------
    entry_point='main_train_k_fold.py',  # alternative: 'main_train.py'
    source_dir='source/algorithms',
    dependencies=['source/algorithms', 'source/datasets', 'source/preprocessor', 'source/modelnetworks'],
    git_config=git_config,
    code_location=s3_code_path,
    # --- runtime ----------------------------------------------------------
    role=role,
    framework_version="1.0.0",
    py_version='py3',
    image_name=docker_repo,
    # --- resources --------------------------------------------------------
    train_instance_count=1,
    train_instance_type=instance_type,
    # train_use_spot_instances=True,
    train_volume_size=30,
    train_max_run=60 * 60 * 24 * 4,  # 4 days, expressed in seconds
    # --- experiment -------------------------------------------------------
    hyperparameters=hyperparameters,
    metric_definitions=metric_definitions,
    output_path=s3_output_path,
    base_job_name=base_job_name,
)

In [None]:
# Launch the training job on the selected input channels; the job log appears in the cell output.
estimator.fit(inputs)

2019-11-02 08:09:25 Starting - Starting the training job...
2019-11-02 08:09:28 Starting - Launching requested ML instances...
2019-11-02 08:10:21 Starting - Preparing the instances for training......
2019-11-02 08:11:22 Downloading - Downloading input data......
2019-11-02 08:12:40 Training - Downloading the training image........[31mbash: cannot set terminal process group (-1): Inappropriate ioctl for device[0m
[31mbash: no job control in this shell[0m
[31m2019-11-02 08:14:34,044 sagemaker-containers INFO     Imported framework sagemaker_pytorch_container.training[0m
[31m2019-11-02 08:14:34,088 sagemaker_pytorch_container.training INFO     Block until all host DNS lookups succeed.[0m
[31m2019-11-02 08:14:34,089 sagemaker_pytorch_container.training INFO     Invoking user training script.[0m
[31m2019-11-02 08:14:34,413 sagemaker-containers INFO     Module main_train_k_fold does not provide a setup.py. [0m
[31mGenerating setup.py[0m
[31m2019-11-02 08:14:34,413 sagemaker-c


2019-11-02 08:14:32 Training - Training image download completed. Training in progress.[31m2019-11-02 08:17:08,095 - algorithms.PretrainedEmbedderLoaderMinimum - INFO - Total words in original embedding handle is 2231686[0m
[31m2019-11-02 08:17:08,095 - algorithms.PretrainedEmbedderLoaderMinimum - INFO - Total words in final embedding is 1864[0m
[31m2019-11-02 08:17:08,095 - algorithms.PretrainedEmbedderLoaderMinimum - INFO - Total words randomly initialized is 77[0m
[31m2019-11-02 08:17:08,097 - algorithms.transform_sentence_tokeniser - INFO - Running sentence tokenisor [0m
[31m2019-11-02 08:18:10,069 - algorithms.transform_sentence_tokeniser - INFO - Completed  sentence tokenisor [0m
[31m2019-11-02 08:18:10,073 - algorithms.transform_text_index - INFO - Transforming TransformTextToIndex[0m
[31m2019-11-02 08:18:10,272 - algorithms.transform_text_index - INFO - Total number of unknown occurances 4757[0m
[31m2019-11-02 08:18:10,272 - algorithms.transform_text_index - INF

[31m2019-11-02 08:18:37,424 - algorithms.Train - INFO - Train set result details:[0m
[31m2019-11-02 08:18:37,431 - algorithms.result_writer - INFO - Confusion matrix, full output in /opt/ml/output/data/predictedvsactual_8568cfea-1946-41c5-87aa-65800ad72145_20191102_081837.csv: [0m
[31m[[4066  273]
 [ 397  514]][0m
[31m2019-11-02 08:18:37,438 - algorithms.Train - INFO - Train set result details: 0.7646495375689591[0m
[31m2019-11-02 08:18:37,438 - algorithms.Train - INFO - Validation set result details:[0m
[31m2019-11-02 08:18:37,551 - algorithms.result_writer - INFO - Confusion matrix, full output in /opt/ml/output/data/predictedvsactual_c8ea7733-ea39-4777-86cf-6c14990a11c9_20191102_081837.csv: [0m
[31m[[438  57]
 [ 59  30]][0m
[31m2019-11-02 08:18:37,552 - algorithms.Train - INFO - Validation set result details: 0.6119868035190617 [0m
[31m2019-11-02 08:18:37,553 - algorithms.Train - INFO - Run     20     4       415     2/83          2% 26.653020 4.468518       0.7646 

[31m2019-11-02 08:19:03,831 - algorithms.Train - INFO - Train set result details:[0m
[31m2019-11-02 08:19:03,838 - algorithms.result_writer - INFO - Confusion matrix, full output in /opt/ml/output/data/predictedvsactual_2ef1b23f-4a6d-47de-99e2-21bf5262a5d0_20191102_081903.csv: [0m
[31m[[4145  194]
 [ 256  655]][0m
[31m2019-11-02 08:19:03,845 - algorithms.Train - INFO - Train set result details: 0.8464153838152694[0m
[31m2019-11-02 08:19:03,845 - algorithms.Train - INFO - Validation set result details:[0m
[31m2019-11-02 08:19:03,961 - algorithms.result_writer - INFO - Confusion matrix, full output in /opt/ml/output/data/predictedvsactual_ebffb98b-2cd4-4d7c-8253-531ab916d5b1_20191102_081903.csv: [0m
[31m[[438  57]
 [ 40  49]][0m
[31m2019-11-02 08:19:03,962 - algorithms.Train - INFO - Validation set result details: 0.7014362136664296 [0m
[31m2019-11-02 08:19:03,963 - algorithms.Train - INFO - Run     46    14      1245     2/83          2% 17.814252 4.133510       0.8464 

[31m2019-11-02 08:19:30,119 - algorithms.Train - INFO - Train set result details:[0m
[31m2019-11-02 08:19:30,127 - algorithms.result_writer - INFO - Confusion matrix, full output in /opt/ml/output/data/predictedvsactual_ff63e61d-55d9-4d0d-83bf-8be5e8a63ee4_20191102_081930.csv: [0m
[31m[[4148  191]
 [  65  846]][0m
[31m2019-11-02 08:19:30,133 - algorithms.Train - INFO - Train set result details: 0.919324321988156[0m
[31m2019-11-02 08:19:30,134 - algorithms.Train - INFO - Validation set result details:[0m
[31m2019-11-02 08:19:30,247 - algorithms.result_writer - INFO - Confusion matrix, full output in /opt/ml/output/data/predictedvsactual_e73c5bb3-a16a-4cd5-80ee-adea5407d8e6_20191102_081930.csv: [0m
[31m[[430  65]
 [ 25  64]][0m
[31m2019-11-02 08:19:30,248 - algorithms.Train - INFO - Validation set result details: 0.7462095605987447 [0m
[31m2019-11-02 08:19:30,248 - algorithms.Train - INFO - Snapshotting because the current score 0.7462095605987447 is greater than 0.74247

[31m2019-11-02 08:19:45,871 - algorithms.Train - INFO - Train set result details:[0m
[31m2019-11-02 08:19:45,879 - algorithms.result_writer - INFO - Confusion matrix, full output in /opt/ml/output/data/predictedvsactual_3e32e00a-994e-4c31-ac39-6c727fc941b5_20191102_081945.csv: [0m
[31m[[4239  100]
 [  38  873]][0m
[31m2019-11-02 08:19:45,885 - algorithms.Train - INFO - Train set result details: 0.9553674396323829[0m
[31m2019-11-02 08:19:45,885 - algorithms.Train - INFO - Validation set result details:[0m
[31m2019-11-02 08:19:45,998 - algorithms.result_writer - INFO - Confusion matrix, full output in /opt/ml/output/data/predictedvsactual_2f9edbd1-47f2-44bf-8c2e-d756fb18785e_20191102_081945.csv: [0m
[31m[[456  39]
 [ 24  65]][0m
[31m2019-11-02 08:19:45,999 - algorithms.Train - INFO - Validation set result details: 0.8044798724591471 [0m
[31m2019-11-02 08:19:46,000 - algorithms.Train - INFO - Snapshotting because the current score 0.8044798724591471 is greater than 0.7980

[31m2019-11-02 08:20:12,061 - algorithms.Train - INFO - Train set result details:[0m
[31m2019-11-02 08:20:12,068 - algorithms.result_writer - INFO - Confusion matrix, full output in /opt/ml/output/data/predictedvsactual_a42ae3c7-057b-4f7a-848e-37e9321aae7c_20191102_082012.csv: [0m
[31m[[4327   12]
 [ 122  789]][0m
[31m2019-11-02 08:20:12,075 - algorithms.Train - INFO - Train set result details: 0.9532404532093467[0m
[31m2019-11-02 08:20:12,075 - algorithms.Train - INFO - Validation set result details:[0m
[31m2019-11-02 08:20:12,190 - algorithms.result_writer - INFO - Confusion matrix, full output in /opt/ml/output/data/predictedvsactual_13a50e80-9d88-4383-9ad0-b5a48bb74fd4_20191102_082012.csv: [0m
[31m[[475  20]
 [ 34  55]][0m
[31m2019-11-02 08:20:12,192 - algorithms.Train - INFO - Validation set result details: 0.808473423379652 [0m
[31m2019-11-02 08:20:12,192 - algorithms.Train - INFO - Run    114    40      3403     2/83          2% 5.234438 4.469990       0.9532   

[31m2019-11-02 08:20:38,438 - algorithms.Train - INFO - Train set result details:[0m
[31m2019-11-02 08:20:38,445 - algorithms.result_writer - INFO - Confusion matrix, full output in /opt/ml/output/data/predictedvsactual_0eb7567e-fdbd-4469-a866-192112729464_20191102_082038.csv: [0m
[31m[[4335    4]
 [  38  873]][0m
[31m2019-11-02 08:20:38,452 - algorithms.Train - INFO - Train set result details: 0.9858445652374878[0m
[31m2019-11-02 08:20:38,452 - algorithms.Train - INFO - Validation set result details:[0m
[31m2019-11-02 08:20:38,567 - algorithms.result_writer - INFO - Confusion matrix, full output in /opt/ml/output/data/predictedvsactual_8bc355cc-279a-4ec1-9c58-c7ef16c9ddcb_20191102_082038.csv: [0m
[31m[[474  21]
 [ 33  56]][0m
[31m2019-11-02 08:20:38,568 - algorithms.Train - INFO - Validation set result details: 0.8104032898059303 [0m
[31m2019-11-02 08:20:38,568 - algorithms.Train - INFO - Run    141    50      4233     2/83          2% 1.770517 5.284800       0.9858  

[31m2019-11-02 08:21:04,567 - algorithms.Train - INFO - Train set result details:[0m
[31m2019-11-02 08:21:04,575 - algorithms.result_writer - INFO - Confusion matrix, full output in /opt/ml/output/data/predictedvsactual_d5276fd0-66e9-4a2d-8580-ea7d451732d1_20191102_082104.csv: [0m
[31m[[4329   10]
 [  13  898]][0m
[31m2019-11-02 08:21:04,582 - algorithms.Train - INFO - Train set result details: 0.992353112796051[0m
[31m2019-11-02 08:21:04,582 - algorithms.Train - INFO - Validation set result details:[0m
[31m2019-11-02 08:21:04,695 - algorithms.result_writer - INFO - Confusion matrix, full output in /opt/ml/output/data/predictedvsactual_4f0c06ae-2167-4a65-9588-7f5eef6f4582_20191102_082104.csv: [0m
[31m[[475  20]
 [ 27  62]][0m
[31m2019-11-02 08:21:04,696 - algorithms.Train - INFO - Validation set result details: 0.8390023872787954 [0m
[31m2019-11-02 08:21:04,696 - algorithms.Train - INFO - Run    167    60      5063     2/83          2% 1.248661 5.569308       0.9924   

[31m2019-11-02 08:21:30,814 - algorithms.Train - INFO - Train set result details:[0m
[31m2019-11-02 08:21:30,821 - algorithms.result_writer - INFO - Confusion matrix, full output in /opt/ml/output/data/predictedvsactual_917739f7-ea43-4a13-8c26-4ebb482d8db9_20191102_082130.csv: [0m
[31m[[4336    3]
 [  13  898]][0m
[31m2019-11-02 08:21:30,828 - algorithms.Train - INFO - Train set result details: 0.9946641786493969[0m
[31m2019-11-02 08:21:30,828 - algorithms.Train - INFO - Validation set result details:[0m
[31m2019-11-02 08:21:30,945 - algorithms.result_writer - INFO - Confusion matrix, full output in /opt/ml/output/data/predictedvsactual_c504045e-b213-48d6-afb3-3978bc387a9c_20191102_082130.csv: [0m
[31m[[475  20]
 [ 29  60]][0m
[31m2019-11-02 08:21:30,946 - algorithms.Train - INFO - Validation set result details: 0.8305050612742921 [0m
[31m2019-11-02 08:21:30,946 - algorithms.Train - INFO - Run    193    70      5893     2/83          2% 0.726104 4.749711       0.9947  

[31m2019-11-02 08:21:46,652 - algorithms.Train - INFO - Train set result details:[0m
[31m2019-11-02 08:21:46,659 - algorithms.result_writer - INFO - Confusion matrix, full output in /opt/ml/output/data/predictedvsactual_d3477d46-4683-4c2e-b8ca-3a06922a0a6c_20191102_082146.csv: [0m
[31m[[4338    1]
 [  17  894]][0m
[31m2019-11-02 08:21:46,666 - algorithms.Train - INFO - Train set result details: 0.9939814146083104[0m
[31m2019-11-02 08:21:46,666 - algorithms.Train - INFO - Validation set result details:[0m
[31m2019-11-02 08:21:46,778 - algorithms.result_writer - INFO - Confusion matrix, full output in /opt/ml/output/data/predictedvsactual_3f6fe030-f121-4d7a-ad90-fe824de9b1cc_20191102_082146.csv: [0m
[31m[[478  17]
 [ 30  59]][0m
[31m2019-11-02 08:21:46,780 - algorithms.Train - INFO - Validation set result details: 0.8341460467083597 [0m
[31m2019-11-02 08:21:46,780 - algorithms.Train - INFO - Run    209    76      6391     2/83          2% 0.848941 5.576771       0.9940  

[31m2019-11-02 08:22:12,802 - algorithms.Train - INFO - Train set result details:[0m
[31m2019-11-02 08:22:12,809 - algorithms.result_writer - INFO - Confusion matrix, full output in /opt/ml/output/data/predictedvsactual_619c5be5-efbc-4403-893a-ba8829aa32e7_20191102_082212.csv: [0m
[31m[[4332    7]
 [   0  911]][0m
[31m2019-11-02 08:22:12,816 - algorithms.Train - INFO - Train set result details: 0.9976827416716001[0m
[31m2019-11-02 08:22:12,816 - algorithms.Train - INFO - Validation set result details:[0m
[31m2019-11-02 08:22:12,928 - algorithms.result_writer - INFO - Confusion matrix, full output in /opt/ml/output/data/predictedvsactual_8def5499-cdcb-46f7-b09c-0939627a0ca8_20191102_082212.csv: [0m
[31m[[471  24]
 [ 23  66]][0m
[31m2019-11-02 08:22:12,930 - algorithms.Train - INFO - Validation set result details: 0.8449537086724923 [0m
[31m2019-11-02 08:22:12,930 - algorithms.Train - INFO - Run    235    86      7221     2/83          2% 0.341847 5.588665       0.9977  

[31m2019-11-02 08:25:04,960 - algorithms.PretrainedEmbedderLoaderMinimum - INFO - Total words in original embedding handle is 2231686[0m
[31m2019-11-02 08:25:04,960 - algorithms.PretrainedEmbedderLoaderMinimum - INFO - Total words in final embedding is 1864[0m
[31m2019-11-02 08:25:04,960 - algorithms.PretrainedEmbedderLoaderMinimum - INFO - Total words randomly initialized is 77[0m
[31m2019-11-02 08:25:04,962 - algorithms.transform_sentence_tokeniser - INFO - Running sentence tokenisor [0m
[31m2019-11-02 08:26:04,642 - algorithms.transform_sentence_tokeniser - INFO - Completed  sentence tokenisor [0m
[31m2019-11-02 08:26:04,646 - algorithms.transform_text_index - INFO - Transforming TransformTextToIndex[0m
[31m2019-11-02 08:26:04,832 - algorithms.transform_text_index - INFO - Total number of unknown occurances 4757[0m
[31m2019-11-02 08:26:04,832 - algorithms.transform_text_index - INFO - Completed TransformTextToIndex[0m
[31m2019-11-02 08:26:04,833 - algorithms.transfo

[31m2019-11-02 08:26:33,170 - algorithms.Train - INFO - Train set result details:[0m
[31m2019-11-02 08:26:33,178 - algorithms.result_writer - INFO - Confusion matrix, full output in /opt/ml/output/data/predictedvsactual_ee4ddc43-0174-431d-8390-f56384611dac_20191102_082633.csv: [0m
[31m[[4308   31]
 [  22  889]][0m
[31m2019-11-02 08:26:33,184 - algorithms.Train - INFO - Train set result details: 0.9824701651030096[0m
[31m2019-11-02 08:26:33,184 - algorithms.Train - INFO - Validation set result details:[0m
[31m2019-11-02 08:26:33,299 - algorithms.result_writer - INFO - Confusion matrix, full output in /opt/ml/output/data/predictedvsactual_1b3c91c7-ffc2-4015-8de2-4e6b7fe091fa_20191102_082633.csv: [0m
[31m[[475  20]
 [ 29  60]][0m
[31m2019-11-02 08:26:33,300 - algorithms.Train - INFO - Validation set result details: 0.8305050612742921 [0m
[31m2019-11-02 08:26:33,300 - algorithms.Train - INFO - Snapshotting because the current score 0.8305050612742921 is greater than 0.8218

[31m2019-11-02 08:26:59,447 - algorithms.Train - INFO - Train set result details:[0m
[31m2019-11-02 08:26:59,455 - algorithms.result_writer - INFO - Confusion matrix, full output in /opt/ml/output/data/predictedvsactual_6d6b48fc-e322-4aed-bafe-716767152091_20191102_082659.csv: [0m
[31m[[4321   18]
 [   4  907]][0m
[31m2019-11-02 08:26:59,462 - algorithms.Train - INFO - Train set result details: 0.992739093174974[0m
[31m2019-11-02 08:26:59,462 - algorithms.Train - INFO - Validation set result details:[0m
[31m2019-11-02 08:26:59,576 - algorithms.result_writer - INFO - Confusion matrix, full output in /opt/ml/output/data/predictedvsactual_1363568f-9110-4a7f-83a0-0f3caf1de786_20191102_082659.csv: [0m
[31m[[468  27]
 [ 20  69]][0m
[31m2019-11-02 08:26:59,578 - algorithms.Train - INFO - Validation set result details: 0.8490665640207857 [0m
[31m2019-11-02 08:26:59,578 - algorithms.Train - INFO - Snapshotting because the current score 0.8490665640207857 is greater than 0.84517

[31m2019-11-02 08:27:15,253 - algorithms.Train - INFO - Train set result details:[0m
[31m2019-11-02 08:27:15,261 - algorithms.result_writer - INFO - Confusion matrix, full output in /opt/ml/output/data/predictedvsactual_ce418c48-332e-4073-afa6-b3f6a54a5595_20191102_082715.csv: [0m
[31m[[4335    4]
 [   4  907]][0m
[31m2019-11-02 08:27:15,267 - algorithms.Train - INFO - Train set result details: 0.9973436746188615[0m
[31m2019-11-02 08:27:15,267 - algorithms.Train - INFO - Validation set result details:[0m
[31m2019-11-02 08:27:15,381 - algorithms.result_writer - INFO - Confusion matrix, full output in /opt/ml/output/data/predictedvsactual_a43c5b6b-38e6-40be-89d8-240df65cffa5_20191102_082715.csv: [0m
[31m[[471  24]
 [ 21  68]][0m
[31m2019-11-02 08:27:15,382 - algorithms.Train - INFO - Validation set result details: 0.8528942551512201 [0m
[31m2019-11-02 08:27:15,382 - algorithms.Train - INFO - Run     63    23      1992     2/83          2% 0.641891 3.785523       0.9973  

[31m2019-11-02 08:27:41,538 - algorithms.Train - INFO - Train set result details:[0m
[31m2019-11-02 08:27:41,546 - algorithms.result_writer - INFO - Confusion matrix, full output in /opt/ml/output/data/predictedvsactual_f60becd3-90fd-4e78-8c3e-9335cd614a63_20191102_082741.csv: [0m
[31m[[4338    1]
 [   5  906]][0m
[31m2019-11-02 08:27:41,553 - algorithms.Train - INFO - Train set result details: 0.9980042924817706[0m
[31m2019-11-02 08:27:41,553 - algorithms.Train - INFO - Validation set result details:[0m
[31m2019-11-02 08:27:41,670 - algorithms.result_writer - INFO - Confusion matrix, full output in /opt/ml/output/data/predictedvsactual_ae565e24-7c20-44df-a9be-a2e2b7bf265f_20191102_082741.csv: [0m
[31m[[480  15]
 [ 29  60]][0m
[31m2019-11-02 08:27:41,672 - algorithms.Train - INFO - Validation set result details: 0.8439413079389757 [0m
[31m2019-11-02 08:27:41,672 - algorithms.Train - INFO - Run     89    33      2822     2/83          2% 0.301644 5.028634       0.9980  

[31m2019-11-02 08:28:08,083 - algorithms.Train - INFO - Train set result details:[0m
[31m2019-11-02 08:28:08,091 - algorithms.result_writer - INFO - Confusion matrix, full output in /opt/ml/output/data/predictedvsactual_48bd7b2d-ecca-45ad-9755-acbb5d2f2e74_20191102_082808.csv: [0m
[31m[[4335    4]
 [   0  911]][0m
[31m2019-11-02 08:28:08,098 - algorithms.Train - INFO - Train set result details: 0.9986741356185006[0m
[31m2019-11-02 08:28:08,098 - algorithms.Train - INFO - Validation set result details:[0m
[31m2019-11-02 08:28:08,215 - algorithms.result_writer - INFO - Confusion matrix, full output in /opt/ml/output/data/predictedvsactual_762a21fd-15c7-41a0-85ac-68da50ebb824_20191102_082808.csv: [0m
[31m[[468  27]
 [ 16  73]][0m
[31m2019-11-02 08:28:08,216 - algorithms.Train - INFO - Validation set result details: 0.8642822013608531 [0m
[31m2019-11-02 08:28:08,216 - algorithms.Train - INFO - Run    116    43      3652     2/83          2% 0.277868 4.573297       0.9987  

[31m2019-11-02 08:28:34,586 - algorithms.Train - INFO - Train set result details:[0m
[31m2019-11-02 08:28:34,594 - algorithms.result_writer - INFO - Confusion matrix, full output in /opt/ml/output/data/predictedvsactual_8c3bc692-faf0-49a3-adde-404e33e759de_20191102_082834.csv: [0m
[31m[[4333    6]
 [   1  910]][0m
[31m2019-11-02 08:28:34,601 - algorithms.Train - INFO - Train set result details: 0.9976807399367306[0m
[31m2019-11-02 08:28:34,601 - algorithms.Train - INFO - Validation set result details:[0m
[31m2019-11-02 08:28:34,717 - algorithms.result_writer - INFO - Confusion matrix, full output in /opt/ml/output/data/predictedvsactual_5d1f813b-3b2f-48e9-80a5-86636b0fd4a9_20191102_082834.csv: [0m
[31m[[473  22]
 [ 20  69]][0m
[31m2019-11-02 08:28:34,718 - algorithms.Train - INFO - Validation set result details: 0.8620782726045884 [0m
[31m2019-11-02 08:28:34,719 - algorithms.Train - INFO - Run    142    53      4482     2/83          2% 0.299883 6.158998       0.9977  

[31m2019-11-02 08:29:01,084 - algorithms.Train - INFO - Train set result details:[0m
[31m2019-11-02 08:29:01,092 - algorithms.result_writer - INFO - Confusion matrix, full output in /opt/ml/output/data/predictedvsactual_e20ce8a1-6db8-45f5-b7e3-99b43442f362_20191102_082901.csv: [0m
[31m[[4334    5]
 [   1  910]][0m
[31m2019-11-02 08:29:01,099 - algorithms.Train - INFO - Train set result details: 0.9980112034277508[0m
[31m2019-11-02 08:29:01,099 - algorithms.Train - INFO - Validation set result details:[0m
[31m2019-11-02 08:29:01,216 - algorithms.result_writer - INFO - Confusion matrix, full output in /opt/ml/output/data/predictedvsactual_c079de9f-0d84-40b3-8cc7-5bb13acb992e_20191102_082901.csv: [0m
[31m[[468  27]
 [ 24  65]][0m
[31m2019-11-02 08:29:01,218 - algorithms.Train - INFO - Validation set result details: 0.8332801558380494 [0m
[31m2019-11-02 08:29:01,218 - algorithms.Train - INFO - Run    169    63      5312     2/83          2% 0.360732 5.867132       0.9980  

### HPO

In [None]:
# Metric the tuner optimises; the name matches the "ValidationFScore" entry in metric_definitions.
objective_metric_name ="ValidationFScore"

In [None]:
from sagemaker.tuner import (
    CategoricalParameter,
    ContinuousParameter,
    HyperparameterTuner,
    IntegerParameter,
)

# Search space for the tuning job: every tuned hyperparameter is an integer range.
hyperparameter_ranges = dict(
    lstmhiddensize=IntegerParameter(40, 200),
    fclayersize=IntegerParameter(10, 50),
    numlayers=IntegerParameter(1, 10),
    poolingkernelsize=IntegerParameter(2, 10),
)

In [None]:
# Fixed hyperparameters passed to the estimator; the tuned ones are declared
# separately in hyperparameter_ranges.
hyperparameters = {
    # Only the file name is passed: take the last path segment of each S3 URI.
    "trainfile": trainfile.split("/")[-1],
    "valfile": valfile.split("/")[-1],
    "embeddingfile": embeddingfile.split("/")[-1],
    # Derived from embed_dim (configured alongside embeddingfile) instead of a
    # second hard-coded "200", so the two cannot drift apart. Kept as a string,
    # exactly as before.
    "embeddim": str(embed_dim),
    "epochs": 100,
    "earlystoppingpatience": 20,
    "dataset": "PpiAimedDatasetFactory",
}

In [None]:

# Estimator for the training jobs: runs main_train.py in the image referenced
# by docker_repo.
estimator = PyTorch(
    # What to run
    entry_point="main_train.py",
    source_dir="source/algorithms",
    dependencies=["source/algorithms", "source/datasets", "source/preprocessor"],
    hyperparameters=hyperparameters,
    # Container
    image_name=docker_repo,
    framework_version="1.0.0",
    py_version="py3",
    # Infrastructure
    role=role,
    train_instance_count=1,
    train_instance_type=instance_type,
    train_volume_size=30,
    # Outputs and reporting
    output_path=s3_output_path,
    code_location=s3_code_path,
    metric_definitions=metric_definitions,
    base_job_name="aimed-ppi-extractor",
    # Options currently switched off:
    #   git_config=git_config,
    #   train_use_spot_instances=True,
)

In [None]:
# Random-search tuning job: at most 50 training jobs in total, 7 in parallel.
tuner = HyperparameterTuner(
    estimator=estimator,
    objective_metric_name=objective_metric_name,
    hyperparameter_ranges=hyperparameter_ranges,
    metric_definitions=metric_definitions,
    strategy="Random",
    max_jobs=50,
    max_parallel_jobs=7,
    base_tuning_job_name="hpo-aimed-ppi-extractor",
)

# Start the tuning job; `inputs` is defined outside this cell.
tuner.fit(inputs)