### Set up

#### 1. Set  up  accounts and role

In [1]:
#!pip install sagemaker==1.39.0

In [2]:
import sagemaker
import boto3

sagemaker_session = sagemaker.Session()
account_id =  boto3.client('sts').get_caller_identity().get('Account')
region = boto3.session.Session().region_name


#role = sagemaker.get_execution_role()
role="arn:aws:iam::{}:role/service-role/AmazonSageMaker-ExecutionRole-20190118T115449".format(account_id)


#### 2. Setup image and instance type

In [3]:
pytorch_custom_image_name="ppi-extractor:gpu-1.0.0-201910130520"
instance_type = "ml.p3.8xlarge" 

In [4]:
docker_repo = "{}.dkr.ecr.{}.amazonaws.com/{}".format(account_id, region, pytorch_custom_image_name)

#### 3. Configure train/ test and validation datasets

In [5]:
bucket = "aegovan-data"

In [6]:
plain_trainfile = "s3://{}/aimed/AIMedFull.json".format(bucket)
trainfile = "s3://{}/aimed/AIMedFull_preprocessed.json".format(bucket)

#trainfile="s3://{}/aimed/AIMedFull_Ylhsieh.json".format(bucket)
    
valfile="s3://{}/aimed/AIMedval_preprocessed.json".format(bucket)
#trainfile = "s3://{}/aimed/AIMedtrain_pubmedoverlap.json".format(bucket)
#valfile="s3://{}/aimed/AIMedval_pubmedoverlap.json".format(bucket)
#embeddingfile="s3://{}/embeddings/PubMed-and-PMC-w2v.bin.txt".format(bucket)
#embeddingfile="s3://{}/embeddings/bio_nlp_vec/PubMed-shuffle-win-30.bin.txt".format(bucket)
embeddingfile="s3://{}/embeddings/bio_nlp_vec/PubMed-shuffle-win-2.bin.txt".format(bucket)
embed_dim=200

#Collobert embedding
coll_embeddingfile="s3://{}/embeddings/collobert/words_vocab_collabert.txt".format(bucket)
coll_embed_dim=50
s3_output_path= "s3://{}/results/".format(bucket)
s3_code_path= "s3://{}/aimed_code".format(bucket)

### Start training

In [7]:
pub_inputs = {
    "train" : trainfile,
 #   "val" :valfile,
    "embedding" : embeddingfile
}

In [8]:
plain_pub_inputs = {
    "train" : plain_trainfile,
 #   "val" :valfile,
    "embedding" : embeddingfile
}

In [9]:
coll_inputs = {
    "train" : trainfile,
   # "val" :valfile,
    "embedding" : coll_embeddingfile
}

In [10]:
# hyperparameters = {
#     "dataset":"PpiAimedDatasetFactory",
#     "trainfile":trainfile.split("/")[-1],
#     "valfile":valfile.split("/")[-1],
#     "embeddingfile":embeddingfile.split("/")[-1],
#     "embeddim":embed_dim,
#     "batchsize": "32",
#     "epochs" : "1000",   
#     "log-level" : "INFO",
#     "lstmhiddensize": 100,
#     "fclayersize": 15,
#     "numlayers":7,
#     "poolingkernelsize":10,
#     "learningrate":.001,
#     "cnn_output":100,
#     "earlystoppingpatience":20
# }

In [11]:
choi_CnnNetworkNoPosFactoryhyperparameters = {
    "dataset":"PpiAimedDatasetPreprocessedFactory",
    "network" :"RelationExtractorCnnNetworkNoPosFactory",
    "trainfile":trainfile.split("/")[-1],
    "embeddingfile":coll_embeddingfile.split("/")[-1],
    "embeddim":coll_embed_dim,
    "batchsize": "32",
    "epochs" : "1000",   
    "log-level" : "INFO",
    "learningrate":.001,
    "cnn_output":100,
    "earlystoppingpatience":20,
    "dropout_rate_cnn":.2,
    "fc_drop_out_rate":0.5,
    "train_val_vocab_merge":1
}

In [12]:
choi_CnnPosNetworkFactory = {
    "dataset":"PpiAimedDatasetPreprocessedFactory",
    "network" :"RelationExtractorCnnPosNetworkFactory",
    "trainfile":trainfile.split("/")[-1],
    "embeddingfile":coll_embeddingfile.split("/")[-1],
    "embeddim":coll_embed_dim,
    "batchsize": "32",
    "epochs" : "1000",   
    "log-level" : "INFO",
    "learningrate":.001,
    "cnn_output":100,
    "earlystoppingpatience":20,
    "dropout_rate_cnn":.2,
    "fc_drop_out_rate":0.5,
      "train_val_vocab_merge":1
}

In [13]:
BilstmNetworkFactoryhyperparameters = {
    "dataset":"PpiAimedDatasetPreprocessedFactory",
    "network" :"RelationExtractorBiLstmNetworkFactory",
    "trainfile":trainfile.split("/")[-1],
    "valfile":valfile.split("/")[-1],
    "embeddingfile":embeddingfile.split("/")[-1],
    "embeddim":embed_dim,
    "batchsize": "50",
    "epochs" : "1000",  
    "earlystoppingpatience":20,
    "log-level" : "INFO",
    "learningrate":.001,
    "lstm_dropout":0.5,
    "pooling_kernel_size":3,
    "lstm_num_layers" :3,
    "lstm_hidden_size":64,
    "fc_layer_size":64,
    "fc_drop_out_rate":0.5,
}

In [14]:
base_experiment_bilstm_pos =  {
    "dataset":"PpiAimedDatasetFactory",
    "network" :"RelationExtractorDynamicEntityBiLstmNetworkFactory",
    "trainfile":plain_trainfile.split("/")[-1],
    "embeddingfile":embeddingfile.split("/")[-1],
    "embeddim":embed_dim,
    "batchsize": "50",
    "epochs" : "1000",  
    "earlystoppingpatience":20,
    "log-level" : "INFO",
    "learningrate":.001,
    "lstm_dropout":0.5,
    "pooling_kernel_size":3,
    "lstm_num_layers" :3,
    "lstm_hidden_size":64,
    "fc_layer_size":64,
    "fc_drop_out_rate":0.5,
    "train_val_vocab_merge":0,
    "use_min_dict":0,
    "fine_tune_embeddings":0
    
}

#### For BiLSTM network f - score using the following HP

```bash
/usr/bin/python -m main_train_k_fold --batchsize 64 --dataset PpiAimedDatasetPreprocessedFactory --earlystoppingpatience 20 --embeddim 200 --embeddingfile PubMed-shuffle-win-2.bin.txt --epochs 1000 --fc_drop_out_rate 0.5 --learningrate 0.001 --log-level INFO --lstm_dropout 0.5 --lstm_hidden_size 400 --lstm_num_layers 1 --network RelationExtractorBiLstmNetworkFactoryNoPos --train_val_vocab_merge 1 --trainfile AIMedFull_preprocessed.json
```

**precision, recall, fscore, support** for 10 folds

```json
[(0.7528089887640449, 0.7528089887640449, 0.752808988764045, None),
 (0.6915887850467289, 0.7628865979381443, 0.7254901960784315, None),
 (0.7586206896551724, 0.6226415094339622, 0.6839378238341969, None),
 (0.7727272727272727, 0.6296296296296297, 0.6938775510204083, None),
 (0.75, 0.6990291262135923, 0.7236180904522612, None),
 (0.7525773195876289, 0.7525773195876289, 0.752577319587629, None),
 (0.6851851851851852, 0.74, 0.7115384615384615, None),
 (0.7142857142857143, 0.7476635514018691, 0.730593607305936, None),
 (0.7560975609756098, 0.6326530612244898, 0.6888888888888889, None),
 (0.6788990825688074, 0.7789473684210526, 0.7254901960784315, None)]
```

**tn, fp, fn, tp** for 10 folds


```json
[(473, 22, 22, 67), 
 (454, 33, 23, 74), 
 (457, 21, 40, 66), 
 (456, 20, 40, 68), 
 (456, 24, 31, 72), 
 (462, 24, 24, 73),
 (449, 34, 26, 74), 
 (444, 32, 27, 80), 
 (465, 20, 36, 62),
 (453, 35, 21, 74)]
```


In [15]:
BilstmNetworkFactoryhyperparametersNoPos = {
    "dataset":"PpiAimedDatasetPreprocessedFactory",
    "network" :"RelationExtractorBiLstmNetworkFactoryNoPos",
    "trainfile":trainfile.split("/")[-1],
   # "valfile":valfile.split("/")[-1],
    "embeddingfile":embeddingfile.split("/")[-1],
    "embeddim":embed_dim,
    "batchsize": "64",
    "epochs" : "1000",  
    "earlystoppingpatience":20,
    "log-level" : "INFO",
    "learningrate":.001,
    "lstm_dropout":0.5,
    "lstm_num_layers" :1,
    "lstm_hidden_size":400,
    "fc_drop_out_rate":0.5,
      "train_val_vocab_merge":1
}

In [16]:


PpiAimedDatasetFactoryYlhsiehBiLstmNetwork = {
    "dataset":"PpiAimedDatasetFactoryYlhsieh",
    "network" :"RelationExtractorBiLstmNetworkFactoryNoPos",
    "trainfile":trainfile.split("/")[-1],
   # "valfile":valfile.split("/")[-1],
    "embeddingfile":embeddingfile.split("/")[-1],
    "embeddim":embed_dim,
    "batchsize": "16",
    "epochs" : "1000",  
    "earlystoppingpatience":20,
    "log-level" : "INFO",
    "learningrate":.001,
    "lstm_dropout":0.5,
    "lstm_num_layers" :1,
    "lstm_hidden_size":400,
    "fc_drop_out_rate":0.5,
  "train_val_vocab_merge":1
}

In [17]:
ResnetCnnPosNetworkFactoryhyperparameters = {
    "dataset":"PpiAimedDatasetPreprocessedFactory",
    "network" :"RelationExtractorResnetCnnPosNetworkFactory",
    "earlystoppingpatience" : 20,
    "trainfile":trainfile.split("/")[-1],
   # "valfile":valfile.split("/")[-1],
    "embeddingfile":embeddingfile.split("/")[-1],
    "embeddim":embed_dim,
    "batchsize": "8",
    "epochs" : "1000",   
    "log-level" : "INFO",
    "dropout_rate_cnn": 0.5,
    "pooling_kernel_size": 3,
    "pool_stride":2,
    "cnn_kernel_size":3,
    "cnn_num_layers":3,
    "cnn_output":64,
    "learningrate":.00001,
    "weight_decay":.00001,
    "fc_layer_size": 256,
    "fc_drop_out_rate": 0.5,
    "input_drop_out_rate" : 0.2
   
}

In [18]:
SimpleResnetCnnPosNetworkFactoryhyperparameters = {
    "dataset":"PpiAimedDatasetFactory",
    "network" :"RelationExtractorSimpleResnetCnnPosNetworkFactory",
    "earlystoppingpatience" : 50,
    "trainfile":trainfile.split("/")[-1],
  #  "valfile":valfile.split("/")[-1],
    "embeddingfile":embeddingfile.split("/")[-1],
    "embeddim":embed_dim,
    "batchsize": "32",
    "epochs" : "1000",   
    "log-level" : "INFO",
    "dropout_rate_cnn": 0.5,
    "pooling_kernel_size": 3,
    "pool_stride":2,
    "cnn_kernel_size":3,
    "cnn_num_layers":2,
    "cnn_output":64,
    "learningrate":.001,
    "weight_decay":.00001,
    "fc_layer_size": 256,
    "fc_drop_out_rate": 0.5,
    "input_drop_out_rate" : 0.2,
   
    "train_val_vocab_merge":1
}

In [19]:
SimpleResnetCnnPosNetworkFactoryhyperparameters_coll = {
    "dataset":"PpiAimedDatasetFactory",
    "network" :"RelationExtractorSimpleResnetCnnPosNetworkFactory",
    "earlystoppingpatience" : 50,
    "trainfile":trainfile.split("/")[-1],
  #  "valfile":valfile.split("/")[-1],
    "embeddingfile":coll_embeddingfile.split("/")[-1],
    "embeddim":50,
    "batchsize": "32",
    "epochs" : "1000",   
    "log-level" : "INFO",
    "dropout_rate_cnn": 0.5,
    "pooling_kernel_size": 3,
    "pool_stride":2,
    "cnn_kernel_size":3,
    "cnn_num_layers":2,
    "cnn_output":64,
    "learningrate":.001,
    "weight_decay":.00001,
    "fc_layer_size": 256,
    "fc_drop_out_rate": 0.5,
    "input_drop_out_rate" : 0.2,
   
    "train_val_vocab_merge":1
}

In [20]:
SimpleResnetCnnPosNetworkFactoryhyperparametersv2 = {
    "dataset":"PpiAimedDatasetFactory",
    "network" :"RelationExtractorSimpleResnetCnnPosNetworkFactory",
    "earlystoppingpatience" : 50,
    "trainfile":trainfile.split("/")[-1],
    "valfile":valfile.split("/")[-1],
    "embeddingfile":embeddingfile.split("/")[-1],
    "embeddim":embed_dim,
    "batchsize": "8",
    "epochs" : "1000",   
    "log-level" : "INFO",
    "dropout_rate_cnn": 0.5,
    "pooling_kernel_size": 3,
    "pool_stride":2,
    "cnn_kernel_size":3,
    "cnn_num_layers":3,
    "cnn_output":32,
    "learningrate":.001,
    "weight_decay":.00001,
    "fc_layer_size": 128,
    "fc_drop_out_rate": 0.5,
    "input_drop_out_rate" : 0.2,
    "train_val_vocab_merge":1
   
}

In [21]:
metric_definitions = [{"Name": "TrainLoss",
                     "Regex": "###score: train_loss### (\d*[.]?\d*)"}
                    ,{"Name": "ValidationLoss",
                     "Regex": "###score: val_loss### (\d*[.]?\d*)"}
                    ,{"Name": "TrainFScore",
                     "Regex": "###score: train_fscore### (\d*[.]?\d*)"}
                   ,{"Name": "ValidationFScore",
                     "Regex": "###score: val_fscore### (\d*[.]?\d*)"}
                    ]

In [22]:
!git log -1 | head -1
!git log -1 | head -5 | tail -1

commit 0d3612a247c838daf6f3b476943a51df2cf52a51
    Fix name


In [23]:
git_config = {'repo': 'https://github.com/elangovana/PPI-typed-relation-extractor.git',
              'branch': 'master',
            #  'commit': '58a09e154935248667062a36fdae7d86b86b477c'
             }

In [30]:
base_job_name="aimed-base-bilstm-pos"
hyperparameters = base_experiment_bilstm_pos #BilstmNetworkFactoryhyperparametersNoPos #PpiAimedDatasetFactoryYlhsiehBiLstmNetwork #SimpleResnetCnnPosNetworkFactoryhyperparameters
inputs = plain_pub_inputs 


In [31]:
hyperparameters

{'batchsize': '50',
 'dataset': 'PpiAimedDatasetFactory',
 'earlystoppingpatience': 20,
 'embeddim': 200,
 'embeddingfile': 'PubMed-shuffle-win-2.bin.txt',
 'epochs': '1000',
 'fc_drop_out_rate': 0.5,
 'fc_layer_size': 64,
 'fine_tune_embeddings': 0,
 'learningrate': 0.001,
 'log-level': 'INFO',
 'lstm_dropout': 0.5,
 'lstm_hidden_size': 64,
 'lstm_num_layers': 3,
 'network': 'RelationExtractorDynamicEntityBiLstmNetworkFactory',
 'pooling_kernel_size': 3,
 'sagemaker_container_log_level': 20,
 'sagemaker_enable_cloudwatch_metrics': False,
 'sagemaker_job_name': 'aimed-ppi-extractor-2019-10-23-10-00-13-758',
 'sagemaker_program': 'main_train_k_fold.py',
 'sagemaker_region': 'us-east-2',
 'sagemaker_submit_directory': 's3://aegovan-data/aimed_code/aimed-ppi-extractor-2019-10-23-10-00-13-758/source/sourcedir.tar.gz',
 'train_val_vocab_merge': 0,
 'trainfile': 'AIMedFull.json',
 'use_min_dict': 0}

In [32]:
inputs

{'embedding': 's3://aegovan-data/embeddings/bio_nlp_vec/PubMed-shuffle-win-2.bin.txt',
 'train': 's3://aegovan-data/aimed/AIMedFull.json'}

In [35]:
from sagemaker.pytorch import PyTorch

estimator = PyTorch(
     entry_point='main_train_k_fold.py',
   # entry_point='main_train.py',
                    source_dir = 'source/algorithms',
                    dependencies =['source/algorithms', 'source/datasets', 'source/preprocessor', 'source/modelnetworks'],
                    role=role,
                    framework_version ="1.0.0",
                    py_version='py3',
                    git_config= git_config,
                    image_name= docker_repo,
                    train_instance_count=1,
                    train_instance_type=instance_type,
                    hyperparameters =hyperparameters,
                    output_path=s3_output_path,
                    metric_definitions=metric_definitions,
                    #train_use_spot_instances = True
                    train_volume_size=30,
                    code_location=s3_code_path,
                    train_max_run = 60 * 60 * 24 * 4,
                    base_job_name = base_job_name)

In [36]:
estimator.fit(inputs)

2019-10-23 10:02:31 Starting - Starting the training job...
2019-10-23 10:02:33 Starting - Launching requested ML instances......
2019-10-23 10:03:55 Starting - Preparing the instances for training...
2019-10-23 10:04:52 Downloading - Downloading input data.........
2019-10-23 10:06:13 Training - Downloading the training image.........
2019-10-23 10:08:19 Training - Training image download completed. Training in progress.[31mbash: cannot set terminal process group (-1): Inappropriate ioctl for device[0m
[31mbash: no job control in this shell[0m
[31m2019-10-23 10:08:21,906 sagemaker-containers INFO     Imported framework sagemaker_pytorch_container.training[0m
[31m2019-10-23 10:08:21,950 sagemaker_pytorch_container.training INFO     Block until all host DNS lookups succeed.[0m
[31m2019-10-23 10:08:21,951 sagemaker_pytorch_container.training INFO     Invoking user training script.[0m
[31m2019-10-23 10:08:22,365 sagemaker-containers INFO     Module main_train_k_fold does not pr

[31m2019-10-23 10:11:24,865 - algorithms.PretrainedEmbedderLoader - INFO - Total words in embedding is 2231686[0m
[31m2019-10-23 10:11:24,869 - algorithms.PretrainedEmbedderLoader - INFO - The number of words intialised without embbeder is 192[0m
[31m2019-10-23 10:11:24,869 - algorithms.PretrainedEmbedderLoader - INFO - Total words 2231878[0m
[31m2019-10-23 10:11:52,671 - algorithms.transform_sentence_tokeniser - INFO - Running sentence tokenisor [0m
[31mWe have shown previously that Phe93 in the extracellular domain of the erythropoietin (EPO) receptor (EPOR) is crucial for binding EPO.Substitution of Phe93 with alanine resulted in a dramatic decrease in EPO binding to the Escherichia coli-expressed extracellular domain of the EPOR (PROTEIN1-binding protein or EBP) and no detectable binding to full-length mutant receptor expressed in COS cells.[0m
[31mIL-10 also activated Stat5 in Ba/F3 cells that stably expressed the murine PROTEIN1 receptor.[0m
[31mSpecifically, substit

[31mNerve growth factor binds independently to two transmembrane receptors, the p75 neurotrophin receptor and the PROTEIN1 (trkA) tyrosine kinase receptor, which are both co-expressed in the majority of neuronal cells that respond to NGF.[0m
[31mInterferon-gamma and several other cytokines that are known to activate PROTEIN1 kinase were also found to stimulate Raf-1 kinase activity toward MEK-1 in mammalian cells.[0m
[31mMutation of Ala-522 to proline, which is located outside the predicted amphipathic helix region, had no effect on PROTEIN1 alpha binding.[0m
[31mWe have cloned cDNAs corresponding to the human PROTEIN1 receptor alpha chain (IL-13Ralpha).[0m
[31mWe employed both random and site-specific mutagenesis to determine the function of amino acid residues in the extracellular domain (referred to as PROTEIN1 binding protein, EBP) of the EPOR.[0m
[31mIsothermal titration calorimetry shows that PROTEIN1 SC1 forms a 1:1 complex with its high-affinity receptor (IFN-gamma R

[31mWe have identified a new TNF-related ligand, designated human GITR ligand (hGITRL), and its human receptor (hGITR), an ortholog of the recently discovered murine PROTEIN1 [4].[0m
[31mWe recently reported that Ras interaction with the PROTEIN1 cysteine-rich domain (Raf-CRD, residues 139-184) may be required for Raf-1 activation.[0m
[31mThe death-inducing receptor Fas is activated when cross-linked by the type II membrane protein PROTEIN1 (FasL).[0m
[31mThe structure also reveals the final six C-terminal amino acid residues of PROTEIN1 SC1 (residues 253-258) that have not been observed in any other reported IFN-gamma structures.[0m
[31mThe PROTEIN1 (IL-1beta) converting enzyme (ICE) processes the inactive IL-1beta precursor to the proinflammatory cytokine.[0m
[31mInfluence of interleukin-6 (IL-6) dimerization on formation of the high affinity hexameric PROTEIN1 complex.[0m
[31mCoexpression of the alpha and betaL subunits of the human PROTEIN1 is required for the inductio

[31mMutagenesis studies of the human PROTEIN1 receptor. Establishment of structure-function relationships.[0m
[31mHere we describe the crystal structure at 2.7 A resolution of the soluble extracellular part of PROTEIN2 complexed with IL1RA.[0m
[31mThese data suggest that anchoring proteins interact with PROTEIN1 alpha via an amphipathic helix binding motif.[0m
[31mThe v-Raf proteins purified from cells infected with EC12 or 22W viruses activated PROTEIN1 kinase from skeletal muscle in vitro.[0m
[31mThe results of his3 activation indicated that p85, N + C-SH2 and C-SH2 interact with both PROTEIN1 beta and IGF-1R beta, whereas N-SH2 and SH3 + N-SH2 interact only with IR beta.[0m
[31mThe acidic amino terminus of the p53 protein has been shown to contain this trans-activation activity as well as the domains for mdm-2 and adenovirus PROTEIN1 55-kD protein binding.[0m
[31mPROTEIN1-related protein: a ligand and specific activator of the tyrosine kinase receptor Flt4.[0m
[31mThe

[31mUsing the yeast two-hybrid system, a genetic assay for studying protein-protein interactions, we have examined and compared the interaction of the PROTEIN1 receptor (IGF-IR) and the insulin receptor (IR) with their two known substrates p52Shc and the insulin receptor substrate-1 (IRS-1).[0m
[31mThree naturally occurring ligands for the IL-1 receptor (IL1R) exist: the agonists IL-1alpha and IL-1beta and the PROTEIN1 antagonist IL1RA (ref. 2).[0m
[31mCD5 associates with the PROTEIN1 complex and is rapidly phosphosphorylated on tyrosine residues as a result of TcR zeta/CD3 ligation.[0m
[31mAlthough IRS-1 is thought to interact with the PROTEIN1 receptor, the nature of the interaction has not been defined.[0m
[31mCharacterization of a novel high affinity human PROTEIN1 receptor. Expression on T cells and association with IL-7 driven proliferation.[0m
[31m2019-10-23 10:12:48,415 - algorithms.transform_sentence_tokeniser - INFO - Completed  sentence tokenisor [0m
[31m2019-10

[31m2019-10-23 10:14:44,950 - algorithms.Train - INFO - Train set result details:[0m
[31m2019-10-23 10:14:44,959 - algorithms.result_writer - INFO - Confusion matrix, full output in /opt/ml/output/data/predictedvsactual_959133b9-adaf-4d47-ac68-ffb3b0160e10_20191023_101444.csv: [0m
[31m[[3822    0]
 [ 890    0]][0m
[31m2019-10-23 10:14:44,968 - algorithms.Train - INFO - Train set result details: 0.0[0m
[31m2019-10-23 10:14:44,968 - algorithms.Train - INFO - Validation set result details:[0m
[31m2019-10-23 10:14:50,311 - algorithms.result_writer - INFO - Confusion matrix, full output in /opt/ml/output/data/predictedvsactual_a1896222-5b76-4351-a500-a53c0851bd00_20191023_101450.csv: [0m
[31m[[410   0]
 [100   0]][0m
[31m2019-10-23 10:14:50,313 - algorithms.Train - INFO - Validation set result details: 0.0 [0m
[31m2019-10-23 10:14:50,313 - algorithms.Train - INFO - Snapshotting because the current score 0.0 is greater than None [0m
[31m2019-10-23 10:14:50,313 - algorithms

[31m2019-10-23 10:25:07,603 - algorithms.Train - INFO - Train set result details:[0m
[31m2019-10-23 10:25:07,610 - algorithms.result_writer - INFO - Confusion matrix, full output in /opt/ml/output/data/predictedvsactual_ab51d97d-532a-480a-ac4d-d6a9c9e88914_20191023_102507.csv: [0m
[31m[[3772   50]
 [ 801   89]][0m
[31m2019-10-23 10:25:07,616 - algorithms.Train - INFO - Train set result details: 0.1729834791059281[0m
[31m2019-10-23 10:25:07,616 - algorithms.Train - INFO - Validation set result details:[0m
[31m2019-10-23 10:25:12,803 - algorithms.result_writer - INFO - Confusion matrix, full output in /opt/ml/output/data/predictedvsactual_52d92854-83f7-4427-b600-176a62f354cb_20191023_102512.csv: [0m
[31m[[408   2]
 [ 93   7]][0m
[31m2019-10-23 10:25:12,805 - algorithms.Train - INFO - Validation set result details: 0.12844036697247707 [0m
[31m2019-10-23 10:25:12,805 - algorithms.Train - INFO - Run    733     6       665     4/95          4% 36.022705 4.503097       0.1730

[31m2019-10-23 10:35:31,818 - algorithms.Train - INFO - Train set result details:[0m
[31m2019-10-23 10:35:31,824 - algorithms.result_writer - INFO - Confusion matrix, full output in /opt/ml/output/data/predictedvsactual_e695cae0-1cdc-4bf7-aafc-c5454b6ea9cf_20191023_103531.csv: [0m
[31m[[3570  252]
 [ 522  368]][0m
[31m2019-10-23 10:35:31,831 - algorithms.Train - INFO - Train set result details: 0.4874172185430463[0m
[31m2019-10-23 10:35:31,831 - algorithms.Train - INFO - Validation set result details:[0m
[31m2019-10-23 10:35:37,016 - algorithms.result_writer - INFO - Confusion matrix, full output in /opt/ml/output/data/predictedvsactual_e4650a23-07e4-4ae3-a2b0-ff3bd2fd514a_20191023_103537.csv: [0m
[31m[[386  24]
 [ 64  36]][0m
[31m2019-10-23 10:35:37,018 - algorithms.Train - INFO - Validation set result details: 0.45 [0m
[31m2019-10-23 10:35:37,018 - algorithms.Train - INFO - Snapshotting because the current score 0.45 is greater than 0.40277777777777773 [0m
[31m2019

EndpointConnectionError: Could not connect to the endpoint URL: "https://api.sagemaker.us-east-2.amazonaws.com/"

### HPO

In [None]:
objective_metric_name ="ValidationFScore"

In [None]:
from sagemaker.tuner import HyperparameterTuner, IntegerParameter, CategoricalParameter, ContinuousParameter
hyperparameter_ranges = {'lstmhiddensize': IntegerParameter(40,200), #ContinuousParameter(0.01, 0.2),
                          "fclayersize": IntegerParameter(10,50),
                            "numlayers":IntegerParameter(1,10),
                        "poolingkernelsize":IntegerParameter(2,10)}

In [None]:
hyperparameters={ "trainfile":trainfile.split("/")[-1],
    "valfile":valfile.split("/")[-1],
    "embeddingfile":embeddingfile.split("/")[-1],
     "embeddim":"200",
    "epochs": 100,
                 "earlystoppingpatience": 20,
                        "dataset":"PpiAimedDatasetFactory"}

In [None]:

estimator = PyTorch(
     entry_point='main_train.py',
                    source_dir = 'source/algorithms',
                    dependencies =['source/algorithms', 'source/datasets','source/preprocessor'],
                    role=role,
                    framework_version ="1.0.0",
                    py_version='py3',
                   # git_config= git_config,
                    image_name= docker_repo,
                    train_instance_count=1,
                    train_instance_type=instance_type,
                    hyperparameters =hyperparameters,
                    output_path=s3_output_path,
                    metric_definitions=metric_definitions,
                    code_location=s3_code_path,
                    #train_use_spot_instances = True
                    train_volume_size=30,
                    base_job_name ="aimed-ppi-extractor")

In [None]:
tuner = HyperparameterTuner(estimator,
                            objective_metric_name,
                            hyperparameter_ranges,
                            metric_definitions,
                            max_jobs=50,
                            max_parallel_jobs=7,
                            strategy="Random",
                            base_tuning_job_name="hpo-aimed-ppi-extractor")
tuner.fit(inputs)