### Set up

#### 1. Set up accounts and role

In [1]:
# Optional: uncomment to install the SageMaker SDK version referenced by this notebook.
#!pip install sagemaker==1.39.0

In [2]:
import sagemaker
import boto3

# AWS context for this run: a SageMaker session, the region of the default
# boto3 session, and the account id of the calling identity (looked up via STS).
sagemaker_session = sagemaker.Session()
region = boto3.session.Session().region_name
account_id = boto3.client('sts').get_caller_identity().get('Account')

# IAM role for the training job; the ARN is assembled from the caller's account id.
# `sagemaker.get_execution_role()` is the alternative that was left disabled here.
role = "arn:aws:iam::{}:role/service-role/AmazonSageMaker-ExecutionRole-20190118T115449".format(
    account_id
)


#### 2. Set up image and instance type

In [3]:
# Training infrastructure: the instance type to train on and the custom image
# ("repository:tag") that is appended to the ECR registry host to form the image URI.
instance_type = "ml.p3.8xlarge"
pytorch_custom_image_name = "ppi-extractor:gpu-1.0.0-201910130520"

In [4]:
# Full image URI: <account>.dkr.ecr.<region>.amazonaws.com/<repository:tag>
docker_repo = "{}.dkr.ecr.{}.amazonaws.com/{}".format(
    account_id,
    region,
    pytorch_custom_image_name,
)

#### 3. Configure train/test and validation datasets

In [5]:
# S3 bucket used for every dataset, pretrained-model, code, and results path in this notebook.
bucket = 'aegovan-data'

In [6]:
# --- Training inputs ---------------------------------------------------------
# AIMed train / validation splits. Disabled alternatives kept for reference:
#   train: aimed/AIMedFull.json, aimed/AIMedtrain_pubmedoverlap.json
#   val:   aimed/AIMedval_pubmedoverlap.json
trainfile = "s3://{}/aimed/AIMedtrain.json".format(bucket)
valfile = "s3://{}/aimed/AIMedval.json".format(bucket)

# Pretrained BERT location. Disabled embedding-file alternatives:
#   embeddings/PubMed-and-PMC-w2v.bin.txt
#   embeddings/bio_nlp_vec/PubMed-shuffle-win-30.bin.txt
pretrained_bert = "s3://{}/embeddings/bert/".format(bucket)

# --- Training outputs --------------------------------------------------------
# Where results are written and where the packaged training code is uploaded.
s3_code_path = "s3://{}/aimed_bert_code".format(bucket)
s3_output_path = "s3://{}/results/".format(bucket)

### Start training

In [7]:
# Named S3 inputs for the training job; assigned to `inputs` and passed to `estimator.fit`.
pub_inputs = dict(
    train=trainfile,
    val=valfile,
    PRETRAINED_BIOBERT=pretrained_bert,
)

In [49]:
# Hyperparameters for the BERT run. `trainfile` / `valfile` carry only the file
# name, i.e. the last "/"-separated segment of the corresponding S3 URI.
BertNetworkFactoryhyperparameters = {
    "dataset": "PpiAimedDatasetFactory",
    "network": "RelationExtractorBioBertFactory",
    "trainfile": trainfile.rsplit("/", 1)[-1],
    "valfile": valfile.rsplit("/", 1)[-1],
    "batchsize": "8",
    "epochs": "1000",
    "log-level": "INFO",
    "learningrate": 1e-5,
    "earlystoppingpatience": 20,
}

In [50]:
# Metric definitions handed to the estimator (`metric_definitions=`).
# Each regex looks for a log line tagged `###score: <name>###` and captures the
# number that follows it in group 1.
#
# The patterns are raw strings so that `\d` reaches the regex engine verbatim;
# in a normal string literal `\d` is an invalid escape sequence that newer
# Python versions warn about. The resulting pattern text is unchanged.
metric_definitions = [
    {"Name": "TrainLoss",
     "Regex": r"###score: train_loss### (\d*[.]?\d*)"},
    {"Name": "ValidationLoss",
     "Regex": r"###score: val_loss### (\d*[.]?\d*)"},
    {"Name": "TrainFScore",
     "Regex": r"###score: train_fscore### (\d*[.]?\d*)"},
    {"Name": "ValidationFScore",
     "Regex": r"###score: val_fscore### (\d*[.]?\d*)"},
]

In [51]:
# Print two lines from `git log -1` for the local checkout: the first line
# (the `commit <hash>` line) and the fifth line (the commit message in the
# output below).
# NOTE(review): the training job is configured separately through `git_config`
# (repo + branch), which may not match this local commit. Confirm.
!git log -1 | head -1
!git log -1 | head -5 | tail -1

commit d271cb3afe738bfab69c71df3ce803ca2531f8f8
    Fix gpu


In [52]:
# Git source passed to the estimator as `git_config`: repository URL and branch.
git_config = dict(
    repo='https://github.com/elangovana/PPI-typed-relation-extractor.git',
    branch='master',
    # Commit pin, currently disabled:
    # commit='58a09e154935248667062a36fdae7d86b86b477c',
)

In [53]:
# Select the configuration for this run: both names are plain aliases (no copy)
# of the BERT hyperparameters and the S3 inputs defined above.
hyperparameters, inputs = BertNetworkFactoryhyperparameters, pub_inputs


In [54]:
# Display the hyperparameters that are passed to the estimator.
hyperparameters

{'batchsize': '8',
 'dataset': 'PpiAimedDatasetFactory',
 'earlystoppingpatience': 20,
 'epochs': '1000',
 'learningrate': 1e-05,
 'log-level': 'INFO',
 'network': 'RelationExtractorBioBertFactory',
 'trainfile': 'AIMedtrain.json',
 'valfile': 'AIMedval.json'}

In [55]:
# Display the S3 inputs that are passed to `estimator.fit`.
inputs

{'PRETRAINED_BIOBERT': 's3://aegovan-data/embeddings/bert/',
 'train': 's3://aegovan-data/aimed/AIMedtrain.json',
 'val': 's3://aegovan-data/aimed/AIMedval.json'}

In [56]:
from sagemaker.pytorch import PyTorch

# Estimator for the BERT relation-extraction training job. All arguments are
# passed by keyword, grouped by concern.
estimator = PyTorch(
    # --- training code (fetched according to git_config) ---
    entry_point='main_train_bert.py',  # disabled alternative: 'main_train_k_fold.py'
    source_dir='source/algorithms',
    dependencies=[
        'source/algorithms',
        'source/datasets',
        'source/preprocessor',
        'source/modelnetworks',
        'source/trainpipelinesbuilders',
    ],
    git_config=git_config,
    code_location=s3_code_path,
    # --- container ---
    image_name=docker_repo,
    framework_version="1.0.0",
    py_version='py3',
    # --- infrastructure ---
    role=role,
    train_instance_type=instance_type,
    train_instance_count=1,
    train_volume_size=30,
    # train_use_spot_instances=True,  # left disabled
    # --- job configuration ---
    base_job_name="aimed-ppi-bert-extractor",
    hyperparameters=hyperparameters,
    metric_definitions=metric_definitions,
    output_path=s3_output_path,
)

In [None]:
# Start the training job on the configured S3 inputs; the job log appears as this cell's output.
estimator.fit(inputs=inputs)

2019-10-13 06:14:12 Starting - Starting the training job...
2019-10-13 06:14:14 Starting - Launching requested ML instances...
2019-10-13 06:15:10 Starting - Preparing the instances for training......
2019-10-13 06:16:27 Downloading - Downloading input data
2019-10-13 06:16:27 Training - Downloading the training image............
2019-10-13 06:18:44 Training - Training image download completed. Training in progress.[31mbash: cannot set terminal process group (-1): Inappropriate ioctl for device[0m
[31mbash: no job control in this shell[0m
[31m2019-10-13 06:18:46,188 sagemaker-containers INFO     Imported framework sagemaker_pytorch_container.training[0m
[31m2019-10-13 06:18:46,232 sagemaker_pytorch_container.training INFO     Block until all host DNS lookups succeed.[0m
[31m2019-10-13 06:18:49,249 sagemaker_pytorch_container.training INFO     Invoking user training script.[0m
[31m2019-10-13 06:18:49,691 sagemaker-containers INFO     Module main_train_bert does not provide a 

[31m2019-10-13 06:19:04,042 - algorithms.transform_berttext_token_to_index - INFO - Completed TransformBertTextTokenToIndex[0m
[31m2019-10-13 06:19:04,055 - algorithms.transform_berttext_tokenise - INFO - Transforming TransformBertTextTokenise[0m
[31mThe retinoblastoma-susceptibility gene product binds directly to the human PROTEIN1 TAFII250.[0m
[31mThese cells contain a temperature-sensitive mutation in the PROTEIN1-associated factor TAFII250, first identified as the cell cycle regulatory protein CCG1.[0m
[31mSHPTP2 associates with the PROTEIN1 after ligand stimulation, and binding of SHPTP2 to this receptor promotes tyrosine phosphorylation of SHPTP2.[0m
[31mSHPTP2 associates with the PROTEIN1 (PDGF) receptor after ligand stimulation, and binding of SHPTP2 to this receptor promotes tyrosine phosphorylation of SHPTP2.[0m
[31mSREBP transcriptional activity is mediated through an interaction with the PROTEIN1-binding protein.[0m
[31m2019-10-13 06:19:04,421 - algorithms.tr

[31m2019-10-13 07:00:43,766 - algorithms.BertTrain - INFO - Train set result details:[0m
[31m2019-10-13 07:00:43,773 - algorithms.result_writer - INFO - Confusion matrix, full output in /opt/ml/output/data/predictedvsactual_79c3f392-1e63-44ab-aca7-4264aa5c4529_20191013_070043.csv: [0m
[31m[[3903   25]
 [   8  921]][0m
[31m2019-10-13 07:00:43,780 - algorithms.BertTrain - INFO - Train set result details: 0.9824[0m
[31m2019-10-13 07:00:43,780 - algorithms.BertTrain - INFO - Validation set result details:[0m
[31m2019-10-13 07:00:48,062 - algorithms.result_writer - INFO - Confusion matrix, full output in /opt/ml/output/data/predictedvsactual_250163d1-a531-4ecb-b123-afb09caff9bd_20191013_070048.csv: [0m
[31m[[282  22]
 [ 16  45]][0m
[31m2019-10-13 07:00:48,064 - algorithms.BertTrain - INFO - Validation set result details: 0.703125 [0m
[31m2019-10-13 07:00:48,064 - algorithms.BertTrain - INFO - Run   2503     8      5472     2/608         0% 15.340616 20.660026       0.9824  