# Push Docker Image to AWS Elastic Container Registry

In [6]:
!cd container; ./build_and_push.sh drug-prediction-gcn

https://docs.docker.com/engine/reference/commandline/login/#credentials-store

Login Succeeded
Sending build context to Docker daemon  129.5kB
Step 1/13 : ARG REGION=us-west-2
Step 2/13 : FROM continuumio/miniconda3
 ---> 406f2b43ea59
Step 3/13 : RUN apt-get update
 ---> Using cache
 ---> 817b945a98f3
Step 4/13 : RUN apt-get install -y build-essential
 ---> Using cache
 ---> 3d7693cf10c5
Step 5/13 : RUN conda install -y -c conda-forge rdkit==2018.09.3
 ---> Using cache
 ---> 783cc015d080
Step 6/13 : RUN conda install pytorch-cpu torchvision -y -c pytorch
 ---> Using cache
 ---> be7a1fe86b74
Step 7/13 : RUN pip install sagemaker-containers
 ---> Using cache
 ---> 4d709d050584
Step 8/13 : RUN conda install -y -c conda-forge matplotlib
 ---> Using cache
 ---> 2b35e0e1353d
Step 9/13 : RUN pip install scikit-learn==0.21.3 dgl==0.4.1 seaborn
 ---> Using cache
 ---> 509f94486440
Step 10/13 : ENV PATH="/opt/ml/code:${PATH}"
 ---> Using cache
 ---> ed4eb32f9860
Step 11/13 : COPY /src /opt/ml/co

# Run Local Training Job

In [None]:
# Download the dataset to local

# download pretrained mol2vec model
!wget -O data/model_300dim.pkl https://github.com/samoturk/mol2vec/raw/master/examples/models/model_300dim.pkl model_300dim.pkl   

# download example data     
!wget -O data/ames.sdf https://github.com/samoturk/mol2vec/raw/master/examples/data/ames.sdf
    
#download hiv data
!wget -O data/hiv.zip https://s3-us-west-1.amazonaws.com/deepchem.io/datasets/molnet_publish/hiv.zip
!cd data; unzip hiv.zip

In [7]:
from sagemaker import get_execution_role

# Role returned by the SageMaker SDK for the environment this notebook runs in.
# It is passed as the `role=` argument to the Estimator constructors in the cells below.
role = get_execution_role()

In [8]:
import os
import subprocess

# Default to CPU local mode; switched to GPU local mode below when nvidia-smi succeeds.
instance_type = 'local'

try:
    # A zero exit status from nvidia-smi is treated as "a GPU is present".
    gpu_available = subprocess.call('nvidia-smi') == 0
except OSError:
    # nvidia-smi is not installed / not executable on this host: subprocess.call
    # raises instead of returning a status, so fall back to CPU local mode.
    gpu_available = False

if gpu_available:
    ## Set type to GPU if one is present
    instance_type = 'local_gpu'

print("Instance type = " + instance_type)

Instance type = local


In [10]:
import os

from sagemaker.estimator import Estimator

# Hyperparameters forwarded to the training container for the local run.
hyperparameters = {'dev-mode': True,
                   'net-type': 'GAT',
                   'epochs': 200,
                   'learning-rate': 1e-4,
                   'gcn-hidden-feats': 64,
                   'classifier-hidden-feats': 64,
                   'batch-size': 200,
                   'num-hidden-layers': 2,
                   'random-state': -1}

# Training data directory, resolved relative to the notebook's working directory
# (the same ./data folder the download cell writes into) instead of hardcoding
# one machine's absolute path.
# NOTE(review): assumes the notebook is run from the project root — confirm.
local_data_uri = 'file://' + os.path.abspath('data')

# GAT auc_roc:0.6333333333333333, f1_score:0.5714285714285715
# GCN 
estimator = Estimator(role=role,
                      train_instance_count=1,
                      train_instance_type=instance_type,
                      image_name='drug-prediction-gcn:latest',
                      hyperparameters=hyperparameters)

# Run the training job in local mode against the local data directory.
estimator.fit(local_data_uri)

Creating tmp6l3q56xk_algo-1-sya8e_1 ... 
[1BAttaching to tmp6l3q56xk_algo-1-sya8e_12mdone[0m
[36malgo-1-sya8e_1  |[0m 2020-01-28 21:33:29,800 sagemaker-containers INFO     No GPUs detected (normal if no gpus installed)
[36malgo-1-sya8e_1  |[0m 2020-01-28 21:33:29,801 sagemaker-containers INFO     Failed to parse hyperparameter dev-mode value True to Json.
[36malgo-1-sya8e_1  |[0m Returning the value itself
[36malgo-1-sya8e_1  |[0m 2020-01-28 21:33:29,802 sagemaker-containers INFO     Failed to parse hyperparameter net-type value GCN to Json.
[36malgo-1-sya8e_1  |[0m Returning the value itself
[36malgo-1-sya8e_1  |[0m 2020-01-28 21:33:29,816 sagemaker-containers INFO     Failed to parse hyperparameter dev-mode value True to Json.
[36malgo-1-sya8e_1  |[0m Returning the value itself
[36malgo-1-sya8e_1  |[0m 2020-01-28 21:33:29,816 sagemaker-containers INFO     Failed to parse hyperparameter net-type value GCN to Json.
[36malgo-1-sya8e_1  |[0m Returning the value itself

# Run a SageMaker Training Job

In [17]:
import sagemaker as sage

# Create a SageMaker SDK session with default settings (no arguments are passed).
# NOTE(review): `sess` is not passed to any Estimator in the visible cells — confirm it is still needed.
sess = sage.Session()

In [18]:
import boto3

# Name of the ECR repository that holds the training image.
algorithm_name = 'drug-prediction-gcn'

# Region name as reported by a default boto3 session.
my_session = boto3.session.Session()
region = my_session.region_name

# Account ID of the caller, looked up through STS.
client = boto3.client('sts')
caller_identity = client.get_caller_identity()
account = caller_identity['Account']

# Assemble the image URI from its three variable parts, tagged `latest`.
image_uri_parts = (account, region, algorithm_name)
ecr_image = '{}.dkr.ecr.{}.amazonaws.com/{}:latest'.format(*image_uri_parts)

print(ecr_image)

405787280505.dkr.ecr.us-west-2.amazonaws.com/drug-prediction-gcn:latest


In [None]:
from sagemaker.estimator import Estimator

# S3 input location and the output location handed to the Estimator.
data_location = 's3://jdurago-insight-2020a/data'
output_path = 's3://jdurago-insight-2020a/output/baseline/'
max_run_time = 3*60*60 # train for max of 3 hours

# Hyperparameters forwarded to the training container.
hyperparameters = {'dev-mode': False,
                   'net-type': 'GAT',
                   'epochs': 200,
                   'learning-rate': 1e-4,
                   'gcn-hidden-feats': 64,
                   'classifier-hidden-feats': 64,
                   'batch-size': 200,
                   'num-hidden-layers': 2}

# Regexes used to pull metric values out of the training log.
# Raw strings keep `\d` from being an invalid string escape, and the decimal
# point is escaped so it matches only a literal '.' rather than any character.
metric_definitions = [{"Name": "train:loss",
                       "Regex": r"train_loss:(\d+\.\d+)"},
                      {"Name": "test:auc_roc",
                       "Regex": r"test_score:(\d+\.\d+)"},
                      {"Name": "val:auc_roc",
                       "Regex": r"auc_roc:(\d+\.\d+)"},
                      {"Name": "val:f1_score",
                       "Regex": r"f1_score:(\d+\.\d+)"}
                      ]

instance_type = 'ml.m5.4xlarge'

# Tag the job with the network type so runs can be told apart.
tags = [{'Key': 'net-type',
         'Value': hyperparameters['net-type']}]

# `role` and `ecr_image` come from earlier cells of this notebook.
estimator = Estimator(role=role,
                      train_instance_count=1,
                      train_instance_type=instance_type,
                      image_name=ecr_image,
                      hyperparameters=hyperparameters,
                      output_path=output_path,
                      train_max_run=max_run_time,
                      metric_definitions=metric_definitions,
                      tags=tags)

estimator.fit(data_location)


2020-01-28 21:42:34 Starting - Starting the training job...
2020-01-28 21:42:35 Starting - Launching requested ML instances......
2020-01-28 21:43:35 Starting - Preparing the instances for training...
2020-01-28 21:44:20 Downloading - Downloading input data
2020-01-28 21:44:20 Training - Downloading the training image............
2020-01-28 21:46:26 Training - Training image download completed. Training in progress..[34m2020-01-28 21:46:26,705 sagemaker-containers INFO     No GPUs detected (normal if no gpus installed)[0m
[34m2020-01-28 21:46:26,705 sagemaker-containers INFO     Failed to parse hyperparameter net-type value GAT to Json.[0m
[34mReturning the value itself[0m
[34m2020-01-28 21:46:26,705 sagemaker-containers INFO     Failed to parse hyperparameter dev-mode value False to Json.[0m
[34mReturning the value itself[0m
[34m2020-01-28 21:46:27,369 sagemaker-containers INFO     Failed to parse hyperparameter net-type value GAT to Json.[0m
[34mReturning the value itsel

# Hyperparameter Tuning

In [15]:
from sagemaker.tuner import IntegerParameter, CategoricalParameter, ContinuousParameter, HyperparameterTuner
from sagemaker.estimator import Estimator
import sagemaker as sage

sess = sage.Session()

# Tuning budget: total number of training jobs and how many may run at once.
max_jobs = 64
max_parallel_jobs = 4

# S3 input location and the output location handed to the Estimator.
data_location = 's3://jdurago-insight-2020a/data'
output_path = 's3://jdurago-insight-2020a/output/baseline/'
max_run_time = 3*60*60 # train for max of 3 hours
instance_type = 'ml.m5.4xlarge'

# Hyperparameters held fixed for every tuning job.
hyperparameters = {'dev-mode': False,
                   'net-type': 'GCN',
                   'epochs': 200,
                   'batch-size': 200,
                   'learning-rate': 1e-4}

# Tag the jobs with the network type so runs can be told apart.
tags = [{'Key': 'net-type',
         'Value': hyperparameters['net-type']}]

# Hyperparameters explored by the tuner, each drawn from a discrete set of values.
hyperparameter_ranges = {'gcn-hidden-feats': CategoricalParameter([32, 64, 128, 256, 512]),
                         'classifier-hidden-feats': CategoricalParameter([32, 64, 128, 256, 512]),
                         'num-hidden-layers': CategoricalParameter([2, 3, 4, 5, 6]),}

# The tuner maximizes the `val:f1_score` metric defined below.
objective_metric_name = 'val:f1_score'
objective_type = 'Maximize'

# Regexes used to pull metric values out of the training log.
# Raw strings keep `\d` from being an invalid string escape, and the decimal
# point is escaped so it matches only a literal '.' rather than any character.
metric_definitions = [{"Name": "train:loss",
                       "Regex": r"train_loss:(\d+\.\d+)"},
                      {"Name": "test:auc_roc",
                       "Regex": r"test_score:(\d+\.\d+)"},
                      {"Name": "val:auc_roc",
                       "Regex": r"auc_roc:(\d+\.\d+)"},
                      {"Name": "val:f1_score",
                       "Regex": r"f1_score:(\d+\.\d+)"}
                      ]

# `role` and `ecr_image` come from earlier cells of this notebook.
estimator = Estimator(role=role,
                      train_instance_count=1,
                      train_instance_type=instance_type,
                      image_name=ecr_image,
                      hyperparameters=hyperparameters,
                      output_path=output_path,
                      train_max_run=max_run_time,
                      metric_definitions=metric_definitions,
                      tags=tags)

tuner = HyperparameterTuner(estimator,
                            objective_metric_name,
                            hyperparameter_ranges,
                            metric_definitions=metric_definitions,
                            max_jobs=max_jobs,
                            max_parallel_jobs=max_parallel_jobs,
                            objective_type=objective_type)

tuner.fit(data_location)