# Push Docker Image to AWS Elastic Container Registry

In [9]:
!cd container; ./build_and_push.sh drug-prediction-gnn

https://docs.docker.com/engine/reference/commandline/login/#credentials-store

Login Succeeded
Sending build context to Docker daemon  122.9kB
Step 1/13 : ARG REGION=us-west-2
Step 2/13 : FROM continuumio/miniconda3
 ---> 406f2b43ea59
Step 3/13 : RUN apt-get update
 ---> Using cache
 ---> eadb9e6511ac
Step 4/13 : RUN apt-get install -y build-essential
 ---> Using cache
 ---> 3d766abc469b
Step 5/13 : RUN conda install -y -c conda-forge rdkit==2018.09.3
 ---> Using cache
 ---> bcc7f871be57
Step 6/13 : RUN conda install pytorch-cpu torchvision -y -c pytorch
 ---> Using cache
 ---> 36b5e4f72e70
Step 7/13 : RUN pip install sagemaker-containers
 ---> Using cache
 ---> 358efac188c9
Step 8/13 : RUN conda install -y -c conda-forge matplotlib
 ---> Using cache
 ---> e9d6d470e62d
Step 9/13 : RUN pip install scikit-learn==0.21.3 dgl==0.4.1 seaborn
 ---> Using cache
 ---> 4a52a057564d
Step 10/13 : ENV PATH="/opt/ml/code:${PATH}"
 ---> Using cache
 ---> ddf9f158658c
Step 11/13 : COPY /src /opt/ml/co

# Run Local Training Job

In [None]:
# Download the dataset to local

# download pretrained mol2vec model
!wget -O data/model_300dim.pkl https://github.com/samoturk/mol2vec/raw/master/examples/models/model_300dim.pkl model_300dim.pkl   

# download example data     
!wget -O data/ames.sdf https://github.com/samoturk/mol2vec/raw/master/examples/data/ames.sdf
    
#download hiv data
!wget -O data/hiv.zip https://s3-us-west-1.amazonaws.com/deepchem.io/datasets/molnet_publish/hiv.zip
!cd data; unzip hiv.zip

In [3]:
# Resolve the execution role once; `role` is passed as `role=` to every Estimator
# constructed in the cells below.
from sagemaker import get_execution_role

# NOTE(review): presumably returns the IAM role attached to this notebook instance —
# confirm against the SageMaker SDK documentation.
role = get_execution_role()

In [4]:
import os
import subprocess

# Default to the CPU variant; switched to the GPU variant only when `nvidia-smi` succeeds.
instance_type = 'local'

# `subprocess.call` raises OSError (e.g. FileNotFoundError) when `nvidia-smi` is not
# installed or cannot be executed. Treat that the same as "no GPU" instead of
# aborting the notebook.
try:
    gpu_present = subprocess.call('nvidia-smi') == 0
except OSError:
    gpu_present = False

if gpu_present:
    ## Set type to GPU if one is present
    instance_type = 'local_gpu'

print("Instance type = " + instance_type)

Instance type = local


In [5]:
import os

from sagemaker.estimator import Estimator

# Alternative GAT configuration, kept for reference:
# hyperparameters = {'dev-mode':True,
#                    'net-type': 'GAT',
#                    'epochs': 200,
#                    'learning-rate':1e-4,
#                    'gcn-hidden-feats':64,
#                    'classifier-hidden-feats':64,
#                    'batch-size':200,
#                    'num-hidden-layers':2,
#                    'random-state':-1}

# GCN configuration for the local run: only 2 epochs here, versus 200 in the
# reference configuration above.
hyperparameters = {'dev-mode':False,
                   'net-type': 'GCN',
                   'epochs': 2,
                   'learning-rate':1e-4,
                   'gcn-hidden-feats':128,
                   'classifier-hidden-feats':512,
                   'batch-size':200,
                   'num-hidden-layers':2}

# `role` and `instance_type` ('local' or 'local_gpu') come from the preceding cells;
# the image is referenced by its local tag rather than an ECR URI.
estimator = Estimator(role=role,
                      train_instance_count=1,
                      train_instance_type=instance_type,
                      image_name='drug-prediction-gnn:latest',
                      hyperparameters=hyperparameters)

# Point the job at the same ./data directory the download cell populated, resolved
# to an absolute file:// URI, instead of a hard-coded /home/ec2-user/... path that
# only exists on one machine.
local_data_uri = 'file://' + os.path.abspath('data')

estimator.fit(local_data_uri)

Creating tmpiye0wgtp_algo-1-ix067_1 ... 
[1BAttaching to tmpiye0wgtp_algo-1-ix067_12mdone[0m
[36malgo-1-ix067_1  |[0m 2020-01-30 22:05:56,749 sagemaker-containers INFO     No GPUs detected (normal if no gpus installed)
[36malgo-1-ix067_1  |[0m 2020-01-30 22:05:56,750 sagemaker-containers INFO     Failed to parse hyperparameter dev-mode value False to Json.
[36malgo-1-ix067_1  |[0m Returning the value itself
[36malgo-1-ix067_1  |[0m 2020-01-30 22:05:56,750 sagemaker-containers INFO     Failed to parse hyperparameter net-type value GCN to Json.
[36malgo-1-ix067_1  |[0m Returning the value itself
[36malgo-1-ix067_1  |[0m 2020-01-30 22:05:56,772 sagemaker-containers INFO     Failed to parse hyperparameter dev-mode value False to Json.
[36malgo-1-ix067_1  |[0m Returning the value itself
[36malgo-1-ix067_1  |[0m 2020-01-30 22:05:56,772 sagemaker-containers INFO     Failed to parse hyperparameter net-type value GCN to Json.
[36malgo-1-ix067_1  |[0m Returning the value itse

# Run a SageMaker Training Job

In [10]:
# Create a SageMaker session object.
# NOTE(review): `sess` is not passed to any Estimator or tuner in the visible cells,
# and the hyperparameter tuning cell creates it again — confirm whether it is needed.
import sagemaker as sage

sess = sage.Session()

In [11]:
import boto3

# Name of the algorithm image; it forms the repository part of the image URI below.
algorithm_name = 'drug-prediction-gnn'

# Account ID of the current caller, looked up through STS.
client = boto3.client('sts')
account = client.get_caller_identity()['Account']

# Region configured for the default boto3 session.
my_session = boto3.session.Session()
region = my_session.region_name

# Assemble the image URI (":latest" tag) from account, region and algorithm name.
ecr_image = '{}.dkr.ecr.{}.amazonaws.com/{}:latest'.format(
    account,
    region,
    algorithm_name,
)

print(ecr_image)

405787280505.dkr.ecr.us-west-2.amazonaws.com/drug-prediction-gcn:latest


In [8]:
from sagemaker.estimator import Estimator

# S3 input data and output locations for the training job.
data_location = 's3://jdurago-insight-2020a/data'
output_path = 's3://jdurago-insight-2020a/output/baseline/'
max_run_time = 3*60*60 # train for max of 3 hours

hyperparameters = {'dev-mode':False,
                   'net-type': 'GCN',
                   'epochs': 200,
                   'learning-rate':1e-4,
                   'gcn-hidden-feats':128,
                   'classifier-hidden-feats':512,
                   'batch-size':200,
                   'num-hidden-layers':6}


# defaults
# NOTE(review): 'num-hidden-layers' appears twice in the list below (2 and 6);
# confirm which value is the real default.
# hyperparameters = {'dev-mode':False,
#                    'net-type': 'GCN',
#                    'epochs': 200,
#                    'learning-rate':1e-4,
#                    'gcn-hidden-feats':64,
#                    'gat-hidden-feats':32,
#                    'num-hidden-layers':2,
#                    'num-head-layers':2,
#                    'head-output-size':4,
#                    'classifier-hidden-feats':64,
#                    'batch-size':200,
#                    'num-hidden-layers':6}

# The network type doubles as the job-name prefix and as a tag value.
base_job_name = hyperparameters['net-type']

# Each regex captures one decimal number following its label.
# Raw strings keep `\d` from being parsed as an (invalid) string escape, and `\.`
# matches the literal decimal point instead of any character.
metric_definitions = [{"Name": "train:loss",
                       "Regex": r"train_loss:(\d+\.\d+)"},
                      {"Name": "test:auc_roc",
                       "Regex": r"test_score:(\d+\.\d+)"},
                      {"Name": "val:auc_roc",
                       "Regex": r"auc_roc:(\d+\.\d+)"},
                      {"Name": "val:f1_score",
                       "Regex": r"f1_score:(\d+\.\d+)"}
                     ]

instance_type = 'ml.m5.12xlarge'

tags = [{'Key': 'net-type',
         'Value': hyperparameters['net-type']}]

# `role` and `ecr_image` are defined in earlier cells of this notebook.
estimator = Estimator(role=role,
                      base_job_name=base_job_name,
                      train_instance_count=1,
                      train_instance_type=instance_type,
                      image_name=ecr_image,
                      hyperparameters=hyperparameters,
                      output_path=output_path,
                      train_max_run=max_run_time,
                      metric_definitions=metric_definitions,
                      tags=tags)

estimator.fit(data_location)


2020-01-30 22:11:06 Starting - Starting the training job...
2020-01-30 22:11:08 Starting - Launching requested ML instances......
2020-01-30 22:12:09 Starting - Preparing the instances for training...
2020-01-30 22:12:56 Downloading - Downloading input data
2020-01-30 22:12:56 Training - Downloading the training image..............[34m2020-01-30 22:15:10,949 sagemaker-containers INFO     No GPUs detected (normal if no gpus installed)[0m
[34m2020-01-30 22:15:10,950 sagemaker-containers INFO     Failed to parse hyperparameter net-type value GCN to Json.[0m
[34mReturning the value itself[0m
[34m2020-01-30 22:15:10,950 sagemaker-containers INFO     Failed to parse hyperparameter dev-mode value False to Json.[0m
[34mReturning the value itself[0m
[34m2020-01-30 22:15:13,978 sagemaker-containers INFO     Failed to parse hyperparameter net-type value GCN to Json.[0m
[34mReturning the value itself[0m
[34m2020-01-30 22:15:13,978 sagemaker-containers INFO     Failed to parse hyperp

KeyboardInterrupt: 

# Hyperparameter Tuning

In [12]:
from sagemaker.tuner import IntegerParameter, CategoricalParameter, ContinuousParameter, HyperparameterTuner
from sagemaker.estimator import Estimator
import sagemaker as sage

sess = sage.Session()

# Tuning budget: total number of training jobs and how many may run at once.
max_jobs = 64
max_parallel_jobs = 4

data_location = 's3://jdurago-insight-2020a/data'
output_path = 's3://jdurago-insight-2020a/output/baseline/'
max_run_time = 6*60*60 # train for max of 6 hours
instance_type = 'ml.m5.4xlarge'


# Fixed hyperparameters shared by every tuning trial; the tuned ones are listed
# in `hyperparameter_ranges` below.
hyperparameters = {'dev-mode':False,
                   'net-type': 'GAT',
                   'epochs': 200,
                   'batch-size':200,
                   'learning-rate':1e-4}

base_tuning_job_name = 'hpm-tuning-' + hyperparameters['net-type']

tags = [{'Key': 'net-type',
         'Value': hyperparameters['net-type']}]

# For GCN
# hyperparameter_ranges = {'gcn-hidden-feats': CategoricalParameter([32,64,128,256,512]),
#                          'classifier-hidden-feats': CategoricalParameter([32,64,128,256,512]),
#                          'num-hidden-layers': CategoricalParameter([2,3,4,5,6])}

# For GAT
hyperparameter_ranges = {'gat-hidden-feats': CategoricalParameter([32,64,128]),
                         'classifier-hidden-feats': CategoricalParameter([64,128,256,512]),
                         'num-head-layers': CategoricalParameter([2,3,4,5,6]),
                         'head-output-size': CategoricalParameter([2,3,4,5,6])}


# The objective metric name must match one of the "Name" entries in `metric_definitions`.
objective_metric_name = 'val:f1_score'
objective_type = 'Maximize'

# Each regex captures one decimal number following its label.
# Raw strings keep `\d` from being parsed as an (invalid) string escape, and `\.`
# matches the literal decimal point instead of any character.
metric_definitions = [{"Name": "train:loss",
                       "Regex": r"train_loss:(\d+\.\d+)"},
                      {"Name": "test:auc_roc",
                       "Regex": r"test_score:(\d+\.\d+)"},
                      {"Name": "val:auc_roc",
                       "Regex": r"auc_roc:(\d+\.\d+)"},
                      {"Name": "val:f1_score",
                       "Regex": r"f1_score:(\d+\.\d+)"}
                     ]

# `role` and `ecr_image` are defined in earlier cells of this notebook.
estimator = Estimator(role=role,
                      train_instance_count=1,
                      train_instance_type=instance_type,
                      image_name=ecr_image,
                      hyperparameters=hyperparameters,
                      output_path=output_path,
                      train_max_run=max_run_time,
                      metric_definitions=metric_definitions,
                      tags=tags)

# `metric_definitions` is passed by keyword so the call does not depend on
# positional parameter order.
tuner = HyperparameterTuner(estimator,
                            objective_metric_name,
                            hyperparameter_ranges,
                            metric_definitions=metric_definitions,
                            max_jobs=max_jobs,
                            base_tuning_job_name=base_tuning_job_name,
                            max_parallel_jobs=max_parallel_jobs,
                            objective_type=objective_type)

tuner.fit(data_location)