# Push Docker Image to AWS Elastic Container Registry

In [24]:
!cd container; ./build_and_push.sh drug-prediction-gcn

https://docs.docker.com/engine/reference/commandline/login/#credentials-store

Login Succeeded
Sending build context to Docker daemon  125.4kB
Step 1/13 : ARG REGION=us-west-2
Step 2/13 : FROM continuumio/miniconda3
 ---> 406f2b43ea59
Step 3/13 : RUN apt-get update
 ---> Using cache
 ---> 01b536fe282c
Step 4/13 : RUN apt-get install -y build-essential
 ---> Using cache
 ---> 7a497c4b78b1
Step 5/13 : RUN conda install -y -c conda-forge rdkit==2018.09.3
 ---> Using cache
 ---> ac7f0adc29fc
Step 6/13 : RUN conda install pytorch-cpu torchvision -y -c pytorch
 ---> Using cache
 ---> 895cb85379f1
Step 7/13 : RUN pip install sagemaker-containers
 ---> Using cache
 ---> 836f6e41614f
Step 8/13 : RUN conda install -y -c conda-forge matplotlib
 ---> Using cache
 ---> 506e420b1ba2
Step 9/13 : RUN pip install scikit-learn==0.21.3 dgl==0.4.1 seaborn
 ---> Using cache
 ---> a425e0e892e9
Step 10/13 : ENV PATH="/opt/ml/code:${PATH}"
 ---> Using cache
 ---> ad6487c013f2
Step 11/13 : COPY /src /opt/ml/co

# Run Local Training Job

In [None]:
# Download the dataset to local

# download pretrained mol2vec model
!wget -O data/model_300dim.pkl https://github.com/samoturk/mol2vec/raw/master/examples/models/model_300dim.pkl model_300dim.pkl   

# download example data     
!wget -O data/ames.sdf https://github.com/samoturk/mol2vec/raw/master/examples/data/ames.sdf
    
#download hiv data
!wget -O data/hiv.zip https://s3-us-west-1.amazonaws.com/deepchem.io/datasets/molnet_publish/hiv.zip
!cd data; unzip hiv.zip

In [25]:
from sagemaker import get_execution_role

# Execution role obtained from the SageMaker SDK; it is passed as `role=role`
# to the Estimator objects created in the training cells.
role = get_execution_role()

In [26]:
import os
import subprocess

# Default to CPU local mode; switch to GPU local mode only when nvidia-smi
# runs successfully on this machine.
instance_type = 'local'

try:
    # A zero exit status from nvidia-smi is taken to mean a GPU is available.
    if subprocess.call('nvidia-smi') == 0:
        ## Set type to GPU if one is present
        instance_type = 'local_gpu'
except OSError:
    # nvidia-smi is not installed (FileNotFoundError) or cannot be executed:
    # treat that as "no GPU" rather than failing the cell.
    instance_type = 'local'

print("Instance type = " + instance_type)

Instance type = local


In [27]:
import os

from sagemaker.estimator import Estimator

# Hyperparameters handed to the Estimator for the local training run.
hyperparameters = {'dev-mode': True,
                   'epochs': 200,
                   'learning-rate': 1e-4,
                   'gcn-hidden-feats': 64,
                   'classifier-hidden-feats': 64,
                   'batch-size': 200,
                   'num-hidden-layers': 2}

# Build the file:// URI from the ./data directory that the download cell
# populates, instead of hard-coding an absolute home-directory path.
local_data_uri = 'file://' + os.path.abspath('data')

# `instance_type` is 'local' or 'local_gpu' (set by the detection cell), so the
# locally built image is run on this machine.
estimator = Estimator(role=role,
                      train_instance_count=1,
                      train_instance_type=instance_type,
                      image_name='drug-prediction-gcn:latest',
                      hyperparameters=hyperparameters)

estimator.fit(local_data_uri)

Creating tmpcb2ptwrz_algo-1-pyh1k_1 ... 
[1BAttaching to tmpcb2ptwrz_algo-1-pyh1k_12mdone[0m
[36malgo-1-pyh1k_1  |[0m 2020-01-27 19:19:12,375 sagemaker-containers INFO     No GPUs detected (normal if no gpus installed)
[36malgo-1-pyh1k_1  |[0m 2020-01-27 19:19:12,376 sagemaker-containers INFO     Failed to parse hyperparameter dev-mode value True to Json.
[36malgo-1-pyh1k_1  |[0m Returning the value itself
[36malgo-1-pyh1k_1  |[0m 2020-01-27 19:19:12,389 sagemaker-containers INFO     Failed to parse hyperparameter dev-mode value True to Json.
[36malgo-1-pyh1k_1  |[0m Returning the value itself
[36malgo-1-pyh1k_1  |[0m 2020-01-27 19:19:12,392 sagemaker-containers INFO     No GPUs detected (normal if no gpus installed)
[36malgo-1-pyh1k_1  |[0m 2020-01-27 19:19:12,404 sagemaker-containers INFO     Failed to parse hyperparameter dev-mode value True to Json.
[36malgo-1-pyh1k_1  |[0m Returning the value itself
[36malgo-1-pyh1k_1  |[0m 2020-01-27 19:19:12,407 sagemaker-con

# Run a SageMaker Training Job

In [28]:
import sagemaker as sage

# Create a SageMaker session object with default arguments.
# NOTE(review): `sess` is not referenced by the other cells visible here —
# confirm it is still needed.
sess = sage.Session()

In [29]:
import boto3

# Repository name of the training image.
algorithm_name = 'drug-prediction-gcn'

# Region as reported by a freshly created boto3 session.
my_session = boto3.session.Session()
region = my_session.region_name

# Account id of the caller, looked up through STS.
client = boto3.client('sts')
account = client.get_caller_identity()['Account']

# Full image URI: <account>.dkr.ecr.<region>.amazonaws.com/<repository>:latest
ecr_image = f'{account}.dkr.ecr.{region}.amazonaws.com/{algorithm_name}:latest'

print(ecr_image)

405787280505.dkr.ecr.us-west-2.amazonaws.com/drug-prediction-gcn:latest


In [30]:
from sagemaker.estimator import Estimator

# S3 locations for the training input and for the job output.
data_location = 's3://jdurago-insight-2020a/data'
output_path = 's3://jdurago-insight-2020a/output/baseline/'
max_run_time = 3*60*60 # train for max of 3 hours

# Hyperparameters handed to the Estimator for the hosted training run.
hyperparameters = {'dev-mode': False,
                   'epochs': 200,
                   'learning-rate': 1e-4,
                   'gcn-hidden-feats': 64,
                   'classifier-hidden-feats': 64,
                   'batch-size': 200,
                   'num-hidden-layers': 2}

# Regexes used to extract metrics from the training log.
# Raw strings keep the `\d` escapes without invalid-escape warnings; the
# pattern text itself is unchanged.
# NOTE(review): the trailing `,;` in the train:loss pattern and the unescaped
# `.` look unintentional — confirm against the lines the training script prints.
metric_definitions = [{"Name": "train:loss",
                       "Regex": r"train_loss:(\d+.\d+),;"},
                      {"Name": "test:score",
                       "Regex": r"test_score:(\d+.\d+);"}]

instance_type = 'ml.m5.4xlarge'

# metric_definitions must be passed to the Estimator; previously the list was
# built but never used, so no metrics were attached to the training job.
estimator = Estimator(role=role,
                      train_instance_count=1,
                      train_instance_type=instance_type,
                      image_name=ecr_image,
                      hyperparameters=hyperparameters,
                      metric_definitions=metric_definitions,
                      output_path=output_path,
                      train_max_run=max_run_time)

estimator.fit(data_location)


2020-01-27 19:21:47 Starting - Starting the training job...
2020-01-27 19:21:48 Starting - Launching requested ML instances...
2020-01-27 19:22:45 Starting - Preparing the instances for training......
2020-01-27 19:23:29 Downloading - Downloading input data
2020-01-27 19:23:29 Training - Downloading the training image...............
2020-01-27 19:26:04 Training - Training image download completed. Training in progress.[34m2020-01-27 19:26:04,508 sagemaker-containers INFO     No GPUs detected (normal if no gpus installed)[0m
[34m2020-01-27 19:26:04,508 sagemaker-containers INFO     Failed to parse hyperparameter dev-mode value False to Json.[0m
[34mReturning the value itself[0m
[34m2020-01-27 19:26:05,941 sagemaker-containers INFO     Failed to parse hyperparameter dev-mode value False to Json.[0m
[34mReturning the value itself[0m
[34m2020-01-27 19:26:05,944 sagemaker-containers INFO     No GPUs detected (normal if no gpus installed)[0m
[34m2020-01-27 19:26:05,954 sagemaker