In [1]:
import os
import io
import ast
import boto3
import sagemaker
import numpy as np
import pandas as pd

from sagemaker.pytorch import PyTorch

from ibov.utils import load_config
from ibov.deploy import get_deploy_config, define_model

### Loading Configs

In [24]:
# Load the project configuration via the ibov.utils helper. The cells below
# read their settings from this object with chained `.get(...)` lookups.
config = load_config()

In [3]:
# SageMaker and data-location settings read from the loaded config.
sagemaker_cfg = config.get("sagemaker")

role = sagemaker_cfg.get("role")
# NOTE(review): the key is spelled "bucket_prefx" — confirm it matches the config file.
prefix = sagemaker_cfg.get("bucket_prefx")
data_dir = config.get("data").get("dir")
region = sagemaker_cfg.get("region")

In [4]:
# Model and feature settings read from the loaded config.
# NOTE(review): `lr` is read here but is not among the hyperparameters passed
# to the PyTorch estimator in the training cell — confirm whether it should be.
model_cfg = config.get("model")
feature_cfg = config.get("feature")

window = feature_cfg.get("window")
dropout, hidden_layer, lr, seed, epochs = (
    model_cfg.get(key) for key in ("dropout", "hidden_layer", "lr", "seed", "epochs")
)

### Upload Data

In [5]:
# Build the SageMaker session on top of a boto3 session pinned to the
# configured region, then look up that session's default bucket.
boto_session = boto3.session.Session(region_name=region)
session = sagemaker.Session(boto_session=boto_session)
bucket = session.default_bucket()

In [6]:
# Upload the path in `data_dir` and the local config.json to the same
# bucket/prefix; the returned values are passed to estimator.fit below.
upload_target = {"bucket": bucket, "key_prefix": prefix}

input_data = session.upload_data(path=data_dir, **upload_target)
input_config = session.upload_data(path="config.json", **upload_target)

### Train Model

In [7]:
# Hyperparameters for the training job: the batch size is fixed here, the
# remaining values come from the config cells above.
hyperparameters = {
    "batch-size": 50,
    "epochs": epochs,
    "seed": seed,
    "input-layer": window,
    "hidden-layer": hidden_layer,
    "dropout": dropout,
}

# PyTorch estimator that runs ibov/train.py on a single ml.p2.xlarge instance.
estimator = PyTorch(
    entry_point="train.py",
    source_dir="ibov",
    py_version="py3",
    role=role,
    framework_version="0.4.0",
    instance_count=1,
    instance_type="ml.p2.xlarge",
    hyperparameters=hyperparameters,
)

In [8]:
# Launch the training job with two input channels: the uploaded training
# data and the uploaded config file.
channels = {"train": input_data, "config": input_config}
estimator.fit(channels)

2021-01-26 15:19:48 Starting - Starting the training job...
2021-01-26 15:20:12 Starting - Launching requested ML instancesProfilerReport-1611674386: InProgress
......
2021-01-26 15:21:33 Starting - Preparing the instances for training.........
2021-01-26 15:23:14 Downloading - Downloading input data
2021-01-26 15:23:14 Training - Downloading the training image...
2021-01-26 15:23:35 Training - Training image download completed. Training in progress.[34mbash: cannot set terminal process group (-1): Inappropriate ioctl for device[0m
[34mbash: no job control in this shell[0m
[34m2021-01-26 15:23:36,242 sagemaker-containers INFO     Imported framework sagemaker_pytorch_container.training[0m
[34m2021-01-26 15:23:36,270 sagemaker_pytorch_container.training INFO     Block until all host DNS lookups succeed.[0m
[34m2021-01-26 15:23:37,695 sagemaker_pytorch_container.training INFO     Invoking user training script.[0m
[34m2021-01-26 15:23:38,047 sagemaker-containers INFO     Module 

[34m15:23:48, epoch: 3, train: 0.857, valid: 0.119[0m
[34m15:23:50, epoch: 4, train: 0.779, valid: 0.041[0m
[34m15:23:52, epoch: 5, train: 0.633, valid: 0.057[0m
[34m15:23:54, epoch: 6, train: 0.633, valid: 0.055[0m
[34m15:23:56, epoch: 7, train: 0.601, valid: 0.037[0m
[34m15:23:59, epoch: 8, train: 0.565, valid: 0.036[0m
[34m15:24:03, epoch: 9, train: 0.579, valid: 0.039[0m
[34m15:24:07, epoch: 10, train: 0.593, valid: 0.051[0m
[34m15:24:10, epoch: 11, train: 0.576, valid: 0.041[0m
[34m15:24:14, epoch: 12, train: 0.604, valid: 0.034[0m
[34m15:24:19, epoch: 13, train: 0.555, valid: 0.033[0m
[34m15:24:23, epoch: 14, train: 0.519, valid: 0.039[0m
[34m15:24:28, epoch: 15, train: 0.484, valid: 0.043[0m
[34m15:24:34, epoch: 16, train: 0.495, valid: 0.037[0m
[34m15:24:40, epoch: 17, train: 0.463, valid: 0.032[0m
[34m15:24:46, epoch: 18, train: 0.515, valid: 0.029[0m
[34m15:24:52, epoch: 19, train: 0.485, valid: 0.03[0m
[34m15:24:59, epoch: 20, train: 0.423,

In [25]:
# Display the job name of the estimator's latest training job.
estimator.latest_training_job.job_name

'sagemaker-pytorch-2021-01-26-15-19-44-976'

In [26]:
# Derive the deployment settings from the loaded config. define_model reads
# the "codename", "primary_container" and "role" entries from the result.
deploy_config = get_deploy_config(config)

In [27]:
def define_model(deploy_config):
    """Create a SageMaker model from the deployment settings.

    NOTE(review): this definition shadows the ``define_model`` imported from
    ``ibov.deploy`` in the notebook's import cell.

    Args:
        deploy_config: Object exposing ``.get`` that provides the entries
            ``"codename"`` (used as the model name), ``"primary_container"``
            and ``"role"`` (used as the execution role ARN).

    Returns:
        Whatever the SageMaker client's ``create_model`` call returns.
    """
    model_name = deploy_config.get("codename")

    sagemaker_client = boto3.client("sagemaker")

    # Assemble the request first so the keyword names map one-to-one onto the
    # deploy_config entries they are taken from.
    request = dict(
        ModelName=model_name,
        PrimaryContainer=deploy_config.get("primary_container"),
        ExecutionRoleArn=deploy_config.get("role"),
    )
    return sagemaker_client.create_model(**request)

In [28]:
# Create the model from the deployment settings; the cell output is the
# response returned by define_model.
define_model(deploy_config)

{'ModelArn': 'arn:aws:sagemaker:us-east-1:977053370764:model/sagemaker-pytorch-2021-01-26-15-19-44-976',
 'ResponseMetadata': {'RequestId': 'cc296fc3-d472-4005-97ba-ab0d91a9a4bb',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': 'cc296fc3-d472-4005-97ba-ab0d91a9a4bb',
   'content-type': 'application/x-amz-json-1.1',
   'content-length': '103',
   'date': 'Tue, 26 Jan 2021 16:21:40 GMT'},
  'RetryAttempts': 0}}

### Endpoint Deployment

# Re-load the configuration (it was already loaded near the top of the notebook).
config = load_config()

# Rebuild the deployment settings from the freshly loaded config.
deploy_config = get_deploy_config(config)

# NOTE(review): build_endpoint is not imported in the notebook's import cell
# (only get_deploy_config and define_model come from ibov.deploy), so this
# raises NameError on a fresh kernel — confirm where it is defined and import it.
build_endpoint(deploy_config)

# NOTE(review): kill_endpoint is likewise not imported in the visible cells, and
# it receives `config` rather than `deploy_config` — confirm both are intended.
kill_endpoint(config)