# SageMaker Tutorial (DSGO)

In [None]:
import sagemaker
from boto3 import Session as BotoSession
from time import sleep

# Build the SageMaker session on top of an explicit boto3 session so the AWS
# profile and region used by the cells below are fixed in one place.
boto_session = BotoSession(region_name='us-east-1', profile_name='default')
sagemaker_session = sagemaker.Session(boto_session=boto_session)
# Passed as `role=` to every estimator/model below; fill it in before running.
sagemaker_role = ''  # YOUR SAGEMAKER ROLE HERE

## Training - Cloud

In [None]:
# Copy the local training directory to S3 under "<project>/<train_dir>" and
# keep the returned location for the cloud training job.
train_dir = 'data/train'
project_name = 'sagemaker-dsgo-tutorial'
train_input = sagemaker_session.upload_data(
    train_dir,
    key_prefix='/'.join((project_name, train_dir)),
)
print('location in s3: {}'.format(train_input))

In [None]:
from sagemaker.sklearn.estimator import SKLearn

# Estimator for cloud training: a single ml.c4.xlarge instance running
# sagemaker_entry_point.py, with the current directory as the source bundle.
cloud_model = SKLearn(
    role=sagemaker_role,
    source_dir='.',
    entry_point='sagemaker_entry_point.py',
    train_instance_count=1,
    train_instance_type='ml.c4.xlarge',
)

In [None]:
# Start training; the 'train' channel points at the data uploaded to S3
# (cloud training has to read its data from S3).
cloud_model.fit(dict(train=train_input))

## Training - Local

In [None]:
from sagemaker.sklearn.estimator import SKLearn

# Same estimator configuration as the cloud one, except that the instance
# type is 'local'.
local_model = SKLearn(
    role=sagemaker_role,
    source_dir='.',
    entry_point='sagemaker_entry_point.py',
    train_instance_count=1,
    train_instance_type='local',
)

In [None]:
# Start training; the 'train' channel is the on-disk directory addressed
# through a file:// URI (data can be from disk or s3).
local_model.fit(dict(train='file://' + train_dir))

## Deploy - Cloud

In [None]:
# Deploy the estimator that was trained in the cloud section on a single
# ml.m4.xlarge instance and keep the resulting predictor.
cloud_predictor = cloud_model.deploy(
    instance_type='ml.m4.xlarge',
    initial_instance_count=1,
)

In [None]:
from sagemaker.predictor import json_serializer

# Requests are sent as JSON: set the content type and the matching serializer.
cloud_predictor.content_type = 'application/json'
cloud_predictor.serializer = json_serializer

## Predict - Cloud

In [None]:
# Compare two titles that differ by a single word ("of" vs "by").
book1, book2 = 'tale of two cities', 'tale by two cities'
result = cloud_predictor.predict(dict(arg1=book1, arg2=book2))

print("\nRESULT --> {} VS {}: {}".format(book1, book2, result))

In [None]:
# Compare two titles that differ in their last word ("cities" vs "towns").
book1, book2 = 'tale of two cities', 'tale of two towns'
result = cloud_predictor.predict(dict(arg1=book1, arg2=book2))

print("\nRESULT --> {} VS {}: {}".format(book1, book2, result))

In [None]:
# cleanup: remove the model and then the endpoint, both through the predictor
# returned by deploy(). Estimator.delete_endpoint() is deprecated in the
# SageMaker Python SDK in favour of the predictor's own delete_endpoint().
cloud_predictor.delete_model()
cloud_predictor.delete_endpoint()

## Deploy - Cloud (trained outside SageMaker)

In [None]:
# Copy the locally fitted model directory to S3 under
# "<project>/<trained_model_dir>" and keep the returned location.
trained_model_dir = 'data/fitted_model'
project_name = 'sagemaker-dsgo-tutorial'
trained_model_input = sagemaker_session.upload_data(
    trained_model_dir,
    key_prefix='/'.join((project_name, trained_model_dir)),
)
print('location in s3: {}'.format(trained_model_input))

In [None]:
from sagemaker.sklearn import SKLearnModel

# Wrap a model that was fitted outside SageMaker so it can be deployed
# (for cloud hosting the model data has to be in S3).
# NOTE(review): trained_model_input is the result of uploading a whole
# directory; confirm it resolves to the artifact that model_data expects.
cloud_manual_model = SKLearnModel(
    role=sagemaker_role,
    model_data=trained_model_input,
    source_dir='.',
    entry_point='sagemaker_entry_point.py',
)

In [None]:
# Deploy the externally trained model on a single ml.m4.xlarge instance.
# The call has to go through cloud_manual_model: deploying cloud_model here
# would serve the estimator trained inside SageMaker instead.
cloud_manual_predictor = cloud_manual_model.deploy(
    initial_instance_count=1, instance_type="ml.m4.xlarge")

In [None]:
from sagemaker.predictor import json_serializer

# Requests are sent as JSON: set the content type and the matching serializer.
cloud_manual_predictor.content_type = 'application/json'
cloud_manual_predictor.serializer = json_serializer

## Predict - Cloud (trained outside SageMaker)

In [None]:
# Compare a name with its space-separated spelling.
arg1, arg2 = 'bluevine', 'blue vine'

result = cloud_manual_predictor.predict(dict(arg1=arg1, arg2=arg2))
# NOTE(review): the purpose of this one-second pause is not evident from the
# notebook; kept as-is.
sleep(1)
print("\nRESULT --> {} VS {}: {}".format(arg1, arg2, result))

In [None]:
# cleanup: remove the model and then the endpoint, both through the
# predictor. cloud_manual_model is an SKLearnModel, which provides
# delete_model() but no delete_endpoint(), so the endpoint must be deleted
# via the predictor.
cloud_manual_predictor.delete_model()
cloud_manual_predictor.delete_endpoint()

## Deploy - Local

In [None]:
# Deploy the estimator that was trained in the local section, using the
# 'local' instance type, and keep the resulting predictor.
local_predictor = local_model.deploy(
    instance_type='local',
    initial_instance_count=1,
)

In [None]:
from sagemaker.predictor import json_serializer

# Requests are sent as JSON: set the content type and the matching serializer.
local_predictor.content_type = 'application/json'
local_predictor.serializer = json_serializer

## Predict - Local

In [None]:
# Compare a name with its space-separated spelling.
arg1, arg2 = 'bluevine', 'blue vine'

result = local_predictor.predict(dict(arg1=arg1, arg2=arg2))
# NOTE(review): presumably this pause lets the local container's log output
# finish before the result is printed; confirm.
sleep(1)
print("\nRESULT --> {} VS {}: {}".format(arg1, arg2, result))

In [None]:
# cleanup: remove the model and then the endpoint, both through the predictor
# returned by deploy(). Estimator.delete_endpoint() is deprecated in the
# SageMaker Python SDK in favour of the predictor's own delete_endpoint().
local_predictor.delete_model()
local_predictor.delete_endpoint()

## Deploy - Local (trained outside SageMaker)

In [None]:
from sagemaker.sklearn import SKLearnModel

# Wrap a model that was fitted outside SageMaker, read straight from the
# local .joblib file (model data can be from disk or s3).
local_manual_model = SKLearnModel(
    role=sagemaker_role,
    model_data='file://data/fitted_model/name_comparison_model.joblib',
    source_dir='.',
    entry_point='sagemaker_entry_point.py',
)

In [None]:
# Deploy the externally trained model using the 'local' instance type.
local_manual_predictor = local_manual_model.deploy(
    instance_type='local',
    initial_instance_count=1,
)

In [None]:
from sagemaker.predictor import json_serializer

# Requests are sent as JSON: set the content type and the matching serializer.
local_manual_predictor.content_type = 'application/json'
local_manual_predictor.serializer = json_serializer

## Predict - Local (trained outside SageMaker)

In [None]:
# Compare a name with its space-separated spelling.
arg1, arg2 = 'bluevine', 'blue vine'

result = local_manual_predictor.predict(dict(arg1=arg1, arg2=arg2))
# NOTE(review): presumably this pause lets the local container's log output
# finish before the result is printed; confirm.
sleep(1)
print("\nRESULT --> {} VS {}: {}".format(arg1, arg2, result))

In [None]:
# cleanup: delete the model through the SKLearnModel object, then delete the
# endpoint through the predictor that deploy() returned.
local_manual_model.delete_model()
local_manual_predictor.delete_endpoint()