In [None]:
import os
import sagemaker
from sagemaker import get_execution_role

# Deployment-specific settings are read from the environment.
bucket = os.getenv('BUCKET_NAME')
endpoint_name = os.getenv('ENDPOINT_NAME')
sagemaker_session = sagemaker.Session(default_bucket=bucket)

# os.getenv returns None when BUCKET_NAME is unset. The session then uses its
# own default bucket, but S3 URIs formatted from `bucket` in later cells would
# come out as "s3://None/...". Fall back to the bucket the session resolves so
# the two always agree. The lookup only happens when the variable is missing
# or empty, so a configured bucket triggers no extra calls.
if not bucket:
    bucket = sagemaker_session.default_bucket()

# Get a SageMaker-compatible role used by this Notebook Instance.
role = get_execution_role()
region = sagemaker_session.boto_session.region_name

In [None]:
from sagemaker.sklearn.estimator import SKLearn

# Training script and the local directory that is uploaded alongside it.
entry_point = 'sklearn_featureizer.py'
source_dir = 'pipeline'

# Every S3 location below is built from `bucket`; stop here rather than
# silently producing "s3://None/..." URIs when it was never configured.
if not bucket:
    raise ValueError('BUCKET_NAME is not set; cannot build the S3 locations used for training.')

s3_output_key_prefix = "training_output"
model_output_path = 's3://{}/{}/{}'.format(bucket, s3_output_key_prefix, 'w2vmodel')

# terminate model training after 48 hours
train_max_run = 48 * 60 * 60

# NOTE(review): `train_instance_type` / `train_max_run` are the SDK v1 keyword
# names; SDK v2 renamed them to `instance_type` / `max_run` and only accepts
# the old spellings with a deprecation warning. Left unchanged because the
# installed SDK version is not visible here -- confirm and rename if on v2.
grid_search = SKLearn(
    framework_version='0.23-1',
    source_dir=source_dir,
    entry_point=entry_point,
    role=role,
    train_instance_type="ml.c5.18xlarge",
    sagemaker_session=sagemaker_session,
    output_path=model_output_path,
    train_max_run=train_max_run
)

# Training data sits under the same bucket and key prefix as the outputs.
# Derive the URI from configuration instead of hard-coding one stack's
# generated bucket name, so the notebook runs unchanged in other deployments.
train_input = 's3://{}/{}/train/train.csv'.format(bucket, s3_output_key_prefix)

In [None]:
# Launch the training job with the CSV registered as the 'train' channel and
# stream its logs into the cell output. Expect this to run for a long time.
grid_search.fit(
    inputs={'train': train_input},
    logs=True,
)