In [None]:
import numpy as np
import pandas as pd

import boto3
import re

import sagemaker
from sagemaker import get_execution_role

In [None]:
# S3 layout for this experiment: one bucket, with separate key prefixes for
# the training data, the validation data and the model artifacts.
bucket_name = 'das20202-ml-sagemaker'

# Key prefixes (trailing slash included) under which the CSV files are stored.
training_folder = 'mushroom/training/'
validation_folder = 'mushroom/validation/'

# Full s3:// URIs derived from the bucket name and the prefixes above.
s3_model_output_location = 's3://{0}/mushroom/model'.format(bucket_name)
s3_training_file_location, s3_validation_file_location = (
    's3://{0}/{1}'.format(bucket_name, folder)
    for folder in (training_folder, validation_folder)
)

In [None]:
# Echo the three derived S3 URIs (one per line) as a quick sanity check.
print('\n'.join([
    s3_model_output_location,
    s3_training_file_location,
    s3_validation_file_location,
]))

In [None]:
# Write the files created via the mushroom notebook
def write_to_s3(filename, bucket, key):
    """Upload a local file to an S3 object.

    Args:
        filename: Path of the local file to upload.
        bucket: Name of the destination S3 bucket.
        key: Object key to write inside ``bucket``.

    Returns:
        Whatever the S3 object's ``upload_fileobj`` call returns.
    """
    # Open in binary mode so the bytes are handed to S3 exactly as stored on disk.
    with open(filename, 'rb') as local_file:
        s3 = boto3.Session().resource('s3')
        target_object = s3.Bucket(bucket).Object(key)
        return target_object.upload_fileobj(local_file)

In [None]:
# Upload each local CSV under its S3 folder, keeping the local file name as
# the last component of the object key.
for local_csv, s3_folder in (
        ('mushroom_train.csv', training_folder),
        ('mushroom_validation.csv', validation_folder)):
    write_to_s3(local_csv, bucket_name, s3_folder + local_csv)

In [None]:
# SageMaker session: supplies the region for the container lookup below and
# is handed to the Estimator as its sagemaker_session.
sess = sagemaker.Session()

In [None]:
# Execution role for this notebook; it is passed to the Estimator further down.
role = get_execution_role()

In [None]:
# The SageMaker SDK keeps the algorithm -> container image mapping for us:
# given the region, the algorithm name and its version, it returns the image URI.
# NOTE(review): SageMaker Python SDK v2 appears to replace this helper with
# sagemaker.image_uris.retrieve -- confirm which SDK version is installed.
aws_region = sess.boto_region_name
container = sagemaker.amazon.amazon_estimator.get_image_uri(
    aws_region, "xgboost", "0.90-1")

print('Using SageMaker XGBoost container:\n{} ({})'.format(container, aws_region))

In [None]:
# Training job settings: a single ml.m4.xlarge instance, model artifacts
# written under s3_model_output_location, job names prefixed with 'mushroom-v1'.
# NOTE(review): train_instance_count / train_instance_type are the SDK v1
# argument names -- confirm against the installed SDK version.
estimator_settings = dict(
    train_instance_count=1,
    train_instance_type='ml.m4.xlarge',
    output_path=s3_model_output_location,
    sagemaker_session=sess,
    base_job_name='mushroom-v1',
)

estimator = sagemaker.estimator.Estimator(container, role, **estimator_settings)

In [None]:
# XGBoost hyperparameters for this run: binary logistic objective, at most
# 150 boosting rounds, early-stopping window of 10 rounds.
xgb_hyperparameters = {
    "objective": "binary:logistic",
    "num_round": 150,
    "early_stopping_rounds": 10,
}

estimator.set_hyperparameters(**xgb_hyperparameters)

In [None]:
# Bare expression so the notebook displays the hyperparameters currently set
# on the estimator.
estimator.hyperparameters()

In [None]:
# Both channels are configured the same way: CSV content, addressed by S3 prefix.
# NOTE(review): sagemaker.session.s3_input is the SDK v1 name (v2 appears to
# use sagemaker.inputs.TrainingInput) -- confirm the installed SDK version.
csv_prefix_options = dict(content_type='csv', s3_data_type='S3Prefix')

training_input_config = sagemaker.session.s3_input(
    s3_data=s3_training_file_location, **csv_prefix_options)
validation_input_config = sagemaker.session.s3_input(
    s3_data=s3_validation_file_location, **csv_prefix_options)

# Channel name -> input configuration, in the form handed to estimator.fit().
data_channels = dict(train=training_input_config,
                     validation=validation_input_config)

In [None]:
# Print the underlying config of each channel, training first, for inspection.
for channel_input in (training_input_config, validation_input_config):
    print(channel_input.config)

In [None]:
# Run training, feeding the estimator the 'train' and 'validation' channels
# assembled in data_channels.
estimator.fit(data_channels)