In [39]:
import os

import pandas as pd
import sagemaker
# Creates a SKLearn Estimator for Scikit-learn environment.
from sagemaker.sklearn.estimator import SKLearn
from sklearn.datasets import load_boston
from sklearn.model_selection import train_test_split

# SageMaker session and IAM execution role shared by the cells below.
sess = sagemaker.Session()
role = sagemaker.get_execution_role()
# Name of the session's default S3 bucket; the CSV uploads further down target it.
bucket = sess.default_bucket()

# URI of your remote MLflow tracking server.
# Set the MLFLOW_TRACKING_URI environment variable to point the notebook at a
# different server without editing this cell; when the variable is unset or
# empty, the original endpoint below is used unchanged.
# NOTE(review): the fallback is plain HTTP to one specific EC2 host — confirm it
# is still the intended server and reachable from the training job.
tracking_uri = (
    os.environ.get('MLFLOW_TRACKING_URI')
    or 'http://ec2-3-105-100-75.ap-southeast-2.compute.amazonaws.com'
)

In [49]:
# Display the execution role obtained from sagemaker.get_execution_role().
# NOTE(review): the echoed ARN includes the AWS account id — clear this cell's
# output before sharing or committing the notebook.
role

'arn:aws:iam::831671504240:role/service-role/AmazonSageMaker-ExecutionRole-20220105T134939'

In [40]:
# Boston housing dataset: hold out 25% of the rows for testing (fixed seed).
# NOTE(review): load_boston is deprecated in scikit-learn 1.0 and removed in
# 1.2 — confirm the kernel's scikit-learn version still ships it.
data = load_boston()
X_train, X_test, y_train, y_test = train_test_split(
    data.data,
    data.target,
    test_size=0.25,
    random_state=42,
)

# Rebuild labelled frames: named feature columns plus a trailing 'target' column.
trainX = pd.DataFrame(X_train, columns=data.feature_names).assign(target=y_train)
testX = pd.DataFrame(X_test, columns=data.feature_names).assign(target=y_test)

# Write each split to a local CSV in the working directory.
# NOTE(review): to_csv is called with defaults, so the row index is written as
# an unnamed first column — verify train.py tolerates it.
for frame, csv_name in ((trainX, 'boston_train.csv'), (testX, 'boston_test.csv')):
    frame.to_csv(csv_name)

In [41]:
# Stage both CSVs in S3 under the same key prefix; SageMaker takes its
# training data from S3. upload_data's return value for each file is kept.
train_path, test_path = (
    sess.upload_data(path=csv_name, bucket=bucket, key_prefix='sagemaker/sklearncontainer')
    for csv_name in ('boston_train.csv', 'boston_test.csv')
)

In [42]:
# Display the value returned by sess.upload_data for the training CSV.
train_path

's3://sagemaker-ap-southeast-2-831671504240/sagemaker/sklearncontainer/boston_train.csv'

#### Train

In [46]:
# Settings handed to the estimator as hyperparameters.
# NOTE(review): presumably parsed by train.py — the key spellings (mixed '_'
# and '-') are kept exactly as they were; verify against that script.
hyperparameters = {
    'tracking_uri': tracking_uri,            # MLflow server configured earlier
    'experiment_name': 'boston-housing',
    'n-estimators': 100,
    'min-samples-leaf': 3,
    # Space-separated feature column names.
    'features': 'CRIM ZN INDUS CHAS NOX RM AGE DIS RAD TAX PTRATIO B LSTAT',
    'target': 'target',                      # label column added to both CSVs
}

# A single custom metric; the regex's capture group holds the numeric value.
# NOTE(review): assumes train.py logs a line of the form
# "AE-at-50th-percentile: <number>" — confirm.
metric_definitions = [
    dict(Name='median-AE', Regex="AE-at-50th-percentile: ([0-9.]+).*$"),
]

# Scikit-learn estimator: entry script train.py from the local source_dir
# folder, on one ml.m5.large instance.
estimator = SKLearn(
    # what to run
    source_dir='source_dir',
    entry_point='train.py',
    framework_version='0.23-1',
    # where and as whom
    role=role,
    instance_type='ml.m5.large',
    instance_count=1,
    base_job_name='mlflow',
    # job inputs and tracked metrics
    hyperparameters=hyperparameters,
    metric_definitions=metric_definitions,
)

In [47]:
# Start training, passing the uploaded CSV locations keyed as 'train' and 'test'.
channels = {'train': train_path, 'test': test_path}
estimator.fit(channels)

'http://ec2-3-105-100-75.ap-southeast-2.compute.amazonaws.com'