In [None]:
# preparation

import os
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_iris

# Load the iris dataset and hold out 20% of the rows for testing.
# random_state is fixed so the split is the same on every run.
iris = load_iris()
X, y = iris.data, iris.target

X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# One DataFrame per split: the feature columns first, then the target
# appended as the last column, named 'label'.
train_df = pd.DataFrame(X_train, columns=iris.feature_names).assign(label=y_train)
test_df = pd.DataFrame(X_test, columns=iris.feature_names).assign(label=y_test)

# Write both splits to the working directory, with a header row and
# without the index column.
for split_df, csv_name in ((train_df, "iris_train.csv"), (test_df, "iris_test.csv")):
    split_df.to_csv(csv_name, index=False)

In [None]:
# training

import sagemaker
from sagemaker.sklearn.estimator import SKLearn

# Session and execution role used by the SageMaker calls in this notebook.
sagemaker_session = sagemaker.Session()
role = sagemaker.get_execution_role()

# Upload both CSV files under the 'iris/data' key prefix (train first, then
# test). Only train_path is passed to fit() below; test_path is kept for
# reference.
train_path, test_path = (
    sagemaker_session.upload_data(csv_name, key_prefix='iris/data')
    for csv_name in ('iris_train.csv', 'iris_test.csv')
)

# Estimator configuration. The training logic itself lives in the
# 'random_forest.py' entry point, which is not part of this notebook.
estimator_settings = dict(
    entry_point='random_forest.py',
    role=role,
    instance_count=1,
    instance_type='ml.m5.xlarge',
    framework_version='0.23-1',
    py_version='py3',
    hyperparameters={},
)
sklearn_estimator = SKLearn(**estimator_settings)

# Start training with a single input channel named 'train'.
training_channels = {'train': train_path}
sklearn_estimator.fit(training_channels)

In [None]:
# realtime endpoint
#
# Deploy the trained estimator on a single ml.m5.large instance and keep the
# returned predictor.
# NOTE(review): no cell visible in this notebook deletes the endpoint; clean
# it up manually once it is no longer needed.
endpoint_settings = dict(
    initial_instance_count=1,
    instance_type='ml.m5.large',
)
predictor = sklearn_estimator.deploy(**endpoint_settings)

In [None]:
from sagemaker.predictor import Predictor
import json

# Specify the endpoint name.
# NOTE(review): "xxxxxxxx" looks like a placeholder; replace it with the name
# of an existing endpoint before running this cell.
endpoint_name = "xxxxxxxx"

# Attach a predictor to that endpoint (this rebinds `predictor`).
predictor = Predictor(endpoint_name=endpoint_name)

# Prepare the test data for inference: the first five rows of the test split
# with the label column removed, serialized as a JSON list of rows.
test_features = test_df.drop(columns='label')
test_samples = test_features.head().values
json_data = json.dumps(test_samples.tolist())

# Send the JSON payload, declaring its content type explicitly.
request_args = {"ContentType": "application/json"}
predictions = predictor.predict(json_data, initial_args=request_args)
print(predictions)

In [None]:
# batch transform
#
# Build the batch input: the test split without its label column, written as
# a CSV with neither header row nor index column.
test_data_no_label = test_df.drop(columns='label')
test_data_no_label.to_csv(
    "iris_test_no_label.csv",
    index=False,
    header=False,
)

# Upload the batch input under the 'iris/batch_input' key prefix and keep the
# returned S3 location for the transform job.
test_data_path = sagemaker_session.upload_data(
    'iris_test_no_label.csv',
    key_prefix='iris/batch_input',
)

In [None]:
# Results are written below 'iris/batch_output' in the session's default bucket.
default_bucket = sagemaker_session.default_bucket()
batch_output_uri = 's3://{}/iris/batch_output'.format(default_bucket)

# Create a transformer from the trained estimator.
transformer = sklearn_estimator.transformer(
    instance_count=1,
    instance_type='ml.m5.large',
    output_path=batch_output_uri,
)

# Run the job on the uploaded CSV, declared as text/csv and split by line.
transform_request = dict(
    data=test_data_path,
    content_type='text/csv',
    split_type='Line',
)
transformer.transform(**transform_request)

# Block until the transform job has finished.
transformer.wait()

In [None]:
import boto3
import os

# Download the batch transform result from S3 and load it into a DataFrame.
from urllib.parse import urlparse

# Use the region of the SageMaker session instead of a hard-coded one, so the
# S3 client matches the region the session and its default bucket work in.
s3_client = boto3.client('s3', region_name=sagemaker_session.boto_region_name)

# Take the bucket and key prefix from the transformer's actual output
# location. Parsing the URI stays correct when the output path uses another
# bucket than the session default or ends with a trailing slash.
output_location = urlparse(transformer.output_path)
bucket_name = output_location.netloc
s3_prefix = output_location.path.strip('/')

# The result object is expected under the input file name plus '.out'.
s3_key = f"{s3_prefix}/iris_test_no_label.csv.out"

output_file = 'batch_transform_output.csv'
s3_client.download_file(Bucket=bucket_name, Key=s3_key, Filename=output_file)

# NOTE(review): the layout of the .out file depends on the serving
# container's response format; confirm it parses as header-less CSV.
predictions = pd.read_csv(output_file, header=None)
print(predictions.head())