### Data Preparation

In [1]:
import pandas as pd
from sklearn.datasets import load_iris

In [2]:
# Load the dataset a single time and take both the features and the label
# from the same object, instead of fetching it once per field.
iris_bunch = load_iris(as_frame=True)
iris = iris_bunch["data"]      # feature columns only
target = iris_bunch["target"]  # label column
# Combined frame with the label as the first column, followed by the features.
data = pd.concat([target, iris], axis=1)

In [3]:
# Preview the first five rows of the combined frame (label column first).
data.head(n=5)

Unnamed: 0,target,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm)
0,0,5.1,3.5,1.4,0.2
1,0,4.9,3.0,1.4,0.2
2,0,4.7,3.2,1.3,0.2
3,0,4.6,3.1,1.5,0.2
4,0,5.0,3.6,1.4,0.2


In [5]:
# Features-only CSV, written without a header row or index column.
# The Prediction section reads this file back and sends it as the request body.
filepath_predictors = "../data/predictors.csv"
iris.to_csv(
    path_or_buf=filepath_predictors,
    index=False,
    header=False,
)

In [6]:
# Label + features CSV, written without a header row or index column.
# This is the file uploaded to S3 and used as the "train" channel.
filepath_data = "../data/data.csv"
data.to_csv(
    path_or_buf=filepath_data,
    index=False,
    header=False,
)

### SageMaker Training

In [7]:
import sagemaker
from sagemaker.image_uris import retrieve
from sagemaker.inputs import TrainingInput
from sagemaker.estimator import Estimator

In [8]:
# SageMaker session shared by the rest of the notebook: later cells read the
# region and default bucket from it, upload data through it, and pass it to
# the Estimator.
session = sagemaker.Session()

In [9]:
# Region the session is bound to; used below to look up the container image.
region_name = session.boto_region_name
region_name

'us-east-1'

In [10]:
# Look up the XGBoost container image URI for the session's region.
# NOTE(review): "latest" is not a pinned version, so the resolved image may
# differ between runs — consider pinning an explicit version.
container_image = retrieve(
    region=region_name,
    framework="xgboost",
    version="latest",
)
container_image

'811284229777.dkr.ecr.us-east-1.amazonaws.com/xgboost:latest'

In [11]:
# Default S3 bucket of the session; used for both the training data upload
# and the model output path.
bucket = session.default_bucket()
bucket

'sagemaker-us-east-1-885248014373'

In [12]:
# Upload the training CSV under the "training/" prefix of the bucket and keep
# the resulting S3 URI for the training input.
uploaded_data = session.upload_data(
    key_prefix="training",
    bucket=bucket,
    path=filepath_data,
)
uploaded_data

's3://sagemaker-us-east-1-885248014373/training/data.csv'

In [13]:
# Describe the uploaded file as a CSV training input.
input_data = TrainingInput(
    content_type="text/csv",
    s3_data=uploaded_data,
)

In [14]:
# IAM role ARN passed to the Estimator for the training job.
# NOTE(review): this ARN is hard-coded to one specific AWS account and role, so
# the notebook will not run unchanged elsewhere — consider reading it from
# configuration or the environment instead.
role = "arn:aws:iam::885248014373:role/service-role/AmazonSageMaker-ExecutionRole-20210305T230941"

In [15]:
# Training job definition: one ml.m5.large instance running the XGBoost
# container, with model artifacts written under the bucket's "output/" prefix.
xgboost = Estimator(
    sagemaker_session=session,
    role=role,
    image_uri=container_image,
    instance_count=1,
    instance_type="ml.m5.large",
    output_path=f"s3://{bucket}/output",
)

In [16]:
# The label is a class id, so train a multi-class classifier. Without an
# explicit objective XGBoost falls back to its regression default, which
# treats the class ids as continuous values.
# num_class is required by the multi-class objectives; derive it from the
# label column rather than hard-coding it.
xgboost.set_hyperparameters(
    num_round=5,
    max_depth=5,
    objective="multi:softmax",
    num_class=int(target.nunique()),
)

In [17]:
# Launch the training job with the CSV input bound to the "train" channel.
# The recorded output below shows the job's log being streamed into the cell.
xgboost.fit({"train": input_data}) 

2021-06-29 22:32:26 Starting - Starting the training job...
2021-06-29 22:32:28 Starting - Launching requested ML instancesProfilerReport-1625005945: InProgress
...
2021-06-29 22:33:29 Starting - Preparing the instances for training.........
2021-06-29 22:34:57 Downloading - Downloading input data...
2021-06-29 22:35:42 Training - Training image download completed. Training in progress.
2021-06-29 22:35:42 Uploading - Uploading generated training model.[34mArguments: train[0m
[34m[2021-06-29:22:35:37:INFO] Running standalone xgboost training.[0m
[34m[2021-06-29:22:35:37:INFO] Path /opt/ml/input/data/validation does not exist![0m
[34m[2021-06-29:22:35:37:INFO] File size need to be processed in the node: 0.0mb. Available memory size in the node: 178.36mb[0m
[34m[2021-06-29:22:35:37:INFO] Determined delimiter of CSV input is ','[0m
[34m[22:35:37] S3DistributionType set as FullyReplicated[0m
[34m[22:35:37] 150x4 matrix with 600 entries loaded from /opt/ml/input/data/train?form

In [18]:
# Name of the training job started by the fit() call above.
xgboost.latest_training_job.job_name

'xgboost-2021-06-29-22-32-25-915'

In [19]:
# S3 URI of the trained model artifact (model.tar.gz under the output path).
xgboost.model_data

's3://sagemaker-us-east-1-885248014373/output/xgboost-2021-06-29-22-32-25-915/output/model.tar.gz'

### Prediction

In [None]:
import boto3

In [None]:
# Low-level client for the SageMaker runtime API; used below to invoke the endpoint.
runtime = boto3.client("sagemaker-runtime")

In [None]:
# Read the features-only CSV as text; it becomes the request body below.
# ("dados" is Portuguese for "data".)
with open(filepath_predictors) as predictors_file:
    dados = predictors_file.read()

In [None]:
# Name of the endpoint to invoke.
# NOTE(review): this is an empty placeholder and no cell in this notebook
# deploys the trained model — fill in the name of an existing endpoint before
# running the cells below.
EndpointName = ""

In [None]:
# Send the CSV payload to the endpoint. ContentType tells the serving
# container how to parse the body; the payload is comma-separated text with
# no header row and no label column.
response = runtime.invoke_endpoint(
    EndpointName=EndpointName,
    ContentType="text/csv",
    Body=dados,
)

In [None]:
# Read the response body and decode it as UTF-8 text to display the predictions.
# NOTE(review): the body is presumably a one-shot stream, so re-running this
# cell without re-invoking the endpoint would return an empty string — confirm.
response["Body"].read().decode("utf-8")