In [43]:
import sagemaker
from datetime import datetime
from sagemaker.session import TrainingInput
from sagemaker.serializers import CSVSerializer
from sagemaker.tuner import HyperparameterTuner
from sagemaker.parameter import ContinuousParameter, IntegerParameter

In [44]:
# Training: XGBoost hyperparameter tuning job on SageMaker

In [45]:
# S3 bucket used for the S3 paths built in this notebook.
BUCKET = "page2sage"

# AWS Region, read from a default SageMaker session.
default_session = sagemaker.Session()
region = default_session.boto_region_name

# IAM role used for execution, as resolved by the SageMaker SDK.
role = sagemaker.get_execution_role()


In [46]:
# Explicit version of the built-in XGBoost container. The "latest" tag resolves
# to the legacy image, which AWS advises against; pin a supported version so the
# training image is reproducible. Adjust this value if the installed SDK or the
# target region does not offer this version.
XGBOOST_VERSION = "1.7-1"

# Image URI of the built-in XGBoost algorithm for this region. Available
# frameworks/versions are listed in the SDK's image_uri_config directory:
# https://github.com/aws/sagemaker-python-sdk/tree/master/src/sagemaker/image_uri_config
container = sagemaker.image_uris.retrieve(
    framework="xgboost",
    region=region,
    version=XGBOOST_VERSION,
)

# Generic estimator wrapping the XGBoost container.
xgb_model = sagemaker.estimator.Estimator(
    image_uri=container,
    role=role,
    instance_count=1,  # for parallelization
    # Instance types: https://github.com/awsdocs/amazon-sagemaker-developer-guide/blob/master/doc_source/notebooks-available-instance-types.md
    instance_type="ml.m5.large",
    volume_size=5,  # in GB
    output_path=f"s3://{BUCKET}/models/xgboost/",  # where the trained model is saved
    sagemaker_session=sagemaker.Session(),
)

# Fixed (non-tuned) hyperparameter applied to every training job of this estimator.
xgb_model.set_hyperparameters(num_round=100)

In [47]:
# Tuner that searches max_depth and eta for xgb_model, minimizing the
# "validation:rmse" metric over at most 5 training jobs.
tuner = HyperparameterTuner(
    estimator=xgb_model,
    objective_metric_name="validation:rmse",
    objective_type="Minimize",
    hyperparameter_ranges=dict(
        max_depth=IntegerParameter(1, 10),
        eta=ContinuousParameter(0, 1),
    ),
    max_jobs=5,
)

In [48]:
# Options shared by both data channels.
# content_type: data type of the files.
# s3_data_type: one of "S3Prefix", "ManifestFile", "AugmentedManifestFile".
csv_channel_options = dict(content_type="csv", s3_data_type="S3Prefix")

# Training channel: S3 location of the training CSV.
train_input = TrainingInput(
    s3_data=f"s3://{BUCKET}/train/train.csv",
    **csv_channel_options,
)

# Validation channel: S3 location of the validation CSV.
validation_input = TrainingInput(
    s3_data=f"s3://{BUCKET}/validation/validation.csv",
    **csv_channel_options,
)

In [None]:
# Start the tuning job with the train/validation channels; wait=True is passed
# so the call waits for the job instead of returning immediately.
channels = {"train": train_input, "validation": validation_input}
tuner.fit(inputs=channels, wait=True)

.......

In [None]:
# Retrieve the estimator for the tuner's best training job and show it as the
# cell output.
best_estimator = tuner.best_estimator()
best_estimator