In [4]:
import sagemaker
from sagemaker.debugger import Rule
from sagemaker.debugger import rule_configs
from sagemaker.session import TrainingInput
from sagemaker.tuner import IntegerParameter
from sagemaker.tuner import HyperparameterTuner

___

In [5]:
# Build a single SageMaker session up front and derive everything else from
# it, so the region, the execution role and the estimator all share the same
# underlying AWS clients instead of each creating a fresh Session.
sm_sess = sagemaker.session.Session()

region = sm_sess.boto_region_name
print(f"AWS Region: {region}")

# IAM role that the training jobs run under.
role = sagemaker.get_execution_role(sagemaker_session=sm_sess)
print(f"RoleArn: {role}")

AWS Region: eu-west-3
RoleArn: arn:aws:iam::260598086981:role/service-role/AmazonSageMaker-ExecutionRole-20210916T222927


In [7]:
# Reuse the shared session (sm_sess) rather than constructing a throwaway
# Session just to look up the account's default SageMaker bucket.
bucket = sm_sess.default_bucket()
bucket

'sagemaker-eu-west-3-260598086981'

In [8]:
# S3 layout for this demo: every object lives under `prefix` in the default
# bucket, and the model artifacts go under `<prefix>/<model>`.
prefix, model = "demo-sagemaker-xgboost-boston", "xgboost_model"
s3_output_location = f's3://{bucket}/{prefix}/{model}'
print(f"s3 output location: {s3_output_location}")

's3://sagemaker-eu-west-3-260598086981/demo-sagemaker-xgboost-boston/xgboost_model'

In [9]:
# Look up the XGBoost 1.2-1 training image URI for the current region.
container = sagemaker.image_uris.retrieve(
    framework="xgboost",
    region=region,
    version="1.2-1",
)
print(f"Container URI: {container}")

659782779980.dkr.ecr.eu-west-3.amazonaws.com/sagemaker-xgboost:1.2-1


In [10]:
# CSV channels for the training job; both files sit under `<prefix>/data/`
# in the default bucket.
train_input = TrainingInput(
    s3_data=f"s3://{bucket}/{prefix}/data/train.csv",
    content_type="csv",
)
validation_input = TrainingInput(
    s3_data=f"s3://{bucket}/{prefix}/data/validation.csv",
    content_type="csv",
)

In [11]:
# Generic estimator wrapping the XGBoost container: one ml.m5.xlarge instance,
# with model artifacts written to `s3_output_location`.
xgb_model = sagemaker.estimator.Estimator(
    image_uri=container,
    role=role,
    sagemaker_session=sm_sess,
    instance_type="ml.m5.xlarge",
    instance_count=1,
    output_path=s3_output_location,
)

In [12]:
# Hyperparameters held fixed for every training job. `max_depth` is not set
# here because it is the one value searched over in `hyperparameter_ranges`.
xgb_model.set_hyperparameters(
    objective="reg:squarederror",
    eval_metric="rmse",
    num_round=1000,
    eta=0.2,
    gamma=4,
    min_child_weight=6,
    subsample=0.7,
)

In [13]:
# Search space for the tuner: only `max_depth`, as an integer between 1 and 10
# on a linear scale.
hyperparameter_ranges = dict(
    max_depth=IntegerParameter(min_value=1, max_value=10, scaling_type="Linear"),
)

In [14]:
# Random search that minimises validation RMSE. `max_parallel_jobs` equals
# `max_jobs`, so all five training jobs may run at the same time.
tuner = HyperparameterTuner(
    estimator=xgb_model,
    objective_metric_name="validation:rmse",
    hyperparameter_ranges=hyperparameter_ranges,
    objective_type="Minimize",
    strategy="Random",
    max_jobs=5,
    max_parallel_jobs=5,
)

In [15]:
# Start the tuning job, feeding the "train" and "validation" channels.
tuner.fit(inputs={"train": train_input, "validation": validation_input})

.........................................................!


In [16]:
# Fetch the results of the most recent tuning job as a pandas DataFrame.
tuning_job_name = tuner.latest_tuning_job.job_name
df = sagemaker.HyperparameterTuningJobAnalytics(
    hyperparameter_tuning_job_name=tuning_job_name
).dataframe()

In [17]:
# Show the tuning-results DataFrame; it is left as the bare last expression so
# the notebook renders it as a table.
df

Unnamed: 0,max_depth,TrainingJobName,TrainingJobStatus,FinalObjectiveValue,TrainingStartTime,TrainingEndTime,TrainingElapsedTimeSeconds
0,9.0,sagemaker-xgboost-210924-0816-005-c835f44b,Completed,3.48566,2021-09-24 08:19:37+00:00,2021-09-24 08:20:44+00:00,67.0
1,2.0,sagemaker-xgboost-210924-0816-004-e6f877c2,Completed,3.15543,2021-09-24 08:19:11+00:00,2021-09-24 08:20:22+00:00,71.0
2,7.0,sagemaker-xgboost-210924-0816-003-5f67ec14,Completed,3.39773,2021-09-24 08:19:17+00:00,2021-09-24 08:20:29+00:00,72.0
3,3.0,sagemaker-xgboost-210924-0816-002-8c52c36d,Completed,3.24672,2021-09-24 08:19:15+00:00,2021-09-24 08:20:28+00:00,73.0
4,1.0,sagemaker-xgboost-210924-0816-001-0aa8a790,Completed,3.77103,2021-09-24 08:19:02+00:00,2021-09-24 08:20:11+00:00,69.0


In [None]:
# Plot the final objective (validation RMSE) reached by each training job
# against the max_depth it was run with. Sorting by max_depth keeps the line
# monotone in x; markers make the individual sampled depths visible.
ax = (
    df.loc[:, ["FinalObjectiveValue", "max_depth"]]
    .set_index("max_depth")
    .sort_index()
    .plot(
        marker="o",
        legend=False,
        title="Hyperparameter tuning: validation RMSE vs. max_depth",
    )
)
# Label the axes so the figure stands on its own; the trailing semicolon
# suppresses the textual repr that would otherwise be echoed under the plot.
ax.set(xlabel="max_depth", ylabel="FinalObjectiveValue (validation:rmse)");