# Hyperparameter Tuning

In [None]:
# Build the XGBoost estimator that serves as the base estimator for hyperparameter tuning.

# Training artifacts are written under this prefix of the session's default bucket.
artifact_bucket = sagemaker_session.default_bucket()
model_path = f's3://{artifact_bucket}/{base_job_name}/AirlineTicketTrain'

# Look up the XGBoost 1.0-1 container image URI for the configured region.
image_lookup = dict(
    framework='xgboost',
    region=region,
    version='1.0-1',
    py_version='py3',
    instance_type=processing_instance_type,
)
image_uri = sagemaker.image_uris.retrieve(**image_lookup)

# The estimator is bound to the pipeline session and reuses the processing
# instance type/count settings for training.
estimator_settings = dict(
    image_uri=image_uri,
    instance_type=processing_instance_type,
    instance_count=processing_instance_count,
    output_path=model_path,
    base_job_name=f'{base_job_name}/airline-ticket-training',
    sagemaker_session=pipeline_session,
    role=role,
)
xgb_train = Estimator(**estimator_settings)

# Static hyperparameters for the regression objective; `eval_metric` is set to rmse.
# NOTE(review): upstream XGBoost deprecates `silent` in favour of `verbosity` --
# confirm the 1.0-1 container still accepts it.
tuning_base_hyperparameters = {
    'eval_metric': 'rmse',
    'objective': "reg:squarederror",
    'num_round': 50,
    'max_depth': 10,
    'min_child_weight': 6,
    'subsample': 0.5,
    'silent': 0,
}
xgb_train.set_hyperparameters(**tuning_base_hyperparameters)
    

In [None]:
# Initialise the hyperparameter tuner: search `max_depth` for the lowest validation RMSE.
objective_metric_name = 'validation:rmse'

# Only `max_depth` is searched (integers 6..9, linear scaling).
hyperparameter_ranges = {
    'max_depth': IntegerParameter(min_value=6, max_value=9, scaling_type='Linear')
}

tuner_log = HyperparameterTuner(
    estimator=xgb_train,
    objective_metric_name=objective_metric_name,
    hyperparameter_ranges=hyperparameter_ranges,
    # RMSE is an error metric, so lower is better. The SDK default objective
    # type is 'Maximize', which would rank the highest-error model as the best.
    objective_type='Minimize',
    max_jobs=3,
    max_parallel_jobs=3,
    # Strategy names are case-sensitive in the SageMaker API ('Random', not 'random').
    strategy='Random'
)

In [None]:
# Feed the processing step's "train" and "validation" outputs to the tuner as
# CSV channels, then wrap the resulting arguments in a pipeline tuning step.
processing_outputs = step_process.properties.ProcessingOutputConfig.Outputs

tuning_inputs = {
    channel: TrainingInput(
        s3_data=processing_outputs[channel].S3Output.S3Uri,
        content_type="text/csv",
    )
    for channel in ("train", "validation")
}

# The value returned by fit() is handed to TuningStep as its step arguments.
step_args = tuner_log.fit(inputs=tuning_inputs)

step_tune = TuningStep(name="HPOTuningAirlinePrice", step_args=step_args)

In [None]:
# Create a SageMaker model from the best-performing tuning job's artifact.
# The prefix matches the key prefix of the estimator's `output_path`.
model_prefix = f'{base_job_name}/AirlineTicketTrain'

model = Model(
    image_uri=image_uri,
    model_data=step_tune.get_top_model_s3_uri(
        top_k=0,  # index 0 selects the top-ranked training job of the tuning step
        # Same bucket the estimator's output_path is built from.
        s3_bucket=sagemaker_session.default_bucket(),
        prefix=model_prefix
    ),
    predictor_cls=XGBoostPredictor,
    # Fixed keyword typo (`sagemamer_session`). A pipeline session is passed so
    # that `model.create()` yields step arguments for ModelStep instead of
    # trying to create the model immediately.
    sagemaker_session=pipeline_session,
    role=role
)

step_model = ModelStep(
    name='CreateModel',
    step_args=model.create(instance_type=processing_instance_type)
)

# Training Step

In [None]:
# Build the XGBoost estimator used by the standalone training step.
# This re-declares `model_path`, `image_uri` and `xgb_train`; no `eval_metric` is set here.

# Training artifacts are written under this prefix of the session's default bucket.
training_bucket = sagemaker_session.default_bucket()
model_path = f's3://{training_bucket}/{base_job_name}/AirlineTicketTrain'

# Look up the XGBoost 1.0-1 container image URI for the configured region.
image_uri = sagemaker.image_uris.retrieve(
    instance_type=processing_instance_type,
    py_version='py3',
    version='1.0-1',
    region=region,
    framework='xgboost',
)

# Estimator bound to the pipeline session; training reuses the processing
# instance type/count settings.
xgb_train = Estimator(
    role=role,
    sagemaker_session=pipeline_session,
    base_job_name=f'{base_job_name}/airline-ticket-training',
    output_path=model_path,
    instance_count=processing_instance_count,
    instance_type=processing_instance_type,
    image_uri=image_uri,
)

# Static hyperparameters for the regression objective.
# NOTE(review): upstream XGBoost deprecates `silent` in favour of `verbosity` --
# confirm the 1.0-1 container still accepts it.
training_hyperparameters = {
    'objective': "reg:squarederror",
    'num_round': 50,
    'max_depth': 10,
    'min_child_weight': 6,
    'subsample': 0.5,
    'silent': 0,
}
xgb_train.set_hyperparameters(**training_hyperparameters)

In [None]:
# Feed the processing step's "train" and "validation" outputs to the estimator
# as CSV channels, then wrap the resulting arguments in a pipeline training step.
processed = step_process.properties.ProcessingOutputConfig.Outputs

train_channel = TrainingInput(
    s3_data=processed["train"].S3Output.S3Uri,
    content_type="text/csv",
)
validation_channel = TrainingInput(
    s3_data=processed["validation"].S3Output.S3Uri,
    content_type="text/csv",
)

# The value returned by fit() is handed to TrainingStep as its step arguments.
step_args = xgb_train.fit(
    inputs={"train": train_channel, "validation": validation_channel}
)

step_train = TrainingStep(name="TrainingAirlinePrice", step_args=step_args)