In [2]:
import os

from flytekit.configuration import set_flyte_config_file, platform
# Locate the Flyte client config relative to the current user's home directory
# rather than one developer's absolute path, so the notebook runs on any machine.
# Set the FLYTE_NOTEBOOK_CONFIG environment variable to use a different file
# (for example a local "notebook.config").
FLYTE_CONFIG_PATH = os.environ.get(
    "FLYTE_NOTEBOOK_CONFIG",
    os.path.expanduser("~/.flyte/notebook-staging.config"),
)
set_flyte_config_file(FLYTE_CONFIG_PATH)

# Show the platform URL read from the loaded config, so the reader can see
# which endpoint the cells below will use.
print("Connected to {}".format(platform.URL.get()))

def print_console_url(exc):
    """Print the console URL for an execution.

    The host comes from ``platform.URL.get()``; the path is filled in from the
    execution identifier's ``project``, ``domain`` and ``name`` attributes.

    Args:
        exc: Object exposing ``id.project``, ``id.domain`` and ``id.name``.

    Returns:
        None. The URL is written to standard output.
    """
    url_template = "http://{host}/console/projects/{project}/domains/{domain}/executions/{name}"
    host = platform.URL.get()
    execution_id = exc.id
    print(
        url_template.format(
            host=host,
            project=execution_id.project,
            domain=execution_id.domain,
            name=execution_id.name,
        )
    )

Connected to flyte-staging.lyft.net


In [6]:
from flytekit.sdk.tasks import inputs
from flytekit.sdk.types import Types
from flytekit.sdk.workflow import workflow_class, Input, Output
from flytekit.common.tasks.sagemaker import training_job_task, hpo_job_task
from flytekit.models.sagemaker import training_job as training_job_models, hpo_job as hpo_job_models
from flytekit.sdk.sagemaker import types as _sdk_sagemaker_types
# Static XGBoost hyperparameters shared by the launches below.
# Every value is kept as a string; keys are listed alphabetically.
xgboost_hyperparameters = dict(
    base_score="0.5",
    booster="gbtree",
    csv_weights="0",
    dsplit="row",
    grow_policy="depthwise",
    lambda_bias="0.0",
    max_bin="256",
    max_leaves="0",
    normalize_type="tree",
    objective="reg:linear",
    one_drop="0",
    prob_buffer_row="1.0",
    process_type="default",
    rate_drop="0.0",
    refresh_leaf="1",
    sample_type="uniform",
    scale_pos_weight="1.0",
    silent="0",
    skip_drop="0.0",
    tree_method="auto",
    tweedie_variance_power="1.5",
    updater="grow_colmaker,prune",
)

# Algorithm specification reused by the training task: XGBoost version "0.72",
# FILE input mode, and a single metric definition.
# NOTE(review): the metric is named "Minimize" while its regex is the literal
# "validation:error", the same string the HPO objective uses as metric_name —
# confirm the name/regex pairing is what the backend expects.
_validation_error_metric = training_job_models.MetricDefinition(
    name="Minimize",
    regex="validation:error",
)
alg_spec = training_job_models.AlgorithmSpecification(
    algorithm_name=_sdk_sagemaker_types.AlgorithmName.XGBOOST,
    algorithm_version="0.72",
    input_mode=_sdk_sagemaker_types.InputMode.FILE,
    metric_definitions=[_validation_error_metric],
)

# Resources for each training job: one ml.m4.xlarge instance with a 25 GB volume.
_train_job_config = training_job_models.TrainingJobConfig(
    instance_type="ml.m4.xlarge",
    instance_count=1,
    volume_size_in_gb=25,
)

# Cacheable training task built from the shared algorithm specification.
xgboost_train_task2 = training_job_task.SdkSimpleTrainingJobTask(
    algorithm_specification=alg_spec,
    training_job_config=_train_job_config,
    cacheable=True,
    cache_version="2",
)

# Options for the hyperparameter-tuning task: at most 10 training jobs in
# total, at most 5 in parallel, 2 retries, cacheable with cache version "2".
_hpo_task_options = dict(
    max_number_of_training_jobs=10,
    max_parallel_training_jobs=5,
    retries=2,
    cacheable=True,
    cache_version="2",
)

# HPO task that wraps the training task defined above.
xgboost_hpo_task2 = hpo_job_task.SdkSimpleHPOJobTask(
    training_job=xgboost_train_task2,
    **_hpo_task_options
)

In [9]:
from flytekit.models.sagemaker.training_job import StoppingCondition
from flytekit.models.sagemaker.hpo_job import HPOJobConfig, HyperparameterTuningObjective
from flytekit.models.sagemaker.parameter_ranges import ParameterRanges, CategoricalParameterRange, ContinuousParameterRange, IntegerParameterRange
# Inputs for launching the HPO task. Bound to `hpo_inputs` rather than `inputs`
# so the `inputs` name imported from flytekit.sdk.tasks is not overwritten by
# a plain dict.
hpo_inputs = {
    "train": "s3://lyft-modelbuilder/test-datasets/pima-indians/train",
    "validation": "s3://lyft-modelbuilder/test-datasets/pima-indians/validation",
    "static_hyperparameters": xgboost_hyperparameters,
    "stopping_condition": StoppingCondition(
        max_runtime_in_seconds=43200,  # 12 hours
    ).to_flyte_idl(),
    "hpo_job_config": HPOJobConfig(
        hyperparameter_ranges=ParameterRanges(
            parameter_range_map={
                # `num_round` is the only tuned hyperparameter: integers 1..100,
                # with logarithmic scaling.
                "num_round": IntegerParameterRange(
                    min_value=1,
                    max_value=100,
                    scaling_type=_sdk_sagemaker_types.HyperparameterScalingType.LOGARITHMIC,
                ),
            }
        ),
        tuning_strategy=_sdk_sagemaker_types.HyperparameterTuningStrategy.BAYESIAN,
        tuning_objective=HyperparameterTuningObjective(
            objective_type=_sdk_sagemaker_types.HyperparameterTuningObjectiveType.MINIMIZE,
            metric_name="validation:error",
        ),
        training_job_early_stopping_type=_sdk_sagemaker_types.TrainingJobEarlyStoppingType.AUTO
    ).to_flyte_idl(),
}

# Register and launch the HPO task in project "flyteexamples", domain
# "development", then print the console link for the resulting execution.
exc = xgboost_hpo_task2.register_and_launch("flyteexamples", "development", inputs=hpo_inputs)
print_console_url(exc)

http://flyte-staging.lyft.net/console/projects/flyteexamples/domains/development/executions/knnzbkl65z


In [8]:
# NOTE(review): this cell's execution count (8) is lower than that of the
# launch cell above (9), so it was last run before that launch. Re-run it
# after launching so `exc` refers to the execution being inspected.
# Presumably refreshes the local execution object from the platform — confirm.
exc.sync()

In [None]:
# This cell has no execution count, i.e. it was not run in the saved notebook.
# Reads the `uri` attribute of the execution's "model" output.
# NOTE(review): presumably only populated once the execution has finished and
# `exc` has been synced — confirm. `exc` is reassigned by the standalone
# training launch below, so which execution is read depends on run order.
exc.outputs['model'].uri

In [10]:
from flytekit.models.sagemaker.training_job import StoppingCondition

# Hyperparameters for the standalone training job: the shared static XGBoost
# settings plus a fixed `num_round` (the HPO launch supplies `num_round` as a
# tuned range instead). Derived from `xgboost_hyperparameters` so the two sets
# cannot drift apart; `num_round` stays the last key, as before.
standalone_training_xgboost_hyperparameters = dict(
    xgboost_hyperparameters,
    num_round="23",
)

# Inputs for launching the training task. Bound to `training_inputs` rather
# than `inputs` so the `inputs` name imported from flytekit.sdk.tasks is not
# overwritten by a plain dict.
training_inputs = {
    "train": "s3://lyft-modelbuilder/test-datasets/pima-indians/train",
    "validation": "s3://lyft-modelbuilder/test-datasets/pima-indians/validation",
    "static_hyperparameters": standalone_training_xgboost_hyperparameters,
    "stopping_condition": StoppingCondition(
        max_runtime_in_seconds=43200,  # 12 hours
    ).to_flyte_idl(),
}

# Register and launch the training task in project "flyteexamples", domain
# "development", then print the console link for the resulting execution.
exc = xgboost_train_task2.register_and_launch("flyteexamples", "development", inputs=training_inputs)
print_console_url(exc)

http://flyte-staging.lyft.net/console/projects/flyteexamples/domains/development/executions/xaq1og1tnc
