Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions src/stepfunctions/steps/sagemaker.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,12 @@ def __init__(self, state_id, estimator, job_name, data=None, hyperparameters=Non
else:
parameters = training_config(estimator=estimator, inputs=data, mini_batch_size=mini_batch_size)

if estimator.debugger_hook_config != None:
parameters['DebugHookConfig'] = estimator.debugger_hook_config._to_request_dict()

if estimator.rules != None:
parameters['DebugRuleConfigurations'] = [rule.to_debugger_rule_config_dict() for rule in estimator.rules]

if isinstance(job_name, (ExecutionInput, StepInput)):
parameters['TrainingJobName'] = job_name

Expand Down
8 changes: 8 additions & 0 deletions src/stepfunctions/template/pipeline/inference.py
Original file line number Diff line number Diff line change
Expand Up @@ -204,6 +204,10 @@ def execute(self, job_name=None, hyperparameters=None):
s3_bucket=self.s3_bucket,
pipeline_name=self.workflow.name
)
inputs[StepId.TrainPreprocessor.value]['DebugHookConfig']['S3OutputPath'] = 's3://{s3_bucket}/{pipeline_name}/models/debug'.format(
s3_bucket=self.s3_bucket,
pipeline_name=self.workflow.name
)
inputs[StepId.CreatePreprocessorModel.value]['PrimaryContainer']['ModelDataUrl'] = '{s3_uri}/{job}/output/model.tar.gz'.format(
s3_uri=inputs[StepId.TrainPreprocessor.value]['OutputDataConfig']['S3OutputPath'],
job=inputs[StepId.TrainPreprocessor.value]['TrainingJobName']
Expand Down Expand Up @@ -236,6 +240,10 @@ def execute(self, job_name=None, hyperparameters=None):
s3_bucket=self.s3_bucket,
pipeline_name=self.workflow.name
)
inputs[StepId.Train.value]['DebugHookConfig']['S3OutputPath'] = 's3://{s3_bucket}/{pipeline_name}/models/debug'.format(
s3_bucket=self.s3_bucket,
pipeline_name=self.workflow.name
)
inputs[StepId.CreatePipelineModel.value]['ModelName'] = job_name
self.replace_sagemaker_job_name(inputs[StepId.Train.value], inputs[StepId.Train.value]['TrainingJobName'])

Expand Down
14 changes: 14 additions & 0 deletions tests/unit/test_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
from sagemaker.sklearn.estimator import SKLearn
from unittest.mock import MagicMock, patch
from stepfunctions.template import TrainingPipeline, InferencePipeline
from sagemaker.debugger import DebuggerHookConfig

from tests.unit.utils import mock_boto_api_call

Expand Down Expand Up @@ -65,6 +66,10 @@ def sklearn_preprocessor():
source_dir=source_dir,
sagemaker_session=sagemaker_session
)

sklearn_preprocessor.debugger_hook_config = DebuggerHookConfig(
s3_output_path='s3://sagemaker/source/debug'
)

return sklearn_preprocessor

Expand All @@ -86,6 +91,10 @@ def linear_learner_estimator():
sagemaker_session=sagemaker_session
)

ll_estimator.debugger_hook_config = DebuggerHookConfig(
s3_output_path='s3://sagemaker/models/debug'
)

ll_estimator.set_hyperparameters(feature_dim=10, predictor_type='regressor', mini_batch_size=32)

return ll_estimator
Expand Down Expand Up @@ -238,6 +247,7 @@ def test_inference_pipeline(sklearn_preprocessor, linear_learner_estimator):
assert result['States']['Train Preprocessor'] == {
'Parameters': {
'AlgorithmSpecification.$': "$$.Execution.Input['Train Preprocessor'].AlgorithmSpecification",
'DebugHookConfig.$': "$$.Execution.Input['Train Preprocessor'].DebugHookConfig",
'HyperParameters.$': "$$.Execution.Input['Train Preprocessor'].HyperParameters",
'InputDataConfig.$': "$$.Execution.Input['Train Preprocessor'].InputDataConfig",
'OutputDataConfig.$': "$$.Execution.Input['Train Preprocessor'].OutputDataConfig",
Expand Down Expand Up @@ -342,6 +352,9 @@ def test_inference_pipeline(sklearn_preprocessor, linear_learner_estimator):
'OutputDataConfig': {
'S3OutputPath': 's3://sagemaker-us-east-1/inference-pipeline/models'
},
'DebugHookConfig': {
'S3OutputPath': 's3://sagemaker-us-east-1/inference-pipeline/models/debug'
},
'ResourceConfig': {
'InstanceCount': 1,
'InstanceType': 'ml.c4.xlarge',
Expand Down Expand Up @@ -406,6 +419,7 @@ def test_inference_pipeline(sklearn_preprocessor, linear_learner_estimator):
}
}],
'OutputDataConfig': { 'S3OutputPath': 's3://sagemaker-us-east-1/inference-pipeline/models' },
'DebugHookConfig': { 'S3OutputPath': 's3://sagemaker-us-east-1/inference-pipeline/models/debug' },
'ResourceConfig': {
'InstanceCount': 1,
'InstanceType': 'ml.c4.xlarge',
Expand Down
115 changes: 115 additions & 0 deletions tests/unit/test_sagemaker_steps.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
from sagemaker.tensorflow import TensorFlow
from sagemaker.pipeline import PipelineModel
from sagemaker.model_monitor import DataCaptureConfig
from sagemaker.debugger import Rule, rule_configs, DebuggerHookConfig, CollectionConfig

from unittest.mock import MagicMock, patch
from stepfunctions.steps.sagemaker import TrainingStep, TransformStep, ModelStep, EndpointStep, EndpointConfigStep
Expand Down Expand Up @@ -58,6 +59,54 @@ def pca_estimator():

return pca

@pytest.fixture
def pca_estimator_with_debug_hook():
s3_output_location = 's3://sagemaker/models'

hook_config = DebuggerHookConfig(
s3_output_path='s3://sagemaker/output/debug',
hook_parameters={
"save_interval": "1"
},
collection_configs=[
CollectionConfig("hyperparameters"),
CollectionConfig("metrics")
]
)

rules = [Rule.sagemaker(rule_configs.confusion(),
rule_parameters={
"category_no": "15",
"min_diag": "0.7",
"max_off_diag": "0.3",
"start_step": "17",
"end_step": "19"}
)]

pca = sagemaker.estimator.Estimator(
PCA_IMAGE,
role=EXECUTION_ROLE,
train_instance_count=1,
train_instance_type='ml.c4.xlarge',
output_path=s3_output_location,
debugger_hook_config = hook_config,
rules=rules
)

pca.set_hyperparameters(
feature_dim=50000,
num_components=10,
subtract_mean=True,
algorithm_mode='randomized',
mini_batch_size=200
)

pca.sagemaker_session = MagicMock()
pca.sagemaker_session.boto_region_name = 'us-east-1'
pca.sagemaker_session._default_bucket = 'sagemaker'

return pca

@pytest.fixture
def pca_model():
model_data = 's3://sagemaker/models/pca.tar.gz'
Expand Down Expand Up @@ -95,6 +144,10 @@ def tensorflow_estimator():
checkpoint_path='s3://sagemaker/models/sagemaker-tensorflow/checkpoints'
)

estimator.debugger_hook_config = DebuggerHookConfig(
s3_output_path='s3://sagemaker/models/debug'
)

estimator.sagemaker_session = MagicMock()
estimator.sagemaker_session.boto_region_name = 'us-east-1'
estimator.sagemaker_session._default_bucket = 'sagemaker'
Expand Down Expand Up @@ -148,6 +201,65 @@ def test_training_step_creation(pca_estimator):
'End': True
}

@patch('botocore.client.BaseClient._make_api_call', new=mock_boto_api_call)
def test_training_step_creation_with_debug_hook(pca_estimator_with_debug_hook):
step = TrainingStep('Training',
estimator=pca_estimator_with_debug_hook,
job_name='TrainingJob')
assert step.to_dict() == {
'Type': 'Task',
'Parameters': {
'AlgorithmSpecification': {
'TrainingImage': PCA_IMAGE,
'TrainingInputMode': 'File'
},
'OutputDataConfig': {
'S3OutputPath': 's3://sagemaker/models'
},
'StoppingCondition': {
'MaxRuntimeInSeconds': 86400
},
'ResourceConfig': {
'InstanceCount': 1,
'InstanceType': 'ml.c4.xlarge',
'VolumeSizeInGB': 30
},
'RoleArn': EXECUTION_ROLE,
'HyperParameters': {
'feature_dim': '50000',
'num_components': '10',
'subtract_mean': 'True',
'algorithm_mode': 'randomized',
'mini_batch_size': '200'
},
'DebugHookConfig': {
'S3OutputPath': 's3://sagemaker/output/debug',
'HookParameters': {'save_interval': '1'},
'CollectionConfigurations': [
{'CollectionName': 'hyperparameters'},
{'CollectionName': 'metrics'}
]
},
'DebugRuleConfigurations': [
{
'RuleConfigurationName': 'Confusion',
'RuleEvaluatorImage': '503895931360.dkr.ecr.us-east-1.amazonaws.com/sagemaker-debugger-rules:latest',
'RuleParameters': {
'rule_to_invoke': 'Confusion',
'category_no': '15',
'min_diag': '0.7',
'max_off_diag': '0.3',
'start_step': '17',
'end_step': '19'
}
}
],
'TrainingJobName': 'TrainingJob'
},
'Resource': 'arn:aws:states:::sagemaker:createTrainingJob.sync',
'End': True
}

@patch('botocore.client.BaseClient._make_api_call', new=mock_boto_api_call)
def test_training_step_creation_with_model(pca_estimator):
training_step = TrainingStep('Training', estimator=pca_estimator, job_name='TrainingJob')
Expand Down Expand Up @@ -231,6 +343,9 @@ def test_training_step_creation_with_framework(tensorflow_estimator):
'OutputDataConfig': {
'S3OutputPath': 's3://sagemaker/models'
},
'DebugHookConfig': {
'S3OutputPath': 's3://sagemaker/models/debug'
},
'StoppingCondition': {
'MaxRuntimeInSeconds': 86400
},
Expand Down