From 9b2cd715e890069dc0a9382c5e7bfd0c99de8857 Mon Sep 17 00:00:00 2001 From: Fatema Alkhanaizi Date: Mon, 1 Aug 2022 16:34:15 +0100 Subject: [PATCH 01/15] adding template for custom image handling --- .../templates/custom_image_project_stack.py | 312 ++++++++++++++++++ .../build_w_custom_pipeline_construct.py | 240 ++++++++++++++ 2 files changed, 552 insertions(+) create mode 100644 mlops-multi-account-cdk/mlops-sm-project-template-rt/mlops_sm_project_template_rt/templates/custom_image_project_stack.py create mode 100644 mlops-multi-account-cdk/mlops-sm-project-template-rt/mlops_sm_project_template_rt/templates/pipeline_constructs/build_w_custom_pipeline_construct.py diff --git a/mlops-multi-account-cdk/mlops-sm-project-template-rt/mlops_sm_project_template_rt/templates/custom_image_project_stack.py b/mlops-multi-account-cdk/mlops-sm-project-template-rt/mlops_sm_project_template_rt/templates/custom_image_project_stack.py new file mode 100644 index 00000000..28f0028e --- /dev/null +++ b/mlops-multi-account-cdk/mlops-sm-project-template-rt/mlops_sm_project_template_rt/templates/custom_image_project_stack.py @@ -0,0 +1,312 @@ +# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +# +# SPDX-License-Identifier: MIT-0 +# +# Permission is hereby granted, free of charge, to any person obtaining a copy of this +# software and associated documentation files (the "Software"), to deal in the Software +# without restriction, including without limitation the rights to use, copy, modify, +# merge, publish, distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, +# INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A +# PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +from aws_cdk import ( + Aws, + CfnDynamicReference, + CfnDynamicReferenceService, + Stack, + Tags, + aws_s3 as s3, + aws_iam as iam, + aws_kms as kms, + aws_ecr as ecr, + aws_sagemaker as sagemaker, +) + +import aws_cdk + +from constructs import Construct + +from mlops_sm_project_template_rt.templates.pipeline_constructs.build_w_custom_pipeline_construct import ( + BuildPipelineConstruct, +) +from mlops_sm_project_template_rt.templates.pipeline_constructs.deploy_pipeline_construct import ( + DeployPipelineConstruct, +) + +from mlops_sm_project_template_rt.config.constants import PREPROD_ACCOUNT, PROD_ACCOUNT, DEFAULT_DEPLOYMENT_REGION + + +class MLOpsStack(Stack): + DESCRIPTION: str = "This template includes a model building pipeline that includes a workflow to pre-process, train, evaluate and register a model. The deploy pipeline creates a preprod and production endpoint. The target DEV/PREPROD/PROD accounts are predefined in the template." 
+ TEMPLATE_NAME: str = "Basic MLOps template for real-time deployment" + + def __init__(self, scope: Construct, construct_id: str, **kwargs) -> None: + super().__init__(scope, construct_id, **kwargs) + + # Define required parmeters + project_name = aws_cdk.CfnParameter( + self, + "SageMakerProjectName", + type="String", + description="The name of the SageMaker project.", + min_length=1, + max_length=32, + ).value_as_string + + project_id = aws_cdk.CfnParameter( + self, + "SageMakerProjectId", + type="String", + min_length=1, + max_length=16, + description="Service generated Id of the project.", + ).value_as_string + + Tags.of(self).add("sagemaker:project-id", project_id) + Tags.of(self).add("sagemaker:project-name", project_name) + + # create kms key to be used by the assets bucket + kms_key = kms.Key( + self, + "ArtifactsBucketKMSKey", + description="key used for encryption of data in Amazon S3", + enable_key_rotation=True, + policy=iam.PolicyDocument( + statements=[ + iam.PolicyStatement( + actions=["kms:*"], + effect=iam.Effect.ALLOW, + resources=["*"], + principals=[iam.AccountRootPrincipal()], + ) + ] + ), + ) + + # allow cross account access to the kms key + kms_key.add_to_resource_policy( + iam.PolicyStatement( + actions=[ + "kms:Encrypt", + "kms:Decrypt", + "kms:ReEncrypt*", + "kms:GenerateDataKey*", + "kms:DescribeKey", + ], + resources=[ + "*", + ], + principals=[ + iam.ArnPrincipal(f"arn:aws:iam::{PREPROD_ACCOUNT}:root"), + iam.ArnPrincipal(f"arn:aws:iam::{PROD_ACCOUNT}:root"), + ], + ) + ) + + s3_artifact = s3.Bucket( + self, + "S3Artifact", + bucket_name=f"mlops-{project_name}-{project_id}-{Aws.REGION}", + encryption_key=kms_key, + versioned=True, + removal_policy=aws_cdk.RemovalPolicy.DESTROY, + ) + + # Block insecure requests to the bucket + s3_artifact.add_to_resource_policy( + iam.PolicyStatement( + sid="AllowSSLRequestsOnly", + actions=["s3:*"], + effect=iam.Effect.DENY, + resources=[ + s3_artifact.bucket_arn, + 
s3_artifact.arn_for_objects(key_pattern="*"), + ], + conditions={"Bool": {"aws:SecureTransport": "false"}}, + principals=[iam.AnyPrincipal()], + ) + ) + + # DEV account access to objects in the bucket + s3_artifact.add_to_resource_policy( + iam.PolicyStatement( + sid="AddDevPermissions", + actions=["s3:*"], + resources=[ + s3_artifact.arn_for_objects(key_pattern="*"), + s3_artifact.bucket_arn, + ], + principals=[ + iam.ArnPrincipal(f"arn:aws:iam::{Aws.ACCOUNT_ID}:root"), + ], + ) + ) + + # PROD account access to objects in the bucket + s3_artifact.add_to_resource_policy( + iam.PolicyStatement( + sid="AddCrossAccountPermissions", + actions=["s3:List*", "s3:Get*", "s3:Put*"], + resources=[ + s3_artifact.arn_for_objects(key_pattern="*"), + s3_artifact.bucket_arn, + ], + principals=[ + iam.ArnPrincipal(f"arn:aws:iam::{PREPROD_ACCOUNT}:root"), + iam.ArnPrincipal(f"arn:aws:iam::{PROD_ACCOUNT}:root"), + ], + ) + ) + + model_package_group_name = f"{project_name}-{project_id}" + + # cross account model registry resource policy + model_package_group_policy = iam.PolicyDocument( + statements=[ + iam.PolicyStatement( + sid="ModelPackageGroup", + actions=[ + "sagemaker:DescribeModelPackageGroup", + ], + resources=[ + f"arn:aws:sagemaker:{Aws.REGION}:{Aws.ACCOUNT_ID}:model-package-group/{model_package_group_name}" + ], + principals=[ + iam.ArnPrincipal(f"arn:aws:iam::{PREPROD_ACCOUNT}:root"), + iam.ArnPrincipal(f"arn:aws:iam::{PROD_ACCOUNT}:root"), + ], + ), + iam.PolicyStatement( + sid="ModelPackage", + actions=[ + "sagemaker:DescribeModelPackage", + "sagemaker:ListModelPackages", + "sagemaker:UpdateModelPackage", + "sagemaker:CreateModel", + ], + resources=[ + f"arn:aws:sagemaker:{Aws.REGION}:{Aws.ACCOUNT_ID}:model-package/{model_package_group_name}/*" + ], + principals=[ + iam.ArnPrincipal(f"arn:aws:iam::{PREPROD_ACCOUNT}:root"), + iam.ArnPrincipal(f"arn:aws:iam::{PROD_ACCOUNT}:root"), + ], + ), + ] + ).to_json() + + model_package_group = sagemaker.CfnModelPackageGroup( + 
self, + "ModelPackageGroup", + model_package_group_name=model_package_group_name, + model_package_group_description=f"Model Package Group for {project_name}", + model_package_group_policy=model_package_group_policy, + tags=[ + aws_cdk.CfnTag(key="sagemaker:project-id", value=project_id), + aws_cdk.CfnTag(key="sagemaker:project-name", value=project_name), + ], + ) + + # create ECR repository + ml_models_ecr_repo = ecr.Repository( + self, + "MLModelsECRRepository", + image_scan_on_push=True, + image_tag_mutability=ecr.TagMutability.IMMUTABLE, + repository_name=f"{project_name}", + ) + + # add cross account resource policies + ml_models_ecr_repo.add_to_resource_policy( + iam.PolicyStatement( + actions=[ + "ecr:BatchCheckLayerAvailability", + "ecr:BatchGetImage", + "ecr:CompleteLayerUpload", + "ecr:GetDownloadUrlForLayer", + "ecr:InitiateLayerUpload", + "ecr:PutImage", + "ecr:UploadLayerPart", + ], + principals=[ + iam.ArnPrincipal(f"arn:aws:iam::{Aws.ACCOUNT_ID}:root"), + ], + ) + ) + + ml_models_ecr_repo.add_to_resource_policy( + iam.PolicyStatement( + actions=[ + "ecr:BatchCheckLayerAvailability", + "ecr:BatchGetImage", + "ecr:GetDownloadUrlForLayer", + ], + principals=[ + iam.ArnPrincipal(f"arn:aws:iam::{PREPROD_ACCOUNT}:root"), + iam.ArnPrincipal(f"arn:aws:iam::{PROD_ACCOUNT}:root"), + ], + ) + ) + + seed_bucket = CfnDynamicReference(CfnDynamicReferenceService.SSM, "/mlops/code/seed_bucket").to_string() + build_app_key = CfnDynamicReference(CfnDynamicReferenceService.SSM, "/mlops/code/build").to_string() + deploy_app_key = CfnDynamicReference(CfnDynamicReferenceService.SSM, "/mlops/code/deploy").to_string() + + kms_key = kms.Key( + self, + "PipelineBucketKMSKey", + description="key used for encryption of data in Amazon S3", + enable_key_rotation=True, + policy=iam.PolicyDocument( + statements=[ + iam.PolicyStatement( + actions=["kms:*"], + effect=iam.Effect.ALLOW, + resources=["*"], + principals=[iam.AccountRootPrincipal()], + ) + ] + ), + ) + + 
pipeline_artifact_bucket = s3.Bucket( + self, + "PipelineBucket", + bucket_name=f"pipeline-{project_id}-{Aws.REGION}", + encryption_key=kms_key, + versioned=True, + removal_policy=aws_cdk.RemovalPolicy.DESTROY, + ) + + BuildPipelineConstruct( + self, + "build", + project_name, + project_id, + s3_artifact, + pipeline_artifact_bucket, + model_package_group_name, + ml_models_ecr_repo.repository_name, + seed_bucket, + build_app_key, + ) + + DeployPipelineConstruct( + self, + "deploy", + project_name, + project_id, + pipeline_artifact_bucket, + model_package_group_name, + seed_bucket, + deploy_app_key, + PREPROD_ACCOUNT, + PROD_ACCOUNT, + DEFAULT_DEPLOYMENT_REGION, + ) diff --git a/mlops-multi-account-cdk/mlops-sm-project-template-rt/mlops_sm_project_template_rt/templates/pipeline_constructs/build_w_custom_pipeline_construct.py b/mlops-multi-account-cdk/mlops-sm-project-template-rt/mlops_sm_project_template_rt/templates/pipeline_constructs/build_w_custom_pipeline_construct.py new file mode 100644 index 00000000..e86ea939 --- /dev/null +++ b/mlops-multi-account-cdk/mlops-sm-project-template-rt/mlops_sm_project_template_rt/templates/pipeline_constructs/build_w_custom_pipeline_construct.py @@ -0,0 +1,240 @@ +# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +# +# SPDX-License-Identifier: MIT-0 +# +# Permission is hereby granted, free of charge, to any person obtaining a copy of this +# software and associated documentation files (the "Software"), to deal in the Software +# without restriction, including without limitation the rights to use, copy, modify, +# merge, publish, distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, +# INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A +# PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +from aws_cdk import ( + Aws, + aws_codecommit as codecommit, + aws_codebuild as codebuild, + aws_s3 as s3, + aws_iam as iam, + aws_codepipeline as codepipeline, + aws_codepipeline_actions as codepipeline_actions, +) +import aws_cdk +from constructs import Construct + + +class BuildPipelineConstruct(Construct): + def __init__( + self, + scope: Construct, + construct_id: str, + project_name: str, + project_id: str, + s3_artifact: s3.IBucket, + pipeline_artifact_bucket: s3.IBucket, + model_package_group_name: str, + ecr_repository_name: str, + repo_s3_bucket_name: str, + repo_s3_object_key: str, + **kwargs, + ) -> None: + super().__init__(scope, construct_id, **kwargs) + + # Define resource names + pipeline_name = f"{project_name}-{construct_id}" + pipeline_description = f"{project_name} Model Build Pipeline" + + # Create source repo from seed bucket/key + build_app_cfnrepository = codecommit.CfnRepository( + self, + "BuildAppCodeRepo", + repository_name=f"{project_name}-{construct_id}", + code=codecommit.CfnRepository.CodeProperty( + s3=codecommit.CfnRepository.S3Property( + bucket=repo_s3_bucket_name, + key=repo_s3_object_key, + object_version=None, + ), + branch_name="main", + ), + tags=[ + aws_cdk.CfnTag(key="sagemaker:project-id", value=project_id), + aws_cdk.CfnTag(key="sagemaker:project-name", value=project_name), + ], + ) + + # Reference the newly created repository + build_app_repository = codecommit.Repository.from_repository_name( + self, "ImportedBuildRepo", build_app_cfnrepository.attr_name + ) + + codebuild_role = iam.Role( + self, + "CodeBuildRole", + assumed_by=iam.ServicePrincipal("codebuild.amazonaws.com"), + path="/service-role/", + ) + + sagemaker_execution_role = iam.Role( + self, 
+ "SageMakerExecutionRole", + assumed_by=iam.ServicePrincipal("sagemaker.amazonaws.com"), + path="/service-role/", + ) + + # Create a policy statement for SM and ECR pull + sagemaker_policy = iam.Policy( + self, + "SageMakerPolicy", + document=iam.PolicyDocument( + statements=[ + iam.PolicyStatement( + actions=[ + "logs:CreateLogGroup", + "logs:CreateLogStream", + "logs:PutLogEvents", + ], + resources=["*"], + ), + iam.PolicyStatement( + actions=["sagemaker:*"], + not_resources=[ + "arn:aws:sagemaker:*:*:domain/*", + "arn:aws:sagemaker:*:*:user-profile/*", + "arn:aws:sagemaker:*:*:app/*", + "arn:aws:sagemaker:*:*:flow-definition/*", + ], + ), + iam.PolicyStatement( + actions=[ + "ecr:BatchCheckLayerAvailability", + "ecr:BatchGetImage", + "ecr:Describe*", + "ecr:GetAuthorizationToken", + "ecr:GetDownloadUrlForLayer", + ], + resources=["*"], + ), + iam.PolicyStatement( + actions=[ + "cloudwatch:PutMetricData", + ], + resources=["*"], + ), + iam.PolicyStatement( + actions=[ + "s3:AbortMultipartUpload", + "s3:DeleteObject", + "s3:GetBucket*", + "s3:GetObject*", + "s3:List*", + "s3:PutObject*", + "s3:Create*", + ], + resources=[s3_artifact.bucket_arn, f"{s3_artifact.bucket_arn}/*", "arn:aws:s3:::sagemaker-*"], + ), + iam.PolicyStatement( + actions=["iam:PassRole"], + resources=[sagemaker_execution_role.role_arn], + ), + iam.PolicyStatement( + actions=[ + "kms:Encrypt", + "kms:ReEncrypt*", + "kms:GenerateDataKey*", + "kms:Decrypt", + "kms:DescribeKey", + ], + effect=iam.Effect.ALLOW, + resources=[f"arn:aws:kms:{Aws.REGION}:{Aws.ACCOUNT_ID}:key/*"], + ), + ] + ), + ) + + sagemaker_policy.attach_to_role(sagemaker_execution_role) + sagemaker_policy.attach_to_role(codebuild_role) + + sm_pipeline_build = codebuild.PipelineProject( + self, + "SMPipelineBuild", + project_name=f"{project_name}-{construct_id}", + role=codebuild_role, # figure out what actually this role would need + build_spec=codebuild.BuildSpec.from_source_filename("buildspec.yml"), + 
environment=codebuild.BuildEnvironment( + build_image=codebuild.LinuxBuildImage.STANDARD_5_0, + environment_variables={ + "SAGEMAKER_PROJECT_NAME": codebuild.BuildEnvironmentVariable(value=project_name), + "SAGEMAKER_PROJECT_ID": codebuild.BuildEnvironmentVariable(value=project_id), + "MODEL_PACKAGE_GROUP_NAME": codebuild.BuildEnvironmentVariable(value=model_package_group_name), + "AWS_REGION": codebuild.BuildEnvironmentVariable(value=Aws.REGION), + "SAGEMAKER_PIPELINE_NAME": codebuild.BuildEnvironmentVariable( + value=pipeline_name, + ), + "SAGEMAKER_PIPELINE_DESCRIPTION": codebuild.BuildEnvironmentVariable( + value=pipeline_description, + ), + "SAGEMAKER_PIPELINE_ROLE_ARN": codebuild.BuildEnvironmentVariable( + value=sagemaker_execution_role.role_arn, + ), + "ARTIFACT_BUCKET": codebuild.BuildEnvironmentVariable(value=s3_artifact.bucket_name), + "ARTIFACT_BUCKET_KMS_ID": codebuild.BuildEnvironmentVariable( + value=s3_artifact.encryption_key.key_id + ), + }, + ), + ) + + # code build to include security scan over cloudformation template + security_scan = codebuild.Project( + self, + "SecurityScanTooling", + build_spec=codebuild.BuildSpec.from_object( + { + "phases": { + "build": { + "commands": [ + + ] + }, + }, + } + ), + environment=codebuild.BuildEnvironment( + build_image=codebuild.LinuxBuildImage.STANDARD_5_0, + privileged=True + ), + + ) + + source_artifact = codepipeline.Artifact(artifact_name="GitSource") + + build_pipeline = codepipeline.Pipeline( + self, "Pipeline", pipeline_name=f"{project_name}-{construct_id}", artifact_bucket=pipeline_artifact_bucket + ) + + # add a source stage + source_stage = build_pipeline.add_stage(stage_name="Source") + source_stage.add_action( + codepipeline_actions.CodeCommitSourceAction( + action_name="Source", + output=source_artifact, + repository=build_app_repository, + branch="main", + ) + ) + + # add a build stage + build_stage = build_pipeline.add_stage(stage_name="Build") + build_stage.add_action( + 
codepipeline_actions.CodeBuildAction( + action_name="SMPipeline", + input=source_artifact, + project=sm_pipeline_build, + ) + ) From dde42e6a002a1e92346bd0b210797204476b9e24 Mon Sep 17 00:00:00 2001 From: Fatema Alkhanaizi Date: Tue, 2 Aug 2022 10:15:15 +0100 Subject: [PATCH 02/15] adding byoc setup --- .../build_pipeline_construct.py} | 65 +++- .../deploy_pipeline_construct.py | 352 ++++++++++++++++++ ...project_stack.py => byoc_project_stack.py} | 6 +- .../build_app/source_scripts/docker-build.sh | 31 ++ .../seed_code/deploy_app/config/constants.py | 3 + .../deploy_endpoint/deploy_endpoint_stack.py | 32 +- 6 files changed, 470 insertions(+), 19 deletions(-) rename mlops-multi-account-cdk/mlops-sm-project-template-rt/mlops_sm_project_template_rt/templates/{pipeline_constructs/build_w_custom_pipeline_construct.py => byoc_pipeline_constructs/build_pipeline_construct.py} (83%) create mode 100644 mlops-multi-account-cdk/mlops-sm-project-template-rt/mlops_sm_project_template_rt/templates/byoc_pipeline_constructs/deploy_pipeline_construct.py rename mlops-multi-account-cdk/mlops-sm-project-template-rt/mlops_sm_project_template_rt/templates/{custom_image_project_stack.py => byoc_project_stack.py} (97%) create mode 100755 mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/build_app/source_scripts/docker-build.sh diff --git a/mlops-multi-account-cdk/mlops-sm-project-template-rt/mlops_sm_project_template_rt/templates/pipeline_constructs/build_w_custom_pipeline_construct.py b/mlops-multi-account-cdk/mlops-sm-project-template-rt/mlops_sm_project_template_rt/templates/byoc_pipeline_constructs/build_pipeline_construct.py similarity index 83% rename from mlops-multi-account-cdk/mlops-sm-project-template-rt/mlops_sm_project_template_rt/templates/pipeline_constructs/build_w_custom_pipeline_construct.py rename to mlops-multi-account-cdk/mlops-sm-project-template-rt/mlops_sm_project_template_rt/templates/byoc_pipeline_constructs/build_pipeline_construct.py index 
e86ea939..e0880c0f 100644 --- a/mlops-multi-account-cdk/mlops-sm-project-template-rt/mlops_sm_project_template_rt/templates/pipeline_constructs/build_w_custom_pipeline_construct.py +++ b/mlops-multi-account-cdk/mlops-sm-project-template-rt/mlops_sm_project_template_rt/templates/byoc_pipeline_constructs/build_pipeline_construct.py @@ -136,7 +136,11 @@ def __init__( "s3:PutObject*", "s3:Create*", ], - resources=[s3_artifact.bucket_arn, f"{s3_artifact.bucket_arn}/*", "arn:aws:s3:::sagemaker-*"], + resources=[ + s3_artifact.bucket_arn, + f"{s3_artifact.bucket_arn}/*", + "arn:aws:s3:::sagemaker-*", + ], ), iam.PolicyStatement( actions=["iam:PassRole"], @@ -169,9 +173,15 @@ def __init__( environment=codebuild.BuildEnvironment( build_image=codebuild.LinuxBuildImage.STANDARD_5_0, environment_variables={ - "SAGEMAKER_PROJECT_NAME": codebuild.BuildEnvironmentVariable(value=project_name), - "SAGEMAKER_PROJECT_ID": codebuild.BuildEnvironmentVariable(value=project_id), - "MODEL_PACKAGE_GROUP_NAME": codebuild.BuildEnvironmentVariable(value=model_package_group_name), + "SAGEMAKER_PROJECT_NAME": codebuild.BuildEnvironmentVariable( + value=project_name + ), + "SAGEMAKER_PROJECT_ID": codebuild.BuildEnvironmentVariable( + value=project_id + ), + "MODEL_PACKAGE_GROUP_NAME": codebuild.BuildEnvironmentVariable( + value=model_package_group_name + ), "AWS_REGION": codebuild.BuildEnvironmentVariable(value=Aws.REGION), "SAGEMAKER_PIPELINE_NAME": codebuild.BuildEnvironmentVariable( value=pipeline_name, @@ -182,7 +192,9 @@ def __init__( "SAGEMAKER_PIPELINE_ROLE_ARN": codebuild.BuildEnvironmentVariable( value=sagemaker_execution_role.role_arn, ), - "ARTIFACT_BUCKET": codebuild.BuildEnvironmentVariable(value=s3_artifact.bucket_name), + "ARTIFACT_BUCKET": codebuild.BuildEnvironmentVariable( + value=s3_artifact.bucket_name + ), "ARTIFACT_BUCKET_KMS_ID": codebuild.BuildEnvironmentVariable( value=s3_artifact.encryption_key.key_id ), @@ -191,31 +203,51 @@ def __init__( ) # code build to include 
security scan over cloudformation template - security_scan = codebuild.Project( + docker_build = codebuild.Project( self, - "SecurityScanTooling", + "DockerBuild", build_spec=codebuild.BuildSpec.from_object( { "phases": { "build": { "commands": [ - + "chmod +x source_scripts/docker-build.sh", + f"./source_scripts/docker-build.sh {ecr_repository_name}", ] }, }, } ), environment=codebuild.BuildEnvironment( - build_image=codebuild.LinuxBuildImage.STANDARD_5_0, - privileged=True + build_image=codebuild.LinuxBuildImage.STANDARD_5_0, privileged=True ), + ) + + docker_build.add_to_role_policy( + iam.PolicyStatement( + actions=["ecr:*"], + effect=iam.Effect.ALLOW, + resources=[ + f"arn:aws:ecr:{Aws.REGION}:{Aws.ACCOUNT_ID}:repository/{ecr_repository_name}" + ], + ) + ) + docker_build.add_to_role_policy( + iam.PolicyStatement( + actions=["ecr:Get*"], + effect=iam.Effect.ALLOW, + resources=["*"], + ) ) source_artifact = codepipeline.Artifact(artifact_name="GitSource") build_pipeline = codepipeline.Pipeline( - self, "Pipeline", pipeline_name=f"{project_name}-{construct_id}", artifact_bucket=pipeline_artifact_bucket + self, + "Pipeline", + pipeline_name=f"{project_name}-{construct_id}", + artifact_bucket=pipeline_artifact_bucket, ) # add a source stage @@ -231,10 +263,21 @@ def __init__( # add a build stage build_stage = build_pipeline.add_stage(stage_name="Build") + + build_stage.add_action( + codepipeline_actions.CodeBuildAction( + action_name="DockerBuild", + input=source_artifact, + project=docker_build, + run_order=1 + ) + ) + build_stage.add_action( codepipeline_actions.CodeBuildAction( action_name="SMPipeline", input=source_artifact, project=sm_pipeline_build, + run_order=2 ) ) diff --git a/mlops-multi-account-cdk/mlops-sm-project-template-rt/mlops_sm_project_template_rt/templates/byoc_pipeline_constructs/deploy_pipeline_construct.py 
b/mlops-multi-account-cdk/mlops-sm-project-template-rt/mlops_sm_project_template_rt/templates/byoc_pipeline_constructs/deploy_pipeline_construct.py new file mode 100644 index 00000000..27280fcd --- /dev/null +++ b/mlops-multi-account-cdk/mlops-sm-project-template-rt/mlops_sm_project_template_rt/templates/byoc_pipeline_constructs/deploy_pipeline_construct.py @@ -0,0 +1,352 @@ +# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +# +# SPDX-License-Identifier: MIT-0 +# +# Permission is hereby granted, free of charge, to any person obtaining a copy of this +# software and associated documentation files (the "Software"), to deal in the Software +# without restriction, including without limitation the rights to use, copy, modify, +# merge, publish, distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, +# INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A +# PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ +from aws_cdk import ( + Aws, + CfnCapabilities, + aws_codecommit as codecommit, + aws_codebuild as codebuild, + aws_codepipeline_actions as codepipeline_actions, + aws_codepipeline as codepipeline, + aws_events as events, + aws_events_targets as targets, + aws_s3 as s3, + aws_iam as iam, +) +import aws_cdk +from constructs import Construct + + +class DeployPipelineConstruct(Construct): + def __init__( + self, + scope: Construct, + construct_id: str, + project_name: str, + project_id: str, + pipeline_artifact_bucket: s3.IBucket, + model_package_group_name: str, + ecr_repo_arn: str, + model_bucket_arn: str, + repo_s3_bucket_name: str, + repo_s3_object_key: str, + preprod_account: int, + prod_account: int, + deployment_region: str, + **kwargs, + ) -> None: + super().__init__(scope, construct_id, **kwargs) + + # Define resource names + pipeline_name = f"{project_name}-{construct_id}" + + # Create source repo from seed bucket/key + deploy_app_cfnrepository = codecommit.CfnRepository( + self, + "BuildAppCodeRepo", + repository_name=f"{project_name}-{construct_id}", + code=codecommit.CfnRepository.CodeProperty( + s3=codecommit.CfnRepository.S3Property( + bucket=repo_s3_bucket_name, + key=repo_s3_object_key, + object_version=None, + ), + branch_name="main", + ), + tags=[ + aws_cdk.CfnTag(key="sagemaker:project-id", value=project_id), + aws_cdk.CfnTag(key="sagemaker:project-name", value=project_name), + ], + ) + + # Reference the newly created repository + deploy_app_repository = codecommit.Repository.from_repository_name( + self, "ImportedDeployRepo", deploy_app_cfnrepository.attr_name + ) + + cdk_synth_build_role = iam.Role( + self, + "CodeBuildRole", + assumed_by=iam.ServicePrincipal("codebuild.amazonaws.com"), + path="/service-role/", + ) + + cdk_synth_build_role.add_to_policy( + iam.PolicyStatement( + actions=["sagemaker:ListModelPackages"], + resources=[ + 
f"arn:{Aws.PARTITION}:sagemaker:{Aws.REGION}:{Aws.ACCOUNT_ID}:model-package-group/{project_name}-{project_id}*", + f"arn:{Aws.PARTITION}:sagemaker:{Aws.REGION}:{Aws.ACCOUNT_ID}:model-package/{project_name}-{project_id}/*", + ], + ) + ) + + cdk_synth_build_role.add_to_policy( + iam.PolicyStatement( + actions=["ssm:GetParameter"], + resources=[ + f"arn:{Aws.PARTITION}:ssm:{Aws.REGION}:{Aws.ACCOUNT_ID}:parameter/*", + ], + ) + ) + + cdk_synth_build_role.add_to_policy( + iam.PolicyStatement( + actions=[ + "kms:Encrypt", + "kms:ReEncrypt*", + "kms:GenerateDataKey*", + "kms:Decrypt", + "kms:DescribeKey", + ], + effect=iam.Effect.ALLOW, + resources=[f"arn:aws:kms:{Aws.REGION}:{Aws.ACCOUNT_ID}:key/*"], + ), + ) + + cdk_synth_build = codebuild.PipelineProject( + self, + "CDKSynthBuild", + role=cdk_synth_build_role, + build_spec=codebuild.BuildSpec.from_object( + { + "version": "0.2", + "phases": { + "build": { + "commands": [ + "npm install -g aws-cdk", + "pip install -r requirements.txt", + "cdk synth --no-lookups", + ] + } + }, + "artifacts": {"base-directory": "cdk.out", "files": "**/*"}, + } + ), + environment=codebuild.BuildEnvironment( + build_image=codebuild.LinuxBuildImage.STANDARD_5_0, + environment_variables={ + "MODEL_PACKAGE_GROUP_NAME": codebuild.BuildEnvironmentVariable(value=model_package_group_name), + "PROJECT_ID": codebuild.BuildEnvironmentVariable(value=project_id), + "PROJECT_NAME": codebuild.BuildEnvironmentVariable(value=project_name), + "ECR_REPO_ARN": codebuild.BuildEnvironmentVariable(value=ecr_repo_arn), + "MODEL_BUCKET_ARN": codebuild.BuildEnvironmentVariable(value=model_bucket_arn), + }, + ), + ) + + # code build to include security scan over cloudformation template + security_scan = codebuild.Project( + self, + "SecurityScanTooling", + build_spec=codebuild.BuildSpec.from_object( + { + "version": 0.2, + "env": { + "shell": "bash", + "variables": { + "TemplateFolder": "./*.template.json", + "FAIL_BUILD": "true", + }, + }, + "phases": { + 
"install": { + "runtime-versions": {"ruby": 2.6}, + "commands": [ + "export date=`date +%Y-%m-%dT%H:%M:%S.%NZ`", + "echo Installing cfn_nag - `pwd`", + "gem install cfn-nag", + "echo cfn_nag installation complete `date`", + ], + }, + "build": { + "commands": [ + "echo Starting cfn scanning `date` in `pwd`", + "echo 'RulesToSuppress:\n- id: W58\n reason: W58 is an warning raised due to Lambda functions require permission to write CloudWatch Logs, although the lambda role contains the policy that support these permissions cgn_nag continues to through this problem (https://github.com/stelligent/cfn_nag/issues/422)' > cfn_nag_ignore.yml", # this is temporary solution to an issue with W58 rule with cfn_nag + 'mkdir report || echo "dir report exists"', + "SCAN_RESULT=$(cfn_nag_scan --fail-on-warnings --deny-list-path cfn_nag_ignore.yml --input-path ${TemplateFolder} -o json > ./report/cfn_nag.out.json && echo OK || echo FAILED)", + "echo Completed cfn scanning `date`", + "echo $SCAN_RESULT", + "echo $FAIL_BUILD", + """if [[ "$FAIL_BUILD" = "true" && "$SCAN_RESULT" = "FAILED" ]]; then printf "\n\nFailiing pipeline as possible insecure configurations were detected\n\n" && exit 1; fi""", + ] + }, + }, + "artifacts": {"files": "./report/cfn_nag.out.json"}, + } + ), + environment=codebuild.BuildEnvironment( + build_image=codebuild.LinuxBuildImage.STANDARD_5_0, + ), + ) + + source_artifact = codepipeline.Artifact(artifact_name="GitSource") + cdk_synth_artifact = codepipeline.Artifact(artifact_name="CDKSynth") + cfn_nag_artifact = codepipeline.Artifact(artifact_name="CfnNagScanReport") + + deploy_code_pipeline = codepipeline.Pipeline( + self, + "DeployPipeline", + cross_account_keys=True, + pipeline_name=pipeline_name, + artifact_bucket=pipeline_artifact_bucket, + ) + + # add a source stage + source_stage = deploy_code_pipeline.add_stage(stage_name="Source") + source_stage.add_action( + codepipeline_actions.CodeCommitSourceAction( + action_name="Source", + 
output=source_artifact, + repository=deploy_app_repository, + branch="main", + ) + ) + + # add a build stage + build_stage = deploy_code_pipeline.add_stage(stage_name="Build") + + build_stage.add_action( + codepipeline_actions.CodeBuildAction( + action_name="Synth", + input=source_artifact, + outputs=[cdk_synth_artifact], + project=cdk_synth_build, + ) + ) + + # add a security evaluation stage for cloudformation templates + security_stage = deploy_code_pipeline.add_stage(stage_name="SecurityEvaluation") + + security_stage.add_action( + codepipeline_actions.CodeBuildAction( + action_name="CFNNag", + input=cdk_synth_artifact, + outputs=[cfn_nag_artifact], + project=security_scan, + ) + ) + + # add stages to deploy to the different environments + deploy_code_pipeline.add_stage( + stage_name="DeployDev", + actions=[ + codepipeline_actions.CloudFormationCreateUpdateStackAction( + action_name="Deploy_CFN_Dev", + run_order=1, + template_path=cdk_synth_artifact.at_path("dev.template.json"), + stack_name=f"{project_name}-{construct_id}-dev", + admin_permissions=False, + replace_on_failure=True, + role=iam.Role.from_role_arn( + self, + "DevActionRole", + f"arn:{Aws.PARTITION}:iam::{Aws.ACCOUNT_ID}:role/cdk-hnb659fds-deploy-role-{Aws.ACCOUNT_ID}-{Aws.REGION}", + ), + deployment_role=iam.Role.from_role_arn( + self, + "DevDeploymentRole", + f"arn:{Aws.PARTITION}:iam::{Aws.ACCOUNT_ID}:role/cdk-hnb659fds-cfn-exec-role-{Aws.ACCOUNT_ID}-{Aws.REGION}", + ), + cfn_capabilities=[ + CfnCapabilities.AUTO_EXPAND, + CfnCapabilities.NAMED_IAM, + ], + ), + codepipeline_actions.ManualApprovalAction( + action_name="Approve_PreProd", + run_order=2, + additional_information="Approving deployment for preprod", + ), + ], + ) + + deploy_code_pipeline.add_stage( + stage_name="DeployPreProd", + actions=[ + codepipeline_actions.CloudFormationCreateUpdateStackAction( + action_name="Deploy_CFN_PreProd", + run_order=1, + template_path=cdk_synth_artifact.at_path("preprod.template.json"), + 
stack_name=f"{project_name}-{construct_id}-preprod", + admin_permissions=False, + replace_on_failure=True, + role=iam.Role.from_role_arn( + self, + "PreProdActionRole", + f"arn:{Aws.PARTITION}:iam::{preprod_account}:role/cdk-hnb659fds-deploy-role-{preprod_account}-{deployment_region}", + ), + deployment_role=iam.Role.from_role_arn( + self, + "PreProdDeploymentRole", + f"arn:{Aws.PARTITION}:iam::{preprod_account}:role/cdk-hnb659fds-cfn-exec-role-{preprod_account}-{deployment_region}", + ), + cfn_capabilities=[ + CfnCapabilities.AUTO_EXPAND, + CfnCapabilities.NAMED_IAM, + ], + ), + codepipeline_actions.ManualApprovalAction( + action_name="Approve_Prod", + run_order=2, + additional_information="Approving deployment for prod", + ), + ], + ) + + deploy_code_pipeline.add_stage( + stage_name="DeployProd", + actions=[ + codepipeline_actions.CloudFormationCreateUpdateStackAction( + action_name="Deploy_CFN_Prod", + run_order=1, + template_path=cdk_synth_artifact.at_path("prod.template.json"), + stack_name=f"{project_name}-{construct_id}-prod", + admin_permissions=False, + replace_on_failure=True, + role=iam.Role.from_role_arn( + self, + "ProdActionRole", + f"arn:{Aws.PARTITION}:iam::{prod_account}:role/cdk-hnb659fds-deploy-role-{prod_account}-{deployment_region}", + ), + deployment_role=iam.Role.from_role_arn( + self, + "ProdDeploymentRole", + f"arn:{Aws.PARTITION}:iam::{prod_account}:role/cdk-hnb659fds-cfn-exec-role-{prod_account}-{deployment_region}", + ), + cfn_capabilities=[ + CfnCapabilities.AUTO_EXPAND, + CfnCapabilities.NAMED_IAM, + ], + ), + ], + ) + + # CloudWatch rule to trigger model pipeline when a status change event happens to the model package group + model_event_rule = events.Rule( + self, + "ModelEventRule", + event_pattern=events.EventPattern( + source=["aws.sagemaker"], + detail_type=["SageMaker Model Package State Change"], + detail={ + "ModelPackageGroupName": [model_package_group_name], + "ModelApprovalStatus": ["Approved", "Rejected"], + }, + ), + 
targets=[targets.CodePipeline(deploy_code_pipeline)], + ) diff --git a/mlops-multi-account-cdk/mlops-sm-project-template-rt/mlops_sm_project_template_rt/templates/custom_image_project_stack.py b/mlops-multi-account-cdk/mlops-sm-project-template-rt/mlops_sm_project_template_rt/templates/byoc_project_stack.py similarity index 97% rename from mlops-multi-account-cdk/mlops-sm-project-template-rt/mlops_sm_project_template_rt/templates/custom_image_project_stack.py rename to mlops-multi-account-cdk/mlops-sm-project-template-rt/mlops_sm_project_template_rt/templates/byoc_project_stack.py index 28f0028e..9ddf9116 100644 --- a/mlops-multi-account-cdk/mlops-sm-project-template-rt/mlops_sm_project_template_rt/templates/custom_image_project_stack.py +++ b/mlops-multi-account-cdk/mlops-sm-project-template-rt/mlops_sm_project_template_rt/templates/byoc_project_stack.py @@ -32,10 +32,10 @@ from constructs import Construct -from mlops_sm_project_template_rt.templates.pipeline_constructs.build_w_custom_pipeline_construct import ( +from mlops_sm_project_template_rt.templates.byoc_pipeline_constructs.build_pipeline_construct import ( BuildPipelineConstruct, ) -from mlops_sm_project_template_rt.templates.pipeline_constructs.deploy_pipeline_construct import ( +from mlops_sm_project_template_rt.templates.byoc_pipeline_constructs.deploy_pipeline_construct import ( DeployPipelineConstruct, ) @@ -304,6 +304,8 @@ def __init__(self, scope: Construct, construct_id: str, **kwargs) -> None: project_id, pipeline_artifact_bucket, model_package_group_name, + ml_models_ecr_repo.repository_arn, + s3_artifact.bucket_arn, seed_bucket, deploy_app_key, PREPROD_ACCOUNT, diff --git a/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/build_app/source_scripts/docker-build.sh b/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/build_app/source_scripts/docker-build.sh new file mode 100755 index 00000000..54e60cc0 --- /dev/null +++ 
b/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/build_app/source_scripts/docker-build.sh @@ -0,0 +1,31 @@ +#!/bin/bash + +REPO_NAME=$1 + +echo $REPO_NAME + +aws ecr describe-repositories --region $AWS_DEFAULT_REGION --repository-names $REPO_NAME | jq --raw-output '.repositories[0]' > repository-info.json; + +AWS_ACCOUNT_ID=$(jq -r .registryId repository-info.json); +REPOSITORY_URI=${AWS_ACCOUNT_ID}.dkr.ecr.${AWS_DEFAULT_REGION}.amazonaws.com/${REPO_NAME}; +# REPOSITORY_URI=local + +aws ecr get-login-password --region $AWS_DEFAULT_REGION | docker login --username AWS --password-stdin ${AWS_ACCOUNT_ID}.dkr.ecr.${AWS_DEFAULT_REGION}.amazonaws.com # region must be the expanded variable, not the literal name + +for f in */ +do + if [ -d "$f" ]; then + tag=$(sed 's/.\{1\}$//' <<<"$f") + + IMAGE_TAG=$tag-$CODEBUILD_RESOLVED_SOURCE_VERSION; + + echo $IMAGE_TAG + + docker build --target $tag -t $REPOSITORY_URI:$tag . + docker tag $REPOSITORY_URI:$tag $REPOSITORY_URI:$IMAGE_TAG + + docker push $REPOSITORY_URI:$tag + docker push $REPOSITORY_URI:$IMAGE_TAG + + fi +done diff --git a/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/deploy_app/config/constants.py b/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/deploy_app/config/constants.py index 1ecfd485..9f133d6a 100644 --- a/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/deploy_app/config/constants.py +++ b/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/deploy_app/config/constants.py @@ -33,3 +33,6 @@ PROJECT_NAME = os.getenv("PROJECT_NAME", "") PROJECT_ID = os.getenv("PROJECT_ID", "") MODEL_PACKAGE_GROUP_NAME = os.getenv("MODEL_PACKAGE_GROUP_NAME", "") +MODEL_BUCKET_ARN = os.getenv("MODEL_BUCKET_ARN", "arn:aws:s3:::*mlops*") +ECR_REPO_ARN = os.getenv("ECR_REPO_ARN", "*") + diff --git a/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/deploy_app/deploy_endpoint/deploy_endpoint_stack.py
b/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/deploy_app/deploy_endpoint/deploy_endpoint_stack.py index b21a80e0..41f099c0 100644 --- a/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/deploy_app/deploy_endpoint/deploy_endpoint_stack.py +++ b/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/deploy_app/deploy_endpoint/deploy_endpoint_stack.py @@ -30,7 +30,14 @@ from .get_approved_package import get_approved_package -from config.constants import PROJECT_NAME, PROJECT_ID, MODEL_PACKAGE_GROUP_NAME, DEV_ACCOUNT +from config.constants import ( + PROJECT_NAME, + PROJECT_ID, + MODEL_PACKAGE_GROUP_NAME, + DEV_ACCOUNT, + ECR_REPO_ARN, + MODEL_BUCKET_ARN +) from datetime import datetime, timezone from dataclasses import dataclass @@ -51,7 +58,9 @@ class EndpointConfigProductionVariant(StageYamlDataClassConfig): instance_type: str = "ml.m5.2xlarge" variant_name: str = "AllTraffic" - FILE_PATH: Path = create_file_path_field("endpoint-config.yml", path_is_absolute=True) + FILE_PATH: Path = create_file_path_field( + "endpoint-config.yml", path_is_absolute=True + ) def get_endpoint_config_production_variant(self, model_name): """ @@ -126,7 +135,7 @@ def __init__( ], effect=iam.Effect.ALLOW, resources=[ - f"arn:aws:s3:::*mlops*", + MODEL_BUCKET_ARN ], ), iam.PolicyStatement( @@ -140,6 +149,11 @@ def __init__( effect=iam.Effect.ALLOW, resources=[f"arn:aws:kms:{Aws.REGION}:{DEV_ACCOUNT}:key/*"], ), + iam.PolicyStatement( + actions=["ecr:Get*"], + effect=iam.Effect.ALLOW, + resources=[ECR_REPO_ARN], + ), ] ), ) @@ -150,7 +164,9 @@ def __init__( assumed_by=iam.ServicePrincipal("sagemaker.amazonaws.com"), managed_policies=[ model_execution_policy, - iam.ManagedPolicy.from_aws_managed_policy_name("AmazonSageMakerFullAccess"), + iam.ManagedPolicy.from_aws_managed_policy_name( + "AmazonSageMakerFullAccess" + ), ], ) @@ -171,7 +187,9 @@ def __init__( execution_role_arn=model_execution_role.role_arn, model_name=model_name, containers=[ - 
sagemaker.CfnModel.ContainerDefinitionProperty(model_package_name=latest_approved_model_package) + sagemaker.CfnModel.ContainerDefinitionProperty( + model_package_name=latest_approved_model_package + ) ], vpc_config=sagemaker.CfnModel.VpcConfigProperty( security_group_ids=[sg_id], @@ -210,7 +228,9 @@ def __init__( endpoint_config_name=endpoint_config_name, kms_key_id=kms_key.key_id, production_variants=[ - endpoint_config_production_variant.get_endpoint_config_production_variant(model.model_name) + endpoint_config_production_variant.get_endpoint_config_production_variant( + model.model_name + ) ], ) From 5c06acd8431cfead93b5a763a185f304116a90b3 Mon Sep 17 00:00:00 2001 From: Fatema Alkhanaizi Date: Tue, 2 Aug 2022 10:15:30 +0100 Subject: [PATCH 03/15] fixed script --- .../seed_code/build_app/source_scripts/docker-build.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/build_app/source_scripts/docker-build.sh b/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/build_app/source_scripts/docker-build.sh index 54e60cc0..90f75d19 100755 --- a/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/build_app/source_scripts/docker-build.sh +++ b/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/build_app/source_scripts/docker-build.sh @@ -15,7 +15,7 @@ aws ecr get-login-password --region AWS_DEFAULT_REGION | docker login --usernam for f in */ do if [ -d "$f" ]; then - tag=$(sed 's/.\{1\}$//' <<<"$f") + tag=$(sed 's/.\{1\}$//' <<< "$f") IMAGE_TAG=$tag-$CODEBUILD_RESOLVED_SOURCE_VERSION; From ddaf68cf53837b4e421acfc0dde960dd118cd4c0 Mon Sep 17 00:00:00 2001 From: Fatema Alkhanaizi Date: Tue, 2 Aug 2022 14:04:37 +0100 Subject: [PATCH 04/15] adding more seed code and upating the docker build script --- .../service_catalog_stack.py | 27 +- .../templates/byoc_project_stack.py | 2 +- .../byoc_build_app/.githooks/pre-commit | 44 ++ 
.../byoc_build_app/.pre-commit-config.yaml | 52 ++ .../seed_code/byoc_build_app/Makefile | 102 ++++ .../seed_code/byoc_build_app/README.md | 24 + .../seed_code/byoc_build_app/buildspec.yml | 19 + .../byoc_build_app/ml_pipelines/README.md | 7 + .../byoc_build_app/ml_pipelines/__init__.py | 30 ++ .../ml_pipelines/__version__.py | 26 + .../byoc_build_app/ml_pipelines/_utils.py | 91 ++++ .../ml_pipelines/get_pipeline_definition.py | 77 +++ .../ml_pipelines/run_pipeline.py | 109 +++++ .../ml_pipelines/training/README.md | 7 + .../ml_pipelines/training/__init__.py | 30 ++ .../ml_pipelines/training/_utils.py | 86 ++++ .../ml_pipelines/training/pipeline.py | 331 +++++++++++++ .../byoc_build_app/notebooks/README.md | 4 + .../notebooks/sm_pipelines_runbook.ipynb | 458 ++++++++++++++++++ .../seed_code/byoc_build_app/setup.cfg | 14 + .../seed_code/byoc_build_app/setup.py | 77 +++ .../source_scripts/Dockerfile | 0 .../byoc_build_app/source_scripts/README.md | 0 .../source_scripts/docker-build.sh | 14 +- .../evaluate/evaluate_xgboost/README.md | 0 .../evaluate/evaluate_xgboost/main.py | 72 +++ .../evaluate_xgboost/requirements.txt | 0 .../source_scripts/helpers/README.md | 0 .../source_scripts/helpers/logger.py | 16 + .../source_scripts/helpers/requirements.txt | 0 .../source_scripts/helpers/s3_helper.py | 16 + .../source_scripts/helpers/test/test_a.py | 16 + .../prepare_abalone_data/README.md | 0 .../prepare_abalone_data/main.py | 132 +++++ .../prepare_abalone_data/requirements.txt | 0 .../source_scripts/repository-info.json | 0 .../source_scripts/training/xgboost/README.md | 0 .../training/xgboost/__main__.py | 16 + .../training/xgboost/requirements.txt | 0 .../training/xgboost/test/test_a.py | 16 + 40 files changed, 1904 insertions(+), 11 deletions(-) create mode 100755 mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/.githooks/pre-commit create mode 100644 
mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/.pre-commit-config.yaml create mode 100644 mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/Makefile create mode 100644 mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/README.md create mode 100644 mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/buildspec.yml create mode 100644 mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/ml_pipelines/README.md create mode 100644 mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/ml_pipelines/__init__.py create mode 100644 mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/ml_pipelines/__version__.py create mode 100644 mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/ml_pipelines/_utils.py create mode 100644 mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/ml_pipelines/get_pipeline_definition.py create mode 100644 mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/ml_pipelines/run_pipeline.py create mode 100644 mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/ml_pipelines/training/README.md create mode 100644 mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/ml_pipelines/training/__init__.py create mode 100644 mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/ml_pipelines/training/_utils.py create mode 100644 mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/ml_pipelines/training/pipeline.py create mode 100644 mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/notebooks/README.md create mode 100644 mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/notebooks/sm_pipelines_runbook.ipynb create mode 100644 
mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/setup.cfg create mode 100644 mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/setup.py rename mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/{build_app => byoc_build_app}/source_scripts/Dockerfile (100%) create mode 100644 mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/source_scripts/README.md rename mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/{build_app => byoc_build_app}/source_scripts/docker-build.sh (66%) create mode 100644 mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/source_scripts/evaluate/evaluate_xgboost/README.md create mode 100644 mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/source_scripts/evaluate/evaluate_xgboost/main.py create mode 100644 mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/source_scripts/evaluate/evaluate_xgboost/requirements.txt create mode 100644 mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/source_scripts/helpers/README.md create mode 100644 mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/source_scripts/helpers/logger.py create mode 100644 mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/source_scripts/helpers/requirements.txt create mode 100644 mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/source_scripts/helpers/s3_helper.py create mode 100644 mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/source_scripts/helpers/test/test_a.py create mode 100644 mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/source_scripts/preprocessing/prepare_abalone_data/README.md create mode 100644 
mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/source_scripts/preprocessing/prepare_abalone_data/main.py create mode 100644 mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/source_scripts/preprocessing/prepare_abalone_data/requirements.txt create mode 100644 mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/source_scripts/repository-info.json create mode 100644 mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/source_scripts/training/xgboost/README.md create mode 100644 mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/source_scripts/training/xgboost/__main__.py create mode 100644 mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/source_scripts/training/xgboost/requirements.txt create mode 100644 mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/source_scripts/training/xgboost/test/test_a.py diff --git a/mlops-multi-account-cdk/mlops-sm-project-template-rt/mlops_sm_project_template_rt/service_catalog_stack.py b/mlops-multi-account-cdk/mlops-sm-project-template-rt/mlops_sm_project_template_rt/service_catalog_stack.py index 1a890dde..0f3baf5a 100644 --- a/mlops-multi-account-cdk/mlops-sm-project-template-rt/mlops_sm_project_template_rt/service_catalog_stack.py +++ b/mlops-multi-account-cdk/mlops-sm-project-template-rt/mlops_sm_project_template_rt/service_catalog_stack.py @@ -243,12 +243,14 @@ def __init__( # ) # Create the build and deployment asset as an output to pass to pipeline stack + zip_image = DockerImage.from_build("mlops_sm_project_template_rt/cdk_helper_scripts/zip-image") + build_app_asset = s3_assets.Asset( self, "BuildAsset", path="seed_code/build_app/", bundling=BundlingOptions( - image=DockerImage.from_build("mlops_sm_project_template_rt/cdk_helper_scripts/zip-image"), + image=zip_image, command=[ "sh", "-c", @@ -258,12 +260,27 @@ def __init__( ), ) + 
byoc_build_app_asset = s3_assets.Asset( + self, + "BYOCBuildAsset", # construct IDs must be unique within the stack; "BuildAsset" is already taken by build_app_asset + path="seed_code/byoc_build_app/", + bundling=BundlingOptions( + image=zip_image, + command=[ + "sh", + "-c", + """zip -r /asset-output/byoc_build_app.zip .""", + ], + output_type=BundlingOutput.ARCHIVED, + ), + ) + deploy_app_asset = s3_assets.Asset( self, "DeployAsset", path="seed_code/deploy_app/", bundling=BundlingOptions( - image=DockerImage.from_build("mlops_sm_project_template_rt/cdk_helper_scripts/zip-image"), + image=zip_image, command=[ "sh", "-c", @@ -275,6 +292,7 @@ def __init__( build_app_asset.grant_read(grantee=products_launch_role) deploy_app_asset.grant_read(grantee=products_launch_role) + byoc_build_app_asset.grant_read(grantee=products_launch_role) # Output the deployment bucket and key, for input into pipeline stack self.export_ssm( @@ -287,6 +305,11 @@ def __init__( "/mlops/code/build", build_app_asset.s3_object_key, ) + self.export_ssm( + "CodeBuildBYOCKey", # NOTE(review): assumes export_ssm uses this value as a construct ID, so it must not repeat "CodeDeployKey" - confirm + "/mlops/code/build/byoc", + byoc_build_app_asset.s3_object_key, + ) self.export_ssm( "CodeDeployKey", "/mlops/code/deploy", diff --git a/mlops-multi-account-cdk/mlops-sm-project-template-rt/mlops_sm_project_template_rt/templates/byoc_project_stack.py b/mlops-multi-account-cdk/mlops-sm-project-template-rt/mlops_sm_project_template_rt/templates/byoc_project_stack.py index 9ddf9116..44f59b8a 100644 --- a/mlops-multi-account-cdk/mlops-sm-project-template-rt/mlops_sm_project_template_rt/templates/byoc_project_stack.py +++ b/mlops-multi-account-cdk/mlops-sm-project-template-rt/mlops_sm_project_template_rt/templates/byoc_project_stack.py @@ -255,7 +255,7 @@ def __init__(self, scope: Construct, construct_id: str, **kwargs) -> None: ) seed_bucket = CfnDynamicReference(CfnDynamicReferenceService.SSM, "/mlops/code/seed_bucket").to_string() - build_app_key = CfnDynamicReference(CfnDynamicReferenceService.SSM, "/mlops/code/build").to_string() + build_app_key = CfnDynamicReference(CfnDynamicReferenceService.SSM,
"/mlops/code/build/byoc").to_string() deploy_app_key = CfnDynamicReference(CfnDynamicReferenceService.SSM, "/mlops/code/deploy").to_string() kms_key = kms.Key( diff --git a/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/.githooks/pre-commit b/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/.githooks/pre-commit new file mode 100755 index 00000000..12eaeef7 --- /dev/null +++ b/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/.githooks/pre-commit @@ -0,0 +1,44 @@ +#!/usr/bin/env python3 +# File generated by pre-commit: https://pre-commit.com +# ID: 138fd403232d2ddd5efb44317e38bf03 +import os +import sys + +# we try our best, but the shebang of this script is difficult to determine: +# - macos doesn't ship with python3 +# - windows executables are almost always `python.exe` +# therefore we continue to support python2 for this small script +if sys.version_info < (3, 3): + from distutils.spawn import find_executable as which +else: + from shutil import which + +# work around https://github.com/Homebrew/homebrew-core/issues/30445 +os.environ.pop("__PYVENV_LAUNCHER__", None) + +# start templated +INSTALL_PYTHON = "/usr/local/Caskroom/miniconda/base/envs/aws/bin/python" +ARGS = ["hook-impl", "--config=.pre-commit-config.yaml", "--hook-type=pre-commit"] +# end templated +ARGS.extend(("--hook-dir", os.path.realpath(os.path.dirname(__file__)))) +ARGS.append("--") +ARGS.extend(sys.argv[1:]) + +DNE = "`pre-commit` not found. Did you forget to activate your virtualenv?" 
+if os.access(INSTALL_PYTHON, os.X_OK): + CMD = [INSTALL_PYTHON, "-mpre_commit"] +elif which("pre-commit"): + CMD = ["pre-commit"] +else: + raise SystemExit(DNE) + +CMD.extend(ARGS) +if sys.platform == "win32": # https://bugs.python.org/issue19124 + import subprocess + + if sys.version_info < (3, 7): # https://bugs.python.org/issue25942 + raise SystemExit(subprocess.Popen(CMD).wait()) + else: + raise SystemExit(subprocess.call(CMD)) +else: + os.execvp(CMD[0], CMD) diff --git a/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/.pre-commit-config.yaml b/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/.pre-commit-config.yaml new file mode 100644 index 00000000..7a9c7e1c --- /dev/null +++ b/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/.pre-commit-config.yaml @@ -0,0 +1,52 @@ +# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +# +# SPDX-License-Identifier: MIT-0 +# +# Permission is hereby granted, free of charge, to any person obtaining a copy of this +# software and associated documentation files (the "Software"), to deal in the Software +# without restriction, including without limitation the rights to use, copy, modify, +# merge, publish, distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, +# INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A +# PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ +repos: +- repo: https://github.com/pre-commit/pre-commit-hooks + rev: v4.3.0 + hooks: + - id: check-added-large-files + - id: check-json + - id: check-merge-conflict + # - id: check-yaml + - id: end-of-file-fixer + - id: requirements-txt-fixer + - id: trailing-whitespace +- repo: https://github.com/psf/black + rev: 22.6.0 + hooks: + - id: black + args: ["--line-length=120"] +- repo: https://gitlab.com/PyCQA/flake8 + rev: 3.9.2 + hooks: + - id: flake8 + args: ["--ignore=E231,E501,F841,W503,F403,E266,W605,F541,F401,E302", "--exclude=app.py", "--max-line-length=120"] +- repo: https://github.com/Lucas-C/pre-commit-hooks + rev: v1.2.0 + hooks: + - id: forbid-crlf + - id: remove-crlf + - id: insert-license + files: \.(py|yaml)$ +- repo: local + hooks: + - id: clear-jupyter-notebooks + name: clear-jupyter-notebooks + entry: bash -c 'find . -type f -name "*.ipynb" -exec jupyter nbconvert --ClearOutputPreprocessor.enabled=True --inplace "{}" \; && git add . && exit 0' + language: system + pass_filenames: false diff --git a/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/Makefile b/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/Makefile new file mode 100644 index 00000000..ce0bc7b2 --- /dev/null +++ b/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/Makefile @@ -0,0 +1,102 @@ +.PHONY: lint init + +################################################################################# +# GLOBALS # +################################################################################# + +PROJECT_DIR := $(shell dirname $(realpath $(lastword $(MAKEFILE_LIST)))) +PROJECT_NAME = gfdtv-dataanalysis-data-models +PYTHON_INTERPRETER = python3 + +ifeq (,$(shell which conda)) +HAS_CONDA=False +else +HAS_CONDA=True +endif + +################################################################################# +# COMMANDS # +################################################################################# + +## 
Lint using flake8 +lint: + flake8 src +## Setup git hooks +init: + git config core.hooksPath .githooks + +clean: + rm -f cdk.staging + rm -rf cdk.out + find . -name '*.egg-info' -exec rm -fr {} + + find . -name '.coverage' -exec rm -fr {} + + find . -name '.pytest_cache' -exec rm -fr {} + + find . -name '.tox' -exec rm -fr {} + + find . -name '__pycache__' -exec rm -fr {} + +################################################################################# +# PROJECT RULES # +################################################################################# + + + + +################################################################################# +# Self Documenting Commands # +################################################################################# + +.DEFAULT_GOAL := help + +# Inspired by +# sed script explained: +# /^##/: +# * save line in hold space +# * purge line +# * Loop: +# * append newline + line to hold space +# * go to next line +# * if line starts with doc comment, strip comment character off and loop +# * remove target prerequisites +# * append hold space (+ newline) to line +# * replace newline plus comments by `---` +# * print line +# Separate expressions are necessary because labels cannot be delimited by +# semicolon; see +.PHONY: help +help: + @echo "$$(tput bold)Available rules:$$(tput sgr0)" + @echo + @sed -n -e "/^## / { \ + h; \ + s/.*//; \ + :doc" \ + -e "H; \ + n; \ + s/^## //; \ + t doc" \ + -e "s/:.*//; \ + G; \ + s/\\n## /---/; \ + s/\\n/ /g; \ + p; \ + }" ${MAKEFILE_LIST} \ + | LC_ALL='C' sort --ignore-case \ + | awk -F '---' \ + -v ncol=$$(tput cols) \ + -v indent=19 \ + -v col_on="$$(tput setaf 6)" \ + -v col_off="$$(tput sgr0)" \ + '{ \ + printf "%s%*s%s ", col_on, -indent, $$1, col_off; \ + n = split($$2, words, " "); \ + line_length = ncol - indent; \ + for (i = 1; i <= n; i++) { \ + line_length -= length(words[i]) + 1; \ + if (line_length <= 0) { \ + line_length = ncol - indent - length(words[i]) - 1; \ + printf "\n%*s ", 
-indent, " "; \ + } \ + printf "%s ", words[i]; \ + } \ + printf "\n"; \ + }' \ + | more $(shell test $(shell uname) = Darwin && echo '--no-init --raw-control-chars') diff --git a/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/README.md b/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/README.md new file mode 100644 index 00000000..5f37e522 --- /dev/null +++ b/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/README.md @@ -0,0 +1,24 @@ +# SageMaker Build - Train Pipelines + +This folder contains all the SageMaker Pipelines of your project. + +`buildspec.yml` defines how to run a pipeline after each commit to this repository. +`ml_pipelines/` contains the SageMaker pipeline definitions. +The expected output of your main pipeline (here `training/pipeline.py`) is a model registered to SageMaker Model Registry. + +`source_scripts/` contains the underlying scripts run by the steps of your SageMaker Pipelines. For example, if your SageMaker Pipeline runs a Processing Job as part of a Processing Step, the code being run inside the Processing Job should be defined in this folder. +A typical folder structure for `source_scripts/` can contain `helpers`, `preprocessing`, `training`, `postprocessing`, `evaluate`, depending on the nature of the steps run as part of the SageMaker Pipeline. +We provide here an example with the Abalone dataset that trains an XGBoost model and evaluates the model on a test set before sending it for manual approval to SageMaker Model Registry inside the SageMaker ModelPackageGroup defined when creating the SageMaker Project. +Additionally, if you use custom containers, the Dockerfile definitions should be found in that folder. + +`tests/` contains the unit tests for your `source_scripts/` + +`notebooks/` contains experimentation notebooks. + +# Run pipeline from command line from this folder + +``` +pip install -e .
+ +run-pipeline --module-name ml_pipelines.training.pipeline --role-arn YOUR_SAGEMAKER_EXECUTION_ROLE_ARN --kwargs '{"region":"eu-west-1"}' +``` diff --git a/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/buildspec.yml b/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/buildspec.yml new file mode 100644 index 00000000..9f9010d1 --- /dev/null +++ b/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/buildspec.yml @@ -0,0 +1,19 @@ +version: 0.2 + +phases: + install: + runtime-versions: + python: 3.8 + commands: + - pip install --upgrade --force-reinstall . "awscli>1.20.30" + + build: + commands: + - export PYTHONUNBUFFERED=TRUE + - export SAGEMAKER_PROJECT_NAME_ID="${SAGEMAKER_PROJECT_NAME}-${SAGEMAKER_PROJECT_ID}" + - | + run-pipeline --module-name ml_pipelines.training.pipeline \ + --role-arn $SAGEMAKER_PIPELINE_ROLE_ARN \ + --tags "[{\"Key\":\"sagemaker:project-name\", \"Value\":\"${SAGEMAKER_PROJECT_NAME}\"}, {\"Key\":\"sagemaker:project-id\", \"Value\":\"${SAGEMAKER_PROJECT_ID}\"}]" \ + --kwargs "{\"region\":\"${AWS_REGION}\",\"role\":\"${SAGEMAKER_PIPELINE_ROLE_ARN}\",\"default_bucket\":\"${ARTIFACT_BUCKET}\",\"pipeline_name\":\"${SAGEMAKER_PROJECT_NAME_ID}\",\"model_package_group_name\":\"${MODEL_PACKAGE_GROUP_NAME}\",\"base_job_prefix\":\"${SAGEMAKER_PROJECT_NAME_ID}\", \"bucket_kms_id\":\"${ARTIFACT_BUCKET_KMS_ID}\"}" + - echo "Create/Update of the SageMaker Pipeline and execution completed." 
diff --git a/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/ml_pipelines/README.md b/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/ml_pipelines/README.md new file mode 100644 index 00000000..8e309f81 --- /dev/null +++ b/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/ml_pipelines/README.md @@ -0,0 +1,7 @@ +# SageMaker Pipelines + +This folder contains SageMaker Pipeline definitions and helper scripts to either simply "get" a SageMaker Pipeline definition (JSON dictionary) with `get_pipeline_definition.py`, or "run" a SageMaker Pipeline from a SageMaker pipeline definition with `run_pipeline.py`. + +Those files are generic and can be reused to call any SageMaker Pipeline. + +Each SageMaker Pipeline definition should be treated as a module inside its own folder, for example here the "training" pipeline, contained inside `training/`. diff --git a/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/ml_pipelines/__init__.py b/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/ml_pipelines/__init__.py new file mode 100644 index 00000000..ff79f21c --- /dev/null +++ b/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/ml_pipelines/__init__.py @@ -0,0 +1,30 @@ +# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +# +# SPDX-License-Identifier: MIT-0 +# +# Permission is hereby granted, free of charge, to any person obtaining a copy of this +# software and associated documentation files (the "Software"), to deal in the Software +# without restriction, including without limitation the rights to use, copy, modify, +# merge, publish, distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so.
+# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, +# INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A +# PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +# © 2021 Amazon Web Services, Inc. or its affiliates. All Rights Reserved. This +# AWS Content is provided subject to the terms of the AWS Customer Agreement +# available at http://aws.amazon.com/agreement or other written agreement between +# Customer and either Amazon Web Services, Inc. or Amazon Web Services EMEA SARL +# or both. +# +# Any code, applications, scripts, templates, proofs of concept, documentation +# and other items provided by AWS under this SOW are "AWS Content," as defined +# in the Agreement, and are provided for illustration purposes only. All such +# AWS Content is provided solely at the option of AWS, and is subject to the +# terms of the Addendum and the Agreement. Customer is solely responsible for +# using, deploying, testing, and supporting any code and applications provided +# by AWS under this SOW. diff --git a/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/ml_pipelines/__version__.py b/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/ml_pipelines/__version__.py new file mode 100644 index 00000000..660d19ee --- /dev/null +++ b/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/ml_pipelines/__version__.py @@ -0,0 +1,26 @@ +# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+# +# SPDX-License-Identifier: MIT-0 +# +# Permission is hereby granted, free of charge, to any person obtaining a copy of this +# software and associated documentation files (the "Software"), to deal in the Software +# without restriction, including without limitation the rights to use, copy, modify, +# merge, publish, distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, +# INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A +# PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +"""Metadata for the ml pipelines package.""" + +__title__ = "ml_pipelines" +__description__ = "ml pipelines - template package" +__version__ = "0.0.1" +__author__ = "" +__author_email__ = "" +__license__ = "Apache 2.0" +__url__ = "" diff --git a/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/ml_pipelines/_utils.py b/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/ml_pipelines/_utils.py new file mode 100644 index 00000000..581e1eb7 --- /dev/null +++ b/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/ml_pipelines/_utils.py @@ -0,0 +1,91 @@ +# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+# +# SPDX-License-Identifier: MIT-0 +# +# Permission is hereby granted, free of charge, to any person obtaining a copy of this +# software and associated documentation files (the "Software"), to deal in the Software +# without restriction, including without limitation the rights to use, copy, modify, +# merge, publish, distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, +# INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A +# PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +# © 2021 Amazon Web Services, Inc. or its affiliates. All Rights Reserved. This +# AWS Content is provided subject to the terms of the AWS Customer Agreement +# available at http://aws.amazon.com/agreement or other written agreement between +# Customer and either Amazon Web Services, Inc. or Amazon Web Services EMEA SARL +# or both. +# +# Any code, applications, scripts, templates, proofs of concept, documentation +# and other items provided by AWS under this SOW are "AWS Content," as defined +# in the Agreement, and are provided for illustration purposes only. All such +# AWS Content is provided solely at the option of AWS, and is subject to the +# terms of the Addendum and the Agreement. Customer is solely responsible for +# using, deploying, testing, and supporting any code and applications provided +# by AWS under this SOW. + +# Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"). You +# may not use this file except in compliance with the License. 
"""Provides utilities for SageMaker Pipeline CLI."""
import ast
import importlib
import json


def get_pipeline_driver(module_name, passed_args=None):
    """Build the pipeline object exposed by a pipeline module.

    Pipeline modules must define a module-level ``get_pipeline(**kwargs)``
    callable.

    Args:
        module_name: Dotted name of the pipeline module to import.
        passed_args: Optional string encoding a dict of keyword arguments
            (see ``convert_struct``) that is expanded into ``get_pipeline``.

    Returns:
        Whatever the module's ``get_pipeline(**kwargs)`` returns, i.e. the
        SageMaker Workflow pipeline.
    """
    # import_module returns the leaf module directly, which is what the
    # previous ``__import__(..., fromlist=[...])`` idiom was emulating.
    pipeline_module = importlib.import_module(module_name)
    kwargs = convert_struct(passed_args)
    return pipeline_module.get_pipeline(**kwargs)


def convert_struct(str_struct=None):
    """Convert the string argument to its proper Python type.

    The string is first parsed as a Python literal. If that fails it is parsed
    as JSON, so that JSON-only tokens (``true``, ``false``, ``null``) passed on
    the command line are accepted as well.

    Args:
        str_struct (str, optional): String to be evaluated. Defaults to None.

    Returns:
        The evaluated structure, or an empty dict when ``str_struct`` is empty
        or None.

    Raises:
        ValueError, SyntaxError: The original ``ast.literal_eval`` error when
            the string is neither a valid Python literal nor valid JSON.
    """
    if not str_struct:
        return {}
    try:
        return ast.literal_eval(str_struct)
    except (ValueError, SyntaxError) as literal_error:
        try:
            return json.loads(str_struct)
        except ValueError:
            # Surface the literal_eval failure so callers keep seeing the same
            # exception type they saw before the JSON fallback existed.
            raise literal_error


def get_pipeline_custom_tags(module_name, args, tags):
    """Get the custom tags for the pipeline.

    This is best-effort: if the module cannot be imported, does not define
    ``get_pipeline_custom_tags``, or ``args`` lacks the ``region`` /
    ``sagemaker_project_arn`` keys, the error is printed and ``tags`` is
    returned unchanged.

    Args:
        module_name: Dotted name of the pipeline module to import.
        args: String encoding a dict of keyword arguments (see
            ``convert_struct``).
        tags: Tags to fall back to, and to hand to the module's hook.

    Returns:
        Custom tags to be added to the pipeline.
    """
    try:
        pipeline_module = importlib.import_module(module_name)
        kwargs = convert_struct(args)
        return pipeline_module.get_pipeline_custom_tags(tags, kwargs["region"], kwargs["sagemaker_project_arn"])
    except Exception as e:  # pylint: disable=W0703
        # Deliberately broad: tagging must never block pipeline creation.
        print(f"Error getting project tags: {e}")
        return tags
"""A CLI to get pipeline definitions from pipeline modules."""
import argparse
import sys

from ml_pipelines._utils import get_pipeline_driver


def main():  # pragma: no cover
    """The main harness that gets the pipeline definition JSON.

    Prints the JSON to stdout, or saves it to the file given by ``--file-name``.
    Exits with status 2 when ``--module-name`` is missing and with status 1
    when building or writing the definition fails.
    """
    # The text must be passed as ``description``: the first positional
    # parameter of ArgumentParser is ``prog``, which would otherwise put this
    # sentence into the usage line in place of the program name.
    parser = argparse.ArgumentParser(description="Gets the pipeline definition for the pipeline script.")

    parser.add_argument(
        "-n",
        "--module-name",
        dest="module_name",
        type=str,
        help="The module name of the pipeline to import.",
    )
    parser.add_argument(
        "-f",
        "--file-name",
        dest="file_name",
        type=str,
        default=None,
        help="The file to output the pipeline definition json to.",
    )
    parser.add_argument(
        "-kwargs",
        "--kwargs",
        dest="kwargs",
        default=None,
        help="Dict string of keyword arguments for the pipeline generation (if supported)",
    )
    args = parser.parse_args()

    if args.module_name is None:
        parser.print_help()
        sys.exit(2)

    try:
        pipeline = get_pipeline_driver(args.module_name, args.kwargs)
        content = pipeline.definition()
        if args.file_name:
            with open(args.file_name, "w", encoding="utf-8") as f:
                f.write(content)
        else:
            print(content)
    except Exception as e:  # pylint: disable=W0703
        # stdout is reserved for the definition itself, so report on stderr.
        print(f"Exception: {e}", file=sys.stderr)
        sys.exit(1)


if __name__ == "__main__":
    main()
"""A CLI to create or update and run pipelines."""
import argparse
import json
import sys

from ml_pipelines._utils import get_pipeline_driver, convert_struct, get_pipeline_custom_tags


def main():  # pragma: no cover
    """The main harness that creates or updates and runs the pipeline.

    Creates or updates the pipeline, starts an execution, waits for it to
    finish and prints the executed steps. Exits with status 2 when
    ``--module-name`` or ``--role-arn`` is missing and with status 1 when any
    step of the create/update/run sequence fails.
    """
    # The text must be passed as ``description``: the first positional
    # parameter of ArgumentParser is ``prog``, which would otherwise put this
    # sentence into the usage line in place of the program name.
    parser = argparse.ArgumentParser(
        description="Creates or updates and runs the pipeline for the pipeline script."
    )

    parser.add_argument(
        "-n",
        "--module-name",
        dest="module_name",
        type=str,
        help="The module name of the pipeline to import.",
    )
    parser.add_argument(
        "-kwargs",
        "--kwargs",
        dest="kwargs",
        default=None,
        help="Dict string of keyword arguments for the pipeline generation (if supported)",
    )
    parser.add_argument(
        "-role-arn",
        "--role-arn",
        dest="role_arn",
        type=str,
        help="The role arn for the pipeline service execution role.",
    )
    parser.add_argument(
        "-description",
        "--description",
        dest="description",
        type=str,
        default=None,
        help="The description of the pipeline.",
    )
    parser.add_argument(
        "-tags",
        "--tags",
        dest="tags",
        default=None,
        help="""List of dict strings of '[{"Key": "string", "Value": "string"}, ..]'""",
    )
    args = parser.parse_args()

    if args.module_name is None or args.role_arn is None:
        parser.print_help()
        sys.exit(2)

    # convert_struct yields {} when --tags is absent; tags are documented as a
    # list, so normalise the empty case to an empty list.
    tags = convert_struct(args.tags) or []

    try:
        pipeline = get_pipeline_driver(args.module_name, args.kwargs)
        print("###### Creating/updating a SageMaker Pipeline with the following definition:")
        parsed = json.loads(pipeline.definition())
        print(json.dumps(parsed, indent=2, sort_keys=True))

        all_tags = get_pipeline_custom_tags(args.module_name, args.kwargs, tags)

        # A single upsert is enough; a second, untagged upsert used to follow
        # here and only repeated the update while replacing the response.
        upsert_response = pipeline.upsert(role_arn=args.role_arn, description=args.description, tags=all_tags)
        print("\n###### Created/Updated SageMaker Pipeline: Response received:")
        print(upsert_response)

        execution = pipeline.start()
        print(f"\n###### Execution started with PipelineExecutionArn: {execution.arn}")

        # TODO: consider removing this wait, as training can take some time
        print("Waiting for the execution to finish...")
        execution.wait()
        print("\n#####Execution completed. Execution step details:")

        print(execution.list_steps())
    except Exception as e:  # pylint: disable=W0703
        print(f"Exception: {e}", file=sys.stderr)
        sys.exit(1)


if __name__ == "__main__":
    main()
+# +# SPDX-License-Identifier: MIT-0 +# +# Permission is hereby granted, free of charge, to any person obtaining a copy of this +# software and associated documentation files (the "Software"), to deal in the Software +# without restriction, including without limitation the rights to use, copy, modify, +# merge, publish, distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, +# INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A +# PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +# © 2021 Amazon Web Services, Inc. or its affiliates. All Rights Reserved. This +# AWS Content is provided subject to the terms of the AWS Customer Agreement +# available at http://aws.amazon.com/agreement or other written agreement between +# Customer and either Amazon Web Services, Inc. or Amazon Web Services EMEA SARL +# or both. +# +# Any code, applications, scripts, templates, proofs of concept, documentation +# and other items provided by AWS under this SOW are "AWS Content," as defined +# in the Agreement, and are provided for illustration purposes only. All such +# AWS Content is provided solely at the option of AWS, and is subject to the +# terms of the Addendum and the Agreement. Customer is solely responsible for +# using, deploying, testing, and supporting any code and applications provided +# by AWS under this SOW. 
diff --git a/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/ml_pipelines/training/_utils.py b/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/ml_pipelines/training/_utils.py new file mode 100644 index 00000000..78330433 --- /dev/null +++ b/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/ml_pipelines/training/_utils.py @@ -0,0 +1,86 @@ +# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +# +# SPDX-License-Identifier: MIT-0 +# +# Permission is hereby granted, free of charge, to any person obtaining a copy of this +# software and associated documentation files (the "Software"), to deal in the Software +# without restriction, including without limitation the rights to use, copy, modify, +# merge, publish, distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, +# INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A +# PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
import logging

from botocore.exceptions import ClientError

logger = logging.getLogger(__name__)


def resolve_ecr_uri_from_image_versions(sagemaker_session, image_versions, image_name):
    """Get the ECR URI of the first image version whose status is ``CREATED``.

    Args:
        sagemaker_session: Object exposing a ``sagemaker_client`` attribute
            used to call ``describe_image_version``.
        image_versions: List of image version dicts, each carrying
            ``ImageVersionStatus``, ``ImageVersionArn`` and ``Version``.
        image_name: Name of the image.

    Returns:
        The ``ContainerImage`` value of the first ``CREATED`` version, or None
        when no version in ``image_versions`` has that status.
    """
    for image_version in image_versions:
        if image_version["ImageVersionStatus"] != "CREATED":
            continue
        image_arn = image_version["ImageVersionArn"]
        version = image_version["Version"]
        logger.info("Identified the latest image version: %s", image_arn)
        response = sagemaker_session.sagemaker_client.describe_image_version(ImageName=image_name, Version=version)
        return response["ContainerImage"]
    return None


def resolve_ecr_uri(sagemaker_session, image_arn):
    """Get the ECR URI of the latest usable version of a SageMaker image.

    Versions are listed in descending version order, page by page, and the
    first one with status ``CREATED`` wins.

    Args:
        sagemaker_session: Object exposing a ``sagemaker_client`` attribute.
        image_arn: ARN of the image; the image name is the part after
            ``image/``.

    Returns:
        ECR URI of the latest ``CREATED`` image version.

    Raises:
        Exception: If the SageMaker API call fails (chained to the original
            ``ClientError``) or if no ``CREATED`` version exists.
    """
    # Fetching image name from image_arn (^arn:aws(-[\w]+)*:sagemaker:.+:[0-9]{12}:image/[a-z0-9]([-.]?[a-z0-9])*$)
    image_name = image_arn.partition("image/")[2]
    request = {
        "ImageName": image_name,
        "MaxResults": 100,
        "SortBy": "VERSION",
        "SortOrder": "DESCENDING",
    }
    try:
        while True:
            response = sagemaker_session.sagemaker_client.list_image_versions(**request)

            ecr_uri = resolve_ecr_uri_from_image_versions(sagemaker_session, response["ImageVersions"], image_name)
            if ecr_uri is not None:
                return ecr_uri

            next_token = response.get("NextToken")
            if not next_token:
                break
            # Only send NextToken once the service has actually returned one.
            request["NextToken"] = next_token
    except ClientError as e:
        # Modeled client errors such as ResourceNotFound derive from ClientError.
        error_message = e.response["Error"]["Message"]
        logger.error(error_message)
        raise Exception(error_message) from e

    # Every page was scanned without finding a usable version.
    error_message = f"No image version found for image name: {image_name}"
    logger.error(error_message)
    raise Exception(error_message)
import os

import boto3
import logging
import sagemaker
import sagemaker.session

from sagemaker.estimator import Estimator
from sagemaker.inputs import TrainingInput
from sagemaker.model_metrics import (
    MetricsSource,
    ModelMetrics,
)
from sagemaker.processing import (
    ProcessingInput,
    ProcessingOutput,
    ScriptProcessor,
)
from sagemaker.sklearn.processing import SKLearnProcessor
from sagemaker.workflow.conditions import ConditionLessThanOrEqualTo
from sagemaker.workflow.condition_step import (
    ConditionStep,
)
from sagemaker.workflow.functions import (
    JsonGet,
)
from sagemaker.workflow.parameters import (
    ParameterInteger,
    ParameterString,
)
from sagemaker.workflow.pipeline import Pipeline
from sagemaker.workflow.properties import PropertyFile
from sagemaker.workflow.steps import (
    ProcessingStep,
    TrainingStep,
)
from sagemaker.workflow.step_collections import RegisterModel

from botocore.exceptions import ClientError
from sagemaker.network import NetworkConfig


# BASE_DIR = os.path.dirname(os.path.realpath(__file__))

logger = logging.getLogger(__name__)


def get_session(region, default_bucket):
    """Gets the sagemaker session based on the region.

    Args:
        region: the aws region to start the session
        default_bucket: the bucket to use for storing the artifacts

    Returns:
        `sagemaker.session.Session` instance
    """
    boto_session = boto3.Session(region_name=region)

    return sagemaker.session.Session(
        boto_session=boto_session,
        sagemaker_client=boto_session.client("sagemaker"),
        sagemaker_runtime_client=boto_session.client("sagemaker-runtime"),
        default_bucket=default_bucket,
    )


def _resolve_image_uri(sagemaker_session, image_name, region):
    """Return the container URI of a SageMaker image, or the built-in XGBoost image if it is not found."""
    sagemaker_client = sagemaker_session.sagemaker_client
    try:
        return sagemaker_client.describe_image_version(ImageName=image_name)["ContainerImage"]
    except sagemaker_client.exceptions.ResourceNotFound:
        logger.info("SageMaker image %s not found, falling back to the built-in XGBoost image", image_name)
        return sagemaker.image_uris.retrieve(
            framework="xgboost",
            region=region,
            version="1.0-1",
            py_version="py3",
            instance_type="ml.m5.xlarge",
        )


def get_pipeline(
    region,
    role=None,
    default_bucket=None,
    bucket_kms_id=None,
    model_package_group_name="AbalonePackageGroup",
    pipeline_name="AbalonePipeline",
    base_job_prefix="Abalone",
    project_id="SageMakerProjectId",
):
    """Gets a SageMaker ML Pipeline instance working on abalone data.

    The pipeline preprocesses the data, trains an XGBoost model, evaluates it
    and registers the model when the evaluation MSE is at most 6.0. Each of the
    processing, training and inference containers is taken from the SageMaker
    image named ``sagemaker-<project_id>-<kind>imagebuild`` when it exists, and
    from the built-in XGBoost image otherwise.

    Args:
        region: AWS region to create and run the pipeline.
        role: IAM role to create and run steps and pipeline. When None, the
            execution role of the session is used.
        default_bucket: the bucket to use for storing the artifacts. When None,
            the session's default bucket is used.
        bucket_kms_id: KMS key id passed as ``output_kms_key`` to the
            processing and training jobs.
        model_package_group_name: model package group the model is registered in.
        pipeline_name: name of the pipeline.
        base_job_prefix: prefix for job names and for the model output path.
        project_id: id used to derive the custom image names.

    Returns:
        an instance of a pipeline
    """

    sagemaker_session = get_session(region, default_bucket)
    if role is None:
        role = sagemaker.session.get_execution_role(sagemaker_session)
    if default_bucket is None:
        # Without this the model output path below would be "s3://None/...".
        default_bucket = sagemaker_session.default_bucket()

    # parameters for pipeline execution
    processing_instance_count = ParameterInteger(name="ProcessingInstanceCount", default_value=1)
    processing_instance_type = ParameterString(name="ProcessingInstanceType", default_value="ml.m5.xlarge")
    training_instance_type = ParameterString(name="TrainingInstanceType", default_value="ml.m5.xlarge")
    model_approval_status = ParameterString(name="ModelApprovalStatus", default_value="PendingManualApproval")
    input_data = ParameterString(
        name="InputDataUrl",
        default_value=f"s3://sagemaker-servicecatalog-seedcode-{region}/dataset/abalone-dataset.csv",
    )
    processing_image_name = "sagemaker-{0}-processingimagebuild".format(project_id)
    training_image_name = "sagemaker-{0}-trainingimagebuild".format(project_id)
    inference_image_name = "sagemaker-{0}-inferenceimagebuild".format(project_id)

    # network_config = NetworkConfig(
    #     enable_network_isolation=True,
    #     security_group_ids=security_group_ids,
    #     subnets=subnets,
    #     encrypt_inter_container_traffic=True,
    # )

    # processing step for feature engineering
    processing_image_uri = _resolve_image_uri(sagemaker_session, processing_image_name, region)
    script_processor = ScriptProcessor(
        image_uri=processing_image_uri,
        instance_type=processing_instance_type,
        instance_count=processing_instance_count,
        base_job_name=f"{base_job_prefix}/sklearn-abalone-preprocess",
        command=["python3"],
        sagemaker_session=sagemaker_session,
        role=role,
        output_kms_key=bucket_kms_id,
    )
    step_process = ProcessingStep(
        name="PreprocessAbaloneData",
        processor=script_processor,
        outputs=[
            ProcessingOutput(output_name="train", source="/opt/ml/processing/train"),
            ProcessingOutput(output_name="validation", source="/opt/ml/processing/validation"),
            ProcessingOutput(output_name="test", source="/opt/ml/processing/test"),
        ],
        # we must figure out this path to get it from step_source directory
        code="source_scripts/preprocessing/prepare_abalone_data/main.py",
        job_arguments=["--input-data", input_data],
    )

    # training step for generating model artifacts
    model_path = f"s3://{default_bucket}/{base_job_prefix}/AbaloneTrain"
    training_image_uri = _resolve_image_uri(sagemaker_session, training_image_name, region)

    xgb_train = Estimator(
        image_uri=training_image_uri,
        instance_type=training_instance_type,
        instance_count=1,
        output_path=model_path,
        base_job_name=f"{base_job_prefix}/abalone-train",
        sagemaker_session=sagemaker_session,
        role=role,
        output_kms_key=bucket_kms_id,
    )
    xgb_train.set_hyperparameters(
        objective="reg:linear",
        num_round=50,
        max_depth=5,
        eta=0.2,
        gamma=4,
        min_child_weight=6,
        subsample=0.7,
        silent=0,
    )
    step_train = TrainingStep(
        name="TrainAbaloneModel",
        estimator=xgb_train,
        inputs={
            "train": TrainingInput(
                s3_data=step_process.properties.ProcessingOutputConfig.Outputs["train"].S3Output.S3Uri,
                content_type="text/csv",
            ),
            "validation": TrainingInput(
                s3_data=step_process.properties.ProcessingOutputConfig.Outputs["validation"].S3Output.S3Uri,
                content_type="text/csv",
            ),
        },
    )

    # processing step for evaluation (runs in the training container)
    script_eval = ScriptProcessor(
        image_uri=training_image_uri,
        command=["python3"],
        instance_type=processing_instance_type,
        instance_count=1,
        base_job_name=f"{base_job_prefix}/script-abalone-eval",
        sagemaker_session=sagemaker_session,
        role=role,
        output_kms_key=bucket_kms_id,
    )
    evaluation_report = PropertyFile(
        name="AbaloneEvaluationReport",
        output_name="evaluation",
        path="evaluation.json",
    )
    step_eval = ProcessingStep(
        name="EvaluateAbaloneModel",
        processor=script_eval,
        inputs=[
            ProcessingInput(
                source=step_train.properties.ModelArtifacts.S3ModelArtifacts,
                destination="/opt/ml/processing/model",
            ),
            ProcessingInput(
                source=step_process.properties.ProcessingOutputConfig.Outputs["test"].S3Output.S3Uri,
                destination="/opt/ml/processing/test",
            ),
        ],
        outputs=[
            ProcessingOutput(output_name="evaluation", source="/opt/ml/processing/evaluation"),
        ],
        code="source_scripts/evaluate/evaluate_xgboost/main.py",
        property_files=[evaluation_report],
    )

    # register model step that will be conditionally executed
    model_metrics = ModelMetrics(
        model_statistics=MetricsSource(
            s3_uri="{}/evaluation.json".format(
                step_eval.arguments["ProcessingOutputConfig"]["Outputs"][0]["S3Output"]["S3Uri"]
            ),
            content_type="application/json",
        )
    )

    inference_image_uri = _resolve_image_uri(sagemaker_session, inference_image_name, region)
    step_register = RegisterModel(
        name="RegisterAbaloneModel",
        estimator=xgb_train,
        image_uri=inference_image_uri,
        model_data=step_train.properties.ModelArtifacts.S3ModelArtifacts,
        content_types=["text/csv"],
        response_types=["text/csv"],
        inference_instances=["ml.t2.medium", "ml.m5.large"],
        transform_instances=["ml.m5.large"],
        model_package_group_name=model_package_group_name,
        approval_status=model_approval_status,
        model_metrics=model_metrics,
    )

    # condition step for evaluating model quality and branching execution
    cond_lte = ConditionLessThanOrEqualTo(
        left=JsonGet(
            step_name=step_eval.name, property_file=evaluation_report, json_path="regression_metrics.mse.value"
        ),
        right=6.0,
    )
    step_cond = ConditionStep(
        name="CheckMSEAbaloneEvaluation",
        conditions=[cond_lte],
        if_steps=[step_register],
        else_steps=[],
    )

    # pipeline instance
    pipeline = Pipeline(
        name=pipeline_name,
        parameters=[
            processing_instance_type,
            processing_instance_count,
            training_instance_type,
            model_approval_status,
            input_data,
        ],
        steps=[step_process, step_train, step_eval, step_cond],
        sagemaker_session=sagemaker_session,
    )
    return pipeline
"from sagemaker.workflow.condition_step import (\n", + " ConditionStep,\n", + ")\n", + "from sagemaker.workflow.functions import (\n", + " JsonGet,\n", + ")\n", + "from sagemaker.workflow.parameters import (\n", + " ParameterInteger,\n", + " ParameterString,\n", + ")\n", + "from sagemaker.workflow.pipeline import Pipeline\n", + "from sagemaker.workflow.properties import PropertyFile\n", + "from sagemaker.workflow.steps import (\n", + " ProcessingStep,\n", + " TrainingStep,\n", + ")\n", + "from sagemaker.workflow.step_collections import RegisterModel\n", + "\n", + "from botocore.exceptions import ClientError" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "logger = logging.getLogger(__name__)\n", + "\n", + "\"\"\"Environment Variables\"\"\"\n", + "proj_dir = \"TO_BE_DEFINED\"\n", + "region= \"TO_BE_DEFINED\"\n", + "model_artefact_bucket= \"TO_BE_DEFINED\"\n", + "role = \"TO_BE_DEFINED\"\n", + "project_name= \"TO_BE_DEFINED\"\n", + "stage= \"test\"\n", + "model_package_group_name=\"AbalonePackageGroup\"\n", + "pipeline_name=\"AbalonePipeline\"\n", + "base_job_prefix=\"Abalone\"\n", + "project_id=\"SageMakerProjectId\"\n", + "processing_image_uri=None\n", + "training_image_uri=None\n", + "inference_image_uri=None" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def get_session(region, default_bucket):\n", + " \"\"\"Gets the sagemaker session based on the region.\n", + "\n", + " Args:\n", + " region: the aws region to start the session\n", + " default_bucket: the bucket to use for storing the artifacts\n", + "\n", + " Returns:\n", + " `sagemaker.session.Session` instance\n", + " \"\"\"\n", + "\n", + " boto_session = boto3.Session(region_name=region)\n", + "\n", + " sagemaker_client = boto_session.client(\"sagemaker\")\n", + " runtime_client = boto_session.client(\"sagemaker-runtime\")\n", + " return sagemaker.session.Session(\n", + " 
boto_session=boto_session,\n", + " sagemaker_client=sagemaker_client,\n", + " sagemaker_runtime_client=runtime_client,\n", + " default_bucket=default_bucket,\n", + " )" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sagemaker_session = get_session(region, model_artefact_bucket)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Feature Engineering\n", + "This section describes the different steps involved in feature engineering which includes loading and transforming different data sources to build the features needed for the ML Use Case" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "processing_instance_count = ParameterInteger(name=\"ProcessingInstanceCount\", default_value=1)\n", + "processing_instance_type = ParameterString(name=\"ProcessingInstanceType\", default_value=\"ml.m5.xlarge\")\n", + "training_instance_type = ParameterString(name=\"TrainingInstanceType\", default_value=\"ml.m5.xlarge\")\n", + "inference_instance_type = ParameterString(name=\"InferenceInstanceType\", default_value=\"ml.m5.xlarge\")\n", + "model_approval_status = ParameterString(name=\"ModelApprovalStatus\", default_value=\"PendingManualApproval\")\n", + "input_data = ParameterString(\n", + " name=\"InputDataUrl\",\n", + " default_value=f\"s3://sagemaker-servicecatalog-seedcode-{region}/dataset/abalone-dataset.csv\",\n", + ")\n", + "processing_image_name = \"sagemaker-{0}-processingimagebuild\".format(project_id)\n", + "training_image_name = \"sagemaker-{0}-trainingimagebuild\".format(project_id)\n", + "inference_image_name = \"sagemaker-{0}-inferenceimagebuild\".format(project_id)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# processing step for feature engineering\n", + "try:\n", + " processing_image_uri = 
sagemaker_session.sagemaker_client.describe_image_version(\n", + " ImageName=processing_image_name\n", + " )[\"ContainerImage\"]\n", + "\n", + "except (sagemaker_session.sagemaker_client.exceptions.ResourceNotFound):\n", + " processing_image_uri = sagemaker.image_uris.retrieve(\n", + " framework=\"xgboost\",\n", + " region=region,\n", + " version=\"1.0-1\",\n", + " py_version=\"py3\",\n", + " instance_type=processing_instance_type,\n", + " )" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Define Script Processor\n", + "script_processor = ScriptProcessor(\n", + " image_uri=processing_image_uri,\n", + " instance_type=processing_instance_type,\n", + " instance_count=processing_instance_count,\n", + " base_job_name=f\"{base_job_prefix}/sklearn-abalone-preprocess\",\n", + " command=[\"python3\"],\n", + " sagemaker_session=sagemaker_session,\n", + " role=role,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Define ProcessingStep\n", + "step_process = ProcessingStep(\n", + " name=\"PreprocessAbaloneData\",\n", + " processor=script_processor,\n", + " outputs=[\n", + " ProcessingOutput(output_name=\"train\", source=\"/opt/ml/processing/train\"),\n", + " ProcessingOutput(output_name=\"validation\", source=\"/opt/ml/processing/validation\"),\n", + " ProcessingOutput(output_name=\"test\", source=\"/opt/ml/processing/test\"),\n", + " ],\n", + " code=\"source_scripts/preprocessing/prepare_abalone_data/main.py\", # we must figure out this path to get it from step_source directory\n", + " job_arguments=[\"--input-data\", input_data],\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Training an XGBoost model" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# training step for generating model artifacts\n", + "model_path = 
f\"s3://{sagemaker_session.default_bucket()}/{base_job_prefix}/AbaloneTrain\"\n", + "\n", + "try:\n", + " training_image_uri = sagemaker_session.sagemaker_client.describe_image_version(ImageName=training_image_name)[\n", + " \"ContainerImage\"\n", + " ]\n", + "except (sagemaker_session.sagemaker_client.exceptions.ResourceNotFound):\n", + " training_image_uri = sagemaker.image_uris.retrieve(\n", + " framework=\"xgboost\",\n", + " region=region,\n", + " version=\"1.0-1\",\n", + " py_version=\"py3\",\n", + " instance_type=training_instance_type,\n", + " )\n", + "\n", + "xgb_train = Estimator(\n", + " image_uri=training_image_uri,\n", + " instance_type=training_instance_type,\n", + " instance_count=1,\n", + " output_path=model_path,\n", + " base_job_name=f\"{base_job_prefix}/abalone-train\",\n", + " sagemaker_session=sagemaker_session,\n", + " role=role,\n", + ")\n", + "xgb_train.set_hyperparameters(\n", + " objective=\"reg:linear\",\n", + " num_round=50,\n", + " max_depth=5,\n", + " eta=0.2,\n", + " gamma=4,\n", + " min_child_weight=6,\n", + " subsample=0.7,\n", + " silent=0,\n", + ")\n", + "step_train = TrainingStep(\n", + " name=\"TrainAbaloneModel\",\n", + " estimator=xgb_train,\n", + " inputs={\n", + " \"train\": TrainingInput(\n", + " s3_data=step_process.properties.ProcessingOutputConfig.Outputs[\"train\"].S3Output.S3Uri,\n", + " content_type=\"text/csv\",\n", + " ),\n", + " \"validation\": TrainingInput(\n", + " s3_data=step_process.properties.ProcessingOutputConfig.Outputs[\"validation\"].S3Output.S3Uri,\n", + " content_type=\"text/csv\",\n", + " ),\n", + " },\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Evaluate the Model" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# processing step for evaluation\n", + "script_eval = ScriptProcessor(\n", + " image_uri=training_image_uri,\n", + " command=[\"python3\"],\n", + " 
instance_type=processing_instance_type,\n", + " instance_count=1,\n", + " base_job_name=f\"{base_job_prefix}/script-abalone-eval\",\n", + " sagemaker_session=sagemaker_session,\n", + " role=role,\n", + ")\n", + "evaluation_report = PropertyFile(\n", + " name=\"AbaloneEvaluationReport\",\n", + " output_name=\"evaluation\",\n", + " path=\"evaluation.json\",\n", + ")\n", + "step_eval = ProcessingStep(\n", + " name=\"EvaluateAbaloneModel\",\n", + " processor=script_eval,\n", + " inputs=[\n", + " ProcessingInput(\n", + " source=step_train.properties.ModelArtifacts.S3ModelArtifacts,\n", + " destination=\"/opt/ml/processing/model\",\n", + " ),\n", + " ProcessingInput(\n", + " source=step_process.properties.ProcessingOutputConfig.Outputs[\"test\"].S3Output.S3Uri,\n", + " destination=\"/opt/ml/processing/test\",\n", + " ),\n", + " ],\n", + " outputs=[\n", + " ProcessingOutput(output_name=\"evaluation\", source=\"/opt/ml/processing/evaluation\"),\n", + " ],\n", + " code=\"source_scripts/evaluate/evaluate_xgboost/main.py\",\n", + " property_files=[evaluation_report],\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Conditional step to push model to SageMaker Model Registry" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# register model step that will be conditionally executed\n", + "model_metrics = ModelMetrics(\n", + " model_statistics=MetricsSource(\n", + " s3_uri=\"{}/evaluation.json\".format(\n", + " step_eval.arguments[\"ProcessingOutputConfig\"][\"Outputs\"][0][\"S3Output\"][\"S3Uri\"]\n", + " ),\n", + " content_type=\"application/json\",\n", + " )\n", + ")\n", + "\n", + "try:\n", + " inference_image_uri = sagemaker_session.sagemaker_client.describe_image_version(ImageName=inference_image_name)[\n", + " \"ContainerImage\"\n", + " ]\n", + "except (sagemaker_session.sagemaker_client.exceptions.ResourceNotFound):\n", + " inference_image_uri = 
sagemaker.image_uris.retrieve(\n", + " framework=\"xgboost\",\n", + " region=region,\n", + " version=\"1.0-1\",\n", + " py_version=\"py3\",\n", + " instance_type=inference_instance_type,\n", + " )\n", + "step_register = RegisterModel(\n", + " name=\"RegisterAbaloneModel\",\n", + " estimator=xgb_train,\n", + " image_uri=inference_image_uri,\n", + " model_data=step_train.properties.ModelArtifacts.S3ModelArtifacts,\n", + " content_types=[\"text/csv\"],\n", + " response_types=[\"text/csv\"],\n", + " inference_instances=[\"ml.t2.medium\", \"ml.m5.large\"],\n", + " transform_instances=[\"ml.m5.large\"],\n", + " model_package_group_name=model_package_group_name,\n", + " approval_status=model_approval_status,\n", + " model_metrics=model_metrics,\n", + ")\n", + "\n", + "# condition step for evaluating model quality and branching execution\n", + "cond_lte = ConditionLessThanOrEqualTo(\n", + " left=JsonGet(\n", + " step_name=step_eval.name, property_file=evaluation_report, json_path=\"regression_metrics.mse.value\"\n", + " ),\n", + " right=6.0,\n", + ")\n", + "step_cond = ConditionStep(\n", + " name=\"CheckMSEAbaloneEvaluation\",\n", + " conditions=[cond_lte],\n", + " if_steps=[step_register],\n", + " else_steps=[],\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Create and run the Pipeline" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# pipeline instance\n", + "pipeline = Pipeline(\n", + " name=pipeline_name,\n", + " parameters=[\n", + " processing_instance_type,\n", + " processing_instance_count,\n", + " training_instance_type,\n", + " model_approval_status,\n", + " input_data,\n", + " ],\n", + " steps=[step_process, step_train, step_eval, step_cond],\n", + " sagemaker_session=sagemaker_session,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import json\n", + "\n", + "\n", + "definition = 
json.loads(pipeline.definition())\n", + "definition" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pipeline.upsert(role_arn=role, description=f'{stage} pipelines for {project_name}')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pipeline.start()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "conda_python3", + "language": "python", + "name": "conda_python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.13" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/setup.cfg b/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/setup.cfg new file mode 100644 index 00000000..6f878705 --- /dev/null +++ b/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/setup.cfg @@ -0,0 +1,14 @@ +[tool:pytest] +addopts = + -vv +testpaths = tests + +[aliases] +test=pytest + +[metadata] +description-file = README.md +license_file = LICENSE + +[wheel] +universal = 1 diff --git a/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/setup.py b/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/setup.py new file mode 100644 index 00000000..b10bb142 --- /dev/null +++ b/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/setup.py @@ -0,0 +1,77 @@ +# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+# +# SPDX-License-Identifier: MIT-0 +# +# Permission is hereby granted, free of charge, to any person obtaining a copy of this +# software and associated documentation files (the "Software"), to deal in the Software +# without restriction, including without limitation the rights to use, copy, modify, +# merge, publish, distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, +# INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A +# PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +import os +import setuptools + + +about = {} +here = os.path.abspath(os.path.dirname(__file__)) +with open(os.path.join(here, "ml_pipelines", "__version__.py")) as f: + exec(f.read(), about) + + +with open(os.path.join(here, "README.md"), "r") as f: + readme = f.read() + + +required_packages = ["sagemaker"] +extras = { + "test": [ + "black", + "coverage", + "flake8", + "mock", + "pydocstyle", + "pytest", + "pytest-cov", + "sagemaker", + "tox", + ] +} +setuptools.setup( + name=about["__title__"], + description=about["__description__"], + version=about["__version__"], + author=about["__author__"], + author_email=about["__author_email__"], + long_description=readme, + long_description_content_type="text/markdown", + url=about["__url__"], + license=about["__license__"], + packages=setuptools.find_packages(), + include_package_data=True, + python_requires=">=3.6", + install_requires=required_packages, + extras_require=extras, + entry_points={ + "console_scripts": [ + "get-pipeline-definition=ml_pipelines.get_pipeline_definition:main", + 
"run-pipeline=ml_pipelines.run_pipeline:main", + ] + }, + classifiers=[ + "Development Status :: 3 - Alpha", + "Intended Audience :: Developers", + "Natural Language :: English", + "Programming Language :: Python", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.6", + "Programming Language :: Python :: 3.7", + "Programming Language :: Python :: 3.8", + ], +) diff --git a/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/build_app/source_scripts/Dockerfile b/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/source_scripts/Dockerfile similarity index 100% rename from mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/build_app/source_scripts/Dockerfile rename to mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/source_scripts/Dockerfile diff --git a/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/source_scripts/README.md b/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/source_scripts/README.md new file mode 100644 index 00000000..e69de29b diff --git a/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/build_app/source_scripts/docker-build.sh b/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/source_scripts/docker-build.sh similarity index 66% rename from mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/build_app/source_scripts/docker-build.sh rename to mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/source_scripts/docker-build.sh index 90f75d19..a51dd96d 100755 --- a/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/build_app/source_scripts/docker-build.sh +++ b/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/source_scripts/docker-build.sh @@ -12,20 +12,18 @@ REPOSITORY_URI=${AWS_ACCOUNT_ID}.dkr.ecr.${AWS_DEFAULT_REGION}.amazonaws.com/${R aws ecr get-login-password 
--region AWS_DEFAULT_REGION | docker login --username AWS --password-stdin ${AWS_ACCOUNT_ID}.dkr.ecr.${AWS_DEFAULT_REGION}.amazonaws.com -for f in */ +target_stages=("xgboost") + +for stage in "${target_stages[@]}" do - if [ -d "$f" ]; then - tag=$(sed 's/.\{1\}$//' <<< "$f") - IMAGE_TAG=$tag-$CODEBUILD_RESOLVED_SOURCE_VERSION; + IMAGE_TAG=$stage-$CODEBUILD_RESOLVED_SOURCE_VERSION; echo $IMAGE_TAG - docker build --target $tag -t $REPOSITORY_URI:$tag . - docker tag $REPOSITORY_URI:$tag $REPOSITORY_URI:$IMAGE_TAG + docker build --target $stage -t $REPOSITORY_URI:$stage . + docker tag $REPOSITORY_URI:$stage $REPOSITORY_URI:$IMAGE_TAG - docker push $REPOSITORY_URI:$tag docker push $REPOSITORY_URI:$IMAGE_TAG - fi done diff --git a/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/source_scripts/evaluate/evaluate_xgboost/README.md b/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/source_scripts/evaluate/evaluate_xgboost/README.md new file mode 100644 index 00000000..e69de29b diff --git a/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/source_scripts/evaluate/evaluate_xgboost/main.py b/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/source_scripts/evaluate/evaluate_xgboost/main.py new file mode 100644 index 00000000..7027811e --- /dev/null +++ b/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/source_scripts/evaluate/evaluate_xgboost/main.py @@ -0,0 +1,72 @@ +# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+# +# SPDX-License-Identifier: MIT-0 +# +# Permission is hereby granted, free of charge, to any person obtaining a copy of this +# software and associated documentation files (the "Software"), to deal in the Software +# without restriction, including without limitation the rights to use, copy, modify, +# merge, publish, distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, +# INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A +# PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +"""Evaluation script for measuring mean squared error.""" +import json +import logging +import pathlib +import pickle +import tarfile + +import numpy as np +import pandas as pd +import xgboost + +from sklearn.metrics import mean_squared_error + +logger = logging.getLogger() +logger.setLevel(logging.INFO) +logger.addHandler(logging.StreamHandler()) + + +if __name__ == "__main__": + logger.debug("Starting evaluation.") + model_path = "/opt/ml/processing/model/model.tar.gz" + with tarfile.open(model_path) as tar: + tar.extractall(path=".") + + logger.debug("Loading xgboost model.") + model = pickle.load(open("xgboost-model", "rb")) + + logger.debug("Reading test data.") + test_path = "/opt/ml/processing/test/test.csv" + df = pd.read_csv(test_path, header=None) + + logger.debug("Reading test data.") + y_test = df.iloc[:, 0].to_numpy() + df.drop(df.columns[0], axis=1, inplace=True) + X_test = xgboost.DMatrix(df.values) + + logger.info("Performing predictions against test data.") + predictions = model.predict(X_test) + + logger.debug("Calculating mean 
squared error.") + mse = mean_squared_error(y_test, predictions) + std = np.std(y_test - predictions) + report_dict = { + "regression_metrics": { + "mse": {"value": mse, "standard_deviation": std}, + }, + } + + output_dir = "/opt/ml/processing/evaluation" + pathlib.Path(output_dir).mkdir(parents=True, exist_ok=True) + + logger.info("Writing out evaluation report with mse: %f", mse) + evaluation_path = f"{output_dir}/evaluation.json" + with open(evaluation_path, "w") as f: + f.write(json.dumps(report_dict)) diff --git a/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/source_scripts/evaluate/evaluate_xgboost/requirements.txt b/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/source_scripts/evaluate/evaluate_xgboost/requirements.txt new file mode 100644 index 00000000..e69de29b diff --git a/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/source_scripts/helpers/README.md b/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/source_scripts/helpers/README.md new file mode 100644 index 00000000..e69de29b diff --git a/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/source_scripts/helpers/logger.py b/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/source_scripts/helpers/logger.py new file mode 100644 index 00000000..bc27f7d9 --- /dev/null +++ b/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/source_scripts/helpers/logger.py @@ -0,0 +1,16 @@ +# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+# +# SPDX-License-Identifier: MIT-0 +# +# Permission is hereby granted, free of charge, to any person obtaining a copy of this +# software and associated documentation files (the "Software"), to deal in the Software +# without restriction, including without limitation the rights to use, copy, modify, +# merge, publish, distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, +# INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A +# PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. diff --git a/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/source_scripts/helpers/requirements.txt b/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/source_scripts/helpers/requirements.txt new file mode 100644 index 00000000..e69de29b diff --git a/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/source_scripts/helpers/s3_helper.py b/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/source_scripts/helpers/s3_helper.py new file mode 100644 index 00000000..bc27f7d9 --- /dev/null +++ b/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/source_scripts/helpers/s3_helper.py @@ -0,0 +1,16 @@ +# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+# +# SPDX-License-Identifier: MIT-0 +# +# Permission is hereby granted, free of charge, to any person obtaining a copy of this +# software and associated documentation files (the "Software"), to deal in the Software +# without restriction, including without limitation the rights to use, copy, modify, +# merge, publish, distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, +# INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A +# PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. diff --git a/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/source_scripts/helpers/test/test_a.py b/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/source_scripts/helpers/test/test_a.py new file mode 100644 index 00000000..bc27f7d9 --- /dev/null +++ b/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/source_scripts/helpers/test/test_a.py @@ -0,0 +1,16 @@ +# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +# +# SPDX-License-Identifier: MIT-0 +# +# Permission is hereby granted, free of charge, to any person obtaining a copy of this +# software and associated documentation files (the "Software"), to deal in the Software +# without restriction, including without limitation the rights to use, copy, modify, +# merge, publish, distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so. 
+# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, +# INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A +# PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. diff --git a/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/source_scripts/preprocessing/prepare_abalone_data/README.md b/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/source_scripts/preprocessing/prepare_abalone_data/README.md new file mode 100644 index 00000000..e69de29b diff --git a/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/source_scripts/preprocessing/prepare_abalone_data/main.py b/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/source_scripts/preprocessing/prepare_abalone_data/main.py new file mode 100644 index 00000000..063a1d81 --- /dev/null +++ b/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/source_scripts/preprocessing/prepare_abalone_data/main.py @@ -0,0 +1,132 @@ +# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +# +# SPDX-License-Identifier: MIT-0 +# +# Permission is hereby granted, free of charge, to any person obtaining a copy of this +# software and associated documentation files (the "Software"), to deal in the Software +# without restriction, including without limitation the rights to use, copy, modify, +# merge, publish, distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so. 
+# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, +# INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A +# PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +"""Feature engineers the abalone dataset.""" +import argparse +import logging +import os +import pathlib +import requests +import tempfile + +import boto3 +import numpy as np +import pandas as pd + +from sklearn.compose import ColumnTransformer +from sklearn.impute import SimpleImputer +from sklearn.pipeline import Pipeline +from sklearn.preprocessing import StandardScaler, OneHotEncoder + +logger = logging.getLogger() +logger.setLevel(logging.INFO) +logger.addHandler(logging.StreamHandler()) + + +# Since we get a headerless CSV file we specify the column names here. 
+feature_columns_names = [ + "sex", + "length", + "diameter", + "height", + "whole_weight", + "shucked_weight", + "viscera_weight", + "shell_weight", +] +label_column = "rings" + +feature_columns_dtype = { + "sex": str, + "length": np.float64, + "diameter": np.float64, + "height": np.float64, + "whole_weight": np.float64, + "shucked_weight": np.float64, + "viscera_weight": np.float64, + "shell_weight": np.float64, +} +label_column_dtype = {"rings": np.float64} + + +def merge_two_dicts(x, y): + """Merges two dicts, returning a new copy.""" + z = x.copy() + z.update(y) + return z + + +if __name__ == "__main__": + logger.debug("Starting preprocessing.") + parser = argparse.ArgumentParser() + parser.add_argument("--input-data", type=str, required=True) + args = parser.parse_args() + + base_dir = "/opt/ml/processing" + pathlib.Path(f"{base_dir}/data").mkdir(parents=True, exist_ok=True) + input_data = args.input_data + bucket = input_data.split("/")[2] + key = "/".join(input_data.split("/")[3:]) + + logger.info("Downloading data from bucket: %s, key: %s", bucket, key) + fn = f"{base_dir}/data/abalone-dataset.csv" + s3 = boto3.resource("s3") + s3.Bucket(bucket).download_file(key, fn) + + logger.debug("Reading downloaded data.") + df = pd.read_csv( + fn, + header=None, + names=feature_columns_names + [label_column], + dtype=merge_two_dicts(feature_columns_dtype, label_column_dtype), + ) + os.unlink(fn) + + logger.debug("Defining transformers.") + numeric_features = list(feature_columns_names) + numeric_features.remove("sex") + numeric_transformer = Pipeline(steps=[("imputer", SimpleImputer(strategy="median")), ("scaler", StandardScaler())]) + + categorical_features = ["sex"] + categorical_transformer = Pipeline( + steps=[ + ("imputer", SimpleImputer(strategy="constant", fill_value="missing")), + ("onehot", OneHotEncoder(handle_unknown="ignore")), + ] + ) + + preprocess = ColumnTransformer( + transformers=[ + ("num", numeric_transformer, numeric_features), + ("cat", 
categorical_transformer, categorical_features), + ] + ) + + logger.info("Applying transforms.") + y = df.pop("rings") + X_pre = preprocess.fit_transform(df) + y_pre = y.to_numpy().reshape(len(y), 1) + + X = np.concatenate((y_pre, X_pre), axis=1) + + logger.info("Splitting %d rows of data into train, validation, test datasets.", len(X)) + np.random.shuffle(X) + train, validation, test = np.split(X, [int(0.7 * len(X)), int(0.85 * len(X))]) + + logger.info("Writing out datasets to %s.", base_dir) + pd.DataFrame(train).to_csv(f"{base_dir}/train/train.csv", header=False, index=False) + pd.DataFrame(validation).to_csv(f"{base_dir}/validation/validation.csv", header=False, index=False) + pd.DataFrame(test).to_csv(f"{base_dir}/test/test.csv", header=False, index=False) diff --git a/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/source_scripts/preprocessing/prepare_abalone_data/requirements.txt b/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/source_scripts/preprocessing/prepare_abalone_data/requirements.txt new file mode 100644 index 00000000..e69de29b diff --git a/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/source_scripts/repository-info.json b/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/source_scripts/repository-info.json new file mode 100644 index 00000000..e69de29b diff --git a/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/source_scripts/training/xgboost/README.md b/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/source_scripts/training/xgboost/README.md new file mode 100644 index 00000000..e69de29b diff --git a/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/source_scripts/training/xgboost/__main__.py b/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/source_scripts/training/xgboost/__main__.py new file mode 100644 index 
00000000..bc27f7d9 --- /dev/null +++ b/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/source_scripts/training/xgboost/__main__.py @@ -0,0 +1,16 @@ +# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +# +# SPDX-License-Identifier: MIT-0 +# +# Permission is hereby granted, free of charge, to any person obtaining a copy of this +# software and associated documentation files (the "Software"), to deal in the Software +# without restriction, including without limitation the rights to use, copy, modify, +# merge, publish, distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, +# INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A +# PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. diff --git a/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/source_scripts/training/xgboost/requirements.txt b/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/source_scripts/training/xgboost/requirements.txt new file mode 100644 index 00000000..e69de29b diff --git a/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/source_scripts/training/xgboost/test/test_a.py b/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/source_scripts/training/xgboost/test/test_a.py new file mode 100644 index 00000000..bc27f7d9 --- /dev/null +++ b/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/source_scripts/training/xgboost/test/test_a.py @@ -0,0 +1,16 @@ +# Copyright Amazon.com, Inc. 
or its affiliates. All Rights Reserved. +# +# SPDX-License-Identifier: MIT-0 +# +# Permission is hereby granted, free of charge, to any person obtaining a copy of this +# software and associated documentation files (the "Software"), to deal in the Software +# without restriction, including without limitation the rights to use, copy, modify, +# merge, publish, distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, +# INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A +# PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. From b55223590ca73261dd5f42d31c80329768e4b9ab Mon Sep 17 00:00:00 2001 From: Georgios Schinas Date: Tue, 2 Aug 2022 15:58:36 +0100 Subject: [PATCH 05/15] fixes to typos and iam --- .../service_catalog_stack.py | 81 ++++++++++--------- .../templates/byoc_project_stack.py | 4 +- 2 files changed, 44 insertions(+), 41 deletions(-) diff --git a/mlops-multi-account-cdk/mlops-sm-project-template-rt/mlops_sm_project_template_rt/service_catalog_stack.py b/mlops-multi-account-cdk/mlops-sm-project-template-rt/mlops_sm_project_template_rt/service_catalog_stack.py index 0f3baf5a..ce44309b 100644 --- a/mlops-multi-account-cdk/mlops-sm-project-template-rt/mlops_sm_project_template_rt/service_catalog_stack.py +++ b/mlops-multi-account-cdk/mlops-sm-project-template-rt/mlops_sm_project_template_rt/service_catalog_stack.py @@ -137,6 +137,9 @@ def __init__( products_launch_role.add_managed_policy( iam.ManagedPolicy.from_aws_managed_policy_name("AmazonSSMReadOnlyAccess") ) + products_launch_role.add_managed_policy( + 
iam.ManagedPolicy.from_aws_managed_policy_name("AmazonEC2ContainerRegistryFullAccess") + ) products_launch_role.add_to_policy( iam.PolicyStatement( @@ -196,51 +199,51 @@ def __init__( principal_type="IAM", ) - product = servicecatalog.CloudFormationProduct( - self, - "DeployProduct", - owner=portfolio_owner, - product_name=MLOpsStack.TEMPLATE_NAME, - product_versions=[ - servicecatalog.CloudFormationProductVersion( - cloud_formation_template=servicecatalog.CloudFormationTemplate.from_asset( - self.generate_template(MLOpsStack, f"MLOpsApp-{stage_name}", **kwargs) - ), - product_version_name=product_version, - ) - ], - description=MLOpsStack.DESCRIPTION, - ) + # product = servicecatalog.CloudFormationProduct( + # self, + # "DeployProduct", + # owner=portfolio_owner, + # product_name=MLOpsStack.TEMPLATE_NAME, + # product_versions=[ + # servicecatalog.CloudFormationProductVersion( + # cloud_formation_template=servicecatalog.CloudFormationTemplate.from_asset( + # self.generate_template(MLOpsStack, f"MLOpsApp-{stage_name}", **kwargs) + # ), + # product_version_name=product_version, + # ) + # ], + # description=MLOpsStack.DESCRIPTION, + # ) - portfolio_association.node.add_dependency(product) + # portfolio_association.node.add_dependency(product) - # Add product tags, and create role constraint for each product + # # Add product tags, and create role constraint for each product - portfolio.add_product(product) + # portfolio.add_product(product) - Tags.of(product).add(key="sagemaker:studio-visibility", value="true") + # Tags.of(product).add(key="sagemaker:studio-visibility", value="true") - role_constraint = servicecatalog.CfnLaunchRoleConstraint( - self, - f"LaunchRoleConstraint", - portfolio_id=portfolio.portfolio_id, - product_id=product.product_id, - role_arn=products_launch_role.role_arn, - description=f"Launch as {products_launch_role.role_arn}", - ) - role_constraint.add_depends_on(portfolio_association) + # role_constraint = servicecatalog.CfnLaunchRoleConstraint( 
+ # self, + # f"LaunchRoleConstraint", + # portfolio_id=portfolio.portfolio_id, + # product_id=product.product_id, + # role_arn=products_launch_role.role_arn, + # description=f"Launch as {products_launch_role.role_arn}", + # ) + # role_constraint.add_depends_on(portfolio_association) # uncomment this block if you want to create service catalog products based on all templates # make sure you comment out lines 213-247 - # products = self.deploy_all_products( - # portfolio_association, - # portfolio, - # products_launch_role, - # portfolio_owner, - # product_version, - # stage_name, - # **kwargs, - # ) + products = self.deploy_all_products( + portfolio_association, + portfolio, + products_launch_role, + portfolio_owner, + product_version, + stage_name, + **kwargs, + ) # Create the build and deployment asset as an output to pass to pipeline stack zip_image = DockerImage.from_build("mlops_sm_project_template_rt/cdk_helper_scripts/zip-image") @@ -262,7 +265,7 @@ def __init__( byoc_build_app_asset = s3_assets.Asset( self, - "BuildAsset", + "BYOCBuildAsset", path="seed_code/byoc_build_app/", bundling=BundlingOptions( image=zip_image, @@ -306,7 +309,7 @@ def __init__( build_app_asset.s3_object_key, ) self.export_ssm( - "CodeDeployKey", + "BYOCCodeBuildKey", "/mlops/code/build/byoc", byoc_build_app_asset.s3_object_key, ) diff --git a/mlops-multi-account-cdk/mlops-sm-project-template-rt/mlops_sm_project_template_rt/templates/byoc_project_stack.py b/mlops-multi-account-cdk/mlops-sm-project-template-rt/mlops_sm_project_template_rt/templates/byoc_project_stack.py index 44f59b8a..91ab00df 100644 --- a/mlops-multi-account-cdk/mlops-sm-project-template-rt/mlops_sm_project_template_rt/templates/byoc_project_stack.py +++ b/mlops-multi-account-cdk/mlops-sm-project-template-rt/mlops_sm_project_template_rt/templates/byoc_project_stack.py @@ -43,8 +43,8 @@ class MLOpsStack(Stack): - DESCRIPTION: str = "This template includes a model building pipeline that includes a workflow to 
pre-process, train, evaluate and register a model. The deploy pipeline creates a preprod and production endpoint. The target DEV/PREPROD/PROD accounts are predefined in the template." - TEMPLATE_NAME: str = "Basic MLOps template for real-time deployment" + DESCRIPTION: str = "This template includes a model building pipeline that includes a workflow to build your own container, pre-process, train, evaluate and register a model. The deploy pipeline creates a preprod and production endpoint. The target DEV/PREPROD/PROD accounts are predefined in the template." + TEMPLATE_NAME: str = "MLOps template for real-time deployment using your own container" def __init__(self, scope: Construct, construct_id: str, **kwargs) -> None: super().__init__(scope, construct_id, **kwargs) From 7cb9ef9c2b121dbfb33a441d5569ce5123d3295d Mon Sep 17 00:00:00 2001 From: Fatema Alkhanaizi Date: Wed, 3 Aug 2022 12:15:49 +0100 Subject: [PATCH 06/15] fix some errors in the code and added a read me for advanced topics --- .../ADVANCED_TOPICS.md | 55 +++++++++++++++++++ .../mlops-sm-project-template-rt/README.md | 4 ++ .../service_catalog_stack.py | 2 +- .../build_pipeline_construct.py | 6 +- .../byoc_build_app/source_scripts/Dockerfile | 2 +- .../source_scripts/docker-build.sh | 3 +- 6 files changed, 67 insertions(+), 5 deletions(-) create mode 100644 mlops-multi-account-cdk/mlops-sm-project-template-rt/ADVANCED_TOPICS.md diff --git a/mlops-multi-account-cdk/mlops-sm-project-template-rt/ADVANCED_TOPICS.md b/mlops-multi-account-cdk/mlops-sm-project-template-rt/ADVANCED_TOPICS.md new file mode 100644 index 00000000..9f86a1b4 --- /dev/null +++ b/mlops-multi-account-cdk/mlops-sm-project-template-rt/ADVANCED_TOPICS.md @@ -0,0 +1,55 @@ +# Advanced topics + +## Test the created templates as normal cloudformation templates +***NOTE:** make sure to run `cdk synth` before running any of the commands defined below.* + +You will need to deploy the `service catalog stack` as that would setup your account with 
the required resources and ssm parameters before you can start testing your templates directly. If you don't have the service catalog stack already deployed in your account, you can achieve this by running the following command: +``` +cdk --app ./cdk.out/assembly-Personal deploy --all --profile mlops-dev +``` + +Otherwise, make sure you have these ssm parameters defined: +- in the dev account: + - /mlops/dev/account_id + - /mlops/code/seed_bucket + - /mlops/code/build + - /mlops/code/build/byoc + - /mlops/code/deploy +- in the preprod account: + - /mlops/preprod/account_id + - /mlops/preprod/region +- in the prod account: + - /mlops/prod/account_id + - /mlops/prod/region + +For quick testing of the sagemaker templates, you could deploy the json generated by CDK directly in your account by running the following command: +``` +aws cloudformation deploy \ + --template-file ./cdk.out/byoc-project-stack-dev.template.json \ + --stack-name byoc-project-stack-dev \ + --region eu-west-1 \ + --capabilities CAPABILITY_NAMED_IAM CAPABILITY_AUTO_EXPAND \ + --disable-rollback \ + --s3-bucket <your-s3-bucket-name> \ + --profile mlops-dev \ + --parameter-overrides \ + SageMakerProjectName=mlops-test-0 \ + SageMakerProjectId=sm12340 +``` +This command will deploy the byoc project stack. If you want to deploy other templates, just change the `--template-file`; if you want to create a new stack, you can change the other fields as well. 
+ +It is also possible to use the CDK command for this exact purpose, but this would require you to add the following to the `app.py` file: +``` +from mlops_sm_project_template_rt.templates.byoc_project_stack import MLOpsStack + +MLOpsStack( + app, + "test", + env=deployment_env, +) +``` +Then run `cdk synth` and then run the following to deploy: +``` +cdk deploy test --parameters SageMakerProjectName=mlops-test \ + --parameters SageMakerProjectId=sm1234 --profile mlops-dev +``` \ No newline at end of file diff --git a/mlops-multi-account-cdk/mlops-sm-project-template-rt/README.md b/mlops-multi-account-cdk/mlops-sm-project-template-rt/README.md index 2192cc07..9c1ad3d1 100644 --- a/mlops-multi-account-cdk/mlops-sm-project-template-rt/README.md +++ b/mlops-multi-account-cdk/mlops-sm-project-template-rt/README.md @@ -20,6 +20,8 @@ This repository contains the resources that are required to deploy the MLOps Fou - [Manual Deployment of Service Catalog Stack](#manual-deployment-of-service-catalog-stack) - [Clean-up](#clean-up) - [Troubleshooting](#troubleshooting) + - [Advanced topics](#advanced-topics) + - [Test the created templates as normal cloudformation templates](#test-the-created-templates-as-normal-cloudformation-templates) ## Solution Architecture @@ -369,3 +371,5 @@ One of the following would solve the problem: * Docker is having an issue so restart your docker daemon * Refresh your awscli credentials * Clear all cached cdk outputs by running `make clean` + + diff --git a/mlops-multi-account-cdk/mlops-sm-project-template-rt/mlops_sm_project_template_rt/service_catalog_stack.py b/mlops-multi-account-cdk/mlops-sm-project-template-rt/mlops_sm_project_template_rt/service_catalog_stack.py index ce44309b..6c69c7b4 100644 --- a/mlops-multi-account-cdk/mlops-sm-project-template-rt/mlops_sm_project_template_rt/service_catalog_stack.py +++ b/mlops-multi-account-cdk/mlops-sm-project-template-rt/mlops_sm_project_template_rt/service_catalog_stack.py @@ -234,7 +234,7 @@ def 
__init__( # role_constraint.add_depends_on(portfolio_association) # uncomment this block if you want to create service catalog products based on all templates - # make sure you comment out lines 213-247 + # make sure you comment out lines 202-234 products = self.deploy_all_products( portfolio_association, portfolio, diff --git a/mlops-multi-account-cdk/mlops-sm-project-template-rt/mlops_sm_project_template_rt/templates/byoc_pipeline_constructs/build_pipeline_construct.py b/mlops-multi-account-cdk/mlops-sm-project-template-rt/mlops_sm_project_template_rt/templates/byoc_pipeline_constructs/build_pipeline_construct.py index e0880c0f..303f8673 100644 --- a/mlops-multi-account-cdk/mlops-sm-project-template-rt/mlops_sm_project_template_rt/templates/byoc_pipeline_constructs/build_pipeline_construct.py +++ b/mlops-multi-account-cdk/mlops-sm-project-template-rt/mlops_sm_project_template_rt/templates/byoc_pipeline_constructs/build_pipeline_construct.py @@ -208,11 +208,13 @@ def __init__( "DockerBuild", build_spec=codebuild.BuildSpec.from_object( { + "version": 0.2, "phases": { "build": { "commands": [ - "chmod +x source_scripts/docker-build.sh", - f"./source_scripts/docker-build.sh {ecr_repository_name}", + "cd source_scripts", + "chmod +x docker-build.sh", + f"./docker-build.sh {ecr_repository_name}", ] }, }, diff --git a/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/source_scripts/Dockerfile b/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/source_scripts/Dockerfile index 7057bb4f..df8a1386 100644 --- a/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/source_scripts/Dockerfile +++ b/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/source_scripts/Dockerfile @@ -1,4 +1,4 @@ -FROM public.ecr.aws/docker/library/python:3.7-buster as base +FROM public.ecr.aws/docker/library/python:buster as base RUN apt-get -y update && apt-get install -y \ nginx \ diff --git 
a/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/source_scripts/docker-build.sh b/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/source_scripts/docker-build.sh index a51dd96d..2a74648e 100755 --- a/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/source_scripts/docker-build.sh +++ b/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/source_scripts/docker-build.sh @@ -10,7 +10,7 @@ AWS_ACCOUNT_ID=$(jq -r .registryId repository-info.json); REPOSITORY_URI=${AWS_ACCOUNT_ID}.dkr.ecr.${AWS_DEFAULT_REGION}.amazonaws.com/${REPO_NAME}; # REPOSITORY_URI=local -aws ecr get-login-password --region AWS_DEFAULT_REGION | docker login --username AWS --password-stdin ${AWS_ACCOUNT_ID}.dkr.ecr.${AWS_DEFAULT_REGION}.amazonaws.com +aws ecr get-login-password --region $AWS_DEFAULT_REGION | docker login --username AWS --password-stdin ${AWS_ACCOUNT_ID}.dkr.ecr.${AWS_DEFAULT_REGION}.amazonaws.com target_stages=("xgboost") @@ -24,6 +24,7 @@ do docker build --target $stage -t $REPOSITORY_URI:$stage . 
docker tag $REPOSITORY_URI:$stage $REPOSITORY_URI:$IMAGE_TAG + docker push $REPOSITORY_URI:$stage docker push $REPOSITORY_URI:$IMAGE_TAG done From 46fa879524b3f48b75fedebadf3d6c80c837b460 Mon Sep 17 00:00:00 2001 From: Georgios Schinas Date: Wed, 3 Aug 2022 17:03:51 +0100 Subject: [PATCH 07/15] byoc changes --- .../build_pipeline_construct.py | 3 + .../templates/byoc_project_stack.py | 2 +- .../seed_code/byoc_build_app/buildspec.yml | 2 +- .../ml_pipelines/training/pipeline.py | 100 ++-- .../notebooks/sm_pipelines_runbook.ipynb | 458 ------------------ .../byoc_build_app/source_scripts/Dockerfile | 57 ++- .../source_scripts/docker-build.sh | 2 +- .../docker_helpers/entrypoint.R | 62 +++ .../source_scripts/docker_helpers/run.sh | 3 + .../source_scripts/evaluate/README.md | 1 + .../evaluate/evaluate_xgboost/README.md | 0 .../evaluate/evaluate_xgboost/main.py | 72 --- .../evaluate_xgboost/requirements.txt | 0 .../source_scripts/evaluate/evaluation.R | 47 ++ .../source_scripts/preprocessing/README.md | 1 + .../prepare_abalone_data/README.md | 0 .../prepare_abalone_data/main.py | 132 ----- .../prepare_abalone_data/preprocessing.R | 50 ++ .../prepare_abalone_data/requirements.txt | 0 .../source_scripts/training/README.md | 1 + .../source_scripts/training/deploy.R | 39 ++ .../{xgboost/__main__.py => endpoints.R} | 21 + .../training/{xgboost => }/test/test_a.py | 0 .../source_scripts/training/train.R | 52 ++ .../source_scripts/training/xgboost/README.md | 0 .../training/xgboost/requirements.txt | 0 26 files changed, 340 insertions(+), 765 deletions(-) delete mode 100644 mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/notebooks/sm_pipelines_runbook.ipynb create mode 100644 mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/source_scripts/docker_helpers/entrypoint.R create mode 100644 mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/source_scripts/docker_helpers/run.sh create mode 100644 
mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/source_scripts/evaluate/README.md delete mode 100644 mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/source_scripts/evaluate/evaluate_xgboost/README.md delete mode 100644 mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/source_scripts/evaluate/evaluate_xgboost/main.py delete mode 100644 mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/source_scripts/evaluate/evaluate_xgboost/requirements.txt create mode 100644 mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/source_scripts/evaluate/evaluation.R create mode 100644 mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/source_scripts/preprocessing/README.md delete mode 100644 mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/source_scripts/preprocessing/prepare_abalone_data/README.md delete mode 100644 mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/source_scripts/preprocessing/prepare_abalone_data/main.py create mode 100644 mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/source_scripts/preprocessing/prepare_abalone_data/preprocessing.R delete mode 100644 mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/source_scripts/preprocessing/prepare_abalone_data/requirements.txt create mode 100644 mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/source_scripts/training/README.md create mode 100644 mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/source_scripts/training/deploy.R rename mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/source_scripts/training/{xgboost/__main__.py => endpoints.R} (73%) rename 
mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/source_scripts/training/{xgboost => }/test/test_a.py (100%) create mode 100644 mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/source_scripts/training/train.R delete mode 100644 mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/source_scripts/training/xgboost/README.md delete mode 100644 mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/source_scripts/training/xgboost/requirements.txt diff --git a/mlops-multi-account-cdk/mlops-sm-project-template-rt/mlops_sm_project_template_rt/templates/byoc_pipeline_constructs/build_pipeline_construct.py b/mlops-multi-account-cdk/mlops-sm-project-template-rt/mlops_sm_project_template_rt/templates/byoc_pipeline_constructs/build_pipeline_construct.py index 303f8673..cf34af13 100644 --- a/mlops-multi-account-cdk/mlops-sm-project-template-rt/mlops_sm_project_template_rt/templates/byoc_pipeline_constructs/build_pipeline_construct.py +++ b/mlops-multi-account-cdk/mlops-sm-project-template-rt/mlops_sm_project_template_rt/templates/byoc_pipeline_constructs/build_pipeline_construct.py @@ -198,6 +198,9 @@ def __init__( "ARTIFACT_BUCKET_KMS_ID": codebuild.BuildEnvironmentVariable( value=s3_artifact.encryption_key.key_id ), + "ECR_REPO_URI": codebuild.BuildEnvironmentVariable( + value=f"{Aws.ACCOUNT_ID}.dkr.ecr.{Aws.REGION}.amazonaws.com/{ecr_repository_name}" + ), }, ), ) diff --git a/mlops-multi-account-cdk/mlops-sm-project-template-rt/mlops_sm_project_template_rt/templates/byoc_project_stack.py b/mlops-multi-account-cdk/mlops-sm-project-template-rt/mlops_sm_project_template_rt/templates/byoc_project_stack.py index 91ab00df..2c4e425e 100644 --- a/mlops-multi-account-cdk/mlops-sm-project-template-rt/mlops_sm_project_template_rt/templates/byoc_project_stack.py +++ b/mlops-multi-account-cdk/mlops-sm-project-template-rt/mlops_sm_project_template_rt/templates/byoc_project_stack.py @@ 
-218,7 +218,7 @@ def __init__(self, scope: Construct, construct_id: str, **kwargs) -> None: self, "MLModelsECRRepository", image_scan_on_push=True, - image_tag_mutability=ecr.TagMutability.IMMUTABLE, + image_tag_mutability=ecr.TagMutability.MUTABLE, repository_name=f"{project_name}", ) diff --git a/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/buildspec.yml b/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/buildspec.yml index 9f9010d1..600f53a4 100644 --- a/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/buildspec.yml +++ b/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/buildspec.yml @@ -15,5 +15,5 @@ phases: run-pipeline --module-name ml_pipelines.training.pipeline \ --role-arn $SAGEMAKER_PIPELINE_ROLE_ARN \ --tags "[{\"Key\":\"sagemaker:project-name\", \"Value\":\"${SAGEMAKER_PROJECT_NAME}\"}, {\"Key\":\"sagemaker:project-id\", \"Value\":\"${SAGEMAKER_PROJECT_ID}\"}]" \ - --kwargs "{\"region\":\"${AWS_REGION}\",\"role\":\"${SAGEMAKER_PIPELINE_ROLE_ARN}\",\"default_bucket\":\"${ARTIFACT_BUCKET}\",\"pipeline_name\":\"${SAGEMAKER_PROJECT_NAME_ID}\",\"model_package_group_name\":\"${MODEL_PACKAGE_GROUP_NAME}\",\"base_job_prefix\":\"${SAGEMAKER_PROJECT_NAME_ID}\", \"bucket_kms_id\":\"${ARTIFACT_BUCKET_KMS_ID}\"}" + --kwargs "{\"region\":\"${AWS_REGION}\",\"role\":\"${SAGEMAKER_PIPELINE_ROLE_ARN}\",\"default_bucket\":\"${ARTIFACT_BUCKET}\",\"pipeline_name\":\"${SAGEMAKER_PROJECT_NAME_ID}\",\"model_package_group_name\":\"${MODEL_PACKAGE_GROUP_NAME}\",\"base_job_prefix\":\"${SAGEMAKER_PROJECT_NAME_ID}\", \"bucket_kms_id\":\"${ARTIFACT_BUCKET_KMS_ID}\", \"git_hash\":\"${CODEBUILD_RESOLVED_SOURCE_VERSION}\", \"ecr_repo_uri\":\"${ECR_REPO_URI}\"}" - echo "Create/Update of the SageMaker Pipeline and execution completed." 
diff --git a/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/ml_pipelines/training/pipeline.py b/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/ml_pipelines/training/pipeline.py index df9c8c74..6366540e 100644 --- a/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/ml_pipelines/training/pipeline.py +++ b/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/ml_pipelines/training/pipeline.py @@ -106,6 +106,8 @@ def get_pipeline( pipeline_name="AbalonePipeline", base_job_prefix="Abalone", project_id="SageMakerProjectId", + git_hash="", + ecr_repo_uri="", ): """Gets a SageMaker ML Pipeline instance working with on abalone data. @@ -113,6 +115,8 @@ def get_pipeline( region: AWS region to create and run the pipeline. role: IAM role to create and run steps and pipeline. default_bucket: the bucket to use for storing the artifacts + git_hash: the hash id of the current commit. Used to determine which docker image version to use + ecr_repo_uri: uri of the ECR repository used by this project Returns: an instance of a pipeline @@ -132,9 +136,9 @@ def get_pipeline( name="InputDataUrl", default_value=f"s3://sagemaker-servicecatalog-seedcode-{region}/dataset/abalone-dataset.csv", ) - processing_image_name = "sagemaker-{0}-processingimagebuild".format(project_id) - training_image_name = "sagemaker-{0}-trainingimagebuild".format(project_id) - inference_image_name = "sagemaker-{0}-inferenceimagebuild".format(project_id) + processing_image_uri = f"{ecr_repo_uri}:processing-{git_hash}" + training_image_uri = f"{ecr_repo_uri}:training-{git_hash}" + inference_image_uri = f"{ecr_repo_uri}:training-{git_hash}" # network_config = NetworkConfig( # enable_network_isolation=True, @@ -143,25 +147,12 @@ def get_pipeline( # encrypt_inter_container_traffic=True, # ) - # processing step for feature engineering - try: - processing_image_uri = 
sagemaker_session.sagemaker_client.describe_image_version( - ImageName=processing_image_name - )["ContainerImage"] - except (sagemaker_session.sagemaker_client.exceptions.ResourceNotFound): - processing_image_uri = sagemaker.image_uris.retrieve( - framework="xgboost", - region=region, - version="1.0-1", - py_version="py3", - instance_type="ml.m5.xlarge", - ) script_processor = ScriptProcessor( image_uri=processing_image_uri, instance_type=processing_instance_type, instance_count=processing_instance_count, - base_job_name=f"{base_job_prefix}/sklearn-abalone-preprocess", - command=["python3"], + base_job_name=f"{base_job_prefix}/byoc-abalone-preprocess", + command=["Rscript"], sagemaker_session=sagemaker_session, role=role, output_kms_key=bucket_kms_id, @@ -169,32 +160,19 @@ def get_pipeline( step_process = ProcessingStep( name="PreprocessAbaloneData", processor=script_processor, + inputs=[ProcessingInput(source =input_data, destination="/opt/ml/processing/input")], outputs=[ - ProcessingOutput(output_name="train", source="/opt/ml/processing/train"), - ProcessingOutput(output_name="validation", source="/opt/ml/processing/validation"), - ProcessingOutput(output_name="test", source="/opt/ml/processing/test"), + ProcessingOutput(output_name="train", source="/opt/ml/processing/output/train"), + ProcessingOutput(output_name="validation", source="/opt/ml/processing/output/validation"), + ProcessingOutput(output_name="test", source="/opt/ml/processing/test/output"), ], - code="source_scripts/preprocessing/prepare_abalone_data/main.py", # we must figure out this path to get it from step_source directory - job_arguments=["--input-data", input_data], + code="source_scripts/preprocessing/prepare_abalone_data/preprocessing.R", # we must figure out this path to get it from step_source directory ) # training step for generating model artifacts model_path = f"s3://{default_bucket}/{base_job_prefix}/AbaloneTrain" - try: - training_image_uri = 
sagemaker_session.sagemaker_client.describe_image_version(ImageName=training_image_name)[ - "ContainerImage" - ] - except (sagemaker_session.sagemaker_client.exceptions.ResourceNotFound): - training_image_uri = sagemaker.image_uris.retrieve( - framework="xgboost", - region=region, - version="1.0-1", - py_version="py3", - instance_type="ml.m5.xlarge", - ) - - xgb_train = Estimator( + train_estimator = Estimator( image_uri=training_image_uri, instance_type=training_instance_type, instance_count=1, @@ -203,36 +181,30 @@ def get_pipeline( sagemaker_session=sagemaker_session, role=role, output_kms_key=bucket_kms_id, + source_dir="source_scripts/training/", + entry_point="train.R", + metric_definitions=[{"Name":"rmse-validation", "Regex": "Calculated validation RMSE: ([0-9.]+);.*$"}], ) - xgb_train.set_hyperparameters( - objective="reg:linear", - num_round=50, - max_depth=5, - eta=0.2, - gamma=4, - min_child_weight=6, - subsample=0.7, - silent=0, - ) + step_train = TrainingStep( name="TrainAbaloneModel", - estimator=xgb_train, + estimator=train_estimator, inputs={ "train": TrainingInput( s3_data=step_process.properties.ProcessingOutputConfig.Outputs["train"].S3Output.S3Uri, content_type="text/csv", ), - "validation": TrainingInput( - s3_data=step_process.properties.ProcessingOutputConfig.Outputs["validation"].S3Output.S3Uri, - content_type="text/csv", - ), + # "validation": TrainingInput( # Validation data not used by seed code, but uncomment to make available during training + # s3_data=step_process.properties.ProcessingOutputConfig.Outputs["validation"].S3Output.S3Uri, + # content_type="text/csv", + # ), }, ) # processing step for evaluation script_eval = ScriptProcessor( image_uri=training_image_uri, - command=["python3"], + command=["Rscript"], instance_type=processing_instance_type, instance_count=1, base_job_name=f"{base_job_prefix}/script-abalone-eval", @@ -261,7 +233,7 @@ def get_pipeline( outputs=[ ProcessingOutput(output_name="evaluation", 
source="/opt/ml/processing/evaluation"), ], - code="source_scripts/evaluate/evaluate_xgboost/main.py", + code="source_scripts/evaluate/evaluation.R", property_files=[evaluation_report], ) @@ -275,25 +247,13 @@ def get_pipeline( ) ) - try: - inference_image_uri = sagemaker_session.sagemaker_client.describe_image_version(ImageName=inference_image_name)[ - "ContainerImage" - ] - except (sagemaker_session.sagemaker_client.exceptions.ResourceNotFound): - inference_image_uri = sagemaker.image_uris.retrieve( - framework="xgboost", - region=region, - version="1.0-1", - py_version="py3", - instance_type="ml.m5.xlarge", - ) step_register = RegisterModel( name="RegisterAbaloneModel", - estimator=xgb_train, + estimator=train_estimator, image_uri=inference_image_uri, model_data=step_train.properties.ModelArtifacts.S3ModelArtifacts, - content_types=["text/csv"], - response_types=["text/csv"], + content_types=["application/json"], + response_types=["application/json"], inference_instances=["ml.t2.medium", "ml.m5.large"], transform_instances=["ml.m5.large"], model_package_group_name=model_package_group_name, @@ -304,7 +264,7 @@ def get_pipeline( # condition step for evaluating model quality and branching execution cond_lte = ConditionLessThanOrEqualTo( left=JsonGet( - step_name=step_eval.name, property_file=evaluation_report, json_path="regression_metrics.mse.value" + step_name=step_eval.name, property_file=evaluation_report, json_path="regression_metrics.rmse.value" ), right=6.0, ) diff --git a/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/notebooks/sm_pipelines_runbook.ipynb b/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/notebooks/sm_pipelines_runbook.ipynb deleted file mode 100644 index b6e8e6b6..00000000 --- a/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/notebooks/sm_pipelines_runbook.ipynb +++ /dev/null @@ -1,458 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - 
"execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import os\n", - "\n", - "import boto3\n", - "import logging\n", - "import sagemaker\n", - "import sagemaker.session\n", - "\n", - "from sagemaker.estimator import Estimator\n", - "from sagemaker.inputs import TrainingInput\n", - "from sagemaker.model_metrics import (\n", - " MetricsSource,\n", - " ModelMetrics,\n", - ")\n", - "from sagemaker.processing import (\n", - " ProcessingInput,\n", - " ProcessingOutput,\n", - " ScriptProcessor,\n", - ")\n", - "from sagemaker.sklearn.processing import SKLearnProcessor\n", - "from sagemaker.workflow.conditions import ConditionLessThanOrEqualTo\n", - "from sagemaker.workflow.condition_step import (\n", - " ConditionStep,\n", - ")\n", - "from sagemaker.workflow.functions import (\n", - " JsonGet,\n", - ")\n", - "from sagemaker.workflow.parameters import (\n", - " ParameterInteger,\n", - " ParameterString,\n", - ")\n", - "from sagemaker.workflow.pipeline import Pipeline\n", - "from sagemaker.workflow.properties import PropertyFile\n", - "from sagemaker.workflow.steps import (\n", - " ProcessingStep,\n", - " TrainingStep,\n", - ")\n", - "from sagemaker.workflow.step_collections import RegisterModel\n", - "\n", - "from botocore.exceptions import ClientError" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "logger = logging.getLogger(__name__)\n", - "\n", - "\"\"\"Environment Variables\"\"\"\n", - "proj_dir = \"TO_BE_DEFINED\"\n", - "region= \"TO_BE_DEFINED\"\n", - "model_artefact_bucket= \"TO_BE_DEFINED\"\n", - "role = \"TO_BE_DEFINED\"\n", - "project_name= \"TO_BE_DEFINED\"\n", - "stage= \"test\"\n", - "model_package_group_name=\"AbalonePackageGroup\",\n", - "pipeline_name=\"AbalonePipeline\",\n", - "base_job_prefix=\"Abalone\",\n", - "project_id=\"SageMakerProjectId\",\n", - "processing_image_uri=None\n", - "training_image_uri=None\n", - "inference_image_uri=None" - ] - }, - { - 
"cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "def get_session(region, default_bucket):\n", - " \"\"\"Gets the sagemaker session based on the region.\n", - "\n", - " Args:\n", - " region: the aws region to start the session\n", - " default_bucket: the bucket to use for storing the artifacts\n", - "\n", - " Returns:\n", - " `sagemaker.session.Session instance\n", - " \"\"\"\n", - "\n", - " boto_session = boto3.Session(region_name=region)\n", - "\n", - " sagemaker_client = boto_session.client(\"sagemaker\")\n", - " runtime_client = boto_session.client(\"sagemaker-runtime\")\n", - " return sagemaker.session.Session(\n", - " boto_session=boto_session,\n", - " sagemaker_client=sagemaker_client,\n", - " sagemaker_runtime_client=runtime_client,\n", - " default_bucket=default_bucket,\n", - " )" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "sagemaker_session = get_session(region, model_artefact_bucket)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Feature Engineering\n", - "This section describes the different steps involved in feature engineering which includes loading and transforming different data sources to build the features needed for the ML Use Case" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "processing_instance_count = ParameterInteger(name=\"ProcessingInstanceCount\", default_value=1)\n", - "processing_instance_type = ParameterString(name=\"ProcessingInstanceType\", default_value=\"ml.m5.xlarge\")\n", - "training_instance_type = ParameterString(name=\"TrainingInstanceType\", default_value=\"ml.m5.xlarge\")\n", - "inference_instance_type = ParameterString(name=\"InferenceInstanceType\", default_value=\"ml.m5.xlarge\")\n", - "model_approval_status = ParameterString(name=\"ModelApprovalStatus\", default_value=\"PendingManualApproval\")\n", - 
"input_data = ParameterString(\n", - " name=\"InputDataUrl\",\n", - " default_value=f\"s3://sagemaker-servicecatalog-seedcode-{region}/dataset/abalone-dataset.csv\",\n", - ")\n", - "processing_image_name = \"sagemaker-{0}-processingimagebuild\".format(project_id)\n", - "training_image_name = \"sagemaker-{0}-trainingimagebuild\".format(project_id)\n", - "inference_image_name = \"sagemaker-{0}-inferenceimagebuild\".format(project_id)\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# processing step for feature engineering\n", - "try:\n", - " processing_image_uri = sagemaker_session.sagemaker_client.describe_image_version(\n", - " ImageName=processing_image_name\n", - " )[\"ContainerImage\"]\n", - "\n", - "except (sagemaker_session.sagemaker_client.exceptions.ResourceNotFound):\n", - " processing_image_uri = sagemaker.image_uris.retrieve(\n", - " framework=\"xgboost\",\n", - " region=region,\n", - " version=\"1.0-1\",\n", - " py_version=\"py3\",\n", - " instance_type=processing_instance_type,\n", - " )" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Define Script Processor\n", - "script_processor = ScriptProcessor(\n", - " image_uri=processing_image_uri,\n", - " instance_type=processing_instance_type,\n", - " instance_count=processing_instance_count,\n", - " base_job_name=f\"{base_job_prefix}/sklearn-abalone-preprocess\",\n", - " command=[\"python3\"],\n", - " sagemaker_session=sagemaker_session,\n", - " role=role,\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Define ProcessingStep\n", - "step_process = ProcessingStep(\n", - " name=\"PreprocessAbaloneData\",\n", - " processor=script_processor,\n", - " outputs=[\n", - " ProcessingOutput(output_name=\"train\", source=\"/opt/ml/processing/train\"),\n", - " ProcessingOutput(output_name=\"validation\", 
source=\"/opt/ml/processing/validation\"),\n", - " ProcessingOutput(output_name=\"test\", source=\"/opt/ml/processing/test\"),\n", - " ],\n", - " code=\"source_scripts/preprocessing/prepare_abalone_data/main.py\", # we must figure out this path to get it from step_source directory\n", - " job_arguments=[\"--input-data\", input_data],\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Training an XGBoost model" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# training step for generating model artifacts\n", - "model_path = f\"s3://{sagemaker_session.default_bucket()}/{base_job_prefix}/AbaloneTrain\"\n", - "\n", - "try:\n", - " training_image_uri = sagemaker_session.sagemaker_client.describe_image_version(ImageName=training_image_name)[\n", - " \"ContainerImage\"\n", - " ]\n", - "except (sagemaker_session.sagemaker_client.exceptions.ResourceNotFound):\n", - " training_image_uri = sagemaker.image_uris.retrieve(\n", - " framework=\"xgboost\",\n", - " region=region,\n", - " version=\"1.0-1\",\n", - " py_version=\"py3\",\n", - " instance_type=training_instance_type,\n", - " )\n", - "\n", - "xgb_train = Estimator(\n", - " image_uri=training_image_uri,\n", - " instance_type=training_instance_type,\n", - " instance_count=1,\n", - " output_path=model_path,\n", - " base_job_name=f\"{base_job_prefix}/abalone-train\",\n", - " sagemaker_session=sagemaker_session,\n", - " role=role,\n", - ")\n", - "xgb_train.set_hyperparameters(\n", - " objective=\"reg:linear\",\n", - " num_round=50,\n", - " max_depth=5,\n", - " eta=0.2,\n", - " gamma=4,\n", - " min_child_weight=6,\n", - " subsample=0.7,\n", - " silent=0,\n", - ")\n", - "step_train = TrainingStep(\n", - " name=\"TrainAbaloneModel\",\n", - " estimator=xgb_train,\n", - " inputs={\n", - " \"train\": TrainingInput(\n", - " s3_data=step_process.properties.ProcessingOutputConfig.Outputs[\"train\"].S3Output.S3Uri,\n", - " 
content_type=\"text/csv\",\n", - " ),\n", - " \"validation\": TrainingInput(\n", - " s3_data=step_process.properties.ProcessingOutputConfig.Outputs[\"validation\"].S3Output.S3Uri,\n", - " content_type=\"text/csv\",\n", - " ),\n", - " },\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Evaluate the Model" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# processing step for evaluation\n", - "script_eval = ScriptProcessor(\n", - " image_uri=training_image_uri,\n", - " command=[\"python3\"],\n", - " instance_type=processing_instance_type,\n", - " instance_count=1,\n", - " base_job_name=f\"{base_job_prefix}/script-abalone-eval\",\n", - " sagemaker_session=sagemaker_session,\n", - " role=role,\n", - ")\n", - "evaluation_report = PropertyFile(\n", - " name=\"AbaloneEvaluationReport\",\n", - " output_name=\"evaluation\",\n", - " path=\"evaluation.json\",\n", - ")\n", - "step_eval = ProcessingStep(\n", - " name=\"EvaluateAbaloneModel\",\n", - " processor=script_eval,\n", - " inputs=[\n", - " ProcessingInput(\n", - " source=step_train.properties.ModelArtifacts.S3ModelArtifacts,\n", - " destination=\"/opt/ml/processing/model\",\n", - " ),\n", - " ProcessingInput(\n", - " source=step_process.properties.ProcessingOutputConfig.Outputs[\"test\"].S3Output.S3Uri,\n", - " destination=\"/opt/ml/processing/test\",\n", - " ),\n", - " ],\n", - " outputs=[\n", - " ProcessingOutput(output_name=\"evaluation\", source=\"/opt/ml/processing/evaluation\"),\n", - " ],\n", - " code=\"source_scripts/evaluate/evaluate_xgboost/main.py\",\n", - " property_files=[evaluation_report],\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Conditional step to push model to SageMaker Model Registry" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# register model step that will be conditionally 
executed\n", - "model_metrics = ModelMetrics(\n", - " model_statistics=MetricsSource(\n", - " s3_uri=\"{}/evaluation.json\".format(\n", - " step_eval.arguments[\"ProcessingOutputConfig\"][\"Outputs\"][0][\"S3Output\"][\"S3Uri\"]\n", - " ),\n", - " content_type=\"application/json\",\n", - " )\n", - ")\n", - "\n", - "try:\n", - " inference_image_uri = sagemaker_session.sagemaker_client.describe_image_version(ImageName=inference_image_name)[\n", - " \"ContainerImage\"\n", - " ]\n", - "except (sagemaker_session.sagemaker_client.exceptions.ResourceNotFound):\n", - " inference_image_uri = sagemaker.image_uris.retrieve(\n", - " framework=\"xgboost\",\n", - " region=region,\n", - " version=\"1.0-1\",\n", - " py_version=\"py3\",\n", - " instance_type=inference_instance_type,\n", - " )\n", - "step_register = RegisterModel(\n", - " name=\"RegisterAbaloneModel\",\n", - " estimator=xgb_train,\n", - " image_uri=inference_image_uri,\n", - " model_data=step_train.properties.ModelArtifacts.S3ModelArtifacts,\n", - " content_types=[\"text/csv\"],\n", - " response_types=[\"text/csv\"],\n", - " inference_instances=[\"ml.t2.medium\", \"ml.m5.large\"],\n", - " transform_instances=[\"ml.m5.large\"],\n", - " model_package_group_name=model_package_group_name,\n", - " approval_status=model_approval_status,\n", - " model_metrics=model_metrics,\n", - ")\n", - "\n", - "# condition step for evaluating model quality and branching execution\n", - "cond_lte = ConditionLessThanOrEqualTo(\n", - " left=JsonGet(\n", - " step_name=step_eval.name, property_file=evaluation_report, json_path=\"regression_metrics.mse.value\"\n", - " ),\n", - " right=6.0,\n", - ")\n", - "step_cond = ConditionStep(\n", - " name=\"CheckMSEAbaloneEvaluation\",\n", - " conditions=[cond_lte],\n", - " if_steps=[step_register],\n", - " else_steps=[],\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Create and run the Pipeline" - ] - }, - { - "cell_type": "code", - "execution_count": null, - 
"metadata": {}, - "outputs": [], - "source": [ - "# pipeline instance\n", - "pipeline = Pipeline(\n", - " name=pipeline_name,\n", - " parameters=[\n", - " processing_instance_type,\n", - " processing_instance_count,\n", - " training_instance_type,\n", - " model_approval_status,\n", - " input_data,\n", - " ],\n", - " steps=[step_process, step_train, step_eval, step_cond],\n", - " sagemaker_session=sagemaker_session,\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import json\n", - "\n", - "\n", - "definition = json.loads(pipeline.definition())\n", - "definition" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "pipeline.upsert(role_arn=role, description=f'{stage} pipelines for {project_name}')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "pipeline.start()" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "conda_python3", - "language": "python", - "name": "conda_python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.6.13" - } - }, - "nbformat": 4, - "nbformat_minor": 4 -} diff --git a/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/source_scripts/Dockerfile b/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/source_scripts/Dockerfile index df8a1386..c885bab3 100644 --- a/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/source_scripts/Dockerfile +++ b/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/source_scripts/Dockerfile @@ -1,40 +1,37 @@ -FROM public.ecr.aws/docker/library/python:buster as base +FROM 
public.ecr.aws/docker/library/r-base:4.1.2 as base -RUN apt-get -y update && apt-get install -y \ - nginx \ - ca-certificates \ - policycoreutils \ - && rm -rf /var/lib/apt/lists/* +# Install tidyverse +RUN apt update && apt-get install -y --no-install-recommends \ + r-cran-tidyverse + +RUN R -e "install.packages(c('rjson'))" -ENV PATH="/usr/sbin/:${PATH}" - -COPY helpers/requirements.txt /requirements.txt - -RUN pip install --upgrade pip && pip install --no-cache -r /requirements.txt && \ - rm /requirements.txt -# Set up the program in the image -COPY helpers /opt/program +### start of PROCESSING container +FROM base as processing +ENTRYPOINT ["Rscript"] ### start of TRAINING container -FROM base as xgboost -COPY training/xgboost/requirements.txt /requirements.txt -RUN pip install --no-cache -r /requirements.txt && \ - rm /requirements.txt +FROM base as training +RUN apt-get -y update && apt-get install -y --no-install-recommends \ + wget \ + apt-transport-https \ + ca-certificates \ + libcurl4-openssl-dev \ + libsodium-dev + +RUN apt-get update && apt-get install -y python3-dev python3-pip +RUN pip3 install boto3 +RUN R -e "install.packages('reticulate',dependencies=TRUE, repos='http://cran.rstudio.com/')" +RUN R -e "install.packages(c('readr','plumber'))" -# sm vars -ENV SAGEMAKER_MODEL_SERVER_TIMEOUT="300" -ENV MODEL_SERVER_TIMEOUT="300" -ENV PYTHONUNBUFFERED=TRUE -ENV PYTHONDONTWRITEBYTECODE=TRUE -ENV PATH="/opt/program:${PATH}" +ENV PATH="/opt/ml/code:${PATH}" -# env vars +WORKDIR /opt/ml/code -# Set up the program in the image -COPY training/xgboost /opt/program +COPY docker_helpers/run.sh /opt/ml/code/run.sh +COPY docker_helpers/entrypoint.R /opt/ml/entrypoint.R -# set permissions of entrypoint -RUN chmod +x /opt/program/__main__.py +RUN /bin/bash -c 'chmod +x /opt/ml/code/run.sh' -WORKDIR /opt/program +ENTRYPOINT ["/bin/bash", "run.sh"] diff --git a/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/source_scripts/docker-build.sh 
b/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/source_scripts/docker-build.sh index 2a74648e..22e0d653 100755 --- a/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/source_scripts/docker-build.sh +++ b/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/source_scripts/docker-build.sh @@ -12,7 +12,7 @@ REPOSITORY_URI=${AWS_ACCOUNT_ID}.dkr.ecr.${AWS_DEFAULT_REGION}.amazonaws.com/${R aws ecr get-login-password --region $AWS_DEFAULT_REGION | docker login --username AWS --password-stdin ${AWS_ACCOUNT_ID}.dkr.ecr.${AWS_DEFAULT_REGION}.amazonaws.com -target_stages=("xgboost") +target_stages=("processing" "training") for stage in "${target_stages[@]}" do diff --git a/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/source_scripts/docker_helpers/entrypoint.R b/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/source_scripts/docker_helpers/entrypoint.R new file mode 100644 index 00000000..70c4b10d --- /dev/null +++ b/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/source_scripts/docker_helpers/entrypoint.R @@ -0,0 +1,62 @@ +# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +# +# SPDX-License-Identifier: MIT-0 +# +# Permission is hereby granted, free of charge, to any person obtaining a copy of this +# software and associated documentation files (the "Software"), to deal in the Software +# without restriction, including without limitation the rights to use, copy, modify, +# merge, publish, distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, +# INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A +# PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + +library(jsonlite) +library(reticulate) +library(stringr) + + +args = commandArgs(trailingOnly=TRUE) +print(args) + +boto3 <- import('boto3') +s3 <- boto3$client('s3') + +# Setup parameters +# Container directories +prefix <- '/opt/ml' +input_path <- paste(prefix, 'input/data', sep='/') +output_path <- paste(prefix, 'output', sep='/') +model_path <- paste(prefix, 'model', sep='/') +code_dir <- paste(prefix, 'code', sep='/') +inference_code_dir <- paste(model_path, 'code', sep='/') + + +if (args=="train") { + + # This is where the hyperparamters are saved by the estimator on the container instance + param_path <- paste(prefix, 'input/config/hyperparameters.json', sep='/') + params <- read_json(param_path) + + s3_source_code_tar <- gsub('"', '', params$sagemaker_submit_directory) + script <- gsub('"', '', params$sagemaker_program) + + bucketkey <- str_replace(s3_source_code_tar, "s3://", "") + bucket <- str_remove(bucketkey, "/.*") + key <- str_remove(bucketkey, ".*?/") + + s3$download_file(bucket, key, "sourcedir.tar.gz") + untar("sourcedir.tar.gz", exdir=code_dir) + + print("training started") + source(file.path(code_dir, script)) + +} else if(args=="serve"){ + print("inference time") + source(file.path(inference_code_dir, "deploy.R")) +} diff --git a/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/source_scripts/docker_helpers/run.sh b/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/source_scripts/docker_helpers/run.sh new file mode 100644 index 00000000..ea149bb0 --- /dev/null +++ b/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/source_scripts/docker_helpers/run.sh @@ -0,0 +1,3 @@ +#!/bin/bash +echo "ready 
to execute" +Rscript /opt/ml/entrypoint.R $1 \ No newline at end of file diff --git a/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/source_scripts/evaluate/README.md b/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/source_scripts/evaluate/README.md new file mode 100644 index 00000000..daa6bdd5 --- /dev/null +++ b/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/source_scripts/evaluate/README.md @@ -0,0 +1 @@ +Use this folder to add all code related to evaluate the performance of your model. diff --git a/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/source_scripts/evaluate/evaluate_xgboost/README.md b/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/source_scripts/evaluate/evaluate_xgboost/README.md deleted file mode 100644 index e69de29b..00000000 diff --git a/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/source_scripts/evaluate/evaluate_xgboost/main.py b/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/source_scripts/evaluate/evaluate_xgboost/main.py deleted file mode 100644 index 7027811e..00000000 --- a/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/source_scripts/evaluate/evaluate_xgboost/main.py +++ /dev/null @@ -1,72 +0,0 @@ -# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. -# -# SPDX-License-Identifier: MIT-0 -# -# Permission is hereby granted, free of charge, to any person obtaining a copy of this -# software and associated documentation files (the "Software"), to deal in the Software -# without restriction, including without limitation the rights to use, copy, modify, -# merge, publish, distribute, sublicense, and/or sell copies of the Software, and to -# permit persons to whom the Software is furnished to do so. 
-# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, -# INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A -# PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT -# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION -# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE -# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - -"""Evaluation script for measuring mean squared error.""" -import json -import logging -import pathlib -import pickle -import tarfile - -import numpy as np -import pandas as pd -import xgboost - -from sklearn.metrics import mean_squared_error - -logger = logging.getLogger() -logger.setLevel(logging.INFO) -logger.addHandler(logging.StreamHandler()) - - -if __name__ == "__main__": - logger.debug("Starting evaluation.") - model_path = "/opt/ml/processing/model/model.tar.gz" - with tarfile.open(model_path) as tar: - tar.extractall(path=".") - - logger.debug("Loading xgboost model.") - model = pickle.load(open("xgboost-model", "rb")) - - logger.debug("Reading test data.") - test_path = "/opt/ml/processing/test/test.csv" - df = pd.read_csv(test_path, header=None) - - logger.debug("Reading test data.") - y_test = df.iloc[:, 0].to_numpy() - df.drop(df.columns[0], axis=1, inplace=True) - X_test = xgboost.DMatrix(df.values) - - logger.info("Performing predictions against test data.") - predictions = model.predict(X_test) - - logger.debug("Calculating mean squared error.") - mse = mean_squared_error(y_test, predictions) - std = np.std(y_test - predictions) - report_dict = { - "regression_metrics": { - "mse": {"value": mse, "standard_deviation": std}, - }, - } - - output_dir = "/opt/ml/processing/evaluation" - pathlib.Path(output_dir).mkdir(parents=True, exist_ok=True) - - logger.info("Writing out evaluation report with mse: %f", mse) - evaluation_path = f"{output_dir}/evaluation.json" - with 
open(evaluation_path, "w") as f: - f.write(json.dumps(report_dict)) diff --git a/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/source_scripts/evaluate/evaluate_xgboost/requirements.txt b/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/source_scripts/evaluate/evaluate_xgboost/requirements.txt deleted file mode 100644 index e69de29b..00000000 diff --git a/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/source_scripts/evaluate/evaluation.R b/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/source_scripts/evaluate/evaluation.R new file mode 100644 index 00000000..7f0ed1cf --- /dev/null +++ b/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/source_scripts/evaluate/evaluation.R @@ -0,0 +1,47 @@ +# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +# +# SPDX-License-Identifier: MIT-0 +# +# Permission is hereby granted, free of charge, to any person obtaining a copy of this +# software and associated documentation files (the "Software"), to deal in the Software +# without restriction, including without limitation the rights to use, copy, modify, +# merge, publish, distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, +# INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A +# PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ + +library(readr) +library(rjson) + +model_path <- "/opt/ml/processing/model/" +model_file_tar <- paste0(model_path, "model.tar.gz") +model_file <- paste0(model_path, "model") + +untar(model_file_tar, exdir = "/opt/ml/processing/model") + +load(model_file) + +test_path <- "/opt/ml/processing/test/" +abalone_test <- read_csv(paste0(test_path, 'abalone_test.csv')) + + +y_pred= predict(regressor, newdata=abalone_test[,-1]) +rmse <- sqrt(mean(((abalone_test[,1] - y_pred)^2)[,])) +print(paste0("Calculated validation RMSE: ",rmse,";")) + +report_dict = list( + regression_metrics = list( + rmse= list(value= rmse, standard_deviation = NA) + ) +) + +output_dir = "/opt/ml/processing/evaluation/evaluation.json" + +jsonData <- toJSON(report_dict) +write(jsonData, output_dir) \ No newline at end of file diff --git a/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/source_scripts/preprocessing/README.md b/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/source_scripts/preprocessing/README.md new file mode 100644 index 00000000..400033c4 --- /dev/null +++ b/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/source_scripts/preprocessing/README.md @@ -0,0 +1 @@ +Use this folder to add all code related to preprocessing your data. 
diff --git a/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/source_scripts/preprocessing/prepare_abalone_data/README.md b/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/source_scripts/preprocessing/prepare_abalone_data/README.md deleted file mode 100644 index e69de29b..00000000 diff --git a/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/source_scripts/preprocessing/prepare_abalone_data/main.py b/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/source_scripts/preprocessing/prepare_abalone_data/main.py deleted file mode 100644 index 063a1d81..00000000 --- a/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/source_scripts/preprocessing/prepare_abalone_data/main.py +++ /dev/null @@ -1,132 +0,0 @@ -# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. -# -# SPDX-License-Identifier: MIT-0 -# -# Permission is hereby granted, free of charge, to any person obtaining a copy of this -# software and associated documentation files (the "Software"), to deal in the Software -# without restriction, including without limitation the rights to use, copy, modify, -# merge, publish, distribute, sublicense, and/or sell copies of the Software, and to -# permit persons to whom the Software is furnished to do so. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, -# INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A -# PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT -# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION -# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE -# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
-"""Feature engineers the abalone dataset.""" -import argparse -import logging -import os -import pathlib -import requests -import tempfile - -import boto3 -import numpy as np -import pandas as pd - -from sklearn.compose import ColumnTransformer -from sklearn.impute import SimpleImputer -from sklearn.pipeline import Pipeline -from sklearn.preprocessing import StandardScaler, OneHotEncoder - -logger = logging.getLogger() -logger.setLevel(logging.INFO) -logger.addHandler(logging.StreamHandler()) - - -# Since we get a headerless CSV file we specify the column names here. -feature_columns_names = [ - "sex", - "length", - "diameter", - "height", - "whole_weight", - "shucked_weight", - "viscera_weight", - "shell_weight", -] -label_column = "rings" - -feature_columns_dtype = { - "sex": str, - "length": np.float64, - "diameter": np.float64, - "height": np.float64, - "whole_weight": np.float64, - "shucked_weight": np.float64, - "viscera_weight": np.float64, - "shell_weight": np.float64, -} -label_column_dtype = {"rings": np.float64} - - -def merge_two_dicts(x, y): - """Merges two dicts, returning a new copy.""" - z = x.copy() - z.update(y) - return z - - -if __name__ == "__main__": - logger.debug("Starting preprocessing.") - parser = argparse.ArgumentParser() - parser.add_argument("--input-data", type=str, required=True) - args = parser.parse_args() - - base_dir = "/opt/ml/processing" - pathlib.Path(f"{base_dir}/data").mkdir(parents=True, exist_ok=True) - input_data = args.input_data - bucket = input_data.split("/")[2] - key = "/".join(input_data.split("/")[3:]) - - logger.info("Downloading data from bucket: %s, key: %s", bucket, key) - fn = f"{base_dir}/data/abalone-dataset.csv" - s3 = boto3.resource("s3") - s3.Bucket(bucket).download_file(key, fn) - - logger.debug("Reading downloaded data.") - df = pd.read_csv( - fn, - header=None, - names=feature_columns_names + [label_column], - dtype=merge_two_dicts(feature_columns_dtype, label_column_dtype), - ) - os.unlink(fn) - - 
logger.debug("Defining transformers.") - numeric_features = list(feature_columns_names) - numeric_features.remove("sex") - numeric_transformer = Pipeline(steps=[("imputer", SimpleImputer(strategy="median")), ("scaler", StandardScaler())]) - - categorical_features = ["sex"] - categorical_transformer = Pipeline( - steps=[ - ("imputer", SimpleImputer(strategy="constant", fill_value="missing")), - ("onehot", OneHotEncoder(handle_unknown="ignore")), - ] - ) - - preprocess = ColumnTransformer( - transformers=[ - ("num", numeric_transformer, numeric_features), - ("cat", categorical_transformer, categorical_features), - ] - ) - - logger.info("Applying transforms.") - y = df.pop("rings") - X_pre = preprocess.fit_transform(df) - y_pre = y.to_numpy().reshape(len(y), 1) - - X = np.concatenate((y_pre, X_pre), axis=1) - - logger.info("Splitting %d rows of data into train, validation, test datasets.", len(X)) - np.random.shuffle(X) - train, validation, test = np.split(X, [int(0.7 * len(X)), int(0.85 * len(X))]) - - logger.info("Writing out datasets to %s.", base_dir) - pd.DataFrame(train).to_csv(f"{base_dir}/train/train.csv", header=False, index=False) - pd.DataFrame(validation).to_csv(f"{base_dir}/validation/validation.csv", header=False, index=False) - pd.DataFrame(test).to_csv(f"{base_dir}/test/test.csv", header=False, index=False) diff --git a/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/source_scripts/preprocessing/prepare_abalone_data/preprocessing.R b/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/source_scripts/preprocessing/prepare_abalone_data/preprocessing.R new file mode 100644 index 00000000..31932692 --- /dev/null +++ b/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/source_scripts/preprocessing/prepare_abalone_data/preprocessing.R @@ -0,0 +1,50 @@ +# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+# +# SPDX-License-Identifier: MIT-0 +# +# Permission is hereby granted, free of charge, to any person obtaining a copy of this +# software and associated documentation files (the "Software"), to deal in the Software +# without restriction, including without limitation the rights to use, copy, modify, +# merge, publish, distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, +# INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A +# PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + +library(readr) +library(dplyr) +library(ggplot2) +library(forcats) + +input_dir <- "/opt/ml/processing/input/" +output_dir <- "/opt/ml/processing/output/" +#dir.create(output_dir, showWarnings = FALSE) + +filename <- Sys.glob(paste(input_dir, "*.csv", sep="")) +abalone <- read_csv(filename) + +abalone <- abalone %>% + mutate(female = as.integer(ifelse(sex == 'F', 1, 0)), + male = as.integer(ifelse(sex == 'M', 1, 0)), + infant = as.integer(ifelse(sex == 'I', 1, 0))) %>% + select(-sex) +abalone <- abalone %>% select(rings:infant, length:shell_weight) + + +abalone_train <- abalone %>% + sample_frac(size = 0.7) +abalone <- anti_join(abalone, abalone_train) +abalone_test <- abalone %>% + sample_frac(size = 0.5) +abalone_valid <- anti_join(abalone, abalone_test) + + +write_csv(abalone_train, paste0(output_dir,'train/abalone_train.csv')) + +write_csv(abalone_valid, paste0(output_dir,'validation/abalone_valid.csv')) +write_csv(abalone_test, paste0(output_dir,'test/abalone_test.csv')) diff --git 
a/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/source_scripts/preprocessing/prepare_abalone_data/requirements.txt b/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/source_scripts/preprocessing/prepare_abalone_data/requirements.txt deleted file mode 100644 index e69de29b..00000000 diff --git a/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/source_scripts/training/README.md b/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/source_scripts/training/README.md new file mode 100644 index 00000000..31c36bb4 --- /dev/null +++ b/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/source_scripts/training/README.md @@ -0,0 +1 @@ +Use this folder to add all code related to training your model. diff --git a/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/source_scripts/training/deploy.R b/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/source_scripts/training/deploy.R new file mode 100644 index 00000000..e621fc72 --- /dev/null +++ b/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/source_scripts/training/deploy.R @@ -0,0 +1,39 @@ +# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +# +# SPDX-License-Identifier: MIT-0 +# +# Permission is hereby granted, free of charge, to any person obtaining a copy of this +# software and associated documentation files (the "Software"), to deal in the Software +# without restriction, including without limitation the rights to use, copy, modify, +# merge, publish, distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so. 
+# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, +# INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A +# PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + +library(plumber) +library(readr) +library(jsonlite) + +# load the trained model +prefix <- '/opt/ml/' +model_path <- paste0(prefix, 'model/model') +code_path <- paste0(prefix, 'model/code/') + +load(model_path) +print("Loaded model successfully") + +# function to use our model. You may require to transform data to make compatible with model +inference <- function(x){ + data = read_csv(x) + output <- predict(regressor, newdata=data) + list(output=output) +} + +app <- plumb(paste0(code_path,'endpoints.R')) +app$run(host='0.0.0.0', port=8080) \ No newline at end of file diff --git a/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/source_scripts/training/xgboost/__main__.py b/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/source_scripts/training/endpoints.R similarity index 73% rename from mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/source_scripts/training/xgboost/__main__.py rename to mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/source_scripts/training/endpoints.R index bc27f7d9..b75ca2b9 100644 --- a/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/source_scripts/training/xgboost/__main__.py +++ b/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/source_scripts/training/endpoints.R @@ -14,3 +14,24 @@ # HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION # OF CONTRACT, TORT OR 
OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + +#' Ping to show server is there +#' @get /ping +function() { + return('Alive') +} + + +#' Parse input and return prediction from model +#' @param req The http request sent +#' @post /invocations +function(req) { + + # Read in data + input_json <- fromJSON(req$postBody) + output <- inference(input_json$features) + # Return prediction + return(output) + +} \ No newline at end of file diff --git a/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/source_scripts/training/xgboost/test/test_a.py b/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/source_scripts/training/test/test_a.py similarity index 100% rename from mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/source_scripts/training/xgboost/test/test_a.py rename to mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/source_scripts/training/test/test_a.py diff --git a/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/source_scripts/training/train.R b/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/source_scripts/training/train.R new file mode 100644 index 00000000..d1ad7f98 --- /dev/null +++ b/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/source_scripts/training/train.R @@ -0,0 +1,52 @@ +# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +# +# SPDX-License-Identifier: MIT-0 +# +# Permission is hereby granted, free of charge, to any person obtaining a copy of this +# software and associated documentation files (the "Software"), to deal in the Software +# without restriction, including without limitation the rights to use, copy, modify, +# merge, publish, distribute, sublicense, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so. 
+# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, +# INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A +# PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + +library(readr) + +prefix <- '/opt/ml/' + +input_path <- paste0(prefix , 'input/data/train/') +output_path <- paste0(prefix, 'output/') +model_path <- paste0(prefix, 'model/') +code_path <- paste(prefix, 'code', sep='/') +inference_code_dir <- paste(model_path, 'code', sep='/') + + +abalone_train <- read_csv(paste0(input_path, 'abalone_train.csv')) +abalone_valid <- read_csv(paste0(input_path, 'abalone_valid.csv')) + +regressor = lm(formula = rings ~ female + male + length + diameter + height + whole_weight + shucked_weight + viscera_weight + shell_weight, data = abalone_train) +summary(regressor) + +y_pred= predict(regressor, newdata=abalone_valid[,-1]) +rmse <- sqrt(mean(((abalone_valid[,1] - y_pred)^2)[,])) +print(paste0("Calculated validation RMSE: ",rmse,";")) + + +# Save trained model +save(regressor, file = paste0(model_path,"model")) + +# Save inference code to be used with model +# find the files that you want +list_of_files <- list.files(code_path) + +# copy the files to the new folder +dir.create(inference_code_dir) +file.copy(list_of_files, inference_code_dir, recursive=TRUE) + +print("successfully saved model & code") \ No newline at end of file diff --git a/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/source_scripts/training/xgboost/README.md b/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/source_scripts/training/xgboost/README.md deleted file mode 100644 index e69de29b..00000000 diff --git 
a/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/source_scripts/training/xgboost/requirements.txt b/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/source_scripts/training/xgboost/requirements.txt deleted file mode 100644 index e69de29b..00000000 From 8fb963189ad9c731defb8e48e994dc635e609b6d Mon Sep 17 00:00:00 2001 From: Fatema Alkhanaizi Date: Thu, 4 Aug 2022 08:59:20 +0100 Subject: [PATCH 08/15] renamed folder name to mlops_sm_project_template --- .../.githooks/pre-commit | 0 .../.gitignore | 0 .../.pre-commit-config.yaml | 0 .../ADVANCED_TOPICS.md | 2 +- .../LICENSE.txt | 0 .../Makefile | 0 .../README.md | 10 +++++----- .../app.py | 6 +++--- .../cdk.json | 0 ...Architecture-mlops project cicd architecture.jpg | Bin ... Architecture-sagemaker project architecture.jpg | Bin .../diagrams/building.png | Bin .../diagrams/deployment.png | Bin .../mlops_sm_project_template}/__init__.py | 0 .../cdk_helper_scripts/zip-image/Dockerfile | 0 .../mlops_sm_project_template}/codecommit_stack.py | 4 ++-- .../mlops_sm_project_template}/config/constants.py | 0 .../mlops_sm_project_template}/pipeline_stack.py | 4 ++-- .../service_catalog_stack.py | 10 +++++----- .../mlops_sm_project_template}/ssm_construct.py | 2 +- .../templates/basic_project_stack.py | 6 +++--- .../build_pipeline_construct.py | 0 .../deploy_pipeline_construct.py | 0 .../templates/byoc_project_stack.py | 6 +++--- .../templates/dynamic_accounts_project_stack.py | 4 ++-- .../pipeline_constructs/build_pipeline_construct.py | 0 .../deploy_pipeline_construct.py | 0 .../requirements-dev.txt | 0 .../requirements.txt | 0 .../scripts/cdk-account-setup.sh | 2 +- .../scripts/install-prerequisites-brew.sh | 0 .../seed_code/build_app/.githooks/pre-commit | 0 .../seed_code/build_app/.pre-commit-config.yaml | 0 .../seed_code/build_app/Makefile | 0 .../seed_code/build_app/README.md | 0 .../seed_code/build_app/buildspec.yml | 0 
.../seed_code/build_app/ml_pipelines/README.md | 0 .../seed_code/build_app/ml_pipelines/__init__.py | 0 .../seed_code/build_app/ml_pipelines/__version__.py | 0 .../seed_code/build_app/ml_pipelines/_utils.py | 0 .../ml_pipelines/get_pipeline_definition.py | 0 .../build_app/ml_pipelines/run_pipeline.py | 0 .../build_app/ml_pipelines/training/README.md | 0 .../build_app/ml_pipelines/training/__init__.py | 0 .../build_app/ml_pipelines/training/_utils.py | 0 .../build_app/ml_pipelines/training/pipeline.py | 0 .../seed_code/build_app/notebooks/README.md | 0 .../build_app/notebooks/sm_pipelines_runbook.ipynb | 0 .../seed_code/build_app/setup.cfg | 0 .../seed_code/build_app/setup.py | 0 .../seed_code/build_app/source_scripts/README.md | 0 .../evaluate/evaluate_xgboost/README.md | 0 .../evaluate/evaluate_xgboost/main.py | 0 .../evaluate/evaluate_xgboost/requirements.txt | 0 .../build_app/source_scripts/helpers/README.md | 0 .../build_app/source_scripts/helpers/logger.py | 0 .../source_scripts/helpers/requirements.txt | 0 .../build_app/source_scripts/helpers/s3_helper.py | 0 .../build_app/source_scripts/helpers/test/test_a.py | 0 .../preprocessing/prepare_abalone_data/README.md | 0 .../preprocessing/prepare_abalone_data/main.py | 0 .../prepare_abalone_data/requirements.txt | 0 .../source_scripts/training/xgboost/README.md | 0 .../source_scripts/training/xgboost/__main__.py | 0 .../training/xgboost/requirements.txt | 0 .../source_scripts/training/xgboost/test/test_a.py | 0 .../seed_code/byoc_build_app/.githooks/pre-commit | 0 .../byoc_build_app/.pre-commit-config.yaml | 0 .../seed_code/byoc_build_app/Makefile | 0 .../seed_code/byoc_build_app/README.md | 0 .../seed_code/byoc_build_app/buildspec.yml | 0 .../seed_code/byoc_build_app/ml_pipelines/README.md | 0 .../byoc_build_app/ml_pipelines/__init__.py | 0 .../byoc_build_app/ml_pipelines/__version__.py | 0 .../seed_code/byoc_build_app/ml_pipelines/_utils.py | 0 .../ml_pipelines/get_pipeline_definition.py | 0 
.../byoc_build_app/ml_pipelines/run_pipeline.py | 0 .../byoc_build_app/ml_pipelines/training/README.md | 0 .../ml_pipelines/training/__init__.py | 0 .../byoc_build_app/ml_pipelines/training/_utils.py | 0 .../ml_pipelines/training/pipeline.py | 0 .../seed_code/byoc_build_app/notebooks/README.md | 0 .../seed_code/byoc_build_app/setup.cfg | 0 .../seed_code/byoc_build_app/setup.py | 0 .../byoc_build_app/source_scripts/Dockerfile | 0 .../byoc_build_app/source_scripts/README.md | 0 .../byoc_build_app/source_scripts/docker-build.sh | 0 .../source_scripts/docker_helpers/entrypoint.R | 0 .../source_scripts/docker_helpers/run.sh | 0 .../source_scripts/evaluate/README.md | 0 .../source_scripts/evaluate/evaluation.R | 0 .../byoc_build_app/source_scripts/helpers/README.md | 0 .../byoc_build_app/source_scripts/helpers/logger.py | 0 .../source_scripts/helpers/requirements.txt | 0 .../source_scripts/helpers/s3_helper.py | 0 .../source_scripts/helpers/test/test_a.py | 0 .../source_scripts/preprocessing/README.md | 0 .../prepare_abalone_data/preprocessing.R | 0 .../source_scripts/repository-info.json | 0 .../source_scripts/training/README.md | 0 .../byoc_build_app/source_scripts/training/deploy.R | 0 .../source_scripts/training/endpoints.R | 0 .../source_scripts/training/test/test_a.py | 0 .../byoc_build_app/source_scripts/training/train.R | 0 .../seed_code/deploy_app/.githooks/pre-commit | 0 .../seed_code/deploy_app/.pre-commit-config.yaml | 0 .../seed_code/deploy_app/Makefile | 0 .../seed_code/deploy_app/README.md | 0 .../seed_code/deploy_app/app.py | 0 .../seed_code/deploy_app/cdk.json | 0 .../seed_code/deploy_app/config/config_mux.py | 0 .../seed_code/deploy_app/config/constants.py | 0 .../seed_code/deploy_app/config/dev/constants.py | 0 .../deploy_app/config/dev/endpoint-config.yml | 0 .../seed_code/deploy_app/config/prod/constants.py | 0 .../deploy_app/config/prod/endpoint-config.yml | 0 .../deploy_app/config/staging/constants.py | 0 
.../deploy_app/config/staging/endpoint-config.yml | 0 .../deploy_app/deploy_endpoint/__init__.py | 0 .../deploy_endpoint/deploy_endpoint_stack.py | 0 .../deploy_endpoint/get_approved_package.py | 0 .../seed_code/deploy_app/requirements-dev.txt | 0 .../seed_code/deploy_app/requirements.txt | 0 .../seed_code/deploy_app/source.bat | 0 .../seed_code/deploy_app/tests/README.md | 0 .../deploy_app/tests/integration_tests/__init__.py | 0 .../tests/integration_tests/buildspec.yml | 0 .../tests/integration_tests/endpoint_test.py | 0 .../deploy_app/tests/unittests/__init__.py | 0 .../tests/unittests/test_deploy_app_stack.py | 0 .../source.bat | 0 .../tests/__init__.py | 0 .../tests/unit/__init__.py | 0 .../tests/unit/test_mlops_batch_v2_stack.py | 4 ++-- 134 files changed, 30 insertions(+), 30 deletions(-) rename mlops-multi-account-cdk/{mlops-sm-project-template-rt => mlops-sm-project-template}/.githooks/pre-commit (100%) rename mlops-multi-account-cdk/{mlops-sm-project-template-rt => mlops-sm-project-template}/.gitignore (100%) rename mlops-multi-account-cdk/{mlops-sm-project-template-rt => mlops-sm-project-template}/.pre-commit-config.yaml (100%) rename mlops-multi-account-cdk/{mlops-sm-project-template-rt => mlops-sm-project-template}/ADVANCED_TOPICS.md (96%) rename mlops-multi-account-cdk/{mlops-sm-project-template-rt => mlops-sm-project-template}/LICENSE.txt (100%) rename mlops-multi-account-cdk/{mlops-sm-project-template-rt => mlops-sm-project-template}/Makefile (100%) rename mlops-multi-account-cdk/{mlops-sm-project-template-rt => mlops-sm-project-template}/README.md (95%) rename mlops-multi-account-cdk/{mlops-sm-project-template-rt => mlops-sm-project-template}/app.py (85%) rename mlops-multi-account-cdk/{mlops-sm-project-template-rt => mlops-sm-project-template}/cdk.json (100%) rename mlops-multi-account-cdk/{mlops-sm-project-template-rt => mlops-sm-project-template}/diagrams/MLOPs Foundation Architecture-mlops project cicd architecture.jpg (100%) rename 
mlops-multi-account-cdk/{mlops-sm-project-template-rt => mlops-sm-project-template}/diagrams/MLOPs Foundation Architecture-sagemaker project architecture.jpg (100%) rename mlops-multi-account-cdk/{mlops-sm-project-template-rt => mlops-sm-project-template}/diagrams/building.png (100%) rename mlops-multi-account-cdk/{mlops-sm-project-template-rt => mlops-sm-project-template}/diagrams/deployment.png (100%) rename mlops-multi-account-cdk/{mlops-sm-project-template-rt/mlops_sm_project_template_rt => mlops-sm-project-template/mlops_sm_project_template}/__init__.py (100%) rename mlops-multi-account-cdk/{mlops-sm-project-template-rt/mlops_sm_project_template_rt => mlops-sm-project-template/mlops_sm_project_template}/cdk_helper_scripts/zip-image/Dockerfile (100%) rename mlops-multi-account-cdk/{mlops-sm-project-template-rt/mlops_sm_project_template_rt => mlops-sm-project-template/mlops_sm_project_template}/codecommit_stack.py (96%) rename mlops-multi-account-cdk/{mlops-sm-project-template-rt/mlops_sm_project_template_rt => mlops-sm-project-template/mlops_sm_project_template}/config/constants.py (100%) rename mlops-multi-account-cdk/{mlops-sm-project-template-rt/mlops_sm_project_template_rt => mlops-sm-project-template/mlops_sm_project_template}/pipeline_stack.py (97%) rename mlops-multi-account-cdk/{mlops-sm-project-template-rt/mlops_sm_project_template_rt => mlops-sm-project-template/mlops_sm_project_template}/service_catalog_stack.py (97%) rename mlops-multi-account-cdk/{mlops-sm-project-template-rt/mlops_sm_project_template_rt => mlops-sm-project-template/mlops_sm_project_template}/ssm_construct.py (97%) rename mlops-multi-account-cdk/{mlops-sm-project-template-rt/mlops_sm_project_template_rt => mlops-sm-project-template/mlops_sm_project_template}/templates/basic_project_stack.py (96%) rename mlops-multi-account-cdk/{mlops-sm-project-template-rt/mlops_sm_project_template_rt => 
mlops-sm-project-template/mlops_sm_project_template}/templates/byoc_pipeline_constructs/build_pipeline_construct.py (100%) rename mlops-multi-account-cdk/{mlops-sm-project-template-rt/mlops_sm_project_template_rt => mlops-sm-project-template/mlops_sm_project_template}/templates/byoc_pipeline_constructs/deploy_pipeline_construct.py (100%) rename mlops-multi-account-cdk/{mlops-sm-project-template-rt/mlops_sm_project_template_rt => mlops-sm-project-template/mlops_sm_project_template}/templates/byoc_project_stack.py (97%) rename mlops-multi-account-cdk/{mlops-sm-project-template-rt/mlops_sm_project_template_rt => mlops-sm-project-template/mlops_sm_project_template}/templates/dynamic_accounts_project_stack.py (98%) rename mlops-multi-account-cdk/{mlops-sm-project-template-rt/mlops_sm_project_template_rt => mlops-sm-project-template/mlops_sm_project_template}/templates/pipeline_constructs/build_pipeline_construct.py (100%) rename mlops-multi-account-cdk/{mlops-sm-project-template-rt/mlops_sm_project_template_rt => mlops-sm-project-template/mlops_sm_project_template}/templates/pipeline_constructs/deploy_pipeline_construct.py (100%) rename mlops-multi-account-cdk/{mlops-sm-project-template-rt => mlops-sm-project-template}/requirements-dev.txt (100%) rename mlops-multi-account-cdk/{mlops-sm-project-template-rt => mlops-sm-project-template}/requirements.txt (100%) rename mlops-multi-account-cdk/{mlops-sm-project-template-rt => mlops-sm-project-template}/scripts/cdk-account-setup.sh (96%) rename mlops-multi-account-cdk/{mlops-sm-project-template-rt => mlops-sm-project-template}/scripts/install-prerequisites-brew.sh (100%) rename mlops-multi-account-cdk/{mlops-sm-project-template-rt => mlops-sm-project-template}/seed_code/build_app/.githooks/pre-commit (100%) rename mlops-multi-account-cdk/{mlops-sm-project-template-rt => mlops-sm-project-template}/seed_code/build_app/.pre-commit-config.yaml (100%) rename mlops-multi-account-cdk/{mlops-sm-project-template-rt => 
mlops-sm-project-template}/seed_code/build_app/Makefile (100%) rename mlops-multi-account-cdk/{mlops-sm-project-template-rt => mlops-sm-project-template}/seed_code/build_app/README.md (100%) rename mlops-multi-account-cdk/{mlops-sm-project-template-rt => mlops-sm-project-template}/seed_code/build_app/buildspec.yml (100%) rename mlops-multi-account-cdk/{mlops-sm-project-template-rt => mlops-sm-project-template}/seed_code/build_app/ml_pipelines/README.md (100%) rename mlops-multi-account-cdk/{mlops-sm-project-template-rt => mlops-sm-project-template}/seed_code/build_app/ml_pipelines/__init__.py (100%) rename mlops-multi-account-cdk/{mlops-sm-project-template-rt => mlops-sm-project-template}/seed_code/build_app/ml_pipelines/__version__.py (100%) rename mlops-multi-account-cdk/{mlops-sm-project-template-rt => mlops-sm-project-template}/seed_code/build_app/ml_pipelines/_utils.py (100%) rename mlops-multi-account-cdk/{mlops-sm-project-template-rt => mlops-sm-project-template}/seed_code/build_app/ml_pipelines/get_pipeline_definition.py (100%) rename mlops-multi-account-cdk/{mlops-sm-project-template-rt => mlops-sm-project-template}/seed_code/build_app/ml_pipelines/run_pipeline.py (100%) rename mlops-multi-account-cdk/{mlops-sm-project-template-rt => mlops-sm-project-template}/seed_code/build_app/ml_pipelines/training/README.md (100%) rename mlops-multi-account-cdk/{mlops-sm-project-template-rt => mlops-sm-project-template}/seed_code/build_app/ml_pipelines/training/__init__.py (100%) rename mlops-multi-account-cdk/{mlops-sm-project-template-rt => mlops-sm-project-template}/seed_code/build_app/ml_pipelines/training/_utils.py (100%) rename mlops-multi-account-cdk/{mlops-sm-project-template-rt => mlops-sm-project-template}/seed_code/build_app/ml_pipelines/training/pipeline.py (100%) rename mlops-multi-account-cdk/{mlops-sm-project-template-rt => mlops-sm-project-template}/seed_code/build_app/notebooks/README.md (100%) rename 
mlops-multi-account-cdk/{mlops-sm-project-template-rt => mlops-sm-project-template}/seed_code/build_app/notebooks/sm_pipelines_runbook.ipynb (100%) rename mlops-multi-account-cdk/{mlops-sm-project-template-rt => mlops-sm-project-template}/seed_code/build_app/setup.cfg (100%) rename mlops-multi-account-cdk/{mlops-sm-project-template-rt => mlops-sm-project-template}/seed_code/build_app/setup.py (100%) rename mlops-multi-account-cdk/{mlops-sm-project-template-rt => mlops-sm-project-template}/seed_code/build_app/source_scripts/README.md (100%) rename mlops-multi-account-cdk/{mlops-sm-project-template-rt => mlops-sm-project-template}/seed_code/build_app/source_scripts/evaluate/evaluate_xgboost/README.md (100%) rename mlops-multi-account-cdk/{mlops-sm-project-template-rt => mlops-sm-project-template}/seed_code/build_app/source_scripts/evaluate/evaluate_xgboost/main.py (100%) rename mlops-multi-account-cdk/{mlops-sm-project-template-rt => mlops-sm-project-template}/seed_code/build_app/source_scripts/evaluate/evaluate_xgboost/requirements.txt (100%) rename mlops-multi-account-cdk/{mlops-sm-project-template-rt => mlops-sm-project-template}/seed_code/build_app/source_scripts/helpers/README.md (100%) rename mlops-multi-account-cdk/{mlops-sm-project-template-rt => mlops-sm-project-template}/seed_code/build_app/source_scripts/helpers/logger.py (100%) rename mlops-multi-account-cdk/{mlops-sm-project-template-rt => mlops-sm-project-template}/seed_code/build_app/source_scripts/helpers/requirements.txt (100%) rename mlops-multi-account-cdk/{mlops-sm-project-template-rt => mlops-sm-project-template}/seed_code/build_app/source_scripts/helpers/s3_helper.py (100%) rename mlops-multi-account-cdk/{mlops-sm-project-template-rt => mlops-sm-project-template}/seed_code/build_app/source_scripts/helpers/test/test_a.py (100%) rename mlops-multi-account-cdk/{mlops-sm-project-template-rt => mlops-sm-project-template}/seed_code/build_app/source_scripts/preprocessing/prepare_abalone_data/README.md 
(100%) rename mlops-multi-account-cdk/{mlops-sm-project-template-rt => mlops-sm-project-template}/seed_code/build_app/source_scripts/preprocessing/prepare_abalone_data/main.py (100%) rename mlops-multi-account-cdk/{mlops-sm-project-template-rt => mlops-sm-project-template}/seed_code/build_app/source_scripts/preprocessing/prepare_abalone_data/requirements.txt (100%) rename mlops-multi-account-cdk/{mlops-sm-project-template-rt => mlops-sm-project-template}/seed_code/build_app/source_scripts/training/xgboost/README.md (100%) rename mlops-multi-account-cdk/{mlops-sm-project-template-rt => mlops-sm-project-template}/seed_code/build_app/source_scripts/training/xgboost/__main__.py (100%) rename mlops-multi-account-cdk/{mlops-sm-project-template-rt => mlops-sm-project-template}/seed_code/build_app/source_scripts/training/xgboost/requirements.txt (100%) rename mlops-multi-account-cdk/{mlops-sm-project-template-rt => mlops-sm-project-template}/seed_code/build_app/source_scripts/training/xgboost/test/test_a.py (100%) rename mlops-multi-account-cdk/{mlops-sm-project-template-rt => mlops-sm-project-template}/seed_code/byoc_build_app/.githooks/pre-commit (100%) rename mlops-multi-account-cdk/{mlops-sm-project-template-rt => mlops-sm-project-template}/seed_code/byoc_build_app/.pre-commit-config.yaml (100%) rename mlops-multi-account-cdk/{mlops-sm-project-template-rt => mlops-sm-project-template}/seed_code/byoc_build_app/Makefile (100%) rename mlops-multi-account-cdk/{mlops-sm-project-template-rt => mlops-sm-project-template}/seed_code/byoc_build_app/README.md (100%) rename mlops-multi-account-cdk/{mlops-sm-project-template-rt => mlops-sm-project-template}/seed_code/byoc_build_app/buildspec.yml (100%) rename mlops-multi-account-cdk/{mlops-sm-project-template-rt => mlops-sm-project-template}/seed_code/byoc_build_app/ml_pipelines/README.md (100%) rename mlops-multi-account-cdk/{mlops-sm-project-template-rt => 
mlops-sm-project-template}/seed_code/byoc_build_app/ml_pipelines/__init__.py (100%) rename mlops-multi-account-cdk/{mlops-sm-project-template-rt => mlops-sm-project-template}/seed_code/byoc_build_app/ml_pipelines/__version__.py (100%) rename mlops-multi-account-cdk/{mlops-sm-project-template-rt => mlops-sm-project-template}/seed_code/byoc_build_app/ml_pipelines/_utils.py (100%) rename mlops-multi-account-cdk/{mlops-sm-project-template-rt => mlops-sm-project-template}/seed_code/byoc_build_app/ml_pipelines/get_pipeline_definition.py (100%) rename mlops-multi-account-cdk/{mlops-sm-project-template-rt => mlops-sm-project-template}/seed_code/byoc_build_app/ml_pipelines/run_pipeline.py (100%) rename mlops-multi-account-cdk/{mlops-sm-project-template-rt => mlops-sm-project-template}/seed_code/byoc_build_app/ml_pipelines/training/README.md (100%) rename mlops-multi-account-cdk/{mlops-sm-project-template-rt => mlops-sm-project-template}/seed_code/byoc_build_app/ml_pipelines/training/__init__.py (100%) rename mlops-multi-account-cdk/{mlops-sm-project-template-rt => mlops-sm-project-template}/seed_code/byoc_build_app/ml_pipelines/training/_utils.py (100%) rename mlops-multi-account-cdk/{mlops-sm-project-template-rt => mlops-sm-project-template}/seed_code/byoc_build_app/ml_pipelines/training/pipeline.py (100%) rename mlops-multi-account-cdk/{mlops-sm-project-template-rt => mlops-sm-project-template}/seed_code/byoc_build_app/notebooks/README.md (100%) rename mlops-multi-account-cdk/{mlops-sm-project-template-rt => mlops-sm-project-template}/seed_code/byoc_build_app/setup.cfg (100%) rename mlops-multi-account-cdk/{mlops-sm-project-template-rt => mlops-sm-project-template}/seed_code/byoc_build_app/setup.py (100%) rename mlops-multi-account-cdk/{mlops-sm-project-template-rt => mlops-sm-project-template}/seed_code/byoc_build_app/source_scripts/Dockerfile (100%) rename mlops-multi-account-cdk/{mlops-sm-project-template-rt => 
mlops-sm-project-template}/seed_code/byoc_build_app/source_scripts/README.md (100%) rename mlops-multi-account-cdk/{mlops-sm-project-template-rt => mlops-sm-project-template}/seed_code/byoc_build_app/source_scripts/docker-build.sh (100%) rename mlops-multi-account-cdk/{mlops-sm-project-template-rt => mlops-sm-project-template}/seed_code/byoc_build_app/source_scripts/docker_helpers/entrypoint.R (100%) rename mlops-multi-account-cdk/{mlops-sm-project-template-rt => mlops-sm-project-template}/seed_code/byoc_build_app/source_scripts/docker_helpers/run.sh (100%) rename mlops-multi-account-cdk/{mlops-sm-project-template-rt => mlops-sm-project-template}/seed_code/byoc_build_app/source_scripts/evaluate/README.md (100%) rename mlops-multi-account-cdk/{mlops-sm-project-template-rt => mlops-sm-project-template}/seed_code/byoc_build_app/source_scripts/evaluate/evaluation.R (100%) rename mlops-multi-account-cdk/{mlops-sm-project-template-rt => mlops-sm-project-template}/seed_code/byoc_build_app/source_scripts/helpers/README.md (100%) rename mlops-multi-account-cdk/{mlops-sm-project-template-rt => mlops-sm-project-template}/seed_code/byoc_build_app/source_scripts/helpers/logger.py (100%) rename mlops-multi-account-cdk/{mlops-sm-project-template-rt => mlops-sm-project-template}/seed_code/byoc_build_app/source_scripts/helpers/requirements.txt (100%) rename mlops-multi-account-cdk/{mlops-sm-project-template-rt => mlops-sm-project-template}/seed_code/byoc_build_app/source_scripts/helpers/s3_helper.py (100%) rename mlops-multi-account-cdk/{mlops-sm-project-template-rt => mlops-sm-project-template}/seed_code/byoc_build_app/source_scripts/helpers/test/test_a.py (100%) rename mlops-multi-account-cdk/{mlops-sm-project-template-rt => mlops-sm-project-template}/seed_code/byoc_build_app/source_scripts/preprocessing/README.md (100%) rename mlops-multi-account-cdk/{mlops-sm-project-template-rt => 
mlops-sm-project-template}/seed_code/byoc_build_app/source_scripts/preprocessing/prepare_abalone_data/preprocessing.R (100%) rename mlops-multi-account-cdk/{mlops-sm-project-template-rt => mlops-sm-project-template}/seed_code/byoc_build_app/source_scripts/repository-info.json (100%) rename mlops-multi-account-cdk/{mlops-sm-project-template-rt => mlops-sm-project-template}/seed_code/byoc_build_app/source_scripts/training/README.md (100%) rename mlops-multi-account-cdk/{mlops-sm-project-template-rt => mlops-sm-project-template}/seed_code/byoc_build_app/source_scripts/training/deploy.R (100%) rename mlops-multi-account-cdk/{mlops-sm-project-template-rt => mlops-sm-project-template}/seed_code/byoc_build_app/source_scripts/training/endpoints.R (100%) rename mlops-multi-account-cdk/{mlops-sm-project-template-rt => mlops-sm-project-template}/seed_code/byoc_build_app/source_scripts/training/test/test_a.py (100%) rename mlops-multi-account-cdk/{mlops-sm-project-template-rt => mlops-sm-project-template}/seed_code/byoc_build_app/source_scripts/training/train.R (100%) rename mlops-multi-account-cdk/{mlops-sm-project-template-rt => mlops-sm-project-template}/seed_code/deploy_app/.githooks/pre-commit (100%) rename mlops-multi-account-cdk/{mlops-sm-project-template-rt => mlops-sm-project-template}/seed_code/deploy_app/.pre-commit-config.yaml (100%) rename mlops-multi-account-cdk/{mlops-sm-project-template-rt => mlops-sm-project-template}/seed_code/deploy_app/Makefile (100%) rename mlops-multi-account-cdk/{mlops-sm-project-template-rt => mlops-sm-project-template}/seed_code/deploy_app/README.md (100%) rename mlops-multi-account-cdk/{mlops-sm-project-template-rt => mlops-sm-project-template}/seed_code/deploy_app/app.py (100%) rename mlops-multi-account-cdk/{mlops-sm-project-template-rt => mlops-sm-project-template}/seed_code/deploy_app/cdk.json (100%) rename mlops-multi-account-cdk/{mlops-sm-project-template-rt => mlops-sm-project-template}/seed_code/deploy_app/config/config_mux.py 
(100%) rename mlops-multi-account-cdk/{mlops-sm-project-template-rt => mlops-sm-project-template}/seed_code/deploy_app/config/constants.py (100%) rename mlops-multi-account-cdk/{mlops-sm-project-template-rt => mlops-sm-project-template}/seed_code/deploy_app/config/dev/constants.py (100%) rename mlops-multi-account-cdk/{mlops-sm-project-template-rt => mlops-sm-project-template}/seed_code/deploy_app/config/dev/endpoint-config.yml (100%) rename mlops-multi-account-cdk/{mlops-sm-project-template-rt => mlops-sm-project-template}/seed_code/deploy_app/config/prod/constants.py (100%) rename mlops-multi-account-cdk/{mlops-sm-project-template-rt => mlops-sm-project-template}/seed_code/deploy_app/config/prod/endpoint-config.yml (100%) rename mlops-multi-account-cdk/{mlops-sm-project-template-rt => mlops-sm-project-template}/seed_code/deploy_app/config/staging/constants.py (100%) rename mlops-multi-account-cdk/{mlops-sm-project-template-rt => mlops-sm-project-template}/seed_code/deploy_app/config/staging/endpoint-config.yml (100%) rename mlops-multi-account-cdk/{mlops-sm-project-template-rt => mlops-sm-project-template}/seed_code/deploy_app/deploy_endpoint/__init__.py (100%) rename mlops-multi-account-cdk/{mlops-sm-project-template-rt => mlops-sm-project-template}/seed_code/deploy_app/deploy_endpoint/deploy_endpoint_stack.py (100%) rename mlops-multi-account-cdk/{mlops-sm-project-template-rt => mlops-sm-project-template}/seed_code/deploy_app/deploy_endpoint/get_approved_package.py (100%) rename mlops-multi-account-cdk/{mlops-sm-project-template-rt => mlops-sm-project-template}/seed_code/deploy_app/requirements-dev.txt (100%) rename mlops-multi-account-cdk/{mlops-sm-project-template-rt => mlops-sm-project-template}/seed_code/deploy_app/requirements.txt (100%) rename mlops-multi-account-cdk/{mlops-sm-project-template-rt => mlops-sm-project-template}/seed_code/deploy_app/source.bat (100%) rename mlops-multi-account-cdk/{mlops-sm-project-template-rt => 
mlops-sm-project-template}/seed_code/deploy_app/tests/README.md (100%) rename mlops-multi-account-cdk/{mlops-sm-project-template-rt => mlops-sm-project-template}/seed_code/deploy_app/tests/integration_tests/__init__.py (100%) rename mlops-multi-account-cdk/{mlops-sm-project-template-rt => mlops-sm-project-template}/seed_code/deploy_app/tests/integration_tests/buildspec.yml (100%) rename mlops-multi-account-cdk/{mlops-sm-project-template-rt => mlops-sm-project-template}/seed_code/deploy_app/tests/integration_tests/endpoint_test.py (100%) rename mlops-multi-account-cdk/{mlops-sm-project-template-rt => mlops-sm-project-template}/seed_code/deploy_app/tests/unittests/__init__.py (100%) rename mlops-multi-account-cdk/{mlops-sm-project-template-rt => mlops-sm-project-template}/seed_code/deploy_app/tests/unittests/test_deploy_app_stack.py (100%) rename mlops-multi-account-cdk/{mlops-sm-project-template-rt => mlops-sm-project-template}/source.bat (100%) rename mlops-multi-account-cdk/{mlops-sm-project-template-rt => mlops-sm-project-template}/tests/__init__.py (100%) rename mlops-multi-account-cdk/{mlops-sm-project-template-rt => mlops-sm-project-template}/tests/unit/__init__.py (100%) rename mlops-multi-account-cdk/{mlops-sm-project-template-rt => mlops-sm-project-template}/tests/unit/test_mlops_batch_v2_stack.py (89%) diff --git a/mlops-multi-account-cdk/mlops-sm-project-template-rt/.githooks/pre-commit b/mlops-multi-account-cdk/mlops-sm-project-template/.githooks/pre-commit similarity index 100% rename from mlops-multi-account-cdk/mlops-sm-project-template-rt/.githooks/pre-commit rename to mlops-multi-account-cdk/mlops-sm-project-template/.githooks/pre-commit diff --git a/mlops-multi-account-cdk/mlops-sm-project-template-rt/.gitignore b/mlops-multi-account-cdk/mlops-sm-project-template/.gitignore similarity index 100% rename from mlops-multi-account-cdk/mlops-sm-project-template-rt/.gitignore rename to mlops-multi-account-cdk/mlops-sm-project-template/.gitignore diff 
--git a/mlops-multi-account-cdk/mlops-sm-project-template-rt/.pre-commit-config.yaml b/mlops-multi-account-cdk/mlops-sm-project-template/.pre-commit-config.yaml similarity index 100% rename from mlops-multi-account-cdk/mlops-sm-project-template-rt/.pre-commit-config.yaml rename to mlops-multi-account-cdk/mlops-sm-project-template/.pre-commit-config.yaml diff --git a/mlops-multi-account-cdk/mlops-sm-project-template-rt/ADVANCED_TOPICS.md b/mlops-multi-account-cdk/mlops-sm-project-template/ADVANCED_TOPICS.md similarity index 96% rename from mlops-multi-account-cdk/mlops-sm-project-template-rt/ADVANCED_TOPICS.md rename to mlops-multi-account-cdk/mlops-sm-project-template/ADVANCED_TOPICS.md index 9f86a1b4..0f33102e 100644 --- a/mlops-multi-account-cdk/mlops-sm-project-template-rt/ADVANCED_TOPICS.md +++ b/mlops-multi-account-cdk/mlops-sm-project-template/ADVANCED_TOPICS.md @@ -40,7 +40,7 @@ This command will deploy the byoc project stack if you want to deploy other temp It is also possible to use CDK command for this exact purpose but this would require you to add the following to `app.py` file: ``` -from mlops_sm_project_template_rt.templates.byoc_project_stack import MLOpsStack +from mlops_sm_project_template.templates.byoc_project_stack import MLOpsStack MLOpsStack( app, diff --git a/mlops-multi-account-cdk/mlops-sm-project-template-rt/LICENSE.txt b/mlops-multi-account-cdk/mlops-sm-project-template/LICENSE.txt similarity index 100% rename from mlops-multi-account-cdk/mlops-sm-project-template-rt/LICENSE.txt rename to mlops-multi-account-cdk/mlops-sm-project-template/LICENSE.txt diff --git a/mlops-multi-account-cdk/mlops-sm-project-template-rt/Makefile b/mlops-multi-account-cdk/mlops-sm-project-template/Makefile similarity index 100% rename from mlops-multi-account-cdk/mlops-sm-project-template-rt/Makefile rename to mlops-multi-account-cdk/mlops-sm-project-template/Makefile diff --git a/mlops-multi-account-cdk/mlops-sm-project-template-rt/README.md 
b/mlops-multi-account-cdk/mlops-sm-project-template/README.md similarity index 95% rename from mlops-multi-account-cdk/mlops-sm-project-template-rt/README.md rename to mlops-multi-account-cdk/mlops-sm-project-template/README.md index 9c1ad3d1..03404521 100644 --- a/mlops-multi-account-cdk/mlops-sm-project-template-rt/README.md +++ b/mlops-multi-account-cdk/mlops-sm-project-template/README.md @@ -120,12 +120,12 @@ There are 2 way to trigger the deployment CI/CD Pipeline: - **Model Events** - These are events which get triggered through a status change to the model package group in SageMaker Model Registry. - **Code Events** - The pipeline is triggered on git update events over a specific branch, in this solution it is linked to the **main** branch. -**Note:** For the deployment stages for **PREPROD** and **PROD**, the roles defined for cloudformation deployment in `mlops_sm_project_template_rt/templates/constructs/deploy_pipeline_construct.py` lines 284-292 and lines 317-326 are created when the **PREPROD** and **PROD** are bootstrapped with CDK with trust policies for the deployment CI/CD pipeline account (**DEV** account in our solution); the roles must be created before deploying this stack to any account along with trust policies included between the accounts and the roles. If you can bootstrap those accounts for any reason you should ensure to create similar roles in each of those accounts and adding them to the lines mentioned above in the file. 
+**Note:** For the deployment stages for **PREPROD** and **PROD**, the roles defined for cloudformation deployment in `mlops_sm_project_template/templates/constructs/deploy_pipeline_construct.py` lines 284-292 and lines 317-326 are created when the **PREPROD** and **PROD** are bootstrapped with CDK with trust policies for the deployment CI/CD pipeline account (**DEV** account in our solution); the roles must be created before deploying this stack to any account along with trust policies included between the accounts and the roles. If you can bootstrap those accounts for any reason you should ensure to create similar roles in each of those accounts and adding them to the lines mentioned above in the file. ### CodeCommit Stack *This stack is only needed if you want to handle deployments of this folder of the repository to be managed through a CICD pipeline.* -This stack handles setting up an AWS CodeCommit repository for this folder of the repository. This repository will be used as the source for the CI/CD pipeline defined in [Pipeline Stack](#pipeline-stack). The repository will be named based on the value defined in `mlops_sm_project_template_rt/config/constants.py` with this variable `CODE_COMMIT_REPO_NAME`. The repository will be intialised with a default branch as defined in the `constants.py` file under `PIPELINE_BRANCH` variable. +This stack handles setting up an AWS CodeCommit repository for this folder of the repository. This repository will be used as the source for the CI/CD pipeline defined in [Pipeline Stack](#pipeline-stack). The repository will be named based on the value defined in `mlops_sm_project_template/config/constants.py` with this variable `CODE_COMMIT_REPO_NAME`. The repository will be intialised with a default branch as defined in the `constants.py` file under `PIPELINE_BRANCH` variable. 
### Pipeline Stack @@ -133,7 +133,7 @@ This stack handles setting up an AWS CodeCommit repository for this folder of th The CICD pipeline in this repository is setup to monitor an AWS CodeCommit repository as defined in [CodeCommit Stack](#codecommit-stack). -If you are using other sources like github or bitbucket for your repository, you will need to modify the connection to the appropriate repository as defined in `mlops_sm_project_template_rt/pipeline_stack.py`. This can be done using AWS CodeStar but must be setup on the account. +If you are using other sources like github or bitbucket for your repository, you will need to modify the connection to the appropriate repository as defined in `mlops_sm_project_template/pipeline_stack.py`. This can be done using AWS CodeStar but must be setup on the account. Make sure the pipelines also point to your targeted branch; by default the pipeline is linked to `main` branch events, this is defined in the `constants.py` file under `PIPELINE_BRANCH` variable. @@ -164,7 +164,7 @@ This is an AWS CDK project written in Python 3.8. Here's what you need to have o ├── app.py ├── cdk.json ├── diagrams -├── mlops_sm_project_template_rt +├── mlops_sm_project_template │   ├── README.md │   ├── __init__.py │   ├── cdk_helper_scripts @@ -221,7 +221,7 @@ aws_session_token = YOUR_SESSION_TOKEN # this token is generated if you are usi ... ``` -Before you start with the deployment of the solution make sure to bootstrap your accounts. Ensure you add the account details in `mlops_sm_project_template_rt/config/constants.py` mainly the target deployment accounts: **DEV**, **PREPROD** and **PROD**. +Before you start with the deployment of the solution make sure to bootstrap your accounts. Ensure you add the account details in `mlops_sm_project_template/config/constants.py` mainly the target deployment accounts: **DEV**, **PREPROD** and **PROD**. 
``` PIPELINE_ACCOUNT = "" # account to host the pipeline handling updates of this repository diff --git a/mlops-multi-account-cdk/mlops-sm-project-template-rt/app.py b/mlops-multi-account-cdk/mlops-sm-project-template/app.py similarity index 85% rename from mlops-multi-account-cdk/mlops-sm-project-template-rt/app.py rename to mlops-multi-account-cdk/mlops-sm-project-template/app.py index 889a0340..bcfb24ef 100644 --- a/mlops-multi-account-cdk/mlops-sm-project-template-rt/app.py +++ b/mlops-multi-account-cdk/mlops-sm-project-template/app.py @@ -18,9 +18,9 @@ import aws_cdk as cdk import os -from mlops_sm_project_template_rt.pipeline_stack import PipelineStack, CoreStage -from mlops_sm_project_template_rt.codecommit_stack import CodeCommitStack -from mlops_sm_project_template_rt.config.constants import DEFAULT_DEPLOYMENT_REGION, PIPELINE_ACCOUNT, DEV_ACCOUNT +from mlops_sm_project_template.pipeline_stack import PipelineStack, CoreStage +from mlops_sm_project_template.codecommit_stack import CodeCommitStack +from mlops_sm_project_template.config.constants import DEFAULT_DEPLOYMENT_REGION, PIPELINE_ACCOUNT, DEV_ACCOUNT app = cdk.App() diff --git a/mlops-multi-account-cdk/mlops-sm-project-template-rt/cdk.json b/mlops-multi-account-cdk/mlops-sm-project-template/cdk.json similarity index 100% rename from mlops-multi-account-cdk/mlops-sm-project-template-rt/cdk.json rename to mlops-multi-account-cdk/mlops-sm-project-template/cdk.json diff --git a/mlops-multi-account-cdk/mlops-sm-project-template-rt/diagrams/MLOPs Foundation Architecture-mlops project cicd architecture.jpg b/mlops-multi-account-cdk/mlops-sm-project-template/diagrams/MLOPs Foundation Architecture-mlops project cicd architecture.jpg similarity index 100% rename from mlops-multi-account-cdk/mlops-sm-project-template-rt/diagrams/MLOPs Foundation Architecture-mlops project cicd architecture.jpg rename to mlops-multi-account-cdk/mlops-sm-project-template/diagrams/MLOPs Foundation Architecture-mlops project cicd 
architecture.jpg diff --git a/mlops-multi-account-cdk/mlops-sm-project-template-rt/diagrams/MLOPs Foundation Architecture-sagemaker project architecture.jpg b/mlops-multi-account-cdk/mlops-sm-project-template/diagrams/MLOPs Foundation Architecture-sagemaker project architecture.jpg similarity index 100% rename from mlops-multi-account-cdk/mlops-sm-project-template-rt/diagrams/MLOPs Foundation Architecture-sagemaker project architecture.jpg rename to mlops-multi-account-cdk/mlops-sm-project-template/diagrams/MLOPs Foundation Architecture-sagemaker project architecture.jpg diff --git a/mlops-multi-account-cdk/mlops-sm-project-template-rt/diagrams/building.png b/mlops-multi-account-cdk/mlops-sm-project-template/diagrams/building.png similarity index 100% rename from mlops-multi-account-cdk/mlops-sm-project-template-rt/diagrams/building.png rename to mlops-multi-account-cdk/mlops-sm-project-template/diagrams/building.png diff --git a/mlops-multi-account-cdk/mlops-sm-project-template-rt/diagrams/deployment.png b/mlops-multi-account-cdk/mlops-sm-project-template/diagrams/deployment.png similarity index 100% rename from mlops-multi-account-cdk/mlops-sm-project-template-rt/diagrams/deployment.png rename to mlops-multi-account-cdk/mlops-sm-project-template/diagrams/deployment.png diff --git a/mlops-multi-account-cdk/mlops-sm-project-template-rt/mlops_sm_project_template_rt/__init__.py b/mlops-multi-account-cdk/mlops-sm-project-template/mlops_sm_project_template/__init__.py similarity index 100% rename from mlops-multi-account-cdk/mlops-sm-project-template-rt/mlops_sm_project_template_rt/__init__.py rename to mlops-multi-account-cdk/mlops-sm-project-template/mlops_sm_project_template/__init__.py diff --git a/mlops-multi-account-cdk/mlops-sm-project-template-rt/mlops_sm_project_template_rt/cdk_helper_scripts/zip-image/Dockerfile b/mlops-multi-account-cdk/mlops-sm-project-template/mlops_sm_project_template/cdk_helper_scripts/zip-image/Dockerfile similarity index 100% rename 
from mlops-multi-account-cdk/mlops-sm-project-template-rt/mlops_sm_project_template_rt/cdk_helper_scripts/zip-image/Dockerfile rename to mlops-multi-account-cdk/mlops-sm-project-template/mlops_sm_project_template/cdk_helper_scripts/zip-image/Dockerfile diff --git a/mlops-multi-account-cdk/mlops-sm-project-template-rt/mlops_sm_project_template_rt/codecommit_stack.py b/mlops-multi-account-cdk/mlops-sm-project-template/mlops_sm_project_template/codecommit_stack.py similarity index 96% rename from mlops-multi-account-cdk/mlops-sm-project-template-rt/mlops_sm_project_template_rt/codecommit_stack.py rename to mlops-multi-account-cdk/mlops-sm-project-template/mlops_sm_project_template/codecommit_stack.py index 611bc5f2..ce6c5b10 100644 --- a/mlops-multi-account-cdk/mlops-sm-project-template-rt/mlops_sm_project_template_rt/codecommit_stack.py +++ b/mlops-multi-account-cdk/mlops-sm-project-template/mlops_sm_project_template/codecommit_stack.py @@ -26,7 +26,7 @@ from constructs import Construct -from mlops_sm_project_template_rt.config.constants import ( +from mlops_sm_project_template.config.constants import ( CODE_COMMIT_REPO_NAME, PIPELINE_BRANCH ) @@ -52,7 +52,7 @@ def __init__( "DeployAsset", path="", bundling=BundlingOptions( - image=DockerImage.from_build("mlops_sm_project_template_rt/cdk_helper_scripts/zip-image"), + image=DockerImage.from_build("mlops_sm_project_template/cdk_helper_scripts/zip-image"), command=[ "sh", "-c", diff --git a/mlops-multi-account-cdk/mlops-sm-project-template-rt/mlops_sm_project_template_rt/config/constants.py b/mlops-multi-account-cdk/mlops-sm-project-template/mlops_sm_project_template/config/constants.py similarity index 100% rename from mlops-multi-account-cdk/mlops-sm-project-template-rt/mlops_sm_project_template_rt/config/constants.py rename to mlops-multi-account-cdk/mlops-sm-project-template/mlops_sm_project_template/config/constants.py diff --git 
a/mlops-multi-account-cdk/mlops-sm-project-template-rt/mlops_sm_project_template_rt/pipeline_stack.py b/mlops-multi-account-cdk/mlops-sm-project-template/mlops_sm_project_template/pipeline_stack.py similarity index 97% rename from mlops-multi-account-cdk/mlops-sm-project-template-rt/mlops_sm_project_template_rt/pipeline_stack.py rename to mlops-multi-account-cdk/mlops-sm-project-template/mlops_sm_project_template/pipeline_stack.py index 7b5b2bda..2d34267b 100644 --- a/mlops-multi-account-cdk/mlops-sm-project-template-rt/mlops_sm_project_template_rt/pipeline_stack.py +++ b/mlops-multi-account-cdk/mlops-sm-project-template/mlops_sm_project_template/pipeline_stack.py @@ -26,7 +26,7 @@ from constructs import Construct -from mlops_sm_project_template_rt.config.constants import ( +from mlops_sm_project_template.config.constants import ( APP_PREFIX, CODE_COMMIT_REPO_NAME, DEV_ACCOUNT, @@ -34,7 +34,7 @@ PIPELINE_BRANCH, ) -from mlops_sm_project_template_rt.service_catalog_stack import ServiceCatalogStack +from mlops_sm_project_template.service_catalog_stack import ServiceCatalogStack class CoreStage(Stage): diff --git a/mlops-multi-account-cdk/mlops-sm-project-template-rt/mlops_sm_project_template_rt/service_catalog_stack.py b/mlops-multi-account-cdk/mlops-sm-project-template/mlops_sm_project_template/service_catalog_stack.py similarity index 97% rename from mlops-multi-account-cdk/mlops-sm-project-template-rt/mlops_sm_project_template_rt/service_catalog_stack.py rename to mlops-multi-account-cdk/mlops-sm-project-template/mlops_sm_project_template/service_catalog_stack.py index 6c69c7b4..f6664b30 100644 --- a/mlops-multi-account-cdk/mlops-sm-project-template-rt/mlops_sm_project_template_rt/service_catalog_stack.py +++ b/mlops-multi-account-cdk/mlops-sm-project-template/mlops_sm_project_template/service_catalog_stack.py @@ -37,8 +37,8 @@ from constructs import Construct -from mlops_sm_project_template_rt.templates.basic_project_stack import MLOpsStack -from 
mlops_sm_project_template_rt.ssm_construct import SSMConstruct +from mlops_sm_project_template.templates.basic_project_stack import MLOpsStack +from mlops_sm_project_template.ssm_construct import SSMConstruct # Get environment variables LOG_LEVEL = os.getenv("LOG_LEVEL", "INFO").upper() @@ -246,7 +246,7 @@ def __init__( ) # Create the build and deployment asset as an output to pass to pipeline stack - zip_image = DockerImage.from_build("mlops_sm_project_template_rt/cdk_helper_scripts/zip-image") + zip_image = DockerImage.from_build("mlops_sm_project_template/cdk_helper_scripts/zip-image") build_app_asset = s3_assets.Asset( self, @@ -329,7 +329,7 @@ def deploy_all_products( portfolio_owner: str, product_version: str, stage_name: str, - templates_directory: str = "mlops_sm_project_template_rt/templates", + templates_directory: str = "mlops_sm_project_template/templates", **kwargs, ): @@ -340,7 +340,7 @@ def deploy_all_products( if filename.endswith("_stack.py"): template_py_file = filename[:-3] - template_module = importlib.import_module(f"mlops_sm_project_template_rt.templates.{template_py_file}") + template_module = importlib.import_module(f"mlops_sm_project_template.templates.{template_py_file}") template_py_file = template_py_file.replace("_", "-") diff --git a/mlops-multi-account-cdk/mlops-sm-project-template-rt/mlops_sm_project_template_rt/ssm_construct.py b/mlops-multi-account-cdk/mlops-sm-project-template/mlops_sm_project_template/ssm_construct.py similarity index 97% rename from mlops-multi-account-cdk/mlops-sm-project-template-rt/mlops_sm_project_template_rt/ssm_construct.py rename to mlops-multi-account-cdk/mlops-sm-project-template/mlops_sm_project_template/ssm_construct.py index a768c72b..9fc3eb46 100644 --- a/mlops-multi-account-cdk/mlops-sm-project-template-rt/mlops_sm_project_template_rt/ssm_construct.py +++ b/mlops-multi-account-cdk/mlops-sm-project-template/mlops_sm_project_template/ssm_construct.py @@ -21,7 +21,7 @@ from constructs import Construct 
-from mlops_sm_project_template_rt.config.constants import ( +from mlops_sm_project_template.config.constants import ( DEV_ACCOUNT, PREPROD_ACCOUNT, PROD_ACCOUNT, diff --git a/mlops-multi-account-cdk/mlops-sm-project-template-rt/mlops_sm_project_template_rt/templates/basic_project_stack.py b/mlops-multi-account-cdk/mlops-sm-project-template/mlops_sm_project_template/templates/basic_project_stack.py similarity index 96% rename from mlops-multi-account-cdk/mlops-sm-project-template-rt/mlops_sm_project_template_rt/templates/basic_project_stack.py rename to mlops-multi-account-cdk/mlops-sm-project-template/mlops_sm_project_template/templates/basic_project_stack.py index 30f2bd67..779cb8c7 100644 --- a/mlops-multi-account-cdk/mlops-sm-project-template-rt/mlops_sm_project_template_rt/templates/basic_project_stack.py +++ b/mlops-multi-account-cdk/mlops-sm-project-template/mlops_sm_project_template/templates/basic_project_stack.py @@ -31,14 +31,14 @@ from constructs import Construct -from mlops_sm_project_template_rt.templates.pipeline_constructs.build_pipeline_construct import ( +from mlops_sm_project_template.templates.pipeline_constructs.build_pipeline_construct import ( BuildPipelineConstruct, ) -from mlops_sm_project_template_rt.templates.pipeline_constructs.deploy_pipeline_construct import ( +from mlops_sm_project_template.templates.pipeline_constructs.deploy_pipeline_construct import ( DeployPipelineConstruct, ) -from mlops_sm_project_template_rt.config.constants import PREPROD_ACCOUNT, PROD_ACCOUNT, DEFAULT_DEPLOYMENT_REGION +from mlops_sm_project_template.config.constants import PREPROD_ACCOUNT, PROD_ACCOUNT, DEFAULT_DEPLOYMENT_REGION class MLOpsStack(Stack): diff --git a/mlops-multi-account-cdk/mlops-sm-project-template-rt/mlops_sm_project_template_rt/templates/byoc_pipeline_constructs/build_pipeline_construct.py b/mlops-multi-account-cdk/mlops-sm-project-template/mlops_sm_project_template/templates/byoc_pipeline_constructs/build_pipeline_construct.py similarity 
index 100% rename from mlops-multi-account-cdk/mlops-sm-project-template-rt/mlops_sm_project_template_rt/templates/byoc_pipeline_constructs/build_pipeline_construct.py rename to mlops-multi-account-cdk/mlops-sm-project-template/mlops_sm_project_template/templates/byoc_pipeline_constructs/build_pipeline_construct.py diff --git a/mlops-multi-account-cdk/mlops-sm-project-template-rt/mlops_sm_project_template_rt/templates/byoc_pipeline_constructs/deploy_pipeline_construct.py b/mlops-multi-account-cdk/mlops-sm-project-template/mlops_sm_project_template/templates/byoc_pipeline_constructs/deploy_pipeline_construct.py similarity index 100% rename from mlops-multi-account-cdk/mlops-sm-project-template-rt/mlops_sm_project_template_rt/templates/byoc_pipeline_constructs/deploy_pipeline_construct.py rename to mlops-multi-account-cdk/mlops-sm-project-template/mlops_sm_project_template/templates/byoc_pipeline_constructs/deploy_pipeline_construct.py diff --git a/mlops-multi-account-cdk/mlops-sm-project-template-rt/mlops_sm_project_template_rt/templates/byoc_project_stack.py b/mlops-multi-account-cdk/mlops-sm-project-template/mlops_sm_project_template/templates/byoc_project_stack.py similarity index 97% rename from mlops-multi-account-cdk/mlops-sm-project-template-rt/mlops_sm_project_template_rt/templates/byoc_project_stack.py rename to mlops-multi-account-cdk/mlops-sm-project-template/mlops_sm_project_template/templates/byoc_project_stack.py index 2c4e425e..406041f6 100644 --- a/mlops-multi-account-cdk/mlops-sm-project-template-rt/mlops_sm_project_template_rt/templates/byoc_project_stack.py +++ b/mlops-multi-account-cdk/mlops-sm-project-template/mlops_sm_project_template/templates/byoc_project_stack.py @@ -32,14 +32,14 @@ from constructs import Construct -from mlops_sm_project_template_rt.templates.byoc_pipeline_constructs.build_pipeline_construct import ( +from mlops_sm_project_template.templates.byoc_pipeline_constructs.build_pipeline_construct import ( BuildPipelineConstruct, ) 
-from mlops_sm_project_template_rt.templates.byoc_pipeline_constructs.deploy_pipeline_construct import ( +from mlops_sm_project_template.templates.byoc_pipeline_constructs.deploy_pipeline_construct import ( DeployPipelineConstruct, ) -from mlops_sm_project_template_rt.config.constants import PREPROD_ACCOUNT, PROD_ACCOUNT, DEFAULT_DEPLOYMENT_REGION +from mlops_sm_project_template.config.constants import PREPROD_ACCOUNT, PROD_ACCOUNT, DEFAULT_DEPLOYMENT_REGION class MLOpsStack(Stack): diff --git a/mlops-multi-account-cdk/mlops-sm-project-template-rt/mlops_sm_project_template_rt/templates/dynamic_accounts_project_stack.py b/mlops-multi-account-cdk/mlops-sm-project-template/mlops_sm_project_template/templates/dynamic_accounts_project_stack.py similarity index 98% rename from mlops-multi-account-cdk/mlops-sm-project-template-rt/mlops_sm_project_template_rt/templates/dynamic_accounts_project_stack.py rename to mlops-multi-account-cdk/mlops-sm-project-template/mlops_sm_project_template/templates/dynamic_accounts_project_stack.py index 1649cde7..419018bd 100644 --- a/mlops-multi-account-cdk/mlops-sm-project-template-rt/mlops_sm_project_template_rt/templates/dynamic_accounts_project_stack.py +++ b/mlops-multi-account-cdk/mlops-sm-project-template/mlops_sm_project_template/templates/dynamic_accounts_project_stack.py @@ -31,10 +31,10 @@ from constructs import Construct -from mlops_sm_project_template_rt.templates.pipeline_constructs.build_pipeline_construct import ( +from mlops_sm_project_template.templates.pipeline_constructs.build_pipeline_construct import ( BuildPipelineConstruct, ) -from mlops_sm_project_template_rt.templates.pipeline_constructs.deploy_pipeline_construct import ( +from mlops_sm_project_template.templates.pipeline_constructs.deploy_pipeline_construct import ( DeployPipelineConstruct, ) diff --git a/mlops-multi-account-cdk/mlops-sm-project-template-rt/mlops_sm_project_template_rt/templates/pipeline_constructs/build_pipeline_construct.py 
b/mlops-multi-account-cdk/mlops-sm-project-template/mlops_sm_project_template/templates/pipeline_constructs/build_pipeline_construct.py similarity index 100% rename from mlops-multi-account-cdk/mlops-sm-project-template-rt/mlops_sm_project_template_rt/templates/pipeline_constructs/build_pipeline_construct.py rename to mlops-multi-account-cdk/mlops-sm-project-template/mlops_sm_project_template/templates/pipeline_constructs/build_pipeline_construct.py diff --git a/mlops-multi-account-cdk/mlops-sm-project-template-rt/mlops_sm_project_template_rt/templates/pipeline_constructs/deploy_pipeline_construct.py b/mlops-multi-account-cdk/mlops-sm-project-template/mlops_sm_project_template/templates/pipeline_constructs/deploy_pipeline_construct.py similarity index 100% rename from mlops-multi-account-cdk/mlops-sm-project-template-rt/mlops_sm_project_template_rt/templates/pipeline_constructs/deploy_pipeline_construct.py rename to mlops-multi-account-cdk/mlops-sm-project-template/mlops_sm_project_template/templates/pipeline_constructs/deploy_pipeline_construct.py diff --git a/mlops-multi-account-cdk/mlops-sm-project-template-rt/requirements-dev.txt b/mlops-multi-account-cdk/mlops-sm-project-template/requirements-dev.txt similarity index 100% rename from mlops-multi-account-cdk/mlops-sm-project-template-rt/requirements-dev.txt rename to mlops-multi-account-cdk/mlops-sm-project-template/requirements-dev.txt diff --git a/mlops-multi-account-cdk/mlops-sm-project-template-rt/requirements.txt b/mlops-multi-account-cdk/mlops-sm-project-template/requirements.txt similarity index 100% rename from mlops-multi-account-cdk/mlops-sm-project-template-rt/requirements.txt rename to mlops-multi-account-cdk/mlops-sm-project-template/requirements.txt diff --git a/mlops-multi-account-cdk/mlops-sm-project-template-rt/scripts/cdk-account-setup.sh b/mlops-multi-account-cdk/mlops-sm-project-template/scripts/cdk-account-setup.sh similarity index 96% rename from 
mlops-multi-account-cdk/mlops-sm-project-template-rt/scripts/cdk-account-setup.sh rename to mlops-multi-account-cdk/mlops-sm-project-template/scripts/cdk-account-setup.sh index 19061231..5b38d33a 100755 --- a/mlops-multi-account-cdk/mlops-sm-project-template-rt/scripts/cdk-account-setup.sh +++ b/mlops-multi-account-cdk/mlops-sm-project-template/scripts/cdk-account-setup.sh @@ -15,7 +15,7 @@ sed -i '' -e "s/^PIPELINE_ACCOUNT = \"$pattern\"/PIPELINE_ACCOUNT = \"$gov_accou -e "s/^PREPROD_ACCOUNT = \"$pattern\"/PREPROD_ACCOUNT = \"$preprod_account\"/" \ -e "s/^PROD_ACCOUNT = \"$pattern\"/PROD_ACCOUNT = \"$prod_account\"/" \ -e "s/^DEFAULT_DEPLOYMENT_REGION = \"$pattern\"/DEFAULT_DEPLOYMENT_REGION = \"$region\"/" \ - mlops_sm_project_template_rt/config/constants.py + mlops_sm_project_template/config/constants.py echo 'AWS profiles to be used for each account' read -p 'Governance Account AWS Profile: ' gov_profile diff --git a/mlops-multi-account-cdk/mlops-sm-project-template-rt/scripts/install-prerequisites-brew.sh b/mlops-multi-account-cdk/mlops-sm-project-template/scripts/install-prerequisites-brew.sh similarity index 100% rename from mlops-multi-account-cdk/mlops-sm-project-template-rt/scripts/install-prerequisites-brew.sh rename to mlops-multi-account-cdk/mlops-sm-project-template/scripts/install-prerequisites-brew.sh diff --git a/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/build_app/.githooks/pre-commit b/mlops-multi-account-cdk/mlops-sm-project-template/seed_code/build_app/.githooks/pre-commit similarity index 100% rename from mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/build_app/.githooks/pre-commit rename to mlops-multi-account-cdk/mlops-sm-project-template/seed_code/build_app/.githooks/pre-commit diff --git a/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/build_app/.pre-commit-config.yaml b/mlops-multi-account-cdk/mlops-sm-project-template/seed_code/build_app/.pre-commit-config.yaml similarity index 100% 
rename from mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/build_app/.pre-commit-config.yaml rename to mlops-multi-account-cdk/mlops-sm-project-template/seed_code/build_app/.pre-commit-config.yaml diff --git a/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/build_app/Makefile b/mlops-multi-account-cdk/mlops-sm-project-template/seed_code/build_app/Makefile similarity index 100% rename from mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/build_app/Makefile rename to mlops-multi-account-cdk/mlops-sm-project-template/seed_code/build_app/Makefile diff --git a/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/build_app/README.md b/mlops-multi-account-cdk/mlops-sm-project-template/seed_code/build_app/README.md similarity index 100% rename from mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/build_app/README.md rename to mlops-multi-account-cdk/mlops-sm-project-template/seed_code/build_app/README.md diff --git a/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/build_app/buildspec.yml b/mlops-multi-account-cdk/mlops-sm-project-template/seed_code/build_app/buildspec.yml similarity index 100% rename from mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/build_app/buildspec.yml rename to mlops-multi-account-cdk/mlops-sm-project-template/seed_code/build_app/buildspec.yml diff --git a/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/build_app/ml_pipelines/README.md b/mlops-multi-account-cdk/mlops-sm-project-template/seed_code/build_app/ml_pipelines/README.md similarity index 100% rename from mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/build_app/ml_pipelines/README.md rename to mlops-multi-account-cdk/mlops-sm-project-template/seed_code/build_app/ml_pipelines/README.md diff --git a/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/build_app/ml_pipelines/__init__.py 
b/mlops-multi-account-cdk/mlops-sm-project-template/seed_code/build_app/ml_pipelines/__init__.py similarity index 100% rename from mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/build_app/ml_pipelines/__init__.py rename to mlops-multi-account-cdk/mlops-sm-project-template/seed_code/build_app/ml_pipelines/__init__.py diff --git a/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/build_app/ml_pipelines/__version__.py b/mlops-multi-account-cdk/mlops-sm-project-template/seed_code/build_app/ml_pipelines/__version__.py similarity index 100% rename from mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/build_app/ml_pipelines/__version__.py rename to mlops-multi-account-cdk/mlops-sm-project-template/seed_code/build_app/ml_pipelines/__version__.py diff --git a/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/build_app/ml_pipelines/_utils.py b/mlops-multi-account-cdk/mlops-sm-project-template/seed_code/build_app/ml_pipelines/_utils.py similarity index 100% rename from mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/build_app/ml_pipelines/_utils.py rename to mlops-multi-account-cdk/mlops-sm-project-template/seed_code/build_app/ml_pipelines/_utils.py diff --git a/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/build_app/ml_pipelines/get_pipeline_definition.py b/mlops-multi-account-cdk/mlops-sm-project-template/seed_code/build_app/ml_pipelines/get_pipeline_definition.py similarity index 100% rename from mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/build_app/ml_pipelines/get_pipeline_definition.py rename to mlops-multi-account-cdk/mlops-sm-project-template/seed_code/build_app/ml_pipelines/get_pipeline_definition.py diff --git a/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/build_app/ml_pipelines/run_pipeline.py b/mlops-multi-account-cdk/mlops-sm-project-template/seed_code/build_app/ml_pipelines/run_pipeline.py similarity index 100% rename from 
mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/build_app/ml_pipelines/run_pipeline.py rename to mlops-multi-account-cdk/mlops-sm-project-template/seed_code/build_app/ml_pipelines/run_pipeline.py diff --git a/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/build_app/ml_pipelines/training/README.md b/mlops-multi-account-cdk/mlops-sm-project-template/seed_code/build_app/ml_pipelines/training/README.md similarity index 100% rename from mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/build_app/ml_pipelines/training/README.md rename to mlops-multi-account-cdk/mlops-sm-project-template/seed_code/build_app/ml_pipelines/training/README.md diff --git a/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/build_app/ml_pipelines/training/__init__.py b/mlops-multi-account-cdk/mlops-sm-project-template/seed_code/build_app/ml_pipelines/training/__init__.py similarity index 100% rename from mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/build_app/ml_pipelines/training/__init__.py rename to mlops-multi-account-cdk/mlops-sm-project-template/seed_code/build_app/ml_pipelines/training/__init__.py diff --git a/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/build_app/ml_pipelines/training/_utils.py b/mlops-multi-account-cdk/mlops-sm-project-template/seed_code/build_app/ml_pipelines/training/_utils.py similarity index 100% rename from mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/build_app/ml_pipelines/training/_utils.py rename to mlops-multi-account-cdk/mlops-sm-project-template/seed_code/build_app/ml_pipelines/training/_utils.py diff --git a/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/build_app/ml_pipelines/training/pipeline.py b/mlops-multi-account-cdk/mlops-sm-project-template/seed_code/build_app/ml_pipelines/training/pipeline.py similarity index 100% rename from 
mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/build_app/ml_pipelines/training/pipeline.py rename to mlops-multi-account-cdk/mlops-sm-project-template/seed_code/build_app/ml_pipelines/training/pipeline.py diff --git a/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/build_app/notebooks/README.md b/mlops-multi-account-cdk/mlops-sm-project-template/seed_code/build_app/notebooks/README.md similarity index 100% rename from mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/build_app/notebooks/README.md rename to mlops-multi-account-cdk/mlops-sm-project-template/seed_code/build_app/notebooks/README.md diff --git a/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/build_app/notebooks/sm_pipelines_runbook.ipynb b/mlops-multi-account-cdk/mlops-sm-project-template/seed_code/build_app/notebooks/sm_pipelines_runbook.ipynb similarity index 100% rename from mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/build_app/notebooks/sm_pipelines_runbook.ipynb rename to mlops-multi-account-cdk/mlops-sm-project-template/seed_code/build_app/notebooks/sm_pipelines_runbook.ipynb diff --git a/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/build_app/setup.cfg b/mlops-multi-account-cdk/mlops-sm-project-template/seed_code/build_app/setup.cfg similarity index 100% rename from mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/build_app/setup.cfg rename to mlops-multi-account-cdk/mlops-sm-project-template/seed_code/build_app/setup.cfg diff --git a/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/build_app/setup.py b/mlops-multi-account-cdk/mlops-sm-project-template/seed_code/build_app/setup.py similarity index 100% rename from mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/build_app/setup.py rename to mlops-multi-account-cdk/mlops-sm-project-template/seed_code/build_app/setup.py diff --git 
a/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/build_app/source_scripts/README.md b/mlops-multi-account-cdk/mlops-sm-project-template/seed_code/build_app/source_scripts/README.md similarity index 100% rename from mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/build_app/source_scripts/README.md rename to mlops-multi-account-cdk/mlops-sm-project-template/seed_code/build_app/source_scripts/README.md diff --git a/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/build_app/source_scripts/evaluate/evaluate_xgboost/README.md b/mlops-multi-account-cdk/mlops-sm-project-template/seed_code/build_app/source_scripts/evaluate/evaluate_xgboost/README.md similarity index 100% rename from mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/build_app/source_scripts/evaluate/evaluate_xgboost/README.md rename to mlops-multi-account-cdk/mlops-sm-project-template/seed_code/build_app/source_scripts/evaluate/evaluate_xgboost/README.md diff --git a/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/build_app/source_scripts/evaluate/evaluate_xgboost/main.py b/mlops-multi-account-cdk/mlops-sm-project-template/seed_code/build_app/source_scripts/evaluate/evaluate_xgboost/main.py similarity index 100% rename from mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/build_app/source_scripts/evaluate/evaluate_xgboost/main.py rename to mlops-multi-account-cdk/mlops-sm-project-template/seed_code/build_app/source_scripts/evaluate/evaluate_xgboost/main.py diff --git a/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/build_app/source_scripts/evaluate/evaluate_xgboost/requirements.txt b/mlops-multi-account-cdk/mlops-sm-project-template/seed_code/build_app/source_scripts/evaluate/evaluate_xgboost/requirements.txt similarity index 100% rename from mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/build_app/source_scripts/evaluate/evaluate_xgboost/requirements.txt rename to 
mlops-multi-account-cdk/mlops-sm-project-template/seed_code/build_app/source_scripts/evaluate/evaluate_xgboost/requirements.txt diff --git a/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/build_app/source_scripts/helpers/README.md b/mlops-multi-account-cdk/mlops-sm-project-template/seed_code/build_app/source_scripts/helpers/README.md similarity index 100% rename from mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/build_app/source_scripts/helpers/README.md rename to mlops-multi-account-cdk/mlops-sm-project-template/seed_code/build_app/source_scripts/helpers/README.md diff --git a/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/build_app/source_scripts/helpers/logger.py b/mlops-multi-account-cdk/mlops-sm-project-template/seed_code/build_app/source_scripts/helpers/logger.py similarity index 100% rename from mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/build_app/source_scripts/helpers/logger.py rename to mlops-multi-account-cdk/mlops-sm-project-template/seed_code/build_app/source_scripts/helpers/logger.py diff --git a/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/build_app/source_scripts/helpers/requirements.txt b/mlops-multi-account-cdk/mlops-sm-project-template/seed_code/build_app/source_scripts/helpers/requirements.txt similarity index 100% rename from mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/build_app/source_scripts/helpers/requirements.txt rename to mlops-multi-account-cdk/mlops-sm-project-template/seed_code/build_app/source_scripts/helpers/requirements.txt diff --git a/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/build_app/source_scripts/helpers/s3_helper.py b/mlops-multi-account-cdk/mlops-sm-project-template/seed_code/build_app/source_scripts/helpers/s3_helper.py similarity index 100% rename from mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/build_app/source_scripts/helpers/s3_helper.py rename to 
mlops-multi-account-cdk/mlops-sm-project-template/seed_code/build_app/source_scripts/helpers/s3_helper.py diff --git a/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/build_app/source_scripts/helpers/test/test_a.py b/mlops-multi-account-cdk/mlops-sm-project-template/seed_code/build_app/source_scripts/helpers/test/test_a.py similarity index 100% rename from mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/build_app/source_scripts/helpers/test/test_a.py rename to mlops-multi-account-cdk/mlops-sm-project-template/seed_code/build_app/source_scripts/helpers/test/test_a.py diff --git a/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/build_app/source_scripts/preprocessing/prepare_abalone_data/README.md b/mlops-multi-account-cdk/mlops-sm-project-template/seed_code/build_app/source_scripts/preprocessing/prepare_abalone_data/README.md similarity index 100% rename from mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/build_app/source_scripts/preprocessing/prepare_abalone_data/README.md rename to mlops-multi-account-cdk/mlops-sm-project-template/seed_code/build_app/source_scripts/preprocessing/prepare_abalone_data/README.md diff --git a/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/build_app/source_scripts/preprocessing/prepare_abalone_data/main.py b/mlops-multi-account-cdk/mlops-sm-project-template/seed_code/build_app/source_scripts/preprocessing/prepare_abalone_data/main.py similarity index 100% rename from mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/build_app/source_scripts/preprocessing/prepare_abalone_data/main.py rename to mlops-multi-account-cdk/mlops-sm-project-template/seed_code/build_app/source_scripts/preprocessing/prepare_abalone_data/main.py diff --git a/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/build_app/source_scripts/preprocessing/prepare_abalone_data/requirements.txt 
b/mlops-multi-account-cdk/mlops-sm-project-template/seed_code/build_app/source_scripts/preprocessing/prepare_abalone_data/requirements.txt similarity index 100% rename from mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/build_app/source_scripts/preprocessing/prepare_abalone_data/requirements.txt rename to mlops-multi-account-cdk/mlops-sm-project-template/seed_code/build_app/source_scripts/preprocessing/prepare_abalone_data/requirements.txt diff --git a/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/build_app/source_scripts/training/xgboost/README.md b/mlops-multi-account-cdk/mlops-sm-project-template/seed_code/build_app/source_scripts/training/xgboost/README.md similarity index 100% rename from mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/build_app/source_scripts/training/xgboost/README.md rename to mlops-multi-account-cdk/mlops-sm-project-template/seed_code/build_app/source_scripts/training/xgboost/README.md diff --git a/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/build_app/source_scripts/training/xgboost/__main__.py b/mlops-multi-account-cdk/mlops-sm-project-template/seed_code/build_app/source_scripts/training/xgboost/__main__.py similarity index 100% rename from mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/build_app/source_scripts/training/xgboost/__main__.py rename to mlops-multi-account-cdk/mlops-sm-project-template/seed_code/build_app/source_scripts/training/xgboost/__main__.py diff --git a/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/build_app/source_scripts/training/xgboost/requirements.txt b/mlops-multi-account-cdk/mlops-sm-project-template/seed_code/build_app/source_scripts/training/xgboost/requirements.txt similarity index 100% rename from mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/build_app/source_scripts/training/xgboost/requirements.txt rename to 
mlops-multi-account-cdk/mlops-sm-project-template/seed_code/build_app/source_scripts/training/xgboost/requirements.txt diff --git a/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/build_app/source_scripts/training/xgboost/test/test_a.py b/mlops-multi-account-cdk/mlops-sm-project-template/seed_code/build_app/source_scripts/training/xgboost/test/test_a.py similarity index 100% rename from mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/build_app/source_scripts/training/xgboost/test/test_a.py rename to mlops-multi-account-cdk/mlops-sm-project-template/seed_code/build_app/source_scripts/training/xgboost/test/test_a.py diff --git a/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/.githooks/pre-commit b/mlops-multi-account-cdk/mlops-sm-project-template/seed_code/byoc_build_app/.githooks/pre-commit similarity index 100% rename from mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/.githooks/pre-commit rename to mlops-multi-account-cdk/mlops-sm-project-template/seed_code/byoc_build_app/.githooks/pre-commit diff --git a/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/.pre-commit-config.yaml b/mlops-multi-account-cdk/mlops-sm-project-template/seed_code/byoc_build_app/.pre-commit-config.yaml similarity index 100% rename from mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/.pre-commit-config.yaml rename to mlops-multi-account-cdk/mlops-sm-project-template/seed_code/byoc_build_app/.pre-commit-config.yaml diff --git a/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/Makefile b/mlops-multi-account-cdk/mlops-sm-project-template/seed_code/byoc_build_app/Makefile similarity index 100% rename from mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/Makefile rename to mlops-multi-account-cdk/mlops-sm-project-template/seed_code/byoc_build_app/Makefile diff --git 
a/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/README.md b/mlops-multi-account-cdk/mlops-sm-project-template/seed_code/byoc_build_app/README.md similarity index 100% rename from mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/README.md rename to mlops-multi-account-cdk/mlops-sm-project-template/seed_code/byoc_build_app/README.md diff --git a/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/buildspec.yml b/mlops-multi-account-cdk/mlops-sm-project-template/seed_code/byoc_build_app/buildspec.yml similarity index 100% rename from mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/buildspec.yml rename to mlops-multi-account-cdk/mlops-sm-project-template/seed_code/byoc_build_app/buildspec.yml diff --git a/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/ml_pipelines/README.md b/mlops-multi-account-cdk/mlops-sm-project-template/seed_code/byoc_build_app/ml_pipelines/README.md similarity index 100% rename from mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/ml_pipelines/README.md rename to mlops-multi-account-cdk/mlops-sm-project-template/seed_code/byoc_build_app/ml_pipelines/README.md diff --git a/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/ml_pipelines/__init__.py b/mlops-multi-account-cdk/mlops-sm-project-template/seed_code/byoc_build_app/ml_pipelines/__init__.py similarity index 100% rename from mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/ml_pipelines/__init__.py rename to mlops-multi-account-cdk/mlops-sm-project-template/seed_code/byoc_build_app/ml_pipelines/__init__.py diff --git a/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/ml_pipelines/__version__.py b/mlops-multi-account-cdk/mlops-sm-project-template/seed_code/byoc_build_app/ml_pipelines/__version__.py similarity index 100% rename from 
mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/ml_pipelines/__version__.py rename to mlops-multi-account-cdk/mlops-sm-project-template/seed_code/byoc_build_app/ml_pipelines/__version__.py diff --git a/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/ml_pipelines/_utils.py b/mlops-multi-account-cdk/mlops-sm-project-template/seed_code/byoc_build_app/ml_pipelines/_utils.py similarity index 100% rename from mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/ml_pipelines/_utils.py rename to mlops-multi-account-cdk/mlops-sm-project-template/seed_code/byoc_build_app/ml_pipelines/_utils.py diff --git a/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/ml_pipelines/get_pipeline_definition.py b/mlops-multi-account-cdk/mlops-sm-project-template/seed_code/byoc_build_app/ml_pipelines/get_pipeline_definition.py similarity index 100% rename from mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/ml_pipelines/get_pipeline_definition.py rename to mlops-multi-account-cdk/mlops-sm-project-template/seed_code/byoc_build_app/ml_pipelines/get_pipeline_definition.py diff --git a/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/ml_pipelines/run_pipeline.py b/mlops-multi-account-cdk/mlops-sm-project-template/seed_code/byoc_build_app/ml_pipelines/run_pipeline.py similarity index 100% rename from mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/ml_pipelines/run_pipeline.py rename to mlops-multi-account-cdk/mlops-sm-project-template/seed_code/byoc_build_app/ml_pipelines/run_pipeline.py diff --git a/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/ml_pipelines/training/README.md b/mlops-multi-account-cdk/mlops-sm-project-template/seed_code/byoc_build_app/ml_pipelines/training/README.md similarity index 100% rename from 
mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/ml_pipelines/training/README.md rename to mlops-multi-account-cdk/mlops-sm-project-template/seed_code/byoc_build_app/ml_pipelines/training/README.md diff --git a/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/ml_pipelines/training/__init__.py b/mlops-multi-account-cdk/mlops-sm-project-template/seed_code/byoc_build_app/ml_pipelines/training/__init__.py similarity index 100% rename from mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/ml_pipelines/training/__init__.py rename to mlops-multi-account-cdk/mlops-sm-project-template/seed_code/byoc_build_app/ml_pipelines/training/__init__.py diff --git a/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/ml_pipelines/training/_utils.py b/mlops-multi-account-cdk/mlops-sm-project-template/seed_code/byoc_build_app/ml_pipelines/training/_utils.py similarity index 100% rename from mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/ml_pipelines/training/_utils.py rename to mlops-multi-account-cdk/mlops-sm-project-template/seed_code/byoc_build_app/ml_pipelines/training/_utils.py diff --git a/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/ml_pipelines/training/pipeline.py b/mlops-multi-account-cdk/mlops-sm-project-template/seed_code/byoc_build_app/ml_pipelines/training/pipeline.py similarity index 100% rename from mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/ml_pipelines/training/pipeline.py rename to mlops-multi-account-cdk/mlops-sm-project-template/seed_code/byoc_build_app/ml_pipelines/training/pipeline.py diff --git a/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/notebooks/README.md b/mlops-multi-account-cdk/mlops-sm-project-template/seed_code/byoc_build_app/notebooks/README.md similarity index 100% rename from 
mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/notebooks/README.md rename to mlops-multi-account-cdk/mlops-sm-project-template/seed_code/byoc_build_app/notebooks/README.md diff --git a/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/setup.cfg b/mlops-multi-account-cdk/mlops-sm-project-template/seed_code/byoc_build_app/setup.cfg similarity index 100% rename from mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/setup.cfg rename to mlops-multi-account-cdk/mlops-sm-project-template/seed_code/byoc_build_app/setup.cfg diff --git a/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/setup.py b/mlops-multi-account-cdk/mlops-sm-project-template/seed_code/byoc_build_app/setup.py similarity index 100% rename from mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/setup.py rename to mlops-multi-account-cdk/mlops-sm-project-template/seed_code/byoc_build_app/setup.py diff --git a/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/source_scripts/Dockerfile b/mlops-multi-account-cdk/mlops-sm-project-template/seed_code/byoc_build_app/source_scripts/Dockerfile similarity index 100% rename from mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/source_scripts/Dockerfile rename to mlops-multi-account-cdk/mlops-sm-project-template/seed_code/byoc_build_app/source_scripts/Dockerfile diff --git a/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/source_scripts/README.md b/mlops-multi-account-cdk/mlops-sm-project-template/seed_code/byoc_build_app/source_scripts/README.md similarity index 100% rename from mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/source_scripts/README.md rename to mlops-multi-account-cdk/mlops-sm-project-template/seed_code/byoc_build_app/source_scripts/README.md diff --git 
a/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/source_scripts/docker-build.sh b/mlops-multi-account-cdk/mlops-sm-project-template/seed_code/byoc_build_app/source_scripts/docker-build.sh similarity index 100% rename from mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/source_scripts/docker-build.sh rename to mlops-multi-account-cdk/mlops-sm-project-template/seed_code/byoc_build_app/source_scripts/docker-build.sh diff --git a/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/source_scripts/docker_helpers/entrypoint.R b/mlops-multi-account-cdk/mlops-sm-project-template/seed_code/byoc_build_app/source_scripts/docker_helpers/entrypoint.R similarity index 100% rename from mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/source_scripts/docker_helpers/entrypoint.R rename to mlops-multi-account-cdk/mlops-sm-project-template/seed_code/byoc_build_app/source_scripts/docker_helpers/entrypoint.R diff --git a/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/source_scripts/docker_helpers/run.sh b/mlops-multi-account-cdk/mlops-sm-project-template/seed_code/byoc_build_app/source_scripts/docker_helpers/run.sh similarity index 100% rename from mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/source_scripts/docker_helpers/run.sh rename to mlops-multi-account-cdk/mlops-sm-project-template/seed_code/byoc_build_app/source_scripts/docker_helpers/run.sh diff --git a/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/source_scripts/evaluate/README.md b/mlops-multi-account-cdk/mlops-sm-project-template/seed_code/byoc_build_app/source_scripts/evaluate/README.md similarity index 100% rename from mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/source_scripts/evaluate/README.md rename to 
mlops-multi-account-cdk/mlops-sm-project-template/seed_code/byoc_build_app/source_scripts/evaluate/README.md diff --git a/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/source_scripts/evaluate/evaluation.R b/mlops-multi-account-cdk/mlops-sm-project-template/seed_code/byoc_build_app/source_scripts/evaluate/evaluation.R similarity index 100% rename from mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/source_scripts/evaluate/evaluation.R rename to mlops-multi-account-cdk/mlops-sm-project-template/seed_code/byoc_build_app/source_scripts/evaluate/evaluation.R diff --git a/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/source_scripts/helpers/README.md b/mlops-multi-account-cdk/mlops-sm-project-template/seed_code/byoc_build_app/source_scripts/helpers/README.md similarity index 100% rename from mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/source_scripts/helpers/README.md rename to mlops-multi-account-cdk/mlops-sm-project-template/seed_code/byoc_build_app/source_scripts/helpers/README.md diff --git a/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/source_scripts/helpers/logger.py b/mlops-multi-account-cdk/mlops-sm-project-template/seed_code/byoc_build_app/source_scripts/helpers/logger.py similarity index 100% rename from mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/source_scripts/helpers/logger.py rename to mlops-multi-account-cdk/mlops-sm-project-template/seed_code/byoc_build_app/source_scripts/helpers/logger.py diff --git a/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/source_scripts/helpers/requirements.txt b/mlops-multi-account-cdk/mlops-sm-project-template/seed_code/byoc_build_app/source_scripts/helpers/requirements.txt similarity index 100% rename from 
mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/source_scripts/helpers/requirements.txt rename to mlops-multi-account-cdk/mlops-sm-project-template/seed_code/byoc_build_app/source_scripts/helpers/requirements.txt diff --git a/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/source_scripts/helpers/s3_helper.py b/mlops-multi-account-cdk/mlops-sm-project-template/seed_code/byoc_build_app/source_scripts/helpers/s3_helper.py similarity index 100% rename from mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/source_scripts/helpers/s3_helper.py rename to mlops-multi-account-cdk/mlops-sm-project-template/seed_code/byoc_build_app/source_scripts/helpers/s3_helper.py diff --git a/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/source_scripts/helpers/test/test_a.py b/mlops-multi-account-cdk/mlops-sm-project-template/seed_code/byoc_build_app/source_scripts/helpers/test/test_a.py similarity index 100% rename from mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/source_scripts/helpers/test/test_a.py rename to mlops-multi-account-cdk/mlops-sm-project-template/seed_code/byoc_build_app/source_scripts/helpers/test/test_a.py diff --git a/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/source_scripts/preprocessing/README.md b/mlops-multi-account-cdk/mlops-sm-project-template/seed_code/byoc_build_app/source_scripts/preprocessing/README.md similarity index 100% rename from mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/source_scripts/preprocessing/README.md rename to mlops-multi-account-cdk/mlops-sm-project-template/seed_code/byoc_build_app/source_scripts/preprocessing/README.md diff --git a/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/source_scripts/preprocessing/prepare_abalone_data/preprocessing.R 
b/mlops-multi-account-cdk/mlops-sm-project-template/seed_code/byoc_build_app/source_scripts/preprocessing/prepare_abalone_data/preprocessing.R similarity index 100% rename from mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/source_scripts/preprocessing/prepare_abalone_data/preprocessing.R rename to mlops-multi-account-cdk/mlops-sm-project-template/seed_code/byoc_build_app/source_scripts/preprocessing/prepare_abalone_data/preprocessing.R diff --git a/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/source_scripts/repository-info.json b/mlops-multi-account-cdk/mlops-sm-project-template/seed_code/byoc_build_app/source_scripts/repository-info.json similarity index 100% rename from mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/source_scripts/repository-info.json rename to mlops-multi-account-cdk/mlops-sm-project-template/seed_code/byoc_build_app/source_scripts/repository-info.json diff --git a/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/source_scripts/training/README.md b/mlops-multi-account-cdk/mlops-sm-project-template/seed_code/byoc_build_app/source_scripts/training/README.md similarity index 100% rename from mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/source_scripts/training/README.md rename to mlops-multi-account-cdk/mlops-sm-project-template/seed_code/byoc_build_app/source_scripts/training/README.md diff --git a/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/source_scripts/training/deploy.R b/mlops-multi-account-cdk/mlops-sm-project-template/seed_code/byoc_build_app/source_scripts/training/deploy.R similarity index 100% rename from mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/source_scripts/training/deploy.R rename to mlops-multi-account-cdk/mlops-sm-project-template/seed_code/byoc_build_app/source_scripts/training/deploy.R diff --git 
a/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/source_scripts/training/endpoints.R b/mlops-multi-account-cdk/mlops-sm-project-template/seed_code/byoc_build_app/source_scripts/training/endpoints.R similarity index 100% rename from mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/source_scripts/training/endpoints.R rename to mlops-multi-account-cdk/mlops-sm-project-template/seed_code/byoc_build_app/source_scripts/training/endpoints.R diff --git a/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/source_scripts/training/test/test_a.py b/mlops-multi-account-cdk/mlops-sm-project-template/seed_code/byoc_build_app/source_scripts/training/test/test_a.py similarity index 100% rename from mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/source_scripts/training/test/test_a.py rename to mlops-multi-account-cdk/mlops-sm-project-template/seed_code/byoc_build_app/source_scripts/training/test/test_a.py diff --git a/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/source_scripts/training/train.R b/mlops-multi-account-cdk/mlops-sm-project-template/seed_code/byoc_build_app/source_scripts/training/train.R similarity index 100% rename from mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/source_scripts/training/train.R rename to mlops-multi-account-cdk/mlops-sm-project-template/seed_code/byoc_build_app/source_scripts/training/train.R diff --git a/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/deploy_app/.githooks/pre-commit b/mlops-multi-account-cdk/mlops-sm-project-template/seed_code/deploy_app/.githooks/pre-commit similarity index 100% rename from mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/deploy_app/.githooks/pre-commit rename to mlops-multi-account-cdk/mlops-sm-project-template/seed_code/deploy_app/.githooks/pre-commit diff --git 
a/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/deploy_app/.pre-commit-config.yaml b/mlops-multi-account-cdk/mlops-sm-project-template/seed_code/deploy_app/.pre-commit-config.yaml similarity index 100% rename from mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/deploy_app/.pre-commit-config.yaml rename to mlops-multi-account-cdk/mlops-sm-project-template/seed_code/deploy_app/.pre-commit-config.yaml diff --git a/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/deploy_app/Makefile b/mlops-multi-account-cdk/mlops-sm-project-template/seed_code/deploy_app/Makefile similarity index 100% rename from mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/deploy_app/Makefile rename to mlops-multi-account-cdk/mlops-sm-project-template/seed_code/deploy_app/Makefile diff --git a/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/deploy_app/README.md b/mlops-multi-account-cdk/mlops-sm-project-template/seed_code/deploy_app/README.md similarity index 100% rename from mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/deploy_app/README.md rename to mlops-multi-account-cdk/mlops-sm-project-template/seed_code/deploy_app/README.md diff --git a/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/deploy_app/app.py b/mlops-multi-account-cdk/mlops-sm-project-template/seed_code/deploy_app/app.py similarity index 100% rename from mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/deploy_app/app.py rename to mlops-multi-account-cdk/mlops-sm-project-template/seed_code/deploy_app/app.py diff --git a/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/deploy_app/cdk.json b/mlops-multi-account-cdk/mlops-sm-project-template/seed_code/deploy_app/cdk.json similarity index 100% rename from mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/deploy_app/cdk.json rename to mlops-multi-account-cdk/mlops-sm-project-template/seed_code/deploy_app/cdk.json diff --git 
a/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/deploy_app/config/config_mux.py b/mlops-multi-account-cdk/mlops-sm-project-template/seed_code/deploy_app/config/config_mux.py similarity index 100% rename from mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/deploy_app/config/config_mux.py rename to mlops-multi-account-cdk/mlops-sm-project-template/seed_code/deploy_app/config/config_mux.py diff --git a/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/deploy_app/config/constants.py b/mlops-multi-account-cdk/mlops-sm-project-template/seed_code/deploy_app/config/constants.py similarity index 100% rename from mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/deploy_app/config/constants.py rename to mlops-multi-account-cdk/mlops-sm-project-template/seed_code/deploy_app/config/constants.py diff --git a/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/deploy_app/config/dev/constants.py b/mlops-multi-account-cdk/mlops-sm-project-template/seed_code/deploy_app/config/dev/constants.py similarity index 100% rename from mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/deploy_app/config/dev/constants.py rename to mlops-multi-account-cdk/mlops-sm-project-template/seed_code/deploy_app/config/dev/constants.py diff --git a/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/deploy_app/config/dev/endpoint-config.yml b/mlops-multi-account-cdk/mlops-sm-project-template/seed_code/deploy_app/config/dev/endpoint-config.yml similarity index 100% rename from mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/deploy_app/config/dev/endpoint-config.yml rename to mlops-multi-account-cdk/mlops-sm-project-template/seed_code/deploy_app/config/dev/endpoint-config.yml diff --git a/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/deploy_app/config/prod/constants.py b/mlops-multi-account-cdk/mlops-sm-project-template/seed_code/deploy_app/config/prod/constants.py similarity index 
100% rename from mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/deploy_app/config/prod/constants.py rename to mlops-multi-account-cdk/mlops-sm-project-template/seed_code/deploy_app/config/prod/constants.py diff --git a/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/deploy_app/config/prod/endpoint-config.yml b/mlops-multi-account-cdk/mlops-sm-project-template/seed_code/deploy_app/config/prod/endpoint-config.yml similarity index 100% rename from mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/deploy_app/config/prod/endpoint-config.yml rename to mlops-multi-account-cdk/mlops-sm-project-template/seed_code/deploy_app/config/prod/endpoint-config.yml diff --git a/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/deploy_app/config/staging/constants.py b/mlops-multi-account-cdk/mlops-sm-project-template/seed_code/deploy_app/config/staging/constants.py similarity index 100% rename from mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/deploy_app/config/staging/constants.py rename to mlops-multi-account-cdk/mlops-sm-project-template/seed_code/deploy_app/config/staging/constants.py diff --git a/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/deploy_app/config/staging/endpoint-config.yml b/mlops-multi-account-cdk/mlops-sm-project-template/seed_code/deploy_app/config/staging/endpoint-config.yml similarity index 100% rename from mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/deploy_app/config/staging/endpoint-config.yml rename to mlops-multi-account-cdk/mlops-sm-project-template/seed_code/deploy_app/config/staging/endpoint-config.yml diff --git a/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/deploy_app/deploy_endpoint/__init__.py b/mlops-multi-account-cdk/mlops-sm-project-template/seed_code/deploy_app/deploy_endpoint/__init__.py similarity index 100% rename from 
mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/deploy_app/deploy_endpoint/__init__.py rename to mlops-multi-account-cdk/mlops-sm-project-template/seed_code/deploy_app/deploy_endpoint/__init__.py diff --git a/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/deploy_app/deploy_endpoint/deploy_endpoint_stack.py b/mlops-multi-account-cdk/mlops-sm-project-template/seed_code/deploy_app/deploy_endpoint/deploy_endpoint_stack.py similarity index 100% rename from mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/deploy_app/deploy_endpoint/deploy_endpoint_stack.py rename to mlops-multi-account-cdk/mlops-sm-project-template/seed_code/deploy_app/deploy_endpoint/deploy_endpoint_stack.py diff --git a/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/deploy_app/deploy_endpoint/get_approved_package.py b/mlops-multi-account-cdk/mlops-sm-project-template/seed_code/deploy_app/deploy_endpoint/get_approved_package.py similarity index 100% rename from mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/deploy_app/deploy_endpoint/get_approved_package.py rename to mlops-multi-account-cdk/mlops-sm-project-template/seed_code/deploy_app/deploy_endpoint/get_approved_package.py diff --git a/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/deploy_app/requirements-dev.txt b/mlops-multi-account-cdk/mlops-sm-project-template/seed_code/deploy_app/requirements-dev.txt similarity index 100% rename from mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/deploy_app/requirements-dev.txt rename to mlops-multi-account-cdk/mlops-sm-project-template/seed_code/deploy_app/requirements-dev.txt diff --git a/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/deploy_app/requirements.txt b/mlops-multi-account-cdk/mlops-sm-project-template/seed_code/deploy_app/requirements.txt similarity index 100% rename from mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/deploy_app/requirements.txt rename to 
mlops-multi-account-cdk/mlops-sm-project-template/seed_code/deploy_app/requirements.txt diff --git a/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/deploy_app/source.bat b/mlops-multi-account-cdk/mlops-sm-project-template/seed_code/deploy_app/source.bat similarity index 100% rename from mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/deploy_app/source.bat rename to mlops-multi-account-cdk/mlops-sm-project-template/seed_code/deploy_app/source.bat diff --git a/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/deploy_app/tests/README.md b/mlops-multi-account-cdk/mlops-sm-project-template/seed_code/deploy_app/tests/README.md similarity index 100% rename from mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/deploy_app/tests/README.md rename to mlops-multi-account-cdk/mlops-sm-project-template/seed_code/deploy_app/tests/README.md diff --git a/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/deploy_app/tests/integration_tests/__init__.py b/mlops-multi-account-cdk/mlops-sm-project-template/seed_code/deploy_app/tests/integration_tests/__init__.py similarity index 100% rename from mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/deploy_app/tests/integration_tests/__init__.py rename to mlops-multi-account-cdk/mlops-sm-project-template/seed_code/deploy_app/tests/integration_tests/__init__.py diff --git a/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/deploy_app/tests/integration_tests/buildspec.yml b/mlops-multi-account-cdk/mlops-sm-project-template/seed_code/deploy_app/tests/integration_tests/buildspec.yml similarity index 100% rename from mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/deploy_app/tests/integration_tests/buildspec.yml rename to mlops-multi-account-cdk/mlops-sm-project-template/seed_code/deploy_app/tests/integration_tests/buildspec.yml diff --git 
a/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/deploy_app/tests/integration_tests/endpoint_test.py b/mlops-multi-account-cdk/mlops-sm-project-template/seed_code/deploy_app/tests/integration_tests/endpoint_test.py similarity index 100% rename from mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/deploy_app/tests/integration_tests/endpoint_test.py rename to mlops-multi-account-cdk/mlops-sm-project-template/seed_code/deploy_app/tests/integration_tests/endpoint_test.py diff --git a/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/deploy_app/tests/unittests/__init__.py b/mlops-multi-account-cdk/mlops-sm-project-template/seed_code/deploy_app/tests/unittests/__init__.py similarity index 100% rename from mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/deploy_app/tests/unittests/__init__.py rename to mlops-multi-account-cdk/mlops-sm-project-template/seed_code/deploy_app/tests/unittests/__init__.py diff --git a/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/deploy_app/tests/unittests/test_deploy_app_stack.py b/mlops-multi-account-cdk/mlops-sm-project-template/seed_code/deploy_app/tests/unittests/test_deploy_app_stack.py similarity index 100% rename from mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/deploy_app/tests/unittests/test_deploy_app_stack.py rename to mlops-multi-account-cdk/mlops-sm-project-template/seed_code/deploy_app/tests/unittests/test_deploy_app_stack.py diff --git a/mlops-multi-account-cdk/mlops-sm-project-template-rt/source.bat b/mlops-multi-account-cdk/mlops-sm-project-template/source.bat similarity index 100% rename from mlops-multi-account-cdk/mlops-sm-project-template-rt/source.bat rename to mlops-multi-account-cdk/mlops-sm-project-template/source.bat diff --git a/mlops-multi-account-cdk/mlops-sm-project-template-rt/tests/__init__.py b/mlops-multi-account-cdk/mlops-sm-project-template/tests/__init__.py similarity index 100% rename from 
mlops-multi-account-cdk/mlops-sm-project-template-rt/tests/__init__.py rename to mlops-multi-account-cdk/mlops-sm-project-template/tests/__init__.py diff --git a/mlops-multi-account-cdk/mlops-sm-project-template-rt/tests/unit/__init__.py b/mlops-multi-account-cdk/mlops-sm-project-template/tests/unit/__init__.py similarity index 100% rename from mlops-multi-account-cdk/mlops-sm-project-template-rt/tests/unit/__init__.py rename to mlops-multi-account-cdk/mlops-sm-project-template/tests/unit/__init__.py diff --git a/mlops-multi-account-cdk/mlops-sm-project-template-rt/tests/unit/test_mlops_batch_v2_stack.py b/mlops-multi-account-cdk/mlops-sm-project-template/tests/unit/test_mlops_batch_v2_stack.py similarity index 89% rename from mlops-multi-account-cdk/mlops-sm-project-template-rt/tests/unit/test_mlops_batch_v2_stack.py rename to mlops-multi-account-cdk/mlops-sm-project-template/tests/unit/test_mlops_batch_v2_stack.py index add758e3..2201746a 100644 --- a/mlops-multi-account-cdk/mlops-sm-project-template-rt/tests/unit/test_mlops_batch_v2_stack.py +++ b/mlops-multi-account-cdk/mlops-sm-project-template/tests/unit/test_mlops_batch_v2_stack.py @@ -18,11 +18,11 @@ import aws_cdk as core import aws_cdk.assertions as assertions -from mlops_sm_project_template_rt.sm_project_stack import MlopsBatchV2Stack +from mlops_sm_project_template.sm_project_stack import MlopsBatchV2Stack # example tests. 
To run these tests, uncomment this file along with the example -# resource in mlops_sm_project_template_rt_v2/mlops_sm_project_template_rt_v2_stack.py +# resource in mlops_sm_project_template_v2/mlops_sm_project_template_v2_stack.py def test_sqs_queue_created(): app = core.App() stack = MlopsBatchV2Stack(app, "mlops-batch-v2") From e78ee0babe5653e007878dc9be2fc8b70b733716 Mon Sep 17 00:00:00 2001 From: Fatema Alkhanaizi Date: Thu, 4 Aug 2022 09:00:02 +0100 Subject: [PATCH 09/15] modified description --- .../mlops_sm_project_template/templates/basic_project_stack.py | 2 +- .../mlops_sm_project_template/templates/byoc_project_stack.py | 2 +- .../templates/dynamic_accounts_project_stack.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/mlops-multi-account-cdk/mlops-sm-project-template/mlops_sm_project_template/templates/basic_project_stack.py b/mlops-multi-account-cdk/mlops-sm-project-template/mlops_sm_project_template/templates/basic_project_stack.py index 779cb8c7..886c966a 100644 --- a/mlops-multi-account-cdk/mlops-sm-project-template/mlops_sm_project_template/templates/basic_project_stack.py +++ b/mlops-multi-account-cdk/mlops-sm-project-template/mlops_sm_project_template/templates/basic_project_stack.py @@ -42,7 +42,7 @@ class MLOpsStack(Stack): - DESCRIPTION: str = "This template includes a model building pipeline that includes a workflow to pre-process, train, evaluate and register a model. The deploy pipeline creates a preprod and production endpoint. The target DEV/PREPROD/PROD accounts are predefined in the template." + DESCRIPTION: str = "This template includes a model building pipeline that includes a workflow to pre-process, train, evaluate and register a model. The deploy pipeline creates a dev,preprod and production endpoint. The target DEV/PREPROD/PROD accounts are predefined in the template." 
TEMPLATE_NAME: str = "Basic MLOps template for real-time deployment" def __init__(self, scope: Construct, construct_id: str, **kwargs) -> None: diff --git a/mlops-multi-account-cdk/mlops-sm-project-template/mlops_sm_project_template/templates/byoc_project_stack.py b/mlops-multi-account-cdk/mlops-sm-project-template/mlops_sm_project_template/templates/byoc_project_stack.py index 406041f6..8776d773 100644 --- a/mlops-multi-account-cdk/mlops-sm-project-template/mlops_sm_project_template/templates/byoc_project_stack.py +++ b/mlops-multi-account-cdk/mlops-sm-project-template/mlops_sm_project_template/templates/byoc_project_stack.py @@ -43,7 +43,7 @@ class MLOpsStack(Stack): - DESCRIPTION: str = "This template includes a model building pipeline that includes a workflow to build your own container, pre-process, train, evaluate and register a model. The deploy pipeline creates a preprod and production endpoint. The target DEV/PREPROD/PROD accounts are predefined in the template." + DESCRIPTION: str = "This template includes a model building pipeline that includes a workflow to build your own containers, pre-process, train, evaluate and register a model. The deploy pipeline creates a dev, preprod and production endpoint. The target DEV/PREPROD/PROD accounts are predefined in the template." 
TEMPLATE_NAME: str = "MLOps template for real-time deployment using your own container" def __init__(self, scope: Construct, construct_id: str, **kwargs) -> None: diff --git a/mlops-multi-account-cdk/mlops-sm-project-template/mlops_sm_project_template/templates/dynamic_accounts_project_stack.py b/mlops-multi-account-cdk/mlops-sm-project-template/mlops_sm_project_template/templates/dynamic_accounts_project_stack.py index 419018bd..e041449a 100644 --- a/mlops-multi-account-cdk/mlops-sm-project-template/mlops_sm_project_template/templates/dynamic_accounts_project_stack.py +++ b/mlops-multi-account-cdk/mlops-sm-project-template/mlops_sm_project_template/templates/dynamic_accounts_project_stack.py @@ -40,7 +40,7 @@ class MLOpsStack(Stack): - DESCRIPTION: str = "This template includes a model building pipeline that includes a workflow to pre-process, train, evaluate and register a model. The deploy pipeline creates a preprod and production endpoint. The target PREPROD/PROD accounts are provided as cloudformation paramters and must be provided during project creation." + DESCRIPTION: str = "This template includes a model building pipeline that includes a workflow to pre-process, train, evaluate and register a model. The deploy pipeline creates a dev, preprod and production endpoint. The target PREPROD/PROD accounts are provided as cloudformation paramters and must be provided during project creation." 
TEMPLATE_NAME: str = "Dynamic Accounts MLOps template for real-time deployment" def __init__(self, scope: Construct, construct_id: str, **kwargs) -> None: From befac29b503415de64ce87d786243a9f69f40703 Mon Sep 17 00:00:00 2001 From: Georgios Schinas Date: Fri, 5 Aug 2022 11:30:15 +0100 Subject: [PATCH 10/15] fixes to BYOC seed code --- .../seed_code/byoc_build_app/buildspec.yml | 7 +++++-- .../ml_pipelines/training/pipeline.py | 16 +++++++++------- .../prepare_abalone_data/preprocessing.R | 3 ++- .../source_scripts/training/train.R | 3 ++- .../deploy_endpoint/deploy_endpoint_stack.py | 3 ++- 5 files changed, 20 insertions(+), 12 deletions(-) diff --git a/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/buildspec.yml b/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/buildspec.yml index 600f53a4..6f475132 100644 --- a/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/buildspec.yml +++ b/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/buildspec.yml @@ -11,9 +11,12 @@ phases: commands: - export PYTHONUNBUFFERED=TRUE - export SAGEMAKER_PROJECT_NAME_ID="${SAGEMAKER_PROJECT_NAME}-${SAGEMAKER_PROJECT_ID}" + # Copy sample dataset for template - REMOVE when using your own data + - aws s3 cp s3://sagemaker-sample-files/datasets/tabular/uci_abalone/abalone.csv . 
+ - aws s3 cp abalone.csv s3://${ARTIFACT_BUCKET} - | run-pipeline --module-name ml_pipelines.training.pipeline \ --role-arn $SAGEMAKER_PIPELINE_ROLE_ARN \ --tags "[{\"Key\":\"sagemaker:project-name\", \"Value\":\"${SAGEMAKER_PROJECT_NAME}\"}, {\"Key\":\"sagemaker:project-id\", \"Value\":\"${SAGEMAKER_PROJECT_ID}\"}]" \ - --kwargs "{\"region\":\"${AWS_REGION}\",\"role\":\"${SAGEMAKER_PIPELINE_ROLE_ARN}\",\"default_bucket\":\"${ARTIFACT_BUCKET}\",\"pipeline_name\":\"${SAGEMAKER_PROJECT_NAME_ID}\",\"model_package_group_name\":\"${MODEL_PACKAGE_GROUP_NAME}\",\"base_job_prefix\":\"${SAGEMAKER_PROJECT_NAME_ID}\", \"bucket_kms_id\":\"${ARTIFACT_BUCKET_KMS_ID}\", \"git_hash\":\"${CODEBUILD_RESOLVED_SOURCE_VERSION}\", \"ecr_repo_uri\":\"${ECR_REPO_URI}\"}" - - echo "Create/Update of the SageMaker Pipeline and execution completed." + --kwargs "{\"region\":\"${AWS_REGION}\",\"role\":\"${SAGEMAKER_PIPELINE_ROLE_ARN}\",\"default_bucket\":\"${ARTIFACT_BUCKET}\",\"pipeline_name\":\"${SAGEMAKER_PROJECT_NAME_ID}\",\"model_package_group_name\":\"${MODEL_PACKAGE_GROUP_NAME}\",\"base_job_prefix\":\"${SAGEMAKER_PROJECT_NAME_ID}\", \"bucket_kms_id\":\"${ARTIFACT_BUCKET_KMS_ID}\", \"git_hash\":\"${CODEBUILD_RESOLVED_SOURCE_VERSION}\", \"ecr_repo_uri\":\"${ECR_REPO_URI}\", \"default_input_data\":\"s3://${ARTIFACT_BUCKET}/abalone.csv\"}" + - echo "Create/Update of the SageMaker Pipeline and execution completed." 
\ No newline at end of file diff --git a/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/ml_pipelines/training/pipeline.py b/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/ml_pipelines/training/pipeline.py index 6366540e..37879308 100644 --- a/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/ml_pipelines/training/pipeline.py +++ b/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/ml_pipelines/training/pipeline.py @@ -108,6 +108,7 @@ def get_pipeline( project_id="SageMakerProjectId", git_hash="", ecr_repo_uri="", + default_input_data="", ): """Gets a SageMaker ML Pipeline instance working with on abalone data. @@ -116,7 +117,8 @@ def get_pipeline( role: IAM role to create and run steps and pipeline. default_bucket: the bucket to use for storing the artifacts git_hash: the hash id of the current commit. Used to determine which docker image version to use - ecr_repo_uri: uri of the ECR repository used by this project + ecr_repo_uri: uri of the ECR repository used by this project + default_input_data: s3 location with data to be used by pipeline Returns: an instance of a pipeline @@ -134,7 +136,7 @@ def get_pipeline( model_approval_status = ParameterString(name="ModelApprovalStatus", default_value="PendingManualApproval") input_data = ParameterString( name="InputDataUrl", - default_value=f"s3://sagemaker-servicecatalog-seedcode-{region}/dataset/abalone-dataset.csv", + default_value=default_input_data, ) processing_image_uri = f"{ecr_repo_uri}:processing-{git_hash}" training_image_uri = f"{ecr_repo_uri}:training-{git_hash}" @@ -164,7 +166,7 @@ def get_pipeline( outputs=[ ProcessingOutput(output_name="train", source="/opt/ml/processing/output/train"), ProcessingOutput(output_name="validation", source="/opt/ml/processing/output/validation"), - ProcessingOutput(output_name="test", source="/opt/ml/processing/test/output"), + 
ProcessingOutput(output_name="test", source="/opt/ml/processing/output/test"), ], code="source_scripts/preprocessing/prepare_abalone_data/preprocessing.R", # we must figure out this path to get it from step_source directory ) @@ -194,10 +196,10 @@ def get_pipeline( s3_data=step_process.properties.ProcessingOutputConfig.Outputs["train"].S3Output.S3Uri, content_type="text/csv", ), - # "validation": TrainingInput( # Validation data not used by seed code, but uncomment to make available during training - # s3_data=step_process.properties.ProcessingOutputConfig.Outputs["validation"].S3Output.S3Uri, - # content_type="text/csv", - # ), + "validation": TrainingInput( + s3_data=step_process.properties.ProcessingOutputConfig.Outputs["validation"].S3Output.S3Uri, + content_type="text/csv", + ), }, ) diff --git a/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/source_scripts/preprocessing/prepare_abalone_data/preprocessing.R b/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/source_scripts/preprocessing/prepare_abalone_data/preprocessing.R index 31932692..a05da8b8 100644 --- a/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/source_scripts/preprocessing/prepare_abalone_data/preprocessing.R +++ b/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/source_scripts/preprocessing/prepare_abalone_data/preprocessing.R @@ -28,6 +28,8 @@ output_dir <- "/opt/ml/processing/output/" filename <- Sys.glob(paste(input_dir, "*.csv", sep="")) abalone <- read_csv(filename) +names(abalone) <- c('sex', 'length', 'diameter', 'height', 'whole_weight', 'shucked_weight', 'viscera_weight', 'shell_weight', 'rings') + abalone <- abalone %>% mutate(female = as.integer(ifelse(sex == 'F', 1, 0)), male = as.integer(ifelse(sex == 'M', 1, 0)), @@ -45,6 +47,5 @@ abalone_valid <- anti_join(abalone, abalone_test) write_csv(abalone_train, paste0(output_dir,'train/abalone_train.csv')) - 
write_csv(abalone_valid, paste0(output_dir,'validation/abalone_valid.csv')) write_csv(abalone_test, paste0(output_dir,'test/abalone_test.csv')) diff --git a/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/source_scripts/training/train.R b/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/source_scripts/training/train.R index d1ad7f98..082bb95b 100644 --- a/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/source_scripts/training/train.R +++ b/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/byoc_build_app/source_scripts/training/train.R @@ -21,6 +21,7 @@ library(readr) prefix <- '/opt/ml/' input_path <- paste0(prefix , 'input/data/train/') +input_path_v <- paste0(prefix , 'input/data/validation/') output_path <- paste0(prefix, 'output/') model_path <- paste0(prefix, 'model/') code_path <- paste(prefix, 'code', sep='/') @@ -28,7 +29,7 @@ inference_code_dir <- paste(model_path, 'code', sep='/') abalone_train <- read_csv(paste0(input_path, 'abalone_train.csv')) -abalone_valid <- read_csv(paste0(input_path, 'abalone_valid.csv')) +abalone_valid <- read_csv(paste0(input_path_v, 'abalone_valid.csv')) regressor = lm(formula = rings ~ female + male + length + diameter + height + whole_weight + shucked_weight + viscera_weight + shell_weight, data = abalone_train) summary(regressor) diff --git a/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/deploy_app/deploy_endpoint/deploy_endpoint_stack.py b/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/deploy_app/deploy_endpoint/deploy_endpoint_stack.py index 41f099c0..bb8f0ac1 100644 --- a/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/deploy_app/deploy_endpoint/deploy_endpoint_stack.py +++ b/mlops-multi-account-cdk/mlops-sm-project-template-rt/seed_code/deploy_app/deploy_endpoint/deploy_endpoint_stack.py @@ -135,7 +135,8 @@ def __init__( ], effect=iam.Effect.ALLOW, resources=[ - 
MODEL_BUCKET_ARN + MODEL_BUCKET_ARN, + f"{MODEL_BUCKET_ARN}/*", ], ), iam.PolicyStatement( From a0399a4c6cc9933780e92d5145841162b86ba5ee Mon Sep 17 00:00:00 2001 From: Fatema Alkhanaizi Date: Fri, 5 Aug 2022 11:44:03 +0100 Subject: [PATCH 11/15] reformatting and adding more docs --- .../.pre-commit-config.yaml | 43 +++++++++++++---- .../ADVANCED_TOPICS.md | 47 +++++++++++++++++-- .../mlops-sm-project-template/README.md | 2 - .../codecommit_stack.py | 5 +- .../config/constants.py | 2 +- .../service_catalog_stack.py | 2 +- .../build_pipeline_construct.py | 36 ++++---------- .../seed_code/byoc_build_app/buildspec.yml | 4 +- .../ml_pipelines/training/pipeline.py | 4 +- .../byoc_build_app/source_scripts/Dockerfile | 6 +-- .../docker_helpers/entrypoint.R | 12 ++--- .../source_scripts/docker_helpers/run.sh | 2 +- .../source_scripts/evaluate/README.md | 2 +- .../source_scripts/evaluate/evaluation.R | 2 +- .../source_scripts/preprocessing/README.md | 2 +- .../source_scripts/repository-info.json | 0 .../source_scripts/training/README.md | 2 +- .../source_scripts/training/deploy.R | 2 +- .../source_scripts/training/endpoints.R | 6 +-- .../source_scripts/training/train.R | 2 +- .../seed_code/deploy_app/config/constants.py | 1 - .../deploy_endpoint/deploy_endpoint_stack.py | 18 ++----- 22 files changed, 115 insertions(+), 87 deletions(-) delete mode 100644 mlops-multi-account-cdk/mlops-sm-project-template/seed_code/byoc_build_app/source_scripts/repository-info.json diff --git a/mlops-multi-account-cdk/mlops-sm-project-template/.pre-commit-config.yaml b/mlops-multi-account-cdk/mlops-sm-project-template/.pre-commit-config.yaml index 7a9c7e1c..288e4718 100644 --- a/mlops-multi-account-cdk/mlops-sm-project-template/.pre-commit-config.yaml +++ b/mlops-multi-account-cdk/mlops-sm-project-template/.pre-commit-config.yaml @@ -16,16 +16,39 @@ # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
repos: -- repo: https://github.com/pre-commit/pre-commit-hooks - rev: v4.3.0 - hooks: - - id: check-added-large-files - - id: check-json - - id: check-merge-conflict - # - id: check-yaml - - id: end-of-file-fixer - - id: requirements-txt-fixer - - id: trailing-whitespace +# General +- repo: https://github.com/pre-commit/pre-commit-hooks + rev: v4.3.0 + hooks: + - id: check-case-conflict + - id: detect-private-key + - id: trailing-whitespace + - id: end-of-file-fixer + - id: mixed-line-ending + args: + - --fix=lf + exclude: /package-lock\.json$ + - id: check-added-large-files + args: + - --maxkb=1000 + - id: check-merge-conflict + - id: no-commit-to-branch + args: + - --branch + - main + - id: pretty-format-json + args: + - --autofix + - --indent=2 + - --no-sort-keys + exclude: /package-lock\.json$ +# Secrets +- repo: https://github.com/awslabs/git-secrets + rev: b9e96b3212fa06aea65964ff0d5cda84ce935f38 + hooks: + - id: git-secrets + entry: git-secrets --scan + files: . - repo: https://github.com/psf/black rev: 22.6.0 hooks: diff --git a/mlops-multi-account-cdk/mlops-sm-project-template/ADVANCED_TOPICS.md b/mlops-multi-account-cdk/mlops-sm-project-template/ADVANCED_TOPICS.md index 0f33102e..7e2ce9f1 100644 --- a/mlops-multi-account-cdk/mlops-sm-project-template/ADVANCED_TOPICS.md +++ b/mlops-multi-account-cdk/mlops-sm-project-template/ADVANCED_TOPICS.md @@ -1,6 +1,43 @@ # Advanced topics -## Test the created templates as normal cloudformation templates +- [Advanced topics](#advanced-topics) + - [Setup CodeCommit with this repository](#setup-codecommit-with-this-repository) + - [Test the created sagemaker templates](#test-the-created-sagemaker-templates) + + +## Setup CodeCommit with this repository +You would wonder after you have cloned this repository and deployed the solution how would you then start to interact with your deployed CodeCommit repository and start using it as a main repository and push changes to it. You have 2 options for this: +1. 
Clone the created CodeCommit repository and start treating it separately from this repository +2. Just use this folder as a repository + +For the second option, you can do the following (while you are in the folder `mlops-sm-project-template`): +``` +git init +``` +This will create a local git for this folder which would be separate from the main so you can treat it as any git repo and it would not impact the main repository git. So, add the CodeCommit Repository as a remote source: +``` +git remote add origin https://git-codecommit.eu-west-1.amazonaws.com/v1/repos/mlops-sm-project-template +``` +Ensure you have configured your machine to connect to CodeCommit and make `git push` or `git pull` commands to it; follow [Step 3 from the AWS documentation](https://docs.aws.amazon.com/codecommit/latest/userguide/setting-up-https-unixes.html). + +Now you can interact with the CodeCommit repository as normal. You will need to do the following for the first commit: +``` +make init +git add -A +git commit -m "first commit" +git push origin main +``` + +Ensure that your git uses the branch name **main** by default, otherwise the push command might fail and you will need to create a main branch then push changes through it. + +If you want to push the changes you made back to the main repository this folder belongs to you can just run this command: +``` +rm -fr .git +``` +This will remove the git settings from this folder so it would go back to the main repository settings. + + +## Test the created sagemaker templates ***NOTE:** make sure to run `cdk synth` before running any of the commands defined below.* You will need to deploy the `service catalog stack` as that would setup your account with the required resources and ssm parameters before you can start testing your templates directly. 
If you don't have the service catalog stack already deployed in your account, you can achieve this by running the following command: @@ -22,7 +59,7 @@ otherwise make sure you have these ssm parameters defined: - /mlops/prod/account_id - /mlops/prod/region -For quick testing of the sagemaker templates, you could deploy the json generated by CDK directly in your account by running the following command: +**OPTION 1** For quick testing of the sagemaker templates, you could deploy the json generated by CDK directly in your account by running the following command: ``` aws cloudformation deploy \ --template-file ./cdk.out/byoc-project-stack-dev.template.json \ @@ -36,9 +73,9 @@ aws cloudformation deploy \ SageMakerProjectName=mlops-test-0 \ SageMakerProjectId=sm12340 ``` -This command will deploy the byoc project stack if you want to deploy other templates just change the `--template-file`, if you want to create a new stack you can change the other fields as well. +This command will deploy the byoc project stack if you want to deploy other templates just change the `--template-file`, if you want to create a new stack you can change the other fields as well. 
-It is also possible to use CDK command for this exact purpose but this would require you to add the following to `app.py` file: +**OPTION 2** It is also possible to use CDK command for this exact purpose but this would require you to add the following to `app.py` file: ``` from mlops_sm_project_template.templates.byoc_project_stack import MLOpsStack @@ -52,4 +89,4 @@ The run `cdk synth` and then run the following to deploy: ``` cdk deploy test --parameters SageMakerProjectName=mlops-test \ --parameters SageMakerProjectId=sm1234 --profile mlops-dev -``` \ No newline at end of file +``` diff --git a/mlops-multi-account-cdk/mlops-sm-project-template/README.md b/mlops-multi-account-cdk/mlops-sm-project-template/README.md index 03404521..4a1e2c44 100644 --- a/mlops-multi-account-cdk/mlops-sm-project-template/README.md +++ b/mlops-multi-account-cdk/mlops-sm-project-template/README.md @@ -371,5 +371,3 @@ One of the following would solve the problem: * Docker is having an issue so restart your docker daemon * Refresh your awscli credentials * Clear all cached cdk outputs by running `make clean` - - diff --git a/mlops-multi-account-cdk/mlops-sm-project-template/mlops_sm_project_template/codecommit_stack.py b/mlops-multi-account-cdk/mlops-sm-project-template/mlops_sm_project_template/codecommit_stack.py index ce6c5b10..b62ef866 100644 --- a/mlops-multi-account-cdk/mlops-sm-project-template/mlops_sm_project_template/codecommit_stack.py +++ b/mlops-multi-account-cdk/mlops-sm-project-template/mlops_sm_project_template/codecommit_stack.py @@ -26,10 +26,7 @@ from constructs import Construct -from mlops_sm_project_template.config.constants import ( - CODE_COMMIT_REPO_NAME, - PIPELINE_BRANCH -) +from mlops_sm_project_template.config.constants import CODE_COMMIT_REPO_NAME, PIPELINE_BRANCH class CodeCommitStack(Stack): diff --git a/mlops-multi-account-cdk/mlops-sm-project-template/mlops_sm_project_template/config/constants.py 
b/mlops-multi-account-cdk/mlops-sm-project-template/mlops_sm_project_template/config/constants.py index 1568cc11..cd64f57d 100644 --- a/mlops-multi-account-cdk/mlops-sm-project-template/mlops_sm_project_template/config/constants.py +++ b/mlops-multi-account-cdk/mlops-sm-project-template/mlops_sm_project_template/config/constants.py @@ -15,7 +15,7 @@ # OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -CODE_COMMIT_REPO_NAME = "mlops-sm-project-template-rt" +CODE_COMMIT_REPO_NAME = "mlops-sm-project-template" PIPELINE_BRANCH = "main" PIPELINE_ACCOUNT = "" # account used to host the pipeline handling updates of this repository diff --git a/mlops-multi-account-cdk/mlops-sm-project-template/mlops_sm_project_template/service_catalog_stack.py b/mlops-multi-account-cdk/mlops-sm-project-template/mlops_sm_project_template/service_catalog_stack.py index f6664b30..99e38f53 100644 --- a/mlops-multi-account-cdk/mlops-sm-project-template/mlops_sm_project_template/service_catalog_stack.py +++ b/mlops-multi-account-cdk/mlops-sm-project-template/mlops_sm_project_template/service_catalog_stack.py @@ -247,7 +247,7 @@ def __init__( # Create the build and deployment asset as an output to pass to pipeline stack zip_image = DockerImage.from_build("mlops_sm_project_template/cdk_helper_scripts/zip-image") - + build_app_asset = s3_assets.Asset( self, "BuildAsset", diff --git a/mlops-multi-account-cdk/mlops-sm-project-template/mlops_sm_project_template/templates/byoc_pipeline_constructs/build_pipeline_construct.py b/mlops-multi-account-cdk/mlops-sm-project-template/mlops_sm_project_template/templates/byoc_pipeline_constructs/build_pipeline_construct.py index cf34af13..a6434c47 100644 --- a/mlops-multi-account-cdk/mlops-sm-project-template/mlops_sm_project_template/templates/byoc_pipeline_constructs/build_pipeline_construct.py +++ 
b/mlops-multi-account-cdk/mlops-sm-project-template/mlops_sm_project_template/templates/byoc_pipeline_constructs/build_pipeline_construct.py @@ -173,15 +173,9 @@ def __init__( environment=codebuild.BuildEnvironment( build_image=codebuild.LinuxBuildImage.STANDARD_5_0, environment_variables={ - "SAGEMAKER_PROJECT_NAME": codebuild.BuildEnvironmentVariable( - value=project_name - ), - "SAGEMAKER_PROJECT_ID": codebuild.BuildEnvironmentVariable( - value=project_id - ), - "MODEL_PACKAGE_GROUP_NAME": codebuild.BuildEnvironmentVariable( - value=model_package_group_name - ), + "SAGEMAKER_PROJECT_NAME": codebuild.BuildEnvironmentVariable(value=project_name), + "SAGEMAKER_PROJECT_ID": codebuild.BuildEnvironmentVariable(value=project_id), + "MODEL_PACKAGE_GROUP_NAME": codebuild.BuildEnvironmentVariable(value=model_package_group_name), "AWS_REGION": codebuild.BuildEnvironmentVariable(value=Aws.REGION), "SAGEMAKER_PIPELINE_NAME": codebuild.BuildEnvironmentVariable( value=pipeline_name, @@ -192,9 +186,7 @@ def __init__( "SAGEMAKER_PIPELINE_ROLE_ARN": codebuild.BuildEnvironmentVariable( value=sagemaker_execution_role.role_arn, ), - "ARTIFACT_BUCKET": codebuild.BuildEnvironmentVariable( - value=s3_artifact.bucket_name - ), + "ARTIFACT_BUCKET": codebuild.BuildEnvironmentVariable(value=s3_artifact.bucket_name), "ARTIFACT_BUCKET_KMS_ID": codebuild.BuildEnvironmentVariable( value=s3_artifact.encryption_key.key_id ), @@ -223,18 +215,14 @@ def __init__( }, } ), - environment=codebuild.BuildEnvironment( - build_image=codebuild.LinuxBuildImage.STANDARD_5_0, privileged=True - ), + environment=codebuild.BuildEnvironment(build_image=codebuild.LinuxBuildImage.STANDARD_5_0, privileged=True), ) docker_build.add_to_role_policy( iam.PolicyStatement( actions=["ecr:*"], effect=iam.Effect.ALLOW, - resources=[ - f"arn:aws:ecr:{Aws.REGION}:{Aws.ACCOUNT_ID}:repository/{ecr_repository_name}" - ], + resources=[f"arn:aws:ecr:{Aws.REGION}:{Aws.ACCOUNT_ID}:repository/{ecr_repository_name}"], ) ) @@ -271,18 
+259,12 @@ def __init__( build_stage.add_action( codepipeline_actions.CodeBuildAction( - action_name="DockerBuild", - input=source_artifact, - project=docker_build, - run_order=1 + action_name="DockerBuild", input=source_artifact, project=docker_build, run_order=1 ) ) - + build_stage.add_action( codepipeline_actions.CodeBuildAction( - action_name="SMPipeline", - input=source_artifact, - project=sm_pipeline_build, - run_order=2 + action_name="SMPipeline", input=source_artifact, project=sm_pipeline_build, run_order=2 ) ) diff --git a/mlops-multi-account-cdk/mlops-sm-project-template/seed_code/byoc_build_app/buildspec.yml b/mlops-multi-account-cdk/mlops-sm-project-template/seed_code/byoc_build_app/buildspec.yml index 6f475132..b94ff07c 100644 --- a/mlops-multi-account-cdk/mlops-sm-project-template/seed_code/byoc_build_app/buildspec.yml +++ b/mlops-multi-account-cdk/mlops-sm-project-template/seed_code/byoc_build_app/buildspec.yml @@ -12,11 +12,11 @@ phases: - export PYTHONUNBUFFERED=TRUE - export SAGEMAKER_PROJECT_NAME_ID="${SAGEMAKER_PROJECT_NAME}-${SAGEMAKER_PROJECT_ID}" # Copy sample dataset for template - REMOVE when using your own data - - aws s3 cp s3://sagemaker-sample-files/datasets/tabular/uci_abalone/abalone.csv . + - aws s3 cp s3://sagemaker-sample-files/datasets/tabular/uci_abalone/abalone.csv . 
- aws s3 cp abalone.csv s3://${ARTIFACT_BUCKET} - | run-pipeline --module-name ml_pipelines.training.pipeline \ --role-arn $SAGEMAKER_PIPELINE_ROLE_ARN \ --tags "[{\"Key\":\"sagemaker:project-name\", \"Value\":\"${SAGEMAKER_PROJECT_NAME}\"}, {\"Key\":\"sagemaker:project-id\", \"Value\":\"${SAGEMAKER_PROJECT_ID}\"}]" \ --kwargs "{\"region\":\"${AWS_REGION}\",\"role\":\"${SAGEMAKER_PIPELINE_ROLE_ARN}\",\"default_bucket\":\"${ARTIFACT_BUCKET}\",\"pipeline_name\":\"${SAGEMAKER_PROJECT_NAME_ID}\",\"model_package_group_name\":\"${MODEL_PACKAGE_GROUP_NAME}\",\"base_job_prefix\":\"${SAGEMAKER_PROJECT_NAME_ID}\", \"bucket_kms_id\":\"${ARTIFACT_BUCKET_KMS_ID}\", \"git_hash\":\"${CODEBUILD_RESOLVED_SOURCE_VERSION}\", \"ecr_repo_uri\":\"${ECR_REPO_URI}\", \"default_input_data\":\"s3://${ARTIFACT_BUCKET}/abalone.csv\"}" - - echo "Create/Update of the SageMaker Pipeline and execution completed." \ No newline at end of file + - echo "Create/Update of the SageMaker Pipeline and execution completed." diff --git a/mlops-multi-account-cdk/mlops-sm-project-template/seed_code/byoc_build_app/ml_pipelines/training/pipeline.py b/mlops-multi-account-cdk/mlops-sm-project-template/seed_code/byoc_build_app/ml_pipelines/training/pipeline.py index 37879308..0f84194c 100644 --- a/mlops-multi-account-cdk/mlops-sm-project-template/seed_code/byoc_build_app/ml_pipelines/training/pipeline.py +++ b/mlops-multi-account-cdk/mlops-sm-project-template/seed_code/byoc_build_app/ml_pipelines/training/pipeline.py @@ -162,7 +162,7 @@ def get_pipeline( step_process = ProcessingStep( name="PreprocessAbaloneData", processor=script_processor, - inputs=[ProcessingInput(source =input_data, destination="/opt/ml/processing/input")], + inputs=[ProcessingInput(source=input_data, destination="/opt/ml/processing/input")], outputs=[ ProcessingOutput(output_name="train", source="/opt/ml/processing/output/train"), ProcessingOutput(output_name="validation", source="/opt/ml/processing/output/validation"), @@ -185,7 +185,7 @@ 
def get_pipeline( output_kms_key=bucket_kms_id, source_dir="source_scripts/training/", entry_point="train.R", - metric_definitions=[{"Name":"rmse-validation", "Regex": "Calculated validation RMSE: ([0-9.]+);.*$"}], + metric_definitions=[{"Name": "rmse-validation", "Regex": "Calculated validation RMSE: ([0-9.]+);.*$"}], ) step_train = TrainingStep( diff --git a/mlops-multi-account-cdk/mlops-sm-project-template/seed_code/byoc_build_app/source_scripts/Dockerfile b/mlops-multi-account-cdk/mlops-sm-project-template/seed_code/byoc_build_app/source_scripts/Dockerfile index c885bab3..696959c0 100644 --- a/mlops-multi-account-cdk/mlops-sm-project-template/seed_code/byoc_build_app/source_scripts/Dockerfile +++ b/mlops-multi-account-cdk/mlops-sm-project-template/seed_code/byoc_build_app/source_scripts/Dockerfile @@ -3,7 +3,7 @@ FROM public.ecr.aws/docker/library/r-base:4.1.2 as base # Install tidyverse RUN apt update && apt-get install -y --no-install-recommends \ r-cran-tidyverse - + RUN R -e "install.packages(c('rjson'))" @@ -19,8 +19,8 @@ RUN apt-get -y update && apt-get install -y --no-install-recommends \ ca-certificates \ libcurl4-openssl-dev \ libsodium-dev - -RUN apt-get update && apt-get install -y python3-dev python3-pip + +RUN apt-get update && apt-get install -y python3-dev python3-pip RUN pip3 install boto3 RUN R -e "install.packages('reticulate',dependencies=TRUE, repos='http://cran.rstudio.com/')" RUN R -e "install.packages(c('readr','plumber'))" diff --git a/mlops-multi-account-cdk/mlops-sm-project-template/seed_code/byoc_build_app/source_scripts/docker_helpers/entrypoint.R b/mlops-multi-account-cdk/mlops-sm-project-template/seed_code/byoc_build_app/source_scripts/docker_helpers/entrypoint.R index 70c4b10d..b69849d1 100644 --- a/mlops-multi-account-cdk/mlops-sm-project-template/seed_code/byoc_build_app/source_scripts/docker_helpers/entrypoint.R +++ 
b/mlops-multi-account-cdk/mlops-sm-project-template/seed_code/byoc_build_app/source_scripts/docker_helpers/entrypoint.R @@ -38,24 +38,24 @@ inference_code_dir <- paste(model_path, 'code', sep='/') if (args=="train") { - + # This is where the hyperparamters are saved by the estimator on the container instance param_path <- paste(prefix, 'input/config/hyperparameters.json', sep='/') params <- read_json(param_path) - + s3_source_code_tar <- gsub('"', '', params$sagemaker_submit_directory) script <- gsub('"', '', params$sagemaker_program) - + bucketkey <- str_replace(s3_source_code_tar, "s3://", "") bucket <- str_remove(bucketkey, "/.*") key <- str_remove(bucketkey, ".*?/") - + s3$download_file(bucket, key, "sourcedir.tar.gz") untar("sourcedir.tar.gz", exdir=code_dir) - + print("training started") source(file.path(code_dir, script)) - + } else if(args=="serve"){ print("inference time") source(file.path(inference_code_dir, "deploy.R")) diff --git a/mlops-multi-account-cdk/mlops-sm-project-template/seed_code/byoc_build_app/source_scripts/docker_helpers/run.sh b/mlops-multi-account-cdk/mlops-sm-project-template/seed_code/byoc_build_app/source_scripts/docker_helpers/run.sh index ea149bb0..3b4a2d2e 100644 --- a/mlops-multi-account-cdk/mlops-sm-project-template/seed_code/byoc_build_app/source_scripts/docker_helpers/run.sh +++ b/mlops-multi-account-cdk/mlops-sm-project-template/seed_code/byoc_build_app/source_scripts/docker_helpers/run.sh @@ -1,3 +1,3 @@ #!/bin/bash echo "ready to execute" -Rscript /opt/ml/entrypoint.R $1 \ No newline at end of file +Rscript /opt/ml/entrypoint.R $1 diff --git a/mlops-multi-account-cdk/mlops-sm-project-template/seed_code/byoc_build_app/source_scripts/evaluate/README.md b/mlops-multi-account-cdk/mlops-sm-project-template/seed_code/byoc_build_app/source_scripts/evaluate/README.md index daa6bdd5..3727ec16 100644 --- a/mlops-multi-account-cdk/mlops-sm-project-template/seed_code/byoc_build_app/source_scripts/evaluate/README.md +++ 
b/mlops-multi-account-cdk/mlops-sm-project-template/seed_code/byoc_build_app/source_scripts/evaluate/README.md @@ -1 +1 @@ -Use this folder to add all code related to evaluate the performance of your model. +Use this folder to add all code related to evaluate the performance of your model. diff --git a/mlops-multi-account-cdk/mlops-sm-project-template/seed_code/byoc_build_app/source_scripts/evaluate/evaluation.R b/mlops-multi-account-cdk/mlops-sm-project-template/seed_code/byoc_build_app/source_scripts/evaluate/evaluation.R index 7f0ed1cf..4976fed1 100644 --- a/mlops-multi-account-cdk/mlops-sm-project-template/seed_code/byoc_build_app/source_scripts/evaluate/evaluation.R +++ b/mlops-multi-account-cdk/mlops-sm-project-template/seed_code/byoc_build_app/source_scripts/evaluate/evaluation.R @@ -44,4 +44,4 @@ report_dict = list( output_dir = "/opt/ml/processing/evaluation/evaluation.json" jsonData <- toJSON(report_dict) -write(jsonData, output_dir) \ No newline at end of file +write(jsonData, output_dir) diff --git a/mlops-multi-account-cdk/mlops-sm-project-template/seed_code/byoc_build_app/source_scripts/preprocessing/README.md b/mlops-multi-account-cdk/mlops-sm-project-template/seed_code/byoc_build_app/source_scripts/preprocessing/README.md index 400033c4..0b8678a4 100644 --- a/mlops-multi-account-cdk/mlops-sm-project-template/seed_code/byoc_build_app/source_scripts/preprocessing/README.md +++ b/mlops-multi-account-cdk/mlops-sm-project-template/seed_code/byoc_build_app/source_scripts/preprocessing/README.md @@ -1 +1 @@ -Use this folder to add all code related to preprocessing your data. +Use this folder to add all code related to preprocessing your data. 
diff --git a/mlops-multi-account-cdk/mlops-sm-project-template/seed_code/byoc_build_app/source_scripts/repository-info.json b/mlops-multi-account-cdk/mlops-sm-project-template/seed_code/byoc_build_app/source_scripts/repository-info.json deleted file mode 100644 index e69de29b..00000000 diff --git a/mlops-multi-account-cdk/mlops-sm-project-template/seed_code/byoc_build_app/source_scripts/training/README.md b/mlops-multi-account-cdk/mlops-sm-project-template/seed_code/byoc_build_app/source_scripts/training/README.md index 31c36bb4..fcf7e627 100644 --- a/mlops-multi-account-cdk/mlops-sm-project-template/seed_code/byoc_build_app/source_scripts/training/README.md +++ b/mlops-multi-account-cdk/mlops-sm-project-template/seed_code/byoc_build_app/source_scripts/training/README.md @@ -1 +1 @@ -Use this folder to add all code related to training your model. +Use this folder to add all code related to training your model. diff --git a/mlops-multi-account-cdk/mlops-sm-project-template/seed_code/byoc_build_app/source_scripts/training/deploy.R b/mlops-multi-account-cdk/mlops-sm-project-template/seed_code/byoc_build_app/source_scripts/training/deploy.R index e621fc72..82848a64 100644 --- a/mlops-multi-account-cdk/mlops-sm-project-template/seed_code/byoc_build_app/source_scripts/training/deploy.R +++ b/mlops-multi-account-cdk/mlops-sm-project-template/seed_code/byoc_build_app/source_scripts/training/deploy.R @@ -36,4 +36,4 @@ inference <- function(x){ } app <- plumb(paste0(code_path,'endpoints.R')) -app$run(host='0.0.0.0', port=8080) \ No newline at end of file +app$run(host='0.0.0.0', port=8080) diff --git a/mlops-multi-account-cdk/mlops-sm-project-template/seed_code/byoc_build_app/source_scripts/training/endpoints.R b/mlops-multi-account-cdk/mlops-sm-project-template/seed_code/byoc_build_app/source_scripts/training/endpoints.R index b75ca2b9..f0126104 100644 --- a/mlops-multi-account-cdk/mlops-sm-project-template/seed_code/byoc_build_app/source_scripts/training/endpoints.R +++ 
b/mlops-multi-account-cdk/mlops-sm-project-template/seed_code/byoc_build_app/source_scripts/training/endpoints.R @@ -27,11 +27,11 @@ function() { #' @param req The http request sent #' @post /invocations function(req) { - + # Read in data input_json <- fromJSON(req$postBody) output <- inference(input_json$features) # Return prediction return(output) - -} \ No newline at end of file + +} diff --git a/mlops-multi-account-cdk/mlops-sm-project-template/seed_code/byoc_build_app/source_scripts/training/train.R b/mlops-multi-account-cdk/mlops-sm-project-template/seed_code/byoc_build_app/source_scripts/training/train.R index 082bb95b..6559a239 100644 --- a/mlops-multi-account-cdk/mlops-sm-project-template/seed_code/byoc_build_app/source_scripts/training/train.R +++ b/mlops-multi-account-cdk/mlops-sm-project-template/seed_code/byoc_build_app/source_scripts/training/train.R @@ -50,4 +50,4 @@ list_of_files <- list.files(code_path) dir.create(inference_code_dir) file.copy(list_of_files, inference_code_dir, recursive=TRUE) -print("successfully saved model & code") \ No newline at end of file +print("successfully saved model & code") diff --git a/mlops-multi-account-cdk/mlops-sm-project-template/seed_code/deploy_app/config/constants.py b/mlops-multi-account-cdk/mlops-sm-project-template/seed_code/deploy_app/config/constants.py index 9f133d6a..c0d97944 100644 --- a/mlops-multi-account-cdk/mlops-sm-project-template/seed_code/deploy_app/config/constants.py +++ b/mlops-multi-account-cdk/mlops-sm-project-template/seed_code/deploy_app/config/constants.py @@ -35,4 +35,3 @@ MODEL_PACKAGE_GROUP_NAME = os.getenv("MODEL_PACKAGE_GROUP_NAME", "") MODEL_BUCKET_ARN = os.getenv("MODEL_BUCKET_ARN", "arn:aws:s3:::*mlops*") ECR_REPO_ARN = os.getenv("ECR_REPO_ARN", "*") - diff --git a/mlops-multi-account-cdk/mlops-sm-project-template/seed_code/deploy_app/deploy_endpoint/deploy_endpoint_stack.py 
b/mlops-multi-account-cdk/mlops-sm-project-template/seed_code/deploy_app/deploy_endpoint/deploy_endpoint_stack.py index bb8f0ac1..14faa3e2 100644 --- a/mlops-multi-account-cdk/mlops-sm-project-template/seed_code/deploy_app/deploy_endpoint/deploy_endpoint_stack.py +++ b/mlops-multi-account-cdk/mlops-sm-project-template/seed_code/deploy_app/deploy_endpoint/deploy_endpoint_stack.py @@ -36,7 +36,7 @@ MODEL_PACKAGE_GROUP_NAME, DEV_ACCOUNT, ECR_REPO_ARN, - MODEL_BUCKET_ARN + MODEL_BUCKET_ARN, ) from datetime import datetime, timezone @@ -58,9 +58,7 @@ class EndpointConfigProductionVariant(StageYamlDataClassConfig): instance_type: str = "ml.m5.2xlarge" variant_name: str = "AllTraffic" - FILE_PATH: Path = create_file_path_field( - "endpoint-config.yml", path_is_absolute=True - ) + FILE_PATH: Path = create_file_path_field("endpoint-config.yml", path_is_absolute=True) def get_endpoint_config_production_variant(self, model_name): """ @@ -165,9 +163,7 @@ def __init__( assumed_by=iam.ServicePrincipal("sagemaker.amazonaws.com"), managed_policies=[ model_execution_policy, - iam.ManagedPolicy.from_aws_managed_policy_name( - "AmazonSageMakerFullAccess" - ), + iam.ManagedPolicy.from_aws_managed_policy_name("AmazonSageMakerFullAccess"), ], ) @@ -188,9 +184,7 @@ def __init__( execution_role_arn=model_execution_role.role_arn, model_name=model_name, containers=[ - sagemaker.CfnModel.ContainerDefinitionProperty( - model_package_name=latest_approved_model_package - ) + sagemaker.CfnModel.ContainerDefinitionProperty(model_package_name=latest_approved_model_package) ], vpc_config=sagemaker.CfnModel.VpcConfigProperty( security_group_ids=[sg_id], @@ -229,9 +223,7 @@ def __init__( endpoint_config_name=endpoint_config_name, kms_key_id=kms_key.key_id, production_variants=[ - endpoint_config_production_variant.get_endpoint_config_production_variant( - model.model_name - ) + endpoint_config_production_variant.get_endpoint_config_production_variant(model.model_name) ], ) From 
fee1150a0feaedb901d59eb6501d527cd8a73511 Mon Sep 17 00:00:00 2001 From: Fatema Alkhanaizi Date: Fri, 5 Aug 2022 11:44:32 +0100 Subject: [PATCH 12/15] more docs --- .../mlops-sm-project-template/ADVANCED_TOPICS.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/mlops-multi-account-cdk/mlops-sm-project-template/ADVANCED_TOPICS.md b/mlops-multi-account-cdk/mlops-sm-project-template/ADVANCED_TOPICS.md index 7e2ce9f1..4d01aa52 100644 --- a/mlops-multi-account-cdk/mlops-sm-project-template/ADVANCED_TOPICS.md +++ b/mlops-multi-account-cdk/mlops-sm-project-template/ADVANCED_TOPICS.md @@ -34,7 +34,7 @@ If you want to push the changes you made back to the main repository this folder ``` rm -fr .git ``` -This will remove the git settings from this folder so it would go back to the main repository settings. +This will remove the git settings from this folder so it would go back to the main repository settings. You can then raise a PR to include your changes to the main repository in GitHub. ## Test the created sagemaker templates @@ -73,7 +73,7 @@ aws cloudformation deploy \ SageMakerProjectName=mlops-test-0 \ SageMakerProjectId=sm12340 ``` -This command will deploy the byoc project stack if you want to deploy other templates just change the `--template-file`, if you want to create a new stack you can change the other fields as well. +This command will deploy the byoc project stack if you want to deploy other templates just change the `--template-file`, if you want to create a new stack you can change the other fields as well. 
**OPTION 2** It is also possible to use CDK command for this exact purpose but this would require you to add the following to `app.py` file: ``` @@ -89,4 +89,4 @@ The run `cdk synth` and then run the following to deploy: ``` cdk deploy test --parameters SageMakerProjectName=mlops-test \ --parameters SageMakerProjectId=sm1234 --profile mlops-dev -``` +``` \ No newline at end of file From 4bb2651df1b162cb7ffd533ad699c4381be69ca5 Mon Sep 17 00:00:00 2001 From: Fatema Alkhanaizi Date: Fri, 5 Aug 2022 12:03:42 +0100 Subject: [PATCH 13/15] more docs --- .../mlops-sm-project-template/ADVANCED_TOPICS.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/mlops-multi-account-cdk/mlops-sm-project-template/ADVANCED_TOPICS.md b/mlops-multi-account-cdk/mlops-sm-project-template/ADVANCED_TOPICS.md index 4d01aa52..907bc5be 100644 --- a/mlops-multi-account-cdk/mlops-sm-project-template/ADVANCED_TOPICS.md +++ b/mlops-multi-account-cdk/mlops-sm-project-template/ADVANCED_TOPICS.md @@ -1,4 +1,5 @@ # Advanced topics +The topics defined here assume you have already deployed the solution once following the steps in teh main [README](README.md) - [Advanced topics](#advanced-topics) - [Setup CodeCommit with this repository](#setup-codecommit-with-this-repository) @@ -22,10 +23,11 @@ Ensure you have configured your machine to connect to CodeCommit and make `git p Now you can interact with the CodeCommit repository as normal. You will need to do the following for the first commit: ``` -make init git add -A git commit -m "first commit" +export AWS_PROFILE=mlops-governance git push origin main +make init # this will enable precommit which will now block any further pushes to the main branch ``` Ensure that your git uses the branch name **main** by default, otherwise the push command might fail and you will need to create a main branch then push changes through it. 
From 0dd2077f5a8798b26567fdf1207b7e054fb38cb2 Mon Sep 17 00:00:00 2001 From: Fatema Alkhanaizi Date: Fri, 5 Aug 2022 12:06:43 +0100 Subject: [PATCH 14/15] first commit --- .../mlops-infra/ADVANCED_TOPICS.md | 39 +++++++++++++++++++ .../ADVANCED_TOPICS.md | 2 +- .../mlops-sm-project-template/README.md | 2 - 3 files changed, 40 insertions(+), 3 deletions(-) create mode 100644 mlops-multi-account-cdk/mlops-infra/ADVANCED_TOPICS.md diff --git a/mlops-multi-account-cdk/mlops-infra/ADVANCED_TOPICS.md b/mlops-multi-account-cdk/mlops-infra/ADVANCED_TOPICS.md new file mode 100644 index 00000000..13154a85 --- /dev/null +++ b/mlops-multi-account-cdk/mlops-infra/ADVANCED_TOPICS.md @@ -0,0 +1,39 @@ +# Advanced topics +The topics defined here assume you have already deployed the solution once following the steps in the main [README](README.md) + +- [Advanced topics](#advanced-topics) + - [Setup CodeCommit with this repository](#setup-codecommit-with-this-repository) + + +## Setup CodeCommit with this repository +You would wonder after you have cloned this repository and deployed the solution how would you then start to interact with your deployed CodeCommit repository and start using it as a main repository and push changes to it. You have 2 options for this: +1. Clone the created CodeCommit repository and start treating it separately from this repository +2. Just use this folder as a repository + +For the second option, you can do the following (while you are in the folder `mlops-infra`): +``` +git init +``` +this will create a local git for this folder which would be separate from the main so you can treat it as any git repo and it would not impact the main repository git. 
So, add the CodeCommit Repository as a remote source: +``` +git remote add origin https://git-codecommit.eu-west-1.amazonaws.com/v1/repos/mlops-infra +``` +Ensure you have configured your machine to connect to CodeCommit and make `git push` or `git pull` commands to it; follow [Step 3 from the AWS documentation](https://docs.aws.amazon.com/codecommit/latest/userguide/setting-up-https-unixes.html). + +Now you can interact with the CodeCommit repository as normal. You will need to do the following for the first commit: +``` +git add -A +git commit -m "first commit" +export AWS_PROFILE=mlops-governance +git push origin main +make init # this will enable precommit which will now block any further pushes to the main branch +``` + +Ensure that your git uses the branch name **main** by default, otherwise the push command might fail and you will need to create a main branch then push changes through it. + +If you want to push the changes you made back to the main repository this folder belongs to you can just run this command: +``` +rm -fr .git +``` +This will remove the git settings from this folder so it would go back to the main repository settings. You can then raise a PR to include your changes to the main repository in GitHub. 
+ diff --git a/mlops-multi-account-cdk/mlops-sm-project-template/ADVANCED_TOPICS.md b/mlops-multi-account-cdk/mlops-sm-project-template/ADVANCED_TOPICS.md index 907bc5be..b007106e 100644 --- a/mlops-multi-account-cdk/mlops-sm-project-template/ADVANCED_TOPICS.md +++ b/mlops-multi-account-cdk/mlops-sm-project-template/ADVANCED_TOPICS.md @@ -1,5 +1,5 @@ # Advanced topics -The topics defined here assume you have already deployed the solution once following the steps in teh main [README](README.md) +The topics defined here assume you have already deployed the solution once following the steps in the main [README](README.md) - [Advanced topics](#advanced-topics) - [Setup CodeCommit with this repository](#setup-codecommit-with-this-repository) diff --git a/mlops-multi-account-cdk/mlops-sm-project-template/README.md b/mlops-multi-account-cdk/mlops-sm-project-template/README.md index 4a1e2c44..8925f9d7 100644 --- a/mlops-multi-account-cdk/mlops-sm-project-template/README.md +++ b/mlops-multi-account-cdk/mlops-sm-project-template/README.md @@ -20,8 +20,6 @@ This repository contains the resources that are required to deploy the MLOps Fou - [Manual Deployment of Service Catalog Stack](#manual-deployment-of-service-catalog-stack) - [Clean-up](#clean-up) - [Troubleshooting](#troubleshooting) - - [Advanced topics](#advanced-topics) - - [Test the created templates as normal cloudformation templates](#test-the-created-templates-as-normal-cloudformation-templates) ## Solution Architecture From d1e8b149950afa2f92ce8e16f67bbd9c0a5b8a90 Mon Sep 17 00:00:00 2001 From: Fatema Alkhanaizi Date: Fri, 5 Aug 2022 15:20:35 +0100 Subject: [PATCH 15/15] minor bug fixes --- .../seed_code/deploy_app/config/constants.py | 2 +- .../deploy_endpoint/deploy_endpoint_stack.py | 30 +++++++++++++------ 2 files changed, 22 insertions(+), 10 deletions(-) diff --git a/mlops-multi-account-cdk/mlops-sm-project-template/seed_code/deploy_app/config/constants.py 
b/mlops-multi-account-cdk/mlops-sm-project-template/seed_code/deploy_app/config/constants.py index c0d97944..c4bb7d31 100644 --- a/mlops-multi-account-cdk/mlops-sm-project-template/seed_code/deploy_app/config/constants.py +++ b/mlops-multi-account-cdk/mlops-sm-project-template/seed_code/deploy_app/config/constants.py @@ -34,4 +34,4 @@ PROJECT_ID = os.getenv("PROJECT_ID", "") MODEL_PACKAGE_GROUP_NAME = os.getenv("MODEL_PACKAGE_GROUP_NAME", "") MODEL_BUCKET_ARN = os.getenv("MODEL_BUCKET_ARN", "arn:aws:s3:::*mlops*") -ECR_REPO_ARN = os.getenv("ECR_REPO_ARN", "*") +ECR_REPO_ARN = os.getenv("ECR_REPO_ARN", None) diff --git a/mlops-multi-account-cdk/mlops-sm-project-template/seed_code/deploy_app/deploy_endpoint/deploy_endpoint_stack.py b/mlops-multi-account-cdk/mlops-sm-project-template/seed_code/deploy_app/deploy_endpoint/deploy_endpoint_stack.py index 14faa3e2..bd2577ac 100644 --- a/mlops-multi-account-cdk/mlops-sm-project-template/seed_code/deploy_app/deploy_endpoint/deploy_endpoint_stack.py +++ b/mlops-multi-account-cdk/mlops-sm-project-template/seed_code/deploy_app/deploy_endpoint/deploy_endpoint_stack.py @@ -58,7 +58,9 @@ class EndpointConfigProductionVariant(StageYamlDataClassConfig): instance_type: str = "ml.m5.2xlarge" variant_name: str = "AllTraffic" - FILE_PATH: Path = create_file_path_field("endpoint-config.yml", path_is_absolute=True) + FILE_PATH: Path = create_file_path_field( + "endpoint-config.yml", path_is_absolute=True + ) def get_endpoint_config_production_variant(self, model_name): """ @@ -148,22 +150,28 @@ def __init__( effect=iam.Effect.ALLOW, resources=[f"arn:aws:kms:{Aws.REGION}:{DEV_ACCOUNT}:key/*"], ), - iam.PolicyStatement( - actions=["ecr:Get*"], - effect=iam.Effect.ALLOW, - resources=[ECR_REPO_ARN], - ), ] ), ) + if ECR_REPO_ARN: + model_execution_policy.add_statements( + iam.PolicyStatement( + actions=["ecr:Get*"], + effect=iam.Effect.ALLOW, + resources=[ECR_REPO_ARN], + ) + ) + model_execution_role = iam.Role( self, "ModelExecutionRole", 
assumed_by=iam.ServicePrincipal("sagemaker.amazonaws.com"), managed_policies=[ model_execution_policy, - iam.ManagedPolicy.from_aws_managed_policy_name("AmazonSageMakerFullAccess"), + iam.ManagedPolicy.from_aws_managed_policy_name( + "AmazonSageMakerFullAccess" + ), ], ) @@ -184,7 +192,9 @@ def __init__( execution_role_arn=model_execution_role.role_arn, model_name=model_name, containers=[ - sagemaker.CfnModel.ContainerDefinitionProperty(model_package_name=latest_approved_model_package) + sagemaker.CfnModel.ContainerDefinitionProperty( + model_package_name=latest_approved_model_package + ) ], vpc_config=sagemaker.CfnModel.VpcConfigProperty( security_group_ids=[sg_id], @@ -223,7 +233,9 @@ def __init__( endpoint_config_name=endpoint_config_name, kms_key_id=kms_key.key_id, production_variants=[ - endpoint_config_production_variant.get_endpoint_config_production_variant(model.model_name) + endpoint_config_production_variant.get_endpoint_config_production_variant( + model.model_name + ) ], )