diff --git a/multi-branch-mlops-train/README.md b/multi-branch-mlops-train/README.md new file mode 100644 index 00000000..9097024d --- /dev/null +++ b/multi-branch-mlops-train/README.md @@ -0,0 +1,148 @@ +# Multi-Branch MLOps training pipeline + +## Purpose + +The purpose of this template is to enable multiple data scientists to run concurrent experiments in parallel without interfering with each other or submitting conflicting changes to the repository. + +Much like feature branches and GitFlow in software engineering, this sample introduces the concept of experiment branches. + +Each experiment, when pushed to the remote repository with `git push`, triggers a training job that generates a model artifact tagged with the commit hash and a `Pending` status. + +When a pull request from the `experiment/` branch into `main` is approved, the status of the produced model artifact is automatically changed to `Approved`. + +![experiment-branch.jpg](images/experiment-branch.jpg) + +## Architecture + +Two architectures are available: one using AWS CodePipeline and AWS CodeCommit, and another using Jenkins and GitHub. + +### AWS CodePipeline and AWS CodeCommit + +![codepipeline-codecommit-arch-train-complete.png](images/codepipeline-codecommit-arch-train-complete.png) + +### Jenkins and GitHub + +![jenkins-arch-train-complete.png](images/jenkins-arch-train-complete.png) + +## Usage (Adding the template to Amazon SageMaker Projects in Studio) + +### Step 1. Deploy the baseline stack + +``` +git clone https://github.com/aws-samples/sagemaker-custom-project-templates.git +mkdir sample-multi-branch-train +cp -r sagemaker-custom-project-templates/multi-branch-mlops-train/* sample-multi-branch-train +cd sample-multi-branch-train +./deploy.sh -p code_pipeline+code_commit +``` + +To deploy the stack with support for Jenkins and GitHub instead, use `./deploy.sh -p jenkins`. + +### Step 2. Create a portfolio in AWS Service Catalog + +![img.png](images/create-portfolio.png) + +### Step 3. Create a new product for the portfolio + +![img.png](images/create-product-1.png) + +![img.png](images/create-product-2.png) + +Use the AWS CloudFormation template deployed by the baseline stack, replacing `<account-id>` with your AWS account ID (the bucket is created by the baseline stack). + +`https://cloud-formation-<account-id>-us-east-1.s3.amazonaws.com/model_train.yaml` + +![img.png](images/create-product-3.png) + +### Step 4. Add the SageMaker visibility tag to the product + +Tag `sagemaker:studio-visibility` with value `true`. + +![img.png](images/add-product-tag.png) + +### Step 5. Go to the created portfolio and add a launch constraint. + +The role `MultiBranchTrainMLOpsLaunchRole` was created by the baseline stack. + +![img.png](images/add-portfolio-constraint.png) + +### Step 6. Go to the created portfolio and share it with the relevant users, as well as with the SageMaker execution role used by SageMaker Studio. + +![img.png](images/add-portfolio-roles.png) + +### Step 7. The template becomes available in SageMaker Studio + +![img.png](images/studio-project-available.png) + +## Usage (Creating a new project) + +### Step 1. Select the template made available above and provide a name. + +Note that the name may have at most 18 characters. + +![img.png](images/create-project.png) + +### Step 2. Wait for the project to be created. + +![img.png](images/wait-project-create.png) + +### Step 3. Add the sample code to the created repository + +Continue from the previously used terminal.
+ +Note that the user or role being used must have permission to use CodeCommit, for example through the [AWSCodeCommitPowerUser](https://docs.aws.amazon.com/codecommit/latest/userguide/security-iam-awsmanpol.html#managed-policies-poweruser) managed policy. + +``` +git init +git add . +git commit -m "adds sample code" +git remote add origin-aws https://git-codecommit.us-east-1.amazonaws.com/v1/repos/model-myawesomeproject-train +git push --set-upstream origin-aws main +``` + +## Usage (Creating a new experiment) + +### Step 1. Submit experiment code to the repository. + +Either clone the CodeCommit repository or start from the previous terminal. + +``` +git checkout -b experiment/myexperiment + +git add . +git commit -m "adds some-change" +git push --set-upstream origin-aws experiment/myexperiment +``` + +Within a few seconds, a new pipeline is created in AWS CodePipeline. + +![img.png](images/codepipeline-running.png) + +The `Train` step of the pipeline launches a new Amazon SageMaker Pipelines pipeline that trains the model. + +![img.png](images/sagemakerpipeline-running.png) + +When the pipeline finishes, a new model is stored in the SageMaker Model Registry with a `Pending` status. + +![img.png](images/model-registry-pending.png) + +At this point the data scientist can assess the experiment results and push subsequent commits attempting to improve on the experiment goal. Each push triggers the pipeline again and stores new model versions in the Model Registry. + +If, on the other hand, the data scientist deems the experiment successful, they can create a pull request asking to merge the changes from the `experiment/myexperiment` branch into `main`. + +### Step 2. Open a pull request with the successful experiment code. + +![img.png](images/open-pr-button.png) + +![img.png](images/create-pr.png) + +![img.png](images/pr-created.png) + +With the pull request created, reviewers can assess not only the code but the results of the experiment as well. + +If all is good, the pull request can be merged with a fast-forward merge. + +![img.png](images/merge-pr.png) + +As soon as the merge is done, the respective model is automatically approved in the Model Registry.
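Under the hood, the approval is performed by the `release-model-package` Lambda function included later in this diff (`cloud_formation/release_model_package_function/index.py`): it walks the model package group and approves the version whose `CommitId` metadata matches the merged commit. A condensed sketch of that logic (the full version also aborts if the matching package was marked `Rejected`):

```python
# Condensed from release_model_package_function/index.py: locate the model
# package whose CommitId metadata matches the merged commit and approve it.
import boto3

sagemaker = boto3.client("sagemaker")


def approve_model_package(model_package_group_name: str, commit_id: str) -> None:
    pages = sagemaker.get_paginator("list_model_packages").paginate(
        ModelPackageGroupName=model_package_group_name,
        ModelPackageType="Versioned",
        SortBy="CreationTime",
        SortOrder="Descending",
    )
    for page in pages:
        for summary in page["ModelPackageSummaryList"]:
            details = sagemaker.describe_model_package(
                ModelPackageName=summary["ModelPackageArn"]
            )
            if details["MetadataProperties"]["CommitId"] == commit_id:
                sagemaker.update_model_package(
                    ModelPackageArn=summary["ModelPackageArn"],
                    ModelApprovalStatus="Approved",
                )
                return
    raise Exception(f"No model package found for commit {commit_id}")
```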
+ +![img.png](images/model-registry-approved.png) \ No newline at end of file diff --git a/multi-branch-mlops-train/buildspec_train.yml b/multi-branch-mlops-train/buildspec_train.yml new file mode 100644 index 00000000..f71fdcb5 --- /dev/null +++ b/multi-branch-mlops-train/buildspec_train.yml @@ -0,0 +1,18 @@ +version: 0.2 + +env: + shell: bash + +phases: + install: + runtime-versions: + python: 3.8 + commands: + - pip install --upgrade --force-reinstall awscli + - pip install -r requirements.txt + build: + commands: + - export PYTHONUNBUFFERED=TRUE + - export BRANCH_NAME_NORM=$(echo $BRANCH_NAME | sed 's/origin\///;s/\//-/') + - export COMMIT_HASH=${CODEBUILD_RESOLVED_SOURCE_VERSION:-${COMMIT_HASH:-}} + - python pipelines/run_pipeline.py --region $AWS_DEFAULT_REGION --experiment-name $BRANCH_NAME_NORM --model-package-group-name $MODEL_PACKAGE_GROUP_NAME --model-name $MODEL_NAME --project-id $PROJECT_ID --commit-id $COMMIT_HASH --role-arn $SAGEMAKER_PIPELINE_ROLE_ARN diff --git a/multi-branch-mlops-train/cloud_formation/baseline.yaml b/multi-branch-mlops-train/cloud_formation/baseline.yaml new file mode 100644 index 00000000..493df993 --- /dev/null +++ b/multi-branch-mlops-train/cloud_formation/baseline.yaml @@ -0,0 +1,391 @@ +AWSTemplateFormatVersion: "2010-09-09" +Resources: + CloudFormationBucket: + Type: 'AWS::S3::Bucket' + Properties: + BucketName: !Sub 'cloud-formation-${AWS::AccountId}-${AWS::Region}' + BucketEncryption: + ServerSideEncryptionConfiguration: + - ServerSideEncryptionByDefault: + SSEAlgorithm: 'AES256' + ServiceCatalogProductLaunchRole: + Type: 'AWS::IAM::Role' + Properties: + RoleName: 'MultiBranchTrainMLOpsLaunchRole' + AssumeRolePolicyDocument: + Version: '2012-10-17' + Statement: + - Effect: Allow + Principal: + Service: + - servicecatalog.amazonaws.com + Action: + - 'sts:AssumeRole' + Path: / + Policies: + - PolicyName: ReadWriteCloudFormation + PolicyDocument: + Version: "2012-10-17" + Statement: + - Effect: Allow + Action: + - 'cloudformation:CreateStack' + - 'cloudformation:DeleteStack' + - 'cloudformation:DescribeStackEvents' + - 'cloudformation:DescribeStacks' + - 'cloudformation:GetTemplateSummary' + - 'cloudformation:SetStackPolicy' + - 'cloudformation:ValidateTemplate' + - 'cloudformation:UpdateStack' + - 'cloudformation:CreateChangeSet' + Resource: '*' + - PolicyName: ReadTemplate + PolicyDocument: + Version: "2012-10-17" + Statement: + - Effect: Allow + Action: + - 's3:GetObject' + Resource: '*' + - PolicyName: WriteS3 + PolicyDocument: + Version: "2012-10-17" + Statement: + - Effect: Allow + Action: + - 's3:CreateBucket' + - 's3:DeleteBucket' + - 's3:GetEncryptionConfiguration' + - 's3:PutEncryptionConfiguration' + - 's3:PutBucketVersioning' + Resource: + - !Join + - ':' + - - 'arn' + - !Ref AWS::Partition + - 's3' + - '' + - '' + - !Sub 'model-code-artifacts-*-${AWS::Region}-${AWS::AccountId}' + - PolicyName: ReadWriteIAM + PolicyDocument: + Version: "2012-10-17" + Statement: + - Effect: Allow + Action: + - 'iam:GetRole' + - 'iam:CreateRole' + - 'iam:DeleteRole' + - 'iam:DetachRolePolicy' + - 'iam:DeleteRolePolicy' + - 'iam:AttachRolePolicy' + - 'iam:PutRolePolicy' + - 'iam:GetRolePolicy' + - 'iam:PassRole' + - 'iam:GetUser' + - 'iam:CreateUser' + - 'iam:DeleteUser' + - 'iam:DescribeUser' + - 'iam:ListAccessKeys' + - 'iam:GetUserPolicy' + - 'iam:PutUserPolicy' + - 'iam:DeleteUserPolicy' + - 'iam:TagRole' + - 'iam:UntagRole' + Resource: + - !Join + - ':' + - - 'arn' + - !Ref AWS::Partition + - 'iam' + - '' + - !Ref AWS::AccountId + - 
'role/StartPipelineTrainEventPipelineRole*' + - !Join + - ':' + - - 'arn' + - !Ref AWS::Partition + - 'iam' + - '' + - !Ref AWS::AccountId + - 'role/ReleaseLambdaRole*' + - !Join + - ':' + - - 'arn' + - !Ref AWS::Partition + - 'iam' + - '' + - !Ref AWS::AccountId + - 'role/CreatePipelineEventPipelineRole*' + - !Join + - ':' + - - 'arn' + - !Ref AWS::Partition + - 'iam' + - '' + - !Ref AWS::AccountId + - 'role/DeletePipelineEventPipelineRole*' + - !Join + - ':' + - - 'arn' + - !Ref AWS::Partition + - 'iam' + - '' + - !Ref AWS::AccountId + - 'role/StartPipelineReleaseEventPipelineRole*' + - !Join + - ':' + - - 'arn' + - !Ref AWS::Partition + - 'iam' + - '' + - !Ref AWS::AccountId + - 'role/SageMakerPipelineRole*' + - !Join + - ':' + - - 'arn' + - !Ref AWS::Partition + - 'iam' + - '' + - !Ref AWS::AccountId + - 'role/ReleaseCodePipelineSourceActionRole*' + - !Join + - ':' + - - 'arn' + - !Ref AWS::Partition + - 'iam' + - '' + - !Ref AWS::AccountId + - 'role/ModelTrainCodeBuildRole*' + - !Join + - ':' + - - 'arn' + - !Ref AWS::Partition + - 'iam' + - '' + - !Ref AWS::AccountId + - 'role/TrainCodePipelineSourceActionRole*' + - !Join + - ':' + - - 'arn' + - !Ref AWS::Partition + - 'iam' + - '' + - !Ref AWS::AccountId + - 'role/ReleaseCodePipelineLambdaActionRole*' + - !Join + - ':' + - - 'arn' + - !Ref AWS::Partition + - 'iam' + - '' + - !Ref AWS::AccountId + - 'role/ModelTrainCodePipelineRole*' + - !Join + - ':' + - - 'arn' + - !Ref AWS::Partition + - 'iam' + - '' + - !Ref AWS::AccountId + - 'role/TrainCodePipelineBuildActionRole*' + - !Join + - ':' + - - 'arn' + - !Ref AWS::Partition + - 'iam' + - '' + - !Ref AWS::AccountId + - 'role/ModelReleaseCodePipelineRole*' + - !Join + - ':' + - - 'arn' + - !Ref AWS::Partition + - 'iam' + - '' + - !Ref AWS::AccountId + - 'role/LambdaCreatePipelineRole*' + - !Join + - ':' + - - 'arn' + - !Ref AWS::Partition + - 'iam' + - '' + - !Ref AWS::AccountId + - 'role/LambdaDeletePipelineRole*' + - !Join + - ':' + - - 'arn' + - !Ref AWS::Partition + - 'iam' + - '' + - !Ref AWS::AccountId + - 'role/CloudFormationRole*' + - !Join + - ':' + - - 'arn' + - !Ref AWS::Partition + - 'iam' + - '' + - !Ref AWS::AccountId + - 'user/jenkins-model-*' + - PolicyName: CodeCommitWrite + PolicyDocument: + Version: "2012-10-17" + Statement: + - Effect: Allow + Action: + - 'codecommit:CreateRepository' + - 'codecommit:DeleteRepository' + - 'codecommit:GetRepository' + - 'codecommit:TagResource' + Resource: + - !Join + - ':' + - - 'arn' + - !Ref AWS::Partition + - 'codecommit' + - !Ref AWS::Region + - !Ref AWS::AccountId + - 'model-*-train' + - PolicyName: CodeCommitRead + PolicyDocument: + Version: "2012-10-17" + Statement: + - Effect: Allow + Action: + - 'codecommit:ListRepositories' + Resource: '*' + - PolicyName: SageMakerWrite + PolicyDocument: + Version: "2012-10-17" + Statement: + - Effect: Allow + Action: + - 'sagemaker:CreateModelPackageGroup' + - 'sagemaker:DeleteModelPackageGroup' + - 'sagemaker:DescribeModelPackageGroup' + - 'sagemaker:GetModelPackageGroupPolicy' + - 'sagemaker:ListTags' + Resource: + - !Join + - ':' + - - 'arn' + - !Ref AWS::Partition + - 'sagemaker' + - !Ref AWS::Region + - !Ref AWS::AccountId + - 'model-package-group/model-*' + - PolicyName: LambdaReadWrite + PolicyDocument: + Version: "2012-10-17" + Statement: + - Effect: Allow + Action: + - 'lambda:CreateFunction' + - 'lambda:DeleteFunction' + - 'lambda:GetFunction' + - 'lambda:PutFunctionEventInvokeConfig' + - 'lambda:DeleteFunctionEventInvokeConfig' + - 'lambda:AddPermission' + - 
'lambda:RemovePermission' + Resource: + - !Join + - ':' + - - 'arn' + - !Ref AWS::Partition + - 'lambda' + - !Ref AWS::Region + - !Ref AWS::AccountId + - 'function:release-model-package*' + - !Join + - ':' + - - 'arn' + - !Ref AWS::Partition + - 'lambda' + - !Ref AWS::Region + - !Ref AWS::AccountId + - 'function:create-train-pipeline*' + - !Join + - ':' + - - 'arn' + - !Ref AWS::Partition + - 'lambda' + - !Ref AWS::Region + - !Ref AWS::AccountId + - 'function:delete-train-pipeline*' + - PolicyName: CodeBuildReadWrite + PolicyDocument: + Version: "2012-10-17" + Statement: + - Effect: Allow + Action: + - 'codebuild:CreateProject' + - 'codebuild:DeleteProject' + - 'codebuild:BatchGetProjects' + Resource: + - !Join + - ':' + - - 'arn' + - !Ref AWS::Partition + - 'codebuild' + - !Ref AWS::Region + - !Ref AWS::AccountId + - 'project/model*train' + - PolicyName: CodePipelineReadWrite + PolicyDocument: + Version: "2012-10-17" + Statement: + - Effect: Allow + Action: + - 'codepipeline:GetPipeline' + - 'codepipeline:CreatePipeline' + - 'codepipeline:DeletePipeline' + - 'codepipeline:GetPipelineState' + Resource: + - !Join + - ':' + - - 'arn' + - !Ref AWS::Partition + - 'codepipeline' + - !Ref AWS::Region + - !Ref AWS::AccountId + - 'model-*-release*' + - PolicyName: EventsReadWrite + PolicyDocument: + Version: "2012-10-17" + Statement: + - Effect: Allow + Action: + - 'events:PutRule' + - 'events:GetRule' + - 'events:DeleteRule' + - 'events:DescribeRule' + - 'events:RemoveTargets' + - 'events:PutTargets' + Resource: + - !Join + - ':' + - - 'arn' + - !Ref AWS::Partition + - 'events' + - !Ref AWS::Region + - !Ref AWS::AccountId + - '*StartPipelineRule*' + - !Join + - ':' + - - 'arn' + - !Ref AWS::Partition + - 'events' + - !Ref AWS::Region + - !Ref AWS::AccountId + - '*CreatePipelineRule*' + - !Join + - ':' + - - 'arn' + - !Ref AWS::Partition + - 'events' + - !Ref AWS::Region + - !Ref AWS::AccountId + - '*DeletePipelineRule*' + diff --git a/multi-branch-mlops-train/cloud_formation/create_pipeline_function/index.py b/multi-branch-mlops-train/cloud_formation/create_pipeline_function/index.py new file mode 100644 index 00000000..1d07187a --- /dev/null +++ b/multi-branch-mlops-train/cloud_formation/create_pipeline_function/index.py @@ -0,0 +1,85 @@ +import boto3 +import os +import json +import io + +cloud_formation = boto3.client('cloudformation') +s3_client = boto3.client('s3') + + +def get_s3_object(bucket_name: str, key: str) -> str: + buf = io.BytesIO() + s3_client.download_fileobj(Bucket=bucket_name, Key=key, Fileobj=buf) + return buf.getvalue().decode() + + +def lambda_handler(event, context): + repository_name: str = event['detail']['repositoryName'] + branch_name: str = event['detail']['referenceName'] + + if 'experiment' not in branch_name: + print('skipping pipeline creation because branch is not experiment') + return + + branch_name_norm = branch_name.replace('/', '-') + + pipeline_name = f'{repository_name}-{branch_name_norm}' + + template_body = get_s3_object( + bucket_name=os.getenv('PIPELINE_STACK_S3_BUCKET'), + key=os.getenv('PIPELINE_STACK_S3_KEY') + ) + + response = cloud_formation.create_stack( + StackName=pipeline_name, + TemplateBody=template_body, + Parameters=[ + { + 'ParameterKey': 'BranchName', + 'ParameterValue': branch_name, + }, + { + 'ParameterKey': 'BranchNameNorm', + 'ParameterValue': branch_name_norm + }, + { + 'ParameterKey': 'ModelName', + 'ParameterValue': os.getenv('MODEL_NAME'), + }, + { + 'ParameterKey': 'RepositoryName', + 'ParameterValue': repository_name, + }, + { + 
'ParameterKey': 'CodePipelineArtifactBucket', + 'ParameterValue': os.getenv('CODE_PIPELINE_ARTIFACT_BUCKET'), + }, + { + 'ParameterKey': 'CodePipelineRoleArn', + 'ParameterValue': os.getenv('CODE_PIPELINE_ROLE_ARN'), + }, + { + 'ParameterKey': 'CodePipelineSourceActionRoleArn', + 'ParameterValue': os.getenv('CODE_PIPELINE_SOURCE_ACTION_ROLE_ARN'), + }, + { + 'ParameterKey': 'CodePipelineBuildActionRoleArn', + 'ParameterValue': os.getenv('CODE_PIPELINE_BUILD_ACTION_ROLE_ARN'), + }, + { + 'ParameterKey': 'EventStartPipelineRoleArn', + 'ParameterValue': os.getenv('EVENT_START_PIPELINE_ROLE_ARN'), + }, + { + 'ParameterKey': 'CodeBuildProjectName', + 'ParameterValue': os.getenv('CODE_BUILD_PROJECT_NAME'), + }, + ], + TimeoutInMinutes=5, + Capabilities=[ + 'CAPABILITY_IAM', + ], + RoleARN=os.getenv('CLOUD_FORMATION_ROLE_ARN'), + ) + + print(response) diff --git a/multi-branch-mlops-train/cloud_formation/delete_pipeline_function/index.py b/multi-branch-mlops-train/cloud_formation/delete_pipeline_function/index.py new file mode 100644 index 00000000..5050c7c3 --- /dev/null +++ b/multi-branch-mlops-train/cloud_formation/delete_pipeline_function/index.py @@ -0,0 +1,20 @@ +import boto3 +import json +import os + +cloud_formation = boto3.client('cloudformation') + + +def lambda_handler(event, context): + repository_name: str = event['detail']['repositoryName'] + branch_name: str = event['detail']['referenceName'] + branch_name_norm = branch_name.replace('/', '-') + + pipeline_name = f'{repository_name}-{branch_name_norm}' + + response = cloud_formation.delete_stack( + StackName=pipeline_name, + RoleARN=os.getenv('CLOUD_FORMATION_ROLE_ARN'), + ) + + print(response) diff --git a/multi-branch-mlops-train/cloud_formation/pipeline.yaml b/multi-branch-mlops-train/cloud_formation/pipeline.yaml new file mode 100644 index 00000000..8896845a --- /dev/null +++ b/multi-branch-mlops-train/cloud_formation/pipeline.yaml @@ -0,0 +1,97 @@ +Parameters: + BranchName: + Type: String + BranchNameNorm: + Type: String + ModelName: + Type: String + RepositoryName: + Type: String + CodePipelineArtifactBucket: + Type: String + CodePipelineRoleArn: + Type: String + CodePipelineSourceActionRoleArn: + Type: String + CodePipelineBuildActionRoleArn: + Type: String + EventStartPipelineRoleArn: + Type: String + CodeBuildProjectName: + Type: String +Resources: + ModelTrainCodePipeline: + Type: 'AWS::CodePipeline::Pipeline' + Properties: + Name: !Sub 'model-${ModelName}-train-${BranchNameNorm}' + RoleArn: !Ref CodePipelineRoleArn + ArtifactStore: + Type: S3 + Location: !Ref CodePipelineArtifactBucket + Stages: + - Name: Source + Actions: + - Name: SourceAction + ActionTypeId: + Category: Source + Owner: AWS + Version: 1 + Provider: CodeCommit + OutputArtifacts: + - Name: SourceOutput + Configuration: + BranchName: !Ref BranchName + RepositoryName: !Ref RepositoryName + PollForSourceChanges: false + RunOrder: 1 + RoleArn: !Ref CodePipelineSourceActionRoleArn + - Name: Train + Actions: + - Name: TrainAction + ActionTypeId: + Category: Build + Owner: AWS + Version: 1 + Provider: CodeBuild + InputArtifacts: + - Name: SourceOutput + OutputArtifacts: + - Name: TrainOutput + Configuration: + ProjectName: !Ref CodeBuildProjectName + EnvironmentVariables: !Sub '[{"name":"BRANCH_NAME","value":"${BranchName}","type":"PLAINTEXT"}]' + RoleArn: !Ref CodePipelineBuildActionRoleArn + StartPipelineRule: + Type: 'AWS::Events::Rule' + Properties: + EventPattern: + source: + - aws.codecommit + detail-type: + - 'CodeCommit Repository State Change' + 
resources: + - !Join + - ':' + - - 'arn' + - !Ref AWS::Partition + - 'codecommit' + - !Ref AWS::Region + - !Ref AWS::AccountId + - !Ref RepositoryName + detail: + referenceName: + - !Ref BranchName + referenceType: + - branch + State: ENABLED + Targets: + - Arn: !Join + - ':' + - - 'arn' + - !Ref AWS::Partition + - 'codepipeline' + - !Ref AWS::Region + - !Ref AWS::AccountId + - !Ref ModelTrainCodePipeline + Id: ModelTrainCodePipeline + RoleArn: !Ref EventStartPipelineRoleArn diff --git a/multi-branch-mlops-train/cloud_formation/release_model_package_function/index.py b/multi-branch-mlops-train/cloud_formation/release_model_package_function/index.py new file mode 100644 index 00000000..7af93020 --- /dev/null +++ b/multi-branch-mlops-train/cloud_formation/release_model_package_function/index.py @@ -0,0 +1,77 @@ +import json +import os +import boto3 +import traceback + +code_pipeline = boto3.client('codepipeline') +sagemaker = boto3.client('sagemaker') + + +def approve_model_package(model_package_group_name, commit_id): + response_iterator = sagemaker.get_paginator('list_model_packages').paginate( + ModelPackageGroupName=model_package_group_name, + ModelPackageType='Versioned', + SortBy='CreationTime', + SortOrder='Descending', + ) + for response in response_iterator: + for model_package in response['ModelPackageSummaryList']: + model_package_arn = model_package['ModelPackageArn'] + model_package_response = sagemaker.describe_model_package( + ModelPackageName=model_package_arn + ) + if model_package_response['MetadataProperties']['CommitId'] == commit_id: + if model_package['ModelApprovalStatus'] == 'Rejected': + raise Exception(f'Model package name {model_package_arn} is rejected. Aborting.') + update_model_package_response = sagemaker.update_model_package( + ModelPackageArn=model_package_arn, + ModelApprovalStatus='Approved', + ) + print(f'Approved. 
Package={model_package}; Response={update_model_package_response}') + return + raise Exception(f'Did not find model with commit "{commit_id}" on model package group "{model_package_group_name}"') + + +def lambda_handler_code_pipeline(event, context): + job_id = event['CodePipeline.job']['id'] + try: + user_params = event['CodePipeline.job']['data']['actionConfiguration']['configuration']['UserParameters'] + user_params = json.loads(user_params) + + model_package_group_name = os.getenv('MODEL_PACKAGE_GROUP_NAME') + + approve_model_package( + model_package_group_name=model_package_group_name, + commit_id=user_params['commit_id'] + ) + + response = code_pipeline.put_job_success_result( + jobId=job_id, + ) + print(response) + except Exception as e: + traceback.print_exc() + response = code_pipeline.put_job_failure_result( + jobId=job_id, + failureDetails={ + 'type': 'JobFailed', + 'message': str(e), + } + ) + print(response) + + +def lambda_handler_jenkins(event, context): + model_package_group_name = os.getenv('MODEL_PACKAGE_GROUP_NAME') + + approve_model_package( + model_package_group_name=model_package_group_name, + commit_id=event['commit_id'] + ) + + +def lambda_handler(event, context): + if 'CodePipeline.job' in event: + return lambda_handler_code_pipeline(event, context) + else: + return lambda_handler_jenkins(event, context) diff --git a/multi-branch-mlops-train/cloud_formation/train-codepipeline-codecommit.yaml b/multi-branch-mlops-train/cloud_formation/train-codepipeline-codecommit.yaml new file mode 100644 index 00000000..3ad8ca7c --- /dev/null +++ b/multi-branch-mlops-train/cloud_formation/train-codepipeline-codecommit.yaml @@ -0,0 +1,755 @@ +Transform: AWS::Serverless-2016-10-31 +Parameters: + SageMakerProjectName: + Type: String + Description: Name of the project + MaxLength: 18 + SageMakerProjectId: + Type: String + Description: Service generated Id of the project. 
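+# The resources below implement the CodePipeline + CodeCommit variant: a CodeCommit
+# repository for the model code; EventBridge rules that invoke the create/delete
+# Lambda functions when an experiment branch is created or deleted, so that each
+# experiment branch gets its own training pipeline stack from pipeline.yaml; a
+# CodeBuild project that runs buildspec_train.yml; and a release pipeline on main
+# that invokes the release Lambda to approve the matching model package.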
+Outputs: + CloneUrlHttp: + Description: Repository Clone URL HTTP + Value: !GetAtt ModelTrainRepository.CloneUrlHttp + CloneUrlSsh: + Description: Repository Clone URL SSH + Value: !GetAtt ModelTrainRepository.CloneUrlSsh +Resources: + ModelTrainRepository: + Type: 'AWS::CodeCommit::Repository' + Properties: + RepositoryName: + !Sub 'model-${SageMakerProjectName}-train' + ModelTrainCodePipelineRole: + Type: 'AWS::IAM::Role' + Properties: + RoleName: !Sub 'ModelTrainCodePipelineRole-${SageMakerProjectName}' + AssumeRolePolicyDocument: + Version: '2012-10-17' + Statement: + - Effect: Allow + Principal: + Service: + - codepipeline.amazonaws.com + Action: + - 'sts:AssumeRole' + Path: / + Policies: + - PolicyName: AssumeStagesRoles + PolicyDocument: + Version: "2012-10-17" + Statement: + - Effect: Allow + Action: 'sts:AssumeRole' + Resource: + - !GetAtt TrainCodePipelineSourceActionRole.Arn + - !GetAtt TrainCodePipelineBuildActionRole.Arn + CreatePipelineEventPipelineRole: + Type: AWS::IAM::Role + Properties: + RoleName: !Sub 'CreatePipelineEventPipelineRole-${SageMakerProjectName}' + AssumeRolePolicyDocument: + Version: 2012-10-17 + Statement: + - Effect: Allow + Principal: + Service: + - events.amazonaws.com + Action: sts:AssumeRole + Path: / + ManagedPolicyArns: + - 'arn:aws:iam::aws:policy/CloudWatchLogsFullAccess' + DeletePipelineEventPipelineRole: + Type: AWS::IAM::Role + Properties: + RoleName: !Sub 'DeletePipelineEventPipelineRole-${SageMakerProjectName}' + AssumeRolePolicyDocument: + Version: 2012-10-17 + Statement: + - Effect: Allow + Principal: + Service: + - events.amazonaws.com + Action: sts:AssumeRole + Path: / + ManagedPolicyArns: + - 'arn:aws:iam::aws:policy/CloudWatchLogsFullAccess' + StartPipelineTrainEventPipelineRole: + Type: AWS::IAM::Role + Properties: + RoleName: !Sub 'StartPipelineTrainEventPipelineRole-${SageMakerProjectName}' + AssumeRolePolicyDocument: + Version: 2012-10-17 + Statement: + - Effect: Allow + Principal: + Service: + - events.amazonaws.com + Action: sts:AssumeRole + Path: / + ManagedPolicyArns: + - 'arn:aws:iam::aws:policy/CloudWatchLogsFullAccess' + Policies: + - PolicyName: WriteCodePipeline + PolicyDocument: + Version: 2012-10-17 + Statement: + - Effect: Allow + Action: 'codepipeline:StartPipelineExecution' + Resource: + - !Join + - ':' + - - 'arn' + - !Ref AWS::Partition + - 'codepipeline' + - !Ref AWS::Region + - !Ref AWS::AccountId + - !Sub 'model-${SageMakerProjectName}-train*' + StartPipelineReleaseEventPipelineRole: + Type: AWS::IAM::Role + Properties: + RoleName: !Sub 'StartPipelineReleaseEventPipelineRole-${SageMakerProjectName}' + AssumeRolePolicyDocument: + Version: 2012-10-17 + Statement: + - Effect: Allow + Principal: + Service: + - events.amazonaws.com + Action: sts:AssumeRole + Path: / + ManagedPolicyArns: + - 'arn:aws:iam::aws:policy/CloudWatchLogsFullAccess' + Policies: + - PolicyName: WriteCodePipeline + PolicyDocument: + Version: 2012-10-17 + Statement: + - Effect: Allow + Action: 'codepipeline:StartPipelineExecution' + Resource: + - !Join + - ':' + - - 'arn' + - !Ref AWS::Partition + - 'codepipeline' + - !Ref AWS::Region + - !Ref AWS::AccountId + - !Sub 'model-${SageMakerProjectName}-release' + ModelTrainCodeBuildRole: + Type: 'AWS::IAM::Role' + Properties: + RoleName: !Sub 'ModelTrainCodeBuildRole-${SageMakerProjectName}' + AssumeRolePolicyDocument: + Version: '2012-10-17' + Statement: + - Effect: Allow + Principal: + Service: + - codebuild.amazonaws.com + Action: + - 'sts:AssumeRole' + Path: / + ManagedPolicyArns: + - 
'arn:aws:iam::aws:policy/CloudWatchLogsFullAccess' + - 'arn:aws:iam::aws:policy/AmazonSageMakerFullAccess' + Policies: + - PolicyName: ReadBucket + PolicyDocument: + Version: "2012-10-17" + Statement: + - Effect: Allow + Action: + - 's3:GetObject' + Resource: !Sub '${ModelCodeArtifactsBucket.Arn}/*' + - PolicyName: WriteBucket + PolicyDocument: + Version: "2012-10-17" + Statement: + - Effect: Allow + Action: + - 's3:CreateBucket' + - 's3:PutObject' + Resource: + - !Join + - ':' + - - 'arn' + - !Ref AWS::Partition + - 's3' + - '' + - '' + - !Sub 'sagemaker-${AWS::Region}-${AWS::AccountId}' + - !Join + - ':' + - - 'arn' + - !Ref AWS::Partition + - 's3' + - '' + - '' + - !Sub 'sagemaker-${AWS::Region}-${AWS::AccountId}/*' + - PolicyName: PassRoleSagemakerPipeline + PolicyDocument: + Version: "2012-10-17" + Statement: + - Effect: Allow + Action: + - 'iam:PassRole' + Resource: !GetAtt SageMakerPipelineRole.Arn + ModelCodeArtifactsBucket: + Type: 'AWS::S3::Bucket' + Properties: + BucketName: !Sub 'model-code-artifacts-${SageMakerProjectName}-${AWS::Region}-${AWS::AccountId}' + BucketEncryption: + ServerSideEncryptionConfiguration: + - ServerSideEncryptionByDefault: + SSEAlgorithm: 'AES256' + ModelTrainCodeBuildProject: + Type: 'AWS::CodeBuild::Project' + Properties: + Name: !Sub 'model-${SageMakerProjectName}-train' + Artifacts: + Type: NO_ARTIFACTS + Environment: + ComputeType: BUILD_GENERAL1_SMALL + Image: aws/codebuild/standard:5.0 + ImagePullCredentialsType: CODEBUILD + PrivilegedMode: true + Type: LINUX_CONTAINER + EnvironmentVariables: + - Name: SAGEMAKER_PIPELINE_ROLE_ARN + Type: PLAINTEXT + Value: !GetAtt SageMakerPipelineRole.Arn + - Name: MODEL_NAME + Type: PLAINTEXT + Value: !Ref SageMakerProjectName + - Name: PROJECT_ID + Type: PLAINTEXT + Value: !Ref SageMakerProjectId + - Name: MODEL_PACKAGE_GROUP_NAME + Type: PLAINTEXT + Value: !Sub 'model-${SageMakerProjectName}' + - Name: AWS_ACCOUNT_ID + Type: PLAINTEXT + Value: !Ref AWS::AccountId + ServiceRole: !GetAtt ModelTrainCodeBuildRole.Arn + Source: + Location: !GetAtt ModelTrainRepository.CloneUrlHttp + Type: CODECOMMIT + BuildSpec: 'buildspec_train.yml' + TrainCodePipelineSourceActionRole: + Type: 'AWS::IAM::Role' + Properties: + RoleName: !Sub 'TrainCodePipelineSourceActionRole-${SageMakerProjectName}' + AssumeRolePolicyDocument: + Version: '2012-10-17' + Statement: + - Effect: Allow + Principal: + AWS: !Sub '${AWS::AccountId}' + Action: + - 'sts:AssumeRole' + Path: / + Policies: + - PolicyName: ReadRepository + PolicyDocument: + Version: "2012-10-17" + Statement: + - Effect: Allow + Action: + - 'codecommit:GetBranch' + - 'codecommit:GetCommit' + - 'codecommit:UploadArchive' + - 'codecommit:GetUploadArchiveStatus' + Resource: !GetAtt ModelTrainRepository.Arn + - PolicyName: WriteBucket + PolicyDocument: + Version: "2012-10-17" + Statement: + - Effect: Allow + Action: + - 's3:PutObject' + Resource: !Sub '${ModelCodeArtifactsBucket.Arn}/*' + ReleaseCodePipelineSourceActionRole: + Type: 'AWS::IAM::Role' + Properties: + RoleName: !Sub 'ReleaseCodePipelineSourceActionRole-${SageMakerProjectName}' + AssumeRolePolicyDocument: + Version: '2012-10-17' + Statement: + - Effect: Allow + Principal: + AWS: !Sub '${AWS::AccountId}' + Action: + - 'sts:AssumeRole' + Path: / + Policies: + - PolicyName: ReadRepository + PolicyDocument: + Version: "2012-10-17" + Statement: + - Effect: Allow + Action: + - 'codecommit:GetBranch' + - 'codecommit:GetCommit' + - 'codecommit:UploadArchive' + - 'codecommit:GetUploadArchiveStatus' + Resource: !GetAtt 
ModelTrainRepository.Arn + - PolicyName: WriteBucket + PolicyDocument: + Version: "2012-10-17" + Statement: + - Effect: Allow + Action: + - 's3:PutObject' + Resource: !Sub '${ModelCodeArtifactsBucket.Arn}/*' + TrainCodePipelineBuildActionRole: + Type: 'AWS::IAM::Role' + Properties: + RoleName: !Sub 'TrainCodePipelineBuildActionRole-${SageMakerProjectName}' + AssumeRolePolicyDocument: + Version: '2012-10-17' + Statement: + - Effect: Allow + Principal: + AWS: !Sub '${AWS::AccountId}' + Action: + - 'sts:AssumeRole' + Path: / + Policies: + - PolicyName: ReadWriteBuild + PolicyDocument: + Version: "2012-10-17" + Statement: + - Effect: Allow + Action: + - 'codebuild:StartBuild' + - 'codebuild:BatchGetBuilds' + Resource: !GetAtt ModelTrainCodeBuildProject.Arn + ReleaseCodePipelineLambdaActionRole: + Type: 'AWS::IAM::Role' + Properties: + RoleName: !Sub 'ReleaseCodePipelineLambdaActionRole-${SageMakerProjectName}' + AssumeRolePolicyDocument: + Version: '2012-10-17' + Statement: + - Effect: Allow + Principal: + AWS: !Sub '${AWS::AccountId}' + Action: + - 'sts:AssumeRole' + Path: / + Policies: + - PolicyName: InvokeLambda + PolicyDocument: + Version: "2012-10-17" + Statement: + - Effect: Allow + Action: + - 'lambda:InvokeFunction' + Resource: !GetAtt ReleaseLambda.Arn + ModelReleaseCodePipelineRole: + Type: 'AWS::IAM::Role' + Properties: + RoleName: !Sub 'ModelReleaseCodePipelineRole-${SageMakerProjectName}' + AssumeRolePolicyDocument: + Version: '2012-10-17' + Statement: + - Effect: Allow + Principal: + Service: + - codepipeline.amazonaws.com + Action: + - 'sts:AssumeRole' + Path: / + Policies: + - PolicyName: AssumeStagesRoles + PolicyDocument: + Version: "2012-10-17" + Statement: + - Effect: Allow + Action: 'sts:AssumeRole' + Resource: + - !GetAtt ReleaseCodePipelineSourceActionRole.Arn + - !GetAtt ReleaseCodePipelineLambdaActionRole.Arn + ModelReleaseCodePipeline: + Type: 'AWS::CodePipeline::Pipeline' + Properties: + Name: !Sub 'model-${SageMakerProjectName}-release' + RoleArn: !GetAtt ModelReleaseCodePipelineRole.Arn + ArtifactStore: + Type: S3 + Location: !Ref ModelCodeArtifactsBucket + Stages: + - Name: Source + Actions: + - Name: SourceAction + ActionTypeId: + Category: Source + Owner: AWS + Version: 1 + Provider: CodeCommit + OutputArtifacts: + - Name: SourceOutput + Configuration: + BranchName: 'main' + RepositoryName: !GetAtt ModelTrainRepository.Name + PollForSourceChanges: false + Namespace: 'SourceVariables' + RunOrder: 1 + RoleArn: !GetAtt ReleaseCodePipelineSourceActionRole.Arn + - Name: Release + Actions: + - Name: ReleaseAction + ActionTypeId: + Category: Invoke + Owner: AWS + Version: 1 + Provider: Lambda + Configuration: + FunctionName: !Ref ReleaseLambda + UserParameters: '{"commit_id": "#{SourceVariables.CommitId}"}' + RoleArn: !GetAtt ReleaseCodePipelineLambdaActionRole.Arn + LambdaCreatePipelineRole: + Type: 'AWS::IAM::Role' + Properties: + RoleName: !Sub 'LambdaCreatePipelineRole-${SageMakerProjectName}' + AssumeRolePolicyDocument: + Version: '2012-10-17' + Statement: + - Effect: Allow + Principal: + Service: + - lambda.amazonaws.com + Action: + - 'sts:AssumeRole' + Path: / + ManagedPolicyArns: + - 'arn:aws:iam::aws:policy/CloudWatchLogsFullAccess' + Policies: + - PolicyName: ReadBucket + PolicyDocument: + Version: "2012-10-17" + Statement: + - Effect: Allow + Action: + - 's3:GetObject' + Resource: + - !Sub 'arn:aws:s3:::cloud-formation-${AWS::AccountId}-${AWS::Region}/pipeline.yaml' + - PolicyName: PassRole + PolicyDocument: + Version: "2012-10-17" + Statement: + - Effect: 
Allow + Action: + - 'iam:PassRole' + Resource: !GetAtt CloudFormationRole.Arn + - PolicyName: WriteCloudFormation + PolicyDocument: + Version: "2012-10-17" + Statement: + - Effect: Allow + Action: + - 'cloudformation:CreateStack' + Resource: + - !Join + - ':' + - - 'arn' + - !Ref AWS::Partition + - 'cloudformation' + - !Ref AWS::Region + - !Ref AWS::AccountId + - !Sub 'stack/model-${SageMakerProjectName}-train*' + LambdaDeletePipelineRole: + Type: 'AWS::IAM::Role' + Properties: + RoleName: !Sub 'LambdaDeletePipelineRole-${SageMakerProjectName}' + AssumeRolePolicyDocument: + Version: '2012-10-17' + Statement: + - Effect: Allow + Principal: + Service: + - lambda.amazonaws.com + Action: + - 'sts:AssumeRole' + Path: / + ManagedPolicyArns: + - 'arn:aws:iam::aws:policy/CloudWatchLogsFullAccess' + Policies: + - PolicyName: PassRole + PolicyDocument: + Version: "2012-10-17" + Statement: + - Effect: Allow + Action: 'iam:PassRole' + Resource: !GetAtt CloudFormationRole.Arn + - PolicyName: WriteCloudFormation + PolicyDocument: + Version: "2012-10-17" + Statement: + - Effect: Allow + Action: 'cloudformation:DeleteStack' + Resource: + - !Join + - ':' + - - 'arn' + - !Ref AWS::Partition + - 'cloudformation' + - !Ref AWS::Region + - !Ref AWS::AccountId + - !Sub 'stack/model-${SageMakerProjectName}-train*' + SageMakerPipelineRole: + Type: 'AWS::IAM::Role' + Properties: + RoleName: !Sub 'SageMakerPipelineRole-${SageMakerProjectName}' + AssumeRolePolicyDocument: + Version: '2012-10-17' + Statement: + - Effect: Allow + Principal: + Service: + - sagemaker.amazonaws.com + Action: + - 'sts:AssumeRole' + Path: / + ManagedPolicyArns: + - 'arn:aws:iam::aws:policy/CloudWatchLogsFullAccess' + - 'arn:aws:iam::aws:policy/AmazonSageMakerFullAccess' + ReleaseLambdaRole: + Type: 'AWS::IAM::Role' + Properties: + RoleName: !Sub 'ReleaseLambdaRole-${SageMakerProjectName}' + AssumeRolePolicyDocument: + Version: '2012-10-17' + Statement: + - Effect: Allow + Principal: + Service: + - lambda.amazonaws.com + Action: + - 'sts:AssumeRole' + Path: / + ManagedPolicyArns: + - 'arn:aws:iam::aws:policy/CloudWatchLogsFullAccess' + Policies: + - PolicyName: CodePipelineWrite + PolicyDocument: + Version: "2012-10-17" + Statement: + - Effect: Allow + Action: + - 'codepipeline:PutJobSuccessResult' + - 'codepipeline:PutJobFailureResult' + Resource: '*' + - PolicyName: ReadWriteSagemakerModelRegistry + PolicyDocument: + Version: "2012-10-17" + Statement: + - Effect: Allow + Action: + - 'sagemaker:ListModelPackages' + - 'sagemaker:DescribeModelPackage' + - 'sagemaker:UpdateModelPackage' + Resource: + - !Join + - ':' + - - 'arn' + - !Ref AWS::Partition + - 'sagemaker' + - !Ref AWS::Region + - !Ref AWS::AccountId + - !Sub 'model-package/model-${SageMakerProjectName}/*' + ReleaseLambda: + Type: 'AWS::Serverless::Function' + Properties: + FunctionName: !Sub 'release-model-package-${SageMakerProjectName}' + CodeUri: cloud_formation/release_model_package_function/index.py + Handler: index.lambda_handler + Role: !GetAtt ReleaseLambdaRole.Arn + Runtime: python3.8 + MemorySize: 128 + Timeout: 60 + EventInvokeConfig: + MaximumRetryAttempts: 0 + Environment: + Variables: + MODEL_PACKAGE_GROUP_NAME: !Sub 'model-${SageMakerProjectName}' + CreatePipelineLambda: + Type: 'AWS::Serverless::Function' + Properties: + FunctionName: !Sub 'create-train-pipeline-${SageMakerProjectName}' + CodeUri: cloud_formation/create_pipeline_function/index.py + Handler: index.lambda_handler + Role: !GetAtt LambdaCreatePipelineRole.Arn + Runtime: python3.8 + MemorySize: 128 + 
Timeout: 10 + EventInvokeConfig: + MaximumRetryAttempts: 0 + Environment: + Variables: + MODEL_NAME: !Sub '${SageMakerProjectName}' + PIPELINE_STACK_S3_BUCKET: !Sub 'cloud-formation-${AWS::AccountId}-${AWS::Region}' + PIPELINE_STACK_S3_KEY: 'pipeline.yaml' + CLOUD_FORMATION_ROLE_ARN: !GetAtt CloudFormationRole.Arn + CODE_PIPELINE_ARTIFACT_BUCKET: !Ref ModelCodeArtifactsBucket + CODE_PIPELINE_ROLE_ARN: !GetAtt ModelTrainCodePipelineRole.Arn + CODE_PIPELINE_SOURCE_ACTION_ROLE_ARN: !GetAtt TrainCodePipelineSourceActionRole.Arn + CODE_PIPELINE_BUILD_ACTION_ROLE_ARN: !GetAtt TrainCodePipelineBuildActionRole.Arn + EVENT_START_PIPELINE_ROLE_ARN: !GetAtt StartPipelineTrainEventPipelineRole.Arn + CODE_BUILD_PROJECT_NAME: !Ref ModelTrainCodeBuildProject + SAGEMAKER_PIPELINE_ROLE_ARN: !GetAtt SageMakerPipelineRole.Arn + CreatePipelineLambdaEventPermission: + Type: 'AWS::Lambda::Permission' + Properties: + Action: 'lambda:InvokeFunction' + FunctionName: !Ref CreatePipelineLambda + Principal: 'events.amazonaws.com' + SourceArn: !GetAtt CreatePipelineRule.Arn + CreatePipelineRule: + Type: 'AWS::Events::Rule' + Properties: + EventPattern: + source: + - aws.codecommit + detail-type: + - 'CodeCommit Repository State Change' + detail: + event: + - referenceCreated + referenceType: + - branch + State: ENABLED + RoleArn: !GetAtt CreatePipelineEventPipelineRole.Arn + Targets: + - Arn: !GetAtt CreatePipelineLambda.Arn + Id: CreatePipelineLambda + DeletePipelineLambda: + Type: 'AWS::Serverless::Function' + Properties: + FunctionName: !Sub 'delete-train-pipeline-${SageMakerProjectName}' + CodeUri: cloud_formation/delete_pipeline_function/index.py + Handler: index.lambda_handler + Role: !GetAtt LambdaDeletePipelineRole.Arn + Runtime: python3.8 + MemorySize: 128 + Timeout: 10 + EventInvokeConfig: + MaximumRetryAttempts: 0 + Environment: + Variables: + CLOUD_FORMATION_ROLE_ARN: !GetAtt CloudFormationRole.Arn + DeletePipelineLambdaEventPermission: + Type: 'AWS::Lambda::Permission' + Properties: + Action: 'lambda:InvokeFunction' + FunctionName: !Ref DeletePipelineLambda + Principal: 'events.amazonaws.com' + SourceArn: !GetAtt DeletePipelineRule.Arn + DeletePipelineRule: + Type: 'AWS::Events::Rule' + Properties: + EventPattern: + source: + - aws.codecommit + detail-type: + - 'CodeCommit Repository State Change' + detail: + event: + - referenceDeleted + referenceType: + - branch + State: ENABLED + RoleArn: !GetAtt DeletePipelineEventPipelineRole.Arn + Targets: + - Arn: !GetAtt DeletePipelineLambda.Arn + Id: DeletePipelineLambda + StartPipelineRule: + Type: 'AWS::Events::Rule' + Properties: + EventPattern: + source: + - aws.codecommit + detail-type: + - 'CodeCommit Repository State Change' + resources: + - !GetAtt ModelTrainRepository.Arn + detail: + referenceName: + - 'main' + referenceType: + - branch + State: ENABLED + Targets: + - Arn: !Join + - ':' + - - 'arn' + - !Ref AWS::Partition + - 'codepipeline' + - !Ref AWS::Region + - !Ref AWS::AccountId + - !Ref ModelReleaseCodePipeline + Id: ModelTrainCodePipeline + RoleArn: !GetAtt StartPipelineReleaseEventPipelineRole.Arn + CloudFormationRole: + Type: 'AWS::IAM::Role' + Properties: + RoleName: !Sub 'CloudFormationRole-${SageMakerProjectName}' + AssumeRolePolicyDocument: + Version: '2012-10-17' + Statement: + - Effect: Allow + Principal: + Service: + - cloudformation.amazonaws.com + Action: + - 'sts:AssumeRole' + Path: / + Policies: + - PolicyName: PassRoleCodePipeline + PolicyDocument: + Version: "2012-10-17" + Statement: + - Effect: Allow + Action: 'iam:PassRole' + 
Resource: + - !GetAtt ModelTrainCodePipelineRole.Arn + - !GetAtt TrainCodePipelineSourceActionRole.Arn + - !GetAtt TrainCodePipelineBuildActionRole.Arn + - !GetAtt StartPipelineTrainEventPipelineRole.Arn + - !GetAtt CreatePipelineEventPipelineRole.Arn + - PolicyName: ReadWriteCodePipeline + PolicyDocument: + Version: "2012-10-17" + Statement: + - Effect: Allow + Action: 'codepipeline:*' + Resource: + - !Join + - ':' + - - 'arn' + - !Ref AWS::Partition + - 'codepipeline' + - !Ref AWS::Region + - !Ref AWS::AccountId + - !Sub 'model-${SageMakerProjectName}-train*' + - PolicyName: ReadWriteEvents + PolicyDocument: + Version: "2012-10-17" + Statement: + - Effect: Allow + Action: + - 'events:*' + Resource: + - !Join + - ':' + - - 'arn' + - !Ref AWS::Partition + - 'events' + - !Ref AWS::Region + - !Ref AWS::AccountId + - !Sub 'rule/model-${SageMakerProjectName}-train*' + ModelPackageGroup: + Type: 'AWS::SageMaker::ModelPackageGroup' + Properties: + ModelPackageGroupName: !Sub 'model-${SageMakerProjectName}' diff --git a/multi-branch-mlops-train/cloud_formation/train-jenkins.yaml b/multi-branch-mlops-train/cloud_formation/train-jenkins.yaml new file mode 100644 index 00000000..9eff5d29 --- /dev/null +++ b/multi-branch-mlops-train/cloud_formation/train-jenkins.yaml @@ -0,0 +1,231 @@ +Transform: AWS::Serverless-2016-10-31 +Parameters: + SageMakerProjectName: + Type: String + Description: Name of the project + MaxLength: 18 + SageMakerProjectId: + Type: String + Description: Service generated Id of the project. +Resources: + ModelTrainCodeBuildRole: + Type: 'AWS::IAM::Role' + Properties: + RoleName: !Sub 'ModelTrainCodeBuildRole-${SageMakerProjectName}' + AssumeRolePolicyDocument: + Version: '2012-10-17' + Statement: + - Effect: Allow + Principal: + Service: + - codebuild.amazonaws.com + Action: + - 'sts:AssumeRole' + Path: / + ManagedPolicyArns: + - 'arn:aws:iam::aws:policy/CloudWatchLogsFullAccess' + - 'arn:aws:iam::aws:policy/AmazonSageMakerFullAccess' + Policies: + - PolicyName: ReadBucket + PolicyDocument: + Version: "2012-10-17" + Statement: + - Effect: Allow + Action: + - 's3:GetObject' + - 's3:GetObjectVersion' + Resource: !Sub '${ModelCodeArtifactsBucket.Arn}/*' + - PolicyName: WriteBucket + PolicyDocument: + Version: "2012-10-17" + Statement: + - Effect: Allow + Action: + - 's3:CreateBucket' + - 's3:PutObject' + Resource: + - !Join + - ':' + - - 'arn' + - !Ref AWS::Partition + - 's3' + - '' + - '' + - !Sub 'sagemaker-${AWS::Region}-${AWS::AccountId}' + - !Join + - ':' + - - 'arn' + - !Ref AWS::Partition + - 's3' + - '' + - '' + - !Sub 'sagemaker-${AWS::Region}-${AWS::AccountId}/*' + - PolicyName: PassRoleSagemakerPipeline + PolicyDocument: + Version: "2012-10-17" + Statement: + - Effect: Allow + Action: + - 'iam:PassRole' + Resource: !GetAtt SageMakerPipelineRole.Arn + ModelCodeArtifactsBucket: + Type: 'AWS::S3::Bucket' + Properties: + BucketName: !Sub 'model-code-artifacts-${SageMakerProjectName}-${AWS::Region}-${AWS::AccountId}' + BucketEncryption: + ServerSideEncryptionConfiguration: + - ServerSideEncryptionByDefault: + SSEAlgorithm: 'AES256' + VersioningConfiguration: + Status: Enabled + ModelTrainCodeBuildProject: + Type: 'AWS::CodeBuild::Project' + Properties: + Name: !Sub 'model-${SageMakerProjectName}-train' + Artifacts: + Type: NO_ARTIFACTS + Environment: + ComputeType: BUILD_GENERAL1_SMALL + Image: aws/codebuild/standard:5.0 + ImagePullCredentialsType: CODEBUILD + PrivilegedMode: true + Type: LINUX_CONTAINER + EnvironmentVariables: + - Name: SAGEMAKER_PIPELINE_ROLE_ARN + Type: 
PLAINTEXT + Value: !GetAtt SageMakerPipelineRole.Arn + - Name: MODEL_NAME + Type: PLAINTEXT + Value: !Ref SageMakerProjectName + - Name: PROJECT_ID + Type: PLAINTEXT + Value: !Ref SageMakerProjectId + - Name: MODEL_PACKAGE_GROUP_NAME + Type: PLAINTEXT + Value: !Sub 'model-${SageMakerProjectName}' + - Name: AWS_ACCOUNT_ID + Type: PLAINTEXT + Value: !Ref AWS::AccountId + ServiceRole: !GetAtt ModelTrainCodeBuildRole.Arn + Source: + Location: !Join + - '/' + - - !Ref ModelCodeArtifactsBucket + - 'code.zip' + Type: S3 + BuildSpec: 'buildspec_train.yml' + SageMakerPipelineRole: + Type: 'AWS::IAM::Role' + Properties: + RoleName: !Sub 'SageMakerPipelineRole-${SageMakerProjectName}' + AssumeRolePolicyDocument: + Version: '2012-10-17' + Statement: + - Effect: Allow + Principal: + Service: + - sagemaker.amazonaws.com + Action: + - 'sts:AssumeRole' + Path: / + ManagedPolicyArns: + - 'arn:aws:iam::aws:policy/CloudWatchLogsFullAccess' + - 'arn:aws:iam::aws:policy/AmazonSageMakerFullAccess' + ReleaseLambdaRole: + Type: 'AWS::IAM::Role' + Properties: + RoleName: !Sub 'ReleaseLambdaRole-${SageMakerProjectName}' + AssumeRolePolicyDocument: + Version: '2012-10-17' + Statement: + - Effect: Allow + Principal: + Service: + - lambda.amazonaws.com + Action: + - 'sts:AssumeRole' + Path: / + ManagedPolicyArns: + - 'arn:aws:iam::aws:policy/CloudWatchLogsFullAccess' + Policies: + - PolicyName: CodePipelineWrite + PolicyDocument: + Version: "2012-10-17" + Statement: + - Effect: Allow + Action: + - 'codepipeline:PutJobSuccessResult' + - 'codepipeline:PutJobFailureResult' + Resource: '*' + - PolicyName: ReadWriteSagemakerModelRegistry + PolicyDocument: + Version: "2012-10-17" + Statement: + - Effect: Allow + Action: + - 'sagemaker:ListModelPackages' + - 'sagemaker:DescribeModelPackage' + - 'sagemaker:UpdateModelPackage' + Resource: + - !Join + - ':' + - - 'arn' + - !Ref AWS::Partition + - 'sagemaker' + - !Ref AWS::Region + - !Ref AWS::AccountId + - !Sub 'model-package/model-${SageMakerProjectName}/*' + ReleaseLambda: + Type: 'AWS::Serverless::Function' + Properties: + FunctionName: !Sub 'release-model-package-${SageMakerProjectName}' + CodeUri: cloud_formation/release_model_package_function/index.py + Handler: index.lambda_handler + Role: !GetAtt ReleaseLambdaRole.Arn + Runtime: python3.8 + MemorySize: 128 + Timeout: 60 + EventInvokeConfig: + MaximumRetryAttempts: 0 + Environment: + Variables: + MODEL_PACKAGE_GROUP_NAME: !Sub 'model-${SageMakerProjectName}' + ModelPackageGroup: + Type: 'AWS::SageMaker::ModelPackageGroup' + Properties: + ModelPackageGroupName: !Sub 'model-${SageMakerProjectName}' + JenkinsUser: + Type: 'AWS::IAM::User' + Properties: + Policies: + - PolicyName: WriteCodeBuild + PolicyDocument: + Version: "2012-10-17" + Statement: + - Effect: Allow + Action: + - 'codebuild:StartBuild' + - 'codebuild:BatchGetProjects' + - 'codebuild:BatchGetBuilds' + Resource: !GetAtt ModelTrainCodeBuildProject.Arn + - PolicyName: WriteS3 + PolicyDocument: + Version: "2012-10-17" + Statement: + - Effect: Allow + Action: + - 's3:PutObject' + - 's3:GetObject' + - 's3:GetBucketVersioning' + Resource: + - !Sub '${ModelCodeArtifactsBucket.Arn}/*' + - !Sub '${ModelCodeArtifactsBucket.Arn}' + - PolicyName: WriteLambda + PolicyDocument: + Version: "2012-10-17" + Statement: + - Effect: Allow + Action: + - 'lambda:InvokeFunction' + Resource: !GetAtt ReleaseLambda.Arn + UserName: !Sub 'jenkins-model-${SageMakerProjectName}' diff --git a/multi-branch-mlops-train/deploy.sh b/multi-branch-mlops-train/deploy.sh new file mode 100755 index 
00000000..ea2d3790 --- /dev/null +++ b/multi-branch-mlops-train/deploy.sh @@ -0,0 +1,41 @@ +#!/bin/bash + +if ! command -v aws &> /dev/null +then + echo "Please install the AWS CLI" + exit +fi + +while getopts p:* flag +do + case "${flag}" in + p) pipeline=${OPTARG};; + esac +done + +account_id=$(aws sts get-caller-identity | jq '.Account' -r) +region=$(aws configure get region) + +aws cloudformation deploy \ + --stack-name train-baseline \ + --template-file cloud_formation/baseline.yaml \ + --capabilities CAPABILITY_IAM CAPABILITY_NAMED_IAM + +if [ "$pipeline" = "code_pipeline+code_commit" ] +then + aws cloudformation package \ + --template-file cloud_formation/train-codepipeline-codecommit.yaml \ + --s3-bucket cloud-formation-"$account_id"-"$region" \ + --output-template-file cloud_formation/model_train.yaml + + aws s3 cp cloud_formation/pipeline.yaml s3://cloud-formation-"$account_id"-"$region"/pipeline.yaml + aws s3 cp cloud_formation/model_train.yaml s3://cloud-formation-"$account_id"-"$region"/model_train.yaml +elif [ "$pipeline" == "jenkins" ] +then + aws cloudformation package \ + --template-file cloud_formation/train-jenkins.yaml \ + --s3-bucket cloud-formation-"$account_id"-"$region" \ + --output-template-file cloud_formation/model_train.yaml + + aws s3 cp cloud_formation/model_train.yaml s3://cloud-formation-"$account_id"-"$region"/model_train.yaml +fi diff --git a/multi-branch-mlops-train/images/add-portfolio-constraint.png b/multi-branch-mlops-train/images/add-portfolio-constraint.png new file mode 100644 index 00000000..9faf296f Binary files /dev/null and b/multi-branch-mlops-train/images/add-portfolio-constraint.png differ diff --git a/multi-branch-mlops-train/images/add-portfolio-roles.png b/multi-branch-mlops-train/images/add-portfolio-roles.png new file mode 100644 index 00000000..797e17a6 Binary files /dev/null and b/multi-branch-mlops-train/images/add-portfolio-roles.png differ diff --git a/multi-branch-mlops-train/images/add-product-tag.png b/multi-branch-mlops-train/images/add-product-tag.png new file mode 100644 index 00000000..9b700e6c Binary files /dev/null and b/multi-branch-mlops-train/images/add-product-tag.png differ diff --git a/multi-branch-mlops-train/images/codepipeline-codecommit-arch-train-complete.png b/multi-branch-mlops-train/images/codepipeline-codecommit-arch-train-complete.png new file mode 100644 index 00000000..66ac1f5b Binary files /dev/null and b/multi-branch-mlops-train/images/codepipeline-codecommit-arch-train-complete.png differ diff --git a/multi-branch-mlops-train/images/codepipeline-running.png b/multi-branch-mlops-train/images/codepipeline-running.png new file mode 100644 index 00000000..9ea82867 Binary files /dev/null and b/multi-branch-mlops-train/images/codepipeline-running.png differ diff --git a/multi-branch-mlops-train/images/create-portfolio.png b/multi-branch-mlops-train/images/create-portfolio.png new file mode 100644 index 00000000..0bcdca22 Binary files /dev/null and b/multi-branch-mlops-train/images/create-portfolio.png differ diff --git a/multi-branch-mlops-train/images/create-pr.png b/multi-branch-mlops-train/images/create-pr.png new file mode 100644 index 00000000..33d0f730 Binary files /dev/null and b/multi-branch-mlops-train/images/create-pr.png differ diff --git a/multi-branch-mlops-train/images/create-product-1.png b/multi-branch-mlops-train/images/create-product-1.png new file mode 100644 index 00000000..76e7c8f0 Binary files /dev/null and b/multi-branch-mlops-train/images/create-product-1.png differ diff --git 
a/multi-branch-mlops-train/images/create-product-2.png b/multi-branch-mlops-train/images/create-product-2.png new file mode 100644 index 00000000..ad01ab5b Binary files /dev/null and b/multi-branch-mlops-train/images/create-product-2.png differ diff --git a/multi-branch-mlops-train/images/create-product-3.png b/multi-branch-mlops-train/images/create-product-3.png new file mode 100644 index 00000000..8ad942d0 Binary files /dev/null and b/multi-branch-mlops-train/images/create-product-3.png differ diff --git a/multi-branch-mlops-train/images/create-project.png b/multi-branch-mlops-train/images/create-project.png new file mode 100644 index 00000000..3d525e0f Binary files /dev/null and b/multi-branch-mlops-train/images/create-project.png differ diff --git a/multi-branch-mlops-train/images/experiment-branch.jpg b/multi-branch-mlops-train/images/experiment-branch.jpg new file mode 100644 index 00000000..cc2c91d3 Binary files /dev/null and b/multi-branch-mlops-train/images/experiment-branch.jpg differ diff --git a/multi-branch-mlops-train/images/jenkins-arch-train-complete.png b/multi-branch-mlops-train/images/jenkins-arch-train-complete.png new file mode 100644 index 00000000..b49a2471 Binary files /dev/null and b/multi-branch-mlops-train/images/jenkins-arch-train-complete.png differ diff --git a/multi-branch-mlops-train/images/merge-pr.png b/multi-branch-mlops-train/images/merge-pr.png new file mode 100644 index 00000000..7d264e7c Binary files /dev/null and b/multi-branch-mlops-train/images/merge-pr.png differ diff --git a/multi-branch-mlops-train/images/model-registry-approved.png b/multi-branch-mlops-train/images/model-registry-approved.png new file mode 100644 index 00000000..557d22f1 Binary files /dev/null and b/multi-branch-mlops-train/images/model-registry-approved.png differ diff --git a/multi-branch-mlops-train/images/model-registry-pending.png b/multi-branch-mlops-train/images/model-registry-pending.png new file mode 100644 index 00000000..b391b9b3 Binary files /dev/null and b/multi-branch-mlops-train/images/model-registry-pending.png differ diff --git a/multi-branch-mlops-train/images/open-pr-button.png b/multi-branch-mlops-train/images/open-pr-button.png new file mode 100644 index 00000000..7af4905c Binary files /dev/null and b/multi-branch-mlops-train/images/open-pr-button.png differ diff --git a/multi-branch-mlops-train/images/pr-created.png b/multi-branch-mlops-train/images/pr-created.png new file mode 100644 index 00000000..5f61f4d9 Binary files /dev/null and b/multi-branch-mlops-train/images/pr-created.png differ diff --git a/multi-branch-mlops-train/images/sagemakerpipeline-running.png b/multi-branch-mlops-train/images/sagemakerpipeline-running.png new file mode 100644 index 00000000..5796ad76 Binary files /dev/null and b/multi-branch-mlops-train/images/sagemakerpipeline-running.png differ diff --git a/multi-branch-mlops-train/images/studio-project-available.png b/multi-branch-mlops-train/images/studio-project-available.png new file mode 100644 index 00000000..17afcb40 Binary files /dev/null and b/multi-branch-mlops-train/images/studio-project-available.png differ diff --git a/multi-branch-mlops-train/images/wait-project-create.png b/multi-branch-mlops-train/images/wait-project-create.png new file mode 100644 index 00000000..1de187a1 Binary files /dev/null and b/multi-branch-mlops-train/images/wait-project-create.png differ diff --git a/multi-branch-mlops-train/pipelines/__init__.py b/multi-branch-mlops-train/pipelines/__init__.py new file mode 100644 index 00000000..e69de29b 
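The buildspec earlier in this diff invokes `pipelines/run_pipeline.py`, which is not part of the change set shown here. Below is a minimal, hypothetical sketch of such an entry point, assuming it simply wires the CLI arguments from `buildspec_train.yml` into `get_pipeline` from `pipelines/abalone/pipeline.py`; the pipeline naming convention is an assumption.

```python
# Hypothetical sketch only: run_pipeline.py is referenced by buildspec_train.yml
# but not included in this diff. Argument names mirror the buildspec invocation.
import argparse

from pipelines.abalone.pipeline import get_pipeline


def main() -> None:
    parser = argparse.ArgumentParser()
    parser.add_argument("--region", required=True)
    parser.add_argument("--experiment-name", required=True)
    parser.add_argument("--model-package-group-name", required=True)
    parser.add_argument("--model-name", required=True)
    parser.add_argument("--project-id", required=True)  # accepted for parity; unused here
    parser.add_argument("--commit-id", required=True)
    parser.add_argument("--role-arn", required=True)
    args = parser.parse_args()

    pipeline = get_pipeline(
        region=args.region,
        model_package_group_name=args.model_package_group_name,
        pipeline_name=f"model-{args.model_name}-train-{args.experiment_name}",  # assumed naming
        base_job_prefix=args.model_name,
        commit_id=args.commit_id,
        role_arn=args.role_arn,
    )
    # Create or update the pipeline definition, then kick off an execution.
    pipeline.upsert(role_arn=args.role_arn)
    pipeline.start()


if __name__ == "__main__":
    main()
```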
diff --git a/multi-branch-mlops-train/pipelines/__version__.py b/multi-branch-mlops-train/pipelines/__version__.py new file mode 100644 index 00000000..02c58270 --- /dev/null +++ b/multi-branch-mlops-train/pipelines/__version__.py @@ -0,0 +1,9 @@ +"""Metadata for the pipelines package.""" + +__title__ = "pipelines" +__description__ = "pipelines - template package" +__version__ = "0.0.1" +__author__ = "" +__author_email__ = "" +__license__ = "Apache 2.0" +__url__ = "" diff --git a/multi-branch-mlops-train/pipelines/abalone/__init__.py b/multi-branch-mlops-train/pipelines/abalone/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/multi-branch-mlops-train/pipelines/abalone/evaluate.py b/multi-branch-mlops-train/pipelines/abalone/evaluate.py new file mode 100644 index 00000000..6ddf06d8 --- /dev/null +++ b/multi-branch-mlops-train/pipelines/abalone/evaluate.py @@ -0,0 +1,59 @@ +"""Evaluation script for measuring mean squared error.""" +import json +import logging +import pathlib +import pickle +import tarfile + +import numpy as np +import pandas as pd +import xgboost + +from sklearn.metrics import mean_squared_error + +logger = logging.getLogger() +logger.setLevel(logging.INFO) +logger.addHandler(logging.StreamHandler()) + + + +if __name__ == "__main__": + logger.debug("Starting evaluation.") + model_path = "/opt/ml/processing/model/model.tar.gz" + with tarfile.open(model_path) as tar: + tar.extractall(path=".") + + logger.debug("Loading xgboost model.") + model = pickle.load(open("xgboost-model", "rb")) + + logger.debug("Reading test data.") + test_path = "/opt/ml/processing/test/test.csv" + df = pd.read_csv(test_path, header=None) + + logger.debug("Reading test data.") + y_test = df.iloc[:, 0].to_numpy() + df.drop(df.columns[0], axis=1, inplace=True) + X_test = xgboost.DMatrix(df.values) + + logger.info("Performing predictions against test data.") + predictions = model.predict(X_test) + + logger.debug("Calculating mean squared error.") + mse = mean_squared_error(y_test, predictions) + std = np.std(y_test - predictions) + report_dict = { + "regression_metrics": { + "mse": { + "value": mse, + "standard_deviation": std + }, + }, + } + + output_dir = "/opt/ml/processing/evaluation" + pathlib.Path(output_dir).mkdir(parents=True, exist_ok=True) + + logger.info("Writing out evaluation report with mse: %f", mse) + evaluation_path = f"{output_dir}/evaluation.json" + with open(evaluation_path, "w") as f: + f.write(json.dumps(report_dict)) diff --git a/multi-branch-mlops-train/pipelines/abalone/pipeline.py b/multi-branch-mlops-train/pipelines/abalone/pipeline.py new file mode 100644 index 00000000..ea1d59a9 --- /dev/null +++ b/multi-branch-mlops-train/pipelines/abalone/pipeline.py @@ -0,0 +1,167 @@ +import os + +import boto3 +import sagemaker +import sagemaker.session +from sagemaker.estimator import Estimator +from sagemaker.inputs import TrainingInput +from sagemaker.metadata_properties import MetadataProperties +from sagemaker.processing import ( + ProcessingOutput, +) +from sagemaker.sklearn.processing import SKLearnProcessor +from sagemaker.workflow.parameters import ( + ParameterInteger, + ParameterString, +) +from sagemaker.workflow.pipeline import Pipeline +from sagemaker.workflow.pipeline_experiment_config import PipelineExperimentConfig +from sagemaker.workflow.step_collections import RegisterModel +from sagemaker.workflow.steps import ( + ProcessingStep, + TrainingStep, +) + +BASE_DIR = os.path.dirname(os.path.realpath(__file__)) +# 1 + +def get_session(region, 
default_bucket): + boto_session = boto3.Session(region_name=region) + + sagemaker_client = boto_session.client("sagemaker") + runtime_client = boto_session.client("sagemaker-runtime") + return sagemaker.session.Session( + boto_session=boto_session, + sagemaker_client=sagemaker_client, + sagemaker_runtime_client=runtime_client, + default_bucket=default_bucket, + ) + + +def get_pipeline( + region, + model_package_group_name, + pipeline_name, + base_job_prefix, + commit_id, + role_arn, + default_bucket=None, +): + sagemaker_session = get_session(region, default_bucket) + if role_arn is None: + role_arn = sagemaker.session.get_execution_role(sagemaker_session) + + # parameters for pipeline execution + processing_instance_count = ParameterInteger(name="ProcessingInstanceCount", default_value=1) + processing_instance_type = ParameterString( + name="ProcessingInstanceType", default_value="ml.m5.xlarge" + ) + training_instance_type = ParameterString( + name="TrainingInstanceType", default_value="ml.m5.xlarge" + ) + model_approval_status = ParameterString( + name="ModelApprovalStatus", default_value="PendingManualApproval" + ) + input_data = ParameterString( + name="InputDataUrl", + default_value=f"s3://sagemaker-servicecatalog-seedcode-{region}/dataset/abalone-dataset.csv", + ) + + # processing step for feature engineering + sklearn_processor = SKLearnProcessor( + framework_version="0.23-1", + instance_type=processing_instance_type, + instance_count=processing_instance_count, + base_job_name=f"{base_job_prefix}/sklearn-abalone-preprocess", + sagemaker_session=sagemaker_session, + role=role_arn, + ) + step_process = ProcessingStep( + name="PreprocessAbaloneData", + processor=sklearn_processor, + outputs=[ + ProcessingOutput(output_name="train", source="/opt/ml/processing/train"), + ProcessingOutput(output_name="validation", source="/opt/ml/processing/validation"), + ProcessingOutput(output_name="test", source="/opt/ml/processing/test"), + ], + code=os.path.join(BASE_DIR, "preprocess.py"), + job_arguments=["--input-data", input_data], + ) + + # training step for generating model artifacts + model_path = f"s3://{sagemaker_session.default_bucket()}/{base_job_prefix}/AbaloneTrain" + image_uri = sagemaker.image_uris.retrieve( + framework="xgboost", + region=region, + version="1.0-1", + py_version="py3", + instance_type=training_instance_type, + ) + xgb_train = Estimator( + image_uri=image_uri, + instance_type=training_instance_type, + instance_count=1, + output_path=model_path, + base_job_name=f"{base_job_prefix}/abalone-train", + sagemaker_session=sagemaker_session, + role=role_arn, + ) + xgb_train.set_hyperparameters( + objective="reg:linear", + num_round=1, + max_depth=5, + eta=0.2, + gamma=4, + min_child_weight=6, + subsample=0.7, + silent=0, + ) + step_train = TrainingStep( + name="TrainAbaloneModel", + estimator=xgb_train, + inputs={ + "train": TrainingInput( + s3_data=step_process.properties.ProcessingOutputConfig.Outputs[ + "train" + ].S3Output.S3Uri, + content_type="text/csv", + ), + "validation": TrainingInput( + s3_data=step_process.properties.ProcessingOutputConfig.Outputs[ + "validation" + ].S3Output.S3Uri, + content_type="text/csv", + ), + }, + ) + step_register = RegisterModel( + name="RegisterAbaloneModel", + estimator=xgb_train, + model_data=step_train.properties.ModelArtifacts.S3ModelArtifacts, + content_types=["text/csv"], + response_types=["text/csv"], + inference_instances=["ml.t2.medium", "ml.m5.large"], + transform_instances=["ml.m5.large"], + 
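+        # The commit id recorded in the description and metadata below ties this
+        # model version to the experiment commit that produced it, so the merge
+        # workflow can later locate and approve the matching model package.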
model_package_group_name=model_package_group_name,
+        approval_status=model_approval_status,
+        description=f'commit_id={commit_id}',
+        metadata_properties=MetadataProperties(commit_id=commit_id)
+    )
+    # pipeline instance
+    pipeline = Pipeline(
+        name=pipeline_name,
+        parameters=[
+            processing_instance_type,
+            processing_instance_count,
+            training_instance_type,
+            model_approval_status,
+            input_data,
+        ],
+        steps=[step_process, step_train, step_register],
+        sagemaker_session=sagemaker_session,
+        pipeline_experiment_config=PipelineExperimentConfig(
+            experiment_name=model_package_group_name,
+            trial_name=commit_id
+        )
+    )
+    return pipeline
diff --git a/multi-branch-mlops-train/pipelines/abalone/preprocess.py b/multi-branch-mlops-train/pipelines/abalone/preprocess.py
new file mode 100644
index 00000000..dfb3e673
--- /dev/null
+++ b/multi-branch-mlops-train/pipelines/abalone/preprocess.py
@@ -0,0 +1,119 @@
+"""Feature engineers the abalone dataset."""
+import argparse
+import logging
+import os
+import pathlib
+
+import boto3
+import numpy as np
+import pandas as pd
+
+from sklearn.compose import ColumnTransformer
+from sklearn.impute import SimpleImputer
+from sklearn.pipeline import Pipeline
+from sklearn.preprocessing import StandardScaler, OneHotEncoder
+
+logger = logging.getLogger()
+logger.setLevel(logging.INFO)
+logger.addHandler(logging.StreamHandler())
+
+
+# The CSV file arrives without a header row, so the column names are specified here.
+feature_columns_names = [
+    "sex",
+    "length",
+    "diameter",
+    "height",
+    "whole_weight",
+    "shucked_weight",
+    "viscera_weight",
+    "shell_weight",
+]
+label_column = "rings"
+
+feature_columns_dtype = {
+    "sex": str,
+    "length": np.float64,
+    "diameter": np.float64,
+    "height": np.float64,
+    "whole_weight": np.float64,
+    "shucked_weight": np.float64,
+    "viscera_weight": np.float64,
+    "shell_weight": np.float64,
+}
+label_column_dtype = {"rings": np.float64}
+
+
+def merge_two_dicts(x, y):
+    """Merges two dicts, returning a new copy."""
+    z = x.copy()
+    z.update(y)
+    return z
+
+
+if __name__ == "__main__":
+    logger.debug("Starting preprocessing.")
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--input-data", type=str, required=True)
+    args = parser.parse_args()
+
+    base_dir = "/opt/ml/processing"
+    pathlib.Path(f"{base_dir}/data").mkdir(parents=True, exist_ok=True)
+    input_data = args.input_data
+    bucket = input_data.split("/")[2]
+    key = "/".join(input_data.split("/")[3:])
+
+    logger.info("Downloading data from bucket: %s, key: %s", bucket, key)
+    fn = f"{base_dir}/data/abalone-dataset.csv"
+    s3 = boto3.resource("s3")
+    s3.Bucket(bucket).download_file(key, fn)
+
+    logger.debug("Reading downloaded data.")
+    df = pd.read_csv(
+        fn,
+        header=None,
+        names=feature_columns_names + [label_column],
+        dtype=merge_two_dicts(feature_columns_dtype, label_column_dtype),
+    )
+    os.unlink(fn)
+
+    logger.debug("Defining transformers.")
+    numeric_features = list(feature_columns_names)
+    numeric_features.remove("sex")
+    numeric_transformer = Pipeline(
+        steps=[("imputer", SimpleImputer(strategy="median")), ("scaler", StandardScaler())]
+    )
+
+    categorical_features = ["sex"]
+    categorical_transformer = Pipeline(
+        steps=[
+            ("imputer", SimpleImputer(strategy="constant", fill_value="missing")),
+            ("onehot", OneHotEncoder(handle_unknown="ignore")),
+        ]
+    )
+
+    preprocess = ColumnTransformer(
+        transformers=[
+            ("num", numeric_transformer, numeric_features),
+            ("cat", categorical_transformer, categorical_features),
+        ]
+    )
+
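+    # The label ("rings") is split off and re-attached as the first column below,
+    # since SageMaker's built-in XGBoost container expects label-first, headerless CSV.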
logger.info("Applying transforms.") + y = df.pop("rings") + X_pre = preprocess.fit_transform(df) + y_pre = y.to_numpy().reshape(len(y), 1) + + X = np.concatenate((y_pre, X_pre), axis=1) + + logger.info("Splitting %d rows of data into train, validation, test datasets.", len(X)) + np.random.shuffle(X) + train, validation, test = np.split(X, [int(0.7 * len(X)), int(0.85 * len(X))]) + + logger.info("Writing out datasets to %s.", base_dir) + pd.DataFrame(train).to_csv(f"{base_dir}/train/train.csv", header=False, index=False) + pd.DataFrame(validation).to_csv( + f"{base_dir}/validation/validation.csv", header=False, index=False + ) + pd.DataFrame(test).to_csv(f"{base_dir}/test/test.csv", header=False, index=False) diff --git a/multi-branch-mlops-train/pipelines/requirements.txt b/multi-branch-mlops-train/pipelines/requirements.txt new file mode 100644 index 00000000..334f5102 --- /dev/null +++ b/multi-branch-mlops-train/pipelines/requirements.txt @@ -0,0 +1,2 @@ +sagemaker==2.50.0 +boto3==1.18.10 \ No newline at end of file diff --git a/multi-branch-mlops-train/pipelines/run_pipeline.py b/multi-branch-mlops-train/pipelines/run_pipeline.py new file mode 100644 index 00000000..3558f72a --- /dev/null +++ b/multi-branch-mlops-train/pipelines/run_pipeline.py @@ -0,0 +1,67 @@ +from __future__ import absolute_import + +import argparse +import json +import sys + +from abalone.pipeline import get_pipeline + + +def main(): # pragma: no cover + parser = argparse.ArgumentParser() + + parser.add_argument('-region', '--region', dest='region', type=str, required=True) + parser.add_argument('-model-package-group-name', '--model-package-group-name', dest='model_package_group_name', type=str, required=True) + parser.add_argument('-model-name', '--model-name', dest='model_name', type=str, required=True) + parser.add_argument('-project-id', '--project-id', dest='project_id', type=str, required=True) + parser.add_argument('-experiment-name', '--experiment-name', dest='experiment_name', type=str, required=True) + parser.add_argument('-commit-id', '--commit-id', dest='commit_id', type=str, required=True) + parser.add_argument('-role-arn', '--role-arn', dest='role_arn', type=str, required=True) + + args = parser.parse_args() + print(f'args={args}') + + try: + pipeline = get_pipeline( + region=args.region, + model_package_group_name=args.model_package_group_name, + pipeline_name=f'{args.model_name}-{args.experiment_name}', + base_job_prefix=args.model_name, + commit_id=args.commit_id, + role_arn=args.role_arn, + ) + print("###### Creating/updating a SageMaker Pipeline with the following definition:") + parsed = json.loads(pipeline.definition()) + print(json.dumps(parsed, indent=2, sort_keys=True)) + + upsert_response = pipeline.upsert( + role_arn=args.role_arn, + tags=[ + { + 'Key': 'sagemaker:project-name', + 'Value': args.model_name + }, + { + 'Key': 'sagemaker:project-id', + 'Value': args.project_id + } + ] + ) + print("\n###### Created/Updated SageMaker Pipeline: Response received:") + print(upsert_response) + + execution = pipeline.start() + print(f"\n###### Execution started with PipelineExecutionArn: {execution.arn}") + + print("Waiting for the execution to finish...") + execution.wait() + print("\n#####Execution completed. 
Execution step details:") + + print(execution.list_steps()) + except Exception as e: # pylint: disable=W0703 + print(f"Exception: {e}") + sys.exit(1) + + +if __name__ == "__main__": + main() diff --git a/multi-branch-mlops-train/requirements.txt b/multi-branch-mlops-train/requirements.txt new file mode 100644 index 00000000..288a677a --- /dev/null +++ b/multi-branch-mlops-train/requirements.txt @@ -0,0 +1,2 @@ +sagemaker==2.52.1 +scikit-learn==0.24.2 \ No newline at end of file