-
Notifications
You must be signed in to change notification settings - Fork 48
/
project-model-build-train.yaml
222 lines (209 loc) · 8.25 KB
/
project-model-build-train.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
# Template format version. Quoted so that YAML parsers which resolve
# ISO-date-looking plain scalars to date objects still deliver the literal
# string "2010-09-09".
AWSTemplateFormatVersion: '2010-09-09'
# Literal block scalar: the two description lines below keep their line break.
Description: |
  MLOps SageMaker Project for CI/CD pipeline for model build, train, validate.
  This template creates a CI/CD pipeline to build, train, and validate an ML model using SageMaker pipelines
# Template inputs: the project name and the project's service-generated ID.
# Both values are interpolated into resource names and tags further down.
Parameters:
  SageMakerProjectName:
    Description: Name of the project
    Type: String
    # Pattern: one leading letter, then any number of groups made of optional
    # hyphens followed by a single letter or digit.
    AllowedPattern: '^[a-zA-Z](-*[a-zA-Z0-9])*'
    MinLength: 1
    MaxLength: 32
  SageMakerProjectId:
    Description: Service generated Id of the project.
    Type: String
Conditions:
  # Both operands are the same literal string, so this condition always
  # evaluates to true. It gates creation of MlOpsArtifactsBucket and selects
  # the pipeline's artifact store location.
  MLOpsArtifactBucketCondition: !Equals
    - 'true'
    - 'true'
Resources:
# Custom resource served by the Lambda function whose ARN is exported as
# 'ds-get-environment-configuration-lambda-arn'. Other resources in this
# template read its attributes through !GetAtt.
GetEnvironmentConfiguration:
  Type: Custom::GetEnvironmentConfiguration
  Properties:
    ServiceToken: !ImportValue ds-get-environment-configuration-lambda-arn
    SageMakerProjectName: !Ref SageMakerProjectName
    # List of VariableName / ParameterName pairs handed to the function.
    # NOTE(review): presumably each ParameterName is resolved from SSM and
    # returned as an attribute named VariableName - confirm against the Lambda.
    # NOTE(review): the template also reads EnvironmentName, EnvironmentType,
    # SecurityGroups, SubnetIds and VpcId from this resource although they are
    # not listed here - confirm the function returns them unconditionally.
    SSMParams:
      - VariableName: DataBucketName
        ParameterName: data-bucket-name
      - VariableName: ModelBucketName
        ParameterName: model-bucket-name
      - VariableName: S3KmsKeyId
        ParameterName: kms-s3-key-arn
      - VariableName: PipelineExecutionRole
        ParameterName: sm-pipeline-execution-role-arn
      - VariableName: SeedCodeS3BucketName
        ParameterName: seed-code-s3bucket-name
# S3 bucket that ModelBuildPipeline uses as its artifact store when
# MLOpsArtifactBucketCondition holds. The bucket is retained both on stack
# deletion and when an update would replace it.
MlOpsArtifactsBucket:
  Type: AWS::S3::Bucket
  Condition: MLOpsArtifactBucketCondition
  DeletionPolicy: Retain
  UpdateReplacePolicy: Retain
  Properties:
    # Length budget (63 allowed): 'sm-mlops-cp-' (12) + project name (<= 32)
    # + '-' (1) + project ID; 60 in total assuming a 15-character project ID
    # (the ID length is not enforced by this template).
    # NOTE(review): SageMakerProjectName's AllowedPattern admits upper-case
    # letters, but S3 bucket names must be lower-case - confirm that project
    # names are lower-case before they reach this template.
    BucketName: !Sub sm-mlops-cp-${SageMakerProjectName}-${SageMakerProjectId}
    AccessControl: Private
    # All four public-access blocks are switched on. Booleans are written in
    # the canonical lower-case form.
    PublicAccessBlockConfiguration:
      BlockPublicAcls: true
      BlockPublicPolicy: true
      IgnorePublicAcls: true
      RestrictPublicBuckets: true
    # Default encryption: SSE-KMS with the key reported by the
    # GetEnvironmentConfiguration custom resource.
    BucketEncryption:
      ServerSideEncryptionConfiguration:
        - ServerSideEncryptionByDefault:
            SSEAlgorithm: 'aws:kms'
            KMSMasterKeyID: !GetAtt GetEnvironmentConfiguration.S3KmsKeyId
    Tags:
      - Key: SageMakerProjectName
        Value: !Ref SageMakerProjectName
      - Key: SageMakerProjectId
        Value: !Ref SageMakerProjectId
      - Key: EnvironmentName
        Value: !GetAtt GetEnvironmentConfiguration.EnvironmentName
      - Key: EnvironmentType
        Value: !GetAtt GetEnvironmentConfiguration.EnvironmentType
# Event rule that starts ModelBuildPipeline whenever the 'main' branch of the
# ModelBuild CodeCommit repository reports a repository state change.
ModelBuildCodeCommitEventRule:
  Type: AWS::Events::Rule
  Properties:
    # Max length allowed: 64. Budget: 'sagemaker-' (10) + project name (<= 32)
    # + '-' (1) + project ID + '-build' (6); exactly 64 assuming a
    # 15-character project ID.
    Name: !Sub 'sagemaker-${SageMakerProjectName}-${SageMakerProjectId}-build'
    Description: 'Rule to launch a pipeline run when ModelBuild CodeCommit repository is updated'
    State: ENABLED
    EventPattern:
      source: ['aws.codecommit']
      detail-type: ['CodeCommit Repository State Change']
      # Only events for the repository created by this template.
      resources:
        - !GetAtt ModelBuildCodeCommitRepository.Arn
      # Only branch references named 'main'.
      detail:
        referenceType: ['branch']
        referenceName: ['main']
    Targets:
      - Id: !Sub 'codecommit-${SageMakerProjectName}-modelbuild'
        # The pipeline ARN is assembled by hand from the pipeline's !Ref value.
        Arn: !Sub "arn:${AWS::Partition}:codepipeline:${AWS::Region}:${AWS::AccountId}:${ModelBuildPipeline}"
        RoleArn: !Sub "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/service-role/AmazonSageMakerServiceCatalogProductsUseRole"
# CodeCommit repository for the model-build code. Its 'main' branch is seeded
# from a zip archive stored in the seed-code S3 bucket.
ModelBuildCodeCommitRepository:
  Type: AWS::CodeCommit::Repository
  Properties:
    # Max allowed length: 100 chars. Budget: 'sagemaker-' (10) + project name
    # and '-' (<= 33) + project ID + '-model-build-train' (18); 76 assuming a
    # 15-character project ID.
    RepositoryName: !Sub 'sagemaker-${SageMakerProjectName}-${SageMakerProjectId}-model-build-train'
    RepositoryDescription: !Sub 'SageMaker Model building infrastructure as code for the project ${SageMakerProjectName}'
    Code:
      BranchName: main
      S3:
        # Bucket name comes from the GetEnvironmentConfiguration custom resource.
        Bucket: !GetAtt GetEnvironmentConfiguration.SeedCodeS3BucketName
        Key: 'sagemaker-mlops/seed-code/mlops-model-build-train-v1.0.zip'
    Tags:
      - Key: SageMakerProjectName
        Value: !Ref SageMakerProjectName
      - Key: SageMakerProjectId
        Value: !Ref SageMakerProjectId
      - Key: EnvironmentName
        Value: !GetAtt GetEnvironmentConfiguration.EnvironmentName
      - Key: EnvironmentType
        Value: !GetAtt GetEnvironmentConfiguration.EnvironmentType
# CodeBuild project invoked by the Build stage of ModelBuildPipeline. Source
# and artifacts are both exchanged through CodePipeline, and the build runs
# inside the VPC reported by the GetEnvironmentConfiguration custom resource.
ModelPipelineBuildProject:
  Type: AWS::CodeBuild::Project
  Properties:
    # Max length: 255 chars. Budget: 'sagemaker-' (10) + project name and '-'
    # (<= 33) + project ID + '-modelbuild' (11); 69 assuming a 15-character
    # project ID.
    Name: !Sub 'sagemaker-${SageMakerProjectName}-${SageMakerProjectId}-modelbuild'
    Description: 'Pulls the code from Model Build CodeCommit repository, creates the SageMaker Pipeline, and executes it'
    ServiceRole: !Sub "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/service-role/AmazonSageMakerServiceCatalogProductsUseRole"
    TimeoutInMinutes: 480
    Source:
      Type: CODEPIPELINE
      BuildSpec: buildspec.yml
    Artifacts:
      Type: CODEPIPELINE
    Environment:
      Type: LINUX_CONTAINER
      ComputeType: BUILD_GENERAL1_SMALL
      # NOTE(review): this managed image tag is old - verify it is still offered.
      Image: 'aws/codebuild/amazonlinux2-x86_64-standard:3.0'
      # Values handed to the build as environment variables.
      EnvironmentVariables:
        - Name: SAGEMAKER_PROJECT_NAME
          Value: !Ref SageMakerProjectName
        - Name: SAGEMAKER_PROJECT_ID
          Value: !Ref SageMakerProjectId
        - Name: ENV_NAME
          Value: !GetAtt GetEnvironmentConfiguration.EnvironmentName
        - Name: ENV_TYPE
          Value: !GetAtt GetEnvironmentConfiguration.EnvironmentType
        - Name: DATA_BUCKET
          Value: !GetAtt GetEnvironmentConfiguration.DataBucketName
        - Name: SAGEMAKER_PIPELINE_NAME
          Value: !Sub 'sagemaker-${SageMakerProjectName}'
        - Name: SAGEMAKER_PIPELINE_ROLE_ARN
          Value: !GetAtt GetEnvironmentConfiguration.PipelineExecutionRole
        - Name: AWS_REGION
          Value: !Ref AWS::Region
    # Network placement, taken entirely from the custom resource's attributes.
    VpcConfig:
      VpcId: !GetAtt GetEnvironmentConfiguration.VpcId
      Subnets: !GetAtt GetEnvironmentConfiguration.SubnetIds
      SecurityGroupIds: !GetAtt GetEnvironmentConfiguration.SecurityGroups
    Tags:
      - Key: SageMakerProjectName
        Value: !Ref SageMakerProjectName
      - Key: SageMakerProjectId
        Value: !Ref SageMakerProjectId
      - Key: EnvironmentName
        Value: !GetAtt GetEnvironmentConfiguration.EnvironmentName
      - Key: EnvironmentType
        Value: !GetAtt GetEnvironmentConfiguration.EnvironmentType
# Two-stage CodePipeline: a CodeCommit source stage followed by a CodeBuild
# stage that runs ModelPipelineBuildProject on the checked-out source.
ModelBuildPipeline:
  Type: AWS::CodePipeline::Pipeline
  Properties:
    # Max length: 100 chars. Budget: 'sagemaker-' (10) + project name and '-'
    # (<= 33) + project ID + '-modelbuild' (11); 69 assuming a 15-character
    # project ID.
    Name: !Sub 'sagemaker-${SageMakerProjectName}-${SageMakerProjectId}-modelbuild'
    RoleArn: !Sub "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/service-role/AmazonSageMakerServiceCatalogProductsUseRole"
    ArtifactStore:
      Type: S3
      # Use the dedicated artifacts bucket when the condition holds; otherwise
      # fall back to the data bucket reported by the custom resource.
      Location: !If
        - MLOpsArtifactBucketCondition
        - !Ref MlOpsArtifactsBucket
        - !GetAtt GetEnvironmentConfiguration.DataBucketName
    Stages:
      - Name: Source
        Actions:
          - Name: ModelBuildSource
            ActionTypeId:
              Category: Source
              Owner: AWS
              Provider: CodeCommit
              Version: "1"
            Configuration:
              RepositoryName: !GetAtt ModelBuildCodeCommitRepository.Name
              BranchName: main
              # Polling is off; ModelBuildCodeCommitEventRule targets this
              # pipeline instead.
              PollForSourceChanges: "false"
            OutputArtifacts:
              - Name: ModelBuildSourceArtifact
      - Name: Build
        Actions:
          - Name: BuildAndExecuteSageMakerPipeline
            RunOrder: 1
            ActionTypeId:
              Category: Build
              Owner: AWS
              Provider: CodeBuild
              Version: "1"
            Configuration:
              ProjectName: !Ref ModelPipelineBuildProject
            # Consumes the artifact emitted by the Source stage.
            InputArtifacts:
              - Name: ModelBuildSourceArtifact
            OutputArtifacts:
              - Name: ModelBuildBuildArtifact
    Tags:
      - Key: SageMakerProjectName
        Value: !Ref SageMakerProjectName
      - Key: SageMakerProjectId
        Value: !Ref SageMakerProjectId
      - Key: EnvironmentName
        Value: !GetAtt GetEnvironmentConfiguration.EnvironmentName
      - Key: EnvironmentType
        Value: !GetAtt GetEnvironmentConfiguration.EnvironmentType