-
Notifications
You must be signed in to change notification settings - Fork 8
/
cloudformation.yml
173 lines (146 loc) · 7.38 KB
/
cloudformation.yml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
# CloudFormation template for the monthly CloudTrail aggregation job.
# It declares a Lambda function together with its log group and execution
# role, plus an SQS queue (with a dead-letter queue) that triggers it.
AWSTemplateFormatVersion: "2010-09-09"
Description: "Monthly aggregation of CloudTrail events into Parquet"
# Stack inputs. Numeric parameters carry MinValue/MaxValue constraints so that
# an out-of-range value is rejected when the stack is submitted, rather than
# failing part-way through resource creation.
Parameters:

  LambdaName:
    Description: "Name for the Lambda function and associated resources"
    Type: "String"
    Default: "cloudtrail-aggregation-monthly"

  FunctionMemory:
    Description: "Memory size, in MB, for Lambda function (increase if you increase rows per file)"
    Type: "Number"
    Default: 1536
    # Range accepted by Lambda for a function's MemorySize.
    MinValue: 128
    MaxValue: 10240

  PythonVersion:
    Description: "The version of Python to use for the Lambda"
    Type: "String"
    # Quoted so the version stays a string and is never read as a float.
    Default: "3.11"

  NumpyLayerArn:
    Description: "ARN for a layer containing a compatible version of the NumPy library (must include layer version)"
    Type: "String"

  PyArrowLayerArn:
    Description: "ARN for a layer containing a compatible version of the PyArrow library (must include layer version)"
    Type: "String"

  MaximumConcurrency:
    Description: "The number of concurrent Lambda executions; increase to handle a backlog (minimum is 2)"
    Type: "Number"
    Default: 2
    # Enforces the minimum stated in the description; 1000 is the upper limit
    # of the event source mapping's MaximumConcurrency setting.
    MinValue: 2
    MaxValue: 1000

  SrcBucket:
    Description: "Name of the bucket holding daily CloudTrail logs"
    Type: "String"

  SrcPrefix:
    Description: "Prefix for raw CloudTrail logs"
    Type: "String"
    Default: "cloudtrail_daily/"

  DstBucket:
    Description: "Name of the bucket that will receive monthly logs"
    Type: "String"

  DstPrefix:
    Description: "Prefix for aggregated CloudTrail logs in the destination bucket (must include trailing slash)"
    Type: "String"
    Default: "cloudtrail_monthly/"

  RowsPerFile:
    Description: "The number of rows to store in a single output file"
    Type: "Number"
    Default: 250000
    # A file cannot hold fewer than one row.
    MinValue: 1
# Resources: the Lambda function, its log group and execution role, and the
# SQS queue / dead-letter queue / event source mapping that drive it.
Resources:

  # Log group named /aws/lambda/<function name>, declared explicitly so that
  # retention is limited to 7 days and the group is deleted with the stack.
  LambdaLogGroup:
    Type: "AWS::Logs::LogGroup"
    DeletionPolicy: "Delete"
    Properties:
      LogGroupName: !Sub "/aws/lambda/${LambdaName}"
      RetentionInDays: 7

  # Execution role assumed by the Lambda service. Beyond basic logging it may
  # list the source bucket, read objects under the source prefix, write objects
  # under the destination prefix, and consume messages from the trigger queue.
  LambdaRole:
    Type: "AWS::IAM::Role"
    Properties:
      Path: "/lambda/"
      # The region is part of the name so the stack can be deployed in several
      # regions of one account without role-name collisions.
      RoleName: !Sub "${LambdaName}-ExecutionRole-${AWS::Region}"
      AssumeRolePolicyDocument:
        Version: "2012-10-17"
        Statement:
          Effect: "Allow"
          Principal:
            Service: "lambda.amazonaws.com"
          Action: "sts:AssumeRole"
      ManagedPolicyArns:
        # Built from AWS::Partition, like the S3 ARNs below, so the template is
        # not tied to the "aws" partition.
        - !Sub "arn:${AWS::Partition}:iam::aws:policy/service-role/AWSLambdaBasicExecutionRole"
      Policies:
        - PolicyName: "OperationalPolicy"
          PolicyDocument:
            Version: "2012-10-17"
            Statement:
              # s3:ListBucket applies to the bucket itself, not to object keys.
              - Sid: "ListSource"
                Effect: "Allow"
                Action:
                  - "s3:ListBucket"
                Resource:
                  - !Sub "arn:${AWS::Partition}:s3:::${SrcBucket}"
              - Sid: "ReadSource"
                Effect: "Allow"
                Action:
                  - "s3:GetObject"
                Resource:
                  - !Sub "arn:${AWS::Partition}:s3:::${SrcBucket}/${SrcPrefix}*"
              - Sid: "WriteDestination"
                Effect: "Allow"
                Action:
                  - "s3:PutObject"
                Resource:
                  - !Sub "arn:${AWS::Partition}:s3:::${DstBucket}/${DstPrefix}*"
              # Permissions the event source mapping needs to poll the queue.
              - Sid: "SQS"
                Effect: "Allow"
                Action:
                  - "sqs:ReceiveMessage"
                  - "sqs:DeleteMessage"
                  - "sqs:GetQueueAttributes"
                Resource:
                  - !GetAtt TriggerQueue.Arn

  # The aggregation function. It is created with placeholder code; the message
  # printed by the placeholder says to replace it with the contents of lambda.py.
  LambdaFunction:
    Type: "AWS::Lambda::Function"
    # Create the log group first so that it exists, with the retention
    # configured above, before the function can write any logs.
    DependsOn: "LambdaLogGroup"
    Properties:
      FunctionName: !Ref LambdaName
      Description: "Stage 2 CloudTrail aggregation: combines a month's NDJSON files into a single Parquet file"
      Role: !GetAtt LambdaRole.Arn
      Runtime: !Sub "python${PythonVersion}"
      Handler: "index.lambda_handler"
      Code:
        ZipFile: |
          def lambda_handler(event, context):
              print("this is a dummy handler; replace with contents of lambda.py")
      Layers:
        - !Ref NumpyLayerArn
        - !Ref PyArrowLayerArn
      MemorySize: !Ref FunctionMemory
      # 900 seconds (15 minutes).
      Timeout: 900
      Environment:
        Variables:
          LOG_LEVEL: "INFO"
          SRC_BUCKET: !Ref SrcBucket
          SRC_PREFIX: !Ref SrcPrefix
          DST_BUCKET: !Ref DstBucket
          DST_PREFIX: !Ref DstPrefix
          ROWS_PER_FILE: !Ref RowsPerFile

  # Queue whose messages trigger the function.
  TriggerQueue:
    Type: "AWS::SQS::Queue"
    Properties:
      QueueName: !Sub "${LambdaName}-trigger"
      # 1209600 seconds = 14 days.
      MessageRetentionPeriod: 1209600
      # Longer than the function's 900-second Timeout.
      VisibilityTimeout: 1200  # 20 minutes; will redrive if Lambda times out
      RedrivePolicy:
        deadLetterTargetArn: !GetAtt TriggerDLQ.Arn
        # A message is handed to the function at most once; if that attempt
        # does not delete it, it goes to the dead-letter queue.
        maxReceiveCount: 1

  # Dead-letter queue holding messages that were not processed successfully.
  TriggerDLQ:
    Type: "AWS::SQS::Queue"
    Properties:
      QueueName: !Sub "${LambdaName}-dlq"
      # 1209600 seconds = 14 days.
      MessageRetentionPeriod: 1209600

  # Connects the trigger queue to the function.
  Trigger:
    Type: "AWS::Lambda::EventSourceMapping"
    Properties:
      # Each invocation receives a single message.
      BatchSize: 1
      EventSourceArn: !GetAtt TriggerQueue.Arn
      FunctionName: !GetAtt LambdaFunction.Arn
      ScalingConfig:
        MaximumConcurrency: !Ref MaximumConcurrency
# Values exported for whoever needs to enqueue work for the function.
Outputs:

  # Ref on an AWS::SQS::Queue resource resolves to the queue's URL.
  TriggerQueueUrl:
    Description: "URL of the queue that triggers Lambda"
    Value: !Ref TriggerQueue