-
Notifications
You must be signed in to change notification settings - Fork 14.1k
/
emr.py
497 lines (438 loc) · 18.9 KB
/
emr.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
from __future__ import annotations
import asyncio
import warnings
from typing import Any
from botocore.exceptions import WaiterError
from airflow.exceptions import AirflowProviderDeprecationWarning
from airflow.providers.amazon.aws.hooks.base_aws import AwsGenericHook
from airflow.providers.amazon.aws.hooks.emr import EmrContainerHook, EmrHook, EmrServerlessHook
from airflow.providers.amazon.aws.triggers.base import AwsBaseWaiterTrigger
from airflow.triggers.base import BaseTrigger, TriggerEvent
class EmrAddStepsTrigger(BaseTrigger):
    """
    Asynchronously poll the boto3 API and wait for the steps to finish executing.

    Steps are awaited one after another, in the order given by ``step_ids``.
    Exactly one event is emitted: ``success`` once every step has completed,
    or ``failure`` as soon as a step fails terminally or uses up its attempts.

    :param job_flow_id: The id of the job flow.
    :param step_ids: The id of the steps being waited upon.
    :param aws_conn_id: The Airflow connection used for AWS credentials.
    :param max_attempts: The maximum number of attempts to be made, per step.
        Must be convertible with ``int()`` by the time :meth:`run` executes.
    :param poll_interval: The amount of time in seconds to wait between attempts.
        Must be convertible with ``int()`` by the time :meth:`run` executes.
    """

    def __init__(
        self,
        job_flow_id: str,
        step_ids: list[str],
        aws_conn_id: str,
        max_attempts: int | None,
        poll_interval: int | None,
    ):
        super().__init__()
        self.job_flow_id = job_flow_id
        self.step_ids = step_ids
        self.aws_conn_id = aws_conn_id
        self.max_attempts = max_attempts
        self.poll_interval = poll_interval

    def serialize(self) -> tuple[str, dict[str, Any]]:
        """Return the classpath and the keyword arguments needed to rebuild this trigger."""
        return (
            "airflow.providers.amazon.aws.triggers.emr.EmrAddStepsTrigger",
            {
                "job_flow_id": str(self.job_flow_id),
                "step_ids": self.step_ids,
                # NOTE: numeric settings are stringified here, so run() must
                # convert them back before doing arithmetic with them.
                "poll_interval": str(self.poll_interval),
                "max_attempts": str(self.max_attempts),
                "aws_conn_id": str(self.aws_conn_id),
            },
        )

    async def run(self):
        """
        Wait for every step to complete and yield a single ``TriggerEvent``.

        The generator stops right after the first ``failure`` event so that a
        failed or timed-out step can never be followed by a ``success`` event.
        """
        # A trigger rebuilt from serialize() receives these as strings.
        max_attempts = int(self.max_attempts)
        poll_interval = int(self.poll_interval)

        self.hook = EmrHook(aws_conn_id=self.aws_conn_id)
        async with self.hook.async_conn as client:
            waiter = client.get_waiter("step_complete")
            for step_id in self.step_ids:
                for _ in range(max_attempts):
                    try:
                        # One waiter attempt per iteration; pacing between
                        # attempts is done by the explicit sleep below.
                        await waiter.wait(
                            ClusterId=self.job_flow_id,
                            StepId=step_id,
                            WaiterConfig={
                                "Delay": poll_interval,
                                "MaxAttempts": 1,
                            },
                        )
                        break
                    except WaiterError as error:
                        if "terminal failure" in str(error):
                            yield TriggerEvent(
                                {"status": "failure", "message": f"Step {step_id} failed: {error}"}
                            )
                            return
                        self.log.info(
                            "Status of step is %s - %s",
                            error.last_response["Step"]["Status"]["State"],
                            error.last_response["Step"]["Status"]["StateChangeReason"],
                        )
                        await asyncio.sleep(poll_interval)
                else:
                    # The loop ended without ``break``: the step never completed.
                    # A success on the very last attempt does not land here.
                    yield TriggerEvent({"status": "failure", "message": "Steps failed: max attempts reached"})
                    return

        yield TriggerEvent({"status": "success", "message": "Steps completed", "step_ids": self.step_ids})
class EmrCreateJobFlowTrigger(AwsBaseWaiterTrigger):
    """
    Wait asynchronously for an EMR JobFlow, using the ``job_flow_waiting`` waiter.

    :param job_flow_id: The id of the job flow to wait for.
    :param poll_interval: Deprecated, use ``waiter_delay`` instead.
    :param max_attempts: Deprecated, use ``waiter_max_attempts`` instead.
    :param aws_conn_id: The Airflow connection used for AWS credentials.
    :param waiter_delay: The amount of time in seconds to wait between attempts.
    :param waiter_max_attempts: The maximum number of attempts to be made.
    """

    def __init__(
        self,
        job_flow_id: str,
        poll_interval: int | None = None,  # deprecated alias of waiter_delay
        max_attempts: int | None = None,  # deprecated alias of waiter_max_attempts
        aws_conn_id: str | None = None,
        waiter_delay: int = 30,
        waiter_max_attempts: int = 60,
    ):
        legacy_args_given = not (poll_interval is None and max_attempts is None)
        if legacy_args_given:
            warnings.warn(
                "please use waiter_delay instead of poll_interval "
                "and waiter_max_attempts instead of max_attempts",
                AirflowProviderDeprecationWarning,
                stacklevel=2,
            )
            # A falsy legacy value (None or 0) leaves the new-style setting as is.
            if poll_interval:
                waiter_delay = poll_interval
            if max_attempts:
                waiter_max_attempts = max_attempts

        cluster_status_queries = [
            "Cluster.Status.State",
            "Cluster.Status.StateChangeReason",
            "Cluster.Status.ErrorDetails",
        ]
        super().__init__(
            serialized_fields={"job_flow_id": job_flow_id},
            waiter_name="job_flow_waiting",
            waiter_args={"ClusterId": job_flow_id},
            failure_message="JobFlow creation failed",
            status_message="JobFlow creation in progress",
            status_queries=cluster_status_queries,
            return_key="job_flow_id",
            return_value=job_flow_id,
            waiter_delay=waiter_delay,
            waiter_max_attempts=waiter_max_attempts,
            aws_conn_id=aws_conn_id,
        )

    def hook(self) -> AwsGenericHook:
        """Build the EMR hook for this trigger's AWS connection."""
        emr_hook = EmrHook(aws_conn_id=self.aws_conn_id)
        return emr_hook
class EmrTerminateJobFlowTrigger(AwsBaseWaiterTrigger):
    """
    Wait asynchronously for an EMR JobFlow, using the ``job_flow_terminated`` waiter.

    :param job_flow_id: ID of the EMR Job Flow to terminate
    :param poll_interval: Deprecated, use ``waiter_delay`` instead.
    :param max_attempts: Deprecated, use ``waiter_max_attempts`` instead.
    :param aws_conn_id: The Airflow connection used for AWS credentials.
    :param waiter_delay: The amount of time in seconds to wait between attempts.
    :param waiter_max_attempts: The maximum number of attempts to be made.
    """

    def __init__(
        self,
        job_flow_id: str,
        poll_interval: int | None = None,  # deprecated alias of waiter_delay
        max_attempts: int | None = None,  # deprecated alias of waiter_max_attempts
        aws_conn_id: str | None = None,
        waiter_delay: int = 30,
        waiter_max_attempts: int = 60,
    ):
        legacy_args_given = not (poll_interval is None and max_attempts is None)
        if legacy_args_given:
            warnings.warn(
                "please use waiter_delay instead of poll_interval "
                "and waiter_max_attempts instead of max_attempts",
                AirflowProviderDeprecationWarning,
                stacklevel=2,
            )
            # A falsy legacy value (None or 0) leaves the new-style setting as is.
            if poll_interval:
                waiter_delay = poll_interval
            if max_attempts:
                waiter_max_attempts = max_attempts

        cluster_status_queries = [
            "Cluster.Status.State",
            "Cluster.Status.StateChangeReason",
            "Cluster.Status.ErrorDetails",
        ]
        super().__init__(
            serialized_fields={"job_flow_id": job_flow_id},
            waiter_name="job_flow_terminated",
            waiter_args={"ClusterId": job_flow_id},
            failure_message="JobFlow termination failed",
            status_message="JobFlow termination in progress",
            status_queries=cluster_status_queries,
            return_value=None,
            waiter_delay=waiter_delay,
            waiter_max_attempts=waiter_max_attempts,
            aws_conn_id=aws_conn_id,
        )

    def hook(self) -> AwsGenericHook:
        """Build the EMR hook for this trigger's AWS connection."""
        emr_hook = EmrHook(aws_conn_id=self.aws_conn_id)
        return emr_hook
class EmrContainerTrigger(AwsBaseWaiterTrigger):
    """
    Wait asynchronously for an EMR container job, using the ``container_job_complete`` waiter.

    :param virtual_cluster_id: Reference Emr cluster id
    :param job_id: job_id to check the state
    :param aws_conn_id: Reference to AWS connection id
    :param poll_interval: Deprecated, use ``waiter_delay`` instead.
    :param waiter_delay: polling period in seconds to check for the status
    :param waiter_max_attempts: The maximum number of attempts to be made
    """

    def __init__(
        self,
        virtual_cluster_id: str,
        job_id: str,
        aws_conn_id: str = "aws_default",
        poll_interval: int | None = None,  # deprecated alias of waiter_delay
        waiter_delay: int = 30,
        waiter_max_attempts: int = 600,
    ):
        if poll_interval is not None:
            warnings.warn(
                "please use waiter_delay instead of poll_interval.",
                AirflowProviderDeprecationWarning,
                stacklevel=2,
            )
            # A legacy value of 0 is falsy and leaves waiter_delay untouched.
            if poll_interval:
                waiter_delay = poll_interval

        identifying_fields = {"virtual_cluster_id": virtual_cluster_id, "job_id": job_id}
        waiter_call_args = {"id": job_id, "virtualClusterId": virtual_cluster_id}
        super().__init__(
            serialized_fields=identifying_fields,
            waiter_name="container_job_complete",
            waiter_args=waiter_call_args,
            failure_message="Job failed",
            status_message="Job in progress",
            status_queries=["jobRun.state", "jobRun.failureReason"],
            return_key="job_id",
            return_value=job_id,
            waiter_delay=waiter_delay,
            waiter_max_attempts=waiter_max_attempts,
            aws_conn_id=aws_conn_id,
        )

    def hook(self) -> AwsGenericHook:
        """Build the EMR containers hook for this trigger's AWS connection."""
        container_hook = EmrContainerHook(self.aws_conn_id)
        return container_hook
class EmrStepSensorTrigger(AwsBaseWaiterTrigger):
    """
    Poll for the status of an EMR step until it reaches a terminal state.

    Polling uses the ``step_wait_for_terminal`` waiter.

    :param job_flow_id: job_flow_id which contains the step check the state of
    :param step_id: step to check the state of
    :param waiter_delay: polling period in seconds to check for the status
    :param waiter_max_attempts: The maximum number of attempts to be made
    :param aws_conn_id: Reference to AWS connection id
    """

    def __init__(
        self,
        job_flow_id: str,
        step_id: str,
        waiter_delay: int = 30,
        waiter_max_attempts: int = 60,
        aws_conn_id: str = "aws_default",
    ):
        super().__init__(
            serialized_fields={"job_flow_id": job_flow_id, "step_id": step_id},
            waiter_name="step_wait_for_terminal",
            waiter_args={"ClusterId": job_flow_id, "StepId": step_id},
            failure_message=f"Error while waiting for step {step_id} to complete",
            status_message=f"Step id: {step_id}, Step is still in non-terminal state",
            status_queries=[
                "Step.Status.State",
                "Step.Status.FailureDetails",
                "Step.Status.StateChangeReason",
            ],
            return_value=None,
            waiter_delay=waiter_delay,
            waiter_max_attempts=waiter_max_attempts,
            aws_conn_id=aws_conn_id,
        )

    def hook(self) -> AwsGenericHook:
        """Return an EMR hook bound to this trigger's AWS connection."""
        # Pass the connection id by keyword, like the other EmrHook call sites
        # in this module: EmrHook's first positional parameter is
        # ``emr_conn_id``, so a positional call would not set ``aws_conn_id``.
        return EmrHook(aws_conn_id=self.aws_conn_id)
class EmrServerlessCreateApplicationTrigger(AwsBaseWaiterTrigger):
    """
    Wait asynchronously for an EMR Serverless application, using the ``serverless_app_created`` waiter.

    :param application_id: The ID of the application being polled.
    :param waiter_delay: polling period in seconds to check for the status
    :param waiter_max_attempts: The maximum number of attempts to be made
    :param aws_conn_id: Reference to AWS connection id
    """

    def __init__(
        self,
        application_id: str,
        waiter_delay: int = 30,
        waiter_max_attempts: int = 60,
        aws_conn_id: str = "aws_default",
    ) -> None:
        application_state_queries = ["application.state", "application.stateDetails"]
        super().__init__(
            serialized_fields={"application_id": application_id},
            waiter_name="serverless_app_created",
            waiter_args={"applicationId": application_id},
            failure_message="Application creation failed",
            status_message="Application status is",
            status_queries=application_state_queries,
            return_key="application_id",
            return_value=application_id,
            waiter_delay=waiter_delay,
            waiter_max_attempts=waiter_max_attempts,
            aws_conn_id=aws_conn_id,
        )

    def hook(self) -> AwsGenericHook:
        """Build the EMR Serverless hook for this trigger's AWS connection."""
        serverless_hook = EmrServerlessHook(self.aws_conn_id)
        return serverless_hook
class EmrServerlessStartApplicationTrigger(AwsBaseWaiterTrigger):
    """
    Wait asynchronously for an EMR Serverless application, using the ``serverless_app_started`` waiter.

    :param application_id: The ID of the application being polled.
    :param waiter_delay: polling period in seconds to check for the status
    :param waiter_max_attempts: The maximum number of attempts to be made
    :param aws_conn_id: Reference to AWS connection id
    """

    def __init__(
        self,
        application_id: str,
        waiter_delay: int = 30,
        waiter_max_attempts: int = 60,
        aws_conn_id: str = "aws_default",
    ) -> None:
        application_state_queries = ["application.state", "application.stateDetails"]
        super().__init__(
            serialized_fields={"application_id": application_id},
            waiter_name="serverless_app_started",
            waiter_args={"applicationId": application_id},
            failure_message="Application failed to start",
            status_message="Application status is",
            status_queries=application_state_queries,
            return_key="application_id",
            return_value=application_id,
            waiter_delay=waiter_delay,
            waiter_max_attempts=waiter_max_attempts,
            aws_conn_id=aws_conn_id,
        )

    def hook(self) -> AwsGenericHook:
        """Build the EMR Serverless hook for this trigger's AWS connection."""
        serverless_hook = EmrServerlessHook(self.aws_conn_id)
        return serverless_hook
class EmrServerlessStopApplicationTrigger(AwsBaseWaiterTrigger):
    """
    Poll an Emr Serverless application and wait for it to be stopped.

    Polling uses the ``serverless_app_stopped`` waiter.

    :param application_id: The ID of the application being polled.
    :param waiter_delay: polling period in seconds to check for the status
    :param waiter_max_attempts: The maximum number of attempts to be made
    :param aws_conn_id: Reference to AWS connection id.
    """

    def __init__(
        self,
        application_id: str,
        waiter_delay: int = 30,
        waiter_max_attempts: int = 60,
        aws_conn_id: str = "aws_default",
    ) -> None:
        super().__init__(
            serialized_fields={"application_id": application_id},
            waiter_name="serverless_app_stopped",
            waiter_args={"applicationId": application_id},
            # This trigger waits for a stop, so the failure text must say so
            # (it previously carried the start trigger's message).
            failure_message="Application failed to stop",
            status_message="Application status is",
            status_queries=["application.state", "application.stateDetails"],
            return_key="application_id",
            return_value=application_id,
            waiter_delay=waiter_delay,
            waiter_max_attempts=waiter_max_attempts,
            aws_conn_id=aws_conn_id,
        )

    def hook(self) -> AwsGenericHook:
        """Return an EMR Serverless hook bound to this trigger's AWS connection."""
        return EmrServerlessHook(self.aws_conn_id)
class EmrServerlessStartJobTrigger(AwsBaseWaiterTrigger):
    """
    Wait asynchronously for an EMR Serverless job run, using the ``serverless_job_completed`` waiter.

    :param application_id: The ID of the application the job in being run on.
    :param job_id: The ID of the job run.
    :param waiter_delay: polling period in seconds to check for the status
    :param waiter_max_attempts: The maximum number of attempts to be made
    :param aws_conn_id: Reference to AWS connection id
    """

    def __init__(
        self,
        application_id: str,
        job_id: str | None,
        waiter_delay: int = 30,
        waiter_max_attempts: int = 60,
        aws_conn_id: str = "aws_default",
    ) -> None:
        identifying_fields = {"application_id": application_id, "job_id": job_id}
        waiter_call_args = {"applicationId": application_id, "jobRunId": job_id}
        super().__init__(
            serialized_fields=identifying_fields,
            waiter_name="serverless_job_completed",
            waiter_args=waiter_call_args,
            failure_message="Serverless Job failed",
            status_message="Serverless Job status is",
            status_queries=["jobRun.state", "jobRun.stateDetails"],
            return_key="job_id",
            return_value=job_id,
            waiter_delay=waiter_delay,
            waiter_max_attempts=waiter_max_attempts,
            aws_conn_id=aws_conn_id,
        )

    def hook(self) -> AwsGenericHook:
        """Build the EMR Serverless hook for this trigger's AWS connection."""
        serverless_hook = EmrServerlessHook(self.aws_conn_id)
        return serverless_hook
class EmrServerlessDeleteApplicationTrigger(AwsBaseWaiterTrigger):
    """
    Poll an Emr Serverless application and wait for it to be deleted.

    Polling uses the ``serverless_app_terminated`` waiter.

    :param application_id: The ID of the application being polled.
    :param waiter_delay: polling period in seconds to check for the status
    :param waiter_max_attempts: The maximum number of attempts to be made
    :param aws_conn_id: Reference to AWS connection id
    """

    def __init__(
        self,
        application_id: str,
        waiter_delay: int = 30,
        waiter_max_attempts: int = 60,
        aws_conn_id: str = "aws_default",
    ) -> None:
        super().__init__(
            serialized_fields={"application_id": application_id},
            waiter_name="serverless_app_terminated",
            waiter_args={"applicationId": application_id},
            # This trigger waits for a deletion, so the failure text must say
            # so (it previously carried the start trigger's message).
            failure_message="Application deletion failed",
            status_message="Application status is",
            status_queries=["application.state", "application.stateDetails"],
            return_key="application_id",
            return_value=application_id,
            waiter_delay=waiter_delay,
            waiter_max_attempts=waiter_max_attempts,
            aws_conn_id=aws_conn_id,
        )

    def hook(self) -> AwsGenericHook:
        """Return an EMR Serverless hook bound to this trigger's AWS connection."""
        return EmrServerlessHook(self.aws_conn_id)
class EmrServerlessCancelJobsTrigger(AwsBaseWaiterTrigger):
    """
    Wait asynchronously on an EMR Serverless application, using the ``no_job_running`` waiter.

    The waiter is given the hook's ``JOB_INTERMEDIATE_STATES`` plus
    ``CANCELLING`` as the job states to look for.

    :param application_id: EMR Serverless application ID
    :param aws_conn_id: Reference to AWS connection id
    :param waiter_delay: Delay in seconds between each attempt to check the status
    :param waiter_max_attempts: Maximum number of attempts to check the status
    """

    def __init__(
        self,
        application_id: str,
        aws_conn_id: str,
        waiter_delay: int,
        waiter_max_attempts: int,
    ) -> None:
        # The hook is created once here and handed back by hook().
        self.hook_instance = EmrServerlessHook(aws_conn_id)
        unfinished_states = self.hook_instance.JOB_INTERMEDIATE_STATES.union({"CANCELLING"})
        waiter_call_args = {"applicationId": application_id, "states": list(unfinished_states)}
        super().__init__(
            serialized_fields={"application_id": application_id},
            waiter_name="no_job_running",
            waiter_args=waiter_call_args,
            failure_message="Error while waiting for jobs to cancel",
            status_message="Currently running jobs",
            status_queries=["jobRuns[*].applicationId", "jobRuns[*].state"],
            return_key="application_id",
            return_value=application_id,
            waiter_delay=waiter_delay,
            waiter_max_attempts=waiter_max_attempts,
            aws_conn_id=aws_conn_id,
        )

    def hook(self) -> AwsGenericHook:
        """Hand back the EMR Serverless hook that was created in ``__init__``."""
        cached_hook = self.hook_instance
        return cached_hook