-
Notifications
You must be signed in to change notification settings - Fork 258
/
launch_plan.py
466 lines (393 loc) · 19.6 KB
/
launch_plan.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
from __future__ import annotations
import typing
from typing import Any, Callable, Dict, List, Optional, Type
from flytekit.core import workflow as _annotated_workflow
from flytekit.core.context_manager import FlyteContext, FlyteContextManager, FlyteEntities
from flytekit.core.interface import Interface, transform_function_to_interface, transform_inputs_to_parameters
from flytekit.core.promise import create_and_link_node, translate_inputs_to_literals
from flytekit.core.reference_entity import LaunchPlanReference, ReferenceEntity
from flytekit.models import common as _common_models
from flytekit.models import interface as _interface_models
from flytekit.models import literals as _literal_models
from flytekit.models import schedule as _schedule_model
from flytekit.models import security
from flytekit.models.core import workflow as _workflow_model
class LaunchPlan(object):
    """
    Launch Plans are one of the core constructs of Flyte. Please take a look at the discussion in the
    :std:ref:`core concepts <flyte:divedeep-launchplans>` if you are unfamiliar with them.

    Every workflow is registered with a default launch plan, which is just a launch plan with none of the additional
    attributes set - no default values, fixed values, schedules, etc. Assuming you have the following workflow

    .. code-block:: python

        @workflow
        def wf(a: int, c: str) -> str:
            ...

    Create the default launch plan with

    .. code-block:: python

        LaunchPlan.get_or_create(workflow=wf)

    If you specify additional parameters, you'll also have to give the launch plan a unique name. Default and
    fixed inputs can be expressed as Python native values like so:

    .. literalinclude:: ../../../tests/flytekit/unit/core/test_launch_plan.py
       :start-after: # fixed_and_default_start
       :end-before: # fixed_and_default_end
       :language: python
       :dedent: 4

    Additionally, a launch plan can be configured to run on a schedule and emit notifications.

    Please see the relevant Schedule and Notification objects as well.

    To configure the remaining parameters, you'll need to import the relevant model objects as well.

    .. literalinclude:: ../../../tests/flytekit/unit/core/test_launch_plan.py
       :start-after: # schedule_start
       :end-before: # schedule_end
       :language: python
       :dedent: 4

    .. code-block:: python

        from flytekit.models.common import Annotations, AuthRole, Labels, RawOutputDataConfig

    Then use as follows

    .. literalinclude:: ../../../tests/flytekit/unit/core/test_launch_plan.py
       :start-after: # auth_role_start
       :end-before: # auth_role_end
       :language: python
       :dedent: 4
    """

    # The reason we cache is simply because users may get the default launch plan twice for a single Workflow. We
    # don't want to create two defaults, could be confusing.
    # Keys are launch plan names; default launch plans are stored under their workflow's name.
    CACHE: Dict[str, LaunchPlan] = {}

    @staticmethod
    def _security_context_from_auth_role(auth_role: _common_models.AuthRole) -> security.SecurityContext:
        """
        Translate a deprecated ``AuthRole`` into a ``SecurityContext`` whose identity carries the same IAM role and
        Kubernetes service account.
        """
        return security.SecurityContext(
            run_as=security.Identity(
                iam_role=auth_role.assumable_iam_role,
                k8s_service_account=auth_role.kubernetes_service_account,
            ),
        )

    @staticmethod
    def get_default_launch_plan(ctx: FlyteContext, workflow: _annotated_workflow.WorkflowBase) -> LaunchPlan:
        """
        Users should probably call the get_or_create function defined below instead. A default launch plan is the one
        that will just pick up whatever default values are defined in the workflow function signature (if any) and
        use the default auth information supplied during serialization, with no notifications or schedules.

        :param ctx: This is not flytekit.current_context(). This is an internal context object. Users familiar with
            flytekit should feel free to use this however.
        :param workflow: The workflow to create a launch plan for.
        :return: The launch plan cached under the workflow's name, created on first use.
        """
        if workflow.name in LaunchPlan.CACHE:
            return LaunchPlan.CACHE[workflow.name]

        parameter_map = transform_inputs_to_parameters(ctx, workflow.python_interface)

        lp = LaunchPlan(
            name=workflow.name,
            workflow=workflow,
            parameters=parameter_map,
            fixed_inputs=_literal_models.LiteralMap(literals={}),
        )

        LaunchPlan.CACHE[workflow.name] = lp
        return lp

    @classmethod
    def create(
        cls,
        name: str,
        workflow: _annotated_workflow.WorkflowBase,
        default_inputs: Optional[Dict[str, Any]] = None,
        fixed_inputs: Optional[Dict[str, Any]] = None,
        schedule: Optional[_schedule_model.Schedule] = None,
        notifications: Optional[List[_common_models.Notification]] = None,
        labels: Optional[_common_models.Labels] = None,
        annotations: Optional[_common_models.Annotations] = None,
        raw_output_data_config: Optional[_common_models.RawOutputDataConfig] = None,
        max_parallelism: Optional[int] = None,
        security_context: typing.Optional[security.SecurityContext] = None,
        auth_role: Optional[_common_models.AuthRole] = None,
    ) -> LaunchPlan:
        """
        Build a new, uniquely named launch plan for ``workflow`` and store it in ``CACHE``.

        :param name: Name of the launch plan. Must not already be present in ``CACHE``.
        :param workflow: The workflow to create a launch plan for.
        :param default_inputs: Default inputs, expressed as Python values. These take precedence over the defaults
            found in the workflow's signature. The dictionary passed in is not modified.
        :param fixed_inputs: Fixed inputs, expressed as Python values. The dictionary passed in is not modified.
        :param schedule: Optional schedule to run on.
        :param notifications: Notifications to send.
        :param labels: Optional labels.
        :param annotations: Optional annotations.
        :param raw_output_data_config: Optional raw output data configuration.
        :param max_parallelism: Optional maximum parallelism setting.
        :param security_context: Security context for the execution.
        :param auth_role: Deprecated. Converted into a security context when ``security_context`` is not given.
        :raises ValueError: If both ``auth_role`` and ``security_context`` are specified.
        :raises AssertionError: If a launch plan called ``name`` has already been created.
        """
        ctx = FlyteContextManager.current_context()
        # Copy the defaults: they get merged with the fixed inputs further down and kept on the launch plan, which
        # must not alter (or stay aliased to) the dictionary owned by the caller.
        default_inputs = dict(default_inputs or {})
        fixed_inputs = fixed_inputs or {}

        # Default inputs come from two places, the original signature of the workflow function, and the default_inputs
        # argument to this function. We'll take the latter as having higher precedence.
        wf_signature_parameters = transform_inputs_to_parameters(ctx, workflow.python_interface)

        # Construct a new Interface object with just the default inputs given to get Parameters, maybe there's an
        # easier way to do this, think about it later.
        temp_inputs = {k: (workflow.python_interface.inputs[k], v) for k, v in default_inputs.items()}
        temp_interface = Interface(inputs=temp_inputs, outputs={})
        temp_signature = transform_inputs_to_parameters(ctx, temp_interface)
        wf_signature_parameters._parameters.update(temp_signature.parameters)

        # These are fixed inputs that cannot change at launch time. If the same argument is also in default inputs,
        # it'll be taken out from defaults in the LaunchPlan constructor
        fixed_literals = translate_inputs_to_literals(
            ctx,
            incoming_values=fixed_inputs,
            flyte_interface_types=workflow.interface.inputs,
            native_types=workflow.python_interface.inputs,
        )
        fixed_lm = _literal_models.LiteralMap(literals=fixed_literals)

        if auth_role:
            if security_context:
                raise ValueError("Use of AuthRole is deprecated. You cannot specify both AuthRole and SecurityContext")
            security_context = cls._security_context_from_auth_role(auth_role)

        # Reject duplicates before constructing anything: the constructor appends the new object to
        # FlyteEntities.entities, so a launch plan that is about to be rejected must never be built.
        if name in cls.CACHE:
            raise AssertionError(f"Launch plan named {name} was already created! Make sure your names are unique.")

        lp = cls(
            name=name,
            workflow=workflow,
            parameters=wf_signature_parameters,
            fixed_inputs=fixed_lm,
            schedule=schedule,
            notifications=notifications,
            labels=labels,
            annotations=annotations,
            raw_output_data_config=raw_output_data_config,
            max_parallelism=max_parallelism,
            security_context=security_context,
        )

        # This is just a convenience - we'll need the fixed inputs LiteralMap for when serializing the Launch Plan out
        # to protobuf, but for local execution and such, why not save the original Python native values as well so
        # we don't have to reverse it back every time.
        default_inputs.update(fixed_inputs)
        lp._saved_inputs = default_inputs

        cls.CACHE[name] = lp
        return lp

    @classmethod
    def get_or_create(
        cls,
        workflow: _annotated_workflow.WorkflowBase,
        name: Optional[str] = None,
        default_inputs: Optional[Dict[str, Any]] = None,
        fixed_inputs: Optional[Dict[str, Any]] = None,
        schedule: Optional[_schedule_model.Schedule] = None,
        notifications: Optional[List[_common_models.Notification]] = None,
        labels: Optional[_common_models.Labels] = None,
        annotations: Optional[_common_models.Annotations] = None,
        raw_output_data_config: Optional[_common_models.RawOutputDataConfig] = None,
        max_parallelism: Optional[int] = None,
        security_context: typing.Optional[security.SecurityContext] = None,
        auth_role: Optional[_common_models.AuthRole] = None,
    ) -> LaunchPlan:
        """
        This function offers a friendlier interface for creating launch plans. If the name for the launch plan is not
        supplied, this assumes you are looking for the default launch plan for the workflow. If it is specified, it
        will be used. If creating the default launch plan, none of the other arguments may be specified.

        The resulting launch plan is also cached and if called again with the same name, the
        cached version is returned

        :param security_context: Security context for the execution
        :param workflow: The Workflow to create a launch plan for.
        :param name: If you supply a name, keep it mind it needs to be unique. That is, project, domain, version, and
            this name form a primary key. If you do not supply a name, this function will assume you want the default
            launch plan for the given workflow.
        :param default_inputs: Default inputs, expressed as Python values.
        :param fixed_inputs: Fixed inputs, expressed as Python values. At call time, these cannot be changed.
        :param schedule: Optional schedule to run on.
        :param notifications: Notifications to send.
        :param labels: Optional labels to attach to executions created by this launch plan.
        :param annotations: Optional annotations to attach to executions created by this launch plan.
        :param raw_output_data_config: Optional location of offloaded data for things like S3, etc.
        :param auth_role: Add an auth role if necessary.
        :param max_parallelism: Controls the maximum number of tasknodes that can be run in parallel for the entire
            workflow. This is useful to achieve fairness. Note: MapTasks are regarded as one unit, and
            parallelism/concurrency of MapTasks is independent from this.
        :raises ValueError: If no name is given but any other launch plan property is.
        :raises AssertionError: If a launch plan with this name is cached and was created with different arguments.
        """
        extra_properties = (
            default_inputs,
            fixed_inputs,
            schedule,
            notifications,
            labels,
            annotations,
            raw_output_data_config,
            auth_role,
            max_parallelism,
            security_context,
        )
        if name is None and any(prop is not None for prop in extra_properties):
            raise ValueError(
                "Only named launchplans can be created that have other properties. Drop the name if you want to create a default launchplan. Default launchplans cannot have any other associations"
            )

        if name is not None and name in LaunchPlan.CACHE:
            cached_outputs = vars(LaunchPlan.CACHE[name])

            # Normalise the arguments to the shape in which they are stored on a launch plan. The merge goes into a
            # fresh dictionary so that the caller's default_inputs is left untouched.
            expected_notifications = notifications or []
            expected_inputs = {**(default_inputs or {}), **(fixed_inputs or {})}
            expected_security_context = security_context
            if auth_role and not security_context:
                expected_security_context = cls._security_context_from_auth_role(auth_role)

            if (
                workflow != cached_outputs["_workflow"]
                or schedule != cached_outputs["_schedule"]
                or expected_notifications != cached_outputs["_notifications"]
                or expected_inputs != cached_outputs["_saved_inputs"]
                or labels != cached_outputs["_labels"]
                or annotations != cached_outputs["_annotations"]
                or raw_output_data_config != cached_outputs["_raw_output_data_config"]
                or max_parallelism != cached_outputs["_max_parallelism"]
                or expected_security_context != cached_outputs["_security_context"]
            ):
                raise AssertionError("The cached values aren't the same as the current call arguments")

            return LaunchPlan.CACHE[name]
        elif name is None and workflow.name in LaunchPlan.CACHE:
            return LaunchPlan.CACHE[workflow.name]

        # Otherwise, handle the default launch plan case
        if name is None:
            ctx = FlyteContext.current_context()
            lp = cls.get_default_launch_plan(ctx, workflow)
        else:
            lp = cls.create(
                name,
                workflow,
                default_inputs,
                fixed_inputs,
                schedule,
                notifications,
                labels,
                annotations,
                raw_output_data_config,
                max_parallelism,
                auth_role=auth_role,
                security_context=security_context,
            )
        LaunchPlan.CACHE[name or workflow.name] = lp
        return lp

    def __init__(
        self,
        name: str,
        workflow: _annotated_workflow.WorkflowBase,
        parameters: _interface_models.ParameterMap,
        fixed_inputs: _literal_models.LiteralMap,
        schedule: Optional[_schedule_model.Schedule] = None,
        notifications: Optional[List[_common_models.Notification]] = None,
        labels: Optional[_common_models.Labels] = None,
        annotations: Optional[_common_models.Annotations] = None,
        raw_output_data_config: Optional[_common_models.RawOutputDataConfig] = None,
        max_parallelism: typing.Optional[int] = None,
        security_context: typing.Optional[security.SecurityContext] = None,
    ):
        """
        Store the launch plan's attributes and append the new object to ``FlyteEntities.entities``.

        Any parameter whose name also appears in ``fixed_inputs`` is dropped from the stored parameter map. The
        constructor does not touch ``CACHE``; that is done by the ``create``/``get_or_create`` family.
        """
        self._name = name
        self._workflow = workflow
        # Ensure fixed inputs are not in parameter map
        parameters = {k: v for k, v in parameters.parameters.items() if k not in fixed_inputs.literals}
        self._parameters = _interface_models.ParameterMap(parameters=parameters)
        self._fixed_inputs = fixed_inputs
        # See create() for additional information
        self._saved_inputs = {}

        self._schedule = schedule
        self._notifications = notifications or []
        self._labels = labels
        self._annotations = annotations
        self._raw_output_data_config = raw_output_data_config
        self._max_parallelism = max_parallelism
        self._security_context = security_context

        FlyteEntities.entities.append(self)

    def clone_with(
        self,
        name: str,
        parameters: Optional[_interface_models.ParameterMap] = None,
        fixed_inputs: Optional[_literal_models.LiteralMap] = None,
        schedule: Optional[_schedule_model.Schedule] = None,
        notifications: Optional[List[_common_models.Notification]] = None,
        labels: Optional[_common_models.Labels] = None,
        annotations: Optional[_common_models.Annotations] = None,
        raw_output_data_config: Optional[_common_models.RawOutputDataConfig] = None,
        auth_role: Optional[_common_models.AuthRole] = None,
        max_parallelism: Optional[int] = None,
        security_context: typing.Optional[security.SecurityContext] = None,
    ) -> LaunchPlan:
        """
        Return a new launch plan for the same workflow, named ``name``, in which every argument that is left unset
        (or falsy) falls back to this launch plan's value.

        The clone is built directly through the constructor: it is not added to ``CACHE`` and it starts with empty
        saved inputs.

        :param auth_role: Deprecated. Converted into a security context when ``security_context`` is not given.
        :raises ValueError: If both ``auth_role`` and ``security_context`` are specified.
        """
        # The constructor has no auth_role parameter and instances keep no auth role of their own, so the deprecated
        # argument is folded into the security context exactly as create() does.
        if auth_role:
            if security_context:
                raise ValueError("Use of AuthRole is deprecated. You cannot specify both AuthRole and SecurityContext")
            security_context = self._security_context_from_auth_role(auth_role)

        return LaunchPlan(
            name=name,
            workflow=self.workflow,
            parameters=parameters or self.parameters,
            fixed_inputs=fixed_inputs or self.fixed_inputs,
            schedule=schedule or self.schedule,
            notifications=notifications or self.notifications,
            labels=labels or self.labels,
            annotations=annotations or self.annotations,
            raw_output_data_config=raw_output_data_config or self.raw_output_data_config,
            max_parallelism=max_parallelism or self.max_parallelism,
            security_context=security_context or self.security_context,
        )

    @property
    def python_interface(self) -> Interface:
        """The Python interface of the underlying workflow."""
        return self.workflow.python_interface

    @property
    def interface(self) -> _interface_models.TypedInterface:
        """The typed interface of the underlying workflow."""
        return self.workflow.interface

    @property
    def name(self) -> str:
        return self._name

    @property
    def parameters(self) -> _interface_models.ParameterMap:
        return self._parameters

    @property
    def fixed_inputs(self) -> _literal_models.LiteralMap:
        return self._fixed_inputs

    @property
    def workflow(self) -> _annotated_workflow.WorkflowBase:
        return self._workflow

    @property
    def saved_inputs(self) -> Dict[str, Any]:
        # See note in create()
        # Since the call-site will typically update the dict returned, and since update updates in place, let's return
        # a copy.
        # TODO: What issues will there be when we start introducing custom classes as input types?
        return self._saved_inputs.copy()

    @property
    def schedule(self) -> Optional[_schedule_model.Schedule]:
        return self._schedule

    @property
    def notifications(self) -> List[_common_models.Notification]:
        return self._notifications

    @property
    def labels(self) -> Optional[_common_models.Labels]:
        return self._labels

    @property
    def annotations(self) -> Optional[_common_models.Annotations]:
        return self._annotations

    @property
    def raw_output_data_config(self) -> Optional[_common_models.RawOutputDataConfig]:
        return self._raw_output_data_config

    @property
    def max_parallelism(self) -> typing.Optional[int]:
        return self._max_parallelism

    @property
    def security_context(self) -> typing.Optional[security.SecurityContext]:
        return self._security_context

    def construct_node_metadata(self) -> _workflow_model.NodeMetadata:
        """Delegate node metadata construction to the underlying workflow."""
        return self.workflow.construct_node_metadata()

    def __call__(self, *args, **kwargs):
        """
        Invoke the launch plan with keyword arguments only.

        The saved inputs are used as a base and overridden by ``kwargs``. When the current context has a compilation
        state, a node is created and linked for this launch plan; otherwise the call is forwarded to the workflow.

        :raises AssertionError: If any positional argument is supplied.
        """
        if len(args) > 0:
            raise AssertionError("Only Keyword Arguments are supported for launch plan executions")

        ctx = FlyteContext.current_context()
        # saved_inputs hands back a copy, so layering the call-time values on top leaves the stored inputs intact.
        inputs = self.saved_inputs
        inputs.update(kwargs)

        if ctx.compilation_state is not None:
            return create_and_link_node(ctx, entity=self, **inputs)

        # Calling a launch plan should just forward the call to the workflow, nothing more. But let's add in the
        # saved inputs.
        return self.workflow(**inputs)
class ReferenceLaunchPlan(ReferenceEntity, LaunchPlan):
    """
    Pointer to a Launch Plan that already exists on your Flyte installation.

    No network call to Admin is initiated by this object, which is why the expected interface has to be supplied by
    the user. Should that interface cause an issue with compilation at registration time, an error will be returned.
    """

    def __init__(
        self, project: str, domain: str, name: str, version: str, inputs: Dict[str, Type], outputs: Dict[str, Type]
    ):
        # Bundle the four identifying fields into a reference object, then hand it over together with the
        # user-declared interface.
        reference = LaunchPlanReference(project, domain, name, version)
        super().__init__(reference, inputs, outputs)
def reference_launch_plan(
    project: str,
    domain: str,
    name: str,
    version: str,
) -> Callable[[Callable[..., Any]], ReferenceLaunchPlan]:
    """
    Decorator factory producing a pointer to a launch plan that already exists on your Flyte installation.

    No network call to Admin is initiated, which is why the expected interface has to be provided by the user, here
    in the form of the decorated function's signature. Should that interface cause an issue with compilation at
    registration time, an error will be returned.
    """

    def wrapper(fn) -> ReferenceLaunchPlan:
        # The decorated function only contributes its signature, from which the expected interface is derived.
        fn_interface = transform_function_to_interface(fn)
        inputs, outputs = fn_interface.inputs, fn_interface.outputs
        return ReferenceLaunchPlan(project, domain, name, version, inputs, outputs)

    return wrapper