-
Notifications
You must be signed in to change notification settings - Fork 1.3k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
0474105
commit 55b9187
Showing
9 changed files
with
265 additions
and
27 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
1 change: 1 addition & 0 deletions
1
python_modules/dagster/dagster/_core/definitions/declarative_scheduling/operands/__init__.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
119 changes: 119 additions & 0 deletions
119
...er/dagster/_core/definitions/declarative_scheduling/operands/scheduled_since_condition.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,119 @@ | ||
import datetime | ||
from typing import Sequence, cast | ||
|
||
from dagster._core.definitions.declarative_scheduling.scheduling_evaluation_info import ( | ||
AssetSliceWithMetadata, | ||
) | ||
from dagster._core.definitions.declarative_scheduling.utils import SerializableTimeDelta | ||
from dagster._core.definitions.metadata.metadata_value import MetadataValue | ||
from dagster._serdes.serdes import whitelist_for_serdes | ||
|
||
from ..scheduling_condition import SchedulingCondition, SchedulingResult | ||
from ..scheduling_context import SchedulingContext | ||
|
||
# Metadata key under which each stored slice records the timestamp of the tick that requested it.
_REQUEST_TIMESTAMP_METADATA_KEY = "request_timestamp"


@whitelist_for_serdes
class ScheduledSinceCondition(SchedulingCondition):
    """SchedulingCondition which is true if the asset has been requested for materialization via
    the declarative scheduling system within the given time window.

    Will only detect requests which have been made since this condition was added to the asset.
    """

    # the lookback window, stored in a serdes-friendly form (see `lookback_timedelta`)
    serializable_lookback_timedelta: SerializableTimeDelta

    @property
    def description(self) -> str:
        """Human-readable summary of this condition."""
        return f"Has been requested within the last {self.lookback_timedelta}"

    @property
    def lookback_timedelta(self) -> datetime.timedelta:
        """The lookback window as a regular `datetime.timedelta`."""
        return self.serializable_lookback_timedelta.to_timedelta()

    @staticmethod
    def from_lookback_delta(lookback_delta: datetime.timedelta) -> "ScheduledSinceCondition":
        """Builds a condition from a regular `datetime.timedelta` lookback window."""
        return ScheduledSinceCondition(
            serializable_lookback_timedelta=SerializableTimeDelta.from_timedelta(lookback_delta)
        )

    def _get_minimum_timestamp(self, context: SchedulingContext) -> float:
        """The minimum timestamp for a request to be considered in the lookback window."""
        return (context.effective_dt - self.lookback_timedelta).timestamp()

    @staticmethod
    def _get_request_timestamp(slice_with_metadata: AssetSliceWithMetadata) -> float:
        """The request timestamp recorded on a stored slice, or 0 if none was recorded."""
        return cast(
            float,
            slice_with_metadata.metadata.get(
                _REQUEST_TIMESTAMP_METADATA_KEY, MetadataValue.float(0)
            ).value,
        )

    def _get_new_slices_with_metadata(
        self, context: SchedulingContext
    ) -> Sequence[AssetSliceWithMetadata]:
        """Updates the stored information as to when the asset was last requested.

        Returns the slices (each tagged with the timestamp of the tick that requested it) that
        are non-empty and whose request timestamp is still inside the lookback window.
        """
        # the first time this asset has been evaluated
        if context.previous_evaluation_info is None:
            return []

        previous_node = context.previous_evaluation_node
        slices_with_metadata = list(previous_node.slices_with_metadata) if previous_node else []

        newly_requested_slice = context.previous_requested_slice
        # when there is no information about what was requested on the previous tick, there is
        # nothing new to record -- but we still fall through to the eviction step below, as the
        # lookback window moves forward with every evaluation regardless of new requests
        if newly_requested_slice is not None:
            # for existing subsets, remove references to newly-requested partitions, as these
            # subsets are meant to represent the most recent time that the asset was requested
            slices_with_metadata = [
                AssetSliceWithMetadata(
                    asset_slice.compute_difference(newly_requested_slice), metadata
                )
                for asset_slice, metadata in slices_with_metadata
            ]

            # for the newly-requested slice, add a new entry indicating that these partitions
            # were requested on the previous tick
            previous_request_timestamp = (
                context.previous_evaluation_info.temporal_context.effective_dt.timestamp()
            )
            slices_with_metadata.append(
                AssetSliceWithMetadata(
                    newly_requested_slice,
                    {
                        _REQUEST_TIMESTAMP_METADATA_KEY: MetadataValue.float(
                            previous_request_timestamp
                        )
                    },
                )
            )

        # finally, evict any empty subsets from the list, and any subsets with an older timestamp.
        # the cutoff is the same for every entry, so compute it once.
        minimum_timestamp = self._get_minimum_timestamp(context)
        return [
            slice_with_metadata
            for slice_with_metadata in slices_with_metadata
            if not slice_with_metadata.asset_slice.is_empty
            and self._get_request_timestamp(slice_with_metadata) >= minimum_timestamp
        ]

    def evaluate(self, context: SchedulingContext) -> SchedulingResult:
        """Evaluates to the part of the candidate slice requested within the lookback window."""
        slices_with_metadata = self._get_new_slices_with_metadata(context)

        # we keep track of all slices that have been requested within the lookback window, so we
        # can simply compute the union of all of these slices to determine the true slice
        requested_within_lookback_slice = context.asset_graph_view.create_empty_slice(
            context.asset_key
        )
        for asset_slice, _ in slices_with_metadata:
            requested_within_lookback_slice = requested_within_lookback_slice.compute_union(
                asset_slice
            )

        return SchedulingResult.create(
            context=context,
            true_slice=context.candidate_slice.compute_intersection(
                requested_within_lookback_slice
            ),
            slices_with_metadata=slices_with_metadata,
        )
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
27 changes: 27 additions & 0 deletions
27
python_modules/dagster/dagster/_core/definitions/declarative_scheduling/utils.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,27 @@ | ||
import datetime | ||
from typing import NamedTuple | ||
|
||
from dagster._serdes.serdes import whitelist_for_serdes | ||
|
||
|
||
@whitelist_for_serdes
class SerializableTimeDelta(NamedTuple):
    """Serdes-compatible stand-in for `datetime.timedelta`.

    A `datetime.timedelta` is represented internally by three integers (days, seconds and
    microseconds). This tuple carries exactly those three fields and converts to and from the
    in-memory `datetime.timedelta` form.
    """

    days: int
    seconds: int
    microseconds: int

    @staticmethod
    def from_timedelta(timedelta: datetime.timedelta) -> "SerializableTimeDelta":
        """Copies the days / seconds / microseconds components of `timedelta`."""
        return SerializableTimeDelta(timedelta.days, timedelta.seconds, timedelta.microseconds)

    def to_timedelta(self) -> datetime.timedelta:
        """Rebuilds the `datetime.timedelta` described by this tuple's fields."""
        # field names match the keyword arguments accepted by datetime.timedelta
        return datetime.timedelta(**self._asdict())
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
68 changes: 68 additions & 0 deletions
68
...ions_tests/auto_materialize_tests/asset_condition_tests/test_scheduled_since_condition.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,68 @@ | ||
import datetime | ||
|
||
from dagster import SchedulingCondition | ||
|
||
from ..scenario_specs import one_asset, two_partitions_def | ||
from .asset_condition_scenario import AssetConditionScenarioState | ||
|
||
|
||
def test_scheduled_since_unpartitioned() -> None:
    """Checks `~scheduled_since(1 hour)` on an unpartitioned asset across several ticks."""
    not_scheduled_in_last_hour = ~SchedulingCondition.scheduled_since(
        lookback_delta=datetime.timedelta(hours=1)
    )
    state = AssetConditionScenarioState(
        one_asset,
        asset_condition=not_scheduled_in_last_hour,
        # this condition depends on having non-empty results
        ensure_empty_result=False,
    )

    # nothing has been requested yet, so the negated condition holds for the asset
    state, first_tick = state.evaluate("A")
    assert first_tick.true_subset.size == 1

    # the last tick would have requested the asset for materialization
    state, second_tick = state.evaluate("A")
    assert second_tick.true_subset.size == 0

    # still inside the lookback window
    state, third_tick = state.evaluate("A")
    assert third_tick.true_subset.size == 0

    # now it's been more than an hour since the last request
    state = state.with_current_time_advanced(hours=1, seconds=1)
    state, after_window_tick = state.evaluate("A")
    assert after_window_tick.true_subset.size == 1

    # edge case: one hour passes in between a request and the next evaluation
    state = state.with_current_time_advanced(hours=1)
    state, boundary_tick = state.evaluate("A")
    assert boundary_tick.true_subset.size == 0
|
||
|
||
def test_scheduled_since_partitioned() -> None:
    """Checks `~scheduled_since(1 hour)` on a two-partition asset across several ticks."""
    not_scheduled_in_last_hour = ~SchedulingCondition.scheduled_since(
        lookback_delta=datetime.timedelta(hours=1)
    )
    unpartitioned_state = AssetConditionScenarioState(
        one_asset,
        asset_condition=not_scheduled_in_last_hour,
        # this condition depends on having non-empty results
        ensure_empty_result=False,
    )
    state = unpartitioned_state.with_asset_properties(partitions_def=two_partitions_def)

    # nothing has been requested yet, so the negated condition holds for both partitions
    state, first_tick = state.evaluate("A")
    assert first_tick.true_subset.size == 2

    # the last tick would have requested both partitions for materialization
    state, second_tick = state.evaluate("A")
    assert second_tick.true_subset.size == 0

    # still inside the lookback window
    state, third_tick = state.evaluate("A")
    assert third_tick.true_subset.size == 0

    # now it's been more than an hour since the last request
    state = state.with_current_time_advanced(hours=1, seconds=1)
    state, after_window_tick = state.evaluate("A")
    assert after_window_tick.true_subset.size == 2

    # edge case: one hour passes in between a request and the next evaluation
    state = state.with_current_time_advanced(hours=1)
    state, boundary_tick = state.evaluate("A")
    assert boundary_tick.true_subset.size == 0