-
Notifications
You must be signed in to change notification settings - Fork 1.4k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[DS][39/n] Use FailedSchedulingCondition and ScheduledSince condition in eager policy (#21741)

## Summary & Motivation

As title. This was alluded to in the initial eager policy PR; this change puts them into action. In the case that the latest run for an asset partition failed, we will not request the asset again if we've already requested it within the last hour. This puts a natural "rate limit" on the asset in the case that it is repeatedly failing, but does not interfere if the asset is materializing as expected.

## How I Tested These Changes
- Loading branch information
1 parent
9eb7b35
commit e02a249
Showing
2 changed files
with
116 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
104 changes: 104 additions & 0 deletions
104
...ts/definitions_tests/auto_materialize_tests/asset_condition_tests/test_eager_condition.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,104 @@ | ||
import datetime | ||
|
||
from dagster import SchedulingCondition | ||
|
||
from dagster_tests.definitions_tests.auto_materialize_tests.base_scenario import run_request | ||
|
||
from ..scenario_specs import hourly_partitions_def, two_assets_in_sequence | ||
from .asset_condition_scenario import AssetConditionScenarioState | ||
|
||
|
||
def test_eager_with_rate_limit_unpartitioned() -> None:
    """Walk unpartitioned asset B through the eager-with-rate-limit condition.

    The scenario asserts that B is requested only once its parent A has run,
    is not requested again right after B itself has run or right after a
    failed run of B, and is requested again once the clock has been moved
    forward by one hour and one second.
    """
    scenario = AssetConditionScenarioState(
        two_assets_in_sequence,
        asset_condition=SchedulingCondition.eager_with_rate_limit(),
        ensure_empty_result=False,
    )

    # No run of A exists yet, so nothing should be requested for B.
    scenario, evaluation = scenario.evaluate("B")
    assert evaluation.true_subset.size == 0

    # Once A has run, B becomes requestable.
    scenario = scenario.with_runs(run_request("A"))
    scenario, evaluation = scenario.evaluate("B")
    assert evaluation.true_subset.size == 1

    # Launch a run for everything that was just requested.
    requested_runs = [
        run_request(asset_key, partition_key)
        for asset_key, partition_key in evaluation.true_subset.asset_partitions
    ]
    scenario = scenario.with_runs(*requested_runs)

    # B has now run, so it must not be requested a second time.
    scenario, evaluation = scenario.evaluate("B")
    assert evaluation.true_subset.size == 0

    # A runs again before the hour is up, which makes B requestable again.
    scenario = scenario.with_runs(run_request("A"))
    scenario, evaluation = scenario.evaluate("B")
    assert evaluation.true_subset.size == 1

    # This time the run of B fails.
    scenario = scenario.with_failed_run_for_asset("B")

    # Right after the failure, B must not be requested again.
    scenario, evaluation = scenario.evaluate("B")
    assert evaluation.true_subset.size == 0

    # Just over an hour after B was requested, a retry is allowed.
    scenario = scenario.with_current_time_advanced(hours=1, seconds=1)
    scenario, evaluation = scenario.evaluate("B")
    assert evaluation.true_subset.size == 1
|
||
|
||
def test_eager_with_rate_limit_hourly_partitioned() -> None:
    """Walk hourly-partitioned asset B through the eager-with-rate-limit condition.

    The condition is built with a ten-minute ``failure_retry_delta``. The
    scenario asserts that only an update of the latest parent partition leads
    to a request for B, that B is not requested again right after it has run
    or right after a failed run, and that B is requested again once the clock
    has been moved forward by ten minutes and one second.
    """
    rate_limited_condition = SchedulingCondition.eager_with_rate_limit(
        failure_retry_delta=datetime.timedelta(minutes=10)
    )
    scenario = AssetConditionScenarioState(
        two_assets_in_sequence,
        asset_condition=rate_limited_condition,
        ensure_empty_result=False,
    )
    scenario = scenario.with_asset_properties(partitions_def=hourly_partitions_def)
    scenario = scenario.with_current_time("2020-02-02T01:05:00")

    # No run of A exists yet, so nothing should be requested for B.
    scenario, evaluation = scenario.evaluate("B")
    assert evaluation.true_subset.size == 0

    # A run of a historical partition of A does not lead to a request.
    scenario = scenario.with_runs(run_request("A", "2019-07-05-00:00"))
    scenario, evaluation = scenario.evaluate("B")
    assert evaluation.true_subset.size == 0

    # A run of the latest partition of A makes B requestable.
    scenario = scenario.with_runs(run_request("A", "2020-02-02-00:00"))
    scenario, evaluation = scenario.evaluate("B")
    assert evaluation.true_subset.size == 1

    # Launch a run for everything that was just requested.
    requested_runs = [
        run_request(asset_key, partition_key)
        for asset_key, partition_key in evaluation.true_subset.asset_partitions
    ]
    scenario = scenario.with_runs(*requested_runs)

    # B has now run, so it must not be requested a second time.
    scenario, evaluation = scenario.evaluate("B")
    assert evaluation.true_subset.size == 0

    # An hour later a new partition exists, but its parent has not run yet.
    scenario = scenario.with_current_time_advanced(hours=1)
    scenario, evaluation = scenario.evaluate("B")
    assert evaluation.true_subset.size == 0

    # The parent of the new partition runs, so B is requested.
    scenario = scenario.with_runs(run_request("A", "2020-02-02-01:00"))
    scenario, evaluation = scenario.evaluate("B")
    assert evaluation.true_subset.size == 1

    # The run of B for that partition fails.
    scenario = scenario.with_failed_run_for_asset("B", "2020-02-02-01:00")

    # Right after the failure, B must not be requested again.
    scenario, evaluation = scenario.evaluate("B")
    assert evaluation.true_subset.size == 0

    # Just over ten minutes after B was requested, a retry is allowed.
    scenario = scenario.with_current_time_advanced(minutes=10, seconds=1)
    scenario, evaluation = scenario.evaluate("B")
    assert evaluation.true_subset.size == 1