Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 8 additions & 3 deletions airflow-core/src/airflow/cli/cli_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -188,15 +188,19 @@ def string_lower_type(val):
ARG_PARTITION_DATE_START = Arg(
("--partition-date-start",),
help=(
"Inclusive lower bound of the partition_date window (matched against DagRun.partition_date). "
"Inclusive lower bound of the partition_date window. Matched at local calendar-day "
"granularity: the start of the given local calendar day in the Dag's timetable timezone "
"(any time-of-day component is ignored). "
"Accepts the same datetime formats as --start-date."
),
type=parsedate,
)
ARG_PARTITION_DATE_END = Arg(
("--partition-date-end",),
help=(
"Inclusive upper bound of the partition_date window (matched against DagRun.partition_date). "
"Inclusive upper bound of the partition_date window. Matched at local calendar-day "
"granularity: all runs whose partition_date falls on the given local calendar day in the "
"Dag's timetable timezone are included (any time-of-day component is ignored). "
"Accepts the same datetime formats as --end-date."
),
type=parsedate,
Expand Down Expand Up @@ -1190,7 +1194,8 @@ class GroupCommand(NamedTuple):
"Clear Dag runs of the given dag_id and re-queue them for reprocessing. Exactly one "
"of the following selectors must be provided: --run-id (single run); --partition-key "
"(every run with that exact partition_key); or a partition_date window via "
"--partition-date-start and/or --partition-date-end (inclusive on both ends). "
"--partition-date-start and/or --partition-date-end (both bounds are inclusive local "
"calendar days, anchored in the Dag's timetable timezone). "
"Intended for partitioned Dags, whose runs are keyed by partition_date / "
"partition_key instead of logical_date. For traditional, non-partitioned Dags, use "
"`airflow tasks clear --start-date / --end-date`."
Expand Down
60 changes: 55 additions & 5 deletions airflow-core/src/airflow/cli/commands/dag_command.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,15 @@ def dag_delete(args) -> None:
@providers_configuration_loaded
@provide_session
def dag_clear(args, *, session: Session = NEW_SESSION) -> None:
"""Clear Dag runs selected by run_id, partition_key, or a partition_date window."""
"""
Clear Dag runs selected by run_id, partition_key, or a partition_date window.

When a partition_date window is given, both bounds are **day-granular** and
anchored in the timetable's timezone for tz-aware partitioned timetables.
--partition-date-start is the inclusive start local calendar day;
--partition-date-end is the inclusive end local calendar day (any
time-of-day component in either value is ignored).
"""
has_range = args.partition_date_start is not None or args.partition_date_end is not None
selectors_used = sum([args.run_id is not None, args.partition_key is not None, has_range])
if selectors_used == 0:
Expand Down Expand Up @@ -157,10 +165,52 @@ def dag_clear(args, *, session: Session = NEW_SESSION) -> None:
query = query.where(DagRun.partition_key == args.partition_key)
else:
query = query.where(DagRun.partition_date.is_not(None))
if args.partition_date_start is not None:
query = query.where(DagRun.partition_date >= args.partition_date_start)
if args.partition_date_end is not None:
query = query.where(DagRun.partition_date <= args.partition_date_end)
tt_tz = getattr(dag.timetable, "timezone", None) if dag.timetable.partitioned else None
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

tt_tz is resolved by probing for a .timezone attribute, which only CronMixin-based timetables have (this PR adds the property). PartitionedAssetTimetable (timetables/simple.py:267) also sets partitioned = True but has no .timezone, so it silently takes the no-tz branch. That's correct today if asset-partition dates are genuinely UTC-anchored, but the dispatch is duck-typed: any future tz-aware partitioned timetable that doesn't expose .timezone will silently fall back to the UTC branch and reintroduce the exact off-by-one this PR fixes, with no error to flag it. It's worth either putting the tz accessor on the partitioned-timetable contract, so it's explicit which timetables are tz-aware, or branching on a known type. A minor, related point: the two day-bound blocks are nearly identical across the tz and no-tz paths and could share a _day_bounds(label, tz) helper to keep them from drifting.

if tt_tz is not None:
# Partitioned runs are stored as local-midnight UTC instants; compare at day
# granularity in the timetable's timezone rather than at the raw UTC instant.
if args.partition_date_start is not None:
start_label = args.partition_date_start.date()
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

parsedate returns the parsed instant in UTC (naive input is read as UTC), so .date() takes the UTC calendar day and then re-anchors it to midnight in the timetable tz. For naive values that's intuitive, but a tz-aware CLI value can shift the day: --partition-date-start 2026-02-19T07:00:00+08:00 parses to 2026-02-18T23:00Z, and .date() yields 2026-02-18, not the user's local 2026-02-19. The help text says time-of-day is ignored, but not that the calendar day is read from the parsed (UTC) instant rather than re-projected into the timetable tz. A one-line note in the help text would make this behaviour explicit. The same applies to the end bound at line 182.

lower_utc = timezone.convert_to_utc(
timezone.make_aware(
datetime.datetime(start_label.year, start_label.month, start_label.day),
tt_tz,
)
)
query = query.where(DagRun.partition_date >= lower_utc)
if args.partition_date_end is not None:
end_label = args.partition_date_end.date()
# Half-open upper bound: include all of the end local calendar day.
next_day = datetime.date(end_label.year, end_label.month, end_label.day) + datetime.timedelta(
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

end_label is already a datetime.date (from .date() above), so datetime.date(end_label.year, end_label.month, end_label.day) just rebuilds the same date. This can be next_day = end_label + datetime.timedelta(days=1), which is the simpler form the no-tz branch below already uses.

days=1
)
upper_utc = timezone.convert_to_utc(
timezone.make_aware(
datetime.datetime(next_day.year, next_day.month, next_day.day),
tt_tz,
)
)
query = query.where(DagRun.partition_date < upper_utc)
else:
# No timetable timezone: partition_date values are midnight-anchored UTC dates,
# so time-of-day on the CLI flags is not meaningful — truncate to calendar day.
if args.partition_date_start is not None:
start_day = args.partition_date_start.date()
query = query.where(
DagRun.partition_date
>= datetime.datetime(
start_day.year, start_day.month, start_day.day, tzinfo=datetime.timezone.utc
)
)
if args.partition_date_end is not None:
end_day = args.partition_date_end.date()
next_day = end_day + datetime.timedelta(days=1)
query = query.where(
DagRun.partition_date
< datetime.datetime(
next_day.year, next_day.month, next_day.day, tzinfo=datetime.timezone.utc
)
)
query = query.order_by(DagRun.partition_date, DagRun.run_id)

runs = list(session.execute(query).all())
Expand Down
4 changes: 4 additions & 0 deletions airflow-core/src/airflow/timetables/_cron.py
Original file line number Diff line number Diff line change
Expand Up @@ -137,6 +137,10 @@ def __hash__(self):
def summary(self) -> str:
return self._expression

@property
def timezone(self) -> Timezone | FixedTimezone:
    """Return the timezone object stored on this instance as ``_timezone``."""
    return self._timezone

def validate(self) -> None:
try:
croniter(self._expression)
Expand Down
Loading
Loading