Skip to content

Commit

Permalink
Use re2 for matching untrusted regex (#32060)
Browse files (browse the repository at this point in the history)
  • Loading branch information
pierrejeambrun committed Jun 23, 2023
1 parent 59d64d8 commit 116e607
Show file tree
Hide file tree
Showing 2 changed files with 5 additions and 3 deletions.
7 changes: 4 additions & 3 deletions airflow/models/dag.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,6 @@
import os
import pathlib
import pickle
- import re
import sys
import traceback
import warnings
Expand All @@ -42,6 +41,7 @@
Iterable,
Iterator,
List,
+ Pattern,
Sequence,
Union,
cast,
Expand All @@ -51,6 +51,7 @@

import jinja2
import pendulum
+ import re2 as re
from dateutil.relativedelta import relativedelta
from pendulum.tz.timezone import Timezone
from sqlalchemy import Boolean, Column, ForeignKey, Index, Integer, String, Text, and_, case, func, not_, or_
Expand Down Expand Up @@ -2309,7 +2310,7 @@ def sub_dag(self, *args, **kwargs):

def partial_subset(
self,
- task_ids_or_regex: str | re.Pattern | Iterable[str],
+ task_ids_or_regex: str | Pattern | Iterable[str],
include_downstream=False,
include_upstream=True,
include_direct_upstream=False,
Expand All @@ -2336,7 +2337,7 @@ def partial_subset(
memo = {id(self.task_dict): None, id(self._task_group): None}
dag = copy.deepcopy(self, memo) # type: ignore

- if isinstance(task_ids_or_regex, (str, re.Pattern)):
+ if isinstance(task_ids_or_regex, (str, Pattern)):
matched_tasks = [t for t in self.tasks if re.findall(task_ids_or_regex, t.task_id)]
else:
matched_tasks = [t for t in self.tasks if t.task_id in task_ids_or_regex]
Expand Down
1 change: 1 addition & 0 deletions setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,7 @@ install_requires =
flask-login>=0.6.2
flask-session>=0.4.0
flask-wtf>=0.15
+ google-re2>=1.0
graphviz>=0.12
gunicorn>=20.1.0
httpx
Expand Down

0 comments on commit 116e607

Please sign in to comment.