From 32bb734db1e9d639c319bcef18071a5eb0d2d558 Mon Sep 17 00:00:00 2001
From: Dhimas Ardinata
Date: Sun, 17 May 2026 16:26:20 +0700
Subject: [PATCH 1/2] perf(csv): avoid regex in CSV value escaping

---
 superset/utils/csv.py | 35 +++++++++++++++++++++++------------
 1 file changed, 23 insertions(+), 12 deletions(-)

diff --git a/superset/utils/csv.py b/superset/utils/csv.py
index bf5045728ddf..28ab77fca449 100644
--- a/superset/utils/csv.py
+++ b/superset/utils/csv.py
@@ -15,7 +15,6 @@
 # specific language governing permissions and limitations
 # under the License.
 import logging
-import re
 import urllib.request
 from typing import Any, Optional, Union
 from urllib.error import URLError
@@ -28,15 +27,27 @@
 
 logger = logging.getLogger(__name__)
 
-negative_number_re = re.compile(r"^-[0-9.]+$")
+PROBLEMATIC_CSV_PREFIXES = "-@+|=%"
 
-# This regex will match if the string starts with:
-#
-# 1. one of -, @, +, |, =, %
-# 2. two double quotes immediately followed by one of -, @, +, |, =, %
-# 3. one or more spaces immediately followed by one of -, @, +, |, =, %
-#
-problematic_chars_re = re.compile(r'^(?:"{2}|\s{1,})(?=[\-@+|=%])|^[\-@+|=%]')
+
+def _starts_like_spreadsheet_formula(value: str) -> bool:
+    first = value[0]
+    if first in PROBLEMATIC_CSV_PREFIXES:
+        return True
+    if first == '"' and len(value) > 2:
+        return value[1] == '"' and value[2] in PROBLEMATIC_CSV_PREFIXES
+    if first.isspace():
+        stripped = value.lstrip()
+        return bool(stripped) and stripped[0] in PROBLEMATIC_CSV_PREFIXES
+    return False
+
+
+def _is_negative_number(value: str) -> bool:
+    return (
+        len(value) > 1
+        and value[0] == "-"
+        and all("0" <= character <= "9" or character == "." for character in value[1:])
+    )
 
 
 def escape_value(value: str) -> str:
@@ -45,10 +56,10 @@ def escape_value(value: str) -> str:
 
     http://georgemauer.net/2017/10/07/csv-injection.html
     """
-    needs_escaping = problematic_chars_re.match(value) is not None
-    is_negative_number = negative_number_re.match(value) is not None
+    if not value:
+        return value
 
-    if needs_escaping and not is_negative_number:
+    if _starts_like_spreadsheet_formula(value) and not _is_negative_number(value):
         # Escape pipe to be extra safe as this
         # can lead to remote code execution
         value = value.replace("|", "\\|")

From 20b8442fb009ffe30050cad5c7653edd5ef96f68 Mon Sep 17 00:00:00 2001
From: Dhimas Ardinata
Date: Sun, 17 May 2026 16:37:16 +0700
Subject: [PATCH 2/2] fix(csv): handle whitespace quoted formulas

---
 superset/utils/csv.py               | 11 ++++++++---
 tests/unit_tests/utils/csv_tests.py |  3 +++
 2 files changed, 11 insertions(+), 3 deletions(-)

diff --git a/superset/utils/csv.py b/superset/utils/csv.py
index 28ab77fca449..b04a39739402 100644
--- a/superset/utils/csv.py
+++ b/superset/utils/csv.py
@@ -30,16 +30,21 @@
 PROBLEMATIC_CSV_PREFIXES = "-@+|=%"
 
 
-def _starts_like_spreadsheet_formula(value: str) -> bool:
+def _starts_with_formula_prefix(value: str) -> bool:
     first = value[0]
     if first in PROBLEMATIC_CSV_PREFIXES:
         return True
     if first == '"' and len(value) > 2:
         return value[1] == '"' and value[2] in PROBLEMATIC_CSV_PREFIXES
+    return False
+
+
+def _starts_like_spreadsheet_formula(value: str) -> bool:
+    first = value[0]
     if first.isspace():
         stripped = value.lstrip()
-        return bool(stripped) and stripped[0] in PROBLEMATIC_CSV_PREFIXES
-    return False
+        return bool(stripped) and _starts_with_formula_prefix(stripped)
+    return _starts_with_formula_prefix(value)
 
 
 def _is_negative_number(value: str) -> bool:
diff --git a/tests/unit_tests/utils/csv_tests.py b/tests/unit_tests/utils/csv_tests.py
index 747e8b32870c..f7dc903d331b 100644
--- a/tests/unit_tests/utils/csv_tests.py
+++ b/tests/unit_tests/utils/csv_tests.py
@@ -63,6 +63,9 @@ def test_escape_value():
     result = csv.escape_value(" =10+2")
     assert result == "' =10+2"
 
+    result = csv.escape_value(' ""=10+2')
+    assert result == '\' ""=10+2'
+
 
 def fake_get_chart_csv_data_none(chart_url, auth_cookies=None):
     return None