Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 28 additions & 12 deletions superset/utils/csv.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@
# specific language governing permissions and limitations
# under the License.
import logging
import re
import urllib.request
from typing import Any, Optional, Union
from urllib.error import URLError
Expand All @@ -28,15 +27,32 @@

logger = logging.getLogger(__name__)

negative_number_re = re.compile(r"^-[0-9.]+$")
PROBLEMATIC_CSV_PREFIXES = "-@+|=%"

# This regex will match if the string starts with:
#
# 1. one of -, @, +, |, =, %
# 2. two double quotes immediately followed by one of -, @, +, |, =, %
# 3. one or more spaces immediately followed by one of -, @, +, |, =, %
#
problematic_chars_re = re.compile(r'^(?:"{2}|\s{1,})(?=[\-@+|=%])|^[\-@+|=%]')

def _starts_with_formula_prefix(value: str) -> bool:
first = value[0]
if first in PROBLEMATIC_CSV_PREFIXES:
return True
if first == '"' and len(value) > 2:
return value[1] == '"' and value[2] in PROBLEMATIC_CSV_PREFIXES
return False


def _starts_like_spreadsheet_formula(value: str) -> bool:
first = value[0]
if first.isspace():
stripped = value.lstrip()
return bool(stripped) and _starts_with_formula_prefix(stripped)
return _starts_with_formula_prefix(value)


def _is_negative_number(value: str) -> bool:
return (
len(value) > 1
and value[0] == "-"
and all("0" <= character <= "9" or character == "." for character in value[1:])
)


def escape_value(value: str) -> str:
Expand All @@ -45,10 +61,10 @@ def escape_value(value: str) -> str:

http://georgemauer.net/2017/10/07/csv-injection.html
"""
needs_escaping = problematic_chars_re.match(value) is not None
is_negative_number = negative_number_re.match(value) is not None
if not value:
return value

if needs_escaping and not is_negative_number:
if _starts_like_spreadsheet_formula(value) and not _is_negative_number(value):
# Escape pipe to be extra safe as this
# can lead to remote code execution
value = value.replace("|", "\\|")
Expand Down
3 changes: 3 additions & 0 deletions tests/unit_tests/utils/csv_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,9 @@ def test_escape_value():
result = csv.escape_value(" =10+2")
assert result == "' =10+2"

result = csv.escape_value(' ""=10+2')
assert result == '\' ""=10+2'


def fake_get_chart_csv_data_none(chart_url, auth_cookies=None):
return None
Expand Down
Loading