apache · rusackas · Jun 2, 2026 · Jun 2, 2026 · Jun 2, 2026 · codeant-ai-for-open-source
diff --git a/superset/commands/database/uploaders/columnar_reader.py b/superset/commands/database/uploaders/columnar_reader.py
@@ -23,6 +23,7 @@
 
 import pandas as pd
 import pyarrow.parquet as pq
+from flask import current_app
 from flask_babel import lazy_gettext as _
 from pyarrow.lib import ArrowException
 from werkzeug.datastructures import FileStorage
@@ -33,10 +34,47 @@
     FileMetadata,
     ReaderOptions,
 )
+from superset.exceptions import SupersetException
+from superset.utils.core import check_is_safe_zip
 
 logger = logging.getLogger(__name__)
 
 
+def _check_file_size(file: FileStorage) -> None:
+    """
+    Reject an uploaded file whose raw (on-the-wire) size exceeds the configured
+    ``UPLOAD_MAX_FILE_SIZE_BYTES`` limit; a falsy limit disables the check.
+
+    This is complementary to the ZIP decompression-ratio guard: it bounds the
+    raw bytes accepted regardless of whether the payload is compressed.
+
+    :param file: The uploaded file to check.
+    :throws DatabaseUploadFailed: if the file exceeds the configured limit.
+    """
+    max_size = current_app.config.get("UPLOAD_MAX_FILE_SIZE_BYTES")
+    if not max_size:  # ``None`` or ``0``: the check is disabled
+        return
+    stream = file.stream
+    try:
+        current_position = stream.tell()
+        stream.seek(0, 2)  # whence=2 (SEEK_END): jump to the end of the stream
+        size = stream.tell()  # offset at the end == total stream size in bytes
+        stream.seek(current_position)  # restore the caller's read position
+    except (AttributeError, OSError):
+        # If the stream is not seekable we cannot determine the size cheaply;
+        # skip the check and rely on downstream guards.
+        return
+    if size > max_size:
+        raise DatabaseUploadFailed(
+            _(
+                "File size %(size)s bytes exceeds the maximum allowed "
+                "upload size of %(max_size)s bytes",
+                size=size,
+                max_size=max_size,
+            )
+        )
+
+
 class ColumnarReaderOptions(ReaderOptions, total=False):
     columns_read: list[str]
 
@@ -80,6 +118,7 @@ def _yield_files(file: FileStorage) -> Generator[IO[bytes], None, None]:
         :param file: The file to yield files from.
         :return: A generator that yields files.
         """
+        _check_file_size(file)
         file_suffix = Path(file.filename).suffix
         if not file_suffix:
             raise DatabaseUploadFailed(_("Unexpected no file extension found"))
@@ -89,6 +128,12 @@ def _yield_files(file: FileStorage) -> Generator[IO[bytes], None, None]:
                 raise DatabaseUploadFailed(_("Not a valid ZIP file"))
             try:
                 with ZipFile(file) as zip_file:
+                    # guard against decompression bombs before reading entries,
+                    # mirroring the importer path
+                    try:
+                        check_is_safe_zip(zip_file)
+                    except SupersetException as ex:
+                        raise DatabaseUploadFailed(str(ex)) from ex
                     # check if all file types are of the same extension
                     file_suffixes = {Path(name).suffix for name in zip_file.namelist()}
                     if len(file_suffixes) > 1:

diff --git a/superset/config.py b/superset/config.py
@@ -168,6 +168,11 @@ def _try_json_readsha(filepath: str, length: int) -> str | None:
 # max rows retrieved by filter select auto complete
 FILTER_SELECT_ROW_LIMIT = 10000
 
+# Upper bound on the page size accepted by the generic DAO list/pagination layer.
+# Larger requested page sizes are clamped to this value (not rejected), which
+# keeps the number of rows a single paginated query can return bounded.
+SQLALCHEMY_DAO_MAX_PAGE_SIZE = 1000
+
 # SupersetClient HTTP retry configuration
 # Controls retry behavior for all HTTP requests made through SupersetClient
 # This helps handle transient server errors (like 502 Bad Gateway) automatically
@@ -1112,6 +1117,12 @@ class D3TimeFormat(TypedDict, total=False):
 UPLOAD_FOLDER = BASE_DIR + "/static/uploads/"
 UPLOAD_CHUNK_SIZE = 4096
 
+# Upper bound, in bytes, on the size of a single uploaded data file (e.g. CSV,
+# Excel, columnar). Files larger than this are rejected before their contents
+# are buffered into memory, keeping the resources consumed by a single upload
+# bounded. Set to ``None`` (or ``0``) to disable the check. Default: 100 MiB.
+UPLOAD_MAX_FILE_SIZE_BYTES: int | None = 100 * 1024 * 1024
+
 # ---------------------------------------------------
 # Cache configuration
 # ---------------------------------------------------

diff --git a/superset/daos/base.py b/superset/daos/base.py
@@ -33,6 +33,7 @@
 )
 
 import sqlalchemy as sa
+from flask import current_app
 from flask_appbuilder.models.filters import BaseFilter
 from flask_appbuilder.models.sqla.interface import SQLAInterface
 from pydantic import BaseModel, Field
@@ -749,7 +750,10 @@ def list(  # noqa: C901
             else:
                 query = query.order_by(asc(column))
         page = page
-        page_size = max(page_size, 1)
+        # Clamp the page size to a sane range: at least 1, and no larger than
+        # the configured upper bound, to keep result sets bounded.
+        max_page_size = current_app.config.get("SQLALCHEMY_DAO_MAX_PAGE_SIZE", 1000)
+        page_size = min(max(page_size, 1), max_page_size)
         query = query.offset(page * page_size).limit(page_size)
         items = query.all()
         # If columns are specified, SQLAlchemy returns Row objects (not tuples or

diff --git a/tests/unit_tests/commands/databases/columnar_reader_test.py b/tests/unit_tests/commands/databases/columnar_reader_test.py
@@ -17,10 +17,12 @@
 import io
 import tempfile
 from typing import Any
-from zipfile import ZipFile
+from unittest.mock import patch
+from zipfile import ZIP_DEFLATED, ZipFile
 
 import numpy as np
 import pytest
+from flask import current_app
 from werkzeug.datastructures import FileStorage
 
 from superset.commands.database.exceptions import DatabaseUploadFailed
@@ -230,6 +232,87 @@ def test_columnar_reader_bad_zip():
     assert str(ex.value) == "Not a valid ZIP file"
 
 
+def _make_high_ratio_zip() -> io.BytesIO:
+    """
+    Build an in-memory ZIP whose single ``test.parquet`` entry has a very high
+    decompression ratio, well above the default ``ZIP_FILE_MAX_COMPRESS_RATIO``.
+    """
+    buffer = io.BytesIO()
+    with ZipFile(buffer, "w", ZIP_DEFLATED) as zip_file:
+        # A megabyte of zeros compresses to roughly a kilobyte, far exceeding
+        # the default 200:1 ratio guard.
+        zip_file.writestr("test.parquet", b"\x00" * (1024 * 1024))
+    buffer.seek(0)  # rewind so the caller reads the archive from the start
+    return buffer
+
+
+def test_columnar_reader_unsafe_zip_rejected():
+    reader = ColumnarReader(
+        options=ColumnarReaderOptions(),
+    )
+    unsafe_zip = _make_high_ratio_zip()  # 1 MiB of zeros, deflated
+    with pytest.raises(DatabaseUploadFailed) as ex:  # full read path
+        reader.file_to_dataframe(FileStorage(unsafe_zip, "test.zip"))
+    assert "compress ratio above allowed threshold" in str(ex.value)
+
+
+def test_columnar_reader_unsafe_zip_rejected_in_metadata():
+    reader = ColumnarReader(
+        options=ColumnarReaderOptions(),
+    )
+    unsafe_zip = _make_high_ratio_zip()  # 1 MiB of zeros, deflated
+    with pytest.raises(DatabaseUploadFailed) as ex:  # metadata-only path
+        reader.file_metadata(FileStorage(unsafe_zip, "test.zip"))
+    assert "compress ratio above allowed threshold" in str(ex.value)
+
+
+def test_columnar_reader_oversize_file_rejected():
+    reader = ColumnarReader(
+        options=ColumnarReaderOptions(),
+    )
+    file = create_columnar_file(COLUMNAR_DATA)
+    file.stream.seek(0, 2)  # seek to the end to measure the fixture's size
+    file_size = file.stream.tell()
+    file.stream.seek(0)  # rewind before handing the file to the reader
+    with patch.dict(
+        current_app.config,
+        {"UPLOAD_MAX_FILE_SIZE_BYTES": file_size - 1},  # one byte too small
+    ):
+        with pytest.raises(DatabaseUploadFailed) as ex:
+            reader.file_to_dataframe(file)
+    assert "exceeds the maximum allowed upload size" in str(ex.value)
+
+
+def test_columnar_reader_oversize_file_rejected_in_metadata():
+    reader = ColumnarReader(
+        options=ColumnarReaderOptions(),
+    )
+    file = create_columnar_file(COLUMNAR_DATA)
+    file.stream.seek(0, 2)  # seek to the end to measure the fixture's size
+    file_size = file.stream.tell()
+    file.stream.seek(0)  # rewind before handing the file to the reader
+    with patch.dict(
+        current_app.config,
+        {"UPLOAD_MAX_FILE_SIZE_BYTES": file_size - 1},  # one byte too small
+    ):
+        with pytest.raises(DatabaseUploadFailed) as ex:
+            reader.file_metadata(file)
+    assert "exceeds the maximum allowed upload size" in str(ex.value)
+
+
+def test_columnar_reader_under_limit_accepted():
+    reader = ColumnarReader(
+        options=ColumnarReaderOptions(),
+    )
+    file = create_columnar_file(COLUMNAR_DATA)
+    with patch.dict(
+        current_app.config,
+        {"UPLOAD_MAX_FILE_SIZE_BYTES": 100 * 1024 * 1024},  # 100 MiB limit
+    ):
+        df = reader.file_to_dataframe(file)  # must not raise under the limit
+    assert len(df) == 3
+
+
 def test_columnar_reader_metadata():
     reader = ColumnarReader(
         options=ColumnarReaderOptions(),

diff --git a/tests/unit_tests/dao/base_dao_test.py b/tests/unit_tests/dao/base_dao_test.py
@@ -258,3 +258,54 @@ def test_find_by_ids_none_id_column():
         results = TestDAO.find_by_ids([1, 2, 3])
 
         assert results == []
+
+
+def _list_with_page_size(page_size: int) -> Mock:
+    """
+    Call ``TestDAO.list`` for page 0 with a mocked query chain and return the
+    mock query so the ``.limit()`` argument (effective page size) can be checked.
+    """
+    mock_query = Mock()
+    # Every chainable call returns the same mock so the chain is easy to inspect
+    mock_query.options.return_value = mock_query
+    mock_query.filter.return_value = mock_query
+    mock_query.order_by.return_value = mock_query
+    mock_query.offset.return_value = mock_query
+    mock_query.limit.return_value = mock_query
+    mock_query.count.return_value = 0
+    mock_query.all.return_value = []
+
+    mock_data_model = Mock()
+    mock_data_model.session.query.return_value = mock_query  # query() -> mock
+
+    with (
+        patch("superset.daos.base.SQLAInterface", return_value=mock_data_model),
+        patch.object(TestDAO, "_apply_base_filter", side_effect=lambda q, **_: q),
+    ):
+        TestDAO.list(page=0, page_size=page_size)
+
+    return mock_query
+
+
+def test_list_page_size_oversized_is_clamped():
+    """An oversized page_size is clamped to ``SQLALCHEMY_DAO_MAX_PAGE_SIZE``."""
+    from flask import current_app
+
+    max_page_size = current_app.config.get("SQLALCHEMY_DAO_MAX_PAGE_SIZE", 1000)
+    mock_query = _list_with_page_size(max_page_size + 5000)  # well over the cap
+
+    mock_query.limit.assert_called_once_with(max_page_size)
+
+
+def test_list_page_size_normal_unaffected():
+    """A page_size within the allowed range is passed through unchanged."""
+    mock_query = _list_with_page_size(50)  # below the default cap of 1000
+
+    mock_query.limit.assert_called_once_with(50)
+
+
+def test_list_page_size_below_one_is_floored():
+    """A non-positive page_size is floored to 1 (existing semantics)."""
+    mock_query = _list_with_page_size(0)  # max(0, 1) -> 1 before the upper clamp
+
+    mock_query.limit.assert_called_once_with(1)