Skip to content

Commit 0de5d79

Browse files
msyavuzclaude
andcommitted
fix(rls): prevent double-apply when converting physical dataset to virtual
When a physical dataset with RLS rules is converted to a virtual dataset via the Edit modal, the dataset's own RLS predicate was injected twice: once on the outer WHERE (lookup by SqlaTable.id) and again inside the virtual SQL (lookup by database + schema + table_name, which still matched the dataset itself). Pass the virtual dataset's id into apply_rls / get_predicates_for_table so the inner-SQL lookup excludes it. The outer WHERE keeps applying the RLS exactly once. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent 7bee2af commit 0de5d79

4 files changed

Lines changed: 70 additions & 14 deletions

File tree

superset/connectors/sqla/models.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2049,6 +2049,7 @@ class and any keys added via `ExtraCache`.
20492049
self.database,
20502050
self.catalog,
20512051
self.schema or default_schema or "",
2052+
exclude_dataset_id=self.id,
20522053
)
20532054
# Add each predicate as a separate cache key component
20542055
extra_cache_keys.extend(rls_predicates)

superset/models/helpers.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2054,12 +2054,17 @@ def get_from_clause(
20542054
default_schema = self.database.get_default_schema(self.catalog)
20552055
try:
20562056
rls_applied = False
2057+
# ``id`` lives on concrete subclasses (e.g. SqlaTable), not on
2058+
# ExploreMixin itself. getattr keeps this safe for non-dataset
2059+
# subclasses (e.g. SQL Lab Query), which have no RLS to dedupe.
2060+
self_id = getattr(self, "id", None)
20572061
for statement in parsed_script.statements:
20582062
if apply_rls(
20592063
self.database,
20602064
self.catalog,
20612065
self.schema or default_schema or "",
20622066
statement,
2067+
exclude_dataset_id=self_id,
20632068
):
20642069
rls_applied = True
20652070

superset/utils/rls.py

Lines changed: 28 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -34,10 +34,16 @@ def apply_rls(
3434
catalog: str | None,
3535
schema: str,
3636
parsed_statement: BaseSQLStatement[Any],
37+
exclude_dataset_id: int | None = None,
3738
) -> bool:
3839
"""
3940
Modify statement inplace to ensure RLS rules are applied.
4041
42+
:param exclude_dataset_id: When applying RLS to a virtual dataset's inner SQL,
43+
pass the virtual dataset's id here so its own RLS isn't injected again
44+
on top of the outer-WHERE application (avoids double-apply when the
45+
virtual dataset's table_name collides with a table in its own SQL — for
46+
example, after converting a physical dataset with RLS to virtual).
4147
:returns: True if any RLS predicates were actually applied, False otherwise.
4248
"""
4349
# There are two ways to insert RLS: either replacing the table with a subquery
@@ -55,6 +61,7 @@ def apply_rls(
5561
table,
5662
database,
5763
database.get_default_catalog(),
64+
exclude_dataset_id=exclude_dataset_id,
5865
)
5966
if predicate
6067
]
@@ -68,6 +75,7 @@ def get_predicates_for_table(
6875
table: Table,
6976
database: Database,
7077
default_catalog: str | None,
78+
exclude_dataset_id: int | None = None,
7179
) -> list[str]:
7280
"""
7381
Get the RLS predicates for a table.
@@ -87,18 +95,21 @@ def get_predicates_for_table(
8795
SqlaTable.catalog.is_(None),
8896
)
8997

90-
dataset = (
91-
db.session.query(SqlaTable)
92-
.filter(
93-
and_(
94-
SqlaTable.database_id == database.id,
95-
catalog_predicate,
96-
SqlaTable.schema == table.schema,
97-
SqlaTable.table_name == table.table,
98-
)
99-
)
100-
.one_or_none()
101-
)
98+
filters = [
99+
SqlaTable.database_id == database.id,
100+
catalog_predicate,
101+
SqlaTable.schema == table.schema,
102+
SqlaTable.table_name == table.table,
103+
]
104+
# When applying RLS to a virtual dataset's inner SQL, skip a match against
105+
# the dataset itself — its RLS is already applied on the outer WHERE via
106+
# get_sqla_row_level_filters(). Without this, a virtual dataset whose
107+
# table_name happens to equal a table in its own SQL (e.g. after a
108+
# physical→virtual conversion) double-applies its own predicates.
109+
if exclude_dataset_id is not None:
110+
filters.append(SqlaTable.id != exclude_dataset_id)
111+
112+
dataset = db.session.query(SqlaTable).filter(and_(*filters)).one_or_none()
102113
if not dataset:
103114
return []
104115

@@ -130,6 +141,7 @@ def collect_rls_predicates_for_sql(
130141
database: Database,
131142
catalog: str | None,
132143
schema: str,
144+
exclude_dataset_id: int | None = None,
133145
) -> list[str]:
134146
"""
135147
Collect all RLS predicates that would be applied to tables in the given SQL.
@@ -141,6 +153,9 @@ def collect_rls_predicates_for_sql(
141153
:param database: The database the query runs against
142154
:param catalog: The default catalog for the query
143155
:param schema: The default schema for the query
156+
:param exclude_dataset_id: Mirror of the same parameter on apply_rls — pass
157+
the virtual dataset's id so its self-match is excluded from the cache key
158+
(kept consistent with what's actually applied at query time).
144159
:return: List of RLS predicate strings that would be applied
145160
"""
146161
from superset.sql.parse import SQLScript
@@ -161,6 +176,7 @@ def collect_rls_predicates_for_sql(
161176
table,
162177
database,
163178
default_catalog,
179+
exclude_dataset_id=exclude_dataset_id,
164180
)
165181
}
166182
)

tests/unit_tests/sql_lab_test.py

Lines changed: 36 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -285,8 +285,18 @@ def test_apply_rls(mocker: MockerFixture) -> None:
285285

286286
get_predicates_for_table.assert_has_calls(
287287
[
288-
mocker.call(Table("t1", "public", "examples"), database, "examples"),
289-
mocker.call(Table("t2", "public", "examples"), database, "examples"),
288+
mocker.call(
289+
Table("t1", "public", "examples"),
290+
database,
291+
"examples",
292+
exclude_dataset_id=None,
293+
),
294+
mocker.call(
295+
Table("t2", "public", "examples"),
296+
database,
297+
"examples",
298+
exclude_dataset_id=None,
299+
),
290300
]
291301
)
292302

@@ -329,3 +339,27 @@ def test_get_predicates_for_table(mocker: MockerFixture) -> None:
329339
dataset.get_sqla_row_level_filters.assert_called_once_with(
330340
include_global_guest_rls=False
331341
)
342+
343+
344+
def test_get_predicates_for_table_excludes_self(mocker: MockerFixture) -> None:
345+
"""
346+
When ``exclude_dataset_id`` is supplied, the lookup query must add an
347+
``id != exclude_dataset_id`` filter so a virtual dataset whose
348+
``table_name`` matches a table referenced inside its own SQL doesn't get
349+
its own RLS injected into the inner SQL (would double-apply on top of the
350+
outer WHERE). Regression test for the physical→virtual conversion bug.
351+
"""
352+
database = mocker.MagicMock()
353+
db = mocker.patch("superset.utils.rls.db")
354+
db.session.query().filter().one_or_none.return_value = None
355+
356+
table = Table("orders", "public", "examples")
357+
assert (
358+
get_predicates_for_table(table, database, "examples", exclude_dataset_id=42)
359+
== []
360+
)
361+
# The filter call should have received four base filters plus the exclusion
362+
# filter, i.e. five total positional args inside and_().
363+
filter_call = db.session.query().filter.call_args
364+
and_clause = filter_call.args[0]
365+
assert len(and_clause.clauses) == 5

0 commit comments

Comments
 (0)