Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: datasource payload is incorrect #15184

Merged
merged 2 commits into from Jun 23, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
36 changes: 36 additions & 0 deletions superset/connectors/connector_registry.py
Expand Up @@ -14,6 +14,7 @@
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
from collections import defaultdict
from typing import Dict, List, Optional, Set, Type, TYPE_CHECKING

from flask_babel import _
Expand Down Expand Up @@ -99,6 +100,41 @@ def get_datasource_by_id( # pylint: disable=too-many-arguments
pass
raise NoResultFound(_("Datasource id not found: %(id)s", id=datasource_id))

@classmethod
def get_user_datasources(cls, session: Session) -> List["BaseDatasource"]:
from superset import security_manager

# collect datasources which the user has explicit permissions to
user_perms = security_manager.user_view_menu_names("datasource_access")
schema_perms = security_manager.user_view_menu_names("schema_access")
user_datasources = set()
for datasource_class in ConnectorRegistry.sources.values():
user_datasources.update(
session.query(datasource_class)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@betodealmeida @dpgaspar I believe this logic, i.e., relying on the fragile perm and schema_perm columns in the database, breaks if people are using a custom security manager. Should we be calling get_datasources_accessible_by_user instead?

Note I sense currently Superset doesn't do a great job of differentiating between metadata and data access. The get_datasources_accessible_by_user is somewhat of a misnomer as it is merely used for metadata access (for an awareness perspective).

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@john-bodley, get_datasources_accessible_by_user calls ConnectorRegistry.query_datasources_by_permissions, which uses the same logic I'm using here.

Since we're getting user_perms and schema_perms from the security manager (lines 108, 109) doesn't it means this works with custom security managers?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@betodealmeida @john-bodley I think that the question here is that AirBnb totally overrides the DB permissions, they do that by overriding the security manager, their permission "backend" is something totally different. So that's possible if all permission checks are done on the security manager.

Makes me think how can that work on the REST API defined filters

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@dpgaspar that's correct. We actually don't use any of the FAB logic and thus we completely bypass the securiy_manager.user_view_menu_names and reliance of the datasource_class.perm and datasource_class.schema_perm database columns.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@betodealmeida if the get_datasources_accessible_by_user method isn't ideal, then I think the alternatively would be to move the get_user_datasources method to the security manager so deployments can overwrite the logic if necessary.

.filter(
or_(
datasource_class.perm.in_(user_perms),
datasource_class.schema_perm.in_(schema_perms),
)
)
.all()
)

# group all datasources by database
all_datasources = cls.get_all_datasources(session)
datasources_by_database: Dict["Database", Set["BaseDatasource"]] = defaultdict(
set
)
for datasource in all_datasources:
datasources_by_database[datasource.database].add(datasource)

# add datasources with implicit permission (eg, database access)
for database, datasources in datasources_by_database.items():
if security_manager.can_access_database(database):
user_datasources.update(datasources)

return list(user_datasources)

@classmethod
def get_datasource_by_name( # pylint: disable=too-many-arguments
cls,
Expand Down
2 changes: 1 addition & 1 deletion superset/views/chart/views.py
Expand Up @@ -65,7 +65,7 @@ def pre_delete(self, item: "SliceModelView") -> None:
def add(self) -> FlaskResponse:
datasources = [
{"value": str(d.id) + "__" + d.type, "label": repr(d)}
for d in ConnectorRegistry.get_all_datasources(db.session)
for d in ConnectorRegistry.get_user_datasources(db.session)
]
payload = {
"datasources": sorted(
Expand Down
2 changes: 1 addition & 1 deletion superset/views/core.py
Expand Up @@ -184,7 +184,7 @@ def datasources(self) -> FlaskResponse:
sorted(
[
datasource.short_data
for datasource in ConnectorRegistry.get_all_datasources(db.session)
for datasource in ConnectorRegistry.get_user_datasources(db.session)
if datasource.short_data.get("name")
],
key=lambda datasource: datasource["name"],
Expand Down
79 changes: 79 additions & 0 deletions tests/access_tests.py
Expand Up @@ -18,6 +18,7 @@
"""Unit tests for Superset"""
import json
import unittest
from collections import namedtuple
from unittest import mock
from tests.fixtures.birth_names_dashboard import load_birth_names_dashboard_with_slices

Expand Down Expand Up @@ -626,5 +627,83 @@ def test_request_access(self):
session.commit()


class TestDatasources(SupersetTestCase):
def test_get_user_datasources_admin(self):
Datasource = namedtuple("Datasource", ["database", "schema", "name"])

mock_session = mock.MagicMock()
mock_session.query.return_value.filter.return_value.all.return_value = []

with mock.patch("superset.security_manager") as mock_security_manager:
mock_security_manager.can_access_database.return_value = True

with mock.patch.object(
ConnectorRegistry, "get_all_datasources"
) as mock_get_all_datasources:
mock_get_all_datasources.return_value = [
Datasource("database1", "schema1", "table1"),
Datasource("database1", "schema1", "table2"),
Datasource("database2", None, "table1"),
]

datasources = ConnectorRegistry.get_user_datasources(mock_session)

assert sorted(datasources) == [
Datasource("database1", "schema1", "table1"),
Datasource("database1", "schema1", "table2"),
Datasource("database2", None, "table1"),
]

def test_get_user_datasources_gamma(self):
Datasource = namedtuple("Datasource", ["database", "schema", "name"])

mock_session = mock.MagicMock()
mock_session.query.return_value.filter.return_value.all.return_value = []

with mock.patch("superset.security_manager") as mock_security_manager:
mock_security_manager.can_access_database.return_value = False

with mock.patch.object(
ConnectorRegistry, "get_all_datasources"
) as mock_get_all_datasources:
mock_get_all_datasources.return_value = [
Datasource("database1", "schema1", "table1"),
Datasource("database1", "schema1", "table2"),
Datasource("database2", None, "table1"),
]

datasources = ConnectorRegistry.get_user_datasources(mock_session)

assert datasources == []

def test_get_user_datasources_gamma_with_schema(self):
Datasource = namedtuple("Datasource", ["database", "schema", "name"])

mock_session = mock.MagicMock()
mock_session.query.return_value.filter.return_value.all.return_value = [
Datasource("database1", "schema1", "table1"),
Datasource("database1", "schema1", "table2"),
]

with mock.patch("superset.security_manager") as mock_security_manager:
mock_security_manager.can_access_database.return_value = False

with mock.patch.object(
ConnectorRegistry, "get_all_datasources"
) as mock_get_all_datasources:
mock_get_all_datasources.return_value = [
Datasource("database1", "schema1", "table1"),
Datasource("database1", "schema1", "table2"),
Datasource("database2", None, "table1"),
]

datasources = ConnectorRegistry.get_user_datasources(mock_session)

assert sorted(datasources) == [
Datasource("database1", "schema1", "table1"),
Datasource("database1", "schema1", "table2"),
]


if __name__ == "__main__":
unittest.main()