Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactor how RBAC enabled/disabled status is determined for Grafana Cloud stacks #4279

Merged
merged 20 commits into from
May 14, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
74 changes: 20 additions & 54 deletions engine/apps/grafana_plugin/helpers/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ class GrafanaUserWithPermissions(GrafanaUser):


class GCOMInstanceInfoConfigFeatureToggles(typing.TypedDict):
accessControlOnCall: str
accessControlOnCall: typing.NotRequired[str]


class GCOMInstanceInfoConfig(typing.TypedDict):
Expand Down Expand Up @@ -211,7 +211,7 @@ def __init__(self, api_url: str, api_token: str) -> None:
def check_token(self) -> APIClientResponse:
return self.api_head("api/org")

def get_users_permissions(self, rbac_is_enabled_for_org: bool) -> UserPermissionsDict:
def get_users_permissions(self) -> typing.Optional[UserPermissionsDict]:
"""
It is possible that this endpoint may not be available for certain Grafana orgs.
Ex: for Grafana Cloud orgs whom have pinned their Grafana version to an earlier version
Expand All @@ -229,13 +229,9 @@ def get_users_permissions(self, rbac_is_enabled_for_org: bool) -> UserPermission
}
}
"""
if not rbac_is_enabled_for_org:
return {}
response, _ = self.api_get(self.USER_PERMISSION_ENDPOINT)
if response is None:
return {}
elif isinstance(response, list):
return {}
if response is None or isinstance(response, list):
return None

data: typing.Dict[str, typing.Dict[str, typing.List[str]]] = response

Expand All @@ -259,7 +255,13 @@ def get_users(self, rbac_is_enabled_for_org: bool, **kwargs) -> GrafanaUsersWith

users: GrafanaUsersWithPermissions = users_response

user_permissions = self.get_users_permissions(rbac_is_enabled_for_org)
user_permissions = {}
if rbac_is_enabled_for_org:
user_permissions = self.get_users_permissions(rbac_is_enabled_for_org)
if user_permissions is None:
# If we cannot fetch permissions when RBAC is enabled (ex. HTTP 500), we should not return any users
# to avoid potentially wiping-out OnCall's copy of permissions for all users
return []

# merge the users permissions response into the org users response
for user in users:
Expand Down Expand Up @@ -349,51 +351,6 @@ def get_instance_info(
data, _ = self.api_get(url)
return data

def _feature_is_enabled_via_enable_key(
self, instance_feature_toggles: GCOMInstanceInfoConfigFeatureToggles, feature_name: str, delimiter: str
):
return feature_name in instance_feature_toggles.get("enable", "").split(delimiter)

def _feature_toggle_is_enabled(self, instance_info: GCOMInstanceInfo, feature_name: str) -> bool:
"""
there are two ways that feature toggles can be enabled, this method takes into account both
https://grafana.com/docs/grafana/latest/setup-grafana/configure-grafana/#enable
"""
instance_info_config = instance_info.get("config", {})
if not instance_info_config:
return False

instance_feature_toggles = instance_info_config.get("feature_toggles", {})

if not instance_feature_toggles:
return False

# features enabled via enable key can be either space or comma delimited
# https://raintank-corp.slack.com/archives/C036J5B39/p1690183217162019

feature_enabled_via_enable_key_space_delimited = self._feature_is_enabled_via_enable_key(
instance_feature_toggles, feature_name, " "
)
feature_enabled_via_enable_key_comma_delimited = self._feature_is_enabled_via_enable_key(
instance_feature_toggles, feature_name, ","
)
feature_enabled_via_direct_key = instance_feature_toggles.get(feature_name, "false") == "true"

return (
feature_enabled_via_direct_key
or feature_enabled_via_enable_key_space_delimited
or feature_enabled_via_enable_key_comma_delimited
)

def is_rbac_enabled_for_stack(self, stack_id: str) -> bool:
"""
NOTE: must use an "Admin" GCOM token when calling this method
"""
instance_info = self.get_instance_info(stack_id, True)
if not instance_info:
return False
return self._feature_toggle_is_enabled(instance_info, "accessControlOnCall")

def get_instances(self, query: str, page_size=None):
if not page_size:
page, _ = self.api_get(query)
Expand All @@ -409,11 +366,20 @@ def get_instances(self, query: str, page_size=None):
yield page
cursor = page["nextCursor"]

def _is_stack_in_certain_state(self, stack_id: str, state: str) -> bool:
instance_info = self.get_instance_info(stack_id)
if not instance_info:
return False
return instance_info.get("status") == state

def is_stack_deleted(self, stack_id: str) -> bool:
url = f"instances?includeDeleted=true&id={stack_id}"
instance_infos, _ = self.api_get(url)
return instance_infos["items"] and instance_infos["items"][0].get("status") == self.STACK_STATUS_DELETED
joeyorlando marked this conversation as resolved.
Show resolved Hide resolved

def is_stack_active(self, stack_id: str) -> bool:
return self._is_stack_in_certain_state(stack_id, self.STACK_STATUS_ACTIVE)

def post_active_users(self, body) -> APIClientResponse:
return self.api_post("app-active-users", body)

Expand Down
79 changes: 0 additions & 79 deletions engine/apps/grafana_plugin/tests/test_gcom_api_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,85 +8,6 @@
from settings.base import CLOUD_LICENSE_NAME


class TestIsRbacEnabledForStack:
TEST_FEATURE_TOGGLE = "helloWorld"

@pytest.mark.parametrize(
"gcom_api_response,expected",
[
(None, False),
({}, False),
({"config": {}}, False),
({"config": {"feature_toggles": {}}}, False),
({"config": {"feature_toggles": {"accessControlOnCall": "false"}}}, False),
({"config": {"feature_toggles": {"accessControlOnCall": "true"}}}, True),
],
)
@patch("apps.grafana_plugin.helpers.client.GcomAPIClient.api_get")
def test_it_returns_based_on_feature_toggle_value(
self, mocked_gcom_api_client_api_get, gcom_api_response, expected
):
stack_id = 5
mocked_gcom_api_client_api_get.return_value = (gcom_api_response, {"status_code": 200})

api_client = GcomAPIClient("someFakeApiToken")
assert api_client.is_rbac_enabled_for_stack(stack_id) == expected
assert mocked_gcom_api_client_api_get.called_once_with(f"instances/{stack_id}?config=true")

@pytest.mark.parametrize(
"instance_info_feature_toggles,delimiter,expected",
[
({}, " ", False),
({"enable": "foo,bar,baz"}, " ", False),
({"enable": "foo,bar,baz"}, ",", False),
({"enable": f"foo,bar,baz{TEST_FEATURE_TOGGLE}"}, " ", False),
({"enable": f"foo,bar,baz{TEST_FEATURE_TOGGLE}"}, ",", False),
({"enable": f"foo,bar,baz,{TEST_FEATURE_TOGGLE}abc"}, ",", False),
({"enable": f"foo,bar,baz,{TEST_FEATURE_TOGGLE}"}, ",", True),
],
)
def test_feature_is_enabled_via_enable_key(self, instance_info_feature_toggles, delimiter, expected) -> None:
assert (
GcomAPIClient("someFakeApiToken")._feature_is_enabled_via_enable_key(
instance_info_feature_toggles, self.TEST_FEATURE_TOGGLE, delimiter
)
== expected
)

@pytest.mark.parametrize(
"instance_info,expected",
[
({}, False),
({"config": {}}, False),
({"config": {"feature_toggles": {}}}, False),
({"config": {"feature_toggles": {"enable": "foo,bar,baz"}}}, False),
({"config": {"feature_toggles": {TEST_FEATURE_TOGGLE: "false"}}}, False),
({"config": {"feature_toggles": {"enable": f"foo,bar,{TEST_FEATURE_TOGGLE}baz"}}}, False),
({"config": {"feature_toggles": {"enable": f"foo,bar,{TEST_FEATURE_TOGGLE},baz"}}}, True),
({"config": {"feature_toggles": {"enable": f"foo bar {TEST_FEATURE_TOGGLE} baz"}}}, True),
({"config": {"feature_toggles": {"enable": "foo bar baz", TEST_FEATURE_TOGGLE: "true"}}}, True),
({"config": {"feature_toggles": {TEST_FEATURE_TOGGLE: "true"}}}, True),
# this case will probably never happen, but lets account for it anyways
(
{
"config": {
"feature_toggles": {
"enable": f"foo,bar,baz,{TEST_FEATURE_TOGGLE}",
TEST_FEATURE_TOGGLE: "false",
}
}
},
True,
),
],
)
def test_feature_toggle_is_enabled(self, instance_info, expected) -> None:
assert (
GcomAPIClient("someFakeApiToken")._feature_toggle_is_enabled(instance_info, self.TEST_FEATURE_TOGGLE)
== expected
)


def build_paged_responses(page_size, pages, total_items):
response = []
remaining = total_items
Expand Down
96 changes: 84 additions & 12 deletions engine/apps/grafana_plugin/tests/test_grafana_api_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,19 +9,12 @@


class TestGetUsersPermissions:
def test_rbac_is_not_enabled_for_org(self):
api_client = GrafanaAPIClient(API_URL, API_TOKEN)
permissions = api_client.get_users_permissions(False)
assert len(permissions.keys()) == 0

@pytest.mark.parametrize("api_response_data", [None, []])
@patch("apps.grafana_plugin.helpers.client.GrafanaAPIClient.api_get")
def test_api_call_returns_none(self, mocked_grafana_api_client_api_get):
mocked_grafana_api_client_api_get.return_value = (None, "dfkjfdkj")

def test_api_call_returns_none_or_list(self, mocked_grafana_api_client_api_get, api_response_data):
mocked_grafana_api_client_api_get.return_value = (api_response_data, "dfkjfdkj")
api_client = GrafanaAPIClient(API_URL, API_TOKEN)

permissions = api_client.get_users_permissions(True)
assert len(permissions.keys()) == 0
assert api_client.get_users_permissions() is None

@patch("apps.grafana_plugin.helpers.client.GrafanaAPIClient.api_get")
def test_it_properly_transforms_the_data(self, mocked_grafana_api_client_api_get):
Expand All @@ -32,7 +25,7 @@ def test_it_properly_transforms_the_data(self, mocked_grafana_api_client_api_get

api_client = GrafanaAPIClient(API_URL, API_TOKEN)

permissions = api_client.get_users_permissions(True)
permissions = api_client.get_users_permissions()
assert permissions == {
"1": [
{"action": "grafana-oncall-app.alert-groups:read"},
Expand All @@ -41,6 +34,85 @@ def test_it_properly_transforms_the_data(self, mocked_grafana_api_client_api_get
}


class TestGetUsers:
@pytest.mark.parametrize(
"rbac_is_enabled,api_get_return_value,get_users_permissions_return_value,expected",
[
# RBAC is enabled - permissions are returned
(
True,
[
{"userId": 1, "foo": "bar"},
{"userId": 2, "foo": "baz"},
],
{
"1": [
{"action": "grafana-oncall-app.alert-groups:read"},
{"action": "grafana-oncall-app.alert-groups:write"},
],
},
[
{
"userId": 1,
"foo": "bar",
"permissions": [
{"action": "grafana-oncall-app.alert-groups:read"},
{"action": "grafana-oncall-app.alert-groups:write"},
],
},
{
"userId": 2,
"foo": "baz",
"permissions": [],
},
],
),
# RBAC is enabled - permissions endpoint returns no permissions (ex. HTTP 500)
(
True,
[
{"userId": 1, "foo": "bar"},
{"userId": 2, "foo": "baz"},
],
None,
[],
),
# RBAC is not enabled - we don't fetch permissions (hence don't care about its response)
(
False,
[
{"userId": 1, "foo": "bar"},
{"userId": 2, "foo": "baz"},
],
None,
[
{"userId": 1, "foo": "bar", "permissions": []},
{
"userId": 2,
"foo": "baz",
"permissions": [],
},
],
),
],
)
@patch("apps.grafana_plugin.helpers.client.GrafanaAPIClient.api_get")
@patch("apps.grafana_plugin.helpers.client.GrafanaAPIClient.get_users_permissions")
def test_it_returns_none_if_permissions_call_returns_none(
self,
mocked_grafana_api_client_get_users_permissions,
mocked_grafana_api_client_api_get,
rbac_is_enabled,
api_get_return_value,
get_users_permissions_return_value,
expected,
):
mocked_grafana_api_client_api_get.return_value = (api_get_return_value, "dfjkfdjkfd")
mocked_grafana_api_client_get_users_permissions.return_value = get_users_permissions_return_value
api_client = GrafanaAPIClient(API_URL, API_TOKEN)
assert api_client.get_users(rbac_is_enabled) == expected


class TestIsRbacEnabledForOrganization:
@pytest.mark.parametrize(
"api_response_connected,expected",
Expand Down
15 changes: 15 additions & 0 deletions engine/apps/user_management/models/organization.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import logging
import math
import typing
import uuid
from urllib.parse import urljoin
Expand Down Expand Up @@ -344,6 +345,20 @@ def get_notifiable_direct_paging_integrations(self) -> "RelatedManager['AlertRec
.distinct()
)

def should_be_considered_for_rbac_permissioning(self) -> bool:
"""
this is sort of a hacky workaround to address a cloud issue we introduced with the accessControlOncall
feature flag. The flag is technically enabled for all stacks, but the way in which OnCall used to be
reading it (via GCOM config.feature_flags for the stack) made it such that RBAC wasn't actually being
enabled for most stacks from the oncall backend perspective. Once we change things to start HEADing
the permissions search endpoint, this will effectively turn on RBAC for all orgs.. soo instead lets
slowly turn it on via the logic here
"""
# if rbac permissions are already enabled for the org, they're "grandfathered" in
if self.is_rbac_permissions_enabled:
return True
return self.id <= math.floor(Organization.objects.last().id * settings.CLOUD_RBAC_ROLLOUT_PERCENTAGE)

@property
def web_link(self):
return urljoin(self.grafana_url, "a/grafana-oncall-app/")
Expand Down
24 changes: 18 additions & 6 deletions engine/apps/user_management/sync.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,16 +28,28 @@ def sync_organization(organization: Organization) -> None:

def _sync_organization(organization: Organization) -> None:
grafana_api_client = GrafanaAPIClient(api_url=organization.grafana_url, api_token=organization.api_token)
rbac_is_enabled = organization.is_rbac_permissions_enabled

# NOTE: checking whether or not RBAC is enabled depends on whether we are dealing with an open-source or cloud
# stack. For Cloud we should make a call to the GCOM API, using an admin API token, and get the list of
# feature_toggles enabled for the stack. For open-source, simply make a HEAD request to the grafana instance's API
# and consider RBAC enabled if the list RBAC permissions endpoint returns 200. We cannot simply rely on the HEAD
# call in cloud because if an instance is not active, the grafana gateway will still return 200 for the
# HEAD request.
# stack. For open-source, simply make a HEAD request to the grafana instance's API and consider RBAC enabled if
joeyorlando marked this conversation as resolved.
Show resolved Hide resolved
# the list RBAC permissions endpoint returns 200.
#
# For cloud, we need to check the stack's status first. If the stack is active, we can make the same HEAD request
# to the grafana instance's API. If the stack is not active, we will simply rely on the org's previous state of
# org.is_rbac_permissions_enabled
if settings.LICENSE == settings.CLOUD_LICENSE_NAME:
# We cannot simply rely on the HEAD call in cloud because if an instance is not active
# the grafana gateway will still return 200 for the HEAD request.
stack_id = organization.stack_id
gcom_client = GcomAPIClient(settings.GRAFANA_COM_ADMIN_API_TOKEN)
rbac_is_enabled = gcom_client.is_rbac_enabled_for_stack(organization.stack_id)

if not organization.should_be_considered_for_rbac_permissioning():
rbac_is_enabled = False
elif gcom_client.is_stack_active(stack_id):
joeyorlando marked this conversation as resolved.
Show resolved Hide resolved
# the stack MUST be active for this check.. if it is in any other state
# the Grafana API risks returning an HTTP 200 but the actual permissions data that is
# synced later on will be empty (and we'd erase all RBAC permissions stored in OnCall)
rbac_is_enabled = grafana_api_client.is_rbac_enabled_for_organization()
else:
rbac_is_enabled = grafana_api_client.is_rbac_enabled_for_organization()

Expand Down
Loading
Loading