Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Drop the grants of an authorized view on full refresh #1189

Open
wants to merge 13 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 8 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 22 additions & 0 deletions dbt/adapters/bigquery/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,3 +43,25 @@ def add_access_entry_to_dataset(dataset: Dataset, access_entry: AccessEntry) ->
access_entries.append(access_entry)
dataset.access_entries = access_entries
return dataset



def delete_access_entry_from_dataset(dataset: Dataset, access_entry: AccessEntry) -> Dataset:
    """Remove an access entry from a dataset.

    If no entry with the same ``entity_id`` is present, the dataset is
    returned unchanged.

    Args:
        dataset (Dataset): the dataset to be updated
        access_entry (AccessEntry): the access entry to be removed from the dataset

    Returns:
        Dataset: the updated dataset object. Note the change is local only;
        callers must still persist it via ``client.update_dataset``.
    """
    existing_ids = {entry.entity_id for entry in dataset.access_entries}
    if access_entry.entity_id in existing_ids:
        # Keep every entry except the one being revoked.
        dataset.access_entries = [
            entry
            for entry in dataset.access_entries
            if entry.entity_id != access_entry.entity_id
        ]
    else:
        full_dataset_id = f"{dataset.project}.{dataset.dataset_id}"
        # NOTE(review): library code should emit this through the adapter
        # logger rather than print(); kept as print() because the module's
        # import block is not visible in this view — TODO confirm and switch.
        print(
            f"No need to revoke dataset access for '{access_entry.entity_id}' "
            f"on dataset '{full_dataset_id}': it doesn't exist"
        )
    return dataset

56 changes: 49 additions & 7 deletions dbt/adapters/bigquery/impl.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@
from dbt.adapters.bigquery import BigQueryColumn, BigQueryConnectionManager
from dbt.adapters.bigquery.column import get_nested_column_data_types
from dbt.adapters.bigquery.connections import BigQueryAdapterResponse
from dbt.adapters.bigquery.dataset import add_access_entry_to_dataset, is_access_entry_in_dataset
from dbt.adapters.bigquery.dataset import add_access_entry_to_dataset, is_access_entry_in_dataset, delete_access_entry_from_dataset
from dbt.adapters.bigquery.python_submissions import (
ClusterDataprocHelper,
ServerlessDataProcHelper,
Expand Down Expand Up @@ -817,10 +817,46 @@ def describe_relation(
return parser.from_bq_table(bq_table)
return None



@available.parse_none
def grant_access_to(self, entity, entity_type, role, grant_target_dict, full_refresh=False):
    """Grant an entity access to the dataset described by ``grant_target_dict``.

    Args:
        entity: the entity to grant access to (a relation when
            ``entity_type == "view"``, converted to its API representation)
        entity_type: the BigQuery access-entry entity type (e.g. "view")
        role: the role to grant; may be None for entity-based grants
        grant_target_dict: dict holding the target "project" and "dataset"
        full_refresh: when True and the access entry already exists, revoke
            it first and re-grant it instead of skipping the update
    """
    conn: BigQueryConnectionManager = self.connections.get_thread_connection()
    client = conn.handle
    GrantTarget.validate(grant_target_dict)
    grant_target = GrantTarget.from_dict(grant_target_dict)
    if entity_type == "view":
        entity = self.get_table_ref_from_relation(entity).to_api_repr()
    with _dataset_lock:
        dataset_ref = self.connections.dataset_ref(grant_target.project, grant_target.dataset)
        dataset = client.get_dataset(dataset_ref)
        access_entry = AccessEntry(role, entity_type, entity)
        full_dataset_id = f"{dataset.project}.{dataset.dataset_id}"
        if is_access_entry_in_dataset(dataset, access_entry):
            if not full_refresh:
                # Nothing to do: the grant is already in place.
                logger.warning(f"Access entry {access_entry} already exists in dataset")
                return
            # Full refresh: drop the stale entry before re-granting it below.
            dataset = delete_access_entry_from_dataset(dataset, access_entry)
            dataset = client.update_dataset(dataset, ["access_entries"])  # API request
            logger.info(
                f"Revoked dataset access for '{access_entry.entity_id}' "
                f"on dataset '{full_dataset_id}'"
            )
        dataset = add_access_entry_to_dataset(dataset, access_entry)
        dataset = client.update_dataset(dataset, ["access_entries"])  # API request
        logger.info(
            f"Granted dataset access for '{access_entry.entity_id}' "
            f"on dataset '{full_dataset_id}'"
        )


@available.parse_none
def grant_access_to(self, entity, entity_type, role, grant_target_dict):
def remove_grant_access_to(self, entity, entity_type, role, grant_target_dict):
"""
Given an entity, grants it access to a dataset.
Given an entity, grants access to a dataset.
colin-rogers-dbt marked this conversation as resolved.
Show resolved Hide resolved
"""
conn: BigQueryConnectionManager = self.connections.get_thread_connection()
client = conn.handle
Expand All @@ -832,12 +868,18 @@ def grant_access_to(self, entity, entity_type, role, grant_target_dict):
dataset_ref = self.connections.dataset_ref(grant_target.project, grant_target.dataset)
dataset = client.get_dataset(dataset_ref)
access_entry = AccessEntry(role, entity_type, entity)
# only perform update if access entry not in dataset
# only perform removing if access entry in dataset
full_dataset_id = f"{dataset.project}.{dataset.dataset_id}"
if is_access_entry_in_dataset(dataset, access_entry):
logger.warning(f"Access entry {access_entry} " f"already exists in dataset")
dataset = delete_access_entry_from_dataset(dataset,access_entry)
dataset = client.update_dataset(
dataset,
["access_entries"],
) # Make an API request.

logger.info(f"Revoked dataset access for '{access_entry.entity_id}' to ' dataset '{full_dataset_id}.'")
else:
dataset = add_access_entry_to_dataset(dataset, access_entry)
client.update_dataset(dataset, ["access_entries"])
logger.warning(f"Access entry {access_entry} not in the dataset {full_dataset_id} no need to remove it")

@available.parse_none
def get_dataset_location(self, relation):
Expand Down
2 changes: 1 addition & 1 deletion dbt/include/bigquery/macros/materializations/view.sql
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@

{% if config.get('grant_access_to') %}
{% for grant_target_dict in config.get('grant_access_to') %}
{% do adapter.grant_access_to(this, 'view', None, grant_target_dict) %}
{% do adapter.grant_access_to(this, 'view', None, grant_target_dict, should_full_refresh()) %}
{% endfor %}
{% endif %}

Expand Down
17 changes: 16 additions & 1 deletion tests/functional/adapter/test_grant_access_to.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import pytest

from dbt.tests.util import run_dbt

import re

def select_1(dataset: str, materialized: str):
config = f"""config(
Expand Down Expand Up @@ -86,6 +86,21 @@ def test_grant_access_succeeds(self, project, setup_grant_schema, teardown_grant
assert len(results) == 2



class TestAccessGrantSucceedsWithFullRefresh(TestAccessGrantSucceeds):
def test_grant_access_succeeds(self, project, setup_grant_schema, teardown_grant_schema,capsys):
# Need to run twice to validate idempotency
results = run_dbt(["run"])
assert len(results) == 2
time.sleep(10)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why do we need the sleep call?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't know exactly why it was used; the sleep call already existed in the non-full-refresh version of this test, so I reproduced it as a precaution.

results = run_dbt(["run","--full-refresh"])
assert len(results) == 2
captured = capsys.readouterr()
assert not re.search(r"BigQuery adapter: Access entry <AccessEntry: .* already exists in dataset",captured[0])




class TestAccessGrantFails:
@pytest.fixture(scope="class")
def models(self):
Expand Down
29 changes: 29 additions & 0 deletions tests/unit/test_bigquery_adapter.py
Original file line number Diff line number Diff line change
Expand Up @@ -969,6 +969,35 @@ def test_grant_access_to_calls_update_with_valid_access_entry(self):
self.mock_client.update_dataset.assert_called_once()


def test_remove_grant_access_to_calls_update_with_valid_access_entry(self):
    # Grants access for a view to a different project's dataset, then revokes
    # it again via remove_grant_access_to, checking the mocked BigQuery client.
    a_different_entity = BigQueryRelation.from_dict(
        {
            "type": None,
            "path": {
                "database": "another-test-project",
                "schema": "test_schema_2",
                "identifier": "my_view",
            },
            "quote_policy": {"identifier": True},
        }
    )
    grant_target_dict = {"dataset": "someOtherDataset", "project": "someProject"}
    self.adapter.grant_access_to(
        entity=a_different_entity,
        entity_type="view",
        role=None,
        grant_target_dict=grant_target_dict,
    )
    self.mock_client.update_dataset.assert_called_once()
    self.adapter.remove_grant_access_to(
        entity=a_different_entity,
        entity_type="view",
        role=None,
        grant_target_dict=grant_target_dict,
    )
    # NOTE(review): assert_called_once() is repeated after a second adapter
    # call. If remove_grant_access_to actually issues its own update_dataset
    # call, this assertion can never pass (two calls total); if it does pass,
    # the removal path never reached the API and this test verifies nothing
    # about removal. Consider asserting call_count == 2, or reset_mock()
    # between the two calls — TODO confirm against the mock fixtures.
    self.mock_client.update_dataset.assert_called_once()


@pytest.mark.parametrize(
["input", "output"],
[
Expand Down
30 changes: 29 additions & 1 deletion tests/unit/test_dataset.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from dbt.adapters.bigquery.dataset import add_access_entry_to_dataset, is_access_entry_in_dataset
from dbt.adapters.bigquery.dataset import add_access_entry_to_dataset, is_access_entry_in_dataset, delete_access_entry_from_dataset
from dbt.adapters.bigquery import BigQueryRelation

from google.cloud.bigquery import Dataset, AccessEntry, DatasetReference
Expand Down Expand Up @@ -88,3 +88,31 @@ def test_is_access_entry_in_dataset_returns_false_if_entry_not_in_dataset():
dataset = Dataset(dataset_ref)
access_entry = AccessEntry(None, "table", entity)
assert not is_access_entry_in_dataset(dataset, access_entry)




def test_delete_access_to_dataset_updates_dataset():
    """An access entry granted to a dataset can be revoked again.

    Adds a table access entry, confirms it is present, removes it, and
    confirms it is gone.
    """
    project_name = "someDb"
    dataset_name = "someDataset"
    entity = BigQueryRelation.from_dict(
        {
            "type": None,
            "path": {
                "database": "test-project",
                "schema": "test_schema",
                "identifier": "my_table",
            },
            "quote_policy": {"identifier": False},
        }
    ).to_dict()
    dataset_ref = DatasetReference(project=project_name, dataset_id=dataset_name)
    bq_dataset = Dataset(dataset_ref)
    entry = AccessEntry(None, "table", entity)

    bq_dataset = add_access_entry_to_dataset(bq_dataset, entry)
    assert is_access_entry_in_dataset(bq_dataset, entry)

    bq_dataset = delete_access_entry_from_dataset(bq_dataset, entry)
    assert not is_access_entry_in_dataset(bq_dataset, entry)
Loading