Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Return distinct list of shared dataset names - worksheet view #300

Closed
wants to merge 12 commits into from
1 change: 1 addition & 0 deletions backend/dataall/api/Objects/Environment/input_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,7 @@ class EnvironmentSortField(GraphQLEnumMapper):
gql.Argument('term', gql.String),
gql.Argument('page', gql.Integer),
gql.Argument('pageSize', gql.Integer),
gql.Argument('uniqueDatasets', gql.Boolean),
],
)

Expand Down
11 changes: 7 additions & 4 deletions backend/dataall/db/api/environment.py
Original file line number Diff line number Diff line change
Expand Up @@ -935,11 +935,14 @@ def paginated_shared_with_environment_datasets(
datasetUri = data.get('datasetUri')
q = q.filter(models.ShareObject.datasetUri == datasetUri)

if data.get("uniqueDatasets", False):
q = q.distinct(models.ShareObject.datasetUri)

if data.get('itemTypes', None):
itemTypes = data.get('itemTypes')
q = q.filter(
or_(*[models.ShareObjectItem.itemType == t for t in itemTypes])
)
itemTypes = data.get('itemTypes')
q = q.filter(
or_(*[models.ShareObjectItem.itemType == t for t in itemTypes])
)
if data.get('term'):
term = data.get('term')
q = q.filter(models.ShareObjectItem.itemName.ilike('%' + term + '%'))
Expand Down
9 changes: 9 additions & 0 deletions backend/dataall/tasks/data_sharing/data_sharing_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,11 @@

log = logging.getLogger(__name__)

# Share object statuses selected by `refresh_shares`: its query filters
# `models.ShareObject.status.in_(REFRESH_SHARES_STATES)`, so only shares that
# are currently Approved or Revoked are picked up.
# NOTE(review): the `main` side of the unresolved conflict in `refresh_shares`
# references `share_object_refreshable_states` instead — confirm which name
# should survive the merge.
REFRESH_SHARES_STATES = [
models.ShareObjectStatus.Approved.value,
models.ShareObjectStatus.Revoked.value,
]


class DataSharingService:
def __init__(self):
Expand Down Expand Up @@ -259,7 +264,11 @@ def refresh_shares(cls, engine: Engine) -> bool:
environments = session.query(models.Environment).all()
shares = (
session.query(models.ShareObject)
<<<<<<< HEAD
.filter(models.ShareObject.status.in_(REFRESH_SHARES_STATES))
=======
.filter(models.ShareObject.status.in_(share_object_refreshable_states))
>>>>>>> main
.all()
)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,10 @@ def __init__(
self.source_environment = source_environment
self.target_environment = target_environment
self.shared_db_name = self.build_shared_db_name()
<<<<<<< HEAD
=======
self.principals = self.get_share_principals()
>>>>>>> main

@abc.abstractmethod
def process_approved_shares(self) -> [str]:
Expand Down Expand Up @@ -82,11 +85,19 @@ def build_shared_db_name(self) -> str:
"""
return (self.dataset.GlueDatabaseName + '_shared_' + self.share.shareUri)[:254]

<<<<<<< HEAD
def build_share_data(self, principals: [str], table: models.DatasetTable) -> dict:
=======
def build_share_data(self, table: models.DatasetTable) -> dict:
>>>>>>> main
"""
Build aws dict for boto3 operations on Glue and LF from share data
Parameters
----------
<<<<<<< HEAD
principals : team role
=======
>>>>>>> main
table : dataset table

Returns
Expand All @@ -103,7 +114,11 @@ def build_share_data(self, table: models.DatasetTable) -> dict:
'target': {
'accountid': self.target_environment.AwsAccountId,
'region': self.target_environment.region,
<<<<<<< HEAD
'principals': principals,
=======
'principals': self.principals,
>>>>>>> main
'database': self.shared_db_name,
},
}
Expand Down Expand Up @@ -162,7 +177,11 @@ def create_shared_database(
"""
Creates the shared database if it does not exist.
1) Grants pivot role ALL permission on shareddb
<<<<<<< HEAD
2) Grant Team role DESCRIBE Only permission
=======
2) Grant principals DESCRIBE Only permission
>>>>>>> main

Parameters
----------
Expand Down Expand Up @@ -271,13 +290,20 @@ def create_resource_link(cls, **data) -> dict:
)
raise e

<<<<<<< HEAD
def revoke_table_resource_link_access(self, table: models.DatasetTable):
=======
def revoke_table_resource_link_access(self, table: models.DatasetTable, principals: [str]):
>>>>>>> main
"""
Revokes access to glue table resource link
Parameters
----------
table : models.DatasetTable
<<<<<<< HEAD
=======
principals: List of strings. IAM role arn and Quicksight groups
>>>>>>> main

Returns
-------
Expand All @@ -296,6 +322,38 @@ def revoke_table_resource_link_access(self, table: models.DatasetTable, principa
)
return True

<<<<<<< HEAD
logger.info(
f'Revoking resource link access '
f'on {self.target_environment.AwsAccountId}/{self.shared_db_name}/{table.GlueTableName} '
f'for principal {self.env_group.environmentIAMRoleArn}'
)
LakeFormation.batch_revoke_permissions(
SessionHelper.remote_session(self.target_environment.AwsAccountId).client(
'lakeformation', region_name=self.target_environment.region
),
self.target_environment.AwsAccountId,
[
{
'Id': str(uuid.uuid4()),
'Principal': {
'DataLakePrincipalIdentifier': self.env_group.environmentIAMRoleArn
},
'Resource': {
'Table': {
'DatabaseName': self.shared_db_name,
'Name': table.GlueTableName,
'CatalogId': self.target_environment.AwsAccountId,
}
},
'Permissions': ['DESCRIBE'],
}
],
)
return True

def revoke_source_table_access(self, table):
=======
for principal in principals:
logger.info(
f'Revoking resource link access '
Expand Down Expand Up @@ -328,6 +386,7 @@ def revoke_table_resource_link_access(self, table: models.DatasetTable, principa
return True

def revoke_source_table_access(self, table, principals: [str]):
>>>>>>> main
"""
Revokes access to the source glue table
Parameters
Expand All @@ -354,14 +413,22 @@ def revoke_source_table_access(self, table, principals: [str]):
logger.info(
f'Revoking source table access '
f'on {self.source_environment.AwsAccountId}/{self.dataset.GlueDatabaseName}/{table.GlueTableName} '
<<<<<<< HEAD
f'for principal {self.env_group.environmentIAMRoleArn}'
=======
f'for principals {principals}'
>>>>>>> main
)
LakeFormation.revoke_source_table_access(
target_accountid=self.target_environment.AwsAccountId,
region=self.target_environment.region,
source_database=self.dataset.GlueDatabaseName,
source_table=table.GlueTableName,
<<<<<<< HEAD
target_principal=self.env_group.environmentIAMRoleArn,
=======
target_principals=principals,
>>>>>>> main
source_accountid=self.source_environment.AwsAccountId,
)
return True
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,18 @@ def __init__(
def process_approved_shares(self) -> bool:
"""
1) Grant ALL permissions to pivotRole for source database in source account
<<<<<<< HEAD
2) Gets share principals and build shared db name
3) Creates the shared database in target account if it doesn't exist
4) For each shared table:
a) update its status to SHARE_IN_PROGRESS with Action Start
b) check if share item exists on glue catalog raise error if not and flag share item status to failed
c) grant external account to target account
d) accept Ram invitation if pending
e) create resource link on target account
f) grant permission to table for team role in source account
g) grant permission to resource link table for team role in target account
=======
2) Get share principals (requester IAM role and QS groups) and build shared db name
3) Create the shared database in target account if it doesn't exist
4) For each shared table:
Expand All @@ -44,6 +56,7 @@ def process_approved_shares(self) -> bool:
e) create resource link for table in target account
f) grant permission to table for requester team IAM role in source account
g) grant permission to resource link table for requester team IAM role in target account
>>>>>>> main
h) update share item status to SHARE_SUCCESSFUL with Action Success

Returns
Expand Down Expand Up @@ -90,7 +103,11 @@ def process_approved_shares(self) -> bool:

self.check_share_item_exists_on_glue_catalog(share_item, table)

<<<<<<< HEAD
data = self.build_share_data(principals, table)
=======
data = self.build_share_data(table)
>>>>>>> main
self.share_table_with_target_account(**data)

(
Expand Down Expand Up @@ -134,8 +151,11 @@ def process_revoked_shares(self) -> bool:
'##### Starting Revoking tables cross account #######'
)
success = True
<<<<<<< HEAD
=======
shared_db_name = self.build_shared_db_name()
principals = self.get_share_principals()
>>>>>>> main
for table in self.revoked_tables:
share_item = api.ShareObject.find_share_item_by_table(
self.session, self.share, table
Expand All @@ -149,12 +169,20 @@ def process_revoked_shares(self) -> bool:

self.check_share_item_exists_on_glue_catalog(share_item, table)

<<<<<<< HEAD
log.info(f'Starting revoke access for table: {table.GlueTableName}')

self.revoke_table_resource_link_access(table)

self.revoke_source_table_access(table)
=======
log.info(f'Starting revoke access for table: {table.GlueTableName} in database {shared_db_name} '
f'For principals {principals}')

self.revoke_table_resource_link_access(table, principals)

self.revoke_source_table_access(table, principals)
>>>>>>> main

self.delete_resource_link_table(table)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,14 +32,24 @@ def __init__(
def process_approved_shares(self) -> bool:
"""
1) Grant ALL permissions to pivotRole for source database in source account
<<<<<<< HEAD
2) Gets share principals and build shared db name
3) Creates the shared database in target account if it doesn't exist
=======
2) Get share principals (requester IAM role and QS groups) and build shared db name
3) Create the shared database in target account if it doesn't exist
>>>>>>> main
4) For each shared table:
a) update its status to SHARE_IN_PROGRESS with Action Start
b) check if share item exists on glue catalog; if not, raise an error and flag share item status as failed
c) create resource link in account
<<<<<<< HEAD
d) grant permission to table for team role in account
e) grant permission to resource link table for team role in account
=======
d) grant permission to table for requester team IAM role in account
e) grant permission to resource link table for requester team IAM role in account
>>>>>>> main
f) update share item status to SHARE_SUCCESSFUL with Action Success

Returns
Expand Down Expand Up @@ -84,7 +94,11 @@ def process_approved_shares(self) -> bool:
log.info(f'Starting sharing access for table: {table.GlueTableName}')
self.check_share_item_exists_on_glue_catalog(share_item, table)

<<<<<<< HEAD
data = self.build_share_data(principals, table)
=======
data = self.build_share_data(table)
>>>>>>> main
self.create_resource_link(**data)

new_state = shared_item_SM.run_transition(models.Enums.ShareItemActions.Success.value)
Expand Down Expand Up @@ -114,8 +128,11 @@ def process_revoked_shares(self) -> bool:
False if revoke fails
"""
success = True
<<<<<<< HEAD
=======
shared_db_name = self.build_shared_db_name()
principals = self.get_share_principals()
>>>>>>> main
for table in self.revoked_tables:
share_item = api.ShareObject.find_share_item_by_table(
self.session, self.share, table
Expand All @@ -134,12 +151,20 @@ def process_revoked_shares(self) -> bool:
try:
self.check_share_item_exists_on_glue_catalog(share_item, table)

<<<<<<< HEAD
log.info(f'Starting revoke access for table: {table.GlueTableName}')

self.revoke_table_resource_link_access(table)

self.revoke_source_table_access(table)
=======
log.info(f'Starting revoke access for table: {table.GlueTableName} in database {shared_db_name} '
f'For principals {principals}')

self.revoke_table_resource_link_access(table, principals)

self.revoke_source_table_access(table, principals)
>>>>>>> main

self.delete_resource_link_table(table)

Expand Down
2 changes: 1 addition & 1 deletion frontend/src/views/Worksheets/WorksheetView.js
Original file line number Diff line number Diff line change
Expand Up @@ -161,7 +161,7 @@ const WorksheetView = () => {
page: 1,
pageSize: 10000,
term: '',
itemTypes: 'DatasetTable'
uniqueDatasets: true
}
})
);
Expand Down
Loading