From 3170daeea288a3254edc47717394ffad95799159 Mon Sep 17 00:00:00 2001 From: dlpzx Date: Fri, 7 Jun 2024 14:13:27 +0200 Subject: [PATCH 01/14] Split APIs and types between S3-shares and shares-base --- .../s3_datasets_shares/api/__init__.py | 3 +- .../s3_datasets_shares/api/mutations.py | 115 ------- .../modules/s3_datasets_shares/api/queries.py | 83 ++--- .../s3_datasets_shares/api/resolvers.py | 304 +----------------- .../modules/s3_datasets_shares/api/types.py | 275 +--------------- .../db/share_object_repositories.py | 74 ++++- .../services/dataset_sharing_service.py | 57 +++- .../services/share_object_service.py | 44 +-- .../dataall/modules/shares_base/__init__.py | 6 +- .../modules/shares_base/api/__init__.py | 9 + .../api/input_types.py | 0 .../modules/shares_base/api/mutations.py | 118 +++++++ .../modules/shares_base/api/queries.py | 46 +++ .../modules/shares_base/api/resolvers.py | 295 +++++++++++++++++ .../dataall/modules/shares_base/api/types.py | 256 +++++++++++++++ .../components/EnvironmentSharedDatasets.js | 3 +- .../modules/Environments/services/index.js | 1 + .../services}/searchEnvironmentDataItems.js | 3 - .../Shares/services/getS3ConsumptionData.js | 16 + .../modules/Shares/services/getShareObject.js | 5 - frontend/src/modules/Shares/services/index.js | 1 + .../src/modules/Worksheets/services/index.js | 1 + .../listS3DatasetsSharedWithEnvGroup.js | 24 ++ .../modules/Worksheets/views/WorksheetView.js | 30 +- .../src/services/graphql/Environment/index.js | 1 - .../ShareObject/getShareRequestsToMe.js | 2 - 26 files changed, 948 insertions(+), 824 deletions(-) rename backend/dataall/modules/{s3_datasets_shares => shares_base}/api/input_types.py (100%) create mode 100644 backend/dataall/modules/shares_base/api/mutations.py create mode 100644 backend/dataall/modules/shares_base/api/queries.py create mode 100644 backend/dataall/modules/shares_base/api/resolvers.py create mode 100644 backend/dataall/modules/shares_base/api/types.py rename frontend/src/{services/graphql/Environment => modules/Environments/services}/searchEnvironmentDataItems.js (90%) create mode 100644 frontend/src/modules/Shares/services/getS3ConsumptionData.js create mode 100644 frontend/src/modules/Worksheets/services/listS3DatasetsSharedWithEnvGroup.js diff --git a/backend/dataall/modules/s3_datasets_shares/api/__init__.py b/backend/dataall/modules/s3_datasets_shares/api/__init__.py index fc054d1d2..313b64cce 100644 --- a/backend/dataall/modules/s3_datasets_shares/api/__init__.py +++ b/backend/dataall/modules/s3_datasets_shares/api/__init__.py @@ -1,9 +1,8 @@ from dataall.modules.s3_datasets_shares.api import ( - input_types, mutations, queries, resolvers, types, ) -__all__ = ['resolvers', 'types', 'input_types', 'queries', 'mutations'] +__all__ = ['resolvers', 'queries', 'mutations', 'types'] diff --git a/backend/dataall/modules/s3_datasets_shares/api/mutations.py b/backend/dataall/modules/s3_datasets_shares/api/mutations.py index f669e8181..0af2eb548 100644 --- a/backend/dataall/modules/s3_datasets_shares/api/mutations.py +++ b/backend/dataall/modules/s3_datasets_shares/api/mutations.py @@ -1,123 +1,8 @@ from dataall.base.api import gql from dataall.modules.s3_datasets_shares.api.resolvers import ( - add_shared_item, - approve_share_object, - create_share_object, - delete_share_object, - reapply_items_share_object, - reject_share_object, - remove_shared_item, - revoke_items_share_object, - submit_share_object, - update_share_reject_purpose, - update_share_request_purpose, - verify_items_share_object, 
verify_dataset_share_objects, ) -createShareObject = gql.MutationField( - name='createShareObject', - args=[ - gql.Argument(name='datasetUri', type=gql.NonNullableType(gql.String)), - gql.Argument(name='itemUri', type=gql.String), - gql.Argument(name='itemType', type=gql.String), - gql.Argument(name='input', type=gql.NonNullableType(gql.Ref('NewShareObjectInput'))), - ], - type=gql.Ref('ShareObject'), - resolver=create_share_object, -) - -deleteShareObject = gql.MutationField( - name='deleteShareObject', - args=[gql.Argument(name='shareUri', type=gql.NonNullableType(gql.String))], - resolver=delete_share_object, - type=gql.Boolean, -) - -addSharedItem = gql.MutationField( - name='addSharedItem', - args=[ - gql.Argument(name='shareUri', type=gql.NonNullableType(gql.String)), - gql.Argument(name='input', type=gql.Ref('AddSharedItemInput')), - ], - type=gql.Ref('ShareItem'), - resolver=add_shared_item, -) - - -removeSharedItem = gql.MutationField( - name='removeSharedItem', - args=[gql.Argument(name='shareItemUri', type=gql.NonNullableType(gql.String))], - resolver=remove_shared_item, - type=gql.Boolean, -) - -submitShareObject = gql.MutationField( - name='submitShareObject', - args=[gql.Argument(name='shareUri', type=gql.NonNullableType(gql.String))], - type=gql.Ref('ShareObject'), - resolver=submit_share_object, -) - -approveShareObject = gql.MutationField( - name='approveShareObject', - args=[gql.Argument(name='shareUri', type=gql.NonNullableType(gql.String))], - type=gql.Ref('ShareObject'), - resolver=approve_share_object, -) - - -rejectShareObject = gql.MutationField( - name='rejectShareObject', - args=[ - gql.Argument(name='shareUri', type=gql.NonNullableType(gql.String)), - gql.Argument(name='rejectPurpose', type=gql.String), - ], - type=gql.Ref('ShareObject'), - resolver=reject_share_object, -) - -revokeItemsShareObject = gql.MutationField( - name='revokeItemsShareObject', - args=[gql.Argument(name='input', type=gql.Ref('ShareItemSelectorInput'))], - type=gql.Ref('ShareObject'), - resolver=revoke_items_share_object, -) - -verifyItemsShareObject = gql.MutationField( - name='verifyItemsShareObject', - args=[gql.Argument(name='input', type=gql.Ref('ShareItemSelectorInput'))], - type=gql.Ref('ShareObject'), - resolver=verify_items_share_object, -) - -reApplyItemsShareObject = gql.MutationField( - name='reApplyItemsShareObject', - args=[gql.Argument(name='input', type=gql.Ref('ShareItemSelectorInput'))], - type=gql.Ref('ShareObject'), - resolver=reapply_items_share_object, -) - -updateShareRejectReason = gql.MutationField( - name='updateShareRejectReason', - args=[ - gql.Argument(name='shareUri', type=gql.NonNullableType(gql.String)), - gql.Argument(name='rejectPurpose', type=gql.String), - ], - type=gql.Boolean, - resolver=update_share_reject_purpose, -) - - -updateShareRequestReason = gql.MutationField( - name='updateShareRequestReason', - args=[ - gql.Argument(name='shareUri', type=gql.NonNullableType(gql.String)), - gql.Argument(name='requestPurpose', type=gql.String), - ], - type=gql.Boolean, - resolver=update_share_request_purpose, -) verifyDatasetShareObjects = gql.MutationField( name='verifyDatasetShareObjects', diff --git a/backend/dataall/modules/s3_datasets_shares/api/queries.py b/backend/dataall/modules/s3_datasets_shares/api/queries.py index bad081806..d9b547c82 100644 --- a/backend/dataall/modules/s3_datasets_shares/api/queries.py +++ b/backend/dataall/modules/s3_datasets_shares/api/queries.py @@ -1,58 +1,12 @@ from dataall.base.api import gql from 
dataall.modules.s3_datasets_shares.api.resolvers import ( get_dataset_shared_assume_role_url, - get_share_object, - get_share_logs, - list_shared_with_environment_data_items, - list_shares_in_my_inbox, - list_shares_in_my_outbox, - list_dataset_share_objects, list_shared_tables_by_env_dataset, + list_dataset_share_objects, + list_shared_databases_tables_with_env_group, + get_s3_consumption_data, ) -getShareObject = gql.QueryField( - name='getShareObject', - args=[gql.Argument(name='shareUri', type=gql.NonNullableType(gql.String))], - type=gql.Ref('ShareObject'), - resolver=get_share_object, -) - - -getShareRequestsFromMe = gql.QueryField( - name='getShareRequestsFromMe', - args=[gql.Argument(name='filter', type=gql.Ref('ShareObjectFilter'))], - type=gql.Ref('ShareSearchResult'), - resolver=list_shares_in_my_outbox, -) - -getShareRequestsToMe = gql.QueryField( - name='getShareRequestsToMe', - args=[gql.Argument(name='filter', type=gql.Ref('ShareObjectFilter'))], - type=gql.Ref('ShareSearchResult'), - resolver=list_shares_in_my_inbox, -) - -searchEnvironmentDataItems = gql.QueryField( - name='searchEnvironmentDataItems', - args=[ - gql.Argument(name='environmentUri', type=gql.NonNullableType(gql.String)), - gql.Argument(name='filter', type=gql.Ref('EnvironmentDataItemFilter')), - ], - resolver=list_shared_with_environment_data_items, - type=gql.Ref('EnvironmentPublishedItemSearchResults'), - test_scope='Dataset', -) - -listShareObjects = gql.QueryField( - name='listDatasetShareObjects', - resolver=list_dataset_share_objects, - args=[ - gql.Argument(name='datasetUri', type=gql.NonNullableType(gql.String)), - gql.Argument(name='environmentUri', type=gql.String), - gql.Argument(name='page', type=gql.Integer), - ], - type=gql.Ref('ShareSearchResult'), -) getSharedDatasetTables = gql.QueryField( name='getSharedDatasetTables', @@ -72,10 +26,31 @@ test_scope='Dataset', ) +listShareObjects = gql.QueryField( + name='listDatasetShareObjects', + resolver=list_dataset_share_objects, + args=[ + gql.Argument(name='datasetUri', type=gql.NonNullableType(gql.String)), + gql.Argument(name='environmentUri', type=gql.String), + gql.Argument(name='page', type=gql.Integer), + ], + type=gql.Ref('ShareSearchResult'), +) -getShareLogs = gql.QueryField( - name='getShareLogs', - args=[gql.Argument(name='shareUri', type=gql.NonNullableType(gql.String))], - type=gql.ArrayType(gql.Ref('ShareLog')), - resolver=get_share_logs, +getS3ConsumptionData = gql.QueryField( + name='getS3ConsumptionData', + args=[gql.Argument(name='shareUri', type=gql.String)], + type=gql.Ref('S3ConsumptionData'), + resolver=get_s3_consumption_data, +) # todo: Once PR 1277 is merged modify shareView to use this query + + +listS3DatasetsSharedWithEnvGroup = gql.QueryField( + name='listS3DatasetsSharedWithEnvGroup', + resolver=list_shared_databases_tables_with_env_group, + args=[ + gql.Argument(name='groupUri', type=gql.NonNullableType(gql.String)), + gql.Argument(name='environmentUri', type=gql.NonNullableType(gql.String)), + ], + type=gql.ArrayType(gql.Ref('SharedDatabaseTableItem')), ) diff --git a/backend/dataall/modules/s3_datasets_shares/api/resolvers.py b/backend/dataall/modules/s3_datasets_shares/api/resolvers.py index d1b2857aa..79a0d53e3 100644 --- a/backend/dataall/modules/s3_datasets_shares/api/resolvers.py +++ b/backend/dataall/modules/s3_datasets_shares/api/resolvers.py @@ -1,19 +1,9 @@ import logging from dataall.base.api.context import Context -from dataall.core.environment.db.environment_models import Environment -from 
dataall.core.environment.services.environment_service import EnvironmentService -from dataall.core.organizations.db.organization_repositories import OrganizationRepository from dataall.base.db.exceptions import RequiredParameter from dataall.base.feature_toggle_checker import is_feature_enabled -from dataall.modules.shares_base.services.shares_enums import ShareObjectPermission -from dataall.modules.shares_base.db.share_object_models import ShareObjectItem, ShareObject -from dataall.modules.s3_datasets_shares.services.share_item_service import ShareItemService -from dataall.modules.s3_datasets_shares.services.share_object_service import ShareObjectService from dataall.modules.s3_datasets_shares.services.dataset_sharing_service import DatasetSharingService -from dataall.modules.s3_datasets_shares.aws.glue_client import GlueClient -from dataall.modules.s3_datasets.db.dataset_repositories import DatasetRepository -from dataall.modules.s3_datasets.db.dataset_models import DatasetStorageLocation, DatasetTable, S3Dataset log = logging.getLogger(__name__) @@ -50,280 +40,13 @@ def validate_dataset_share_selector_input(data): raise RequiredParameter('shareUris') -def create_share_object( - context: Context, - source, - datasetUri: str = None, - itemUri: str = None, - itemType: str = None, - input: dict = None, -): - RequestValidator.validate_creation_request(input) - - return ShareObjectService.create_share_object( - uri=input['environmentUri'], - dataset_uri=datasetUri, - item_uri=itemUri, - item_type=itemType, - group_uri=input['groupUri'], - principal_id=input['principalId'], - principal_type=input['principalType'], - requestPurpose=input.get('requestPurpose'), - attachMissingPolicies=input.get('attachMissingPolicies'), - ) - - -def submit_share_object(context: Context, source, shareUri: str = None): - return ShareObjectService.submit_share_object(uri=shareUri) - - -def approve_share_object(context: Context, source, shareUri: str = None): - return ShareObjectService.approve_share_object(uri=shareUri) - - -def reject_share_object( - context: Context, - source, - shareUri: str = None, - rejectPurpose: str = None, -): - return ShareObjectService.reject_share_object(uri=shareUri, reject_purpose=rejectPurpose) - - -def revoke_items_share_object(context: Context, source, input): - RequestValidator.validate_item_selector_input(input) - share_uri = input.get('shareUri') - revoked_uris = input.get('itemUris') - return ShareItemService.revoke_items_share_object(uri=share_uri, revoked_uris=revoked_uris) - - -def verify_items_share_object(context: Context, source, input): - RequestValidator.validate_item_selector_input(input) - share_uri = input.get('shareUri') - verify_item_uris = input.get('itemUris') - return ShareItemService.verify_items_share_object(uri=share_uri, item_uris=verify_item_uris) - - -def reapply_items_share_object(context: Context, source, input): - RequestValidator.validate_item_selector_input(input) - share_uri = input.get('shareUri') - reapply_item_uris = input.get('itemUris') - return ShareItemService.reapply_items_share_object(uri=share_uri, item_uris=reapply_item_uris) - - -def delete_share_object(context: Context, source, shareUri: str = None): - return ShareObjectService.delete_share_object(uri=shareUri) - - -def add_shared_item(context, source, shareUri: str = None, input: dict = None): - return ShareItemService.add_shared_item(uri=shareUri, data=input) - - -def remove_shared_item(context, source, shareItemUri: str = None): - return 
ShareItemService.remove_shared_item(uri=shareItemUri) - - -def resolve_shared_item(context, source: ShareObjectItem, **kwargs): - if not source: - return None - return ShareItemService.resolve_shared_item(uri=source.shareUri, item=source) - - -def get_share_object(context, source, shareUri: str = None): - return ShareObjectService.get_share_object(uri=shareUri) - - -def get_share_logs(context, source, shareUri: str): - return ShareObjectService.get_share_logs(shareUri) - - -def resolve_user_role(context: Context, source: ShareObject, **kwargs): - if not source: - return None - with context.engine.scoped_session() as session: - dataset: S3Dataset = DatasetRepository.get_dataset_by_uri(session, source.datasetUri) - - can_approve = ( - True - if ( - dataset - and ( - dataset.stewards in context.groups - or dataset.SamlAdminGroupName in context.groups - or dataset.owner == context.username - ) - ) - else False - ) - - can_request = True if (source.owner == context.username or source.groupUri in context.groups) else False - - return ( - ShareObjectPermission.ApproversAndRequesters.value - if can_approve and can_request - else ShareObjectPermission.Approvers.value - if can_approve - else ShareObjectPermission.Requesters.value - if can_request - else ShareObjectPermission.NoPermission.value - ) - - -def resolve_can_view_logs(context: Context, source: ShareObject): - return ShareObjectService.check_view_log_permissions(context.username, context.groups, source.shareUri) - - -def resolve_dataset(context: Context, source: ShareObject, **kwargs): - if not source: - return None - with context.engine.scoped_session() as session: - ds: S3Dataset = DatasetRepository.get_dataset_by_uri(session, source.datasetUri) - if ds: - env: Environment = EnvironmentService.get_environment_by_uri(session, ds.environmentUri) - return { - 'datasetUri': source.datasetUri, - 'datasetName': ds.name if ds else 'NotFound', - 'SamlAdminGroupName': ds.SamlAdminGroupName if ds else 'NotFound', - 'environmentName': env.label if env else 'NotFound', - 'AwsAccountId': env.AwsAccountId if env else 'NotFound', - 'region': env.region if env else 'NotFound', - 'exists': True if ds else False, - 'description': ds.description, - } - - -def union_resolver(object, *_): - if isinstance(object, DatasetTable): - return 'DatasetTable' - elif isinstance(object, DatasetStorageLocation): - return 'DatasetStorageLocation' - - -def resolve_principal(context: Context, source: ShareObject, **kwargs): - if not source: - return None - - with context.engine.scoped_session() as session: - if source.principalType in ['Group', 'ConsumptionRole']: - environment = EnvironmentService.get_environment_by_uri(session, source.environmentUri) - organization = OrganizationRepository.get_organization_by_uri(session, environment.organizationUri) - if source.principalType in ['ConsumptionRole']: - principal = EnvironmentService.get_environment_consumption_role( - session, source.principalId, source.environmentUri - ) - principalName = f'{principal.consumptionRoleName} [{principal.IAMRoleArn}]' - else: - principal = EnvironmentService.get_environment_group(session, source.groupUri, source.environmentUri) - principalName = f'{source.groupUri} [{principal.environmentIAMRoleArn}]' - - return { - 'principalId': source.principalId, - 'principalType': source.principalType, - 'principalName': principalName, - 'principalIAMRoleName': source.principalIAMRoleName, - 'SamlGroupName': source.groupUri, - 'environmentUri': environment.environmentUri, - 'environmentName': 
environment.label, - 'AwsAccountId': environment.AwsAccountId, - 'region': environment.region, - 'organizationUri': organization.organizationUri, - 'organizationName': organization.label, - } - - -def resolve_group(context: Context, source: ShareObject, **kwargs): - if not source: - return None - return source.groupUri - - -def resolve_consumption_data(context: Context, source: ShareObject, **kwargs): - if not source: - return None - return ShareObjectService.resolve_share_object_consumption_data( - uri=source.shareUri, - datasetUri=source.datasetUri, - principalId=source.principalId, - environmentUri=source.environmentUri, - ) - - -def resolve_share_object_statistics(context: Context, source: ShareObject, **kwargs): - if not source: - return None - return ShareObjectService.resolve_share_object_statistics(uri=source.shareUri) - - -def resolve_existing_shared_items(context: Context, source: ShareObject, **kwargs): - if not source: - return None - return ShareItemService.check_existing_shared_items(source) - - -def list_shareable_objects(context: Context, source: ShareObject, filter: dict = None): - if not source: - return None - if not filter: - filter = {'page': 1, 'pageSize': 5} - - is_revokable = filter.get('isRevokable') - return ShareItemService.list_shareable_objects(share=source, is_revokable=is_revokable, filter=filter) - - -def resolve_shared_database_name(context: Context, source: ShareObject): - if not source: - return None - with context.engine.scoped_session() as session: - share = ShareObjectService.get_share_object_in_environment( - uri=source.targetEnvironmentUri, shareUri=source.shareUri - ) - dataset = DatasetRepository.get_dataset_by_uri(session=session, dataset_uri=source.datasetUri) - old_shared_db_name = (dataset.GlueDatabaseName + '_shared_' + source.shareUri)[:254] - env = EnvironmentService.get_environment_by_uri(session, share.environmentUri) - database = GlueClient( - account_id=env.AwsAccountId, database=old_shared_db_name, region=env.region - ).get_glue_database() - if database: - return old_shared_db_name - return source.GlueDatabaseName + '_shared' - - -def list_shares_in_my_inbox(context: Context, source, filter: dict = None): - if not filter: - filter = {} - return ShareObjectService.list_shares_in_my_inbox(filter) - - -def list_shares_in_my_outbox(context: Context, source, filter: dict = None): - if not filter: - filter = {} - return ShareObjectService.list_shares_in_my_outbox(filter) - - -def list_shared_with_environment_data_items(context: Context, source, environmentUri: str = None, filter: dict = None): - if not filter: - filter = {} - with context.engine.scoped_session() as session: - return ShareItemService.paginated_shared_with_environment_datasets( - session=session, - uri=environmentUri, - data=filter, - ) - - -def update_share_request_purpose(context: Context, source, shareUri: str = None, requestPurpose: str = None): - return ShareObjectService.update_share_request_purpose( - uri=shareUri, - request_purpose=requestPurpose, - ) +def list_shared_tables_by_env_dataset(context: Context, source, datasetUri: str, envUri: str): + return DatasetSharingService.list_shared_tables_by_env_dataset(datasetUri, envUri) -def update_share_reject_purpose(context: Context, source, shareUri: str = None, rejectPurpose: str = None): - with context.engine.scoped_session() as session: - return ShareObjectService.update_share_reject_purpose( - uri=shareUri, - reject_purpose=rejectPurpose, - ) +@is_feature_enabled('modules.s3_datasets.features.aws_actions') +def 
get_dataset_shared_assume_role_url(context: Context, source, datasetUri: str = None): + return DatasetSharingService.get_dataset_shared_assume_role_url(uri=datasetUri) def verify_dataset_share_objects(context: Context, source, input): @@ -341,10 +64,17 @@ def list_dataset_share_objects(context, source, filter: dict = None): return DatasetSharingService.list_dataset_share_objects(source, filter) -def list_shared_tables_by_env_dataset(context: Context, source, datasetUri: str, envUri: str): - return DatasetSharingService.list_shared_tables_by_env_dataset(datasetUri, envUri) +def get_s3_consumption_data(context: Context, source, shareUri: str): + return DatasetSharingService.get_s3_consumption_data(uri=shareUri) -@is_feature_enabled('modules.s3_datasets.features.aws_actions') -def get_dataset_shared_assume_role_url(context: Context, source, datasetUri: str = None): - return DatasetSharingService.get_dataset_shared_assume_role_url(uri=datasetUri) +def list_shared_databases_tables_with_env_group(context: Context, source, environmentUri: str, groupUri: str): + return DatasetSharingService.list_shared_databases_tables_with_env_group( + environmentUri=environmentUri, groupUri=groupUri + ) + + +def resolve_shared_db_name(context: Context, source, **kwargs): + return DatasetSharingService.resolve_shared_db_name( + source.GlueDatabaseName, source.shareUri, source.targetEnvAwsAccountId, source.targetEnvRegion + ) diff --git a/backend/dataall/modules/s3_datasets_shares/api/types.py b/backend/dataall/modules/s3_datasets_shares/api/types.py index a4ada1acd..745f7f07f 100644 --- a/backend/dataall/modules/s3_datasets_shares/api/types.py +++ b/backend/dataall/modules/s3_datasets_shares/api/types.py @@ -1,128 +1,9 @@ from dataall.base.api import gql -from dataall.modules.shares_base.services.shares_enums import ( - ShareableType, - PrincipalType, - ShareItemHealthStatus, -) -from dataall.modules.s3_datasets_shares.api.resolvers import ( - union_resolver, - resolve_shared_item, - resolve_dataset, - resolve_consumption_data, - resolve_existing_shared_items, - resolve_share_object_statistics, - resolve_principal, - resolve_group, - list_shareable_objects, - resolve_user_role, - resolve_shared_database_name, - resolve_can_view_logs, -) -from dataall.core.environment.api.resolvers import resolve_environment - -ShareableObject = gql.Union( - name='ShareableObject', - types=[gql.Ref('DatasetTable'), gql.Ref('DatasetStorageLocation')], - resolver=union_resolver, -) - - -ShareItem = gql.ObjectType( - name='ShareItem', - fields=[ - gql.Field(name='shareUri', type=gql.String), - gql.Field(name='shareItemUri', type=gql.ID), - gql.Field('itemUri', gql.String), - gql.Field(name='status', type=gql.Ref('ShareItemStatus')), - gql.Field(name='action', type=gql.String), - gql.Field('itemType', ShareableType.toGraphQLEnum()), - gql.Field('itemName', gql.String), - gql.Field('description', gql.String), - gql.Field('healthStatus', ShareItemHealthStatus.toGraphQLEnum()), - gql.Field('healthMessage', gql.String), - gql.Field('lastVerificationTime', gql.String), - gql.Field( - name='sharedObject', - type=gql.Ref('ShareableObject'), - resolver=resolve_shared_item, - ), - # gql.Field(name="permission", type=gql.String) - ], -) - -NotSharedItem = gql.ObjectType( - name='NotSharedItem', - fields=[ - gql.Field('itemUri', gql.String), - gql.Field('shareItemUri', gql.String), - gql.Field('itemType', ShareableType.toGraphQLEnum()), - gql.Field('label', gql.String), - # gql.Field("permission", DatasetRole.toGraphQLEnum()), - 
gql.Field('tags', gql.ArrayType(gql.String)), - gql.Field('created', gql.String), - ], -) - - -NotSharedItemsSearchResult = gql.ObjectType( - name='NotSharedItemsSearchResult', - fields=[ - gql.Field(name='count', type=gql.Integer), - gql.Field(name='pageSize', type=gql.Integer), - gql.Field(name='nextPage', type=gql.Integer), - gql.Field(name='pages', type=gql.Integer), - gql.Field(name='page', type=gql.Integer), - gql.Field(name='previousPage', type=gql.Integer), - gql.Field(name='hasNext', type=gql.Boolean), - gql.Field(name='hasPrevious', type=gql.Boolean), - gql.Field(name='nodes', type=gql.ArrayType(NotSharedItem)), - ], -) - - -SharedItemSearchResult = gql.ObjectType( - name='SharedItemSearchResult', - fields=[ - gql.Field(name='count', type=gql.Integer), - gql.Field(name='pageSize', type=gql.Integer), - gql.Field(name='nextPage', type=gql.Integer), - gql.Field(name='pages', type=gql.Integer), - gql.Field(name='page', type=gql.Integer), - gql.Field(name='previousPage', type=gql.Integer), - gql.Field(name='hasNext', type=gql.Boolean), - gql.Field(name='hasPrevious', type=gql.Boolean), - gql.Field(name='nodes', type=gql.ArrayType(gql.Ref('ShareItem'))), - ], -) +from dataall.modules.s3_datasets_shares.api.resolvers import resolve_shared_db_name -ShareObjectStatistic = gql.ObjectType( - name='ShareObjectStatistic', - fields=[ - gql.Field(name='locations', type=gql.Integer), - gql.Field(name='tables', type=gql.Integer), - gql.Field(name='sharedItems', type=gql.Integer), - gql.Field(name='revokedItems', type=gql.Integer), - gql.Field(name='failedItems', type=gql.Integer), - gql.Field(name='pendingItems', type=gql.Integer), - ], -) - -DatasetLink = gql.ObjectType( - name='DatasetLink', - fields=[ - gql.Field(name='datasetUri', type=gql.String), - gql.Field(name='datasetName', type=gql.String), - gql.Field(name='SamlAdminGroupName', type=gql.String), - gql.Field(name='environmentName', type=gql.String), - gql.Field(name='AwsAccountId', type=gql.String), - gql.Field(name='region', type=gql.String), - gql.Field(name='exists', type=gql.Boolean), - gql.Field(name='description', type=gql.String), - ], -) -ConsumptionData = gql.ObjectType( - name='ConsumptionData', +S3ConsumptionData = gql.ObjectType( + name='S3ConsumptionData', fields=[ gql.Field(name='s3AccessPointName', type=gql.String), gql.Field(name='sharedGlueDatabase', type=gql.String), @@ -130,150 +11,14 @@ ], ) -ShareObject = gql.ObjectType( - name='ShareObject', +SharedDatabaseTableItem = gql.ObjectType( + name='SharedDatabaseTableItem', fields=[ - gql.Field(name='shareUri', type=gql.ID), - gql.Field(name='status', type=gql.Ref('ShareObjectStatus')), - gql.Field(name='owner', type=gql.String), - gql.Field(name='created', type=gql.String), - gql.Field(name='deleted', type=gql.String), - gql.Field(name='updated', type=gql.String), - gql.Field(name='datasetUri', type=gql.String), - gql.Field(name='requestPurpose', type=gql.String), - gql.Field(name='rejectPurpose', type=gql.String), - gql.Field(name='dataset', type=DatasetLink, resolver=resolve_dataset), - gql.Field(name='consumptionData', type=gql.Ref('ConsumptionData'), resolver=resolve_consumption_data), - gql.Field(name='existingSharedItems', type=gql.Boolean, resolver=resolve_existing_shared_items), - gql.Field( - name='statistics', - type=gql.Ref('ShareObjectStatistic'), - resolver=resolve_share_object_statistics, - ), - gql.Field(name='principal', resolver=resolve_principal, type=gql.Ref('Principal')), - gql.Field( - name='environment', - resolver=resolve_environment, - 
type=gql.Ref('Environment'), - ), - gql.Field( - name='group', - resolver=resolve_group, - type=gql.String, - ), - gql.Field( - 'items', - args=[gql.Argument(name='filter', type=gql.Ref('ShareableObjectFilter'))], - type=gql.Ref('SharedItemSearchResult'), - resolver=list_shareable_objects, - ), - gql.Field( - name='canViewLogs', - resolver=resolve_can_view_logs, - type=gql.Boolean, - ), - gql.Field( - name='userRoleForShareObject', - type=gql.Ref('ShareObjectPermission'), - resolver=resolve_user_role, - ), - ], -) - - -ShareSearchResult = gql.ObjectType( - name='ShareSearchResult', - fields=[ - gql.Field(name='count', type=gql.Integer), - gql.Field(name='pageSize', type=gql.Integer), - gql.Field(name='nextPage', type=gql.Integer), - gql.Field(name='pages', type=gql.Integer), - gql.Field(name='page', type=gql.Integer), - gql.Field(name='previousPage', type=gql.Integer), - gql.Field(name='hasNext', type=gql.Boolean), - gql.Field(name='hasPrevious', type=gql.Boolean), - gql.Field(name='nodes', type=gql.ArrayType(gql.Ref('ShareObject'))), - ], -) - -EnvironmentPublishedItem = gql.ObjectType( - name='EnvironmentPublishedItem', - fields=[ - gql.Field(name='shareUri', type=gql.NonNullableType(gql.String)), gql.Field(name='datasetUri', type=gql.NonNullableType(gql.String)), - gql.Field(name='datasetName', type=gql.NonNullableType(gql.String)), - gql.Field(name='itemAccess', type=gql.NonNullableType(gql.String)), - gql.Field(name='itemType', type=gql.NonNullableType(gql.String)), - gql.Field(name='itemName', type=gql.NonNullableType(gql.String)), - gql.Field(name='environmentUri', type=gql.NonNullableType(gql.String)), - gql.Field(name='targetEnvironmentUri', type=gql.NonNullableType(gql.String)), - gql.Field(name='principalId', type=gql.NonNullableType(gql.String)), - gql.Field(name='environmentName', type=gql.NonNullableType(gql.String)), - gql.Field(name='organizationUri', type=gql.NonNullableType(gql.String)), - gql.Field(name='organizationName', type=gql.NonNullableType(gql.String)), - gql.Field(name='created', type=gql.NonNullableType(gql.String)), - gql.Field(name='GlueDatabaseName', type=gql.String), # TODO: remove when splitting API calls - gql.Field(name='GlueTableName', type=gql.String), # TODO: remove when splitting API calls - gql.Field( - 'sharedGlueDatabaseName', - type=gql.String, - resolver=resolve_shared_database_name, - ), - ], -) - - -EnvironmentPublishedItemSearchResults = gql.ObjectType( - name='EnvironmentPublishedItemSearchResults', - fields=[ - gql.Field(name='count', type=gql.Integer), - gql.Field(name='page', type=gql.Integer), - gql.Field(name='pages', type=gql.Integer), - gql.Field(name='hasNext', type=gql.Boolean), - gql.Field(name='hasPrevious', type=gql.Boolean), - gql.Field(name='nodes', type=gql.ArrayType(EnvironmentPublishedItem)), - ], -) - -Principal = gql.ObjectType( - name='Principal', - fields=[ - gql.Field(name='principalId', type=gql.ID), - gql.Field(name='principalType', type=PrincipalType.toGraphQLEnum()), - gql.Field(name='principalName', type=gql.String), - gql.Field(name='principalIAMRoleName', type=gql.String), - gql.Field(name='SamlGroupName', type=gql.String), - gql.Field(name='environmentName', type=gql.String), - gql.Field(name='environmentUri', type=gql.String), - gql.Field(name='AwsAccountId', type=gql.String), - gql.Field(name='region', type=gql.String), - gql.Field(name='organizationName', type=gql.String), - gql.Field(name='organizationUri', type=gql.String), - ], -) - - -PrincipalSearchResult = gql.ObjectType( - name='PrincipalSearchResult', 
- fields=[ - gql.Field(name='count', type=gql.Integer), - gql.Field(name='nodes', type=gql.ArrayType(Principal)), - gql.Field(name='pageSize', type=gql.Integer), - gql.Field(name='nextPage', type=gql.Integer), - gql.Field(name='pages', type=gql.Integer), - gql.Field(name='page', type=gql.Integer), - gql.Field(name='previousPage', type=gql.Integer), - gql.Field(name='hasNext', type=gql.Boolean), - gql.Field(name='hasPrevious', type=gql.Boolean), - ], -) - -ShareLog = gql.ObjectType( - name='ShareLog', - fields=[ - gql.Field(name='logStream', type=gql.String), - gql.Field(name='logGroup', type=gql.String), - gql.Field(name='timestamp', type=gql.String), - gql.Field(name='message', type=gql.String), + gql.Field(name='GlueDatabaseName', type=gql.NonNullableType(gql.String)), + gql.Field(name='shareUri', type=gql.NonNullableType(gql.String)), + gql.Field(name='targetEnvAwsAccountId', type=gql.NonNullableType(gql.String)), + gql.Field(name='targetEnvRegion', type=gql.NonNullableType(gql.String)), + gql.Field(name='sharedGlueDatabaseName', type=gql.NonNullableType(gql.String), resolver=resolve_shared_db_name), ], ) diff --git a/backend/dataall/modules/s3_datasets_shares/db/share_object_repositories.py b/backend/dataall/modules/s3_datasets_shares/db/share_object_repositories.py index 93d73668e..d96869b91 100644 --- a/backend/dataall/modules/s3_datasets_shares/db/share_object_repositories.py +++ b/backend/dataall/modules/s3_datasets_shares/db/share_object_repositories.py @@ -834,9 +834,9 @@ def paginate_shared_datasets(session, env_uri, data): q = ( session.query( ShareObjectItem.shareUri.label('shareUri'), - S3Dataset.datasetUri.label('datasetUri'), - S3Dataset.name.label('datasetName'), - S3Dataset.description.label('datasetDescription'), + DatasetBase.datasetUri.label('datasetUri'), + DatasetBase.name.label('datasetName'), + DatasetBase.description.label('datasetDescription'), Environment.environmentUri.label('environmentUri'), Environment.name.label('environmentName'), ShareObject.created.label('created'), @@ -845,8 +845,6 @@ def paginate_shared_datasets(session, env_uri, data): ShareObject.environmentUri.label('targetEnvironmentUri'), ShareObjectItem.itemType.label('itemType'), ShareObjectItem.itemName.label('itemName'), - S3Dataset.GlueDatabaseName.label('GlueDatabaseName'), - DatasetTable.GlueTableName.label('GlueTableName'), Organization.organizationUri.label('organizationUri'), Organization.name.label('organizationName'), case( @@ -872,25 +870,17 @@ def paginate_shared_datasets(session, env_uri, data): ShareObject.shareUri == ShareObjectItem.shareUri, ) .join( - S3Dataset, - ShareObject.datasetUri == S3Dataset.datasetUri, + DatasetBase, + ShareObject.datasetUri == DatasetBase.datasetUri, ) .join( Environment, - Environment.environmentUri == S3Dataset.environmentUri, + Environment.environmentUri == DatasetBase.environmentUri, ) .join( Organization, Organization.organizationUri == Environment.organizationUri, ) - .outerjoin( - DatasetTable, - ShareObjectItem.itemUri == DatasetTable.tableUri, - ) - .outerjoin( - DatasetStorageLocation, - ShareObjectItem.itemUri == DatasetStorageLocation.locationUri, - ) .filter( and_( ShareObjectItem.status.in_(share_item_shared_states), @@ -899,7 +889,7 @@ def paginate_shared_datasets(session, env_uri, data): ) ) - if data.get('datasetUri'): + if data.get('datasetUri'): # TODO review filter dataset_uri = data.get('datasetUri') q = q.filter(ShareObject.datasetUri == dataset_uri) @@ -1064,3 +1054,53 @@ def query_dataset_tables_shared_with_env( ) return 
env_tables_shared + + @staticmethod + def query_shared_glue_databases(session, groups, env_uri, group_uri): + share_item_shared_states = ShareItemSM.get_share_item_shared_states() + q = ( + session.query( + ShareObjectItem.shareUri.label('shareUri'), + S3Dataset.datasetUri.label('datasetUri'), + S3Dataset.name.label('datasetName'), + S3Dataset.name.label('sharedGlueDatabaseName'), + Environment.environmentUri.label('environmentUri'), + Environment.name.label('environmentName'), + Environment.AwsAccountId.label('targetEnvAwsAccountId'), + Environment.region.label('targetEnvRegion'), + ShareObject.created.label('created'), + ShareObject.principalId.label('principalId'), + ShareObject.principalType.label('principalType'), + ShareObject.environmentUri.label('targetEnvironmentUri'), + ShareObjectItem.itemType.label('itemType'), + ShareObjectItem.itemName.label('itemName'), + S3Dataset.GlueDatabaseName.label('GlueDatabaseName'), + DatasetTable.GlueTableName.label('GlueTableName'), + ) + .join( + ShareObject, + ShareObject.shareUri == ShareObjectItem.shareUri, + ) + .join( + S3Dataset, + ShareObject.datasetUri == S3Dataset.datasetUri, + ) + .join( + Environment, + Environment.environmentUri == ShareObject.environmentUri, + ) + .outerjoin( + DatasetTable, + ShareObjectItem.itemUri == DatasetTable.tableUri, + ) + .filter( + and_( + ShareObjectItem.status.in_(share_item_shared_states), + ShareObject.environmentUri == env_uri, + ShareObject.principalId == group_uri, + ShareObject.groupUri.in_(groups), + ShareObjectItem.itemType == ShareableType.Table.value, + ) + ) + ) + return q.order_by(ShareObject.shareUri).distinct(ShareObject.shareUri) diff --git a/backend/dataall/modules/s3_datasets_shares/services/dataset_sharing_service.py b/backend/dataall/modules/s3_datasets_shares/services/dataset_sharing_service.py index 915829b0e..05b63a404 100644 --- a/backend/dataall/modules/s3_datasets_shares/services/dataset_sharing_service.py +++ b/backend/dataall/modules/s3_datasets_shares/services/dataset_sharing_service.py @@ -1,3 +1,5 @@ +from warnings import warn +from dataall.base.db import utils from dataall.core.permissions.services.resource_policy_service import ResourcePolicyService from dataall.core.permissions.services.tenant_policy_service import TenantPolicyService from dataall.core.environment.services.environment_service import EnvironmentService @@ -7,7 +9,7 @@ from dataall.modules.shares_base.db.share_object_models import ShareObject from dataall.modules.s3_datasets_shares.db.share_object_repositories import ShareObjectRepository from dataall.modules.shares_base.db.share_object_state_machines import ShareItemSM -from dataall.modules.shares_base.services.share_permissions import SHARE_OBJECT_APPROVER +from dataall.modules.shares_base.services.share_permissions import SHARE_OBJECT_APPROVER, GET_SHARE_OBJECT from dataall.modules.s3_datasets_shares.services.share_item_service import ShareItemService from dataall.modules.s3_datasets.db.dataset_repositories import DatasetRepository from dataall.modules.s3_datasets.services.dataset_permissions import ( @@ -21,6 +23,7 @@ from dataall.modules.s3_datasets.db.dataset_models import S3Dataset from dataall.modules.datasets_base.services.datasets_enums import DatasetRole, DatasetType from dataall.modules.datasets_base.services.dataset_service_interface import DatasetServiceInterface +from dataall.modules.s3_datasets_shares.aws.glue_client import GlueClient import logging @@ -179,3 +182,55 @@ def get_dataset_shared_assume_role_url(uri): 
bucket=dataset.S3BucketName, ) return url + + @staticmethod + @ResourcePolicyService.has_resource_permission(GET_SHARE_OBJECT) + def get_s3_consumption_data(uri): + with get_context().db_engine.scoped_session() as session: + share = ShareObjectRepository.get_share_by_uri(session, uri) + dataset = DatasetRepository.get_dataset_by_uri(session, share.datasetUri) + if dataset: + environment = EnvironmentService.get_environment_by_uri(session, share.environmentUri) + S3AccessPointName = utils.slugify( + share.datasetUri + '-' + share.principalId, + max_length=50, + lowercase=True, + regex_pattern='[^a-zA-Z0-9-]', + separator='-', + ) + # Check if the share was made with a Glue Database + datasetGlueDatabase = ShareItemService._get_glue_database_for_share( + dataset.GlueDatabaseName, dataset.AwsAccountId, dataset.region + ) + old_shared_db_name = f'{datasetGlueDatabase}_shared_{uri}'[:254] + database = GlueClient( + account_id=environment.AwsAccountId, region=environment.region, database=old_shared_db_name + ).get_glue_database() + warn('old_shared_db_name will be deprecated in v2.6.0', DeprecationWarning, stacklevel=2) + sharedGlueDatabase = old_shared_db_name if database else f'{datasetGlueDatabase}_shared' + return { + 's3AccessPointName': S3AccessPointName, + 'sharedGlueDatabase': sharedGlueDatabase, + 's3bucketName': dataset.S3BucketName, + } + return { + 's3AccessPointName': 'Not Created', + 'sharedGlueDatabase': 'Not Created', + 's3bucketName': 'Not Created', + } + + @staticmethod + def list_shared_databases_tables_with_env_group(environmentUri: str, groupUri: str): + context = get_context() + with context.db_engine.scoped_session() as session: + return ShareObjectRepository.query_shared_glue_databases( + session=session, groups=context.groups, env_uri=environmentUri, group_uri=groupUri + ).all() + + @staticmethod + def resolve_shared_db_name(GlueDatabaseName: str, shareUri: str, targetEnvAwsAccountId: str, targetEnvRegion: str): + old_shared_db_name = (GlueDatabaseName + '_shared_' + shareUri)[:254] + database = GlueClient( + account_id=targetEnvAwsAccountId, database=old_shared_db_name, region=targetEnvRegion + ).get_glue_database() + return old_shared_db_name if database else GlueDatabaseName + '_shared' diff --git a/backend/dataall/modules/s3_datasets_shares/services/share_object_service.py b/backend/dataall/modules/s3_datasets_shares/services/share_object_service.py index be4626e69..9eeab4772 100644 --- a/backend/dataall/modules/s3_datasets_shares/services/share_object_service.py +++ b/backend/dataall/modules/s3_datasets_shares/services/share_object_service.py @@ -1,6 +1,5 @@ import os from datetime import datetime -from warnings import warn from dataall.core.permissions.services.resource_policy_service import ResourcePolicyService from dataall.core.tasks.service_handlers import Worker @@ -27,7 +26,6 @@ ShareItemSM, ) from dataall.modules.shares_base.services.share_exceptions import ShareItemsFound, PrincipalRoleNotFound -from dataall.modules.s3_datasets_shares.services.share_item_service import ShareItemService from dataall.modules.shares_base.services.share_notification_service import ShareNotificationService from dataall.modules.s3_datasets_shares.services.managed_share_policy_service import SharePolicyService from dataall.modules.shares_base.services.share_permissions import ( @@ -40,10 +38,8 @@ DELETE_SHARE_OBJECT, GET_SHARE_OBJECT, ) -from dataall.modules.s3_datasets_shares.aws.glue_client import GlueClient from dataall.modules.s3_datasets.db.dataset_repositories import 
DatasetRepository -from dataall.modules.s3_datasets.db.dataset_models import DatasetTable, S3Dataset, DatasetStorageLocation -from dataall.modules.s3_datasets.services.dataset_permissions import DATASET_TABLE_READ, DATASET_FOLDER_READ +from dataall.modules.s3_datasets.db.dataset_models import S3Dataset from dataall.base.aws.iam import IAM from dataall.base.utils import Parameter @@ -414,8 +410,6 @@ def delete_share_object(cls, uri: str): @staticmethod def resolve_share_object_statistics(uri): with get_context().db_engine.scoped_session() as session: - tables = ShareObjectRepository.count_sharable_items(session, uri, 'DatasetTable') - locations = ShareObjectRepository.count_sharable_items(session, uri, 'DatasetStorageLocation') shared_items = ShareObjectRepository.count_items_in_states( session, uri, ShareItemSM.get_share_item_shared_states() ) @@ -428,48 +422,12 @@ def resolve_share_object_statistics(uri): session, uri, [ShareItemStatus.PendingApproval.value] ) return { - 'tables': tables, - 'locations': locations, 'sharedItems': shared_items, 'revokedItems': revoked_items, 'failedItems': failed_items, 'pendingItems': pending_items, } - @staticmethod - def resolve_share_object_consumption_data(uri, datasetUri, principalId, environmentUri): - with get_context().db_engine.scoped_session() as session: - dataset = DatasetRepository.get_dataset_by_uri(session, datasetUri) - if dataset: - environment = EnvironmentService.get_environment_by_uri(session, environmentUri) - S3AccessPointName = utils.slugify( - datasetUri + '-' + principalId, - max_length=50, - lowercase=True, - regex_pattern='[^a-zA-Z0-9-]', - separator='-', - ) - # Check if the share was made with a Glue Database - datasetGlueDatabase = ShareItemService._get_glue_database_for_share( - dataset.GlueDatabaseName, dataset.AwsAccountId, dataset.region - ) - old_shared_db_name = f'{datasetGlueDatabase}_shared_{uri}'[:254] - database = GlueClient( - account_id=environment.AwsAccountId, region=environment.region, database=old_shared_db_name - ).get_glue_database() - warn('old_shared_db_name will be deprecated in v2.6.0', DeprecationWarning, stacklevel=2) - sharedGlueDatabase = old_shared_db_name if database else f'{datasetGlueDatabase}_shared' - return { - 's3AccessPointName': S3AccessPointName, - 'sharedGlueDatabase': sharedGlueDatabase, - 's3bucketName': dataset.S3BucketName, - } - return { - 's3AccessPointName': 'Not Created', - 'sharedGlueDatabase': 'Not Created', - 's3bucketName': 'Not Created', - } - @staticmethod def list_shares_in_my_inbox(filter: dict): context = get_context() diff --git a/backend/dataall/modules/shares_base/__init__.py b/backend/dataall/modules/shares_base/__init__.py index fde939369..a2ec7e268 100644 --- a/backend/dataall/modules/shares_base/__init__.py +++ b/backend/dataall/modules/shares_base/__init__.py @@ -32,10 +32,7 @@ def __init__(self): class SharesBaseAPIModuleInterface(ModuleInterface): @staticmethod def is_supported(modes: Set[ImportMode]) -> bool: - supported_modes = { - ImportMode.API, - } - return modes & supported_modes + return ImportMode.API in modes @staticmethod def depends_on() -> List[Type['ModuleInterface']]: @@ -48,6 +45,7 @@ def __init__(self): import dataall.modules.shares_base.services.share_permissions import dataall.modules.shares_base.services.sharing_service import dataall.modules.shares_base.handlers + import dataall.modules.shares_base.api class SharesBaseECSTaskModuleInterface(ModuleInterface): diff --git a/backend/dataall/modules/shares_base/api/__init__.py 
b/backend/dataall/modules/shares_base/api/__init__.py index e69de29bb..ccdaa1f33 100644 --- a/backend/dataall/modules/shares_base/api/__init__.py +++ b/backend/dataall/modules/shares_base/api/__init__.py @@ -0,0 +1,9 @@ +from dataall.modules.shares_base.api import ( + input_types, + mutations, + queries, + resolvers, + types, +) + +__all__ = ['resolvers', 'types', 'input_types', 'queries', 'mutations'] diff --git a/backend/dataall/modules/s3_datasets_shares/api/input_types.py b/backend/dataall/modules/shares_base/api/input_types.py similarity index 100% rename from backend/dataall/modules/s3_datasets_shares/api/input_types.py rename to backend/dataall/modules/shares_base/api/input_types.py diff --git a/backend/dataall/modules/shares_base/api/mutations.py b/backend/dataall/modules/shares_base/api/mutations.py new file mode 100644 index 000000000..afc180f93 --- /dev/null +++ b/backend/dataall/modules/shares_base/api/mutations.py @@ -0,0 +1,118 @@ +from dataall.base.api import gql +from dataall.modules.shares_base.api.resolvers import ( + add_shared_item, + approve_share_object, + create_share_object, + delete_share_object, + reapply_items_share_object, + reject_share_object, + remove_shared_item, + revoke_items_share_object, + submit_share_object, + update_share_reject_purpose, + update_share_request_purpose, + verify_items_share_object, +) + +createShareObject = gql.MutationField( + name='createShareObject', + args=[ + gql.Argument(name='datasetUri', type=gql.NonNullableType(gql.String)), + gql.Argument(name='itemUri', type=gql.String), + gql.Argument(name='itemType', type=gql.String), + gql.Argument(name='input', type=gql.NonNullableType(gql.Ref('NewShareObjectInput'))), + ], + type=gql.Ref('ShareObject'), + resolver=create_share_object, +) + +deleteShareObject = gql.MutationField( + name='deleteShareObject', + args=[gql.Argument(name='shareUri', type=gql.NonNullableType(gql.String))], + resolver=delete_share_object, + type=gql.Boolean, +) + +addSharedItem = gql.MutationField( + name='addSharedItem', + args=[ + gql.Argument(name='shareUri', type=gql.NonNullableType(gql.String)), + gql.Argument(name='input', type=gql.Ref('AddSharedItemInput')), + ], + type=gql.Ref('ShareItem'), + resolver=add_shared_item, +) + + +removeSharedItem = gql.MutationField( + name='removeSharedItem', + args=[gql.Argument(name='shareItemUri', type=gql.NonNullableType(gql.String))], + resolver=remove_shared_item, + type=gql.Boolean, +) + +submitShareObject = gql.MutationField( + name='submitShareObject', + args=[gql.Argument(name='shareUri', type=gql.NonNullableType(gql.String))], + type=gql.Ref('ShareObject'), + resolver=submit_share_object, +) + +approveShareObject = gql.MutationField( + name='approveShareObject', + args=[gql.Argument(name='shareUri', type=gql.NonNullableType(gql.String))], + type=gql.Ref('ShareObject'), + resolver=approve_share_object, +) + + +rejectShareObject = gql.MutationField( + name='rejectShareObject', + args=[ + gql.Argument(name='shareUri', type=gql.NonNullableType(gql.String)), + gql.Argument(name='rejectPurpose', type=gql.String), + ], + type=gql.Ref('ShareObject'), + resolver=reject_share_object, +) + +revokeItemsShareObject = gql.MutationField( + name='revokeItemsShareObject', + args=[gql.Argument(name='input', type=gql.Ref('ShareItemSelectorInput'))], + type=gql.Ref('ShareObject'), + resolver=revoke_items_share_object, +) + +verifyItemsShareObject = gql.MutationField( + name='verifyItemsShareObject', + args=[gql.Argument(name='input', type=gql.Ref('ShareItemSelectorInput'))], + 
type=gql.Ref('ShareObject'), + resolver=verify_items_share_object, +) + +reApplyItemsShareObject = gql.MutationField( + name='reApplyItemsShareObject', + args=[gql.Argument(name='input', type=gql.Ref('ShareItemSelectorInput'))], + type=gql.Ref('ShareObject'), + resolver=reapply_items_share_object, +) + +updateShareRejectReason = gql.MutationField( + name='updateShareRejectReason', + args=[ + gql.Argument(name='shareUri', type=gql.NonNullableType(gql.String)), + gql.Argument(name='rejectPurpose', type=gql.String), + ], + type=gql.Boolean, + resolver=update_share_reject_purpose, +) + +updateShareRequestReason = gql.MutationField( + name='updateShareRequestReason', + args=[ + gql.Argument(name='shareUri', type=gql.NonNullableType(gql.String)), + gql.Argument(name='requestPurpose', type=gql.String), + ], + type=gql.Boolean, + resolver=update_share_request_purpose, +) diff --git a/backend/dataall/modules/shares_base/api/queries.py b/backend/dataall/modules/shares_base/api/queries.py new file mode 100644 index 000000000..7cc3f675a --- /dev/null +++ b/backend/dataall/modules/shares_base/api/queries.py @@ -0,0 +1,46 @@ +from dataall.base.api import gql +from dataall.modules.shares_base.api.resolvers import ( + get_share_object, + get_share_logs, + list_shared_with_environment_data_items, + list_shares_in_my_inbox, + list_shares_in_my_outbox, +) + +getShareObject = gql.QueryField( + name='getShareObject', + args=[gql.Argument(name='shareUri', type=gql.NonNullableType(gql.String))], + type=gql.Ref('ShareObject'), + resolver=get_share_object, +) + +getShareRequestsFromMe = gql.QueryField( + name='getShareRequestsFromMe', + args=[gql.Argument(name='filter', type=gql.Ref('ShareObjectFilter'))], + type=gql.Ref('ShareSearchResult'), + resolver=list_shares_in_my_outbox, +) + +getShareRequestsToMe = gql.QueryField( + name='getShareRequestsToMe', + args=[gql.Argument(name='filter', type=gql.Ref('ShareObjectFilter'))], + type=gql.Ref('ShareSearchResult'), + resolver=list_shares_in_my_inbox, +) + +searchEnvironmentDataItems = gql.QueryField( + name='searchEnvironmentDataItems', + args=[ + gql.Argument(name='environmentUri', type=gql.NonNullableType(gql.String)), + gql.Argument(name='filter', type=gql.Ref('EnvironmentDataItemFilter')), + ], + resolver=list_shared_with_environment_data_items, + type=gql.Ref('EnvironmentPublishedItemSearchResults'), +) + +getShareLogs = gql.QueryField( + name='getShareLogs', + args=[gql.Argument(name='shareUri', type=gql.NonNullableType(gql.String))], + type=gql.ArrayType(gql.Ref('ShareLog')), + resolver=get_share_logs, +) diff --git a/backend/dataall/modules/shares_base/api/resolvers.py b/backend/dataall/modules/shares_base/api/resolvers.py new file mode 100644 index 000000000..7845fae9c --- /dev/null +++ b/backend/dataall/modules/shares_base/api/resolvers.py @@ -0,0 +1,295 @@ +import logging + +from dataall.base.api.context import Context +from dataall.core.environment.db.environment_models import Environment +from dataall.core.environment.services.environment_service import EnvironmentService +from dataall.core.organizations.db.organization_repositories import OrganizationRepository +from dataall.base.db.exceptions import RequiredParameter +from dataall.modules.shares_base.services.shares_enums import ShareObjectPermission +from dataall.modules.shares_base.db.share_object_models import ShareObjectItem, ShareObject +from dataall.modules.s3_datasets_shares.services.share_item_service import ShareItemService +from dataall.modules.s3_datasets_shares.services.share_object_service 
import ShareObjectService +from dataall.modules.s3_datasets_shares.aws.glue_client import GlueClient +from dataall.modules.s3_datasets.db.dataset_repositories import DatasetRepository +from dataall.modules.s3_datasets.db.dataset_models import DatasetStorageLocation, DatasetTable, S3Dataset + + +log = logging.getLogger(__name__) + + +class RequestValidator: + @staticmethod + def validate_creation_request(data): + if not data: + raise RequiredParameter(data) + if not data.get('principalId'): + raise RequiredParameter('principalId') + if not data.get('principalType'): + raise RequiredParameter('principalType') + if not data.get('groupUri'): + raise RequiredParameter('groupUri') + + @staticmethod + def validate_item_selector_input(data): + if not data: + raise RequiredParameter(data) + if not data.get('shareUri'): + raise RequiredParameter('shareUri') + if not data.get('itemUris'): + raise RequiredParameter('itemUris') + + @staticmethod + def validate_dataset_share_selector_input(data): + if not data: + raise RequiredParameter(data) + if not data.get('datasetUri'): + raise RequiredParameter('datasetUri') + if not data.get('shareUris'): + raise RequiredParameter('shareUris') + + +def create_share_object( + context: Context, + source, + datasetUri: str = None, + itemUri: str = None, + itemType: str = None, + input: dict = None, +): + RequestValidator.validate_creation_request(input) + + return ShareObjectService.create_share_object( + uri=input['environmentUri'], + dataset_uri=datasetUri, + item_uri=itemUri, + item_type=itemType, + group_uri=input['groupUri'], + principal_id=input['principalId'], + principal_type=input['principalType'], + requestPurpose=input.get('requestPurpose'), + attachMissingPolicies=input.get('attachMissingPolicies'), + ) + + +def submit_share_object(context: Context, source, shareUri: str = None): + return ShareObjectService.submit_share_object(uri=shareUri) + + +def approve_share_object(context: Context, source, shareUri: str = None): + return ShareObjectService.approve_share_object(uri=shareUri) + + +def reject_share_object( + context: Context, + source, + shareUri: str = None, + rejectPurpose: str = None, +): + return ShareObjectService.reject_share_object(uri=shareUri, reject_purpose=rejectPurpose) + + +def revoke_items_share_object(context: Context, source, input): + RequestValidator.validate_item_selector_input(input) + share_uri = input.get('shareUri') + revoked_uris = input.get('itemUris') + return ShareItemService.revoke_items_share_object(uri=share_uri, revoked_uris=revoked_uris) + + +def verify_items_share_object(context: Context, source, input): + RequestValidator.validate_item_selector_input(input) + share_uri = input.get('shareUri') + verify_item_uris = input.get('itemUris') + return ShareItemService.verify_items_share_object(uri=share_uri, item_uris=verify_item_uris) + + +def reapply_items_share_object(context: Context, source, input): + RequestValidator.validate_item_selector_input(input) + share_uri = input.get('shareUri') + reapply_item_uris = input.get('itemUris') + return ShareItemService.reapply_items_share_object(uri=share_uri, item_uris=reapply_item_uris) + + +def delete_share_object(context: Context, source, shareUri: str = None): + return ShareObjectService.delete_share_object(uri=shareUri) + + +def add_shared_item(context, source, shareUri: str = None, input: dict = None): + return ShareItemService.add_shared_item(uri=shareUri, data=input) + + +def remove_shared_item(context, source, shareItemUri: str = None): + return 
ShareItemService.remove_shared_item(uri=shareItemUri) + + +def resolve_shared_item(context, source: ShareObjectItem, **kwargs): + if not source: + return None + return ShareItemService.resolve_shared_item(uri=source.shareUri, item=source) + + +def get_share_object(context, source, shareUri: str = None): + return ShareObjectService.get_share_object(uri=shareUri) + + +def get_share_logs(context, source, shareUri: str): + return ShareObjectService.get_share_logs(shareUri) + + +def resolve_user_role(context: Context, source: ShareObject, **kwargs): + if not source: + return None + with context.engine.scoped_session() as session: + dataset: S3Dataset = DatasetRepository.get_dataset_by_uri(session, source.datasetUri) + + can_approve = ( + True + if ( + dataset + and ( + dataset.stewards in context.groups + or dataset.SamlAdminGroupName in context.groups + or dataset.owner == context.username + ) + ) + else False + ) + + can_request = True if (source.owner == context.username or source.groupUri in context.groups) else False + + return ( + ShareObjectPermission.ApproversAndRequesters.value + if can_approve and can_request + else ShareObjectPermission.Approvers.value + if can_approve + else ShareObjectPermission.Requesters.value + if can_request + else ShareObjectPermission.NoPermission.value + ) + + +def resolve_can_view_logs(context: Context, source: ShareObject): + return ShareObjectService.check_view_log_permissions(context.username, context.groups, source.shareUri) + + +def resolve_dataset(context: Context, source: ShareObject, **kwargs): + if not source: + return None + with context.engine.scoped_session() as session: + ds: S3Dataset = DatasetRepository.get_dataset_by_uri(session, source.datasetUri) + if ds: + env: Environment = EnvironmentService.get_environment_by_uri(session, ds.environmentUri) + return { + 'datasetUri': source.datasetUri, + 'datasetName': ds.name if ds else 'NotFound', + 'SamlAdminGroupName': ds.SamlAdminGroupName if ds else 'NotFound', + 'environmentName': env.label if env else 'NotFound', + 'AwsAccountId': env.AwsAccountId if env else 'NotFound', + 'region': env.region if env else 'NotFound', + 'exists': True if ds else False, + 'description': ds.description, + } + + +def union_resolver(object, *_): + if isinstance(object, DatasetTable): + return 'DatasetTable' + elif isinstance(object, DatasetStorageLocation): + return 'DatasetStorageLocation' + + +def resolve_principal(context: Context, source: ShareObject, **kwargs): + if not source: + return None + + with context.engine.scoped_session() as session: + if source.principalType in ['Group', 'ConsumptionRole']: + environment = EnvironmentService.get_environment_by_uri(session, source.environmentUri) + organization = OrganizationRepository.get_organization_by_uri(session, environment.organizationUri) + if source.principalType in ['ConsumptionRole']: + principal = EnvironmentService.get_environment_consumption_role( + session, source.principalId, source.environmentUri + ) + principalName = f'{principal.consumptionRoleName} [{principal.IAMRoleArn}]' + else: + principal = EnvironmentService.get_environment_group(session, source.groupUri, source.environmentUri) + principalName = f'{source.groupUri} [{principal.environmentIAMRoleArn}]' + + return { + 'principalId': source.principalId, + 'principalType': source.principalType, + 'principalName': principalName, + 'principalIAMRoleName': source.principalIAMRoleName, + 'SamlGroupName': source.groupUri, + 'environmentUri': environment.environmentUri, + 'environmentName': 
environment.label, + 'AwsAccountId': environment.AwsAccountId, + 'region': environment.region, + 'organizationUri': organization.organizationUri, + 'organizationName': organization.label, + } + + +def resolve_group(context: Context, source: ShareObject, **kwargs): + if not source: + return None + return source.groupUri + + +def resolve_share_object_statistics(context: Context, source: ShareObject, **kwargs): + if not source: + return None + return ShareObjectService.resolve_share_object_statistics(uri=source.shareUri) + + +def resolve_existing_shared_items(context: Context, source: ShareObject, **kwargs): + if not source: + return None + return ShareItemService.check_existing_shared_items(source) + + +def list_shareable_objects(context: Context, source: ShareObject, filter: dict = None): + if not source: + return None + if not filter: + filter = {'page': 1, 'pageSize': 5} + + is_revokable = filter.get('isRevokable') + return ShareItemService.list_shareable_objects(share=source, is_revokable=is_revokable, filter=filter) + + +def list_shares_in_my_inbox(context: Context, source, filter: dict = None): + if not filter: + filter = {} + return ShareObjectService.list_shares_in_my_inbox(filter) + + +def list_shares_in_my_outbox(context: Context, source, filter: dict = None): + if not filter: + filter = {} + return ShareObjectService.list_shares_in_my_outbox(filter) + + +def list_shared_with_environment_data_items(context: Context, source, environmentUri: str = None, filter: dict = None): + if not filter: + filter = {} + with context.engine.scoped_session() as session: + return ShareItemService.paginated_shared_with_environment_datasets( + session=session, + uri=environmentUri, + data=filter, + ) + + +def update_share_request_purpose(context: Context, source, shareUri: str = None, requestPurpose: str = None): + return ShareObjectService.update_share_request_purpose( + uri=shareUri, + request_purpose=requestPurpose, + ) + + +def update_share_reject_purpose(context: Context, source, shareUri: str = None, rejectPurpose: str = None): + with context.engine.scoped_session() as session: + return ShareObjectService.update_share_reject_purpose( + uri=shareUri, + reject_purpose=rejectPurpose, + ) diff --git a/backend/dataall/modules/shares_base/api/types.py b/backend/dataall/modules/shares_base/api/types.py new file mode 100644 index 000000000..1a3860515 --- /dev/null +++ b/backend/dataall/modules/shares_base/api/types.py @@ -0,0 +1,256 @@ +from dataall.base.api import gql +from dataall.modules.shares_base.services.shares_enums import ( + ShareableType, + PrincipalType, + ShareItemHealthStatus, +) +from dataall.modules.shares_base.api.resolvers import ( + union_resolver, + resolve_shared_item, + resolve_dataset, + resolve_existing_shared_items, + resolve_share_object_statistics, + resolve_principal, + resolve_group, + list_shareable_objects, + resolve_user_role, + resolve_can_view_logs, +) +from dataall.core.environment.api.resolvers import resolve_environment + +ShareableObject = gql.Union( + name='ShareableObject', + types=[gql.Ref('DatasetTable'), gql.Ref('DatasetStorageLocation')], + resolver=union_resolver, +) + + +ShareItem = gql.ObjectType( + name='ShareItem', + fields=[ + gql.Field(name='shareUri', type=gql.String), + gql.Field(name='shareItemUri', type=gql.ID), + gql.Field('itemUri', gql.String), + gql.Field(name='status', type=gql.Ref('ShareItemStatus')), + gql.Field(name='action', type=gql.String), + gql.Field('itemType', ShareableType.toGraphQLEnum()), + gql.Field('itemName', gql.String), + 
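+        # description and health metadata; the health fields below are refreshed by the verify and re-apply share item tasks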
gql.Field('description', gql.String), + gql.Field('healthStatus', ShareItemHealthStatus.toGraphQLEnum()), + gql.Field('healthMessage', gql.String), + gql.Field('lastVerificationTime', gql.String), + gql.Field( + name='sharedObject', + type=gql.Ref('ShareableObject'), + resolver=resolve_shared_item, + ), + ], +) + +NotSharedItem = gql.ObjectType( + name='NotSharedItem', + fields=[ + gql.Field('itemUri', gql.String), + gql.Field('shareItemUri', gql.String), + gql.Field('itemType', ShareableType.toGraphQLEnum()), + gql.Field('label', gql.String), + gql.Field('tags', gql.ArrayType(gql.String)), + gql.Field('created', gql.String), + ], +) + + +NotSharedItemsSearchResult = gql.ObjectType( + name='NotSharedItemsSearchResult', + fields=[ + gql.Field(name='count', type=gql.Integer), + gql.Field(name='pageSize', type=gql.Integer), + gql.Field(name='nextPage', type=gql.Integer), + gql.Field(name='pages', type=gql.Integer), + gql.Field(name='page', type=gql.Integer), + gql.Field(name='previousPage', type=gql.Integer), + gql.Field(name='hasNext', type=gql.Boolean), + gql.Field(name='hasPrevious', type=gql.Boolean), + gql.Field(name='nodes', type=gql.ArrayType(NotSharedItem)), + ], +) + + +SharedItemSearchResult = gql.ObjectType( + name='SharedItemSearchResult', + fields=[ + gql.Field(name='count', type=gql.Integer), + gql.Field(name='pageSize', type=gql.Integer), + gql.Field(name='nextPage', type=gql.Integer), + gql.Field(name='pages', type=gql.Integer), + gql.Field(name='page', type=gql.Integer), + gql.Field(name='previousPage', type=gql.Integer), + gql.Field(name='hasNext', type=gql.Boolean), + gql.Field(name='hasPrevious', type=gql.Boolean), + gql.Field(name='nodes', type=gql.ArrayType(gql.Ref('ShareItem'))), + ], +) + +ShareObjectStatistic = gql.ObjectType( + name='ShareObjectStatistic', + fields=[ + gql.Field(name='sharedItems', type=gql.Integer), + gql.Field(name='revokedItems', type=gql.Integer), + gql.Field(name='failedItems', type=gql.Integer), + gql.Field(name='pendingItems', type=gql.Integer), + ], +) + +DatasetLink = gql.ObjectType( + name='DatasetLink', + fields=[ + gql.Field(name='datasetUri', type=gql.String), + gql.Field(name='datasetName', type=gql.String), + gql.Field(name='SamlAdminGroupName', type=gql.String), + gql.Field(name='environmentName', type=gql.String), + gql.Field(name='AwsAccountId', type=gql.String), + gql.Field(name='region', type=gql.String), + gql.Field(name='exists', type=gql.Boolean), + gql.Field(name='description', type=gql.String), + ], +) + +ShareObject = gql.ObjectType( + name='ShareObject', + fields=[ + gql.Field(name='shareUri', type=gql.ID), + gql.Field(name='status', type=gql.Ref('ShareObjectStatus')), + gql.Field(name='owner', type=gql.String), + gql.Field(name='created', type=gql.String), + gql.Field(name='deleted', type=gql.String), + gql.Field(name='updated', type=gql.String), + gql.Field(name='datasetUri', type=gql.String), + gql.Field(name='requestPurpose', type=gql.String), + gql.Field(name='rejectPurpose', type=gql.String), + gql.Field(name='dataset', type=DatasetLink, resolver=resolve_dataset), + gql.Field(name='existingSharedItems', type=gql.Boolean, resolver=resolve_existing_shared_items), + gql.Field( + name='statistics', + type=gql.Ref('ShareObjectStatistic'), + resolver=resolve_share_object_statistics, + ), + gql.Field(name='principal', resolver=resolve_principal, type=gql.Ref('Principal')), + gql.Field( + name='environment', + resolver=resolve_environment, + type=gql.Ref('Environment'), + ), + gql.Field( + name='group', + 
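+            # resolves to ShareObject.groupUri (see resolve_group)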
resolver=resolve_group, + type=gql.String, + ), + gql.Field( + 'items', + args=[gql.Argument(name='filter', type=gql.Ref('ShareableObjectFilter'))], + type=gql.Ref('SharedItemSearchResult'), + resolver=list_shareable_objects, + ), + gql.Field( + name='canViewLogs', + resolver=resolve_can_view_logs, + type=gql.Boolean, + ), + gql.Field( + name='userRoleForShareObject', + type=gql.Ref('ShareObjectPermission'), + resolver=resolve_user_role, + ), + ], +) + + +ShareSearchResult = gql.ObjectType( + name='ShareSearchResult', + fields=[ + gql.Field(name='count', type=gql.Integer), + gql.Field(name='pageSize', type=gql.Integer), + gql.Field(name='nextPage', type=gql.Integer), + gql.Field(name='pages', type=gql.Integer), + gql.Field(name='page', type=gql.Integer), + gql.Field(name='previousPage', type=gql.Integer), + gql.Field(name='hasNext', type=gql.Boolean), + gql.Field(name='hasPrevious', type=gql.Boolean), + gql.Field(name='nodes', type=gql.ArrayType(gql.Ref('ShareObject'))), + ], +) + +EnvironmentPublishedItem = gql.ObjectType( + name='EnvironmentPublishedItem', + fields=[ + gql.Field(name='shareUri', type=gql.NonNullableType(gql.String)), + gql.Field(name='datasetUri', type=gql.NonNullableType(gql.String)), + gql.Field(name='datasetName', type=gql.NonNullableType(gql.String)), + gql.Field(name='itemAccess', type=gql.NonNullableType(gql.String)), + gql.Field(name='itemType', type=gql.NonNullableType(gql.String)), + gql.Field(name='itemName', type=gql.NonNullableType(gql.String)), + gql.Field(name='environmentUri', type=gql.NonNullableType(gql.String)), + gql.Field(name='targetEnvironmentUri', type=gql.NonNullableType(gql.String)), + gql.Field(name='principalId', type=gql.NonNullableType(gql.String)), + gql.Field(name='environmentName', type=gql.NonNullableType(gql.String)), + gql.Field(name='organizationUri', type=gql.NonNullableType(gql.String)), + gql.Field(name='organizationName', type=gql.NonNullableType(gql.String)), + gql.Field(name='created', type=gql.NonNullableType(gql.String)), + ], +) + + +EnvironmentPublishedItemSearchResults = gql.ObjectType( + name='EnvironmentPublishedItemSearchResults', + fields=[ + gql.Field(name='count', type=gql.Integer), + gql.Field(name='page', type=gql.Integer), + gql.Field(name='pages', type=gql.Integer), + gql.Field(name='hasNext', type=gql.Boolean), + gql.Field(name='hasPrevious', type=gql.Boolean), + gql.Field(name='nodes', type=gql.ArrayType(EnvironmentPublishedItem)), + ], +) + +Principal = gql.ObjectType( + name='Principal', + fields=[ + gql.Field(name='principalId', type=gql.ID), + gql.Field(name='principalType', type=PrincipalType.toGraphQLEnum()), + gql.Field(name='principalName', type=gql.String), + gql.Field(name='principalIAMRoleName', type=gql.String), + gql.Field(name='SamlGroupName', type=gql.String), + gql.Field(name='environmentName', type=gql.String), + gql.Field(name='environmentUri', type=gql.String), + gql.Field(name='AwsAccountId', type=gql.String), + gql.Field(name='region', type=gql.String), + gql.Field(name='organizationName', type=gql.String), + gql.Field(name='organizationUri', type=gql.String), + ], +) + + +PrincipalSearchResult = gql.ObjectType( + name='PrincipalSearchResult', + fields=[ + gql.Field(name='count', type=gql.Integer), + gql.Field(name='nodes', type=gql.ArrayType(Principal)), + gql.Field(name='pageSize', type=gql.Integer), + gql.Field(name='nextPage', type=gql.Integer), + gql.Field(name='pages', type=gql.Integer), + gql.Field(name='page', type=gql.Integer), + gql.Field(name='previousPage', type=gql.Integer), + 
gql.Field(name='hasNext', type=gql.Boolean), + gql.Field(name='hasPrevious', type=gql.Boolean), + ], +) + +ShareLog = gql.ObjectType( + name='ShareLog', + fields=[ + gql.Field(name='logStream', type=gql.String), + gql.Field(name='logGroup', type=gql.String), + gql.Field(name='timestamp', type=gql.String), + gql.Field(name='message', type=gql.String), + ], +) diff --git a/frontend/src/modules/Environments/components/EnvironmentSharedDatasets.js b/frontend/src/modules/Environments/components/EnvironmentSharedDatasets.js index bfec151d2..a99658f0a 100644 --- a/frontend/src/modules/Environments/components/EnvironmentSharedDatasets.js +++ b/frontend/src/modules/Environments/components/EnvironmentSharedDatasets.js @@ -25,7 +25,8 @@ import { SearchIcon } from 'design'; import { SET_ERROR, useDispatch } from 'globalErrors'; -import { searchEnvironmentDataItems, useClient } from 'services'; +import { useClient } from 'services'; +import { searchEnvironmentDataItems } from '../services'; export const EnvironmentSharedDatasets = ({ environment }) => { const client = useClient(); diff --git a/frontend/src/modules/Environments/services/index.js b/frontend/src/modules/Environments/services/index.js index d0895efa5..e52b10a61 100644 --- a/frontend/src/modules/Environments/services/index.js +++ b/frontend/src/modules/Environments/services/index.js @@ -20,6 +20,7 @@ export * from './listEnvironmentNetworks'; export * from './listEnvironmentPermissions'; export * from './removeConsumptionRole'; export * from './removeGroup'; +export * from './searchEnvironmentDataItems'; export * from './updateEnvironment'; export * from './updateGroupEnvironmentPermissions'; export * from './updateConsumptionRole'; diff --git a/frontend/src/services/graphql/Environment/searchEnvironmentDataItems.js b/frontend/src/modules/Environments/services/searchEnvironmentDataItems.js similarity index 90% rename from frontend/src/services/graphql/Environment/searchEnvironmentDataItems.js rename to frontend/src/modules/Environments/services/searchEnvironmentDataItems.js index 2fe5b6e98..8c912fc1f 100644 --- a/frontend/src/services/graphql/Environment/searchEnvironmentDataItems.js +++ b/frontend/src/modules/Environments/services/searchEnvironmentDataItems.js @@ -30,11 +30,8 @@ export const searchEnvironmentDataItems = ({ filter, environmentUri }) => ({ itemType itemName itemAccess - GlueDatabaseName - GlueTableName created principalId - sharedGlueDatabaseName } } } diff --git a/frontend/src/modules/Shares/services/getS3ConsumptionData.js b/frontend/src/modules/Shares/services/getS3ConsumptionData.js new file mode 100644 index 000000000..1aca5fa1e --- /dev/null +++ b/frontend/src/modules/Shares/services/getS3ConsumptionData.js @@ -0,0 +1,16 @@ +import { gql } from 'apollo-boost'; + +export const getS3ConsumptionData = ({ shareUri }) => ({ + variables: { + shareUri + }, + query: gql` + query getS3ConsumptionData($shareUri: String!) 
{ + getS3ConsumptionData(shareUri: $shareUri) { + s3AccessPointName + sharedGlueDatabase + s3bucketName + } + } + ` +}); diff --git a/frontend/src/modules/Shares/services/getShareObject.js b/frontend/src/modules/Shares/services/getShareObject.js index 579aeaa37..b71542192 100644 --- a/frontend/src/modules/Shares/services/getShareObject.js +++ b/frontend/src/modules/Shares/services/getShareObject.js @@ -16,11 +16,6 @@ export const getShareObject = ({ shareUri, filter }) => ({ rejectPurpose userRoleForShareObject canViewLogs - consumptionData { - s3AccessPointName - sharedGlueDatabase - s3bucketName - } principal { principalId principalType diff --git a/frontend/src/modules/Shares/services/index.js b/frontend/src/modules/Shares/services/index.js index 2b2c6bf56..da37d2b7c 100644 --- a/frontend/src/modules/Shares/services/index.js +++ b/frontend/src/modules/Shares/services/index.js @@ -1,6 +1,7 @@ export * from './addSharedItem'; export * from './approveShareObject'; export * from './deleteShareObject'; +export * from './getS3ConsumptionData'; export * from './getShareObject'; export * from './getShareRequestsFromMe'; export * from './listOwnedDatasets'; diff --git a/frontend/src/modules/Worksheets/services/index.js b/frontend/src/modules/Worksheets/services/index.js index 66134e1f7..8ccde440a 100644 --- a/frontend/src/modules/Worksheets/services/index.js +++ b/frontend/src/modules/Worksheets/services/index.js @@ -2,6 +2,7 @@ export * from './createWorksheet'; export * from './deleteWorksheet'; export * from './getWorksheet'; export * from './listS3DatasetsOwnedByEnvGroup'; +export * from './listS3DatasetsSharedWithEnvGroup'; export * from './listWorksheets'; export * from './runAthenaSqlQuery'; export * from './updateWorksheet'; diff --git a/frontend/src/modules/Worksheets/services/listS3DatasetsSharedWithEnvGroup.js b/frontend/src/modules/Worksheets/services/listS3DatasetsSharedWithEnvGroup.js new file mode 100644 index 000000000..a3944d0a1 --- /dev/null +++ b/frontend/src/modules/Worksheets/services/listS3DatasetsSharedWithEnvGroup.js @@ -0,0 +1,24 @@ +import { gql } from 'apollo-boost'; +export const listS3DatasetsSharedWithEnvGroup = ({ + environmentUri, + groupUri +}) => ({ + variables: { + environmentUri, + groupUri + }, + query: gql` + query listS3DatasetsSharedWithEnvGroup( + $environmentUri: String + $groupUri: String + ) { + listS3DatasetsSharedWithEnvGroup( + environmentUri: $environmentUri + groupUri: $groupUri + ) { + datasetUri + sharedGlueDatabaseName + } + } + ` +}); diff --git a/frontend/src/modules/Worksheets/views/WorksheetView.js b/frontend/src/modules/Worksheets/views/WorksheetView.js index 6f4a6dc38..720ca2c91 100644 --- a/frontend/src/modules/Worksheets/views/WorksheetView.js +++ b/frontend/src/modules/Worksheets/views/WorksheetView.js @@ -33,13 +33,13 @@ import { getSharedDatasetTables, listDatasetTableColumns, listValidEnvironments, - searchEnvironmentDataItems, useClient } from 'services'; import { deleteWorksheet, getWorksheet, listS3DatasetsOwnedByEnvGroup, + listS3DatasetsSharedWithEnvGroup, runAthenaSqlQuery, updateWorksheet } from '../services'; @@ -140,38 +140,20 @@ const WorksheetView = () => { ); } response = await client.query( - searchEnvironmentDataItems({ + listS3DatasetsSharedWithEnvGroup({ environmentUri: environment.environmentUri, - filter: { - ...Defaults.selectListFilter, - uniqueShares: true, - itemTypes: 'DatasetTable' - } + groupUri: team }) ); if (response.errors) { dispatch({ type: SET_ERROR, error: response.errors[0].message }); } - if 
(response.data.searchEnvironmentDataItems.nodes) { + if (response.data.listS3DatasetsSharedWithEnvGroup) { sharedWithDatabases = - response.data.searchEnvironmentDataItems.nodes.map((d) => ({ - datasetUri: d.datasetUri, + response.data.listS3DatasetsSharedWithEnvGroup?.map((d) => ({ value: d.datasetUri, - label: d.sharedGlueDatabaseName, - GlueDatabaseName: d.sharedGlueDatabaseName, - environmentUri: d.environmentUri, - principalId: d.principalId + label: d.sharedGlueDatabaseName })); - // Remove duplicates based on GlueDatabaseName - sharedWithDatabases = sharedWithDatabases.filter( - (database, index, self) => - index === - self.findIndex( - (d) => - d.GlueDatabaseName === database.GlueDatabaseName && - d.principalId === team - ) - ); } setDatabaseOptions(ownedDatabases.concat(sharedWithDatabases)); setLoadingDatabases(false); diff --git a/frontend/src/services/graphql/Environment/index.js b/frontend/src/services/graphql/Environment/index.js index 4d3f06e83..96d75039d 100644 --- a/frontend/src/services/graphql/Environment/index.js +++ b/frontend/src/services/graphql/Environment/index.js @@ -5,5 +5,4 @@ export * from './listEnvironmentConsumptionRoles'; export * from './listEnvironmentGroups'; export * from './listEnvironments'; export * from './listValidEnvironments'; -export * from './searchEnvironmentDataItems'; export * from './getConsumptionRolePolicy'; diff --git a/frontend/src/services/graphql/ShareObject/getShareRequestsToMe.js b/frontend/src/services/graphql/ShareObject/getShareRequestsToMe.js index 75b2242f4..1050172c5 100644 --- a/frontend/src/services/graphql/ShareObject/getShareRequestsToMe.js +++ b/frontend/src/services/graphql/ShareObject/getShareRequestsToMe.js @@ -31,8 +31,6 @@ export const getShareRequestsToMe = ({ filter }) => ({ organizationName } statistics { - tables - locations sharedItems revokedItems failedItems From 17070cf0e172c2a8053a258157e5a65948d112d0 Mon Sep 17 00:00:00 2001 From: dlpzx Date: Fri, 14 Jun 2024 11:56:58 +0200 Subject: [PATCH 02/14] Fix issues with tables select worksheet and with unused items in searchEnvironmentDataItems.js + clean-up merge conflicts --- .../modules/s3_datasets_shares/api/queries.py | 2 +- .../db/share_object_repositories.py | 19 +-------- .../dataall/modules/shares_base/api/types.py | 1 + .../shares_base/services/sharing_service.py | 1 + .../services/searchEnvironmentDataItems.js | 1 - .../src/modules/Shares/views/ShareView.js | 41 +++++++++++++------ .../modules/Worksheets/views/WorksheetView.js | 10 ++--- .../modules/s3_datasets_shares/test_share.py | 4 +- 8 files changed, 39 insertions(+), 40 deletions(-) diff --git a/backend/dataall/modules/s3_datasets_shares/api/queries.py b/backend/dataall/modules/s3_datasets_shares/api/queries.py index d9b547c82..eb6f279f3 100644 --- a/backend/dataall/modules/s3_datasets_shares/api/queries.py +++ b/backend/dataall/modules/s3_datasets_shares/api/queries.py @@ -42,7 +42,7 @@ args=[gql.Argument(name='shareUri', type=gql.String)], type=gql.Ref('S3ConsumptionData'), resolver=get_s3_consumption_data, -) # todo: Once PR 1277 is merged modify shareView to use this query +) listS3DatasetsSharedWithEnvGroup = gql.QueryField( diff --git a/backend/dataall/modules/s3_datasets_shares/db/share_object_repositories.py b/backend/dataall/modules/s3_datasets_shares/db/share_object_repositories.py index d96869b91..511977dff 100644 --- a/backend/dataall/modules/s3_datasets_shares/db/share_object_repositories.py +++ b/backend/dataall/modules/s3_datasets_shares/db/share_object_repositories.py @@ -847,23 
+847,6 @@ def paginate_shared_datasets(session, env_uri, data): ShareObjectItem.itemName.label('itemName'), Organization.organizationUri.label('organizationUri'), Organization.name.label('organizationName'), - case( - [ - ( - ShareObjectItem.itemType == ShareableType.Table.value, - func.concat( - DatasetTable.GlueDatabaseName, - '.', - DatasetTable.GlueTableName, - ), - ), - ( - ShareObjectItem.itemType == ShareableType.StorageLocation.value, - func.concat(DatasetStorageLocation.name), - ), - ], - else_='XXX XXXX', - ).label('itemAccess'), ) .join( ShareObject, @@ -889,7 +872,7 @@ def paginate_shared_datasets(session, env_uri, data): ) ) - if data.get('datasetUri'): # TODO review filter + if data.get('datasetUri'): dataset_uri = data.get('datasetUri') q = q.filter(ShareObject.datasetUri == dataset_uri) diff --git a/backend/dataall/modules/shares_base/api/types.py b/backend/dataall/modules/shares_base/api/types.py index 1a3860515..1b87a0f66 100644 --- a/backend/dataall/modules/shares_base/api/types.py +++ b/backend/dataall/modules/shares_base/api/types.py @@ -128,6 +128,7 @@ gql.Field(name='requestPurpose', type=gql.String), gql.Field(name='rejectPurpose', type=gql.String), gql.Field(name='dataset', type=DatasetLink, resolver=resolve_dataset), + gql.Field(name='alreadyExisted', type=gql.Boolean), gql.Field(name='existingSharedItems', type=gql.Boolean, resolver=resolve_existing_shared_items), gql.Field( name='statistics', diff --git a/backend/dataall/modules/shares_base/services/sharing_service.py b/backend/dataall/modules/shares_base/services/sharing_service.py index f47a2e277..7f81fca05 100644 --- a/backend/dataall/modules/shares_base/services/sharing_service.py +++ b/backend/dataall/modules/shares_base/services/sharing_service.py @@ -194,6 +194,7 @@ def revoke_share(cls, engine: Engine, share_uri: str) -> bool: log.info(f'Starting revoke {share_data.share.shareUri}') new_share_state = share_sm.run_transition(ShareObjectActions.Start.value) share_sm.update_state(session, share_data.share, new_share_state) + revoke_successful = True try: if not ShareObjectService.verify_principal_role(session, share_data.share): raise PrincipalRoleNotFound( diff --git a/frontend/src/modules/Environments/services/searchEnvironmentDataItems.js b/frontend/src/modules/Environments/services/searchEnvironmentDataItems.js index 8c912fc1f..dace73b4a 100644 --- a/frontend/src/modules/Environments/services/searchEnvironmentDataItems.js +++ b/frontend/src/modules/Environments/services/searchEnvironmentDataItems.js @@ -29,7 +29,6 @@ export const searchEnvironmentDataItems = ({ filter, environmentUri }) => ({ datasetName itemType itemName - itemAccess created principalId } diff --git a/frontend/src/modules/Shares/views/ShareView.js b/frontend/src/modules/Shares/views/ShareView.js index 586787636..16a5c311f 100644 --- a/frontend/src/modules/Shares/views/ShareView.js +++ b/frontend/src/modules/Shares/views/ShareView.js @@ -63,7 +63,8 @@ import { submitApproval, revokeItemsShareObject, verifyItemsShareObject, - reApplyItemsShareObject + reApplyItemsShareObject, + getS3ConsumptionData } from '../services'; import { AddShareItemModal, @@ -95,6 +96,7 @@ function ShareViewHeader(props) { const [openLogsModal, setOpenLogsModal] = useState(null); const [isSubmitShareModalOpen, setIsSubmitShareModalOpen] = useState(false); + const submit = async () => { setSubmitting(true); const response = await client.mutate( @@ -557,6 +559,7 @@ const ShareView = () => { const [isVerifyItemsModalOpen, setIsVerifyItemsModalOpen] = 
useState(false); const [isReApplyShareItemModalOpen, setIsReApplyShareItemModalOpen] = useState(false); + const [consumptionData, setConsumptionData] = useState({}); const handleAddItemModalClose = () => { setIsAddItemModalOpen(false); @@ -602,6 +605,22 @@ const ShareView = () => { ); if (!response.errors) { setShare(response.data.getShareObject); + const response_c = await client.query( + getS3ConsumptionData({ + shareUri: response.data.getShareObject.shareUri + }) + ); + if (!response_c.errors) { + setConsumptionData({ + s3bucketName: response_c.data.getS3ConsumptionData.s3bucketName, + s3AccessPointName: + response_c.data.getS3ConsumptionData.s3AccessPointName, + sharedGlueDatabase: + response_c.data.getS3ConsumptionData.sharedGlueDatabase + }); + } else { + dispatch({ type: SET_ERROR, error: response_c.errors[0].message }); + } } else { dispatch({ type: SET_ERROR, error: response.errors[0].message }); } @@ -1180,12 +1199,12 @@ const ShareView = () => { color="textPrimary" variant="subtitle2" > - {` ${share.consumptionData.s3bucketName || '-'}`} + {` ${consumptionData.s3bucketName || '-'}`} copyNotification()} - text={`aws s3 ls s3://${share.consumptionData.s3bucketName}`} + text={`aws s3 ls s3://${consumptionData.s3bucketName}`} > { /> - {`aws s3 ls s3://${share.consumptionData.s3bucketName}`} + {`aws s3 ls s3://${consumptionData.s3bucketName}`} @@ -1214,12 +1233,12 @@ const ShareView = () => { color="textPrimary" variant="subtitle2" > - {` ${share.consumptionData.s3AccessPointName || '-'}`} + {` ${consumptionData.s3AccessPointName || '-'}`} copyNotification()} - text={`aws s3 ls arn:aws:s3:${share.dataset.region}:${share.dataset.AwsAccountId}:accesspoint/${share.consumptionData.s3AccessPointName}/SHARED_FOLDER/`} + text={`aws s3 ls arn:aws:s3:${share.dataset.region}:${share.dataset.AwsAccountId}:accesspoint/${consumptionData.s3AccessPointName}/SHARED_FOLDER/`} > { /> - {`aws s3 ls arn:aws:s3:${share.dataset.region}:${share.dataset.AwsAccountId}:accesspoint/${share.consumptionData.s3AccessPointName}/SHARED_FOLDER/`} + {`aws s3 ls arn:aws:s3:${share.dataset.region}:${share.dataset.AwsAccountId}:accesspoint/${consumptionData.s3AccessPointName}/SHARED_FOLDER/`} @@ -1248,14 +1267,12 @@ const ShareView = () => { color="textPrimary" variant="subtitle2" > - {` ${ - share.consumptionData.sharedGlueDatabase || '-' - }`} + {` ${consumptionData.sharedGlueDatabase || '-'}`} copyNotification()} - text={`SELECT * FROM ${share.consumptionData.sharedGlueDatabase}.TABLENAME`} + text={`SELECT * FROM ${consumptionData.sharedGlueDatabase}.TABLENAME`} > { /> - {`SELECT * FROM ${share.consumptionData.sharedGlueDatabase}.TABLENAME`} + {`SELECT * FROM ${consumptionData.sharedGlueDatabase}.TABLENAME`} diff --git a/frontend/src/modules/Worksheets/views/WorksheetView.js b/frontend/src/modules/Worksheets/views/WorksheetView.js index 720ca2c91..ca51d73d0 100644 --- a/frontend/src/modules/Worksheets/views/WorksheetView.js +++ b/frontend/src/modules/Worksheets/views/WorksheetView.js @@ -164,17 +164,17 @@ const WorksheetView = () => { async (environment, dataset) => { setLoadingTables(true); let response = ''; - if (dataset.GlueDatabaseName.includes(dataset.datasetUri + '_shared')) { + if (dataset.label.includes(dataset.value + '_shared')) { response = await client.query( getSharedDatasetTables({ - datasetUri: dataset.datasetUri, + datasetUri: dataset.value, envUri: environment.environmentUri }) ); } else { response = await client.query( listDatasetTables({ - datasetUri: dataset.datasetUri, + datasetUri: 
dataset.value, filter: Defaults.selectListFilter }) ); @@ -182,7 +182,7 @@ const WorksheetView = () => { if ( !response.errors && - dataset.GlueDatabaseName.includes(dataset.datasetUri + '_shared') + dataset.label.includes(dataset.value + '_shared') ) { setTableOptions( response.data.getSharedDatasetTables.map((t) => ({ @@ -355,7 +355,7 @@ const WorksheetView = () => { dispatch({ type: SET_ERROR, error: e.message }) ); setSqlBody( - `SELECT * FROM "${selectedDatabase.GlueDatabaseName}"."${event.target.value.GlueTableName}" limit 10;` + `SELECT * FROM "${selectedDatabase.label}"."${event.target.value.GlueTableName}" limit 10;` ); } diff --git a/tests/modules/s3_datasets_shares/test_share.py b/tests/modules/s3_datasets_shares/test_share.py index 2b1a371a7..571c96ec1 100644 --- a/tests/modules/s3_datasets_shares/test_share.py +++ b/tests/modules/s3_datasets_shares/test_share.py @@ -813,9 +813,7 @@ def list_datasets_published_in_environment(client, user, group, environmentUri): datasetUri datasetName itemType - itemAccess - GlueDatabaseName - GlueTableName + itemName created principalId } From 274cbee1ae2c3ad13d071260ca05cbf18f695158 Mon Sep 17 00:00:00 2001 From: Noah Paige Date: Mon, 17 Jun 2024 09:21:41 -0400 Subject: [PATCH 03/14] Make resource lock generic pt 1 --- backend/dataall/base/db/exceptions.py | 12 ++ backend/dataall/core/__init__.py | 10 +- .../dataall/core/resource_lock/__init__.py | 0 .../dataall/core/resource_lock/db/__init__.py | 0 .../resource_lock/db/resource_lock_models.py | 28 +++ .../db/resource_lock_repositories.py | 122 +++++++++++ .../datasets_base/db/dataset_models.py | 12 -- .../datasets_base/db/dataset_repositories.py | 14 +- .../s3_datasets/services/dataset_service.py | 8 +- .../shares_base/services/share_exceptions.py | 5 - .../shares_base/services/sharing_service.py | 194 ++++++++---------- tests/modules/s3_datasets/test_dataset.py | 5 +- .../test_dataset_resource_found.py | 4 +- 13 files changed, 258 insertions(+), 156 deletions(-) create mode 100644 backend/dataall/core/resource_lock/__init__.py create mode 100644 backend/dataall/core/resource_lock/db/__init__.py create mode 100644 backend/dataall/core/resource_lock/db/resource_lock_models.py create mode 100644 backend/dataall/core/resource_lock/db/resource_lock_repositories.py diff --git a/backend/dataall/base/db/exceptions.py b/backend/dataall/base/db/exceptions.py index b7b4bf583..95c2c2a73 100644 --- a/backend/dataall/base/db/exceptions.py +++ b/backend/dataall/base/db/exceptions.py @@ -169,3 +169,15 @@ def __init__(self, action, message): def __str__(self): return f'{self.message}' + + +class ResourceLockTimeout(Exception): + def __init__(self, action, message): + self.action = action + self.message = f""" + An error occurred (ResourceLockTimeout) when calling {self.action} operation: + {message} + """ + + def __str__(self): + return f'{self.message}' diff --git a/backend/dataall/core/__init__.py b/backend/dataall/core/__init__.py index 9f1bc3e38..4b86fd038 100644 --- a/backend/dataall/core/__init__.py +++ b/backend/dataall/core/__init__.py @@ -1,11 +1,3 @@ """The package contains the core functionality that is required by data.all to work correctly""" -from dataall.core import ( - permissions, - stacks, - groups, - environment, - organizations, - tasks, - vpc, -) +from dataall.core import permissions, stacks, groups, environment, organizations, tasks, vpc, resource_locks diff --git a/backend/dataall/core/resource_lock/__init__.py b/backend/dataall/core/resource_lock/__init__.py new file mode 100644 
index 000000000..e69de29bb diff --git a/backend/dataall/core/resource_lock/db/__init__.py b/backend/dataall/core/resource_lock/db/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/backend/dataall/core/resource_lock/db/resource_lock_models.py b/backend/dataall/core/resource_lock/db/resource_lock_models.py new file mode 100644 index 000000000..d98f30ffe --- /dev/null +++ b/backend/dataall/core/resource_lock/db/resource_lock_models.py @@ -0,0 +1,28 @@ +from typing import Optional +from sqlalchemy import Column, String, Boolean + +from dataall.base.db import Base + + +class ResourceLock(Base): + __tablename__ = 'resource_lock' + + resourceUri = Column(String, nullable=False, primary_key=True) + resourceType = Column(String, nullable=False, primary_key=True) + isLocked = Column(Boolean, default=False) + acquiredByUri = Column(String, nullable=True) + acquiredByType = Column(String, nullable=True) + + def __init__( + self, + resourceUri: str, + resourceType: str, + isLocked: bool = False, + acquiredByUri: Optional[str] = None, + acquiredByType: Optional[str] = None, + ): + self.resourceUri = resourceUri + self.resourceType = resourceType + self.isLocked = isLocked + self.acquiredByUri = acquiredByUri + self.acquiredByType = acquiredByType diff --git a/backend/dataall/core/resource_lock/db/resource_lock_repositories.py b/backend/dataall/core/resource_lock/db/resource_lock_repositories.py new file mode 100644 index 000000000..891744bf4 --- /dev/null +++ b/backend/dataall/core/resource_lock/db/resource_lock_repositories.py @@ -0,0 +1,122 @@ +import logging + +from backend.dataall.base.db.base import Resource +from dataall.core.resource_lock.db.resource_lock_models import ResourceLock +from sqlalchemy import and_, or_ + +log = logging.getLogger(__name__) + + +class ResourceLockRepository: + @staticmethod + def create_resource_lock( + session, resource_uri, resource_type, is_locked=False, acquired_by_uri=None, acquired_by_type=None + ): + resource_lock = ResourceLock( + resourceUri=resource_uri, + resourceType=resource_type, + isLocked=is_locked, + acquiredByUri=acquired_by_uri, + acquiredByType=acquired_by_type, + ) + session.add(resource_lock) + session.commit() + + @staticmethod + def delete_resource_lock(session, resource_uri): + resource_lock = session.query(ResourceLock).filter(ResourceLock.resourceUri == resource_uri).first() + session.delete(resource_lock) + session.commit() + + @staticmethod + def acquire_locks(resources, session, acquired_by_uri, acquired_by_type): + """ + Attempts to acquire one or more locks on the resources identified by resourceUri and resourceType. + + Args: + resources: List of resource tuples (resourceUri, resourceType) to acquire locks for. + session (sqlalchemy.orm.Session): The SQLAlchemy session object used for interacting with the database. + acquired_by_uri: The ID of the resource that is attempting to acquire the lock. + acquired_by_type: The type of the resource that is attempting to acquire the lock. + + Returns: + bool: True if the lock is successfully acquired, False otherwise.
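+
+        Example (illustrative sketch only; assumes an open SQLAlchemy session,
+        lock rows already created via create_resource_lock, and hypothetical
+        URIs and table names):
+
+            resources = [
+                ('dataset-uri-123', 'dataset'),
+                ('group-123-env-456', 'environment_group_permission'),
+            ]
+            if ResourceLockRepository.acquire_locks(resources, session, 'share-789', 'share_object'):
+                try:
+                    ...  # perform the work guarded by the locks
+                finally:
+                    for uri, resource_type in resources:
+                        ResourceLockRepository.release_lock(session, uri, resource_type, 'share-789')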
+ """ + try: + # Execute the query to get the ResourceLock object + filter_conditions = [ + and_( + ResourceLock.resourceUri == resource[0], + ResourceLock.resourceType == resource[1], + ~ResourceLock.isLocked, + ) + for resource in resources + ] + resource_locks = session.query(ResourceLock).filter(or_(*filter_conditions)).with_for_update().all() + + # Ensure lock record found for each resource + if len(resource_locks) == len(resources): + # Update the attributes of the ResourceLock object + for resource_lock in resource_locks: + resource_lock.isLocked = True + resource_lock.acquiredBy = acquired_by_uri + resource_lock.acquiredByType = acquired_by_type + session.commit() + return True + else: + log.info( + 'Not all ResourceLocks were found. One or more ResourceLocks may be acquired by another resource...' + ) + return False + except Exception as e: + session.expunge_all() + session.rollback() + log.error('Error occurred while acquiring lock:', e) + return False + + @staticmethod + def release_lock(session, resource_uri, resource_type, share_uri): + """ + Releases the lock on the resource identified by resource_uri, resource_type. + + Args: + session (sqlalchemy.orm.Session): The SQLAlchemy session object used for interacting with the database. + resource_uri: The ID of the resource that owns the lock. + resource_type: The type of the resource that owns the lock. + share_uri: The ID of the share that is attempting to release the lock. + + Returns: + bool: True if the lock is successfully released, False otherwise. + """ + try: + log.info(f'Releasing lock for resource: {resource_uri=}, {resource_type=}') + + resource_lock = ( + session.query(ResourceLock) + .filter( + and_( + ResourceLock.resourceUri == resource_uri, + ResourceLock.resourceType == resource_type, + ResourceLock.isLocked, + ResourceLock.acquiredByUri == share_uri, + ) + ) + .with_for_update() + .first() + ) + + if resource_lock: + resource_lock.isLocked = False + resource_lock.acquiredBy = '' + + session.commit() + return True + else: + log.info(f'ResourceLock not found for resource: {resource_uri=}, {resource_type=}') + return False + + except Exception as e: + session.expunge_all() + session.rollback() + log.error('Error occurred while releasing lock:', e) + return False diff --git a/backend/dataall/modules/datasets_base/db/dataset_models.py b/backend/dataall/modules/datasets_base/db/dataset_models.py index eb4c9890b..6d7f453d7 100644 --- a/backend/dataall/modules/datasets_base/db/dataset_models.py +++ b/backend/dataall/modules/datasets_base/db/dataset_models.py @@ -40,15 +40,3 @@ def uri(cls): DatasetBase.__name__ = 'Dataset' - - -class DatasetLock(Base): - __tablename__ = 'dataset_lock' - datasetUri = Column(String, ForeignKey('dataset.datasetUri'), nullable=False, primary_key=True) - isLocked = Column(Boolean, default=False) - acquiredBy = Column(String, nullable=True) - - def __init__(self, datasetUri, isLocked=False, acquiredBy=None): - self.datasetUri = datasetUri - self.isLocked = isLocked - self.acquiredBy = acquiredBy diff --git a/backend/dataall/modules/datasets_base/db/dataset_repositories.py b/backend/dataall/modules/datasets_base/db/dataset_repositories.py index f9d1b33a6..2ff9e396f 100644 --- a/backend/dataall/modules/datasets_base/db/dataset_repositories.py +++ b/backend/dataall/modules/datasets_base/db/dataset_repositories.py @@ -5,7 +5,7 @@ from dataall.base.db import paginate from dataall.base.db.exceptions import ObjectNotFound from dataall.core.activity.db.activity_models import Activity -from 
dataall.modules.datasets_base.db.dataset_models import DatasetBase, DatasetLock +from dataall.modules.datasets_base.db.dataset_models import DatasetBase logger = logging.getLogger(__name__) @@ -13,18 +13,6 @@ class DatasetBaseRepository: """DAO layer for GENERIC Datasets""" - @staticmethod - def create_dataset_lock(session, dataset: DatasetBase): - dataset_lock = DatasetLock(datasetUri=dataset.datasetUri, isLocked=False, acquiredBy='') - session.add(dataset_lock) - session.commit() - - @staticmethod - def delete_dataset_lock(session, dataset: DatasetBase): - dataset_lock = session.query(DatasetLock).filter(DatasetLock.datasetUri == dataset.datasetUri).first() - session.delete(dataset_lock) - session.commit() - @staticmethod def update_dataset_activity(session, dataset: DatasetBase, username): activity = Activity( diff --git a/backend/dataall/modules/s3_datasets/services/dataset_service.py b/backend/dataall/modules/s3_datasets/services/dataset_service.py index 0ea96b2dc..b1883d7d3 100644 --- a/backend/dataall/modules/s3_datasets/services/dataset_service.py +++ b/backend/dataall/modules/s3_datasets/services/dataset_service.py @@ -2,6 +2,7 @@ import json import logging from typing import List +from backend.dataall.core.resource_lock.db.resource_lock_repositories import ResourceLockRepository from dataall.base.aws.quicksight import QuicksightClient from dataall.base.db import exceptions from dataall.base.utils.naming_convention import NamingConventionPattern @@ -164,8 +165,9 @@ def create_dataset(uri, admin_group, data: dict): DatasetService.check_imported_resources(dataset) dataset = DatasetRepository.create_dataset(session=session, env=environment, dataset=dataset, data=data) - DatasetBaseRepository.create_dataset_lock(session=session, dataset=dataset) - + ResourceLockRepository.create_resource_lock( + session=session, resource_uri=dataset.datasetUri, resource_type=dataset.__tablename__ + ) DatasetBucketRepository.create_dataset_bucket(session, dataset, data) ResourcePolicyService.attach_resource_policy( @@ -411,7 +413,7 @@ def delete_dataset(uri: str, delete_from_aws: bool = False): ResourcePolicyService.delete_resource_policy(session=session, resource_uri=uri, group=env.SamlGroupName) if dataset.stewards: ResourcePolicyService.delete_resource_policy(session=session, resource_uri=uri, group=dataset.stewards) - DatasetBaseRepository.delete_dataset_lock(session=session, dataset=dataset) + ResourceLockRepository.delete_resource_lock(session=session, resource_uri=dataset) DatasetRepository.delete_dataset(session, dataset) if delete_from_aws: diff --git a/backend/dataall/modules/shares_base/services/share_exceptions.py b/backend/dataall/modules/shares_base/services/share_exceptions.py index bfe051776..ea9091f8a 100644 --- a/backend/dataall/modules/shares_base/services/share_exceptions.py +++ b/backend/dataall/modules/shares_base/services/share_exceptions.py @@ -19,8 +19,3 @@ def __init__(self, action, message): class PrincipalRoleNotFound(BaseShareException): def __init__(self, action, message): super().__init__('PrincipalRoleNotFound', action, message) - - -class DatasetLockTimeout(BaseShareException): - def __init__(self, action, message): - super().__init__('DatasetLockTimeout', action, message) diff --git a/backend/dataall/modules/shares_base/services/sharing_service.py b/backend/dataall/modules/shares_base/services/sharing_service.py index 7f81fca05..97939c248 100644 --- a/backend/dataall/modules/shares_base/services/sharing_service.py +++ 
b/backend/dataall/modules/shares_base/services/sharing_service.py @@ -1,11 +1,13 @@ import logging -from typing import Any, Dict +from typing import Any, Dict, List, Tuple from dataclasses import dataclass from abc import ABC, abstractmethod -from sqlalchemy import and_ from time import sleep + +from sqlalchemy.orm import Session +from backend.dataall.core.resource_lock.db.resource_lock_repositories import ResourceLockRepository from dataall.base.db import Engine -from dataall.core.environment.db.environment_models import Environment, EnvironmentGroup +from dataall.core.environment.db.environment_models import ConsumptionRole, Environment, EnvironmentGroup from dataall.modules.shares_base.db.share_object_state_machines import ( ShareObjectSM, ShareItemSM, @@ -16,17 +18,15 @@ ShareItemActions, ShareItemStatus, ShareableType, + PrincipalType, ) from dataall.modules.shares_base.db.share_object_models import ShareObject from dataall.modules.shares_base.db.share_object_repositories import ShareObjectRepository from dataall.modules.s3_datasets_shares.services.share_object_service import ( ShareObjectService, ) # TODO move to shares_base in following PR -from dataall.modules.shares_base.services.share_exceptions import ( - PrincipalRoleNotFound, - DatasetLockTimeout, -) -from dataall.modules.datasets_base.db.dataset_models import DatasetLock +from dataall.modules.shares_base.services.share_exceptions import PrincipalRoleNotFound +from dataall.base.db.exceptions import ResourceLockTimeout log = logging.getLogger(__name__) @@ -106,6 +106,17 @@ def approve_share(cls, engine: Engine, share_uri: str) -> bool: log.info(f'Starting share {share_data.share.shareUri}') new_share_state = share_object_sm.run_transition(ShareObjectActions.Start.value) share_object_sm.update_state(session, share_data.share, new_share_state) + + resources = [(share_data.dataset.datasetUri, share_data.dataset.__tablename__)] + resources.append( + (share_data.share.principalId, ConsumptionRole.__tablename__) + if share_data.share.principalType == PrincipalType.ConsumptionRole.value + else ( + f'{share_data.share.principalId}-{share_data.share.environmentUri}', + EnvironmentGroup.__tablename__, + ) + ) + share_successful = True try: if not ShareObjectService.verify_principal_role(session, share_data.share): @@ -113,8 +124,11 @@ def approve_share(cls, engine: Engine, share_uri: str) -> bool: 'process approved shares', f'Principal role {share_data.share.principalIAMRoleName} is not found.', ) - if not cls.acquire_lock_with_retry(share_data.dataset.datasetUri, session, share_data.share.shareUri): - raise DatasetLockTimeout( + + if not cls.acquire_lock_with_retry( + resources, session, share_data.share.shareUri, share_data.share.__tablename__ + ): + raise ResourceLockTimeout( 'process approved shares', f'Failed to acquire lock for dataset {share_data.dataset.datasetUri}', ) @@ -160,9 +174,13 @@ def approve_share(cls, engine: Engine, share_uri: str) -> bool: finally: new_share_state = share_object_sm.run_transition(ShareObjectActions.Finish.value) share_object_sm.update_state(session, share_data.share, new_share_state) - lock_released = cls.release_lock(share_data.dataset.datasetUri, session, share_data.share.shareUri) - if not lock_released: - log.error(f'Failed to release lock for dataset {share_data.dataset.datasetUri}.') + for resource in resources: + if not ResourceLockRepository.release_lock( + session, resource[0], resource[1], share_data.share.shareUri + ): + log.error( + f'Failed to release lock for resource: 
resource_uri={resource[0]}, resource_type={resource[1]}' + ) @classmethod def revoke_share(cls, engine: Engine, share_uri: str) -> bool: @@ -194,6 +212,17 @@ def revoke_share(cls, engine: Engine, share_uri: str) -> bool: log.info(f'Starting revoke {share_data.share.shareUri}') new_share_state = share_sm.run_transition(ShareObjectActions.Start.value) share_sm.update_state(session, share_data.share, new_share_state) + + resources = [(share_data.dataset.datasetUri, share_data.dataset.__tablename__)] + resources.append( + (share_data.share.principalId, ConsumptionRole.__tablename__) + if share_data.share.principalType == PrincipalType.ConsumptionRole.value + else ( + f'{share_data.share.principalId}-{share_data.share.environmentUri}', + EnvironmentGroup.__tablename__, + ) + ) + revoke_successful = True try: if not ShareObjectService.verify_principal_role(session, share_data.share): @@ -201,8 +230,11 @@ def revoke_share(cls, engine: Engine, share_uri: str) -> bool: 'process revoked shares', f'Principal role {share_data.share.principalIAMRoleName} is not found.', ) - if not cls.acquire_lock_with_retry(share_data.dataset.datasetUri, session, share_data.share.shareUri): - raise DatasetLockTimeout( + + if not cls.acquire_lock_with_retry( + resources, session, share_data.share.shareUri, share_data.share.__tablename__ + ): + raise ResourceLockTimeout( 'process revoked shares', f'Failed to acquire lock for dataset {share_data.dataset.datasetUri}', ) @@ -254,9 +286,13 @@ def revoke_share(cls, engine: Engine, share_uri: str) -> bool: new_share_state = share_sm.run_transition(ShareObjectActions.Finish.value) share_sm.update_state(session, share_data.share, new_share_state) - lock_released = cls.release_lock(share_data.dataset.datasetUri, session, share_data.share.shareUri) - if not lock_released: - log.error(f'Failed to release lock for dataset {share_data.dataset.datasetUri}.') + for resource in resources: + if not ResourceLockRepository.release_lock( + session, resource[0], resource[1], share_data.share.shareUri + ): + log.error( + f'Failed to release lock for resource: resource_uri={resource[0]}, resource_type={resource[1]}' + ) @classmethod def verify_share( @@ -334,6 +370,16 @@ def reapply_share(cls, engine: Engine, share_uri: str) -> bool: share_data, share_items = cls._get_share_data_and_items( session, share_uri, None, ShareItemHealthStatus.PendingReApply.value ) + resources = [(share_data.dataset.datasetUri, share_data.dataset.__tablename__)] + resources.append( + (share_data.share.principalId, ConsumptionRole.__tablename__) + if share_data.share.principalType == PrincipalType.ConsumptionRole.value + else ( + f'{share_data.share.principalId}-{share_data.share.environmentUri}', + EnvironmentGroup.__tablename__, + ) + ) + try: log.info(f'Verifying principal IAM Role {share_data.share.principalIAMRoleName}') reapply_successful = ShareObjectService.verify_principal_role(session, share_data.share) @@ -342,8 +388,9 @@ def reapply_share(cls, engine: Engine, share_uri: str) -> bool: return False else: lock_acquired = cls.acquire_lock_with_retry( - share_data.dataset.datasetUri, session, share_data.share.shareUri + resources, session, share_data.share.shareUri, share_data.share.__tablename__ ) + if not lock_acquired: log.error(f'Failed to acquire lock for dataset {share_data.dataset.datasetUri}, exiting...') error_message = f'SHARING PROCESS TIMEOUT: Failed to acquire lock for dataset {share_data.dataset.datasetUri}' @@ -383,9 +430,13 @@ def reapply_share(cls, engine: Engine, share_uri: str) -> bool: 
finally: with engine.scoped_session() as session: - lock_released = cls.release_lock(share_data.dataset.datasetUri, session, share_data.share.shareUri) - if not lock_released: - log.error(f'Failed to release lock for dataset {share_data.dataset.datasetUri}.') + for resource in resources: + if not ResourceLockRepository.release_lock( + session, resource[0], resource[1], share_data.share.shareUri + ): + log.error( + f'Failed to release lock for resource: resource_uri={resource[0]}, resource_type={resource[1]}' + ) @staticmethod def _get_share_data_and_items(session, share_uri, status, healthStatus=None): @@ -404,103 +455,26 @@ def _get_share_data_and_items(session, share_uri, status, healthStatus=None): return share_data, share_items @staticmethod - def acquire_lock(dataset_uri, session, share_uri): - """ - Attempts to acquire a lock on the dataset identified by dataset_id. - - Args: - dataset_uri: The ID of the dataset for which the lock is being acquired. - session (sqlalchemy.orm.Session): The SQLAlchemy session object used for interacting with the database. - share_uri: The ID of the share that is attempting to acquire the lock. - - Returns: - bool: True if the lock is successfully acquired, False otherwise. - """ - try: - # Execute the query to get the DatasetLock object - dataset_lock = ( - session.query(DatasetLock) - .filter(and_(DatasetLock.datasetUri == dataset_uri, ~DatasetLock.isLocked)) - .with_for_update() - .first() - ) - - # Check if dataset_lock is not None before attempting to update - if dataset_lock: - # Update the attributes of the DatasetLock object - dataset_lock.isLocked = True - dataset_lock.acquiredBy = share_uri - - session.commit() - return True - else: - log.info('DatasetLock not found for the given criteria.') - return False - except Exception as e: - session.expunge_all() - session.rollback() - log.error('Error occurred while acquiring lock:', e) - return False - - @staticmethod - def acquire_lock_with_retry(dataset_uri, session, share_uri): + def acquire_lock_with_retry( + resources: List[Tuple[str, str]], session: Session, acquired_by_uri: str, acquired_by_type: str + ): for attempt in range(MAX_RETRIES): try: - log.info(f'Attempting to acquire lock for dataset {dataset_uri} by share {share_uri}...') - lock_acquired = SharingService.acquire_lock(dataset_uri, session, share_uri) + log.info(f'Attempting to acquire lock for resources {resources} by share {acquired_by_uri}...') + lock_acquired = ResourceLockRepository.acquire_locks( + resources, session, acquired_by_uri, acquired_by_type + ) if lock_acquired: return True - log.info(f'Lock for dataset {dataset_uri} already acquired. Retrying in {RETRY_INTERVAL} seconds...') + log.info( + f'Lock for one or more resources {resources} already acquired. Retrying in {RETRY_INTERVAL} seconds...' + ) sleep(RETRY_INTERVAL) except Exception as e: log.error('Error occurred while retrying acquiring lock:', e) return False - log.info(f'Max retries reached. Failed to acquire lock for dataset {dataset_uri}') + log.info(f'Max retries reached. Failed to acquire lock for one or more resources {resources}') return False - - @staticmethod - def release_lock(dataset_uri, session, share_uri): - """ - Releases the lock on the dataset identified by dataset_uri. - - Args: - dataset_uri: The ID of the dataset for which the lock is being released. - session (sqlalchemy.orm.Session): The SQLAlchemy session object used for interacting with the database. - share_uri: The ID of the share that is attempting to release the lock. 
- - Returns: - bool: True if the lock is successfully released, False otherwise. - """ - try: - log.info(f'Releasing lock for dataset: {dataset_uri} last acquired by share: {share_uri}') - dataset_lock = ( - session.query(DatasetLock) - .filter( - and_( - DatasetLock.datasetUri == dataset_uri, - DatasetLock.isLocked == True, - DatasetLock.acquiredBy == share_uri, - ) - ) - .with_for_update() - .first() - ) - - if dataset_lock: - dataset_lock.isLocked = False - dataset_lock.acquiredBy = '' - - session.commit() - return True - else: - log.info('DatasetLock not found for the given criteria.') - return False - - except Exception as e: - session.expunge_all() - session.rollback() - log.error('Error occurred while releasing lock:', e) - return False diff --git a/tests/modules/s3_datasets/test_dataset.py b/tests/modules/s3_datasets/test_dataset.py index 5a551e586..163e012dd 100644 --- a/tests/modules/s3_datasets/test_dataset.py +++ b/tests/modules/s3_datasets/test_dataset.py @@ -8,7 +8,8 @@ from dataall.core.organizations.db.organization_models import Organization from dataall.modules.s3_datasets.db.dataset_repositories import DatasetRepository from dataall.modules.s3_datasets.db.dataset_models import DatasetStorageLocation, DatasetTable, S3Dataset -from dataall.modules.datasets_base.db.dataset_models import DatasetLock, DatasetBase +from dataall.modules.datasets_base.db.dataset_models import DatasetBase +from dataall.core.resource_lock.db.resource_lock_models import ResourceLock from tests.core.stacks.test_stack import update_stack_query from dataall.modules.datasets_base.services.datasets_enums import ConfidentialityClassification @@ -350,7 +351,7 @@ def test_dataset_in_environment(client, env_fixture, dataset1, group): def test_delete_dataset(client, dataset, env_fixture, org_fixture, db, module_mocker, group, user): # Delete any Dataset before effectuating the test with db.scoped_session() as session: - session.query(DatasetLock).delete() + session.query(ResourceLock).delete() session.query(S3Dataset).delete() session.query(DatasetBase).delete() session.commit() diff --git a/tests/modules/s3_datasets/test_dataset_resource_found.py b/tests/modules/s3_datasets/test_dataset_resource_found.py index 8c71a2077..0652feb10 100644 --- a/tests/modules/s3_datasets/test_dataset_resource_found.py +++ b/tests/modules/s3_datasets/test_dataset_resource_found.py @@ -1,5 +1,5 @@ from dataall.modules.s3_datasets.db.dataset_models import S3Dataset -from dataall.modules.datasets_base.db.dataset_models import DatasetLock +from dataall.core.resource_lock.db.resource_lock_models import ResourceLock from dataall.modules.s3_datasets.services.dataset_permissions import CREATE_DATASET @@ -117,7 +117,7 @@ def test_dataset_resource_found(db, client, env_fixture, org_fixture, group2, us assert 'EnvironmentResourcesFound' in response.errors[0].message with db.scoped_session() as session: - dataset_lock = session.query(DatasetLock).filter(DatasetLock.datasetUri == dataset.datasetUri).first() + dataset_lock = session.query(ResourceLock).filter(ResourceLock.resourceUri == dataset.datasetUri).first() session.delete(dataset_lock) dataset = session.query(S3Dataset).get(dataset.datasetUri) session.delete(dataset) From e61bebeeac0ae19738762bef5395fde9f9fe7388 Mon Sep 17 00:00:00 2001 From: Noah Paige Date: Mon, 17 Jun 2024 14:27:50 -0400 Subject: [PATCH 04/14] Create and delete resource locks on env group or CR create or delete --- .../services/environment_service.py | 28 +++++++++++++++++++ .../db/resource_lock_repositories.py | 1 
- .../s3_datasets/services/dataset_service.py | 4 +-- .../shares_base/services/sharing_service.py | 2 +- 4 files changed, 31 insertions(+), 4 deletions(-) diff --git a/backend/dataall/core/environment/services/environment_service.py b/backend/dataall/core/environment/services/environment_service.py index 3f5b82302..496df953b 100644 --- a/backend/dataall/core/environment/services/environment_service.py +++ b/backend/dataall/core/environment/services/environment_service.py @@ -18,6 +18,7 @@ CREDENTIALS_ENVIRONMENT, ) from dataall.core.permissions.services.resource_policy_service import ResourcePolicyService +from dataall.core.resource_lock.db.resource_lock_repositories import ResourceLockRepository from dataall.core.permissions.services.tenant_policy_service import TenantPolicyService from dataall.core.activity.db.activity_models import Activity from dataall.core.environment.db.environment_models import EnvironmentParameter, ConsumptionRole @@ -285,6 +286,12 @@ def create_environment(uri, data=None): ) session.commit() + ResourceLockRepository.create_resource_lock( + session=session, + resource_uri=f'{env_group.groupUri}-{env_group.environmentUri}', + resource_type=env_group.__tablename__, + ) + activity = Activity( action='ENVIRONMENT:CREATE', label='ENVIRONMENT:CREATE', @@ -401,6 +408,13 @@ def invite_group(uri, data=None) -> (Environment, EnvironmentGroup): ) session.add(environment_group) session.commit() + + ResourceLockRepository.create_resource_lock( + session=session, + resource_uri=f'{environment_group.groupUri}-{environment_group.environmentUri}', + resource_type=environment_group.__tablename__, + ) + ResourcePolicyService.attach_resource_policy( session=session, group=group, @@ -454,6 +468,9 @@ def remove_group(uri, group): ).delete_all_policies() if group_membership: + ResourceLockRepository.delete_resource_lock( + session=session, resource_uri=f'{group}-{environment.environmentUri}' + ) session.delete(group_membership) session.commit() @@ -580,6 +597,12 @@ def add_consumption_role(uri, data=None) -> (Environment, EnvironmentGroup): session.add(consumption_role) session.commit() + ResourceLockRepository.create_resource_lock( + session=session, + resource_uri=consumption_role.consumptionRoleUri, + resource_type=consumption_role.__tablename__, + ) + ResourcePolicyService.attach_resource_policy( session=session, group=group, @@ -619,6 +642,7 @@ def remove_consumption_role(uri, env_uri): resource_uri=consumption_role.consumptionRoleUri, resource_type=ConsumptionRole.__name__, ) + ResourceLockRepository.delete_resource_lock(session=session, resource_uri=uri) session.delete(consumption_role) session.commit() @@ -869,6 +893,9 @@ def delete_environment(uri): EnvironmentParameterRepository(session).delete_params(environment.environmentUri) for group in env_groups: + ResourceLockRepository.delete_resource_lock( + session=session, resource_uri=f'{group.groupUri}-{environment.environmentUri}' + ) session.delete(group) ResourcePolicyService.delete_resource_policy( @@ -878,6 +905,7 @@ def delete_environment(uri): ) for role in env_roles: + ResourceLockRepository.delete_resource_lock(session=session, resource_uri=role.consumptionRoleUri) session.delete(role) return session.delete(environment), environment diff --git a/backend/dataall/core/resource_lock/db/resource_lock_repositories.py b/backend/dataall/core/resource_lock/db/resource_lock_repositories.py index 891744bf4..03c3f67e5 100644 --- a/backend/dataall/core/resource_lock/db/resource_lock_repositories.py +++ 
b/backend/dataall/core/resource_lock/db/resource_lock_repositories.py @@ -1,6 +1,5 @@ import logging -from backend.dataall.base.db.base import Resource from dataall.core.resource_lock.db.resource_lock_models import ResourceLock from sqlalchemy import and_, or_ diff --git a/backend/dataall/modules/s3_datasets/services/dataset_service.py b/backend/dataall/modules/s3_datasets/services/dataset_service.py index b1883d7d3..5f4dbf175 100644 --- a/backend/dataall/modules/s3_datasets/services/dataset_service.py +++ b/backend/dataall/modules/s3_datasets/services/dataset_service.py @@ -2,7 +2,7 @@ import json import logging from typing import List -from backend.dataall.core.resource_lock.db.resource_lock_repositories import ResourceLockRepository +from dataall.core.resource_lock.db.resource_lock_repositories import ResourceLockRepository from dataall.base.aws.quicksight import QuicksightClient from dataall.base.db import exceptions from dataall.base.utils.naming_convention import NamingConventionPattern @@ -413,7 +413,7 @@ def delete_dataset(uri: str, delete_from_aws: bool = False): ResourcePolicyService.delete_resource_policy(session=session, resource_uri=uri, group=env.SamlGroupName) if dataset.stewards: ResourcePolicyService.delete_resource_policy(session=session, resource_uri=uri, group=dataset.stewards) - ResourceLockRepository.delete_resource_lock(session=session, resource_uri=dataset) + ResourceLockRepository.delete_resource_lock(session=session, resource_uri=dataset.datasetUri) DatasetRepository.delete_dataset(session, dataset) if delete_from_aws: diff --git a/backend/dataall/modules/shares_base/services/sharing_service.py b/backend/dataall/modules/shares_base/services/sharing_service.py index 97939c248..a548796a1 100644 --- a/backend/dataall/modules/shares_base/services/sharing_service.py +++ b/backend/dataall/modules/shares_base/services/sharing_service.py @@ -5,7 +5,7 @@ from time import sleep from sqlalchemy.orm import Session -from backend.dataall.core.resource_lock.db.resource_lock_repositories import ResourceLockRepository +from dataall.core.resource_lock.db.resource_lock_repositories import ResourceLockRepository from dataall.base.db import Engine from dataall.core.environment.db.environment_models import ConsumptionRole, Environment, EnvironmentGroup from dataall.modules.shares_base.db.share_object_state_machines import ( From d1ced2ae7727a522ce72fe8fd00463923b64f12b Mon Sep 17 00:00:00 2001 From: Noah Paige Date: Mon, 17 Jun 2024 20:20:47 -0400 Subject: [PATCH 05/14] Add migration script and debug --- backend/dataall/core/__init__.py | 2 +- .../db/resource_lock_repositories.py | 5 +- .../797dd1012be1_resource_lock_table.py | 183 ++++++++++++++++++ 3 files changed, 187 insertions(+), 3 deletions(-) create mode 100644 backend/migrations/versions/797dd1012be1_resource_lock_table.py diff --git a/backend/dataall/core/__init__.py b/backend/dataall/core/__init__.py index 4b86fd038..f15c835aa 100644 --- a/backend/dataall/core/__init__.py +++ b/backend/dataall/core/__init__.py @@ -1,3 +1,3 @@ """The package contains the core functionality that is required by data.all to work correctly""" -from dataall.core import permissions, stacks, groups, environment, organizations, tasks, vpc, resource_locks +from dataall.core import permissions, stacks, groups, environment, organizations, tasks, vpc, resource_lock diff --git a/backend/dataall/core/resource_lock/db/resource_lock_repositories.py b/backend/dataall/core/resource_lock/db/resource_lock_repositories.py index 03c3f67e5..e242853a1 100644 --- 
a/backend/dataall/core/resource_lock/db/resource_lock_repositories.py +++ b/backend/dataall/core/resource_lock/db/resource_lock_repositories.py @@ -58,7 +58,7 @@ def acquire_locks(resources, session, acquired_by_uri, acquired_by_type): # Update the attributes of the ResourceLock object for resource_lock in resource_locks: resource_lock.isLocked = True - resource_lock.acquiredBy = acquired_by_uri + resource_lock.acquiredByUri = acquired_by_uri resource_lock.acquiredByType = acquired_by_type session.commit() return True @@ -106,7 +106,8 @@ def release_lock(session, resource_uri, resource_type, share_uri): if resource_lock: resource_lock.isLocked = False - resource_lock.acquiredBy = '' + resource_lock.acquiredByUri = '' + resource_lock.acquiredByType = '' session.commit() return True diff --git a/backend/migrations/versions/797dd1012be1_resource_lock_table.py b/backend/migrations/versions/797dd1012be1_resource_lock_table.py new file mode 100644 index 000000000..48b2e7bde --- /dev/null +++ b/backend/migrations/versions/797dd1012be1_resource_lock_table.py @@ -0,0 +1,183 @@ +"""resource_lock_table + +Revision ID: 797dd1012be1 +Revises: 6adce90ab470 +Create Date: 2024-06-17 19:06:51.569471 + +""" + +from alembic import op +from sqlalchemy import orm, Column, String, Boolean, ForeignKey +import sqlalchemy as sa +from typing import Optional +from sqlalchemy.ext.declarative import declarative_base +from dataall.base.db import utils + +# revision identifiers, used by Alembic. +revision = '797dd1012be1' +down_revision = '6adce90ab470' +branch_labels = None +depends_on = None + +Base = declarative_base() + + +class ResourceLock(Base): + __tablename__ = 'resource_lock' + + resourceUri = Column(String, nullable=False, primary_key=True) + resourceType = Column(String, nullable=False, primary_key=True) + isLocked = Column(Boolean, default=False) + acquiredByUri = Column(String, nullable=True) + acquiredByType = Column(String, nullable=True) + + def __init__( + self, + resourceUri: str, + resourceType: str, + isLocked: bool = False, + acquiredByUri: Optional[str] = None, + acquiredByType: Optional[str] = None, + ): + self.resourceUri = resourceUri + self.resourceType = resourceType + self.isLocked = isLocked + self.acquiredByUri = acquiredByUri + self.acquiredByType = acquiredByType + + +class DatasetBase(Base): + __tablename__ = 'dataset' + environmentUri = Column(String, ForeignKey('environment.environmentUri'), nullable=False) + organizationUri = Column(String, nullable=False) + datasetUri = Column(String, primary_key=True, default=utils.uuid('dataset')) + + +class S3Dataset(DatasetBase): + __tablename__ = 's3_dataset' + datasetUri = Column(String, ForeignKey('dataset.datasetUri'), primary_key=True) + + +class EnvironmentGroup(Base): + __tablename__ = 'environment_group_permission' + groupUri = Column(String, primary_key=True) + environmentUri = Column(String, primary_key=True) + + +class ConsumptionRole(Base): + __tablename__ = 'consumptionrole' + consumptionRoleUri = Column(String, primary_key=True, default=utils.uuid('group')) + + +def upgrade(): + # ### commands auto generated by Alembic - please adjust! 
###
+
+    # Drop Foreign Key
+    op.drop_constraint('dataset_lock_datasetUri_fkey', 'dataset_lock', type_='foreignkey')
+
+    # Rename Table to Resource Lock
+    op.rename_table('dataset_lock', 'resource_lock')
+
+    # Rename Columns
+    op.alter_column(
+        'resource_lock',
+        'datasetUri',
+        nullable=False,
+        new_column_name='resourceUri',
+        existing_type=String,
+        primary_key=True,
+    )
+    op.alter_column(
+        'resource_lock',
+        'acquiredBy',
+        nullable=True,
+        new_column_name='acquiredByUri',
+        existing_type=String,
+    )
+
+    # Add New Columns
+    op.add_column('resource_lock', sa.Column('resourceType', sa.String()))
+    op.add_column('resource_lock', sa.Column('acquiredByType', sa.String(), nullable=True))
+
+    session = orm.Session(bind=op.get_bind())
+
+    # Backfill Dataset Locks
+    session.query(ResourceLock).update(
+        {
+            ResourceLock.resourceType: S3Dataset.__tablename__,
+        }
+    )
+    session.commit()
+
+    # Add resourceType as primary key after backfilling
+    op.alter_column('resource_lock', 'resourceType', primary_key=True)
+
+    # Backfill Locks for Env Groups
+    env_groups = session.query(EnvironmentGroup).all()
+    for group in env_groups:
+        lock = ResourceLock(
+            resourceUri=f'{group.groupUri}-{group.environmentUri}',
+            resourceType=EnvironmentGroup.__tablename__,
+        )
+        session.add(lock)
+    session.commit()
+
+    # Backfill Locks for Consumption Roles
+    consumption_roles = session.query(ConsumptionRole).all()
+    for role in consumption_roles:
+        lock = ResourceLock(resourceUri=role.consumptionRoleUri, resourceType=ConsumptionRole.__tablename__)
+        session.add(lock)
+    session.commit()
+    # ### end Alembic commands ###
+
+
+def downgrade():
+    session = orm.Session(bind=op.get_bind())
+    # Deleting Locks for Env Groups
+    env_groups = session.query(EnvironmentGroup).all()
+    for group in env_groups:
+        lock = session.query(ResourceLock).get(
+            (f'{group.groupUri}-{group.environmentUri}', EnvironmentGroup.__tablename__)
+        )
+        if lock:
+            print('YES LOCK')
+            session.delete(lock)
+
+    # Deleting Locks for Consumption Roles
+    consumption_roles = session.query(ConsumptionRole).all()
+    for role in consumption_roles:
+        print('CR ROLE', role.consumptionRoleUri)
+        lock = session.query(ResourceLock).get((role.consumptionRoleUri, ConsumptionRole.__tablename__))
+        if lock:
+            print('YES LOCK')
+            session.delete(lock)
+    session.commit()
+
+    # Drop Columns
+    op.drop_column('resource_lock', 'resourceType')
+    op.drop_column('resource_lock', 'acquiredByType')
+
+    # Rename Columns
+    op.alter_column(
+        'resource_lock',
+        'resourceUri',
+        nullable=False,
+        new_column_name='datasetUri',
+        existing_type=String,
+        primary_key=True,
+    )
+    op.alter_column(
+        'resource_lock',
+        'acquiredByUri',
+        nullable=True,
+        new_column_name='acquiredBy',
+        existing_type=String,
+    )
+
+    # Rename Table to Dataset Lock
+    op.rename_table('resource_lock', 'dataset_lock')
+
+    # Add Foreign Key
+    op.create_foreign_key('dataset_lock_datasetUri_fkey', 'dataset_lock', 'dataset', ['datasetUri'], ['datasetUri'])
+
+    # ### end Alembic commands ###

From 07a2e3d3d8c136d4f7576f2811c8efe3dd2efc6d Mon Sep 17 00:00:00 2001
From: Noah Paige
Date: Mon, 17 Jun 2024 20:29:23 -0400
Subject: [PATCH 06/14] add back resources

---
 .../modules/shares_base/services/sharing_service.py | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/backend/dataall/modules/shares_base/services/sharing_service.py b/backend/dataall/modules/shares_base/services/sharing_service.py
index 55e2ee02c..a548796a1 100644
--- a/backend/dataall/modules/shares_base/services/sharing_service.py
+++ 
b/backend/dataall/modules/shares_base/services/sharing_service.py @@ -212,6 +212,17 @@ def revoke_share(cls, engine: Engine, share_uri: str) -> bool: log.info(f'Starting revoke {share_data.share.shareUri}') new_share_state = share_sm.run_transition(ShareObjectActions.Start.value) share_sm.update_state(session, share_data.share, new_share_state) + + resources = [(share_data.dataset.datasetUri, share_data.dataset.__tablename__)] + resources.append( + (share_data.share.principalId, ConsumptionRole.__tablename__) + if share_data.share.principalType == PrincipalType.ConsumptionRole.value + else ( + f'{share_data.share.principalId}-{share_data.share.environmentUri}', + EnvironmentGroup.__tablename__, + ) + ) + revoke_successful = True try: if not ShareObjectService.verify_principal_role(session, share_data.share): From d1ce3ba97663999d2b6a92e74d90af67bc3e1634 Mon Sep 17 00:00:00 2001 From: Noah Paige Date: Mon, 17 Jun 2024 20:34:32 -0400 Subject: [PATCH 07/14] Fix revision IDs --- backend/migrations/versions/797dd1012be1_resource_lock_table.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/migrations/versions/797dd1012be1_resource_lock_table.py b/backend/migrations/versions/797dd1012be1_resource_lock_table.py index 48b2e7bde..1eed1d5e0 100644 --- a/backend/migrations/versions/797dd1012be1_resource_lock_table.py +++ b/backend/migrations/versions/797dd1012be1_resource_lock_table.py @@ -15,7 +15,7 @@ # revision identifiers, used by Alembic. revision = '797dd1012be1' -down_revision = '6adce90ab470' +down_revision = 'f2f7431c34e5' branch_labels = None depends_on = None From 3fe45e000e6a89f1771fd91f3ed1afee1a11d5ce Mon Sep 17 00:00:00 2001 From: Noah Paige Date: Wed, 19 Jun 2024 09:42:56 -0400 Subject: [PATCH 08/14] merge latest from main and update alembic revision ID --- .../migrations/versions/797dd1012be1_resource_lock_table.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/backend/migrations/versions/797dd1012be1_resource_lock_table.py b/backend/migrations/versions/797dd1012be1_resource_lock_table.py index 1eed1d5e0..916f9c75d 100644 --- a/backend/migrations/versions/797dd1012be1_resource_lock_table.py +++ b/backend/migrations/versions/797dd1012be1_resource_lock_table.py @@ -1,7 +1,7 @@ """resource_lock_table Revision ID: 797dd1012be1 -Revises: 6adce90ab470 +Revises: 448d9dc95e94 Create Date: 2024-06-17 19:06:51.569471 """ @@ -15,7 +15,7 @@ # revision identifiers, used by Alembic. revision = '797dd1012be1' -down_revision = 'f2f7431c34e5' +down_revision = '448d9dc95e94' branch_labels = None depends_on = None From 3ff6734e9496bc798ce4e05db97dc9c9dd3a4132 Mon Sep 17 00:00:00 2001 From: Noah Paige Date: Mon, 24 Jun 2024 17:15:17 -0400 Subject: [PATCH 09/14] Merge latest from main + resolve conflicts + update db migration revision ID --- .../migrations/versions/797dd1012be1_resource_lock_table.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/backend/migrations/versions/797dd1012be1_resource_lock_table.py b/backend/migrations/versions/797dd1012be1_resource_lock_table.py index 9d1ea67fa..947a75e3a 100644 --- a/backend/migrations/versions/797dd1012be1_resource_lock_table.py +++ b/backend/migrations/versions/797dd1012be1_resource_lock_table.py @@ -1,7 +1,7 @@ """resource_lock_table Revision ID: 797dd1012be1 -Revises: 448d9dc95e94 +Revises: 18da23d3ba44 Create Date: 2024-06-17 19:06:51.569471 """ @@ -15,7 +15,7 @@ # revision identifiers, used by Alembic. 
revision = '797dd1012be1' -down_revision = '448d9dc95e94' +down_revision = '18da23d3ba44' branch_labels = None depends_on = None From 88679912caa993039df20291c07eca55b66ba83a Mon Sep 17 00:00:00 2001 From: Noah Paige Date: Wed, 26 Jun 2024 11:47:12 -0400 Subject: [PATCH 10/14] Add context manager to acquire locks with retry and move function to resourcelockrepo --- .../db/resource_lock_repositories.py | 59 ++++ .../shares_base/services/sharing_service.py | 270 +++++++----------- 2 files changed, 166 insertions(+), 163 deletions(-) diff --git a/backend/dataall/core/resource_lock/db/resource_lock_repositories.py b/backend/dataall/core/resource_lock/db/resource_lock_repositories.py index e242853a1..6440ad3f4 100644 --- a/backend/dataall/core/resource_lock/db/resource_lock_repositories.py +++ b/backend/dataall/core/resource_lock/db/resource_lock_repositories.py @@ -2,9 +2,17 @@ from dataall.core.resource_lock.db.resource_lock_models import ResourceLock from sqlalchemy import and_, or_ +from sqlalchemy.orm import Session +from time import sleep +from typing import List, Tuple +from contextlib import contextmanager +from dataall.base.db.exceptions import ResourceLockTimeout log = logging.getLogger(__name__) +MAX_RETRIES = 10 +RETRY_INTERVAL = 60 + class ResourceLockRepository: @staticmethod @@ -120,3 +128,54 @@ def release_lock(session, resource_uri, resource_type, share_uri): session.rollback() log.error('Error occurred while releasing lock:', e) return False + + @staticmethod + def acquire_lock_with_retry( + resources: List[Tuple[str, str]], session: Session, acquired_by_uri: str, acquired_by_type: str + ): + for _ in range(MAX_RETRIES): + try: + log.info(f'Attempting to acquire lock for resources {resources} by share {acquired_by_uri}...') + lock_acquired = ResourceLockRepository.acquire_locks( + resources, session, acquired_by_uri, acquired_by_type + ) + if lock_acquired: + return True + + log.info( + f'Lock for one or more resources {resources} already acquired. Retrying in {RETRY_INTERVAL} seconds...' + ) + sleep(RETRY_INTERVAL) + + except Exception as e: + log.error('Error occurred while retrying acquiring lock:', e) + return False + + log.info(f'Max retries reached. Failed to acquire lock for one or more resources {resources}') + return False + + @staticmethod + @contextmanager + def acquire_lock_with_retry_context( + resources: List[Tuple[str, str]], session: Session, acquired_by_uri: str, acquired_by_type: str + ): + retries_remaining = MAX_RETRIES + log.info(f'Attempting to acquire lock for resources {resources} by share {acquired_by_uri}...') + while not ( + lock_acquired := ResourceLockRepository.acquire_locks(resources, session, acquired_by_uri, acquired_by_type) + ): + log.info( + f'Lock for one or more resources {resources} already acquired. Retrying in {RETRY_INTERVAL} seconds...' 
+ ) + sleep(RETRY_INTERVAL) + retries_remaining -= 1 + if retries_remaining <= 0: + raise ResourceLockTimeout( + 'process shares', + f'Failed to acquire lock for one or more of {resources=}', + ) + try: + yield lock_acquired + finally: + for resource in resources: + ResourceLockRepository.release_lock(session, resource[0], resource[1], acquired_by_uri) diff --git a/backend/dataall/modules/shares_base/services/sharing_service.py b/backend/dataall/modules/shares_base/services/sharing_service.py index 3d3bc91a3..ce35623e7 100644 --- a/backend/dataall/modules/shares_base/services/sharing_service.py +++ b/backend/dataall/modules/shares_base/services/sharing_service.py @@ -1,9 +1,7 @@ import logging -from typing import Any, List, Tuple from dataclasses import dataclass -from time import sleep -from sqlalchemy.orm import Session +from typing import Any from dataall.core.resource_lock.db.resource_lock_repositories import ResourceLockRepository from dataall.base.db import Engine from dataall.core.environment.db.environment_models import ConsumptionRole, Environment, EnvironmentGroup @@ -30,9 +28,6 @@ log = logging.getLogger(__name__) -MAX_RETRIES = 10 -RETRY_INTERVAL = 60 - @dataclass class ShareData: @@ -94,44 +89,45 @@ def approve_share(cls, engine: Engine, share_uri: str) -> bool: f'Principal role {share_data.share.principalIAMRoleName} is not found.', ) - if not cls.acquire_lock_with_retry( - resources, session, share_data.share.shareUri, share_data.share.__tablename__ + with ResourceLockRepository.acquire_lock_with_retry( + resources=resources, + session=session, + acquired_by_uri=share_data.share.shareUri, + acquired_by_type=share_data.share.__tablename__, ): - raise ResourceLockTimeout( - 'process approved shares', - f'Failed to acquire lock for dataset {share_data.dataset.datasetUri}', - ) - for type, processor in ShareProcessorManager.SHARING_PROCESSORS.items(): - try: - log.info(f'Granting permissions of {type.value}') - shareable_items = ShareObjectRepository.get_share_data_items_by_type( - session, - share_data.share, - processor.shareable_type, - processor.shareable_uri, - status=ShareItemStatus.Share_Approved.value, - ) - success = processor.Processor(session, share_data, shareable_items).process_approved_shares() - log.info(f'Sharing {type.value} succeeded = {success}') - if not success: + for type, processor in ShareProcessorManager.SHARING_PROCESSORS.items(): + try: + log.info(f'Granting permissions of {type.value}') + shareable_items = ShareObjectRepository.get_share_data_items_by_type( + session, + share_data.share, + processor.shareable_type, + processor.shareable_uri, + status=ShareItemStatus.Share_Approved.value, + ) + success = processor.Processor( + session, share_data, shareable_items + ).process_approved_shares() + log.info(f'Sharing {type.value} succeeded = {success}') + if not success: + share_successful = False + except Exception as e: + log.error(f'Error occurred during sharing of {type.value}: {e}') + ShareStatusRepository.update_share_item_status_batch( + session, + share_uri, + old_status=ShareItemStatus.Share_Approved.value, + new_status=ShareItemStatus.Share_Failed.value, + share_item_type=processor.type.value, + ) + ShareStatusRepository.update_share_item_status_batch( + session, + share_uri, + old_status=ShareItemStatus.Share_In_Progress.value, + new_status=ShareItemStatus.Share_Failed.value, + share_item_type=processor.type.value, + ) share_successful = False - except Exception as e: - log.error(f'Error occurred during sharing of {type.value}: {e}') - 
ShareStatusRepository.update_share_item_status_batch( - session, - share_uri, - old_status=ShareItemStatus.Share_Approved.value, - new_status=ShareItemStatus.Share_Failed.value, - share_item_type=processor.type.value, - ) - ShareStatusRepository.update_share_item_status_batch( - session, - share_uri, - old_status=ShareItemStatus.Share_In_Progress.value, - new_status=ShareItemStatus.Share_Failed.value, - share_item_type=processor.type.value, - ) - share_successful = False return share_successful except Exception as e: @@ -143,13 +139,6 @@ def approve_share(cls, engine: Engine, share_uri: str) -> bool: finally: new_share_state = share_object_sm.run_transition(ShareObjectActions.Finish.value) share_object_sm.update_state(session, share_data.share, new_share_state) - for resource in resources: - if not ResourceLockRepository.release_lock( - session, resource[0], resource[1], share_data.share.shareUri - ): - log.error( - f'Failed to release lock for resource: resource_uri={resource[0]}, resource_type={resource[1]}' - ) @classmethod def revoke_share(cls, engine: Engine, share_uri: str) -> bool: @@ -200,45 +189,43 @@ def revoke_share(cls, engine: Engine, share_uri: str) -> bool: f'Principal role {share_data.share.principalIAMRoleName} is not found.', ) - if not cls.acquire_lock_with_retry( - resources, session, share_data.share.shareUri, share_data.share.__tablename__ + with ResourceLockRepository.acquire_lock_with_retry( + resources=resources, + session=session, + acquired_by_uri=share_data.share.shareUri, + acquired_by_type=share_data.share.__tablename__, ): - raise ResourceLockTimeout( - 'process revoked shares', - f'Failed to acquire lock for dataset {share_data.dataset.datasetUri}', - ) - - for type, processor in ShareProcessorManager.SHARING_PROCESSORS.items(): - try: - shareable_items = ShareObjectRepository.get_share_data_items_by_type( - session, - share_data.share, - processor.shareable_type, - processor.shareable_uri, - status=ShareItemStatus.Revoke_Approved.value, - ) - log.info(f'Revoking permissions with {type.value}') - success = processor.Processor(session, share_data, shareable_items).process_revoked_shares() - log.info(f'Revoking {type.value} succeeded = {success}') - if not success: + for type, processor in ShareProcessorManager.SHARING_PROCESSORS.items(): + try: + shareable_items = ShareObjectRepository.get_share_data_items_by_type( + session, + share_data.share, + processor.shareable_type, + processor.shareable_uri, + status=ShareItemStatus.Revoke_Approved.value, + ) + log.info(f'Revoking permissions with {type.value}') + success = processor.Processor(session, share_data, shareable_items).process_revoked_shares() + log.info(f'Revoking {type.value} succeeded = {success}') + if not success: + revoke_successful = False + except Exception as e: + log.error(f'Error occurred during share revoking of {type.value}: {e}') + ShareStatusRepository.update_share_item_status_batch( + session, + share_uri, + old_status=ShareItemStatus.Revoke_Approved.value, + new_status=ShareItemStatus.Revoke_Failed.value, + share_item_type=processor.type.value, + ) + ShareStatusRepository.update_share_item_status_batch( + session, + share_uri, + old_status=ShareItemStatus.Revoke_In_Progress.value, + new_status=ShareItemStatus.Revoke_Failed.value, + share_item_type=processor.type.value, + ) revoke_successful = False - except Exception as e: - log.error(f'Error occurred during share revoking of {type.value}: {e}') - ShareStatusRepository.update_share_item_status_batch( - session, - share_uri, - 
old_status=ShareItemStatus.Revoke_Approved.value, - new_status=ShareItemStatus.Revoke_Failed.value, - share_item_type=processor.type.value, - ) - ShareStatusRepository.update_share_item_status_batch( - session, - share_uri, - old_status=ShareItemStatus.Revoke_In_Progress.value, - new_status=ShareItemStatus.Revoke_Failed.value, - share_item_type=processor.type.value, - ) - revoke_successful = False return revoke_successful except Exception as e: @@ -255,14 +242,6 @@ def revoke_share(cls, engine: Engine, share_uri: str) -> bool: new_share_state = share_sm.run_transition(ShareObjectActions.Finish.value) share_sm.update_state(session, share_data.share, new_share_state) - for resource in resources: - if not ResourceLockRepository.release_lock( - session, resource[0], resource[1], share_data.share.shareUri - ): - log.error( - f'Failed to release lock for resource: resource_uri={resource[0]}, resource_type={resource[1]}' - ) - @classmethod def verify_share( cls, @@ -356,57 +335,47 @@ def reapply_share(cls, engine: Engine, share_uri: str) -> bool: log.error(f'Failed to get Principal IAM Role {share_data.share.principalIAMRoleName}, exiting...') return False else: - lock_acquired = cls.acquire_lock_with_retry( - resources, session, share_data.share.shareUri, share_data.share.__tablename__ - ) + with ResourceLockRepository.acquire_lock_with_retry( + resources=resources, + session=session, + acquired_by_uri=share_data.share.shareUri, + acquired_by_type=share_data.share.__tablename__, + ): + for type, processor in ShareProcessorManager.SHARING_PROCESSORS.items(): + try: + log.info(f'Reapplying permissions to {type.value}') + shareable_items = ShareObjectRepository.get_share_data_items_by_type( + session, + share_data.share, + processor.shareable_type, + processor.shareable_uri, + None, + ShareItemHealthStatus.PendingReApply.value, + ) + success = processor.Processor( + session, share_data, shareable_items + ).process_approved_shares() + log.info(f'Reapplying {type.value} succeeded = {success}') + if not success: + reapply_successful = False + except Exception as e: + log.error(f'Error occurred during share reapplying of {type.value}: {e}') - if not lock_acquired: - log.error(f'Failed to acquire lock for dataset {share_data.dataset.datasetUri}, exiting...') - error_message = f'SHARING PROCESS TIMEOUT: Failed to acquire lock for dataset {share_data.dataset.datasetUri}' - ShareStatusRepository.update_share_item_health_status_batch( - session, - share_uri, - old_status=ShareItemHealthStatus.PendingReApply.value, - new_status=ShareItemHealthStatus.Unhealthy.value, - message=error_message, - ) - return False + return reapply_successful - for type, processor in ShareProcessorManager.SHARING_PROCESSORS.items(): - try: - log.info(f'Reapplying permissions to {type.value}') - shareable_items = ShareObjectRepository.get_share_data_items_by_type( - session, - share_data.share, - processor.shareable_type, - processor.shareable_uri, - None, - ShareItemHealthStatus.PendingReApply.value, - ) - success = processor.Processor( - session, share_data, shareable_items - ).process_approved_shares() - log.info(f'Reapplying {type.value} succeeded = {success}') - if not success: - reapply_successful = False - except Exception as e: - log.error(f'Error occurred during share reapplying of {type.value}: {e}') + except ResourceLockTimeout as e: + ShareStatusRepository.update_share_item_health_status_batch( + session, + share_uri, + old_status=ShareItemHealthStatus.PendingReApply.value, + 
new_status=ShareItemHealthStatus.Unhealthy.value, + message=str(e), + ) - return reapply_successful except Exception as e: log.error(f'Error occurred during share approval: {e}') return False - finally: - with engine.scoped_session() as session: - for resource in resources: - if not ResourceLockRepository.release_lock( - session, resource[0], resource[1], share_data.share.shareUri - ): - log.error( - f'Failed to release lock for resource: resource_uri={resource[0]}, resource_type={resource[1]}' - ) - @staticmethod def _get_share_data_and_items(session, share_uri, status, healthStatus=None): data = ShareObjectRepository.get_share_data(session, share_uri) @@ -422,28 +391,3 @@ def _get_share_data_and_items(session, share_uri, status, healthStatus=None): session=session, share_uri=share_uri, status=[status], healthStatus=[healthStatus] ) return share_data, share_items - - @staticmethod - def acquire_lock_with_retry( - resources: List[Tuple[str, str]], session: Session, acquired_by_uri: str, acquired_by_type: str - ): - for attempt in range(MAX_RETRIES): - try: - log.info(f'Attempting to acquire lock for resources {resources} by share {acquired_by_uri}...') - lock_acquired = ResourceLockRepository.acquire_locks( - resources, session, acquired_by_uri, acquired_by_type - ) - if lock_acquired: - return True - - log.info( - f'Lock for one or more resources {resources} already acquired. Retrying in {RETRY_INTERVAL} seconds...' - ) - sleep(RETRY_INTERVAL) - - except Exception as e: - log.error('Error occurred while retrying acquiring lock:', e) - return False - - log.info(f'Max retries reached. Failed to acquire lock for one or more resources {resources}') - return False From ccde783ed24f87459cbbf048f80a6604d3d257e0 Mon Sep 17 00:00:00 2001 From: Noah Paige Date: Wed, 26 Jun 2024 12:49:31 -0400 Subject: [PATCH 11/14] Remove isLocked and create/delete + fix migration script --- .../services/environment_service.py | 26 ---- .../resource_lock/db/resource_lock_models.py | 3 - .../db/resource_lock_repositories.py | 78 ++-------- .../s3_datasets/services/dataset_service.py | 4 - .../797dd1012be1_resource_lock_table.py | 147 +++++------------- 5 files changed, 58 insertions(+), 200 deletions(-) diff --git a/backend/dataall/core/environment/services/environment_service.py b/backend/dataall/core/environment/services/environment_service.py index 309117a69..653c182db 100644 --- a/backend/dataall/core/environment/services/environment_service.py +++ b/backend/dataall/core/environment/services/environment_service.py @@ -296,12 +296,6 @@ def create_environment(uri, data=None): ) session.commit() - ResourceLockRepository.create_resource_lock( - session=session, - resource_uri=f'{env_group.groupUri}-{env_group.environmentUri}', - resource_type=env_group.__tablename__, - ) - activity = Activity( action='ENVIRONMENT:CREATE', label='ENVIRONMENT:CREATE', @@ -421,12 +415,6 @@ def invite_group(uri, data=None) -> (Environment, EnvironmentGroup): session.add(environment_group) session.commit() - ResourceLockRepository.create_resource_lock( - session=session, - resource_uri=f'{environment_group.groupUri}-{environment_group.environmentUri}', - resource_type=environment_group.__tablename__, - ) - ResourcePolicyService.attach_resource_policy( session=session, group=group, @@ -480,9 +468,6 @@ def remove_group(uri, group): ).delete_all_policies() if group_membership: - ResourceLockRepository.delete_resource_lock( - session=session, resource_uri=f'{group}-{environment.environmentUri}' - ) session.delete(group_membership) 
session.commit() @@ -609,12 +594,6 @@ def add_consumption_role(uri, data=None) -> (Environment, EnvironmentGroup): session.add(consumption_role) session.commit() - ResourceLockRepository.create_resource_lock( - session=session, - resource_uri=consumption_role.consumptionRoleUri, - resource_type=consumption_role.__tablename__, - ) - ResourcePolicyService.attach_resource_policy( session=session, group=group, @@ -654,7 +633,6 @@ def remove_consumption_role(uri, env_uri): resource_uri=consumption_role.consumptionRoleUri, resource_type=ConsumptionRole.__name__, ) - ResourceLockRepository.delete_resource_lock(session=session, resource_uri=uri) session.delete(consumption_role) session.commit() @@ -905,9 +883,6 @@ def delete_environment(uri): EnvironmentParameterRepository(session).delete_params(environment.environmentUri) for group in env_groups: - ResourceLockRepository.delete_resource_lock( - session=session, resource_uri=f'{group.groupUri}-{environment.environmentUri}' - ) session.delete(group) ResourcePolicyService.delete_resource_policy( @@ -917,7 +892,6 @@ def delete_environment(uri): ) for role in env_roles: - ResourceLockRepository.delete_resource_lock(session=session, resource_uri=role.consumptionRoleUri) session.delete(role) return session.delete(environment), environment diff --git a/backend/dataall/core/resource_lock/db/resource_lock_models.py b/backend/dataall/core/resource_lock/db/resource_lock_models.py index d98f30ffe..7e478758e 100644 --- a/backend/dataall/core/resource_lock/db/resource_lock_models.py +++ b/backend/dataall/core/resource_lock/db/resource_lock_models.py @@ -9,7 +9,6 @@ class ResourceLock(Base): resourceUri = Column(String, nullable=False, primary_key=True) resourceType = Column(String, nullable=False, primary_key=True) - isLocked = Column(Boolean, default=False) acquiredByUri = Column(String, nullable=True) acquiredByType = Column(String, nullable=True) @@ -17,12 +16,10 @@ def __init__( self, resourceUri: str, resourceType: str, - isLocked: bool = False, acquiredByUri: Optional[str] = None, acquiredByType: Optional[str] = None, ): self.resourceUri = resourceUri self.resourceType = resourceType - self.isLocked = isLocked self.acquiredByUri = acquiredByUri self.acquiredByType = acquiredByType diff --git a/backend/dataall/core/resource_lock/db/resource_lock_repositories.py b/backend/dataall/core/resource_lock/db/resource_lock_repositories.py index 6440ad3f4..7bf1ea016 100644 --- a/backend/dataall/core/resource_lock/db/resource_lock_repositories.py +++ b/backend/dataall/core/resource_lock/db/resource_lock_repositories.py @@ -15,30 +15,10 @@ class ResourceLockRepository: - @staticmethod - def create_resource_lock( - session, resource_uri, resource_type, is_locked=False, acquired_by_uri=None, acquired_by_type=None - ): - resource_lock = ResourceLock( - resourceUri=resource_uri, - resourceType=resource_type, - isLocked=is_locked, - acquiredByUri=acquired_by_uri, - acquiredByType=acquired_by_type, - ) - session.add(resource_lock) - session.commit() - - @staticmethod - def delete_resource_lock(session, resource_uri): - resource_lock = session.query(ResourceLock).filter(ResourceLock.resourceUri == resource_uri).first() - session.delete(resource_lock) - session.commit() - @staticmethod def acquire_locks(resources, session, acquired_by_uri, acquired_by_type): """ - Attempts to acquire one or more locks on the resources identified by resourceUri and resourceType. + Attempts to acquire/create one or more locks on the resources identified by resourceUri and resourceType. 
Args: resources: List of resource tuples (resourceUri, resourceType) to acquire locks for. @@ -55,19 +35,22 @@ def acquire_locks(resources, session, acquired_by_uri, acquired_by_type): and_( ResourceLock.resourceUri == resource[0], ResourceLock.resourceType == resource[1], - ~ResourceLock.isLocked, ) for resource in resources ] - resource_locks = session.query(ResourceLock).filter(or_(*filter_conditions)).with_for_update().all() - # Ensure lock record found for each resource - if len(resource_locks) == len(resources): - # Update the attributes of the ResourceLock object - for resource_lock in resource_locks: - resource_lock.isLocked = True - resource_lock.acquiredByUri = acquired_by_uri - resource_lock.acquiredByType = acquired_by_type + if not session.query(ResourceLock).filter(or_(*filter_conditions)).first(): + records = [] + for resource in resources: + records.append( + ResourceLock( + resourceUri=resource[0], + resourceType=resource[1], + acquiredByUri=acquired_by_uri, + acquiredByType=acquired_by_type, + ) + ) + session.add_all(records) session.commit() return True else: @@ -84,7 +67,7 @@ def acquire_locks(resources, session, acquired_by_uri, acquired_by_type): @staticmethod def release_lock(session, resource_uri, resource_type, share_uri): """ - Releases the lock on the resource identified by resource_uri, resource_type. + Releases/delete the lock on the resource identified by resource_uri, resource_type. Args: session (sqlalchemy.orm.Session): The SQLAlchemy session object used for interacting with the database. @@ -104,7 +87,6 @@ def release_lock(session, resource_uri, resource_type, share_uri): and_( ResourceLock.resourceUri == resource_uri, ResourceLock.resourceType == resource_type, - ResourceLock.isLocked, ResourceLock.acquiredByUri == share_uri, ) ) @@ -113,10 +95,7 @@ def release_lock(session, resource_uri, resource_type, share_uri): ) if resource_lock: - resource_lock.isLocked = False - resource_lock.acquiredByUri = '' - resource_lock.acquiredByType = '' - + session.delete(resource_lock) session.commit() return True else: @@ -129,34 +108,9 @@ def release_lock(session, resource_uri, resource_type, share_uri): log.error('Error occurred while releasing lock:', e) return False - @staticmethod - def acquire_lock_with_retry( - resources: List[Tuple[str, str]], session: Session, acquired_by_uri: str, acquired_by_type: str - ): - for _ in range(MAX_RETRIES): - try: - log.info(f'Attempting to acquire lock for resources {resources} by share {acquired_by_uri}...') - lock_acquired = ResourceLockRepository.acquire_locks( - resources, session, acquired_by_uri, acquired_by_type - ) - if lock_acquired: - return True - - log.info( - f'Lock for one or more resources {resources} already acquired. Retrying in {RETRY_INTERVAL} seconds...' - ) - sleep(RETRY_INTERVAL) - - except Exception as e: - log.error('Error occurred while retrying acquiring lock:', e) - return False - - log.info(f'Max retries reached. 
Failed to acquire lock for one or more resources {resources}') - return False - @staticmethod @contextmanager - def acquire_lock_with_retry_context( + def acquire_lock_with_retry( resources: List[Tuple[str, str]], session: Session, acquired_by_uri: str, acquired_by_type: str ): retries_remaining = MAX_RETRIES diff --git a/backend/dataall/modules/s3_datasets/services/dataset_service.py b/backend/dataall/modules/s3_datasets/services/dataset_service.py index 5f4dbf175..58561112a 100644 --- a/backend/dataall/modules/s3_datasets/services/dataset_service.py +++ b/backend/dataall/modules/s3_datasets/services/dataset_service.py @@ -165,9 +165,6 @@ def create_dataset(uri, admin_group, data: dict): DatasetService.check_imported_resources(dataset) dataset = DatasetRepository.create_dataset(session=session, env=environment, dataset=dataset, data=data) - ResourceLockRepository.create_resource_lock( - session=session, resource_uri=dataset.datasetUri, resource_type=dataset.__tablename__ - ) DatasetBucketRepository.create_dataset_bucket(session, dataset, data) ResourcePolicyService.attach_resource_policy( @@ -413,7 +410,6 @@ def delete_dataset(uri: str, delete_from_aws: bool = False): ResourcePolicyService.delete_resource_policy(session=session, resource_uri=uri, group=env.SamlGroupName) if dataset.stewards: ResourcePolicyService.delete_resource_policy(session=session, resource_uri=uri, group=dataset.stewards) - ResourceLockRepository.delete_resource_lock(session=session, resource_uri=dataset.datasetUri) DatasetRepository.delete_dataset(session, dataset) if delete_from_aws: diff --git a/backend/migrations/versions/797dd1012be1_resource_lock_table.py b/backend/migrations/versions/797dd1012be1_resource_lock_table.py index 947a75e3a..de5f27351 100644 --- a/backend/migrations/versions/797dd1012be1_resource_lock_table.py +++ b/backend/migrations/versions/797dd1012be1_resource_lock_table.py @@ -9,7 +9,7 @@ from alembic import op from sqlalchemy import orm, Column, String, Boolean, ForeignKey import sqlalchemy as sa -from typing import Optional +from typing import Optional, List from sqlalchemy.ext.declarative import declarative_base from dataall.base.db import utils @@ -27,7 +27,6 @@ class ResourceLock(Base): resourceUri = Column(String, nullable=False, primary_key=True) resourceType = Column(String, nullable=False, primary_key=True) - isLocked = Column(Boolean, default=False) acquiredByUri = Column(String, nullable=True) acquiredByType = Column(String, nullable=True) @@ -35,13 +34,11 @@ def __init__( self, resourceUri: str, resourceType: str, - isLocked: bool = False, acquiredByUri: Optional[str] = None, acquiredByType: Optional[str] = None, ): self.resourceUri = resourceUri self.resourceType = resourceType - self.isLocked = isLocked self.acquiredByUri = acquiredByUri self.acquiredByType = acquiredByType @@ -58,123 +55,63 @@ class S3Dataset(DatasetBase): datasetUri = Column(String, ForeignKey('dataset.datasetUri'), primary_key=True) -class EnvironmentGroup(Base): - __tablename__ = 'environment_group_permission' - groupUri = Column(String, primary_key=True) - environmentUri = Column(String, primary_key=True) +class DatasetLock(Base): + __tablename__ = 'dataset_lock' + datasetUri = Column(String, nullable=False, primary_key=True) + isLocked = Column(Boolean, default=False, nullable=False) + acquiredBy = Column(String, nullable=True) - -class ConsumptionRole(Base): - __tablename__ = 'consumptionrole' - consumptionRoleUri = Column(String, primary_key=True, default=utils.uuid('group')) + @classmethod + def 
uri(cls):
+        return cls.datasetUri
 
 
 def upgrade():
     # ### commands auto generated by Alembic - please adjust! ###
 
-    # Drop Foreign Key
-    op.drop_constraint('dataset_lock_datasetUri_fkey', 'dataset_lock', type_='foreignkey')
-
-    # Rename Table to Resource Lock
-    op.rename_table('dataset_lock', 'resource_lock')
+    ## drop dataset_lock table
+    op.drop_table('dataset_lock')
 
-    # Rename Columns
-    op.alter_column(
+    ## create resource_lock table
+    op.create_table(
         'resource_lock',
-        'datasetUri',
-        nullable=False,
-        new_column_name='resourceUri',
-        existing_type=String,
-        primary_key=True,
+        sa.Column('resourceUri', sa.String(), nullable=False, primary_key=True),
+        sa.Column('resourceType', sa.String(), nullable=False, primary_key=True),
+        sa.Column('acquiredByUri', sa.String(), nullable=True),
+        sa.Column('acquiredByType', sa.String(), nullable=True),
     )
-    op.alter_column(
-        'resource_lock',
-        'acquiredBy',
-        nullable=True,
-        new_column_name='acquiredByUri',
-        existing_type=String,
-    )
-
-    # Add New Columns
-    op.add_column('resource_lock', sa.Column('resourceType', sa.String()))
-    op.add_column('resource_lock', sa.Column('acquiredByType', sa.String(), nullable=True))
-
-    session = orm.Session(bind=op.get_bind())
-
-    # Backfill Dataset Locks
-    session.query(ResourceLock).update(
-        {
-            ResourceLock.resourceType: S3Dataset.__tablename__,
-        }
-    )
-    session.commit()
-
-    # Add resourceType as primary key after backfilling
-    op.alter_column('resource_lock', 'resourceType', primary_key=True)
-
-    # Backfill Locks for Env Groups
-    env_groups = session.query(EnvironmentGroup).all()
-    for group in env_groups:
-        lock = ResourceLock(
-            resourceUri=f'{group.groupUri}-{group.environmentUri}',
-            resourceType=EnvironmentGroup.__tablename__,
-        )
-        session.add(lock)
-    session.commit()
-
-    # Backfill Locks for Consumption Roles
-    consumption_roles = session.query(ConsumptionRole).all()
-    for role in consumption_roles:
-        lock = ResourceLock(resourceUri=role.consumptionRoleUri, resourceType=ConsumptionRole.__tablename__)
-        session.add(lock)
-    session.commit()
     # ### end Alembic commands ###
 
 
 def downgrade():
-    session = orm.Session(bind=op.get_bind())
-    # Deleting Locks for Env Groups
-    env_groups = session.query(EnvironmentGroup).all()
-    for group in env_groups:
-        lock = session.query(ResourceLock).get(
-            (f'{group.groupUri}-{group.environmentUri}', EnvironmentGroup.__tablename__)
-        )
-        if lock:
-            session.delete(lock)
-
-    # Deleting Locks for Consumption Roles
-    consumption_roles = session.query(ConsumptionRole).all()
-    for role in consumption_roles:
-        lock = session.query(ResourceLock).get((role.consumptionRoleUri, ConsumptionRole.__tablename__))
-        if lock:
-            session.delete(lock)
-    session.commit()
+    # Drop resource_lock table
+    op.drop_table('resource_lock')
 
-    # Drop Columns
-    op.drop_column('resource_lock', 'resourceType')
-    op.drop_column('resource_lock', 'acquiredByType')
+    bind = op.get_bind()
+    session = orm.Session(bind=bind)
+    datasets: List[S3Dataset] = session.query(S3Dataset).all()
 
-    # Rename Columns
-    op.alter_column(
-        'resource_lock',
-        'resourceUri',
-        nullable=False,
-        new_column_name='datasetUri',
-        existing_type=String,
-        primary_key=True,
-    )
+    print('Creating dataset_lock table')
 
-    op.alter_column(
-        'resource_lock',
-        'acquiredByUri',
-        nullable=True,
-        new_column_name='acquiredBy',
-        existing_type=String,
-    )
+    op.create_table(
+        'dataset_lock',
+        sa.Column('datasetUri', sa.String(), primary_key=True),
+        sa.Column('isLocked', sa.Boolean(), nullable=False),
+        sa.Column('acquiredBy', sa.String(), nullable=True),
+    )
 
-    # Rename Table to Dataset Lock
-    op.rename_table('resource_lock', 'dataset_lock')
+    op.create_foreign_key(
+        'fk_dataset_lock_datasetUri',  # Constraint name
+        'dataset_lock',
+        'dataset',
+        ['datasetUri'],
+        ['datasetUri'],
+    )
 
-    # Add Foreign Key
-    op.create_foreign_key('dataset_lock_datasetUri_fkey', 'dataset_lock', 'dataset', ['datasetUri'], ['datasetUri'])
+    print('Creating a new row for each existing dataset in dataset_lock table')
+    for dataset in datasets:
+        dataset_lock = DatasetLock(datasetUri=dataset.datasetUri, isLocked=False, acquiredBy='')
+        session.add(dataset_lock)
+        session.flush()  # flush to get the datasetUri
+    session.commit()
     # ### end Alembic commands ###

From d41e8e46d2bb912a47f09fc1bb0febea54c87882 Mon Sep 17 00:00:00 2001
From: Noah Paige
Date: Wed, 26 Jun 2024 12:58:51 -0400
Subject: [PATCH 12/14] Fix tests

---
 .../dataall/core/environment/services/environment_service.py | 2 --
 tests/modules/s3_datasets/test_dataset_resource_found.py     | 3 ---
 2 files changed, 5 deletions(-)

diff --git a/backend/dataall/core/environment/services/environment_service.py b/backend/dataall/core/environment/services/environment_service.py
index 653c182db..615f83cbe 100644
--- a/backend/dataall/core/environment/services/environment_service.py
+++ b/backend/dataall/core/environment/services/environment_service.py
@@ -19,7 +19,6 @@
     CREDENTIALS_ENVIRONMENT,
 )
 from dataall.core.permissions.services.resource_policy_service import ResourcePolicyService
-from dataall.core.resource_lock.db.resource_lock_repositories import ResourceLockRepository
 from dataall.core.permissions.services.tenant_policy_service import TenantPolicyService
 from dataall.core.activity.db.activity_models import Activity
 from dataall.core.environment.db.environment_models import EnvironmentParameter, ConsumptionRole
@@ -414,7 +413,6 @@ def invite_group(uri, data=None) -> (Environment, EnvironmentGroup):
         )
         session.add(environment_group)
         session.commit()
-
         ResourcePolicyService.attach_resource_policy(
             session=session,
             group=group,
diff --git a/tests/modules/s3_datasets/test_dataset_resource_found.py b/tests/modules/s3_datasets/test_dataset_resource_found.py
index 231f7ea8f..45c2267d4 100644
--- a/tests/modules/s3_datasets/test_dataset_resource_found.py
+++ b/tests/modules/s3_datasets/test_dataset_resource_found.py
@@ -1,5 +1,4 @@
 from dataall.modules.s3_datasets.db.dataset_models import S3Dataset
-from dataall.core.resource_lock.db.resource_lock_models import ResourceLock
 from dataall.modules.s3_datasets.services.dataset_permissions import CREATE_DATASET
 
 
@@ -124,8 +123,6 @@ def test_dataset_resource_found(db, client, env_fixture, org_fixture, group2, us
     assert 'EnvironmentResourcesFound' in response.errors[0].message
 
     with db.scoped_session() as session:
-        dataset_lock = session.query(ResourceLock).filter(ResourceLock.resourceUri == dataset.datasetUri).first()
-        session.delete(dataset_lock)
         dataset = session.query(S3Dataset).get(dataset.datasetUri)
         session.delete(dataset)
         session.commit()

From 59752d73b82a3af1ddab239d165a34a36fe17cc1 Mon Sep 17 00:00:00 2001
From: Noah Paige
Date: Thu, 27 Jun 2024 13:11:39 -0400
Subject: [PATCH 13/14] make methods private acquire and release

---
 .../core/resource_lock/db/resource_lock_repositories.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/backend/dataall/core/resource_lock/db/resource_lock_repositories.py b/backend/dataall/core/resource_lock/db/resource_lock_repositories.py
index 7bf1ea016..e9b531881 100644
--- 
a/backend/dataall/core/resource_lock/db/resource_lock_repositories.py +++ b/backend/dataall/core/resource_lock/db/resource_lock_repositories.py @@ -16,7 +16,7 @@ class ResourceLockRepository: @staticmethod - def acquire_locks(resources, session, acquired_by_uri, acquired_by_type): + def _acquire_locks(resources, session, acquired_by_uri, acquired_by_type): """ Attempts to acquire/create one or more locks on the resources identified by resourceUri and resourceType. @@ -65,7 +65,7 @@ def acquire_locks(resources, session, acquired_by_uri, acquired_by_type): return False @staticmethod - def release_lock(session, resource_uri, resource_type, share_uri): + def _release_lock(session, resource_uri, resource_type, share_uri): """ Releases/delete the lock on the resource identified by resource_uri, resource_type. @@ -116,7 +116,7 @@ def acquire_lock_with_retry( retries_remaining = MAX_RETRIES log.info(f'Attempting to acquire lock for resources {resources} by share {acquired_by_uri}...') while not ( - lock_acquired := ResourceLockRepository.acquire_locks(resources, session, acquired_by_uri, acquired_by_type) + lock_acquired := ResourceLockRepository._acquire_locks(resources, session, acquired_by_uri, acquired_by_type) ): log.info( f'Lock for one or more resources {resources} already acquired. Retrying in {RETRY_INTERVAL} seconds...' @@ -132,4 +132,4 @@ def acquire_lock_with_retry( yield lock_acquired finally: for resource in resources: - ResourceLockRepository.release_lock(session, resource[0], resource[1], acquired_by_uri) + ResourceLockRepository._release_lock(session, resource[0], resource[1], acquired_by_uri) From 9e2e56956a000105b6405409e0ee6e45dc07862c Mon Sep 17 00:00:00 2001 From: Noah Paige Date: Thu, 27 Jun 2024 13:12:37 -0400 Subject: [PATCH 14/14] make methods private acquire and release --- .../core/resource_lock/db/resource_lock_repositories.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/backend/dataall/core/resource_lock/db/resource_lock_repositories.py b/backend/dataall/core/resource_lock/db/resource_lock_repositories.py index e9b531881..25d4e24c8 100644 --- a/backend/dataall/core/resource_lock/db/resource_lock_repositories.py +++ b/backend/dataall/core/resource_lock/db/resource_lock_repositories.py @@ -116,7 +116,9 @@ def acquire_lock_with_retry( retries_remaining = MAX_RETRIES log.info(f'Attempting to acquire lock for resources {resources} by share {acquired_by_uri}...') while not ( - lock_acquired := ResourceLockRepository._acquire_locks(resources, session, acquired_by_uri, acquired_by_type) + lock_acquired := ResourceLockRepository._acquire_locks( + resources, session, acquired_by_uri, acquired_by_type + ) ): log.info( f'Lock for one or more resources {resources} already acquired. Retrying in {RETRY_INTERVAL} seconds...'
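
Reviewer note (illustrative only, not part of the patch series): after PATCH 10 through 14, callers no longer pre-create or delete lock rows; rows in resource_lock exist only while the context manager holds them. Below is a minimal usage sketch of the final public API, ResourceLockRepository.acquire_lock_with_retry, as it stands at the end of this series. The process_share function and its share/dataset arguments are hypothetical placeholders; the imports, keyword arguments, and ResourceLockTimeout behavior are taken from the diffs above.

    from dataall.core.resource_lock.db.resource_lock_repositories import ResourceLockRepository
    from dataall.base.db.exceptions import ResourceLockTimeout

    def process_share(session, share, dataset):
        # (resourceUri, resourceType) tuples: matching rows are inserted into
        # resource_lock on entry and deleted again on exit, so no backfilled or
        # pre-created rows are required.
        resources = [(dataset.datasetUri, dataset.__tablename__)]
        try:
            with ResourceLockRepository.acquire_lock_with_retry(
                resources=resources,
                session=session,
                acquired_by_uri=share.shareUri,
                acquired_by_type=share.__tablename__,
            ):
                ...  # grant or revoke share items while the locks are held
        except ResourceLockTimeout:
            # Raised after MAX_RETRIES attempts spaced RETRY_INTERVAL seconds
            # apart; reapply_share uses this to mark items Unhealthy.
            raise

One design consequence worth noting for review: because _acquire_locks now creates rows instead of flipping an isLocked flag, a crashed worker can leave stale rows behind, whereas the old model left stale isLocked=True flags; both cases require cleanup, but the new model also removes the need for the migration-time backfill that PATCH 05 originally performed.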