diff --git a/backend/dataall/modules/dataset_sharing/services/dataset_sharing_service.py b/backend/dataall/modules/dataset_sharing/services/dataset_sharing_service.py index 3bb60e052..073b48b67 100644 --- a/backend/dataall/modules/dataset_sharing/services/dataset_sharing_service.py +++ b/backend/dataall/modules/dataset_sharing/services/dataset_sharing_service.py @@ -22,7 +22,7 @@ ) from dataall.modules.s3_datasets.db.dataset_models import Dataset -from dataall.modules.s3_datasets.services.datasets_enums import DatasetRole +from dataall.modules.datasets_base.services.datasets_enums import DatasetRole from dataall.modules.s3_datasets.services.dataset_service import DatasetServiceInterface diff --git a/backend/dataall/modules/datasets_base/__init__.py b/backend/dataall/modules/datasets_base/__init__.py new file mode 100644 index 000000000..a79574166 --- /dev/null +++ b/backend/dataall/modules/datasets_base/__init__.py @@ -0,0 +1,23 @@ +from typing import Set +from dataall.base.loader import ModuleInterface, ImportMode + + +class DatasetBaseModuleInterface(ModuleInterface): + """Module interface whose only load-time action is importing the datasets_base enums module.""" + + @staticmethod + def is_supported(modes: Set[ImportMode]) -> bool: + """Return True when at least one of the requested import modes is in the supported set below.""" + supported_modes = { + ImportMode.API, + ImportMode.CDK, + ImportMode.HANDLERS, + ImportMode.STACK_UPDATER_TASK, + ImportMode.CATALOG_INDEXER_TASK, + } + # bool() honours the declared return type; the bare intersection would hand callers a set + return bool(modes & supported_modes) + + def __init__(self): + # Import for its side effect only: makes the shared dataset enums available once this module is loaded + import dataall.modules.datasets_base.services.datasets_enums diff --git a/backend/dataall/modules/datasets_base/api/__init__.py b/backend/dataall/modules/datasets_base/api/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/backend/dataall/modules/datasets_base/db/__init__.py b/backend/dataall/modules/datasets_base/db/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/backend/dataall/modules/datasets_base/services/__init__.py b/backend/dataall/modules/datasets_base/services/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git
a/backend/dataall/modules/s3_datasets/services/datasets_enums.py b/backend/dataall/modules/datasets_base/services/datasets_enums.py similarity index 100% rename from backend/dataall/modules/s3_datasets/services/datasets_enums.py rename to backend/dataall/modules/datasets_base/services/datasets_enums.py diff --git a/backend/dataall/modules/s3_datasets/__init__.py b/backend/dataall/modules/s3_datasets/__init__.py index 6e272f26b..008793ada 100644 --- a/backend/dataall/modules/s3_datasets/__init__.py +++ b/backend/dataall/modules/s3_datasets/__init__.py @@ -17,11 +17,13 @@ def is_supported(modes): @staticmethod def depends_on() -> List[Type['ModuleInterface']]: + from dataall.modules.datasets_base import DatasetBaseModuleInterface from dataall.modules.catalog import CatalogApiModuleInterface from dataall.modules.feed import FeedApiModuleInterface from dataall.modules.vote import VoteApiModuleInterface return [ + DatasetBaseModuleInterface, CatalogApiModuleInterface, FeedApiModuleInterface, VoteApiModuleInterface, @@ -85,12 +87,17 @@ class DatasetAsyncHandlersModuleInterface(ModuleInterface): def is_supported(modes: Set[ImportMode]): return ImportMode.HANDLERS in modes + @staticmethod + def depends_on() -> List[Type['ModuleInterface']]: + from dataall.modules.datasets_base import DatasetBaseModuleInterface + + return [DatasetBaseModuleInterface] + def __init__(self): import dataall.modules.s3_datasets.handlers import dataall.modules.s3_datasets.db.dataset_models import dataall.modules.s3_datasets.db.dataset_repositories import dataall.modules.s3_datasets.services.dataset_permissions - import dataall.modules.s3_datasets.services.datasets_enums log.info('Dataset handlers have been imported') @@ -102,6 +109,12 @@ class DatasetCdkModuleInterface(ModuleInterface): def is_supported(modes: Set[ImportMode]): return ImportMode.CDK in modes + @staticmethod + def depends_on() -> List[Type['ModuleInterface']]: + from dataall.modules.datasets_base import DatasetBaseModuleInterface 
+ + return [DatasetBaseModuleInterface] + def __init__(self): import dataall.modules.s3_datasets.cdk from dataall.core.environment.cdk.environment_stack import EnvironmentSetup @@ -119,6 +132,12 @@ class DatasetStackUpdaterModuleInterface(ModuleInterface): def is_supported(modes: Set[ImportMode]) -> bool: return ImportMode.STACK_UPDATER_TASK in modes + @staticmethod + def depends_on() -> List[Type['ModuleInterface']]: + from dataall.modules.datasets_base import DatasetBaseModuleInterface + + return [DatasetBaseModuleInterface] + def __init__(self): from dataall.modules.s3_datasets.tasks.dataset_stack_finder import DatasetStackFinder @@ -134,8 +153,9 @@ def is_supported(modes: Set[ImportMode]) -> bool: @staticmethod def depends_on() -> List[Type['ModuleInterface']]: from dataall.modules.catalog import CatalogIndexerModuleInterface + from dataall.modules.datasets_base import DatasetBaseModuleInterface - return [CatalogIndexerModuleInterface] + return [CatalogIndexerModuleInterface, DatasetBaseModuleInterface] def __init__(self): from dataall.modules.s3_datasets.indexers.dataset_catalog_indexer import DatasetCatalogIndexer diff --git a/backend/dataall/modules/s3_datasets/api/dataset/input_types.py b/backend/dataall/modules/s3_datasets/api/dataset/input_types.py index a7571c426..ba9c0c495 100644 --- a/backend/dataall/modules/s3_datasets/api/dataset/input_types.py +++ b/backend/dataall/modules/s3_datasets/api/dataset/input_types.py @@ -1,6 +1,6 @@ from dataall.base.api import gql from dataall.base.api.constants import SortDirection -from dataall.modules.s3_datasets.services.datasets_enums import DatasetSortField +from dataall.modules.datasets_base.services.datasets_enums import DatasetSortField NewDatasetInput = gql.InputType( diff --git a/backend/dataall/modules/s3_datasets/api/dataset/resolvers.py b/backend/dataall/modules/s3_datasets/api/dataset/resolvers.py index 5b08f85a3..f19019f15 100644 --- a/backend/dataall/modules/s3_datasets/api/dataset/resolvers.py +++ 
b/backend/dataall/modules/s3_datasets/api/dataset/resolvers.py @@ -8,7 +8,7 @@ from dataall.core.organizations.db.organization_repositories import OrganizationRepository from dataall.base.db.exceptions import RequiredParameter, InvalidInput from dataall.modules.s3_datasets.db.dataset_models import Dataset -from dataall.modules.s3_datasets.services.datasets_enums import DatasetRole, ConfidentialityClassification +from dataall.modules.datasets_base.services.datasets_enums import DatasetRole, ConfidentialityClassification from dataall.modules.s3_datasets.services.dataset_service import DatasetService log = logging.getLogger(__name__) diff --git a/backend/dataall/modules/s3_datasets/api/dataset/types.py b/backend/dataall/modules/s3_datasets/api/dataset/types.py index 863f896f7..8fa7afb46 100644 --- a/backend/dataall/modules/s3_datasets/api/dataset/types.py +++ b/backend/dataall/modules/s3_datasets/api/dataset/types.py @@ -1,5 +1,5 @@ from dataall.base.api import gql -from dataall.modules.s3_datasets.services.datasets_enums import DatasetRole +from dataall.modules.datasets_base.services.datasets_enums import DatasetRole from dataall.modules.s3_datasets.api.dataset.resolvers import ( get_dataset_environment, get_dataset_organization, diff --git a/backend/dataall/modules/s3_datasets/api/table/input_types.py b/backend/dataall/modules/s3_datasets/api/table/input_types.py index fe9026518..0f25aaa20 100644 --- a/backend/dataall/modules/s3_datasets/api/table/input_types.py +++ b/backend/dataall/modules/s3_datasets/api/table/input_types.py @@ -1,6 +1,6 @@ from dataall.base.api import gql from dataall.base.api.constants import SortDirection -from dataall.modules.s3_datasets.services.datasets_enums import DatasetSortField +from dataall.modules.datasets_base.services.datasets_enums import DatasetSortField ModifyDatasetTableInput = gql.InputType( diff --git a/backend/dataall/modules/s3_datasets/db/dataset_models.py b/backend/dataall/modules/s3_datasets/db/dataset_models.py index 
87fe21328..1133faf1a 100644 --- a/backend/dataall/modules/s3_datasets/db/dataset_models.py +++ b/backend/dataall/modules/s3_datasets/db/dataset_models.py @@ -2,7 +2,7 @@ from sqlalchemy.dialects.postgresql import JSON, ARRAY from sqlalchemy.orm import query_expression from dataall.base.db import Base, Resource, utils -from dataall.modules.s3_datasets.services.datasets_enums import ConfidentialityClassification, Language +from dataall.modules.datasets_base.services.datasets_enums import ConfidentialityClassification, Language class DatasetTableColumn(Resource, Base): diff --git a/backend/dataall/modules/s3_datasets/db/dataset_repositories.py b/backend/dataall/modules/s3_datasets/db/dataset_repositories.py index 58840c900..3c44e9174 100644 --- a/backend/dataall/modules/s3_datasets/db/dataset_repositories.py +++ b/backend/dataall/modules/s3_datasets/db/dataset_repositories.py @@ -7,7 +7,7 @@ from dataall.core.organizations.db.organization_repositories import OrganizationRepository from dataall.base.db import paginate from dataall.base.db.exceptions import ObjectNotFound -from dataall.modules.s3_datasets.services.datasets_enums import ConfidentialityClassification, Language +from dataall.modules.datasets_base.services.datasets_enums import ConfidentialityClassification, Language from dataall.core.environment.services.environment_resource_manager import EnvironmentResource from dataall.modules.s3_datasets.db.dataset_models import DatasetTable, Dataset, DatasetLock from dataall.base.utils.naming_convention import ( diff --git a/backend/dataall/modules/s3_datasets/services/dataset_column_service.py b/backend/dataall/modules/s3_datasets/services/dataset_column_service.py index eba100f0f..eb7c19a00 100644 --- a/backend/dataall/modules/s3_datasets/services/dataset_column_service.py +++ b/backend/dataall/modules/s3_datasets/services/dataset_column_service.py @@ -9,7 +9,7 @@ from dataall.modules.s3_datasets.services.dataset_permissions import UPDATE_DATASET_TABLE from 
dataall.modules.s3_datasets.db.dataset_models import DatasetTable, DatasetTableColumn from dataall.modules.s3_datasets.db.dataset_repositories import DatasetRepository -from dataall.modules.s3_datasets.services.datasets_enums import ConfidentialityClassification +from dataall.modules.datasets_base.services.datasets_enums import ConfidentialityClassification from dataall.modules.s3_datasets.services.dataset_permissions import PREVIEW_DATASET_TABLE diff --git a/backend/dataall/modules/s3_datasets/services/dataset_profiling_service.py b/backend/dataall/modules/s3_datasets/services/dataset_profiling_service.py index 183db0b45..be94ce51c 100644 --- a/backend/dataall/modules/s3_datasets/services/dataset_profiling_service.py +++ b/backend/dataall/modules/s3_datasets/services/dataset_profiling_service.py @@ -13,7 +13,7 @@ from dataall.modules.s3_datasets.db.dataset_table_repositories import DatasetTableRepository from dataall.modules.s3_datasets.services.dataset_permissions import PROFILE_DATASET_TABLE, GET_DATASET from dataall.modules.s3_datasets.db.dataset_repositories import DatasetRepository -from dataall.modules.s3_datasets.services.datasets_enums import ConfidentialityClassification +from dataall.modules.datasets_base.services.datasets_enums import ConfidentialityClassification from dataall.modules.s3_datasets.db.dataset_models import DatasetProfilingRun, DatasetTable from dataall.modules.s3_datasets.services.dataset_permissions import PREVIEW_DATASET_TABLE diff --git a/backend/dataall/modules/s3_datasets/services/dataset_service.py b/backend/dataall/modules/s3_datasets/services/dataset_service.py index fbf574a51..7b14608f4 100644 --- a/backend/dataall/modules/s3_datasets/services/dataset_service.py +++ b/backend/dataall/modules/s3_datasets/services/dataset_service.py @@ -40,7 +40,7 @@ IMPORT_DATASET, ) from dataall.modules.s3_datasets.db.dataset_repositories import DatasetRepository -from dataall.modules.s3_datasets.services.datasets_enums import DatasetRole +from 
dataall.modules.datasets_base.services.datasets_enums import DatasetRole from dataall.modules.s3_datasets.db.dataset_models import Dataset, DatasetTable from dataall.modules.s3_datasets.services.dataset_permissions import DATASET_TABLE_READ diff --git a/backend/dataall/modules/s3_datasets/services/dataset_table_service.py b/backend/dataall/modules/s3_datasets/services/dataset_table_service.py index dd758150d..e3ec400f6 100644 --- a/backend/dataall/modules/s3_datasets/services/dataset_table_service.py +++ b/backend/dataall/modules/s3_datasets/services/dataset_table_service.py @@ -16,7 +16,7 @@ SYNC_DATASET, ) from dataall.modules.s3_datasets.db.dataset_repositories import DatasetRepository -from dataall.modules.s3_datasets.services.datasets_enums import ConfidentialityClassification +from dataall.modules.datasets_base.services.datasets_enums import ConfidentialityClassification from dataall.modules.s3_datasets.db.dataset_models import DatasetTable, Dataset from dataall.modules.s3_datasets.services.dataset_permissions import ( PREVIEW_DATASET_TABLE, diff --git a/backend/migrations/versions/5e5c84138af7_backfill_confidentiality.py b/backend/migrations/versions/5e5c84138af7_backfill_confidentiality.py index c2590d457..ba34dc176 100644 --- a/backend/migrations/versions/5e5c84138af7_backfill_confidentiality.py +++ b/backend/migrations/versions/5e5c84138af7_backfill_confidentiality.py @@ -14,7 +14,7 @@ from sqlalchemy.ext.declarative import declarative_base from dataall.base.db import utils, Resource -from dataall.modules.s3_datasets.services.datasets_enums import ConfidentialityClassification, Language +from dataall.modules.datasets_base.services.datasets_enums import ConfidentialityClassification, Language revision = '5e5c84138af7' diff --git a/backend/migrations/versions/8c79fb896983_add_table_for_buckets.py b/backend/migrations/versions/8c79fb896983_add_table_for_buckets.py index 589896927..dce79af22 100644 --- 
a/backend/migrations/versions/8c79fb896983_add_table_for_buckets.py +++ b/backend/migrations/versions/8c79fb896983_add_table_for_buckets.py @@ -19,7 +19,7 @@ from dataall.modules.dataset_sharing.services.dataset_sharing_enums import ShareObjectStatus from datetime import datetime -from dataall.modules.s3_datasets.services.datasets_enums import ConfidentialityClassification, Language +from dataall.modules.datasets_base.services.datasets_enums import ConfidentialityClassification, Language # revision identifiers, used by Alembic. diff --git a/backend/migrations/versions/97050ec09354_release_3_7_8.py b/backend/migrations/versions/97050ec09354_release_3_7_8.py index 882af867e..b2e7256af 100644 --- a/backend/migrations/versions/97050ec09354_release_3_7_8.py +++ b/backend/migrations/versions/97050ec09354_release_3_7_8.py @@ -12,7 +12,7 @@ from sqlalchemy.ext.declarative import declarative_base from dataall.base.db import utils, Resource -from dataall.modules.s3_datasets.services.datasets_enums import ConfidentialityClassification, Language +from dataall.modules.datasets_base.services.datasets_enums import ConfidentialityClassification, Language # revision identifiers, used by Alembic. diff --git a/backend/migrations/versions/e1cd4927482b_rename_imported_dataset_aws_resources.py b/backend/migrations/versions/e1cd4927482b_rename_imported_dataset_aws_resources.py index 30adbdf89..c167b2d74 100644 --- a/backend/migrations/versions/e1cd4927482b_rename_imported_dataset_aws_resources.py +++ b/backend/migrations/versions/e1cd4927482b_rename_imported_dataset_aws_resources.py @@ -16,7 +16,7 @@ NamingConventionService, NamingConventionPattern, ) -from dataall.modules.s3_datasets.services.datasets_enums import ConfidentialityClassification, Language +from dataall.modules.datasets_base.services.datasets_enums import ConfidentialityClassification, Language # revision identifiers, used by Alembic. 
revision = 'e1cd4927482b' diff --git a/config.json b/config.json index 2e7ca12f8..81e0ab579 100644 --- a/config.json +++ b/config.json @@ -9,6 +9,9 @@ "datapipelines": { "active": true }, + "datasets_base": { + "active": true + }, "s3_datasets": { "active": true, "features": { diff --git a/tests/modules/datasets/conftest.py b/tests/modules/datasets/conftest.py index 4cc1902ce..cc0bfe047 100644 --- a/tests/modules/datasets/conftest.py +++ b/tests/modules/datasets/conftest.py @@ -9,7 +9,7 @@ from dataall.modules.dataset_sharing.services.dataset_sharing_enums import ShareableType, PrincipalType from dataall.modules.dataset_sharing.db.share_object_models import ShareObject, ShareObjectItem from dataall.modules.dataset_sharing.services.share_permissions import SHARE_OBJECT_REQUESTER, SHARE_OBJECT_APPROVER -from dataall.modules.s3_datasets.services.datasets_enums import ConfidentialityClassification +from dataall.modules.datasets_base.services.datasets_enums import ConfidentialityClassification from dataall.modules.s3_datasets.services.dataset_permissions import DATASET_TABLE_READ from dataall.modules.s3_datasets.db.dataset_models import Dataset, DatasetTable, DatasetStorageLocation from dataall.modules.s3_datasets.services.dataset_permissions import DATASET_ALL @@ -35,13 +35,13 @@ def patch_dataset_methods(module_mocker): glue_mock_client().run_job.return_value = True module_mocker.patch( - 'dataall.modules.s3_datasets.services.datasets_enums.ConfidentialityClassification.validate_confidentiality_level', + 'dataall.modules.datasets_base.services.datasets_enums.ConfidentialityClassification.validate_confidentiality_level', return_value=True, ) confidentiality_classification_mocker = MagicMock() module_mocker.patch( - 'dataall.modules.s3_datasets.services.datasets_enums.ConfidentialityClassification', + 'dataall.modules.datasets_base.services.datasets_enums.ConfidentialityClassification', return_value=confidentiality_classification_mocker, ) # Return the input when 
mocking. This mock avoids checking the custom_confidentiality_mapping value in the actual function and just returns whatever confidentiality value is supplied for pytests diff --git a/tests/modules/datasets/test_dataset.py b/tests/modules/datasets/test_dataset.py index 8340bb366..8a71b07fe 100644 --- a/tests/modules/datasets/test_dataset.py +++ b/tests/modules/datasets/test_dataset.py @@ -10,7 +10,7 @@ from dataall.modules.s3_datasets.db.dataset_models import DatasetStorageLocation, DatasetTable, Dataset, DatasetLock from tests.core.stacks.test_stack import update_stack_query -from dataall.modules.s3_datasets.services.datasets_enums import ConfidentialityClassification +from dataall.modules.datasets_base.services.datasets_enums import ConfidentialityClassification mocked_key_id = 'some_key'