
Commit

Merge pull request #207 from jaceksan/working
PSDK-130: Clone workspace

Reviewed-by: Jan Kadlec <https://github.com/hkad98>
gdgate committed Jan 31, 2023
2 parents 5737f37 + bdfab1e commit 191e363
Showing 5 changed files with 13,957 additions and 98 deletions.
@@ -34,98 +34,6 @@ def store_to_disk(self, workspace_folder: Path) -> None:
        if self.ldm is not None:
            self.ldm.store_to_disk(workspace_folder)

"""LDM contains data source ID - is mapped to this data source.
You may decide to migrate to different data source containing the same physical data model
(e.g. change the DB engine, but keep the model).
This function helps you to replace any set of data source IDs with new set of IDs
(ready for multiple DS per workspace).
Example:
```
data_source_mapping = {"postgresql": "snowflake"}
ldm = sdk.catalog_workspace_content.get_declarative_ldm(workspace_id)
ldm.modify_mapped_data_source(data_source_mapping)
# When migrating to Snowflake, we need to change the case of table/column names as well
ldm.change_tables_columns_case(upper_case=True)
sdk.catalog_workspace_content.put_declarative_ldm(workspace_id, ldm)
# Chaining approach is also possible:
```
sdk.catalog_workspace_content.put_declarative_ldm(
workspace_id,
sdk.catalog_workspace_content.get_declarative_ldm(workspace_id)\\
.modify_mapped_data_source(data_source_mapping)\\
.change_tables_columns_case(upper_case=True)
)
Args:
data_source_mapping (dict):
Key value pairs representing which DS(key) should be replaced by which DS(value).
Returns:
self
"""

    def modify_mapped_data_source(self, data_source_mapping: dict) -> CatalogDeclarativeModel:
        if self.ldm is not None:
            for dataset in self.ldm.datasets:
                if dataset.data_source_table_id is not None:
                    data_source_id = dataset.data_source_table_id.data_source_id
                    if data_source_id in data_source_mapping:
                        dataset.data_source_table_id.data_source_id = data_source_mapping[data_source_id]
        return self

    @staticmethod
    def _change_case(object_name: str, upper_case: bool) -> str:
        if upper_case:
            return object_name.upper()
        else:
            return object_name.lower()

"""Change case (to lower/upper-case) of all physical objects mapped in the LDM.
Namely mapped table names and column names.
Default is to change everything to upper-case.
This is handy if you migrate e.g. from PostgreSQL to Snowflake, which is the only DB having upper-case as default.
Instead of enclosing all (lower-cased) object names in all DDLs during the migration,
you can use this function to change the case in GoodData LDM.
If you specify upper-case=False, the function changes the case to lower-case
(e.g. migration from Snowflake back to PostgreSQL).
Examples can be found in the DOC of modify_mapped_data_source() method.
Args:
upper_case (bool):
If True, all tables/columns names are changes to upper-case, otherwise to lower-case.
Returns:
self
"""

    def change_tables_columns_case(self, upper_case: bool = True) -> CatalogDeclarativeModel:
        if self.ldm is not None:
            for dataset in self.ldm.datasets:
                if dataset.data_source_table_id and dataset.data_source_table_id.id:
                    dataset.data_source_table_id.id = self._change_case(dataset.data_source_table_id.id, upper_case)
                if dataset.attributes:
                    for attribute in dataset.attributes:
                        if attribute.source_column:
                            attribute.source_column = self._change_case(attribute.source_column, upper_case)
                        if attribute.sort_column:
                            attribute.sort_column = self._change_case(attribute.sort_column, upper_case)
                        for label in attribute.labels:
                            if label.source_column:
                                label.source_column = self._change_case(label.source_column, upper_case)
                if dataset.facts:
                    for fact in dataset.facts:
                        if fact.source_column:
                            fact.source_column = self._change_case(fact.source_column, upper_case)
                for reference in dataset.references:
                    new_columns = []
                    for reference_column in reference.source_columns:
                        new_columns.append(self._change_case(reference_column, upper_case))
                    reference.source_columns = new_columns
        return self

    @classmethod
    def load_from_disk(cls, workspace_folder: Path) -> CatalogDeclarativeModel:
        ldm = CatalogDeclarativeLdm.load_from_disk(workspace_folder)
@@ -184,3 +92,100 @@ def load_from_disk(cls, workspace_folder: Path) -> CatalogDeclarativeLdm:
            for date_instance_file in date_instance_files
        ]
        return cls(datasets=datasets, date_instances=date_instances)

    def modify_mapped_data_source(self, data_source_mapping: Optional[dict]) -> CatalogDeclarativeLdm:
        """The LDM contains data source IDs - datasets are mapped to these data sources.
        You may decide to migrate to a different data source containing the same physical data model
        (e.g. change the DB engine, but keep the model).
        This function helps you replace any set of data source IDs with a new set of IDs
        (ready for multiple data sources per workspace).
        Example:
        ```
        data_source_mapping = {"postgresql": "snowflake"}
        ldm = sdk.catalog_workspace_content.get_declarative_ldm(workspace_id)
        ldm.modify_mapped_data_source(data_source_mapping)
        # When migrating to Snowflake, we need to change the case of table/column names as well
        ldm.change_tables_columns_case(upper_case=True)
        sdk.catalog_workspace_content.put_declarative_ldm(workspace_id, ldm)

        # A chaining approach is also possible:
        sdk.catalog_workspace_content.put_declarative_ldm(
            workspace_id,
            sdk.catalog_workspace_content.get_declarative_ldm(workspace_id)\\
                .modify_mapped_data_source(data_source_mapping)\\
                .change_tables_columns_case(upper_case=True)
        )
        ```
        Args:
            data_source_mapping (dict):
                Key-value pairs defining which data source ID (key) should be replaced by which ID (value).
                If the mapping is empty, this is a noop - it helps with the chaining approach,
                so developers do not have to write ifs when one of the chained inputs is optional.
        Returns:
            self
        """
        if self.datasets is not None and data_source_mapping:
            for dataset in self.datasets:
                if dataset.data_source_table_id is not None:
                    data_source_id = dataset.data_source_table_id.data_source_id
                    if data_source_id in data_source_mapping:
                        dataset.data_source_table_id.data_source_id = data_source_mapping[data_source_id]
        return self

    @staticmethod
    def _change_case(object_name: str, upper_case: bool) -> str:
        if upper_case:
            return object_name.upper()
        else:
            return object_name.lower()

    def change_tables_columns_case(self, upper_case: Optional[bool] = None) -> CatalogDeclarativeLdm:
        """Change the case (to lower/upper-case) of all physical objects mapped in the LDM,
        namely mapped table names and column names.
        Changing everything to upper-case is handy if you migrate e.g. from PostgreSQL to Snowflake,
        which is the only DB having upper-case as its default.
        Instead of enclosing all (lower-cased) object names in all DDLs during the migration,
        you can use this function to change the case in the GoodData LDM.
        If you specify upper_case=False, the function changes the case to lower-case
        (e.g. migration from Snowflake back to PostgreSQL).
        Examples can be found in the docstring of the modify_mapped_data_source() method.
        Args:
            upper_case (bool):
                If True, all table/column names are changed to upper-case, otherwise to lower-case.
                If None, this is a noop - it helps with the chaining approach,
                so developers do not have to write ifs when one of the chained inputs is optional.
        Returns:
            self
        """
        if self.datasets is not None and upper_case is not None:
            for dataset in self.datasets:
                if dataset.data_source_table_id and dataset.data_source_table_id.id:
                    dataset.data_source_table_id.id = self._change_case(dataset.data_source_table_id.id, upper_case)
                if dataset.attributes:
                    for attribute in dataset.attributes:
                        if attribute.source_column:
                            attribute.source_column = self._change_case(attribute.source_column, upper_case)
                        if attribute.sort_column:
                            attribute.sort_column = self._change_case(attribute.sort_column, upper_case)
                        for label in attribute.labels:
                            if label.source_column:
                                label.source_column = self._change_case(label.source_column, upper_case)
                if dataset.facts:
                    for fact in dataset.facts:
                        if fact.source_column:
                            fact.source_column = self._change_case(fact.source_column, upper_case)
                for reference in dataset.references:
                    new_columns = []
                    for reference_column in reference.source_columns:
                        new_columns.append(self._change_case(reference_column, upper_case))
                    reference.source_columns = new_columns
        return self
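Reviewer note: the point of the new `Optional` arguments is that each method becomes a noop when given an empty mapping or `None`, so the calls can be applied unconditionally without wrapping ifs. A minimal usage sketch of that behavior, modeled on the docstring examples above; the `GoodDataSdk.create` bootstrap, host, token and workspace ID are illustrative placeholders, not part of this diff:

```python
from gooddata_sdk import GoodDataSdk

# Placeholder host/token/ID - replace with real values.
sdk = GoodDataSdk.create("https://example.gooddata.com", "<api_token>")
workspace_id = "demo"

data_source_mapping = {}  # nothing to remap -> modify_mapped_data_source() is a noop
upper_case = None         # leave table/column case untouched -> change_tables_columns_case() is a noop

ldm = sdk.catalog_workspace_content.get_declarative_ldm(workspace_id)
# Both calls pass the LDM through unchanged, so no conditional logic is needed around the optional inputs.
ldm.modify_mapped_data_source(data_source_mapping)
ldm.change_tables_columns_case(upper_case)
sdk.catalog_workspace_content.put_declarative_ldm(workspace_id, ldm)
```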
87 changes: 85 additions & 2 deletions gooddata-sdk/gooddata_sdk/catalog/workspace/service.py
@@ -3,10 +3,13 @@

import functools
from pathlib import Path
from typing import List
from typing import List, Optional

import attrs

from gooddata_api_client.exceptions import NotFoundException
from gooddata_sdk.catalog.catalog_service_base import CatalogServiceBase
from gooddata_sdk.catalog.permission.service import CatalogPermissionService
from gooddata_sdk.catalog.workspace.declarative_model.workspace.workspace import (
    CatalogDeclarativeWorkspaceDataFilters,
    CatalogDeclarativeWorkspaceModel,
@@ -21,6 +24,7 @@
class CatalogWorkspaceService(CatalogServiceBase):
    def __init__(self, api_client: GoodDataApiClient) -> None:
        super(CatalogWorkspaceService, self).__init__(api_client)
        self._permissions_service = CatalogPermissionService(api_client)

    # Entities methods

@@ -99,7 +103,7 @@ def list_workspaces(self) -> List[CatalogWorkspace]:
"""Returns a list of all workspaces in current organization
Args:
None
List[CatalogWorkspace]
Returns:
List[CatalogWorkspace]:
@@ -259,6 +263,85 @@ def load_and_put_declarative_workspace(self, workspace_id: str, layout_root_path
        )
        self.put_declarative_workspace(workspace_id=workspace_id, workspace=declarative_workspace)

    def clone_workspace(
        self,
        source_workspace_id: str,
        target_workspace_id: Optional[str] = None,
        target_workspace_name: Optional[str] = None,
        overwrite_existing: Optional[bool] = None,
        data_source_mapping: Optional[dict] = None,
        upper_case: Optional[bool] = True,
    ) -> None:
        """Clone an existing workspace.
        Clones the complete workspace content - LDM, ADM, and permissions.
        If the target workspace already exists, its content is overwritten.
        This can be useful when testing changes in the clone
        - once you are satisfied, you can clone it back to the origin workspace.
        For safety, you have to enforce this behavior with the dedicated input argument `overwrite_existing`.
        Beware of workspace data filters - after the clone you have to set the WDF value for the new workspace.
        Args:
            source_workspace_id (str):
                Source workspace ID from which we want to create a clone.
            target_workspace_id (str):
                Target workspace ID into which we want to clone the source workspace.
                Optional; if empty, we generate <source_workspace_id>_clone.
            target_workspace_name (str):
                Target workspace name.
                Optional; if empty, we generate <source_workspace_name> (Clone).
            overwrite_existing (bool):
                Overwrite the existing target workspace.
            data_source_mapping (dict):
                Optional; allows users to map the LDM to different data source IDs.
            upper_case (bool):
                Optional; allows users to change the case of all physical object IDs (table names, column names).
                True (default) changes them to upper-case, False to lower-case, None is a noop.
                Useful when migrating to Snowflake, which is the only DB with an upper-case default.
        Returns:
            None
        """
        # TODO - what if it has already been cloned? List existing WS and find first free WS ID?
        source_declarative_ws = self.get_declarative_workspace(workspace_id=source_workspace_id)
        source_ws = self.get_workspace(source_workspace_id)

        final_target_workspace_id = target_workspace_id or f"{source_workspace_id}_clone"
        final_target_workspace_name = target_workspace_name or f"{source_ws.name} (Clone)"
        # TODO - enable cloning into another hierarchy
        final_target_parent_id = source_ws.parent_id

        try:
            self.get_workspace(final_target_workspace_id)
            if not overwrite_existing:
                raise Exception(
                    f"Target workspace {final_target_workspace_id} already exists, "
                    "and `overwrite_existing` argument is False"
                )
        except NotFoundException:
            self.create_or_update(
                CatalogWorkspace(
                    workspace_id=final_target_workspace_id,
                    name=final_target_workspace_name,
                    parent_id=final_target_parent_id,
                )
            )

        target_declarative_ws = source_declarative_ws
        if source_declarative_ws.ldm:
            target_declarative_ws = attrs.evolve(
                source_declarative_ws,
                ldm=source_declarative_ws.ldm.modify_mapped_data_source(data_source_mapping).change_tables_columns_case(
                    upper_case
                ),
            )

        self.put_declarative_workspace(workspace_id=final_target_workspace_id, workspace=target_declarative_ws)
        self._permissions_service.put_declarative_permissions(
            final_target_workspace_id, self._permissions_service.get_declarative_permissions(source_workspace_id)
        )
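For orientation, a hedged sketch of calling the new method; the `GoodDataSdk.create` bootstrap, host, token and workspace IDs are illustrative placeholders, and the sketch assumes the service is exposed as `sdk.catalog_workspace`, analogously to the `catalog_workspace_content` service used in the LDM docstring examples:

```python
from gooddata_sdk import GoodDataSdk

# Placeholder host/token/IDs - replace with real values.
sdk = GoodDataSdk.create("https://example.gooddata.com", "<api_token>")

sdk.catalog_workspace.clone_workspace(
    source_workspace_id="demo",
    target_workspace_id="demo_clone",      # optional; defaults to "<source_workspace_id>_clone"
    target_workspace_name="Demo (Clone)",  # optional; defaults to "<source name> (Clone)"
    overwrite_existing=True,               # required when the target workspace already exists
    data_source_mapping={"postgresql": "snowflake"},  # remap the LDM to another data source ID
    upper_case=True,                       # upper-case table/column names, e.g. for Snowflake
)
# Remember to set workspace data filter values for the clone afterwards.
```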

    # Declarative methods - workspace data filters

    def get_declarative_workspace_data_filters(self) -> CatalogDeclarativeWorkspaceDataFilters:
