diff --git a/src/databricks/labs/ucx/hive_metastore/table_migrate.py b/src/databricks/labs/ucx/hive_metastore/table_migrate.py index 64a17e9b8c..f87e6276cd 100644 --- a/src/databricks/labs/ucx/hive_metastore/table_migrate.py +++ b/src/databricks/labs/ucx/hive_metastore/table_migrate.py @@ -72,6 +72,7 @@ def _migrate_external_table(self, src_table: Table, rule: Rule): table_migrate_sql = src_table.sql_migrate_external(target_table_key) logger.debug(f"Migrating external table {src_table.key} to using SQL query: {table_migrate_sql}") self._backend.execute(table_migrate_sql) + self._backend.execute(src_table.sql_alter_from(rule.as_uc_table_key, self._ws.get_workspace_id())) return True def _migrate_dbfs_root_table(self, src_table: Table, rule: Rule): @@ -80,7 +81,7 @@ def _migrate_dbfs_root_table(self, src_table: Table, rule: Rule): logger.debug(f"Migrating managed table {src_table.key} to using SQL query: {table_migrate_sql}") self._backend.execute(table_migrate_sql) self._backend.execute(src_table.sql_alter_to(rule.as_uc_table_key)) - self._backend.execute(src_table.sql_alter_from(rule.as_uc_table_key)) + self._backend.execute(src_table.sql_alter_from(rule.as_uc_table_key, self._ws.get_workspace_id())) return True def _migrate_view(self, src_table: Table, rule: Rule): @@ -89,7 +90,7 @@ def _migrate_view(self, src_table: Table, rule: Rule): logger.debug(f"Migrating view {src_table.key} to using SQL query: {table_migrate_sql}") self._backend.execute(table_migrate_sql) self._backend.execute(src_table.sql_alter_to(rule.as_uc_table_key)) - self._backend.execute(src_table.sql_alter_from(rule.as_uc_table_key)) + self._backend.execute(src_table.sql_alter_from(rule.as_uc_table_key, self._ws.get_workspace_id())) return True def _init_seen_tables(self): diff --git a/src/databricks/labs/ucx/hive_metastore/tables.py b/src/databricks/labs/ucx/hive_metastore/tables.py index 65b676d119..40798b9634 100644 --- a/src/databricks/labs/ucx/hive_metastore/tables.py +++ b/src/databricks/labs/ucx/hive_metastore/tables.py @@ -54,6 +54,8 @@ class Table: "dbfs:/databricks-datasets", ] + UPGRADED_FROM_WS_PARAM: typing.ClassVar[str] = "upgraded_from_workspace_id" + @property def is_delta(self) -> bool: if self.table_format is None: @@ -71,8 +73,12 @@ def kind(self) -> str: def sql_alter_to(self, target_table_key): return f"ALTER {self.kind} {self.key} SET TBLPROPERTIES ('upgraded_to' = '{target_table_key}');" - def sql_alter_from(self, target_table_key): - return f"ALTER {self.kind} {target_table_key} SET TBLPROPERTIES ('upgraded_from' = '{self.key}');" + def sql_alter_from(self, target_table_key, ws_id): + return ( + f"ALTER {self.kind} {target_table_key} SET TBLPROPERTIES " + f"('upgraded_from' = '{self.key}'" + f" , '{self.UPGRADED_FROM_WS_PARAM}' = '{ws_id}');" + ) def sql_unset_upgraded_to(self): return f"ALTER {self.kind} {self.key} UNSET TBLPROPERTIES IF EXISTS('upgraded_to');" diff --git a/tests/integration/hive_metastore/test_migrate.py b/tests/integration/hive_metastore/test_migrate.py index f7e058116e..cd60230ce4 100644 --- a/tests/integration/hive_metastore/test_migrate.py +++ b/tests/integration/hive_metastore/test_migrate.py @@ -7,6 +7,7 @@ from databricks.labs.ucx.hive_metastore.mapping import Rule from databricks.labs.ucx.hive_metastore.table_migrate import TablesMigrate +from databricks.labs.ucx.hive_metastore.tables import Table from ..conftest import StaticTableMapping, StaticTablesCrawler @@ -46,6 +47,7 @@ def test_migrate_managed_tables(ws, sql_backend, inventory_schema, make_catalog, target_table_properties = ws.tables.get(f"{dst_schema.full_name}.{src_managed_table.name}").properties assert target_table_properties["upgraded_from"] == src_managed_table.full_name + assert target_table_properties[Table.UPGRADED_FROM_WS_PARAM] == str(ws.get_workspace_id()) @retried(on=[NotFound], timeout=timedelta(minutes=5)) @@ -136,6 +138,9 @@ def test_migrate_external_table(ws, sql_backend, inventory_schema, make_catalog, target_tables = list(sql_backend.fetch(f"SHOW TABLES IN {dst_schema.full_name}")) assert len(target_tables) == 1 + target_table_properties = ws.tables.get(f"{dst_schema.full_name}.{src_external_table.name}").properties + assert target_table_properties["upgraded_from"] == src_external_table.full_name + assert target_table_properties[Table.UPGRADED_FROM_WS_PARAM] == str(ws.get_workspace_id()) @retried(on=[NotFound], timeout=timedelta(minutes=5)) diff --git a/tests/unit/hive_metastore/test_table_migrate.py b/tests/unit/hive_metastore/test_table_migrate.py index ad3cc283e9..ecd1f5bad2 100644 --- a/tests/unit/hive_metastore/test_table_migrate.py +++ b/tests/unit/hive_metastore/test_table_migrate.py @@ -24,7 +24,8 @@ def test_migrate_dbfs_root_tables_should_produce_proper_queries(): rows = {} backend = MockBackend(fails_on_first=errors, rows=rows) table_crawler = TablesCrawler(backend, "inventory_database") - client = MagicMock() + client = create_autospec(WorkspaceClient) + client.get_workspace_id.return_value = "12345" table_mapping = create_autospec(TableMapping) table_mapping.get_tables_to_migrate.return_value = [ TableToMigrate( @@ -54,8 +55,8 @@ def test_migrate_dbfs_root_tables_should_produce_proper_queries(): "SET TBLPROPERTIES ('upgraded_to' = 'ucx_default.db1_dst.managed_dbfs');" ) in list(backend.queries) assert ( - "ALTER TABLE ucx_default.db1_dst.managed_dbfs " - "SET TBLPROPERTIES ('upgraded_from' = 'hive_metastore.db1_src.managed_dbfs');" + f"ALTER TABLE ucx_default.db1_dst.managed_dbfs " + f"SET TBLPROPERTIES ('upgraded_from' = 'hive_metastore.db1_src.managed_dbfs' , '{Table.UPGRADED_FROM_WS_PARAM}' = '12345');" ) in list(backend.queries) assert "SYNC TABLE ucx_default.db1_dst.managed_other FROM hive_metastore.db1_src.managed_other;" in list( backend.queries @@ -86,7 +87,8 @@ def test_migrate_external_tables_should_produce_proper_queries(): rows = {} backend = MockBackend(fails_on_first=errors, rows=rows) table_crawler = TablesCrawler(backend, "inventory_database") - client = MagicMock() + client = create_autospec(WorkspaceClient) + client.get_workspace_id.return_value = "12345" table_mapping = create_autospec(TableMapping) table_mapping.get_tables_to_migrate.return_value = [ TableToMigrate( @@ -98,7 +100,11 @@ def test_migrate_external_tables_should_produce_proper_queries(): table_migrate.migrate_tables() assert (list(backend.queries)) == [ - "SYNC TABLE ucx_default.db1_dst.external_dst FROM hive_metastore.db1_src.external_src;" + "SYNC TABLE ucx_default.db1_dst.external_dst FROM hive_metastore.db1_src.external_src;", + ( + f"ALTER TABLE ucx_default.db1_dst.external_dst " + f"SET TBLPROPERTIES ('upgraded_from' = 'hive_metastore.db1_src.external_src' , '{Table.UPGRADED_FROM_WS_PARAM}' = '12345');" + ), ] @@ -159,7 +165,8 @@ def test_migrate_view_should_produce_proper_queries(): rows = {} backend = MockBackend(fails_on_first=errors, rows=rows) table_crawler = TablesCrawler(backend, "inventory_database") - client = MagicMock() + client = create_autospec(WorkspaceClient) + client.get_workspace_id.return_value = "12345" table_mapping = create_autospec(TableMapping) table_mapping.get_tables_to_migrate.return_value = [ TableToMigrate( @@ -176,8 +183,8 @@ def test_migrate_view_should_produce_proper_queries(): "SET TBLPROPERTIES ('upgraded_to' = 'ucx_default.db1_dst.view_dst');" ) in list(backend.queries) assert ( - "ALTER VIEW ucx_default.db1_dst.view_dst " - "SET TBLPROPERTIES ('upgraded_from' = 'hive_metastore.db1_src.view_src');" + f"ALTER VIEW ucx_default.db1_dst.view_dst " + f"SET TBLPROPERTIES ('upgraded_from' = 'hive_metastore.db1_src.view_src' , '{Table.UPGRADED_FROM_WS_PARAM}' = '12345');" ) in list(backend.queries)