In [0]:
from databricks.sdk import WorkspaceClient

# Databricks SDK client, built with no explicit configuration; it is used further
# down to rename the source schema through the workspace API.
workspace_client = WorkspaceClient()

In [0]:
# Declare the notebook's input widgets. Each one is a free-text field whose
# default value is the empty string.
for widget_name, widget_label in (
    ("catalog_name", "Catalog Name"),
    ("schema_name", "Schema Name"),
    ("schema_root_location", "Schema Location"),
    ("dry_run", "Dry Run"),
):
  dbutils.widgets.text(widget_name, "", widget_label)

In [0]:
# Pull the widget values into plain variables, in the order the widgets were declared.
catalog_name, schema_name, schema_root_location, dry_run = (
    dbutils.widgets.get(widget)
    for widget in ("catalog_name", "schema_name", "schema_root_location", "dry_run")
)

# Name the original schema is renamed to before the generated scripts are executed.
backup_schema_name = schema_name + "_ext_mgd"

In [0]:
%run ./utils

In [0]:
import pandas as pd

# Empty summary frame; the get_* helpers below each receive it and hand back an
# updated version alongside their own result.
external_schema_summary = pd.DataFrame(
    columns=[
        "id",
        "object_category",
        "object_sub_category",
        "count",
    ]
)

In [0]:
# Inventory of the source schema. Every helper takes the running summary frame and
# returns (result, updated summary), so the calls must stay in this order.

# Schema owner and comment.
external_schema_details, external_schema_summary = get_schemata(
    catalog_name, schema_name, external_schema_summary
)

# Schema location; checked in the next cell to detect a schema that is already managed.
external_schema_location, external_schema_summary = get_schema_location(
    catalog_name, schema_name, external_schema_summary
)

# Schema-level tags.
external_schema_tags, external_schema_summary = get_schema_tags(
    catalog_name, schema_name, external_schema_summary
)

# Schema-level grants.
external_schema_grants, external_schema_summary = get_schema_grants(
    catalog_name, schema_name, external_schema_summary
)

# Unsupported tables: name, description, type, format and owner.
external_unsupported_tables, external_schema_summary = get_unsupported_tables(
    catalog_name, schema_name, external_schema_summary
)

# Supported delta tables: name, description, type, format and owner.
external_tables, external_schema_summary = get_supported_tables(
    catalog_name, schema_name, external_schema_summary
)

# Views: name, definition, owner and comment.
external_views, external_schema_summary = get_views(
    catalog_name, schema_name, external_schema_summary
)

# Tags on tables and views.
external_tables_views_tags, external_schema_summary = get_tables_views_tags(
    catalog_name, schema_name, external_schema_summary, external_tables, external_views
)

# Column comments on tables and views.
external_table_view_columns_comment, external_schema_summary = get_table_view_columns_comment(
    catalog_name, schema_name, external_schema_summary, external_tables, external_views
)

# Column tags on tables and views.
external_table_view_columns_tags, external_schema_summary = get_table_view_columns_tags(
    catalog_name, schema_name, external_schema_summary, external_tables, external_views
)

# Constraints (only the supported tables are passed in, not the views).
external_table_view_constraints, external_schema_summary = get_table_view_constraints(
    catalog_name, schema_name, external_schema_summary, external_tables
)

# Grants on tables and views.
external_table_view_grants, external_schema_summary = get_table_view_grants(
    catalog_name, schema_name, external_schema_summary, external_tables, external_views
)

# Unsupported volumes: name, owner, location, description.
external_unsupported_volumes, external_schema_summary = get_unsupported_volumes(
    catalog_name, schema_name, external_schema_summary
)

# Supported volumes: name, owner, location, description.
external_volumes, external_schema_summary = get_supported_volumes(
    catalog_name, schema_name, external_schema_summary
)

# Tags on the supported volumes.
external_volume_tags, external_schema_summary = get_volume_tags(
    catalog_name, schema_name, external_schema_summary, external_volumes
)

# Grants on the supported volumes.
external_volume_grants, external_schema_summary = get_volume_grants(
    catalog_name, schema_name, external_schema_summary, external_volumes
)

# Unsupported functions.
external_unsupported_functions, external_schema_summary = get_unsupported_functions(
    catalog_name, schema_name, external_schema_summary
)

# Show the accumulated summary as the cell output.
external_schema_summary

In [0]:
# Pre-flight gate: stop before generating any scripts when the schema contains
# unsupported volumes, tables or functions, or when the schema-location lookup
# returned a non-empty "database_description_value" (treated here as "already managed").
# The `or` chain short-circuits, so later counts only run if earlier ones are zero.
migration_blocked = (
    external_unsupported_volumes.count() > 0
    or external_unsupported_tables.count() > 0
    or external_unsupported_functions.count() > 0
    or external_schema_location.filter(
        external_schema_location['database_description_value'] != ""
    ).count() > 0
)

if migration_blocked:
  print("Schema has unsupported tables, volumes, functions or schema is already managed. Please fix them before proceeding")
  display(external_unsupported_volumes)
  display(external_unsupported_tables)
  display(external_unsupported_functions)
  # exit() lives on dbutils.notebook; the previous `dbutils.exit` attribute does not
  # exist, so this branch raised AttributeError instead of ending the run with "1".
  dbutils.notebook.exit("1")
else:
  print("Schema has no unsupported tables, volumes, functions and schema is not managed. Proceeding with migration")

In [0]:
# Build the statements for the schema itself.
schema_scripts = generate_schema_scripts(
    catalog_name,
    schema_name,
    schema_root_location,
    external_schema_tags,
    external_schema_grants,
    external_schema_details,
)

# Build the statements for tables and views.
table_view_scripts = generate_table_view_scripts(
    catalog_name,
    schema_name,
    backup_schema_name,
    external_tables,
    external_views,
    external_tables_views_tags,
    external_table_view_columns_comment,
    external_table_view_columns_tags,
    external_table_view_grants,
)

# Build the statements for volumes.
volume_scripts = generate_volume_scripts(
    catalog_name,
    schema_name,
    backup_schema_name,
    external_volumes,
    external_volume_tags,
    external_volume_grants,
)

# Reference counts taken from the source schema.
external_data_count = data_count(
    catalog_name,
    schema_name,
    external_tables,
    external_views,
    external_volumes,
)

In [0]:
# Preview of the generated schema-level statements; they are only executed by the
# dry_run == "N" cell further down.
schema_scripts

In [0]:
# Preview of the generated table/view statements; they are only executed by the
# dry_run == "N" cell further down.
table_view_scripts

In [0]:
# Preview of the generated volume statements; they are only executed by the
# dry_run == "N" cell further down.
volume_scripts

In [0]:
# Reference counts captured from the source schema; compared against the managed
# counts in the final cell.
external_data_count

In [0]:
# Execute the migration only when the "Dry Run" widget is exactly "N"; any other
# value (including the empty default) ends the notebook without changing anything.
if dry_run == "N":
  print("Will use workspace client to rename the schema")
  # Move the original schema out of the way under the backup name, so the generated
  # scripts can reuse the original schema name.
  workspace_client.schemas.update(full_name=f"{catalog_name}.{schema_name}", new_name=backup_schema_name)
  # Run the statements in dependency order: schema first, then tables/views, then volumes.
  # Each statement is printed before it runs so a failure can be traced to its SQL.
  for script_batch in (schema_scripts, table_view_scripts, volume_scripts):
    for statement in script_batch:
      print(statement)
      spark.sql(statement)
else:
  print("Dry run: generated scripts were not executed")
  # The utility module is `dbutils.notebook` (singular); the previous
  # `dbutils.notebooks` raised AttributeError on every dry run.
  dbutils.notebook.exit("1")

In [0]:
# Repeat the inventory for catalog_name.schema_name after the migration statements
# have run, accumulating into a fresh summary frame. Every helper takes the running
# summary and returns (result, updated summary), so the calls must stay in this order.
managed_schema_summary = pd.DataFrame(
    columns=["id", "object_category", "object_sub_category", "count"]
)

# Schema owner and comment.
managed_schema_details, managed_schema_summary = get_schemata(
    catalog_name, schema_name, managed_schema_summary
)

# Schema location as reported after the migration.
managed_schema_location, managed_schema_summary = get_schema_location(
    catalog_name, schema_name, managed_schema_summary
)

# Schema-level tags.
managed_schema_tags, managed_schema_summary = get_schema_tags(
    catalog_name, schema_name, managed_schema_summary
)

# Schema-level grants.
managed_schema_grants, managed_schema_summary = get_schema_grants(
    catalog_name, schema_name, managed_schema_summary
)

# Unsupported tables: name, description, type, format and owner.
managed_unsupported_tables, managed_schema_summary = get_unsupported_tables(
    catalog_name, schema_name, managed_schema_summary
)

# Supported delta tables: name, description, type, format and owner.
managed_tables, managed_schema_summary = get_supported_tables(
    catalog_name, schema_name, managed_schema_summary
)

# Views: name, definition, owner and comment.
managed_views, managed_schema_summary = get_views(
    catalog_name, schema_name, managed_schema_summary
)

# Tags on tables and views.
managed_tables_views_tags, managed_schema_summary = get_tables_views_tags(
    catalog_name, schema_name, managed_schema_summary, managed_tables, managed_views
)

# Column comments on tables and views.
managed_table_view_columns_comment, managed_schema_summary = get_table_view_columns_comment(
    catalog_name, schema_name, managed_schema_summary, managed_tables, managed_views
)

# Column tags on tables and views.
managed_table_view_columns_tags, managed_schema_summary = get_table_view_columns_tags(
    catalog_name, schema_name, managed_schema_summary, managed_tables, managed_views
)

# Constraints (only the supported tables are passed in, not the views).
managed_table_view_constraints, managed_schema_summary = get_table_view_constraints(
    catalog_name, schema_name, managed_schema_summary, managed_tables
)

# Grants on tables and views.
managed_table_view_grants, managed_schema_summary = get_table_view_grants(
    catalog_name, schema_name, managed_schema_summary, managed_tables, managed_views
)

# Unsupported volumes: name, owner, location, description.
managed_unsupported_volumes, managed_schema_summary = get_unsupported_volumes(
    catalog_name, schema_name, managed_schema_summary
)

# Supported volumes: name, owner, location, description.
managed_volumes, managed_schema_summary = get_supported_volumes(
    catalog_name, schema_name, managed_schema_summary
)

# Tags on the supported volumes.
managed_volume_tags, managed_schema_summary = get_volume_tags(
    catalog_name, schema_name, managed_schema_summary, managed_volumes
)

# Grants on the supported volumes.
managed_volume_grants, managed_schema_summary = get_volume_grants(
    catalog_name, schema_name, managed_schema_summary, managed_volumes
)

# Unsupported functions.
managed_unsupported_functions, managed_schema_summary = get_unsupported_functions(
    catalog_name, schema_name, managed_schema_summary
)

# Reference counts for the migrated objects, to compare with the source counts.
managed_data_count = data_count(
    catalog_name, schema_name, managed_tables, managed_views, managed_volumes
)

# Show the accumulated summary as the cell output.
managed_schema_summary

In [0]:
# Prefix the columns of each frame with its side ("external_" / "managed_") so the
# two sides can sit next to each other in one table without name clashes.
(
    external_schema_summary,
    managed_schema_summary,
    external_data_count,
    managed_data_count,
) = (
    add_prefix_to_columns(external_schema_summary, "external_"),
    add_prefix_to_columns(managed_schema_summary, "managed_"),
    add_prefix_to_columns(external_data_count, "external_"),
    add_prefix_to_columns(managed_data_count, "managed_"),
)


In [0]:
# Line up the before/after summaries row by row on their id columns.
# set_index is used without inplace=True so the source frames are left untouched:
# the in-place form consumed the id columns, so re-running this cell raised KeyError.
# The inner join keeps only ids present on both sides.
schema_summary = external_schema_summary.set_index('external_id').join(
    managed_schema_summary.set_index('managed_id'),
    how="inner",
)
display(schema_summary)

In [0]:
# Side-by-side source vs. managed counts.
# assign() returns a new frame, so external_data_count is not modified; the previous
# plain assignment only aliased it, and adding the column mutated the original too.
# The managed column is aligned on the frames' index, as with item assignment.
summary_data_count = external_data_count.assign(
    managed_count=managed_data_count['managed_count']
)
summary_data_count
