### Install databricks-uniform-sync

In [0]:
%pip install databricks-uniform-sync

Restart the Python kernel so the newly installed package can be imported.

In [0]:
# Restart the Python process so the package installed by the %pip cell above is picked up.
# Keep this in its own cell, before any configuration variables are defined.
dbutils.library.restartPython()

### Configure Required Inputs

In [0]:
# === Databricks Parameters ===

# Resolve the notebook context once; the workspace URL and the API token are both read from it.
_notebook_context = dbutils.notebook.entry_point.getDbutils().notebook().getContext()

# URL of the current Databricks workspace, retrieved automatically.
# getOrElse(None) yields None when the context exposes no URL, so check for that
# BEFORE calling str(): str(None) would silently produce the literal string "None".
_workspace_url = _notebook_context.apiUrl().getOrElse(None)
if _workspace_url is None:
    raise RuntimeError(
        "Could not determine the Databricks workspace URL from the notebook context; "
        "set dbx_workspace_url explicitly."
    )
dbx_workspace_url = str(_workspace_url)

# Personal Access Token for Databricks authentication, retrieved automatically.
# This is None when the context exposes no token.
dbx_workspace_pat = _notebook_context.apiToken().getOrElse(None)

metadata_catalog = "guanjie_catalog"  # Unity Catalog catalog to store metadata
metadata_schema = "databricks_uniform_sync"  # Unity Catalog schema to store metadata
# (Optional) Custom name for metadata table.
# NOTE(review): this value is not passed to any call in the visible cells — confirm whether it should be.
metadata_table_name = "uniform_sync_metadata"

# Placeholders: do not commit real credentials here. Prefer loading them from a
# secret scope, e.g. dbutils.secrets.get(scope="<scope>", key="<key>").
oauth_client_id = "XXXX"  # Databricks OAuth client ID
oauth_client_secret = "XXX"  # Databricks OAuth client secret (use secrets management)


catalogs_to_sync = ["gshen_uniform", "guanjie_catalog"]  # Which UC catalogs to synchronize to Snowflake

# === Snowflake Parameters ===

sf_account_id = "XXX"  # Snowflake account identifier
sf_user = "databricks_service_account"  # Snowflake user or service account

sf_private_key_file = "rsa/rsa_key.p8"  # Path to Snowflake RSA private key
# Placeholder: load the key passphrase from a secret scope rather than hardcoding it.
sf_private_key_file_pwd = "XXX"  # Password for decrypting the private key (use secrets management)

# === Optional Parameters ===

refresh_interval_seconds = 300  # (Optional) Catalog Integration refresh interval (in seconds)
auto_refresh = True  # (Optional) Enable/disable automatic table refresh


### Create & Refresh Metadata Tables

In [0]:
# Class that drives the Databricks -> Snowflake metadata sync
from databricks_uniform_sync.dbx_to_sf_mirror import DatabricksToSnowflakeMirror

# Gather the constructor arguments in one place so the configuration reads at a glance.
# NOTE(review): metadata_table_name is defined in the configuration cell but is not
# passed here — confirm whether the constructor should receive it.
mirror_settings = {
    "spark_session": spark,                  # Active Spark session
    "dbx_workspace_url": dbx_workspace_url,  # Databricks workspace URL
    "dbx_workspace_pat": dbx_workspace_pat,  # Personal Access Token for Databricks
    "metadata_catalog": metadata_catalog,    # Catalog where sync metadata is stored
    "metadata_schema": metadata_schema,      # Schema for sync metadata
}
d2s_mirror = DatabricksToSnowflakeMirror(**mirror_settings)

# Refresh the Unity Catalog metadata once per catalog selected for syncing
for uc_catalog_name in catalogs_to_sync:
    d2s_mirror.refresh_uc_metadata(uc_catalog_name)

In [0]:
# Fully qualified name of dbx_sf_uniform_metadata_vw inside the configured metadata catalog and schema
metadata_view_name = f"{metadata_catalog}.{metadata_schema}.dbx_sf_uniform_metadata_vw"

# Load it as a DataFrame and render it in the notebook output
metadata_table = spark.read.table(metadata_view_name)
display(metadata_table)

### Generate Unity Catalog Discovery Tags

In [0]:
# Refresh the Unity Catalog discovery tags through the mirror helper.
# Requires d2s_mirror to have been created by the metadata cell earlier in the notebook.
d2s_mirror.refresh_uc_metadata_tags()

### Create Snowflake Catalog Integrations

Print the SQL DDL for Catalog Integrations.

In [0]:
# Build the SQL DDL for the Snowflake catalog integrations and print it for review.
# NOTE(review): the OAuth client secret is passed in here and may appear in the
# printed SQL — confirm, and clear this cell's output before sharing the notebook.
catalog_integration_args = {
    "oauth_client_id": oauth_client_id,
    "oauth_client_secret": oauth_client_secret,
    "refresh_interval_seconds": refresh_interval_seconds,
}
sql_ddl = d2s_mirror.generate_create_sf_catalog_integrations_sql(**catalog_integration_args)

# Print every generated statement, in order
for ddl_statement in sql_ddl:
    print(ddl_statement)

Create Catalog Integrations within Snowflake.

In [0]:
# Snowflake connection arguments (key-pair authentication values from the configuration cell)
snowflake_connection = dict(
    sf_account_id=sf_account_id,
    sf_user=sf_user,
    sf_private_key_file=sf_private_key_file,
    sf_private_key_file_pwd=sf_private_key_file_pwd,
)

# Databricks OAuth credentials and the refresh interval for the catalog integrations
integration_options = dict(
    oauth_client_id=oauth_client_id,
    oauth_client_secret=oauth_client_secret,
    refresh_interval_seconds=refresh_interval_seconds,
)

d2s_mirror.create_sf_catalog_integrations(**snowflake_connection, **integration_options)

### Create Snowflake Iceberg Tables

Print the SQL DDL for Iceberg Tables.

In [0]:
# Build the SQL DDL for the Snowflake Iceberg tables and print it for review
sql_ddl = d2s_mirror.generate_create_sf_iceberg_tables_sql()

# Print every generated statement, in order
for ddl_statement in sql_ddl:
    print(ddl_statement)

Create Iceberg Tables within Snowflake.

In [0]:
# Snowflake connection arguments (key-pair authentication values from the configuration cell)
snowflake_connection = dict(
    sf_account_id=sf_account_id,
    sf_user=sf_user,
    sf_private_key_file=sf_private_key_file,
    sf_private_key_file_pwd=sf_private_key_file_pwd,
)

# Create the Iceberg tables, forwarding the auto_refresh setting from the configuration cell
d2s_mirror.create_sf_iceberg_tables(**snowflake_connection, auto_refresh=auto_refresh)