In [0]:
%sql
-- Show every column of system.information_schema.catalogs.
SELECT *
FROM system.information_schema.catalogs;


In [0]:
%sql
-- Table inventory from system.information_schema.tables:
-- fully qualified name parts, table type, creation time and owner.
SELECT table_catalog, table_schema, table_name, table_type, created, table_owner
FROM system.information_schema.tables;


In [0]:
%sql
-- Read the entry, ingest_time, source_file, type and resourceType columns
-- of the fhir_bundle table in the fhir_healthcare_analytics_bronze schema.
-- NOTE(review): there is no LIMIT or WHERE clause, so every row is requested.
SELECT entry, ingest_time, source_file, type, resourceType
FROM angad_kumar91.fhir_healthcare_analytics_bronze.fhir_bundle;

In [0]:
%sql
-- Column inventory from system.information_schema.columns:
-- owning table (catalog/schema/name), column name, data type and nullability flag.
SELECT table_catalog, table_schema, table_name, column_name, data_type, is_nullable
FROM system.information_schema.columns;


In [0]:
# Load schema-, table- and column-level metadata from system.information_schema
# into three Spark DataFrames.
# NOTE(review): a later cell assigns schemas_df, tables_df and columns_df again
# before anything reads them, so the frames built here are superseded.

# One row per schema: owning catalog, schema name, owner, creation time.
schemas_df = spark.sql(
    """
SELECT catalog_name, schema_name, schema_owner, created
FROM system.information_schema.schemata
"""
)

# One row per table: fully qualified name parts and table type.
tables_df = spark.sql(
    """
SELECT table_catalog, table_schema, table_name, table_type
FROM system.information_schema.tables
"""
)

# One row per column: owning table, column name, data type, nullability flag.
columns_df = spark.sql(
    """
SELECT table_catalog, table_schema, table_name,
       column_name, data_type, is_nullable
FROM system.information_schema.columns
"""
)


In [0]:
%sql
-- Show every column of the tables listing in the angad_kumar91 catalog's own information_schema.
SELECT *
FROM angad_kumar91.information_schema.tables


In [0]:
%sql
-- Prepare the export target inside the angad_kumar91 catalog.
-- USE CATALOG runs first because the schema and volume names below are not catalog-qualified.
USE CATALOG angad_kumar91;

-- Schema that holds the exported metadata; IF NOT EXISTS keeps the cell re-runnable.
CREATE SCHEMA IF NOT EXISTS metadata_export;

-- Volume inside that schema; the notebook later writes JSON under
-- /Volumes/angad_kumar91/metadata_export/uc_metadata_volume.
CREATE VOLUME IF NOT EXISTS metadata_export.uc_metadata_volume;


In [0]:
# Load schema-, table- and column-level metadata from system.information_schema
# with no catalog filter. The cells that follow write these frames to the
# volume and to tables; later cells reassign the same names with a
# catalog filter.

# One row per schema: owning catalog, schema name, owner, creation time.
schemas_df = spark.sql(
    """
SELECT catalog_name, schema_name, schema_owner, created
FROM system.information_schema.schemata
"""
)

# One row per table: fully qualified name parts, table type, creation time.
tables_df = spark.sql(
    """
SELECT table_catalog, table_schema, table_name, table_type, created
FROM system.information_schema.tables
"""
)

# One row per column: owning table, column name, data type, nullability flag.
columns_df = spark.sql(
    """
SELECT table_catalog, table_schema, table_name,
       column_name, data_type, is_nullable
FROM system.information_schema.columns
"""
)


In [0]:
# Root of the volume that receives the JSON exports.
base_path = "/Volumes/angad_kumar91/metadata_export/uc_metadata_volume"

# Write each metadata frame as JSON to its own sub-path, replacing whatever a
# previous run left there (mode "overwrite").
for export_name, export_df in (
    ("schemas", schemas_df),
    ("tables", tables_df),
    ("columns", columns_df),
):
    export_df.write.mode("overwrite").json(f"{base_path}/{export_name}")


In [0]:
# Persist each metadata frame as a table in angad_kumar91.metadata_export,
# replacing the previous snapshot on every run.
for snapshot_name, snapshot_df in (
    ("schemas", schemas_df),
    ("tables", tables_df),
    ("columns", columns_df),
):
    (
        snapshot_df.write
        .mode("overwrite")
        # The selected metadata columns have already changed within this
        # notebook (tables_df with and without `created`). Without this option
        # a Delta overwrite is rejected when the frame's schema differs from
        # the existing table's, so let the snapshot replace the schema too.
        .option("overwriteSchema", "true")
        .saveAsTable(f"angad_kumar91.metadata_export.{snapshot_name}")
    )


In [0]:
# `os` is also used by later cells for directory creation.
import os

# Bare expression: displays the current working directory as the cell output.
os.getcwd()


In [0]:
# Anchor the file export at the current working directory.
# NOTE(review): the name suggests this is expected to be a repo checkout — confirm.
REPO_BASE = os.getcwd()

# All exported metadata files live under this sub-directory.
EXPORT_BASE = REPO_BASE + "/unity_catalog_metadata"

# exist_ok=True lets the cell be re-run without failing on an existing directory.
os.makedirs(EXPORT_BASE, exist_ok=True)


In [0]:
# Schema metadata restricted to the angad_kumar91 catalog.
# Reassigns schemas_df, replacing the unfiltered frame built earlier.
schemas_df = spark.sql(
    """
SELECT
  catalog_name,
  schema_name,
  schema_owner,
  created
FROM system.information_schema.schemata
WHERE catalog_name = 'angad_kumar91'
"""
)


In [0]:
# Table metadata restricted to the angad_kumar91 catalog.
# Reassigns tables_df, replacing the unfiltered frame built earlier.
tables_df = spark.sql(
    """
SELECT
  table_catalog,
  table_schema,
  table_name,
  table_type,
  created
FROM system.information_schema.tables
WHERE table_catalog = 'angad_kumar91'
"""
)


In [0]:
# Column metadata restricted to the angad_kumar91 catalog.
# Reassigns columns_df, replacing the unfiltered frame built earlier.
columns_df = spark.sql(
    """
SELECT
  table_catalog,
  table_schema,
  table_name,
  column_name,
  data_type,
  is_nullable
FROM system.information_schema.columns
WHERE table_catalog = 'angad_kumar91'
"""
)


In [0]:
# Convert the three Spark metadata frames to pandas DataFrames, in the same
# order as before (schemas, tables, columns), for the file-based export below.
schemas_pd, tables_pd, columns_pd = [
    spark_frame.toPandas()
    for spark_frame in (schemas_df, tables_df, columns_df)
]


In [0]:
# Write one catalog-wide JSON file per metadata frame directly under
# EXPORT_BASE: a list of row objects (orient="records"), indented by 2 spaces.
for metadata_pd, json_path in (
    (schemas_pd, f"{EXPORT_BASE}/schemas.json"),
    (tables_pd, f"{EXPORT_BASE}/tables.json"),
    (columns_pd, f"{EXPORT_BASE}/columns.json"),
):
    metadata_pd.to_json(json_path, orient="records", indent=2)


In [0]:
# Split the export by schema: each schema that owns at least one row in
# tables_pd gets its own directory holding tables.json and columns.json.
for schema in tables_pd["table_schema"].unique():
    schema_dir = f"{EXPORT_BASE}/{schema}"
    # exist_ok=True keeps the loop re-runnable over an existing export tree.
    os.makedirs(schema_dir, exist_ok=True)

    # Rows of each frame that belong to the current schema.
    schema_tables = tables_pd.loc[tables_pd["table_schema"].eq(schema)]
    schema_columns = columns_pd.loc[columns_pd["table_schema"].eq(schema)]

    # Same JSON layout as the catalog-wide files: list of row objects, 2-space indent.
    schema_tables.to_json(f"{schema_dir}/tables.json", orient="records", indent=2)
    schema_columns.to_json(f"{schema_dir}/columns.json", orient="records", indent=2)


In [0]:
# Build one CREATE TABLE stub per row of tables_pd. The column list is the
# literal placeholder "(...)", so the output is an index of fully qualified
# table names rather than executable DDL.
ddl_lines = [
    f"CREATE TABLE {catalog_name}.{schema_name}.{table_name} (...);"
    for catalog_name, schema_name, table_name in zip(
        tables_pd["table_catalog"],
        tables_pd["table_schema"],
        tables_pd["table_name"],
    )
]

# One statement per line, no trailing newline; the file is rewritten on each run.
with open(f"{EXPORT_BASE}/all_tables_ddl.sql", "w") as ddl_file:
    ddl_file.write("\n".join(ddl_lines))
