# Catalog Table Sizes
This notebook lists the total size of every table in a selected catalog and schema using [DiscoverX](https://github.com/databrickslabs/discoverx).

Run the first two code cells to install DiscoverX and create the catalog and schema widgets, select a catalog and schema with those widgets, then run the remaining cells.

In [None]:
%pip install dbl-discoverx
# Restart the Python process so the package installed above can be imported
# by the following cells.
dbutils.library.restartPython()

In [None]:
# Widget 1: catalog selector, populated from SHOW CATALOGS.
# The first catalog is the default; an empty string is used if none are listed.
catalogs = [row["catalog"] for row in spark.sql("SHOW CATALOGS").collect()]
default_catalog = catalogs[0] if catalogs else ""
dbutils.widgets.combobox("1.catalog", default_catalog, catalogs)
catalog = dbutils.widgets.get("1.catalog")

# Widget 2: schema selector for the chosen catalog, with information_schema
# left out of the choices.
schemas_df = spark.sql(f"SHOW SCHEMAS IN `{catalog}`")
schemas = [
    row[0]
    for row in schemas_df.collect()
    if row[0].lower() != "information_schema"
]
default_schema = schemas[0] if schemas else ""
dbutils.widgets.combobox("2.schema", default_schema, schemas)
schema = dbutils.widgets.get("2.schema")

In [None]:
from discoverx import DX

# DiscoverX handle; used further down to select tables by pattern
# (from_tables) and apply a function to each of them (map).
dx = DX()

def human_size(size_bytes):
    """Format a byte count as a human-readable string with 1024-based units.

    Args:
        size_bytes: Number of bytes (int or float), or None when no size is
            available.

    Returns:
        A string such as "1.50 KB" with two decimals and one of the units
        B, KB, MB, GB, TB, PB or EB. Values of 1024 EB and above stay in EB.
        Returns "unknown" when size_bytes is None.
    """
    # table_size passes the sizeInBytes column through unchanged; presumably
    # that can be null for objects without a recorded size (TODO confirm), so
    # report it instead of failing on the comparison below.
    if size_bytes is None:
        return "unknown"

    units = ['B', 'KB', 'MB', 'GB', 'TB', 'PB', 'EB']
    value = float(size_bytes)
    for unit in units[:-1]:
        # Compare on magnitude so negative inputs scale like positive ones.
        if abs(value) < 1024:
            return f"{value:.2f} {unit}"
        value /= 1024
    # Largest unit: nothing bigger to scale into.
    return f"{value:.2f} {units[-1]}"

def table_size(tbl):
    """Look up one table's size, print it, and return it as a dict.

    Args:
        tbl: Object exposing ``catalog``, ``schema`` and ``table`` attributes.

    Returns:
        ``{"table": "<catalog>.<schema>.<table>", "size": "<readable size>"}``
        where the size comes from the ``sizeInBytes`` column of
        ``DESCRIBE DETAIL`` formatted by ``human_size``.
    """
    display_name = f"{tbl.catalog}.{tbl.schema}.{tbl.table}"
    # Backtick-quote each part so unusual identifiers survive the SQL parser.
    quoted_name = f"`{tbl.catalog}`.`{tbl.schema}`.`{tbl.table}`"

    detail_rows = (
        spark.sql(f"DESCRIBE DETAIL {quoted_name}")
        .select('sizeInBytes')
        .collect()
    )
    size_in_bytes = detail_rows[0][0]
    readable = human_size(size_in_bytes)

    print(f'{display_name}: {readable}')
    return {"table": display_name, "size": readable}

import json

# Every table in the selected catalog and schema.
table_pattern = f"{catalog}.{schema}.*"

# Apply table_size to each matching table and keep what it returns.
results = dx.from_tables(table_pattern).map(table_size)

# Pretty-print each result as indented JSON.
for r in results:
    print(json.dumps(r, indent=4))