# Catalog Table Sizes
This notebook lists the size of every table across all schemas in the selected catalogs, plus a grand total, using [DiscoverX](https://github.com/databrickslabs/discoverx).

Use the widgets below to select one or more catalogs and, optionally, a target table and write mode for saving the results, then run the remaining cells.

In [0]:
%pip install dbl-discoverx
# Restart the Python process after the install; this is done before the
# `discoverx` import in a later cell.
dbutils.library.restartPython()

In [0]:
# Widget 1: multiselect over every catalog returned by SHOW CATALOGS, with an
# extra "None Selected" placeholder entry that doubles as the default value.
catalogs = [row.catalog for row in spark.sql("SHOW CATALOGS").collect()] + ["None Selected"]
dbutils.widgets.multiselect("1.catalogs", "None Selected", catalogs)

# Widgets 2: optional destination table for the results and the Spark write
# mode used when saving to it.
dbutils.widgets.text("2.target_table", "", label="Target table (catalog.schema.table)")
dbutils.widgets.dropdown("2.write_mode", "overwrite", ["overwrite", "append"], label="Write mode")


In [0]:
# The multiselect widget value is a comma-separated string. Drop empty entries
# and the "None Selected" placeholder: it is the widget's default choice, not a
# real catalog, and must not be passed on as a catalog name.
catalog_list = [
    c for c in dbutils.widgets.get("1.catalogs").split(',')
    if c and c != "None Selected"
]
if not catalog_list:
    # Fail early with an actionable message instead of a confusing error later.
    raise ValueError("No catalog selected: choose at least one catalog in the '1.catalogs' widget.")

In [0]:
from pyspark.sql import Row
from discoverx import DX

# DiscoverX handle; used below via `dx.from_tables(...)` to run a function over tables.
dx = DX()

def human_size(size_bytes):
    """Format a byte count as a human-readable string using 1024-based units.

    Args:
        size_bytes: Size in bytes (int or float), or ``None`` when the size
            is unknown (e.g. a SQL NULL).

    Returns:
        A string such as ``"1.50 KB"`` with two decimals, or ``None`` when
        ``size_bytes`` is ``None``. Values of 1024 EB or more stay in EB.
    """
    if size_bytes is None:
        # Unknown size: propagate the null instead of failing on `None < 1024`.
        return None
    for unit in ('B', 'KB', 'MB', 'GB', 'TB', 'PB', 'EB'):
        # Compare on magnitude so negative values are scaled like positive ones.
        if abs(size_bytes) < 1024 or unit == 'EB':
            return f"{size_bytes:.2f} {unit}"
        size_bytes /= 1024

def table_size(tbl):
    """Look up the size of one table with ``DESCRIBE DETAIL``.

    Args:
        tbl: Object exposing ``catalog``, ``schema`` and ``table`` attributes.

    Returns:
        A dict with the dotted table name (``'table'``), the ``sizeInBytes``
        value of the first result row (``'size'``) and that value formatted
        by ``human_size`` (``'size_human'``).
    """
    # Backtick-quote each name part for use inside the SQL statement.
    quoted_name = f"`{tbl.catalog}`.`{tbl.schema}`.`{tbl.table}`"
    detail_rows = spark.sql(f"DESCRIBE DETAIL {quoted_name}").select('sizeInBytes').collect()
    size_in_bytes = detail_rows[0][0]

    display_name = f"{tbl.catalog}.{tbl.schema}.{tbl.table}"
    return dict(
        table=display_name,
        size=size_in_bytes,
        size_human=human_size(size_in_bytes),
    )

# Collect one {'table', 'size', 'size_human'} record per table of every selected catalog.
results = []
for cat in catalog_list:
    results.extend(dx.from_tables(f'{cat}.*.*').map(table_size))

# Use an explicit schema so that an empty result list still yields a DataFrame
# (schema inference fails when there is no data to infer from). The column
# order is kept alphabetical, matching the field order of the total Row below.
result_schema = "size long, size_human string, table string"
df = spark.createDataFrame(results, schema=result_schema)

# Add total row for all tables.
# The sum is NULL when there are no rows or every size is NULL; report 0 then.
total_size = df.agg({'size': 'sum'}).collect()[0][0] or 0
total_row = spark.createDataFrame(
    [Row(size=total_size, size_human=human_size(total_size), table='ALL_TABLES')],
    schema=result_schema,
)
# Match columns by name rather than by position so the total row cannot be
# misaligned with the per-table rows.
df = df.unionByName(total_row)

target_table = dbutils.widgets.get("2.target_table").strip()
write_mode = dbutils.widgets.get("2.write_mode")

# Persist the results only when a target table name was provided.
if target_table:
    (df
     .write
     .mode(write_mode)
     .option("overwriteSchema", "true")
     .saveAsTable(target_table)
     )

display(df)