# Delta Table Maintenance
This notebook queries Spark for the list of Delta tables so that they can be automatically optimized and vacuumed. For more on this topic, see https://learn.microsoft.com/en-us/fabric/data-engineering/lakehouse-table-maintenance

## Define Variables
Use this cell to **define your variables** in your Lakehouse for the OPTIMIZE and VACUUM commands.

In [None]:
from pyspark.context import SparkContext
from pyspark.sql.session import SparkSession
from notebookutils import mssparkutils

# Obtain the active Spark context (or create one) and wrap it in a session.
sparkContext = SparkContext.getOrCreate()
spark = SparkSession(sparkContext)

# Retention window, in hours, used by the VACUUM cell further down.
retention_hours = '168' # This equals 1 week

# Name of the SHOW TABLES result column that holds each table's name.
# Defined outside the try block so the later cells can still resolve it
# even when the table listing below fails.
table_column_name = "tableName"

# Initialize counters (the optimize and vacuum cells increment the last two).
total_tables = 0
optimized_tables = 0
vacuumed_tables = 0

# Count the tables returned by an unqualified SHOW TABLES, i.e. the tables of
# the current database/schema only.
try:
    tables_df = spark.sql("SHOW TABLES")
    table_count = tables_df.count()
    print(f"Found {table_count} tables")
    display(tables_df)
    total_tables = table_count
    print(f"Using column '{table_column_name}' for table names")
except Exception as e:
    # Best-effort: report the failure and leave total_tables at 0.
    print(f"Error listing tables: {str(e)}")

## Optimize tables
V-Order is applied as part of the OPTIMIZE command. For more on this topic, see https://learn.microsoft.com/en-us/fabric/data-engineering/delta-optimization-and-v-order?tabs=sparksql

In [None]:
from delta.tables import DeltaTable

# Start from zero so that re-running this cell reports this run's count only,
# instead of adding to the total left behind by a previous execution.
optimized_tables = 0

try:
    tables_df = spark.sql("SHOW TABLES")
    tables = tables_df.collect()
    print("Starting optimization of tables...")

    for table in tables:
        try:
            table_name = table[table_column_name]

            try:
                # Preferred path: SQL OPTIMIZE with the VORDER keyword.
                spark.sql(f"OPTIMIZE {table_name} VORDER")
                optimized_tables += 1
                print(f"✔ Optimized using SQL: {table_name} ({optimized_tables}/{total_tables})")
            except Exception as e1:
                # Surface why the SQL path failed before trying the fallback,
                # so the root cause is not lost when the fallback succeeds.
                print(f"⚠ SQL OPTIMIZE failed for '{table_name}', falling back to DeltaTable API: {str(e1)}")
                try:
                    # Fallback: compaction through the DeltaTable Python API.
                    # NOTE(review): this call does not pass VORDER explicitly.
                    deltaTable = DeltaTable.forName(spark, table_name)
                    deltaTable.optimize().executeCompaction()
                    optimized_tables += 1
                    print(f"✔ Optimized using DeltaTable: {table_name} ({optimized_tables}/{total_tables})")
                except Exception as e2:
                    print(f"✖ Failed to optimize '{table_name}': {str(e2)}")
        except Exception as e:
            # Keeps one bad row/table from aborting the whole loop.
            print(f"⚠ Optimize - Error processing table: {str(e)}")
except Exception as e:
    # Listing the tables itself failed; nothing was optimized.
    print(f"⚠ Error processing: {str(e)}")

print(f"✅ Optimization complete. Successfully optimized {optimized_tables} out of {total_tables} tables.")

## Vacuum tables

In [None]:
from delta.tables import DeltaTable

# Start from zero so that re-running this cell reports this run's count only,
# instead of adding to the total left behind by a previous execution.
vacuumed_tables = 0

try:
    tables_df = spark.sql("SHOW TABLES")
    tables = tables_df.collect()
    print("Starting vacuum process...")

    for table in tables:
        try:
            table_name = table[table_column_name]

            try:
                # Preferred path: SQL VACUUM with the configured retention window.
                spark.sql(f"VACUUM {table_name} RETAIN {retention_hours} HOURS")
                vacuumed_tables += 1
                print(f"✔ Vacuumed using SQL: {table_name} ({vacuumed_tables}/{total_tables})")
            except Exception as e1:
                # Surface why the SQL path failed before trying the fallback,
                # so the root cause is not lost when the fallback succeeds.
                print(f"⚠ SQL VACUUM failed for '{table_name}', falling back to DeltaTable API: {str(e1)}")
                try:
                    deltaTable = DeltaTable.forName(spark, table_name)
                    # retention_hours is kept as a string for the SQL statement;
                    # the DeltaTable API takes a numeric number of hours.
                    deltaTable.vacuum(float(retention_hours))
                    vacuumed_tables += 1
                    print(f"✔ Vacuumed using DeltaTable: {table_name} ({vacuumed_tables}/{total_tables})")
                except Exception as e2:
                    print(f"✖ Failed to vacuum '{table_name}': {str(e2)}")
        except Exception as e:
            # Keeps one bad row/table from aborting the whole loop.
            print(f"⚠ Vacuum - Error processing table: {str(e)}")
except Exception as e:
    # Listing the tables itself failed; nothing was vacuumed.
    print(f"⚠ Error processing: {str(e)}")

print(f"✅ Vacuum complete. Successfully vacuumed {vacuumed_tables} out of {total_tables} tables.")

## Verify that the OPTIMIZE and VACUUM commands executed on a specific table

In [None]:
%%sql
-- Lists the recorded history of the table so the maintenance runs can be checked.
-- NOTE(review): `customers` is a hardcoded example table name; replace it with the table to verify.
DESCRIBE HISTORY customers