In [0]:
def delete_ingest_records(spark, catalog, date='20250724'):
    """Delete rows stamped with ``date`` from the ingest-tracked tables of ``catalog``.

    Switches the session to ``catalog`` and walks every schema except
    ``information_schema`` and ``sys``. For each non-temporary table that is
    not a view and that has an ``ingest_time`` and/or ``derived_ingest_time``
    column, a ``DELETE FROM ... WHERE <column> = '<date>'`` statement is run
    (the conditions are OR-ed when both columns exist). Progress is reported
    with ``print``; nothing is returned.

    Args:
        spark: Session object exposing ``sql()`` and ``table()``
            (presumably a ``pyspark.sql.SparkSession`` -- confirm with caller).
        catalog: Name of the catalog to scan. It is backtick-quoted before
            being placed into SQL unless it is already backtick-quoted.
        date: Value compared, as a string literal, against the ingest columns.

    Raises:
        ValueError: If ``date`` contains a single quote or a backslash, which
            would break out of (or corrupt) the SQL string literal.

    Errors raised by ``spark`` are not caught: a failure on one table aborts
    the run, and DELETE statements already issued for earlier tables are not
    undone by this function.
    """
    def quote(identifier):
        # Backtick-quote so names with hyphens, spaces or reserved words
        # still parse; embedded backticks are escaped by doubling them.
        # Names the caller already quoted are passed through unchanged.
        if len(identifier) >= 2 and identifier[0] == identifier[-1] == '`':
            return identifier
        return '`' + identifier.replace('`', '``') + '`'

    # Validate before touching Spark: the value is spliced into the WHERE
    # clause of a destructive statement.
    date_literal = str(date)
    if "'" in date_literal or '\\' in date_literal:
        raise ValueError(
            f'date must not contain quotes or backslashes: {date!r}'
        )

    catalog_sql = quote(catalog)
    spark.sql(f'use catalog {catalog_sql}')
    system_schemas = {'information_schema', 'sys'}
    # SHOW SCHEMAS yields a single column whose label differs between
    # Spark/Databricks versions (databaseName vs namespace), so read it
    # by position instead of by name.
    schema_rows = spark.sql(f'SHOW SCHEMAS IN {catalog_sql}').collect()
    schemas = [row[0] for row in schema_rows]
    for schema in schemas:
        if schema in system_schemas:
            print(f'Skipping system schema {schema}')
            continue
        schema_sql = f'{catalog_sql}.{quote(schema)}'
        tables = spark.sql(f'SHOW TABLES IN {schema_sql}').filter('isTemporary = false')
        view_rows = spark.sql(f'SHOW VIEWS IN {schema_sql}').collect()
        # Names listed by SHOW VIEWS are excluded from the table loop below.
        views = {row.viewName for row in view_rows}
        for row in tables.collect():
            name = row.tableName
            # Unquoted form is kept for log messages only.
            table_ref = f'{catalog}.{schema}.{name}'
            if name in views:
                print(f'Skipping view {table_ref}')
                continue
            table_sql = f'{schema_sql}.{quote(name)}'
            cols = spark.table(table_sql).columns
            conditions = [
                f"{column} = '{date_literal}'"
                for column in ('ingest_time', 'derived_ingest_time')
                if column in cols
            ]
            if not conditions:
                print(f'Skipping {table_ref}: no ingest columns')
                continue
            where_clause = ' OR '.join(conditions)
            spark.sql(f'DELETE FROM {table_sql} WHERE {where_clause}')
            print(f'Deleted records from {table_ref}')

# Run the cleanup against the 'edsm' catalog; no date is passed, so the
# function's default date applies.
delete_ingest_records(spark, catalog='edsm')
