In [1]:
from pyspark.sql import functions as F
from pyspark.sql import SparkSession
from pyspark.sql import Row


# -----------------------------
# 1. Get pipeline parameters
# -----------------------------
# Snapshot of every name visible when this cell runs (local names win over
# global ones). A parameter that is already defined keeps its value; any
# parameter that is missing falls back to the default given below.
_namespace = {**globals(), **locals()}

lakehouse_name = _namespace.get("lakehouse_name", "lh_silver")
medallion_layer = _namespace.get("medallion_layer", "silver")
medallion_short = _namespace.get("medallion_short", "slv")
api_name = _namespace.get("api_name", "redata")
write_mode = _namespace.get("write_mode", "append")  # overwrite/append


# -----------------------------
# 2. Spark Session & Config
# -----------------------------
# Assemble the builder one step at a time: application name first, then the
# "spark.fabric.lakehouse.name" option set to the lakehouse parameter.
session_builder = SparkSession.builder.appName("JSON_to_Silver")
session_builder = session_builder.config("spark.fabric.lakehouse.name", lakehouse_name)

# getOrCreate() hands back the already-active session when there is one.
# NOTE(review): whether the option above is applied to a pre-existing session
# is not visible from this notebook — confirm.
spark = session_builder.getOrCreate()



StatementMeta(, 5f85fa02-0ed6-4647-a6f0-0f5529ae62e9, 3, Finished, Available, Finished)

In [2]:
# -----------------------------
# 3. Get api tables
# -----------------------------
# Explicit schema for the table inventory. Without it createDataFrame has to
# infer the column types from the rows, which raises when the lakehouse
# contains no tables at all.
TABLE_INVENTORY_SCHEMA = (
    "table_name string, database string, table_type string, is_temporary boolean"
)

# Every entry the catalog reports for the lakehouse
tables = spark.catalog.listTables(lakehouse_name)

# One row per catalog entry, keeping only the attributes used below
df_tables = spark.createDataFrame(
    [(tbl.name, tbl.database, tbl.tableType, tbl.isTemporary) for tbl in tables],
    schema=TABLE_INVENTORY_SCHEMA,
)

# Keep the tables whose name starts with "<medallion_short>_<api_name>" and
# does not end with "metadata".
table_prefix = f"{medallion_short}_{api_name}"
df_filtered = df_tables.filter(
    (F.col("table_name").startswith(table_prefix))
    & (~F.col("table_name").endswith("metadata"))
)

# Show results
df_filtered.show(truncate=False)


StatementMeta(, 5f85fa02-0ed6-4647-a6f0-0f5529ae62e9, 4, Finished, Available, Finished)

+---------------------------------------------------------------------+---------+----------+------------+
|table_name                                                           |database |table_type|is_temporary|
+---------------------------------------------------------------------+---------+----------+------------+
|slv_redata_balance_balance_electrico_day                             |lh_silver|MANAGED   |false       |
|slv_redata_balance_balance_electrico_month                           |lh_silver|MANAGED   |false       |
|slv_redata_demanda_evolucion_day                                     |lh_silver|MANAGED   |false       |
|slv_redata_demanda_evolucion_month                                   |lh_silver|MANAGED   |false       |
|slv_redata_generacion_estructura_generacion_day                      |lh_silver|MANAGED   |false       |
|slv_redata_generacion_estructura_generacion_emisiones_asociadas_day  |lh_silver|MANAGED   |false       |
|slv_redata_generacion_estructura_generacion_e

In [3]:
# -----------------------------
# 4. Write Tables
# -----------------------------
# Each selected table is copied to "backup_<name>" and the original is then
# dropped. Because the drop is destructive, the copy is verified first.

# Only modes that always write the source rows are accepted: with e.g.
# "ignore" the write is silently skipped when the backup already exists,
# and the original would still be dropped afterwards.
if write_mode not in ("append", "overwrite"):
    raise ValueError(
        f"Unsupported write_mode {write_mode!r}: expected 'append' or 'overwrite'."
    )


def _quoted(identifier: str) -> str:
    """Backtick-quote a Spark SQL identifier, escaping embedded backticks."""
    return "`" + identifier.replace("`", "``") + "`"


def _qualified(table: str) -> str:
    """Return `table` qualified with the lakehouse the table list was read from.

    The table names come from listTables(lakehouse_name), so reads, writes and
    drops are pinned to that lakehouse instead of the session's default one.
    """
    return f"{_quoted(lakehouse_name)}.{_quoted(table)}"


# Collect the column values to a Python list
table_names = [row["table_name"] for row in df_filtered.collect()]

# Tables currently present in the lakehouse; used to find out whether an
# append starts from an existing backup (and from how many rows).
existing_tables = {tbl.name.lower() for tbl in spark.catalog.listTables(lakehouse_name)}

for source_name in table_names:
    table_name = f"backup_{source_name}"
    source_table = _qualified(source_name)
    backup_table = _qualified(table_name)

    df_table_to_copy = spark.table(source_table)
    source_rows = df_table_to_copy.count()

    # Rows already in the backup before this run (only relevant when appending)
    rows_before = 0
    if write_mode == "append" and table_name.lower() in existing_tables:
        rows_before = spark.table(backup_table).count()

    writer = df_table_to_copy.write.format("delta").mode(write_mode)

    # Only add overwriteSchema if mode is overwrite
    if write_mode == "overwrite":
        writer = writer.option("overwriteSchema", "true")

    # Save the table
    writer.saveAsTable(backup_table)

    # Never drop the original unless the backup holds at least every row that
    # was expected from this copy.
    backup_rows = spark.table(backup_table).count()
    expected_rows = rows_before + source_rows
    if backup_rows < expected_rows:
        raise RuntimeError(
            f"Backup {table_name} has {backup_rows} rows but at least "
            f"{expected_rows} were expected; original table {source_name} was NOT dropped."
        )

    print(f"Table {table_name} successfully written.")

    # Drop the original table
    spark.sql(f"DROP TABLE IF EXISTS {source_table}")
    print(f"Original table {source_name} deleted.")


StatementMeta(, 5f85fa02-0ed6-4647-a6f0-0f5529ae62e9, 5, Finished, Available, Finished)

Table backup_slv_redata_balance_balance_electrico_day successfully written.
Original table slv_redata_balance_balance_electrico_day deleted.
Table backup_slv_redata_balance_balance_electrico_month successfully written.
Original table slv_redata_balance_balance_electrico_month deleted.
Table backup_slv_redata_demanda_evolucion_day successfully written.
Original table slv_redata_demanda_evolucion_day deleted.
Table backup_slv_redata_demanda_evolucion_month successfully written.
Original table slv_redata_demanda_evolucion_month deleted.
Table backup_slv_redata_generacion_estructura_generacion_day successfully written.
Original table slv_redata_generacion_estructura_generacion_day deleted.
Table backup_slv_redata_generacion_estructura_generacion_emisiones_asociadas_day successfully written.
Original table slv_redata_generacion_estructura_generacion_emisiones_asociadas_day deleted.
Table backup_slv_redata_generacion_estructura_generacion_emisiones_asociadas_month successfully written.
Origi