In [0]:
from pyspark.sql import functions as F
from pyspark.sql.window import Window

# Source table; the cells below add derived columns to `df`.
SOURCE_TABLE = "workspace.default.electricity_and_weather_europe"
df = spark.table(SOURCE_TABLE)

In [0]:
# Total generation: row-wise sum of every *_Actual_Aggregated column.

gen_cols = [c for c in df.columns if c.endswith("_Actual_Aggregated")]

# Fail with a clear message when nothing matches: the built-in sum() over an
# empty sequence returns the plain integer 0, which is not a Spark Column.
if not gen_cols:
    raise ValueError("No *_Actual_Aggregated columns found in the source table")

# Spark's `+` yields NULL as soon as one operand is NULL, so a single missing
# generation column would blank out the whole total (and every column derived
# from it). Each NULL term is therefore counted as 0.
# NOTE(review): assumes NULL means "no generation of this type" rather than a
# reporting gap — confirm against the dataset.
df = df.withColumn(
    "total_generation",
    sum((F.coalesce(F.col(c), F.lit(0)) for c in gen_cols), F.lit(0)),
)


In [0]:
# imbalance = Actual_Load - (total_generation + net_imports)

generation_plus_imports = F.col("total_generation") + F.col("net_imports")
df = df.withColumn("imbalance", F.col("Actual_Load") - generation_plus_imports)


In [0]:
# imbalance_pct = imbalance / Actual_Load, expressed as a fraction of load.

# Guard the division: with ANSI mode enabled, dividing by zero raises
# DIVIDE_BY_ZERO and aborts the whole job. Rows whose load is zero (or NULL)
# get a NULL imbalance_pct, which is also what non-ANSI Spark produces.
df = df.withColumn(
    "imbalance_pct",
    F.when(
        F.col("Actual_Load") != 0,
        F.col("imbalance") / F.col("Actual_Load"),
    ),
)

In [0]:
# Blackout-risk labels derived from imbalance_pct (a fraction of Actual_Load).
#   blackout_risk:        0 = Low, 1 = Medium, 2 = High
#   blackout_risk_binary: 1 = High, 0 = anything else
# The thresholds are relative, so the same cut-offs are used for all countries.

MEDIUM_RISK_THRESHOLD = 0.05  # Medium risk (≥5%)
HIGH_RISK_THRESHOLD = 0.10    # High risk (≥10%)

pct = F.col("imbalance_pct")

# A NULL imbalance_pct fails every `>=` comparison, so a bare otherwise(0)
# would label rows with missing data as "Low risk". Keep the label NULL for
# those rows so missing inputs stay distinguishable from a genuine low reading.
missing_label = F.lit(None).cast("int")

df = df.withColumn(
    "blackout_risk",
    F.when(pct.isNull(), missing_label)
     .when(pct >= HIGH_RISK_THRESHOLD, 2)
     .when(pct >= MEDIUM_RISK_THRESHOLD, 1)
     .otherwise(0)
)

# Binary version: uses the same High threshold as blackout_risk == 2.
df = df.withColumn(
    "blackout_risk_binary",
    F.when(pct.isNull(), missing_label)
     .when(pct >= HIGH_RISK_THRESHOLD, 1)
     .otherwise(0)
)

In [0]:
# Forward-looking load features: within each country, ordered by `index`,
# load_plus_<n>h holds the Actual_Load value n rows after the current row.
window_spec = Window.partitionBy("country").orderBy("index")

# lag() with a negative offset reads rows *after* the current one.
for rows_ahead in range(1, 7):
    df = df.withColumn(
        f"load_plus_{rows_ahead}h",
        F.lag("Actual_Load", -rows_ahead).over(window_spec),
    )

In [0]:
# Save the engineered DataFrame as a Delta table, overwriting any existing
# data and allowing the stored schema to be replaced.
(
    df.write
    .format("delta")
    .mode("overwrite")
    .option("overwriteSchema", "true")
    .saveAsTable("workspace.default.electricity_weather_blackout_ready")
)
