In [0]:
# Source table for the whole notebook; every later cell derives columns from `df`.
SOURCE_TABLE = "workspace.default.electricity_and_weather_europe"
df = spark.table(SOURCE_TABLE)


In [0]:
from pyspark.sql import functions as F
from pyspark.sql.window import Window

# Identify all generation columns (every column whose name ends in "_Actual_Aggregated").
gen_cols = [c for c in df.columns if c.endswith("_Actual_Aggregated")]
if not gen_cols:
    # Without this guard, sum() over an empty list yields the plain int 0 and
    # withColumn fails with an unhelpful type error.
    raise ValueError("No '*_Actual_Aggregated' generation columns found in the source table")

# Total generation = row-wise sum of all generation columns.
# Column `+` propagates NULL, so a single missing production type would null out the
# whole total (and every metric derived from it). Treat an individual NULL as 0, but
# keep the total NULL when *every* generation column is NULL so that rows with no
# reported generation at all are not mistaken for "zero generation".
any_generation_reported = F.coalesce(*[F.col(c) for c in gen_cols]).isNotNull()
null_safe_total = sum(
    (F.coalesce(F.col(c), F.lit(0)) for c in gen_cols),
    F.lit(0),
)
df = df.withColumn("total_generation", F.when(any_generation_reported, null_safe_total))


In [0]:
# Imbalance = Actual_Load minus (total_generation + net_imports).
# A positive value means load exceeds the summed generation and net imports.
available_supply = F.col("total_generation") + F.col("net_imports")
df = df.withColumn("imbalance", F.col("Actual_Load") - available_supply)


In [0]:
# Per-country window ordered by the "index" column; the row-offset lookups in the
# following cells are evaluated over it.
# NOTE(review): assumes "index" sorts rows chronologically within a country — confirm.
window_spec = (
    Window
    .partitionBy("country")
    .orderBy("index")
)


In [0]:
# For 1 hour ahead: take Actual_Load from the next row of the same country.
# F.lead(col, 1) is the direct spelling of the look-ahead previously written as a
# negative lag. The value is NULL on the last row of each country partition.
# NOTE(review): "1 hour" assumes consecutive rows are one hour apart — confirm.
df = df.withColumn("load_plus_1h", F.lead("Actual_Load", 1).over(window_spec))


In [0]:
# For 2–6 hours ahead: one column per horizon, each holding Actual_Load from the row
# `h` positions further on within the same country (NULL where no such row exists).
# F.lead(col, h) replaces the equivalent negative-offset lag.
# NOTE(review): horizons are row offsets; "hours" assumes hourly rows — confirm.
for h in range(2, 7):
    df = df.withColumn(
        f"load_plus_{h}h",
        F.lead("Actual_Load", h).over(window_spec),
    )


In [0]:
# Flag a row as high risk (blackout_risk = 1) when ANY of the following holds, else 0:
#   A. imbalance above 500 (MW)                          → shortage of supply
#   B. next-row load exceeds current load by over 1000   → sudden peak approaching
#   C. net_imports below -1000                           → lost external support
# NOTE(review): C tests the current net_imports level, not a change between rows; if a
# sudden *drop* is what should be detected, compare against the previous row — confirm.
# A comparison involving NULL (e.g. load_plus_1h on the last row of a country) is not
# true, so such a row gets 0 unless another condition fires.

supply_shortage = F.col("imbalance") > 500
load_surge = (F.col("load_plus_1h") - F.col("Actual_Load")) > 1000
import_loss = F.col("net_imports") < -1000

is_high_risk = supply_shortage | load_surge | import_loss
df = df.withColumn("blackout_risk", F.when(is_high_risk, 1).otherwise(0))

In [0]:
# Persist the enriched frame as a Delta table, overwriting any existing contents.
# NOTE(review): the table name is unqualified, so it resolves against the session's
# current catalog/schema rather than the one the source table was read from — confirm.
(
    df.write
    .format("delta")
    .mode("overwrite")
    .saveAsTable("table_imbalance_blackout_risk")
)
