In [0]:
# Source table holding the combined electricity and weather data.
source_table_name = "workspace.default.electricity_and_weather_europe"
df = spark.table(source_table_name)

In [0]:
from pyspark.sql import functions as F
from pyspark.sql.window import Window


In [0]:
# TARGET T1 — LARGE FORECAST ERROR

# Absolute gap between actual and forecasted load, relative to the forecast.
# try_divide is used for the ratio instead of the plain `/` operator.
absolute_load_gap = F.abs(F.col("Actual_Load") - F.col("Forecasted_Load"))
relative_forecast_error = F.try_divide(absolute_load_gap, F.col("Forecasted_Load"))

# 15% error threshold → tuneable.
# Per the original author's note the threshold was picked for class balance
# (~8.7% of samples).
is_large_forecast_error = F.col("forecast_error_pct") > 0.15

# Start again from the source table so this cell can be re-run on its own.
df = (
    spark.table("workspace.default.electricity_and_weather_europe")
    .withColumn("forecast_error_pct", relative_forecast_error)
    .withColumn("T1_large_forecast_error", is_large_forecast_error.cast("int"))
)


In [0]:
# TARGET T3 — UNDERESTIMATED DEMAND

# Signed gap: positive when the actual load came in above the forecast.
load_above_forecast = F.col("Actual_Load") - F.col("Forecasted_Load")
# The gap must exceed 10% of the forecasted load to count as underestimated.
underestimation_margin = 0.10 * F.col("Forecasted_Load")

df = df.withColumn(
    "T3_underestimated_demand",
    (load_above_forecast > underestimation_margin).cast("int"),
)


In [0]:
# TARGET T8 — HIGH IMPORTS
# (Country relying heavily on imports)

# Net imports expressed as a fraction of the actual load.
imports_share_of_load = F.try_divide(F.col("net_imports"), F.col("Actual_Load"))
# imports > 30% of load → high risk
is_import_heavy = F.col("import_ratio") > 0.30

df = (
    df
    .withColumn("import_ratio", imports_share_of_load)
    .withColumn("T8_high_imports", is_import_heavy.cast("int"))
)




In [0]:
# TARGET T15 — CATASTROPHIC IMBALANCE
# (Strong supply shortage: demand far exceeds generation + imports)

# Per-source generation columns are identified by their name suffix.
gen_cols = [c for c in df.columns if c.endswith("_Actual_Aggregated")]
if not gen_cols:
    # Without this guard the builtin sum() below would return the plain int 0,
    # which withColumn() rejects with an unrelated-looking type error.
    raise ValueError(
        "No '*_Actual_Aggregated' generation columns found; "
        "cannot compute total_generation"
    )

# In Spark, `a + b` is NULL as soon as either operand is NULL, so a plain sum
# over the generation columns turns NULL whenever a single source is missing
# for a row. Treat a missing source as contributing 0 to the total ...
gen_total_null_safe = sum(F.coalesce(F.col(c), F.lit(0)) for c in gen_cols)
# ... but count how many sources actually reported a value, so that rows with
# no generation data at all keep a NULL total instead of a misleading 0.
gen_sources_reported = sum(F.col(c).isNotNull().cast("int") for c in gen_cols)

df = df.withColumn(
    "total_generation",
    # when() without otherwise() yields NULL for rows that do not match.
    F.when(gen_sources_reported > 0, gen_total_null_safe),
)

# Demand not covered by generation plus net imports (positive = shortage).
df = df.withColumn(
    "imbalance",
    F.col("Actual_Load") - (F.col("total_generation") + F.col("net_imports"))
)

# Shortage relative to the actual load.
df = df.withColumn(
    "imbalance_pct",
    F.try_divide(F.col("imbalance"), F.col("Actual_Load"))
)

df = df.withColumn(
    "T15_catastrophic_imbalance",
    (F.col("imbalance_pct") > 0.10).cast("int")      # >10% shortage → catastrophic
)



In [0]:
# Persist the frame with all target columns as a Delta table, replacing any
# existing table contents and schema.
targets_writer = (
    df.write
    .format("delta")
    .mode("overwrite")
    .option("overwriteSchema", "true")
)
targets_writer.saveAsTable("workspace.default.blackout_targets_pedro")
