# Notebook to download data from ELIA

In [1]:
# Enable IPython's autoreload extension in mode 2 so edits to imported project
# modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
import datetime

import polars as pl

from forecast_ui.elia import EliaAPIClient, EliaDataset
from forecast_ui.io import ForecastConfig, IODataManager
from forecast_ui.time import last_day_start_end

### Select the data you want to download
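 - `FORECAST_TYPE`: Type of data to download (`"solar"` or `"wind"`)
 - The download window is computed automatically: it starts at the latest timestamp already stored in the matching `target.parquet` file (never earlier than 2020-01-01 UTC)
 - The window ends at the end of the last day returned by `last_day_start_end("CET")`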

In [3]:
FORECAST_TYPE = "wind"

match FORECAST_TYPE:
    case "solar":
        existing_data = pl.read_parquet("./data/solar/target.parquet")
        start, end = last_day_start_end("CET")
        start = datetime.datetime(2020, 1, 1, 0, 0, 0, tzinfo=datetime.UTC)
        start = max(existing_data["datetime"].max(), start)
        client = EliaAPIClient(EliaDataset.SOLAR_HISTORY)
        df = client.load_history_measurements(start, end)
        df = (
            df.rename({"value": "measured"})
            .with_columns(pl.col("datetime").dt.cast_time_unit("ms"))
            .sort("datetime", descending=False)
            .drop_nulls(subset=["measured"])
        )
        df = pl.concat([existing_data, df], how="vertical").sort("datetime", descending=False)
        df.write_parquet("./data/solar/target.parquet")
    case "wind":
        existing_data = pl.read_parquet("./data/wind/target.parquet")
        start, end = last_day_start_end("CET")
        start = datetime.datetime(2020, 1, 1, 0, 0, 0, tzinfo=datetime.UTC)
        start = max(existing_data["datetime"].max(), start)
        client = EliaAPIClient(EliaDataset.WIND_HISTORY)
        df = client.load_history_measurements(start, end)
        df = (
            df.rename({"value": "measured"})
            .with_columns(pl.col("datetime").dt.cast_time_unit("ms"))
            .with_columns(pl.col("measured").truediv("monitoredcapacity").alias("loadfactor"))
            .sort("datetime", descending=False)
            .with_columns(
                pl.col("decrementalbidid").str.replace(",", "").replace("''", 0.0).cast(pl.Float64).truediv(1e3),
            )
            .with_columns(
                pl.when(pl.col("decrementalbidid") > 0)
                .then(-pl.col("decrementalbidid"))
                .otherwise(pl.col("decrementalbidid"))
                .alias("decrementalbidid")
            )
            # could be shifted by 1 hour or scaled differently
            .with_columns((pl.col("measured") - pl.col("decrementalbidid")).alias("measured-decrementalbid"))
            .drop_nulls(subset=["measured"])
        )

        df = pl.concat([existing_data, df], how="vertical").sort("datetime", descending=False)
        df.write_parquet("./data/wind/target.parquet")

Loading historical data: 100%|██████████| 28/28 [00:29<00:00,  1.06s/it]


In [21]:
# Display the combined frame produced by the download cell (polars truncates long frames).
df

datetime,measured,dayahead11hforecast,dayahead11hconfidence10,dayahead11hconfidence90,monitoredcapacity,loadfactor
"datetime[ms, UTC]",f64,f64,f64,f64,f64,f64
2025-06-01 00:00:00 UTC,1485.98,1404.9,982.6,1811.8,2261.0,0.657222
2025-06-01 00:15:00 UTC,1303.75,1394.8,976.2,1802.3,2261.0,0.576625
2025-06-01 00:30:00 UTC,1361.48,1380.4,965.5,1788.2,2261.0,0.602158
2025-06-01 00:45:00 UTC,1464.2,1362.1,945.2,1771.5,2261.0,0.64759
2025-06-01 01:00:00 UTC,1566.25,1267.4,842.6,1679.7,2261.0,0.692724
…,…,…,…,…,…,…
2025-06-28 20:45:00 UTC,1727.24,1465.1,841.3,2044.4,2262.1,0.763556
2025-06-28 21:00:00 UTC,1700.91,1438.4,799.2,2026.3,2262.1,0.751916
2025-06-28 21:15:00 UTC,1740.28,1412.4,760.9,2004.3,2262.1,0.769321
2025-06-28 21:30:00 UTC,1734.06,1386.6,722.5,1982.0,2262.1,0.766571


In [None]:
# Manager bound to the solar forecast configuration; the next cell reuses `io_manager`.
io_manager = IODataManager(ForecastConfig.SOLAR)
# Real-time refresh is currently disabled:
# io_manager.update_realtime_data()

# Update the forecast file for 2025-01-01 UTC through one day before the current UTC time.
forecast_window_start = datetime.datetime(2025, 1, 1, tzinfo=datetime.UTC)
forecast_window_end = datetime.datetime.now(datetime.UTC) - datetime.timedelta(days=1)
io_manager.update_forecast_file(start=forecast_window_start, end=forecast_window_end)

Note: Environment variable`HF_TOKEN` is set and is the current active token independently from the token you've just configured.


solar-forecasts.parquet:   0%|          | 0.00/159k [00:00<?, ?B/s]

[32m2025-06-29 23:17:42.367[0m | [1mINFO    [0m | [36mforecast_ui.io[0m:[36mupdate_forecast_file[0m:[36m97[0m - [1mForecast data successfully written to ./data/solar/forecast.json.[0m


In [None]:
# Update the history benchmark file for 2025-01-01 UTC through one day before the
# current UTC time.
benchmark_window = {
    "start": datetime.datetime(2025, 1, 1, tzinfo=datetime.UTC),
    "end": datetime.datetime.now(datetime.UTC) - datetime.timedelta(days=1),
}
io_manager.update_history_benchmark_file(**benchmark_window)

In [None]:
datetime.datetime.utcnow()

In [22]:
import polars as pl

# Column layout of the forecast table: a `valid_time` timestamp plus three float
# columns named q10, q50 and q90.
empty_forecast_schema = {
    "valid_time": pl.Datetime,
    "q10": pl.Float64,
    "q50": pl.Float64,
    "q90": pl.Float64,
}
# Write a zero-row frame with that schema to the solar forecasts file.
pl.DataFrame(schema=empty_forecast_schema).write_parquet("data/solar/solar-forecasts.parquet")