# Notebook to download data from ELIA

In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
import datetime

import polars as pl

from wind_forecast_ui.elia import load_history_measurements
from wind_forecast_ui.time import last_day_start_end

In [None]:
# Previously stored target data; new measurements are appended to it further down.
existing_data = pl.read_parquet(source="./data/target.parquet")

In [None]:
# Download window. Only `end` is kept from the helper; the `start` it returns
# is recomputed below.
start, end = last_day_start_end("CET")
# Floor of the window: never request anything before 2020-01-01 UTC.
start = datetime.datetime(2020, 1, 1, 0, 0, 0, tzinfo=datetime.UTC)
# Resume from the newest timestamp already stored (not the oldest), so the
# history that is already in the parquet file is not downloaded again.
# An empty file yields None, in which case the 2020 floor is used as-is.
# NOTE(review): assumes the stored datetimes are timezone-aware; comparing a
# naive value with the UTC floor raises TypeError -- confirm.
last_stored = existing_data["datetime"].max()
if last_stored is not None:
    start = max(last_stored, start)

In [None]:
# Call the project's ELIA loader with the (start, end) window built in the
# previous cell. The cells below rely on the result having at least the
# columns "value", "datetime", "monitoredcapacity" and "decrementalbidid".
# NOTE(review): presumably returns a polars DataFrame covering start..end;
# whether the bounds are inclusive is not visible here -- confirm.
df = load_history_measurements(start, end)

In [None]:
# Normalise the freshly downloaded measurements into the stored schema.
df = (
    df.rename({"value": "measured"})
    .with_columns(pl.col("datetime").dt.cast_time_unit("ms"))
    # Load factor: measured value relative to the monitored capacity.
    .with_columns((pl.col("measured") / pl.col("monitoredcapacity")).alias("loadfactor"))
    .sort("datetime", descending=False)
    .with_columns(
        # The bid arrives as text. Strip *every* comma (str.replace would only
        # remove the first one, and a leftover comma fails the float cast),
        # map the literal "''" placeholder to 0.0, then scale down by 1000.
        # NOTE(review): the commas are presumably thousands separators and the
        # 1e3 presumably a unit conversion -- confirm against the ELIA feed.
        pl.col("decrementalbidid")
        .str.replace_all(",", "", literal=True)
        .replace("''", 0.0)
        .cast(pl.Float64)
        .truediv(1e3),
    )
    .with_columns(
        # Force a non-positive sign: positive values are negated, everything
        # else (zero, negative, null) is kept unchanged.
        pl.when(pl.col("decrementalbidid") > 0)
        .then(-pl.col("decrementalbidid"))
        .otherwise(pl.col("decrementalbidid"))
        .alias("decrementalbidid")
    )
    # could be shifted by 1 hour or scaled differently
    # Subtracting the non-positive bid adds its magnitude back onto `measured`.
    .with_columns((pl.col("measured") - pl.col("decrementalbidid")).alias("measured-decrementalbid"))
    # Rows without a measurement are dropped.
    .drop_nulls(subset=["measured"])
)

In [None]:
# Append the new measurements to the stored history. The download window can
# overlap with what is already stored, so rows that are exact duplicates
# (identical in every column) are dropped before ordering chronologically;
# otherwise each run would grow the file with repeated rows.
df = (
    pl.concat([existing_data, df], how="vertical")
    .unique(maintain_order=True)
    .sort("datetime", descending=False)
)

In [None]:
# Persist the merged history. write_parquet returns None, so its result is
# deliberately not assigned back to `df` (that would discard the DataFrame
# and break any re-run of this or a later cell).
df.write_parquet("./data/target.parquet")