In [1]:

import json
import os
import polars as pl
from polars import col as c
import polars.selectors as cs
import re
from data_federation.input_model import SmallflexInputSchema
import owncloud


from data_federation.parser.market_price import parse_market_price
from data_federation.parser.discharge_flow import parse_discharge_flow
from data_federation.parser.weather import parse_weather
from data_federation.parser.aegina_hydro_power_plant import parse_aegina_hydro_power_plant
from data_federation.parser.merezenbach_hydro_power_plant import parse_merezenbach_hydro_power_plant
from data_federation.parser.morel_hydro_power_plant import parse_morel_hydro_power_plant
from data_federation.parser.aegina_wind_power_plant import parse_aegina_wind_power_plant
from data_federation.parser.parser_pipeline import input_pipeline
from utility.general_function import dictionary_key_filtering, scan_switch_directory, modify_string, generate_uuid, pl_to_dict
from utility.polars_operation import modify_string_col, generate_uuid_col

from data_federation.parser.hydro_power_plant import get_hydro_power_plant_data
from config import settings
import plotly.express as px

from itertools import product

# Change the working directory to the current path with "/src" removed; presumably this
# makes the relative paths used further down (e.g. ".cache/input/...") resolve from the
# project root — TODO confirm.
# NOTE(review): `str.replace` strips *every* "/src" occurrence in the path (including a
# prefix match such as "/src_old" -> "_old"), not only a trailing "src" component.
os.chdir(os.getcwd().replace("/src", ""))




In [2]:

# Load the JSON configuration files used by the rest of the notebook.
# Each file is opened in a `with` block so its handle is closed as soon as parsing
# finishes; the previous `json.load(open(...))` form never closed the files.

# Names/paths of the input files (one entry is the market-price metadata file read below).
with open(settings.INPUT_FILE_NAMES) as input_names_file:
    input_file_names: dict[str, str] = json.load(input_names_file)

# Names/paths of the output files (the "duckdb_input" entry is read in a later cell).
with open(settings.OUTPUT_FILE_NAMES) as output_names_file:
    output_file_names: dict[str, str] = json.load(output_names_file)

# Per-source market-price metadata, indexed by source name in later cells.
with open(input_file_names["market_price_metadata"]) as metadata_file:
    market_price_metadata: dict = json.load(metadata_file)


In [3]:
# Path of the DuckDB file, taken from the output-file configuration loaded above.
file_path = output_file_names["duckdb_input"]
# Build the schema object from that DuckDB file (the progress bar in the cell output
# reports tables being read and validated).
small_flex_input_schema: SmallflexInputSchema = SmallflexInputSchema().duckdb_to_schema(file_path=file_path)

# kwargs["small_flex_input_schema"] 

Read and validate tables from small_flex_input_data.db file: 100%|████████████████████████████████████████████████████| 12/12 [00:01<00:00,  7.06it/s]


In [4]:
# Echo the schema object loaded from the DuckDB file to inspect its tables.
small_flex_input_schema

SmallflexInputSchema(market_price_measurement=shape: (2_133_830, 9)
┌─────────────────────┬──────────┬───────────┬─────────┬───┬─────────┬───────┬───────────┬──────┐
│ timestamp           ┆ market   ┆ direction ┆ country ┆ … ┆ unit    ┆ max   ┆ avg       ┆ min  │
│ ---                 ┆ ---      ┆ ---       ┆ ---     ┆   ┆ ---     ┆ ---   ┆ ---       ┆ ---  │
│ datetime[μs, UTC]   ┆ str      ┆ str       ┆ str     ┆   ┆ str     ┆ f64   ┆ f64       ┆ f64  │
╞═════════════════════╪══════════╪═══════════╪═════════╪═══╪═════════╪═══════╪═══════════╪══════╡
│ 2020-10-14 04:00:00 ┆ FCR-cap  ┆ sym       ┆ AT      ┆ … ┆ EUR/MW  ┆ 10.98 ┆ 3.662791  ┆ 1.7  │
│ UTC                 ┆          ┆           ┆         ┆   ┆         ┆       ┆           ┆      │
│ 2020-11-04 08:00:00 ┆ mFRR-cap ┆ pos       ┆ AT      ┆ … ┆ EUR/MW  ┆ 10.58 ┆ 8.007895  ┆ 1.45 │
│ UTC                 ┆          ┆           ┆         ┆   ┆         ┆       ┆           ┆      │
│ 2020-08-13 04:00:00 ┆ mFRR-cap ┆ neg       ┆ AT 

In [None]:
import duckdb

# Read the complete `power_production_measurement` table straight from the DuckDB file
# into a polars DataFrame; the connection is closed when the `with` block exits.
query: str = "SELECT * FROM power_production_measurement"
with duckdb.connect(database=file_path) as connection:
    data = connection.execute(query).pl()
print(data)

shape: (122_151, 15)
┌───────────┬───────────┬───────────┬───────────┬───┬───────────┬───────────┬───────────┬──────────┐
│ timestamp ┆ power_pla ┆ resource_ ┆ min_curre ┆ … ┆ max_activ ┆ min_react ┆ avg_react ┆ max_reac │
│ ---       ┆ nt_fk     ┆ fk        ┆ nt        ┆   ┆ e_power   ┆ ive_power ┆ ive_power ┆ tive_pow │
│ datetime[ ┆ ---       ┆ ---       ┆ ---       ┆   ┆ ---       ┆ ---       ┆ ---       ┆ er       │
│ μs, Europ ┆ str       ┆ str       ┆ f64       ┆   ┆ f64       ┆ f64       ┆ f64       ┆ ---      │
│ e/Zurich] ┆           ┆           ┆           ┆   ┆           ┆           ┆           ┆ f64      │
╞═══════════╪═══════════╪═══════════╪═══════════╪═══╪═══════════╪═══════════╪═══════════╪══════════╡
│ 2024-01-0 ┆ 9267008b- ┆ null      ┆ 2.755     ┆ … ┆ 0.27      ┆ -0.111    ┆ -0.11     ┆ -0.11    │
│ 1         ┆ 9859-579b ┆           ┆           ┆   ┆           ┆           ┆           ┆          │
│ 00:15:00  ┆ -bf5c-5a3 ┆           ┆           ┆   ┆           ┆     

In [6]:

"Ausschreibung":"date"
"Von": "time",
"Angebotene Menge":"ava MW",
"Abgerufene Menge": "pur MW",
"Produkt": "market",
"Preis": "price"


SyntaxError: illegal target for annotation (558766353.py, line 1)

In [13]:
# Read the 2020-03 swissgrid energy result file. The CSV is semicolon-separated and
# ISO-8859-1 encoded; "*" and "N/A" cells are read as nulls.
energy = ".cache/input/alpiq_input_data/swissgrid/energy/2020-03-TRE-Ergebnis.csv"
data = pl.read_csv(
    energy,
    null_values=["*", "N/A"],
    encoding="iso-8859-1",
    separator=";",
)
data

Ausschreibung,Von,Bis,Produkt,Angebotene Menge,Einheit,Abgerufene Menge,Einheit_duplicated_0,Preis,Einheit_duplicated_1,Status
str,str,str,str,i64,str,i64,str,f64,str,str
"""TRE_20_03_01""","""00:00""","""01:00""","""TREnergie+_s""",5,"""MW""",0,"""MW""",499.0,"""EUR/MWh""","""verfügbar"""
"""TRE_20_03_01""","""00:00""","""01:00""","""TREnergie+_s""",5,"""MW""",0,"""MW""",272.0,"""EUR/MWh""","""verfügbar"""
"""TRE_20_03_01""","""00:00""","""01:00""","""TREnergie+_s""",5,"""MW""",0,"""MW""",92.0,"""EUR/MWh""","""verfügbar"""
"""TRE_20_03_01""","""00:00""","""01:00""","""TREnergie+_s""",5,"""MW""",0,"""MW""",374.0,"""EUR/MWh""","""verfügbar"""
"""TRE_20_03_01""","""00:00""","""01:00""","""TREnergie+_s""",5,"""MW""",0,"""MW""",81.0,"""EUR/MWh""","""verfügbar"""
…,…,…,…,…,…,…,…,…,…,…
"""TRE_20_03_31""","""23:00""","""00:00""","""TREnergie-_s""",25,"""MW""",0,"""MW""",10.0,"""EUR/MWh""","""verfügbar"""
"""TRE_20_03_31""","""23:00""","""00:00""","""TREnergie-_s""",25,"""MW""",0,"""MW""",6.0,"""EUR/MWh""","""verfügbar"""
"""TRE_20_03_31""","""23:00""","""00:00""","""TREnergie-_s""",25,"""MW""",0,"""MW""",14.0,"""EUR/MWh""","""verfügbar"""
"""TRE_20_03_31""","""23:00""","""00:00""","""TREnergie-_s""",27,"""MW""",0,"""MW""",2.0,"""EUR/MWh""","""verfügbar"""


In [42]:
# Display the aggregated market-price table.
# NOTE(review): `market_price_measurement` is not assigned in any visible cell — it
# presumably comes from a deleted or out-of-order cell; confirm before Restart & Run All.
market_price_measurement

timestamp,market,direction,min,avg,max,country,source,unit
"datetime[μs, UTC]",str,str,f64,f64,f64,str,str,str
2020-11-21 20:00:00 UTC,"""mFRR-cap""","""pos""",0.0,0.144331,0.24,"""CH""","""swissgrid""","""EUR/MWh"""
2020-12-13 20:00:00 UTC,"""mFRR-cap""","""pos""",0.0,0.183571,0.48,"""CH""","""swissgrid""","""EUR/MWh"""
2020-01-22 08:00:00 UTC,"""mFRR-cap""","""pos""",0.45,0.474096,0.5,"""CH""","""swissgrid""","""EUR/MWh"""
2020-08-25 00:00:00 UTC,"""mFRR-cap""","""pos""",0.0,0.002,0.01,"""CH""","""swissgrid""","""EUR/MWh"""
2020-11-14 20:00:00 UTC,"""FCR-cap""","""sym""",11.72,12.339788,33.08,"""CH""","""swissgrid""","""EUR/MWh"""
…,…,…,…,…,…,…,…,…
2015-01-29 00:00:00 UTC,"""mFRR-cap""","""neg""",0.98,0.998824,1.0,"""CH""","""swissgrid""","""EUR/MWh"""
2015-03-31 08:00:00 UTC,"""mFRR-cap""","""neg""",0.72,3.394839,4.75,"""CH""","""swissgrid""","""EUR/MWh"""
2015-03-27 08:00:00 UTC,"""mFRR-cap""","""pos""",6.25,10.104925,12.25,"""CH""","""swissgrid""","""EUR/MWh"""
2015-10-22 00:00:00 UTC,"""mFRR-cap""","""neg""",1.75,1.948333,2.5,"""CH""","""swissgrid""","""EUR/MWh"""


In [15]:
# Display the current `data` frame.
# NOTE(review): `data` is reassigned by several cells, so what is shown here depends on
# which cell ran last.
data

market,y,m,d,time,Angebotenes Volumen,Einheit,quantity,Einheit_duplicated_0,Leistungspreis,Einheit_duplicated_1,Kosten,Einheit_duplicated_2,price,Einheit_duplicated_3,Land,Angebotspreis,Einheit_duplicated_4,Teilbarkeit
str,str,str,str,str,i64,str,i64,str,f64,str,f64,str,f64,str,str,f64,str,str
"""PRL""","""20""","""01""","""01""","""Auction""",16,"""MW""",16,"""MW""",50.0,"""EUR/MW""",800.0,"""EUR""",6.21,"""EUR/MWh*""","""CH""",2.08,"""EUR/MWh*""","""Ja"""
"""PRL""","""20""","""01""","""01""","""Auction""",1,"""MW""",1,"""MW""",80.0,"""EUR/MW""",80.0,"""EUR""",6.21,"""EUR/MWh*""","""CH""",3.33,"""EUR/MWh*""","""Ja"""
"""PRL""","""20""","""01""","""01""","""Auction""",1,"""MW""",1,"""MW""",80.0,"""EUR/MW""",80.0,"""EUR""",6.21,"""EUR/MWh*""","""CH""",3.33,"""EUR/MWh*""","""Ja"""
"""PRL""","""20""","""01""","""01""","""Auction""",1,"""MW""",1,"""MW""",80.0,"""EUR/MW""",80.0,"""EUR""",6.21,"""EUR/MWh*""","""CH""",3.33,"""EUR/MWh*""","""Ja"""
"""PRL""","""20""","""01""","""01""","""Auction""",1,"""MW""",1,"""MW""",120.0,"""EUR/MW""",120.0,"""EUR""",6.21,"""EUR/MWh*""","""CH""",5.0,"""EUR/MWh*""","""Ja"""
…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…
"""TRL+""","""20""","""12""","""31""","""20:00""",10,"""MW""",10,"""MW""",0.35,"""CHF/MW""",3.5,"""CHF""",0.09,"""CHF/MWh*""","""CH""",0.09,"""CHF/MWh*""",
"""TRL+""","""20""","""12""","""31""","""20:00""",10,"""MW""",10,"""MW""",0.96,"""CHF/MW""",9.6,"""CHF""",0.24,"""CHF/MWh*""","""CH""",0.24,"""CHF/MWh*""",
"""TRL+""","""20""","""12""","""31""","""20:00""",10,"""MW""",10,"""MW""",1.8,"""CHF/MW""",18.0,"""CHF""",0.45,"""CHF/MWh*""","""CH""",0.45,"""CHF/MWh*""",
"""TRL+""","""20""","""12""","""31""","""20:00""",10,"""MW""",10,"""MW""",4.8,"""CHF/MW""",48.0,"""CHF""",1.2,"""CHF/MWh*""","""CH""",1.2,"""CHF/MWh*""",


In [None]:
        # NOTE(review): this cell is a method-chain fragment with no head expression, so it
        # cannot run on its own. The commented-out line below it suggests the chain is meant
        # to be applied to the renamed/filtered `data` frame — TODO confirm and restore.
        # Build a UTC timestamp from the "y", "m", "d" and "time" columns.
        .with_columns(
            pl.concat_str(["y", "m", "d", "time"], separator="-")
            .str.to_datetime("%y-%m-%d-%H:%M", time_zone="UTC").alias("timestamp"),
        ).group_by(["timestamp", "market"]).agg(
            # Per (timestamp, market): minimum price, quantity-weighted mean price, maximum price.
            c("price").min().alias("min"),
            ((c("price") *c("quantity")).sum()/c("quantity").sum()).alias("avg"),
            c("price").max().alias("max"),
        ).with_columns(
            # Replace each market label through the metadata mapping; `replace_strict`
            # raises for labels missing from the mapping.
            c("market").replace_strict(market_price_metadata[name]["market_mapping"])
        ).with_columns(
            # Add one constant column per metadata "data" entry.
            # NOTE(review): the loop variable `name` shadows the outer `name` inside the
            # generator; the iterable itself is still looked up with the outer `name`.
            pl.lit(value).alias(name) for name, value in market_price_metadata[name]["data"].items()
        # Presumably the mapped "market" values are structs that are expanded here — TODO confirm.
        ).unnest("market")

# data = data.rename(market_price_metadata[name]["col_mapping"]).filter(pl.col("quantity") > 0)

In [7]:
# Display the current `data` frame (its content depends on which cell assigned it last).
data

Ausschreibung,Beschreibung,Angebotenes Volumen,Einheit,Zugesprochenes Volumen,Einheit_duplicated_0,Leistungspreis,Einheit_duplicated_1,Kosten,Einheit_duplicated_2,Preis,Einheit_duplicated_3,Land,Angebotspreis,Einheit_duplicated_4,Teilbarkeit
str,str,i64,str,i64,str,f64,str,f64,str,f64,str,str,f64,str,str
"""PRL_20_01_01""","""Primary control Auction""",16,"""MW""",16,"""MW""",50.0,"""EUR/MW""",800.0,"""EUR""",6.21,"""EUR/MWh*""","""CH""",2.08,"""EUR/MWh*""","""Ja"""
"""PRL_20_01_01""","""Primary control Auction""",1,"""MW""",1,"""MW""",80.0,"""EUR/MW""",80.0,"""EUR""",6.21,"""EUR/MWh*""","""CH""",3.33,"""EUR/MWh*""","""Ja"""
"""PRL_20_01_01""","""Primary control Auction""",1,"""MW""",1,"""MW""",80.0,"""EUR/MW""",80.0,"""EUR""",6.21,"""EUR/MWh*""","""CH""",3.33,"""EUR/MWh*""","""Ja"""
"""PRL_20_01_01""","""Primary control Auction""",1,"""MW""",1,"""MW""",80.0,"""EUR/MW""",80.0,"""EUR""",6.21,"""EUR/MWh*""","""CH""",3.33,"""EUR/MWh*""","""Ja"""
"""PRL_20_01_01""","""Primary control Auction""",1,"""MW""",1,"""MW""",120.0,"""EUR/MW""",120.0,"""EUR""",6.21,"""EUR/MWh*""","""CH""",5.0,"""EUR/MWh*""","""Ja"""
…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…
"""TRL+_20_12_31""","""Tertiary Power UP 20:00 bis 24…",10,"""MW""",0,"""MW""",14.0,"""CHF/MW""",0.0,"""CHF""",3.5,"""CHF/MWh*""","""CH""",3.5,"""CHF/MWh*""",
"""TRL+_20_12_31""","""Tertiary Power UP 20:00 bis 24…",10,"""MW""",0,"""MW""",20.0,"""CHF/MW""",0.0,"""CHF""",5.0,"""CHF/MWh*""","""CH""",5.0,"""CHF/MWh*""",
"""TRL+_20_12_31""","""Tertiary Power UP 20:00 bis 24…",10,"""MW""",10,"""MW""",2.16,"""CHF/MW""",21.6,"""CHF""",0.54,"""CHF/MWh*""","""CH""",0.54,"""CHF/MWh*""",
"""TRL+_20_12_31""","""Tertiary Power UP 20:00 bis 24…",50,"""MW""",0,"""MW""",25.0,"""CHF/MW""",0.0,"""CHF""",6.25,"""CHF/MWh*""","""CH""",6.25,"""CHF/MWh*""",


In [None]:
# NOTE(review): `head_table` is not assigned in any visible cell — presumably a leftover
# from a deleted cell; confirm before Restart & Run All.
head_table

In [None]:
# NOTE(review): `data_pl` is not assigned in any visible cell — presumably a leftover
# from a deleted cell; confirm before Restart & Run All.
data_pl

In [None]:
# For every column whose name contains "Pe_", compute `value != 0` and pack the results
# into a single struct column named "resource_state".
data_pl.select(
    resource_state=pl.struct(cs.contains("Pe_") != 0)
)

In [None]:
# Reshape the wide "prec_*" columns into long form (one row per timestamp and column),
# then derive two fields from the original column name kept in "metadata":
#   - sub_basin:    "gries_" + the single character three positions from the end
#   - start_height: the last two characters with "00" appended, as Int32, minus 50
# NOTE(review): the meaning of the -50 offset is not visible here — TODO confirm.
precipitation = (
    data
    .unpivot(
        on=cs.starts_with("prec_"),
        index=["timestamp"],
        variable_name="metadata",
        value_name="value",
    )
    .with_columns(
        sub_basin="gries_" + c("metadata").str.slice(-3, 1),
        start_height=(c("metadata").str.slice(-2) + "00").cast(pl.Int32) - 50,
    )
)


In [None]:
# Display the long-format precipitation frame.
precipitation

In [None]:
# List the column names of the current `data` frame.
data.columns

In [None]:
# Echo the schema object again to inspect its tables.
small_flex_input_schema

In [None]:
# NOTE(review): `discharge_flow` is not assigned in any visible cell — presumably a
# leftover from a deleted cell; confirm before Restart & Run All.
discharge_flow

In [None]:
# Turn the printed text of the R object `df` into a polars DataFrame.
# NOTE(review): `ro` is not imported in any visible cell — presumably `rpy2.robjects`.

# Render the R object as text, collapse whitespace runs on every line to a single space
# and split each line into tokens.
data_str = str(ro.r['df'])
data_str = [
    modify_string(line, format_str={r"\s+": " "}).split(" ")
    for line in data_str.split("\n")
]

# Lines whose first token is empty delimit the column chunks of the dump; the tokens
# after the first one on such a line are used as that chunk's column names.
index = [pos for pos, tokens in enumerate(data_str) if tokens[0] == ""]

# Walk over consecutive delimiter pairs, leaving out the last two delimiters.
for i, (start, stop) in enumerate(zip(index[:-2], index[1:])):
    header = data_str[start][1:]
    rows = data_str[start + 1:stop]
    if i == 0:
        # The first chunk additionally carries a "date" column after the row index.
        columns = ["index", "date"] + header
        data: pl.DataFrame = pl.DataFrame(rows, schema=columns, orient="row")
    else:
        # Later chunks are attached to the first one through the shared row index.
        columns = ["index"] + header
        data = data.join(
            pl.DataFrame(rows, schema=columns, orient="row"),
            on="index",
            how="left",
        )


In [None]:

# Combine the "date" and "time" string columns into one UTC timestamp column.
data = data.with_columns(
    timestamp=pl.concat_str(["date", "time"], separator=" ").str.to_datetime(
        "%Y-%m-%d %H:%M:%S", time_zone="UTC"
    )
)

# Long format of every column whose name contains "wind": the former column name goes
# to "height" (with "wind_X" removed) and the values are cast to Float64.
wind_data = (
    data
    .unpivot(
        on=cs.contains("wind"),
        index="timestamp",
        variable_name="height",
        value_name="wind_speed",
    )
    .with_columns(
        c("height").str.replace("wind_X", ""),
        c("wind_speed").cast(pl.Float64),
    )
)

# Same reshaping for every column whose name contains "glob" (prefix "glob__X" removed).
irradiance_data = (
    data
    .unpivot(
        on=cs.contains("glob"),
        index="timestamp",
        variable_name="height",
        value_name="irradiance",
    )
    .with_columns(
        c("height").str.replace("glob__X", ""),
        c("irradiance").cast(pl.Float64),
    )
)

irradiance_data

In [None]:
# Display the current `data` frame (its content depends on which cell assigned it last).
data

In [None]:
# Split the text dump into lines and each line into space-separated tokens.
# NOTE(review): this needs `data_str` to still be a string; an earlier cell rebinds it
# to a list of token lists, so the result depends on execution order.
[line.split(" ") for line in data_str.split("\n")]


In [None]:
# Convert the R object `df` to pandas via rpy2 and wrap the result in a polars DataFrame.
# `pandas2ri.rpy2py` needs the object to convert; the previous call passed nothing, which
# raises a TypeError. `ro.r['df']` is the R object the neighbouring cells work with.
# NOTE(review): `ro` and `pandas2ri` are not imported in any visible cell — presumably
# `rpy2.robjects` and `rpy2.robjects.pandas2ri`; confirm the import cell exists.
data = pl.from_pandas(pandas2ri.rpy2py(ro.r['df']))
print(data.head())

In [None]:
# Display the R object named `df` as seen through rpy2.
# NOTE(review): `ro` is not imported in any visible cell — presumably `rpy2.robjects`.
ro.r['df']

In [None]:
# Keep the rows whose "metadata" label contains "KW", remove the first "KW" from the
# label, split it on "_" and name the leading parts "market", "y" and "w". The timestamp
# is the start of the two-digit year "y" (UTC) plus (w - 1) weeks, expressed in
# milliseconds.
df_temp2 = (
    data
    .filter(c("metadata").str.contains("KW"))
    .with_columns(
        c("metadata")
        .str.replace("KW", "")
        .str.split_exact("_", 4)
        .struct.rename_fields(["market", "y", "w"])
        .alias("metadata"),
    )
    .unnest("metadata")
    .with_columns(
        (
            c("y").str.to_datetime("%y", time_zone="UTC")
            + ((c("w").cast(pl.Int32) - 1)*7*24*60*60*1e3).cast(pl.Duration(time_unit="ms"))
        ).alias("timestamp"),
    )
)
df_temp2.head(1)


In [None]:
    # NOTE(review): this cell is a method-chain fragment — it has no head expression and
    # contains a closing "]" without a matching "[", so it cannot run as written.
    # It derives "market" (first "_"-separated part of "Date"), "y" (second part, as Int64)
    # and "week" (leading "_"-separated part of the text after the last "KW", as Int64)
    # with per-row Python lambdas.
    .with_columns(
        pl.col("Date").str.split("_").map_elements(lambda x: x[0]).alias("market"),
        pl.col("Date").str.split("_").map_elements(lambda x: x[1]).alias("y").cast(pl.Int64),
        pl.col("Date").str.split("KW").map_elements(lambda x: x[-1]).alias("week").str.split("_")
        .map_elements(lambda x: x[0]).cast(pl.Int64)]).with_columns(
            # (week - 1) weeks expressed in microseconds, cast to a Duration.
            ((pl.col("week") - 1)*7*24*60*60*1e6).cast(pl.Duration(time_unit="us")),
                # "y" + 2000 rendered as a string and parsed as a "%Y" datetime.
                (pl.col("y") + 2000).cast(pl.Utf8).str.strptime(pl.Datetime, "%Y")
            )


In [None]:
# Read the raw CSV registered under the "rte_ene" key of the market-price metadata.
# The file is semicolon-separated and "*" cells are read as nulls.
name = "rte_ene"
market_price: pl.DataFrame = pl.read_csv(
    market_price_metadata[name]["file"],
    null_values=["*"],
    separator=";",
)

# Show the raw column names.
market_price.columns