In [196]:
import polars as pl
from polars import col as c
import networkx as nx

from config import settings
import json
import os
from datetime import datetime, UTC
import datetime as dt

from utility.polars_operation import generate_uuid_col
from utility.parser_utility import (
    add_table_to_changes_schema,
    generate_connectivity_table,
    generate_random_uuid,
)
from utility.general_function import pl_to_dict

from twindigrid_changes.schema import ChangesSchema
from twindigrid_sql.schema.enum import (
    MeasurementClass,
    MeasurementPhase,
    MeasurementColumn,
    SubstationType,
    TerminalSide,
)
from twindigrid_sql.entries.source import (
    SCADA,
    CONVENTIONAL_METER,
    GRID_LOAD,
    SCADA,
    ESTIMATED,
)


from twindigrid_sql.entries.equipment_class import (
    TRANSFORMER,
    BRANCH,
    SWITCH,
    INDIRECT_FEEDER,
    BUSBAR_SECTION,
    ENERGY_CONSUMER,
)
from twindigrid_sql.entries.measurement_type import ENERGY, ACTIVE_POWER, REACTIVE_POWER
from twindigrid_sql.entries.unit_symbol import WATTHOUR, WATT

# Per the original note, settings.py switches the working directory to `src`
# when it is loaded from a notebook; step back to the project root so that the
# relative data paths used below resolve. This is a no-op elsewhere.
# Only a trailing `src` path component is removed: a plain
# str.replace("/src", "") would also rewrite any parent directory whose name
# starts with "src" and would never match Windows path separators.
_cwd = os.getcwd()
if os.path.basename(_cwd) == "src":
    os.chdir(os.path.dirname(_cwd))

# Import data from MATLAB

In [197]:
# Load the index that maps logical input names to file paths. The context
# manager closes the file handle as soon as the JSON has been parsed instead
# of leaving it open until garbage collection.
with open(settings.INPUT_FILE_NAMES) as input_file_index:
    file_names: dict[str, str] = json.load(input_file_index)

In [198]:
# Read every Distflow CSV export referenced by the input-file index.
parameter_distflow: pl.DataFrame = pl.read_csv(file_names["Distflow_parameter"])
nodedata_distflow: pl.DataFrame = pl.read_csv(file_names["Distflow_node_data"])
powerdata_distflow: pl.DataFrame = pl.read_csv(file_names["Distflow_Power_data"])
linedata_distflow: pl.DataFrame = pl.read_csv(file_names["Distflow_Line_data"])
result_distflow: pl.DataFrame = pl.read_csv(file_names["Distflow_result"])
# Disabled experiments, kept for reference:
# nodedata_distflow = nodedata_distflow.with_columns(c("Snom").cast(pl.Int8))
# # To have base values (needs the line lengths), not from MATLAB!
# line_data_from_input_file: pl.DataFrame = pl.read_excel(
#     file_names["Line_Data_From_Input_File"]
# )

# Number the power rows starting at 1 (node 0 is the slack bus and carries no
# power row), then cast the index to Int64 so it can be joined on node_number.
powerdata_distflow = powerdata_distflow.with_row_index(
    "node_number", offset=1
).with_columns(c("node_number").cast(pl.Int64))

# Basic topology (node number + nominal voltage) enriched with the power data.
# The full join keeps nodes that have no power row.
df_topology = nodedata_distflow.select(
    c("index").alias("node_number"),
    c("Vnom"),
).join(powerdata_distflow, on="node_number", how="full", coalesce=True)
linedata_distflow

node_from,node_to,line_number,r_pu,x_pu,b_pu,i_pu,connection_type
i64,i64,i64,f64,f64,f64,f64,i64
1,0,1,0.19375,0.112969,7.9168e-8,0.024942,2
2,0,2,2.046875,1.14625,8.9632e-7,0.026327,2
3,0,3,0.0,0.0,0.0,1.0,3
4,1,4,3.03,0.0911,2.2696e-8,0.003048,2
5,1,5,0.0775,0.0451875,3.1667e-8,0.024942,2
…,…,…,…,…,…,…,…
53,52,53,3.7875,0.113875,2.8370e-8,0.003048,2
54,52,54,0.08525,0.049706,3.4834e-8,0.024942,2
55,54,55,0.05425,0.031631,2.2167e-8,0.024942,2
56,54,56,3.219375,0.096794,2.4114e-8,0.003048,2


# Set missing values for equipment

In [199]:
### Placeholder values for equipment attributes that the input data lacks
# Branch length is not available: use a fake value (treated as km where the
# branch table is built).
base_length: int = 1
# Switch attributes are not available either: fake state, type and command.
switch_state: bool = False  # written to the `normal_open` column of the switch table
switch_type: str = "locked_switch"
switch_command: str = "unknown"

## Connectivity node table

In [200]:
# Build a lookup from node number to connectivity-node UUID, one entry per row
# of df_topology. The keys are the integer node numbers (see the output below);
# generate_uuid_col receives "node_" as `added_string`.
# NOTE(review): the displayed UUIDs are version 5, so they are presumably
# derived deterministically from the node number -- confirm in generate_uuid_col.
connectivity_node: dict[int, str] = pl_to_dict(
    df_topology.select(
        c("node_number"),
        c("node_number").pipe(generate_uuid_col, added_string="node_").alias("uuid"),
    )
)
connectivity_node

{0: 'df941fce-ceda-5874-ab63-5c8af9bec38b',
 1: 'ba84d70a-80d7-590e-b112-f9c4b5fabf56',
 2: '078656ed-79f8-53a1-a67a-bb8f53476cec',
 3: 'c2247320-9fc2-538a-ba64-3ac70e49994e',
 4: 'af72457f-f983-5eeb-a635-0609f4cae0c1',
 5: 'dbd2411e-1e87-5956-86d9-d69ee7d848d5',
 6: '2db84a3d-aa74-5b4f-866a-3313180cb863',
 7: 'cce2c582-1c62-5b80-aac3-b840ce3f5f14',
 8: 'd177af44-109b-50d6-8b8a-97874a03462b',
 9: '8a7f105e-71f3-5101-8b4f-1a9007ec858d',
 10: '41ac73c2-162a-5ce9-964f-68a510db4d95',
 11: '2974cbe7-7e8b-54b5-9978-65f4214528ca',
 12: 'bc038a56-4e78-504b-a9a5-3e5666086e8e',
 13: '2e63d367-00e4-59d5-a861-9117f6825754',
 14: 'd8148587-7cb2-5800-b216-0eeb7afcf7e4',
 15: 'c682ba2f-b6aa-5ee6-8924-b395e42ea750',
 16: 'ba439656-84c7-5804-bf01-e9ac87641692',
 17: '82ae2e23-b674-53d7-ada4-fb1657822f5e',
 18: '7573ef10-2d63-5ec5-8190-b70bd3c452e5',
 19: '807905d4-b3d4-532e-a544-9cdac0f377d5',
 20: '71c4b22f-584c-581d-9508-099e859582d2',
 21: '7e6993e0-fae2-5ce4-85ed-6e2d27445e4c',
 22: '069c0cf6-35f1-

In [201]:
## Attach each node's connectivity-node UUID to the topology frame as `cn_fk`
# (node numbers missing from the lookup map to null).
df_topology = df_topology.with_columns(
    cn_fk=c("node_number").replace_strict(connectivity_node, default=None)
)

## Branch

In [202]:
# Timestamps shared by the equipment tables, and a fresh changes schema.
default_install_date: datetime = datetime(*settings.DEFAULT_INSTALL_DATE, tzinfo=UTC)
heartbeat = datetime.now(UTC)
changes_schema = ChangesSchema()


# Current limit and the other line parameters are in per-unit (pu).

# Lines with connection_type == 2 are branches.
branch = linedata_distflow.filter(c("connection_type") == 2).select(
    dso_code=c("line_number"),
    current_limit=c("i_pu"),
    r_pu=c("r_pu"),
    x_pu=c("x_pu"),
    b_pu=c("b_pu"),
    # Schema validation needs a non-null length column
    length=pl.lit(base_length),  # km
    concrete_class=pl.lit(BRANCH),
    start=pl.lit(default_install_date),
    start_heartbeat=pl.lit(heartbeat),
    uuid=generate_uuid_col(c("line_number"), added_string=BRANCH),
    # Each terminal points at the UUID of the node it is connected to
    t1=c("node_from").replace_strict(connectivity_node, default=None),
    t2=c("node_to").replace_strict(connectivity_node, default=None),
    # Schema validation needs these columns to exist, even if empty
    t1_container_fk=pl.lit(None),
    t2_container_fk=pl.lit(None),
)

# The same frame feeds the three tables that describe a branch.
new_tables_pl: dict[str, pl.DataFrame] = dict.fromkeys(
    ("Resource", "Equipment", "Branch"), branch
)
changes_schema = add_table_to_changes_schema(
    schema=changes_schema, new_tables_pl=new_tables_pl, raw_table_name="branch"
)
changes_schema = generate_connectivity_table(
    changes_schema=changes_schema, eq_table=branch, raw_data_table="branch"
)

## Energy consumer

In [None]:
# Energy consumers are appended to the changes schema created in the branch
# cell. `changes_schema`, `default_install_date` and `heartbeat` are reused on
# purpose: re-instantiating ChangesSchema() here would discard the branch
# tables that were already registered.

# TODO: power values are in per-unit -- confirm which unit the schema expects.
# Author's plan (translated from the original notes):
#   - df_topology        => use all nodes
#   - powerdata_distflow => keep a node only if its power is not 0
# NOTE(review): the column set below mirrors the branch/switch cells with a
# single terminal; verify it against the EnergyConsumer schema and against what
# generate_connectivity_table expects before relying on this cell.
energy_consumer = df_topology.filter(
    # The slack bus has null loads; treat null as "no load".
    (c("Pload").fill_null(0) != 0) | (c("Qload").fill_null(0) != 0)
).select(
    c("node_number").alias("dso_code"),
    pl.lit(ENERGY_CONSUMER).alias("concrete_class"),
    pl.lit(default_install_date).alias("start"),
    pl.lit(heartbeat).alias("start_heartbeat"),
    c("node_number")
    .pipe(generate_uuid_col, added_string=ENERGY_CONSUMER)
    .alias("uuid"),
    # The consumer's only terminal sits on the node's connectivity node
    c("cn_fk").alias("t1"),
    # Need column name for validation of the schema
    pl.lit(None).alias("t1_container_fk"),
)
new_tables_pl: dict[str, pl.DataFrame] = {
    "Resource": energy_consumer,
    "Equipment": energy_consumer,
    "EnergyConsumer": energy_consumer,
}
changes_schema = add_table_to_changes_schema(
    schema=changes_schema, new_tables_pl=new_tables_pl, raw_table_name="energy_consumer"
)
# Connectivity must be generated from the energy-consumer frame itself, not
# from the branch frame.
changes_schema = generate_connectivity_table(
    changes_schema=changes_schema,
    eq_table=energy_consumer,
    raw_data_table="energy_consumer",
)

## Measurement

Active power

In [203]:
# Preview only (the result is not assigned): topology frame with an extra
# `uuid` column produced by generate_random_uuid.
df_topology.with_columns(
    uuid=generate_random_uuid(c("Vnom")),
)

node_number,Vnom,Pload,Qload,cn_fk,uuid
i64,i64,f64,f64,str,str
0,400,,,"""df941fce-ceda-5874-ab63-5c8af9…","""2dbaccda-4541-4734-97bc-3813c6…"
1,400,0.0,0.0,"""ba84d70a-80d7-590e-b112-f9c4b5…","""8f6b1bc5-7f78-4963-a519-0e7815…"
2,400,0.0,0.0,"""078656ed-79f8-53a1-a67a-bb8f53…","""760d2a68-2316-40ab-a6fd-417b83…"
3,400,0.0,0.0,"""c2247320-9fc2-538a-ba64-3ac70e…","""7183648a-4f60-4c76-a59d-3bb16d…"
4,400,0.0,0.0,"""af72457f-f983-5eeb-a635-0609f4…","""afc38542-0d1a-4024-8cea-e7f659…"
…,…,…,…,…,…
53,400,0.0,0.0,"""4505ed8e-f087-5ee2-8c67-775daa…","""a143808a-28e2-4ba9-ac85-2e99a1…"
54,400,0.000035,-0.000012,"""23bc00b6-0e27-5e6d-a02e-dda5e9…","""241f2942-537d-41ff-af4a-ef20c3…"
55,400,0.0,0.0,"""b1d51456-8036-5737-accc-1103d2…","""6dc4dd4b-eeee-4c7b-a396-08a8b3…"
56,400,0.0,0.0,"""9bed56b6-83af-51ce-b629-9a787c…","""9c912acb-60de-4ed2-9d7e-0270f3…"


In [204]:
## One active-power measurement per topology node, attached to the node's
## connectivity-node UUID (`cn_fk`) and carrying Pload as its value.
measurement = df_topology.select(
    uuid=generate_random_uuid(c("cn_fk")),
    resource_fk=c("cn_fk"),
    start_heartbeat=pl.lit(heartbeat),
    concrete_type=pl.lit(MeasurementClass.SPAN.value),
    phase=pl.lit(MeasurementPhase.ABC.value),
    column_type=pl.lit(MeasurementColumn.DOUBLE.value),
    source_fk=pl.lit(CONVENTIONAL_METER),
    # default_period is left unset for now:
    # pl.lit(60*60*24*365).alias("default_period"),
    measurement_type=pl.lit(ACTIVE_POWER),
    unit_symbol=pl.lit("pu"),
    unit_multiplier=pl.lit(1),
    double_value=c("Pload"),
)

In [205]:
# Span boundaries: 1 January 2022 and 1 January 2023 interpreted as
# Europe/Zurich local time, then expressed in UTC.
span_start, span_end = (
    pl.lit(datetime(year, 1, 1))
    .dt.replace_time_zone(time_zone="Europe/Zurich")
    .dt.convert_time_zone(time_zone="UTC")
    for year in (2022, 2023)
)

# Every span row references its measurement through `measurement_fk` (the
# measurement's uuid) and receives a uuid of its own.
measurement_span = measurement.with_columns(
    measurement_fk=c("uuid"),
    uuid=generate_random_uuid(c("uuid")),
    start=span_start,
    end=span_end,
)

new_tables_pl: dict[str, pl.DataFrame] = {
    "Measurement": measurement,
    "MeasurementSpan": measurement_span,
}
changes_schema = add_table_to_changes_schema(
    schema=changes_schema, new_tables_pl=new_tables_pl, raw_table_name="meter_id"
)

In [206]:
# Visual check of the measurement frame selected from df_topology.
measurement

uuid,resource_fk,start_heartbeat,concrete_type,phase,column_type,source_fk,measurement_type,unit_symbol,unit_multiplier,double_value
str,str,"datetime[μs, UTC]",str,str,str,str,str,str,i32,f64
"""6eb00740-0338-4fde-9bae-fbe289…","""df941fce-ceda-5874-ab63-5c8af9…",2025-01-06 12:23:06.741277 UTC,"""measurement_span""","""ABC""","""double_value""","""conventional_meter""","""active power""","""pu""",1,
"""ff20b736-46ce-474a-9954-01b9f1…","""ba84d70a-80d7-590e-b112-f9c4b5…",2025-01-06 12:23:06.741277 UTC,"""measurement_span""","""ABC""","""double_value""","""conventional_meter""","""active power""","""pu""",1,0.0
"""a6c1e66b-f371-4a23-ba5d-8e147b…","""078656ed-79f8-53a1-a67a-bb8f53…",2025-01-06 12:23:06.741277 UTC,"""measurement_span""","""ABC""","""double_value""","""conventional_meter""","""active power""","""pu""",1,0.0
"""0db0d29c-d681-44f8-8004-537790…","""c2247320-9fc2-538a-ba64-3ac70e…",2025-01-06 12:23:06.741277 UTC,"""measurement_span""","""ABC""","""double_value""","""conventional_meter""","""active power""","""pu""",1,0.0
"""fc43ef46-11d6-494f-8245-51688e…","""af72457f-f983-5eeb-a635-0609f4…",2025-01-06 12:23:06.741277 UTC,"""measurement_span""","""ABC""","""double_value""","""conventional_meter""","""active power""","""pu""",1,0.0
…,…,…,…,…,…,…,…,…,…,…
"""032a8c73-a507-4a50-b23f-f6b702…","""4505ed8e-f087-5ee2-8c67-775daa…",2025-01-06 12:23:06.741277 UTC,"""measurement_span""","""ABC""","""double_value""","""conventional_meter""","""active power""","""pu""",1,0.0
"""c32dd385-b0a0-40c9-b7b9-5aef26…","""23bc00b6-0e27-5e6d-a02e-dda5e9…",2025-01-06 12:23:06.741277 UTC,"""measurement_span""","""ABC""","""double_value""","""conventional_meter""","""active power""","""pu""",1,0.000035
"""fb904e18-4e5b-49e4-bf60-9d3181…","""b1d51456-8036-5737-accc-1103d2…",2025-01-06 12:23:06.741277 UTC,"""measurement_span""","""ABC""","""double_value""","""conventional_meter""","""active power""","""pu""",1,0.0
"""9a6a85ae-2fb1-47ac-be19-828e9d…","""9bed56b6-83af-51ce-b629-9a787c…",2025-01-06 12:23:06.741277 UTC,"""measurement_span""","""ABC""","""double_value""","""conventional_meter""","""active power""","""pu""",1,0.0


Reactive power

In [209]:
# Scratch queries kept for debugging (all disabled):
# changes_schema.connectivity
# changes_schema.measurement["resource_fk"][0]
# changes_schema.branch.filter(c("uuid") == "df941fce-ceda-5874-ab63-5c8af9bec38b")
# changes_schema.connectivity.filter(
#     c("cn_fk").is_in(changes_schema.measurement["resource_fk"])
# )
# Only the measurement table held by the changes schema is displayed.
changes_schema.measurement

diff,uuid,start_heartbeat,end_heartbeat,concrete_type,resource_fk,terminal_side,phase,measurement_type,unit_multiplier,unit_symbol,column_type,source_fk,sensor_accuracy,name,description,metadata,op_type,over_measurement_fk,default_period
str,str,"datetime[μs, UTC]","datetime[μs, UTC]",str,str,str,str,str,i32,str,str,str,f64,str,str,str,str,str,i32
"""+""","""6eb00740-0338-4fde-9bae-fbe289…",2025-01-06 12:23:06.741277 UTC,,"""measurement_span""","""df941fce-ceda-5874-ab63-5c8af9…",,"""ABC""","""active power""",1,"""pu""","""double_value""","""conventional_meter""",,,,,,,
"""+""","""ff20b736-46ce-474a-9954-01b9f1…",2025-01-06 12:23:06.741277 UTC,,"""measurement_span""","""ba84d70a-80d7-590e-b112-f9c4b5…",,"""ABC""","""active power""",1,"""pu""","""double_value""","""conventional_meter""",,,,,,,
"""+""","""a6c1e66b-f371-4a23-ba5d-8e147b…",2025-01-06 12:23:06.741277 UTC,,"""measurement_span""","""078656ed-79f8-53a1-a67a-bb8f53…",,"""ABC""","""active power""",1,"""pu""","""double_value""","""conventional_meter""",,,,,,,
"""+""","""0db0d29c-d681-44f8-8004-537790…",2025-01-06 12:23:06.741277 UTC,,"""measurement_span""","""c2247320-9fc2-538a-ba64-3ac70e…",,"""ABC""","""active power""",1,"""pu""","""double_value""","""conventional_meter""",,,,,,,
"""+""","""fc43ef46-11d6-494f-8245-51688e…",2025-01-06 12:23:06.741277 UTC,,"""measurement_span""","""af72457f-f983-5eeb-a635-0609f4…",,"""ABC""","""active power""",1,"""pu""","""double_value""","""conventional_meter""",,,,,,,
…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…
"""+""","""032a8c73-a507-4a50-b23f-f6b702…",2025-01-06 12:23:06.741277 UTC,,"""measurement_span""","""4505ed8e-f087-5ee2-8c67-775daa…",,"""ABC""","""active power""",1,"""pu""","""double_value""","""conventional_meter""",,,,,,,
"""+""","""c32dd385-b0a0-40c9-b7b9-5aef26…",2025-01-06 12:23:06.741277 UTC,,"""measurement_span""","""23bc00b6-0e27-5e6d-a02e-dda5e9…",,"""ABC""","""active power""",1,"""pu""","""double_value""","""conventional_meter""",,,,,,,
"""+""","""fb904e18-4e5b-49e4-bf60-9d3181…",2025-01-06 12:23:06.741277 UTC,,"""measurement_span""","""b1d51456-8036-5737-accc-1103d2…",,"""ABC""","""active power""",1,"""pu""","""double_value""","""conventional_meter""",,,,,,,
"""+""","""9a6a85ae-2fb1-47ac-be19-828e9d…",2025-01-06 12:23:06.741277 UTC,,"""measurement_span""","""9bed56b6-83af-51ce-b629-9a787c…",,"""ABC""","""active power""",1,"""pu""","""double_value""","""conventional_meter""",,,,,,,


In [210]:
# Names of the attributes (tables) held by the changes schema instance.
list(vars(changes_schema))

['heartbeat',
 'resource',
 'equipment',
 'terminal',
 'busbar_section',
 'branch',
 'branch_parameter_event',
 'geo_event',
 'switch',
 'switch_event',
 'transformer',
 'transformer_end',
 'transformer_parameter_event',
 'tap',
 'tap_event',
 'bess',
 'energy_consumer',
 'external_network',
 'generating_unit',
 'container',
 'client',
 'substation',
 'base_voltage',
 'connectivity_node',
 'connectivity',
 'measurement',
 'measurement_point',
 'measurement_span']

## Switch

In [211]:
# Lines with connection_type == 3 are switches.
switch = linedata_distflow.filter(c("connection_type") == 3).select(
    dso_code=c("line_number"),
    concrete_class=pl.lit(SWITCH),
    start=pl.lit(default_install_date),
    start_heartbeat=pl.lit(heartbeat),
    # Placeholder switch attributes (not available in the input data)
    normal_open=pl.lit(switch_state),
    type=pl.lit(switch_type),
    command=pl.lit(switch_command),
    # Each terminal points at the UUID of the node it is connected to
    t1=c("node_from").replace_strict(connectivity_node, default=None),
    t2=c("node_to").replace_strict(connectivity_node, default=None),
    # Schema validation needs these columns to exist, even if empty
    t1_container_fk=pl.lit(None),
    t2_container_fk=pl.lit(None),
    uuid=generate_uuid_col(c("line_number"), added_string=SWITCH),
)

# The same frame feeds the three tables that describe a switch.
new_tables_pl: dict[str, pl.DataFrame] = dict.fromkeys(
    ("Resource", "Equipment", "Switch"), switch
)
changes_schema = add_table_to_changes_schema(
    schema=changes_schema, new_tables_pl=new_tables_pl, raw_table_name="switch"
)
changes_schema = generate_connectivity_table(
    changes_schema=changes_schema, eq_table=switch, raw_data_table="switch"
)

In [212]:
# Begin time of the data from matlab (from main_FC.ipynb before)
str(datetime(2020, 4, 4, 23, 00, 0, 0, UTC) - dt.timedelta(hours=192))

'2020-03-27 23:00:00+00:00'

## Parser

In [213]:
# Parse connectivity node
df_topology

node_number,Vnom,Pload,Qload,cn_fk
i64,i64,f64,f64,str
0,400,,,"""df941fce-ceda-5874-ab63-5c8af9…"
1,400,0.0,0.0,"""ba84d70a-80d7-590e-b112-f9c4b5…"
2,400,0.0,0.0,"""078656ed-79f8-53a1-a67a-bb8f53…"
3,400,0.0,0.0,"""c2247320-9fc2-538a-ba64-3ac70e…"
4,400,0.0,0.0,"""af72457f-f983-5eeb-a635-0609f4…"
…,…,…,…,…
53,400,0.0,0.0,"""4505ed8e-f087-5ee2-8c67-775daa…"
54,400,0.000035,-0.000012,"""23bc00b6-0e27-5e6d-a02e-dda5e9…"
55,400,0.0,0.0,"""b1d51456-8036-5737-accc-1103d2…"
56,400,0.0,0.0,"""9bed56b6-83af-51ce-b629-9a787c…"


In [214]:
def parse_connectivity_node(
    topology_df: pl.DataFrame, changes_schema: ChangesSchema, **kwargs
) -> ChangesSchema:
    """Register the NODE rows of ``topology_df`` as the ConnectivityNode table.

    A voltage is looked up for every connectivity node from the ``UN`` value of
    the non-``TR2`` rows whose ``t1`` or ``t2`` terminal references it; nodes
    without such a row fall back to their own ``UN``. The value is multiplied
    by 1e3 (kV to V) and stored as an Int32 ``base_voltage_fk`` column.

    Args:
        topology_df: Frame with at least the columns ``KEYWORD``, ``UN``,
            ``t1``, ``t2`` and ``uuid``.
        changes_schema: Schema the ConnectivityNode table is added to.
        **kwargs: Accepted but not used by this function.

    Returns:
        The schema returned by ``add_table_to_changes_schema``.
    """
    # Long format: one row per terminal (t1/t2) of every row that is not "TR2".
    terminals = topology_df.filter(c("KEYWORD") != "TR2").unpivot(
        index=["UN"], on=["t1", "t2"], value_name="cn_fk", variable_name="side"
    )
    # First non-null UN found for each connectivity node.
    voltage_per_node = (
        terminals.drop_nulls("cn_fk")
        .group_by("cn_fk")
        .agg(c("UN").drop_nulls().first())
        .drop_nulls("UN")
        .select(["cn_fk", "UN"])
    )
    cn_voltage_mapping: dict[str, float] = pl_to_dict(voltage_per_node)

    base_voltage = (
        1e3 * c("uuid").replace_strict(cn_voltage_mapping, default=c("UN"))
    ).cast(pl.Int32)  # kV to V
    node = topology_df.filter(c("KEYWORD") == "NODE").with_columns(
        base_voltage.alias("base_voltage_fk")
    )

    return add_table_to_changes_schema(
        schema=changes_schema,
        new_tables_pl={"ConnectivityNode": node},
        raw_table_name="ConnectivityNode",
    )

## Distflow

In [215]:
# Names of the attributes (tables) held by the changes schema instance.
list(vars(changes_schema))

['heartbeat',
 'resource',
 'equipment',
 'terminal',
 'busbar_section',
 'branch',
 'branch_parameter_event',
 'geo_event',
 'switch',
 'switch_event',
 'transformer',
 'transformer_end',
 'transformer_parameter_event',
 'tap',
 'tap_event',
 'bess',
 'energy_consumer',
 'external_network',
 'generating_unit',
 'container',
 'client',
 'substation',
 'base_voltage',
 'connectivity_node',
 'connectivity',
 'measurement',
 'measurement_point',
 'measurement_span']

In [216]:
## TODO: update Qload due to line capacity (nothing is updated in this cell yet;
## the measurement table is only displayed).
changes_schema.measurement

diff,uuid,start_heartbeat,end_heartbeat,concrete_type,resource_fk,terminal_side,phase,measurement_type,unit_multiplier,unit_symbol,column_type,source_fk,sensor_accuracy,name,description,metadata,op_type,over_measurement_fk,default_period
str,str,"datetime[μs, UTC]","datetime[μs, UTC]",str,str,str,str,str,i32,str,str,str,f64,str,str,str,str,str,i32
"""+""","""6eb00740-0338-4fde-9bae-fbe289…",2025-01-06 12:23:06.741277 UTC,,"""measurement_span""","""df941fce-ceda-5874-ab63-5c8af9…",,"""ABC""","""active power""",1,"""pu""","""double_value""","""conventional_meter""",,,,,,,
"""+""","""ff20b736-46ce-474a-9954-01b9f1…",2025-01-06 12:23:06.741277 UTC,,"""measurement_span""","""ba84d70a-80d7-590e-b112-f9c4b5…",,"""ABC""","""active power""",1,"""pu""","""double_value""","""conventional_meter""",,,,,,,
"""+""","""a6c1e66b-f371-4a23-ba5d-8e147b…",2025-01-06 12:23:06.741277 UTC,,"""measurement_span""","""078656ed-79f8-53a1-a67a-bb8f53…",,"""ABC""","""active power""",1,"""pu""","""double_value""","""conventional_meter""",,,,,,,
"""+""","""0db0d29c-d681-44f8-8004-537790…",2025-01-06 12:23:06.741277 UTC,,"""measurement_span""","""c2247320-9fc2-538a-ba64-3ac70e…",,"""ABC""","""active power""",1,"""pu""","""double_value""","""conventional_meter""",,,,,,,
"""+""","""fc43ef46-11d6-494f-8245-51688e…",2025-01-06 12:23:06.741277 UTC,,"""measurement_span""","""af72457f-f983-5eeb-a635-0609f4…",,"""ABC""","""active power""",1,"""pu""","""double_value""","""conventional_meter""",,,,,,,
…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…
"""+""","""032a8c73-a507-4a50-b23f-f6b702…",2025-01-06 12:23:06.741277 UTC,,"""measurement_span""","""4505ed8e-f087-5ee2-8c67-775daa…",,"""ABC""","""active power""",1,"""pu""","""double_value""","""conventional_meter""",,,,,,,
"""+""","""c32dd385-b0a0-40c9-b7b9-5aef26…",2025-01-06 12:23:06.741277 UTC,,"""measurement_span""","""23bc00b6-0e27-5e6d-a02e-dda5e9…",,"""ABC""","""active power""",1,"""pu""","""double_value""","""conventional_meter""",,,,,,,
"""+""","""fb904e18-4e5b-49e4-bf60-9d3181…",2025-01-06 12:23:06.741277 UTC,,"""measurement_span""","""b1d51456-8036-5737-accc-1103d2…",,"""ABC""","""active power""",1,"""pu""","""double_value""","""conventional_meter""",,,,,,,
"""+""","""9a6a85ae-2fb1-47ac-be19-828e9d…",2025-01-06 12:23:06.741277 UTC,,"""measurement_span""","""9bed56b6-83af-51ce-b629-9a787c…",,"""ABC""","""active power""",1,"""pu""","""double_value""","""conventional_meter""",,,,,,,


## Import data to changes schema

In [217]:
def sum_downstream_power(col: pl.Expr, df: pl.DataFrame):
    """For each value of ``col``, sum ``p_line`` over the rows of ``df`` whose
    ``upstream`` equals that value.

    The lookup runs once per element against ``df`` as passed in, and the
    result is returned as a Float64 expression.
    """

    def _children_total(node):
        return df.filter(c("upstream") == node)["p_line"].sum()

    return col.map_elements(_children_total, return_dtype=pl.Float64)


def calculate_line_power(df: pl.DataFrame):
    """Expression for a line's power: (power of the lines fed by its
    ``downstream`` node + own ``P``) scaled by ``1 + F``."""
    fed_lines_power = sum_downstream_power(c("downstream"), df=df)
    scale = 1 + c("F")
    return (fed_lines_power + c("P")) * scale


def sum_power(df: pl.DataFrame, lv: int):
    """Recompute ``p_line`` for the rows whose ``lv`` equals ``lv``; every
    other row keeps its current ``p_line``."""
    on_level = c("lv") == lv
    refreshed = (
        pl.when(on_level).then(calculate_line_power(df=df)).otherwise(c("p_line"))
    )
    return df.with_columns(refreshed.alias("p_line"))


# Author's note: the functions above are meant to be used for each power flow;
# the function below is meant to be used only once.
def get_node_level(G: nx.DiGraph) -> dict:
    """Map every node of ``G`` to its level.

    A node without successors has level 0; any other node has level
    1 + the highest level among its successors.
    """
    levels: dict = {}
    # Walking the topological order backwards ensures that all successors of a
    # node already have a level when the node itself is visited.
    for node in reversed(list(nx.topological_sort(G))):
        # default=-1 makes a node without successors end up at level 0
        levels[node] = (
            max((levels[child] for child in G.successors(node)), default=-1) + 1
        )
    return levels


# Small hand-made radial network used to exercise the power summation.
# One row per line: the node it feeds (`downstream`), the node it comes from
# (`upstream`, null for the root), and the columns P, F and p_line consumed by
# calculate_line_power / sum_power.
line_data: pl.DataFrame = pl.DataFrame(
    {
        "downstream": [1, 2, 3, 4, 5, 6, 7, 8],
        "upstream": [None, 1, 2, 1, 4, 4, 4, 6],
        "P": [0, 1, 2, 1, 4, 3, 6, 5],
        "F": [0.0, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1],
        "p_line": [0] * 8,
    }
)

# Directed graph with one edge upstream -> downstream per line; the root row
# (null upstream) contributes no edge. The edges are added directly from the
# row tuples instead of abusing map_elements for its side effect, which also
# avoids assigning to `_` (IPython's last-output variable).
grid = nx.DiGraph()
grid.add_edges_from(
    line_data.drop_nulls(subset="upstream")
    .select("upstream", "downstream")
    .iter_rows()
)

# Level of each node: 0 for leaves, increasing towards the root.
level_mapping: dict = get_node_level(G=grid)
line_data = line_data.with_columns(
    c("downstream").replace_strict(level_mapping, default=None).alias("lv")
)

# Accumulate from the leaves (level 0) upwards so that the lines fed by a node
# are already summed when the line feeding that node is processed.
for i in range(line_data["lv"].max() + 1):
    line_data = sum_power(df=line_data, lv=i)

print(line_data.sort("lv"))

shape: (8, 6)
┌────────────┬──────────┬─────┬─────┬────────┬─────┐
│ downstream ┆ upstream ┆ P   ┆ F   ┆ p_line ┆ lv  │
│ ---        ┆ ---      ┆ --- ┆ --- ┆ ---    ┆ --- │
│ i64        ┆ i64      ┆ i64 ┆ f64 ┆ f64    ┆ i64 │
╞════════════╪══════════╪═════╪═════╪════════╪═════╡
│ 3          ┆ 2        ┆ 2   ┆ 0.1 ┆ 2.2    ┆ 0   │
│ 5          ┆ 4        ┆ 4   ┆ 0.1 ┆ 4.4    ┆ 0   │
│ 7          ┆ 4        ┆ 6   ┆ 0.1 ┆ 6.6    ┆ 0   │
│ 8          ┆ 6        ┆ 5   ┆ 0.1 ┆ 5.5    ┆ 0   │
│ 2          ┆ 1        ┆ 1   ┆ 0.1 ┆ 3.52   ┆ 1   │
│ 6          ┆ 4        ┆ 3   ┆ 0.1 ┆ 9.35   ┆ 1   │
│ 4          ┆ 1        ┆ 1   ┆ 0.1 ┆ 23.485 ┆ 2   │
│ 1          ┆ null     ┆ 0   ┆ 0.0 ┆ 27.005 ┆ 3   │
└────────────┴──────────┴─────┴─────┴────────┴─────┘
