In [None]:

import json
import os
import polars as pl
from polars import col as c
import polars.selectors as cs
import re

import plotly.graph_objs as go
from datetime import datetime, UTC
from general_function import  generate_log, extract_archive, scan_folder, build_non_existing_dirs
from polars_function import concat_list_of_list
from smallflex_data_schema import SmallflexInputSchema
from data_display.input_data_plots import plot_forecast
import numpy as np
import plotly
from utility.pyomo_preprocessing import linear_interpolation_using_cols
from general_function import pl_to_dict
from config import settings
from plotly.subplots import make_subplots

# Strip "/src" from the current working directory path and change into the
# resulting directory, so the paths used below are resolved from there.
# NOTE(review): str.replace removes every "/src" occurrence in the path, not
# only a trailing component — confirm this matches the project layout.
os.chdir(os.getcwd().replace("/src", ""))

log = generate_log(name="file_")

# Read the file-name registries through context managers so both file handles
# are closed right after parsing instead of being left open until garbage
# collection.
with open(settings.INPUT_FILE_NAMES) as input_names_file:  # type: ignore
    input_file_names: dict[str, str] = json.load(input_names_file)
with open(settings.OUTPUT_FILE_NAMES) as output_names_file:  # type: ignore
    output_file_names: dict[str, str] = json.load(output_names_file)

In [2]:
# Read the input tables from the DuckDB file registered under "duckdb_input".
small_flex_input_schema = SmallflexInputSchema().duckdb_to_schema(
    file_path=output_file_names["duckdb_input"],
)

# Destination folder for the HTML plots written by this notebook.
plot_folder = output_file_names["input_data_plot"]

Read and validate tables from small_flex_input_data.db file: 100%|████████████████████████████████████████████████████| 16/16 [00:14<00:00,  1.08it/s]


In [3]:
# Basin height measurements from 2019-09-01 00:00 UTC onwards.
data = small_flex_input_schema.basin_height_measurement
data = data.filter(c("timestamp") >= datetime(2019, 9, 1, 0, 0, 0, tzinfo=UTC))

# Period covered by the measurements. min()/max() are used instead of the
# first/last row so the window is correct even if the table is not sorted
# by timestamp.
measurement_start = data["timestamp"].min()
measurement_end = data["timestamp"].max()

# Griessee discharge restricted to the measurement period, summed per calendar
# date and multiplied by 3.6.
# NOTE(review): the 3.6 factor presumably converts summed hourly m^3/s values
# into 10^3 m^3 per day — confirm against the source data resolution.
discharge_flow_historical = (
    small_flex_input_schema.discharge_flow_historical
    .filter(c("location") == "Griessee")
    .filter(c("timestamp") >= measurement_start)
    .filter(c("timestamp") <= measurement_end)
    .with_columns(c("timestamp").cast(pl.Date))
    .group_by("timestamp", maintain_order=True)
    .agg(c("value").sum() * 3.6)
    .sort("timestamp")
)

# Height grid with a 0.01 step, rounded to two decimals before being used as
# the join key against the height/volume table; heights that are absent from
# the table get a null volume after the left join.
basin_height_volume = pl.DataFrame({"height": np.arange(2332.0, 2386.1, 0.01)}).with_columns(
    c("height").round(2)
).join(
    small_flex_input_schema.basin_height_volume_table, on="height", how="left"
)["height", "volume"]

# NOTE(review): presumably fills the null volumes by linear interpolation
# along "height" — confirm against linear_interpolation_using_cols.
basin_height_volume = linear_interpolation_using_cols(
    df=basin_height_volume,
    x_col="height",
    y_col="volume",
)
# NOTE(review): presumably a {height: volume} lookup — confirm against pl_to_dict.
volume_mapping = pl_to_dict(basin_height_volume[["height", "volume"]])

# Map each measured height (rounded to two decimals) to a volume, then take the
# change to the NEXT measurement: diff() yields v[i] - v[i-1] and shift(-1)
# moves it one row up, so row i holds (v[i+1] - v[i]) / 1e3 and the last row
# is null.
data = data.with_columns(
    (
        c("height").round(2).replace_strict(volume_mapping, default=None).diff().shift(-1) / 1e3
    ).alias("diff_vol")
)

# Match discharge and basin volume change on the calendar date; the "trubined"
# column is the discharge minus the basin volume change.
trubined = discharge_flow_historical.join(
    data.with_columns(c("timestamp").cast(pl.Date)), on="timestamp", how="inner"
).with_columns(
    (c("value") - c("diff_vol")).alias("trubined")
)


# Three stacked panels: (1) basin height, (2) basin volume change together
# with the discharge, (3) their difference. Only rows 1-3 receive traces and
# the figure height is sized for three rows, so the grid is created with three
# rows rather than leaving an empty fourth panel.
# NOTE(review): the legend labels state km^3 — confirm the unit against the
# height/volume table and the 1e3 / 3.6 scalings applied upstream.
fig = make_subplots(rows=3, cols=1)

fig.add_trace(
    go.Scatter(
        x=data["timestamp"].to_list(),
        y=data["height"].to_list(),
        mode='lines',
        name="basin height [masl]",
    ),
    row=1, col=1,
)

fig.add_trace(
    go.Scatter(
        x=data["timestamp"].to_list(),
        y=data["diff_vol"].to_list(),
        mode='lines',
        name="basin volume difference [km^3]",
    ),
    row=2, col=1,
)

fig.add_trace(
    go.Scatter(
        x=discharge_flow_historical["timestamp"].to_list(),
        y=discharge_flow_historical["value"].to_list(),
        mode='lines',
        name="Discharge volume [km^3]",
    ),
    row=2, col=1,
)

fig.add_trace(
    go.Scatter(
        x=trubined["timestamp"].to_list(),
        y=trubined["trubined"].to_list(),
        mode='lines',
        name="Turbined volume [km^3]",
    ),
    row=3, col=1,
)

fig.update_layout(
    margin=dict(t=60, l=65, r=10, b=60),
    width=1000,    # figure width in pixels
    height=3*300,  # 300 pixels per subplot row
    legend_tracegroupgap=146,
)

# Make sure the output folder exists; write_html does not create missing
# directories.
os.makedirs(plot_folder, exist_ok=True)
fig.write_html(f"{plot_folder}/turbined_volume_differences.html")

In [4]:
# List the column names of the joined discharge / basin-measurement frame.
trubined.columns

['timestamp', 'value', 'water_basin_fk', 'height', 'diff_vol', 'trubined']

In [5]:
# Daily basin evolution table: the joined frame's columns re-exposed under
# descriptive, unit-tagged names (same column order as listed here).
basin_evolution = trubined.select(
    [
        c(source_name).alias(display_name)
        for source_name, display_name in (
            ("timestamp", "date"),
            ("height", "basin_height"),
            ("diff_vol", "basin_volume_diff [km^3]"),
            ("value", "discharge_volume [km^3]"),
            ("trubined", "turbined_volume [km^3]"),
        )
    ]
)

In [6]:
# Running totals over the joined frame:
#   - positive basin volume changes (rows that are not > 0 count as 0),
#   - magnitude of negative basin volume changes (rows that are not < 0 count as 0),
#   - discharge values.
cumulated_evolution = trubined.select(
    date=c("timestamp"),
    cumulated_pos_diff_vol=(
        pl.when(c("diff_vol") > 0).then(c("diff_vol")).otherwise(0).cum_sum()
    ),
    cumulated_neg_diff_vol=(
        pl.when(c("diff_vol") < 0).then(-c("diff_vol")).otherwise(0).cum_sum()
    ),
    cumulated_discharge_vol=c("value").cum_sum(),
)

In [9]:

fig = go.Figure()

# One line per cumulated series; the first column ("date") is skipped as a
# series and used as the shared x axis.
for col in cumulated_evolution.columns[1:]:
    fig.add_scatter(
        x=cumulated_evolution["date"].to_list(),
        y=cumulated_evolution[col].to_list(),
        mode='lines',
        name=col,
    )
fig.show()

In [8]:
# Display the per-date aggregated Griessee discharge series.
discharge_flow_historical

timestamp,value
date,f64
2019-09-01,97.14495
2019-09-02,84.042
2019-09-03,80.86785
2019-09-04,83.25345
2019-09-05,68.0127
…,…
2023-09-26,56.46375
2023-09-27,51.10455
2023-09-28,46.9014
2023-09-29,43.10355
