In [56]:
import os
import polars as pl
from polars import col as c
import polars.selectors as cs
from datetime import timedelta
import tqdm
from datetime import datetime
import numpy as np
import json
from plotly.subplots import make_subplots
from data_federation.input_model import SmallflexInputSchema
import plotly.graph_objs as go
from polars_function import concat_list_of_list
from general_function import dictionary_key_filtering, pl_to_dict, generate_log, extract_archive, scan_folder
from numpy_function import relative_error_within_boundaries, error_within_boundaries
import plotly.express as px
from config import settings
from rpy2.robjects import pandas2ri
import rpy2.robjects as ro 
from pathlib import Path 
import random

# Change the working directory to the current path with "/src" removed.
# NOTE(review): str.replace strips *every* "/src" occurrence in the path, not just a
# trailing component — presumably the goal is to run from the project root; confirm.
launch_dir = os.getcwd()
os.chdir(launch_dir.replace("/src", ""))


In [2]:
# Load the name -> path lookup tables from the JSON files referenced by the project
# settings. Context managers are used so the file handles are closed right after parsing
# instead of being left open until garbage collection.
with open(settings.INPUT_FILE_NAMES) as input_names_file:
    input_file_names: dict[str, str] = json.load(input_names_file)  # type: ignore
with open(settings.OUTPUT_FILE_NAMES) as output_names_file:
    output_file_names: dict[str, str] = json.load(output_names_file)  # type: ignore



In [None]:

# Look up the forecast archive and the file-name pattern in the input configuration.
folder_name = input_file_names["wsl_forecast_data"]
file_type = input_file_names["wsl_weather_forecast"]

# Empty placeholder frames.
measurement_data: pl.DataFrame = pl.DataFrame()
forecast_data: pl.DataFrame = pl.DataFrame()

# Project helper; presumably unpacks the archive next to itself — TODO confirm.
extract_archive(file_name=folder_name)

# The folder to scan is the archive path without its extension.
archive_dir = os.path.splitext(folder_name)[0]
file_names_list = scan_folder(
    folder_name=archive_dir,
    extension=".rda",
    file_names=file_type,
)

In [None]:


# Station metadata, reduced to three columns under shorter names.
weather_metadata = pl.read_csv(input_file_names["wsl_weather_metadata"]).select(
    c("ID").alias("location"),
    c("Average_Elevation").alias("avg_height"),
    c("Sub_Basin").alias("sub_basin"),
)

# Four-character prefix -> full variable name (not used further in this cell).
var_type_mapping = {
    "wind": "wind",
    "prec": "precipitation",
    "rad_": "irradiation",
    "hum_": "humidity",
    "temp": "temperature"
}

# Column names listed for dropping (not used further in this cell).
col_to_drop =  ["time", "weekday", "yy", "dd", "mm", "hh"]

# Archive / file-pattern setup is repeated here so this cell can run on its own.
file_type = input_file_names["wsl_weather_forecast"]
folder_name = input_file_names["wsl_forecast_data"]
forecast_data: pl.DataFrame = pl.DataFrame()
measurement_data: pl.DataFrame = pl.DataFrame()

extract_archive(file_name=input_file_names["wsl_forecast_data"])

file_names_list = scan_folder(folder_name=os.path.splitext(folder_name)[0], extension=".rda", file_names=file_type)

# Requested number of randomly chosen files (one subplot row each).
n_rows = 5
# Set to an int to draw the same files on every run; None keeps the sampling unseeded.
sample_seed = None

plot_folder = output_file_names["input_data_plot"]
plot_name = "forecast_vs_measurement"
# Column plotted from each file; presumably an irradiation series — TODO confirm.
plotted_variable = "rad_Gri200_10"

# random.sample raises when asked for more items than available, so never request
# more rows than there are files.
n_plotted = min(n_rows, len(file_names_list))
if n_plotted == 0:
    raise ValueError(f"No '.rda' file matching {file_type!r} found for {folder_name!r}")


def _hourly_by_day(frame: pl.DataFrame, value_col: str) -> pl.DataFrame:
    """Pivot `frame` to one row per hour of day and one column per day of year.

    Args:
        frame: Frame with a datetime "timestamp" column and a `value_col` column.
        value_col: Name of the column whose values fill the pivoted table.

    Returns:
        Frame with an "hour" column plus one column per ordinal day, sorted by hour.
    """
    return (
        frame.with_columns(
            c("timestamp").dt.ordinal_day().alias("day"),
            c("timestamp").dt.hour().alias("hour"),
        )
        .pivot(on="day", index="hour", values=value_col)
        .sort("hour")
    )


def _add_daily_traces(
    figure: go.Figure, hourly: pl.DataFrame, label: str, color: str, row: int, first_row: bool
) -> None:
    """Add one line per day column of `hourly` to subplot `row` of `figure`.

    Args:
        figure: Figure created with make_subplots that receives the traces.
        hourly: Output of `_hourly_by_day` ("hour" column + one column per day).
        label: Trace name, also used as legend group.
        color: Line colour shared by all traces of this group.
        row: 1-based subplot row.
        first_row: True for the first subplot; only its first trace gets a legend
            entry so each group appears once in the legend.
    """
    for j, day_col in enumerate(hourly.drop("hour").columns):
        figure.add_trace(
            go.Scatter(
                x=hourly["hour"], y=hourly[day_col].to_numpy(), mode='lines',
                name=label, legendgroup=label, showlegend=first_row and j == 0,
                line=dict(color=color)),
            row=row, col=1,
        )


fig = make_subplots(
    rows=n_plotted, cols=1, shared_xaxes=True, vertical_spacing=0.02,
)

sampled_files = random.Random(sample_seed).sample(file_names_list, n_plotted)

for i, file_name in enumerate(sampled_files):

    # The parent folder name is parsed as a date; 12:00 of that day is the split point
    # between measurement (<=) and forecast (>).
    actual_date = datetime.strptime(Path(file_name).parent.name+"T12:00"  , '%Y-%m-%dT%H:%M')
    # R's load() returns the names of the objects it loaded; the first one is used.
    names = ro.r['load'](file_name) # type: ignore
    all_df = ro.r[names[0]]

    # Convert the first element of the loaded R object into a polars frame.
    data: pl.DataFrame = pl.from_pandas(pandas2ri.rpy2py(all_df.rx2(all_df.names[0]))) # type: ignore
    data = data.select(
        pl.from_epoch(c("time")).alias("timestamp"),
        plotted_variable,
    )

    measurement_data = _hourly_by_day(data.filter(c("timestamp") <= actual_date), plotted_variable)
    forecast_data = _hourly_by_day(data.filter(c("timestamp") > actual_date), plotted_variable)

    _add_daily_traces(fig, measurement_data, "Measurement", "blue", row=i + 1, first_row=i == 0)
    _add_daily_traces(fig, forecast_data, "Forecast", "red", row=i + 1, first_row=i == 0)

fig.update_layout(
    margin=dict(t=60, l=65, r= 10, b=60),
    width=1000,   # Set the width of the figure
    height=n_plotted*200,
    title=dict(text="Differences between forecast and measurement"),
)
fig.show()

# Create the output folder if needed so write_html does not fail on a fresh checkout.
Path(plot_folder).mkdir(parents=True, exist_ok=True)
fig.write_html(f"{plot_folder}/{plot_name}.html")
# data = data.filter(pl.all_horizontal(c("timestamp") > actual_date))