In [21]:
import pandas as pd
from pathlib import Path
import xarray as xr
from datetime import timedelta

# NetCDF file holding the forcing time series that this notebook preprocesses.
path = Path('output_caravan/timeseries/netcdf/vlaamsebekken/6.nc')

# `load_dataset` reads the whole file into memory and closes the file handle
# straight away; `open_dataset` would keep the file open (lazily loaded) for
# the lifetime of the kernel. `df` remains a fully usable in-memory Dataset.
df = xr.load_dataset(path)
df_df = df.to_dataframe()

# First date (inclusive) kept in the filtered forcing table.
START_DATE = '1972-06-30'

# Keep only the two forcing variables, turn the index into regular columns
# (this exposes the 'date' column), drop everything before START_DATE, and
# switch to the shorter column names used by the following cells.
df_filtered = (
    df_df[['potential_evaporation_sum_FAO_PENMAN_MONTEITH', 'total_precipitation_sum']]
    .reset_index(drop=False)
    .loc[lambda frame: frame['date'] >= START_DATE]
    .rename(columns={
        'date': 'Timestamp',
        'potential_evaporation_sum_FAO_PENMAN_MONTEITH': 'potential_evaporation_sum',
    })
)

print(df_filtered)

       Timestamp  potential_evaporation_sum  total_precipitation_sum
8215  1972-06-30                       1.83                     4.11
8216  1972-07-01                       1.32                     5.46
8217  1972-07-02                       1.55                     5.45
8218  1972-07-03                       2.25                     3.05
8219  1972-07-04                       2.33                     0.60
...          ...                        ...                      ...
26657 2022-12-27                       0.06                     0.41
26658 2022-12-28                       0.72                     4.30
26659 2022-12-29                       0.70                     2.67
26660 2022-12-30                       0.20                     6.77
26661 2022-12-31                       1.52                     8.56

[18447 rows x 3 columns]


In [None]:
# Make sure the timestamps are real datetimes so hour offsets can be added.
df_filtered["Timestamp"] = pd.to_datetime(df_filtered["Timestamp"])

HOURS_PER_DAY = 24
DAILY_SUM_COLUMNS = ["potential_evaporation_sum", "total_precipitation_sum"]

# Expand each daily row into 24 hourly rows.
# This is done with whole-column operations instead of a Python loop over
# `iterrows()`: the loop created one Series copy per output row, which is
# extremely slow for hundreds of thousands of rows.
#
# Each daily row is repeated 24 times in place (day 1 x 24, day 2 x 24, ...),
# keeping its original index label, exactly as the row-by-row version did.
row_positions = pd.RangeIndex(len(df_filtered)).repeat(HOURS_PER_DAY)
hourly_data = df_filtered.iloc[row_positions.to_numpy()].copy()

# Offsets 0h, 1h, ..., 23h cycling once per repeated day.
hour_offsets = pd.to_timedelta(
    pd.RangeIndex(len(hourly_data)) % HOURS_PER_DAY, unit="h"
)
hourly_data["Timestamp"] = hourly_data["Timestamp"] + hour_offsets.to_numpy()

# Spread each daily sum evenly over the 24 hours. The cast to float64 keeps
# the values identical to the previous row-by-row computation, which divided
# plain Python floats.
hourly_data[DAILY_SUM_COLUMNS] = (
    hourly_data[DAILY_SUM_COLUMNS].astype("float64") / HOURS_PER_DAY
)

# NOTE: `hourly_data` is now a DataFrame rather than a list of Series;
# `pd.DataFrame(hourly_data)` still accepts it unchanged.
 


In [29]:
# Assemble the expanded hourly records into a single table and show it.
df_hourly = pd.DataFrame(data=hourly_data)
print(df_hourly)

                Timestamp  potential_evaporation_sum  total_precipitation_sum
8215  1972-06-30 00:00:00                   0.076250                 0.171250
8215  1972-06-30 01:00:00                   0.076250                 0.171250
8215  1972-06-30 02:00:00                   0.076250                 0.171250
8215  1972-06-30 03:00:00                   0.076250                 0.171250
8215  1972-06-30 04:00:00                   0.076250                 0.171250
...                   ...                        ...                      ...
26661 2022-12-31 19:00:00                   0.063333                 0.356667
26661 2022-12-31 20:00:00                   0.063333                 0.356667
26661 2022-12-31 21:00:00                   0.063333                 0.356667
26661 2022-12-31 22:00:00                   0.063333                 0.356667
26661 2022-12-31 23:00:00                   0.063333                 0.356667

[442728 rows x 3 columns]


In [32]:
# Destination of the hourly forcing table, stored as a pandas pickle.
output_path = Path('data/Zwalm_data/preprocess_output/Final_Forcings_PDM.pkl')

# Create the output folder if it is missing so that the export does not fail
# with FileNotFoundError on a fresh checkout.
output_path.parent.mkdir(parents=True, exist_ok=True)
df_hourly.to_pickle(output_path)