In [110]:
import os
import numpy as np
import pandas as pd
import geopandas as gpd
from pathlib import Path
import matplotlib.pyplot as plt
from datetime import timedelta, datetime
import xarray as xr


preprocess_output_folder = Path('data/Zwalm_data/preprocess_output')
p_ep_zwalm = pd.read_pickle(preprocess_output_folder / 'forcings_ERA5_david_E_EP.pkl')

print(p_ep_zwalm)





                 Timestamp  potential_evaporation_sum  total_precipitation_sum
0      1972-07-01 00:00:00                   0.110833                 0.227500
1      1972-07-01 01:00:00                   0.110833                 0.227500
2      1972-07-01 02:00:00                   0.110833                 0.227500
3      1972-07-01 03:00:00                   0.110833                 0.227500
4      1972-07-01 04:00:00                   0.110833                 0.227500
...                    ...                        ...                      ...
442699 2022-12-31 19:00:00                   0.227500                 0.356667
442700 2022-12-31 20:00:00                   0.227500                 0.356667
442701 2022-12-31 21:00:00                   0.227500                 0.356667
442702 2022-12-31 22:00:00                   0.227500                 0.356667
442703 2022-12-31 23:00:00                   0.227500                 0.356667

[442704 rows x 3 columns]


In [111]:
def to_julian_day(timestamp):
    return timestamp.toordinal() + 1721424 + 1 # +1 because it's at 12h

def calculate_solar_declination(julian_number):
    return 0.4093 * np.sin((2 * np.pi * julian_number / 365) - 1.405)

def calculate_omega_s(phi,solar_declination):
    return np.arccos(-np.tan(phi) * np.tan(solar_declination))

def calculate_dr(J):
    return 1 + 0.033 * np.cos(2 * np.pi * J / 365)

def calculate_S0(d_r, omega_s, phi, delta):
    return 15.392 * d_r * (omega_s * np.sin(phi) * np.sin(delta) + np.cos(phi) * np.cos(delta) * np.sin(omega_s))

path = Path('output_caravan/attributes/vlaamsebekken/attributes_other_vlaamsebekken.csv')
ds = pd.read_csv(path)
phi = ds['gauge_lat']
phi = np.radians(phi)

p_ep_zwalm['phi'] = np.repeat(phi.values, len(p_ep_zwalm))
p_ep_zwalm['Julian_Number'] = p_ep_zwalm['Timestamp'].apply(to_julian_day)
p_ep_zwalm['solar_declination'] = p_ep_zwalm['Julian_Number'].apply(calculate_solar_declination)
p_ep_zwalm['omega_s'] = calculate_omega_s(p_ep_zwalm['phi'],p_ep_zwalm['solar_declination'])
p_ep_zwalm['dr'] = p_ep_zwalm['Julian_Number'].apply(calculate_dr)
p_ep_zwalm['S0'] = calculate_S0(p_ep_zwalm['dr'],p_ep_zwalm['omega_s'],p_ep_zwalm['phi'],p_ep_zwalm['solar_declination'])



print(p_ep_zwalm.head())



            Timestamp  potential_evaporation_sum  total_precipitation_sum  \
0 1972-07-01 00:00:00                   0.110833                   0.2275   
1 1972-07-01 01:00:00                   0.110833                   0.2275   
2 1972-07-01 02:00:00                   0.110833                   0.2275   
3 1972-07-01 03:00:00                   0.110833                   0.2275   
4 1972-07-01 04:00:00                   0.110833                   0.2275   

        phi  Julian_Number  solar_declination   omega_s        dr        S0  
0  0.888118        2441500          -0.373055  1.068608  1.031906  3.379338  
1  0.888118        2441500          -0.373055  1.068608  1.031906  3.379338  
2  0.888118        2441500          -0.373055  1.068608  1.031906  3.379338  
3  0.888118        2441500          -0.373055  1.068608  1.031906  3.379338  
4  0.888118        2441500          -0.373055  1.068608  1.031906  3.379338  


In [112]:
path = Path('output_caravan/timeseries/netcdf/vlaamsebekken/6.nc')
ds = xr.open_dataset(path)
df = ds.to_dataframe()
df.index = pd.to_datetime(df.index)
start_date = pd.to_datetime('1972-07-01')
end_date = pd.to_datetime('2022-12-31')

df_filtered = df[(df.index >= start_date) & (df.index <= end_date)]
df_filtered = df_filtered[['temperature_2m_min', 'temperature_2m_max']]
df_filtered['T_min'] = df_filtered.groupby(df_filtered.index.to_period('M'))['temperature_2m_min'].transform('mean')
df_filtered['T_max'] = df_filtered.groupby(df_filtered.index.to_period('M'))['temperature_2m_max'].transform('mean')
df_filtered['T_diff'] = df_filtered['T_max']-df_filtered['T_min']


new_index = pd.date_range(start=start_date, end=end_date + pd.Timedelta(days=1) - pd.Timedelta(hours=1), freq='H')
df_expanded = df_filtered.loc[df_filtered.index.repeat(24)].reset_index(drop=True)
df_expanded.index = new_index

p_ep_zwalm['T_diff'] = df_expanded['T_diff'].values


print(p_ep_zwalm.head(6000))



               Timestamp  potential_evaporation_sum  total_precipitation_sum  \
0    1972-07-01 00:00:00                   0.110833                 0.227500   
1    1972-07-01 01:00:00                   0.110833                 0.227500   
2    1972-07-01 02:00:00                   0.110833                 0.227500   
3    1972-07-01 03:00:00                   0.110833                 0.227500   
4    1972-07-01 04:00:00                   0.110833                 0.227500   
...                  ...                        ...                      ...   
5995 1973-03-07 19:00:00                   0.132500                 0.072917   
5996 1973-03-07 20:00:00                   0.132500                 0.072917   
5997 1973-03-07 21:00:00                   0.132500                 0.072917   
5998 1973-03-07 22:00:00                   0.132500                 0.072917   
5999 1973-03-07 23:00:00                   0.132500                 0.072917   

           phi  Julian_Number  solar_de

In [113]:
path = Path('output_caravan/timeseries/netcdf/vlaamsebekken/6.nc')
ds = xr.open_dataset(path)
df = ds.to_dataframe()
df.index = pd.to_datetime(df.index)
start_date = pd.to_datetime('1972-07-01')
end_date = pd.to_datetime('2022-12-31')

df_filtered = df[(df.index >= start_date) & (df.index <= end_date)]
df_filtered = df_filtered[['temperature_2m_mean']]

new_index = pd.date_range(start=start_date, end=end_date + pd.Timedelta(days=1) - pd.Timedelta(hours=1), freq='H')
df_expanded = df_filtered.loc[df_filtered.index.repeat(24)].reset_index(drop=True)
df_expanded.index = new_index


p_ep_zwalm['T_avg'] = df_expanded['temperature_2m_mean'].values


print(p_ep_zwalm['T_avg'].head(50))




0     12.70
1     12.70
2     12.70
3     12.70
4     12.70
5     12.70
6     12.70
7     12.70
8     12.70
9     12.70
10    12.70
11    12.70
12    12.70
13    12.70
14    12.70
15    12.70
16    12.70
17    12.70
18    12.70
19    12.70
20    12.70
21    12.70
22    12.70
23    12.70
24    12.89
25    12.89
26    12.89
27    12.89
28    12.89
29    12.89
30    12.89
31    12.89
32    12.89
33    12.89
34    12.89
35    12.89
36    12.89
37    12.89
38    12.89
39    12.89
40    12.89
41    12.89
42    12.89
43    12.89
44    12.89
45    12.89
46    12.89
47    12.89
48    14.93
49    14.93
Name: T_avg, dtype: float32


In [114]:
path = Path('output_caravan/timeseries/netcdf/vlaamsebekken/6.nc')
ds = xr.open_dataset(path)
df = ds.to_dataframe()
df.index = pd.to_datetime(df.index)
start_date = pd.to_datetime('1972-07-01')
end_date = pd.to_datetime('2022-12-31')

df_filtered = df[(df.index >= start_date) & (df.index <= end_date)]
df_filtered = df_filtered[['total_precipitation_sum']]


df_filtered['P_sum'] = df_filtered.groupby(df_filtered.index.to_period('M'))['total_precipitation_sum'].transform('sum')

new_index = pd.date_range(start=start_date, end=end_date + pd.Timedelta(days=1) - pd.Timedelta(hours=1), freq='H')
df_expanded = df_filtered.loc[df_filtered.index.repeat(24)].reset_index(drop=True)
df_expanded.index = new_index


p_ep_zwalm['P_sum'] = df_expanded['P_sum'].values


print(p_ep_zwalm.head(6000))



               Timestamp  potential_evaporation_sum  total_precipitation_sum  \
0    1972-07-01 00:00:00                   0.110833                 0.227500   
1    1972-07-01 01:00:00                   0.110833                 0.227500   
2    1972-07-01 02:00:00                   0.110833                 0.227500   
3    1972-07-01 03:00:00                   0.110833                 0.227500   
4    1972-07-01 04:00:00                   0.110833                 0.227500   
...                  ...                        ...                      ...   
5995 1973-03-07 19:00:00                   0.132500                 0.072917   
5996 1973-03-07 20:00:00                   0.132500                 0.072917   
5997 1973-03-07 21:00:00                   0.132500                 0.072917   
5998 1973-03-07 22:00:00                   0.132500                 0.072917   
5999 1973-03-07 23:00:00                   0.132500                 0.072917   

           phi  Julian_Number  solar_de

In [115]:
def calculate_E(S_0, T_avg, T_diff, P_sum):
    E_t = 0.0013 * S_0 * (T_avg + 17.0) * (T_diff - 0.0123 * P_sum)**0.76
    return E_t

p_ep_zwalm['potential_evaporation_sum'] = (calculate_E(p_ep_zwalm['S0'].values,p_ep_zwalm['T_avg'].values,p_ep_zwalm['T_diff'].values,p_ep_zwalm['P_sum'].values))/24
p_ep_zwalm['total_precipitation_sum'] = p_ep_zwalm['total_precipitation_sum'].values

print(p_ep_zwalm['P_sum'])
print(p_ep_zwalm['potential_evaporation_sum'].head())
print(p_ep_zwalm['total_precipitation_sum'].head())



0         107.339996
1         107.339996
2         107.339996
3         107.339996
4         107.339996
             ...    
442699     86.479996
442700     86.479996
442701     86.479996
442702     86.479996
442703     86.479996
Name: P_sum, Length: 442704, dtype: float32
0    0.020479
1    0.020479
2    0.020479
3    0.020479
4    0.020479
Name: potential_evaporation_sum, dtype: float64
0    0.2275
1    0.2275
2    0.2275
3    0.2275
4    0.2275
Name: total_precipitation_sum, dtype: float64


In [116]:
# Define the path where you want to save the pickle file
file_path = Path('data\Zwalm_data\preprocess_output\p_ep_with_adaptions_ep.pkl')


# Save the DataFrame as a pickle file
p_ep_zwalm.to_pickle(file_path)

print(f"DataFrame saved to {file_path}")


DataFrame saved to data\Zwalm_data\preprocess_output\p_ep_with_adaptions_ep.pkl
