In [32]:
import os
import numpy as np
import pandas as pd
import geopandas as gpd
from pathlib import Path
import matplotlib.pyplot as plt
import hvplot
import hvplot.pandas
import warnings
import pyswarms as ps
from datetime import timedelta, datetime
from dateutil.relativedelta import relativedelta
from sklearn.preprocessing import StandardScaler

# Define the function to convert datetime to Julian day number
def to_julian_day(timestamp):
    return timestamp.to_pydatetime().toordinal() + 1721424.5 + (
        timestamp.hour / 24.0
    ) + (timestamp.minute / 1440.0) + (timestamp.second / 86400.0)

# Path to the preprocess output folder
current_dir = Path(os.getcwd())  # Use Path to enable .parent
current_dir = current_dir.parent
preprocess_output_folder = current_dir / 'data' / 'Zwalm_data' / 'preprocess_output'



# Read the pickle file
p_ep_zwalm = pd.read_pickle(preprocess_output_folder / 'forcings_ERA5_david_E_EP.pkl')

# Check the data type of the 'Timestamp' column
print(p_ep_zwalm['Timestamp'].dtype)
print(p_ep_zwalm.head())

# Apply the to_julian_day function to calculate Julian numbers
p_ep_zwalm['Julian_Number'] = p_ep_zwalm['Timestamp'].apply(to_julian_day)

# Print the updated DataFrame
print(p_ep_zwalm)
print(p_ep_zwalm['Julian_Number'][1])

def calculate_solar_declination(julian_number):
    return 0.4093 * np.sin((2 * np.pi * julian_number / 365) - 1.405)

p_ep_zwalm['solar_declination'] = p_ep_zwalm['Julian_Number'].apply(calculate_solar_declination)
print(p_ep_zwalm[['Julian_Number', 'solar_declination']][20:30])


datetime64[ns]
            Timestamp  potential_evaporation_sum  total_precipitation_sum
0 1972-07-01 00:00:00                   0.110833                   0.2275
1 1972-07-01 01:00:00                   0.110833                   0.2275
2 1972-07-01 02:00:00                   0.110833                   0.2275
3 1972-07-01 03:00:00                   0.110833                   0.2275
4 1972-07-01 04:00:00                   0.110833                   0.2275
                 Timestamp  potential_evaporation_sum  \
0      1972-07-01 00:00:00                   0.110833   
1      1972-07-01 01:00:00                   0.110833   
2      1972-07-01 02:00:00                   0.110833   
3      1972-07-01 03:00:00                   0.110833   
4      1972-07-01 04:00:00                   0.110833   
...                    ...                        ...   
442699 2022-12-31 19:00:00                   0.227500   
442700 2022-12-31 20:00:00                   0.227500   
442701 2022-12-31 21:00:00  

In [33]:
import xarray as xr
path = current_dir / 'output_caravan' / 'timeseries' / 'netcdf' / 'vlaamsebekken' /'6.nc'

#path = Path('output_caravan/timeseries/netcdf/vlaamsebekken/6.nc')

ds = xr.open_dataset(path)

# Convert to pandas DataFrame if needed
df = ds.to_dataframe()

# Step 1: Convert the index to datetime if it's not already in datetime format
df.index = pd.to_datetime(df.index)

# Step 2: Define the desired date range for the final DataFrame
start_date = pd.to_datetime('1972-07-01')
end_date = pd.to_datetime('2022-12-31')

# Step 3: Filter the DataFrame to include only rows within the date range
df_filtered = df[(df.index >= start_date) & (df.index <= end_date)]

# Step 4: Create a new index with hourly frequency for each day in the range
new_index = pd.date_range(start=start_date, end=end_date + pd.Timedelta(days=1) - pd.Timedelta(hours=1), freq='H')

# Step 5: Repeat each row 24 times for each day to create a new row for every hour
df_expanded = df_filtered.loc[df_filtered.index.repeat(24)].reset_index(drop=True)

# Step 6: Assign the new hourly index to the expanded DataFrame
df_expanded.index = new_index

# Check the resulting DataFrame

p_ep_zwalm['T_avg'] = df_expanded['temperature_2m_mean'].values
print(df_expanded.shape[0])
print(p_ep_zwalm.shape[0])
print(p_ep_zwalm['T_avg'])




442704
442704
0         12.70
1         12.70
2         12.70
3         12.70
4         12.70
          ...  
442699    14.67
442700    14.67
442701    14.67
442702    14.67
442703    14.67
Name: T_avg, Length: 442704, dtype: float32


In [34]:
import xarray as xr

ds = xr.open_dataset(path)

# Convert to pandas DataFrame if needed
df = ds.to_dataframe()

# Step 1: Convert the index to datetime if it's not already in datetime format
df.index = pd.to_datetime(df.index)

# Step 2: Define the desired date range for the final DataFrame
start_date = pd.to_datetime('1972-07-01')
end_date = pd.to_datetime('2022-12-31')

# Step 3: Filter the DataFrame to include only rows within the date range
df_filtered = df[(df.index >= start_date) & (df.index <= end_date)]
df_filtered = df_expanded[['temperature_2m_min', 'temperature_2m_max',]]
df_filtered['T_min'] = df_filtered.groupby(df_filtered.index.to_period('M'))['temperature_2m_min'].transform('min')
df_filtered['T_max'] = df_filtered.groupby(df_filtered.index.to_period('M'))['temperature_2m_max'].transform('max')
df_filtered['T_diff'] = df_filtered['T_max']-df_filtered['T_min']

print(df_filtered)
p_ep_zwalm['T_diff'] = df_filtered['T_diff'].values
print(df_filtered.shape[0])
print(p_ep_zwalm.shape[0])
print(p_ep_zwalm['T_diff'])



                     temperature_2m_min  temperature_2m_max  T_min  T_max  \
1972-07-01 00:00:00               10.10               16.27   9.68  27.83   
1972-07-01 01:00:00               10.10               16.27   9.68  27.83   
1972-07-01 02:00:00               10.10               16.27   9.68  27.83   
1972-07-01 03:00:00               10.10               16.27   9.68  27.83   
1972-07-01 04:00:00               10.10               16.27   9.68  27.83   
...                                 ...                 ...    ...    ...   
2022-12-31 19:00:00               12.83               15.90  -4.92  15.90   
2022-12-31 20:00:00               12.83               15.90  -4.92  15.90   
2022-12-31 21:00:00               12.83               15.90  -4.92  15.90   
2022-12-31 22:00:00               12.83               15.90  -4.92  15.90   
2022-12-31 23:00:00               12.83               15.90  -4.92  15.90   

                     T_diff  
1972-07-01 00:00:00   18.15  
1972-07-01 01:0

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_filtered['T_min'] = df_filtered.groupby(df_filtered.index.to_period('M'))['temperature_2m_min'].transform('min')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_filtered['T_max'] = df_filtered.groupby(df_filtered.index.to_period('M'))['temperature_2m_max'].transform('max')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#return

In [35]:
import xarray as xr

ds = xr.open_dataset(path)

# Convert to pandas DataFrame if needed
df = ds.to_dataframe()


#  Step 1: Convert the index to datetime if it's not already in datetime format
df.index = pd.to_datetime(df.index)

# Step 2: Define the desired date range for the final DataFrame
start_date = pd.to_datetime('1972-07-01')
end_date = pd.to_datetime('2022-12-31')

# Step 3: Filter the DataFrame to include only rows within the date range
df_filtered = df[(df.index >= start_date) & (df.index <= end_date)]
df_filtered = df_filtered[['total_precipitation_sum']]
print(df_filtered)
df_filtered['P_tot'] = df_filtered.groupby(df_filtered.index.to_period('M'))['total_precipitation_sum'].transform('sum')
print(df_filtered)
# Step 4: Create a new index with hourly frequency for each day in the range
new_index = pd.date_range(start=start_date, end=end_date + pd.Timedelta(days=1) - pd.Timedelta(hours=1), freq='H')

# Step 5: Repeat each row 24 times for each day to create a new row for every hour
df_expanded = df_filtered.loc[df_filtered.index.repeat(24)].reset_index(drop=True)

# Step 6: Assign the new hourly index to the expanded DataFrame
df_expanded.index = new_index
print(df_expanded)
p_ep_zwalm['P_tot'] = df_expanded['P_tot'].values
print(p_ep_zwalm['P_tot'])
print(df_filtered['P_tot'])


            total_precipitation_sum
date                               
1972-07-01                     5.46
1972-07-02                     5.45
1972-07-03                     3.05
1972-07-04                     0.60
1972-07-05                     6.34
...                             ...
2022-12-27                     0.41
2022-12-28                     4.30
2022-12-29                     2.67
2022-12-30                     6.77
2022-12-31                     8.56

[18446 rows x 1 columns]
            total_precipitation_sum       P_tot
date                                           
1972-07-01                     5.46  107.339996
1972-07-02                     5.45  107.339996
1972-07-03                     3.05  107.339996
1972-07-04                     0.60  107.339996
1972-07-05                     6.34  107.339996
...                             ...         ...
2022-12-27                     0.41   86.479996
2022-12-28                     4.30   86.479996
2022-12-29                

In [36]:
#path = Path('output_caravan/attributes/vlaamsebekken/attributes_other_vlaamsebekken.csv')
path = current_dir / 'output_caravan' / 'attributes' / 'vlaamsebekken' / 'attributes_other_vlaamsebekken.csv'
lat = pd.read_csv(path)
latitude= lat['gauge_lat']

p_ep_zwalm = p_ep_zwalm.assign(latitude=latitude)
p_ep_zwalm['latitude'] = p_ep_zwalm.iloc[0]['latitude']
# Calculate latitude in radians
p_ep_zwalm['latitude'] = np.radians(p_ep_zwalm['latitude'])
print(p_ep_zwalm['latitude'])
p_ep_zwalm['omega_s'] = np.arccos(-np.tan(p_ep_zwalm['latitude']) * np.tan(p_ep_zwalm['solar_declination']))



def calculate_dr(J):
    d_r = 1 + 0.033 * np.cos(2 * np.pi * J / 365)
    return d_r

p_ep_zwalm['dr'] = p_ep_zwalm['Julian_Number'].apply(calculate_dr)


def calculate_S0(d_r, omega_s, phi, delta):
    S_0 = 15.392 * d_r * (omega_s * np.sin(phi) * np.sin(delta) + np.cos(phi) * np.cos(delta) * np.sin(omega_s))
    return S_0

p_ep_zwalm['S0'] = calculate_S0(p_ep_zwalm['dr'],p_ep_zwalm['omega_s'],p_ep_zwalm['latitude'],p_ep_zwalm['solar_declination'])


def calculate_E(S_0, T_avg_t, T_diff_t_prime, P_t_prime):
    E_t = 0.0013 * S_0 * (T_avg_t + 17.0) * (T_diff_t_prime - 0.0123 * P_t_prime)**0.76
    return E_t
p_ep_zwalm['potential_evaporation_sum'] = (calculate_E(p_ep_zwalm['S0'].values,p_ep_zwalm['T_avg'].values,p_ep_zwalm['T_diff'].values,p_ep_zwalm['P_tot'].values))/24


print(p_ep_zwalm['P_tot'])
print(p_ep_zwalm['potential_evaporation_sum'].head())



0         0.888118
1         0.888118
2         0.888118
3         0.888118
4         0.888118
            ...   
442699    0.888118
442700    0.888118
442701    0.888118
442702    0.888118
442703    0.888118
Name: latitude, Length: 442704, dtype: float64
0         107.339996
1         107.339996
2         107.339996
3         107.339996
4         107.339996
             ...    
442699     86.479996
442700     86.479996
442701     86.479996
442702     86.479996
442703     86.479996
Name: P_tot, Length: 442704, dtype: float32
0    0.046165
1    0.046189
2    0.046214
3    0.046239
4    0.046264
Name: potential_evaporation_sum, dtype: float64


In [37]:
print(p_ep_zwalm['omega_s'])

0         1.066282
1         1.066474
2         1.066667
3         1.066860
4         1.067053
            ...   
442699    2.001494
442700    2.001228
442701    2.000961
442702    2.000695
442703    2.000428
Name: omega_s, Length: 442704, dtype: float64


In [38]:
# Define the path where you want to save the pickle file
file_path = r'E:\Users\ddknop\PDM_finaal_EP\master_thesis\data\Zwalm_data\preprocess_output\p_ep_with_adaptions_ep.pkl'

# Save the DataFrame as a pickle file
p_ep_zwalm.to_pickle(file_path)

print(f"DataFrame saved to {file_path}")


DataFrame saved to E:\Users\ddknop\PDM_finaal_EP\master_thesis\data\Zwalm_data\preprocess_output\p_ep_with_adaptions_ep.pkl
