In [None]:
import pandas as pd
import xarray as xr
from pathlib import Path
from datetime import datetime

In [None]:
# configuration
variable = 'rowe'
# NOTE(review): absolute, machine-specific path; adjust when running elsewhere
base_path = Path('Z:/nahaUsers/casadje/GloFASv4/long_run')
start_date = datetime.strptime('1991-01-01', '%Y-%m-%d')
end_date = datetime.strptime('1991-12-31', '%Y-%m-%d')
output_dir = base_path / variable / 'thresholds'
output_file = output_dir / f'{variable}_monthly_avg.nc'

# create output directory
output_dir.mkdir(parents=True, exist_ok=True)

# Years of the yearly files needed to cover the study period.
# After loading, the timestamps are moved one day back (beginning-of-timestep
# convention), so the last day of the period is stored under the timestamp of the
# following day. Assuming the yearly files are split by the original timestamp
# (TODO confirm), that day can live in the file of the next year. Including that
# file is harmless: the data are cropped to [start_date, end_date] after loading.
first_year = start_date.year
last_year = (end_date + pd.Timedelta(days=1)).year

# list of input NetCDF files: keep only yearly files named '<variable>_<year>.nc'.
# Other files matched by the glob (e.g. 'rowe_2024_intermediate.nc', which sits in
# the same folder) are skipped so they are not merged with the yearly file.
prefix = f'{variable}_'
files = []
for file in sorted((base_path / variable).glob(f'{prefix}*.nc')):
    year = file.stem[len(prefix):]
    if year.isdecimal() and first_year <= int(year) <= last_year:
        files.append(file)
if not files:
    raise FileNotFoundError(
        f"No files found in {base_path / variable} matching pattern "
        f"'{variable}_<year>.nc' for years {first_year}-{last_year}."
    )

# lazily open all the selected files as a single dataset, keep only the variable
# of interest and harmonise the coordinate names
ds = (
    xr.open_mfdataset(files, engine='netcdf4', chunks='auto')[variable]
    .rename({'valid_time': 'time', 'latitude': 'lat', 'longitude': 'lon'})
)

# move every timestamp one day back (beginning-of-timestep convention)
ds = ds.assign_coords(time=ds['time'] - pd.Timedelta(days=1))

# keep only the study period (applied on the shifted timestamps)
ds = ds.sel(time=slice(start_date, end_date))

In [None]:
# inspect the first year of the study period (the lower bound used to select input files)
start_date.year

In [None]:
# aggregate to monthly means, ignoring missing values
# NOTE(review): recent pandas releases deprecate the 'M' alias in favour of 'ME';
# confirm against the installed version before changing it
ds_monthly = (
    ds
    .resample(indexer={'time': '1M'})
    .mean(skipna=True)
)

In [None]:
# rechunk: one chunk per monthly step, automatic chunk sizes along both spatial dimensions
ds_monthly = ds_monthly.chunk(
    chunks={'time': 1, 'lat': 'auto', 'lon': 'auto'}
)

In [None]:
# average every monthly field over the two spatial dimensions and convert the
# result to a pandas object
# NOTE(review): this is a plain (unweighted) mean over grid cells; confirm that
# no area weighting is intended
serie = (
    ds_monthly
    .mean(dim=['lat', 'lon'], skipna=True)
    .to_pandas()
)

In [None]:
# size of the averaged series
serie.shape

In [None]:
# give the series a name (used when it is displayed)
serie = serie.rename('runoff_mm')

In [None]:
# preview the first rows of the series
serie.head()

In [None]:
# plot the monthly series; add a y-label and a title so the figure can be read on its own
ax = serie.plot()
ax.set(ylabel=serie.name, title=f'{variable}: monthly mean, averaged over lat/lon');

***

In [None]:
import numpy as np
import matplotlib.pyplot as plt

In [None]:
# February 2024 from 'rowe_2024.nc', averaged over the whole grid for every time step
consolidated = (
    xr.open_dataset(base_path / 'rowe' / 'rowe_2024.nc', chunks='auto')['rowe']
    .sel(valid_time=slice('2024-02-01', '2024-02-29'))
    .mean(['latitude', 'longitude'], skipna=True)
    .compute()
)

In [None]:
# display the averaged series from the 'rowe_2024.nc' file
consolidated

In [None]:
# February 2024 from 'rowe_2024_intermediate.nc', averaged over the whole grid for every time step
intermediate = (
    xr.open_dataset(base_path / 'rowe' / 'rowe_2024_intermediate.nc', chunks='auto')['rowe']
    .sel(valid_time=slice('2024-02-01', '2024-02-29'))
    .mean(['latitude', 'longitude'], skipna=True)
    .compute()
)

In [None]:
# display the averaged series from the 'rowe_2024_intermediate.nc' file
intermediate

In [None]:
# draw both February 2024 series on one axis; label each line and add a legend
# so the two sources can be told apart
fig, ax = plt.subplots()
consolidated.plot(ax=ax, label='consolidated')
intermediate.plot(ax=ax, label='intermediate')
ax.set_title('rowe, February 2024: spatial mean')
ax.legend();

In [None]:
# minimum of the consolidated series; 'latitude' and 'longitude' were already
# averaged out when the series was built, so the reduction runs over whatever
# dimensions remain instead of naming dimensions that no longer exist
consolidated.min()

In [None]:
# minimum of the intermediate series; 'latitude' and 'longitude' were already
# averaged out when the series was built, so the reduction runs over whatever
# dimensions remain instead of naming dimensions that no longer exist
intermediate.min()

In [None]:
# check whether the two series agree everywhere within an absolute tolerance of 0.01
# (np.isclose also applies its default relative tolerance); values are compared by
# position, so both arrays are assumed to cover the same time steps (TODO confirm)
np.isclose(consolidated, intermediate, atol=1e-2).all()