In [None]:
import xarray as xr
import numpy as np
import pandas as pd
from pathlib import Path

def open_and_regrid(folder_path, variable, coarse_grid_file):
    """Open every NetCDF file under a folder and stack one variable along time.

    Files whose ``lat``/``lon`` sizes differ from the reference grid are
    interpolated onto the reference grid before concatenation.

    Parameters
    ----------
    folder_path : str or path-like
        Directory searched recursively for ``*.nc`` files.
    variable : str
        Name of the data variable to keep from each file.
    coarse_grid_file : str or path-like
        NetCDF file whose ``lat`` and ``lon`` define the target grid.

    Returns
    -------
    xarray.Dataset
        Dataset holding only ``variable``, sorted by ``time`` with duplicate
        time stamps dropped.

    Raises
    ------
    ValueError
        If no ``*.nc`` file is found under ``folder_path``.
    """
    files = sorted(Path(folder_path).rglob("*.nc"))
    if not files:
        # Fail early with a readable message; xr.concat would otherwise raise
        # a less specific ValueError on the empty list further down.
        raise ValueError(f"No NetCDF (*.nc) files found under {folder_path!r}")

    # Only the target coordinates are needed, so load them into memory and
    # release the file handle immediately.
    with xr.open_dataset(coarse_grid_file, engine="netcdf4") as coarse_grid:
        coarse_lat = coarse_grid.lat.load()
        coarse_lon = coarse_grid.lon.load()

    ds_list = []
    for f in files:
        # The per-file datasets stay open on purpose: the chunked (lazy)
        # arrays returned to the caller still read from them.
        ds = xr.open_dataset(f, chunks={'time': -1}, engine="netcdf4")
        needs_regrid = (
            ds.lat.size != coarse_lat.size or ds.lon.size != coarse_lon.size
        )
        # Select the variable first so interpolation is not spent on data
        # variables that are discarded anyway.
        ds = ds[[variable]]
        if needs_regrid:
            ds = ds.interp(lat=coarse_lat, lon=coarse_lon)
        ds_list.append(ds)

    ds_all = xr.concat(ds_list, dim="time")
    ds_all = ds_all.sortby("time")
    ds_all = ds_all.drop_duplicates(dim="time")
    return ds_all

def compute_daily(ds, variable):
    """Return the daily mean of ``ds[variable]``.

    The selected variable is resampled along ``time`` into ``"1D"`` bins and
    each bin is averaged.
    """
    field = ds[variable]
    by_day = field.resample(time="1D")
    return by_day.mean()

def compute_weekly(ds_daily):
    """Average daily data into 7-day blocks counted from 1 January of each year.

    Week numbers are ``((dayofyear - 1) // 7) + 1``, so they restart every
    year and the last block of a year is shorter than 7 days. These are not
    ISO calendar weeks.

    Parameters
    ----------
    ds_daily : xarray.DataArray or xarray.Dataset
        Data with a ``time`` coordinate supporting the ``.dt`` accessor.

    Returns
    -------
    Same type as ``ds_daily``
        Means grouped by a ``week_id`` label of the form ``"<year>_<week>"``.
        Labels are strings, so the groups are ordered lexicographically.
    """
    year = ds_daily.time.dt.year
    doy = ds_daily.time.dt.dayofyear
    week_in_year = ((doy - 1) // 7) + 1
    # Build the labels in plain Python: adding numpy unicode arrays with `+`
    # is not supported on every NumPy version.
    labels = [f"{y}_{w}" for y, w in zip(year.values, week_in_year.values)]
    week_id = xr.DataArray(
        labels,
        coords={"time": ds_daily.time},
        dims="time",
        name="week_id",  # groupby needs a named array to label the new dimension
    )
    return ds_daily.groupby(week_id).mean(dim="time")

def compute_daily_climatology(ds_daily):
    """Return the mean over ``time`` for each day of year.

    Parameters
    ----------
    ds_daily : xarray.DataArray or xarray.Dataset
        Data with a ``time`` coordinate supporting the ``.dt`` accessor.

    Returns
    -------
    Same type as ``ds_daily``
        Climatology in which ``time`` is replaced by a ``dayofyear`` dimension.
    """
    doy = ds_daily["time"].dt.dayofyear
    clim = ds_daily.groupby(doy).mean("time")
    return clim

def compute_daily_anomalies(ds_daily, clim):
    """Subtract a day-of-year climatology from daily data.

    Parameters
    ----------
    ds_daily : xarray.DataArray or xarray.Dataset
        Data with a ``time`` coordinate supporting the ``.dt`` accessor.
    clim : xarray.DataArray or xarray.Dataset
        Climatology indexed by ``dayofyear``; presumably the output of a
        groupby-mean over ``time.dt.dayofyear`` of the same data.

    Returns
    -------
    Same type as ``ds_daily``
        Each time step minus the climatological value for its day of year.
    """
    doy = ds_daily["time"].dt.dayofyear
    # Grouped arithmetic aligns every time step with its matching day of year.
    return ds_daily.groupby(doy) - clim

def save_netcdf(da, dataset, variable, freq, period, area, resolution, out_dir="outputs"):
    """Write ``da`` to a NetCDF file whose name is built from the given tags.

    The file is named
    ``<dataset>_<variable>_<freq>_<period>_<area>_<resolution>.nc`` and placed
    in ``out_dir``, which is created (including parents) when missing.
    """
    target_dir = Path(out_dir)
    target_dir.mkdir(parents=True, exist_ok=True)

    tags = (dataset, variable, freq, period, area, resolution)
    filename = "_".join(f"{tag}" for tag in tags) + ".nc"
    da.to_netcdf(target_dir / filename)



In [9]:

# --- Run configuration ------------------------------------------------------
dataset = "hycom"
variables = ["salinity", "water_temp"]
period = "2015-2024"
area = "0N_90W_20S_70E"
resolution = "0.08deg"

folder_path = "/home/jupyter-daniela/suyana/sources/hycom/"
coarse_grid_file = "/home/jupyter-daniela/suyana/sources/hycom/2015/hycom_201501.nc"

# Filename tags shared by every product written in this run.
naming = {"period": period, "area": area, "resolution": resolution}

# NOTE(review): compute_weekly, compute_daily_climatology and
# compute_daily_anomalies must be defined in the session before this cell runs.
for var in variables:
    ds = open_and_regrid(folder_path, var, coarse_grid_file)

    # Each product is written right after it is computed, so earlier outputs
    # are already on disk if a later step fails.
    daily = compute_daily(ds, var)
    save_netcdf(daily, dataset, var, "daily", **naming)

    weekly = compute_weekly(daily)
    save_netcdf(weekly, dataset, var, "weekly", **naming)

    clim = compute_daily_climatology(daily)
    save_netcdf(clim, dataset, var, "climatology-daily", **naming)

    anomalies = compute_daily_anomalies(daily, clim)
    save_netcdf(anomalies, dataset, var, "anom-daily", **naming)

  ds_all = xr.concat(ds_list, dim="time")
  ds_all = xr.concat(ds_list, dim="time")
