In [1]:
import glob, re, regionmask, geopandas as gpd, numpy as np, xarray as xr
xr.set_options(keep_attrs = True)

from xclim.core.units import convert_units_to

#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
def wrap_lon(ds):
    """
    Return `ds` with longitudes expressed as -180:180 rather than 0:360.
    
    ds: object with a `lon` coordinate (in this notebook, the DataArray returned by xr.concat).
    
    If no longitude exceeds 180 the input is returned unchanged. Otherwise a new object is returned
    with wrapped longitudes (attributes kept); the object passed in is not modified. When `lon` is a
    dimension the result is also sorted into ascending longitude order.
    """
    
    # any longitude above 180 can only come from the 0:360 convention (this also catches regional subsets whose maximum is well below 360)
    if ds.lon.max() > 180:
        wrapped = ((ds.lon.values + 180) % 360) - 180
        
        # assign_coords returns a new object, so the caller's data is left untouched
        ds = ds.assign_coords(lon = (ds.lon.dims, wrapped, ds.lon.attrs))
        
        # can only sort along lon if it is a dimension - if only a coordinate, should already be in correct order
        if "lon" in ds.dims:
            ds = ds.sortby("lon")
    return ds

In [2]:
# variable to extract, and the units it is converted to after loading
varnm, var_units = "pr", "mm/day"

# study region: pair of longitude bounds and pair of latitude bounds
sr_lon, sr_lat = [2,25], [4,17]

# larger region used for the spatial patterns
sp_lon, sp_lat = [-13,27], [-5,25]

# period whose start & stop labels are written into the spatial-pattern filename
obs_dates = slice("1981", "2022")

---
### **Extract time series & monthly spatial patterns from raw data**
```
synda install domain=AFR-44,AFR-22 frequency=day variable=pr experiment=rcp85,historical
```

In [6]:
# every precip directory found for each domain, relabelled as rcp85 so that a model with both
# a historical and a scenario run appears only once in the (sorted) list
dirs_44 = glob.glob("../synda/data/cordex/output/AFR-44/*/*/*/*/*/*/day/pr")
dirs_22 = glob.glob("../synda/data/cordex/output/AFR-22/*/*/*/*/*/*/day/pr")
afr44 = sorted({ re.sub("historical", "rcp85", dnm) for dnm in dirs_44 })
afr22 = sorted({ re.sub("historical", "rcp85", dnm) for dnm in dirs_22 })

# shapefiles for the three regions that time series are extracted over
sf_chad, sf_lagdo, sf_niger = (
    gpd.read_file("sf_lake-chad"),
    gpd.read_file("sf_lagdo-dam"),
    gpd.read_file("sf_lower-niger"),
)

# shapefile to use for each region, listed in the order the regions are processed
region_shapes = { "lagdo" : sf_lagdo, "chad" : sf_chad, "niger" : sf_niger }

# set to True to recompute time series even when the output file already exists
overwrite = False

# NOTE(review): the [11:12] slice means only one of the listed model directories is processed per run - confirm this is intended
for fpath in (afr44 + afr22)[11:12]:
    
    print(fpath, end = ": ")
    
    # list all files (RCP85 & historical) - historical files are looked for in the matching "historical" directory
    fl_hist = sorted(glob.glob(re.sub("rcp85", "historical", fpath)+"/*/*.nc"))
    fl_rcp = sorted(glob.glob(fpath+"/*/*.nc"))
    
    # check that files actually exist: both experiments are needed, so skip the model if either is missing
    if len(fl_hist + fl_rcp) == 0: 
        print("no files")
        continue
    if len(fl_hist) == 0:
        print("no historical data")
        continue
    if len(fl_rcp) == 0: 
        print("no scenario data")
        continue
    
    # construct new filename: start from the first historical filename, replace its end date with the
    # 8 characters preceding ".nc" in the last scenario filename, relabel the experiment & save under cordex/
    ts_fnm = re.sub("historical", "hist-rcp85", "cordex/"+re.sub("-[0-9]{8}", "-"+fl_rcp[-1][-11:-3], fl_hist[0].split("/")[-1]))
    
    # filename for the spatial patterns (currently only used by the disabled code below)
    sp_fnm = re.sub("[0-9]{8}-[0-9]{8}", "-".join([obs_dates.start, obs_dates.stop]), re.sub(varnm, varnm+"-spatial", ts_fnm))    
    
#     if len(glob.glob(re.sub("day_.+", "*", re.sub("spatial", "*", sp_fnm)))) == 4:
#         print("already processed")
#         continue
        
    # load the data separately (some files need units fixing)
    da_hist = convert_units_to(xr.open_mfdataset(fl_hist)[varnm], var_units)
    da_rcp = convert_units_to(xr.open_mfdataset(fl_rcp)[varnm], var_units)
    
    # fix persistent unit errors (some files have precip in kg/m/s even after conversion)
    # a maximum below 0.1 is taken to mean the values are still per second, so scale by seconds per day
    if varnm == "pr":
        if da_hist.max() < 0.1: da_hist = da_hist * 86400
        if da_rcp.max() < 0.1: da_rcp = da_rcp * 86400
            
    da = wrap_lon(xr.concat([da_hist, da_rcp], "time"))
    
    # identify the horizontal dimensions to average over
    try:
        # NOTE(review): presumably the cf_xarray accessor, which no visible cell imports - if it is missing this always falls through to the name-based fallback
        xy_dims = da.cf.axes["X"] + da.cf.axes["Y"]
    except Exception:
        # fall back on common dimension names (Exception rather than a bare except, so that interrupts are not swallowed)
        if "rlat" in da.dims:
            xy_dims = ["rlat", "rlon"]
        elif "x" in da.dims:
            xy_dims = ["x", "y"]
        elif "lat" in da.dims and "lon" in da.dims:
            # regular lat-lon grid
            xy_dims = ["lon", "lat"]
        else:
            print("xy_dims undefined")
            continue
            
    # record files used (including version number)
    da = da.assign_attrs(file_list = ", ".join([re.sub(".+"+varnm+"/", "", fnm) for fnm in fl_hist + fl_rcp]))
        
    for rnm, sf in region_shapes.items():
        
        r_fnm = re.sub(varnm, varnm+"-"+rnm, ts_fnm)

        # skip if time series already exists (unless overwrite has been requested)
        if overwrite or len(glob.glob(r_fnm)) == 0:
            
            # mask the grid cells inside the region, then average over the horizontal dimensions
            rm = regionmask.mask_3D_geopandas(sf, da.lon, da.lat, drop = False).squeeze(drop = True)
            ts = da.where(rm).mean(xy_dims)
            ts.to_netcdf(r_fnm)
            print(rnm, end = " ")
        
#     # skip if spatial pattern already exists
#     if len(glob.glob(sp_fnm)) == 0:
#         # extract region for evaluation of spatial patterns
#         spatial_region = np.logical_and(np.logical_and(ds.lat >= sp_lat[0], ds.lat <= sp_lat[1]), np.logical_and(ds.lon >= sp_lon[0], ds.lon <= sp_lon[1]))
#         sp = pr.where(spatial_region == 1).sel(time = obs_dates).groupby("time.month").mean().dropna(xy_dims[0], "all").dropna(xy_dims[1], "all")
#         sp.to_netcdf(sp_fnm)
#         print("spatial", end = " ")
        
    print("")
    
print("Done.")

../synda/data/cordex/output/AFR-44/ICTP/MPI-M-MPI-ESM-MR/rcp85/r1i1p1/RegCM4-3/v4/day/pr: lagdo chad niger 
Done.
