In [1]:
import os
import sys; sys.path.append('/rds/general/user/cb2714/home/')
from wwa import *

---
## **CORDEX**

Extract rectangular region over Italy from raw data downloaded using synda

In [2]:
# lat-lon bounds of the box to extract: longitude from xn to xx, latitude from yn to yx
xn, xx = 5, 20
yn, yx = 35, 50

# variable to read from each file, and the units it is converted to
varnm = "pr"
units = "mm/day"

# raw files downloaded by synda; keep those where characters [-20:-16] of the path
# (presumably the start year of the filename's date range) are no later than 2050
synda_fl = [
    raw_path
    for raw_path in glob.glob("../synda/data/CORDEX/EUR-11/*/*/*/day/"+varnm+"/*.nc")
    if int(raw_path[-20:-16]) <= 2050
]

# netcdf files already present in the working directory
fl = glob.glob("*.nc")

# raw files that do not yet have a same-named file in the working directory
fl_to_process = [
    raw_path
    for raw_path in synda_fl
    if raw_path.split("/")[-1] not in fl
]

In [30]:
# Loop over unprocessed files (mf approach): convert units, tidy the coordinates,
# cut each file down to the lat-lon box and save it under the same filename in the
# working directory. One "." is printed per file written.
for fnm in fl_to_process:

    out_fnm = os.path.basename(fnm)

    # skip files written since fl_to_process was built, so the cell can be re-run safely
    if os.path.exists(out_fnm): continue

    # open in a context manager so the file handle is released after every file
    with xr.open_dataset(fnm) as ds:

        da = convert_units_to(ds[varnm], units)

        # names of the horizontal dimensions differ between files
        if "rlon" in da.dims:
            x_dim, y_dim = ["rlon", "rlat"]
        elif "x" in da.dims:
            x_dim, y_dim = ["x", "y"]
        else:
            # fail loudly: otherwise x_dim/y_dim would silently carry over from the previous file
            raise ValueError("No rlon/rlat or x/y dimensions found in "+fnm+" (dims: "+str(da.dims)+")")

        # MPI-ESM-LR_rcp85_r1i1p1_IPSL-WRF381P has no x or y coords - use dummy coords to match other WRF381P run
        if (da[x_dim] == 0).all():
            da = da.assign_coords({x_dim : list(np.arange(0, len(da[x_dim])*12.5, 12.5))})
        if (da[y_dim] == 0).all():
            da = da.assign_coords({y_dim : list(np.arange(0, len(da[y_dim])*12.5, 12.5))})

        # standardise the names of the latitude / longitude coordinates
        if "latitude" in da.coords: da = da.rename(latitude = "lat")
        if "longitude" in da.coords: da = da.rename(longitude = "lon")

        # fix typos in coordinates (mainly CNRM-CM5_ALADIN63): if any value is repeated,
        # rebuild the axis from its min to its max using the spacing of the first two values
        if len(np.unique(da[x_dim])) != len(da[x_dim]):
            da = da.assign_coords({x_dim : list(np.arange(da[x_dim].min(), da[x_dim].max()+1, np.diff(da[x_dim][:2])[0]))})
        if len(np.unique(da[y_dim])) != len(da[y_dim]):
            da = da.assign_coords({y_dim : list(np.arange(da[y_dim].min(), da[y_dim].max()+1, np.diff(da[y_dim][:2])[0]))})

        # trim data to area required: flag the cells strictly inside the box, then keep
        # only the rows / columns of the grid that contain at least one flagged cell
        sp_mask = (da.lat > yn) & (da.lat < yx) & (da.lon > xn) & (da.lon < xx)
        # 'how' is passed by keyword: recent xarray versions no longer accept it positionally
        sp_mask = sp_mask.where(sp_mask == 1).dropna(x_dim, how = "all").dropna(y_dim, how = "all")
        da = da.sel({x_dim : sp_mask[x_dim], y_dim : sp_mask[y_dim], "time" : slice(None, "2050")})

        # save individual files to netcdf (quicker than compiling in advance);
        # written inside the with-block, while the source file is still open
        da.to_netcdf(out_fnm)

    print(".", end = "")

print("")




### **Check completed runs to uninstall**

In [4]:
# Print every raw synda directory for which the number of matching files in the working
# directory equals the number of raw files (up to 2050) in that directory.
for fpath in glob.glob("../synda/data/CORDEX/EUR-11/*/*/*/day/pr/"):

    # apply the year filter BEFORE the emptiness check: a directory that only holds
    # files after 2050 would otherwise leave an empty list and fail on synda_fl[0]
    synda_fl = [fnm for fnm in glob.glob(fpath+"*.nc") if int(fnm[-20:-16]) <= 2050]
    if len(synda_fl) == 0: continue

    # filename of the first raw file without its last underscore-separated field,
    # with "historical" / "rcp85" replaced by a wildcard
    fnm_stem = "_".join(synda_fl[0].split("/")[-1].split("_")[:-1])
    fnm_patn = re.sub("rcp85", "*", re.sub("historical", "*", fnm_stem+"*.nc"))

    # count the matching files in the working directory once and reuse the result
    n_processed = len(glob.glob(fnm_patn))
    if n_processed == len(synda_fl):
        print(fpath, n_processed, len(synda_fl))

### **List available runs, get timeseries & spatial pattern**

In [38]:
# For every run with files in the working directory, write two files to cordex/:
#   <run>_ts.nc - mean over the horizontal dimensions of the cells inside the shapefile region
#   <run>_sp.nc - mean per calendar month of the data between 1980 and 2022
sf = gpd.read_file("sf_emilia-romagna/")

# one name per run: filename without its last underscore-separated field, with
# "historical" replaced by "rcp85"; sorted so that the processing order is reproducible
run_list = sorted(set([re.sub("historical", "rcp85", "_".join(fnm.split("_")[:-1])) for fnm in glob.glob("*.nc")]))

# make sure the output directory exists before anything is written to it
os.makedirs("cordex", exist_ok = True)

for run in run_list:

    # run excluded explicitly (reason not recorded here)
    if run in ["pr_EUR-11_MOHC-HadGEM2-ES_rcp85_r1i1p1_ICTP-RegCM4-6_v1_day"]: continue
    # skip runs for which both output files already exist
    if len(glob.glob("cordex/"+run+"*.nc")) == 2: continue

    print(run)
    fl = glob.glob(re.sub("rcp85", "*", run)+"*.nc")

    # open in a context manager so the file handles are released after every run
    with xr.open_mfdataset(fl) as ds:

        da = ds.pr

        # names of the horizontal dimensions differ between files
        if "rlon" in da.dims:
            x_dim, y_dim = ["rlon", "rlat"]
        elif "x" in da.dims:
            x_dim, y_dim = ["x", "y"]
        else:
            # fail loudly: otherwise x_dim/y_dim would silently carry over from the previous run
            raise ValueError("No rlon/rlat or x/y dimensions found for "+run+" (dims: "+str(da.dims)+")")

        # get timeseries over Emilia-Romagna region
        rm = regionmask.mask_3D_geopandas(sf, da.lon, da.lat).squeeze(drop = True)
        ts = da.where(rm).mean([x_dim, y_dim])
        ts.to_netcdf("cordex/"+run+"_ts.nc")

        # monthly spatial patterns (not sure which months we'll need, so keep all)
        sp = da.sel(time = slice("1980", "2022")).groupby("time.month").mean()
        sp.to_netcdf("cordex/"+run+"_sp.nc")