In [5]:
import xarray as xr, matplotlib.pyplot as plt
import glob, re
import os

from  IPython.display import clear_output

# variable to process & the units it is converted to when the data is compiled
varnm = "sfcWindmax"
units = "m/s"

# define region to subset: the four bounds are handed to cdo_cordex.sh in the order xn, xx, yn, yx
# NOTE(review): presumably lon min/max followed by lat min/max - confirm against cdo_cordex.sh
xn, xx, yn, yx = 19, 38, 33, 46

# root of the synda download; searched below as <fpath>/*/*/*/<varnm>/*.nc
fpath = "/rds/general/user/cb2714/home/00_WWA_project_folder/ephemeral/synda_Mariam/data/EUR-11"

# directory holding the subsetted (not yet compiled) files;
# built from varnm so that switching variable cannot leave the two out of sync
outpath = "/rds/general/ephemeral/project/wwa/ephemeral/EUR_fire/cordex/"+varnm+"/uncompiled"

In [None]:
# NOTE(review): prints a bare "!" and nothing else - looks like a leftover scratch cell; consider deleting
print("!")

# Pre-processing sfcWindmax downloaded from synda

In [6]:
# Build the list of models that have both a historical & an rcp85 run.
# A model is identified by the three directory levels directly above the variable folder.
fl_rcp = glob.glob(fpath+"/*/*/*/"+varnm+"/*rcp85*.nc")
fl_hist = glob.glob(fpath+"/*/*/*/"+varnm+"/*historical*.nc")

# unique model keys per scenario (any "historical" in a key is rewritten to "rcp85" so the keys compare equal)
mdl_rcp = list({"/".join(p.split("/")[-5:-2]) for p in fl_rcp})
mdl_hist = list({re.sub("historical", "rcp85", "/".join(p.split("/")[-5:-2])) for p in fl_hist})

# keep only the keys present under both scenarios, in alphabetical order
mlist = sorted(set(mdl_rcp).intersection(mdl_hist))

In [7]:
# write the model list as plain text (one model per line) for looping over later
# the with-block closes the file on exit, so no explicit close() is needed
with open('cordex-models.txt', 'w') as f:
    f.writelines('%s\n' %m for m in mlist)

## Extract data subset with cdo

In [None]:
# Load the cdo module, then run cdo_cordex.sh in a shell; IPython replaces each $name on the
# line with the value of the Python variable of that name before the shell sees it.
# NOTE(review): cdo_cordex.sh is not shown here - presumably it cuts the xn/xx/yn/yx box out of
# the files under fpath and writes the results to outpath; confirm against the script.
! module load cdo; bash cdo_cordex.sh $varnm $xn $xx $yn $yx $fpath $outpath
# wipe the shell output from the cell once the command returns, leaving only the marker below
clear_output(wait = False)
print("Done.")

CNRM-CERFACS-CNRM-CM5/r1i1p1/ALADIN63
CNRM-CERFACS-CNRM-CM5/r1i1p1/COSMO-crCLIM-v1-1
cdo sellonlatbox: Processed 318980288 values from 1 variable over 1826 timesteps ( 7.68s )
cdo sellonlatbox: Processed 319154976 values from 1 variable over 1827 timesteps ( 7.68s )
cdo sellonlatbox: Processed 318980288 values from 1 variable over 1826 timesteps ( 7.67s )
cdo sellonlatbox: Processed 318980288 values from 1 variable over 1826 timesteps ( 7.66s )
cdo sellonlatbox: Processed 318980288 values from 1 variable over 1826 timesteps ( 7.66s )
cdo sellonlatbox: Processed 319154976 values from 1 variable over 1827 timesteps ( 7.68s )
cdo sellonlatbox: Processed 318980288 values from 1 variable over 1826 timesteps ( 7.65s )
cdo sellonlatbox: Processed 318980288 values from 1 variable over 1826 timesteps ( 7.68s )
cdo sellonlatbox: Processed 318980288 values from 1 variable over 1826 timesteps ( 7.69s )
cdo sellonlatbox: Processed 319154976 values from 1 variable over 1827 timesteps ( 7.66s )
cdo s

## Compile & clean data

In [None]:
# remove weird annoying duplicated year
# NOTE(review): `snm` is not defined in any cell visible here, and this /scratchx location differs
# from `outpath` used by the other cells - looks carried over from another setup; confirm the
# target path before running.
# (the backslash presumably stops IPython reading `varnm_EUR` as the variable name - verify)
! rm /scratchx/cbarnes/cordex/$snm/MOHC-HadGEM2-ES/$varnm\_EUR-11_MOHC-HadGEM2-ES_rcp85_r1i1p1_ICTP-RegCM4-6_v1_day_20051201-20051230.nc

In [None]:
# Lookup tables mapping the full GCM / RCM identifiers to shorter labels,
# used to keep the compiled filenames to a manageable length.

# full GCM name -> short label
gcm_short = {
    "CCCma-CanESM2": "CanESM2",
    "CNRM-CERFACS-CNRM-CM5": "CNRM-CM5",
    "ICHEC-EC-EARTH": "EC-EARTH",
    "IPSL-IPSL-CM5A-MR": "IPSL-CM5A-MR",
    "MIROC-MIROC5": "MIROC5",
    "MOHC-HadGEM2-ES": "HadGEM2-ES",
    "MPI-M-MPI-ESM-LR": "MPI-ESM-LR",
    "NCC-NorESM1-M": "NorESM1-M",
}

# full RCM name -> short label
rcm_short = {
    "CLMcom-CCLM4-8-17": "CCLM4-8-17",
    "CLMcom-ETH-COSMO-crCLIM-v1-1": "COSMO-crCLIM-v1-1",
    "CNRM-ALADIN53": "ALADIN53",
    "CNRM-ALADIN63": "ALADIN63",
    "DMI-HIRHAM5": "HIRHAM5",
    "GERICS-REMO2015": "REMO2015",
    "ICTP-RegCM4-6": "RegCM4-6",
    "IPSL-WRF381P": "WRF381P",
    "KNMI-RACMO22E": "RACMO22E",
    "MOHC-HadREM3-GA7-05": "HadREM3-GA7-05",
    "MPI-CSC-REMO2009": "REMO2009",
    "SMHI-RCA4": "RCA4",
    "UHOH-WRF361H": "WRF361H",
    "RMIB-UGent-ALARO-0": "ALARO-0",
}

In [None]:
# get list of models with available data
# Each entry has the form "<domain>_<gcm>_*_<ens>_<rcm>_<rcmv>": the scenario is replaced by "*"
# so that the historical & rcp85 files of one model collapse onto a single entry.
# The fields are taken from the file's basename (dropping the leading variable name and the two
# trailing fields), so underscores in the directory path cannot shift them.
# All files are scanned: slicing the glob result would pick an arbitrary subset, because glob
# returns files in no particular order.
mlist = sorted({"_".join(re.sub("historical|rcp85", "*", fnm.rsplit("/", 1)[-1]).split("_")[1:-2])
                for fnm in glob.glob(outpath+"/"+varnm+"*.nc")})

In [None]:
# loop over models & compile
# For each entry of mlist ("<domain>_<gcm>_*_<ens>_<rcm>_<rcmv>") the matching files in outpath
# are combined into one netcdf named with the shortened GCM/RCM labels and the overall date range.

# runs that keep crashing the whole process when compiling pr, so skipped: (gcm, ens, rcm, rcmv)
pr_skip = [("MIROC-MIROC5", "r1i1p1", "UHOH-WRF361H", "v1"),
           ("MPI-M-MPI-ESM-LR", "r1i1p1", "UHOH-WRF361H", "v1"),
           ("MPI-M-MPI-ESM-LR", "r1i1p1", "MPI-CSC-REMO2009", "v1")]

for m in mlist:

    domain,gcm,sc,ens,rcm,rcmv = m.split("_")

    # compare on the individual fields: m carries the domain and a "*" in place of the scenario,
    # so it can never equal a "<gcm>_rcp85_..." string directly
    if varnm == "pr" and (gcm, ens, rcm, rcmv) in pr_skip: continue

    print(m, end = " - ")

    # m already holds "*" where the scenario goes, so it doubles as the glob pattern; including
    # domain & GCM keeps runs of the same RCM driven by different GCMs apart
    fl = sorted(glob.glob(outpath+"/"+varnm+"_"+m+"_*.nc"))
    # filenames end in "<start:8 digits>-<end:8 digits>.nc", so [-20:-16] is the start year
    fl = [fnm for fnm in fl if int(fnm[-20:-16]) < 2070] # exclude runs starting after 2070
    if len(fl) == 0:
        print("no data")
        continue

    # first start date & last end date across the (sorted) files
    date_range = fl[0][-20:-12]+"-"+fl[-1][-11:-3]

    new_fnm = "/rds/general/ephemeral/project/wwa/ephemeral/EUR_fire/cordex/"+varnm+"/"+varnm+"_"+domain+"_"+gcm_short[gcm]+"_"+ens+"_"+rcm_short[rcm]+"_"+date_range+".nc"
    if os.path.exists(new_fnm):
        print("already processed")
        continue

    print(new_fnm)

    # NOTE(review): convert_units_to is not imported in the cells visible here - presumably
    # xclim.core.units.convert_units_to; make sure it is imported before this cell runs
    # the with-block closes the source files again whether or not the model is written out
    with xr.open_mfdataset(fl, coords = "minimal", compat = "override") as ds_all:
        ds = convert_units_to(ds_all[varnm], units).squeeze(drop = True)

        # skip models whose data starts after 1980 or ends before 2030
        if ds.time.dt.year.min() > 1980 or ds.time.dt.year.max() < 2030:
            print("only data from "+date_range)
            continue

        if "height" in ds.coords: ds = ds.reset_coords("height", drop = True)
        ds.to_netcdf(new_fnm)
    print("complete")

# clears the per-model messages printed above, leaving only the final marker
clear_output(wait = False)
print("Done.")

# Compute FWI