In [1]:
import glob, re, os
import xarray as xr, xesmf as xe, numpy as np
from tcpyPI.pi import pi

from xclim.core.units import convert_units_to
from datetime import datetime

from  IPython.display import clear_output

import warnings
# Silence library warnings that only add noise to the cell outputs
for _filter_kwargs in ({"category": FutureWarning}, {"message": ".+multiple fill values.+"}):
    warnings.filterwarnings("ignore", **_filter_kwargs)

# Bounding box used to subset every field: longitude min/max, latitude min/max
xn, xx = 100, 145
yn, yx = 0, 40

# Region name, used as the prefix of every output file
rnm = "philippines"

In [2]:
def wrap_lon(ds):
    """Standardise the horizontal coordinates of an xarray object.

    Coordinates called longitude/latitude are renamed to lon/lat, longitudes
    running past 180 are wrapped from (0, 360) into [-180, 180), and both
    lon and lat are put in ascending order.

    Parameters
    ----------
    ds : xarray Dataset or DataArray with lon/lat (or longitude/latitude) coordinates.

    Returns
    -------
    The same kind of object, reindexed onto sorted lon and lat.
    """
    # harmonise coordinate names first so everything below can rely on lon/lat
    uses_long_names = "longitude" in ds.coords
    if uses_long_names:
        ds = ds.rename(longitude = "lon", latitude = "lat")

    # only shift the longitudes if they actually extend beyond 180
    if ds.lon.max() > 180:
        shifted = ((ds.lon.values + 180) % 360) - 180
        ds["lon"] = (ds.lon.dims, shifted, ds.lon.attrs)

    # wrapping leaves lon out of order, and some grids store lat north-to-south
    lon_ascending = np.sort(ds.lon)
    ds = ds.reindex({ "lon" : lon_ascending })
    lat_ascending = np.sort(ds.lat)
    ds = ds.reindex({ "lat" : lat_ascending })

    return ds

# Identify candidate models

In [3]:
def member_order(fp):
    """Sort key that orders paths by ensemble member numerically, then by path.

    The member label is path component 9 (e.g. 'r10i1p1f2'); comparing its
    integers means r2 sorts before r10, which a plain string sort gets wrong.
    """
    return [int(n) for n in re.findall(r"\d+", fp.split("/")[9])], fp


def first_member_paths(paths, known_models):
    """Pick one path per model that is not already covered by `known_models`.

    Parameters
    ----------
    paths : iterable of absolute grid-directory paths
        (/badc/cmip6/data/CMIP6/ScenarioMIP/<institute>/<model>/ssp585/<member>/Omon/tos/<grid>).
    known_models : collection of 'institute/model' keys to leave out.

    Returns
    -------
    dict mapping 'institute/model' to the path of its lowest-numbered member.

    Models are matched on their exact path components, never by substring, so
    e.g. 'NCAR/CESM2' can no longer pick up a 'NCAR/CESM2-WACCM' path.
    """
    by_model = {}
    for fp in paths:
        by_model.setdefault("/".join(fp.split("/")[6:8]), []).append(fp)
    return {m : min(fps, key = member_order) for m, fps in by_model.items() if m not in known_models}


# list all available file paths
fplist = glob.glob("/badc/cmip6/data/CMIP6/ScenarioMIP/*/*/ssp585/r1i1p1f1/Omon/tos/*")

# group the grid directories by exact institute/model/scenario/member key
grids_by_model = {}
for fp in fplist:
    grids_by_model.setdefault("/".join(fp.split("/")[6:10]), []).append(fp)

# get unique models
mlist = list(grids_by_model)

# find duplicates (models published on more than one grid)
mdup = [m for m in mlist if len(grids_by_model[m]) > 1]

# list one instance for each model
fp_unique = [grids_by_model[m][0] for m in mlist if len(grids_by_model[m]) == 1]

# where several grids exist prefer a regridded one (grid label gr, gr1, ...);
# fall back to the first available grid rather than failing if there is none
fp_dup = []
for m in mdup:
    candidates = sorted(grids_by_model[m])
    regridded = [fp for fp in candidates if fp.split("/")[-1].startswith("gr")]
    fp_dup.append((regridded or candidates)[0])

# for models where r1i1p1f1 doesn't exist, get first ensemble member
fplist_notr1 = [fp for fp in glob.glob("/badc/cmip6/data/CMIP6/ScenarioMIP/*/*/ssp585/*/Omon/tos/*") if not "r1i1p1f1" in fp]
r1_models = {"/".join(m.split("/")[:2]) for m in mlist}
picked_notr1 = first_member_paths(fplist_notr1, r1_models)
mlist_notr1 = list(picked_notr1)
fp_notr1 = list(picked_notr1.values())

# create final list of models from all of the above
fplist = sorted(fp_unique + fp_dup + fp_notr1)

# paths to leave out of all further processing
excl = ['/badc/cmip6/data/CMIP6/ScenarioMIP/AWI/AWI-CM-1-1-MR/ssp585/r1i1p1f1/Omon/tos/gn',
        '/badc/cmip6/data/CMIP6/ScenarioMIP/CNRM-CERFACS/CNRM-CM6-1-HR/ssp585/r1i1p1f2/Omon/tos/gn',
        '/badc/cmip6/data/CMIP6/ScenarioMIP/FIO-QLNM/FIO-ESM-2-0/ssp585/r1i1p1f1/Omon/tos/gn',
        '/badc/cmip6/data/CMIP6/ScenarioMIP/MOHC/HadGEM3-GC31-MM/ssp585/r1i1p1f3/Omon/tos/gn',
        '/badc/cmip6/data/CMIP6/ScenarioMIP/NCAR/CESM2-WACCM/ssp585/r2i1p1f1/Omon/tos/gn']

fplist = [fp for fp in fplist if not fp in excl]

# Compile MSLP

In [4]:
# For every selected model: join historical + SSP5-8.5 sea-level pressure,
# cut out the study region, convert to hPa and save one file per model.
for fp in fplist:

    # progress label: <model>_<ensemble member>
    mdl = fp.split("/")[7]+"_"+fp.split("/")[9]
    print(mdl)

    # pressure is an atmospheric variable: swap the Omon/tos/<grid> tail of the path for Amon/psl
    psl_fp = "/".join(fp.split("/")[:-3])+"/Amon/psl/*/latest/*"

    # glob returns files in arbitrary order; sort so that the pieces are concatenated
    # chronologically and so that [0] / [-1] really are the first and last files
    fl_hist = sorted(glob.glob(re.sub("ScenarioMIP", "CMIP", re.sub("ssp585", "historical", psl_fp))))
    # keep only scenario files that start no later than 2100 (start year is read from the filename)
    fl_ssp = sorted(fnm for fnm in glob.glob(psl_fp) if int(fnm[-16:-12]) <= 2100)

    if len(fl_hist) == 0 or len(fl_ssp) == 0: continue
    
    # output name: region prefix + historical filename without its experiment tag and end date
    # + end date of the last scenario file
    new_fnm = "/home/users/clairb/potential-intensity/psl/"+rnm+"_"+re.sub("historical_","",fl_hist[0]).split("/")[-1][:-9]+fl_ssp[-1].split("/")[-1][-9:]
    if os.path.exists(new_fnm): continue

    # put everything on a standard calendar so that the two experiments can be joined
    ds_hist = [xr.open_dataset(fnm).psl.convert_calendar("standard", align_on = "date") for fnm in fl_hist]
    ds_ssp = [xr.open_dataset(fnm).psl.convert_calendar("standard", align_on = "date") for fnm in fl_ssp]

    ds = wrap_lon(xr.concat(ds_hist + ds_ssp, "time"))
    ds = ds.sel(lon = slice(xn,xx), lat = slice(yn,yx))
    ds = convert_units_to(ds, "hPa")

    ds.to_netcdf(new_fnm)
clear_output(wait = False)
print("Done.")

Done.


# Regrid & compile SSTs

In [5]:
# For every selected model: join historical + SSP5-8.5 SSTs, convert to degC,
# regrid from the ocean grid onto the model's atmospheric grid over the study
# region and save one file per model.
for fp in fplist:
    
    print(fp, end = ": ")

    # glob returns files in arbitrary order; sort so that the pieces are concatenated
    # chronologically and so that [0] / [-1] really are the first and last files
    fl_hist = sorted(glob.glob(re.sub("ScenarioMIP", "CMIP", re.sub("ssp585", "historical", fp)) + "/latest/*"))
    # keep only scenario files that start no later than 2100 (start year is read from the filename)
    fl_ssp = sorted(fnm for fnm in glob.glob(fp+"/latest/*") if int(fnm[-16:-12]) <= 2100)

    if len(fl_hist) == 0 or len(fl_ssp) == 0: 
        print("missing data")
        continue
    
    # output name: region prefix + historical filename without its experiment tag and end date
    # + end date of the last scenario file
    new_fnm = "/home/users/clairb/potential-intensity/tos/"+rnm+"_"+re.sub("historical_","",fl_hist[0]).split("/")[-1][:-9]+fl_ssp[-1].split("/")[-1][-9:]
    if os.path.exists(new_fnm): 
        print("already processed")
        continue

    print("loading... ", end = "")
    ds_hist = [xr.open_dataset(fnm).tos.convert_calendar("standard", align_on = "date") for fnm in fl_hist]
    ds_ssp = [xr.open_dataset(fnm).tos.convert_calendar("standard", align_on = "date") for fnm in fl_ssp]
    
    print("concatenating... ", end = "")
    ds = xr.concat(ds_hist + ds_ssp, "time")

    print("converting... ", end = "")
    ds = convert_units_to(ds, "degC")

    #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # SSTs are stored on a different grid to atmospheric variables - regrid

    # load target grid and cut to the region we're interested in
    # (sorted so that the same template file is picked on every run)
    tmplt_fl = sorted(glob.glob("/".join(fp.split("/")[:-3])+"/Amon/psl/*/latest/*.nc"))
    tmplt = wrap_lon(xr.open_dataset(tmplt_fl[0])).sel(lon = slice(xn,xx), lat = slice(yn,yx))

    if "i" in ds.dims:   
        # add CF attributes to allow regridding
        ds.i.attrs['axis'] = 'X'
        ds.j.attrs['axis'] = 'Y'

    # build regridder
    print("regridding... ", end = "")
    rg = xe.Regridder(ds, tmplt, "bilinear", ignore_degenerate = True)
    # restore the variable name and units on the regridded field
    ds_rg = rg(ds).rename("tos").assign_attrs(units = "degC")
    ds_rg = wrap_lon(ds_rg)

    print("saving... ", end = "")
    ds_rg.to_netcdf(new_fnm)
    print("")
clear_output(wait = False)
print("Done.")

Done.


# Compile other variables

In [6]:
# For every selected model: join historical + SSP5-8.5 specific humidity (hus)
# and air temperature (ta) profiles over the study region, fix the units and
# save one file per model and variable.
for fp in fplist:

    # progress label: <model>_<ensemble member>
    mdl = fp.split("/")[7]+"_"+fp.split("/")[9]
    print(mdl, end = ": ")
    
    for varnm in ["hus", "ta"]:
        print(varnm, end = " ")

        # atmospheric variables: swap the Omon/tos/<grid> tail of the path for Amon/<variable>
        vp = "/".join(fp.split("/")[:-3])+"/Amon/"+varnm+"/*/latest/*.nc"

        # glob returns files in arbitrary order; sort so that the pieces are concatenated
        # chronologically and so that [0] / [-1] really are the first and last files
        fl_hist = sorted(glob.glob(re.sub("ScenarioMIP", "CMIP", re.sub("ssp585", "historical", vp))))
        # keep only scenario files that start no later than 2100 (start year is read from the filename)
        fl_ssp = sorted(fnm for fnm in glob.glob(vp) if int(fnm[-16:-12]) <= 2100)

        if len(fl_hist) == 0 or len(fl_ssp) == 0: continue

        # output name: region prefix + historical filename without its experiment tag and end date
        # + end date of the last scenario file
        new_fnm = "/home/users/clairb/potential-intensity/"+varnm+"/"+rnm+"_"+re.sub("historical_","",fl_hist[0]).split("/")[-1][:-9]+fl_ssp[-1].split("/")[-1][-9:]
        if os.path.exists(new_fnm): continue
            
        # target units per variable
        # NOTE(review): hus is stored in %, whereas tcpyPI documents its humidity input in g/kg
        # (1 % = 10 g/kg) - the values need rescaling before they are passed to pi()
        u = {"ta" : "degC", "hus" : "%"}[varnm]

        # load & prep the data (cut out small region here)
        ds_hist = [wrap_lon(xr.open_dataset(fnm))[varnm].sel(lon = slice(xn, xx), lat = slice(yn,yx)).convert_calendar("standard", align_on = "date") for fnm in fl_hist]
        ds_ssp = [wrap_lon(xr.open_dataset(fnm))[varnm].sel(lon = slice(xn, xx), lat = slice(yn,yx)).convert_calendar("standard", align_on = "date") for fnm in fl_ssp]
    
        # compile & fix units
        ds = xr.concat(ds_hist + ds_ssp, "time")
        ds = convert_units_to(ds, u)
        ds["plev"] = convert_units_to(ds.plev, "hPa")

        ds.to_netcdf(new_fnm)

    print("")
clear_output(wait = False)
print("Done.")

Done.


# Compute potential intensity

In [None]:
# Compute tropical-cyclone potential intensity for every model that has a
# regridded SST file plus matching hus, ta and psl files.
# NOTE: results are cached on disk; delete existing pi/ files to recompute them.
fl = sorted(glob.glob("tos/*.nc"))
for fnm in fl:

    # progress label: <model>_<member>_<grid>_<dates>.nc
    mdl = "_".join(fnm.split("_")[-4:])
    if mdl in ["FGOALS-g3_r1i1p1f1_gn_185001-210012.nc"]: continue
    print(mdl)

    new_fnm = re.sub("tos", "pi", fnm)
    if os.path.exists(new_fnm): 
        print("  already processed")
        continue
    
    # Locate the atmospheric inputs. The SST file keeps the OCEAN grid label in its
    # name, but the atmospheric files carry the ATMOSPHERIC one (e.g. tos on gn, psl on gr),
    # so fall back to any grid label when the literal substitution finds nothing.
    dates = fnm.split("_")[-1]
    atm_fnm = {}
    for varnm in ["hus", "ta", "psl"]:
        exact = re.sub("Omon", "Amon", re.sub("tos", varnm, fnm))
        any_grid = "_".join(exact.split("_")[:-2] + ["*", dates])
        matches = [exact] if os.path.exists(exact) else sorted(glob.glob(any_grid))
        if matches: atm_fnm[varnm] = matches[0]

    if len(atm_fnm) < 3:
        print("  skipped - not all data available")
        continue
        
    tos = xr.open_dataset(fnm).tos
    hus, ta, psl = [xr.open_dataset(atm_fnm[varnm])[varnm] for varnm in ["hus", "ta", "psl"]]

    # tcpyPI expects humidity in g/kg; convert from whatever units the file was stored in
    hus = convert_units_to(hus, "g/kg")
    
    # rename to the variable names used in the call below
    ds = xr.merge([tos, psl, ta, hus]).rename(plev = "p", ta = "t", hus = "q", psl = "msl", tos = "sst")
    
    print("  Data loaded:   "+datetime.now().time().strftime("%H:%M:%S"))
    
    # calculate the potential intensity (may take a v long time - up to 3hrs for 200 years)
    # pi() works on a single profile, so it is vectorised over every dimension except pressure
    vmax, pmin, ifl, t0, otl = xr.apply_ufunc(
        pi,
        ds['sst'], ds['msl'], ds['p'], ds['t'], ds['q'],
        kwargs=dict(CKCD=0.9, ascent_flag=0, diss_flag=1, ptop=50, miss_handle=1),  # use defaults
        input_core_dims=[
            [], [], ['p', ], ['p', ], ['p', ],
        ],
        output_core_dims=[
            [], [], [], [], []
        ],
        vectorize=True
    )
    
    print("  PI calculated: "+datetime.now().time().strftime("%H:%M:%S"))
    
    # store the result in an xarray data structure
    ds_out = xr.Dataset({
        'vmax': vmax, 
        'pmin': pmin,
        'ifl': ifl,
        't0': t0,
        'otl': otl,
        })
    
    ds_out.to_netcdf(new_fnm)
    
    print("  Data saved:    "+datetime.now().time().strftime("%H:%M:%S"))
clear_output(wait = False)
print("Done.")

ACCESS-CM2_r1i1p1f1_gn_185001-210012.nc
  already processed
ACCESS-ESM1-5_r1i1p1f1_gn_185001-210012.nc
  already processed
BCC-CSM2-MR_r1i1p1f1_gn_185001-210012.nc
  already processed
CAMS-CSM1-0_r1i1p1f1_gn_185001-209912.nc
  already processed
CAS-ESM2-0_r1i1p1f1_gn_185001-210012.nc
  skipped - not all data available
CESM2-WACCM_r1i1p1f1_gr_185001-210012.nc
  skipped - not all data available
CIESM_r1i1p1f1_gn_185001-210012.nc
  skipped - not all data available
CMCC-CM2-SR5_r1i1p1f1_gn_185001-210012.nc
  already processed
CMCC-ESM2_r1i1p1f1_gn_185001-210012.nc
  already processed
CNRM-ESM2-1_r1i1p1f2_gn_185001-210012.nc
  skipped - not all data available
CanESM5-CanOE_r1i1p2f1_gn_185001-210012.nc
  already processed
CanESM5_r1i1p1f1_gn_185001-210012.nc
  already processed
E3SM-1-1_r1i1p1f1_gr_185001-210012.nc
  already processed
EC-Earth3-CC_r1i1p1f1_gn_185001-210012.nc
  skipped - not all data available
EC-Earth3-Veg-LR_r1i1p1f1_gn_185001-210012.nc
  skipped - not all data available
E