In [1]:
# kernel: wwa_xesmf
import sys; sys.path.append('..'); from wwa import *

# Extraction box: xn/xx are compared against longitude and yn/yx against latitude
# when the raw model files are subset further down.
xn,xx,yn,yx = [105,160,-10,45]
# Selector passed to .sel() when area-mean time series are computed.
study_region = {"lon" : slice(115,135), "lat" : slice(10,30)}

# Root folder that is globbed for the raw CMIP6 netCDF files.
fpath = "/rds/general/user/cb2714/home/00_WWA_project_folder/ephemeral/synda_clair/data/CMIP6/"
# NOTE(review): outpath is reassigned by later cells before it is first used.
outpath = "/rds/general/user/cb2714/home/99_ephemera/cmip6_"

---
# Check available models
```
synda search variable=tos,ta,hus,psl -l 10000 > ~/wwa/24-08+typhoon-gaemi/cmip6-pi-vars.txt
```

Model list to use in defaults (duplicates & problematic models removed):
`source_id=CMCC-CM2-SR5,CMCC-ESM2,CNRM-CM6-1,CNRM-CM6-1-HR,CNRM-ESM2-1,CanESM5,EC-Earth3,EC-Earth3-CC,IPSL-CM6A-LR,MIROC6,MPI-ESM1-2-HR,MPI-ESM1-2-LR,MRI-ESM2-0,NESM3,NorESM2-LM,NorESM2-MM`


In [2]:
# Parse the synda search listing: every line is split on literal dots into its facets.
# The separator is a regular expression, so it is written as a raw string - "\." in a
# normal string is an invalid escape sequence (a warning today, an error in future Pythons).
df = pd.read_table("cmip6-pi-vars.txt", sep = r"\.", engine = "python", header = None,
                  names = [0,"activity","institute","source_id","experiment","variant_label","table", "variable", "grid", "version"])

In [3]:
# Keep only (model, variant) pairs for which the listing contains 2*nvars distinct
# (variable, experiment) combinations.
# NOTE(review): nvars is 5 here - confirm this matches the number of variables searched for.
nvars = 5

run_keys = ["source_id", "variant_label"]
combos = df[run_keys + ["variable", "experiment"]].drop_duplicates()
combo_counts = combos.groupby(run_keys).count().reset_index()

mdl_xvar = combo_counts.loc[combo_counts.variable == 2 * nvars]

In [4]:
# show the (model, variant) pairs that passed the completeness filter
mdl_xvar

Unnamed: 0,source_id,variant_label,variable,experiment
8,CESM2,r10i1p1f1,10,10
9,CESM2,r11i1p1f1,10,10
13,CESM2,r4i1p1f1,10,10
23,CESM2-WACCM,r1i1p1f1,10,10
24,CESM2-WACCM,r2i1p1f1,10,10
...,...,...,...,...
385,MRI-ESM2-0,r5i1p1f1,10,10
390,NESM3,r1i1p1f1,10,10
391,NESM3,r2i1p1f1,10,10
395,NorESM2-LM,r1i1p1f1,10,10


In [5]:
# restrict the listing to the (model, variant) pairs that have every required variable
df = pd.merge(df, mdl_xvar, on = ["source_id", "variant_label"], how = "inner")

# alphabetical, de-duplicated model names formatted for the synda defaults file
print("c+p into synda defaults file:")
"source_id=" + ",".join(sorted(set(df.source_id)))

c+p into synda defaults file:


'source_id=CESM2,CESM2-WACCM,CMCC-CM2-SR5,CMCC-ESM2,CNRM-CM6-1,CNRM-CM6-1-HR,CNRM-ESM2-1,CanESM5,EC-Earth3,EC-Earth3-CC,EC-Earth3-Veg,EC-Earth3-Veg-LR,IPSL-CM6A-LR,MIROC6,MPI-ESM1-2-HR,MPI-ESM1-2-LR,MRI-ESM2-0,NESM3,NorESM2-LM,NorESM2-MM'

In [6]:
# number of distinct models left after filtering
df.source_id.nunique(dropna = False)

20

In [7]:
# How well is each variant label represented? Listing rows per label are divided by the
# 2*nvars combinations expected per run, so the values are not necessarily whole numbers.
rows_per_label = df.groupby("variant_label")[["source_id"]].count()
nmod = rows_per_label / (2 * nvars)
nmod.sort_values(by = "source_id")

Unnamed: 0_level_0,source_id
variant_label,Unnamed: 1_level_1
r6i1p1f2,1.3
r9i1p2f1,1.8
r2i1p2f1,1.8
r3i1p2f1,1.8
r5i1p2f1,1.8
...,...
r6i1p1f1,7.2
r4i1p1f1,10.4
r3i1p1f1,13.2
r2i1p1f1,14.7


In [8]:
# models (and the grids they are published on) that provide everything for run r1i1p1f1
is_r1 = df.variant_label == "r1i1p1f1"
df.loc[is_r1, ["source_id", "grid"]].drop_duplicates().sort_values("source_id")

Unnamed: 0,source_id,grid
661,CESM2-WACCM,gn
660,CESM2-WACCM,gr
2101,CMCC-CM2-SR5,gn
2111,CMCC-ESM2,gn
648,CanESM5,gn
388,EC-Earth3,gr
390,EC-Earth3,gn
348,EC-Earth3-CC,gr
350,EC-Earth3-CC,gn
282,EC-Earth3-Veg,gn


In [9]:
# number of distinct variants available per model
model_variants = df[["source_id", "variant_label"]].drop_duplicates()
model_variants.groupby("source_id").count()

Unnamed: 0_level_0,variant_label
source_id,Unnamed: 1_level_1
CESM2,3
CESM2-WACCM,3
CMCC-CM2-SR5,1
CMCC-ESM2,1
CNRM-CM6-1,5
CNRM-CM6-1-HR,1
CNRM-ESM2-1,5
CanESM5,20
EC-Earth3,8
EC-Earth3-CC,1


---
# List downloaded models

In [2]:
def timeslice_years(timeslice):
    """Return (first year, last year) from a filename time range such as '18500101-18501231.nc'.

    The range is split on '-' rather than read at fixed character offsets, so it also
    works when the date stamps are not exactly eight digits long (e.g. 'YYYYMM-YYYYMM').
    """
    start, end = timeslice.split(".")[0].split("-")[:2]
    return int(start[:4]), int(end[:4])

# one row per downloaded file, with the filename split on "_" into its seven facets
fl = sorted(glob.glob("/rds/general/user/cb2714/home/00_WWA_project_folder/ephemeral/synda_clair/data/CMIP6/*/*/*.nc"))
df = pd.DataFrame([fnm.split("/")[-1].split("_") for fnm in fl], columns = ["variable", "frequency", "source_id", "experiment", "ensemble_id", "grid", "timeslice"])

# filter only the variables we actually need for the potential intensity
# (.copy() so that the new columns below are added to an independent frame, not a slice)
df = df.loc[df.variable.isin(["tos","hus","ta","psl"])].copy()

# first and last year covered by each file
file_years = [timeslice_years(t) for t in df.timeslice]
df["ys"] = [first for first, last in file_years]
df["ye"] = [last for first, last in file_years]

In [3]:
# download progress: number of files per model (rows) and variable (columns)
pd.crosstab(df.source_id, df.variable)

variable,hus,psl,ta,tos
source_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
CMCC-CM2-SR5,126,11,126,26
CMCC-ESM2,126,11,126,26
CanESM5,26,2,26,26
EC-Earth3,251,251,251,251
EC-Earth3-CC,251,251,251,251
EC-Earth3-Veg,251,251,251,251
EC-Earth3-Veg-LR,251,251,251,251
IPSL-CM6A-LR,3,2,3,2
MIROC6,251,26,251,26
MPI-ESM1-2-HR,51,51,51,51


In [54]:
# collapse the file inventory to one row per (model, variable):
# earliest start year and latest end year found among its files
by_model_var = df[["source_id", "variable", "ys", "ye"]].groupby(["source_id", "variable"])
first_year = by_model_var[["ys"]].min()
last_year = by_model_var[["ye"]].max()
df = merge_byindex([first_year, last_year]).reset_index()

In [55]:
# models whose files reach back to 1950 or earlier and forward to 2050 or later
# for each of the four variables
spans_period = (df.ys <= 1950) & (df.ye >= 2050)
mcount = df.loc[spans_period].groupby(["source_id"]).count()[["variable"]]

has_all_four = mcount.variable == 4
mlist = mcount.loc[has_all_four].index.to_list()
# mlist = [m for m in mlist if not "Veg" in m]

In [56]:
# models selected for processing
mlist

['CMCC-CM2-SR5',
 'CMCC-ESM2',
 'CanESM5',
 'EC-Earth3',
 'EC-Earth3-CC',
 'EC-Earth3-Veg',
 'EC-Earth3-Veg-LR',
 'IPSL-CM6A-LR',
 'MIROC6',
 'MPI-ESM1-2-HR',
 'MPI-ESM1-2-LR',
 'MRI-ESM2-0',
 'NESM3',
 'NorESM2-LM',
 'NorESM2-MM']

## Extract data subset

### SSTs

In [48]:
# Cut the extraction box out of every native-grid ("gn") tos file and save the subset
# under the same filename in outpath. Files that already exist are skipped.
outpath = "/rds/general/user/cb2714/home/00_WWA_project_folder/ephemeral/potential-intensity/tos/"
os.makedirs(outpath, exist_ok = True)   # to_netcdf does not create missing folders

fl = sorted(glob.glob(fpath+"*/tos/*gn*.nc"))
c = 0
for fnm in fl:
    
#     if "Earth3" in fnm: continue
    
    # only print 50 filenames at a time
    c = c+1
    if c == 50:
        clear_output(wait = False)
        c = 0
        
    print(fnm.split("/")[-1])
    new_fnm = outpath + fnm.split("/")[-1]
    if os.path.exists(new_fnm): continue

    # context manager so that the file handle is released after each file
    with xr.open_dataset(fnm) as src:
        ds = src

        # harmonise the names of the 2-D coordinate variables
        if "longitude" in ds.coords: ds = ds.rename({"longitude" : "lon", "latitude" : "lat"})
        if "nav_lon" in ds.coords: ds = ds.rename({"nav_lon" : "lon", "nav_lat" : "lat"})

        # True for grid cells whose lon/lat fall inside the extraction box
        box = np.logical_and(np.logical_and(ds.lon >= xn, ds.lon <= xx), np.logical_and(ds.lat >= yn, ds.lat <= yx))

        # the horizontal dimensions are called either (i, j) or (x, y)
        if "i" in ds.dims:
            xdim,ydim = ["i","j"]
        else:
            xdim,ydim = ["x","y"]

        # drop every grid row/column that lies completely outside the box
        # (`how` must be given by keyword: passing it positionally is deprecated in xarray)
        box = box.where(box == 1).dropna(xdim, how = "all").dropna(ydim, how = "all")

        # keep the remaining index range and write only the tos variable
        ds = ds.sel({xdim : box[xdim], ydim : box[ydim]}).tos
        ds.to_netcdf(new_fnm)
    
clear_output(wait = False)
print("Done.")

Done.


### Other variables

In [49]:
# Cut the extraction box out of the psl / ta / hus files of every selected model,
# convert to the units listed below and save under the same filename.
# Files that already exist are skipped.
units = {"psl" : "hPa", "ta" : "degC", "hus" : "g/kg"}

fpath = "/rds/general/user/cb2714/home/00_WWA_project_folder/ephemeral/synda_clair/data/CMIP6/"

for mdl in mlist:
    
#     if "Earth3" in mdl: continue
        
    for varnm in ["psl", "ta", "hus"]:

        outpath = "/rds/general/user/cb2714/home/00_WWA_project_folder/ephemeral/potential-intensity/"+varnm+"/"
        os.makedirs(outpath, exist_ok = True)   # to_netcdf does not create missing folders
        fl = sorted(glob.glob(fpath+"*/"+varnm+"/*"+mdl+"_*.nc"))

        for fnm in fl:

            print(fnm.split("/")[-1])
            new_fnm = outpath + fnm.split("/")[-1]

            if os.path.exists(new_fnm): continue

            # context manager so that the file handle is released after each file
            with xr.open_dataset(fnm) as src:
                ds = src
                if "longitude" in ds.coords: ds = ds.rename({"longitude" : "lon", "latitude" : "lat"})
                if "plev" in ds.dims: ds["plev"] = convert_units_to(ds.plev, "hPa")

                # NOTE(review): slice(yn,yx) assumes lat is stored in ascending order - confirm
                ds = ds.sel(lon = slice(xn,xx), lat = slice(yn,yx))[varnm]
                ds = convert_units_to(ds, units[varnm])
                ds.to_netcdf(new_fnm)

        # wipe the filename log after each variable to keep the cell output short
        clear_output(wait = False)
print("Done.")

Done.


### Regrid tos from the native ocean grid to the regular lat-lon grid of the atmospheric variables

Using xesmf: https://pavics-sdi.readthedocs.io/en/latest/notebooks/regridding.html

In [50]:
import xesmf as xe

In [51]:
# Regrid every extracted tos file onto the lat-lon grid of the same model's psl files
# and save it as tos-rg/tos-rg_<rest of filename>. Existing outputs are skipped.
fpath = "/rds/general/user/cb2714/home/00_WWA_project_folder/ephemeral/potential-intensity/"
os.makedirs(fpath+"tos-rg/", exist_ok = True)   # to_netcdf does not create missing folders

fl = glob.glob(fpath+"tos/*.nc")
c = 0

# fl = [fnm for fnm in fl if "NorESM" in fnm]

for fnm in fl:

    # only print 50 filenames at a time
    c = c+1
    if c == 50:
        clear_output(wait = False)
        c = 0
        
    basename = os.path.basename(fnm)
    print(basename)

    # only the filename is rewritten, so "tos" elsewhere in the path cannot be touched
    new_fnm = fpath + "tos-rg/" + re.sub("^tos", "tos-rg", basename)
    if os.path.exists(new_fnm): continue

    # model name is the third "_"-separated field of the filename; taking it from the
    # filename (not the full path) keeps it independent of underscores in folder names
    mdl = basename.split("_")[2]

    # load SLP file (if there is one) to get target lat-lon grid.
    # The model name is delimited with "_" on both sides so that e.g. EC-Earth3 cannot
    # pick up an EC-Earth3-Veg-LR file, which may be on a different grid.
    tmplt_fl = sorted(glob.glob(fpath+"psl/*_"+mdl+"_*.nc"))
    if len(tmplt_fl) == 0: continue

    with xr.open_dataset(tmplt_fl[0]) as tmplt_src:
        # a single time step is enough to define the grid; load it so the file can be closed
        tmplt = tmplt_src.isel(time = 0).reset_coords(drop = True).load()

    with xr.open_mfdataset(fnm) as tos_src:

        # load SSTs, convert to degrees, add CF attributes to allow regridding
        tos = convert_units_to(tos_src.tos, "degC")

        # the horizontal dimensions are called either (i, j) or (x, y)
        if "i" in tos.dims:
            xdim,ydim = ["i","j"]
        else:
            xdim,ydim = ["x","y"]

        tos[xdim].attrs['axis'] = 'X'
        tos[ydim].attrs['axis'] = 'Y'

        # regrid SSTs to the regular lat-lon grid of the psl template
        rg = xe.Regridder(tos, tmplt, "bilinear", ignore_degenerate = True)
        tos_rg = rg(tos)

        # subset region & save
        tos_rg = tos_rg.sel(lon = slice(xn,xx), lat = slice(yn,yx)).assign_attrs(units = "degC").rename("sst").astype("float32")
        tos_rg.to_netcdf(new_fnm)

clear_output(wait = False)
print("Done.")

Done.


## Compute potential intensity

In [10]:
# kernel: potint
import xarray as xr, os, glob
import warnings; warnings.filterwarnings("ignore", category = FutureWarning)

from tcpyPI import pi
from tcpyPI.utilities import *

# folder whose hus/, ta/, psl/, tos*/ and pi/ subfolders are read and written by the cells below
outpath = "/rds/general/user/cb2714/home/00_WWA_project_folder/ephemeral/potential-intensity/"

In [18]:
# Models for which regridded SST files exist. The model name is the third "_"-separated
# field of the filename; it is read from the filename rather than the full path so that
# underscores in folder names cannot shift the index. Sorted so that the order (and
# therefore mlist[:1]) is the same on every run.
mlist = sorted({os.path.basename(fnm).split("_")[2] for fnm in glob.glob(outpath+"tos-rg/*")})

In [18]:
# NOTE(review): this cell looks unfinished - the last statement stops mid-name
# ("xr.open_datase") and will fail if executed. After the loop, `mdl` holds the
# first entry of mlist, which the next cell relies on.
for mdl in mlist[:1]:
    print(mdl)
    
    hus = xr.open_datase

In [None]:
# extract smaller domain for Gaemi to speed up PI calculation
# NOTE(review): the .sel(...) calls below are commented out, so xn/xx/yn/yx are not
# actually applied in this cell.
# NOTE(review): `mdl` is not defined here - it is whatever value an earlier cell left behind.
xn,xx,yn,yx = [105,160,-10,45]

# one pass per decade file: y is the first year, y+9 the last year in the filename
for y in range(1930,2060,10):
    
    # NOTE(review): these filenames follow a <var>_<model>_<y0>-<y1>.nc pattern, and SST is read
    # as variable `tos` from "tos/tos-rg_..."; the regridding cell in this notebook saves the
    # variable under the name "sst" - confirm paths and variable names against the files on disk.
    hus = xr.open_dataset(outpath+"hus/hus_"+mdl+"_"+str(y)+"-"+str(y+9)+".nc").hus#.sel(lon = slice(xn,xx), lat = slice(yn,yx))
    ta = xr.open_dataset(outpath+"ta/ta_"+mdl+"_"+str(y)+"-"+str(y+9)+".nc").ta#.sel(lon = slice(xn,xx), lat = slice(yn,yx))
    tos = xr.open_dataset(outpath+"tos/tos-rg_"+mdl+"_"+str(y)+"-"+str(y+9)+".nc").tos#.sel(lon = slice(xn,xx), lat = slice(yn,yx))
    psl = xr.open_dataset(outpath+"psl/psl_"+mdl+"_"+str(y)+"-"+str(y+9)+".nc").psl#.sel(lon = slice(xn,xx), lat = slice(yn,yx))
    
    # merge the four variables, rename them to the names used in the pi call below,
    # and load the whole decade into memory
    ds_d = xr.merge([tos, psl, ta, hus]).rename(plev = "p", ta = "t", hus = "q", psl = "msl", tos = "sst").load()

    # compute and save one output file per year; years already on disk are skipped
    for yy in range(y,y+10):
    
        new_fnm = outpath+"pi/pi_"+mdl+"_"+str(yy)+".nc"
        if os.path.exists(new_fnm): continue
            
        ds = ds_d.sel(time = str(yy))

        # calculate the potential intensity
        # sst and msl are passed without core dimensions; p, t and q are passed with core
        # dimension "p". vectorize=True makes apply_ufunc loop pi over the remaining dimensions.
        vmax, pmin, ifl, t0, otl = xr.apply_ufunc(
            pi,
            ds['sst'], ds['msl'], ds['p'], ds['t'], ds['q'],
            kwargs=dict(CKCD=0.9, ascent_flag=0, diss_flag=1, ptop=50, miss_handle=1),  # used default value of CKCD = 0.9
            input_core_dims=[
                [], [], ['p', ], ['p', ], ['p', ],
            ],
            output_core_dims=[
                [], [], [], [], []
            ],
            vectorize=True
        )

        # store the result in an xarray data structure
        ds_out = xr.Dataset({
            'vmax': vmax, 
            'pmin': pmin,
            'ifl': ifl,
            't0': t0,
            'otl': otl,
            })

        ds_out.to_netcdf(new_fnm)

## Daily PI & SST over study region

In [2]:
# fpath: folder holding the gridded pi/ and tos-rg/ files read by the cells below
fpath = "/rds/general/user/cb2714/home/00_WWA_project_folder/ephemeral/potential-intensity/"
# outpath: folder under which the area-mean time series (cmip6_pi/, cmip6_sst/) are written
outpath = "/rds/general/user/cb2714/home/00_WWA_project_folder/ephemeral/typhoon_gaemi/"

In [52]:
# Area-mean potential intensity (vmax) over the study region, one output file per input file.
# NOTE(review): this writes under fpath, whereas the EC-Earth and SST cells below write
# their time series under outpath - confirm which location is intended.
pi_ts_dir = fpath+"cmip6_pi/"
os.makedirs(pi_ts_dir, exist_ok = True)   # to_netcdf does not create missing folders

for fnm in sorted(glob.glob(fpath+"pi/*.nc")):
    # context manager so that the file handle is released after each file
    with xr.open_dataset(fnm) as ds:
        ts = ds.vmax.sel(study_region).mean(["lat", "lon"])
        ts.to_netcdf(pi_ts_dir+fnm.split("/")[-1])

In [63]:
# EC-Earth output is stored as one folder per model: concatenate all files in the folder
# along time and save a single area-mean vmax series per model (skipped if already saved)
for mpath in glob.glob(fpath+"pi/EC-Earth3*"):
    print(mpath)
    
    model_name = mpath.split("/")[-1]
    new_fnm = outpath+"cmip6_pi/pi_"+model_name+".nc"
    if os.path.exists(new_fnm): continue
    
    model_files = sorted(glob.glob(mpath+"/*.nc"))
    regional_vmax = [xr.open_dataset(model_file).vmax.sel(study_region) for model_file in model_files]
    
    ds = xr.concat(regional_vmax, "time")
    ts = ds.mean(["lat", "lon"])
    ts.to_netcdf(new_fnm)

/rds/general/user/cb2714/home/00_WWA_project_folder/ephemeral/potential-intensity/pi/EC-Earth3-Veg-LR
/rds/general/user/cb2714/home/00_WWA_project_folder/ephemeral/potential-intensity/pi/EC-Earth3


In [46]:
# Area-mean SST over the study region: one concatenated time series per model.
# The model name is the third "_"-separated field of the filename; it is read from the
# filename rather than the full path so underscores in folder names cannot shift the index.
mlist = sorted({os.path.basename(fnm).split("_")[2] for fnm in glob.glob(fpath+"tos-rg/*.nc")})
for mdl in mlist:
    
    print(mdl)
    new_fnm = outpath+"cmip6_sst/sst_"+mdl+".nc"
    # NOTE: series saved by an earlier run are kept as they are; delete the file to rebuild it
    if os.path.exists(new_fnm): continue
        
    # The model name is delimited with "_" on both sides: an open-ended "*_EC-Earth3*"
    # would also match EC-Earth3-CC / -Veg / -Veg-LR and mix several models into one series.
    mdl_files = sorted(glob.glob(fpath+"tos-rg/*_"+mdl+"_*.nc"))
    ds = xr.concat([xr.open_dataset(fnm).sel(study_region) for fnm in mdl_files], "time")
    ts = ds.sst.mean(["lat", "lon"])
    ts.to_netcdf(new_fnm)

CMCC-CM2-SR5
CMCC-ESM2
CanESM5
EC-Earth3
EC-Earth3-CC
EC-Earth3-Veg
EC-Earth3-Veg-LR
IPSL-CM6A-LR
MIROC6
MPI-ESM1-2-HR
MPI-ESM1-2-LR
MRI-ESM2-0
NESM3
NorESM2-LM
NorESM2-MM


In [44]:
# have to fix the calendar change in NESM
# NOTE(review): `ts` and `new_fnm` are not defined in this cell - they are whatever a
# previously executed cell left behind. Confirm that both refer to the NESM series
# before running, otherwise another model's file is overwritten.
# The series is split at time index (2015-1850)*365+40 - presumably the number of days
# between 1850 and 2015 with 40 leap days, TODO confirm. The first part is kept unchanged,
# the second part is passed through convert_calendar(..., "default", align_on = "date"),
# and the two parts are concatenated along time and saved to new_fnm.
ts = xr.concat([ts.isel(time = slice(None, (2015-1850)*365+40)),
          convert_calendar(ts.isel(time = slice((2015-1850)*365+40, None)), "default", align_on = "date")], "time")
ts.to_netcdf(new_fnm)

# ERA5

## Resample hourly to daily data

In [64]:
# fpath: folder with the yearly ERA5 input files (<var>_era5_<year>.nc)
fpath = "/rds/general/user/cb2714/home/99_ephemera/pi_era5/"
# outpath: folder for the daily-mean files; relative to the notebook's working directory
outpath = "data/era5_pi/"

In [65]:
# all ERA5 input files; NOTE(review): fl is not referenced by the cells that follow
fl = glob.glob(fpath+"*.nc")

In [68]:
# Daily means of the ERA5 files, one output file per variable and year (2023 back to 1979).
# For each notebook variable name: (name of the variable inside the ERA5 file, target units)
era5_fields = {"sst"  : ("sst", "degC"),
               "hus"  : ("q",   "g/kg"),
               "ta"   : ("t",   "degC"),
               "mslp" : ("msl", "hPa")}

# the [:1] slice restricts processing to the first variable in the list
varnms = ["sst","hus","mslp","ta"][:1]

for y in reversed(range(1979, 2024)):
    
    print(y, end = " ")
    
    for varnm in varnms:
        print(varnm,end = " ")
        
        new_fnm = outpath+varnm+"-daily_era5_"+str(y)+".nc"
        
        # only process years that have not been written yet
        if not os.path.exists(new_fnm):
            
            src = xr.open_dataset(fpath+varnm+"_era5_"+str(y)+".nc").reset_coords(drop = True)
            
            # average over each day, then rename the time axis
            ds = src.resample(valid_time = "D").mean().rename(valid_time = "time")
            if "pressure_level" in ds.dims: ds["pressure_level"] = convert_units_to(ds.pressure_level, "hPa")
            
            # pick the data variable and convert it to the target units
            if varnm in era5_fields:
                era5_name, target_units = era5_fields[varnm]
                ds = convert_units_to(ds[era5_name], target_units)
                
            ds.to_netcdf(new_fnm)
    print("")
clear_output(wait = False)
print("Done.")

Done.


In [81]:
# area-mean daily ERA5 SST over the study region, saved alongside the CMIP6 time series
era5_sst = wrap_lon(xr.open_mfdataset("data/era5_pi/sst-daily_era5_*.nc")).sst
ds = era5_sst.rename(longitude = "lon", latitude = "lat").sel(study_region)

regional_mean = ds.mean(["lat", "lon"])
ts = convert_units_to(regional_mean, "degC")
ts.to_netcdf("/rds/general/user/cb2714/home/00_WWA_project_folder/ephemeral/typhoon_gaemi/sst_ts-daily_era5.nc")