In [1]:
import numpy as np
import xarray as xr
import dask
import xesmf as xe
import os

### Preliminaries

In [2]:
############
# Dask
############
from dask.distributed import Client
from dask_jobqueue import PBSCluster

# One single-core worker with 50GB of memory per PBS job.
# Extra scheduler directives belong in `job_extra_directives`: each entry is
# written to the job script header after a '#PBS ' prefix. `worker_extra_args`
# is instead appended to the dask worker command line, so a directive passed
# there never reaches the PBS header.
cluster = PBSCluster(cores=1, resource_spec='pmem=50GB', memory='50GB',
                     job_extra_directives=['-l feature=rhel7'],
                     walltime='00:30:00')

cluster.scale(jobs=4)  # ask for jobs

# Connect this notebook session to the cluster's scheduler
client = Client(cluster)

client

0,1
Connection method: Cluster object,Cluster type: dask_jobqueue.PBSCluster
Dashboard: /proxy/8787/status,

0,1
Dashboard: /proxy/8787/status,Workers: 0
Total threads: 0,Total memory: 0 B

0,1
Comm: tcp://10.102.201.205:33943,Workers: 0
Dashboard: /proxy/8787/status,Total threads: 0
Started: Just now,Total memory: 0 B


## ISIMIP

In [2]:
# ISIMIP3b metrics directory (the native_grid/ and regridded/ folders are
# addressed relative to it)
# UPDATE THIS FOR REPRODUCTION
path = (
    '/gpfs/group/kaf26/default/dcl5300/'
    'lafferty-sriver_inprep_tbh_DATA/metrics/isimip3b/'
)

In [3]:
# Models: the keys of the ISIMIP dictionary shipped in utils
from utils import isimip_ssp_dict

models = list(isimip_ssp_dict)

In [4]:
# Target grid: taken from a NEX-GDDP output file (same as CIL)
out_grid = xr.open_dataset('/gpfs/group/kaf26/default/dcl5300/lafferty-sriver_inprep_tbh_DATA/metrics/nex-gddp/avg/CanESM5.nc')

# Keep only the lat/lon coordinates; the data variables are not needed
out_grid = xr.Dataset({coord: out_grid[coord] for coord in ('lat', 'lon')})

In [5]:
# Regridding function
def regrid(model, out_grid, out_path, metric):
    """Conservatively regrid one native-grid netCDF file onto ``out_grid``.

    Reads ``<out_path>/native_grid/<metric>/<model>.nc`` and writes the result
    to ``<out_path>/regridded/conservative/<metric>/<model>.nc``.

    NOTE: a second function named ``regrid`` is defined in the GARD-SV section
    of this notebook and shadows this one once that cell has run.

    Parameters
    ----------
    model : str
        File stem (without ``.nc``) of the input and output files.
    out_grid : xarray.Dataset
        Target grid handed to ``xe.Regridder``.
    out_path : str
        Directory containing the ``native_grid/`` and ``regridded/`` folders.
        A trailing slash is optional.
    metric : str
        Name of the metric subfolder.

    Returns
    -------
    None
        The regridded dataset is written to disk.
    """
    in_file = os.path.join(out_path, 'native_grid', metric, model + '.nc')
    out_dir = os.path.join(out_path, 'regridded', 'conservative', metric)

    # Read native grid; the context manager closes the file handle even if
    # regridding or writing raises
    with xr.open_dataset(in_file) as ds:
        # Conservative regridder from xESMF
        # NOTE(review): xESMF documents `periodic` as only relevant for
        # non-conservative methods -- confirm whether it can be dropped
        cons_regridder = xe.Regridder(ds, out_grid, 'conservative', periodic=True)
        ds_cons = cons_regridder(ds)

        # Store: create the output folder if needed, and write while the input
        # is still open in case any variable is still backed by the file
        os.makedirs(out_dir, exist_ok=True)
        ds_cons.to_netcdf(os.path.join(out_dir, model + '.nc'))

In [7]:
%%time
# All metrics (serial version)
# Each job is a (metric, file stem) pair. 'hot' is slightly different: its
# files are stored per variable as <model>_tasmin / <model>_tasmax.
regrid_jobs = [(metric, model)
               for metric in ['avg', 'max', 'dry', 'wet', 'max5d']
               for model in models]
regrid_jobs += [('hot', model + '_' + var_id)
                for var_id in ['tasmin', 'tasmax']
                for model in models]

for metric, file_stem in regrid_jobs:
    # Check if already exists
    if os.path.isfile(path + 'regridded/conservative/' + metric + '/' + file_stem + '.nc'):
        continue

    # If not, regrid
    regrid(file_stem, out_grid, path, metric)
    # Log the file stem so the tasmin and tasmax runs of 'hot' can be told apart
    print(file_stem + ' ' + metric)

CanESM5 max5d
CNRM-CM6-1 max5d
CNRM-ESM2-1 max5d
EC-Earth3 max5d
GFDL-ESM4 max5d
IPSL-CM6A-LR max5d
MIROC6 max5d
MPI-ESM1-2-HR max5d
MRI-ESM2-0 max5d
UKESM1-0-LL max5d
CPU times: user 6min 38s, sys: 46.2 s, total: 7min 24s
Wall time: 7min 31s


In [22]:
%%time
# All metrics (dask-parallel version)
# Candidate outputs as (metric, file stem) pairs, in processing order
targets = []
for metric in ['avg', 'max', 'dry', 'wet']:
    targets.extend((metric, model) for model in models)

# 'hot' slightly different: one file per model and temperature variable
for var_id in ['tasmin', 'tasmax']:
    targets.extend(('hot', model + '_' + var_id) for model in models)

# Dask to parallelize: queue a lazy task for every output not yet on disk
delayed_res = [
    dask.delayed(regrid)(file_stem, out_grid, path, metric)
    for metric, file_stem in targets
    if not os.path.isfile(path + 'regridded/conservative/' + metric + '/' + file_stem + '.nc')
]

# Compute
print(len(delayed_res))
res = dask.compute(*delayed_res)

5
CPU times: user 18.1 s, sys: 1.57 s, total: 19.7 s
Wall time: 4min 17s


## GARD-SV (carbonplan)

In [8]:
# carbonplan metrics directory (the native_grid/ and regridded/ folders are
# addressed relative to it)
# UPDATE THIS FOR REPRODUCTION
path = (
    '/gpfs/group/kaf26/default/dcl5300/'
    'lafferty-sriver_inprep_tbh_DATA/metrics/carbonplan/'
)

In [9]:
# Models: the keys of the GARD-SV dictionary shipped in utils
# (gardsv_var_dict is used further down to check which models provide pr)
from utils import gardsv_ssp_dict, gardsv_var_dict

models = list(gardsv_ssp_dict)

In [10]:
# Target grid: taken from a NEX-GDDP output file (same as CIL)
out_grid = xr.open_dataset('/gpfs/group/kaf26/default/dcl5300/lafferty-sriver_inprep_tbh_DATA/metrics/nex-gddp/avg/CanESM5.nc')

# Keep only the lat/lon coordinates; the data variables are not needed
out_grid = xr.Dataset({coord: out_grid[coord] for coord in ('lat', 'lon')})

In [11]:
# Regridding function
def regrid(model, out_grid, path, metric):
    """Conservatively regrid one GARD-SV zarr store onto ``out_grid``.

    Reads the zarr store ``<path>/native_grid/GARD-SV/<metric>/<model>``, keeps
    the first entry along ``member_id`` and writes the regridded result to
    ``<path>/regridded/conservative/GARD-SV/<metric>/<model>.nc``.

    NOTE: this redefines the ``regrid`` function from the ISIMIP section of
    this notebook; after this cell runs, the earlier version is shadowed.

    Parameters
    ----------
    model : str
        Name of the input zarr store and stem of the output netCDF file.
    out_grid : xarray.Dataset
        Target grid handed to ``xe.Regridder``.
    path : str
        Directory containing the ``native_grid/`` and ``regridded/`` folders.
        A trailing slash is optional.
    metric : str
        Name of the metric subfolder.

    Returns
    -------
    None
        The regridded dataset is written to disk.
    """
    store = os.path.join(path, 'native_grid', 'GARD-SV', metric, model)
    out_dir = os.path.join(path, 'regridded', 'conservative', 'GARD-SV', metric)

    # Read native grid. Drop member_id *before* loading so that only the
    # selected member is pulled into memory rather than every member.
    ds = xr.open_zarr(store).isel(member_id=0).load()

    # Conservative regridder from xESMF
    # NOTE(review): xESMF documents `periodic` as only relevant for
    # non-conservative methods -- confirm whether it can be dropped
    cons_regridder = xe.Regridder(ds, out_grid, 'conservative', periodic=True)
    ds_cons = cons_regridder(ds)

    # Store, creating the output folder if it does not exist yet
    os.makedirs(out_dir, exist_ok=True)
    ds_cons.to_netcdf(os.path.join(out_dir, model + '.nc'))

In [13]:
%%time
# All metrics
# WARNING: requires around 70GB RAM for hot and wet outputs
pr_metrics = ['dry', 'wet', 'max5d']

for metric in ['avg', 'max', 'dry', 'hot', 'wet', 'max5d']:
    for model in models:
        # Some models missing pr: skip the metrics in pr_metrics for them
        if metric in pr_metrics and 'pr' not in gardsv_var_dict[model]:
            continue

        # Check if already exists
        if os.path.isfile(path + 'regridded/conservative/GARD-SV/' + metric + '/' + model + '.nc'):
            continue

        # Regrid
        regrid(model, out_grid, path, metric)
        print(model + ' ' + metric)



CanESM5 max5d




MPI-ESM1-2-HR max5d
CPU times: user 2min 29s, sys: 13.8 s, total: 2min 43s
Wall time: 2min 46s
