In [1]:
import numpy as np
import xarray as xr
import dask
import xesmf as xe
import os
from glob import glob

### Preliminaries

In [2]:
###############################
# Set paths
# UPDATE THIS FOR REPRODUCTION
###############################
# Both directories can be overridden without editing the notebook by setting
# the GMFD_IN_PATH / QUANTILES_OUT_PATH environment variables; the defaults
# are the original cluster locations.
# os.path.join(..., '') guarantees a trailing separator, because the cells
# below build file names by plain string concatenation (path + 'file').
in_path = os.path.join(
    os.environ.get('GMFD_IN_PATH',
                   '/gpfs/group/kaf26/default/rls66/GMFD_25/'),
    '')
out_path = os.path.join(
    os.environ.get('QUANTILES_OUT_PATH',
                   '/gpfs/group/kaf26/default/dcl5300/lafferty-sriver_inprep_tbh_DATA/quantiles/'),
    '')

In [3]:
############
# Dask
############
from dask.distributed import Client
from dask_jobqueue import PBSCluster

# Each PBS job hosts one single-core worker with 15 GB and a 20-minute walltime.
# NOTE(review): '#PBS ...' looks like a scheduler directive, which
# dask_jobqueue normally takes through `job_extra_directives`; confirm that
# passing it via `worker_extra_args` has the intended effect.
pbs_options = dict(
    cores=1,
    memory='15GB',
    resource_spec='pmem=15GB',
    walltime='00:20:00',
    worker_extra_args=['#PBS -l feature=rhel7'],
)
cluster = PBSCluster(**pbs_options)

# Request 20 such jobs from the scheduler
cluster.scale(jobs=20)

# Attach this notebook session to the cluster
client = Client(cluster)

client

0,1
Connection method: Cluster object,Cluster type: dask_jobqueue.PBSCluster
Dashboard: /proxy/8787/status,

0,1
Dashboard: /proxy/8787/status,Workers: 0
Total threads: 0,Total memory: 0 B

0,1
Comm: tcp://10.102.201.236:44759,Workers: 0
Dashboard: /proxy/8787/status,Total threads: 0
Started: Just now,Total memory: 0 B


# Quantile calculation

## Temperature

In [4]:
# Analysis window (label-based, both ends inclusive)
period = slice('1980-01-01', '2014-12-31')

# Lazily open every daily tmin / tmax file and keep only the analysis window
ds_tmin = xr.open_mfdataset(f'{in_path}tmin_daily_*', parallel=True, chunks='auto')
ds_tmin = ds_tmin.sel(time=period)

ds_tmax = xr.open_mfdataset(f'{in_path}tmax_daily_*', parallel=True, chunks='auto')
ds_tmax = ds_tmax.sel(time=period)

# Daily average temperature taken as the midpoint of tmin and tmax
tmin_plus_tmax = ds_tmin['tmin'] + ds_tmax['tmax']
ds_tas = tmin_plus_tmax / 2.

In [5]:
# Combine tas, tmin and tmax into a single dataset
ds = xr.merge([ds_tas.to_dataset(name='tas'), ds_tmin, ds_tmax])

In [13]:
%%time
# rechunk for quantile calculations
ds = ds.chunk({'time':-1, 'lat':50, 'lon':50})

# Get 99.95th, 99.97th, 99.99th quantiles
ds_q99 = ds.quantile(q=0.99, dim='time').compute()
ds_rp1 = ds.quantile(q=0.997, dim='time').compute() # approx 1-in-1 year event
ds_rp5 = ds.quantile(q=0.9995, dim='time').compute() # approx 1-in-5 year event
ds_rp10 = ds.quantile(q=0.9997, dim='time').compute() # approx 1-in-10 year event
ds_rp20 = ds.quantile(q=0.9999, dim='time').compute() # approx 1-in-20 year event

CPU times: user 11min 23s, sys: 22.8 s, total: 11min 46s
Wall time: 21min 40s


In [14]:
# Merge and store
# Output variables are named <variable>_<suffix>, e.g. tasmax_rp10
quantile_sets = {'q99': ds_q99,
                 'rp1': ds_rp1,
                 'rp5': ds_rp5,
                 'rp10': ds_rp10,
                 'rp20': ds_rp20}

# drop_vars replaces the deprecated Dataset.drop; the scalar 'quantile'
# coordinate differs between sets and must go before they can be merged
ds_out = xr.merge([ds_q.drop_vars('quantile').rename({'tas': f'tas_{suffix}',
                                                      'tmax': f'tasmax_{suffix}',
                                                      'tmin': f'tasmin_{suffix}'})
                   for suffix, ds_q in quantile_sets.items()])

ds_out -= 273.15 # K -> C

# Wrap longitudes above 180 into the negative range, then restore ascending order
ds_out['lon'] = np.where(ds_out['lon'] > 180, ds_out['lon'] - 360, ds_out['lon']) # update lon
ds_out = ds_out.sortby('lon')

ds_out.to_netcdf(out_path + 'gmfd_temperature_quantiles.nc')

## Precipitation

In [4]:
# Lazily open every daily precipitation file
prcp_files = f'{in_path}prcp_daily_*'
ds = xr.open_mfdataset(prcp_files, parallel=True, chunks='auto')

# Restrict to the analysis window (label-based, both ends inclusive)
period = slice('1980-01-01', '2014-12-31')
ds = ds.sel(time=period)

In [16]:
%%time
# rechunk for quantile calculations
ds = ds.chunk({'time':-1, 'lat':50, 'lon':50})

# Get 99.95th, 99.97th, 99.99th quantiles (wet days only: >1mm)
ds_q99 = ds.where(ds.prcp > 1./86400).quantile(q=0.99, dim='time').compute()
ds_rp1 = ds.where(ds.prcp > 1./86400).quantile(q=0.997, dim='time').compute() # approx 1-in-1 year event
ds_rp5 = ds.where(ds.prcp > 1./86400).quantile(q=0.9995, dim='time').compute() # approx 1-in-5 year event
ds_rp10 = ds.where(ds.prcp > 1./86400).quantile(q=0.9997, dim='time').compute() # approx 1-in-10 year event
ds_rp20 = ds.where(ds.prcp > 1./86400).quantile(q=0.9999, dim='time').compute() # approx 1-in-20 year event

CPU times: user 4min 9s, sys: 7.93 s, total: 4min 17s
Wall time: 8min 9s


In [17]:
# Merge and store
# Output variables are named pr_<suffix>, e.g. pr_rp10
quantile_sets = {'q99': ds_q99,
                 'rp1': ds_rp1,
                 'rp5': ds_rp5,
                 'rp10': ds_rp10,
                 'rp20': ds_rp20}

# drop_vars replaces the deprecated Dataset.drop; the scalar 'quantile'
# coordinate differs between sets and must go before they can be merged
ds_out = xr.merge([ds_q.drop_vars('quantile').rename({'prcp': f'pr_{suffix}'})
                   for suffix, ds_q in quantile_sets.items()])

# Scale by the number of seconds in a day
# presumably converts a per-second rate to a daily total -- confirm input units
seconds_per_day = 86400
ds_out *= seconds_per_day

# NOTE(review): unlike the temperature quantiles, longitudes are not wrapped
# to the negative range or re-sorted here -- confirm that is intended.
ds_out.to_netcdf(out_path + 'gmfd_precip_quantiles.nc')

# Regridding

## NEX-GDDP, CIL, DeepSD-BC grid

In [29]:
# Target grid: NEX-GDDP (same as CIL), taken from one sample model file
in_path = '/gpfs/group/kaf26/default/public/NEX-GDDP-CMIP6/models/'
nex_sample = xr.open_dataset(
    f'{in_path}ACCESS-CM2/ssp126/hurs/hurs_day_ACCESS-CM2_ssp126_r1i1p1f1_gn_2015.nc'
)

# True where hurs is missing at the first time step; these are the cells
# that get blanked after regridding
land_mask = nex_sample.isel(time=0).hurs.isnull()

# Keep only the coordinates needed to describe the destination grid
out_grid = xr.Dataset({'lat': nex_sample.lat,
                       'lon': nex_sample.lon})

In [31]:
# Load the native-grid quantile files written earlier
ds_t = xr.open_dataset(f'{out_path}gmfd_temperature_quantiles.nc')
ds_pr = xr.open_dataset(f'{out_path}gmfd_precip_quantiles.nc')

## Conservative regridding with xESMF onto out_grid
# (a separate regridder is built for each input dataset)
cons_regridder = xe.Regridder(ds_t, out_grid, method='conservative', periodic=True)
ds_t_cons = cons_regridder(ds_t)

cons_regridder = xe.Regridder(ds_pr, out_grid, method='conservative', periodic=True)
ds_pr_cons = cons_regridder(ds_pr)

# Blank out (NaN) every cell flagged by land_mask
ds_t_cons, ds_pr_cons = (
    xr.where(land_mask, np.nan, regridded)
    for regridded in (ds_t_cons, ds_pr_cons)
)

# Write both regridded products
outputs = {'gmfd_temperature_quantiles_nex-cil-deepsd.nc': ds_t_cons,
           'gmfd_precip_quantiles_nex-cil-deepsd.nc': ds_pr_cons}
for fname, regridded in outputs.items():
    regridded.to_netcdf(out_path + fname)

In [None]:
# Transfer to Azure to be read on non-MPC cluster
# NOTE(review): `zarr` and `container_client` are not defined in the visible
# notebook; both must exist in the session before this cell is run.
compressor = zarr.Blosc(cname='zstd', clevel=3)

# These are GMFD quantiles, so the store prefixes use 'gmfd_' like the NetCDF
# outputs above. mode='w' replaces whatever already lives under a prefix, so
# an 'era5_' prefix here would overwrite a different product.

# temp
ds_t_cons = ds_t_cons.chunk({'lat':600, 'lon':1440})
encoding = {vname: {'compressor': compressor} for vname in ds_t_cons.data_vars}

azure_prefix = 'quantiles/gmfd_temperature_quantiles_nex-cil-deepsd'
store = zarr.ABSStore(client=container_client, prefix=azure_prefix)

ds_t_cons.to_zarr(store=store, encoding=encoding, consolidated=True, mode='w')

# precip
ds_pr_cons = ds_pr_cons.chunk({'lat':600, 'lon':1440})

encoding = {vname: {'compressor': compressor} for vname in ds_pr_cons.data_vars}

azure_prefix = 'quantiles/gmfd_precip_quantiles_nex-cil-deepsd'
store = zarr.ABSStore(client=container_client, prefix=azure_prefix)
ds_pr_cons.to_zarr(store=store, encoding=encoding, consolidated=True, mode='w')

## ISIMIP grid

In [33]:
# Target grid: ISIMIP, taken from one sample input file
in_path = '/gpfs/group/kaf26/default/dcl5300/ISIMIP3b_input_climate_data/files/'
isimip_sample = xr.open_dataset(
    f'{in_path}canesm5_r1i1p1f1_w5e5_ssp126_pr_global_daily_2015_2020.nc'
)

# Keep only the coordinates needed to describe the destination grid
out_grid = xr.Dataset({'lat': isimip_sample.lat,
                       'lon': isimip_sample.lon})

In [35]:
# Load the native-grid quantile files written earlier
ds_t = xr.open_dataset(f'{out_path}gmfd_temperature_quantiles.nc')
ds_pr = xr.open_dataset(f'{out_path}gmfd_precip_quantiles.nc')

# Conservative regridding with xESMF onto out_grid
# (a separate regridder is built for each input dataset)
cons_regridder = xe.Regridder(ds_t, out_grid, method='conservative', periodic=True)
ds_t_cons = cons_regridder(ds_t)

cons_regridder = xe.Regridder(ds_pr, out_grid, method='conservative', periodic=True)
ds_pr_cons = cons_regridder(ds_pr)

# Write both regridded products
outputs = {'gmfd_temperature_quantiles_isimip.nc': ds_t_cons,
           'gmfd_precip_quantiles_isimip.nc': ds_pr_cons}
for fname, regridded in outputs.items():
    regridded.to_netcdf(out_path + fname)

## GARD-SV (carbonplan) grid

In [36]:
# Target grid: carbonplan (GARD-SV), regular 0.25-degree spacing
grid_step = 0.25
# The 90.001 stop keeps +90 in the latitude axis; longitude stops short of +180
target_lat = np.arange(-90, 90.001, grid_step)
target_lon = np.arange(-180, 180, grid_step)

out_grid = xr.Dataset({'lat': target_lat,
                       'lon': target_lon})

In [None]:
# Load the native-grid quantile files written earlier
ds_t = xr.open_dataset(f'{out_path}gmfd_temperature_quantiles.nc')
ds_pr = xr.open_dataset(f'{out_path}gmfd_precip_quantiles.nc')

## Conservative regridding with xESMF onto out_grid
# (a separate regridder is built for each input dataset)
cons_regridder = xe.Regridder(ds_t, out_grid, method='conservative', periodic=True)
ds_t_cons = cons_regridder(ds_t)

cons_regridder = xe.Regridder(ds_pr, out_grid, method='conservative', periodic=True)
ds_pr_cons = cons_regridder(ds_pr)

# Write both regridded products
outputs = {'gmfd_temperature_quantiles_gardsv.nc': ds_t_cons,
           'gmfd_precip_quantiles_gardsv.nc': ds_pr_cons}
for fname, regridded in outputs.items():
    regridded.to_netcdf(out_path + fname)