In [1]:
import numpy as np
import xarray as xr
import dask
import os
from glob import glob

### Preliminaries

In [2]:
###############################
# Set paths
# UPDATE THIS FOR REPRODUCTION
###############################
# Directory holding the GMFD daily files that the cells below glob for
# (tmin_daily_*, tmax_daily_*, prcp_daily_*).
# Set the GMFD_IN_PATH environment variable to point somewhere else without
# editing the notebook; when it is unset the original cluster location is used.
# os.path.join(..., '') guarantees a trailing path separator, which the later
# `in_path + '<pattern>'` string concatenations rely on.
in_path = os.path.join(
    os.environ.get('GMFD_IN_PATH', '/gpfs/group/kaf26/default/rls66/GMFD_25/'),
    '')

In [3]:
############
# Dask
############
from dask_jobqueue import PBSCluster
from dask.distributed import Client

# Each PBS job runs one single-core worker with 15 GB of memory for at most
# 20 minutes.
# Scheduler directives go in `job_extra_directives`; dask_jobqueue adds the
# `#PBS` prefix itself when it writes the job script header. Passing
# '#PBS -l feature=rhel7' through `worker_extra_args` instead appends it to the
# dask worker command line, where the shell treats everything after '#' as a
# comment, so the node-feature request never reached the scheduler.
cluster = PBSCluster(cores=1, resource_spec='pmem=15GB', memory='15GB',
                     job_extra_directives=['-l feature=rhel7'],
                     walltime='00:20:00')

cluster.scale(jobs=20)  # ask for jobs

client = Client(cluster)

client

0,1
Connection method: Cluster object,Cluster type: dask_jobqueue.PBSCluster
Dashboard: /proxy/8787/status,

0,1
Dashboard: /proxy/8787/status,Workers: 0
Total threads: 0,Total memory: 0 B

0,1
Comm: tcp://10.102.201.228:45724,Workers: 0
Dashboard: /proxy/8787/status,Total threads: 0
Started: Just now,Total memory: 0 B


# Temperature

In [8]:
# Open every daily tmin / tmax file as one dask-backed dataset each and keep
# only the 1980-2014 analysis period
ds_tmin = xr.open_mfdataset(
    in_path + 'tmin_daily_*', parallel=True, chunks='auto'
).sel(time=slice('1980-01-01','2014-12-31'))

ds_tmax = xr.open_mfdataset(
    in_path + 'tmax_daily_*', parallel=True, chunks='auto'
).sel(time=slice('1980-01-01','2014-12-31'))

# Daily average temperature: midpoint of the daily minimum and maximum
tas_sum = ds_tmin['tmin'] + ds_tmax['tmax']
ds_tas = tas_sum / 2.

In [13]:
# Combine average, minimum and maximum temperature into a single dataset
# with variables 'tas', 'tmin' and 'tmax'
ds = xr.merge([
    xr.Dataset(dict(tas=ds_tas)),
    ds_tmin,
    ds_tmax,
])

In [16]:
%%time
# rechunk for quantile calculations: every chunk holds the full time axis
# (-1) of a 50 x 50 lat/lon tile
ds = ds.chunk({'time':-1, 'lat':50, 'lon':50})

# Get 95th, 99th quantiles
# Both quantiles are evaluated in a single dask.compute call so the two task
# graphs are merged and the shared upstream work (reading the files and
# rechunking) is done once instead of once per quantile.
ds_q95, ds_q99 = dask.compute(ds.quantile(q=0.95, dim='time'),
                              ds.quantile(q=0.99, dim='time'))

CPU times: user 3min 34s, sys: 7.68 s, total: 3min 42s
Wall time: 7min 13s


In [23]:
# Merge and store
# The scalar 'quantile' coordinate is removed from each result and the
# quantile level is carried in the variable name instead.
# drop_vars replaces the deprecated Dataset.drop.
ds_out = xr.merge([ds_q95.drop_vars('quantile').rename({'tas':'tas_q95',
                                                        'tmax':'tasmax_q95',
                                                        'tmin':'tasmin_q95'}),
                   ds_q99.drop_vars('quantile').rename({'tas':'tas_q99',
                                                        'tmax':'tasmax_q99',
                                                        'tmin':'tasmin_q99'})])

ds_out -= 273.15 # K -> C

# Longitudes above 180 are shifted down by 360; the dataset is then re-sorted
# so that 'lon' is ascending again
ds_out['lon'] = np.where(ds_out['lon'] > 180, ds_out['lon'] - 360, ds_out['lon']) # update lon
ds_out = ds_out.sortby('lon')

ds_out.to_netcdf('../data/gmfd_temperature_quantiles.nc')

# Precipitation

In [4]:
# Open every daily precipitation file as one dask-backed dataset and keep
# only the 1980-2014 analysis period
ds = xr.open_mfdataset(
    in_path + 'prcp_daily_*', parallel=True, chunks='auto'
).sel(time=slice('1980-01-01','2014-12-31'))

In [8]:
%%time
# rechunk for quantile calculations: every chunk holds the full time axis
# (-1) of a 50 x 50 lat/lon tile
ds = ds.chunk({'time':-1, 'lat':50, 'lon':50})

# Wet-day threshold of 1 mm/day, expressed in the units of the data.
# prcp is treated as a rate in kg m-2 s-1 (= mm/s), consistent with the x86400
# conversion applied to the quantiles before they are stored. In those units
# the previous literal 0.001 corresponds to 86.4 mm/day, not the intended 1 mm.
# NOTE(review): confirm the prcp units against the input file metadata.
wet_day_threshold = 1.0 / 86400

# Mask dry days once and reuse the masked dataset for both wet-day quantiles
ds_wet = ds.where(ds.prcp > wet_day_threshold)

# Get 95th, 99th quantiles
#   *_all: including dry days
#   *_wet: wet days only (>1mm)
# All four are evaluated in a single dask.compute call so the shared upstream
# work (reading the files and rechunking) is done once rather than four times.
ds_q95_all, ds_q99_all, ds_q95_wet, ds_q99_wet = dask.compute(
    ds.quantile(q=0.95, dim='time'),
    ds.quantile(q=0.99, dim='time'),
    ds_wet.quantile(q=0.95, dim='time'),
    ds_wet.quantile(q=0.99, dim='time'),
)

CPU times: user 1min 1s, sys: 2.77 s, total: 1min 4s
Wall time: 2min 26s


In [10]:
# Merge and store
# The scalar 'quantile' coordinate is removed from each result; the quantile
# level and the all-days / wet-days distinction are carried in the variable
# name instead. drop_vars replaces the deprecated Dataset.drop.
ds_out = xr.merge([ds_q95_all.drop_vars('quantile').rename({'prcp':'pr_q95_all'}),
                   ds_q99_all.drop_vars('quantile').rename({'prcp':'pr_q99_all'}),
                   ds_q95_wet.drop_vars('quantile').rename({'prcp':'pr_q95_wet'}),
                   ds_q99_wet.drop_vars('quantile').rename({'prcp':'pr_q99_wet'})])

# Per-second rate -> per-day total (86400 s per day); presumably
# kg m-2 s-1 -> mm/day -- confirm against the input file metadata.
ds_out *= 86400
# NOTE(review): unlike the temperature quantile file, longitudes are not shifted
# for values above 180 here -- confirm the two outputs are meant to differ.
ds_out.to_netcdf('../data/gmfd_precip_quantiles.nc')