In [1]:
import numpy as np
import xarray as xr
import os
import intake

### Preliminaries

In [2]:
###############################
# Set paths
# UPDATE THIS FOR REPRODUCTION
###############################
# Root directory for the netCDF metric files written by this notebook.
# Keep the trailing slash: the output file names are built further down by plain
# string concatenation (out_path + 'annual_avgs/' + model + '.nc').
out_path = '/gpfs/group/kaf26/default/dcl5300/lafferty-sriver_inprep_tbh_DATA/metrics/carbonplan/'

In [3]:
###################
# Models
###################
from utils import gardsv_ssp_dict, deepsdbc_ssp_dict, gardsv_var_dict, deepsdbc_var_dict

In [5]:
#################
# Data access
#################

# Complete catalog
# Opened once at notebook level; grab_model() below searches this object, so this
# cell must be run before any call to grab_model().
cat = intake.open_esm_datastore(
    "https://cpdataeuwest.blob.core.windows.net/cp-cmip/version1/catalogs/global-downscaled-cmip6.json"
)

# function to grab all variables and SSPs for single model/method
def grab_model(method, model, scenarios, var_ids):
    """Load every requested scenario/variable for one model and stack them by SSP.

    Parameters
    ----------
    method : passed to the catalog search as ``method`` (downscaling method).
    model : passed to the catalog search as ``source_id``.
    scenarios : passed to the catalog search as ``experiment_id``.
    var_ids : passed to the catalog search as ``variable_id``.

    Returns
    -------
    xarray.Dataset
        The datasets returned by the catalog, each labelled with an ``ssp``
        coordinate taken from its ``intake_esm_attrs:experiment_id`` attribute,
        with ``member_id`` removed, concatenated along ``ssp``.

    Raises
    ------
    ValueError
        If the catalog search returns no datasets.
    """
    # Search catalogue for method, model, all SSPs, all vars
    dsets = cat.search(
        method=method,
        source_id=model,
        experiment_id=scenarios,
        variable_id=var_ids
    ).to_dataset_dict()

    # Fail with a readable message instead of the generic error xr.concat
    # raises when handed an empty list.
    if not dsets:
        raise ValueError(
            'No datasets found for method=' + str(method) + ', model=' + str(model)
            + ', scenarios=' + str(scenarios) + ', var_ids=' + str(var_ids)
        )

    # Label each dataset with its SSP and remove member_id.
    # drop_vars replaces the deprecated Dataset.drop for removing a variable/coordinate.
    ds_ssp = [
        ds_tmp.assign_coords(
            ssp=ds_tmp.attrs['intake_esm_attrs:experiment_id']
        ).drop_vars('member_id')
        for ds_tmp in dsets.values()
    ]

    # Concat along SSP dimension and return
    return xr.concat(ds_ssp, dim='ssp')

In [6]:
##################
# Convert units
##################
def convert_units(ds):
    """Convert temperature and precipitation variables of ``ds`` to working units.

    Conversions applied (only when the variable is present and its ``units``
    attribute matches the source unit exactly):

    * ``tasmax`` / ``tasmin``: ``'K'`` -> ``'degC'`` (subtract 273.15)
    * ``pr``: ``'kg m-2 s-1'`` -> ``'mm day-1'`` (multiply by 86400)

    The dataset is modified in place AND returned, so both
    ``convert_units(ds)`` and ``ds = convert_units(ds)`` work.

    Raises
    ------
    KeyError
        If one of the variables above is present but has no ``units`` attribute.
    """
    # variable name -> (source units, converted units, conversion)
    conversions = {
        'tasmax': ('K', 'degC', lambda da: da - 273.15),
        'tasmin': ('K', 'degC', lambda da: da - 273.15),
        'pr': ('kg m-2 s-1', 'mm day-1', lambda da: da * 86400),
    }

    for name, (old_units, new_units, convert) in conversions.items():
        if name not in ds.data_vars:
            continue
        original = ds[name]
        if original.attrs['units'] != old_units:
            continue
        converted = convert(original)
        # Arithmetic may or may not carry attrs over (depends on the xarray
        # version/options), so set them explicitly: keep the original metadata
        # but record the new units. This also stops a second call from
        # converting already-converted data.
        converted.attrs = {**original.attrs, 'units': new_units}
        ds[name] = converted

    # The caller does `ds_out = convert_units(ds_out)`; without this return it
    # would receive None.
    return ds

In [None]:
############
# Dask
############
from dask.distributed import Client
from dask_jobqueue import PBSCluster

# PBS-backed Dask cluster: 1 core and 20GB per job, 30 minute walltime
cluster = PBSCluster(
    cores=1,
    memory='20GB',
    resource_spec='pmem=20GB',
    env_extra=['#PBS -l feature=rhel7'],
    walltime='00:30:00',
)

# ask for jobs
cluster.scale(jobs=20)

# Attach a client to the cluster; the bare name below displays it as the cell output
client = Client(cluster)

client

0,1
Connection method: Cluster object,Cluster type: dask_jobqueue.PBSCluster
Dashboard: /proxy/8787/status,

0,1
Dashboard: /proxy/8787/status,Workers: 0
Total threads: 0,Total memory: 0 B

0,1
Comm: tcp://10.102.201.238:37264,Workers: 0
Dashboard: /proxy/8787/status,Total threads: 0
Started: Just now,Total memory: 0 B


# GARD-SV

In [9]:
# Preliminaries
method = 'GARD-SV'

# The model list is taken from the SSP lookup table
models = list(gardsv_ssp_dict)

# The variable lookup table must list the same models in the same order
assert list(gardsv_var_dict) == models

## Simple metrics

In [None]:
## Annual avgs and maxs
# statistic label -> reduction applied to the yearly resampler
annual_reducers = {
    'avgs': lambda resampled: resampled.mean(),
    'maxs': lambda resampled: resampled.max(),
}

# NOTE(review): `models[:1]` limits the run to the first model only -- confirm
# whether the full `models` list is intended.
for model in models[:1]:
    # One output file per statistic; statistics whose file already exists are skipped
    out_files = {
        stat: out_path + 'annual_' + stat + '/' + model + '.nc'
        for stat in annual_reducers
    }
    pending = [stat for stat in annual_reducers if not os.path.isfile(out_files[stat])]

    # Check if already done
    if not pending:
        print(model + ' already done')
        continue

    # Grab model
    ds = grab_model(method, model, gardsv_ssp_dict[model], gardsv_var_dict[model])
    ds = ds.persist()

    for stat in pending:
        ds_out = annual_reducers[stat](ds.resample(time='1Y')).compute()

        # convert_units assigns the converted variables back into the dataset it
        # is given, so ds_out is updated in place. Its return value is
        # deliberately not used, so ds_out can never be rebound to None.
        convert_units(ds_out)

        # Make sure the target directory exists before writing
        os.makedirs(os.path.dirname(out_files[stat]), exist_ok=True)
        ds_out.to_netcdf(out_files[stat])
        print(model + ' annual ' + stat + ' done')


--> The keys in the returned dictionary of datasets are constructed as follows:
	'activity_id.institution_id.source_id.experiment_id.timescale.method'
