In [1]:
import numpy as np
import xarray as xr
import os
import intake

import getpass
import azure.storage.blob
import zarr

### Preliminaries

In [2]:
######################
# Azure blob storage
######################
# connection string (from azure web login, select your storage account, then "Access keys")
connection_string = getpass.getpass()

    
# format storage
container_client = azure.storage.blob.ContainerClient.from_connection_string(
    connection_string, container_name="mpctransfer")

 ········


In [3]:
###################
# Models
###################
from utils import gardsv_ssp_dict, gardsv_var_dict, deepsdbc_dict

In [13]:
#################
# Data access
#################

# Complete catalog
cat = intake.open_esm_datastore(
    "https://cpdataeuwest.blob.core.windows.net/cp-cmip/version1/catalogs/global-downscaled-cmip6.json"
)

# function to grab all variables and SSPs for singe model/method
def grab_model(method, model, scenarios):
    # Search catalogue for method, model, all SSPs, all vars
    dsets = cat.search(
        method=method,
        source_id=model,
        experiment_id=scenarios
    ).to_dataset_dict()
    
    # Concat along SSP dimension and return
    ds_ssp = []
    for key in list(dsets.keys()):
        # Get single ssp file
        ds_tmp = dsets[key]
        
        # Add ssp dimension
        if 'experiment_id' in ds_tmp.attrs:
            ds_tmp = ds_tmp.assign_coords(ssp = ds_tmp.attrs['experiment_id'])
        else:
            ds_tmp = ds_tmp.assign_coords(ssp = ds_tmp.attrs['intake_esm_attrs:experiment_id'])
        
        # Drop member ID
        ds_tmp = ds_tmp.drop('member_id')
        
        # for some models/methods we are missing precip
        # so need to fill with NaNs
        if 'pr' not in ds_tmp.data_vars:
            ds_tmp['pr'] = xr.full_like(ds_tmp['tasmax'], np.NaN)
        
        # Append
        ds_ssp.append(ds_tmp)

    return xr.concat(ds_ssp, dim='ssp')

In [5]:
##################
# Convert units
##################
def convert_units(ds):
    if 'tasmax' in ds.data_vars and ds.tasmax.attrs['units'] == 'K':
        ds['tasmax'] = ds['tasmax'] - 273.15
    if 'tasmin' in ds.data_vars and ds.tasmin.attrs['units'] == 'K':
        ds['tasmin'] = ds['tasmin'] - 273.15
    if 'pr' in ds.data_vars and ds.pr.attrs['units'] == 'kg m-2 s-1':
        ds['pr'] = ds['pr'] * 86400
    
    return ds

In [6]:
#########
# Dask
#########
import dask_gateway
gateway = dask_gateway.Gateway()

# cluster options
cluster_options = gateway.cluster_options()
cluster_options["worker_memory"] = 16
cluster_options["worker_cores"] = 1

# start cluster
cluster = gateway.new_cluster(cluster_options)
client = cluster.get_client()
cluster.scale(40)

# dashboard link
print(cluster.dashboard_link)

https://pccompute.westeurope.cloudapp.azure.com/compute/services/dask-gateway/clusters/prod.7c21043089cc4970bd22d2e7ddfb9939/status


# GARD-SV

In [32]:
# Preliminaries
method = 'GARD-SV'
models = list(gardsv_ssp_dict.keys())

assert models == list(gardsv_var_dict.keys())

## Simple metrics

### Annual avgs

In [33]:
# loop through models: RUNTIME IS AROUND 5 MINS PER MODEL WITH 40 DASK WORKERS
for model in models:
    # Grab model
    ds = grab_model(method, model, gardsv_ssp_dict[model])
    
    ds = convert_units(ds)
    ds['tas'] = (ds['tasmin'] + ds['tasmax']) / 2.
    
    # Annual averages
    ds_final = ds.resample(time='1Y').mean()
    
    # storage options    
    ds_final = ds_final.chunk({'ssp':1, 'time':30, 'lat':720, 'lon':1440})
    
    compressor = zarr.Blosc(cname='zstd', clevel=3)
    encoding = {vname: {'compressor': compressor} for vname in ds_final.data_vars}
    
    azure_prefix = 'carbonplan/' + method + '/annual_avgs/' + model
    store = zarr.ABSStore(client=container_client, prefix=azure_prefix)

    # store
    ds_final.to_zarr(store=store, encoding=encoding, consolidated=True, mode='w')

    print(model)


--> The keys in the returned dictionary of datasets are constructed as follows:
	'activity_id.institution_id.source_id.experiment_id.timescale.method'


BCC-CSM2-MR

--> The keys in the returned dictionary of datasets are constructed as follows:
	'activity_id.institution_id.source_id.experiment_id.timescale.method'


CanESM5

--> The keys in the returned dictionary of datasets are constructed as follows:
	'activity_id.institution_id.source_id.experiment_id.timescale.method'


MIROC6

--> The keys in the returned dictionary of datasets are constructed as follows:
	'activity_id.institution_id.source_id.experiment_id.timescale.method'


MPI-ESM1-2-HR


### Annnual maxs

In [34]:
# loop through models: RUNTIME IS AROUND 15 MINS PER MODEL WITH 40 DASK WORKERS
for model in models:
    # Grab model
    ds = grab_model(method, model, gardsv_ssp_dict[model])

    ds = convert_units(ds)
    ds['tas'] = (ds['tasmin'] + ds['tasmax']) / 2.
    
    # Annual maxs
    ds_final = ds.resample(time='1Y').max()
    
    # storage options    
    ds_final = ds_final.chunk({'ssp':1, 'time':30, 'lat':720, 'lon':1440})
    
    compressor = zarr.Blosc(cname='zstd', clevel=3)
    encoding = {vname: {'compressor': compressor} for vname in ds_final.data_vars}
    
    azure_prefix = 'carbonplan/' + method + '/annual_maxs/' + model
    store = zarr.ABSStore(client=container_client, prefix=azure_prefix)

    # store
    ds_final.to_zarr(store=store, encoding=encoding, consolidated=True, mode='w')

    print(model)


--> The keys in the returned dictionary of datasets are constructed as follows:
	'activity_id.institution_id.source_id.experiment_id.timescale.method'


BCC-CSM2-MR

--> The keys in the returned dictionary of datasets are constructed as follows:
	'activity_id.institution_id.source_id.experiment_id.timescale.method'


CanESM5

--> The keys in the returned dictionary of datasets are constructed as follows:
	'activity_id.institution_id.source_id.experiment_id.timescale.method'


MIROC6

--> The keys in the returned dictionary of datasets are constructed as follows:
	'activity_id.institution_id.source_id.experiment_id.timescale.method'


MPI-ESM1-2-HR


# DeepSD-BC

In [7]:
# Preliminaries
method = 'DeepSD-BC'
models = list(deepsdbc_dict.keys())

## Simple metrics

### Annual avgs

In [8]:
# loop through models: RUNTIME IS AROUND 5 MINS PER MODEL WITH 40 DASK WORKERS
for model in models:
    # Grab model
    ds = grab_model(method, model, list(deepsdbc_dict[model].keys()))
    
    ds = convert_units(ds)
    ds['tas'] = (ds['tasmin'] + ds['tasmax']) / 2.
    
    # Annual averages
    ds_final = ds.resample(time='1Y').mean()
    
    # storage options    
    ds_final = ds_final.chunk({'ssp':1, 'time':30, 'lat':720, 'lon':1440})
    
    compressor = zarr.Blosc(cname='zstd', clevel=3)
    encoding = {vname: {'compressor': compressor} for vname in ds_final.data_vars}
    
    azure_prefix = 'carbonplan/' + method + '/annual_avgs/' + model
    store = zarr.ABSStore(client=container_client, prefix=azure_prefix)

    # store
    ds_final.to_zarr(store=store, encoding=encoding, consolidated=True, mode='w')

    print(model)


--> The keys in the returned dictionary of datasets are constructed as follows:
	'activity_id.institution_id.source_id.experiment_id.timescale.method'


CanESM5

--> The keys in the returned dictionary of datasets are constructed as follows:
	'activity_id.institution_id.source_id.experiment_id.timescale.method'


MRI-ESM2-0


### Annnual maxs

In [14]:
# loop through models: RUNTIME IS AROUND 15 MINS PER MODEL WITH 40 DASK WORKERS
for model in models[1:]:
    # Grab model
    ds = grab_model(method, model, list(deepsdbc_dict[model].keys()))

    ds = convert_units(ds)
    ds['tas'] = (ds['tasmin'] + ds['tasmax']) / 2.
    
    # Annual maxs
    ds_final = ds.resample(time='1Y').max()
    
    # storage options    
    ds_final = ds_final.chunk({'ssp':1, 'time':30, 'lat':720, 'lon':1440})
    
    compressor = zarr.Blosc(cname='zstd', clevel=3)
    encoding = {vname: {'compressor': compressor} for vname in ds_final.data_vars}
    
    azure_prefix = 'carbonplan/' + method + '/annual_maxs/' + model
    store = zarr.ABSStore(client=container_client, prefix=azure_prefix)

    # store
    ds_final.to_zarr(store=store, encoding=encoding, consolidated=True, mode='w')

    print(model)


--> The keys in the returned dictionary of datasets are constructed as follows:
	'activity_id.institution_id.source_id.experiment_id.timescale.method'


MRI-ESM2-0
