In [1]:
###############################################
### FOR USE ON MICROSOFT PLANETARY COMPUTER ###
###############################################

In [2]:
import planetary_computer
import pystac_client
import pystac

import numpy as np
import xarray as xr
import pandas as pd

import collections
import fsspec
import requests

import getpass
import azure.storage.blob
import zarr

### Preliminaries

In [3]:
######################
# Azure blob storage
######################
# Prompt for the storage-account connection string instead of hardcoding it
# (Azure web portal -> your storage account -> "Access keys").
connection_string = getpass.getpass()

# Client bound to the "mpctransfer" container; the Zarr stores created in the
# processing loops below write through this client.
container_client = azure.storage.blob.ContainerClient.from_connection_string(
    conn_str=connection_string,
    container_name="mpctransfer",
)

 ········


In [4]:
###################
# Models
###################
from utils import cil_ssp_dict

# Model names are the keys of cil_ssp_dict; iterating the mapping yields them
# in the same order as .keys().
models = list(cil_ssp_dict)

In [9]:
#################
# Data access
#################

# Root endpoint of the Planetary Computer STAC API
STAC_API_URL = "https://planetarycomputer.microsoft.com/api/stac/v1"

# Complete catalog (searched by grab_model)
catalog = pystac_client.Client.open(STAC_API_URL)

# function to grab variables and SSPs for a single model
def grab_model(model_id, vars_to_grab):
    """Open the requested variables for every non-historical experiment of one model.

    Searches the three CIL-GDPCIR collections of the module-level ``catalog``
    for items whose ``cmip6:source_id`` equals ``model_id`` and whose
    ``cmip6:experiment_id`` is not ``"historical"``. For each item found, the
    assets named in ``vars_to_grab`` are signed, opened with
    ``xr.open_dataset`` and merged; the per-item datasets are then
    concatenated along a new ``ssp`` dimension.

    Parameters
    ----------
    model_id : str
        Value matched against the ``cmip6:source_id`` item property.
    vars_to_grab : iterable of str
        Asset keys to open from each item (e.g. ``['tasmin', 'tasmax', 'pr']``).

    Returns
    -------
    xarray.Dataset
        One dataset holding all requested variables, with an ``ssp``
        coordinate taken from each opened dataset's ``experiment_id`` attribute.

    Raises
    ------
    ValueError
        If the search returns no items for ``model_id``.
    KeyError
        If an item has no asset for one of ``vars_to_grab``, or an opened
        dataset has no ``experiment_id`` attribute.
    """
    # Search across all licences in CIL-GDPCIR
    search = catalog.search(
        collections=["cil-gdpcir-cc0", "cil-gdpcir-cc-by", "cil-gdpcir-cc-by-sa"],
        query={"cmip6:source_id" : {"eq": model_id},
               "cmip6:experiment_id": {"neq": "historical"}} # omit historical
    )
    # NOTE(review): newer pystac-client releases appear to deprecate
    # get_all_items() in favour of item_collection() -- confirm against the
    # installed version before switching.
    ensemble = search.get_all_items()

    # grab all into one dataset: one merged dataset per item (experiment)
    ds_ssp = []

    for item in ensemble:
        # assets are read from the signed copy of the item
        signed = planetary_computer.sign(item)
        ds_vars = []
        for variable_id in vars_to_grab:
            asset = signed.assets[variable_id]
            ds_tmp = xr.open_dataset(asset.href, **asset.extra_fields["xarray:open_kwargs"])
            # label the dataset with its experiment so it can be concatenated along 'ssp'
            ds_tmp = ds_tmp.assign_coords(ssp = ds_tmp.attrs['experiment_id'])
            ds_vars.append(ds_tmp)
        ds_ssp.append(xr.merge(ds_vars))

    # Without this guard an empty search result surfaces as an opaque
    # ValueError from xr.concat; raise the same exception type with context.
    if not ds_ssp:
        raise ValueError(
            f"No non-historical CIL-GDPCIR items found for model {model_id!r}"
        )

    ds_out = xr.concat(ds_ssp, dim='ssp')

    return ds_out

In [10]:
#########
# Dask
#########
import dask_gateway

# Resources requested per worker and the size of the worker pool
WORKER_MEMORY = 16  # presumably GiB -- confirm against the gateway's option spec
WORKER_CORES = 1
N_WORKERS = 40

gateway = dask_gateway.Gateway()

# cluster options
cluster_options = gateway.cluster_options()
cluster_options["worker_memory"] = WORKER_MEMORY
cluster_options["worker_cores"] = WORKER_CORES

# start cluster, attach a client to it, then request the workers
cluster = gateway.new_cluster(cluster_options)
client = cluster.get_client()
cluster.scale(N_WORKERS)

# dashboard link
print(cluster.dashboard_link)

https://pccompute.westeurope.cloudapp.azure.com/compute/services/dask-gateway/clusters/prod.9a244b7b68f64d808635cd9163649855/status


# Simple metrics

## Annual averages

In [None]:
%%time
# loop through models: RUNTIME IS AROUND 15 MINS PER MODEL WITH 40 DASK WORKERS
for model in models:
    # load data (lazy)
    ds = grab_model(model, ['tasmin', 'tasmax', 'pr'])
    
    # compute
    ds['tas'] = (ds['tasmax'] + ds['tasmin']) / 2.
    ds_final = ds.resample(time='1Y').mean()
    
    # unit conversions
    ds_final['tas'] = ds_final['tas'] - 273.15 # K -> C
    ds_final['tasmax'] = ds_final['tasmax'] - 273.15 # K -> C
    ds_final['tasmin'] = ds_final['tasmin'] - 273.15 # K -> C
        
    # storage options    
    ds_final = ds_final.chunk({'ssp':1, 'time':10, 'lat':720, 'lon':1440})
    
    compressor = zarr.Blosc(cname='zstd', clevel=3)
    encoding = {vname: {'compressor': compressor} for vname in ds_final.data_vars}
    
    azure_prefix = 'cil-gdpcir/annual_avgs/' + model
    store = zarr.ABSStore(client=container_client, prefix=azure_prefix)

    # store
    ds_final.to_zarr(store=store, encoding=encoding, consolidated=True, mode='w')
    print(model)

## Annual maxima

In [11]:
%%time
# loop through models: RUNTIME IS AROUND 15 MINS PER MODEL WITH 40 DASK WORKERS
for model in models[9:]:
    # load data (lazy)
    ds = grab_model(model, ['tasmin', 'tasmax', 'pr'])
    
    # compute
    ds['tas'] = (ds['tasmax'] + ds['tasmin']) / 2.
    ds_final = ds.resample(time='1Y').max()
    
    # unit conversions
    ds_final['tas'] = ds_final['tas'] - 273.15 # K -> C
    ds_final['tasmax'] = ds_final['tasmax'] - 273.15 # K -> C
    ds_final['tasmin'] = ds_final['tasmin'] - 273.15 # K -> C
        
    # storage options    
    ds_final = ds_final.chunk({'ssp':1, 'time':10, 'lat':720, 'lon':1440})
    
    compressor = zarr.Blosc(cname='zstd', clevel=3)
    encoding = {vname: {'compressor': compressor} for vname in ds_final.data_vars}
    
    azure_prefix = 'cil-gdpcir/annual_maxs/' + model
    store = zarr.ABSStore(client=container_client, prefix=azure_prefix)

    # store
    ds_final.to_zarr(store=store, encoding=encoding, consolidated=True, mode='w')
    print(model)

INM-CM5-0
MIROC-ES2L
MIROC6
MPI-ESM1-2-LR
NESM3
NorESM2-LM
NorESM2-MM
UKESM1-0-LL
CPU times: user 3min 33s, sys: 6.36 s, total: 3min 39s
Wall time: 1h 46min 43s
