In [1]:
import numpy as np
import xarray as xr
import os 

import getpass
import azure.storage.blob
import zarr

### Preliminaries

In [2]:
###############################
# Output location
# UPDATE THIS FOR REPRODUCTION
###############################
# Root directory on local storage that receives the CIL-GDPCIR files.
# Keep the trailing slash: file paths are built from this by concatenation.
cil_out = (
    "/gpfs/group/kaf26/default/dcl5300/"
    "lafferty-sriver_inprep_tbh_DATA/cmip6/cil-gdpcir/"
)

In [3]:
###################
# Models
###################

# NEX models for which all SSPs and both variables (tas, pr) are available
complete_nex_models = [
    "ACCESS-CM2", "ACCESS-ESM1-5", "CanESM5", "CMCC-ESM2", "CNRM-CM6-1",
    "CNRM-ESM2-1", "EC-Earth3", "EC-Earth3-Veg-LR", "FGOALS-g3", "GFDL-CM4",
    "GFDL-ESM4", "GISS-E2-1-G", "INM-CM4-8", "INM-CM5-0", "IPSL-CM6A-LR",
    "KACE-1-0-G", "MIROC-ES2L", "MIROC6", "MPI-ESM1-2-HR", "MPI-ESM1-2-LR",
    "MRI-ESM2-0", "NorESM2-LM", "NorESM2-MM", "TaiESM1", "UKESM1-0-LL",
]

# CIL models for which all SSPs and variables are available
complete_cil_models = [
    "INM-CM4-8", "INM-CM5-0", "BCC-CSM2-MR", "CMCC-CM2-SR5",
    "CMCC-ESM2", "MIROC-ES2L", "MIROC6", "UKESM1-0-LL",
    "MPI-ESM1-2-LR", "NorESM2-LM", "NorESM2-MM", "GFDL-ESM4",
    "EC-Earth3", "EC-Earth3-Veg-LR", "EC-Earth3-Veg", "CanESM5",
]

# Keep only the models that appear in both lists; np.intersect1d returns
# them as a sorted array of unique names.
models = np.intersect1d(complete_cil_models, complete_nex_models)

In [4]:
############
# Dask
############
from dask.distributed import Client
from dask_jobqueue import PBSCluster

# Describe a single PBS job: one core, 10GB of memory, two-hour walltime.
cluster = PBSCluster(
    cores=1,
    memory="10GB",
    resource_spec="pmem=10GB",
    project="open",
    env_extra=["#PBS -l feature=rhel7"],
    walltime="02:00:00",
)

# Request 25 such jobs from the scheduler
cluster.scale(jobs=25)

# Attach a client to the cluster; the bare name below displays its summary
client = Client(cluster)

client

0,1
Connection method: Cluster object,Cluster type: dask_jobqueue.PBSCluster
Dashboard: /proxy/8787/status,

0,1
Dashboard: /proxy/8787/status,Workers: 0
Total threads: 0,Total memory: 0 B

0,1
Comm: tcp://10.102.201.239:45865,Workers: 0
Dashboard: /proxy/8787/status,Total threads: 0
Started: Just now,Total memory: 0 B


## Transfer from Azure to local storage

In [5]:
###############################
# Azure blob storage access
###############################
# Prompt for the connection string instead of storing it in the notebook.
# It is found in the Azure web portal: select your storage account, then
# "Access keys".
connection_string = getpass.getpass()

# NOTE: if you are not located in Western Europe, it will be much quicker to
#   (1) create a new Azure storage account that is physically close to the
#       machine you are transferring to,
#   (2) copy all of the CIL-GDPCIR data into a blob container in that
#       account via the Azure Storage Explorer, and
#   (3) point the client below at that nearby blob container.
container_client = azure.storage.blob.ContainerClient.from_connection_string(
    connection_string,
    container_name="roaraccess",
)

 ········


In [7]:
# Copy every (model, metric) dataset from Azure blob storage to a local
# netCDF file. Files that already exist locally are skipped, so this cell can
# be re-run to resume an interrupted transfer.
for model in models:
    print(model)
    for metric in ['annual_avgs', 'annual_maxs', 'annual_mins', 'precip_ind']:
        out_path = cil_out + metric + '/' + model + '.nc'
        # Data is written here first and renamed only once the write has
        # finished, so a failed write never leaves a partial file at out_path
        # that the "already done" check below would mistake for a complete one.
        tmp_path = out_path + '.tmp'

        # check if already exists
        if os.path.isfile(out_path):
            print('   ' + metric + ' already done')
            continue

        try:
            # read: open the remote zarr store and pull it fully into memory
            # (retries=5 is forwarded by xarray to the dask compute call)
            azure_prefix = 'cil-gdpcir_rechunked/' + metric + '/' + model
            store = zarr.ABSStore(client=container_client, prefix=azure_prefix)
            ds_cil = xr.open_zarr(store=store).load(retries=5)

            # write: make sure the metric directory exists, then write to the
            # temporary file and move it into place
            os.makedirs(os.path.dirname(out_path), exist_ok=True)
            ds_cil.to_netcdf(tmp_path)
            os.replace(tmp_path, out_path)
            print('   ' + metric)
        except Exception as err:
            # Best effort: report the failure (with its cause) and carry on
            # with the next metric. Catching Exception rather than using a
            # bare except lets KeyboardInterrupt stop the loop.
            print('    ERROR with ' + metric + ': ' + repr(err))
            # Remove any partially written temporary file
            if os.path.isfile(tmp_path):
                os.remove(tmp_path)

CMCC-ESM2
   annual_avgs
   annual_maxs
   annual_mins
   precip_ind
CanESM5
   annual_avgs
   annual_maxs
   annual_mins
   precip_ind
EC-Earth3
   annual_avgs
   annual_maxs
   annual_mins
   precip_ind
EC-Earth3-Veg-LR
   annual_avgs
   annual_maxs
   annual_mins
   precip_ind
GFDL-ESM4
   annual_avgs
   annual_maxs
   annual_mins
   precip_ind
INM-CM4-8
   annual_avgs
   annual_maxs
   annual_mins
   precip_ind
INM-CM5-0
   annual_avgs
   annual_maxs
   annual_mins
   precip_ind
MIROC-ES2L
   annual_avgs
   annual_maxs
   annual_mins
   precip_ind
MIROC6
   annual_avgs
   annual_maxs
   annual_mins
   precip_ind
MPI-ESM1-2-LR
   annual_avgs
   annual_maxs
   annual_mins
   precip_ind
NorESM2-LM
   annual_avgs
   annual_maxs
   annual_mins
   precip_ind
NorESM2-MM
   annual_avgs
   annual_maxs
   annual_mins
   precip_ind
UKESM1-0-LL
   annual_avgs
   annual_maxs
   annual_mins
   precip_ind
