In [1]:
import numpy as np
import xarray as xr
import os 

import zarr

In [8]:
# Open the CanESM5 metrics Zarr store on its own for a first look
ds = xr.open_zarr(
    store='/gpfs/group/kaf26/default/dcl5300/lafferty-sriver_inprep_tbh_DATA/metrics/CanESM5/'
)

In [23]:
def preprocess_cbp(ds):
    """Standardize one per-model dataset before it is concatenated with others.

    Steps, in order: select the latitude slice -60..90, sort along ``ssp``,
    attach scalar ``ensemble`` and ``model`` coordinates, replace the ``time``
    values by their calendar year, and make sure a ``pr`` variable exists.

    Parameters
    ----------
    ds : xarray.Dataset
        Dataset opened from a single store. ``ds.encoding['source']`` must hold
        the path it was opened from; the model name is the last component of
        that path.

    Returns
    -------
    xarray.Dataset
        The processed dataset.

    Raises
    ------
    KeyError
        If ``ds.encoding`` has no ``'source'`` entry.
    """
    ds = ds.sel(lat=slice(-60, 90))
    ds = ds.sortby('ssp')
    ds = ds.assign_coords(ensemble='carbonplan')
    # Model name = final directory of the source path. Using os.path instead of
    # fixed character offsets keeps this correct for any root directory and
    # with or without a trailing slash.
    model_name = os.path.basename(os.path.normpath(ds.encoding['source']))
    ds = ds.assign_coords(model=model_name)
    ds['time'] = ds.indexes['time'].year
    # for some models/methods we are missing precip
    # so need to fill with NaNs
    if 'pr' not in ds.data_vars:
        # np.nan (lowercase): the np.NaN alias was removed in NumPy 2.0
        ds['pr'] = xr.full_like(ds['tas'], np.nan)
    return ds

In [24]:
# Open both per-model metric stores with the zarr engine, run preprocess_cbp on
# each one, and concatenate them along the 'model' dimension.
ds = xr.open_mfdataset(
    [
        '/gpfs/group/kaf26/default/dcl5300/lafferty-sriver_inprep_tbh_DATA/metrics/CanESM5/',
        '/gpfs/group/kaf26/default/dcl5300/lafferty-sriver_inprep_tbh_DATA/metrics/BCC-CSM2-MR/',
    ],
    engine='zarr',
    preprocess=preprocess_cbp,
    parallel=True,
    combine='nested',
    concat_dim='model',
    data_vars='all',
)

In [25]:
# Display the combined dataset (the cell's last expression is rendered as output)
ds

Unnamed: 0,Array,Chunk
Bytes,1.64 GiB,98.88 MiB
Shape,"(2, 3, 1, 85, 601, 1440)","(1, 1, 1, 30, 600, 1440)"
Count,8 Graph Layers,36 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 1.64 GiB 98.88 MiB Shape (2, 3, 1, 85, 601, 1440) (1, 1, 1, 30, 600, 1440) Count 8 Graph Layers 36 Chunks Type float32 numpy.ndarray",1  3  2  1440  601  85,

Unnamed: 0,Array,Chunk
Bytes,1.64 GiB,98.88 MiB
Shape,"(2, 3, 1, 85, 601, 1440)","(1, 1, 1, 30, 600, 1440)"
Count,8 Graph Layers,36 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,1.64 GiB,98.88 MiB
Shape,"(2, 3, 1, 85, 601, 1440)","(1, 1, 1, 30, 600, 1440)"
Count,11 Graph Layers,36 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 1.64 GiB 98.88 MiB Shape (2, 3, 1, 85, 601, 1440) (1, 1, 1, 30, 600, 1440) Count 11 Graph Layers 36 Chunks Type float32 numpy.ndarray",1  3  2  1440  601  85,

Unnamed: 0,Array,Chunk
Bytes,1.64 GiB,98.88 MiB
Shape,"(2, 3, 1, 85, 601, 1440)","(1, 1, 1, 30, 600, 1440)"
Count,11 Graph Layers,36 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,1.64 GiB,98.88 MiB
Shape,"(2, 3, 1, 85, 601, 1440)","(1, 1, 1, 30, 600, 1440)"
Count,11 Graph Layers,36 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 1.64 GiB 98.88 MiB Shape (2, 3, 1, 85, 601, 1440) (1, 1, 1, 30, 600, 1440) Count 11 Graph Layers 36 Chunks Type float32 numpy.ndarray",1  3  2  1440  601  85,

Unnamed: 0,Array,Chunk
Bytes,1.64 GiB,98.88 MiB
Shape,"(2, 3, 1, 85, 601, 1440)","(1, 1, 1, 30, 600, 1440)"
Count,11 Graph Layers,36 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,1.64 GiB,98.88 MiB
Shape,"(2, 3, 1, 85, 601, 1440)","(1, 1, 1, 30, 600, 1440)"
Count,11 Graph Layers,36 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 1.64 GiB 98.88 MiB Shape (2, 3, 1, 85, 601, 1440) (1, 1, 1, 30, 600, 1440) Count 11 Graph Layers 36 Chunks Type float32 numpy.ndarray",1  3  2  1440  601  85,

Unnamed: 0,Array,Chunk
Bytes,1.64 GiB,98.88 MiB
Shape,"(2, 3, 1, 85, 601, 1440)","(1, 1, 1, 30, 600, 1440)"
Count,11 Graph Layers,36 Chunks
Type,float32,numpy.ndarray


### Preliminaries

In [2]:
###############################
# Set paths
# UPDATE THIS FOR REPRODUCTION
###############################
# Root directory for the locally stored CIL-GDPCIR metric files.
# Keep the trailing slash: later cells append '<metric>/<model>.nc' to it.
out_path = (
    '/gpfs/group/kaf26/default/dcl5300/lafferty-sriver_inprep_tbh_DATA/metrics/cil-gdpcir/'
)

In [3]:
###################
# Models
###################
from utils import cil_ssp_dict

# The model names are the keys of cil_ssp_dict
models = list(cil_ssp_dict)

In [4]:
############
# Dask
############
from dask.distributed import Client
from dask_jobqueue import PBSCluster

# Each PBS job is requested with 1 core, 10GB of memory and a 1 hour walltime
cluster = PBSCluster(
    cores=1,
    memory='10GB',
    resource_spec='pmem=10GB',
    project='open',
    env_extra=['#PBS -l feature=rhel7'],
    walltime='01:00:00',
)
cluster.scale(jobs=25)  # ask for jobs

# Attach a client to the cluster; the final expression displays its summary
client = Client(cluster)

client

0,1
Connection method: Cluster object,Cluster type: dask_jobqueue.PBSCluster
Dashboard: /proxy/8787/status,

0,1
Dashboard: /proxy/8787/status,Workers: 0
Total threads: 0,Total memory: 0 B

0,1
Comm: tcp://10.102.201.239:34149,Workers: 0
Dashboard: /proxy/8787/status,Total threads: 0
Started: Just now,Total memory: 0 B


## Transfer from Azure to local storage

In [6]:
###############################
# Azure blob storage access
###############################
# getpass and azure.storage.blob are used below but are not imported by the
# notebook's first cell, so import them here; otherwise a fresh kernel raises
# NameError on this cell.
import getpass

import azure.storage.blob

# connection string (from azure web login, select your storage account, then "Access keys")
# It is read interactively so the secret is never saved in the notebook.
connection_string = getpass.getpass()

# NOTE: if you are not located in Western Europe, it will be much quicker
# to: (1) create a new azure storage account that is located physically close
# to where you are transferring to, (2) transfer all of the CIL-GDPCIR data to
# a blob container in that account via the azure storage explorer, then
# (3) link below to the physically-close blob container
container_client = azure.storage.blob.ContainerClient.from_connection_string(
    connection_string, container_name="roaraccess")

 ········


In [8]:
# Copy every (model, metric) dataset from the Azure container to a local
# netCDF file at <out_path>/<metric>/<model>.nc, skipping files already present.
# loop through models
for model in models:
    print(model)
    for metric in ['annual_avgs', 'annual_maxs']:
        out_file = os.path.join(out_path, metric, model + '.nc')

        # check if already exists
        if os.path.isfile(out_file):
            print('   ' + metric + ' already done')
            continue

        # Write under a temporary name first so that a failed or interrupted
        # write never leaves a partial file that the check above would treat
        # as finished on the next run.
        tmp_file = out_file + '.tmp'
        try:
            # read
            azure_prefix = 'cil-gdpcir/' + metric + '/' + model
            store = zarr.ABSStore(client=container_client, prefix=azure_prefix)

            ds_cil = xr.open_zarr(store=store).load(retries=5)

            # write
            os.makedirs(os.path.dirname(out_file), exist_ok=True)
            ds_cil.to_netcdf(tmp_file)
            os.replace(tmp_file, out_file)
            print('   ' + metric)
        except Exception as err:
            # Best effort: report the failure and carry on with the next item.
            # Catching Exception (not a bare except) keeps Ctrl-C / kernel
            # interrupt able to stop the loop.
            print('    ERROR with ' + metric + ': ' + repr(err))
        finally:
            # After a successful os.replace the temporary file no longer exists;
            # otherwise remove whatever partial output was left behind.
            if os.path.exists(tmp_file):
                os.remove(tmp_file)

ACCESS-ESM1-5
   annual_avgs already done
   annual_maxs
BCC-CSM2-MR
   annual_avgs already done
   annual_maxs
CanESM5
   annual_avgs already done
   annual_maxs
CMCC-ESM2
   annual_avgs already done
   annual_maxs
EC-Earth3
   annual_avgs already done
   annual_maxs
EC-Earth3-Veg-LR
   annual_avgs already done
   annual_maxs
GFDL-ESM4
   annual_avgs already done
   annual_maxs
HadGEM3-GC31-LL
   annual_avgs already done
   annual_maxs
INM-CM4-8
   annual_avgs already done
   annual_maxs
INM-CM5-0
   annual_avgs already done
   annual_maxs
MIROC-ES2L
   annual_avgs already done
   annual_maxs
MIROC6
   annual_avgs already done
   annual_maxs
MPI-ESM1-2-LR
   annual_avgs already done
   annual_maxs
NESM3
   annual_avgs already done
   annual_maxs
NorESM2-LM
   annual_avgs already done
   annual_maxs
NorESM2-MM
   annual_avgs already done
   annual_maxs
UKESM1-0-LL
   annual_avgs already done
   annual_maxs
