# Problem: Loading 4D variables and writing output to zarr


In [None]:
pip install xmip

## Import packages

In [1]:
import numpy as np
import xarray as xr
import dask
import s3fs
import zarr
import warnings
warnings.filterwarnings('ignore')

## Create a new Dask cluster with the Dask Gateway

In [2]:
from dask_gateway import Gateway
gateway = Gateway()

In [4]:
# Housekeeping: shut down every cluster that is still registered on the
# gateway so that the new computation starts from a clean state.
from dask.distributed import Client

clusters = gateway.list_clusters()
print(clusters)
for report in clusters:
    # Each entry returned by list_clusters() is only used for its name;
    # connect to obtain a handle that can actually be shut down.
    stale_cluster = gateway.connect(report.name)
    print(stale_cluster)
    # Attach a client and close it again before stopping the cluster.
    stale_client = Client(stale_cluster)
    stale_client.close()
    stale_cluster.shutdown()

[]


In [5]:
# Cluster sizing, collected in one place so it can be tuned easily.
WORKER_MEMORY = 2  # value of the `worker_memory` cluster option (presumably GiB per worker — TODO confirm against the gateway configuration)
WORKER_CORES = 1   # value of the `worker_cores` cluster option
N_WORKERS = 40     # number of workers requested via cluster.scale()

cluster = gateway.new_cluster(worker_memory=WORKER_MEMORY, worker_cores=WORKER_CORES)

cluster.scale(N_WORKERS)
cluster

VBox(children=(HTML(value='<h2>GatewayCluster</h2>'), HBox(children=(HTML(value='\n<div>\n<style scoped>\n    …

## Get a client from the Dask Gateway Cluster

As stated above, creating a Dask `Client` is mandatory in order to run the following Dask computations on your Dask cluster.

In [6]:
# Create the Dask client through which the computations below are submitted.
from distributed import Client

if cluster:
    client = Client(cluster) # connect a client to the existing Dask Gateway cluster
else:
    client = Client()   # no cluster available: Client() starts a local dask cluster on this machine
client

0,1
Connection method: Cluster object,Cluster type: dask_gateway.GatewayCluster
Dashboard: /jupyterhub/services/dask-gateway/clusters/daskhub.81ee0c0bae63493790a3629168290d7f/status,


## Open dictionary of OMIP2 simulations
With file lists taken from the ESGF Search Catalog with the following search requirements:
1. On the native grid
2. Have the variables: `umo`, `vmo`, `so`, `thetao`, `zos`, `mlotst`, `siconc`, `deptho`, `areacello`
3. At monthly time steps
4. The last 61 years of the simulation

In [7]:
# Load the dictionary that maps each model name to its list of file paths.
# NOTE(review): allow_pickle=True unpickles arbitrary Python objects, so
# "models.npy" must come from a trusted source.
model_fnames_dict = np.load("models.npy", allow_pickle=True).item()


## Function for transforming THREDDS file paths into kerchunk'ed JSON files



In [54]:
#setting for accessing s3

# get storage keys
# The IPython `!` shell escape captures the command output as a list of
# lines; index 0 keeps the first line, which is expected to hold the key.
access_key = !aws configure get aws_access_key_id
access_key = access_key[0]
secret_key = !aws configure get aws_secret_access_key
secret_key = secret_key[0]

# set storage target
# `endpoint_url` is forwarded to s3fs.S3FileSystem via `client_kwargs` below.
client_kwargs = {'endpoint_url': 'https://object-store.cloud.muni.cz'}

import dask
@dask.delayed
def path_to_kerchunk(httppath,modelname,client_kwargs,access_key,secret_key,s3_root='tmp/kerchunk/'):
    """Create a kerchunk reference for one remote file and store it as JSON on S3.

    Because of the ``@dask.delayed`` decorator, calling this function only
    builds a lazy task; the work happens when the task is computed.

    Parameters
    ----------
    httppath : str
        URL of the source file; it is opened with ``fsspec.open``.
    modelname : str
        Name of the sub-folder below ``s3_root`` that receives the JSON file.
    client_kwargs : dict
        Forwarded to ``s3fs.S3FileSystem`` (for example the ``endpoint_url``).
    access_key, secret_key : str
        Credentials passed to ``s3fs.S3FileSystem`` as ``key`` and ``secret``.
    s3_root : str, optional
        Bucket/prefix below which the per-model folders are created. The
        default ``'tmp/kerchunk/'`` is the destination that was effectively
        used before this parameter existed.

    Returns
    -------
    str
        ``s3://`` URL of the JSON reference file that was written.
    """
    # Imported locally so the task is self-contained when dask executes it.
    import json

    import fsspec
    import kerchunk.hdf
    import s3fs

    with fsspec.open(httppath) as inf:
        info = kerchunk.hdf.SingleHdf5ToZarr(inf, httppath, inline_threshold=100).translate()

    # Derive "<file stem>.json" from the last URL component. Only a trailing
    # ".nc" is stripped, so a ".nc" elsewhere in the name cannot truncate it.
    filename = httppath.rsplit('/', 1)[-1]
    stem = filename[:-len('.nc')] if filename.endswith('.nc') else filename
    jsonname = stem + '.json'

    # Tolerate a root given with or without a trailing slash.
    path = s3_root.rstrip('/') + '/' + modelname
    s3_prefix = "s3://" + path
    jsonfile = f"{s3_prefix}/{jsonname}"
    print(jsonfile)

    target = s3fs.S3FileSystem(anon=False,client_kwargs=client_kwargs,key=access_key,secret=secret_key)
    with target.open(jsonfile, mode='w') as f:
        json.dump(info,f)
    return jsonfile



In [9]:
# Report how many source files are listed for each model.
for model, fnames in model_fnames_dict.items():
    print(len(fnames), model)

368 EC-Earth3
44 NorESM2-LM
8 MRI-ESM2-0
8 CMCC-CM2-SR5
107 CNRM-CM6-1
8 FGOALS-f3-L
28 CMCC-CM2-HR4
34 MIROC6
8 TaiESM1-TIMCOM2
107 CNRM-CM6-1-HR
260 ACCESS-OM2-025
260 ACCESS-OM2
8 TaiESM1-TIMCOM


In [61]:
modelnames=[a for a in model_fnames_dict.keys()]
d={}
for modelname in [modelnames[5]]:
    print(modelname)
    fpaths=model_fnames_dict.get(modelname)
    httppaths=[fpath.replace('dodsC', 'fileServer') for fpath in fpaths]
    jsons=[ path_to_kerchunk(httppath,modelname,client_kwargs,access_key,secret_key) for httppath in httppaths]
    %time
    ok=dask.compute(*jsons)
    d[modelname]=ok
d



FGOALS-f3-L
CPU times: user 4 µs, sys: 0 ns, total: 4 µs
Wall time: 8.58 µs


Exception: FileNotFoundError('http://esg.lasg.ac.cn/thredds/fileServer/esg_dataroot/CMIP6/OMIP/CAS/FGOALS-f3-L/omip2/r1i1p1f1/Omon/umo/gn/v20191119/umo_Omon_FGOALS-f3-L_omip2_r1i1p1f1_gn_195801-201812.nc')

2022-11-14 17:27:42,152 - distributed.client - ERROR - Exception raised while shutting down cluster daskhub.81ee0c0bae63493790a3629168290d7f
Traceback (most recent call last):
  File "/srv/conda/envs/notebook/lib/python3.9/site-packages/dask_gateway/client.py", line 993, in _stop_internal
    await self.gateway._stop_cluster(self.name)
  File "/srv/conda/envs/notebook/lib/python3.9/site-packages/dask_gateway/client.py", line 654, in _stop_cluster
    await self._request("DELETE", url)
  File "/srv/conda/envs/notebook/lib/python3.9/site-packages/dask_gateway/client.py", line 397, in _request
    resp = await session.request(method, url, json=json, **self._request_kwargs)
  File "/srv/conda/envs/notebook/lib/python3.9/site-packages/aiohttp/client.py", line 535, in _request
    conn = await self._connector.connect(
  File "/srv/conda/envs/notebook/lib/python3.9/site-packages/aiohttp/connector.py", line 542, in connect
    proto = await self._create_connection(req, traces, timeout)
  File 

In [56]:
# Number of kerchunk JSON files recorded in `d` for the model processed last.
len(d[modelname])

8

In [57]:
# Show the mapping of model name -> kerchunk JSON URLs returned by the conversion.
d

{'TaiESM1-TIMCOM': ('s3://tmp/kerchunk/TaiESM1-TIMCOM/areacello_Ofx_TaiESM1-TIMCOM_omip1_r1i1p1f1_gn.json',
  's3://tmp/kerchunk/TaiESM1-TIMCOM/deptho_Ofx_TaiESM1-TIMCOM_omip1_r1i1p1f1_gn.json',
  's3://tmp/kerchunk/TaiESM1-TIMCOM/vmo_Omon_TaiESM1-TIMCOM2_omip2_r1i1p1f1_gn_030601-036612.json',
  's3://tmp/kerchunk/TaiESM1-TIMCOM/thetao_Omon_TaiESM1-TIMCOM2_omip2_r1i1p1f1_gn_030601-036612.json',
  's3://tmp/kerchunk/TaiESM1-TIMCOM/so_Omon_TaiESM1-TIMCOM2_omip2_r1i1p1f1_gn_030601-036612.json',
  's3://tmp/kerchunk/TaiESM1-TIMCOM/umo_Omon_TaiESM1-TIMCOM2_omip2_r1i1p1f1_gn_030601-036612.json',
  's3://tmp/kerchunk/TaiESM1-TIMCOM/mlotst_Omon_TaiESM1-TIMCOM2_omip2_r1i1p1f1_gn_030601-036612.json',
  's3://tmp/kerchunk/TaiESM1-TIMCOM/zos_Omon_TaiESM1-TIMCOM2_omip2_r1i1p1f1_gn_030601-036612.json')}

# wget of the files does not work for
MRI-ESM2-0 (8), FGOALS-f3-L (8), MIROC6 (34) — the file servers answer with HTTP 503, as shown below.

In [20]:
# Try to fetch one MRI-ESM2-0 file directly to check whether the file server responds.
!wget http://esgf-data2.diasjp.net/thredds/fileServer/esg_dataroot/CMIP6/OMIP/MRI/MRI-ESM2-0/omip2/r1i1p1f1/Omon/umo/gn/v20200406/umo_Omon_MRI-ESM2-0_omip2_r1i1p1f1_gn_195801-201812.nc

--2022-11-14 14:39:01--  http://esgf-data2.diasjp.net/thredds/fileServer/esg_dataroot/CMIP6/OMIP/MRI/MRI-ESM2-0/omip2/r1i1p1f1/Omon/umo/gn/v20200406/umo_Omon_MRI-ESM2-0_omip2_r1i1p1f1_gn_195801-201812.nc
Resolving esgf-data2.diasjp.net (esgf-data2.diasjp.net)... 157.1.137.37
Connecting to esgf-data2.diasjp.net (esgf-data2.diasjp.net)|157.1.137.37|:80... connected.
HTTP request sent, awaiting response... 503 Service Temporarily Unavailable
2022-11-14 14:39:01 ERROR 503: Service Temporarily Unavailable.



In [25]:
# Try to fetch one FGOALS-f3-L file directly to check whether the file server responds.
!wget http://esg.lasg.ac.cn/thredds/fileServer/esg_dataroot/CMIP6/OMIP/CAS/FGOALS-f3-L/omip2/r1i1p1f1/Omon/mlotst/gn/v20191104/mlotst_Omon_FGOALS-f3-L_omip2_r1i1p1f1_gn_165301-201812.nc

--2022-11-14 14:59:23--  http://esg.lasg.ac.cn/thredds/fileServer/esg_dataroot/CMIP6/OMIP/CAS/FGOALS-f3-L/omip2/r1i1p1f1/Omon/mlotst/gn/v20191104/mlotst_Omon_FGOALS-f3-L_omip2_r1i1p1f1_gn_165301-201812.nc
Resolving esg.lasg.ac.cn (esg.lasg.ac.cn)... 210.75.240.163
Connecting to esg.lasg.ac.cn (esg.lasg.ac.cn)|210.75.240.163|:80... connected.
HTTP request sent, awaiting response... 503 Service Unavailable
2022-11-14 14:59:24 ERROR 503: Service Unavailable.



In [35]:
# Try to fetch one MIROC6 file directly to check whether the file server responds.
!wget http://esgf-data2.diasjp.net/thredds/fileServer/esg_dataroot/CMIP6/OMIP/MIROC/MIROC6/omip2/r2i1p1f1/Omon/so/gn/v20200612/so_Omon_MIROC6_omip2_r2i1p1f1_gn_196801-197712.nc

--2022-11-14 16:23:41--  http://esgf-data2.diasjp.net/thredds/fileServer/esg_dataroot/CMIP6/OMIP/MIROC/MIROC6/omip2/r2i1p1f1/Omon/so/gn/v20200612/so_Omon_MIROC6_omip2_r2i1p1f1_gn_196801-197712.nc
Resolving esgf-data2.diasjp.net (esgf-data2.diasjp.net)... 157.1.137.37
Connecting to esgf-data2.diasjp.net (esgf-data2.diasjp.net)|157.1.137.37|:80... connected.
HTTP request sent, awaiting response... 503 Service Temporarily Unavailable
2022-11-14 16:23:42 ERROR 503: Service Temporarily Unavailable.



### List the transformed kerchunk files

In [59]:
#setting for accessing s3
# NOTE(review): these settings repeat the ones defined in the cell that
# declares path_to_kerchunk.

# get storage keys
# The IPython `!` shell escape captures the command output as a list of
# lines; index 0 keeps the first line, which is expected to hold the key.
access_key = !aws configure get aws_access_key_id
access_key = access_key[0]
secret_key = !aws configure get aws_secret_access_key
secret_key = secret_key[0]

# set storage target
# `endpoint_url` is forwarded to s3fs.S3FileSystem via `client_kwargs` below.
client_kwargs = {'endpoint_url': 'https://object-store.cloud.muni.cz'}

def dict_kerchunk(client_kwargs,access_key,secret_key,path='WAFFLES/kerchunk/'):
    """List the kerchunk JSON files stored on S3, grouped by model folder.

    Parameters
    ----------
    client_kwargs : dict
        Forwarded to ``s3fs.S3FileSystem`` (for example the ``endpoint_url``).
    access_key, secret_key : str
        Credentials passed to ``s3fs.S3FileSystem`` as ``key`` and ``secret``.
    path : str, optional
        Bucket/prefix whose direct children are the per-model folders.
        Defaults to ``'WAFFLES/kerchunk/'``, the location previously
        hard-coded in this function.

    Returns
    -------
    dict
        Maps each folder name found below ``path`` to the list of its
        entries, each prefixed with ``'s3://'``.
    """
    import s3fs

    # Tolerate a root given with or without a trailing slash.
    root = path if path.endswith('/') else path + '/'

    target = s3fs.S3FileSystem(anon=False,client_kwargs=client_kwargs,key=access_key,secret=secret_key)
    # Keep only the last path component of every listed entry.
    modelnames = [entry.rsplit('/', 1)[-1] for entry in target.ls(root)]

    d = {}
    for name in modelnames:
        d[name] = ['s3://' + n for n in target.ls(root + name)]
        print(root + name)
    return d
# Collect the kerchunk JSON files that are already present on S3.
d = dict_kerchunk(client_kwargs, access_key, secret_key)

# For every model, print the number of source files next to the number of
# JSON files found on S3 ('None' when the model has no folder there).
for model, fnames in model_fnames_dict.items():
    if model in d:
        computed = len(d[model])
    else:
        computed = 'None'
    print(model, len(fnames), 'transformed done', computed)

WAFFLES/kerchunk/ACCESS-OM2
WAFFLES/kerchunk/ACCESS-OM2-025
WAFFLES/kerchunk/CMCC-CM2-HR4
WAFFLES/kerchunk/CMCC-CM2-SR5
WAFFLES/kerchunk/CNRM-CM6-1
WAFFLES/kerchunk/CNRM-CM6-1-HR
WAFFLES/kerchunk/EC-Earth3
WAFFLES/kerchunk/MIROC6
WAFFLES/kerchunk/NorESM2-LM
WAFFLES/kerchunk/TaiESM1-TIMCOM
WAFFLES/kerchunk/TaiESM1-TIMCOM2
EC-Earth3 368 transformed done 368
NorESM2-LM 44 transformed done 44
MRI-ESM2-0 8 transformed done None
CMCC-CM2-SR5 8 transformed done 8
CNRM-CM6-1 107 transformed done 107
FGOALS-f3-L 8 transformed done None
CMCC-CM2-HR4 28 transformed done 28
MIROC6 34 transformed done 1
TaiESM1-TIMCOM2 8 transformed done 8
CNRM-CM6-1-HR 107 transformed done 107
ACCESS-OM2-025 260 transformed done 260
ACCESS-OM2 260 transformed done 260
TaiESM1-TIMCOM 8 transformed done 8
