# Imports

In [None]:
import os, time

import s3fs

import xarray as xr
import rasterio
import numpy as np
import matplotlib.pyplot as plt

from pyproj import Proj

import RasterClipperFunctions


# Setup

In [None]:
# S3 folder that is listed below to find the RACMO input files
racmo_s3_dir = 's3://gris-outlet-glacier-seasonality-icesat2/Data/RACMO/RACMO2.3p2/'
#zarr_dir = 's3://gris-outlet-glacier-seasonality-icesat2/Data/RACMO/RACMO2.3p2/zarr/'

# Non-anonymous access, using the credentials stored under the 'icesat2' profile
fs_s3 = s3fs.S3FileSystem(
    profile='icesat2',
    anon=False,
)

# Every entry returned by listing that folder
racmo_s3_files = fs_s3.ls(racmo_s3_dir)


In [None]:
# Time a read of a small subset of the first RACMO file, and keep its
# x / y coordinate vectors for building the basin mask later on.
racmo_s3_file = racmo_s3_files[0]

start = time.time()

# The context managers close the dataset and then the underlying S3 file
# handle, even if a read fails part-way; previously the handle returned by
# fs_s3.open() was never closed.
with fs_s3.open(racmo_s3_file, 'rb') as f, xr.open_dataset(f) as ds: #, engine='netcdf4')
    # Only the first 100 entries along the last two axes are read here
    variable = ds.variables['runoffcorr'][:, :100, :100].to_numpy() #[:,:,:]
    x = ds.variables['x'].to_numpy() #[:]
    y = ds.variables['y'].to_numpy() #[:]

end = time.time()
print('Elapsed time: {:5.1f} sec'.format(end-start))

#units = 'kg day^-1 m^-2'
#units = units.replace(' m^-2', '')


# Create mask array
We'll use this mask array to extract runoff from the RACMO grid cells that fall within our basin.

In [None]:
#p = Proj(proj='stere',lat_0=90,lat_ts=70,lon_0=-45,ellps='WGS84')
#area = 1000. * 1000. # hard-coded to 1 km by 1 km

# Basin outline read from the shapefile, returned as an (x, y) pair
basinOutline = RasterClipperFunctions.basinUnionPolygon('gimpbasinspoly/gimpbasinspoly.shp', 'basin=1.0')
(xClip, yClip) = basinOutline

# 2-D coordinate grids spanning the RACMO x / y vectors
xm, ym = np.meshgrid(x, y)

# Grid spacing, taken from the first two x coordinates
iStep = x[1] - x[0]
halfStep = iStep/2

# Six-element transform handed to clipImage: the x / y origin is shifted by
# half a cell from x[0] and y[-1].
# NOTE(review): looks like a GDAL-style geotransform with a top-left origin,
# which would require y to be ascending -- confirm against clipImage.
geoTransform = (x[0]-halfStep, iStep, 0, y[-1]+halfStep, 0, -iStep)

# Clip a grid of ones to the basin outline; the grid is flipped vertically
# before clipping and the result is flipped back afterwards.
onesGrid = np.flipud(np.ones(xm.shape))
clippedGrid = RasterClipperFunctions.clipImage(onesGrid, xClip, yClip, geoTransform)
maskArray = np.flipud(clippedGrid)


# Loop through RACMO files

In [None]:
# One masked total per RACMO file; entries stay NaN until their file is read
variableSum = np.full(len(racmo_s3_files), np.nan)
for i, racmo_s3_file in enumerate(racmo_s3_files):
    print(racmo_s3_file)
    # Context managers close the dataset and the S3 file handle on every
    # iteration (and on error); previously one S3 handle leaked per file.
    with fs_s3.open(racmo_s3_file, 'rb') as f, xr.open_dataset(f) as ds: #, engine='netcdf4')
        variable = ds.variables['runoffcorr'].to_numpy()
    # Sum over the first axis, blank out cells outside the mask, then add up
    # whatever is left while ignoring NaNs.
    variableSum[i] = np.nansum(np.where(maskArray > 0.5, np.sum(variable, axis=0), np.nan))
    

# Experimental code

In [None]:
from kerchunk.hdf import SingleHdf5ToZarr 
from kerchunk.combine import MultiZarrToZarr

import dask
from dask.distributed import Client
# Start a Dask distributed client with 8 workers
client = Client(n_workers=8)
# Bare expression as the last line of the cell: the notebook displays the client
client


In [None]:
def gen_json(u):
    """Write a kerchunk reference JSON file for one HDF5 file on S3.

    The file is opened through ``fs_s3``, scanned with ``SingleHdf5ToZarr``,
    and the translated references are saved as
    ``jsons/<last path component of u>.json``. The ``jsons`` folder is
    created if it does not exist yet.

    Parameters
    ----------
    u : str
        Location of the source file, in a form accepted by ``fs_s3.open``.
    """
    # ``ujson`` was used here without being imported anywhere in the notebook,
    # so the call raised NameError. Import it locally and fall back to the
    # standard-library encoder when it is not installed.
    try:
        import ujson as json_encoder
    except ImportError:
        import json as json_encoder

    so = dict(
        mode="rb", anon=True, default_fill_cache=False,
        default_cache_type="none"
    )
    # NOTE(review): apart from ``mode`` these look like filesystem-level
    # storage options rather than per-file open() arguments -- confirm that
    # fs_s3.open() actually honours them.

    # The output folder must exist before the reference file can be written
    os.makedirs('jsons', exist_ok=True)

    with fs_s3.open(u, **so) as inf:
        h5chunks = SingleHdf5ToZarr(inf, u, inline_threshold=300)
        with open(f"jsons/{u.split('/')[-1]}.json", 'wb') as outf:
            outf.write(json_encoder.dumps(h5chunks.translate()).encode())
        

In [None]:
# Try the reference generation on a single file first.
# gen_json writes into jsons/, and that folder is otherwise only created in
# a later cell, so make sure it exists before this call.
os.makedirs('jsons', exist_ok=True)
gen_json('s3://' + racmo_s3_files[0])


In [None]:
import pathlib

# Output folder for the reference files (no error if it already exists)
json_dir = pathlib.Path('./jsons/')
json_dir.mkdir(exist_ok=True)

# Build one delayed gen_json call per listed file, then run them all
delayed_jobs = [dask.delayed(gen_json)('s3://' + u) for u in racmo_s3_files]
dask.compute(*delayed_jobs)
