# Create combined JSON
Read all of the individual ReferenceFileSystem JSON files and create a single combined JSON for the entire dataset.

In [None]:
import fsspec
import xarray as xr
import hvplot.xarray
import metpy

In [None]:
import ujson   # fast json
from kerchunk.combine import MultiZarrToZarr
import kerchunk

In [None]:
# S3 prefix under which the individual reference JSON files are stored.
json_dir = "s3://esip-qhub/noaa/nwm/grid1km/json"

For file systems whose contents are changing, pass `skip_instance_cache=True`; otherwise fsspec may hand back a previously cached filesystem instance and you won't see the changed files.

In [None]:
# Credentialed (non-anonymous) S3 filesystem used to list and write the JSON
# files; the fsspec instance cache is skipped so changed files are visible.
fs_json = fsspec.filesystem(
    "s3",
    anon=False,
    skip_instance_cache=True,
)

In [None]:
# Year prefix (as a string) used to select reference files by name.
year = "1983"

In [None]:
# Reference JSONs whose names start with the selected year; each returned
# path is prefixed with "s3://" to form a full URL.
json_list = [
    f's3://{path}'
    for path in fs_json.glob(f'{json_dir}/{year}*.json')
]
# Sanity check: number of files, then the first and last URL.
print(len(json_list))
print(json_list[0])
print(json_list[-1])

In [None]:
# Every entry directly under json_dir, with the listing refreshed rather than
# served from cache. NOTE: this rebinds json_list to the whole directory, not
# just the files matching `year`.
json_list = [
    f's3://{path}'
    for path in fs_json.ls(json_dir, refresh=True)
]
# Sanity check: number of files, then the first and last URL.
print(len(json_list))
print(json_list[0])
print(json_list[-1])

#### Combine individual JSON into combined JSON

In [None]:
# Destination of the combined reference file. Swap in the per-year variant
# below when combining a single year only.
#combined_json = f's3://esip-qhub/noaa/nwm/grid1km/combined_{year}.json'
combined_json = 's3://esip-qhub/noaa/nwm/grid1km/combined.json'

In [None]:
# Combine the per-file references by concatenating along "time"; "x", "y" and
# "crs" are declared identical across inputs, and "reference_time" is dropped
# from each input before combining. The referenced data is read anonymously.
mzz = MultiZarrToZarr(
    json_list,
    remote_protocol="s3",
    remote_options={"anon": True},
    concat_dims=["time"],
    identical_dims=["x", "y", "crs"],
    preprocess=kerchunk.combine.drop("reference_time"),
)

Create a dict from the mzz object

In [None]:
%%time
# Build the combined reference set in memory; `d` is written out as JSON in
# the next step.
d = mzz.translate()

Write the dict to the combined JSON file

In [None]:
%%time
with fs_json.open(combined_json, 'wb') as f:
    f.write(ujson.dumps(d).encode());

Examine the combined kerchunked dataset 

In [None]:
%%time
s_opts = {'requester_pays':True, 'skip_instance_cache':True}
r_opts = {'anon':True}
fs = fsspec.filesystem("reference", fo=combined_json, ref_storage_args=s_opts,
                       remote_protocol='s3', remote_options=r_opts)
m = fs.get_mapper("")
ds = xr.open_dataset(m, engine="zarr", chunks={}, backend_kwargs=dict(consolidated=False))

In [None]:
# Display the dataset summary.
ds

In [None]:
# Keep only a few data variables, plus the "crs" variable.
ds = ds[
    [
        'ACCET',
        'SNEQV',
        'FSNO',
        'crs',
    ]
]

In [None]:
# Let MetPy parse the CF metadata of the dataset's variables.
ds = ds.metpy.parse_cf()

In [None]:
# Cartopy CRS of the ACCET variable (used by the optional tiled hvplot call).
crs = ds["ACCET"].metpy.cartopy_crs

In [None]:
%%time
da = ds.ACCET.isel(time=500).load()

In [None]:
# Quick static plot of the loaded time step.
da.plot()

In [None]:
#da.hvplot(x='x', y='y', rasterize=True, cmap='turbo', data_aspect=1)

In [None]:
#da.hvplot(x='x', y='y', rasterize=True, crs=crs, cmap='turbo', tiles='OSM', alpha=0.7)