In [1]:
import numpy as np
import xarray as xr
import pandas as pd

import dask
import dask.array as da
from dask import delayed

from glob import glob

from datetime import datetime, timedelta
import os
import wrf
import time

import zarr

In [2]:
# Open the geo_em domain file; in the allocation cell below only the shape of
# one HGT_M slice is taken from it.
geog = xr.open_mfdataset('/eagle/climate_severe/geog/geo_em.d01.nc')

# Shape of HGT_M with its leading dimension dropped (index 0 selected).
# Used as the trailing (non-time) dimensions of the arrays allocated later.
dummy_shape = geog['HGT_M'][0].shape

In [3]:
#%%time

# Pre-allocate one empty zarr group per (simulation, variable) pair.
# Nothing is written here except metadata: each group gets an empty data array
# and an empty 'Time' array with the shape, chunking, compressor and
# xarray-compatible attributes set, so that values can be filled in later.

# Number of timesteps contributed by each entry found under base_path.
STEPS_PER_ENTRY=96
# Number of timesteps per zarr chunk (chunks are contiguous in space).
TIME_CHUNK=24

for sim in ['historical','end_of_century_8p5']:
    for v in ['AFWA_TOTPRECIP']:
        print(v)
        #set zarr storage location
        zarr_store='/eagle/climate_severe/bwallace_scratch/COARSENED/'+sim+'/'+v
        #set variables to write to file
        store_vars=[v,'Time']

        #establish path to pull data from
        # NOTE(review): the variable name is hard-coded in this path rather than
        # taken from `v`; confirm the layout before adding more variables.
        base_path='/eagle/climate_severe/Derived_Subsets/AFWA_TOTPRECIP/'+sim+'/'
        #gather up the entries under base_path, in sorted order
        basePathFiles=sorted(glob(base_path+'*'))
        if not basePathFiles:
            # Fail with a clear message instead of an IndexError further down.
            raise FileNotFoundError('no input entries found under '+base_path)

        npList=np.array(basePathFiles)

        #one element per output timestep; the sum is the total time length
        time_dims=np.ones(npList.shape[0]*STEPS_PER_ENTRY,dtype=int)
        n_times=int(time_dims.sum())
        full_shape=(n_times,)+tuple(dummy_shape)
        #zero-based start index of every timestep along the time axis
        offsets=np.cumsum(time_dims)
        offsets-=offsets[0]

        #dimension names of every array/coordinate
        ATTRS={
            'Time':["Time"],
            'south_north':["south_north"], #ydim
            'west_east':["west_east"], #xdim
            v:["Time","south_north","west_east"], #var is time, ydim, xdim
        }

        #expected shape of each stored array
        shp={
            'Time':(n_times,),
            v:full_shape,
        }

        #chunk layout: TIME_CHUNK timesteps per chunk, continuous in space
        chnks={
            'Time':(TIME_CHUNK,),
            v:(TIME_CHUNK,)+full_shape[1:],
        }

        #on-disk dtype of each stored array.
        #Time is kept in float64: its units are minutes since 1980-01-01 and
        #float32 only represents integers exactly up to 2**24 (~31.9 years of
        #minutes), so later timestamps would be rounded when written.
        dtypes={
            'Time':'float64',
            v:'float32',
        }

        #per-array attributes; _ARRAY_DIMENSIONS is what xarray reads to name dims
        attrs={
            v:{"_ARRAY_DIMENSIONS":ATTRS[v]},
            #time needs calendar/units as well or xarray misreads the zarr
            'Time':{
                "_ARRAY_DIMENSIONS":ATTRS['Time'],
                "calendar":"proleptic_gregorian",
                #keep consistent with whatever code later fills in Time
                "units":'minutes since 1980-01-01',
            },
        }

        #create (or open) the zarr group
        group=zarr.group(zarr_store)

        #allocate each variable in the zarr group
        for varname in store_vars:
            #zstd level 3 gave a nice tradeoff between compressibility and read times.
            #the result is bound to its own name so the loop variable `v` is not clobbered
            zarr_array=group.empty(varname,shape=shp[varname],chunks=chnks[varname],
                                   dtype=dtypes[varname],
                                   compressor=(zarr.Blosc(cname='zstd', clevel=3)),overwrite=True)
            zarr_array.attrs.update(attrs[varname])

print('Done')

AFWA_TOTPRECIP
AFWA_TOTPRECIP
Done


In [4]:
# Display the allocated array. `group` is whatever the allocation loop bound
# last, i.e. the store for the final simulation in its list.
group['AFWA_TOTPRECIP']

<zarr.core.Array '/AFWA_TOTPRECIP' (525600, 899, 1399) float32>

In [3]:
#zarr_store='/eagle/climate_severe/bwallace_scratch/COARSENED/end_of_century_8p5/AFWA_TOTPRECIP'

In [4]:
#zarr.consolidate_metadata(zarr_store)

<zarr.hierarchy.Group '/'>