In [1]:
import xarray as xr
import time
from glob import glob
from os import path
import numpy as np

# Create list of CCMP filenames

In [2]:
# Locations used by the CCMP v02.0 conversion cells below.
# NetCDF inputs, matched two directory levels below the v02.0 root.
pattern = "F:/data/sat_data/ccmp/v02.0/*/*/*_V02.0_L3.0_RSS.nc"
# Root of the CCMP v02.0 NetCDF tree.
dir_pattern = "F:/data/sat_data/ccmp/v02.0/"
# Directory that receives the Zarr store.
dir_pattern_zarr = "F:/data/sat_data/ccmp/zarr/"

In [None]:
# Open every CCMP NetCDF file as one lazily loaded dataset, then de-duplicate
# the time axis and rechunk before the Zarr write.
start_time = time.time()
# glob() returns matches in arbitrary order, and combine='nested' concatenates
# the files exactly in the order given, so sort the list to make the time axis
# (and which duplicate survives below) deterministic.
files = sorted(glob(pattern))
# Open without CF decoding or mask/scale so the variables keep their stored values.
ds = xr.open_mfdataset(
    files,
    combine='nested',
    concat_dim='time',
    decode_cf=False,
    mask_and_scale=False,
)
# NOTE(review): the dataset is closed here but still used below; presumably
# xarray reopens the files on demand -- confirm.
ds.close()
end_time = time.time()
print("Elapsed time was %g seconds" % (end_time - start_time))
# Remove duplicated time stamps: np.unique returns the index of the first
# occurrence of each distinct value, in sorted order.
_, index = np.unique(ds['time'], return_index=True)
ds = ds.isel(time=index)
# Rechunk the data.
ds = ds.chunk({'time': 1000, 'latitude': 157, 'longitude': 180})

In [None]:
# Write the rechunked CCMP dataset to the Zarr store; this only needs to run once.
# NOTE(review): with xarray's default write mode a re-run presumably fails if
# the store already exists -- confirm before Restart & Run All.
ds.to_zarr(store=dir_pattern_zarr)

# Test reading zarr file and making some plots

In [None]:
# Time how long it takes to open the Zarr store at dir_pattern_zarr.
start_time = time.time()
ds2 = xr.open_zarr(store=dir_pattern_zarr)
end_time = time.time()
elapsed = end_time - start_time
print("Elapsed time was %g seconds" % elapsed)


In [None]:
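# Disabled example: mean uwnd over the first ten days of 2000.
# NOTE(review): `ds3` is not defined in the visible cells; presumably this should be `ds2` -- confirm.
#ts = ds3.uwnd.sel(time=slice('2000-01-01','2000-01-10')).mean({'latitude','longitude'})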


In [None]:
# Time a box-averaged uwnd series read from the Zarr-backed dataset ds2.
start_time = time.time()
ts = (
    ds2.uwnd
    # Select latitude -10..0 and longitude 170..180 by coordinate label.
    .sel(latitude=slice(-10, 0), longitude=slice(170, 180))
    # Average over both horizontal dimensions.
    .mean(['latitude', 'longitude'])
    # A dataset from open_zarr is presumably dask-backed, so without this the
    # expression stays lazy and the timer excludes the actual read and
    # reduction. Force evaluation inside the timed region.
    .compute()
)
end_time = time.time()
print("Elapsed time was %g seconds" % (end_time - start_time))


In [None]:
# Plot the box-averaged uwnd series `ts` built in the preceding cell, using xarray's default plot.
ts.plot()

# Create CMC SST Zarr data files

In [None]:
# Paths and input file lists for the CMC SST conversion.
# Directory that receives the combined Zarr store.
dir_pattern_zarr = 'F:/data/sst/cmc/zarr/'

# CMC 0.2 deg, v2 files.
# `dir_pattern` and `pattern` are reused for both versions, so after this cell
# they hold the v3 values.
dir_pattern = 'F:/data/sst/cmc/CMC0.2deg/v2/'
pattern = 'F:/data/sst/cmc/CMC0.2deg/v2/data/*/*/*-v02.0-fv02.0.nc'
# glob() returns matches in arbitrary order; sort so the later nested
# concatenation along time receives the files in a deterministic order.
files = sorted(glob(pattern))

# CMC 0.1 deg, v3 files.
dir_pattern = 'F:/data/sst/cmc/CMC0.1deg/v3/'
pattern = 'F:/data/sst/cmc/CMC0.1deg/v3/*/*/*-v02.0-fv03.0.nc'
files2 = sorted(glob(pattern))

In [None]:
# Open both CMC file lists as lazily combined datasets and report the elapsed time.
# The same reader options apply to both: concatenate along time in list order,
# with CF decoding and mask/scale disabled.
open_opts = dict(
    combine='nested',
    concat_dim='time',
    decode_cf=False,
    mask_and_scale=False,
)
start_time = time.time()
# `files` list (0.2 deg v2 paths).
ds = xr.open_mfdataset(files, **open_opts)
ds.close()
# `files2` list (0.1 deg v3 paths).
ds2 = xr.open_mfdataset(files2, **open_opts)
ds2.close()
end_time = time.time()
elapsed = end_time - start_time
print("Elapsed time was %g seconds" % elapsed)

In [None]:
# Put the v3 0.1 deg data (ds2) onto the lat/lon coordinates of the older
# 0.2 deg dataset (ds) so the two records can be joined.
# NOTE(review): both datasets were opened with decode_cf=False and
# mask_and_scale=False, so this presumably interpolates undecoded values
# (including any fill values) -- confirm the result near masked cells.
target_grid = {'lat': ds.lat, 'lon': ds.lon}
ds2_interp = ds2.interp(coords=target_grid)
# Join the two records along time.
ds_all = xr.concat([ds, ds2_interp], dim='time')
# Drop repeated time stamps: np.unique returns the first index of each
# distinct value, in sorted order.
_, index = np.unique(ds_all['time'], return_index=True)
ds_all = ds_all.isel({'time': index})
# Rechunk the data. The original note says the data are int16 (2 bytes).
# NOTE(review): that may no longer hold after interpolation -- confirm.
chunk_sizes = {'time': 1000, 'lat': 300, 'lon': 300}
ds_all = ds_all.chunk(chunk_sizes)

In [None]:
# Write the combined CMC dataset to the Zarr store at dir_pattern_zarr.
ds_all.to_zarr(store=dir_pattern_zarr)

# Create AVISO Zarr data files

In [None]:
# Paths and input file list for the AVISO conversion.
# Root of the AVISO NetCDF tree.
dir_pattern = 'F:/data/sat_data/aviso/data'
# Directory that receives the Zarr store.
dir_pattern_zarr = 'F:/data/sat_data/aviso/zarr/'
# NetCDF files, one directory level below the root.
pattern = 'F:/data/sat_data/aviso/data/*/*.nc'
# glob() returns matches in arbitrary order; sort so the later nested
# concatenation along time receives the files in a deterministic order.
files = sorted(glob(pattern))

In [None]:
# Open the AVISO files as one lazily combined dataset, report the elapsed
# time, then de-duplicate the time axis and rechunk.
open_opts = dict(
    combine='nested',
    concat_dim='time',
    decode_cf=False,
    mask_and_scale=False,
)
start_time = time.time()
ds = xr.open_mfdataset(files, **open_opts)
ds.close()
end_time = time.time()
elapsed = end_time - start_time
print("Elapsed time was %g seconds" % elapsed)
# Drop repeated time stamps: np.unique returns the first index of each
# distinct value, in sorted order.
_, index = np.unique(ds['time'], return_index=True)
ds = ds.isel({'time': index})
# Rechunk the data. The original note says the data are int16 (2 bytes per value).
chunk_sizes = {'time': 1000, 'latitude': 180, 'longitude': 180}
ds = ds.chunk(chunk_sizes)

In [None]:
# Write the rechunked AVISO dataset to the Zarr store at dir_pattern_zarr.
ds.to_zarr(store=dir_pattern_zarr)