In [18]:
import xarray as xr
import os
from dask.diagnostics import ProgressBar
import numpy as np
import pandas as pd
from utils import geo
import regionmask

In [16]:
# --- Run configuration -------------------------------------------------
# Labels identifying the input product; they are also used to build the
# output filename when the result is saved.
dataset = "oc-cci"
frequency = "8day"
# Name of the data variable that is averaged
variable = "chlor_a"
# First and last (inclusive) year for which an annual cycle is extracted
yearstart = 1999
yearend = 2023
# How a "year" is laid out along the day axis: 'JULtoJUN' or 'JANtoDEC'
monthorder = "JULtoJUN"
# Label describing the averaging applied (used in the output filename)
averaging = "wmean-latlon"

In [24]:
# Latitude bounds that appear in the name of the input directory
lats = [-30, -65]

# Pieces of the input path
indirbase = "/projects/SOCCOM/datasets/OC-CCI/"
indirlocal = "CCI_ALL-v6.0-8DAY"
indirregion = "lat_" + "_".join(str(lat) for lat in lats)
filename = f"{indirlocal}-{indirregion}-gapfilled_lonlattime-roll_time3"
inpath = f"{indirbase}/{indirlocal}/{indirregion}/{filename}"

# Open the zarr store lazily, then rechunk so that every chunk holds the
# full lon/lat plane and an automatically chosen number of time steps
ds = xr.open_zarr(inpath)
ds = ds.chunk({'time': 'auto', 'lon': -1, 'lat': -1})

# Replace the time coordinate with pandas datetimes
ds['time'] = pd.to_datetime(ds['time'].values)

# Build the horizontal grid metrics with the project helper and derive the
# cell area as the product of the two cell widths it provides.
# NOTE(review): assumes get_xgcm_horizontal adds 'dxC' and 'dyC' to ds - confirm in utils.geo
ds, xgrid = geo.get_xgcm_horizontal(
    ds,
    axes_dims_dict={'X': 'lon', 'Y': 'lat'},
    periodic='X',
)
ds['area'] = ds['dxC'] * ds['dyC']

In [25]:
# Display the dataset repr to inspect its variables, shapes and chunking
ds

Unnamed: 0,Array,Chunk
Bytes,32.81 GiB,110.87 MiB
Shape,"(1212, 841, 8640)","(4, 841, 8640)"
Count,14750 Tasks,303 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 32.81 GiB 110.87 MiB Shape (1212, 841, 8640) (4, 841, 8640) Count 14750 Tasks 303 Chunks Type float32 numpy.ndarray",8640  841  1212,

Unnamed: 0,Array,Chunk
Bytes,32.81 GiB,110.87 MiB
Shape,"(1212, 841, 8640)","(4, 841, 8640)"
Count,14750 Tasks,303 Chunks
Type,float32,numpy.ndarray


In [19]:
# Build a multiplicative mask from the Natural Earth 1:110m land regions and
# apply it to the data variable.
#   step 1: cells where the regionmask output is not finite are set to 1
#   step 2: every cell that is not exactly 1 is set to NaN
# NOTE(review): presumably regionmask returns a region number over land and
# NaN elsewhere, so the result is 1 over ocean and NaN over land - confirm.
mask = (
    regionmask.defined_regions.natural_earth.land_110
    .mask(ds['lon'], ds['lat'])
    .pipe(lambda m: m.where(np.isfinite(m), 1))
    .pipe(lambda m: m.where(m == 1))
)
# (use mask.plot() for a quick visual check)

# Missing values in the data are first set to 0, then cells masked above
# become NaN through the multiplication.
ds[variable] = ds[variable].fillna(0) * mask



In [20]:
# Extract the area-weighted spatial mean for each year on a fixed 365-day axis
years = np.arange(yearstart,yearend+1)
days = np.arange(0,365)
# Container for the result (dims: day x year); filled one year at a time below
ds_year = xr.DataArray(dims=['day','year'],coords={'year':years,'day':days},name=variable)
# The area weights do not change from year to year, so build them once.
# Missing areas are given zero weight (xarray rejects weights containing NaN).
weights = ds['area'].fillna(0)
for year in years:
    print(year)
    # Select the 12-month window labelled by `year`
    if monthorder=='JULtoJUN':
        # July of the previous calendar year through June of `year`
        start = str(year-1)+'-07-01'
        end = str(year)+'-06-30'
    elif monthorder=='JANtoDEC':
        # January through December of `year` (a single calendar year)
        start = str(year)+'-01-01'
        end = str(year)+'-12-31'
    else:
        raise ValueError("Unknown monthorder '"+str(monthorder)+"'; expected 'JULtoJUN' or 'JANtoDEC'")

    # Weighted spatial mean
    x = (ds[variable].sel({'time':slice(start,end)})).weighted(weights).mean(['lat','lon'])

    # Interpolate to daily
    # (day-frequency data has some missing days, so interpolate that too)
    alltimes = pd.date_range(start,end)
    # Drop 29 February so that every year has exactly 365 days; selecting the
    # leap day by calendar date works for either month ordering.
    alltimes = alltimes[~((alltimes.month==2)&(alltimes.day==29))]
    # Target days outside the range of the available times are set to NaN
    xi = x.interp({'time':alltimes},kwargs={'fill_value':np.nan})

    # Trigger the computation and put the result into the container
    with ProgressBar():
        xi = xi.compute()
    ds_year.loc[{'year':year}]=xi.values

1999
[########################################] | 100% Completed | 21.70 s
2000
[########################################] | 100% Completed | 28.83 s
2001
[########################################] | 100% Completed | 30.17 s
2002
[########################################] | 100% Completed | 32.37 s
2003
[########################################] | 100% Completed | 31.34 s
2004
[########################################] | 100% Completed | 28.82 s
2005
[########################################] | 100% Completed | 30.86 s
2006
[########################################] | 100% Completed | 31.22 s
2007
[########################################] | 100% Completed | 30.57 s
2008
[########################################] | 100% Completed | 31.27 s
2009
[########################################] | 100% Completed | 29.32 s
2010
[########################################] | 100% Completed | 30.69 s
2011
[########################################] | 100% Completed | 31.83 s
2012
[###################

In [21]:
# Output location; the filename encodes the run configuration as
# <dataset>.<frequency>.<averaging>.<monthorder>.<yearstart>-<yearend>.nc
outdir = '../../data/'
yearstr = f"{yearstart}-{yearend}"
outpath = outdir + f"{dataset}.{frequency}.{averaging}.{monthorder}.{yearstr}.nc"

In [23]:
# Write the yearly series to netCDF, replacing any existing file of that name
print(f'Saving to {outpath}')
already_written = os.path.isfile(outpath)
if already_written:
    # Delete the old file first so the write starts from a clean path
    os.remove(outpath)
ds_year.to_netcdf(outpath)

Saving to ../../data/oc-cci.8day.wmean-latlon.JULtoJUN.1999-2023.nc


### By basin