# Setup

In [3]:
# import packages
# NOTE(review): `sys`, `os` and `xr` are used later in this notebook without a
# visible import, so presumably this script brings them into the namespace — confirm.
%run ../../global_packages.py

# get the global parameters
# NOTE(review): `lat_bounds`, `lon_bounds`, `ts` and `te` are used by the subsetting
# cell but never defined in this notebook; presumably they are set here — confirm.
%run ../../global_pars.py

# import your local functions
# '../../' is added to the module search path so `global_functions` can be imported.
sys.path.insert(1, '../../')
from global_functions import *

# make sure the figures plot inline rather than at the end
%matplotlib inline

# Paths and Parameters

In [4]:
# Output file: written to the notebook's working directory by the final cell.
outfn = 'chl_processed.nc'

# get data from tigress
# NOTE(review): the path below is relative, so it resolves against the notebook's
# working directory — confirm where raw_data/ is expected to live.
inpath = 'raw_data/concatenated/'
# Input file name; it is appended to `inpath` when the dataset is opened.
infn = 'ESACCI-OC-L3S-CHLOR_A-MERGED-1M_MONTHLY_4km_GEO_PML_OCx-fv5.0_19970904_20200601.nc'

# Get Data and Subset

In [5]:
# Open the concatenated chlorophyll file.
ds = xr.open_dataset(inpath + infn)

# Subset ------------------------------------------------#
# lat_bounds, lon_bounds, ts and te are not defined in this notebook
# (presumably they come from global_pars.py — confirm).
lat_slice = slice(lat_bounds[0], lat_bounds[1])
lon_slice = slice(lon_bounds[0], lon_bounds[1])
time_slice = slice(ts,te)

ds = ds.sel(lat=lat_slice,lon=lon_slice, time = time_slice)

# Get data: ds is already restricted to the lat/lon/time window above, so the
# variable is pulled out directly instead of repeating the same selection.
mon_chl = ds['chlor_a']
lat = mon_chl.lat.values
lon = mon_chl.lon.values

# Find Climatologies 
Resources: [xarray example: calculating seasonal averages from a time series of monthly means](http://xarray.pydata.org/en/stable/examples/monthly-means.html)

In [7]:
# Monthly climatology: group the time steps by calendar month, then average
# each group over its time axis.
chl_by_month = mon_chl.groupby('time.month')
mon_chl_mon_clim = chl_by_month.mean(dim='time')

  return np.nanmean(a, axis=axis, dtype=dtype)


# Find Anomalies

In [8]:
%%time
# monthly avg data - monthly climatology
mon_chl_mon_anom = mon_chl.groupby('time.month') - mon_chl_mon_clim

CPU times: user 4min 8s, sys: 27.8 s, total: 4min 36s
Wall time: 9.21 s


In [9]:
# Start from an empty Dataset that carries only the lon/lat/time coordinates
# of the subset chlorophyll field.
ds = xr.Dataset(coords=dict(lon=mon_chl.lon, lat=mon_chl.lat, time=mon_chl.time))

# Every output variable shares the same two trailing (spatial) axes.
spatial_dims = ['lat', 'lon']
spatial_coords = [mon_chl.lat, mon_chl.lon]

# (variable name, data, leading dimension, coordinate for the leading dimension)
fields = [
    ("mon_chl", mon_chl, 'time', mon_chl.time),                             # monthly data
    ("mon_chl_mon_clim", mon_chl_mon_clim, 'month', mon_chl_mon_clim.month),  # clim
    ("mon_chl_mon_anom", mon_chl_mon_anom, 'time', mon_chl_mon_anom.time),   # anom
]

# Re-wrap each field with explicitly listed dims/coords before adding it.
for var_name, data, lead_dim, lead_coord in fields:
    ds[var_name] = xr.DataArray(data,
                                dims=[lead_dim] + spatial_dims,
                                coords=[lead_coord] + spatial_coords)


In [10]:
# Remove any earlier copy of the output file before writing a fresh one.
stale_output = os.path.isfile(outfn)
if stale_output:
    os.remove(outfn)

ds.to_netcdf(path=outfn, mode='w', format="NETCDF4")

# display the dataset that was just written
ds

<xarray.Dataset>
Dimensions:           (lat: 828, lon: 1296, month: 12, time: 273)
Coordinates:
  * lon               (lon) float64 48.52 48.56 48.6 48.65 ... 102.4 102.4 102.5
  * lat               (lat) float64 -1.479 -1.438 -1.396 ... 32.9 32.94 32.98
  * time              (time) datetime64[ns] 1997-09-04 1997-10-01 ... 2020-05-01
  * month             (month) int64 1 2 3 4 5 6 7 8 9 10 11 12
Data variables:
    mon_chl           (time, lat, lon) float64 0.2797 0.2686 0.2979 ... nan nan
    mon_chl_mon_clim  (month, lat, lon) float64 0.2109 0.2216 0.221 ... nan nan
    mon_chl_mon_anom  (time, lat, lon) float64 -0.1182 -0.1113 ... nan nan