# Setup

In [1]:
# import your standard packages
# %run executes start.py inside this notebook's namespace, so whatever it imports becomes
# available to every cell below.
# NOTE(review): `sys` (used just below) and the `xr` / `os` names used in later cells are
# never imported in this notebook — presumably start.py provides them; confirm.
%run ../../../global_tools/start.py

# import your local functions
# Add the parent directory to the module search path so local_functions can be found.
sys.path.insert(1, '../')
# NOTE(review): the star import hides where names come from; monthly_clim (called in a
# later cell) is presumably defined in local_functions — confirm.
from local_functions import *

# make sure the figures plot inline rather than at the end
%matplotlib inline

Default libraries loaded.


# Paths and Parameters

In [2]:
# --- Subset window ---------------------------------------------------------
# Each entry is a [lower, upper] pair that is later used as the endpoints
# of a label-based slice when subsetting the input file.
lat_bounds = [-1.5, 33]
lon_bounds = [48.5, 102.5]
time_bounds = ['1997-09-04', '2020-05-31']

# --- Input -----------------------------------------------------------------
# Source file lives on tigress; inpath keeps its trailing slash because the
# full path is built by plain string concatenation (inpath + infn).
inpath = '/home/jennap/projects/LRGROUP/shared_data/chl-oc-cci/concatenated/'
infn = 'ESACCI-OC-L3S-CHLOR_A-MERGED-1M_MONTHLY_4km_GEO_PML_OCx-fv5.0_19970904_20200601.nc'

# --- Output ----------------------------------------------------------------
# Destination for the processed dataset, relative to the notebook directory.
outfn = '../../data/chl_processed.nc'

# Get Data and Subset

In [3]:
# Open the concatenated monthly chlorophyll file.
ds = xr.open_dataset(inpath + infn)

# Subset ------------------------------------------------#
# Build label-based slices from the [lower, upper] bounds set in the parameters cell.
lat_slice = slice(*lat_bounds)
lon_slice = slice(*lon_bounds)
time_slice = slice(*time_bounds)

# Restrict the whole dataset once; every variable pulled from ds afterwards
# is already limited to the requested region and period.
ds = ds.sel(lat=lat_slice, lon=lon_slice, time=time_slice)

# ds is already subset, so simply extract the chlorophyll variable
# (re-applying the same slices here was redundant).
mon_chl = ds['chlor_a']

# Label slicing silently returns an empty selection when the bounds do not
# match the coordinate ordering or range, so fail loudly here instead of
# propagating an empty array through the rest of the notebook.
assert mon_chl.size > 0, (
    "Subset is empty - check lat/lon/time bounds against the file's coordinates"
)

# Plain numpy copies of the coordinate values for convenience.
lat = mon_chl.lat.values
lon = mon_chl.lon.values

# Find Climatologies 
Resources: [xarray example: calculating seasonal averages from time series of monthly means](http://xarray.pydata.org/en/stable/examples/monthly-means.html)

In [4]:
# Compute the climatology of the monthly chlorophyll subset.
# monthly_clim is not defined in this notebook (presumably it comes from the
# local_functions star import — confirm). The result exposes a `month`
# coordinate, which the later dataset-assembly cell relies on.
# NOTE(review): the warning printed under this cell originates from np.nanmean;
# likely grid cells with no valid data at all — confirm it is expected.
mon_chl_mon_clim = monthly_clim(mon_chl)

  return np.nanmean(a, axis=axis, dtype=dtype)


# Find Anomalies

In [5]:
%%time
# monthly avg data - monthly climatology
# Grouping by 'time.month' pairs every time step with the climatology entry of
# the same calendar month before subtracting, so the result keeps the time axis.
mon_chl_mon_anom = mon_chl.groupby('time.month') - mon_chl_mon_clim

CPU times: user 12min 29s, sys: 47.6 s, total: 13min 17s
Wall time: 26.6 s


In [7]:
# Assemble the output Dataset.
# Note: this rebinds `ds`, which previously held the subset input dataset.
ds = xr.Dataset(
    coords={
        'lon': mon_chl.lon,
        'lat': mon_chl.lat,
        'time': mon_chl.time,
    }
)

# Each variable is re-wrapped with an explicit dims/coords list so that only
# the listed axes are attached to it inside the Dataset.

# monthly means on the (time, lat, lon) grid
ds["mon_chl"] = xr.DataArray(
    mon_chl,
    dims=['time', 'lat', 'lon'],
    coords=[mon_chl.time, mon_chl.lat, mon_chl.lon],
)

# monthly climatology on the (month, lat, lon) grid
ds["mon_chl_mon_clim"] = xr.DataArray(
    mon_chl_mon_clim,
    dims=['month', 'lat', 'lon'],
    coords=[mon_chl_mon_clim.month, mon_chl.lat, mon_chl.lon],
)

# monthly anomalies on the (time, lat, lon) grid
ds["mon_chl_mon_anom"] = xr.DataArray(
    mon_chl_mon_anom,
    dims=['time', 'lat', 'lon'],
    coords=[mon_chl_mon_anom.time, mon_chl.lat, mon_chl.lon],
)


In [8]:
# delete if already present
# NOTE(review): mode='w' below also overwrites an existing file; presumably the
# explicit removal is a guard against a stale or locked file — confirm.
if os.path.isfile(outfn):
    os.remove(outfn)

# Write the assembled dataset (means, climatology, anomalies) to a NETCDF4 file.
ds.to_netcdf(outfn,mode='w',format = "NETCDF4")

# Last expression of the cell: show the dataset summary as the cell output.
ds

<xarray.Dataset>
Dimensions:           (lat: 828, lon: 1296, month: 12, time: 273)
Coordinates:
  * lon               (lon) float64 48.52 48.56 48.6 48.65 ... 102.4 102.4 102.5
  * lat               (lat) float64 -1.479 -1.438 -1.396 ... 32.9 32.94 32.98
  * time              (time) datetime64[ns] 1997-09-04 1997-10-01 ... 2020-05-01
  * month             (month) int64 1 2 3 4 5 6 7 8 9 10 11 12
Data variables:
    mon_chl           (time, lat, lon) float64 0.2797 0.2686 0.2979 ... nan nan
    mon_chl_mon_clim  (month, lat, lon) float64 0.2109 0.2216 0.221 ... nan nan
    mon_chl_mon_anom  (time, lat, lon) float64 -0.1182 -0.1113 ... nan nan