# Setup

In [10]:
# Packages -----------------------------------------------#

# Data Analysis
import xarray as xr
import numpy as np
import pandas as pd
import metpy.calc as mpcalc
import matplotlib.dates as dates
import matplotlib.pyplot as plt
from scipy.interpolate import griddata

# Timing Processes and Progress
import time
from tqdm import tqdm

# make sure the figures plot inline rather than at the end
%matplotlib inline

# get data from tigress
# Active input: the concatenated monthly file. Earlier candidates are kept for reference.
#   path    = '/home/jennap/projects/LRGROUP/shared_data/chlorophyll_globcolor/'
#   path    = '/home/jennap/projects/LRGROUP/shared_data/chl-globcolor-100km-case-1-and-case-2-waters/case-1-waters-indiv-monthly/'
#   chlinfn = 'all_L3m_19970901-20180831__GLOB_25_AVW-MODVIR_CHL1_MO_00.nc'
#   chlinfn = 'L3m_19970901-19970930__GLOB_100_AV-SWF_CHL1_MO_00.nc'
#   chlinfn = 'data/L3m_19970901-19970930__GLOB_25_AV-SWF_CHL1_MO_00.nc'
path = '/home/jennap/projects/LRGROUP/shared_data/chl-globcolor-100km-case-1-and-case-2-waters/concatenated-monthly/'
chlinfn = 'all_L3m_AV_CHL1_100km_global_monthly_merged_1997_2020.nc'

ds = xr.open_dataset(path + chlinfn)

# Subset ------------------------------------------------#
# Restrict chl1_mean to lat -20..30 and lon 35..120 before any averaging.
# Label-based slices include both end points.
# NOTE(review): assumes lat/lon are stored in ascending order - confirm.
mon_chl = ds['chl1_mean'].sel(lat=slice(-20, 30), lon=slice(35, 120))

# plain numpy copies of the subset coordinates
lat = mon_chl.lat.values
lon = mon_chl.lon.values

# Downsample to Seasonal Temporal Resolution (input is already monthly)

In [11]:
%%time
# 'QS-DEC' is the quarter-start frequency anchored on December, so the bins
# begin on 1 Dec / 1 Mar / 1 Jun / 1 Sep; average the monthly fields per bin.
seas_chl = (
    mon_chl
    .resample(time='QS-DEC')
    .mean(dim="time")
)

CPU times: user 94.1 ms, sys: 34.6 ms, total: 129 ms
Wall time: 79.5 ms


  return np.nanmean(a, axis=axis, dtype=dtype)


# Find Climatologies 
Resources: [xarray example: calculating seasonal averages from time series of monthly means](http://xarray.pydata.org/en/stable/examples/monthly-means.html)

In [12]:
%%time
# -------------------------------------------
# weighted seasonal
# -------------------------------------------

# number of days in each month; used as the averaging weights
month_length = mon_chl.time.dt.days_in_month

# normalise the weights so that they sum to one within each season
weights = month_length.groupby('time.season') / month_length.groupby('time.season').sum()

# .sum() skips months whose chlorophyll is NaN, so the weighted sum must be
# divided by the weight that actually contributed at each grid point.
# Without this renormalisation every data gap biases the climatology low.
chl_weighted_sum = (mon_chl * weights).groupby('time.season').sum(dim='time')
chl_weight_used = (mon_chl.notnull() * weights).groupby('time.season').sum(dim='time')

# grid points with no valid month in a season carry zero weight -> NaN
# (.where keeps values where the condition holds and fills NaN elsewhere)
chl_seas_clim = (chl_weighted_sum / chl_weight_used).where(chl_weight_used > 0)

# -------------------------------------------
# monthly
# -------------------------------------------

# unweighted mean of every calendar month across all years
chl_mon_clim = mon_chl.groupby('time.month').mean('time')

CPU times: user 55 ms, sys: 55.1 ms, total: 110 ms
Wall time: 108 ms


  return np.nanmean(a, axis=axis, dtype=dtype)


# Find Anomalies

In [5]:
%%time
# monthly avg data - monthly climatology
mon_chl_mon_anom = mon_chl.groupby('time.month') - chl_mon_clim

# seasonal avg data - seasonal climatology
# The seasonal anomaly is taken from the quarterly means (seas_chl), not from
# the monthly fields, so the result lives on the same time axis as seas_chl.
seas_chl_seas_anom = seas_chl.groupby('time.season') - chl_seas_clim

CPU times: user 13.6 s, sys: 1.19 s, total: 14.8 s
Wall time: 494 ms


# Put into 2 degree and 1 degree Bins

In [None]:
#%%time

def latlon_coarsen(invar,lat,lon,bounds,binwidth = 1):
    """Average scattered values onto a regular lat/lon grid of square bins.

    Parameters
    ----------
    invar : array-like
        Values to average; NaN entries are ignored.
    lat, lon : array-like
        Latitude and longitude of every entry of ``invar`` (same number of
        elements as ``invar``).
    bounds : sequence of 4 numbers
        ``[lon_min, lon_max, lat_min, lat_max]`` of the target grid.
    binwidth : float, optional
        Bin size in the units of ``lat``/``lon`` (default 1).

    Returns
    -------
    invar_binned_ave : numpy.ndarray, shape (len(latbins), len(lonbins))
        Mean of the non-NaN values in each bin; NaN where a bin has none.
    lonbins, latbins : numpy.ndarray
        Bin-centre longitudes and latitudes.

    Raises
    ------
    ValueError
        If ``binwidth`` is not positive or the inputs differ in size.

    Notes
    -----
    Bins are half-open on the left, ``(edge[k], edge[k+1]]``, and points
    outside every bin are dropped.
    """
    if binwidth <= 0:
        raise ValueError("binwidth must be positive")

    values = np.asarray(invar, dtype=float).ravel()
    lat_pts = np.asarray(lat, dtype=float).ravel()
    lon_pts = np.asarray(lon, dtype=float).ravel()
    if not (values.size == lat_pts.size == lon_pts.size):
        raise ValueError("invar, lat and lon must have the same number of elements")

    # Bin edges start half a bin below the lower bound so that the bounds
    # themselves fall on bin centres.
    # NOTE(review): np.arange excludes its stop value, so when the span is an
    # exact multiple of binwidth the last centre is (upper bound - binwidth)
    # and data above that bin's upper edge are not binned - confirm intended.
    half = binwidth / 2
    latedges = np.arange(bounds[2] - half, bounds[3] + half, binwidth)
    lonedges = np.arange(bounds[0] - half, bounds[1] + half, binwidth)
    latbins = latedges[1:] - half
    lonbins = lonedges[1:] - half
    nlat, nlon = latbins.size, lonbins.size

    averaged = np.full(nlat * nlon, np.nan)
    if nlat == 0 or nlon == 0:
        return averaged.reshape(nlat, nlon), lonbins, latbins

    # right=True gives (left, right] intervals; subtracting 1 turns the edge
    # position into a bin index, with -1 / n marking points outside the grid.
    lat_idx = np.digitize(lat_pts, latedges, right=True) - 1
    lon_idx = np.digitize(lon_pts, lonedges, right=True) - 1
    keep = (
        (lat_idx >= 0) & (lat_idx < nlat)
        & (lon_idx >= 0) & (lon_idx < nlon)
        & ~np.isnan(values)
    )

    # accumulate sum and count per bin in one pass over the retained points
    flat_idx = lat_idx[keep] * nlon + lon_idx[keep]
    sums = np.bincount(flat_idx, weights=values[keep], minlength=nlat * nlon)
    counts = np.bincount(flat_idx, minlength=nlat * nlon)

    filled = counts > 0
    averaged[filled] = sums[filled] / counts[filled]

    return averaged.reshape(nlat, nlon),lonbins,latbins

# grid lat lon data: one (lon, lat) pair per grid cell, flattened once
xx, yy = np.meshgrid(lon, lat)
lon_pts = xx.ravel()
lat_pts = yy.ravel()

# define extent of grid as [lon_min, lon_max, lat_min, lat_max]
bounds = [35, 120, -20, 30]


def _coarsen_every_step(field, binwidth, latbins, lonbins):
    """Bin each slice along the first axis of `field` with latlon_coarsen.

    Returns an array of shape (field.shape[0], len(latbins), len(lonbins)).
    """
    n_steps = field.shape[0]
    coarse = np.zeros([n_steps, len(latbins), len(lonbins)])
    for step in range(n_steps):
        flat_slice = np.array(field[step, :, :]).ravel()
        coarse[step, :, :] = latlon_coarsen(flat_slice, lat_pts, lon_pts, bounds, binwidth)[0]
    return coarse


# ---- 2 degree ----
# one throw-away call just to obtain the bin centres
lonbins_2deg, latbins_2deg = latlon_coarsen(
    np.array(mon_chl_mon_anom[0, :, :]).ravel(), lat_pts, lon_pts, bounds, 2)[1:]

# monthly data, monthly climatology, monthly anomaly
mon_chl_2deg = _coarsen_every_step(mon_chl, 2, latbins_2deg, lonbins_2deg)
chl_mon_clim_2deg = _coarsen_every_step(chl_mon_clim, 2, latbins_2deg, lonbins_2deg)
mon_chl_mon_anom_2deg = _coarsen_every_step(mon_chl_mon_anom, 2, latbins_2deg, lonbins_2deg)

# ---- 1 degree ----
lonbins_1deg, latbins_1deg = latlon_coarsen(
    np.array(mon_chl_mon_anom[0, :, :]).ravel(), lat_pts, lon_pts, bounds, 1)[1:]

# monthly data, monthly climatology, monthly anomaly
mon_chl_1deg = _coarsen_every_step(mon_chl, 1, latbins_1deg, lonbins_1deg)
chl_mon_clim_1deg = _coarsen_every_step(chl_mon_clim, 1, latbins_1deg, lonbins_1deg)
mon_chl_mon_anom_1deg = _coarsen_every_step(mon_chl_mon_anom, 1, latbins_1deg, lonbins_1deg)



    
# # 0.25 degree
# _,lonbins_025deg,latbins_025deg =latlon_coarsen(np.array(mon_chl_mon_anom[0,:,:]).ravel(),yy.ravel(),
#                                             xx.ravel(),bounds,0.25)

# # monthly data
# mon_chl_025deg=np.zeros([mon_chl.shape[0],len(latbins_025deg),len(lonbins_025deg)])
# for tt in range(mon_chl.shape[0]):
#     mon_chl_025deg[tt,:,:],_,_ = latlon_coarsen(np.array(mon_chl[tt,:,:]).ravel(),
#                                                 yy.ravel(),xx.ravel(),bounds,0.25)
    

# # monthly climatology
# chl_mon_clim_025deg=np.zeros([chl_mon_clim.shape[0],len(latbins_025deg),len(lonbins_025deg)])
# for tt in range(chl_mon_clim.shape[0]):
#     chl_mon_clim_025deg[tt,:,:],_,_ = latlon_coarsen(np.array(chl_mon_clim[tt,:,:]).ravel(),yy.ravel(),xx.ravel(),
#                                                    bounds,0.25)

# # monthly anomaly
# mon_chl_mon_anom_025deg=np.zeros([mon_chl_mon_anom.shape[0],len(latbins_025deg),len(lonbins_025deg)])
# for tt in range(mon_chl_mon_anom.shape[0]):
#     mon_chl_mon_anom_025deg[tt,:,:],_,_ = latlon_coarsen(np.array(mon_chl_mon_anom[tt,:,:]).ravel(),yy.ravel(),
#                                                        xx.ravel(),bounds,0.25)













In [None]:
# convert to xarray dataset
# This notebook only has monthly input, so the full-resolution grid is taken
# from mon_chl. (An earlier version referenced a `daily_chl` array that is
# never defined here, which made this cell fail with a NameError.)
grid_lat = mon_chl.lat
grid_lon = mon_chl.lon

ds=xr.Dataset(coords={'lon': grid_lon,
                    'lat': grid_lat})

# add variables to dataset
# Each variable gets its own leading dimension name (month_time, season_time,
# month, season) and the binned fields get their own lat/lon dimension names,
# so differently sized axes never collide inside the dataset.

ds["mon_chl"]=xr.DataArray(mon_chl,dims = ['month_time','lat', 'lon'],
                     coords =[mon_chl.time,grid_lat,grid_lon])
ds["seas_chl"]=xr.DataArray(seas_chl,dims = ['season_time','lat', 'lon'],
                     coords =[seas_chl.time,grid_lat,grid_lon])

ds["mon_chl_2deg"]=xr.DataArray(mon_chl_2deg,dims = ['month_time','lat_2deg', 'lon_2deg'],
                     coords =[mon_chl.time,latbins_2deg,lonbins_2deg])
ds["mon_chl_1deg"]=xr.DataArray(mon_chl_1deg,dims = ['month_time','lat_1deg', 'lon_1deg'],
                     coords =[mon_chl.time,latbins_1deg,lonbins_1deg])
# ds["mon_chl_025deg"]=xr.DataArray(mon_chl_025deg,dims = ['month_time','lat_025deg', 'lon_025deg'],
#                      coords =[mon_chl.time,latbins_025deg,lonbins_025deg])

# clim
ds["chl_mon_clim"]=xr.DataArray(chl_mon_clim,dims = ['month','lat', 'lon'],
                     coords =[chl_mon_clim.month,grid_lat,grid_lon])
ds["chl_seas_clim"]=xr.DataArray(chl_seas_clim,dims = ['season','lat', 'lon'],
                     coords =[chl_seas_clim.season,grid_lat,grid_lon])

ds["chl_mon_clim_2deg"]=xr.DataArray(chl_mon_clim_2deg,dims = ['month','lat_2deg', 'lon_2deg'],
                     coords =[chl_mon_clim.month,latbins_2deg,lonbins_2deg])
ds["chl_mon_clim_1deg"]=xr.DataArray(chl_mon_clim_1deg,dims = ['month','lat_1deg', 'lon_1deg'],
                     coords =[chl_mon_clim.month,latbins_1deg,lonbins_1deg])
# ds["chl_mon_clim_025deg"]=xr.DataArray(chl_mon_clim_025deg,dims = ['month','lat_025deg', 'lon_025deg'],
#                      coords =[chl_mon_clim.month,latbins_025deg,lonbins_025deg])

# anom
ds["mon_chl_mon_anom"]=xr.DataArray(mon_chl_mon_anom,dims = ['month_time','lat', 'lon'],
                     coords =[mon_chl_mon_anom.time,grid_lat,grid_lon])
# NOTE(review): this shares the 'season_time' axis with seas_chl, so
# seas_chl_seas_anom must be defined on the same time stamps as seas_chl.
ds["seas_chl_seas_anom"]=xr.DataArray(seas_chl_seas_anom,dims = ['season_time','lat', 'lon'],
                     coords =[seas_chl_seas_anom.time,grid_lat,grid_lon])

ds["mon_chl_mon_anom_2deg"]=xr.DataArray(mon_chl_mon_anom_2deg,dims = ['month_time','lat_2deg', 'lon_2deg'],
                     coords =[mon_chl_mon_anom.time,latbins_2deg,lonbins_2deg])
ds["mon_chl_mon_anom_1deg"]=xr.DataArray(mon_chl_mon_anom_1deg,dims = ['month_time','lat_1deg', 'lon_1deg'],
                     coords =[mon_chl_mon_anom.time,latbins_1deg,lonbins_1deg])
# ds["mon_chl_mon_anom_025deg"]=xr.DataArray(mon_chl_mon_anom_025deg,dims = ['month_time','lat_025deg', 'lon_025deg'],
#                      coords =[mon_chl_mon_anom.time,latbins_025deg,lonbins_025deg])

In [None]:
# save_path = '/projects/GEOCLIM/LRGROUP/jennap/Modulation_of_Coastal_Hypoxia_by_the_IOD/data/'
save_path = ""  # empty string -> write into the current working directory

# Name the output after the chlorophyll input file (chlinfn), replacing its
# '.nc' suffix with '_processed.nc'. The previous `slainfn` is not defined
# anywhere in this notebook.
ds.to_netcdf(save_path + chlinfn[:-3] + '_processed.nc',mode='w',format = "NETCDF4")

ds

In [None]:
import matplotlib as mpl
# p = plt.pcolor(mon_chl_mon_anom_2deg[0,:,:], norm=mpl.colors.LogNorm(vmin = 1, vmax = 10))

# quick-look map: first entry along the month axis of the stored monthly climatology
clim_first = ds.chl_mon_clim[0, :, :]
p = plt.pcolor(clim_first)
plt.colorbar(p)

In [None]:
# display the monthly climatology after binning with binwidth 2 (numpy array)
chl_mon_clim_2deg