# Setup

In [None]:
# Packages -----------------------------------------------#

# Data Analysis
import xarray as xr
import numpy as np
import pandas as pd
import metpy.calc as mpcalc
import matplotlib.dates as dates
import matplotlib.pyplot as plt
from scipy.interpolate import griddata

# Timing Processes and Progress
import time
from tqdm import tqdm

# make sure the figures plot inline rather than at the end
%matplotlib inline

# File name used for the processed output.
outfn = 'chl_25km_processed.nc'


# Input: directory and file name of the chlorophyll dataset.
path = '/home/jennap/projects/LRGROUP/shared_data/chl-globcolor-case-1-and-case-2-waters/concatenated-daily/'
# Alternative input file at 100 km resolution:
# chlinfn = 'all_L3m_AV_CHL1_100km_global_monthly_merged_1997_2020.nc'
chlinfn = 'all_L3m_AV_CHL1_25km_global_monthly_merged_1997_2020.nc'

# Open the dataset and show its summary as the cell output.
ds = xr.open_dataset(f"{path}{chlinfn}")
ds


In [None]:
# Subset ------------------------------------------------#
# Label-based bounds for the region and period to keep. xarray label slices
# include both end points.
# NOTE(review): these slices assume the lat/lon coordinates are stored in
# ascending order - confirm against the input file.
lat_slice = slice(-20, 30)
lon_slice = slice(35, 120)
time_slice = slice('1997-09-04', '2019-12-31')

# Pull the chlorophyll variable and cut it down to the region/period above.
chl_var = ds['chl1_mean']
daily_chl = chl_var.sel(time=time_slice, lat=lat_slice, lon=lon_slice)

# Plain numpy copies of the coordinate axes of the subset.
lat = daily_chl['lat'].values
lon = daily_chl['lon'].values
daily_chl

# 1997-2012 anomaly

In [None]:
# Baseline period used for the reference mean (1997-2012).
time_slice = slice('1997-09-04', '2012-12-31')

# Time-mean of the baseline period at every grid point, ignoring NaNs.
# daily_chl is already restricted to the lat/lon domain, so only the time
# selection is needed here.
mchl = daily_chl.sel(time=time_slice).mean(dim='time', skipna=True)

# Anomaly relative to the 1997-2012 baseline mean. Previously the mean of
# the whole record was subtracted and mchl was never used.
daily_chla = daily_chl - mchl
daily_chla

# Detrend

In [None]:
%%time

# stack lat and lon into a single dimension called allpoints
stacked = daily_chla.stack(allpoints=['lat','lon'])
# set places where there are nans to zero since polyfit can't deal with them
stacked_nonan = stacked.fillna(0)

# convert date to a number to polyfit can handle it
datenum = dates.date2num(stacked_nonan.time)
daily_chla_slope, daily_chla_intercept = np.polyfit(datenum, stacked_nonan, 1)

#reshape the data
daily_chla_slope = np.reshape(daily_chla_slope, daily_chla.shape[1:3])
daily_chla_intercept = np.reshape(daily_chla_intercept, daily_chla.shape[1:3])

# Remove a least-squares straight line from a single time series.
def linear_detrend(y):
    """Return ``y`` minus its degree-1 least-squares fit against time.

    ``y`` must expose a ``time`` attribute that ``dates.date2num`` can
    convert to numbers; those numbers are the abscissa of the fit.
    """
    t_num = dates.date2num(y.time)
    slope, intercept = np.polyfit(t_num, y, 1)
    fitted_line = slope * t_num + intercept
    # Hand back a DataArray so that xarray's groupby accepts the result.
    return xr.DataArray(y - fitted_line)

# Remove the linear trend at every grid point in one vectorised step.
# The slope and intercept of each point were already obtained by the 2-D
# np.polyfit call above, so the fitted line is evaluated directly instead of
# refitting each point through a Python-level groupby loop.
# ravel() undoes the earlier reshape, giving one coefficient per "allpoints"
# column in the same order as stacked_nonan.
fitted_trend = (np.outer(datenum, daily_chla_slope.ravel())
                + daily_chla_intercept.ravel())
daily_chla_dtrnd = stacked_nonan - fitted_trend

# unstack back to lat lon coordinates
daily_chla_dtrnd = daily_chla_dtrnd.unstack('allpoints')

# restore NaN wherever the original anomaly had no data (those gaps were
# filled with zero only so that polyfit could run)
daily_chla_dtrnd = daily_chla_dtrnd.where(~np.isnan(daily_chla))

In [None]:
# Unweighted mean over all grid points of the subset domain, before
# detrending, plotted as a time series.
stacked = daily_chla.stack(allpoints=['lat','lon'])
global_mean = stacked.mean(dim='allpoints',skipna=True)
global_mean.plot(label='anomaly')

# Same mean for the detrended anomaly, drawn on the same axes for comparison.
stacked = daily_chla_dtrnd.stack(allpoints=['lat','lon'])
global_mean_dtrnd = stacked.mean(dim='allpoints',skipna=True)
global_mean_dtrnd.plot(label='detrended anomaly')

# plt.legend must be called (and the curves labelled) for a legend to appear.
plt.legend()

# Downsample to Monthly and Seasonal Temporal Resolution

In [None]:
%%time
# monthly
mon_chla_dtrnd = daily_chla_dtrnd.resample(time='1MS').mean(dim="time")
# seasonal
seas_chla_dtrnd = daily_chla_dtrnd.resample(time='QS-DEC').mean(dim="time")

# Find Climatologies 
Resources: [xarray example: calculating seasonal averages from a time series of monthly means](https://docs.xarray.dev/en/stable/examples/monthly-means.html)

In [None]:
%%time
# -------------------------------------------
# weighted seasonal
# -------------------------------------------

# get months
month_length = mon_chla_dtrnd.time.dt.days_in_month

# calculate the weights by grouping by 'time.season'.
weights = month_length.groupby('time.season') / month_length.groupby('time.season').sum()

# calculate the weighted average
chla_seas_clim = (daily_chla_dtrnd * weights).groupby('time.season').sum(dim='time')   

# set the places that are now zero from the weights to nans
chla_seas_clim = chla_seas_clim.where(chla_seas_clim != 0,np.nan) # for some reason .where sets the locations not in the condition to nan by default

# -------------------------------------------
# monthly
# -------------------------------------------

chla_mon_clim = daily_chla_dtrnd.groupby('time.month').mean('time') 

# Find Anomalies

In [None]:
%%time
# monthly avg data - monthly climatology
mon_chla_mon_anom = daily_chla_dtrnd.groupby('time.month') - chla_mon_clim

# seasonal avg data - seasonal climatology
seas_chla_seas_anom = daily_chla_dtrnd.groupby('time.season') - chla_seas_clim

In [None]:
# Assemble the output dataset on the grid of the monthly means.
grid_lat = mon_chla_dtrnd.lat
grid_lon = mon_chla_dtrnd.lon

ds = xr.Dataset(coords={'lon': grid_lon,
                        'lat': grid_lat,
                        'time': mon_chla_dtrnd.time})

# One row per output variable:
# (variable name, values, name of leading dimension, leading coordinate).
# Every variable shares the same lat/lon coordinates.
output_fields = (
    # resampled means
    ("mon_chla", mon_chla_dtrnd, 'time', mon_chla_dtrnd.time),
    ("seas_chla", seas_chla_dtrnd, 'season_time', seas_chla_dtrnd.time),
    # clim
    ("chla_mon_clim", chla_mon_clim, 'month', chla_mon_clim.month),
    ("chla_seas_clim", chla_seas_clim, 'season', chla_seas_clim.season),
    # anom
    ("mon_chla_mon_anom", mon_chla_mon_anom, 'time', mon_chla_mon_anom.time),
    ("seas_chla_seas_anom", seas_chla_seas_anom, 'season_time', seas_chla_seas_anom.time),
)

# add variables to dataset
for var_name, values, lead_dim, lead_coord in output_fields:
    ds[var_name] = xr.DataArray(values,
                                dims=[lead_dim, 'lat', 'lon'],
                                coords=[lead_coord, grid_lat, grid_lon])


In [None]:
import os

# Remove a previous output file, if there is one, before writing.
output_exists = os.path.isfile(outfn)
if output_exists:
    os.remove(outfn)

# Write the dataset as NetCDF4, then show its summary as the cell output.
ds.to_netcdf(path=outfn, mode='w', format="NETCDF4")

ds

In [None]:
# Quick look: the first entry along the leading dimension of the seasonal
# climatology, drawn as a 2-D colour mesh.
first_seas_clim = ds["chla_seas_clim"][0, :, :]
plt.pcolormesh(first_seas_clim)