# Setup

In [13]:
# Packages -----------------------------------------------#

import os

# Data Analysis
import xarray as xr
import numpy as np
import pandas as pd
import metpy.calc as mpcalc
import matplotlib.dates as dates
import matplotlib.pyplot as plt
from scipy.interpolate import griddata

# Timing Processes and Progress
import time
from tqdm import tqdm

# make sure the figures plot inline rather than at the end
%matplotlib inline

# get data from tigress
path = '/home/jennap/projects/LRGROUP/shared_data/ssh_AVISO/'
# slainfn = 'all_adt_sla_global_twosat_phy_l4_vDT2018_daily_1994_2018.nc'
slainfn = 'all_adt_sla_global_twosat_phy_l4_vDT2018_daily_1994_2020_03_01.nc'

ds = xr.open_dataset(path + slainfn)
ds
#print(ds.keys())

# Subset ------------------------------------------------#
# Create slice variables to subset domain before finding means
lat_slice = slice(-20, 30) # bounds inclusive
lon_slice = slice(35, 120) # bounds inclusive

# Get data, selecting lat/lon slice
daily_sla = ds['sla'].sel(latitude=lat_slice,longitude=lon_slice)
lat = daily_sla.latitude.values
lon = daily_sla.longitude.values

del lat_slice, lon_slice

In [24]:
ds.adt.comment

'The absolute dynamic topography is the sea surface height above geoid; the adt is obtained as follows: adt=sla+mdt where mdt is the mean dynamic topography; see the product user manual for details'

# Detrend SLA

Resources: [link](https://ecco-v4-python-tutorial.readthedocs.io/ECCO_v4_Example_calculations_with_scalar_quantities.html), [link](https://groups.google.com/g/xarray/c/fz7HHgpgwk0), [link](https://stackoverflow.com/questions/28647172/numpy-polyfit-doesnt-handle-nan-values), [link](https://stackoverflow.com/questions/28647172/numpy-polyfit-doesnt-handle-nan-values), [link](https://stackoverflow.com/questions/17638137/curve-fitting-to-a-time-series-in-the-format-datetime)



In [8]:
%%time

# stack lat and lon into a single dimension called allpoints
stacked = daily_sla.stack(allpoints=['latitude','longitude'])

# set places where there are nans to zero since polyfit can't deal with them
stacked_nonan = stacked.fillna(0)

# convert date to a number to polyfit can handle it
datenum = dates.date2num(stacked_nonan.time)
daily_sla_slope, daily_sla_intercept = np.polyfit(datenum, stacked_nonan, 1)

#reshape the data
daily_sla_slope = np.reshape(daily_sla_slope, daily_sla.shape[1:3])
daily_sla_intercept = np.reshape(daily_sla_intercept, daily_sla.shape[1:3])

# define a function to compute a linear trend of a timeseries
def linear_detrend(y):
    x = dates.date2num(y.time)
    m, b = np.polyfit(x, y, 1)
    # we need to return a dataarray or else xarray's groupby won't be happy
    return xr.DataArray(y - (m*x + b))

# apply the function over allpoints to calculate the trend at each point
daily_sla_dtrnd = stacked_nonan.groupby('allpoints').apply(linear_detrend)
# unstack back to lat lon coordinates
daily_sla_dtrnd = daily_sla_dtrnd.unstack('allpoints')

# fill all points we set originally to zero back to nan
daily_sla_dtrnd = daily_sla_dtrnd.where(~np.isnan(daily_sla))

# delete trended data to save on memory
del daily_sla,ds, stacked, stacked_nonan

# make plot for this to show difference..

CPU times: user 2h 1min 15s, sys: 4min 47s, total: 2h 6min 2s
Wall time: 10min 14s


# Downsample to Monthly and Seasonal Temporal Resolution

In [9]:
%%time
# monthly
mon_sla_dtrnd = daily_sla_dtrnd.resample(time='1MS').mean(dim="time")
# seasonal
seas_sla_dtrnd = daily_sla_dtrnd.resample(time='QS-DEC').mean(dim="time")

  return np.nanmean(a, axis=axis, dtype=dtype)


CPU times: user 5min 45s, sys: 15 s, total: 6min
Wall time: 12 s


# Find Climatologies 
Resources: [link](http://xarray.pydata.org/en/stable/examples/monthly-means.html)

In [10]:
%%time
# -------------------------------------------
# weighted seasonal
# -------------------------------------------

# get months
month_length = daily_sla_dtrnd.time.dt.days_in_month

# calculate the weights by grouping by 'time.season'.
weights = month_length.groupby('time.season') / month_length.groupby('time.season').sum()

# calculate the weighted average
sla_seas_clim_dtrnd = (daily_sla_dtrnd * weights).groupby('time.season').sum(dim='time')   

# set the places that are now zero from the weights to nans
sla_seas_clim_dtrnd = sla_seas_clim_dtrnd.where(sla_seas_clim_dtrnd != 0,np.nan) # for some reason .where sets the locations not in the condition to nan by default

# -------------------------------------------
# monthly
# -------------------------------------------

sla_mon_clim_dtrnd = daily_sla_dtrnd.groupby('time.month').mean('time') 

  return np.nanmean(a, axis=axis, dtype=dtype)


CPU times: user 1min 36s, sys: 6.84 s, total: 1min 43s
Wall time: 13.2 s


# Find Anomalies

In [11]:
%%time

# # daily data - monthly climatology
# daily_sla_mon_anom_dtrnd = daily_sla_dtrnd.groupby('time.month') - sla_mon_clim_dtrnd

# monthly avg data - monthly climatology
mon_sla_mon_anom_dtrnd = mon_sla_dtrnd.groupby('time.month') - sla_mon_clim_dtrnd

# # monthly avg data - seasonal climatology
# seas_sla_mon_anom_dtrnd = mon_sla_dtrnd.groupby('time.season') - sla_seas_clim_dtrnd

# seasonal avg data - seasonal climatology
seas_sla_seas_anom_dtrnd = mon_sla_dtrnd.groupby('time.season') - sla_seas_clim_dtrnd

CPU times: user 8.84 s, sys: 725 ms, total: 9.57 s
Wall time: 510 ms


In [12]:
# convert to xarray dataset
ds=xr.Dataset(coords={'longitude': daily_sla_dtrnd.longitude,
                    'latitude': daily_sla_dtrnd.latitude,
                    'time': daily_sla_dtrnd.time})

# add variables to dataset

ds["daily_sla_dtrnd"]=(['time','latitude', 'longitude'],  daily_sla_dtrnd)
ds["mon_sla_dtrnd"]=xr.DataArray(mon_sla_dtrnd,dims = ['month_time','latitude', 'longitude'],
                     coords =[mon_sla_dtrnd.time,daily_sla_dtrnd.latitude,daily_sla_dtrnd.longitude])
ds["seas_sla_dtrnd"]=xr.DataArray(seas_sla_dtrnd,dims = ['season_time','latitude', 'longitude'],
                     coords =[seas_sla_dtrnd.time,daily_sla_dtrnd.latitude,daily_sla_dtrnd.longitude])

# clim
ds["sla_mon_clim_dtrnd"]=xr.DataArray(sla_mon_clim_dtrnd,dims = ['month','latitude', 'longitude'],
                     coords =[sla_mon_clim_dtrnd.month,daily_sla_dtrnd.latitude,daily_sla_dtrnd.longitude])
ds["sla_seas_clim_dtrnd"]=xr.DataArray(sla_seas_clim_dtrnd,dims = ['season','latitude', 'longitude'],
                     coords =[sla_seas_clim_dtrnd.season,daily_sla_dtrnd.latitude,daily_sla_dtrnd.longitude])

# anom
ds["mon_sla_mon_anom_dtrnd"]=xr.DataArray(mon_sla_mon_anom_dtrnd,dims = ['month_time','latitude', 'longitude'],
                     coords =[mon_sla_mon_anom_dtrnd.time,daily_sla_dtrnd.latitude,daily_sla_dtrnd.longitude])
ds["seas_sla_seas_anom_dtrnd"]=xr.DataArray(seas_sla_seas_anom_dtrnd,dims = ['season_time','latitude', 'longitude'],
                     coords =[seas_sla_seas_anom_dtrnd.time,daily_sla_dtrnd.latitude,daily_sla_dtrnd.longitude])

# save_path = '/projects/GEOCLIM/LRGROUP/jennap/Modulation_of_Coastal_Hypoxia_by_the_IOD/data/'
outfn = slainfn[:-3] + '_dtrnd.nc'

# delete if already present
if os.path.isfile(outfn):
    os.remove(outfn)

ds.to_netcdf(outfn,mode='w',format = "NETCDF4")
ds

NameError: name 'os' is not defined