## Example for eddy identification and tracking for daily data using py-eddy-tracker (for IFS/FESOM output)

- The basic setup to identify, track and composite eddies uses ICON output from known paths. Please refer to the notebook examples [howto-IDtrackcompeddy-daily.ipynb](https://github.com/eerie-project/EERIE_hackathon_2023/blob/main/ICON/ICON-O/howto-IDtrackcompeddy-daily.ipynb) and [howto-eddycompositeotherfields-daily.ipynb](https://github.com/eerie-project/EERIE_hackathon_2023/blob/main/ICON/ICON-O/howto-eddycompositeotherfields-daily.ipynb) (Dian Putrasahan)
- py-eddy-tracker reads in netcdf file but does not know how to use xarray. This issue was dealt with in [pyeddytracker-intake-xarray-parallel-demo.ipynb](https://github.com/eerie-project/EERIE_hackathon_2023/blob/main/RESULTS/pyeddytracker_xarray_dask_parallel/pyeddytracker-intake-xarray-parallel-demo.ipynb) (Aaron Wienkers). Furthermore, the code uses dask for parallelisation. 
- Now, feeding py-eddy-tracker with xarray obtained from reading in with intake catalog is possible, so here's an example of how it is done (Aaron Wienkers and Dian Putrasahan)
- Parameter choices for py-eddy-tracker closely follow those used for AVISO, following [recommendation from py-eddy-tracker author](https://github.com/AntSimi/py-eddy-tracker/discussions/198). 

| Parameter (identification) | Value | Description |
| ------------------------------- | ------------ | --------------------------- |
| wavelength (Bessel filter) | 700 km | spatial cutoff for high pass filter in km |
| wavelength_order (filter) | 1 | |
| step_ht | 0.002 m | intervals to search for closed contours (m) |
| shape error | 70 | Error max (%) between ratio of circle fit and contour |
| nb_step_to_be_mle | 0 (default 2?) | don't allow micro relief in an eddy, used for computing amplitude | 
| sampling (affects storage) | Not set (default 50) | affects storage, using 20-30 is acceptable |
| pixel_limit | Not set (default None) | Min and max pixel count for valid contour (5, 2000)  |
| presampling_multiplier | Not set (default 10) | |
| sampling_method | Not set (default visvalingam) | |
| precision | Not set (default None) | |


| Parameter (tracking) | Value |
| ------------------------ | ------------ |
| cmin | 0.05 |
| virtual | 4 |



#### Eddy identification based on 0.25deg grid

In [1]:
import xarray as xr
import numpy as np
import matplotlib.pylab as plt
import matplotlib.cm as cm
from scipy.interpolate import CloughTocher2DInterpolator, LinearNDInterpolator, NearestNDInterpolator
import glob
import intake
import intake_xarray
import dask
import pandas as pd
dask.config.set({"array.slicing.split_large_chunks": True}) 

from py_eddy_tracker.dataset.grid import RegularGridDataset
from datetime import datetime, timedelta
from netCDF4 import Dataset

import io
import os

import warnings
warnings.filterwarnings("ignore")

In [2]:
## Start Parallel Client
from concurrent.futures import ProcessPoolExecutor
from concurrent.futures import ThreadPoolExecutor
# Note: Could also use Dask Distributed Client
n_cpu = 64

In [3]:
# Open the EERIE intake catalogue from its published YAML description.
cat = intake.open_catalog("https://raw.githubusercontent.com/eerie-project/intake_catalogues/main/eerie.yaml")
# Selection keys reused by later cells (also used to build output paths).
model = 'ifs-fesom2-sr'
expid = 'eerie-control-1950'
gridspec = 'gr025'
# Walk the nested catalogue: source -> model -> experiment -> 'ocean' -> grid.
cat_regrid = cat['dkrz.disk.model-output'][model][expid]['ocean'][gridspec]
# Show which entries are available below the selected grid.
print(list(cat_regrid))

['daily', 'monthly']


In [4]:
# Open the 'daily' catalogue entry as a dask-backed dataset.
ds = cat_regrid['daily'].to_dask()
# Name of the variable of interest.
# NOTE(review): delayed_ID_and_save sets its own local varname='ssh' and does
# not read this global.
varname = 'ssh'

In [5]:
# Optional: restrict the processing to a time window (disabled; the full
# dataset is used instead).
# ds_subset = ds.sel(time=slice('1951-01-01','1956-12-31'))
ds_subset = ds
# One Python datetime per time step; these are the dates handed to the
# identification routine and used to name the output files.
datearr = np.array([pd.Timestamp(t).to_pydatetime() for t in ds_subset.time.values])


In [6]:
# Directories
# NOTE: the path strings are built exactly as before (including the doubled
# '/' after gridspec) so that previously written output is still found.
scratch = '/scratch/m/m300466/'
datadir = scratch+expid+'/'+gridspec+'/'
outdir1 = datadir+'/'+model+'/eddytrack/'

# Filter wavelength (km) is encoded in the output directory name.
wavelength=700
outdir = outdir1+'sm'+str(wavelength)+'/'

# os.makedirs creates every missing intermediate directory (datadir/model and
# outdir1 are parents of outdir), and exist_ok=True avoids the
# check-then-create race of a separate os.path.exists() test.
os.makedirs(outdir, exist_ok=True)

## ID eddies

In [7]:
def detection(ncfile, varname, date, wavelength=700, step_ht=0.002,
              shape_error=70, nb_step_to_be_mle=0):
    """Identify eddies in one field held in an open netCDF4 dataset.

    Defaults follow the AVISO / Malcolm Roberts parameter choices listed in
    the table at the top of this notebook.

    Parameters
    ----------
    ncfile : netCDF4.Dataset
        Open dataset handed to ``RegularGridDataset`` through ``nc4file``;
        its coordinates are read as "lon" and "lat".
    varname : str
        Name of the field that is filtered and searched for closed contours.
    date : datetime.datetime
        Date of identification, forwarded to ``eddy_identification``.
    wavelength : float, optional
        Spatial cutoff of the Bessel high-pass filter in km (default 700).
    step_ht : float, optional
        Step between two isolines of detection in m (default 0.002, i.e. 2 mm).
    shape_error : float, optional
        Maximum error (%) between ratio of circle fit and contour (default 70).
    nb_step_to_be_mle : int, optional
        Forwarded to ``eddy_identification``; 0 does not allow micro relief
        in an eddy when computing the amplitude (default 0).

    Returns
    -------
    tuple
        ``(a, c, g)``: the two observation sets returned by
        ``eddy_identification`` (the caller stores them as anticyclonic and
        cyclonic respectively) and the filtered grid object.
    """
    # NOTE: Using 'None' for the .nc file path then requires specifying
    # directly the netcdf4 variable in memory
    g = RegularGridDataset(None, "lon", "lat", centered=True, nc4file=ncfile)
    # Adds the velocity fields that are referenced below as "u" and "v".
    g.add_uv(varname)
    g.bessel_high_filter(varname, wavelength, order=1)

    # pixel_limit and sampling are deliberately left at the library defaults;
    # previously tried values were pixel_limit=(50, 400) and sampling=20
    # (sampling affects storage, default 50, 20-30 is acceptable).
    a, c = g.eddy_identification(
        varname, "u", "v",
        date,  # Date of identification
        step_ht,  # step between two isolines of detection (m)
        nb_step_to_be_mle=nb_step_to_be_mle,
        shape_error=shape_error,
    )
    return a, c, g

# Parallel function wrapper to the for-loop 
def delayed_ID_and_save(date, tt):
    """Identify the eddies of one time step and write them to disk.

    Intended as the per-task function of the parallel map: it reads the
    notebook-level ``ds_subset`` and ``outdir`` and returns nothing.

    Parameters
    ----------
    date : datetime.datetime
        Date of the time step; used for the identification and for the
        ``YYYYMMDD`` stamp in the output file names.
    tt : int
        Positional index of the time step in ``ds_subset``.
    """
    varname = 'ssh'

    # Select one time step from the xarray dataset.
    da_ssh = ds_subset.isel(time=tt)
    # da_ssh = ds_oce.isel(time=tt).ssh
    # Drop any _FillValue from the time encoding before serialising
    # (presumably to keep it off the time coordinate; confirm).
    da_ssh.time.encoding.pop("_FillValue", None)

    # Format the date once; the output path itself must not be passed through
    # strftime, otherwise a '%' in outdir would be interpreted as a directive.
    datestamp = date.strftime('%Y%m%d')

    # Serialise to netCDF bytes and reopen them as an in-memory netCDF4
    # dataset. The context manager closes it again so that the buffer is not
    # leaked for every time step handled by a worker; all uses of the
    # detection results stay inside the block in case they still refer to it.
    with Dataset('in-mem-file', mode='r', memory=da_ssh.to_netcdf()) as da_netcdf:
        #print('Identifying daily eddies for '+date.strftime('%Y%m%d'))
        a_filtered, c_filtered, _ = detection(da_netcdf, varname, date)
        with Dataset(outdir+"eddyID_anticyclonic_"+datestamp+".nc", "w") as h:
            a_filtered.to_netcdf(h)
        with Dataset(outdir+"eddyID_cyclonic_"+datestamp+".nc", "w") as h:
            c_filtered.to_netcdf(h)

In [None]:
%%capture --no-stdout
# ID all of the eddies in the ds_subset in parallel
# Each task receives one date and the matching time index; n_cpu worker
# processes are used.
with ProcessPoolExecutor(max_workers=n_cpu) as executor:
    # list(...) drains the map so that every task has finished (and any
    # exception raised in a worker is re-raised here) before the pool closes.
    results = list(executor.map(delayed_ID_and_save, datearr, range(len(datearr))))

## Track Eddies

In [7]:
import os 
import glob

from py_eddy_tracker.featured_tracking.area_tracker import AreaTracker
from py_eddy_tracker.tracking import Correspondances

import numpy as np
from datetime import datetime, timedelta
from netCDF4 import Dataset
import xarray as xr


In [8]:
#Functions from eddy-tracking.py (aided by Malcolm Roberts)
def tracking(file_objects, previous_correspondance, eddy_type, zarr=False, nb_obs_min=10, raw=True, cmin=0.05, virtual=4):
    """Track eddies across identification files and write the track files.

    Adapted from eddy-tracking.py (aided by Malcolm Roberts). Uses the
    contour-overlap ``AreaTracker``.

    Parameters
    ----------
    file_objects : list of str
        Identification files, in the order they should be tracked.
    previous_correspondance : str
        Path of the correspondence file. If it exists, tracking resumes from
        it; in either case it is (re)written at the end. Its directory is
        also where the track files are written.
    eddy_type : str
        Prefix of the output file names (e.g. 'anticyclonic', 'cyclonic').
    zarr : bool, optional
        Forwarded to ``write_obs_files``.
    nb_obs_min : int, optional
        Threshold separating short from long tracks.
    raw : bool, optional
        Forwarded as ``raw_data`` when merging observations.
    cmin : float, optional
        Passed to ``AreaTracker`` through ``class_kw``.
    virtual : int, optional
        Passed to ``Correspondances`` as ``virtual``.

    Raises
    ------
    Exception
        If the freshly written correspondence file cannot be reopened.
    """
    output_dir = os.path.dirname(previous_correspondance)
    resume = os.path.isfile(previous_correspondance)

    # Build the constructor arguments once; the previous correspondence is
    # only handed over when it actually exists on disk.
    corr_kw = dict(
        datasets=file_objects, class_method=AreaTracker,
        class_kw=dict(cmin=cmin), virtual=virtual,
    )
    if resume:
        corr_kw["previous_correspondance"] = previous_correspondance
    c = Correspondances(**corr_kw)

    c.track()
    c.prepare_merging()
    if resume:
        # Kept from the original workflow: merge is only called here when
        # resuming from an existing correspondence file.
        c.merge()

    # Write to a temporary name first so that a failed write never clobbers
    # the existing correspondence file.
    new_correspondance = os.path.splitext(previous_correspondance)[0]+'_new.nc'
    with Dataset(new_correspondance, "w") as h:
        c.to_netcdf(h)

    # Check that the new file can be read, closing the handle again before
    # the file is renamed.
    try:
        with Dataset(new_correspondance, 'r'):
            pass
    except Exception as err:
        raise Exception('Error opening new correspondance file '+new_correspondance) from err
    # A failure here surfaces as OSError (a subclass of Exception) with the
    # real cause instead of a misleading "error opening" message.
    os.rename(new_correspondance, previous_correspondance)

    write_obs_files(c, raw, output_dir, zarr, eddy_type, nb_obs_min)
    

def write_obs_files(c, raw, output_dir, zarr, eddy_type, nb_obs_min):
    """Write the untracked, long-track and short-track observation files.

    Three files are written into ``output_dir``:
    ``<eddy_type>_untracked.nc``, ``<eddy_type>_tracks.nc`` and
    ``<eddy_type>_short.nc``.

    Parameters
    ----------
    c : Correspondances
        Tracking result; it is reduced in place by ``longer_than``.
    raw : bool
        Forwarded as ``raw_data`` to the merge / unused-data calls.
    output_dir : str
        Directory receiving the three files.
    zarr : bool
        Currently unused; kept so existing callers keep working.
    eddy_type : str
        Prefix of the output file names.
    nb_obs_min : int
        Track-length threshold passed to ``shorter_than`` / ``longer_than``.
    """
    # Observations that were not used by the tracking.
    fout = os.path.join(output_dir, eddy_type+'_untracked.nc')
    c.get_unused_data(raw_data=raw).write_file(
        filename=fout
    )

    # Short tracks are extracted from a copy, because the length filters
    # modify the correspondence object they are called on.
    short_c = c._copy()
    short_c.shorter_than(size_max=nb_obs_min)
    short_track = short_c.merge(raw_data=raw)

    # longer_than returning False is treated as "no long track left".
    if c.longer_than(size_min=nb_obs_min) is False:
        long_track = short_track.empty_dataset()
    else:
        long_track = c.merge(raw_data=raw)

    # We flag obs
    if c.virtual:
        # Observations with time == 0 are flagged as virtual and then filled
        # by interpolation.
        long_track["virtual"][:] = long_track["time"] == 0
        long_track.normalize_longitude()
        long_track.filled_by_interpolation(long_track["virtual"] == 1)
        short_track["virtual"][:] = short_track["time"] == 0
        short_track.normalize_longitude()
        short_track.filled_by_interpolation(short_track["virtual"] == 1)

    # The %d templates were previously passed to print() as separate
    # arguments and therefore printed literally; apply the formatting.
    print("Longer track saved have %d obs" % c.nb_obs_by_tracks.max())
    print(
        "The mean length is %d observations for long track"
        % c.nb_obs_by_tracks.mean()
    )

    fout = os.path.join(output_dir, eddy_type+'_tracks.nc')
    long_track.write_file(filename=fout)
    fout = os.path.join(output_dir, eddy_type+'_short.nc')
    short_track.write_file(filename=fout)



In [9]:
# yrrng=datearr[0].year
# Track files are written to a 'tracks' sub-directory of the identification
# output; exist_ok=True avoids a separate existence check.
tracker_dir=outdir+'tracks/'
os.makedirs(tracker_dir, exist_ok=True)

nb_obs_min = 10 # minimum of 10 points in track to be considered a long trajectory
raw = False # forwarded as raw_data to the merge calls (presumably raw = unscaled storage values; confirm in py-eddy-tracker docs)
cmin = 0.05 # cmin handed to AreaTracker
virtual = 4 # number of consecutive timesteps with missing detection allowed
class_kw = dict(cmin=cmin) # NOTE(review): tracking() builds its own class_kw from cmin
zarr = False

In [11]:
eddy_type='anticyclonic'
previous_correspondance = os.path.join(tracker_dir, eddy_type+'_correspondance.nc')
# To restrict tracking to a single year, use instead:
# search = os.path.join(outdir+'eddyID_'+eddy_type+'_'+str(yrrng)+'????.nc')
search = os.path.join(outdir+'eddyID_'+eddy_type+'_????????.nc')
print('search files ',search)
# Sorted so that the YYYYMMDD-stamped files are tracked in chronological order.
file_objects = sorted(glob.glob(search))
# Pass virtual explicitly: it was configured above but never forwarded, so
# changing it there silently had no effect.
tracking(file_objects, previous_correspondance, eddy_type, zarr=zarr, nb_obs_min=nb_obs_min, raw=raw, cmin=cmin, virtual=virtual)


search files  /scratch/m/m300466/eerie-control-1950/gr025//ifs-fesom2-sr/eddytrack/sm700/eddyID_anticyclonic_????????.nc
Longer track saved have %d obs 3179
The mean length is %d observations for long track 48.690123532386714


In [12]:
eddy_type='cyclonic'  #need to include all changes with eddy_type
previous_correspondance = os.path.join(tracker_dir, eddy_type+'_correspondance.nc')
search = os.path.join(outdir+'eddyID_'+eddy_type+'_????????.nc')
print('search files ',search)
# Sorted so that the YYYYMMDD-stamped files are tracked in chronological order.
file_objects = sorted(glob.glob(search))
# Pass virtual explicitly: it was configured above but never forwarded, so
# changing it there silently had no effect.
tracking(file_objects, previous_correspondance, eddy_type, zarr=zarr, nb_obs_min=nb_obs_min, raw=raw, cmin=cmin, virtual=virtual)


search files  /scratch/m/m300466/eerie-control-1950/gr025//ifs-fesom2-sr/eddytrack/sm700/eddyID_cyclonic_????????.nc


In [None]:
# #Files created in /work/bm1344/m300466/reg25/ifsfesom/eddytrack_test/tracks/

# anticyclonic_dm_correspondance.nc
# anticyclonic_untracked.nc
# anticyclonic_tracks.nc
# anticyclonic_short.nc
# cyclonic_dm_correspondance.nc
# cyclonic_untracked.nc
# cyclonic_tracks.nc
# cyclonic_short.nc