# AlongTrack Data

In [1]:
import autoroot
import typing as tp
from dataclasses import dataclass
import functools as ft
import numpy as np
import pandas as pd
import xarray as xr
import einops
from metpy.units import units
import pint_xarray
import xarray_dataclasses as xrdataclass
from oceanbench._src.datasets.base import XRDABatcher
from oceanbench._src.geoprocessing.spatial import transform_360_to_180
from oceanbench._src.geoprocessing.subset import where_slice
import matplotlib.pyplot as plt
import matplotlib.colors as colors
import matplotlib.ticker as ticker
import seaborn as sns

# Restore default plotting rc settings, then switch to seaborn's "talk"
# context with fonts scaled down to 70%.
sns.reset_defaults()
sns.set_context(context="talk", font_scale=0.7)

# Auto-reload imported modules before each cell execution so that edits to
# library code are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2


## Data

In [2]:
# List the observation files available in the dc20a OSSE raw/dc_obs directory.
!ls "/gpfswork/rech/yrf/commun/data_challenges/dc20a_osse/raw/dc_obs/"

2020a_SSH_mapping_NATL60_envisat.nc
2020a_SSH_mapping_NATL60_geosat2.nc
2020a_SSH_mapping_NATL60_jason1.nc
2020a_SSH_mapping_NATL60_karin_swot.nc
2020a_SSH_mapping_NATL60_nadir_swot.nc
2020a_SSH_mapping_NATL60_topex-poseidon_interleaved.nc


In [3]:

# List the along-track files available in the dc21a OSE test/train directory.
!ls /gpfswork/rech/yrf/commun/data_challenges/dc21a_ose/test/train

dt_gulfstream_alg_phy_l3_20161201-20180131_285-315_23-53.nc
dt_gulfstream_h2g_phy_l3_20161201-20180131_285-315_23-53.nc
dt_gulfstream_j2g_phy_l3_20161201-20180131_285-315_23-53.nc
dt_gulfstream_j2n_phy_l3_20161201-20180131_285-315_23-53.nc
dt_gulfstream_j3_phy_l3_20161201-20180131_285-315_23-53.nc
dt_gulfstream_s3a_phy_l3_20161201-20180131_285-315_23-53.nc


In [4]:
# Directories holding the along-track files. Kept as single constants so that
# relocating the data only requires editing one line per data challenge.
DC20A_OBS_DIR = "/gpfswork/rech/yrf/commun/data_challenges/dc20a_osse/raw/dc_obs"
DC21A_TRAIN_DIR = "/gpfswork/rech/yrf/commun/data_challenges/dc21a_ose/test/train"

# dc20a (OSSE) nadir files; the two SWOT files present in the directory
# (karin_swot, nadir_swot) are deliberately not part of this list.
files_nadir_dc20a = [
    f"{DC20A_OBS_DIR}/{filename}"
    for filename in (
        "2020a_SSH_mapping_NATL60_jason1.nc",
        "2020a_SSH_mapping_NATL60_envisat.nc",
        "2020a_SSH_mapping_NATL60_geosat2.nc",
        "2020a_SSH_mapping_NATL60_topex-poseidon_interleaved.nc",
    )
]

# dc21a (OSE) along-track files, one per mission.
files_nadir_dc21a = [
    f"{DC21A_TRAIN_DIR}/{filename}"
    for filename in (
        "dt_gulfstream_alg_phy_l3_20161201-20180131_285-315_23-53.nc",
        "dt_gulfstream_h2g_phy_l3_20161201-20180131_285-315_23-53.nc",
        "dt_gulfstream_j2g_phy_l3_20161201-20180131_285-315_23-53.nc",
        "dt_gulfstream_j2n_phy_l3_20161201-20180131_285-315_23-53.nc",
        "dt_gulfstream_j3_phy_l3_20161201-20180131_285-315_23-53.nc",
        "dt_gulfstream_s3a_phy_l3_20161201-20180131_285-315_23-53.nc",
    )
]


# Open only the first dc21a file to inspect its structure; ds_nadir is
# reassigned further down when all files are opened together.
ds_nadir = xr.open_dataset(files_nadir_dc21a[0])

In [5]:
# Display the dataset opened from the first dc21a file.
ds_nadir

In [6]:
def preprocess_nadir_dc20a(
    da,
    variable="ssh_mod",
    time_min="2012-10-22",
    time_max="2012-12-03",
    lon_bounds=(-64.975, -55.007),
    lat_bounds=(33.025, 42.9917),
):
    """Prepare one dc20a nadir along-track dataset.

    Steps, in order: rename ``variable`` to ``"ssh"``, keep the requested time
    window and load it into memory, pass ``lon`` through
    ``transform_360_to_180``, apply ``where_slice`` on ``lon`` and ``lat``,
    and drop the ``cycle`` dimension.

    Args:
        da: Dataset to preprocess (presumably an ``xarray.Dataset``). It must
            be indexable by ``time``, expose ``lon``/``lat`` and have a
            ``cycle`` dimension.
        variable: Name of the data variable that is exposed as ``"ssh"``.
        time_min: Start of the time window handed to ``sel``.
        time_max: End of the time window handed to ``sel``.
        lon_bounds: ``(min, max)`` pair forwarded to ``where_slice`` for
            ``lon``; it is applied after ``transform_360_to_180``, so it is
            presumably expressed on the [-180, 180] convention - confirm.
        lat_bounds: ``(min, max)`` pair forwarded to ``where_slice`` for
            ``lat``.

    Returns:
        The preprocessed dataset.
    """
    da = da.rename({variable: "ssh"})

    # compute() materialises the time subset so the following coordinate
    # assignment and masking operate on in-memory data.
    da = da.sel(time=slice(time_min, time_max), drop=True).compute()

    da["lon"] = transform_360_to_180(da["lon"])

    da = where_slice(da, "lon", *lon_bounds)
    da = where_slice(da, "lat", *lat_bounds)

    # Raises if the dataset has no "cycle" dimension (unchanged behaviour).
    da = da.drop_dims("cycle")

    return da

def preprocess_nadir_dc21a(
    da,
    variable="ssh_mod",
    time_min="2017-01-01",
    time_max="2017-03-01",
    lon_bounds=(-64.975, -55.007),
    lat_bounds=(33.025, 42.9917),
):
    """Prepare one dc21a along-track dataset.

    Steps, in order: rename ``variable`` to ``"ssh"``, keep the requested time
    window and load it into memory, rename ``longitude``/``latitude`` to
    ``lon``/``lat``, pass ``lon`` through ``transform_360_to_180`` and apply
    ``where_slice`` on ``lon`` and ``lat``.

    Args:
        da: Dataset to preprocess (presumably an ``xarray.Dataset``). It must
            be indexable by ``time`` and expose ``longitude``/``latitude``.
        variable: Name of the data variable that is exposed as ``"ssh"``.
            NOTE(review): the default ``"ssh_mod"`` matches the dc20a helper;
            the caller in this notebook passes ``"sla_unfiltered"`` - confirm
            whether the default is meaningful for dc21a files.
        time_min: Start of the time window handed to ``sel``.
        time_max: End of the time window handed to ``sel``.
        lon_bounds: ``(min, max)`` pair forwarded to ``where_slice`` for
            ``lon``; it is applied after ``transform_360_to_180``, so it is
            presumably expressed on the [-180, 180] convention - confirm.
        lat_bounds: ``(min, max)`` pair forwarded to ``where_slice`` for
            ``lat``.

    Returns:
        The preprocessed dataset.
    """
    da = da.rename({variable: "ssh"})

    # compute() materialises the time subset so the following coordinate
    # assignment and masking operate on in-memory data.
    da = da.sel(time=slice(time_min, time_max), drop=True).compute()

    # Align coordinate names with the short lon/lat names used below.
    da = da.rename({"longitude": "lon", "latitude": "lat"})

    da["lon"] = transform_360_to_180(da["lon"])

    da = where_slice(da, "lon", *lon_bounds)
    da = where_slice(da, "lat", *lat_bounds)

    return da

In [7]:
# Bind the dc21a variable name so the callback only needs the dataset itself.
preprocess_fn = ft.partial(preprocess_nadir_dc21a, variable="sla_unfiltered")

# Open every dc21a file, preprocess each one individually, stack them along
# "time" in list order, and finally order the result chronologically.
ds_nadir = xr.open_mfdataset(
    files_nadir_dc21a,
    engine="netcdf4",
    combine="nested",
    concat_dim="time",
    preprocess=preprocess_fn,
).sortby("time")

ds_nadir

In [8]:
# %matplotlib inline

# fig, ax = plt.subplots()

# sub_ds = ds_nadir.isel(time=slice(0,None))
# pts = ax.scatter(sub_ds.lon, sub_ds.lat, c=sub_ds.ssh, s=0.1)
# ax.set(
#     xlabel="Longitude",
#     ylabel="Latitude",
# )

# plt.colorbar(pts, label="Sea Surface Height [m]")
# plt.tight_layout()
# plt.show()

## Data Structure

## Gridding

In [9]:
# List the mapping results available for the dc21a OSE challenge.
# (Alternative listing of the dc20a NATL60 staging directory, kept disabled:)
# !ls /gpfswork/rech/yrf/commun/data_challenges/dc20a_osse/staging/natl60/
!ls /gpfswork/rech/yrf/commun/data_challenges/dc21a_ose/test/results

OSE_ssh_mapping_4dvarNet_2022.nc  OSE_ssh_mapping_DUACS.nc
OSE_ssh_mapping_4dvarNet.nc	  OSE_ssh_mapping_DYMOST.nc
OSE_ssh_mapping_BASELINE.nc	  OSE_ssh_mapping_MIOST.nc
OSE_ssh_mapping_BFN.nc


In [10]:
# NATL60 SSH file from the dc20a OSSE staging area.
# NOTE(review): the file name says y2013 while open_natl60_reference selects
# 2012-10-22..2012-12-03 - confirm the file actually covers that window.
file_natl60 = "/gpfswork/rech/yrf/commun/data_challenges/dc20a_osse/staging/natl60/NATL60-CJM165_GULFSTREAM_ssh_y2013.1y.nc"
# DUACS mapping result from the dc21a OSE results directory; opened below as ds_duacs.
file_DUACS = "/gpfswork/rech/yrf/commun/data_challenges/dc21a_ose/test/results/OSE_ssh_mapping_DUACS.nc"


In [11]:
def open_natl60_reference(
    file,
    variable="gssh",
    time_min="2012-10-22",
    time_max="2012-12-03",
    lon_bounds=(-64.975, -55.007),
    lat_bounds=(33.025, 42.9917),
):
    """Open a NATL60 reference file and crop it to a space-time window.

    The file is opened without time decoding, ``time`` is converted with
    ``pd.to_datetime``, the dataset is sorted by ``time``, ``lon`` is passed
    through ``transform_360_to_180``, the window is selected and ``variable``
    is renamed to ``"ssh"``.

    Args:
        file: Path (or other object accepted by ``xr.open_dataset``) of the
            reference file.
        variable: Name of the data variable that is exposed as ``"ssh"``.
        time_min: Start of the selected time window.
        time_max: End of the selected time window.
        lon_bounds: ``(min, max)`` pair used to slice ``lon`` after
            ``transform_360_to_180`` has been applied.
        lat_bounds: ``(min, max)`` pair used to slice ``lat``.

    Returns:
        The cropped dataset with the field available under ``"ssh"``.
    """
    da = xr.open_dataset(file, decode_times=False)

    # NOTE(review): with decode_times=False the raw stored values reach
    # pd.to_datetime; plain numbers are read as nanoseconds since the Unix
    # epoch by default - confirm this matches the file's time encoding.
    da["time"] = pd.to_datetime(da.time)
    da = da.sortby("time")

    da["lon"] = transform_360_to_180(da["lon"])

    da = da.sel(
        time=slice(time_min, time_max),
        lon=slice(*lon_bounds),
        lat=slice(*lat_bounds),
        drop=True,
    )

    da = da.rename({variable: "ssh"})
    return da


def open_ose_reference(
    file,
    variable="gssh",
    time_min="2017-01-01",
    time_max="2017-03-01",
    lon_bounds=(-64.975, -55.007),
    lat_bounds=(33.025, 42.9917),
):
    """Open an OSE mapping file and crop it to a space-time window.

    The file is opened with time decoding enabled, sorted by ``time``,
    ``lon`` is passed through ``transform_360_to_180``, the window is
    selected and ``variable`` is renamed to ``"ssh"``.

    Args:
        file: Path (or other object accepted by ``xr.open_dataset``) of the
            mapping file.
        variable: Name of the data variable that is exposed as ``"ssh"``.
        time_min: Start of the selected time window.
        time_max: End of the selected time window.
        lon_bounds: ``(min, max)`` pair used to slice ``lon`` after
            ``transform_360_to_180`` has been applied.
        lat_bounds: ``(min, max)`` pair used to slice ``lat``.

    Returns:
        The cropped dataset with the field available under ``"ssh"``.
    """
    da = xr.open_dataset(file, decode_times=True)
    da = da.sortby("time")

    da["lon"] = transform_360_to_180(da["lon"])

    da = da.sel(
        time=slice(time_min, time_max),
        lon=slice(*lon_bounds),
        lat=slice(*lat_bounds),
        drop=True,
    )

    da = da.rename({variable: "ssh"})
    return da

In [12]:
# The NATL60 reference is not loaded here. The disabled call below uses the
# reader defined in this notebook (open_natl60_reference):
# ds_natl60 = open_natl60_reference(file_natl60, "ssh")
# Load the DUACS mapping; its field is already stored under the name "ssh".
ds_duacs = open_ose_reference(file_DUACS, "ssh")
ds_duacs

In [13]:
import pyinterp



In [14]:
from oceanbench._src.geoprocessing.gridding import coord_based_to_grid

In [15]:
# Map the along-track "ssh" observations onto the DUACS dataset's grid with a
# 12-hour time resolution.
# NOTE(review): how points are binned/aggregated is defined inside
# coord_based_to_grid - confirm there before interpreting the result.
ds_nadir_gridded = coord_based_to_grid(
    ds_nadir, 
    ds_duacs,
    data_vars=["ssh"], 
    t_res=pd.to_timedelta(12, unit="hour")
)
# Optional quick look at which grid cells hold data at one time step:
# np.isfinite(ds_nadir_gridded.ssh.isel(time=6)).plot.imshow()

In [16]:
# import holoviews as hv
# hv.extension("matplotlib")


# variable = "ssh" # "vort_r" # "ke" #  
# cmap = "viridis" # "RdBu_r" # "YlGnBu_r" #
# field_name = "DUACS"

# ssh_ds = xr.Dataset({
#     field_name: ds_duacs[variable],
#     "NADIR": np.isfinite(ds_nadir_gridded[variable]),
# })


# to_plot_ds = ssh_ds.transpose("time", "lat", "lon")#.isel(time=slice(25, 55, 1))

# clim = (
#     to_plot_ds[[field_name, "NADIR"]].to_array().pipe(lambda da: (da.quantile(0.005).item(), da.quantile(0.995).item()))
# )

# images = hv.Layout([
#     hv.Dataset(to_plot_ds)
#     .to(hv.QuadMesh, ["lon", "lat"], v).relabel(v)
#     .options(cmap=cmap, clim=clim)
#     for v in to_plot_ds]
# ).cols(2).opts(sublabel_format="")

# hv.output(images, holomap="gif", fps=2, dpi=125)

## Segments

## PSD Analysis