# RMSD drifters/SWOT – Verger-Miralles et al. (2025)

**SWOT enhances small-scale eddy detection in the Mediterranean Sea**

Author: *Elisabet Verger-Miralles*  
Institution: IMEDEA (CSIC-UIB)

Compute the RMSD between the velocity magnitude (module) measured by the SVP-B drifters and the one derived from SWOT.

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import xarray as xr
import pandas as pd
import glob
from scipy.interpolate import griddata
from datetime import datetime
from functions import process_and_concat_swot
from scipy.stats import bootstrap

In [2]:
# 1. DRIFTERS
# Open one NetCDF file per SVP-B drifter and keep only the samples whose
# time coordinate falls inside the [dd1, dd2] window.
filedir = '../../grl_codes_to_publish_def_swotv2.0.1/data/drifters_filtered/SVPB/'
dd1 = '2023-04-23T00:00:00.000000000'
dd2 = '2023-04-29T00:00:00.000000000'
drifter_ids = ['035', '039', '040', '041', '042']

# Same label-based time selection for every drifter.
time_window = slice(dd1, dd2)

# ds_drifters keeps the order of drifter_ids.
ds_drifters = [
    xr.open_dataset(
        filedir + f"drifter-svpb{num}_inertial_osc_filt_subset.nc"
    ).sel(time=time_window)
    for num in drifter_ids
]

# 2. SWOT
# Load the SWOT files, accumulate them into one DataFrame and attach to every
# row the mean velocity of its transect (evaluated along a drifter track).
dir_SWOT = '../../grl_codes_to_publish_def_swotv2.0.1/data/SWOT/'
# First eight '*v2*' files in filename order.
files_sw = np.sort(glob.glob(dir_SWOT + '*v2*.nc'))[0:8]
# Bounding box handed to process_and_concat_swot.
lon_min, lon_max, lat_min, lat_max = 1.1, 1.9, 39.6, 40.1

if len(files_sw) == 0:
    # Fail with a clear message instead of an AttributeError on None below.
    raise FileNotFoundError(f"No SWOT file matching '*v2*.nc' found in {dir_SWOT}")

df_swot = None
for file in files_sw:
    # The helper's result is used as a pandas DataFrame below, so the file
    # can be closed as soon as its rows have been accumulated.
    with xr.open_dataset(file) as ds_SWOT:
        df_swot = process_and_concat_swot(ds_SWOT, lon_min, lon_max, lat_min, lat_max, df_swot)

df_swot = df_swot.dropna().reset_index(drop=True)
# All rows acquired on the same calendar day share one timestamp: the mean
# acquisition time of that day. This value identifies a transect below.
df_swot['mean_time'] = df_swot.groupby(df_swot['time'].dt.date)['time'].transform('mean')

# One row per transect; groupby sorts the rows by 'mean_time'.
group_transect_ugos = df_swot.groupby('mean_time')['ugos_filtered'].apply(list).reset_index()
group_transect_vgos = df_swot.groupby('mean_time')['vgos_filtered'].apply(list).reset_index()
group_transect_lon = df_swot.groupby('mean_time')['longitude'].apply(list).reset_index()
group_transect_lat = df_swot.groupby('mean_time')['latitude'].apply(list).reset_index()

# Mean per transect
# Each transect is interpolated onto the positions of the first drifter and
# averaged there (NaNs, e.g. outside the convex hull of the data, are ignored).
# NOTE(review): only ds_drifters[0] is used as target track - confirm intended.
drifter0_positions = (np.array(ds_drifters[0].LON), np.array(ds_drifters[0].LAT))

mean_transect_ugos = []
mean_transect_vgos = []
for lon_tr, lat_tr, ugos_tr, vgos_tr in zip(group_transect_lon['longitude'],
                                            group_transect_lat['latitude'],
                                            group_transect_ugos['ugos_filtered'],
                                            group_transect_vgos['vgos_filtered']):
    swot_positions = (np.array(lon_tr), np.array(lat_tr))
    int_u = griddata(swot_positions, np.array(ugos_tr), drifter0_positions, method='cubic')
    int_v = griddata(swot_positions, np.array(vgos_tr), drifter0_positions, method='cubic')
    mean_transect_ugos.append(np.nanmean(int_u))
    mean_transect_vgos.append(np.nanmean(int_v))

unique_mean_times = df_swot['mean_time'].unique()

# Key the means by the groupby keys they were computed from. Relying on
# unique() (order of appearance) only works when the rows of df_swot are
# already in chronological order, which is not guaranteed here.
transect_times = pd.Index(group_transect_ugos['mean_time'])
df_swot['ugos_mean_transect'] = df_swot['mean_time'].map(pd.Series(mean_transect_ugos, index=transect_times))
df_swot['vgos_mean_transect'] = df_swot['mean_time'].map(pd.Series(mean_transect_vgos, index=transect_times))

# 3. SPATIOTEMPORAL INTERP. FOR ALL THE DRIFTERS
# For every drifter fix, the SWOT velocity anomaly (transect mean removed) is
# interpolated in space (cubic) on the two transects closest in time, and the
# two values are blended with weights inversely proportional to the time
# separation. Every drifter fix gets exactly one SWOT value (NaN when it
# cannot be computed) so drifter and SWOT arrays stay aligned element-wise.
u_svp_all, v_svp_all, u_swot_all, v_swot_all = [], [], [], []

# The anomaly fields do not depend on the drifter: extract them once per
# transect instead of re-filtering df_swot for every drifter fix.
transect_times = df_swot['mean_time'].drop_duplicates().sort_values().to_numpy()
transect_fields = []
for transect_time in transect_times:
    subset = df_swot[df_swot['mean_time'] == transect_time]
    transect_fields.append((
        (subset['longitude'].values, subset['latitude'].values),
        subset['ugos_filtered'].values - np.nanmean(subset['ugos_mean_transect'].values),
        subset['vgos_filtered'].values - np.nanmean(subset['vgos_mean_transect'].values),
    ))

for ds in ds_drifters:
    Lon, Lat = ds.LON.values, ds.LAT.values
    u_svp, v_svp = ds.U.values, ds.V.values
    t = np.array(ds.time)
    n_obs = len(Lon)

    if n_obs == 0 or len(transect_times) < 2:
        # The temporal blend needs two transects. Store NaN placeholders
        # rather than skipping, otherwise the SWOT lists would become shorter
        # than the drifter lists and the paired comparison would be shifted.
        u_swot_int = np.full(n_obs, np.nan)
        v_swot_int = np.full(n_obs, np.nan)
    else:
        # Spatial step: one triangulation per transect and component,
        # evaluated on the whole drifter track. Shape (n_transects, n_obs).
        u_on_track = np.array([griddata(points, ugos_an, (Lon, Lat), method='cubic')
                               for points, ugos_an, _ in transect_fields])
        v_on_track = np.array([griddata(points, vgos_an, (Lon, Lat), method='cubic')
                               for points, _, vgos_an in transect_fields])

        # Absolute time separation in seconds, shape (n_obs, n_transects).
        time_diff = np.abs(t[:, None] - transect_times[None, :]) / np.timedelta64(1, 's')

        # Two transects closest in time to every fix. If two transects are
        # exactly equidistant they both take part with equal weight.
        nearest = np.argsort(time_diff, axis=1, kind='stable')
        obs = np.arange(n_obs)
        first, second = nearest[:, 0], nearest[:, 1]
        diff1, diff2 = time_diff[obs, first], time_diff[obs, second]

        # Temporal step: the closer transect receives the larger weight.
        denom = diff1 + diff2
        u_swot_int = (u_on_track[first, obs] * diff2 + u_on_track[second, obs] * diff1) / denom
        v_swot_int = (v_on_track[first, obs] * diff2 + v_on_track[second, obs] * diff1) / denom

    u_svp_all.extend(u_svp)
    v_svp_all.extend(v_svp)
    u_swot_all.extend(u_swot_int)
    v_swot_all.extend(v_swot_int)

u_svp_all = np.array(u_svp_all)
v_svp_all = np.array(v_svp_all)
u_swot_all = np.array(u_swot_all)
v_swot_all = np.array(v_swot_all)

# MODULE
# Velocity magnitude of the drifters and of the SWOT values interpolated
# onto the drifter fixes.
abs_vel_svp = np.sqrt(np.square(u_svp_all) + np.square(v_svp_all))
abs_vel_swot = np.sqrt(np.square(u_swot_all) + np.square(v_swot_all))

# Root mean square of the magnitude differences; NaN pairs are ignored.
squared_diff = np.square(abs_vel_svp - abs_vel_swot)
rmsd = np.sqrt(np.nanmean(squared_diff))
print(f'RMSE (módulo): {rmsd:.4f} m/s')

# # Direction (disabled)
# dir_vel_svp = np.degrees(np.arctan2(u_svp_all, v_svp_all))
# dir_vel_swot = np.degrees(np.arctan2(u_swot_all, v_swot_all))

# def ang_err_180(angles):
#     angles = np.where(angles < -180., 360 + angles, angles)
#     angles = np.where(angles > 180., angles - 360, angles)
#     return angles

# angle_diff = ang_err_180(dir_vel_svp - dir_vel_swot)
# rmse_dir = np.sqrt(np.nanmean(angle_diff ** 2))
# print(f'RMSE dirección: {rmse_dir:.2f} grados')

KeyboardInterrupt: 

In [2]:
# Relative reduction (in %) of the magnitude RMSD when going from the
# reference value to the SWOT value.
# NOTE(review): 7.4 matches the SWOT RMSD printed in this notebook
# (0.0740 m/s expressed in cm/s); 10.76 is presumably the RMSD of the product
# SWOT is compared against - confirm its origin.
rmsd_reference = 10.76
rmsd_swot = 7.4
percent_improv_modul = 100 * (rmsd_reference - rmsd_swot) / rmsd_reference
percent_improv_modul

31.2267657992565

## BOOTSTRAP

Velocity magnitude (module)

In [6]:
def rmsd(drifter, swot, axis=0):
    """Return the Root Mean Square Deviation (RMSD) between two samples.

    Parameters
    ----------
    drifter, swot : array-like supporting element-wise subtraction
        Paired values to compare; they must broadcast against each other.
    axis : int, optional
        Axis along which the mean of the squared differences is taken
        (default 0).

    Returns
    -------
    RMSD along ``axis``. NaN differences are ignored by the mean.
    """
    squared_error = np.square(drifter - swot)
    mean_squared_error = np.nanmean(squared_error, axis=axis)
    return np.sqrt(mean_squared_error)

# The two samples are passed as they are (1-D arrays of equal length);
# element i of both arrays belongs to the same drifter fix.
data = (abs_vel_svp, abs_vel_swot)

# Settings of the bootstrap estimate of the RMSD confidence interval.
bootstrap_settings = dict(
    statistic=rmsd,
    n_resamples=1000,
    confidence_level=0.95,
    method='BCa',     # bias-corrected and accelerated interval
    paired=True,      # resample drifter/SWOT values together, as pairs
    random_state=42,  # fixed seed so the interval is reproducible
)
res = bootstrap(data, **bootstrap_settings)

# Point estimate on the full sample, followed by the bootstrap interval.
print(f"RMSD: {rmsd(abs_vel_svp, abs_vel_swot):.4f}")
# print(f"RMSD: {rmsd(np.array(subsampling), np.array(subsampling_duacs)):.4f}")

print(f"95% Confidence Interval: {res.confidence_interval}")

RMSD: 0.0740
95% Confidence Interval: ConfidenceInterval(low=np.float64(0.06977504141634953), high=np.float64(0.07920249844900723))


In [7]:
# low confidence interval
low_ci = res.confidence_interval[0]
high_ci = res.confidence_interval[1]
low_ci, high_ci

(np.float64(0.06977504141634953), np.float64(0.07920249844900723))

In [8]:
# Length of the lower and upper arms of the error bar, measured from the
# RMSD rounded to four decimals (0.0740).
rmsd_rounded = 0.0740
rmsd_rounded - low_ci, high_ci - rmsd_rounded

(np.float64(0.004224958583650462), np.float64(0.005202498449007234))

In [9]:
# Mean of the two error-bar arms, each multiplied by 100
# (m/s -> cm/s, given that the RMSD above is printed in m/s).
lower_arm_x100 = (0.0740 - low_ci) * 100
upper_arm_x100 = (high_ci - 0.0740) * 100
(lower_arm_x100 + upper_arm_x100) / 2

np.float64(0.4713728516328848)