# Transform

In [1]:
from typing import Tuple, List

import numpy as np
import pandas as pd
import pywsra
import xarray as xr
import littlebuoybigwaves as buoy
from configure import read_stored_variable

## Setup

In [2]:
# Upstream notebook, left disabled here.  Presumably it produces the stored
# variables read below -- TODO confirm it has been run before this notebook.
# %run 'nb0-datasets.ipynb'
# Execute configure.py inside this notebook's namespace (`-i`).
%run -i configure.py

# WSRA datasets, one per storm/campaign, looked up by their stored names.
earl_ds = read_stored_variable('earl_ds')
fiona_ds = read_stored_variable('fiona_ds')
ian_ds = read_stored_variable('ian_ds')
julia_ds = read_stored_variable('julia_ds')
idalia_ds = read_stored_variable('idalia_ds')
lee_ds = read_stored_variable('lee_ds')
atomic_ds = read_stored_variable('atomic_ds')

# Drifter observations for the storms that have them (there is no Julia
# entry), plus the SWIFT dataset that is colocated with the ATOMIC WSRA data.
earl_drifter_df = read_stored_variable('earl_drifter_df')
fiona_drifter_df = read_stored_variable('fiona_drifter_df')
ian_drifter_df = read_stored_variable('ian_drifter_df')
idalia_drifter_df = read_stored_variable('idalia_drifter_df')
lee_drifter_df = read_stored_variable('lee_drifter_df')
atomic_swift_ds = read_stored_variable('atomic_swift_ds')

## WSRA

### Quality control metrics

Compute the standard deviation of the mean square slope observations at each reported time. Each time has five independent measurements of mean square slope, offset by -20, -10, 0, +10, and +20 seconds from the reported time; their standard deviation is used below as a quality control metric.

In [5]:
def mean_square_slope_std(wsra_ds: xr.Dataset) -> xr.DataArray:
    """
    Return the standard deviation of the WSRA mean square slopes.

    The reduction is taken along axis 1 of the
    `sea_surface_mean_square_slope` variable of `wsra_ds`.
    """
    mean_square_slope = wsra_ds['sea_surface_mean_square_slope']
    return mean_square_slope.std(axis=1)

In [6]:
# Attach the mean square slope standard deviation to every WSRA dataset
# (in place) so it is available as a variable on each of them.
for _wsra_ds in (earl_ds, fiona_ds, ian_ds, julia_ds,
                 idalia_ds, lee_ds, atomic_ds):
    _wsra_ds['sea_surface_mean_square_slope_std'] = mean_square_slope_std(_wsra_ds)

### Masking

Mask the WSRA observations based on flight metadata and quality control metrics.

In [7]:
def mask_wsra(wsra_ds: xr.Dataset, mask_dict: dict) -> xr.Dataset:
    """
    Mask WSRA observations and print a one-line summary of what was masked.

    Args:
        wsra_ds: WSRA Dataset.  Its `storm_name` attribute and its
            `time_mask` variable are read when the summary is printed.
            NOTE(review): `time_mask` is presumably added to `wsra_ds` by
            `create_trajectory_mask` -- confirm against pywsra.
        mask_dict: Masking criteria handed to `wsra.create_trajectory_mask`.

    Returns:
        The Dataset after `wsra.mask(drop=True)` and after duplicate `time`
        values have been dropped.
    """
    wsra_masked_ds = (wsra_ds
                      .wsra.create_trajectory_mask(mask_dict)
                      .wsra.mask(drop=True)
                      .drop_duplicates(dim='time'))  #TODO: added 01-30
    num_masked_values = wsra_masked_ds['time_mask'].attrs['num_masked_values']
    num_values = wsra_ds['time_mask'].size
    # Guard the empty-dataset case so the summary never divides by zero.
    if num_values:
        perc_masked_values = 100 * num_masked_values / num_values
    else:
        perc_masked_values = float('nan')
    # A format spec (rather than `.round(1)`) also works when the attribute
    # is a plain Python number, which has no `.round` method.
    print(
        f"{wsra_ds.attrs['storm_name']}: "
        f"{num_masked_values} masked values ({perc_masked_values:.1f}%)."
    )
    return wsra_masked_ds


In [8]:
# Masking criteria handed to `mask_wsra` below: variable name -> 2-tuple of
# limits.  Presumably each tuple is the (min, max) range of values to keep --
# TODO confirm against pywsra's `create_trajectory_mask`.
mask_dict = {
    'wsra_computed_roll': (-2.5, 2.5),
    'platform_radar_altitude': (1000, 4000),
    # Disabled criterion (no limits were filled in):
    # 'peak_spectral_variance': (),
    'platform_speed_wrt_ground': (80, 250),
    'met_sfmr_rain_rate': (0, 25),
    'rainfall_rate_median': (0, 25),
    # Quality-control variable added to each dataset earlier in this notebook.
    'sea_surface_mean_square_slope_std': (0, 0.1),
}

In [9]:
# Apply the same masking criteria to every WSRA dataset.  Each call prints a
# one-line summary; the unmasked `*_ds` names are left bound to the inputs.
earl_masked_ds = mask_wsra(earl_ds, mask_dict)
fiona_masked_ds = mask_wsra(fiona_ds, mask_dict)
ian_masked_ds = mask_wsra(ian_ds, mask_dict)
julia_masked_ds = mask_wsra(julia_ds, mask_dict)
idalia_masked_ds = mask_wsra(idalia_ds, mask_dict)
lee_masked_ds = mask_wsra(lee_ds, mask_dict)
atomic_masked_ds = mask_wsra(atomic_ds, mask_dict)



earl: 1818 masked values (64.3%).
fiona: 366 masked values (25.3%).
ian: 491 masked values (34.1%).
julia: 222 masked values (49.3%).
idalia: 108 masked values (13.8%).
lee: 907 masked values (38.9%).
atomic: 1154 masked values (47.4%).




### Frequency spectra

In [10]:
def wsra_wn_spectrum_to_fq_spectrum(wsra_ds: xr.Dataset) -> xr.Dataset:
    """
    Convert WSRA wavenumber spectra to frequency spectra.

    Two pywsra accessor steps are chained: wavenumber spectrum ->
    frequency-direction spectrum (with `regrid=True`), then
    frequency-direction spectrum -> frequency spectrum.

    Args:
        wsra_ds: WSRA Dataset holding the wavenumber spectra.

    Returns:
        The Dataset returned by the final accessor call.
    """
    return (wsra_ds
            .wsra.wn_spectrum_to_fq_dir_spectrum(regrid=True)
            .wsra.fq_dir_spectrum_to_fq_spectrum())

In [11]:
# Convert the spectra of every masked dataset.
# NOTE: each `*_masked_ds` name is rebound to the converted result, so
# re-running this cell without first re-running the masking cell feeds
# already-converted datasets back into the conversion.
earl_masked_ds = wsra_wn_spectrum_to_fq_spectrum(earl_masked_ds)
fiona_masked_ds = wsra_wn_spectrum_to_fq_spectrum(fiona_masked_ds)
ian_masked_ds = wsra_wn_spectrum_to_fq_spectrum(ian_masked_ds)
julia_masked_ds = wsra_wn_spectrum_to_fq_spectrum(julia_masked_ds)
idalia_masked_ds = wsra_wn_spectrum_to_fq_spectrum(idalia_masked_ds)
lee_masked_ds = wsra_wn_spectrum_to_fq_spectrum(lee_masked_ds)
atomic_masked_ds = wsra_wn_spectrum_to_fq_spectrum(atomic_masked_ds)

  return np.abs(estimated_var - actual_var) / np.abs(actual_var) * 100
  warn(
  return np.abs(estimated_var - actual_var) / np.abs(actual_var) * 100
  warn(
  return np.abs(estimated_var - actual_var) / np.abs(actual_var) * 100
  return np.abs(estimated_var - actual_var) / np.abs(actual_var) * 100
  warn(
  return np.abs(estimated_var - actual_var) / np.abs(actual_var) * 100
  warn(
  return np.abs(estimated_var - actual_var) / np.abs(actual_var) * 100
  warn(
  return np.abs(estimated_var - actual_var) / np.abs(actual_var) * 100
  warn(


## Colocation

In [14]:
def is_spot_id(id_index: pd.Index) -> np.ndarray[bool]:
    """ Flag the entries of `id_index` that are Spotter ids (contain 'SPOT'). """
    spotter_pattern = 'SPOT'
    spotter_flags = id_index.str.contains(spotter_pattern)
    return spotter_flags

def is_microswift_id(id_index: pd.Index) -> np.ndarray[bool]:
    """ Flag the entries of `id_index` that are microSWIFT ids (e.g. 043). """
    # A microSWIFT id is exactly three digits.
    three_digit_pattern = r'^\d{3}$'
    microswift_flags = id_index.str.match(three_digit_pattern)
    return microswift_flags

def is_dwsd_id(id_index: pd.Index) -> np.ndarray[bool]:
    """ Flag the entries of `id_index` that are DWSD ids. """
    # TODO: placeholder pattern, intentionally expected to match nothing for
    # now (presumably real DWSD ids resemble 30023 -- confirm).
    dwsd_pattern = 'X'
    dwsd_flags = id_index.str.contains(dwsd_pattern)
    return dwsd_flags

def add_colocated_ds_id_dim(
    colocated_ds: xr.Dataset,
    path_coords: Tuple,
    path_vars: List,
    drifter_label: str,
    drifter_id: str,
) -> xr.Dataset:
    """
    Give the colocated drifter variables an `id` dimension.

    Every variable named `<drifter_label>_<name>` -- where `<name>` is one of
    the non-time path coordinates, the path variables, `time_difference` or
    `distance` -- is expanded with a `<drifter_label>_id` dimension whose only
    value is `drifter_id`.  `colocated_ds` is modified in place and returned.
    """
    label_prefix = drifter_label + '_'
    id_dim_name = label_prefix + 'id'

    # `time` (path_coords[0]) is deliberately left without an `id` dimension.
    non_time_coords = list(path_coords[1:])
    colocation_vars = ['time_difference', 'distance']
    names_to_expand = non_time_coords + path_vars + colocation_vars

    for base_name in names_to_expand:
        full_name = label_prefix + base_name
        expanded = colocated_ds[full_name].expand_dims(
            dim={id_dim_name: [drifter_id]}
        )
        colocated_ds[full_name] = expanded
    return colocated_ds

def colocate_wsra_and_drifters(
        wsra_ds: xr.Dataset,
        drifter_df: pd.DataFrame
) -> xr.Dataset:
    """
    Colocate observations in a WSRA Dataset with those in a drifter DataFrame,
    merge the per-drifter results into a single Dataset, and return it.

    Drifters are grouped by type (Spotter, microSWIFT, DWSD) using the `id`
    level of the DataFrame index, because each type has a different number of
    frequencies.  Every drifter is colocated individually so that its
    variables can be collected under a per-type `<label>_id` dimension.

    Args:
        wsra_ds: WSRA Dataset to colocate against.
        drifter_df: Drifter observations whose index has an `id` level.

    Returns:
        The merged colocated Dataset.  If no drifter in `drifter_df` matches
        any of the known types, a copy of `wsra_ds` is returned unchanged
        rather than the result of merging an empty list.
    """
    # Colocation inputs shared by every drifter type.
    path_coords = ('time', 'longitude', 'latitude')
    path_vars = ['energy_density', 'significant_height']

    # Separate drifters by type (each type has a different # of frequencies)
    id_index = drifter_df.index.get_level_values(level='id')
    labels = ['spotter', 'microswift', 'dwsd']
    type_masks = [
        is_spot_id(id_index),
        is_microswift_id(id_index),
        is_dwsd_id(id_index),
    ]

    # For each type in the DataFrame (if any), subset the DataFrame and
    # colocate with the WSRA Dataset by drifter id.  Collect all colocated
    # Datasets into a list for later merging.
    ds_list = []
    for label, bool_index in zip(labels, type_masks):
        if bool_index.sum() > 0:
            drifter_subset_df = (drifter_df
                                 .loc[bool_index]
                                 .sort_index())

            drifter_ds = drifter_subset_df.buoy.to_xarray()  #TODO: reorder time to first

            drifter_ids = (drifter_subset_df.index
                           .get_level_values(level='id')
                           .unique())
            time_difference_name = label + '_' + 'time_difference'

            # For each drifter of this type, colocate with the WSRA Dataset.
            # This is done individually so that we can collect drifters under
            # an `id` coordinate.
            for drifter_id in drifter_ids:
                wsra_colocated_ds = wsra_ds.wsra.colocate_with_path_ds(
                    path_ds=drifter_ds.sel(id=drifter_id, drop=True),
                    path_coords=path_coords,
                    path_vars=path_vars,
                    temporal_tolerance=np.timedelta64(90, 'm'),
                    spatial_tolerance=300,  # km
                    prefix=label,
                )
                wsra_colocated_ds = add_colocated_ds_id_dim(
                    colocated_ds=wsra_colocated_ds,
                    path_coords=path_coords,
                    path_vars=path_vars,
                    drifter_label=label,
                    drifter_id=drifter_id,
                )
                # Xarray needs timedelta units to be assigned.
                wsra_colocated_ds[time_difference_name] = (
                    wsra_colocated_ds[time_difference_name]
                    .astype('timedelta64[s]')
                )

                ds_list.append(wsra_colocated_ds)

    # With nothing colocated there is nothing to merge; hand back the WSRA
    # data untouched so callers still receive the observations.
    if not ds_list:
        return wsra_ds.copy()

    return xr.merge(ds_list)

In [15]:
# Colocate each storm's masked WSRA dataset with that storm's drifters.
earl_merged_ds = colocate_wsra_and_drifters(earl_masked_ds, earl_drifter_df)
fiona_merged_ds = colocate_wsra_and_drifters(fiona_masked_ds, fiona_drifter_df)
ian_merged_ds = colocate_wsra_and_drifters(ian_masked_ds, ian_drifter_df)
# Julia is skipped: the setup cell loads no Julia drifter data, so the call
# below stays disabled (note it also targets `julia_masked_ds`, not a
# `julia_merged_ds`).
# # julia_masked_ds = colocate_wsra_and_drifters(julia_masked_ds, julia_drifter_ds)
idalia_merged_ds = colocate_wsra_and_drifters(idalia_masked_ds, idalia_drifter_df,)
lee_merged_ds = colocate_wsra_and_drifters(lee_masked_ds, lee_drifter_df)

# ATOMIC is colocated directly against the SWIFT dataset, with its own
# coordinate/variable names and tighter tolerances than the storm drifters.
atomic_merged_ds = atomic_masked_ds.wsra.colocate_with_path_ds(
    path_ds = atomic_swift_ds,
    path_coords = ('time', 'lon', 'lat'),
    path_vars =  ['energy', 'sea_surface_wave_significant_height', 'wind_speed'],
    temporal_tolerance = np.timedelta64(30, 'm'),
    spatial_tolerance = 50,  # km
    prefix='swift',
)

  path_subset_ds['time_difference'] = path_subset_ds['time_difference'].astype('timedelta64[s]')
  = wsra_colocated_ds[time_difference_name].astype('timedelta64[s]')
  path_subset_ds['time_difference'] = path_subset_ds['time_difference'].astype('timedelta64[s]')
  = wsra_colocated_ds[time_difference_name].astype('timedelta64[s]')
  path_subset_ds['time_difference'] = path_subset_ds['time_difference'].astype('timedelta64[s]')
  = wsra_colocated_ds[time_difference_name].astype('timedelta64[s]')
  path_subset_ds['time_difference'] = path_subset_ds['time_difference'].astype('timedelta64[s]')
  = wsra_colocated_ds[time_difference_name].astype('timedelta64[s]')
  path_subset_ds['time_difference'] = path_subset_ds['time_difference'].astype('timedelta64[s]')
  = wsra_colocated_ds[time_difference_name].astype('timedelta64[s]')
  path_subset_ds['time_difference'] = path_subset_ds['time_difference'].astype('timedelta64[s]')
  = wsra_colocated_ds[time_difference_name].astype('timedelta64[s]')
  pa

### Store

In [87]:
%%capture

# Persist the colocated datasets with IPython's %store magic.
%store earl_merged_ds
%store fiona_merged_ds
%store ian_merged_ds
# NOTE(review): the Julia colocation call in this notebook is commented out,
# so `julia_merged_ds` is not assigned in the cells shown -- confirm it is
# defined elsewhere before relying on this line.
%store julia_merged_ds
%store idalia_merged_ds
%store lee_merged_ds
%store atomic_merged_ds

# NOTE(review): `all_wsra_df` and `atomic_df` are not created in the cells
# shown here; presumably they come from cells outside this view -- confirm.
%store all_wsra_df
%store atomic_df