In [3]:
import os
import pickle

import numpy as np
import pandas as pd
import wavespectra 
import xarray as xr

DATA_DIRECTORY = '/vortexfs1/home/csherwood/proj/NOPP/buoy_data/'
DATA_FILENAME = 'hurricane_ian_spotter_data_v1.pickle'

with open(os.path.join(DATA_DIRECTORY, DATA_FILENAME), 'rb') as handle:
    spotter = pickle.load(handle)

# `spotter` is a python dictionary of Pandas DataFrames, keyed by
# each drifter ID. The drifter ids can then be accessed as follows:
spotter_ids = list(spotter.keys())

# Dictionary to store wavespectra datasets in; can be any container.
wavespectra_datasets = {}

for spotter_id in spotter.keys():

    # Extract the observation times that contain spectral data.
    only_waves = spotter[spotter_id]['energy_density'].notnull()
    drifter = spotter[spotter_id][only_waves]

    # Exract the coordinate arrays; note that the frequency array is
    # uniform across the Spotter observations, so we can just 
    # use the array in the first index of the DataFrame.
    time = drifter.index.to_numpy()
    freq = drifter['frequency'][0] 

    # Extract the variable arrays.
    efth = np.stack(drifter['energy_density'])
    lat = drifter['latitude']
    lon = drifter['longitude']

    # Construct the dataset. This must match the conventions used by the
    # wavespectra package:
    # (https://wavespectra.readthedocs.io/en/latest/conventions.html#)
    # Note that the directional spectrum needs to be computed using the
    # directional moments and an estimator (e.g. MEM). As shown here, 
    # the dataset will be constructed, but it will be scalar, e.g. 
    # efth(time, freq) and not efth(time, freq, dir).
    ds = xr.Dataset(
        data_vars=dict(
            # efth=(["freq", "dir", "time"], efth), #TODO: need to compute this
            efth=(["time", "freq"], efth), # Delete this line if using above (Note: efth may need to be transposed)
            lat=(["time"], lat),
            lon=(["time"], lon),
            site="",
        ),
        coords=dict(
            time=time,
            freq=freq,
            dir=[], #TODO: need to compute this from directional moments
        ),
        attrs=dict(
            # Attributes here; wavespectra would put significant wave
            # height, etc., here but these are a function of time so it
            # might be reasonable to set them as data_vars instead (as 
            # a function of the time coordinate)
        )
    )

    # Format and attach SpecArray accessor to the existing xarray 
    # dataset using the wavespectra.read_dataset() method. Store it in 
    # the dictionary keyed by spotter_id.
    wavespectra_datasets[spotter_id] = wavespectra.read_dataset(ds)

In [4]:
wavespectra_datasets

{'SPOT-30068D': <xarray.Dataset>
 Dimensions:  (time: 72, freq: 39, dir: 0)
 Coordinates:
   * time     (time) object 2022-09-27T00:35:55+00:00 ... 2022-09-29T23:35:55+...
   * freq     (freq) float64 0.0293 0.03906 0.04883 ... 0.4688 0.498 0.6543
   * dir      (dir) float64 
 Data variables:
     efth     (time, freq) float64 0.003251 0.003751 0.004001 ... 0.02 0.006252
     lat      (time) float64 26.04 26.05 26.05 26.05 ... 25.46 25.45 25.43 25.42
     lon      (time) float64 -82.43 -82.44 -82.44 -82.44 ... -82.3 -82.29 -82.29
     site     <U1 '',
 'SPOT-30097D': <xarray.Dataset>
 Dimensions:  (time: 72, freq: 39, dir: 0)
 Coordinates:
   * time     (time) object 2022-09-27T00:27:53+00:00 ... 2022-09-29T23:27:53+...
   * freq     (freq) float64 0.0293 0.03906 0.04883 ... 0.4688 0.498 0.6543
   * dir      (dir) float64 
 Data variables:
     efth     (time, freq) float64 0.002001 0.006002 ... 0.01875 0.007501
     lat      (time) float64 26.81 26.81 26.8 26.8 ... 25.59 25.56 25.54 2