# Datasets

In [1]:
import glob
import os
import pickle
import re
from typing import List

import numpy as np
import pandas as pd
import pywsra
import xarray as xr
from littlebuoybigwaves import geo

from configure import get_config


# Setup

Load the configuration file, `config.toml`, which contains the paths to the data directories and files.

In [2]:
config = get_config()

## WSRA and P-3 met data

In [3]:
# Root directory of the WSRA data, taken from the configuration.
WSRA_DIR = config['dirs']['wsra']

def construct_wsra_path(storm_name: str) -> str:
    """Return the path of a storm's folder inside the WSRA directory.

    Args:
        storm_name (str): name of the storm's folder within `WSRA_DIR`.

    Returns:
        str: `WSRA_DIR` joined with `storm_name`.
    """
    storm_directory = os.path.join(WSRA_DIR, storm_name)
    return storm_directory

def read_wsra_and_met_data(
    storm_name: str,
    met_data_vars: str | List[str] = 'all',
    met_rename_dict: dict[str, str] | None = None,
) -> xr.Dataset:
    """Read a storm's WSRA data and merge the P-3 met variables into it.

    The WSRA files are read from the storm's folder (see
    `construct_wsra_path`) and the met files from its 'met' subfolder.

    Args:
        storm_name (str): name of the storm's folder in the WSRA directory.
        met_data_vars (str | List[str], optional): met variables to read and
            merge; passed to pywsra as `data_vars`.  Defaults to 'all'.
        met_rename_dict (dict[str, str] | None, optional): passed to
            `pywsra.merge_met_vars` as `rename_dict`.  Defaults to None.

    Returns:
        xr.Dataset: the merged dataset, with the lower-cased storm name
        saved in `attrs['storm_name']`.
    """
    # Locate the WSRA folder and the met subfolder inside it.
    storm_directory = construct_wsra_path(storm_name)
    met_directory = os.path.join(storm_directory, 'met')  # TODO: use AC data?

    # Open WSRA and P-3 met data.
    wsra_ds = pywsra.read_wsra_directory(storm_directory, index_by_time=True)
    met_ds = pywsra.read_met_directory(met_directory, data_vars=met_data_vars)

    # The P-3 met data are resampled onto the WSRA times during the merge.
    merge_kwargs = {
        'wsra_ds': wsra_ds,
        'met_ds': met_ds,
        'data_vars': met_data_vars,
        'resample_method': np.nanmedian,
        'rename_dict': met_rename_dict,
    }
    merged_ds = pywsra.merge_met_vars(**merge_kwargs)

    # Keep the (lower-cased) storm name with the data for future reference.
    merged_ds.attrs['storm_name'] = storm_name.lower()
    return merged_ds


In [4]:
# Maps each P-3 met variable to read onto the name handed to pywsra as its
# replacement (`rename_dict`).
_met_rename_dict = {
    'SfmrWS.1': 'met_sfmr_10m_wind_speed',
    'SfmrWErr.1': 'met_sfmr_10m_wind_speed_error',
    'SfmrRainRate.1': 'met_sfmr_rain_rate',
    'SfmrDV.1': 'met_sfmr_data_validity',
    'LonGPS.1': 'met_longitude',
    'LatGPS.1': 'met_latitude',
}

# Keyword arguments shared by every `read_wsra_and_met_data` call.  The
# variables to read are exactly the ones being renamed, in the same order.
read_kwargs = {
    'met_data_vars': list(_met_rename_dict),
    'met_rename_dict': _met_rename_dict,
}

In [5]:
# Read the merged WSRA and P-3 met dataset from each folder of the WSRA
# directory, using the same met variables and renaming for all of them.
earl_ds = read_wsra_and_met_data('Earl', **read_kwargs)
fiona_ds = read_wsra_and_met_data('Fiona', **read_kwargs)
ian_ds = read_wsra_and_met_data('Ian', **read_kwargs)
julia_ds = read_wsra_and_met_data('Julia', **read_kwargs)  # TODO: wrong data on Prosensing site for Julia
franklin_ds = read_wsra_and_met_data('Franklin', **read_kwargs)
idalia_ds = read_wsra_and_met_data('Idalia', **read_kwargs)
lee_ds = read_wsra_and_met_data('Lee', **read_kwargs)
atomic_ds = read_wsra_and_met_data('atomic', **read_kwargs)
# TODO: include nigel and spotters
# pywsra.read_wsra_file('EUREC4A_ATOMIC_P3_WSRA_20200117_20200211_with_SWIFT.nc', index_by_time=False)

  values.append(resample_method(met_in_window[var].values))
  values.append(resample_method(met_in_window[var].values))
  values.append(resample_method(met_in_window[var].values))
  values.append(resample_method(met_in_window[var].values))
  values.append(resample_method(met_in_window[var].values))
  values.append(resample_method(met_in_window[var].values))
  values.append(resample_method(met_in_window[var].values))
  return np.nanmean(a, axis, out=out, keepdims=keepdims)
  values.append(resample_method(met_in_window[var].values))


## Buoys

### Hurricane drifter datasets

In [6]:
def read_drifter_data(drifter_file: str) -> pd.DataFrame:
    """Load a pickled drifter file into a single multi-index DataFrame.

    Args:
        drifter_file (str): path to the pickle file.

    Returns:
        pd.DataFrame: the result of `concatenate_drifters` applied to the
        merged contents of the file.
    """
    # NOTE: unpickling can execute arbitrary code; only open trusted files.
    with open(drifter_file, 'rb') as pickle_file:
        drifters_by_type = pickle.load(pickle_file)

    # The merge is applied unconditionally: it is not first checked whether
    # any of the 'spotter', 'microswift', or 'dwsd' keys are present.
    drifters_by_id = merge_drifter_dicts(drifters_by_type)

    return concatenate_drifters(drifters_by_id)

def merge_drifter_dicts(drifter_data: dict) -> dict:
    """Combine the per-type drifter dictionaries into one dictionary.

    Args:
        drifter_data (dict): may contain 'spotter', 'microswift', and 'dwsd'
            entries, each a dictionary; a missing entry counts as empty.
            Any other entries are ignored.

    Returns:
        dict: union of the three entries.  When a key is repeated, the value
        from the later type (spotter, then microswift, then dwsd) is kept.
    """
    merged = {}
    for drifter_type in ('spotter', 'microswift', 'dwsd'):
        merged = merged | drifter_data.get(drifter_type, {})
    return merged

def concatenate_drifters(drifter_dict: dict) -> pd.DataFrame:
    """
    Stack the individual drifter DataFrames into one DataFrame with an
    ('id', 'time') multi-index, keeping only rows with an 'energy_density'
    value.  Observations without waves (off-hour pressure and temperature
    observations) are removed this way.

    Args:
        drifter_dict (dict): DataFrames of the individual drifters, keyed by
            drifter id.

    Returns:
        DataFrame: the stacked observations that contain waves.
    """
    # The dictionary keys become the outer ('id') index level.
    all_observations = pd.concat(drifter_dict, names=['id', 'time'])
    wave_observations = all_observations.dropna(subset=['energy_density'])
    return wave_observations

In [7]:
# Paths to the pickled drifter files, taken from the configuration.
EARL_DRIFTER_FILE = config['files']['earl_drifters']
FIONA_DRIFTER_FILE = config['files']['fiona_drifters']
IAN_DRIFTER_FILE = config['files']['ian_drifters']
IDALIA_DRIFTER_FILE = config['files']['idalia_drifters']
LEE_DRIFTER_FILE = config['files']['lee_drifters']

# Read each file into a single multi-index DataFrame (see `read_drifter_data`).
earl_drifter_df = read_drifter_data(EARL_DRIFTER_FILE)
fiona_drifter_df = read_drifter_data(FIONA_DRIFTER_FILE)
ian_drifter_df = read_drifter_data(IAN_DRIFTER_FILE)
idalia_drifter_df = read_drifter_data(IDALIA_DRIFTER_FILE)
lee_drifter_df = read_drifter_data(LEE_DRIFTER_FILE)

### ATOMIC SWIFTs

In [8]:
#TODO: read in ATOMIC SWIFTs and convert to dataframe in transform
def read_swift_directory(path: str) -> dict:
    """Helper function to read a directory of SWIFT .nc files.

    Every `*.nc` file directly inside `path` is opened with
    `xr.open_dataset` and keyed by the SWIFT id found in its filename.

    Args:
        path (str): directory containing the SWIFT .nc files.

    Returns:
        dict: xarray Datasets keyed by SWIFT id (e.g., 'SWIFT16').  Empty if
        the directory contains no .nc files.  If several filenames share an
        id, the last one in sorted order is kept.

    Raises:
        ValueError: if a filename does not contain 'SWIFT' followed by two
            digits.
    """
    swifts = {}
    # Sort so that the result does not depend on the filesystem's ordering.
    for file in sorted(glob.glob(os.path.join(path, '*.nc'))):
        filename = os.path.basename(file)
        # Search the filename only, so that a 'SWIFT##' in the name of a
        # parent folder cannot be mistaken for the id.
        match = re.search(r'SWIFT[0-9]{2}', filename)
        if match is None:
            raise ValueError(
                "Could not find a SWIFT id ('SWIFT' followed by two digits) "
                f"in filename {filename!r}."
            )
        swifts[match.group()] = xr.open_dataset(file)

    return swifts

In [9]:
ATOMIC_SWIFT_DIR = config['dirs']['atomic_swift']
atomic_swifts = read_swift_directory(ATOMIC_SWIFT_DIR)

# TODO: combine all SWIFTs into single ds or df
# Only SWIFT16 is combined for now; iterate over `atomic_swifts.keys()`
# instead to include every SWIFT that was read.
all_atomic_swift_ds = []
for swift_id in ['SWIFT16']:
    # Add a length-one 'swift_id' dimension holding the id, which is the
    # dimension the datasets are concatenated along below.
    all_atomic_swift_ds.append(
        atomic_swifts[swift_id].expand_dims(swift_id=[swift_id])
    )

atomic_swift_ds = xr.concat(
    all_atomic_swift_ds,
    dim='swift_id',
    coords='minimal',
)



## NHC

In [10]:
# Root directory of the NHC best track shapefiles, taken from the configuration.
NHC_DIR = config['dirs']['nhc']

def construct_nhc_path(storm_id: str, feature: str) -> str:
    """Return the path to one shapefile of a storm's NHC best track.

    The path has the form
    `<NHC_DIR>/<storm_id in lower case>_best_track/<storm_id in upper case>_<feature>.shp`.

    Args:
        storm_id (str): storm identifier; its case is normalized here.
        feature (str): suffix of the shapefile name (e.g., 'pts', 'lin', or
            'windswath').

    Returns:
        str: path to the shapefile.
    """
    return os.path.join(
        NHC_DIR,
        '{}_best_track'.format(storm_id.lower()),
        '{}_{}.shp'.format(storm_id.upper(), feature),
    )

def read_nhc_best_track(storm_id):
    """Read the NHC best track shapefiles of a storm.

    Args:
        storm_id (str): storm identifier, as used by `construct_nhc_path`.

    Returns:
        tuple: `(pts, lin, windswath)` as read by `geo.read_shp_file`.  `pts`
        is read with `index_by_datetime=True` and then passed through
        `geo.best_track_pts_to_intensity`.
    """
    pts_path, lin_path, windswath_path = (
        construct_nhc_path(storm_id, feature)
        for feature in ('pts', 'lin', 'windswath')
    )

    pts = geo.best_track_pts_to_intensity(
        geo.read_shp_file(pts_path, index_by_datetime=True)
    )
    lin = geo.read_shp_file(lin_path)
    windswath = geo.read_shp_file(windswath_path)

    # The 'radii' shapefile is currently not read or returned:
    # radii = geo.read_shp_file(construct_nhc_path(storm_id, 'radii'))
    return pts, lin, windswath  # , radii


In [11]:
# Read the best track (pts, lin, windswath) of each storm, identified by the
# `storm_id` attribute of its WSRA dataset.
# NOTE(review): `attrs['storm_id']` is not set in this notebook; presumably
# it comes from pywsra when the WSRA files are read -- confirm.
earl_best_track = read_nhc_best_track(storm_id=earl_ds.attrs['storm_id'])
fiona_best_track = read_nhc_best_track(storm_id=fiona_ds.attrs['storm_id'])
ian_best_track = read_nhc_best_track(storm_id=ian_ds.attrs['storm_id'])
julia_best_track = read_nhc_best_track(storm_id=julia_ds.attrs['storm_id'])
idalia_best_track = read_nhc_best_track(storm_id=idalia_ds.attrs['storm_id'])
lee_best_track = read_nhc_best_track(storm_id=lee_ds.attrs['storm_id'])

## IBTrACS

Storm tracks from the International Best Track Archive for Climate Stewardship (IBTrACS): https://www.ncei.noaa.gov/products/international-best-track-archive

In [12]:
IBTRACS_BASE_URL = ('https://www.ncei.noaa.gov/data/international-best-'
                    'track-archive-for-climate-stewardship-ibtracs/'
                    'v04r00/access/csv/')
IBTRACS_BASE_CSV = 'ibtracs.last3years.list.v04r00.csv'
IBTRACS_PATH = config['dirs']['ibtracs']

# The table is read from the local copy at `IBTRACS_PATH`.  To read it from
# the web instead:
# ibtracs_df = pd.read_csv(IBTRACS_BASE_URL + IBTRACS_BASE_CSV, low_memory=False)
ibtracs_df = pd.read_csv(IBTRACS_PATH, low_memory=False)


def _select_ibtracs_storm(ibtracs: pd.DataFrame, name: str) -> pd.DataFrame:
    """Return the rows of one storm, indexed by time.

    Args:
        ibtracs (pd.DataFrame): table with 'NAME', 'ISO_TIME', 'LAT', and
            'LON' columns.
        name (str): value of the 'NAME' column to select (e.g., 'IAN').

    Returns:
        pd.DataFrame: the matching rows with 'ISO_TIME' converted to
        datetimes and set as the index, and with 'LAT' and 'LON' cast to
        float64.
    """
    storm = ibtracs.query(f'NAME == "{name}"')
    storm = storm.assign(ISO_TIME=lambda df: pd.to_datetime(df['ISO_TIME']))
    storm = storm.set_index('ISO_TIME', drop=True)
    return storm.assign(
        LAT=lambda df: df['LAT'].astype(np.float64),
        LON=lambda df: df['LON'].astype(np.float64),
    )


idalia_ibtracs_df = _select_ibtracs_storm(ibtracs_df, 'IDALIA')
ian_ibtracs_df = _select_ibtracs_storm(ibtracs_df, 'IAN')


Stored 'ibtracs_df' (DataFrame)


## Store

In [43]:
%%capture

# Persist the variables with IPython's `%store` magic so that they can be
# restored in another kernel (`%store -r`).  `%%capture` silences the
# messages printed by `%store`.

# WSRA and P-3 met datasets.
# NOTE(review): `franklin_ds` is read above but not stored here -- confirm
# that this is intended.
%store earl_ds
%store fiona_ds
%store ian_ds
%store julia_ds
%store idalia_ds
%store lee_ds
%store atomic_ds

# Drifter DataFrames and the ATOMIC SWIFT dataset.
%store earl_drifter_df
%store fiona_drifter_df
%store ian_drifter_df
%store idalia_drifter_df
%store lee_drifter_df
%store atomic_swift_ds

# NHC best tracks (`julia_best_track` is read above but deliberately left
# out here by the commented line).
%store earl_best_track
%store fiona_best_track
%store ian_best_track
# %store julia_best_track
%store idalia_best_track
%store lee_best_track

# IBTrACS tracks.
%store idalia_ibtracs_df
%store ian_ibtracs_df
