In [1]:
from ast import literal_eval
import numpy as np
import os
import pandas as pd
import pickle
from scipy.spatial import cKDTree
import sys

from rex.utilities.utilities import to_records_array

cwd = os.getcwd()


def check_value(dict_in):
    dict_out = {}
    for key, value in dict_in.items():
        if isinstance(value, dict):
            for k, v in value.items():
                dict_out[key] = check_value(value)
        else:
            if isinstance(value, str):
                if '/' in value:
                    pass
                else:
                    dict_out[key] = value
            else:
                dict_out[key] = value
    return dict_out


def remove_paths(dict_in):
    dict_out = {}
    for key, value in dict_in.items():
        if isinstance(value, dict):
            for k, v in value.items():
                dict_out[key] = check_value(value)
        else:
            if isinstance(value, str):
                if '/' in value:
                    pass
                else:
                    dict_out[key] = value
            else:
                dict_out[key] = value

    return dict_out


def get_size(item):
    return sys.getsizeof(item.copy()) * 10**-6


def get_chunk_size(item, chunk_size=2.0, thresh=0.1):
    item_size = get_size(item)
    item_len = len(item)
    chunk_start = int(item_len // (item_size / chunk_size))
    i = 0
    while True:
        if item_len % (chunk_start + i) == 0:
            chunk = chunk_start + i
            break
        elif item_len % (chunk_start - i) == 0:
            chunk = chunk_start - i
            break
        else:
            offset_size = get_size(item[:i].copy())
            if offset_size > chunk_size * thresh:
                chunk = chunk_start
                print('No unique chunk found')
                break
            else:
                i += 1

    chunk_size = get_size(item[:chunk].copy())
    print('Chunk size ({}, ) = {:.2f} MB'.format(chunk, chunk_size))
    return chunk


def round_to(num, round_val):
    round_num = num - num % round_val
    return int(round_num)


def calc_chunks(t_chunk, dtype, chunk_size=2*10**6, round_to=None):
    pixel_size = np.dtype(dtype).itemsize
    s_chunk = chunk_size / (t_chunk * pixel_size)

    if round_to:
        s_chunk = round_to(s_chunk, round_to)
    else:
        s_chunk = int(np.floor(s_chunk))

    return (t_chunk, s_chunk)


box_dir = '/Users/mrossol/Box/HSDS/wave/Alaska'

# Alaska
## Meta Data

In [4]:
meta_path = os.path.join(box_dir, 'alaska_wave_meta_original.csv')
columns = {'dist': 'distance_to_shore', 'lat': 'latitude', 'lon': 'longitude'}
meta = pd.read_csv(meta_path).rename(columns=columns)
meta_coords = meta[['latitude', 'longitude']].values

coords = np.load(os.path.join(box_dir, 'alaska_coords.npy'))[:, ::-1]

np.allclose(coords, meta_coords)

True

In [29]:
meta_coords = meta[['latitude', 'longitude']].values

coords = np.load(os.path.join(box_dir, 'alaska_coords.npy'))[:, ::-1]

coords[:, 1] - 360, meta_coords[:, 1]

(array([-162.17664, -162.18121, -162.18639, ..., -148.9899 , -148.99121,
        -148.99657], dtype=float32),
 array([-162.176638, -162.181221, -162.186393, ..., -148.989905,
        -148.991211, -148.996571]))

In [30]:
meta_path = os.path.join(box_dir, 'alaska_wave_meta_shifted_lon.csv')
columns = {'dist': 'distance_to_shore', 'lat': 'latitude', 'lon': 'longitude'}
meta = pd.read_csv(meta_path).rename(columns=columns)
depth_path = os.path.join(box_dir, 'alaska_depth.npy')
meta['water_depth'] = np.load(depth_path)
meta['jurisdiction'] = meta['jurisdiction'].fillna('None')
meta = meta.set_index('gid')
cols = ['latitude', 'longitude', 'timezone', 'distance', 'water_depth']
meta[cols] = meta[cols].astype('float32')
out_path = os.path.join(box_dir, 'alaska_wave_meta.csv')
meta.to_csv(out_path)

meta_arr = to_records_array(meta)

out_path = os.path.join(box_dir, 'alaska_wave_meta.npy')
np.save(out_path, meta_arr)
meta.head()

Unnamed: 0_level_0,latitude,longitude,timezone,distance,jurisdiction,water_depth
gid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
0,58.649342,-162.176636,-9.0,418.692139,Alaska,-7.114744
1,58.647961,-162.181213,-9.0,724.452698,Alaska,6.70196
2,58.6464,-162.186386,-9.0,1069.480835,Alaska,9.754598
3,58.644642,-162.19223,-9.0,1460.994629,Alaska,15.821035
4,58.642654,-162.198822,-9.0,1901.846069,Alaska,19.083487


## Time_Index and Meta Chunk Sizes

In [7]:
# Hourly
time_index = np.array(pd.date_range('2012-01-01 00:00:00', '2012-12-31 23:00:00', freq='3h').astype(str),
                      'S20')

print(time_index.shape)
t_size = get_size(time_index)
print('time_index size = {:.2f} MB'.format(t_size))

(2928,)
time_index size = 0.06 MB


In [31]:
meta_path = os.path.join(box_dir, 'alaska_wave_meta.npy')
meta_data = np.load(meta_path)

m_size = sys.getsizeof(meta_data) * 10**-6
print('meta size = {:.2f} MB'.format(m_size))

m_chunks = get_chunk_size(meta_data, thresh=0.1)

meta size = 105.15 MB
No unique chunk found
Chunk size (74074, ) = 2.00 MB


In [32]:
meta_path = os.path.join(box_dir, 'alaska_wave_meta.npy')
meta_data = np.load(meta_path, allow_pickle=True)
lat_lon = pd.DataFrame(meta_data)
lat_lon = lat_lon[['latitude', 'longitude']].values.copy()

c_size = sys.getsizeof(lat_lon) * 10**-6
print('lat_lon size = {:.2f} MB'.format(c_size))

c_chunks = get_chunk_size(lat_lon, thresh=0.1)

lat_lon size = 31.15 MB
No unique chunk found
Chunk size (249999, ) = 2.00 MB


## Variable Attributes

In [3]:
names = {}

descriptions = {'water_depth': 'Grid node depth',
         'mean_wave_direction': 'Direction Normal to the Wave Crests',
         'significant_wave_height': 'Calculated as the zeroth spectral moment (i.e., H_m0)',
         'mean_absolute_period': 'Resolved Spectral Moment (m_0/m_1)',
         'peak_period': 'The period associated with the maximum value of the wave energy spectrum',
         'mean_zero-crossing_period': 'Total wave energy flux from all directions',
         'energy_period': 'Spectral width characterizes the relative spreading of energy in the wave spectrum. Large values indicate a wider spectral peak',
         'directionality_coefficient': 'Fraction of total wave energy travelling in the "direction of maximum wave power" direction',
         'maximum_energy_direction': 'The direction from which the most wave energy is travelling',
         'omni-directional_wave_power': 'Total wave energy flux from all directions',
         'spectral_width': 'Spectral width characterizes the relative spreading of energy in the wave spectrum. Large values indicate a wider spectral peak',
         'maximum_energy': 'Maximum directionally resolved wave energy'}

units = {'water_depth': 'm',
         'mean_wave_direction': 'deg',
         'significant_wave_height': 'm',
         'mean_absolute_period': 's',
         'peak_period': 's',
         'mean_zero-crossing_period': 's',
         'energy_period': 's',
         'directionality_coefficient': '',
         'maximum_energy_direction': 'deg',
         'omni-directional_wave_power': 'W/m',
         'spectral_width': '',
         'maximum_energy': 'W/m'}

SWAN_names = {'water_depth': 'DEPTH',
              'mean_wave_direction': 'DIR',
              'significant_wave_height': 'HSIGN',
              'mean_absolute_period': 'PER',
              'peak_period': 'RTP',
              'mean_zero-crossing_period': 'TMM10',
              'energy_period': 'TM02',
              'directionality_coefficient': '',
              'maximum_energy_direction': '',
              'omni-directional_wave_power': '',
              'spectral_width': '',
              'maximum_energy': 'jdmax'}

IEC_names = {'water_depth': 'h',
             'mean_wave_direction': 'Sigma',
             'significant_wave_height': 'H_s',
             'mean_absolute_period': 'T_p',
             'peak_period': 'T_p',
             'mean_zero-crossing_period': 'T_02',
             'energy_period': 'T_e',
             'directionality_coefficient': 'd',
             'maximum_energy_direction': 'Jsigma_Jmax',
             'omni-directional_wave_power': 'J',
             'spectral_width': 'epsilon_o',
             'maximum_energy': 'J_sigma_jdmax'}

references = {'water_depth': 'SWAN Manual',
              'mean_wave_direction': 'SWAN Manual',
              'significant_wave_height': 'SWAN Manual, IEC62600-101',
              'mean_absolute_period': 'SWAN Manual',
              'peak_period': 'SWAN Manual',
              'mean_zero-crossing_period': 'SWAN Manual, IEC62600-101',
              'energy_period': 'IEC62600-101',
              'directionality_coefficient': 'IEC62600-101',
              'maximum_energy_direction': 'IEC62600-101',
              'omni-directional_wave_power': 'IEC62600-101',
              'spectral_width': 'IEC62600-101',
              'maximum_energy': 'SWAN Manual, IEC62600-101'}

dimensions = {'water_depth': ['position'],
              'mean_wave_direction': ['time', 'position'],
              'significant_wave_height': ['time', 'position'],
              'mean_absolute_period': ['time', 'position'],
              'peak_period': ['time', 'position'],
              'mean_zero-crossing_period': ['time', 'position'],
              'energy_period': ['time', 'posi"tion'],
              'directionality_coefficient': ['time', 'position'],
              'maximum_energy_direction': ['time', 'position'],
              'omni-directional_wave_power': ['time', 'position'],
              'spectral_width': ['time', 'position'],
              'maximum_energy': ['time', 'position']}

In [15]:
path = os.path.join(cwd, 'h5_attrs/alaska_wave_h5_attrs.json')
wave_attrs = pd.read_json(path)

alaska_vars = [v for v in wave_attrs.index
               if v not in ['coordinates', 'time_index']]

overlap_vars = [v for v in units.keys() if v in alaska_vars]
new_vars = [v for v in alaska_vars if v not in units]
missing_vars = [v for v in units.keys() if v not in alaska_vars]

print('overlaping variables:\n{}'.format(overlap_vars))
print('new variables:\n{}'.format(new_vars))
print('missing variables:\n{}'.format(missing_vars))

overlaping variables:
['water_depth', 'mean_wave_direction', 'significant_wave_height', 'mean_absolute_period', 'peak_period', 'mean_zero-crossing_period', 'energy_period', 'directionality_coefficient', 'maximum_energy_direction', 'omni-directional_wave_power', 'spectral_width']
new variables:
['peak_period_direction']
missing variables:
['maximum_energy']


In [4]:
path = os.path.join(cwd, 'h5_attrs/alaska_wave_h5_attrs.json')
wave_attrs = pd.read_json(path)
alaska_vars =  list(units.keys()) + ['time_index', 'coordinates']
mask = wave_attrs.index.isin(alaska_vars)
wave_attrs = wave_attrs.loc[mask]

wave_attrs['name'] = None
wave_attrs['attrs'] = None
wave_attrs['chunks'] = None

# 8 week hourly chunks
t_chunk = int(8 * 7 * 24 / 3)
for var, row in wave_attrs.iterrows():

    if var == 'time_index':
        wave_attrs.at[var, 'attrs'] = {'freq': '3h', 'timezone': 'UTC', 'units': 'GMT', 'dimensions': ['time']}
        wave_attrs.at[var, 'chunks'] = None
    elif var == 'coordinates':
        wave_attrs.at[var, 'chunks'] = (249999, 2)
        attrs  = {'description': '(latitude, longitude) using Datum: NAD83',
                  'src_name': '(Xp, Yp)',
                  'units': '(deg, deg)',
                  'dimensions': ['position']}
        wave_attrs.at[var, 'attrs'] = attrs
        wave_attrs.at[var, 'dtype'] = 'float32'
    else:
        if var == 'water_depth':
            chunks = None
        else:
            chunks = calc_chunks(t_chunk, 'float32')

        wave_attrs.at[var, 'chunks'] = chunks
        wave_attrs.at[var , 'name'] = names.get(var, None)
        wave_attrs.at[var, 'dtype'] = 'float32'
        wave_attrs.at[var, 'attrs'] = {'description': descriptions[var],
                                       'dimensions': dimensions[var],
                                       'units': units[var],
                                       'SWAN_name': SWAN_names[var],
                                       'IEC_name': IEC_names[var]}

# Meta data
wave_attrs.at['meta', 'chunks'] = (74074, )
wave_attrs.at['meta', 'dtype'] = None
wave_attrs.at['meta', 'name'] = None
wave_attrs.at['meta', 'attrs'] = {'dimensions': ['position']}

# Global attributes
wave_attrs.at['global', 'chunks'] = None
wave_attrs.at['global', 'dtype'] = None
wave_attrs.at['global', 'name'] = None
wave_attrs.at['global', 'attrs'] = {
    'ref_SWAN-Manual': "SWAN Team, SWAN: User Manual, Delft University of Technology, Delft, The Netherlands, Cycle III Version 41.31, 2019.",
    "ref_IEC62600-101": "International Electrotechnical Commission, Marine energy - Wave, tidal and other water current converters - Part 101: Wave energy resource assessment and characterization, Technical Specification 62600–101, 2015.",
    "ref_Wu-Wang-Yang-Garcia-Medina-2020": "W.C. Wu, T. Wang, Z. Yang, and G. García-Medina, “Development and validation of a high-resolution regional wave hindcast model for U.S. West Coast wave resource characterization,” Renewable Energy, vol. 152, pp. 736–753, Jun. 2020.",
    "source": "PNNL2019", "version": "v1.0.0"}

path = os.path.join(cwd, 'hsds_attrs/alaska_hsds_attrs.json')
wave_attrs.to_json(path, indent=4)
wave_attrs

Unnamed: 0,attrs,dtype,chunks,name
coordinates,"{'description': '(latitude, longitude) using D...",float32,"(249999, 2)",
directionality_coefficient,{'description': 'Fraction of total wave energy...,float32,"(448, 1116)",
energy_period,{'description': 'Spectral width characterizes ...,float32,"(448, 1116)",
maximum_energy_direction,{'description': 'The direction from which the ...,float32,"(448, 1116)",
mean_absolute_period,{'description': 'Resolved Spectral Moment (m_0...,float32,"(448, 1116)",
mean_wave_direction,{'description': 'Direction Normal to the Wave ...,float32,"(448, 1116)",
mean_zero-crossing_period,{'description': 'Total wave energy flux from a...,float32,"(448, 1116)",
omni-directional_wave_power,{'description': 'Total wave energy flux from a...,float32,"(448, 1116)",
peak_period,{'description': 'The period associated with th...,float32,"(448, 1116)",
significant_wave_height,{'description': 'Calculated as the zeroth spec...,float32,"(448, 1116)",


## Virtual Buoy

In [43]:
names = {}

descriptions = {'water_depth': 'Grid node depth',
         'mean_wave_direction': 'Direction Normal to the Wave Crests',
         'significant_wave_height': 'Calculated as the zeroth spectral moment (i.e., H_m0)',
         'mean_absolute_period': 'Resolved Spectral Moment (m_0/m_1)',
         'peak_period': 'The period associated with the maximum value of the wave energy spectrum',
         'mean_zero-crossing_period': 'Total wave energy flux from all directions',
         'energy_period': 'Spectral width characterizes the relative spreading of energy in the wave spectrum. Large values indicate a wider spectral peak',
         'directionality_coefficient': 'Fraction of total wave energy travelling in the "direction of maximum wave power" direction',
         'maximum_energy_direction': 'The direction from which the most wave energy is travelling',
         'omni-directional_wave_power': 'Total wave energy flux from all directions',
         'spectral_width': 'Spectral width characterizes the relative spreading of energy in the wave spectrum. Large values indicate a wider spectral peak',
         'directional_wave_spectrum': 'Variance density over the i^th frequency and j^th direction (m^2Hz^-1deg^-1)',
         'direction': 'direction of wave propagation based on epsg:4326 (deg)',
         'frequency': 'i^th discrete frequency (Hz)',
         'frequency_bin_edges': 'Frequency bin definition: [Low Edge, High Edge)'}

units = {'water_depth': 'm',
         'mean_wave_direction': 'deg',
         'significant_wave_height': 'm',
         'mean_absolute_period': 's',
         'peak_period': 's',
         'mean_zero-crossing_period': 's',
         'energy_period': 's',
         'directionality_coefficient': '',
         'maximum_energy_direction': 'deg',
         'omni-directional_wave_power': 'W/m',
         'spectral_width': '',
         'directional_wave_spectrum': 'm^2 Hz^-1 deg^-1',
         'direction': 'deg',
         'frequency': 'Hz',
         'frequency_bin_edges': 'Hz'}

src_names = {k : k for k in names}
src_names['directional_wave_spectrum'] = 'energy'

SWAN_names = {'water_depth': 'DEPTH',
         'mean_wave_direction': 'DIR',
         'significant_wave_height': 'HSIGN',
         'mean_absolute_period': 'PER',
         'peak_period': 'RTP',
         'mean_zero-crossing_period': 'TMM10',
         'energy_period': 'TM02',
         'directionality_coefficient': '',
         'maximum_energy_direction': '',
         'omni-directional_wave_power': '',
         'spectral_width': '',
         'directional_wave_spectrum': 'energy',
         'frequency': 'frequency',
         'direction': 'direction',
         'frequency_bin_edges': ''}

IEC_names = {'water_depth': 'h',
         'mean_wave_direction': 'Sigma',
         'significant_wave_height': 'H_s',
         'mean_absolute_period': 'T_p',
         'peak_period': 'T_p',
         'mean_zero-crossing_period': 'T_02',
         'energy_period': 'T_e',
         'directionality_coefficient': 'd',
         'maximum_energy_direction': 'Jsigma_Jmax',
         'omni-directional_wave_power': 'J',
         'spectral_width': 'epsilon_o',
         'directional_wave_spectrum': 'S_ij',
         'direction': 'Sigma',
         'frequency': 'f_i',
         'frequency_bin_edges': ''}

references = {'water_depth': 'SWAN Manual',
         'mean_wave_direction': 'SWAN Manual',
         'significant_wave_height': 'SWAN Manual, IEC62600-101',
         'mean_absolute_period': 'SWAN Manual',
         'peak_period': 'SWAN Manual',
         'mean_zero-crossing_period': 'SWAN Manual, IEC62600-101',
         'energy_period': 'IEC62600-101',
         'directionality_coefficient': 'IEC62600-101',
         'maximum_energy_direction': 'IEC62600-101',
         'omni-directional_wave_power': 'IEC62600-101',
         'spectral_width': 'IEC62600-101',
         'directional_wave_spectrum': 'IEC62600-101',
         'direction': 'IEC62600-101',
         'frequency': 'IEC62600-101',
         'frequency_bin_edges': 'IEC62600-101'}

dimensions = {'water_depth': ['position'],
         'mean_wave_direction': ['time', 'position'],
         'significant_wave_height': ['time', 'position'],
         'mean_absolute_period': ['time', 'position'],
         'peak_period': ['time', 'position'],
         'mean_zero-crossing_period': ['time', 'position'],
         'energy_period': ['time', 'position'],
         'directionality_coefficient': ['time', 'position'],
         'maximum_energy_direction': ['time', 'position'],
         'omni-directional_wave_power': ['time', 'position'],
         'spectral_width': ['time', 'position'],
         'directional_wave_spectrum': ['time', 'frequency', 'direction', 'position'],
         'direction': ['direction'],
         'frequency': ['frequency'],
         'frequency_bin_edges': ['frequency', '[Low Edge, High Edge)']}

In [38]:
path = os.path.join(cwd, 'h5_attrs/alaska_buoy_h5_attrs.json')
wave_attrs = pd.read_json(path)

alaska_vars = [v for v in wave_attrs.index
               if v not in ['coordinates', 'time_index']]

overlap_vars = [v for v in units.keys() if v in alaska_vars]
new_vars = [v for v in alaska_vars if v not in units]
missing_vars = [v for v in units.keys() if v not in alaska_vars]

print('overlaping variables:\n{}'.format(overlap_vars))
print('new variables:\n{}'.format(new_vars))
print('missing variables:\n{}'.format(missing_vars))

overlaping variables:
['mean_wave_direction', 'significant_wave_height', 'peak_period', 'mean_zero-crossing_period', 'energy_period', 'maximum_energy_direction', 'omni-directional_wave_power', 'spectral_width', 'directional_wave_spectrum', 'direction', 'frequency']
new variables:
['water_depth']
missing variables:
['depth', 'mean_absolute_period', 'directionality_coefficient', 'frequency_bin_edges']


In [34]:
meta_path = os.path.join(box_dir, 'alaska_wave_meta.csv')
meta = pd.read_csv(meta_path)
meta = meta.set_index('gid')
tree = cKDTree(meta[['latitude', 'longitude']].values)

path = os.path.join(box_dir, 'alaska_buoy_coords.npy')
coords = np.load(path)

_, pos = tree.query(coords[:, ::-1])

buoy_meta = meta.loc[pos]
buoy_meta['latitude'] = coords[:, 1]
buoy_meta['longitude'] = coords[:, 0] - 360
display(buoy_meta.head())
cols = ['latitude', 'longitude', 'timezone', 'distance', 'water_depth']
buoy_meta[cols] = buoy_meta[cols].astype('float32')
buoy_meta.dtypes

Unnamed: 0_level_0,latitude,longitude,timezone,distance,jurisdiction,water_depth
gid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
504,212.050995,-303.768002,12.0,217386.56,Russia,3880.827
478,213.195007,-299.416,12.0,260345.14,Russia,3752.0881
479,213.156998,-299.77,12.0,257685.28,Russia,3719.1467
513,204.953003,-307.215,12.0,217579.62,Russia,3268.5103
559,175.153,-304.917999,12.0,324149.47,Russia,4299.187


latitude        float32
longitude       float32
timezone        float32
distance        float32
jurisdiction     object
water_depth     float32
dtype: object

In [35]:
meta_arr = to_records_array(buoy_meta)

out_path = os.path.join(box_dir, 'alaska_buoy_meta.npy')
np.save(out_path, meta_arr)

In [36]:
meta_path = os.path.join(box_dir, 'alaska_buoy_meta.npy')
meta_data = np.load(meta_path)

m_size = sys.getsizeof(meta_data) * 10**-6
print('meta size = {:.3f} MB'.format(m_size))

m_chunks = get_chunk_size(meta_data, thresh=0.1)

meta size = 0.001 MB
Chunk size (24, ) = 0.00 MB


In [37]:
meta_path = os.path.join(box_dir, 'alaska_buoy_meta.npy')
meta_data = np.load(meta_path, allow_pickle=True)
lat_lon = pd.DataFrame(meta_data)
lat_lon = lat_lon[['latitude', 'longitude']].values.copy()

c_size = sys.getsizeof(lat_lon) * 10**-6
print('lat_lon size = {:.3f} MB'.format(c_size))

c_chunks = get_chunk_size(lat_lon, thresh=0.1)

lat_lon size = 0.000 MB
Chunk size (24, ) = 0.00 MB


In [18]:
dset_arr = np.ones((8760, 127), dtype='float32')
dset_size = sys.getsizeof(dset_arr) * 10**-6
print('dset size = {:.3f} MB'.format(dset_size))

dset_arr = np.ones((8760, 60), dtype='float32')
dset_size = sys.getsizeof(dset_arr) * 10**-6
print('dset size = {:.3f} MB'.format(dset_size))

dset size = 4.450 MB
dset size = 2.134 MB


In [41]:
dset_arr = np.ones((8784, 29, 72, 24), dtype='float32')
dset_size = sys.getsizeof(dset_arr) * 10**-6
print('energy size = {:.3f} MB'.format(dset_size))

dset_arr = np.ones((8*7*24, 6, 12, 6), dtype='float32')
dset_size = sys.getsizeof(dset_arr) * 10**-6
print('energy size = {:.3f} MB'.format(dset_size))

energy size = 1760.735 MB
energy size = 2.323 MB


In [44]:
path = os.path.join(cwd, 'h5_attrs/alaska_buoy_h5_attrs.json')
buoy_attrs = pd.read_json(path)

buoy_attrs['name'] = None
buoy_attrs['attrs'] = None

for var, row in buoy_attrs.iterrows():

    if var == 'time_index':
        buoy_attrs.at[var, 'attrs'] = {'freq': '1h', 'timezone': 'UTC', 'units': 'GMT', 'dimensions': ['time']}
        buoy_attrs.at[var, 'chunks'] = None
    elif var == 'coordinates':
        buoy_attrs.at[var, 'chunks'] = None
        attrs  = {'description': '(latitude, longitude) using Datum: NAD83',
                  'src_name': '(Xp, Yp)',
                  'units': '(deg, deg)',
                  'dimensions': ['position']}
        buoy_attrs.at[var, 'attrs'] = attrs
    else:
        if var == 'directional_wave_spectrum':
            chunks = (8*7*24, 6, 12, 6)
        elif var in ['depth', 'direction', 'frequency']:
            chunks = None
        else:
            chunks = (None, 60)

        attrs = {'description': descriptions[var],
                 'dimensions': dimensions[var],
                 'units': units[var],
                 'SWAN_name': SWAN_names[var],
                 'IEC_name': IEC_names[var]}
        src_name = src_names.get(var)
        if src_name:
            attrs['src_name'] = src_name

        buoy_attrs.at[var, 'chunks'] = chunks
        buoy_attrs.at[var, 'dtype'] = 'float32'
        buoy_attrs.at[var , 'name'] = names.get(var)
        buoy_attrs.at[var, 'attrs'] = attrs

# Meta data
buoy_attrs.at['meta', 'chunks'] = None
buoy_attrs.at['meta', 'dtype'] = None
buoy_attrs.at['meta', 'name'] = None
buoy_attrs.at['meta', 'attrs'] = {'dimensions': ['position']}

# Global attributes
buoy_attrs.at['global', 'chunks'] = None
buoy_attrs.at['global', 'dtype'] = None
buoy_attrs.at['global', 'name'] = None
buoy_attrs.at['global', 'attrs'] = {
    'ref_SWAN-Manual': "SWAN Team, SWAN: User Manual, Delft University of Technology, Delft, The Netherlands, Cycle III Version 41.31, 2019.",
    "ref_IEC62600-101": "International Electrotechnical Commission, Marine energy - Wave, tidal and other water current converters - Part 101: Wave energy resource assessment and characterization, Technical Specification 62600–101, 2015.",
    "ref_Wu-Wang-Yang-Garcia-Medina-2020": "W.C. Wu, T. Wang, Z. Yang, and G. García-Medina, “Development and validation of a high-resolution regional wave hindcast model for U.S. West Coast wave resource characterization,” Renewable Energy, vol. 152, pp. 736–753, Jun. 2020.",
    "source": "PNNL2019",
    "version": "v1.0.0"}

path = os.path.join(cwd, 'hsds_attrs/alaska_buoy_hsds_attrs.json')
buoy_attrs.to_json(path, indent=4)
buoy_attrs

Unnamed: 0,attrs,dtype,chunks,name
coordinates,"{'description': '(latitude, longitude) using D...",float64,,
direction,{'description': 'direction of wave propagation...,float32,,
directional_wave_spectrum,{'description': 'Variance density over the i^t...,float32,"(1344, 6, 12, 6)",
energy_period,{'description': 'Spectral width characterizes ...,float32,"(None, 60)",
frequency,{'description': 'i^th discrete frequency (Hz)'...,float32,,
maximum_energy_direction,{'description': 'The direction from which the ...,float32,"(None, 60)",
mean_wave_direction,{'description': 'Direction Normal to the Wave ...,float32,"(None, 60)",
mean_zero-crossing_period,{'description': 'Total wave energy flux from a...,float32,"(None, 60)",
omni-directional_wave_power,{'description': 'Total wave energy flux from a...,float32,"(None, 60)",
peak_period,{'description': 'The period associated with th...,float32,"(None, 60)",
