In [3]:
from ast import literal_eval
import numpy as np
import os
import pandas as pd
import pickle
from scipy.spatial import cKDTree
import sys

from rex.utilities import to_records_array

cwd = os.getcwd()


def check_value(dict_in):
    dict_out = {}
    for key, value in dict_in.items():
        if isinstance(value, dict):
            for k, v in value.items():
                dict_out[key] = check_value(value)
        else:
            if isinstance(value, str):
                if '/' in value:
                    pass
                else:
                    dict_out[key] = value
            else:
                dict_out[key] = value
    return dict_out


def remove_paths(dict_in):
    dict_out = {}
    for key, value in dict_in.items():
        if isinstance(value, dict):
            for k, v in value.items():
                dict_out[key] = check_value(value)
        else:
            if isinstance(value, str):
                if '/' in value:
                    pass
                else:
                    dict_out[key] = value
            else:
                dict_out[key] = value

    return dict_out


def get_size(item):
    return sys.getsizeof(item.copy()) * 10**-6


def get_chunk_size(item, chunk_size=2.0, thresh=0.1):
    item_size = get_size(item)
    item_len = len(item)
    chunk_start = int(item_len // (item_size / chunk_size))
    i = 0
    while True:
        if item_len % (chunk_start + i) == 0:
            chunk = chunk_start + i
            break
        elif item_len % (chunk_start - i) == 0:
            chunk = chunk_start - i
            break
        else:
            offset_size = get_size(item[:i].copy())
            if offset_size > chunk_size * thresh:
                chunk = chunk_start
                print('No unique chunk found')
                break
            else:
                i += 1

    chunk_size = get_size(item[:chunk].copy())
    print('Chunk size ({}, ) = {:.2f} MB'.format(chunk, chunk_size))
    return chunk


def round_to(num, round_val):
    round_num = num - num % round_val
    return int(round_num)


def calc_chunks(t_chunk, dtype, chunk_size=2*10**6, round_to=None):
    pixel_size = np.dtype(dtype).itemsize
    s_chunk = chunk_size / (t_chunk * pixel_size)

    if round_to:
        s_chunk = round_to(s_chunk, round_to)
    else:
        s_chunk = int(np.floor(s_chunk))

    return (t_chunk, s_chunk)


box_dir = '/Users/mrossol/Box/HSDS/wave/Hawaii'

# Hawaii
## Meta Data

In [18]:
meta_path = os.path.join(box_dir, 'hawaii_wave_meta.csv')
columns = {'dist': 'distance_to_shore', 'lat': 'latitude', 'lon': 'longitude'}
meta = pd.read_csv(meta_path).rename(columns=columns)
depth_path = os.path.join(box_dir, 'hawaii_depth.npy')
meta['water_depth'] = np.load(depth_path)
meta.head()

Unnamed: 0,gid,latitude,longitude,distance_to_shore,timezone,jurisdiction,water_depth
0,0,15.0,-164.0,821611.172208,-11,,5533.0
1,1,15.0,-163.953675,819447.646274,-11,,5546.794922
2,2,15.0,-163.907346,817307.563882,-11,,5577.151367
3,3,15.0,-163.861015,815191.11688,-11,,5616.104004
4,4,15.0,-163.814681,813098.45699,-11,,5672.62207


In [19]:
meta_path = os.path.join(box_dir, 'hawaii_wave_meta.csv')
columns = {'dist': 'distance_to_shore', 'lat': 'latitude', 'lon': 'longitude'}
meta = pd.read_csv(meta_path).rename(columns=columns)
depth_path = os.path.join(box_dir, 'hawaii_depth.npy')
meta['water_depth'] = np.load(depth_path)
meta = meta.set_index('gid')
mask = meta['jurisdiction'].isnull()
meta.loc[mask, 'jurisdiction']

meta_arr = to_records_array(meta)

out_path = os.path.join(box_dir, 'hawaii_wave_meta.npy')
np.save(out_path, meta_arr)
meta_arr

rec.array([(15.      , -164.     , 8.2161119e+05, -11, b'None', 5.5330000e+03),
           (15.      , -163.95367, 8.1944762e+05, -11, b'None', 5.5467949e+03),
           (15.      , -163.90735, 8.1730756e+05, -11, b'None', 5.5771514e+03),
           ...,
           (20.796572, -156.4993 , 2.6289202e+01, -10, b'Hawaii', 1.5534000e+00),
           (20.796776, -156.49643, 0.0000000e+00, -10, b'Hawaii', 1.7089000e+00),
           (20.796309, -156.4936 , 1.4618778e+01, -10, b'Hawaii', 2.7727001e+00)],
          dtype=[('latitude', '<f4'), ('longitude', '<f4'), ('distance_to_shore', '<f4'), ('timezone', '<i2'), ('jurisdiction', 'S7'), ('water_depth', '<f4')])

## Time_Index and Meta Chunk Sizes

In [20]:
# Hourly
time_index = np.array(pd.date_range('2012-01-01 00:00:00', '2012-12-31 23:00:00', freq='3h').astype(str),
                      'S20')

print(time_index.shape)
t_size = get_size(time_index)
print('time_index size = {:.2f} MB'.format(t_size))

(2928,)
time_index size = 0.06 MB


In [21]:
meta_path = os.path.join(box_dir, 'hawaii_wave_meta.npy')
meta_data = np.load(meta_path)

m_size = sys.getsizeof(meta_data) * 10**-6
print('meta size = {:.2f} MB'.format(m_size))

m_chunks = get_chunk_size(meta_data, thresh=0.1)

meta size = 17.51 MB
No unique chunk found
Chunk size (79999, ) = 2.00 MB


In [22]:
meta_path = os.path.join(box_dir, 'hawaii_wave_meta.npy')
meta_data = np.load(meta_path, allow_pickle=True)
lat_lon = pd.DataFrame(meta_data)
lat_lon = lat_lon[['latitude', 'longitude']].values.copy()

c_size = sys.getsizeof(lat_lon) * 10**-6
print('lat_lon size = {:.2f} MB'.format(c_size))

c_chunks = get_chunk_size(lat_lon, thresh=0.1)

lat_lon size = 5.60 MB
No unique chunk found
Chunk size (249995, ) = 2.00 MB


## Variable Attributes

In [23]:
names = {'depth': 'water_depth'}

descriptions = {'depth': 'Grid node depth',
         'mean_wave_direction': 'Direction Normal to the Wave Crests',
         'significant_wave_height': 'Calculated as the zeroth spectral moment (i.e., H_m0)',
         'mean_absolute_period': 'Resolved Spectral Moment (m_0/m_1)',
         'peak_period': 'The period associated with the maximum value of the wave energy spectrum',
         'mean_zero-crossing_period': 'Total wave energy flux from all directions',
         'energy_period': 'Spectral width characterizes the relative spreading of energy in the wave spectrum. Large values indicate a wider spectral peak',
         'directionality_coefficient': 'Fraction of total wave energy travelling in the "direction of maximum wave power" direction',
         'maximum_energy_direction': 'The direction from which the most wave energy is travelling',
         'omni-directional_wave_power': 'Total wave energy flux from all directions',
         'spectral_width': 'Spectral width characterizes the relative spreading of energy in the wave spectrum. Large values indicate a wider spectral peak',
         'maximum_energy': 'Maximum directionally resolved wave energy'}

units = {'depth': 'm',
         'mean_wave_direction': 'deg',
         'significant_wave_height': 'm',
         'mean_absolute_period': 's',
         'peak_period': 's',
         'mean_zero-crossing_period': 's',
         'energy_period': 's',
         'directionality_coefficient': '',
         'maximum_energy_direction': 'deg',
         'omni-directional_wave_power': 'W/m',
         'spectral_width': '',
         'maximum_energy': 'W/m'}

src_names = {k : k for k in names}

SWAN_names = {'depth': 'DEPTH',
              'mean_wave_direction': 'DIR',
              'significant_wave_height': 'HSIGN',
              'mean_absolute_period': 'PER',
              'peak_period': 'RTP',
              'mean_zero-crossing_period': 'TMM10',
              'energy_period': 'TM02',
              'directionality_coefficient': '',
              'maximum_energy_direction': '',
              'omni-directional_wave_power': '',
              'spectral_width': '',
              'maximum_energy': 'jdmax'}

IEC_names = {'depth': 'h',
             'mean_wave_direction': 'Sigma',
             'significant_wave_height': 'H_s',
             'mean_absolute_period': 'T_p',
             'peak_period': 'T_p',
             'mean_zero-crossing_period': 'T_02',
             'energy_period': 'T_e',
             'directionality_coefficient': 'd',
             'maximum_energy_direction': 'Jsigma_Jmax',
             'omni-directional_wave_power': 'J',
             'spectral_width': 'epsilon_o',
             'maximum_energy': 'J_sigma_jdmax'}

references = {'depth': 'SWAN Manual',
              'mean_wave_direction': 'SWAN Manual',
              'significant_wave_height': 'SWAN Manual, IEC62600-101',
              'mean_absolute_period': 'SWAN Manual',
              'peak_period': 'SWAN Manual',
              'mean_zero-crossing_period': 'SWAN Manual, IEC62600-101',
              'energy_period': 'IEC62600-101',
              'directionality_coefficient': 'IEC62600-101',
              'maximum_energy_direction': 'IEC62600-101',
              'omni-directional_wave_power': 'IEC62600-101',
              'spectral_width': 'IEC62600-101',
              'maximum_energy': 'SWAN Manual, IEC62600-101'}

dimensions = {'depth': ['position'],
              'mean_wave_direction': ['time', 'position'],
              'significant_wave_height': ['time', 'position'],
              'mean_absolute_period': ['time', 'position'],
              'peak_period': ['time', 'position'],
              'mean_zero-crossing_period': ['time', 'position'],
              'energy_period': ['time', 'position'],
              'directionality_coefficient': ['time', 'position'],
              'maximum_energy_direction': ['time', 'position'],
              'omni-directional_wave_power': ['time', 'position'],
              'spectral_width': ['time', 'position'],
              'maximum_energy': ['time', 'position']}

In [24]:
path = os.path.join(cwd, 'h5_attrs/hawaii_h5_attrs.json')
wave_attrs = pd.read_json(path)

hawaii_vars = [v for v in wave_attrs.index
               if v not in ['coordinates', 'time_index']]

overlap_vars = [v for v in units.keys() if v in hawaii_vars]
new_vars = [v for v in hawaii_vars if v not in units]
missing_vars = [v for v in units.keys() if v not in hawaii_vars]

print('overlaping variables:\n{}'.format(overlap_vars))
print('new variables:\n{}'.format(new_vars))
print('missing variables:\n{}'.format(missing_vars))

overlaping variables:
['depth', 'mean_wave_direction', 'significant_wave_height', 'mean_absolute_period', 'peak_period', 'energy_period', 'directionality_coefficient', 'maximum_energy_direction', 'omni-directional_wave_power', 'spectral_width', 'maximum_energy']
new variables:
['latitudinal_surface_wind_force', 'latitudinal_wind_velocity', 'longitudinal_surface_wind_force', 'longitudinal_wind_velocity']
missing variables:
['mean_zero-crossing_period']


In [25]:
path = os.path.join(cwd, 'h5_attrs/hawaii_h5_attrs.json')
wave_attrs = pd.read_json(path)
hawaii_vars =  list(units.keys()) + ['time_index', 'coordinates']
mask = wave_attrs.index.isin(hawaii_vars)
wave_attrs = wave_attrs.loc[mask]

wave_attrs['name'] = None
wave_attrs['attrs'] = None
wave_attrs['chunks'] = None

# 8 week hourly chunks
t_chunk = int(8 * 7 * 24 / 3)
for var, row in wave_attrs.iterrows():
    
    if var == 'time_index':
        wave_attrs.at[var, 'attrs'] = {'freq': '3h', 'timezone': 'UTC', 'units': 'GMT', 'dimensions': ['time']}
        wave_attrs.at[var, 'chunks'] = None
    elif var == 'coordinates':
        wave_attrs.at[var, 'chunks'] = (249995, 2)
        attrs  = {'description': '(latitude, longitude) using Datum: NAD83',
                  'src_name': '(Xp, Yp)',
                  'units': '(deg, deg)',
                  'dimensions': ['position']}
        wave_attrs.at[var, 'attrs'] = attrs
        wave_attrs.at[var, 'dtype'] = 'float32'
    else:
        if var == 'depth':
            chunks = None
        else:
            chunks = calc_chunks(t_chunk, 'float32')

        wave_attrs.at[var, 'chunks'] = chunks
        wave_attrs.at[var , 'name'] = names.get(var, None)
        wave_attrs.at[var, 'dtype'] = 'float32'
        wave_attrs.at[var, 'attrs'] = {'description': descriptions[var],
                                       'dimensions': dimensions[var],
                                       'units': units[var],
                                       'SWAN_name': SWAN_names[var],
                                       'IEC_name': IEC_names[var]}

# Meta data
wave_attrs.at['meta', 'chunks'] = (79999, )
wave_attrs.at['meta', 'dtype'] = None
wave_attrs.at['meta', 'name'] = None
wave_attrs.at['meta', 'attrs'] = {'dimensions': ['position']}

# Global attributes
wave_attrs.at['global', 'chunks'] = None
wave_attrs.at['global', 'dtype'] = None
wave_attrs.at['global', 'name'] = None
wave_attrs.at['global', 'attrs'] = {
    'ref_SWAN-Manual': "SWAN Team, SWAN: User Manual, Delft University of Technology, Delft, The Netherlands, Cycle III Version 41.31, 2019.",
    "ref_IEC62600-101": "International Electrotechnical Commission, Marine energy - Wave, tidal and other water current converters - Part 101: Wave energy resource assessment and characterization, Technical Specification 62600–101, 2015.",
    "ref_Wu-Wang-Yang-Garcia-Medina-2020": "W.C. Wu, T. Wang, Z. Yang, and G. García-Medina, “Development and validation of a high-resolution regional wave hindcast model for U.S. West Coast wave resource characterization,” Renewable Energy, vol. 152, pp. 736–753, Jun. 2020.",
    "source": "PNNL2019", "version": "v1.0.0"}

path = os.path.join(cwd, 'hsds_attrs/hawaii_hsds_attrs.json')
wave_attrs.to_json(path, indent=4)
wave_attrs

Unnamed: 0,attrs,dtype,chunks,name
coordinates,"{'description': '(latitude, longitude) using D...",float32,"(249995, 2)",
depth,"{'description': 'Grid node depth', 'dimensions...",float32,,water_depth
directionality_coefficient,{'description': 'Fraction of total wave energy...,float32,"(448, 1116)",
energy_period,{'description': 'Spectral width characterizes ...,float32,"(448, 1116)",
maximum_energy,{'description': 'Maximum directionally resolve...,float32,"(448, 1116)",
maximum_energy_direction,{'description': 'The direction from which the ...,float32,"(448, 1116)",
mean_absolute_period,{'description': 'Resolved Spectral Moment (m_0...,float32,"(448, 1116)",
mean_wave_direction,{'description': 'Direction Normal to the Wave ...,float32,"(448, 1116)",
omni-directional_wave_power,{'description': 'Total wave energy flux from a...,float32,"(448, 1116)",
peak_period,{'description': 'The period associated with th...,float32,"(448, 1116)",


## Virtual Buoy

In [4]:
descriptions = {'water_depth': 'Grid node depth',
         'mean_wave_direction': 'Direction Normal to the Wave Crests',
         'significant_wave_height': 'Calculated as the zeroth spectral moment (i.e., H_m0)',
         'mean_absolute_period': 'Resolved Spectral Moment (m_0/m_1)',
         'peak_period': 'The period associated with the maximum value of the wave energy spectrum',
         'mean_zero-crossing_period': 'Total wave energy flux from all directions',
         'energy_period': 'Spectral width characterizes the relative spreading of energy in the wave spectrum. Large values indicate a wider spectral peak',
         'directionality_coefficient': 'Fraction of total wave energy travelling in the "direction of maximum wave power" direction',
         'maximum_energy_direction': 'The direction from which the most wave energy is travelling',
         'omni-directional_wave_power': 'Total wave energy flux from all directions',
         'spectral_width': 'Spectral width characterizes the relative spreading of energy in the wave spectrum. Large values indicate a wider spectral peak',
         'directional_wave_spectrum': 'Variance density over the i^th frequency and j^th direction (m^2Hz^-1deg^-1)',
         'direction': 'direction of wave propagation based on epsg:4326 (deg)',
         'frequency': 'i^th discrete frequency (Hz)',
         'frequency_bin_edges': 'Frequency bin definition: [Low Edge, High Edge)'}

units = {'water_depth': 'm',
         'mean_wave_direction': 'deg',
         'significant_wave_height': 'm',
         'mean_absolute_period': 's',
         'peak_period': 's',
         'mean_zero-crossing_period': 's',
         'energy_period': 's',
         'directionality_coefficient': '',
         'maximum_energy_direction': 'deg',
         'omni-directional_wave_power': 'W/m',
         'spectral_width': '',
         'directional_wave_spectrum': 'm^2 Hz^-1 deg^-1',
         'direction': 'deg',
         'frequency': 'Hz',
         'frequency_bin_edges': 'Hz'}

SWAN_names = {'water_depth': 'DEPTH',
         'mean_wave_direction': 'DIR',
         'significant_wave_height': 'HSIGN',
         'mean_absolute_period': 'PER',
         'peak_period': 'RTP',
         'mean_zero-crossing_period': 'TMM10',
         'energy_period': 'TM02',
         'directionality_coefficient': '',
         'maximum_energy_direction': '',
         'omni-directional_wave_power': '',
         'spectral_width': '',
         'directional_wave_spectrum': 'energy',
         'frequency': 'frequency',
         'direction': 'direction',
         'frequency_bin_edges': ''}

IEC_names = {'water_depth': 'h',
         'mean_wave_direction': 'Sigma',
         'significant_wave_height': 'H_s',
         'mean_absolute_period': 'T_p',
         'peak_period': 'T_p',
         'mean_zero-crossing_period': 'T_02',
         'energy_period': 'T_e',
         'directionality_coefficient': 'd',
         'maximum_energy_direction': 'Jsigma_Jmax',
         'omni-directional_wave_power': 'J',
         'spectral_width': 'epsilon_o',
         'directional_wave_spectrum': 'S_ij',
         'direction': 'Sigma',
         'frequency': 'f_i',
         'frequency_bin_edges': ''}

references = {'water_depth': 'SWAN Manual',
         'mean_wave_direction': 'SWAN Manual',
         'significant_wave_height': 'SWAN Manual, IEC62600-101',
         'mean_absolute_period': 'SWAN Manual',
         'peak_period': 'SWAN Manual',
         'mean_zero-crossing_period': 'SWAN Manual, IEC62600-101',
         'energy_period': 'IEC62600-101',
         'directionality_coefficient': 'IEC62600-101',
         'maximum_energy_direction': 'IEC62600-101',
         'omni-directional_wave_power': 'IEC62600-101',
         'spectral_width': 'IEC62600-101',
         'directional_wave_spectrum': 'IEC62600-101',
         'direction': 'IEC62600-101',
         'frequency': 'IEC62600-101',
         'frequency_bin_edges': 'IEC62600-101'}

dimensions = {'water_depth': ['position'],
         'mean_wave_direction': ['time', 'position'],
         'significant_wave_height': ['time', 'position'],
         'mean_absolute_period': ['time', 'position'],
         'peak_period': ['time', 'position'],
         'mean_zero-crossing_period': ['time', 'position'],
         'energy_period': ['time', 'position'],
         'directionality_coefficient': ['time', 'position'],
         'maximum_energy_direction': ['time', 'position'],
         'omni-directional_wave_power': ['time', 'position'],
         'spectral_width': ['time', 'position'],
         'directional_wave_spectrum': ['time', 'frequency', 'direction', 'position'],
         'direction': ['direction'],
         'frequency': ['frequency'],
         'frequency_bin_edges': ['frequency', '[Low Edge, High Edge)']}

In [42]:
path = os.path.join(cwd, 'h5_attrs/hawaii_buoy_h5_attrs.json')
wave_attrs = pd.read_json(path)

alaska_vars = [v for v in wave_attrs.index
               if v not in ['coordinates', 'time_index']]

overlap_vars = [v for v in units.keys() if v in alaska_vars]
new_vars = [v for v in alaska_vars if v not in units]
missing_vars = [v for v in units.keys() if v not in alaska_vars]

print('overlaping variables:\n{}'.format(overlap_vars))
print('new variables:\n{}'.format(new_vars))
print('missing variables:\n{}'.format(missing_vars))


overlaping variables:
['water_depth', 'mean_wave_direction', 'significant_wave_height', 'peak_period', 'mean_zero-crossing_period', 'energy_period', 'maximum_energy_direction', 'omni-directional_wave_power', 'spectral_width', 'directional_wave_spectrum', 'direction', 'frequency']
new variables:
[]
missing variables:
['mean_absolute_period', 'directionality_coefficient', 'frequency_bin_edges']


In [27]:
meta_path = os.path.join(box_dir, 'hawaii_wave_meta.csv')
columns = {'dist': 'distance_to_shore', 'depth': 'water_depth',
           'lat': 'latitude', 'lon': 'longitude'}
meta = pd.read_csv(meta_path).rename(columns=columns)
meta = meta.set_index('gid')

path = os.path.join(box_dir, 'hawaii_buoy_coords.npy')
coords = np.load(path)
coords[:, 1] -= 360

tree = cKDTree(meta[['latitude', 'longitude']].values)
_, pos = tree.query(coords)

buoy_meta = meta.loc[pos]
buoy_meta['latitude'] = coords[:, 0]
buoy_meta['longitude'] = coords[:, 1]

buoy_meta

Unnamed: 0_level_0,latitude,longitude,distance_to_shore,timezone,jurisdiction
gid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
118323,22.750000,-158.000000,97417.036400,-10,Federal
609041,21.586555,-157.864456,1702.954348,-10,Hawaii
633137,21.465702,-157.750656,1161.650459,-10,Hawaii
628918,21.474234,-157.753769,1428.830179,-10,Hawaii
631049,21.478683,-157.761261,1629.448483,-10,Hawaii
...,...,...,...,...,...
307641,21.666903,-160.563126,2181.003032,-10,Hawaii
320014,21.678698,-160.548859,2155.391914,-10,Hawaii
324204,21.676561,-160.530685,2195.811066,-10,Hawaii
336903,21.662399,-160.520081,1954.017270,-10,Hawaii


In [28]:
meta_arr = to_records_array(buoy_meta)

out_path = os.path.join(box_dir, 'hawaii_buoy_meta.npy')
np.save(out_path, meta_arr)
meta_arr

rec.array([(22.75    , -158.     , 97417.04   , -10, b'Federal'),
           (21.586555, -157.86446,  1702.9543 , -10, b'Hawaii'),
           (21.465702, -157.75066,  1161.6505 , -10, b'Hawaii'),
           (21.474234, -157.75377,  1428.8302 , -10, b'Hawaii'),
           (21.478683, -157.76126,  1629.4485 , -10, b'Hawaii'),
           (18.880035, -155.71666,  4996.994  , -10, b'Hawaii'),
           (18.93806 , -155.78339,  5109.433  , -10, b'Hawaii'),
           (18.977991, -155.86662,  4780.6875 , -10, b'Hawaii'),
           (19.036366, -155.93301,  5020.8955 , -10, b'Hawaii'),
           (19.118437, -155.969  ,  5358.619  , -10, b'Hawaii'),
           (19.207495, -155.95602,  5328.23   , -10, b'Hawaii'),
           (19.296265, -155.94101,  5309.7354 , -10, b'Hawaii'),
           (19.383024, -155.95813,  5302.699  , -10, b'Hawaii'),
           (19.465124, -155.99388,  5153.343  , -10, b'Hawaii'),
           (19.55202 , -156.01677,  5242.8545 , -10, b'Hawaii'),
           (19.620691, -

In [29]:
meta_path = os.path.join(box_dir, 'hawaii_buoy_meta.npy')
meta_data = np.load(meta_path)

m_size = sys.getsizeof(meta_data) * 10**-6
print('meta size = {:.3f} MB'.format(m_size))

m_chunks = get_chunk_size(meta_data, thresh=0.1)

meta size = 0.019 MB
Chunk size (914, ) = 0.02 MB


In [30]:
meta_path = os.path.join(box_dir, 'hawaii_buoy_meta.npy')
meta_data = np.load(meta_path, allow_pickle=True)
lat_lon = pd.DataFrame(meta_data)
lat_lon = lat_lon[['latitude', 'longitude']].values.copy()

c_size = sys.getsizeof(lat_lon) * 10**-6
print('lat_lon size = {:.3f} MB'.format(c_size))

c_chunks = get_chunk_size(lat_lon, thresh=0.1)

lat_lon size = 0.007 MB
Chunk size (914, ) = 0.01 MB


In [39]:
dset_arr = np.ones((8760, 914), dtype='float32')
dset_size = sys.getsizeof(dset_arr) * 10**-6
print('dset size = {:.3f} MB'.format(dset_size))

dset_arr = np.ones((24*7*13, 200), dtype='float32')
dset_size = sys.getsizeof(dset_arr) * 10**-6
print('dset size = {:.3f} MB'.format(dset_size))


dset size = 32.027 MB
dset size = 1.747 MB


In [35]:
dset_arr = np.ones((8760, 31, 48, 914), dtype='float32')
dset_size = sys.getsizeof(dset_arr) * 10**-6
print('energy size = {:.3f} MB'.format(dset_size))

dset_arr = np.ones((12*7*24, 6, 8, 5), dtype='float32')
dset_size = sys.getsizeof(dset_arr) * 10**-6
print('energy size = {:.3f} MB'.format(dset_size))


energy size = 47655.521 MB
energy size = 1.936 MB


In [6]:
path = os.path.join(cwd, 'h5_attrs/hawaii_buoy_h5_attrs.json')
buoy_attrs = pd.read_json(path)

buoy_attrs['name'] = None
buoy_attrs['attrs'] = None

for var, row in buoy_attrs.iterrows():

    if var == 'time_index':
        buoy_attrs.at[var, 'attrs'] = {'freq': '1h', 'timezone': 'UTC', 'units': 'GMT', 'dimensions': ['time']}
        buoy_attrs.at[var, 'chunks'] = None
    elif var == 'coordinates':
        buoy_attrs.at[var, 'chunks'] = None
        attrs  = {'description': '(latitude, longitude) using Datum: NAD83',
                  'src_name': '(Xp, Yp)',
                  'units': '(deg, deg)',
                  'dimensions': ['position']}
        buoy_attrs.at[var, 'attrs'] = attrs
    else:
        if var == 'directional_wave_spectrum':
            chunks = (12 * 7 * 24, 6, 8, 5)
        elif var in ['water_depth', 'frequency', 'direction']:
            chunks = None
        else:
            chunks = 24 * 7 * 13, 200

        buoy_attrs.at[var, 'chunks'] = chunks
        buoy_attrs.at[var, 'dtype'] = 'float32'
        buoy_attrs.at[var, 'attrs'] = {'description': descriptions[var],
                                       'dimensions': dimensions[var],
                                       'units': units[var],
                                       'SWAN_name': SWAN_names[var],
                                       'IEC_name': IEC_names[var]}

# Meta data
buoy_attrs.at['meta', 'chunks'] = None
buoy_attrs.at['meta', 'dtype'] = None
buoy_attrs.at['meta', 'name'] = None
buoy_attrs.at['meta', 'attrs'] = {'dimensions': ['position']}

# Global attributes
buoy_attrs.at['global', 'chunks'] = None
buoy_attrs.at['global', 'dtype'] = None
buoy_attrs.at['global', 'name'] = None
buoy_attrs.at['global', 'attrs'] = {
    'ref_SWAN-Manual': "SWAN Team, SWAN: User Manual, Delft University of Technology, Delft, The Netherlands, Cycle III Version 41.31, 2019.",
    "ref_IEC62600-101": "International Electrotechnical Commission, Marine energy - Wave, tidal and other water current converters - Part 101: Wave energy resource assessment and characterization, Technical Specification 62600–101, 2015.",
    "ref_Wu-Wang-Yang-Garcia-Medina-2020": "W.C. Wu, T. Wang, Z. Yang, and G. García-Medina, “Development and validation of a high-resolution regional wave hindcast model for U.S. West Coast wave resource characterization,” Renewable Energy, vol. 152, pp. 736–753, Jun. 2020.",
    "source": "PNNL2019", "version": "v1.0.0"}

path = os.path.join(cwd, 'hsds_attrs/hawaii_buoy_hsds_attrs.json')
buoy_attrs.to_json(path, indent=4)
buoy_attrs


Unnamed: 0,attrs,dtype,chunks,name
coordinates,"{'description': '(latitude, longitude) using D...",float64,,
direction,{'description': 'direction of wave propagation...,float32,,
directional_wave_spectrum,{'description': 'Variance density over the i^t...,float32,"(2016, 6, 8, 5)",
energy_period,{'description': 'Spectral width characterizes ...,float32,"(2184, 200)",
frequency,{'description': 'i^th discrete frequency (Hz)'...,float32,,
maximum_energy_direction,{'description': 'The direction from which the ...,float32,"(2184, 200)",
mean_wave_direction,{'description': 'Direction Normal to the Wave ...,float32,"(2184, 200)",
mean_zero-crossing_period,{'description': 'Total wave energy flux from a...,float32,"(2184, 200)",
omni-directional_wave_power,{'description': 'Total wave energy flux from a...,float32,"(2184, 200)",
peak_period,{'description': 'The period associated with th...,float32,"(2184, 200)",
