Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

added HDF5 decoder #384

Merged
merged 1 commit into from
Feb 6, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions easyvvuq/decoders/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from .simple_csv import SimpleCSV
from .json import JSONDecoder
from .yaml import YAMLDecoder
from .hdf5 import HDF5

__copyright__ = """

Expand Down
109 changes: 109 additions & 0 deletions easyvvuq/decoders/hdf5.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
"""A Decoder for HDF5 format files.
"""

import os
import logging
import h5py
from easyvvuq import OutputType

__copyright__ = """

Copyright 2018 Robin A. Richardson, David W. Wright

This file is part of EasyVVUQ

EasyVVUQ is free software: you can redistribute it and/or modify
it under the terms of the Lesser GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

EasyVVUQ is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
Lesser GNU General Public License for more details.

You should have received a copy of the Lesser GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>.

"""
__license__ = "LGPL"


# Obtain the module logger from the logging registry rather than instantiating
# `logging.Logger` directly, so that it is attached to the logger hierarchy and
# honours whatever handlers/levels the application configures.
logger = logging.getLogger(__name__)


class HDF5:
    """HDF5 Decoder.

    Parameters
    ----------
    target_filename: str
        Filename of an HDF5 file to decode.
    output_columns: list
        A list of column names that will be selected to appear in the output.

    Raises
    ------
    RuntimeError
        If `output_columns` is empty.
    """

    def __init__(self, target_filename, output_columns):
        if len(output_columns) == 0:
            msg = "output_columns cannot be empty."
            logger.error(msg)
            raise RuntimeError(msg)
        self.target_filename = target_filename
        self.output_columns = output_columns
        self.output_type = OutputType('sample')

    @staticmethod
    def _get_output_path(run_info=None, outfile=None):
        """Constructs the path to the output file from the `outfile` name and the
        `run_dir` entry of the `run_info` retrieved from the database.

        Parameters
        ----------
        run_info: dict
            Run info as retrieved from the database. Must contain a `run_dir` entry.
        outfile: str
            Filename of the file to be parsed.

        Returns
        -------
        str
            `run_dir` joined with `outfile`. The result is only absolute if
            `run_dir` itself is absolute.

        Raises
        ------
        RuntimeError
            If the run directory does not exist.
        """
        run_path = run_info['run_dir']
        if not os.path.isdir(run_path):
            raise RuntimeError(f"Run directory does not exist: {run_path}")
        return os.path.join(run_path, outfile)

    def parse_sim_output(self, run_info=None):
        """Parses the HDF5 file and converts it to the EasyVVUQ internal dictionary based
        format. The file is parsed in such a way that each column will appear as a vector
        QoI in the output dictionary.

        For example if the file contains the datasets `a` = [1, 3] and `b` = [2, 4],
        and both `a` and `b` are specified as `output_columns`, the output will look
        as follows
        {'a': [1, 3], 'b': [2, 4]}.

        Parameters
        ----------
        run_info: dict
            Information about the run (used to construct the path to the HDF5 file
            that needs decoding). Must contain a `run_dir` entry.

        Returns
        -------
        dict
            Maps every name in `output_columns` to the flattened contents of the
            corresponding entry in the file, as a plain Python list.

        Raises
        ------
        RuntimeError
            If the run directory does not exist or if one of the `output_columns`
            is not present in the file.
        """
        # A fresh dict per call instead of a shared mutable default argument.
        if run_info is None:
            run_info = {}

        out_path = self._get_output_path(run_info, self.target_filename)
        results = {}

        with h5py.File(out_path, 'r') as h5f:
            for column in self.output_columns:
                try:
                    # TODO: this will always flatten, but HDF5 could handle
                    # 2D or 3D arrays as well. Will probably break the analysis
                    # classes though, but maybe something to incorporate later.
                    results[column] = h5f[column][()].flatten().tolist()
                except KeyError as err:
                    # Chain the original lookup failure so the traceback keeps it.
                    raise RuntimeError(
                        f'column not found in the hdf5 file: {column}') from err

        return results
2 changes: 1 addition & 1 deletion easyvvuq/sampling/simplex_stochastic_collocation.py
Original file line number Diff line number Diff line change
Expand Up @@ -555,7 +555,7 @@ def check_LEC_j(self, p_j, v, S_j, n_mc, queue):
----------
p_j : int
The polynomial order of the j-th stencil.
v : array
v : array
The code samples.
S_j : array, shape (N + 1,)
The interpolation stencil of the j-th simplex element, expressed
Expand Down
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -23,3 +23,4 @@ dill
tqdm
qcg-pilotjob~=0.13.0
qcg-pilotjob-executor-api~=0.13.0
h5py
Binary file added tests/hdf5/test.hdf5
Binary file not shown.
24 changes: 24 additions & 0 deletions tests/test_decoder_hdf5.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
from easyvvuq.decoders.hdf5 import HDF5
import os
import numpy as np
import pytest


@pytest.fixture
def decoder():
    """Build an HDF5 decoder for `test.hdf5` selecting `array10` and `array2`."""
    selected_columns = ['array10', 'array2']
    return HDF5('test.hdf5', output_columns=selected_columns)


def test_hdf5(decoder):
    """Decode arrays of different length from the same file and check their values."""
    run_dir = os.path.join('tests', 'hdf5')
    decoded = decoder.parse_sim_output({'run_dir': run_dir})
    # Each column is expected to hold 0..length-1 as a plain list.
    for name, length in (('array10', 10), ('array2', 2)):
        assert decoded[name] == np.arange(length).tolist()


def test_get_output_path(decoder):
    """Check path construction for an existing run directory and the error for a missing one."""
    existing_dir = os.path.join('tests', 'hdf5')
    resolved = decoder._get_output_path({'run_dir': existing_dir}, 'test.hdf5')
    assert resolved == os.path.join('tests', 'hdf5', 'test.hdf5')

    # A run directory that does not exist must be rejected.
    missing_dir = os.path.join('hdf5')
    with pytest.raises(RuntimeError):
        decoder._get_output_path({'run_dir': missing_dir}, 'test.hdf5')
Loading