Skip to content

Commit

Permalink
Merge pull request #480 from rainwoodman/bigfile-header-from-root
Browse files Browse the repository at this point in the history
bigfile shall look for header starting from root.
  • Loading branch information
rainwoodman authored Apr 30, 2018
2 parents 41c17d0 + 8df623b commit 16237f3
Showing 1 changed file with 20 additions and 6 deletions.
26 changes: 20 additions & 6 deletions nbodykit/io/bigfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,13 +30,14 @@ class BigFile(FileType):
the name of the directory holding the bigfile data
exclude : list of str, optional
the data sets to exclude from loading within bigfile; default
is the header
is the header. If any list is given, the name of the header column
must be given too if it is not part of the data set.
header : str, optional
the path to the header; default is to use a column 'Header'.
It is relative to the file, not the dataset.
dataset : str
load a specific dataset from the bigfile; default is to starting
from the root.
find columns from a specific dataset in the bigfile;
the default is to start looking for columns from the root.
"""
def __init__(self, path, exclude=None, header=Automatic, dataset='./'):

Expand All @@ -53,11 +54,10 @@ def __init__(self, path, exclude=None, header=Automatic, dataset='./'):
# the file path
with bigfile.BigFile(filename=path) as ff:
columns = ff[self.dataset].blocks
if header is Automatic:
for header in ['Header', 'header', '.']:
if header in columns: break
header = self._find_header(header, ff)

if exclude is None:
# by default exclude header only.
exclude = [header]

columns = list(set(columns) - set(exclude))
Expand All @@ -81,6 +81,20 @@ def __init__(self, path, exclude=None, header=Automatic, dataset='./'):
else:
self.attrs[k] = numpy.array(attrs[k], copy=True)

def _find_header(self, header, ff):
    """
    Resolve the name of the header block.

    An explicitly given ``header`` is returned untouched. When it is
    ``Automatic``, the names 'Header', 'header' and '.' are tried in that
    order against ``ff.blocks`` and the first one present is returned;
    '.' is the result when none of them is listed.
    """
    if header is not Automatic:
        # Deliberately no membership check against ff.blocks: a header
        # given by the caller can be nested deep and then would not show
        # up in ff.blocks. Validating where the header is actually opened
        # (try/except) may work better than asserting here.
        return header

    candidates = ('Header', 'header', '.')
    return next((name for name in candidates if name in ff.blocks),
                candidates[-1])

def read(self, columns, start, stop, step=1):
"""
Read the specified column(s) over the given range,
Expand Down

0 comments on commit 16237f3

Please sign in to comment.