Merge pull request #480 from rainwoodman/bigfile-header-from-root

bigfile shall look for header starting from root.
bccp · Apr 30, 2018 · 16237f3 · 16237f3
2 parents 41c17d0 + 8df623b
commit 16237f3
Showing 1 changed file with 20 additions and 6 deletions.
diff --git a/nbodykit/io/bigfile.py b/nbodykit/io/bigfile.py
@@ -30,13 +30,14 @@ class BigFile(FileType):
         the name of the directory holding the bigfile data
     exclude : list of str, optional
         the data sets to exclude from loading within bigfile; default
-        is the header
+        is the header. If a list is given, it must also name the header
+        column, unless the header is meant to be loaded as data.
     header : str, optional
         the path to the header; default is to use a column 'Header'.
         It is relative to the file, not the dataset.
     dataset : str
-        load a specific dataset from the bigfile; default is to starting
-        from the root.
+        look for columns in a specific dataset of the bigfile;
+        the default is to look for columns starting from the root.
     """
     def __init__(self, path, exclude=None, header=Automatic, dataset='./'):
 
@@ -53,11 +54,10 @@ def __init__(self, path, exclude=None, header=Automatic, dataset='./'):
         # the file path
         with bigfile.BigFile(filename=path) as ff:
             columns = ff[self.dataset].blocks
-            if header is Automatic:
-                for header in ['Header', 'header', '.']:
-                    if header in columns: break
+            header = self._find_header(header, ff)
 
             if exclude is None:
+                # by default exclude header only.
                 exclude = [header]
 
             columns = list(set(columns) - set(exclude))
@@ -81,6 +81,20 @@ def __init__(self, path, exclude=None, header=Automatic, dataset='./'):
                 else:
                     self.attrs[k] = numpy.array(attrs[k], copy=True)
 
+    def _find_header(self, header, ff):
+        """ Find the header block, searching the file's root blocks by default. """
+        if header is Automatic:
+            for header in ['Header', 'header', '.']:
+                if header in ff.blocks: break
+
+        # Do not assert here: the header can be nested deep in the file, in
+        # which case it is not listed in ff.blocks. A try/except may work better.
+        #if not header in ff.blocks:
+        #    raise KeyError("header block `%s` is not defined in the bigfile. Candidates can be `%s`"
+        #            % (header, str(ff.blocks))
+
+        return header
+
     def read(self, columns, start, stop, step=1):
         """
         Read the specified column(s) over the given range,