Merge pull request #9: stricter parsing to avoid silent errors; support "BrainVolStatsFixed" header
fphammerle · May 6, 2020 · 7a03663 · 7a03663
2 parents 7a3b034 + 759c02d
commit 7a03663
Show file tree

Hide file tree

Showing 4 changed files with 165 additions and 20 deletions.
diff --git a/.pylintrc b/.pylintrc
@@ -1,3 +1,4 @@
 [MESSAGES CONTROL]
 
-disable=missing-docstring
+disable=bad-continuation, # black (will supersede yapf) https://github.com/PyCQA/pylint/pull/3571
+        missing-docstring
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -5,6 +5,10 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
 ## [Unreleased]
+### Fixed
+- fixed parsing of `BrainVolStatsFixed` header
+  (https://github.com/fphammerle/freesurfer-stats/pull/1 @soichih,
+  https://github.com/fphammerle/freesurfer-stats/pull/9)
 
 ## [1.1.0] - 2020-05-06
 ### Added

diff --git a/freesurfer_stats/__init__.py b/freesurfer_stats/__init__.py
@@ -50,6 +50,7 @@
 import re
 import typing
 
+import numpy
 import pandas
 
 from freesurfer_stats.version import __version__
@@ -114,9 +115,25 @@ def _read_headers(self, stream: typing.TextIO) -> None:
     @classmethod
     def _format_column_name(cls, name: str, unit: typing.Optional[str]) -> str:
         column_name = name.lower()
-        if unit not in ['unitless', 'NA']:
-            column_name += '_' + unit
-        return cls._COLUMN_NAMES_NON_SAFE_REGEX.sub('_', column_name)
+        if unit not in ["unitless", "NA"]:
+            column_name += "_" + unit
+        return cls._COLUMN_NAMES_NON_SAFE_REGEX.sub("_", column_name)
+
+    @classmethod
+    def _parse_whole_brain_measurements_line(
+        cls, line: str,
+    ) -> typing.Tuple[str, numpy.ndarray]:
+        match = cls._GENERAL_MEASUREMENTS_REGEX.match(line)
+        if not match:
+            raise ValueError("unexpected line: {!r}".format(line))
+        key, name, value, unit = match.groups()
+        if (
+            key == "SupraTentorialVolNotVent"
+            and name.lower() == "supratentorial volume"
+        ):
+            name += " Without Ventricles"
+        column_name = cls._format_column_name(name, unit)
+        return column_name, pandas.to_numeric([value], errors="raise")
 
     @classmethod
     def _read_column_attributes(cls, num: int, stream: typing.TextIO) \
@@ -140,17 +157,18 @@ def _read(self, stream: typing.TextIO) -> None:
         self._read_headers(stream)
         self.whole_brain_measurements = pandas.DataFrame()
         line = self._read_header_line(stream)
-        while not line.startswith('NTableCols'):
-            match = self._GENERAL_MEASUREMENTS_REGEX.match(line)
-            if match:
-                key, name, value, unit = match.groups()
-                if key == 'SupraTentorialVolNotVent' and name.lower() == 'supratentorial volume':
-                    name += ' Without Ventricles'
-                column_name = self._format_column_name(name, unit)
-                assert column_name not in self.whole_brain_measurements, \
-                    (key, name, column_name, self.whole_brain_measurements)
-                self.whole_brain_measurements[column_name] \
-                    = pandas.to_numeric([value], errors='raise')
+        while not line.startswith("NTableCols"):
+            if line.startswith("BrainVolStatsFixed"):
+                # https://surfer.nmr.mgh.harvard.edu/fswiki/BrainVolStatsFixed
+                assert (
+                    line.startswith("BrainVolStatsFixed see ")
+                    or line == "BrainVolStatsFixed-NotNeeded because voxelvolume=1mm3"
+                )
+                self.headers["BrainVolStatsFixed"] = line[len("BrainVolStatsFixed-") :]
+            else:
+                column_name, value = self._parse_whole_brain_measurements_line(line)
+                assert column_name not in self.whole_brain_measurements, column_name
+                self.whole_brain_measurements[column_name] = value
             line = self._read_header_line(stream)
         columns = self._read_column_attributes(
             int(line[len('NTableCols '):]), stream)

diff --git a/tests/test_cortical_parcellation_stats.py b/tests/test_cortical_parcellation_stats.py
@@ -18,16 +18,20 @@
 import datetime
 import os
 
+import numpy
 import pandas.util.testing
 import pytest
 
 from conftest import SUBJECTS_DIR
 from freesurfer_stats import CorticalParcellationStats
 
+# pylint: disable=too-many-arguments
+
 
 @pytest.mark.parametrize(
     ('path', 'headers', 'hemisphere',
-     'whole_brain_measurements', 'structural_measurements'),
+     'whole_brain_measurements', 'structural_measurements_length',
+     'structural_measurements_subset'),
     [(os.path.join(SUBJECTS_DIR, 'fabian', 'stats', 'lh.aparc.DKTatlas.stats.short'),
       {'CreationTime': datetime.datetime(2019, 5, 9, 21, 5, 54, tzinfo=datetime.timezone.utc),
        'generating_program': 'mris_anatomical_stats',
@@ -56,6 +60,7 @@
        'supratentorial_volume_mm^3': 1172669.548920,
        'supratentorial_volume_without_ventricles_mm^3': 1164180.548920,
        'estimated_total_intracranial_volume_mm^3': 1670487.274486},
+      3,
       [{'structure_name': 'caudalanteriorcingulate',
         'number_of_vertices': 2061,
         'surface_area_mm^2': 1472,
@@ -115,6 +120,7 @@
        'supratentorial_volume_mm^3': 1172669.548920,
        'supratentorial_volume_without_ventricles_mm^3': 1164180.548920,
        'estimated_total_intracranial_volume_mm^3': 1670487.274486},
+      2,
       [{'structure_name': 'bankssts',
         'number_of_vertices': 1344,
         'surface_area_mm^2': 825,
@@ -134,9 +140,72 @@
         'integrated_rectified_mean_curvature_mm^-1': 0.110,
         'integrated_rectified_gaussian_curvature_mm^-2': 0.021,
         'folding_index': 3,
-        'intrinsic_curvature_index': 0.6}])],
+        'intrinsic_curvature_index': 0.6}]),
+     (os.path.join(
+         SUBJECTS_DIR, 'soichi', 'stats', 'rh.aparc.stats'),
+      {'CreationTime': datetime.datetime(2020, 5, 4, 22, 20, 26, tzinfo=datetime.timezone.utc),
+       'AnnotationFileTimeStamp': datetime.datetime(2020, 5, 4, 21, 58, 13),
+       'AnnotationFile': '../label/rh.aparc.annot',
+       'SUBJECTS_DIR': '/N/dc2/scratch/hayashis/bigred3-workflows'
+                       '/5eb0689676c10ead933d673c/5eb068b076c10e7b013d673f',
+       'anatomy_type': 'surface',
+       'cmdline': 'mris_anatomical_stats -th3 -mgz -cortex ../label/rh.cortex.label '
+                  '-f ../stats/rh.aparc.stats -b -a ../label/rh.aparc.annot -c '
+                  '../label/aparc.annot.ctab output rh white',
+       'cvs_version': '7.0.0',
+       'generating_program': 'mris_anatomical_stats',
+       'hemi': 'rh',
+       'hostname': 'nid00762',
+       'machine': 'x86_64',
+       'mrisurf.c-cvs_version': '7.0.0',
+       'subjectname': 'output',
+       'sysname': 'Linux',
+       'user': 'hayashis',
+       'BrainVolStatsFixed': 'NotNeeded because voxelvolume=1mm3'},
+      'right',
+      {'white_surface_total_area_mm^2': 83579.2,
+       'mean_thickness_mm': 2.35815,
+       'brain_segmentation_volume_mm^3': 1169408.0,
+       'brain_segmentation_volume_without_ventricles_mm^3': 1157593.0,
+       'brain_segmentation_volume_without_ventricles_from_surf_mm^3': 1157593.0,
+       'total_cortical_gray_matter_volume_mm^3': 454587.696158,
+       'supratentorial_volume_mm^3': 1023873.0,
+       'supratentorial_volume_without_ventricles_mm^3': 1012058.0,
+       'estimated_total_intracranial_volume_mm^3': 1420434.160521},
+      34,
+      [{'structure_name': 'bankssts',
+        'number_of_vertices': 1094,
+        'surface_area_mm^2': 757,
+        'gray_matter_volume_mm^3': 1725,
+        'average_thickness_mm': 2.215,
+        'thickness_stddev_mm': 0.544,
+        'integrated_rectified_mean_curvature_mm^-1': 0.109,
+        'integrated_rectified_gaussian_curvature_mm^-2': 0.025,
+        'folding_index': 9,
+        'intrinsic_curvature_index': 1.1},
+       {'structure_name': 'caudalanteriorcingulate',
+        'number_of_vertices': 1137,
+        'surface_area_mm^2': 780,
+        'gray_matter_volume_mm^3': 2327,
+        'average_thickness_mm': 2.842,
+        'thickness_stddev_mm': 0.667,
+        'integrated_rectified_mean_curvature_mm^-1': 0.116,
+        'integrated_rectified_gaussian_curvature_mm^-2': 0.021,
+        'folding_index': 11,
+        'intrinsic_curvature_index': 1.0},
+       {'structure_name': 'caudalmiddlefrontal',
+        'number_of_vertices': 3126,
+        'surface_area_mm^2': 2218,
+        'gray_matter_volume_mm^3': 5978,
+        'average_thickness_mm': 2.447,
+        'thickness_stddev_mm': 0.605,
+        'integrated_rectified_mean_curvature_mm^-1': 0.122,
+        'integrated_rectified_gaussian_curvature_mm^-2': 0.024,
+        'folding_index': 28,
+        'intrinsic_curvature_index': 3.1}])],
 )
-def test_read(path, headers, hemisphere, whole_brain_measurements, structural_measurements):
+def test_read(path, headers, hemisphere, whole_brain_measurements, structural_measurements_length,
+              structural_measurements_subset):
     stats = CorticalParcellationStats.read(path)
     assert headers == stats.headers
     assert hemisphere == stats.hemisphere
@@ -159,10 +228,63 @@ def test_read(path, headers, hemisphere, whole_brain_measurements, structural_me
         'folding_index',
         'intrinsic_curvature_index',
     ]
+    assert len(stats.structural_measurements) == structural_measurements_length
     pandas.util.testing.assert_frame_equal(
-        left=pandas.DataFrame(structural_measurements),
-        right=stats.structural_measurements,
+        left=pandas.DataFrame(structural_measurements_subset),
+        right=stats.structural_measurements.iloc[:3],
         check_like=True,  # ignore the order of index & columns
         check_dtype=True,
         check_names=True,
     )
+
+
+@pytest.mark.parametrize(
+    ('path', 'structural_measurements_length'),
+    [(os.path.join(SUBJECTS_DIR, 'soichi', 'stats', 'lh.BA_exvivo.stats'), 14),
+     (os.path.join(SUBJECTS_DIR, 'soichi', 'stats', 'lh.BA_exvivo.thresh.stats'), 14),
+     (os.path.join(SUBJECTS_DIR, 'soichi', 'stats', 'lh.aparc.DKTatlas.stats'), 31),
+     (os.path.join(SUBJECTS_DIR, 'soichi', 'stats', 'lh.aparc.a2009s.stats'), 74),
+     (os.path.join(SUBJECTS_DIR, 'soichi', 'stats', 'lh.aparc.pial.stats'), 34),
+     (os.path.join(SUBJECTS_DIR, 'soichi', 'stats', 'lh.aparc.stats'), 34),
+     (os.path.join(SUBJECTS_DIR, 'soichi', 'stats', 'rh.BA_exvivo.stats'), 14),
+     (os.path.join(SUBJECTS_DIR, 'soichi', 'stats', 'rh.BA_exvivo.thresh.stats'), 14),
+     (os.path.join(SUBJECTS_DIR, 'soichi', 'stats', 'rh.aparc.DKTatlas.stats'), 31),
+     (os.path.join(SUBJECTS_DIR, 'soichi', 'stats', 'rh.aparc.a2009s.stats'), 74),
+     (os.path.join(SUBJECTS_DIR, 'soichi', 'stats', 'rh.aparc.pial.stats'), 34),
+     (os.path.join(SUBJECTS_DIR, 'soichi', 'stats', 'rh.aparc.stats'), 34)],
+)
+def test_read_structural_measurements_length(path, structural_measurements_length):
+    # simple test to verify no exception gets raised, see test_read for comprehensive test
+    stats = CorticalParcellationStats.read(path)
+    assert len(stats.structural_measurements) == structural_measurements_length
+
+
+@pytest.mark.parametrize(
+    ("line", "expected_column_name", "expected_value"),
+    [
+        (
+            "Measure Cortex, CortexVol Total cortical gray matter volume, 553998.311189, mm^3",
+            "total_cortical_gray_matter_volume_mm^3",
+            553998.311189,
+        )
+    ],
+)
+def test__parse_whole_brain_measurements_line(
+    line, expected_column_name, expected_value
+):
+    # pylint: disable=protected-access
+    column_name, value = CorticalParcellationStats._parse_whole_brain_measurements_line(
+        line,
+    )
+    assert column_name == expected_column_name
+    assert numpy.allclose(value, [expected_value])
+
+
+@pytest.mark.parametrize(
+    "line",
+    ["Measure Cortex, CortexVol Total cortical gray matter volume, 553998.311189",],
+)
+def test__parse_whole_brain_measurements_line_parse_error(line):
+    # pylint: disable=protected-access
+    with pytest.raises(ValueError):
+        CorticalParcellationStats._parse_whole_brain_measurements_line(line)