Skip to content
This repository has been archived by the owner on Sep 2, 2023. It is now read-only.

Commit

Permalink
Merge pull request #9: stricter parsing to avoid silent errors; suppo…
Browse files Browse the repository at this point in the history
…rt "BrainVolStatsFixed" header
  • Loading branch information
fphammerle committed May 6, 2020
2 parents 7a3b034 + 759c02d commit 7a03663
Show file tree
Hide file tree
Showing 4 changed files with 165 additions and 20 deletions.
3 changes: 2 additions & 1 deletion .pylintrc
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
[MESSAGES CONTROL]

disable=missing-docstring
disable=bad-continuation, # black (will supersede yapf) https://github.com/PyCQA/pylint/pull/3571
missing-docstring
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,10 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## [Unreleased]
### Fixed
- fixed parsing of `BrainVolStatsFixed` header
(https://github.com/fphammerle/freesurfer-stats/pull/1 @soichih,
https://github.com/fphammerle/freesurfer-stats/pull/9)

## [1.1.0] - 2020-05-06
### Added
Expand Down
46 changes: 32 additions & 14 deletions freesurfer_stats/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@
import re
import typing

import numpy
import pandas

from freesurfer_stats.version import __version__
Expand Down Expand Up @@ -114,9 +115,25 @@ def _read_headers(self, stream: typing.TextIO) -> None:
@classmethod
def _format_column_name(cls, name: str, unit: typing.Optional[str]) -> str:
    """Derive a column name from a measurement's name and unit.

    The name is lower-cased. The unit is appended as a ``_<unit>`` suffix
    unless it is missing (``None``) or one of the placeholder values
    ``"unitless"`` / ``"NA"``. Finally, every match of
    ``cls._COLUMN_NAMES_NON_SAFE_REGEX`` is replaced by an underscore.

    :param name: measurement or column name as read from the stats file
    :param unit: unit string, or ``None`` when no unit is available
    :return: the formatted column name
    """
    column_name = name.lower()
    # `unit` is declared optional: without the explicit None check the
    # concatenation below would raise TypeError for a missing unit.
    if unit is not None and unit not in ("unitless", "NA"):
        column_name += "_" + unit
    return cls._COLUMN_NAMES_NON_SAFE_REGEX.sub("_", column_name)

@classmethod
def _parse_whole_brain_measurements_line(
    cls, line: str,
) -> typing.Tuple[str, numpy.ndarray]:
    """Parse one whole-brain measurement header line.

    The line is matched against ``cls._GENERAL_MEASUREMENTS_REGEX``, whose
    four groups are read as key, name, value and unit.

    :param line: header line to parse
    :return: tuple of the formatted column name and the numeric value
        (converted via ``pandas.to_numeric`` from a one-element list)
    :raises ValueError: if the line does not match the expected pattern,
        or if the value can not be converted to a number
    """
    parsed = cls._GENERAL_MEASUREMENTS_REGEX.match(line)
    if not parsed:
        raise ValueError("unexpected line: {!r}".format(line))
    key, name, value, unit = parsed.groups()
    is_not_vent_key = key == "SupraTentorialVolNotVent"
    if is_not_vent_key and name.lower() == "supratentorial volume":
        # NOTE(review): presumably this keeps the resulting column distinct
        # from the plain supratentorial volume measurement - confirm.
        name = name + " Without Ventricles"
    formatted_name = cls._format_column_name(name, unit)
    numeric_value = pandas.to_numeric([value], errors="raise")
    return formatted_name, numeric_value

@classmethod
def _read_column_attributes(cls, num: int, stream: typing.TextIO) \
Expand All @@ -140,17 +157,18 @@ def _read(self, stream: typing.TextIO) -> None:
self._read_headers(stream)
self.whole_brain_measurements = pandas.DataFrame()
line = self._read_header_line(stream)
while not line.startswith('NTableCols'):
match = self._GENERAL_MEASUREMENTS_REGEX.match(line)
if match:
key, name, value, unit = match.groups()
if key == 'SupraTentorialVolNotVent' and name.lower() == 'supratentorial volume':
name += ' Without Ventricles'
column_name = self._format_column_name(name, unit)
assert column_name not in self.whole_brain_measurements, \
(key, name, column_name, self.whole_brain_measurements)
self.whole_brain_measurements[column_name] \
= pandas.to_numeric([value], errors='raise')
while not line.startswith("NTableCols"):
if line.startswith("BrainVolStatsFixed"):
# https://surfer.nmr.mgh.harvard.edu/fswiki/BrainVolStatsFixed
assert (
line.startswith("BrainVolStatsFixed see ")
or line == "BrainVolStatsFixed-NotNeeded because voxelvolume=1mm3"
)
self.headers["BrainVolStatsFixed"] = line[len("BrainVolStatsFixed-") :]
else:
column_name, value = self._parse_whole_brain_measurements_line(line)
assert column_name not in self.whole_brain_measurements, column_name
self.whole_brain_measurements[column_name] = value
line = self._read_header_line(stream)
columns = self._read_column_attributes(
int(line[len('NTableCols '):]), stream)
Expand Down
132 changes: 127 additions & 5 deletions tests/test_cortical_parcellation_stats.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,16 +18,20 @@
import datetime
import os

import numpy
import pandas.util.testing
import pytest

from conftest import SUBJECTS_DIR
from freesurfer_stats import CorticalParcellationStats

# pylint: disable=too-many-arguments


@pytest.mark.parametrize(
('path', 'headers', 'hemisphere',
'whole_brain_measurements', 'structural_measurements'),
'whole_brain_measurements', 'structural_measurements_length',
'structural_measurements_subset'),
[(os.path.join(SUBJECTS_DIR, 'fabian', 'stats', 'lh.aparc.DKTatlas.stats.short'),
{'CreationTime': datetime.datetime(2019, 5, 9, 21, 5, 54, tzinfo=datetime.timezone.utc),
'generating_program': 'mris_anatomical_stats',
Expand Down Expand Up @@ -56,6 +60,7 @@
'supratentorial_volume_mm^3': 1172669.548920,
'supratentorial_volume_without_ventricles_mm^3': 1164180.548920,
'estimated_total_intracranial_volume_mm^3': 1670487.274486},
3,
[{'structure_name': 'caudalanteriorcingulate',
'number_of_vertices': 2061,
'surface_area_mm^2': 1472,
Expand Down Expand Up @@ -115,6 +120,7 @@
'supratentorial_volume_mm^3': 1172669.548920,
'supratentorial_volume_without_ventricles_mm^3': 1164180.548920,
'estimated_total_intracranial_volume_mm^3': 1670487.274486},
2,
[{'structure_name': 'bankssts',
'number_of_vertices': 1344,
'surface_area_mm^2': 825,
Expand All @@ -134,9 +140,72 @@
'integrated_rectified_mean_curvature_mm^-1': 0.110,
'integrated_rectified_gaussian_curvature_mm^-2': 0.021,
'folding_index': 3,
'intrinsic_curvature_index': 0.6}])],
'intrinsic_curvature_index': 0.6}]),
(os.path.join(
SUBJECTS_DIR, 'soichi', 'stats', 'rh.aparc.stats'),
{'CreationTime': datetime.datetime(2020, 5, 4, 22, 20, 26, tzinfo=datetime.timezone.utc),
'AnnotationFileTimeStamp': datetime.datetime(2020, 5, 4, 21, 58, 13),
'AnnotationFile': '../label/rh.aparc.annot',
'SUBJECTS_DIR': '/N/dc2/scratch/hayashis/bigred3-workflows'
'/5eb0689676c10ead933d673c/5eb068b076c10e7b013d673f',
'anatomy_type': 'surface',
'cmdline': 'mris_anatomical_stats -th3 -mgz -cortex ../label/rh.cortex.label '
'-f ../stats/rh.aparc.stats -b -a ../label/rh.aparc.annot -c '
'../label/aparc.annot.ctab output rh white',
'cvs_version': '7.0.0',
'generating_program': 'mris_anatomical_stats',
'hemi': 'rh',
'hostname': 'nid00762',
'machine': 'x86_64',
'mrisurf.c-cvs_version': '7.0.0',
'subjectname': 'output',
'sysname': 'Linux',
'user': 'hayashis',
'BrainVolStatsFixed': 'NotNeeded because voxelvolume=1mm3'},
'right',
{'white_surface_total_area_mm^2': 83579.2,
'mean_thickness_mm': 2.35815,
'brain_segmentation_volume_mm^3': 1169408.0,
'brain_segmentation_volume_without_ventricles_mm^3': 1157593.0,
'brain_segmentation_volume_without_ventricles_from_surf_mm^3': 1157593.0,
'total_cortical_gray_matter_volume_mm^3': 454587.696158,
'supratentorial_volume_mm^3': 1023873.0,
'supratentorial_volume_without_ventricles_mm^3': 1012058.0,
'estimated_total_intracranial_volume_mm^3': 1420434.160521},
34,
[{'structure_name': 'bankssts',
'number_of_vertices': 1094,
'surface_area_mm^2': 757,
'gray_matter_volume_mm^3': 1725,
'average_thickness_mm': 2.215,
'thickness_stddev_mm': 0.544,
'integrated_rectified_mean_curvature_mm^-1': 0.109,
'integrated_rectified_gaussian_curvature_mm^-2': 0.025,
'folding_index': 9,
'intrinsic_curvature_index': 1.1},
{'structure_name': 'caudalanteriorcingulate',
'number_of_vertices': 1137,
'surface_area_mm^2': 780,
'gray_matter_volume_mm^3': 2327,
'average_thickness_mm': 2.842,
'thickness_stddev_mm': 0.667,
'integrated_rectified_mean_curvature_mm^-1': 0.116,
'integrated_rectified_gaussian_curvature_mm^-2': 0.021,
'folding_index': 11,
'intrinsic_curvature_index': 1.0},
{'structure_name': 'caudalmiddlefrontal',
'number_of_vertices': 3126,
'surface_area_mm^2': 2218,
'gray_matter_volume_mm^3': 5978,
'average_thickness_mm': 2.447,
'thickness_stddev_mm': 0.605,
'integrated_rectified_mean_curvature_mm^-1': 0.122,
'integrated_rectified_gaussian_curvature_mm^-2': 0.024,
'folding_index': 28,
'intrinsic_curvature_index': 3.1}])],
)
def test_read(path, headers, hemisphere, whole_brain_measurements, structural_measurements):
def test_read(path, headers, hemisphere, whole_brain_measurements, structural_measurements_length,
structural_measurements_subset):
stats = CorticalParcellationStats.read(path)
assert headers == stats.headers
assert hemisphere == stats.hemisphere
Expand All @@ -159,10 +228,63 @@ def test_read(path, headers, hemisphere, whole_brain_measurements, structural_me
'folding_index',
'intrinsic_curvature_index',
]
assert len(stats.structural_measurements) == structural_measurements_length
pandas.util.testing.assert_frame_equal(
left=pandas.DataFrame(structural_measurements),
right=stats.structural_measurements,
left=pandas.DataFrame(structural_measurements_subset),
right=stats.structural_measurements.iloc[:3],
check_like=True, # ignore the order of index & columns
check_dtype=True,
check_names=True,
)


@pytest.mark.parametrize(
    ("path", "structural_measurements_length"),
    [
        (os.path.join(SUBJECTS_DIR, "soichi", "stats", filename), expected_length)
        for filename, expected_length in (
            ("lh.BA_exvivo.stats", 14),
            ("lh.BA_exvivo.thresh.stats", 14),
            ("lh.aparc.DKTatlas.stats", 31),
            ("lh.aparc.a2009s.stats", 74),
            ("lh.aparc.pial.stats", 34),
            ("lh.aparc.stats", 34),
            ("rh.BA_exvivo.stats", 14),
            ("rh.BA_exvivo.thresh.stats", 14),
            ("rh.aparc.DKTatlas.stats", 31),
            ("rh.aparc.a2009s.stats", 74),
            ("rh.aparc.pial.stats", 34),
            ("rh.aparc.stats", 34),
        )
    ],
)
def test_read_structural_measurements_length(path, structural_measurements_length):
    """Check that each stats file can be read and yields the expected row count."""
    # simple test to verify no exception gets raised, see test_read for comprehensive test
    parsed_stats = CorticalParcellationStats.read(path)
    row_count = len(parsed_stats.structural_measurements)
    assert row_count == structural_measurements_length


@pytest.mark.parametrize(
    ("line", "expected_column_name", "expected_value"),
    [
        (
            "Measure Cortex, CortexVol Total cortical gray matter volume, 553998.311189, mm^3",
            "total_cortical_gray_matter_volume_mm^3",
            553998.311189,
        )
    ],
)
def test__parse_whole_brain_measurements_line(
    line, expected_column_name, expected_value
):
    """Check the column name and numeric value parsed from a valid line."""
    # pylint: disable=protected-access
    parse_line = CorticalParcellationStats._parse_whole_brain_measurements_line
    parsed_name, parsed_value = parse_line(line)
    assert parsed_name == expected_column_name
    # the parser returns a one-element array, hence the list on the right
    assert numpy.allclose(parsed_value, [expected_value])


@pytest.mark.parametrize(
    "line",
    [
        # same shape as a valid measurement line, but without the trailing unit field
        "Measure Cortex, CortexVol Total cortical gray matter volume, 553998.311189",
    ],
)
def test__parse_whole_brain_measurements_line_parse_error(line):
    """Check that a malformed measurement line is rejected with ValueError."""
    # pylint: disable=protected-access
    parse_line = CorticalParcellationStats._parse_whole_brain_measurements_line
    with pytest.raises(ValueError):
        parse_line(line)

0 comments on commit 7a03663

Please sign in to comment.