Skip to content
This repository has been archived by the owner on Sep 2, 2023. It is now read-only.

Commit

Permalink
refactor: wrap pandas.io.common.get_filepath_or_buffer to prepare for…
Browse files Browse the repository at this point in the history
… adding pandas v1.2.0 support
  • Loading branch information
fphammerle committed Dec 31, 2020
1 parent 3e21ed9 commit 53339f2
Show file tree
Hide file tree
Showing 3 changed files with 41 additions and 35 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/python.yml
Original file line number Diff line number Diff line change
Expand Up @@ -29,12 +29,12 @@ jobs:
- 0.23.*
- 0.24.*
- 0.25.*
- 1.*
- 1.1.*
exclude:
# https://travis-ci.org/github/fphammerle/freesurfer-stats/jobs/683777317#L208
# https://github.com/pandas-dev/pandas/commit/18efcb27361478daa3118079ecb166c733691ecb#diff-2eeaed663bd0d25b7e608891384b7298R814
- python-version: 3.5
pandas-version: 1.*
pandas-version: 1.1.*
- python-version: 3.7
pandas-version: 0.21.*
- python-version: 3.7
Expand Down
66 changes: 36 additions & 30 deletions freesurfer_stats/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,26 @@
from freesurfer_stats.version import __version__


def _get_filepath_or_buffer(
    path: typing.Union[str, pathlib.Path]
) -> typing.Tuple[typing.Any, bool]:  # (pandas._typing.FileOrBuffer, bool)
    """Resolve ``path`` via pandas and tell the caller whether to close it.

    Thin wrapper around ``pandas.io.common.get_filepath_or_buffer`` that
    copes with the two tuple lengths handled here: three elements, or three
    elements followed by a single trailing "should close" flag.

    Args:
        path: Location handed unchanged to pandas.

    Returns:
        A ``(path_or_buffer, should_close)`` pair.  ``should_close`` is the
        flag reported by pandas when one is returned; otherwise it is
        whether the resolved object exposes a ``close`` attribute.

    Raises:
        AssertionError: If pandas returns more than one element after the
            first three, i.e. a tuple layout this wrapper does not know.
    """
    # path_or_buffer: typing.Union[str, pathlib.Path, typing.IO[typing.AnyStr],
    #                              s3fs.S3File, gcsfs.GCSFile]
    # https://github.com/pandas-dev/pandas/blob/v0.25.3/pandas/io/parsers.py#L436
    # https://github.com/pandas-dev/pandas/blob/v0.25.3/pandas/_typing.py#L30
    path_or_buffer, _, _, *instructions = pandas.io.common.get_filepath_or_buffer(
        path
    )
    if not instructions:  # pragma: no cover
        # No explicit flag was returned, so fall back to duck typing.
        # https://github.com/pandas-dev/pandas/blob/v0.21.0/pandas/io/common.py#L171
        return path_or_buffer, hasattr(path_or_buffer, "close")
    # https://github.com/pandas-dev/pandas/blob/v0.25.3/pandas/io/common.py#L171
    if len(instructions) != 1:  # pragma: no cover
        # Raised explicitly (instead of using an `assert` statement) so the
        # check still runs when Python is started with -O.
        raise AssertionError(instructions)
    return path_or_buffer, instructions[0]


class CorticalParcellationStats:

_HEMISPHERE_PREFIX_TO_SIDE = {"lh": "left", "rh": "right"}
Expand Down Expand Up @@ -87,7 +107,7 @@ def _read_header_line(stream: typing.TextIO) -> str:

@classmethod
def _read_column_header_line(
cls, stream: typing.TextIO,
cls, stream: typing.TextIO
) -> typing.Tuple[int, str, str]:
line = cls._read_header_line(stream)
assert line.startswith("TableCol"), line
Expand All @@ -108,15 +128,15 @@ def _read_headers(self, stream: typing.TextIO) -> None:
attr_value = attr_value.strip("$").rstrip()
if attr_name == "CreationTime":
attr_dt = datetime.datetime.strptime(
attr_value, "%Y/%m/%d-%H:%M:%S-%Z",
attr_value, "%Y/%m/%d-%H:%M:%S-%Z"
)
if attr_dt.tzinfo is None:
assert attr_value.endswith("-GMT")
attr_dt = attr_dt.replace(tzinfo=datetime.timezone.utc)
attr_value = attr_dt
if attr_name == "AnnotationFileTimeStamp":
attr_value = datetime.datetime.strptime(
attr_value, "%Y/%m/%d %H:%M:%S",
attr_value, "%Y/%m/%d %H:%M:%S"
)
self.headers[attr_name] = attr_value

Expand All @@ -129,7 +149,7 @@ def _format_column_name(cls, name: str, unit: typing.Optional[str]) -> str:

@classmethod
def _parse_whole_brain_measurements_line(
cls, line: str,
cls, line: str
) -> typing.Tuple[str, numpy.ndarray]:
match = cls._GENERAL_MEASUREMENTS_REGEX.match(line)
if not match:
Expand All @@ -145,7 +165,7 @@ def _parse_whole_brain_measurements_line(

@classmethod
def _read_column_attributes(
cls, num: int, stream: typing.TextIO,
cls, num: int, stream: typing.TextIO
) -> typing.List[typing.Dict[str, str]]:
columns = []
for column_index in range(1, int(num) + 1):
Expand Down Expand Up @@ -193,31 +213,17 @@ def _read(self, stream: typing.TextIO) -> None:

@classmethod
def read(cls, path: typing.Union[str, pathlib.Path]) -> "CorticalParcellationStats":
# path_or_buffer: typing.Union[str, pathlib.Path, typing.IO[typing.AnyStr],
# s3fs.S3File, gcsfs.GCSFile]
# https://github.com/pandas-dev/pandas/blob/v0.25.3/pandas/io/parsers.py#L436
# https://github.com/pandas-dev/pandas/blob/v0.25.3/pandas/_typing.py#L30
(
path_or_buffer,
_,
_,
*instructions,
) = pandas.io.common.get_filepath_or_buffer(path)
# https://github.com/pandas-dev/pandas/blob/v0.25.3/pandas/io/common.py#L171
# https://github.com/pandas-dev/pandas/blob/v0.21.0/pandas/io/common.py#L171
if instructions: # pragma: no cover
assert len(instructions) == 1, instructions
should_close = instructions[0]
else: # pragma: no cover
should_close = hasattr(path_or_buffer, "close")
path_or_buffer, should_close = _get_filepath_or_buffer(path)
stats = cls()
if hasattr(path_or_buffer, "readline"):
# pylint: disable=protected-access
stats._read(io.TextIOWrapper(path_or_buffer))
else:
with open(path_or_buffer, "r") as stream:
try:
if hasattr(path_or_buffer, "readline"):
# pylint: disable=protected-access
stats._read(stream)
if should_close:
path_or_buffer.close()
stats._read(io.TextIOWrapper(path_or_buffer))
else:
with open(path_or_buffer, "r") as stream:
# pylint: disable=protected-access
stats._read(stream)
finally:
if should_close:
path_or_buffer.close()
return stats
6 changes: 3 additions & 3 deletions tests/test_cortical_parcellation_stats.py
Original file line number Diff line number Diff line change
Expand Up @@ -346,15 +346,15 @@ def test__parse_whole_brain_measurements_line(
):
# pylint: disable=protected-access
column_name, value = CorticalParcellationStats._parse_whole_brain_measurements_line(
line,
line
)
assert column_name == expected_column_name
assert numpy.allclose(value, [expected_value])


@pytest.mark.parametrize(
"line",
["Measure Cortex, CortexVol Total cortical gray matter volume, 553998.311189",],
["Measure Cortex, CortexVol Total cortical gray matter volume, 553998.311189"],
)
def test__parse_whole_brain_measurements_line_parse_error(line):
# pylint: disable=protected-access
Expand All @@ -364,7 +364,7 @@ def test__parse_whole_brain_measurements_line_parse_error(line):

@pytest.mark.parametrize(
"path_str",
[os.path.join(SUBJECTS_DIR, "fabian", "stats", "lh.aparc.DKTatlas.stats.short"),],
[os.path.join(SUBJECTS_DIR, "fabian", "stats", "lh.aparc.DKTatlas.stats.short")],
)
def test_read_pathlib(path_str: str):
stats_str = CorticalParcellationStats.read(path_str)
Expand Down

0 comments on commit 53339f2

Please sign in to comment.