From e1cf4fd4c1e86f15b0de65104d042d109ab3faa5 Mon Sep 17 00:00:00 2001
From: Fabian Peter Hammerle
Date: Wed, 6 May 2020 20:27:08 +0200
Subject: [PATCH] CorticalParcellationStats.read: support pathlib.Path, http, s3 etc. via pandas.io.common.get_filepath_or_buffer

---
Review note: read() now closes the buffer returned by
get_filepath_or_buffer in a `finally` block, so it is released even when
parsing raises. The blob ids on the `index` lines below are those of the
original commit and predate this revision.

 CHANGELOG.md                              |  4 ++++
 freesurfer_stats/__init__.py              | 28 ++++++++++++++++++++++++----
 tests/test_cortical_parcellation_stats.py | 23 +++++++++++++++++++++++
 3 files changed, 51 insertions(+), 4 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 9cf7ea3..6ef8dda 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -5,6 +5,10 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
 ## [Unreleased]
+### Added
+- `CorticalParcellationStats.read` supports `pathlib.Path`, `"http://…"`, `"https://…"`, `"s3://…"` etc.
+  via `pandas.io.common.get_filepath_or_buffer`
+  (https://github.com/fphammerle/freesurfer-stats/issues/6)
 
 ## [1.1.0] - 2020-05-06
 ### Added
diff --git a/freesurfer_stats/__init__.py b/freesurfer_stats/__init__.py
index 5a4e824..0cfc408 100644
--- a/freesurfer_stats/__init__.py
+++ b/freesurfer_stats/__init__.py
@@ -47,6 +47,8 @@
 """
 
 import datetime
+import io
+import pathlib
 import re
 import typing
 
@@ -161,9 +163,27 @@ def _read(self, stream: typing.TextIO) -> None:
             .apply(pandas.to_numeric, errors='ignore')
 
     @classmethod
-    def read(cls, path: str) -> 'CorticalParcellationStats':
+    def read(cls, path: typing.Union[str, pathlib.Path]) -> "CorticalParcellationStats":
+        """
+        Read stats from a local path or from a URL (e.g. http, s3)
+        handled by pandas.io.common.get_filepath_or_buffer.
+        """
+        # support http, s3 & gcs
+        # https://github.com/pandas-dev/pandas/blob/v0.25.3/pandas/io/common.py#L171
+        path_or_buffer, _, _, should_close = pandas.io.common.get_filepath_or_buffer(
+            path
+        )
         stats = cls()
-        with open(path, 'r') as stream:
-            # pylint: disable=protected-access
-            stats._read(stream)
+        try:
+            if hasattr(path_or_buffer, "readline"):
+                # pylint: disable=protected-access
+                stats._read(io.TextIOWrapper(path_or_buffer))
+            else:
+                with open(path_or_buffer, "r") as stream:
+                    # pylint: disable=protected-access
+                    stats._read(stream)
+        finally:
+            # release the buffer even when parsing raises
+            if should_close:
+                path_or_buffer.close()
         return stats
diff --git a/tests/test_cortical_parcellation_stats.py b/tests/test_cortical_parcellation_stats.py
index 4bc38aa..c3e47f6 100644
--- a/tests/test_cortical_parcellation_stats.py
+++ b/tests/test_cortical_parcellation_stats.py
@@ -17,6 +17,7 @@
 """
 import datetime
 import os
+import pathlib
 
 import pandas.util.testing
 import pytest
@@ -166,3 +167,25 @@ def test_read(path, headers, hemisphere, whole_brain_measurements, structural_me
         check_dtype=True,
         check_names=True,
     )
+
+
+@pytest.mark.parametrize(
+    "path_str",
+    [os.path.join(SUBJECTS_DIR, "fabian", "stats", "lh.aparc.DKTatlas.stats.short"),],
+)
+def test_read_pathlib(path_str: str):
+    stats_str = CorticalParcellationStats.read(path_str)
+    stats_pathlib = CorticalParcellationStats.read(pathlib.Path(path_str))
+    assert stats_str.headers == stats_pathlib.headers
+
+
+@pytest.mark.parametrize(
+    "url",
+    [
+        "https://raw.githubusercontent.com/fphammerle/freesurfer-stats"
+        "/master/tests/subjects/fabian/stats/rh.aparc.stats"
+    ],
+)
+def test_read_https(url: str):
+    stats = CorticalParcellationStats.read(url)
+    assert stats.headers["generating_program"] == "mris_anatomical_stats"