# Changes to ``src/histolab/tiler.py``: the two functions below are methods of
# ``ScoreTiler``. The accompanying ``setup.py`` change adds "pandas" to the
# install requirements, which is why pandas is imported here.
import pandas as pd


def extract(self, slide: "Slide", report_path: str = None):
    """Extract grid tiles and save them to disk, according to a scoring function and
    following this filename pattern:
    `{prefix}tile_{tiles_counter}_level{level}_{x_ul_wsi}-{y_ul_wsi}-{x_br_wsi}-{y_br_wsi}{suffix}`

    Optionally save a CSV report file with the saved tiles and the associated score.

    Parameters
    ----------
    slide : Slide
        Slide from which to extract the tiles
    report_path : str, optional
        Path to the CSV report. If None (or empty), no report will be saved
    """
    highest_score_tiles = self._highest_score_tiles(slide)

    filenames = []
    for tiles_counter, (score, tile_wsi_coords) in enumerate(highest_score_tiles):
        tile = slide.extract_tile(tile_wsi_coords, self.level)
        tile_filename = self._tile_filename(tile_wsi_coords, tiles_counter)
        tile.save(tile_filename)
        filenames.append(tile_filename)
        print(f"\t Tile {tiles_counter} - score: {score} saved: {tile_filename}")

    if report_path:
        self._save_report(report_path, highest_score_tiles, filenames)

    # Count the files actually written: the loop index + 1 would report one tile
    # even when no tile has been extracted.
    print(f"{len(filenames)} Grid Tiles have been saved.")


def _save_report(
    self,
    report_path: str,
    highest_score_tiles: "List[Tuple[float, CoordinatePair]]",
    filenames: "List[str]",
) -> None:
    """Save to ``report_path`` a CSV report of the saved tiles with their score.

    The CSV file has a header row followed by one row per tile, with two columns:
    ``filename`` and ``score``.

    Parameters
    ----------
    report_path : str
        Path to the report
    highest_score_tiles : List[Tuple[float, CoordinatePair]]
        List of tuples containing the score and the extraction coordinates for the
        tiles with the highest score. Each tuple represents a tile.
    filenames : List[str]
        List of the tiles' filename, in the same order as ``highest_score_tiles``
    """
    # Take the scores directly from the tuples. Converting the (score, coordinates)
    # pairs to a NumPy array first is ragged: it yields an object array on older
    # NumPy versions and raises ValueError on recent ones.
    scores = [score for score, _ in highest_score_tiles]

    report = pd.DataFrame({"filename": filenames, "score": scores})
    report.to_csv(report_path, index=False)
# Changes to ``tests/unit/test_tiler.py``: the two functions below are new test
# methods for ``ScoreTiler``. The same change also makes the existing
# ``it_can_extract_score_tiles`` test mock ``ScoreTiler._save_report`` and assert
# that it is not called when no report path is passed to ``extract``.
import pandas as pd


def it_can_save_report(self, request, tmpdir):
    """``_save_report`` writes a CSV holding the tile filenames and their scores."""
    report_path = os.path.join(tmpdir.mkdir("path"), "report.csv")
    tile_coords = CoordinatePair(0, 10, 0, 10)
    scored_tiles = [(0.8, tile_coords), (0.7, tile_coords)]
    tile_names = ["tile0.png", "tile1.png"]
    scorer_ = instance_mock(request, RandomScorer)
    tiler = ScoreTiler(scorer_, (10, 10), 2, 2)
    expected_report = pd.DataFrame({"filename": tile_names, "score": [0.8, 0.7]})

    tiler._save_report(report_path, scored_tiles, tile_names)

    assert os.path.exists(report_path)
    saved_report = pd.read_csv(report_path)
    pd.testing.assert_frame_equal(saved_report, expected_report)


def it_can_extract_score_tiles_and_save_report(self, request, tmpdir):
    """``extract`` saves the tiles and delegates the report to ``_save_report``."""
    _extract_tile = method_mock(request, Slide, "extract_tile")
    slide_dir = tmpdir.mkdir("myslide")
    slide_path = os.path.join(slide_dir, "mywsi.png")
    image = PILImageMock.DIMS_500X500_RGBA_COLOR_155_249_240
    image.save(slide_path, "PNG")
    slide = Slide(slide_path, os.path.join(slide_dir, "processed"))
    _highest_score_tiles = method_mock(request, ScoreTiler, "_highest_score_tiles")
    coords = CoordinatePair(0, 10, 0, 10)
    scored_tiles = [(0.8, coords), (0.7, coords)]
    _extract_tile.return_value = Tile(image, coords)
    _highest_score_tiles.return_value = scored_tiles
    # Paths the mocked ``_tile_filename`` hands out, one per extracted tile.
    expected_tile_paths = [
        os.path.join(slide_dir, "processed", f"tile_{i}_level2_0-10-0-10.png")
        for i in range(2)
    ]
    _tile_filename = method_mock(request, GridTiler, "_tile_filename")
    _tile_filename.side_effect = list(expected_tile_paths)
    _save_report = method_mock(request, ScoreTiler, "_save_report")
    score_tiler = ScoreTiler(RandomScorer(), (10, 10), 2, 2)

    score_tiler.extract(slide, "report.csv")

    assert _extract_tile.call_args_list == [call(slide, coords, 2)] * 2
    _highest_score_tiles.assert_called_once_with(score_tiler, slide)
    assert _tile_filename.call_args_list == [
        call(score_tiler, coords, index) for index in range(2)
    ]
    for tile_path in expected_tile_paths:
        assert os.path.exists(tile_path)
    _save_report.assert_called_once_with(
        score_tiler, "report.csv", scored_tiles, expected_tile_paths
    )