Skip to content

Commit

Permalink
Save CSV report with tile filename and score
Browse files Browse the repository at this point in the history
  • Loading branch information
alessiamarcolini committed Aug 17, 2020
1 parent 64307d7 commit 550ec0b
Show file tree
Hide file tree
Showing 3 changed files with 106 additions and 1 deletion.
1 change: 1 addition & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ def ascii_bytes_from(path, *paths):
"scipy",
"openslide-wrapper",
"typing_extensions",
"pandas",
]

test_requires = ["pytest", "coverage", "pytest-cov==2.8.1", "coveralls"]
Expand Down
38 changes: 37 additions & 1 deletion src/histolab/tiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
from typing import Protocol, runtime_checkable
except ImportError:
from typing_extensions import Protocol, runtime_checkable
import pandas as pd


@runtime_checkable
Expand Down Expand Up @@ -525,26 +526,35 @@ def __init__(

super().__init__(tile_size, level, check_tissue, pixel_overlap, prefix, suffix)

def extract(self, slide: Slide):
def extract(self, slide: Slide, report_path: str = None):
    """Extract grid tiles and save them to disk, according to a scoring function and
    following this filename pattern:
    `{prefix}tile_{tiles_counter}_level{level}_{x_ul_wsi}-{y_ul_wsi}-{x_br_wsi}-{y_br_wsi}{suffix}`

    Optionally save a CSV report file with the saved tiles and the associated score.

    Parameters
    ----------
    slide : Slide
        Slide from which to extract the tiles
    report_path : str, optional
        Path to the CSV report. If None (or empty), no report will be saved
    """
    highest_score_tiles = self._highest_score_tiles(slide)

    filenames = []

    for tiles_counter, (score, tile_wsi_coords) in enumerate(highest_score_tiles):
        tile = slide.extract_tile(tile_wsi_coords, self.level)
        tile_filename = self._tile_filename(tile_wsi_coords, tiles_counter)
        tile.save(tile_filename)
        filenames.append(tile_filename)
        print(f"\t Tile {tiles_counter} - score: {score} saved: {tile_filename}")

    if report_path:
        self._save_report(report_path, highest_score_tiles, filenames)

    # Count the files actually written: deriving the total from the loop index
    # (``tiles_counter + 1``) reports one saved tile even when none were extracted.
    print(f"{len(filenames)} Grid Tiles have been saved.")

def _highest_score_tiles(self, slide: Slide) -> List[Tuple[float, CoordinatePair]]:
Expand Down Expand Up @@ -578,6 +588,32 @@ def _highest_score_tiles(self, slide: Slide) -> List[Tuple[float, CoordinatePair

return highest_score_tiles

def _save_report(
self,
report_path: str,
highest_score_tiles: List[Tuple[float, CoordinatePair]],
filenames: List[str],
) -> None:
"""Save to ``filename`` the report of the saved tiles with the associated score.
The CSV file
Parameters
----------
report_path : str
Path to the report
highest_score_tiles : List[Tuple[float, CoordinatePair]]
List of tuples containing the score and the extraction coordinates for the
tiles with the highest score. Each tuple represents a tile.
filenames : List[str]
List of the tiles' filename
"""

report = pd.DataFrame(
{"filename": filenames, "score": np.array(highest_score_tiles)[:, 0]}
)
report.to_csv(report_path, index=None)

def _scores(self, slide: Slide) -> List[Tuple[float, CoordinatePair]]:
"""Calculate the scores for all the tiles extracted from the ``slide``.
Expand Down
68 changes: 68 additions & 0 deletions tests/unit/test_tiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from histolab.tile import Tile
from histolab.tiler import GridTiler, RandomTiler, ScoreTiler, Tiler
from histolab.types import CoordinatePair
import pandas as pd

from ..unitutil import (
ANY,
Expand Down Expand Up @@ -735,6 +736,7 @@ def it_can_extract_score_tiles(self, request, tmpdir):
os.path.join(tmp_path_, "processed", f"tile_{i}_level2_0-10-0-10.png")
for i in range(2)
]
_save_report = method_mock(request, ScoreTiler, "_save_report")
random_scorer = RandomScorer()
score_tiler = ScoreTiler(random_scorer, (10, 10), 2, 2)

Expand All @@ -755,6 +757,72 @@ def it_can_extract_score_tiles(self, request, tmpdir):
assert os.path.exists(
os.path.join(tmp_path_, "processed", "tile_1_level2_0-10-0-10.png")
)
_save_report.assert_not_called()

def it_can_save_report(self, request, tmpdir):
    """``_save_report`` writes a CSV that reads back as the expected frame."""
    report_path = os.path.join(tmpdir.mkdir("path"), "report.csv")
    tile_coords = CoordinatePair(0, 10, 0, 10)
    scored_tiles = [(0.8, tile_coords), (0.7, tile_coords)]
    tile_names = ["tile0.png", "tile1.png"]
    scorer_ = instance_mock(request, RandomScorer)
    tiler = ScoreTiler(scorer_, (10, 10), 2, 2)
    expected_report = pd.DataFrame({"filename": tile_names, "score": [0.8, 0.7]})

    tiler._save_report(report_path, scored_tiles, tile_names)

    # The file must exist and round-trip through pandas unchanged.
    assert os.path.exists(report_path)
    written_report = pd.read_csv(report_path)
    pd.testing.assert_frame_equal(written_report, expected_report)

def it_can_extract_score_tiles_and_save_report(self, request, tmpdir):
    """``extract`` saves every scored tile and forwards them to ``_save_report``."""
    extract_tile_ = method_mock(request, Slide, "extract_tile")
    slide_dir = tmpdir.mkdir("myslide")
    slide_path = os.path.join(slide_dir, "mywsi.png")
    processed_dir = os.path.join(slide_dir, "processed")
    image = PILImageMock.DIMS_500X500_RGBA_COLOR_155_249_240
    image.save(slide_path, "PNG")
    slide = Slide(slide_path, processed_dir)
    highest_score_tiles_ = method_mock(request, ScoreTiler, "_highest_score_tiles")
    coords = CoordinatePair(0, 10, 0, 10)
    extract_tile_.return_value = Tile(image, coords)
    highest_score_tiles_.return_value = [(0.8, coords), (0.7, coords)]
    # Paths the mocked ``_tile_filename`` hands out, one per extracted tile.
    expected_filenames = [
        os.path.join(processed_dir, f"tile_{i}_level2_0-10-0-10.png")
        for i in range(2)
    ]
    tile_filename_ = method_mock(request, GridTiler, "_tile_filename")
    tile_filename_.side_effect = list(expected_filenames)
    save_report_ = method_mock(request, ScoreTiler, "_save_report")
    scorer = RandomScorer()
    score_tiler = ScoreTiler(scorer, (10, 10), 2, 2)

    score_tiler.extract(slide, "report.csv")

    assert extract_tile_.call_args_list == [call(slide, coords, 2)] * 2
    highest_score_tiles_.assert_called_once_with(score_tiler, slide)
    assert tile_filename_.call_args_list == [
        call(score_tiler, coords, counter) for counter in range(2)
    ]
    for tile_path in expected_filenames:
        assert os.path.exists(tile_path)
    save_report_.assert_called_once_with(
        score_tiler,
        "report.csv",
        [(0.8, coords), (0.7, coords)],
        expected_filenames,
    )

# fixtures -------------------------------------------------------

Expand Down

0 comments on commit 550ec0b

Please sign in to comment.