Skip to content

Commit

Permalink
Merge d94b5f1 into d0e9dbb
Browse files Browse the repository at this point in the history
  • Loading branch information
alessiamarcolini committed Aug 17, 2020
2 parents d0e9dbb + d94b5f1 commit 1866b52
Show file tree
Hide file tree
Showing 6 changed files with 555 additions and 97 deletions.
1 change: 1 addition & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ def ascii_bytes_from(path, *paths):
"scipy",
"openslide-wrapper",
"typing_extensions",
"pandas",
]

test_requires = ["pytest", "coverage", "pytest-cov==2.8.1", "coveralls"]
Expand Down
22 changes: 22 additions & 0 deletions src/histolab/scorer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
from abc import abstractmethod

import numpy as np

from histolab.tile import Tile

try:
from typing import Protocol, runtime_checkable
except ImportError:
from typing_extensions import Protocol, runtime_checkable


@runtime_checkable
class Scorer(Protocol):
    """Structural interface for objects that assign a score to a tile.

    Any object exposing a compatible ``__call__`` satisfies this protocol; since
    the protocol is runtime-checkable, ``isinstance(obj, Scorer)`` can be used
    to test for the presence of ``__call__``.
    """

    @abstractmethod
    def __call__(self, tile: Tile) -> float:
        """Return the score of ``tile``.

        Parameters
        ----------
        tile : Tile
            The tile to score.

        Returns
        -------
        float
            Score associated with the tile.

        Raises
        ------
        NotImplementedError
            Always, in this base definition: concrete scorers must override it.
        """
        raise NotImplementedError


class RandomScorer(Scorer):
    """Scorer that ignores the tile content and returns a random score."""

    def __call__(self, tile: Tile) -> float:
        """Return a random score drawn from NumPy's global random state.

        Parameters
        ----------
        tile : Tile
            The tile to score. It is not inspected.

        Returns
        -------
        float
            Value produced by ``np.random.random()``.
        """
        random_score = np.random.random()
        return random_score
278 changes: 218 additions & 60 deletions src/histolab/tiler.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
import os
from abc import abstractmethod
from typing import Tuple
from typing import List, Tuple

import numpy as np
import pandas as pd

from histolab.exceptions import LevelError

from .exceptions import LevelError
from .scorer import Scorer
from .slide import Slide
from .tile import Tile
from .types import CoordinatePair
Expand Down Expand Up @@ -117,26 +118,6 @@ def __init__(
self.prefix = prefix
self.suffix = suffix

@property
def tile_size(self) -> Tuple[int, int]:
return self._valid_tile_size

@tile_size.setter
def tile_size(self, tile_size_: Tuple[int, int]):
if tile_size_[0] < 1 or tile_size_[1] < 1:
raise ValueError(f"Tile size must be greater than 0 ({tile_size_})")
self._valid_tile_size = tile_size_

@property
def level(self) -> int:
return self._valid_level

@level.setter
def level(self, level_: int):
if level_ < 0:
raise LevelError(f"Level cannot be negative ({level_})")
self._valid_level = level_

def extract(self, slide: Slide):
"""Extract tiles arranged in a grid and save them to disk, following this
filename pattern:
Expand Down Expand Up @@ -165,6 +146,26 @@ def extract(self, slide: Slide):

print(f"{tiles_counter} Grid Tiles have been saved.")

@property
def level(self) -> int:
    """Return the extraction level stored by the setter."""
    return self._valid_level

@level.setter
def level(self, level_: int):
    """Validate and store the extraction level.

    Raises
    ------
    LevelError
        If ``level_`` is negative.
    """
    is_negative = level_ < 0
    if is_negative:
        raise LevelError(f"Level cannot be negative ({level_})")
    self._valid_level = level_

@property
def tile_size(self) -> Tuple[int, int]:
    """Return the tile size stored by the setter."""
    return self._valid_tile_size

@tile_size.setter
def tile_size(self, tile_size_: Tuple[int, int]):
    """Validate and store the tile size.

    Raises
    ------
    ValueError
        If either of the first two items of ``tile_size_`` is smaller than 1.
    """
    has_invalid_side = tile_size_[0] < 1 or tile_size_[1] < 1
    if has_invalid_side:
        raise ValueError(f"Tile size must be greater than 0 ({tile_size_})")
    self._valid_tile_size = tile_size_

def _grid_coordinates_from_bbox_coordinates(
self, bbox_coordinates: CoordinatePair, slide: Slide
) -> CoordinatePair:
Expand Down Expand Up @@ -224,14 +225,14 @@ def _grid_coordinates_generator(self, slide: Slide) -> CoordinatePair:
Iterator[CoordinatePair]
Iterator of tiles' CoordinatePair
"""
box_mask_thumb = self.box_mask(slide)
box_mask = self.box_mask(slide)

regions = regions_from_binary_mask(box_mask_thumb)
regions = regions_from_binary_mask(box_mask)
for region in regions: # at the moment there is only one region
bbox_coordinates_thumb = region_coordinates(region)
bbox_coordinates = scale_coordinates(
bbox_coordinates_thumb,
box_mask_thumb.shape[::-1],
box_mask.shape[::-1],
slide.level_dimensions(self.level),
)
yield from self._grid_coordinates_from_bbox_coordinates(
Expand Down Expand Up @@ -347,15 +348,26 @@ def __init__(
self.prefix = prefix
self.suffix = suffix

@property
def tile_size(self) -> Tuple[int, int]:
return self._valid_tile_size
def extract(self, slide: Slide):
"""Extract random tiles and save them to disk, following this filename pattern:
`{prefix}tile_{tiles_counter}_level{level}_{x_ul_wsi}-{y_ul_wsi}-{x_br_wsi}-{y_br_wsi}{suffix}`
@tile_size.setter
def tile_size(self, tile_size_: Tuple[int, int]):
if tile_size_[0] < 1 or tile_size_[1] < 1:
raise ValueError(f"Tile size must be greater than 0 ({tile_size_})")
self._valid_tile_size = tile_size_
Parameters
----------
slide : Slide
Slide from which to extract the tiles
"""

np.random.seed(self.seed)

random_tiles = self._random_tiles_generator(slide)

tiles_counter = 0
for tiles_counter, (tile, tile_wsi_coords) in enumerate(random_tiles):
tile_filename = self._tile_filename(tile_wsi_coords, tiles_counter)
tile.save(tile_filename)
print(f"\t Tile {tiles_counter} saved: {tile_filename}")
print(f"{tiles_counter+1} Random Tiles have been saved.")

@property
def level(self) -> int:
Expand All @@ -380,27 +392,15 @@ def max_iter(self, max_iter_: int = 1e4):
)
self._valid_max_iter = max_iter_

def extract(self, slide: Slide):
"""Extract random tiles and save them to disk, following this filename pattern:
`{prefix}tile_{tiles_counter}_level{level}_{x_ul_wsi}-{y_ul_wsi}-{x_br_wsi}-{y_br_wsi}{suffix}`
Parameters
----------
slide : Slide
Slide from which to extract the tiles
"""

np.random.seed(self.seed)

random_tiles = self._random_tiles_generator(slide)
@property
def tile_size(self) -> Tuple[int, int]:
return self._valid_tile_size

tiles_counter = 0
for tiles_counter, (tile, tile_wsi_coords) in enumerate(random_tiles):
tile_filename = self._tile_filename(tile_wsi_coords, tiles_counter)
full_tile_path = os.path.join(slide.processed_path, "tiles", tile_filename)
tile.save(full_tile_path)
print(f"\t Tile {tiles_counter} saved: {tile_filename}")
print(f"{tiles_counter+1} Random Tiles have been saved.")
@tile_size.setter
def tile_size(self, tile_size_: Tuple[int, int]):
if tile_size_[0] < 1 or tile_size_[1] < 1:
raise ValueError(f"Tile size must be greater than 0 ({tile_size_})")
self._valid_tile_size = tile_size_

def _random_tile_coordinates(self, slide: Slide) -> CoordinatePair:
"""Return 0-level Coordinates of a tile picked at random within the box.
Expand All @@ -415,26 +415,26 @@ def _random_tile_coordinates(self, slide: Slide) -> CoordinatePair:
CoordinatePair
Random tile Coordinates at level 0
"""
box_mask_thumb = self.box_mask(slide)
box_mask = self.box_mask(slide)
tile_w_lvl, tile_h_lvl = self.tile_size

x_ul_lvl = np.random.choice(np.where(box_mask_thumb)[1])
y_ul_lvl = np.random.choice(np.where(box_mask_thumb)[0])
x_ul_lvl = np.random.choice(np.where(box_mask)[1])
y_ul_lvl = np.random.choice(np.where(box_mask)[0])

# Scale tile dimensions to thumbnail dimensions
tile_w_thumb = (
tile_w_lvl * box_mask_thumb.shape[1] / slide.level_dimensions(self.level)[0]
tile_w_lvl * box_mask.shape[1] / slide.level_dimensions(self.level)[0]
)
tile_h_thumn = (
tile_h_lvl * box_mask_thumb.shape[0] / slide.level_dimensions(self.level)[1]
tile_h_lvl * box_mask.shape[0] / slide.level_dimensions(self.level)[1]
)

x_br_lvl = x_ul_lvl + tile_w_thumb
y_br_lvl = y_ul_lvl + tile_h_thumn

tile_wsi_coords = scale_coordinates(
reference_coords=CoordinatePair(x_ul_lvl, y_ul_lvl, x_br_lvl, y_br_lvl),
reference_size=box_mask_thumb.shape[::-1],
reference_size=box_mask.shape[::-1],
target_size=slide.dimensions,
)

Expand Down Expand Up @@ -481,3 +481,161 @@ def _random_tiles_generator(self, slide: Slide) -> Tuple[Tile, CoordinatePair]:

if valid_tile_counter >= self.n_tiles:
break


class ScoreTiler(GridTiler):
    """Extractor of tiles arranged in a grid according to a scoring function.

    The extraction procedure is the same as the ``GridTiler`` extractor, but only the
    first ``n_tiles`` tiles with the highest score are saved.

    Arguments
    ---------
    scorer : Scorer
        Scoring function used to score the tiles.
    tile_size : Tuple[int, int]
        (width, height) of the extracted tiles.
    n_tiles : int, optional
        The number of tiles to be saved. Default is 0, which means that all the tiles
        will be saved (same exact behaviour of a GridTiler). Cannot be negative.
    level : int, optional
        Level from which extract the tiles. Default is 0.
    check_tissue : bool, optional
        Whether to check if the tile has enough tissue to be saved. Default is True.
    pixel_overlap : int, optional
        Number of overlapping pixels (for both height and width) between two adjacent
        tiles. If negative, two adjacent tiles will be strided by the absolute value of
        ``pixel_overlap``. Default is 0.
    prefix : str, optional
        Prefix to be added to the tile filename. Default is an empty string.
    suffix : str, optional
        Suffix to be added to the tile filename. Default is '.png'
    """

    def __init__(
        self,
        scorer: Scorer,
        tile_size: Tuple[int, int],
        n_tiles: int = 0,
        level: int = 0,
        check_tissue: bool = True,
        pixel_overlap: int = 0,
        prefix: str = "",
        suffix: str = ".png",
    ):
        self.scorer = scorer
        self.n_tiles = n_tiles

        super().__init__(tile_size, level, check_tissue, pixel_overlap, prefix, suffix)

    def extract(self, slide: Slide, report_path: str = None):
        """Extract grid tiles and save them to disk, according to a scoring function.

        The saved tiles follow this filename pattern:
        `{prefix}tile_{tiles_counter}_level{level}_{x_ul_wsi}-{y_ul_wsi}-{x_br_wsi}-{y_br_wsi}{suffix}`

        Optionally save a CSV report file with the saved tiles and the associated
        score.

        Parameters
        ----------
        slide : Slide
            Slide from which to extract the tiles
        report_path : str, optional
            Path to the CSV report. If None, no report will be saved

        Raises
        ------
        ValueError
            If ``n_tiles`` is negative.
        """
        highest_score_tiles = self._highest_score_tiles(slide)

        filenames = []

        for tiles_counter, (score, tile_wsi_coords) in enumerate(highest_score_tiles):
            tile = slide.extract_tile(tile_wsi_coords, self.level)
            tile_filename = self._tile_filename(tile_wsi_coords, tiles_counter)
            tile.save(tile_filename)
            filenames.append(tile_filename)
            print(f"\t Tile {tiles_counter} - score: {score} saved: {tile_filename}")

        if report_path:
            self._save_report(report_path, highest_score_tiles, filenames)

        # Count the files actually written: deriving the total from the last loop
        # index would report one saved tile even when no tile was extracted.
        print(f"{len(filenames)} Grid Tiles have been saved.")

    def _highest_score_tiles(self, slide: Slide) -> List[Tuple[float, CoordinatePair]]:
        """Calculate the tiles with the highest scores and their extraction coordinates.

        Parameters
        ----------
        slide : Slide
            The slide to extract the tiles from.

        Returns
        -------
        List[Tuple[float, CoordinatePair]]
            List of tuples containing the score and the extraction coordinates for the
            tiles with the highest score, sorted by decreasing score. Each tuple
            represents a tile. All the tiles are returned when ``n_tiles`` is 0.

        Raises
        ------
        ValueError
            If ``n_tiles`` is negative.
        """
        # Reject an invalid ``n_tiles`` before scoring, so that no tile is
        # extracted and scored only to be discarded by the error.
        if self.n_tiles < 0:
            raise ValueError(f"'n_tiles' cannot be negative ({self.n_tiles})")

        all_scores = self._scores(slide)

        # Sort on the score only, so the coordinates never take part in comparisons.
        sorted_tiles_by_score = sorted(all_scores, key=lambda x: x[0], reverse=True)
        if self.n_tiles == 0:
            return sorted_tiles_by_score
        return sorted_tiles_by_score[: self.n_tiles]

    def _save_report(
        self,
        report_path: str,
        highest_score_tiles: List[Tuple[float, CoordinatePair]],
        filenames: List[str],
    ) -> None:
        """Save to ``report_path`` the report of the saved tiles with their score.

        The CSV file has two columns, ``filename`` and ``score``, and one row per
        saved tile.

        Parameters
        ----------
        report_path : str
            Path to the report
        highest_score_tiles : List[Tuple[float, CoordinatePair]]
            List of tuples containing the score and the extraction coordinates for the
            tiles with the highest score. Each tuple represents a tile.
        filenames : List[str]
            List of the tiles' filename
        """
        # Take the scores straight from the tuples. Indexing
        # ``np.array(highest_score_tiles)[:, 0]`` fails on an empty list and
        # depends on how NumPy builds an array from (score, coordinates) pairs.
        scores = [score for score, _ in highest_score_tiles]

        report = pd.DataFrame({"filename": filenames, "score": scores})
        report.to_csv(report_path, index=False)

    def _scores(self, slide: Slide) -> List[Tuple[float, CoordinatePair]]:
        """Calculate the scores for all the tiles extracted from the ``slide``.

        Parameters
        ----------
        slide : Slide
            The slide to extract the tiles from.

        Returns
        -------
        List[Tuple[float, CoordinatePair]]
            List of tuples containing the score and the extraction coordinates for each
            tile. Each tuple represents a tile.
        """
        return [
            (self.scorer(tile), tile_wsi_coords)
            for tile, tile_wsi_coords in self._grid_tiles_generator(slide)
        ]

0 comments on commit 1866b52

Please sign in to comment.