In [1]:
import os
import openslide as ops
import numpy as np
import logging

In [2]:
# --- Run configuration -------------------------------------------------------
# Everything a reader might want to tune lives here.
WSI_DIR = "./datasets/wsi"          # root directory searched for slides
PATCH_DIR = "./datasets/patches"    # destination directory for extracted patches
PATCH_SIZE = 224                    # patch edge length in pixels
OVERLAP = 0                         # pixels shared between neighbouring patches
LOG_FILE = "logs/patch_extraction.log"

# logging's file handler does not create missing parent directories, so make
# sure the log directory exists before configuring logging; otherwise a fresh
# checkout fails with FileNotFoundError on the very first cell.
os.makedirs(os.path.dirname(LOG_FILE), exist_ok=True)
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s', filename=LOG_FILE)
logging.info("Starting patch extraction...")

# exist_ok=True makes this idempotent and avoids the exists()/makedirs() race.
os.makedirs(PATCH_DIR, exist_ok=True)

# Record the effective configuration so each log file is self-describing.
logging.info("Extracting patches from WSI...")
logging.info("Patch size: %s", PATCH_SIZE)
logging.info("Overlap: %s", OVERLAP)
logging.info("WSI directory: %s", WSI_DIR)
logging.info("Patch directory: %s", PATCH_DIR)

In [3]:
class PatchExtractor:
    """Cut every ``.tif`` slide found under a directory into fixed-size PNG patches.

    Slides are discovered once, at construction time, by walking
    ``slide_root_path`` recursively. ``extract_patches`` then tiles each slide
    on a regular grid and writes one PNG per patch into ``save_path``.

    Attributes:
        slides_path: Sorted list of paths to the discovered ``.tif`` files.
        patch_size: ``(width, height)`` of each patch, in pixels.
        overlap: ``(horizontal, vertical)`` overlap between neighbouring
            patches, in pixels.
        save_path: Directory the PNG patches are written to.
    """

    def __init__(self, slide_root_path, patch_size, overlap, save_path):
        """Collect the slides to process and store the tiling parameters.

        Args:
            slide_root_path: Directory searched recursively for ``.tif`` files.
            patch_size: ``(width, height)`` pair, in pixels.
            overlap: ``(horizontal, vertical)`` pair, in pixels.
            save_path: Output directory for the extracted patches.
        """
        # os.walk yields entries in arbitrary order; sorting makes the
        # processing order (and therefore the log) reproducible across runs.
        self.slides_path = sorted(
            os.path.join(root, file)
            for root, _dirs, files in os.walk(slide_root_path)
            for file in files
            if file.endswith('.tif')
        )

        self.patch_size = patch_size
        self.overlap = overlap
        self.save_path = save_path

    def extract_patches(self):
        """Extract patches from every discovered slide.

        Processing is best-effort: a failure on one slide is logged as an
        error and the remaining slides are still processed. Nothing is
        returned; the results are the PNG files written to ``save_path``.
        """
        for slide_path in self.slides_path:
            try:
                self._extract_from_slide(slide_path)
            except Exception as e:
                logging.error(f"Error extracting patches from {slide_path}: {e}")

    def _extract_from_slide(self, slide_path):
        """Write every fully in-bounds patch of one slide; raise on any failure."""
        patch_width, patch_height = self.patch_size
        overlap_width, overlap_height = self.overlap
        stride_x = patch_width - overlap_width
        stride_y = patch_height - overlap_height
        # A non-positive stride would either make range() raise (zero) or
        # silently produce no patches at all (negative); report it explicitly.
        if stride_x <= 0 or stride_y <= 0:
            raise ValueError(
                f"overlap {self.overlap} must be smaller than patch size {self.patch_size}"
            )

        # Do not rely on an earlier notebook cell having created the directory.
        os.makedirs(self.save_path, exist_ok=True)
        # splitext strips only the final extension, so "case.01.tif" keeps its
        # full stem instead of collapsing to "case" and colliding with others.
        slide_name = os.path.splitext(os.path.basename(slide_path))[0]

        slide = ops.OpenSlide(slide_path)
        try:
            slide_width, slide_height = slide.dimensions
            # read_region always returns an image of the requested size, so a
            # partial tile cannot be detected from the returned image. Bound
            # the grid instead so that only patches lying entirely inside the
            # slide are read; slides smaller than one patch yield nothing.
            for y in range(0, slide_height - patch_height + 1, stride_y):
                for x in range(0, slide_width - patch_width + 1, stride_x):
                    patch = slide.read_region((x, y), 0, self.patch_size)
                    patch.save(os.path.join(self.save_path, f'{slide_name}_{x}_{y}.png'))
                    logging.info(f"Extracted patch from {slide_path} at ({x}, {y})")
        finally:
            # Release the slide handle even when reading or saving fails.
            slide.close()

In [None]:
# Build the extractor from the notebook-level configuration; patches are square
# and use the same overlap on both axes, hence the duplicated tuple entries.
extractor = PatchExtractor(
    slide_root_path=WSI_DIR,
    patch_size=(PATCH_SIZE,) * 2,
    overlap=(OVERLAP,) * 2,
    save_path=PATCH_DIR,
)
# Tile every discovered slide and write the patches to PATCH_DIR.
extractor.extract_patches()