In [1]:
%pwd


'/orcd/data/edboyden/002/ezh/uni'

In [18]:
import os
import glob
import openslide
from PIL import Image
from tqdm.notebook import tqdm
from concurrent.futures import ThreadPoolExecutor

# ---- Tiling configuration (read as globals by the cells below) ----

# Where slides are read from and where tiles are written to.
input_dir = "diff_svs_flat"
output_dir = "all_virchow_tiles"

# OpenSlide pyramid level to tile; level 0 is the highest resolution.
level = 1

# Tile edge length and the overlap between neighbouring tiles, in pixels.
# The grid step used by the extraction cell is (tile_size - overlap).
tile_size = 224
overlap = 0

# Size of the thread pool used while saving tiles; tune to the available CPUs.
max_workers = 20

# Make sure the output root exists before any slide is processed.
os.makedirs(output_dir, exist_ok=True)

In [19]:
# Collect every .svs file directly inside input_dir.
# glob returns matches in a filesystem-dependent order, so sort them to make
# the processing order (and therefore the logs and any resumed run) reproducible.
slide_paths = sorted(glob.glob(os.path.join(input_dir, "*.svs")))
if not slide_paths:
    # Fail early with a clear message instead of silently doing nothing later.
    raise FileNotFoundError(f"No SVS files found in {input_dir}")

In [20]:
def extract_tiles_from_slide(slide_path):
    """Cut one slide into square PNG tiles and save them under ``output_dir``.

    The tile grid is laid out over the dimensions of pyramid level ``level``
    with a step of ``tile_size - overlap`` pixels. Each tile is written to
    ``<output_dir>/<slide_name>/<slide_name>_L<level>_<x>_<y>.png`` where
    ``x`` and ``y`` are the tile's top-left corner in *level* pixel
    coordinates. Tiles in the last row/column may extend past the level
    bounds; no cropping is done here.

    Reads the notebook-level settings ``level``, ``tile_size``, ``overlap``,
    ``max_workers`` and ``output_dir``.

    Args:
        slide_path: Path to a slide file that OpenSlide can open.

    Raises:
        ValueError: If ``overlap`` is not smaller than ``tile_size`` (the grid
            step would be zero or negative).
    """
    slide_name = os.path.splitext(os.path.basename(slide_path))[0]
    slide_out_dir = os.path.join(output_dir, slide_name)

    stride = tile_size - overlap
    if stride <= 0:
        raise ValueError(
            f"overlap ({overlap}) must be smaller than tile_size ({tile_size})"
        )

    slide = openslide.OpenSlide(slide_path)
    try:
        W, H = slide.level_dimensions[level]
        # read_region() expects its location in level-0 pixels, while the grid
        # below is expressed in level-`level` pixels; this factor converts.
        downsample = slide.level_downsamples[level]

        coords = [(x, y)
                  for y in range(0, H, stride)
                  for x in range(0, W, stride)]

        print(f"\n→ Extracting {len(coords)} tiles from '{slide_name}' at level {level}")

        # Create the per-slide folder once instead of once per tile.
        os.makedirs(slide_out_dir, exist_ok=True)

        def save_tile(coord):
            """Read the tile whose level-space top-left corner is ``coord`` and save it."""
            x, y = coord
            # Map the level-space corner to the level-0 reference frame.
            location = (int(round(x * downsample)), int(round(y * downsample)))
            tile = slide.read_region(location, level, (tile_size, tile_size)).convert("RGB")
            # File names keep the level-space coordinates, as before.
            fname = f"{slide_name}_L{level}_{x}_{y}.png"
            tile.save(os.path.join(slide_out_dir, fname))

        with ThreadPoolExecutor(max_workers=max_workers) as executor:
            # Draining the map iterator drives the progress bar and re-raises
            # any exception thrown inside a worker.
            for _ in tqdm(
                executor.map(save_tile, coords),
                total=len(coords),
                desc=slide_name
            ):
                pass
    finally:
        # Release the slide handle even if a tile fails part-way through.
        slide.close()

In [None]:
# Tile every slide that does not yet have an output folder.
# NOTE(review): the check is only "does the folder exist", so a folder left
# behind by an interrupted run is treated as finished and skipped as well.
for slide_path in slide_paths:
    slide_name = os.path.splitext(os.path.basename(slide_path))[0]
    slide_out_dir = os.path.join(output_dir, slide_name)

    if not os.path.isdir(slide_out_dir):
        extract_tiles_from_slide(slide_path)
    else:
        print(f"Skipping '{slide_name}' — tiles already exist.")

print("\nAll done!")