In [5]:
# test_tiletorch.py
from pathlib import Path
from src.tiletorch.tiling import summarize_slide, preview_tiling, extract_tiles
from src.tiletorch.filters import make_filter
import pandas as pd
from PIL import Image

# 🔧 Edit this to your own WSI path
WSI_PATH = "./examples/TCGA-HC-A6HY-01Z-00-DX1.9753A5EC-6068-4502-ACBB-4C6F1EA77AF9.svs"
OUT_DIR = Path("out_demo")
# parents=True so that pointing OUT_DIR at a nested folder (e.g. "runs/demo")
# still works on a fresh checkout instead of raising FileNotFoundError.
OUT_DIR.mkdir(parents=True, exist_ok=True)

# Artifact paths are named once so the step that writes a file and the step
# that reads it back can never point at different locations.
PREVIEW_PNG = OUT_DIR / "preview.png"
INDEX_CSV = OUT_DIR / "index.csv"

# Tiling geometry shared by the preview (step 3) and the real extraction
# (step 4). Keeping it in one place guarantees the preview is a preview of
# the run that actually follows; edit the values here, not at the call sites.
TILING_KWARGS = dict(tile_size=256, level=0, overlap=0)

# 1. Print slide metadata
meta = summarize_slide(WSI_PATH)
print("Slide metadata:", meta)

# 2. Build a filter (tissue + entropy thresholds; min_lapvar=0.0 is passed explicitly)
pred = make_filter(min_tissue=0.12, min_entropy=3.8, min_lapvar=0.0)

# 3. Preview only (no tiles written, just overlay thumbnail)
stats = preview_tiling(
    slide_path=WSI_PATH,
    out_path=PREVIEW_PNG,
    filter_fn=pred,
    thumb_max=768,
    **TILING_KWARGS,
)
print("Preview stats:", stats)

# Open the preview image (left disabled: .show() launches an external viewer)
# Image.open(PREVIEW_PNG).show()

# 4. Real tiling (saves tiles + CSV index) with the same geometry and filter
index = extract_tiles(
    slide_path=WSI_PATH,
    out_dir=OUT_DIR / "tiles",
    filter_fn=pred,
    write_index_csv=INDEX_CSV,
    **TILING_KWARGS,
)
print(f"Extracted {len(index)} tiles")

# Load and inspect the index CSV that step 4 just wrote
df = pd.read_csv(INDEX_CSV)
print(df.head())

Slide metadata: {'slide_path': './examples/TCGA-HC-A6HY-01Z-00-DX1.9753A5EC-6068-4502-ACBB-4C6F1EA77AF9.svs', 'levels': 3, 'level_dimensions': [(19184, 18822), (4796, 4705), (2398, 2352)]}


Preview TCGA-HC-A6HY-01Z-00-DX1.9753A5EC-6068-4502-ACBB-4C6F1EA77AF9 (L0): 100%|██████████| 5402/5402 [00:16<00:00, 333.61it/s]


Preview stats: {'preview_path': 'out_demo/preview.png', 'total_tiles': 5402, 'kept_tiles': 1298, 'kept_ratio': 0.24028137726767865}


Tiling TCGA-HC-A6HY-01Z-00-DX1.9753A5EC-6068-4502-ACBB-4C6F1EA77AF9 (L0): 100%|██████████| 5402/5402 [00:16<00:00, 318.62it/s]


Extracted 1298 tiles
                                            slide_id  \
0  TCGA-HC-A6HY-01Z-00-DX1.9753A5EC-6068-4502-ACB...   
1  TCGA-HC-A6HY-01Z-00-DX1.9753A5EC-6068-4502-ACB...   
2  TCGA-HC-A6HY-01Z-00-DX1.9753A5EC-6068-4502-ACB...   
3  TCGA-HC-A6HY-01Z-00-DX1.9753A5EC-6068-4502-ACB...   
4  TCGA-HC-A6HY-01Z-00-DX1.9753A5EC-6068-4502-ACB...   

                                           tile_path     x      y  level  \
0  out_demo/tiles/TCGA-HC-A6HY-01Z-00-DX1.9753A5E...  2048   9984      0   
1  out_demo/tiles/TCGA-HC-A6HY-01Z-00-DX1.9753A5E...  2048  10240      0   
2  out_demo/tiles/TCGA-HC-A6HY-01Z-00-DX1.9753A5E...  2048  10496      0   
3  out_demo/tiles/TCGA-HC-A6HY-01Z-00-DX1.9753A5E...  2048  10752      0   
4  out_demo/tiles/TCGA-HC-A6HY-01Z-00-DX1.9753A5E...  2048  11008      0   

   tile_size  tissue_frac   entropy  
0        256     0.469238  5.982952  
1        256     0.501953  6.114062  
2        256     0.446777  5.713562  
3        256     0.411133  5.5872

In [None]:
from src.tiletorch.tiling import extract_tiles_to_sink
from src.tiletorch.sinks import FileSink, WebDatasetSink
from src.tiletorch.filters import make_filter
# Same demo slide as the first cell, re-declared here so this cell does not
# depend on that one for the path.
WSI_PATH = "./examples/TCGA-HC-A6HY-01Z-00-DX1.9753A5EC-6068-4502-ACBB-4C6F1EA77AF9.svs"

# FileSink pointed at out/tiles_jpeg, configured for JPEG with explicit
# encoder options.
sink = FileSink(
    out_dir="out/tiles_jpeg",
    fmt="jpeg",
    codec_kwargs=dict(quality=92, subsampling=0, optimize=True),
)

# Tile filter using the same tissue/entropy thresholds as the first cell.
pred = make_filter(min_tissue=0.12, min_entropy=3.8)

# Tile the slide at level 0 with 256px tiles and no overlap, sending every
# tile that passes the filter to the sink.
idx = extract_tiles_to_sink(
    WSI_PATH,
    sink,
    tile_size=256,
    level=0,
    overlap=0,
    filter_fn=pred,
)

Tiling TCGA-HC-A6HY-01Z-00-DX1.9753A5EC-6068-4502-ACBB-4C6F1EA77AF9 (L0): 100%|██████████| 5402/5402 [00:17<00:00, 309.11it/s]


In [7]:
from src.tiletorch.sinks import FileSink, WebDatasetSink
# NOTE: this cell uses WSI_PATH, make_filter and extract_tiles_to_sink, none
# of which are defined here -- they must already exist from earlier cells.
sink = WebDatasetSink(
    # %06d is the shard number: ...-000000.tar, ...-000001.tar, ...
    shard_pattern="out/shards/tiles-%06d.tar",
    fmt="jpeg",
    codec_kwargs=dict(quality=92, subsampling=0, optimize=True),
    # Shard limits: ~10k tiles or ~750 MiB per shard; per the original
    # author's note, a new shard starts when either limit is hit.
    max_count=10000,
    max_bytes=750 * 1024 ** 2,
)

pred = make_filter(min_tissue=0.12, min_entropy=3.8)

# 512px tiles at level 0, written as tar shards.
# NOTE(review): nothing in this cell closes or flushes the sink explicitly --
# confirm extract_tiles_to_sink finalizes the last shard.
idx = extract_tiles_to_sink(
    slide_path=WSI_PATH,
    sink=sink,
    tile_size=512,
    level=0,
    filter_fn=pred,
)

Tiling TCGA-HC-A6HY-01Z-00-DX1.9753A5EC-6068-4502-ACBB-4C6F1EA77AF9 (L0): 100%|██████████| 1332/1332 [00:17<00:00, 75.68it/s]


In [8]:
# NOTE: this cell defines none of WSI_PATH, WebDatasetSink, make_filter or
# extract_tiles_to_sink -- all four must already exist from earlier cells.
sink = WebDatasetSink(
    shard_pattern="out/shards_webp/tiles-%06d.tar",
    fmt="webp",
    codec_kwargs=dict(quality=90, method=6),
    # WebP tiles are usually smaller, so more of them fit in one shard.
    # No max_bytes is passed here, unlike the JPEG shard cell.
    max_count=15000,
)

pred = make_filter(min_tissue=0.12, min_entropy=3.8)

# Same 512px / level-0 tiling as the JPEG shard run, only the sink differs.
idx = extract_tiles_to_sink(
    slide_path=WSI_PATH,
    sink=sink,
    tile_size=512,
    level=0,
    filter_fn=pred,
)

Tiling TCGA-HC-A6HY-01Z-00-DX1.9753A5EC-6068-4502-ACBB-4C6F1EA77AF9 (L0): 100%|██████████| 1332/1332 [00:36<00:00, 36.58it/s]
