In [1]:
import openslide as ops
import os
import matplotlib.pyplot as plt
import logging

In [None]:
# --- Configuration and slide discovery ---------------------------------------
LOG_FILE = "logs/ds_analysis.log"
# basicConfig opens the log file right away and raises FileNotFoundError when
# the parent directory is missing, so make sure it exists first.
os.makedirs(os.path.dirname(LOG_FILE), exist_ok=True)
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s', filename=LOG_FILE)
logging.info("Starting thumbnail generation...")

THUMB_SIZE = 4096  # edge of the square bounding box requested from get_thumbnail
dataset_dir = "./datasets/wsi"
thumbnail_dir = "./datasets/thumbnails"
logging.info(f"Dataset directory: {dataset_dir}")
logging.info(f"Thumbnail directory: {thumbnail_dir}")

# exist_ok avoids the check-then-create race and keeps the cell re-runnable.
os.makedirs(thumbnail_dir, exist_ok=True)

# Recursively collect every ".tif" file under dataset_dir. The list is sorted
# because os.walk yields entries in arbitrary filesystem order; sorting makes
# the processing order (and the log) reproducible between runs.
tiff_files = sorted(
    os.path.join(root, file)
    for root, _dirs, files in os.walk(dataset_dir)
    for file in files
    if file.endswith(".tif")
)

print(f"Found {len(tiff_files)} tiff files")
logging.info(f"Found {len(tiff_files)} tiff files")

In [3]:
def plot_thumbnail(thumbnail, title):
    """Display a thumbnail image in its own 10x10 inch figure.

    Args:
        thumbnail: Image data accepted by ``Axes.imshow`` (the caller in this
            notebook passes the image returned by ``slide.get_thumbnail``).
        title: Text shown above the image.

    Returns:
        None. The figure is shown and then released.
    """
    fig, ax = plt.subplots(1, 1, figsize=(10, 10))
    ax.imshow(thumbnail)
    ax.set_title(title)
    plt.show()
    # This function is called once per slide; closing the figure explicitly
    # keeps pyplot from accumulating open figures on backends that do not
    # discard them after show().
    plt.close(fig)

In [None]:
# Generate, save, display and log a thumbnail for every discovered slide.
# A failure on one slide is logged (with traceback) and the loop moves on.
# NOTE(review): thumbnails are named after the slide's basename only, so two
# slides with the same file name in different sub-directories overwrite each
# other's PNG — confirm that names are unique across the dataset.
for slide_path in tiff_files:
    slide_name = os.path.basename(slide_path)
    try:
        # The context manager closes the slide handle even if reading fails,
        # so handles do not pile up while iterating over many slides.
        with ops.OpenSlide(slide_path) as slide:
            thumbnail = slide.get_thumbnail((THUMB_SIZE, THUMB_SIZE))
            slide_dimensions = slide.dimensions
            level_count = slide.level_count

        # splitext swaps only the final extension; str.replace would also
        # rewrite any other ".tif" occurrence inside the file name.
        thumbnail_name = os.path.splitext(slide_name)[0] + ".png"
        thumbnail.save(os.path.join(thumbnail_dir, thumbnail_name))
        plot_thumbnail(thumbnail, slide_name)

        scale = THUMB_SIZE / max(slide_dimensions)
        file_size_mb = round(os.path.getsize(slide_path) / 1024 ** 2, 2)
        logging.info(f"Slide: {slide_path}")
        logging.info(f"Slide Dimension: {slide_dimensions} pixels")
        logging.info(f"Number of Levels: {level_count}")
        logging.info(f"file size: {file_size_mb} MB")
        logging.info(f"Scale: {scale}")
        # Report the size of the image that was actually produced instead of
        # an estimate derived from `scale`; the two differ when the slide is
        # smaller than THUMB_SIZE or when rounding differs.
        logging.info(f"Thumbnail Dimension: {list(thumbnail.size)} pixels")
        logging.info("-----------------------------")

    except Exception:
        # logging.exception records the traceback, so the cause of the failure
        # is preserved in the log rather than silently dropped.
        logging.exception(f"Error processing {slide_path}")