# installs and imports

In [1]:
!apt update && apt install -y openslide-tools
!pip install openslide-python

[33m0% [Working][0m            Get:1 https://cloud.r-project.org/bin/linux/ubuntu focal-cran40/ InRelease [3,622 B]
Hit:2 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64  InRelease
Hit:3 http://ppa.launchpad.net/c2d4u.team/c2d4u4.0+/ubuntu focal InRelease
Get:4 http://security.ubuntu.com/ubuntu focal-security InRelease [114 kB]
Hit:5 http://archive.ubuntu.com/ubuntu focal InRelease
Get:6 http://archive.ubuntu.com/ubuntu focal-updates InRelease [114 kB]
Hit:7 http://ppa.launchpad.net/cran/libgit2/ubuntu focal InRelease
Hit:8 http://ppa.launchpad.net/deadsnakes/ppa/ubuntu focal InRelease
Get:9 http://archive.ubuntu.com/ubuntu focal-backports InRelease [108 kB]
Hit:10 http://ppa.launchpad.net/graphics-drivers/ppa/ubuntu focal InRelease
Hit:11 http://ppa.launchpad.net/ubuntugis/ppa/ubuntu focal InRelease
Fetched 340 kB in 2s (150 kB/s)
Reading package lists... Done
Building dependency tree       
Reading state information... Done
25 packages can be upgraded.

In [2]:
import os
import time
import logging
import openslide
from tqdm import tqdm
from google.colab import drive

# Mount Google Drive at /content/gdrive/ so the files stored on it can be
# accessed through the local filesystem (the paths used below live under
# /content/gdrive/MyDrive/).
drive.mount("/content/gdrive/")

Drive already mounted at /content/gdrive/; to attempt to forcibly remount, call drive.mount("/content/gdrive/", force_remount=True).


# Read the slide and extract 512x512 patches

In [13]:
# Project root on the mounted Drive and the whole-slide image to process
work_directory = "/content/gdrive/MyDrive/Research of Deep Learning Classification for Soft Tissue Sarcomas/"
slide_path = os.path.join(work_directory, "SCANS", "1007555.svs")

# Slide name = file name of the slide without its extension
slide_name = os.path.splitext(os.path.split(slide_path)[1])[0]

# Patches go to <work_directory>/Code/images/<slide_name>; logs to its "logs" subfolder
target_path = os.path.join(work_directory, "Code", "images", slide_name)
logging_path = os.path.join(target_path, "logs")

# Make sure both output folders exist, reporting which ones had to be created.
# logging_path is handled first; creating it also creates target_path.
for path in (logging_path, target_path):
    try:
        os.makedirs(path)
    except FileExistsError:
        print(f"Directory already exists: {path}")
    else:
        print(f"Created directory: {path}")

# Create a logger
logger = logging.getLogger(__name__)

# A record is checked against the logger's level before any handler sees it,
# so the logger must be at least as permissive as the INFO-level file handler.
# With WARNING here, the INFO records (patch count, "saved" messages) were
# dropped and never reached the log file.
# NOTE(review): records still propagate to the root logger's handlers, if the
# environment has configured any - confirm whether that console output is wanted.
logger.setLevel(logging.INFO)

# logging.getLogger returns the same object every time this cell is re-run.
# Detach and close handlers left over from earlier runs so records are not
# also written to stale log files and the old files are not kept open.
for old_handler in list(logger.handlers):
    logger.removeHandler(old_handler)
    old_handler.close()

# Create a formatter for the logger
formatter = logging.Formatter("%(asctime)s - %(levelname)s - %(message)s")

# Create a file handler for the logger (one timestamped log file per run)
log_filename = time.strftime("%Y%m%d_%H%M%S.log")
file_handler = logging.FileHandler(os.path.join(logging_path, log_filename))
file_handler.setLevel(logging.INFO)
file_handler.setFormatter(formatter)

# Add the file handler to the logger
logger.addHandler(file_handler)

Directory already exists: /content/gdrive/MyDrive/Research of Deep Learning Classification for Soft Tissue Sarcomas/Code/images/1007555/logs
Directory already exists: /content/gdrive/MyDrive/Research of Deep Learning Classification for Soft Tissue Sarcomas/Code/images/1007555


In [14]:
# Open the whole-slide image
slide = openslide.OpenSlide(slide_path)

# Level-0 size as (width, height); only complete 512-pixel patches are counted
slide_width, slide_height = slide.level_dimensions[0]
num_horizontal_patches = slide_width // 512
num_vertical_patches = slide_height // 512

# Total number of patches in the grid
num_patches = num_vertical_patches * num_horizontal_patches
logger.info(f"There are {num_patches} patches in this image!")

There are 58302 patches in this image!


In [15]:
# Edge length in pixels of the square patches read from level 0.
# Must match the value used to compute num_horizontal_patches / num_vertical_patches.
patch_size = 512

try:
    # Walk the patch grid row by row and save every patch that is not on disk yet.
    # No bounds check is needed: the grid sizes come from floor-dividing the
    # level-0 dimensions by the patch size, so every patch lies inside the slide.
    for y in tqdm(range(num_vertical_patches), desc="y-axis"):
        y_start = y * patch_size  # identical for every patch in this row
        for x in tqdm(range(num_horizontal_patches), desc="x-axis", leave=False):
            x_start = x * patch_size

            patch_id = f"{slide_name}_patch_{y}_{x}"
            patch_filename = f"{patch_id}.png"
            patch_filepath = os.path.join(target_path, patch_filename)

            # Patches written by an earlier run are skipped, so the cell can be resumed
            if os.path.exists(patch_filepath):
                logger.warning(f"Patch {patch_id} already exists.")
                continue

            # Write under a temporary name and rename once the file is complete.
            # If the run is interrupted mid-write, no truncated PNG is left under
            # the final name to be mistaken for a finished patch on the next run.
            # A leftover ".part" file is simply overwritten on the next attempt.
            partial_filepath = f"{patch_filepath}.part"

            try:
                region = slide.read_region((x_start, y_start), 0, (patch_size, patch_size))
                # The format is given explicitly because the temporary name
                # does not end in ".png".
                region.save(partial_filepath, format="PNG")
                os.replace(partial_filepath, patch_filepath)
                logger.info(f"Patch {patch_id} saved to {patch_filepath}.")
            except Exception as e:
                # A failure on one patch is logged and the loop moves on to the next
                logger.error(f"Error saving patch {patch_id}: {str(e)}")

except Exception as e:
    logger.error(f"Error processing slide: {str(e)}")

finally:
    # Close the slide on every exit path, including a manual interrupt.
    # The slide has to be reopened before this cell is run again.
    slide.close()

y-axis:   0%|          | 0/158 [00:00<?, ?it/s]


INFO:__main__:Patch 1007555_patch_0_73 saved to /content/gdrive/MyDrive/Research of Deep Learning Classification for Soft Tissue Sarcomas/Code/images/1007555/1007555_patch_0_73.png.
INFO:__main__:Patch 1007555_patch_0_74 saved to /content/gdrive/MyDrive/Research of Deep Learning Classification for Soft Tissue Sarcomas/Code/images/1007555/1007555_patch_0_74.png.

x-axis:  20%|██        | 75/369 [00:03<00:16, 17.78it/s] [AINFO:__main__:Patch 1007555_patch_0_75 saved to /content/gdrive/MyDrive/Research of Deep Learning Classification for Soft Tissue Sarcomas/Code/images/1007555/1007555_patch_0_75.png.
INFO:__main__:Patch 1007555_patch_0_76 saved to /content/gdrive/MyDrive/Research of Deep Learning Classification for Soft Tissue Sarcomas/Code/images/1007555/1007555_patch_0_76.png.

y-axis:   0%|          | 0/158 [00:10<?, ?it/s]


KeyboardInterrupt: ignored