In [1]:
!apt update && apt install -y openslide-tools
!pip install openslide-python

[33m0% [Working][0m            Get:1 https://cloud.r-project.org/bin/linux/ubuntu focal-cran40/ InRelease [3,622 B]
Get:2 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64  InRelease [1,581 B]
Hit:3 http://archive.ubuntu.com/ubuntu focal InRelease
Get:4 http://security.ubuntu.com/ubuntu focal-security InRelease [114 kB]
Get:5 http://archive.ubuntu.com/ubuntu focal-updates InRelease [114 kB]
Hit:6 http://ppa.launchpad.net/c2d4u.team/c2d4u4.0+/ubuntu focal InRelease
Get:7 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64  Packages [1,007 kB]
Get:8 http://archive.ubuntu.com/ubuntu focal-backports InRelease [108 kB]
Hit:9 http://ppa.launchpad.net/cran/libgit2/ubuntu focal InRelease
Hit:10 http://ppa.launchpad.net/deadsnakes/ppa/ubuntu focal InRelease
Get:11 http://archive.ubuntu.com/ubuntu focal-updates/main amd64 Packages [3,150 kB]
Hit:12 http://ppa.launchpad.net/graphics-drivers/ppa/ubuntu focal InRelease
Hit:13 http://ppa.launchpad

In [2]:
import os
import cv2
import time
import logging
import openslide
from tqdm import tqdm
from google.colab import drive

# Mount the Google Drive to access the files
drive.mount("/content/gdrive/")

Mounted at /content/gdrive/


In [4]:
# Set the working directory and slide path
work_directory = "/content/gdrive/MyDrive/Research of Deep Learning Classification for Soft Tissue Sarcomas/"
slide_path = os.path.join(work_directory, "SCANS", "1007555.svs")

# Extract the slide name from the slide path
slide_name = os.path.splitext(os.path.basename(slide_path))[0]

# Set the target path for the slide images
target_path = os.path.join(work_directory, "Code", "images", slide_name)

# Set the logging path for the slide images
logging_path = os.path.join(target_path, "logs")

whites_path = os.path.join(target_path, "whites")

# Create directories if they don't exist
for path in [target_path, logging_path, whites_path]:
    try:
        os.makedirs(path)
        print(f"Created directory: {path}")
    except FileExistsError:
        print(f"Directory already exists: {path}")

# Create a logger
logger = logging.getLogger(__name__)

# Set the logging level for the logger
logger.setLevel(logging.WARNING)

# Create a formatter for the logger
formatter = logging.Formatter("%(asctime)s - %(levelname)s - %(message)s")

# Create a file handler for the logger
log_filename = time.strftime("%Y%m%d_%H%M%S.log")
file_handler = logging.FileHandler(os.path.join(logging_path, log_filename))
file_handler.setLevel(logging.INFO)
file_handler.setFormatter(formatter)

# Add the file handler to the logger
logger.addHandler(file_handler)

Directory already exists: /content/gdrive/MyDrive/Research of Deep Learning Classification for Soft Tissue Sarcomas/Code/images/1007555
Directory already exists: /content/gdrive/MyDrive/Research of Deep Learning Classification for Soft Tissue Sarcomas/Code/images/1007555/logs
Created directory: /content/gdrive/MyDrive/Research of Deep Learning Classification for Soft Tissue Sarcomas/Code/images/1007555/whites


In [5]:
# Define the white ratio threshold
white_ratio_threshold = 0.6


# Define the function to check the white ratio of an image
def check_white_ratio(image_path, white_threshold=240):
    img = cv2.imread(image_path)
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    hist = cv2.calcHist([gray], [0], None, [256], [0, 256])
    white_pixels = cv2.sumElems(hist[white_threshold:])[0]
    total_pixels = gray.shape[0] * gray.shape[1]
    white_ratio = white_pixels / total_pixels
    return white_ratio

In [6]:
# Open the slide and calculate the number of patches
slide = openslide.OpenSlide(slide_path)
num_horizontal_patches = slide.level_dimensions[0][0] // 512
num_vertical_patches = slide.level_dimensions[0][1] // 512
num_patches = num_horizontal_patches * num_vertical_patches
logger.info(f"There are {num_patches} patches in this image!")

In [8]:
try:
    # Extract patches from the slide and save them to the target directory
    for y in tqdm(range(num_vertical_patches), desc="y-axis"):
        for x in tqdm(range(num_horizontal_patches), desc="x-axis", leave=False):
            x_start = x * 512
            y_start = y * 512

            # Check if the patch exceeds the dimensions of the slide
            if y_start > slide.level_dimensions[0][1]:
                break

            patch_id = f"{slide_name}_patch_{y}_{x}"
            patch_filename = f"{patch_id}.png"
            patch_filepath = os.path.join(target_path, patch_filename)

            if os.path.exists(patch_filepath):
                logger.warning(f"Patch {patch_id} already exists.")
                continue

            try:
                region = slide.read_region((x_start, y_start), 0, (512, 512))
                white_ratio = check_white_ratio(region)

                if white_ratio >= white_ratio_threshold:
                    white_patch_filepath = os.path.join(whites_path, patch_filename)
                    region.save(white_patch_filepath)
                    logger.warning(
                        f"Patch {patch_id} is {white_ratio*100:.2f}% white and saved to {white_patch_filepath}."
                    )

                else:
                    region.save(patch_filepath)
                    logger.info(
                        f"Patch {patch_id} is {white_ratio*100:.2f}% white and saved to {patch_filepath}."
                    )

            except Exception as e:
                logger.error(f"Error saving patch {patch_id}: {str(e)}")

    # Close the slide
    slide.close()

except Exception as e:
    logger.error(f"Error processing slide: {str(e)}")

y-axis:   0%|          | 0/158 [00:00<?, ?it/s]


ERROR:__main__:Error saving patch 1007555_patch_0_98: Can't convert object to 'str' for 'filename'
ERROR:__main__:Error saving patch 1007555_patch_0_99: Can't convert object to 'str' for 'filename'
ERROR:__main__:Error saving patch 1007555_patch_0_100: Can't convert object to 'str' for 'filename'
ERROR:__main__:Error saving patch 1007555_patch_0_101: Can't convert object to 'str' for 'filename'
ERROR:__main__:Error saving patch 1007555_patch_0_102: Can't convert object to 'str' for 'filename'
ERROR:__main__:Error saving patch 1007555_patch_0_103: Can't convert object to 'str' for 'filename'
ERROR:__main__:Error saving patch 1007555_patch_0_104: Can't convert object to 'str' for 'filename'
ERROR:__main__:Error saving patch 1007555_patch_0_105: Can't convert object to 'str' for 'filename'
ERROR:__main__:Error saving patch 1007555_patch_0_106: Can't convert object to 'str' for 'filename'
ERROR:__main__:Error saving patch 1007555_patch_0_10

KeyboardInterrupt: ignored