In [1]:
!apt update && apt install -y openslide-tools
!pip install openslide-python

[33m0% [Working][0m            Get:1 https://cloud.r-project.org/bin/linux/ubuntu focal-cran40/ InRelease [3,622 B]
[33m0% [Connecting to archive.ubuntu.com (91.189.91.38)] [Waiting for headers] [1 I[0m[33m0% [Connecting to archive.ubuntu.com (91.189.91.38)] [Waiting for headers] [Con[0m                                                                               Get:2 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64  InRelease [1,581 B]
[33m0% [Connecting to archive.ubuntu.com (91.189.91.38)] [Waiting for headers] [Con[0m[33m0% [Connecting to archive.ubuntu.com (91.189.91.38)] [Waiting for headers] [Con[0m                                                                               Get:3 http://security.ubuntu.com/ubuntu focal-security InRelease [114 kB]
Hit:4 http://archive.ubuntu.com/ubuntu focal InRelease
Get:5 http://archive.ubuntu.com/ubuntu focal-updates InRelease [114 kB]
Get:6 http://ppa.launchpad.net/c2d4u.team/c2d4u4.0+/ubuntu

In [3]:
import os
import time
import logging
import openslide
from google.colab import drive
from openslide.deepzoom import DeepZoomGenerator
import numpy as np
from matplotlib import pyplot as plt

# Mount Google Drive at /content/gdrive/ so that the slide file and the output
# folders referenced by the following cells are reachable as ordinary paths.
# When the drive is already mounted, Colab only prints a notice; pass
# force_remount=True to drive.mount() to force a fresh mount.
drive.mount("/content/gdrive/")

Drive already mounted at /content/gdrive/; to attempt to forcibly remount, call drive.mount("/content/gdrive/", force_remount=True).


In [9]:
# --- Paths -------------------------------------------------------------------
# Project root on the mounted Google Drive and the whole-slide image to process.
work_directory = "/content/gdrive/MyDrive/Research of Deep Learning Classification for Soft Tissue Sarcomas/"
slide_path = os.path.join(
    work_directory,
    "Datasets",
    "TCGA_annoted",
    "ss",
    "TCGA-DX-A7EQ-01Z-00-DX1.6E243B4A-CE79-4B31-B98B-24B89E7C2FB4.svs",
)

# Slide name = file name of the slide without its extension
slide_name = os.path.splitext(os.path.basename(slide_path))[0]

# Output folder for the tiles of this slide, with a "logs" sub-folder
target_path = os.path.join(work_directory, "Code", "images", slide_name)
logging_path = os.path.join(target_path, "logs")

# Create the output folders. logging_path is nested inside target_path, so
# creating it first also creates target_path.
for path in [logging_path, target_path]:
    if os.path.isdir(path):
        print(f"Directory already exists: {path}")
    else:
        # Raises FileExistsError if `path` exists but is a regular file,
        # instead of wrongly reporting it as an existing directory.
        os.makedirs(path, exist_ok=True)
        print(f"Created directory: {path}")

# --- Logging -----------------------------------------------------------------
logger = logging.getLogger(__name__)

# The logger level is checked before any handler sees a record. It must be
# INFO (not WARNING), otherwise logger.info(...) calls never reach the
# INFO-level file handler and the log file stays empty.
logger.setLevel(logging.INFO)

# Re-running this cell must not stack a new FileHandler on top of the old
# ones (that would duplicate every record and leak open file handles).
for stale_handler in list(logger.handlers):
    logger.removeHandler(stale_handler)
    stale_handler.close()

# Record layout: timestamp - level - message
formatter = logging.Formatter("%(asctime)s - %(levelname)s - %(message)s")

# One timestamped log file per run of this cell
log_filename = time.strftime("%Y%m%d_%H%M%S.log")
file_handler = logging.FileHandler(os.path.join(logging_path, log_filename))
file_handler.setLevel(logging.INFO)
file_handler.setFormatter(formatter)

logger.addHandler(file_handler)

Directory already exists: /content/gdrive/MyDrive/Research of Deep Learning Classification for Soft Tissue Sarcomas/Code/images/TCGA-DX-A7EQ-01Z-00-DX1.6E243B4A-CE79-4B31-B98B-24B89E7C2FB4/logs
Directory already exists: /content/gdrive/MyDrive/Research of Deep Learning Classification for Soft Tissue Sarcomas/Code/images/TCGA-DX-A7EQ-01Z-00-DX1.6E243B4A-CE79-4B31-B98B-24B89E7C2FB4


In [11]:
# Open the whole-slide image and wrap it in a Deep Zoom generator that serves
# 256-pixel tiles with no overlap (limit_bounds disabled).
slide = openslide.OpenSlide(slide_path)
tiles = DeepZoomGenerator(slide, tile_size=256, overlap=0, limit_bounds=False)

# Report the pyramid layout
print("The number of levels in the tiles object are: ", tiles.level_count)
print("The dimensions of data in each level are: ", tiles.level_dimensions)

# Tile count of the last level = columns x rows of its tile grid
grid_cols, grid_rows = tiles.level_tiles[-1]
total_tiles = grid_cols * grid_rows
print("This means there are ", total_tiles, " total tiles in this level")

logger.info(f"There are {total_tiles} patches in this image!")

The number of levels in the tiles object are:  18
The dimensions of data in each level are:  ((1, 1), (2, 2), (4, 3), (7, 6), (14, 11), (27, 22), (54, 44), (107, 88), (214, 175), (428, 350), (856, 700), (1712, 1399), (3424, 2798), (6848, 5595), (13695, 11190), (27390, 22380), (54780, 44760), (109560, 89520))
This means there are  149800  total tiles in this level


In [13]:
# Size of the tile grid at the last Deep Zoom level: number of tile columns
# and tile rows. These bounds drive the tile export loop.
cols = tiles.level_tiles[-1][0]
rows = tiles.level_tiles[-1][1]
(cols, rows)

(428, 350)

In [None]:
# Export every tile of the last (highest-index) Deep Zoom level as a PNG named
# "<col>_<row>.png" inside the slide's output folder.
tile_dir = target_path

# The level index does not change inside the loop, so compute it once.
top_level = tiles.level_count - 1

for row in range(rows):
    for col in range(cols):
        tile_name = os.path.join(tile_dir, "%d_%d" % (col, row))
        temp_tile = tiles.get_tile(top_level, (col, row))
        # Convert to RGB before handing the pixels to matplotlib
        temp_tile_RGB = temp_tile.convert("RGB")
        temp_tile_np = np.array(temp_tile_RGB)
        plt.imsave(tile_name + ".png", temp_tile_np)
    # Report progress once per finished row rather than once per tile:
    # a per-tile print emits rows * cols lines and floods the notebook output.
    print("Saved row %d of %d (%d tiles so far)" % (row + 1, rows, (row + 1) * cols))