#### **1. Extract files**

In [1]:
import zipfile
import os

zip_path = "../data/patient_000.zip"
output_dir = "../data/patient_000"

# Extract the archive unless the output directory already holds files.
# An existing-but-empty directory (e.g. left behind when an earlier run failed
# before anything was extracted) does not count as extracted, so it is
# populated instead of being silently skipped.
already_extracted = os.path.isdir(output_dir) and bool(os.listdir(output_dir))
if not already_extracted:
    print(f"Extracting {zip_path} to {output_dir}...")
    # Open the archive BEFORE creating the directory: if the zip is missing or
    # unreadable, no empty directory is created that would fool the check above.
    with zipfile.ZipFile(zip_path, 'r') as zip_ref:
        os.makedirs(output_dir, exist_ok=True)
        zip_ref.extractall(output_dir)
    print(f"Extracted files to {output_dir}")
else:
    print(f"Output directory {output_dir} already exists, skipping extraction.")


Output directory ../data/patient_000 already exists, skipping extraction.


#### **2. Visualize image**

In [None]:
from PIL import Image
import matplotlib.pyplot as plt
import os

# Disable decompression bomb check for large images
Image.MAX_IMAGE_PIXELS = None

image_dir = os.path.join(os.getcwd(), "..", "data", "patient_000")

# List the directory once and reuse the result for the printout and the filter.
dir_entries = os.listdir(image_dir)
print(f"Files in directory: {dir_entries}")

# os.listdir gives no ordering guarantee; sort so that image_files[0] is the
# same file on every run.
image_files = sorted(os.path.join(image_dir, f) for f in dir_entries if f.endswith(".tif"))

if not image_files:
    print(f"No .tif files found in {image_dir}")
else:
    image_path = image_files[0]
    image = Image.open(image_path)
    # NOTE(review): the original cell carried a commented-out full-resolution
    # plt.imshow(image) preview here. Presumably it was disabled because the
    # images are very large (the pixel limit is lifted above) - confirm.
    # A downsampled preview is drawn in a later cell.


Files in directory: ['patient_000_node_0.tif', 'patient_000_node_1.tif', 'patient_000_node_2.tif', 'patient_000_node_3.tif', 'patient_000_node_4.tif']


In [3]:
import openslide
import matplotlib.pyplot as plt

# Use a slide that actually exists: the file name comes from the directory
# listing printed by the previous cell (the original "example.tif" placeholder
# is not present in that listing).
image_path = os.path.join(os.getcwd(), "..", "data", "patient_000", "patient_000_node_0.tif")

# NOTE: if `import openslide` fails with "Couldn't locate OpenSlide DLL", the
# native library is missing; the error message itself suggests
# `pip install openslide-bin`.

# The context manager closes the slide handle even if reading fails.
with openslide.OpenSlide(image_path) as slide:
    print(f"Dimensions: {slide.dimensions}")
    print(f"Available Levels: {slide.level_dimensions}")
    print(f"Downsampling Factors: {slide.level_downsamples}")

    # read_region arguments: (x, y) location, level, (width, height).
    region = slide.read_region((0, 0), 0, (256, 256))

# Convert to RGB for display.
region = region.convert("RGB")

plt.imshow(region)
plt.title("Extracted Region (256x256)")
plt.axis("off")
plt.show()

ModuleNotFoundError: Couldn't locate OpenSlide DLL. Try `pip install openslide-bin`, or if you're using an OpenSlide binary package, ensure you've called os.add_dll_directory(). https://openslide.org/api/python/#installing

In [None]:
# Longest side of the preview, in pixels.
PREVIEW_MAX_SIDE = 1024

image = Image.open(image_path)

# Scale both sides by the same factor so the preview keeps the image's aspect
# ratio (a fixed 1024x1024 target would squash non-square images). The factor
# is capped at 1 so small images are never upscaled.
scale = min(1.0, PREVIEW_MAX_SIDE / max(image.size))
preview_size = tuple(max(1, round(side * scale)) for side in image.size)

# resize() returns a new image; `image` itself stays at full resolution for the
# cells below.
downsampled_image = image.resize(preview_size)
plt.imshow(downsampled_image)
plt.title("Downsampled Image")
plt.axis("off")
plt.show()

#### **3. Convert to tensor**

In [None]:
import torchvision.transforms as transforms

# Resize BEFORE converting to a tensor: the resize then runs on the PIL image,
# so the full-resolution image is never expanded into a float tensor.
# The resulting tensor still has a 512x512 spatial size.
transform = transforms.Compose([
    transforms.Resize((512, 512)),  # Resize all images to the same size
    transforms.ToTensor(),
])

image_tensor = transform(image)

print(f"Image tensor shape: {image_tensor.shape}")

#### **4. Crop uniformly**

In [None]:
# Take a 256x256 crop from the center of the image.
crop_transform = transforms.Compose([transforms.CenterCrop(size=(256, 256))])
cropped_image = crop_transform(image)

# Show the crop without axis ticks.
fig, ax = plt.subplots()
ax.imshow(cropped_image)
ax.set_title("Cropped Image")
ax.axis("off")
plt.show()

#### **5. Add masks?**

In [None]:
import numpy as np

# Binary mask: 255 inside a placeholder rectangle, 0 elsewhere.
mask = np.zeros((512, 512), dtype=np.uint8)
mask[100:400, 100:400] = 255  # TODO: where?

# Convert mask to PIL image
mask_image = Image.fromarray(mask)

# Image.blend requires both inputs to have the same size and mode. The opened
# image is not 512x512, so shrink it to the mask's size first (resizing before
# the RGBA conversion keeps the converted copy small).
image_for_overlay = image.resize(mask_image.size).convert("RGBA")

# Overlay mask
overlay = Image.blend(image_for_overlay, mask_image.convert("RGBA"), alpha=0.5)
plt.imshow(overlay)
plt.title("Image with Mask")
plt.axis("off")
plt.show()

#### **6. Load to Dataset**

In [None]:
from torch.utils.data import Dataset

class PatientDataset(Dataset):
    """Dataset over the ``.tif`` files found directly inside one directory.

    Args:
        image_dir: Directory to scan for files whose name ends with ``.tif``.
        transform: Optional callable applied to each opened PIL image before it
            is returned.
    """

    def __init__(self, image_dir, transform=None):
        self.image_dir = image_dir
        # os.listdir returns entries in arbitrary order; sort so that a given
        # index always refers to the same file across runs.
        self.image_files = sorted(
            os.path.join(image_dir, f) for f in os.listdir(image_dir) if f.endswith(".tif")
        )
        self.transform = transform

    def __len__(self):
        """Return the number of ``.tif`` files found in ``image_dir``."""
        return len(self.image_files)

    def __getitem__(self, idx):
        """Open the image at position ``idx`` and return it.

        Returns the output of ``transform`` when one was given, otherwise the
        opened PIL image itself.
        """
        image_path = self.image_files[idx]
        image = Image.open(image_path)
        if self.transform:
            image = self.transform(image)
        return image

# Resize the PIL image first, then convert: this avoids turning the
# full-resolution image into a float tensor before shrinking it.
transform = transforms.Compose([
    transforms.Resize((512, 512)),
    transforms.ToTensor(),
])

# The data directory sits one level above the working directory, as in the
# extraction cell ("../data/patient_000"); "./data/patient_000" pointed at a
# different location.
dataset = PatientDataset(image_dir="../data/patient_000", transform=transform)

image_tensor = dataset[0]
print(f"Loaded image tensor shape: {image_tensor.shape}")

In [None]:
# Show at most this many images side by side.
MAX_PREVIEWS = 5

# Never index past the end of the dataset when it holds fewer images.
num_previews = min(MAX_PREVIEWS, len(dataset))

if num_previews == 0:
    print("Dataset is empty, nothing to display.")
else:
    # squeeze=False keeps `axes` two-dimensional even for a single panel, so the
    # loop below works for any panel count. Width is 3 inches per panel, which
    # gives the original (15, 5) figure for five images.
    fig, axes = plt.subplots(1, num_previews, figsize=(3 * num_previews, 5), squeeze=False)
    for i, ax in enumerate(axes[0]):
        image_tensor = dataset[i]
        ax.imshow(image_tensor.permute(1, 2, 0))  # Convert tensor to image format
        ax.set_title(os.path.basename(dataset.image_files[i]))
        ax.axis("off")
    plt.show()