In [1]:
import pandas as pd
import re

## Metadata of patches
Our pipeline accepts svs at a time so to avoid overwritting in the process_list_autogen.csv, we are creating our own list

In [2]:
# import pandas as pd

# # Hardcoded column names
# columns = [
#     'slide_id', 'process' ,'status','seg_level', 'sthresh', 'mthresh', 'close', 'use_otsu',
#     'keep_ids', 'exclude_ids', 'a_t', 'a_h', 'max_n_holes','vis_level','line_thickness',
#     'use_padding', 'contour_fn'
# ]

# # Create empty DataFrame with only headers
# df = pd.DataFrame(columns=columns)

# # Save to CSV
# df.to_csv("./FLAT_DIRECTORY/process_list_final.csv", index=False)

In [3]:
# import pandas as pd

# # Hardcoded column names
# columns = [
#     'slide_id', 'process' ,'status','seg_level', 'sthresh', 'mthresh', 'close', 'use_otsu',
#     'keep_ids', 'exclude_ids', 'a_t', 'a_h', 'max_n_holes','vis_level','line_thickness',
#     'use_padding', 'contour_fn'
# ]

# # Create empty DataFrame with only headers
# df = pd.DataFrame(columns=columns)

# # Save to CSV
# df.to_csv("./FLAT_DIRECTORY/process_list_autogen.csv", index=False)

In [4]:
# import pandas as pd

# # Hardcoded column names
# columns = [
#     'svs_file','renamed_file'
# ]

# # Create empty DataFrame with only headers
# df = pd.DataFrame(columns=columns)

# # Save to CSV
# df.to_csv("./FLAT_DIRECTORY/file_names.csv", index=False)

In [5]:
def is_valid_svs_file(filename, excel_path="PatientsSheet.xlsx", columnNumb=0):
    """
    Check if a given .svs filename is valid based on:
    - Matching first 12 characters with patient ID in Excel column 0.
    - Patient's label in specified column is not NaN or empty.

    Args:
        filename (str): Filename to check (should end in .svs).
        excel_path (str): Path to the Excel sheet.
        columnNumb (int): Column number to check for the label (default: 1 for MSI).

    Returns:
        tuple: (bool, value) where bool is True if valid, False otherwise,
               and value is the label from the specified column or None if invalid.
    """
    # Check if filename ends with .svs
    if not filename.lower().endswith('.svs'):
        print(f"Error: Filename '{filename}' does not end with .svs")
        return False, None

    # Check if filename is long enough to extract patient ID
    if len(filename) < 12:
        print(f"Error: Filename '{filename}' is too short (less than 12 characters)")
        return False, None

    # Extract patient ID from first 12 characters
    patient_id = filename[:12]

    try:
        # Read Excel file
        df = pd.read_excel(excel_path, header=None)
    except FileNotFoundError:
        print(f"Error: Excel file '{excel_path}' not found.")
        return False, None
    except Exception as e:
        print(f"Error reading Excel file: {e}")
        return False, None

    # Check if DataFrame is empty
    if df.empty:
        print("Error: Excel file is empty.")
        return False, None

    # Handle headers - skip first row if it contains "PATIENTS"
    if len(df) > 0 and str(df.iloc[0, 0]).strip().upper() == "PATIENTS":
        df = df.iloc[1:]

    # Check if we have data after potential header removal
    if df.empty:
        print("Error: No data found after header processing.")
        return False, None

    # Ensure we have enough columns (at least columnNumb + 1 since it's 0-indexed)
    required_columns = max(1, columnNumb + 1)  # At least 1 column, or columnNumb + 1
    if df.shape[1] < required_columns:
        print(f"Error: Excel file must have at least {required_columns} columns. "
              f"Found {df.shape[1]} columns. Cannot access column {columnNumb}.")
        return False, None

    # Clean and standardize patient ID column (always column 0)
    df.iloc[:, 0] = df.iloc[:, 0].astype(str).str.strip()
    
    # Clean and standardize the target column
    df.iloc[:, columnNumb] = df.iloc[:, columnNumb].astype(str).str.strip()

    # Filter for this patient
    match = df[df.iloc[:, 0] == patient_id]

    if match.empty:
        print(f"Patient ID '{patient_id}' not found in Excel file.")
        return False, None

    # Check label validity in specified column
    label = match.iloc[0, columnNumb]
    if pd.isna(label) or str(label).strip().lower() in ['nan', '', 'none']:
        print(f"Patient '{patient_id}' has invalid or empty label in column {columnNumb}: '{label}'")
        return False, None

    print(f"Valid file: Patient '{patient_id}' found with label in column {columnNumb}: '{label}'")
    return True, label

## Renaming files to patientid and label
Clam on default give name of svs to .h5 file so we are renaming our .h5 file to 12 char of .svs file with label

In [6]:
def rename_h5_file(svs_filename, label, h5_dir):
    base_name = os.path.splitext(os.path.basename(svs_filename))[0]
    old_h5_path = os.path.join(h5_dir, f"{base_name}.h5")
    new_base = f"{base_name[:12]}_{label}.h5"
    new_h5_path = os.path.join(h5_dir, new_base)

    if os.path.exists(old_h5_path):
        try:
            os.rename(old_h5_path, new_h5_path)
            print(f"📦 Renamed H5: {old_h5_path} → {new_h5_path}")
        except FileExistsError:
            print(f"⚠️ File already exists: {new_h5_path}. Skipping rename.")
    else:
        print(f"❌ File not found: {old_h5_path}")

    return new_h5_path

## Patch creation function

In [7]:
import os
import re
import numpy as np
import pandas as pd
from PIL import Image
import h5py
import openslide

def create_patches_from_h5(h5_path, svs_path, output_dir, magnification):
    base_name = os.path.splitext(os.path.basename(h5_path))[0]
    metadatadir= r"D:\Aamir Gulzar\KSA_project2\dataset\patches_metadata"
    os.makedirs(metadatadir, exist_ok=True)

    # If output_dir already exists, skip patch creation and rebuild metadata from PNGs
    if os.path.exists(output_dir) and os.path.isdir(output_dir):
        print(f"⚠️ Output directory '{output_dir}' already exists. Rebuilding metadata from existing PNGs...")

        metadata = []
        pattern = re.compile(rf"{re.escape(base_name)}_x(\d+)_y(\d+)_patch(\d{{5}})\.png")

        for filename in sorted(os.listdir(output_dir)):
            if filename.endswith(".png") and filename.startswith(base_name):
                match = pattern.match(filename)
                if not match:
                    print(f"⚠️ Skipping unrecognized filename: {filename}")
                    continue

                x, y, patch_number = map(int, match.groups())
                patch_path = os.path.join(output_dir, filename)

                try:
                    patch = Image.open(patch_path).convert('RGB')
                    rgb_array = np.array(patch)
                    avg_r, avg_g, avg_b = rgb_array.mean(axis=(0, 1))
                    std_r, std_g, std_b = rgb_array.std(axis=(0, 1))

                    metadata.append({
                        'patch_location': patch_path,
                        'patch_name': filename,
                        'patch_number': patch_number,
                        'patch_x': x,
                        'patch_y': y,
                        'avg_R': round(avg_r, 2),
                        'avg_G': round(avg_g, 2),
                        'avg_B': round(avg_b, 2),
                        'std_R': round(std_r, 2),
                        'std_G': round(std_g, 2),
                        'std_B': round(std_b, 2)
                    })
                except Exception as e:
                    print(f"❌ Failed to process {filename}: {e}")

        # Save CSV
        csv_path = os.path.join(metadatadir, f"{base_name}.csv")
        df = pd.DataFrame(metadata)
        df.to_csv(csv_path, index=False)

        print(f"📄 Metadata CSV rebuilt and saved to: {csv_path}")
        return

    # Otherwise, proceed with normal patch extraction
    os.makedirs(output_dir)

    # Set patch size
    if magnification == 40:
        PATCH_SIZE = 1024
        FINAL_SIZE = 512
    elif magnification == 20:
        PATCH_SIZE = 512
        FINAL_SIZE = 512
    else:
        raise ValueError(f"Unsupported magnification: {magnification}")

    # Load coordinates
    with h5py.File(h5_path, 'r') as h5_file:
        coords = h5_file['coords'][:]

    slide = openslide.OpenSlide(svs_path)
    metadata = []

    for i, (x, y) in enumerate(coords):
        patch = slide.read_region((int(x), int(y)), 0, (PATCH_SIZE, PATCH_SIZE)).convert('RGB')

        if PATCH_SIZE != FINAL_SIZE:
            patch = patch.resize((FINAL_SIZE, FINAL_SIZE), resample=Image.BICUBIC)
            x = int(x) // 2
            y = int(y) // 2
        else:
            x = int(x)
            y = int(y)

        patch_name = f"{base_name}_x{x}_y{y}_patch{i:05d}.png"
        patch_path = os.path.join(output_dir, patch_name)
        patch.save(patch_path)

        rgb_array = np.array(patch)
        avg_r, avg_g, avg_b = rgb_array.mean(axis=(0, 1))
        std_r, std_g, std_b = rgb_array.std(axis=(0, 1))

        metadata.append({
            'patch_location': patch_path,
            'patch_name': patch_name,
            'patch_number': i,
            'patch_x': x,
            'patch_y': y,
            'avg_R': round(avg_r, 2),
            'avg_G': round(avg_g, 2),
            'avg_B': round(avg_b, 2),
            'std_R': round(std_r, 2),
            'std_G': round(std_g, 2),
            'std_B': round(std_b, 2)
        })

    # Save metadata CSV
    csv_path = os.path.join(metadatadir, f"{base_name}.csv")
    df = pd.DataFrame(metadata)
    df.to_csv(csv_path, index=False)

    print(f"✅ Done! Saved {len(coords)} patches to {output_dir}")
    print(f"📄 Metadata CSV saved to: {csv_path}")


## Comparing to the slidelog metadata file

In [8]:
import os
import pandas as pd

def check_svs_file(svs_file, excel_file, filename_column="Filename", label_column="Label"):
    try:
        df = pd.read_csv(excel_file)

        filename = svs_file
        row = df[df[filename_column] == filename]

        if not row.empty:
            label = row.iloc[0][label_column]
            return True, label
        else:
            return False, None

    except Exception as e:
        print(f"Error reading Excel or processing file: {e}")
        return False, None


## Pipeline

In [9]:
import os

# Directory path
directory = r'D:\Aamir Gulzar\KSA_project2\dataset\patch_data'

# Get list of folders only (not files)
folder_names = [name for name in os.listdir(directory) if os.path.isdir(os.path.join(directory, name))]

print(f"Total folders found: {len(folder_names)}")
print(folder_names)

Total folders found: 418
['TCGA-3L-AA1B_nonMSIH', 'TCGA-4N-A93T_nonMSIH', 'TCGA-5M-AAT4_nonMSIH', 'TCGA-5M-AAT6_MSIH', 'TCGA-5M-AATE_nonMSIH', 'TCGA-A6-2671_nonMSIH', 'TCGA-A6-2681_nonMSIH', 'TCGA-A6-2685_nonMSIH', 'TCGA-A6-2686_MSIH', 'TCGA-A6-3807_nonMSIH', 'TCGA-A6-4105_nonMSIH', 'TCGA-A6-4107_nonMSIH', 'TCGA-A6-5657_nonMSIH', 'TCGA-A6-5660_nonMSIH', 'TCGA-A6-5661_MSIH', 'TCGA-A6-5662_nonMSIH', 'TCGA-A6-5664_nonMSIH', 'TCGA-A6-5665_MSIH', 'TCGA-A6-5666_nonMSIH', 'TCGA-A6-5667_nonMSIH', 'TCGA-A6-6137_nonMSIH', 'TCGA-A6-6138_nonMSIH', 'TCGA-A6-6142_nonMSIH', 'TCGA-A6-6648_nonMSIH', 'TCGA-A6-6649_nonMSIH', 'TCGA-A6-6651_nonMSIH', 'TCGA-A6-6652_nonMSIH', 'TCGA-A6-6653_MSIH', 'TCGA-A6-6654_nonMSIH', 'TCGA-A6-A565_nonMSIH', 'TCGA-A6-A566_nonMSIH', 'TCGA-A6-A567_nonMSIH', 'TCGA-A6-A56B_nonMSIH', 'TCGA-AA-3664_nonMSIH', 'TCGA-AA-3666_nonMSIH', 'TCGA-AA-3667_nonMSIH', 'TCGA-AA-3673_nonMSIH', 'TCGA-AA-3678_nonMSIH', 'TCGA-AA-3679_nonMSIH', 'TCGA-AA-3680_nonMSIH', 'TCGA-AA-3681_nonMSIH', 'TCGA

In [None]:
import os
import subprocess
from pathlib import Path
import openslide

# Top-level directory containing subfolders with .svs files
wsi_root = "D:/Aamir Gulzar/WSI_Raw_data"
flat_dir = r"D:\Aamir Gulzar\KSA_project2\dataset\Flat_directory"
count = 0
root_dir = r"D:\Aamir Gulzar\KSA_project2\dataset\patch_data"

# Get all subdirectories
subfolders = [f.name for f in os.scandir(root_dir) if f.is_dir()]

# Loop through all subfolders
for subdir in os.listdir(wsi_root):
    # if count == 2:
    #     break
    count += 1
    print(f"Count: {count}")
    
    full_subdir_path = os.path.join(wsi_root, subdir)
    if not os.path.isdir(full_subdir_path):
        continue  # Skip non-folder items

    # Search for .svs file in this subfolder
    svs_files = [f for f in os.listdir(full_subdir_path) if f.endswith('.svs')]
    if not svs_files:
        print(f"❌ No .svs file found in: {full_subdir_path}")
        continue
    # else:
    #     print(f"🔍 Found .svs files in {subdir}: {svs_files}")

    # If multiple .svs files exist, process each one
    for svs_file in svs_files:
        print(f"\n🚀 Starting processing of: {svs_file} in folder: {subdir}")

        # Validate SVS against metadata
        bool_valid, label = check_svs_file(
            svs_file,
            "./slides_metadata/slide_metadata_filtered.csv",
            filename_column="Filename",
            label_column="Label"
        )

        newnameofsvs = f"{svs_file[:12]}_{label}"

        if newnameofsvs in folder_names:
            print("File already exists")
            break

        if not bool_valid:
            print(f"⚠️ Skipping {svs_file}: Not found or invalid in metadata.")
            continue
        else:
            print(f"✅ Valid SVS: {svs_file} | Label: {label}")

        source_dir = full_subdir_path
        source_dir_quoted = f"{source_dir}"
        flat_dir_quoted = f"{flat_dir}"

        # output_dir = "./FLAT_DIRECTORY/downsampled"
        svs_path = os.path.join(source_dir_quoted, svs_file)

        try:
            slide = openslide.OpenSlide(svs_path)
        except Exception as e:
            print(f"❌ Error opening slide {svs_file}: {e}")
            continue

        # Print magnification and spacing
        mag = int(slide.properties.get("openslide.objective-power", 20))
        spacing = slide.properties.get("openslide.mpp-x", "Unknown")
        print(f"🔬 Magnification: {mag}x")
        print(f"📏 Spacing (mpp-x): {spacing}")

        # Set patch and step size
        if mag == 40:
            print("📐 Using patch/step size of 1024 (40x)")
            patch_size = '1024'
            step_size = '1024'
        else:
            print("📐 Using patch/step size of 512 (20x)")
            patch_size = '512'
            step_size = '512'

        
        # Construct command
        command = [
            'python', './CLAM/create_patches_fp.py',
            '--source', source_dir_quoted,
            '--save_dir', flat_dir,
            '--patch_size', patch_size,
            '--step_size', step_size,
            '--seg', '--patch'
        ]
        print("📦 Running patch creation script...")

        with subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True) as proc:
            for line in proc.stdout:
                print(line, end='')  # Stream live output

        # Rename .h5 file and log it
        h5directory= r"D:\Aamir Gulzar\KSA_project2\dataset\Flat_directory\patches"
        new_h5_path = rename_h5_file(svs_file, label, h5_dir=h5directory)
        print(f"📄 Renamed .h5 file path: {new_h5_path}")

        # Set patch output directory
        base_dir = "D:/Aamir Gulzar/KSA_project2/dataset/patch_data"
        sub_dir = f"{svs_file[:12]}_{label}"
        output_dir = os.path.join(base_dir, sub_dir)

        # Determine skipped patches file path
        svs_base_filename = os.path.splitext(os.path.basename(svs_file))[0]
        skipped_filename = f"{svs_base_filename}skipped_patches.csv"
        full_skipped_path = Path("./FLAT_DIRECTORY/patches/") / skipped_filename

        # Run patch extraction from .h5
        print(f"🧩 Extracting image patches to: {output_dir}")
        create_patches_from_h5(new_h5_path, svs_path, output_dir, mag)
        print(f"✅ Finished processing {svs_file}")

Count: 1

🚀 Starting processing of: TCGA-A6-2686-01Z-00-DX1.0540a027-2a0c-46c7-9af0-7b8672631de7.svs in folder: 001b7d97-9425-43c3-a9a3-a36cb3d2a591
File already exists
Count: 2

🚀 Starting processing of: TCGA-D5-5539-01Z-00-DX1.9c46fe78-2adb-4f49-9141-cda135c2c90b.svs in folder: 00d69f69-3120-4b0c-8985-aab4edf9bc4b
File already exists
Count: 3

🚀 Starting processing of: TCGA-A6-2683-01Z-00-DX1.0dfc5d0a-68f4-45e1-a879-0428313c6dbc.svs in folder: 011da0ba-03eb-41cd-ae90-2dca944410e5
⚠️ Skipping TCGA-A6-2683-01Z-00-DX1.0dfc5d0a-68f4-45e1-a879-0428313c6dbc.svs: Not found or invalid in metadata.
Count: 4

🚀 Starting processing of: TCGA-F4-6459-01Z-00-DX1.80a78213-1137-4521-9d60-ac64813dec4c.svs in folder: 01f2d311-0825-4c67-af09-cf1a5ae6b1af
File already exists
Count: 5

🚀 Starting processing of: TCGA-AY-4070-01Z-00-DX1.dd650ac6-8480-4fd8-85b8-15a7840a5933.svs in folder: 021821c9-68f3-4be3-97ec-4ade5b4aaa60
File already exists
Count: 6

🚀 Starting processing of: TCGA-A6-6653-01Z-00-DX1.e13

## Testing pipeline

In [11]:
import os

# Define your paths
h5_folder = r"D:\Aamir Gulzar\KSA_project2\dataset\Flat_directory\patches"
patch_data_folder = r"D:\Aamir Gulzar\KSA_project2\dataset\patch_data"

# Get list of .h5 files (without .h5 extension)
h5_files = [os.path.splitext(f)[0] for f in os.listdir(h5_folder) if f.endswith('.h5')]

# Get list of directory names in patch_data
patch_dirs = [d for d in os.listdir(patch_data_folder) if os.path.isdir(os.path.join(patch_data_folder, d))]

# Find .h5 files without corresponding directory
missing_dirs = [f + '.h5' for f in h5_files if f not in patch_dirs]

# Print results
print("Missing directories for the following .h5 files:")
for fname in missing_dirs:
    print(fname)

Missing directories for the following .h5 files:
TCGA-QG-A5Z1-01Z-00-DX1.F3157C57-0F35-42D3-9CA5-C72D93F1BF89.h5
TCGA-T9-A92H-01Z-00-DX2.43894C88-2096-4932-9E9D-17BDCACF988C.h5
TCGA-T9-A92H-01Z-00-DX3.1DE7D5ED-60F7-4645-8243-AB0C027B3ED7.h5


In [8]:
import openslide
svs_path= "D:/Aamir Gulzar/WSI_Raw_data/ed5f8c30-29e3-4144-948b-b8658564f2d6/TCGA-3L-AA1B-01Z-00-DX1.8923A151-A690-40B7-9E5A-FCBEDFC2394F.svs"
# "D:\Aamir Gulzar\WSI_Raw_data\021821c9-68f3-4be3-97ec-4ade5b4aaa60\TCGA-AY-4070-01Z-00-DX1.dd650ac6-8480-4fd8-85b8-15a7840a5933.svs"
slide = openslide.OpenSlide(svs_path)
magnification = slide.properties.get("openslide.objective-power")
slide.close()

if magnification:
    print(f"Magnification: {magnification}x")
    if magnification == "20":
        found_20x_path = svs_path
        
        print(f"✅ Found 200 magnification at: {found_20x_path}")
else:
    print(f"File: | Magnification info not found")

Magnification: 40x


In [9]:
# Load the SVS file
slide = openslide.OpenSlide(svs_path)

# Get dimensions of the base (level 0)
width, height = slide.dimensions
print(f"Base level dimensions: {width} x {height}")

# Get number of levels
level_count = slide.level_count
print(f"Number of levels: {level_count}")

# Print dimensions for each level
print("Level dimensions:")
for i in range(level_count):
    print(f" Level {i}: {slide.level_dimensions[i]}")

# Optional: print additional metadata
print("\nProperties:")
for key, value in slide.properties.items():
    print(f"{key}: {value}")

Base level dimensions: 95615 x 74462
Number of levels: 4
Level dimensions:
 Level 0: (95615, 74462)
 Level 1: (23903, 18615)
 Level 2: (5975, 4653)
 Level 3: (2987, 2326)

Properties:
aperio.AppMag: 40
aperio.DSR ID: resc3-dsr1
aperio.Date: 11/19/14
aperio.DisplayColor: 0
aperio.Exposure Scale: 0.000001
aperio.Exposure Time: 109
aperio.Filename: TCGA-3L-AA1B-01Z-00-DX1
aperio.Focus Offset: 0.000000
aperio.ICC Profile: ScanScope v1
aperio.ImageID: 164819
aperio.Left: 20.562956
aperio.LineAreaXOffset: 0.011464
aperio.LineAreaYOffset: -0.002805
aperio.LineCameraSkew: -0.000153
aperio.MPP: 0.2527
aperio.OriginalHeight: 74562
aperio.OriginalWidth: 97536
aperio.Parmset: GOG136
aperio.ScanScope ID: SS1764CNTLR
aperio.StripeWidth: 2032
aperio.Time: 17:24:10
aperio.Time Zone: GMT-05:00
aperio.Title: TCGA-3L-AA1B-01Z-00-DX1
aperio.Top: 24.159639
aperio.User: 79ba7f43-3d2d-48db-a92d-9bb62c29f510
openslide.associated.thumbnail.height: 768
openslide.associated.thumbnail.width: 986
openslide.comment

## Patches of orignal 40-20x each of 512

In [5]:
import os
import subprocess
from pathlib import Path
import openslide  # Ensure this is imported
# Define helper functions here: check_svs_file, rename_h5_file, create_patches_from_h5

# -------------------
# Define the full path to the specific .svs file you want to process
# found_20x_path = "D:/Aamir Gulzar/WSI_Raw_data/0ecb2d7b-0a69-46ce-80d6-1be65cec3cc7/TCGA-AA-3524-01Z-00-DX1.b1aae264-87be-4514-8f9d-25660b39caa7.svs"
found_20x_path=r"D:\Aamir Gulzar\WSI_Raw_data\02e1301c-1ee8-4853-9925-d01a1e7f7c3c\TCGA-AG-3581-01Z-00-DX1.2d8ba8ba-0533-41e5-b0e0-0d49ef408302.svs"

# Derived variables
flat_dir = "./FLAT_DIRECTORY"
svs_file = os.path.basename(found_20x_path)
source_dir = os.path.dirname(found_20x_path)

# Step 1: Check if it's a valid slide using metadata
bool_valid, label = check_svs_file(
    svs_file,
    "./slides_metadata/slide_metadata_filtered.csv",
    filename_column="Filename",
    label_column="Label"
)

if not bool_valid:
    print("❌ Slide not valid. Skipping.")
    exit()

newnameofsvs = f"{svs_file[:12]}_{label}"
print(f"✅ Slide is valid: {newnameofsvs}")

# Step 2: Determine magnification
slide = openslide.OpenSlide(found_20x_path)
mag = int(slide.properties.get("openslide.objective-power", 20))

if mag == 40:
    print("🔬 Magnification is 40x")
    patch_size = '1024'
    step_size = '1024'
else:
    print("🔬 Magnification is 20x")
    patch_size = '512'
    step_size = '512'

# Step 3: Run CLAM patch creation script
command = [
    'python', './CLAM/create_patches_fp.py',
    '--source', source_dir,
    '--save_dir', flat_dir,
    '--patch_size', patch_size,
    '--step_size', step_size,
    '--seg', '--patch'
]

print(f"🚀 Running patch extraction command for {svs_file}")
with subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True) as proc:
    for line in proc.stdout:
        print(line, end='')

# Step 4: Rename .h5 file
new_h5_path = rename_h5_file(svs_file, label, h5_dir="./FLAT_DIRECTORY/patches")
print(f"✅ Renamed .h5 file: {new_h5_path}")

# Step 5: Create patches from .h5
base_dir = "./Patches"
sub_dir = f"{svs_file[:12]}_{label}"
output_dir = os.path.join(base_dir, sub_dir)
print(f"📦 Creating PNG patches in: {output_dir}")

# Optional: path for skipped patches CSV
svs_base_filename = os.path.splitext(svs_file)[0]
# full_path = Path("./FLAT_DIRECTORY/patches") / f"{svs_base_filename}skipped_patches.csv"

# If your `create_patches_from_h5` function requires that CSV, uncomment the next line and pass full_path
# create_patches_from_h5(new_h5_path, found_20x_path, output_dir, full_path, mag)

create_patches_from_h5(new_h5_path, found_20x_path, output_dir, mag)

❌ Slide not valid. Skipping.
✅ Slide is valid: TCGA-AG-3581_None
🔬 Magnification is 20x
🚀 Running patch extraction command for TCGA-AG-3581-01Z-00-DX1.2d8ba8ba-0533-41e5-b0e0-0d49ef408302.svs
Computed patch_level = 0 for 20x WSI (target 20x)

  0%|          | 0/1 [00:00<?, ?it/s]
100%|##########| 1/1 [01:28<00:00, 88.73s/it]
100%|##########| 1/1 [01:28<00:00, 88.73s/it]


progress: 0.00, 0/1
Creating patches for:  TCGA-AG-3581-01Z-00-DX1.2d8ba8ba-0533-41e5-b0e0-0d49ef408302 ...
Total number of contours to process:  1
Bounding Box: 26624 41984 50305 37825
Contour Area: 1351756800.0
Extracted 5337 coordinates
📦 Renamed H5: ./FLAT_DIRECTORY/patches\TCGA-AG-3581-01Z-00-DX1.2d8ba8ba-0533-41e5-b0e0-0d49ef408302.h5 → ./FLAT_DIRECTORY/patches\TCGA-AG-3581_None.h5
✅ Renamed .h5 file: ./FLAT_DIRECTORY/patches\TCGA-AG-3581_None.h5
📦 Creating PNG patches in: ./Patches\TCGA-AG-3581_None
✅ Done! Saved 5337 patches to ./Patches\TCGA-AG-3581_None
📄 Metadata CSV saved to: ./patches_metadata\TCGA-AG-358

## Patches of orignal 10x from 20x each of 512

In [None]:
import os
import subprocess
from pathlib import Path
import openslide  # Ensure this is imported
# Define helper functions here: check_svs_file, rename_h5_file, create_patches_from_h5

# -------------------
# Define the full path to the specific .svs file you want to process
# found_20x_path = "D:/Aamir Gulzar/WSI_Raw_data/0ecb2d7b-0a69-46ce-80d6-1be65cec3cc7/TCGA-AA-3524-01Z-00-DX1.b1aae264-87be-4514-8f9d-25660b39caa7.svs"

# Derived variables
flat_dir = "./FLAT_DIRECTORY"
svs_file = os.path.basename(found_20x_path)
source_dir = os.path.dirname(found_20x_path)

# Step 1: Check if it's a valid slide using metadata
bool_valid, label = check_svs_file(
    svs_file,
    "./slides_metadata/slide_metadata_filtered.csv",
    filename_column="Filename",
    label_column="Label"
)

if not bool_valid:
    print("❌ Slide not valid. Skipping.")
    exit()

newnameofsvs = f"{svs_file[:12]}_{label}"
print(f"✅ Slide is valid: {newnameofsvs}")

# Step 2: Determine magnification
slide = openslide.OpenSlide(found_20x_path)
mag = int(slide.properties.get("openslide.objective-power", 20))

if mag == 40:
    print("🔬 Magnification is 40x")
    patch_size = '1024'
    step_size = '1024'
else:
    print("🔬 Magnification is 20x")
    patch_size = '512'
    step_size = '512'

# Step 3: Run CLAM patch creation script
command = [
    'python', './CLAM/create_patches_fp.py',
    '--source', source_dir,
    '--save_dir', flat_dir,
    '--patch_size', patch_size,
    '--step_size', step_size,
    '--seg', '--patch'
]

print(f"🚀 Running patch extraction command for {svs_file}")
with subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True) as proc:
    for line in proc.stdout:
        print(line, end='')

# Step 4: Rename .h5 file
new_h5_path = rename_h5_file(svs_file, label, h5_dir="./FLAT_DIRECTORY/patches")
print(f"✅ Renamed .h5 file: {new_h5_path}")

# Step 5: Create patches from .h5
base_dir = "./Patches"
sub_dir = f"{svs_file[:12]}_{label}"
output_dir = os.path.join(base_dir, sub_dir)
print(f"📦 Creating PNG patches in: {output_dir}")

# Optional: path for skipped patches CSV
svs_base_filename = os.path.splitext(svs_file)[0]
# full_path = Path("./FLAT_DIRECTORY/patches") / f"{svs_base_filename}skipped_patches.csv"

# If your `create_patches_from_h5` function requires that CSV, uncomment the next line and pass full_path
# create_patches_from_h5(new_h5_path, found_20x_path, output_dir, full_path, mag)

create_patches_from_h5(new_h5_path, found_20x_path, output_dir, mag)

## Clam with 40x each of patch size 512

:/Aamir Gulzar/WSI_Raw_data/021821c9-68f3-4be3-97ec-4ade5b4aaa60/TCGA-AY-4070-01Z-00-DX1.dd650ac6-8480-4fd8-85b8-15a7840a5933.svs

In [10]:
import os
import subprocess
from pathlib import Path

# Path to the .svs file to test
# svs_path = found_20x_path
svs_path= "D:/Aamir Gulzar/WSI_Raw_data/9fc46b76-6d18-4268-9254-7476ae14f0d2/TCGA-A6-5667-01Z-00-DX1.1973b80d-b6b8-4ed8-9bc1-3aef51fbd9e6.svs"
svs_filename = os.path.basename(svs_path)
source_dir = os.path.dirname(svs_path)

# Label check (you must have this function and metadata file available)
bool_valid, label = check_svs_file(svs_filename, "./slides_metadata/slide_metadata_filtered.csv", filename_column="Filename", label_column="Label")
# if not bool_valid:
#     print("❌ SVS file is not valid or not labeled properly.")
# else:
#     print(f"✅ SVS file is valid. Label: {label}")
    
newnameofsvs = f"{svs_filename[:12]}_{label}"
flat_dir = "./FLAT_DIRECTORY"

# Create patches using CLAM script
command = [
    'python', './CLAM/create_patches_fp.py',
    '--source', source_dir,
    '--save_dir', flat_dir,
    '--patch_size', '512',
    '--step_size', '512',
    '--seg', '--patch'
]

print(f"🚀 Running patch extraction for: {svs_filename}")
with subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True) as proc:
    for line in proc.stdout:
        print(line, end='')

# Rename .h5 file
h5_path = rename_h5_file(svs_filename, label, h5_dir="./FLAT_DIRECTORY/patches")
print(f"✅ Renamed h5 file: {h5_path}")

# Generate skipped patch filename
skipped_csv = Path("./FLAT_DIRECTORY/patches") / f"{os.path.splitext(svs_filename)[0]}_skipped_patches.csv"

# Create PNG patches from h5
png_patch_output_dir = os.path.join("./Patches", newnameofsvs)
print(f"🎨 Generating PNG patches in: {png_patch_output_dir}")
create_patches_from_h5(h5_path, svs_path, png_patch_output_dir)

🚀 Running patch extraction for: TCGA-AY-4070-01Z-00-DX1.dd650ac6-8480-4fd8-85b8-15a7840a5933.svs
source:  D:/Aamir Gulzar/WSI_Raw_data/021821c9-68f3-4be3-97ec-4ade5b4aaa60
patch_save_dir:  ./FLAT_DIRECTORY\patches
mask_save_dir:  ./FLAT_DIRECTORY\masks
stitch_save_dir:  ./FLAT_DIRECTORY\stitches
source : D:/Aamir Gulzar/WSI_Raw_data/021821c9-68f3-4be3-97ec-4ade5b4aaa60
save_dir : ./FLAT_DIRECTORY
patch_save_dir : ./FLAT_DIRECTORY\patches
mask_save_dir : ./FLAT_DIRECTORY\masks
stitch_save_dir : ./FLAT_DIRECTORY\stitches
{'seg_params': {'seg_level': -1, 'sthresh': 8, 'mthresh': 7, 'close': 4, 'use_otsu': False, 'keep_ids': 'none', 'exclude_ids': 'none'}, 'filter_params': {'a_t': 100, 'a_h': 16, 'max_n_holes': 8}, 'patch_params': {'use_padding': True, 'contour_fn': 'four_pt'}, 'vis_params': {'vis_level': -1, 'line_thickness': 250}}
Computed patch_level = 0 for 40x WSI (target 20x)

  0%|          | 0/1 [00:00<?, ?it/s]
100%|##########| 1/1 [00:21<00:00, 21.64s/it]
100%|##########| 1/1 [00

## Clam with 10x each of patch size 512

:/Aamir Gulzar/WSI_Raw_data/021821c9-68f3-4be3-97ec-4ade5b4aaa60/TCGA-AY-4070-01Z-00-DX1.dd650ac6-8480-4fd8-85b8-15a7840a5933.svs

In [20]:
import os
import subprocess
from pathlib import Path

# Path to the .svs file to test
# svs_path = found_20x_path
svs_filename = os.path.basename(svs_path)
source_dir = os.path.dirname(svs_path)

# Label check (you must have this function and metadata file available)
bool_valid, label = check_svs_file(svs_filename, "./slides_metadata/slide_metadata_filtered.csv", filename_column="Filename", label_column="Label")
# if not bool_valid:
#     print("❌ SVS file is not valid or not labeled properly.")
# else:
#     print(f"✅ SVS file is valid. Label: {label}")
    
newnameofsvs = f"{svs_filename[:12]}_{label}"
flat_dir = "./FLAT_DIRECTORY"

# Create patches using CLAM script
command = [
    'python', './CLAM/create_patches_fp.py',
    '--source', source_dir,
    '--save_dir', flat_dir,
    '--patch_size', '512',
    '--step_size', '512',
    '--seg', '--patch'
]

print(f"🚀 Running patch extraction for: {svs_filename}")
with subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True) as proc:
    for line in proc.stdout:
        print(line, end='')

# Rename .h5 file
h5_path = rename_h5_file(svs_filename, label, h5_dir="./FLAT_DIRECTORY/patches")
print(f"✅ Renamed h5 file: {h5_path}")

# Generate skipped patch filename
skipped_csv = Path("./FLAT_DIRECTORY/patches") / f"{os.path.splitext(svs_filename)[0]}_skipped_patches.csv"

# Create PNG patches from h5
png_patch_output_dir = os.path.join("./Patches", newnameofsvs)
print(f"🎨 Generating PNG patches in: {png_patch_output_dir}")
create_patches_from_h5(h5_path, svs_path, png_patch_output_dir)

🚀 Running patch extraction for: TCGA-AY-4070-01Z-00-DX1.dd650ac6-8480-4fd8-85b8-15a7840a5933.svs
source:  D:/Aamir Gulzar/WSI_Raw_data/021821c9-68f3-4be3-97ec-4ade5b4aaa60
patch_save_dir:  ./FLAT_DIRECTORY\patches
mask_save_dir:  ./FLAT_DIRECTORY\masks
stitch_save_dir:  ./FLAT_DIRECTORY\stitches
source : D:/Aamir Gulzar/WSI_Raw_data/021821c9-68f3-4be3-97ec-4ade5b4aaa60
save_dir : ./FLAT_DIRECTORY
patch_save_dir : ./FLAT_DIRECTORY\patches
mask_save_dir : ./FLAT_DIRECTORY\masks
stitch_save_dir : ./FLAT_DIRECTORY\stitches
{'seg_params': {'seg_level': -1, 'sthresh': 8, 'mthresh': 7, 'close': 4, 'use_otsu': False, 'keep_ids': 'none', 'exclude_ids': 'none'}, 'filter_params': {'a_t': 100, 'a_h': 16, 'max_n_holes': 8}, 'patch_params': {'use_padding': True, 'contour_fn': 'four_pt'}, 'vis_params': {'vis_level': -1, 'line_thickness': 250}}
Computed patch_level = 0 for 40x WSI (target 20x)

  0%|          | 0/1 [00:00<?, ?it/s]
100%|##########| 1/1 [00:04<00:00,  4.62s/it]
100%|##########| 1/1 [00

## Patches with custom method

In [37]:
import os
import h5py
import openslide
from PIL import Image
import numpy as np
import cv2
import csv

def get_white_pixel_ratio(pil_img, satThresh=30, valThresh=200):
    if pil_img.mode != 'RGB':
        pil_img = pil_img.convert('RGB')

    img_np = np.array(pil_img, dtype=np.uint8)
    hsv_img = cv2.cvtColor(img_np, cv2.COLOR_RGB2HSV)

    # Mask: low saturation AND high brightness (value)
    sat_mask = hsv_img[:, :, 1] < satThresh
    val_mask = hsv_img[:, :, 2] > valThresh
    white_mask = np.logical_and(sat_mask, val_mask)

    return np.sum(white_mask) / white_mask.size

def create_patches_from_h5(h5_path, svs_path, output_dir, skip_log_path):
    os.makedirs(output_dir, exist_ok=True)

    BASE_PATCH_SIZE = 2048
    SUB_PATCH_SIZE = 512
    base_name = os.path.splitext(os.path.basename(h5_path))[0]

    with h5py.File(h5_path, 'r') as h5_file:
        coords = h5_file['coords'][:]

    slide = openslide.OpenSlide(svs_path)
    patch_count = 0

    log_exists = os.path.exists(skip_log_path)
    
    # ✅ Open log file in write mode and keep it open during patch loop
    with open(skip_log_path, 'a', newline='') as log_file:
        writer = csv.writer(log_file)
        if not log_exists:
            writer.writerow(["PatchName", "X", "Y", "WhiteRatio"])  # Write header only once

        for i, (x, y) in enumerate(coords):
            base_patch = slide.read_region((int(x), int(y)), 0, (BASE_PATCH_SIZE, BASE_PATCH_SIZE)).convert('RGB')

            for row in range(4):
                for col in range(4):
                    offset_x = col * SUB_PATCH_SIZE
                    offset_y = row * SUB_PATCH_SIZE

                    abs_x = int(x) + offset_x
                    abs_y = int(y) + offset_y

                    sub_patch = base_patch.crop((
                        offset_x, offset_y,
                        offset_x + SUB_PATCH_SIZE, offset_y + SUB_PATCH_SIZE
                    ))

                    white_ratio = get_white_pixel_ratio(sub_patch)
                    patch_name = f"{base_name}_x{abs_x}_y{abs_y}_patch{patch_count:05d}.png"

                    # if white_ratio > 0.8:
                    #     writer.writerow([patch_name, abs_x, abs_y, round(white_ratio, 4)])
                    #     continue

                    sub_patch.save(os.path.join(output_dir, patch_name))
                    patch_count += 1

    print(f"✅ Done! Saved {patch_count} coordinate-named 512x512 patches to {output_dir}")

## Patches at 40x, each of 512 first and again 512

In [25]:
import os
import subprocess
from pathlib import Path

# Path to the .svs file to test
# svs_path = found_20x_path
svs_filename = os.path.basename(svs_path)
source_dir = os.path.dirname(svs_path)

# Label check (you must have this function and metadata file available)
bool_valid, label = check_svs_file(svs_filename, "./slides_metadata/slide_metadata_filtered.csv", filename_column="Filename", label_column="Label")
# if not bool_valid:
#     print("❌ SVS file is not valid or not labeled properly.")
# else:
#     print(f"✅ SVS file is valid. Label: {label}")
    
newnameofsvs = f"{svs_filename[:12]}_{label}"
flat_dir = "./FLAT_DIRECTORY"

# Create patches using CLAM script
command = [
    'python', './CLAM/create_patches_fp.py',
    '--source', source_dir,
    '--save_dir', flat_dir,
    '--patch_size', '512',
    '--step_size', '512',
    '--seg', '--patch'
]

print(f"🚀 Running patch extraction for: {svs_filename}")
with subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True) as proc:
    for line in proc.stdout:
        print(line, end='')

# Rename .h5 file
h5_path = rename_h5_file(svs_filename, label, h5_dir="./FLAT_DIRECTORY/patches")
print(f"✅ Renamed h5 file: {h5_path}")

# Generate skipped patch filename
skipped_csv = Path("./FLAT_DIRECTORY/patches") / f"{os.path.splitext(svs_filename)[0]}_skipped_patches.csv"

# Create PNG patches from h5
png_patch_output_dir = os.path.join("./Patches", newnameofsvs)
print(f"🎨 Generating PNG patches in: {png_patch_output_dir}")
create_patches_from_h5(h5_path, svs_path, png_patch_output_dir, skipped_csv)

🚀 Running patch extraction for: TCGA-AY-4070-01Z-00-DX1.dd650ac6-8480-4fd8-85b8-15a7840a5933.svs
source:  D:/Aamir Gulzar/WSI_Raw_data/021821c9-68f3-4be3-97ec-4ade5b4aaa60
patch_save_dir:  ./FLAT_DIRECTORY\patches
mask_save_dir:  ./FLAT_DIRECTORY\masks
stitch_save_dir:  ./FLAT_DIRECTORY\stitches
source : D:/Aamir Gulzar/WSI_Raw_data/021821c9-68f3-4be3-97ec-4ade5b4aaa60
save_dir : ./FLAT_DIRECTORY
patch_save_dir : ./FLAT_DIRECTORY\patches
mask_save_dir : ./FLAT_DIRECTORY\masks
stitch_save_dir : ./FLAT_DIRECTORY\stitches
{'seg_params': {'seg_level': -1, 'sthresh': 8, 'mthresh': 7, 'close': 4, 'use_otsu': False, 'keep_ids': 'none', 'exclude_ids': 'none'}, 'filter_params': {'a_t': 100, 'a_h': 16, 'max_n_holes': 8}, 'patch_params': {'use_padding': True, 'contour_fn': 'four_pt'}, 'vis_params': {'vis_level': -1, 'line_thickness': 250}}
Computed patch_level = 0 for 40x WSI (target 20x)

  0%|          | 0/1 [00:00<?, ?it/s]
100%|##########| 1/1 [00:21<00:00, 21.83s/it]
100%|##########| 1/1 [00

## Patches at 20x, each of 1024 first and again 512

In [27]:
import os
import subprocess
from pathlib import Path

# Path to the .svs file to test
# svs_path = found_20x_path
svs_filename = os.path.basename(svs_path)
source_dir = os.path.dirname(svs_path)

# Label check (you must have this function and metadata file available)
bool_valid, label = check_svs_file(svs_filename, "./slides_metadata/slide_metadata_filtered.csv", filename_column="Filename", label_column="Label")
# if not bool_valid:
#     print("❌ SVS file is not valid or not labeled properly.")
# else:
#     print(f"✅ SVS file is valid. Label: {label}")
    
newnameofsvs = f"{svs_filename[:12]}_{label}"
flat_dir = "./FLAT_DIRECTORY"

# Create patches using CLAM script
command = [
    'python', './CLAM/create_patches_fp.py',
    '--source', source_dir,
    '--save_dir', flat_dir,
    '--patch_size', '1024',
    '--step_size', '1024',
    '--seg', '--patch'
]

print(f"🚀 Running patch extraction for: {svs_filename}")
with subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True) as proc:
    for line in proc.stdout:
        print(line, end='')

# Rename .h5 file
h5_path = rename_h5_file(svs_filename, label, h5_dir="./FLAT_DIRECTORY/patches")
print(f"✅ Renamed h5 file: {h5_path}")

# Generate skipped patch filename
skipped_csv = Path("./FLAT_DIRECTORY/patches") / f"{os.path.splitext(svs_filename)[0]}_skipped_patches.csv"

# Create PNG patches from h5
png_patch_output_dir = os.path.join("./Patches", newnameofsvs)
print(f"🎨 Generating PNG patches in: {png_patch_output_dir}")
create_patches_from_h5(h5_path, svs_path, png_patch_output_dir, skipped_csv)

🚀 Running patch extraction for: TCGA-AY-4070-01Z-00-DX1.dd650ac6-8480-4fd8-85b8-15a7840a5933.svs
source:  D:/Aamir Gulzar/WSI_Raw_data/021821c9-68f3-4be3-97ec-4ade5b4aaa60
patch_save_dir:  ./FLAT_DIRECTORY\patches
mask_save_dir:  ./FLAT_DIRECTORY\masks
stitch_save_dir:  ./FLAT_DIRECTORY\stitches
source : D:/Aamir Gulzar/WSI_Raw_data/021821c9-68f3-4be3-97ec-4ade5b4aaa60
save_dir : ./FLAT_DIRECTORY
patch_save_dir : ./FLAT_DIRECTORY\patches
mask_save_dir : ./FLAT_DIRECTORY\masks
stitch_save_dir : ./FLAT_DIRECTORY\stitches
{'seg_params': {'seg_level': -1, 'sthresh': 8, 'mthresh': 7, 'close': 4, 'use_otsu': False, 'keep_ids': 'none', 'exclude_ids': 'none'}, 'filter_params': {'a_t': 100, 'a_h': 16, 'max_n_holes': 8}, 'patch_params': {'use_padding': True, 'contour_fn': 'four_pt'}, 'vis_params': {'vis_level': -1, 'line_thickness': 250}}
Computed patch_level = 0 for 40x WSI (target 20x)

  0%|          | 0/1 [00:00<?, ?it/s]
100%|##########| 1/1 [00:21<00:00, 21.38s/it]
100%|##########| 1/1 [00

## Patches at 20x, each of 1024 first and again 512 with white background filtering

In [35]:
create_patches_from_h5(h5_path, svs_path, png_patch_output_dir, skipped_csv)

✅ Done! Saved 1169 coordinate-named 512x512 patches to ./Patches\TCGA-AY-4070_nonMSIH


## Patches at 10x, each of 2048 first and again 512

In [38]:
import os
import subprocess
from pathlib import Path

# Path to the .svs file to test
# svs_path = found_20x_path
svs_filename = os.path.basename(svs_path)
source_dir = os.path.dirname(svs_path)

# Label check (you must have this function and metadata file available)
bool_valid, label = check_svs_file(svs_filename, "./slides_metadata/slide_metadata_filtered.csv", filename_column="Filename", label_column="Label")
# if not bool_valid:
#     print("❌ SVS file is not valid or not labeled properly.")
# else:
#     print(f"✅ SVS file is valid. Label: {label}")
    
newnameofsvs = f"{svs_filename[:12]}_{label}"
flat_dir = "./FLAT_DIRECTORY"

# Create patches using CLAM script
command = [
    'python', './CLAM/create_patches_fp.py',
    '--source', source_dir,
    '--save_dir', flat_dir,
    '--patch_size', '2048',
    '--step_size', '2048',
    '--seg', '--patch'
]

print(f"🚀 Running patch extraction for: {svs_filename}")
with subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True) as proc:
    for line in proc.stdout:
        print(line, end='')

# Rename .h5 file
h5_path = rename_h5_file(svs_filename, label, h5_dir="./FLAT_DIRECTORY/patches")
print(f"✅ Renamed h5 file: {h5_path}")

# Generate skipped patch filename
skipped_csv = Path("./FLAT_DIRECTORY/patches") / f"{os.path.splitext(svs_filename)[0]}_skipped_patches.csv"

# Create PNG patches from h5
png_patch_output_dir = os.path.join("./Patches", newnameofsvs)
print(f"🎨 Generating PNG patches in: {png_patch_output_dir}")
create_patches_from_h5(h5_path, svs_path, png_patch_output_dir, skipped_csv)

🚀 Running patch extraction for: TCGA-AY-4070-01Z-00-DX1.dd650ac6-8480-4fd8-85b8-15a7840a5933.svs
source:  D:/Aamir Gulzar/WSI_Raw_data/021821c9-68f3-4be3-97ec-4ade5b4aaa60
patch_save_dir:  ./FLAT_DIRECTORY\patches
mask_save_dir:  ./FLAT_DIRECTORY\masks
stitch_save_dir:  ./FLAT_DIRECTORY\stitches
source : D:/Aamir Gulzar/WSI_Raw_data/021821c9-68f3-4be3-97ec-4ade5b4aaa60
save_dir : ./FLAT_DIRECTORY
patch_save_dir : ./FLAT_DIRECTORY\patches
mask_save_dir : ./FLAT_DIRECTORY\masks
stitch_save_dir : ./FLAT_DIRECTORY\stitches
{'seg_params': {'seg_level': -1, 'sthresh': 8, 'mthresh': 7, 'close': 4, 'use_otsu': False, 'keep_ids': 'none', 'exclude_ids': 'none'}, 'filter_params': {'a_t': 100, 'a_h': 16, 'max_n_holes': 8}, 'patch_params': {'use_padding': True, 'contour_fn': 'four_pt'}, 'vis_params': {'vis_level': -1, 'line_thickness': 250}}
Computed patch_level = 0 for 40x WSI (target 20x)

  0%|          | 0/1 [00:00<?, ?it/s]
100%|##########| 1/1 [00:22<00:00, 22.28s/it]
100%|##########| 1/1 [00

## creating patches of one 512x512 from 1024

In [42]:
def create_patches_from_h5(h5_path, svs_path, output_dir):
    os.makedirs(output_dir, exist_ok=True)

    PATCH_SIZE = 1024
    
    # Extract base name from h5 file (without extension)
    base_name = os.path.splitext(os.path.basename(h5_path))[0]
    
    # Load coordinates
    with h5py.File(h5_path, 'r') as h5_file:
        coords = h5_file['coords'][:]  # shape (N, 2), each row is (x, y)
    
    # Open the WSI
    slide = openslide.OpenSlide(svs_path)
    
    # Extract and save patches
    for i, (x, y) in enumerate(coords):
        patch = slide.read_region((int(x), int(y)), 0, (PATCH_SIZE, PATCH_SIZE)).convert('RGB')
        
        # Filename format: [basename]_x[y]_y[y]_patch[i].png
        patch_name = f"{base_name}_x{int(x)}_y{int(y)}_patch{i:05d}.png"
        patch.save(os.path.join(output_dir, patch_name))
    
    print(f"✅ Done! Saved {len(coords)} patches to {output_dir}")

In [43]:
import os
import subprocess
from pathlib import Path

# Path to the .svs file to test
# svs_path = found_20x_path
svs_filename = os.path.basename(svs_path)
source_dir = os.path.dirname(svs_path)

# Label check (you must have this function and metadata file available)
bool_valid, label = check_svs_file(svs_filename, "./slides_metadata/slide_metadata_filtered.csv", filename_column="Filename", label_column="Label")
# if not bool_valid:
#     print("❌ SVS file is not valid or not labeled properly.")
# else:
#     print(f"✅ SVS file is valid. Label: {label}")
    
newnameofsvs = f"{svs_filename[:12]}_{label}"
flat_dir = "./FLAT_DIRECTORY"

# Create patches using CLAM script
command = [
    'python', './CLAM/create_patches_fp.py',
    '--source', source_dir,
    '--save_dir', flat_dir,
    '--patch_size', '1024',
    '--step_size', '1024',
    '--seg', '--patch'
]

print(f"🚀 Running patch extraction for: {svs_filename}")
with subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True) as proc:
    for line in proc.stdout:
        print(line, end='')

# Rename .h5 file
h5_path = rename_h5_file(svs_filename, label, h5_dir="./FLAT_DIRECTORY/patches")
print(f"✅ Renamed h5 file: {h5_path}")

# Generate skipped patch filename
skipped_csv = Path("./FLAT_DIRECTORY/patches") / f"{os.path.splitext(svs_filename)[0]}_skipped_patches.csv"

# Create PNG patches from h5
png_patch_output_dir = os.path.join("./Patches", newnameofsvs)
print(f"🎨 Generating PNG patches in: {png_patch_output_dir}")
create_patches_from_h5(h5_path, svs_path, png_patch_output_dir)

🚀 Running patch extraction for: TCGA-AY-4070-01Z-00-DX1.dd650ac6-8480-4fd8-85b8-15a7840a5933.svs
source:  D:/Aamir Gulzar/WSI_Raw_data/021821c9-68f3-4be3-97ec-4ade5b4aaa60
patch_save_dir:  ./FLAT_DIRECTORY\patches
mask_save_dir:  ./FLAT_DIRECTORY\masks
stitch_save_dir:  ./FLAT_DIRECTORY\stitches
source : D:/Aamir Gulzar/WSI_Raw_data/021821c9-68f3-4be3-97ec-4ade5b4aaa60
save_dir : ./FLAT_DIRECTORY
patch_save_dir : ./FLAT_DIRECTORY\patches
mask_save_dir : ./FLAT_DIRECTORY\masks
stitch_save_dir : ./FLAT_DIRECTORY\stitches
{'seg_params': {'seg_level': -1, 'sthresh': 8, 'mthresh': 7, 'close': 4, 'use_otsu': False, 'keep_ids': 'none', 'exclude_ids': 'none'}, 'filter_params': {'a_t': 100, 'a_h': 16, 'max_n_holes': 8}, 'patch_params': {'use_padding': True, 'contour_fn': 'four_pt'}, 'vis_params': {'vis_level': -1, 'line_thickness': 250}}
Computed patch_level = 0 for 40x WSI (target 20x)

  0%|          | 0/1 [00:00<?, ?it/s]
100%|##########| 1/1 [00:21<00:00, 21.21s/it]
100%|##########| 1/1 [00

In [44]:
from PIL import Image
import os
from tqdm import tqdm

# --- Paths ---
input_dir = './Patches/TCGA-AY-4070_nonMSIH/'   # e.g., "./patches_40x"
output_dir = './Patches/path_to_save_512x512_patches'    # e.g., "./patches_20x"

# Create output dir if not exists
os.makedirs(output_dir, exist_ok=True)

# --- Resize all patches ---
for fname in tqdm(os.listdir(input_dir)):
    if fname.endswith(".png"):
        input_path = os.path.join(input_dir, fname)
        output_path = os.path.join(output_dir, fname)

        # Open and resize image
        img = Image.open(input_path)
        img_resized = img.resize((512, 512), resample=Image.BICUBIC)

        # Save resized image
        img_resized.save(output_path)

100%|████████████████████████████████████████████████████████████████████████████████| 336/336 [00:21<00:00, 15.82it/s]


## Testing levels of downsample

In [None]:
import os
import subprocess
from pathlib import Path

# Top-level directory containing subfolders with .svs files
wsi_root = "D:/Aamir Gulzar/WSI_Raw_data"
flat_dir = "./FLAT_DIRECTORY"
count=0

# Loop through all subfolders
for subdir in os.listdir(wsi_root):
    # if count==2:
    #     break
    # count+=1
    full_subdir_path = os.path.join(wsi_root, subdir)
    if not os.path.isdir(full_subdir_path):
        continue  # skip non-folder items

    # Search for .svs file in this subfolder
    svs_files = [f for f in os.listdir(full_subdir_path) if f.endswith('.svs')]
    if not svs_files:
        print(f"❌ No .svs file found in: {full_subdir_path}")
        continue
    else:
        print(svs_files)

    # If multiple .svs files exist, process each one
    for svs_file in svs_files:

        bool_valid, label= is_valid_svs_file(svs_file)

        # print(bool_valid)
        source_dir = full_subdir_path
        source_dir_quoted = f"{source_dir}"
        flat_dir_quoted = f"{flat_dir}"
        output_dir= "./FLAT_DIRECTORY/downsampled"
        target_size = (1024, 1024)
        svs_path = os.path.join(source_dir_quoted, svs_file)

        print(f"🚀 Processing: {svs_file} in {subdir}")
        level= downsample_and_resize_wsi(svs_path, output_dir, target_size, label)
        print(f"level: {level}\n")

In [None]:
new_h5_path= rename_h5_file(svs_file, label, h5_dir="./FLAT_DIRECTORY/patches")
base_dir = "./FLAT_DIRECTORY"
sub_dir = "patches"

# Create full path
print(new_h5_path)
base_dir= "./Patches"
sub_dir= f"{svs_file[:12]}_{label}"
output_dir = os.path.join(base_dir, sub_dir) 
print(output_dir)
create_patches_from_h5(new_h5_path, svs_path, output_dir)