构造所有切片的标签信息

In [9]:
import csv
from pathlib import Path
import os


# Root of the raw dataset; create_dataset_csv() looks for slides at
# <root_dir>/<split>/*/*/*.svs.
root_dir = Path('/data/wsi/BRACS_WSI')
# Destination CSV that receives one row per slide found.
output_csv = Path('/data/wsi/BRACS-process/labels/all.csv')
# Names of the sub-directories of root_dir that are scanned.
splits = ['test', 'train', 'val']
# Alternative kept for reference: scan only the 'test' sub-directory.
# splits = ['test']


def create_dataset_csv(root=None, out_csv=None, split_names=None):
    """Scan the dataset tree for ``.svs`` slides and write one CSV row per slide.

    Slides are looked up at ``<root>/<split>/*/*/*.svs``. For every match a row
    ``[id, wsi_name, wsi_label, wsi_path]`` is produced, where

    * ``id`` is a 1-based running counter,
    * ``wsi_name`` is the file name without its extension,
    * ``wsi_label`` is the name of the slide's parent directory with
      ``'Type_'`` removed and lower-cased,
    * ``wsi_path`` is the resolved absolute path without the ``.svs`` suffix.

    Progress and problems are reported with ``print``; nothing is raised for a
    missing split directory, an empty scan or an unwritable output file.

    Args:
        root: Dataset root directory. Defaults to the module-level ``root_dir``.
        out_csv: Path of the CSV to write. Defaults to the module-level
            ``output_csv``.
        split_names: Iterable of split sub-directory names. Defaults to the
            module-level ``splits``.

    Returns:
        None.
    """
    scan_root = root_dir if root is None else Path(root)
    csv_path = output_csv if out_csv is None else Path(out_csv)
    wanted_splits = splits if split_names is None else split_names

    data_rows = []
    header = ['id', 'wsi_name', 'wsi_label', 'wsi_path']

    print("Script running...")
    print(f"Scanning for .svs files in: {scan_root.resolve()}")

    # Walk every requested split directory (e.g. test, train, val).
    for split in wanted_splits:
        split_path = scan_root / split
        if not split_path.exists():
            print(f"Warning: Directory not found, skipping: {split_path}")
            continue

        print(f"Processing sub-directory: {split}")

        # glob() yields entries in arbitrary, filesystem-dependent order;
        # sorting keeps ids and row order stable from one run to the next.
        for wsi_file in sorted(split_path.glob('*/*/*.svs')):
            try:
                wsi_label = wsi_file.parent.name.replace('Type_', '').lower()
                wsi_path = str(wsi_file.resolve().with_suffix(''))
            except (OSError, RuntimeError) as e:
                # resolve() is the only call here that touches the filesystem.
                print(f"Error processing file {wsi_file}: {e}")
                continue
            data_rows.append(
                [len(data_rows) + 1, wsi_file.stem, wsi_label, wsi_path]
            )

    if not data_rows:
        print("\nError: No .svs files found.")
        print("Please check the following:")
        print(f"1. Is the 'root_dir' variable correct? (Currently: {scan_root})")
        print(f"2. Is the script in the same directory as {scan_root}?")
        print("3. Does the directory structure match 'split/coarse/fine/*.svs'?")
        return

    # Write the CSV file.
    try:
        # Create the destination folder first so a fresh checkout does not
        # fail just because the labels directory is missing.
        csv_path.parent.mkdir(parents=True, exist_ok=True)
        with open(csv_path, 'w', newline='', encoding='utf-8') as f:
            writer = csv.writer(f)
            writer.writerow(header)
            writer.writerows(data_rows)
    except OSError as e:
        print(f"\nError: Could not write to file {csv_path}. {e}")
        return

    print("\nSuccess! ")
    print(f"Successfully created: {csv_path}")
    print(f"Total entries found: {len(data_rows)}")

# --- Run the script ---
if __name__ == "__main__":
    create_dataset_csv()

Script running...
Scanning for .svs files in: /data/wsi/BRACS_WSI
Processing sub-directory: test
Processing sub-directory: train
Processing sub-directory: val

Success! 
Successfully created: /data/wsi/BRACS-process/labels/all.csv
Total entries found: 492


创建软链接到 wsi-soft-link 目录

In [5]:
import csv
from pathlib import Path
import os
import sys

# --- Configuration ---

# CSV that create_links_from_csv() reads; it must have 'wsi_name' and
# 'wsi_path' columns.
csv_file_path = Path('/data/wsi/BRACS-process/labels/all.csv')
# Directory in which the symbolic links are created.
link_dir = Path('/data/wsi/BRACS-process/wsi-soft-link')
# Extension appended to each row's 'wsi_path' and 'wsi_name' value.
WSI_EXTENSION = '.svs'

# --- Script ---

def create_links_from_csv(csv_path=None, target_dir=None, extension=None):
    """Create one symbolic link per CSV row inside a single flat directory.

    For every row, the source file is ``row['wsi_path'] + extension`` and the
    link is ``<target_dir>/<row['wsi_name'] + extension>``. Rows whose source
    file does not exist are counted as errors; rows whose link path already
    exists (including as a dangling symlink) are counted as skipped. A summary
    is printed at the end. All problems are reported with ``print``; the
    function does not raise for a missing CSV, an uncreatable directory or a
    malformed table.

    Args:
        csv_path: CSV file to read. Defaults to the module-level
            ``csv_file_path``.
        target_dir: Directory that receives the links; created if missing.
            Defaults to the module-level ``link_dir``.
        extension: Suffix appended to ``wsi_path`` and ``wsi_name``. Defaults
            to the module-level ``WSI_EXTENSION``.

    Returns:
        None.
    """
    table_path = csv_file_path if csv_path is None else Path(csv_path)
    links_root = link_dir if target_dir is None else Path(target_dir)
    suffix = WSI_EXTENSION if extension is None else extension

    print("Starting script...")

    # Step 1: make sure the CSV file exists.
    if not table_path.exists():
        print(f"Error: CSV file not found at: {table_path}")
        print("Please make sure the path is correct and step 1 was completed.")
        return

    print(f"Ensuring link directory exists at: {links_root}")
    try:
        links_root.mkdir(parents=True, exist_ok=True)
    except OSError as e:
        print(f"Error: Could not create directory {links_root}. {e}")
        print("Please check permissions.")
        return

    print(f"Reading CSV from: {table_path}")

    created_count = 0
    skipped_count = 0
    error_count = 0

    try:
        # newline='' lets the csv module handle line endings itself.
        with open(table_path, 'r', newline='', encoding='utf-8') as f:
            # DictReader gives access to the values by column name.
            reader = csv.DictReader(f)
            # fieldnames is None for an empty file; treat that the same as a
            # header that lacks the required columns.
            columns = reader.fieldnames or []
            if 'wsi_path' not in columns or 'wsi_name' not in columns:
                print("Error: CSV must contain 'wsi_path' and 'wsi_name' columns.")
                return

            for row in reader:
                try:
                    source_file = Path(row['wsi_path'] + suffix)
                    link_path = links_root / (row['wsi_name'] + suffix)

                    if not source_file.exists():
                        print(f"Warning: Source file not found, skipping: {source_file}")
                        error_count += 1
                        continue

                    # is_symlink() also catches dangling links, for which
                    # exists() returns False.
                    if link_path.exists() or link_path.is_symlink():
                        skipped_count += 1
                        continue

                    # A relative target would be resolved relative to the
                    # link's own directory, so always link to an absolute path.
                    os.symlink(os.path.abspath(source_file), link_path)
                    created_count += 1

                except (OSError, TypeError, ValueError) as e:
                    # TypeError: a short row leaves the column as None.
                    print(f"Error creating link for {row['wsi_name']}: {e}")
                    error_count += 1

    except OSError as e:
        print(f"Error reading CSV file: {e}")
        return
    except Exception as e:
        # Script-level boundary: report decoding/parsing problems and stop.
        print(f"An unexpected error occurred: {e}")
        return

    print("\n--- Process complete ---")
    print(f"Links created: {created_count}")
    print(f"Links skipped (already exist): {skipped_count}")
    print(f"Errors (source file not found, etc.): {error_count}")
    print(f"Total rows processed: {created_count + skipped_count + error_count}")
    print(f"All links are in: {links_root}")

# Run the link-creation step when this cell/file is executed directly.
if __name__ == "__main__":
    create_links_from_csv()

Starting script...
Ensuring link directory exists at: /data/wsi/BRACS-process/wsi-soft-link
Reading CSV from: /data/wsi/BRACS-process/labels/all.csv

--- Process complete ---
Links created: 0
Links skipped (already exist): 87
Errors (source file not found, etc.): 0
Total rows processed: 87
All links are in: /data/wsi/BRACS-process/wsi-soft-link
