In [None]:
import pandas as pd
import numpy as np
from glob import glob
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import SpectralClustering
import SimpleITK as sitk
import os

# load feature file
def load_features(file):
    """Read a feature table from a CSV file.

    Args:
        file: Path of the CSV file (or any other source accepted by
            ``pandas.read_csv``).

    Returns:
        pandas.DataFrame: The parsed table, returned unmodified.
    """
    return pd.read_csv(file)

# feature standardization
def preprocess_features(features_df):
    """Standardize every feature column of a feature table.

    Args:
        features_df: DataFrame whose first column holds the superpixel index
            and whose remaining columns are the features to scale.

    Returns:
        The result of ``StandardScaler().fit_transform`` applied to all
        columns except the first one.
    """
    # Leave out the leading superpixel-index column: it identifies the row
    # and must not be scaled together with the features.
    feature_columns = features_df.iloc[:, 1:]
    return StandardScaler().fit_transform(feature_columns)

# clustering
def spectral_clustering(features_scaled, n_clusters):
    """Assign each row of the feature matrix to a cluster.

    Args:
        features_scaled: Feature matrix handed to ``fit_predict``.
        n_clusters: Number of clusters to form.

    Returns:
        The cluster labels returned by ``SpectralClustering.fit_predict``
        for ``features_scaled``.
    """
    model = SpectralClustering(
        n_clusters=n_clusters,
        affinity='nearest_neighbors',
        random_state=42,  # fixed seed, kept identical between runs
    )
    return model.fit_predict(features_scaled)

# Map the clustering results back to each supervoxel label
def map_clusters_to_mask(cluster_labels, features_df, mask_file, output_dir):
    """Write a sub-region mask in which each supervoxel carries its cluster.

    The mask at ``mask_file`` is read, every voxel whose value equals a
    supervoxel index from the first column of ``features_df`` is set to that
    supervoxel's cluster label plus one, and the result is written to
    ``output_dir`` as ``subregion_<basename of mask_file>``. Voxels that match
    no supervoxel index stay 0. The image information of the input mask is
    copied onto the output image.

    Args:
        cluster_labels: Array-like with one integer cluster label per row of
            ``features_df``, in the same order as the rows.
        features_df: DataFrame whose first column holds the supervoxel index
            used in the mask.
        mask_file: Path of the supervoxel mask image to read.
        output_dir: Directory that receives the output mask; it is created
            when it does not exist yet.

    Raises:
        ValueError: If the number of cluster labels differs from the number
            of rows in ``features_df``.
    """
    supervoxel_ids = features_df.iloc[:, 0]

    # Accept any array-like: a plain list would break the "+ 1" shift below.
    cluster_labels = np.asarray(cluster_labels)

    # zip() would silently truncate on a length mismatch and leave some
    # supervoxels labelled as background, so fail loudly before any I/O.
    if len(cluster_labels) != len(supervoxel_ids):
        raise ValueError(
            f'Got {len(cluster_labels)} cluster labels for '
            f'{len(supervoxel_ids)} supervoxels; they must match one-to-one.'
        )

    mask = sitk.ReadImage(mask_file)
    mask_data = sitk.GetArrayFromImage(mask)

    # Create a new mask to store the clustering results
    new_mask_data = np.zeros_like(mask_data, dtype=np.int32)

    # Shift the labels by one so that 0 stays reserved for the background
    shifted_labels = cluster_labels + 1

    # Replace each supervoxel label with the corresponding cluster label
    for supervoxel_id, label in zip(supervoxel_ids, shifted_labels):
        new_mask_data[mask_data == supervoxel_id] = label

    # Save the multi-region mask with the image information of the input mask
    new_mask = sitk.GetImageFromArray(new_mask_data)
    new_mask.CopyInformation(mask)

    # Make sure the target directory exists before handing the path to the
    # writer. An empty output_dir means "current directory": nothing to create.
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)
    output_file = os.path.join(output_dir, f'subregion_{os.path.basename(mask_file)}')
    sitk.WriteImage(new_mask, output_file)

# example usage
# NOTE(review): 'File path' looks like a placeholder to be replaced before
# running -- confirm. Despite the "_dir" suffix, features_dir and mask_dir are
# passed on as paths to single files; only output_dir is used as a directory.
features_dir = 'File path'  # feature CSV read by load_features
mask_dir = 'File path'  # mask image read by map_clusters_to_mask
output_dir = 'File path'  # directory that receives the 'subregion_*' mask

# Load the feature table and standardize every column except the first one.
features_df = load_features(features_dir)
features_scaled = preprocess_features(features_df)
n_clusters = 2  # the number of subregions used in this study
# Cluster the rows, then write the cluster labels back onto the mask.
cluster_labels = spectral_clustering(features_scaled, n_clusters)
map_clusters_to_mask(cluster_labels, features_df, mask_dir, output_dir)