# Sparse-MENDER Visium Experiments

### Import Dependencies

In [1]:
import warnings
warnings.filterwarnings("ignore")

import os
import json
import numpy as np
import scanpy as sc
import matplotlib.pyplot as plt
import pysodb
import sys
sys.path.append("..")
from smender.SMENDER import SMENDER_single
from smender.utils import compute_PAS, compute_CHAOS
from ann.AnnoyANN import AnnoyANN
from ann.HNSWANN import HNSWANN

### Define Configuration, Constants and Directories

In [None]:
# Configuration
# These two values drive the actual computation and the output directory layout.
ANN_TYPE = 'none'  # Options: 'annoy', 'hnsw', 'none'
DIM_REDUCTION = 'nmf'  # Options: 'pca', 'nmf', 'ica', 'fa'

# Validate ANN_TYPE
# Maps the ANN_TYPE option to the ANN backend class; 'none' maps to None.
ANN_MAP = {
    'annoy': AnnoyANN,
    'hnsw': HNSWANN,
    'none': None
}
if ANN_TYPE not in ANN_MAP:
    raise ValueError(f"Invalid ANN_TYPE: {ANN_TYPE}. Choose from {list(ANN_MAP.keys())}")
SELECTED_ANN = ANN_MAP[ANN_TYPE]

# Validate DIM_REDUCTION
VALID_DIM_REDUCTIONS = ['pca', 'nmf', 'ica', 'fa']
if DIM_REDUCTION not in VALID_DIM_REDUCTIONS:
    raise ValueError(f"Invalid DIM_REDUCTION: {DIM_REDUCTION}. Choose from {VALID_DIM_REDUCTIONS}")

# Configuration for plots and results
# The display labels are derived from the validated options above instead of
# being typed by hand, so plot titles/filenames can never disagree with the
# method that was actually run (previously 'nmf' was labelled 'PCA').
ANN_TYPE_LABELS = {
    'annoy': 'Annoy',
    'hnsw': 'HNSW',
    'none': 'Original'
}
ANN_TYPE_STR = ANN_TYPE_LABELS[ANN_TYPE]  # One of: 'Annoy', 'HNSW', 'Original'
DIM_REDUCTION_STR = DIM_REDUCTION.upper()  # One of: 'PCA', 'NMF', 'ICA', 'FA'
DATASET_STR = "Visium MOB"

# Define dataset
dataset_name = "Lebrigand2022The"
experiment_name = "GSM4656181_10x_Visium"

# Multiprocessing for final SMENDER run
# NOTE(review): MP_PROCESSES is not referenced by any later cell in this notebook.
MP_PROCESSES = 4  # Adjust based on CPU cores

# Set random seeds for reproducibility
seed = 100
np.random.seed(seed)
sc.settings.verbosity = 1

# Define directories
# Outputs are grouped by ANN backend and dimensionality-reduction method.
result_dir = os.path.join(os.path.pardir, "results", "visium", ANN_TYPE, DIM_REDUCTION)
plots_result_dir = os.path.join(os.path.pardir, "plots", "visium", ANN_TYPE, DIM_REDUCTION)
os.makedirs(result_dir, exist_ok=True)
os.makedirs(plots_result_dir, exist_ok=True)

# Define batch key
batch_obs = None

### Load the Visium MOB Dataset

In [3]:
# Create the SODB client and fetch the experiment selected in the configuration cell.
sodb = pysodb.SODB()
# adata_raw is kept as the reference object: later cells copy it for
# preprocessing and attach the predicted cluster labels to it.
adata_raw = sodb.load_experiment(dataset_name, experiment_name)

load experiment[GSM4656181_10x_Visium] in dataset[Lebrigand2022The]


In [4]:
# Save a spatial plot of the raw, unclustered data (as in visium.pdf)
print("Generating raw spatial plot...")
raw_spatial_path = os.path.join(plots_result_dir, "raw_spatial.png")
# color=None: no annotation is overlaid on the spots
ax = sc.pl.spatial(adata_raw, color=None, show=False)
raw_save_options = {'dpi': 200, 'bbox_inches': 'tight'}
plt.savefig(raw_spatial_path, **raw_save_options)
# Close the figure so it is not rendered again as cell output
plt.close()

Generating raw spatial plot...


### Prepare Dictionaries to Store Final Results 

In [5]:
# Empty containers for results, scores and processed AnnData objects.
# NOTE(review): none of these are filled or read by the later cells shown in this notebook.
results_dict, scores_dict, adata_list = dict(), dict(), list()

### Run SMENDER

In [6]:
# Tunable values used in this cell
N_TOP_GENES = 4000            # highly variable genes to flag
CT_LEIDEN_RESOLUTION = 2      # Leiden resolution for the surrogate cell types
SMENDER_NN_MODE = 'ring'
SMENDER_NN_PARA = 6
SMENDER_N_SCALES = 2
# NOTE(review): presumably a negative value is interpreted as a clustering
# resolution rather than a target number of clusters - confirm against SMENDER.
SMENDER_CLUSTERING_TARGET = -0.2

# Prepare input data (work on a copy so adata_raw keeps its original matrix)
print("Preparing input data...")
adata = adata_raw.copy()

# No reliable annotations are available, so Leiden clusters act as cell types.
# Highly variable gene selection runs first, before normalization and log1p.
sc.pp.highly_variable_genes(adata, flavor="seurat_v3", n_top_genes=N_TOP_GENES)
sc.pp.normalize_total(adata, inplace=True)
sc.pp.log1p(adata)
sc.pp.pca(adata)
sc.pp.neighbors(adata)
sc.tl.leiden(adata, resolution=CT_LEIDEN_RESOLUTION, key_added='ct')
adata.obs['ct'] = adata.obs['ct'].astype('category')

# Run SMENDER with the ANN backend and dimensionality reduction from the config cell
print("Running SMENDER...")
smender_options = dict(
    ct_obs='ct',
    random_seed=seed,
    verbose=0,
    ann=SELECTED_ANN,
    dim_reduction=DIM_REDUCTION,
)
smender = SMENDER_single(adata, **smender_options)

print("Setting SMENDER parameters...")
smender.set_smender_para(nn_mode=SMENDER_NN_MODE, nn_para=SMENDER_NN_PARA, n_scales=SMENDER_N_SCALES)

print("Extracting multi-scale context representation...")
smender.run_representation()

print("Running clustering...")
smender.run_clustering_normal(SMENDER_CLUSTERING_TARGET)

# Transfer clusters: copy the SMENDER labels onto the raw object
# (pandas aligns the assignment on the obs index).
pred_key = "smender_clusters"
mender_labels = smender.adata_MENDER.obs['MENDER']
adata_raw.obs[pred_key] = mender_labels.astype('category')

Preparing input data...
Running SMENDER...
Setting SMENDER parameters...
Extracting multi-scale context representation...
Running clustering...


### Compute Metrics

In [12]:
# Function to compute metrics (only PAS and CHAOS, no ground truth)
def compute_metrics(adata, cluster_key):
    """Compute the PAS and CHAOS scores for one clustering stored in ``adata.obs``.

    Parameters
    ----------
    adata
        Object exposing an ``obs`` table that holds the cluster labels.
    cluster_key
        Name of the ``adata.obs`` column containing the cluster labels.

    Returns
    -------
    dict
        ``{'PAS': ..., 'CHAOS': ...}`` as returned by ``compute_PAS`` and
        ``compute_CHAOS`` for ``(adata, cluster_key)``.

    Raises
    ------
    KeyError
        If ``cluster_key`` is not a column of ``adata.obs``.
    ValueError
        If the label column contains missing values.
    """
    obs_table = adata.obs
    if cluster_key not in obs_table:
        raise KeyError(f"Cluster key {cluster_key} not found in adata.obs")

    labels = obs_table[cluster_key]
    if labels.isna().any():
        raise ValueError(f"NaN values found in {cluster_key}. Handle NaN values before computing metrics.")

    # Evaluated in insertion order: PAS first, then CHAOS.
    metric_functions = {'PAS': compute_PAS, 'CHAOS': compute_CHAOS}
    return {
        metric_name: metric_fn(adata, cluster_key)
        for metric_name, metric_fn in metric_functions.items()
    }

# Compute metrics
# Scores are computed on adata_raw, which carries the cluster labels
# transferred from SMENDER under pred_key.
print("Computing metrics...")
final_scores = compute_metrics(adata_raw, pred_key)
# final_scores is reused by the "Save Results" cell.
print(f"\nFinal Metrics:\n{final_scores}")

Computing metrics...

Final Metrics:
{'PAS': np.float64(0.04139433551198257), 'CHAOS': np.float64(0.10675158333506653)}


### Visualize

In [13]:
# Function to plot UMAP
def plot_umap(adata, title, color_key, prefix, save_path=None):
    """Draw a UMAP scatter plot coloured by ``color_key`` and optionally save it.

    The plot title is ``"{prefix} - {title}"`` followed by a second line built
    from the module-level ``DATASET_STR``, ``ANN_TYPE_STR`` and
    ``DIM_REDUCTION_STR`` labels.

    Parameters
    ----------
    adata
        AnnData object passed to ``sc.pl.umap``.
        NOTE(review): this notebook never calls ``sc.tl.umap``, so the UMAP
        embedding presumably ships with the loaded dataset - confirm.
    title
        Second part of the main title.
    color_key
        Key passed to ``sc.pl.umap`` as ``color``.
    prefix
        First part of the main title.
    save_path
        If truthy, the figure is written to this path (dpi=200, tight bounding
        box) and then closed. Otherwise the figure is left open.
    """
    # Keep the figure handle so saving/closing targets this figure explicitly
    # instead of whatever pyplot currently considers the "current figure".
    fig, ax = plt.subplots()
    main_title = f"{prefix} - {title}"
    subtitle = f'\n{DATASET_STR} - {ANN_TYPE_STR} + {DIM_REDUCTION_STR}'
    sc.pl.umap(adata, color=color_key, title=main_title + subtitle, ax=ax, show=False)
    if save_path:
        fig.savefig(save_path, dpi=200, bbox_inches='tight')
        plt.close(fig)

In [14]:
# Plot UMAP coloured by the predicted SMENDER clusters
# (this dataset has no ground-truth annotation to plot)
final_umap_smender_name = f"final_smender_clusters_umap_{DATASET_STR}_{ANN_TYPE_STR}_{DIM_REDUCTION_STR}.png"
final_umap_smender_path = os.path.join(plots_result_dir, final_umap_smender_name)
plot_umap(
    adata_raw,
    "All Samples",
    pred_key,
    "SMENDER",
    save_path=final_umap_smender_path,
)

In [15]:
# Save a spatial plot coloured by the SMENDER cluster assignment
print("Generating clustered spatial plot...")
clustered_spatial_name = f"clustered_spatial_{DATASET_STR}_{ANN_TYPE_STR}_{DIM_REDUCTION_STR}.png"
clustered_spatial_path = os.path.join(plots_result_dir, clustered_spatial_name)
# Plot straight from SMENDER's own AnnData, where the labels live under 'MENDER'
sc.pl.spatial(smender.adata_MENDER, color='MENDER', size=1, show=False)
clustered_save_options = {'dpi': 200, 'bbox_inches': 'tight'}
plt.savefig(clustered_spatial_path, **clustered_save_options)
# Close the figure so it is not rendered again as cell output
plt.close()

Generating clustered spatial plot...


### Save Results

In [17]:
# Save results
# Writes dataset size, per-cluster spot counts and the PAS/CHAOS scores as JSON.
output_file = os.path.join(result_dir, f"smender_{DATASET_STR}_{ANN_TYPE}_{DIM_REDUCTION}_results.json")

# Use pred_key (the column the metrics were computed on) rather than repeating
# the literal column name, so the two cannot drift apart.
cluster_sizes = adata_raw.obs[pred_key].value_counts()

# Convert everything to built-in types up front: the json module rejects
# numpy integer scalars, so do not rely on pandas/numpy to box values for us.
payload = {
    'results': {
        'n_cells': int(adata_raw.n_obs),
        'n_genes': int(adata_raw.n_vars),
        'cluster_counts': {str(label): int(count) for label, count in cluster_sizes.items()}
    },
    'scores': {metric: float(value) for metric, value in final_scores.items()},
}

with open(output_file, 'w', encoding='utf-8') as f:
    json.dump(payload, f, indent=4)
# Only results and scores are written; no performance metrics are part of the file.
print(f"Results and scores saved to {output_file}")

Results, scores, and performance metrics saved to ..\results\visium\none\pca\smender_Visium MOB_none_pca_results.json
