In [1]:
# Enable IPython's autoreload extension in mode 2, so imported modules are
# re-imported before each cell runs and edits to local packages are picked up
# without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
# Imports, warning suppression and RNG seeding for the whole notebook.
# NOTE(review): statement order is kept as-is on purpose -- the warning filter
# is installed before scanpy is imported, and the seeds are set before the
# BANKSY helpers are imported.
import os
import numpy as np
import warnings
# Silence every warning category for the rest of the session.
warnings.filterwarnings("ignore")
import scanpy as sc
import random
# Seed both NumPy's global RNG and Python's `random` module with the same value.
seed = 0
np.random.seed(seed)
random.seed(seed)
import scipy.sparse as sp
import time
# BANKSY project helpers.
# NOTE(review): some of these (e.g. filter_cells, load_adata, normalize_total,
# filter_hvg, concatenate_all, run_Leiden_partition, plot_results) are not
# referenced by the cells visible in this notebook -- confirm before pruning.
from banksy_utils.filter_utils import filter_cells
from banksy_utils.load_data import load_adata
from banksy_utils.filter_utils import normalize_total, filter_hvg
from banksy.main import median_dist_to_nearest_neighbour
from banksy.initialize_banksy import initialize_banksy
from banksy.main import concatenate_all
from banksy.embed_banksy import generate_banksy_matrix
from banksy_utils.umap_pca import pca_umap
from banksy.cluster_methods import run_Leiden_partition
from banksy.plot_banksy import plot_results
import anndata as ad

In [3]:
# Tunable settings for the BANKSY run and the cross-slice integration.

# obs column that identifies which slice/batch a cell belongs to.
condition_key = "batch"

# Neighbourhood construction (handed to initialize_banksy).
n_neighbors = 8
nbr_weight_decay = "scaled_gaussian"  # can also choose "reciprocal", "uniform" or "ranked"
max_m = 1  # use both mean and AGF

# Embedding: lambda values go to generate_banksy_matrix, PCA sizes to pca_umap.
lambda_list = [0.8]
pca_dims = [20]

In [8]:
# Read the three slices from disk, one AnnData object per h5ad file.
# NOTE(review): these are absolute, machine-specific paths; consider moving
# the directory into a single configurable constant.
slice_paths = (
    '/home/dbj/SpaLP/gittest/SpaLP/general/STARmapPLUS/MERFISH_-1.076.h5ad',
    '/home/dbj/SpaLP/gittest/SpaLP/general/STARmapPLUS/MERFISH_-1.077.h5ad',
    '/home/dbj/SpaLP/gittest/SpaLP/general/STARmapPLUS/MERFISH_-1.078.h5ad',
)
adata_batch1, adata_batch2, adata_batch3 = map(sc.read_h5ad, slice_paths)

In [9]:
# Stack the three slices into one AnnData; join="inner" keeps only the
# variables (genes) present in every slice.
adata_list = [adata_batch1, adata_batch2, adata_batch3]
adata_original = ad.concat(adata_list, join="inner")

# Expose each cell's 'slice' label under the configured batch column. Using
# condition_key (rather than a hard-coded 'batch') keeps this cell in step
# with the later cells that read obs[condition_key].
adata_original.obs[condition_key] = adata_original.obs['slice']

In [10]:
# Result container: same cells and genes as adata_original, but with an
# all-zero sparse float32 matrix as X. Only the batch label and the spatial
# coordinates are carried over.
empty_counts = sp.csr_matrix(adata_original.shape, dtype=np.float32)
adata_new = sc.AnnData(empty_counts)

# Names first, so the obs column assignment below lines up on the same index.
adata_new.obs_names = adata_original.obs_names
adata_new.var_names = adata_original.var_names

adata_new.obs[condition_key] = adata_original.obs[condition_key]
adata_new.obsm["spatial"] = adata_original.obsm["spatial"]

In [11]:
%%time
adata_batch_list = []
for batch in adata_original.obs['batch'].unique():
    adata = adata_original[adata_original.obs['batch'] == batch].copy()
    max_m = 1 # use both mean and AFT
    nbr_weight_decay = "scaled_gaussian" # can also choose "reciprocal", "uniform" or "ranked"
    lambda_list = [0.8]
    pca_dims = [20]
    adata.obs["spatial_x"] = adata.obsm['spatial'][:, 0]
    adata.obs["spatial_y"] = adata.obsm['spatial'][:, 1]
    
    banksy_dict = initialize_banksy(
        adata,
        ("spatial_x", "spatial_y", "spatial"),
        n_neighbors,
        nbr_weight_decay=nbr_weight_decay,
        max_m=max_m,
        plt_edge_hist=False,
        plt_nbr_weights=False,
        plt_agf_angles=False, 
        plt_theta=False)
    banksy_dict, banksy_matrix = generate_banksy_matrix(adata,banksy_dict,lambda_list,max_m)
    pca_umap(banksy_dict,pca_dims = pca_dims,add_umap = True,plt_remaining_var = False)
    adata.obsm['banksy'] = banksy_dict[nbr_weight_decay][lambda_list[0]]["adata"].obsm["reduced_pc_20"]
    adata_batch_list.append(adata)
    
adata_original = ad.concat(adata_batch_list, join="outer")
sc.external.pp.harmony_integrate(adata_original, "batch", basis='banksy')
adata_new.obsm['banksy'] = adata_original.obsm['X_pca_harmony']


Median distance to closest cell = 10.340140733893607

---- Ran median_dist_to_nearest_neighbour in 0.09 s ----

---- Ran generate_spatial_distance_graph in 0.16 s ----

---- Ran row_normalize in 0.11 s ----

---- Ran generate_spatial_weights_fixed_nbrs in 1.07 s ----

---- Ran generate_spatial_distance_graph in 0.20 s ----

---- Ran theta_from_spatial_graph in 0.20 s ----

---- Ran row_normalize in 0.09 s ----

---- Ran generate_spatial_weights_fixed_nbrs in 1.13 s ----

Runtime Feb-11-2026-12-19

1122 genes to be analysed:
Gene List:
Index(['Htr7', 'Gzmk', 'Arhgap36', 'Sema3c', 'Rxrg', 'Itga8', 'Glp2r', 'Ramp3',
       'Car12', 'Chn2',
       ...
       'Galnt14', 'Kcnh8', 'Pifo', 'Epb41l4a', 'Matn2', 'Gata3', 'Fat1',
       'Zim1', 'Lmo1', 'Cntnap3'],
      dtype='object', name='gene_name', length=1122)

Check if X contains only finite (non-NAN) values
Decay Type: scaled_gaussian
Weights Object: {'weights': {0: <Compressed Sparse Row sparse matrix of dtype 'float64'
	with 342832 sto

2026-02-11 12:22:37,156 - harmonypy - INFO - Computing initial centroids with sklearn.KMeans...
2026-02-11 12:22:44,919 - harmonypy - INFO - sklearn.KMeans initialization complete.
2026-02-11 12:22:45,396 - harmonypy - INFO - Iteration 1 of 10
2026-02-11 12:23:34,466 - harmonypy - INFO - Iteration 2 of 10
2026-02-11 12:24:21,888 - harmonypy - INFO - Iteration 3 of 10
2026-02-11 12:25:10,846 - harmonypy - INFO - Iteration 4 of 10
2026-02-11 12:26:02,282 - harmonypy - INFO - Iteration 5 of 10
2026-02-11 12:26:46,754 - harmonypy - INFO - Iteration 6 of 10
2026-02-11 12:27:32,663 - harmonypy - INFO - Iteration 7 of 10
2026-02-11 12:28:10,875 - harmonypy - INFO - Iteration 8 of 10
2026-02-11 12:28:44,858 - harmonypy - INFO - Iteration 9 of 10
2026-02-11 12:29:18,879 - harmonypy - INFO - Converged after 9 iterations


CPU times: user 3h 29min 20s, sys: 5h 52min 12s, total: 9h 21min 32s
Wall time: 10min 9s


In [None]:
# Persist the result container (batch label, spatial coordinates and the
# integrated embedding) as a gzip-compressed h5ad file.
output_path = '/home/dbj/SpaLP/gittest/repoduced/result/BANKSY_MERFISH76-77-78.h5ad'

# Create the result directory first, so a missing folder does not throw away
# the long computation above at the very last step.
os.makedirs(os.path.dirname(output_path), exist_ok=True)
adata_new.write_h5ad(output_path, compression='gzip')