**This tutorial demonstrates how to analyze mouse mPFC data using SpaGCN.**

# Mouse mPFC data by STARmap

In [None]:
import rpy2.robjects as ro
from rpy2.robjects import pandas2ri
from rpy2.robjects.conversion import localconverter
from sklearn.metrics import adjusted_rand_score
from scipy.sparse import csc_matrix
import scanpy as sc
import numpy as np
import pandas as pd
import SpaGCN as spg
import anndata
import torch
import random
import warnings
# Suppress all warnings for the rest of the session.
warnings.filterwarnings("ignore")
# Seed Python's built-in `random` module.
random.seed(2025)

In [None]:
# Path to the RData file with the STARmap mPFC samples
data = '../data/mPFC/starmap_mpfc.RData'

# Load the RData file into the embedded R session
ro.r['load'](data)

# Fetch the R lists `starmap_cnts` and `starmap_info` (one element per sample)
cnts = ro.r['starmap_cnts']
info = ro.r['starmap_info']

# Element names of the counts list, used below as sample identifiers
names = list(ro.r['names'](cnts))

# Run SpaGCN for every target number of clusters from 10 to 20 (inclusive).
# For each k, one ARI table is written, plus one CSV of labels per sample.
for cluster_number in range(10, 21):
    print(cluster_number)

    with open(f"../result/SpaGCN/result_SpaGCN_k={cluster_number}.txt", "w") as f:
        f.write("sample\tseed\tari_value\n")

        for cnts_df, info_df, name in zip(cnts, info, names):
            # Only the R -> pandas conversion needs the converter context,
            # so the rest of the pipeline runs outside of it.
            with localconverter(ro.default_converter + pandas2ri.converter):
                cnts_df = ro.conversion.rpy2py(cnts_df)
                info_df = ro.conversion.rpy2py(info_df)

            # Create the AnnData object from the transposed count table
            # (presumably stored as genes x cells in the RData file -- confirm).
            adata = anndata.AnnData(cnts_df.T, dtype='float64')
            # Positional assignment of 1-D arrays: the `x`, `y` and `c`
            # columns of the info table become per-observation annotations.
            adata.obs['array_row'] = info_df['x'].to_numpy()
            adata.obs['array_col'] = info_df['y'].to_numpy()
            adata.obs['ground_truth'] = info_df['c'].to_numpy()

            x_array = adata.obs['array_row']
            y_array = adata.obs['array_col']
            adata.var_names_make_unique()
            # Gene filtering (min_cells=10 avoids all-zero genes),
            # followed by per-cell normalization and log1p transform.
            spg.prefilter_genes(adata, min_cells=10)
            spg.prefilter_specialgenes(adata)
            sc.pp.normalize_per_cell(adata)
            sc.pp.log1p(adata)

            # Adjacency matrix from spatial coordinates only (no histology)
            adj = spg.calculate_adj_matrix(x=x_array, y=y_array, histology=False)

            # `l` is searched from `adj` alone, so it is identical for every
            # seed: compute it once per sample instead of once per seed.
            l = spg.search_l(0.5, adj, start=0.01, end=1000, tol=0.01, max_run=100)

            # One column of predicted labels per seed
            df = pd.DataFrame()
            for seed in range(10):
                print("seed =", seed)
                # Search the resolution that yields `cluster_number` clusters
                res = spg.search_res(
                    adata, adj, l, cluster_number,
                    start=0.7, step=0.1, tol=5e-3, lr=0.05, max_epochs=20,
                    r_seed=seed, t_seed=seed, n_seed=seed,
                )

                clf = spg.SpaGCN()
                clf.set_l(l)
                # Seed every RNG right before training
                random.seed(seed)
                torch.manual_seed(seed)
                np.random.seed(seed)
                # Run SpaGCN
                clf.train(adata, adj, init_spa=True, init="louvain", res=res,
                          tol=5e-3, lr=0.05, max_epochs=200)
                labels, _ = clf.predict()

                # Record cluster labels
                df[f"{name}_{seed}"] = labels

                # ARI of the predicted labels against the ground truth
                ari_value = adjusted_rand_score(adata.obs['ground_truth'], labels)
                print('ari_value =', ari_value)
                f.write(f"{name}\t{seed}\t{ari_value}\n")

            # Save the labels of all seeds for this sample
            df.to_csv(f'../result/SpaGCN/{name}_k={cluster_number}.csv')