In [1]:
from utils.misc import *
from train_single.train import Train,Test
from datasets.data_utils import load_dataset
from model_single.Creat_model import creat_model

In [2]:
from torch.backends import cudnn
def fix_seed(seed):
    """Seed every random number generator this notebook relies on.

    Seeds Python's ``random`` module, NumPy's global RNG and PyTorch (CPU and
    all CUDA devices), then puts cuDNN into deterministic mode with
    autotuning switched off. Two environment variables are also written.

    Args:
        seed: Integer seed applied to every generator.
    """
    # Environment knobs. NOTE(review): PYTHONHASHSEED is presumably only
    # honoured by interpreters started after this point, and
    # CUBLAS_WORKSPACE_CONFIG presumably has to be set before CUDA is first
    # used - confirm both if bit-exact reproducibility matters.
    os.environ['PYTHONHASHSEED'] = str(seed)
    os.environ['CUBLAS_WORKSPACE_CONFIG'] = ':4096:8'

    # Python / NumPy / PyTorch generators.
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)

    # Deterministic cuDNN kernels, no benchmark-driven algorithm selection.
    cudnn.deterministic = True
    cudnn.benchmark = False

In [3]:
import networkx
import torch
import scipy.sparse as sp
import numpy as np
import os
import random
from munkres import Munkres
from sklearn import metrics
from sklearn.metrics import adjusted_rand_score as ari_score
from sklearn.metrics.cluster import normalized_mutual_info_score as nmi_score
from sklearn.decomposition import PCA
from torch.utils.data import Dataset
import argparse
import yaml
from torch_geometric.utils.convert import to_networkx
import pynvml

def build_args(argv=None):
    """Build the namespace holding this notebook's hyper-parameters.

    Args:
        argv: Optional list of command-line style tokens to parse. ``None``
            (the default) parses an empty list, so every option takes its
            declared default and the kernel's own ``sys.argv`` is ignored.

    Returns:
        argparse.Namespace with one attribute per option declared below.
    """

    def _str2bool(value):
        # argparse's ``type=bool`` calls bool() on the raw string, and any
        # non-empty string (including "False") is truthy; parse it explicitly.
        if isinstance(value, bool):
            return value
        text = str(value).strip().lower()
        if text in ('true', 't', 'yes', 'y', '1'):
            return True
        if text in ('false', 'f', 'no', 'n', '0'):
            return False
        raise argparse.ArgumentTypeError('expected a boolean value, got %r' % (value,))

    parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('--dataset', type=str, default="acm")
    parser.add_argument('--seed', type=int, default=20)
    parser.add_argument('--cuda', type=_str2bool, default=True)
    parser.add_argument('--n_input', type=int, default=None)
    parser.add_argument('--n_z', type=int, default=None)
    parser.add_argument('--freedom_degree', type=float, default=1.0)
    parser.add_argument('--epoch', type=int, default=None)
    parser.add_argument('--shuffle', type=_str2bool, default=True)
    parser.add_argument('--sigma', type=float, default=None)
    # Loss-related options; all default to None.
    parser.add_argument('--loss_n', type=float, default=None)
    parser.add_argument('--loss_w', type=float, default=None)
    parser.add_argument('--loss_s', type=float, default=None)
    parser.add_argument('--loss_a', type=float, default=None)
    parser.add_argument('--lr', type=float, default=1e-4)
    parser.add_argument('--acc', type=float, default=-1)
    parser.add_argument('--f1', type=float, default=-1)
    # An explicit list keeps argparse from reading the kernel's sys.argv.
    args = parser.parse_args([] if argv is None else list(argv))
    return args

In [4]:
def pca(adata, use_reps=None, n_comps=10):
    """Dimension reduction with the PCA algorithm.

    Args:
        adata: Object exposing ``.X`` (dense array or SciPy sparse matrix)
            and, when ``use_reps`` is given, an ``.obsm`` mapping.
        use_reps: Key into ``adata.obsm`` selecting the matrix to reduce.
            When ``None``, ``adata.X`` is reduced instead.
        n_comps: Number of principal components to keep.

    Returns:
        The array produced by ``PCA.fit_transform`` on the selected matrix.
    """
    # Public scipy.sparse API instead of the deprecated private
    # scipy.sparse.csc / scipy.sparse.csr modules; covers every sparse format.
    from scipy.sparse import issparse
    from sklearn.decomposition import PCA

    reducer = PCA(n_components=n_comps)

    if use_reps is not None:
        # The stored representation is passed through unchanged.
        features = adata.obsm[use_reps]
    else:
        features = adata.X
        if issparse(features):
            features = features.toarray()

    return reducer.fit_transform(features)

def clr_normalize_each_cell(adata, inplace=True):
    """Normalize count vector for each cell, i.e. for each row of .X

    Each row ``x`` becomes ``log1p(x / exp(sum(log1p(x[x > 0])) / len(x)))``.

    Args:
        adata: Object whose ``.X`` is a dense array or SciPy sparse matrix.
            It must provide ``.copy()`` when ``inplace`` is False.
        inplace: When True (default) ``adata.X`` is overwritten; otherwise a
            copy of ``adata`` is normalised and the input is left untouched.

    Returns:
        The normalised object (``adata`` itself when ``inplace`` is True).
        Its ``.X`` is always a dense array.
    """
    import numpy as np
    from scipy.sparse import issparse

    def seurat_clr(x):
        # TODO: support sparseness
        s = np.sum(np.log1p(x[x > 0]))
        exp = np.exp(s / len(x))
        return np.log1p(x / exp)

    if not inplace:
        adata = adata.copy()

    # Densify with toarray(): the `.A` shorthand is not available on every
    # SciPy release, while toarray() is part of the stable sparse API.
    counts = adata.X.toarray() if issparse(adata.X) else np.array(adata.X)

    # Row-wise application (axis=1 means one call per cell).
    adata.X = np.apply_along_axis(seurat_clr, 1, counts)
    return adata

In [5]:
# Start from the argparse defaults and hand them to load_configs together with
# the YAML configuration file.
args = load_configs(build_args(), "config/configs.yml")

# Use the CUDA device with index 1 when CUDA is available, otherwise the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:1")
else:
    device = torch.device("cpu")

load configs


In [6]:
import scanpy as sc
# Load the dataset from an .h5ad file with scanpy.
# NOTE(review): this is a machine-specific absolute path; point it at a local
# copy of the file before re-running the notebook elsewhere.
adata_omics1=sc.read_h5ad("/home/hfzhang/data/空间转录组/human_breast_cancer.h5ad")

In [7]:

# RNA preprocessing. The calls run in a fixed order and every step operates on
# the object as left by the previous one, so do not reorder them casually.

# Gene filter with min_cells=10.
sc.pp.filter_genes(adata_omics1, min_cells=10)
# Flag 3000 highly variable genes; this runs before normalize_total / log1p.
# NOTE(review): per the scanpy docs the "seurat_v3" flavor expects count data -
# confirm the loaded file still holds raw counts at this point.
sc.pp.highly_variable_genes(adata_omics1, flavor="seurat_v3", n_top_genes=3000)
# Per-cell total normalisation to 1e4, then log1p, then scaling.
sc.pp.normalize_total(adata_omics1, target_sum=1e4)
sc.pp.log1p(adata_omics1)
sc.pp.scale(adata_omics1)

# Keep only the columns flagged in var['highly_variable'].
# NOTE(review): slicing presumably returns a view rather than a copy - confirm
# whether an explicit .copy() is wanted before later cells write to it.
adata_omics1 =  adata_omics1[:, adata_omics1.var['highly_variable']]

In [8]:
from utils.preprocess import *
from utils.utils import *

2025-03-20 13:56:57.594826: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F AVX512_VNNI FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2025-03-20 13:56:57.719845: I tensorflow/core/util/port.cc:104] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-03-20 13:56:58.286594: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/local/lib:
2025-03-20 13:56:58.286682: W tensorflow/compiler/xla/stream_executor/p

In [9]:
def create_adj(adata, k):
    """Build the preprocessed spatial adjacency for ``adata``.

    As a side effect the graph returned by ``construct_graph_by_coordinate``
    is stored under ``adata.uns['adj_spatial']``.

    Args:
        adata: Object providing ``obsm['spatial']`` and a writable ``uns``.
        k: Forwarded as ``n_neighbors`` to ``construct_graph_by_coordinate``.

    Returns:
        The result of ``preprocess_graph`` applied to the symmetrised, dense
        adjacency matrix.
    """
    coords = adata.obsm['spatial']
    adata.uns['adj_spatial'] = construct_graph_by_coordinate(coords, n_neighbors=k)

    # Read the graph back from ``uns`` and convert it to a dense matrix.
    dense_adj = transform_adjacent_matrix(adata.uns['adj_spatial']).toarray()

    # Symmetrise; any entry that ends up above 1 after the sum is set back to 1.
    symmetric = dense_adj + dense_adj.T
    symmetric = np.where(symmetric > 1, 1, symmetric)

    return preprocess_graph(symmetric)

In [10]:
# Convert every cluster annotation to a string first.
cluster_names = adata_omics1.obs['cluster'].astype(str)
# Replace anything pandas still reports as missing with the literal "nan".
cluster_names[pd.isna(cluster_names)] = "nan"
# Alternative label source kept for reference:
# label=adata_omics1.obs['ground_truth'].values

# `classes` holds the distinct names, `label` each cell's index into them.
classes, label = np.unique(cluster_names, return_inverse=True)
classes = classes.tolist()

In [24]:
# Run-specific settings written onto the shared args namespace.
# NOTE(review): n_input presumably has to equal the number of feature columns
# fed to the model - confirm it matches the gene selection upstream.
args.n_input=3000
# One cluster per distinct label name.
args.n_clusters=len(classes)
args.Type='Stereo-CITE-seq'
args.loss_n=0.1
args.lr=0.001

In [23]:
# Fix the seed for this run: keep it on args as well, then seed every RNG
# through fix_seed.
random_seed=2024
args.random_seed=random_seed
fix_seed(random_seed)

In [16]:
# Rebinds `device` to a plain string naming CUDA device 0, replacing any value
# assigned to it earlier in the notebook.
# NOTE(review): there is no availability check here, so this presumably fails
# later on a machine without CUDA - confirm that is intended.
device='cuda:0'

In [27]:
# Set sigma for this run (build_args declares it with default None).
# NOTE(review): its meaning is defined by the model/training code, which is not
# visible in this notebook.
args.sigma=0.3

In [19]:
# Store 'kmeans' as args.tool.
# NOTE(review): presumably selects the clustering method used by Train/Test -
# confirm against their implementation.
args.tool='kmeans'

In [32]:
# Build the graph and feature tensor, train the model, then evaluate it.
# adata_omics2_train,adata_omics2_test =adata_omics2, adata_omics2
# Both cluster counts are set to the number of distinct label ids.
args.n_clusters1=len(set(label))
args.n_clusters2=len(set(label))
# NOTE(review): `k` is not assigned in any visible cell; it must already exist
# in the kernel (or arrive through a star import) for this line to run -
# define it explicitly so Restart & Run All works.
adj_train=create_adj(adata_omics1,k)
adj_train = adj_train.to(device)
# Copy the expression matrix into a float tensor on the target device.
features_omics1_train = torch.FloatTensor(adata_omics1.X.copy()).to(device)
model = creat_model('spamgcn', args).to(device)
# NOTE(review): the leading 50 is presumably the number of training epochs -
# confirm against Train's signature.
model=Train(50, model,adata_omics1 ,features_omics1_train, adj_train, label, device, args)
# Evaluate the trained model with the 'kmeans' option; six scores are unpacked.
nmi, ari, ami, homogeneity, completeness, v_measure=Test(model,adata_omics1,features_omics1_train,adj_train,label,device,args,'kmeans')


tool: kmeans
  9 loss: 1.8417551517486572
Epoch_  9 , nmi 0.5896 , ari 0.4721 , ami 0.5822 , homogeneity 0.6082 , completeness 0.5721 , v_measure 0.5896
 19 loss: 1.7967519760131836
Epoch_ 19 , nmi 0.6803 , ari 0.5806 , ami 0.6745 , homogeneity 0.6985 , completeness 0.6631 , v_measure 0.6803
 29 loss: 1.769916296005249
Epoch_ 29 , nmi 0.7202 , ari 0.6526 , ami 0.7151 , homogeneity 0.7359 , completeness 0.7051 , v_measure 0.7202
 39 loss: 1.7556101083755493
Epoch_ 39 , nmi 0.7050 , ari 0.5984 , ami 0.6997 , homogeneity 0.7261 , completeness 0.6851 , v_measure 0.7050
 49 loss: 1.7443386316299438
Epoch_ 49 , nmi 0.7154 , ari 0.6146 , ami 0.7103 , homogeneity 0.7330 , completeness 0.6987 , v_measure 0.7154
xunlian
name: acm
NMI : 0.7202
ARI : 0.6526
AMI  : 0.7151
Epoch_  0 , nmi 0.7097 , ari 0.6100 , ami 0.7045 , homogeneity 0.7262 , completeness 0.6939 , v_measure 0.7097
聚类方法为kmeans
test
name: acm
NMI : 0.7097
ARI : 0.6100
AMI  : 0.7045
