In [1]:
import networkx as nx
from scipy.spatial import distance_matrix
import random
import pandas as pd
import numpy as np
import scanpy as sc
import matplotlib.pyplot as plt
import os
import scipy as sp
import math
# Integration methods compared in this notebook.
# `methods` holds the keys used to build file names and `adata.obs` column names;
# `methods1` holds the labels written to the result tables. The two lists differ
# only in their first entry ('spiral' vs 'SPIRAL').
methods = [
    'spiral',
    'harmony_SEDR',
    'harmony_STAGATE',
    'seurat',
    'harmony',
    'DeepST',
    'STAligner',
    'GraphST',
    'BASS',
]
methods1 = ['SPIRAL'] + methods[1:]

In [2]:
def create_graph(adata, degree = 4):
    """
    Converts spatial coordinates into a k-nearest-neighbour graph using the
    networkx library.

    param: adata - ST slice; must expose `obsm['spatial']` (one coordinate row
                   per spot), `shape[0]` and `obs.index`
    param: degree - number of nearest neighbours every spot is linked to
                    (capped at n_spots - 1)

    return: 1) G - undirected networkx graph whose nodes are the row
                   positions 0..n_spots-1
            2) node_dict - dictionary mapping nodes to spots
    """
    coords = adata.obsm['spatial']
    n_spots = adata.shape[0]

    # Dense pairwise distances: memory grows as n_spots ** 2.
    D = distance_matrix(coords, coords)

    # Exclude every spot from its own neighbour list explicitly. Relying on
    # "column 0 of the argsort is the spot itself" breaks as soon as two spots
    # share the same coordinates: the duplicate may be sorted first and the
    # spot then ends up with a self loop instead of a real neighbour.
    np.fill_diagonal(D, np.inf)

    # Never ask for more neighbours than there are other spots, otherwise the
    # masked diagonal entry itself would be selected.
    k = max(0, min(degree, n_spots - 1))

    # A stable sort makes the choice among equidistant neighbours deterministic
    # (lowest row position wins).
    idx = np.argsort(D, axis=1, kind='stable')[:, :k]

    G = nx.Graph()
    # Register all spots up front so that a spot without any edge is still a node.
    G.add_nodes_from(range(n_spots))
    for r, neighbours in enumerate(idx.tolist()):
        for c in neighbours:
            G.add_edge(r, c)

    node_dict = dict(zip(range(n_spots), adata.obs.index))
    return G, node_dict
    
def generate_graph_from_labels(adata, labels_dict,knn):
    """
    Builds the neighbour graph of `adata` (via `create_graph` with `knn`
    neighbours) and attaches a cluster label to every node.

    Spots that have no entry in `labels_dict` are removed from the graph.
    Returns the pruned graph and the dictionary {node: cluster_label}.
    """
    graph, spot_of_node = create_graph(adata, knn)
    labelled_spots = labels_dict.keys()

    # Collect first, delete afterwards: the mapping must not change size
    # while it is being iterated.
    unlabelled = [
        node for node, spot in spot_of_node.items()
        if spot not in labelled_spots
    ]
    for node in unlabelled:
        del spot_of_node[node]
        graph.remove_node(node)

    labels = {node: labels_dict[spot_of_node[node]] for node in graph.nodes()}
    return graph, labels

def spatial_coherence_score(graph, labels, n_permutations=1000, seed=None):
    """
    Z-score of the observed spatial entropy against a label-permutation null.

    The node labels are shuffled `n_permutations` times; for each shuffle the
    spatial entropy of the graph is recomputed with `spatial_entropy`.

    param: graph - graph handed unchanged to `spatial_entropy`
    param: labels - dictionary {node: cluster_label}; it is not modified
    param: n_permutations - number of random shuffles used for the null
                            distribution (default 1000, the previous fixed value)
    param: seed - optional seed for a private random generator, making the
                  score reproducible; with None the module-level `random`
                  state is used, exactly as before

    return: (true_entropy - mean(null)) / std(null). If every shuffle yields
            the same entropy the standard deviation is 0 and the result is
            nan or +/-inf.
    raises: ValueError if n_permutations < 1
    """
    if n_permutations < 1:
        raise ValueError("n_permutations must be at least 1")

    true_entropy = spatial_entropy(graph, labels)

    # A private generator keeps a seeded run from disturbing (or being
    # disturbed by) other users of the global `random` state.
    rng = random if seed is None else random.Random(seed)

    nodes = list(labels.keys())
    entropies = []
    for _ in range(n_permutations):
        # Start every shuffle from the original label order.
        shuffled = list(labels.values())
        rng.shuffle(shuffled)
        entropies.append(spatial_entropy(graph, dict(zip(nodes, shuffled))))

    return (true_entropy - np.mean(entropies))/np.std(entropies)

def spatial_entropy(g, labels):
    """
    Calculates spatial entropy of graph

    Every edge is classified by the unordered pair of cluster labels found at
    its two end points. The result is the Shannon entropy (natural logarithm)
    of the distribution of edges over these label pairs.

    param: g - graph exposing `edges()`, which yields (node_u, node_v) pairs
    param: labels - dictionary {node: cluster_label} covering every edge end point

    return: entropy H as a float; 0.0 when the graph has no edges
    """
    cluster_names = np.unique(list(labels.values()))
    position = {name: k for k, name in enumerate(cluster_names.tolist())}

    # Contiguity counts, stored in the upper triangle (row <= column) so that
    # (a, b) and (b, a) share one cell. A plain integer array is used on
    # purpose: chained DataFrame indexing such as `C[a][b] += 1` does not
    # write back into the frame once pandas Copy-on-Write is active, which
    # would silently yield an entropy of 0.
    pair_counts = np.zeros((len(position), len(position)), dtype=np.int64)
    for u, v in g.edges():
        a, b = position[labels[u]], position[labels[v]]
        if a > b:
            a, b = b, a
        pair_counts[a, b] += 1

    # calculate entropy from the non-empty cells (row-major order)
    total = pair_counts.sum()
    H = 0.0
    for z in pair_counts[np.nonzero(pair_counts)]:
        p = z / total
        H += -p * math.log(p)
    return float(H)

输入数据

In [11]:
# Load the mouse-brain slices listed in `sample_name` and stack them into one
# AnnData object. For every sample three CSV tables are read from
# <dirs>/gtt_input/: <sample>_mat.csv, <sample>_meta.csv and <sample>_coord.csv.
# The commented assignments are alternative dataset configurations.
dirs="/data02/tguo/space_batch_effect/mouse_brain/"
# # sample_name=["all_posterior1","all_posterior2","all_anterior1","all_anterior2"]
sample_name=["posterior1","posterior2"]
# sample_name=["anterior1","anterior2"]
# dirs="/data02/tguo/space_batch_effect/Hippo/"
# sample_name=['10X_Normal','10X_DAPI','10X_FFPE']

# Read all tables first and concatenate once per table type, instead of
# growing the frames inside a loop.
input_dir=dirs+"gtt_input/"
feat=pd.concat([pd.read_csv(input_dir+str(sample)+"_mat.csv",header=0,index_col=0,sep=',') for sample in sample_name])
meta=pd.concat([pd.read_csv(input_dir+str(sample)+"_meta.csv",header=0,index_col=0,sep=',') for sample in sample_name])
coord=pd.concat([pd.read_csv(input_dir+str(sample)+"_coord.csv",header=0,index_col=0,sep=',') for sample in sample_name])

# `flags` is the "_<sample>_<sample>..." suffix used in result file names.
flags=''.join('_'+str(sample) for sample in sample_name)

# meta.loc[:,'celltype'][meta.loc[:,'celltype']=='AC']='CA'
adata = sc.AnnData(feat)
adata.var_names_make_unique()
# Re-order coordinates and metadata by the AnnData observation names.
coord = coord.loc[adata.obs_names, ['x', 'y']]
adata.obsm["spatial"] = coord.to_numpy()
adata.X=sp.sparse.csr_matrix(adata.X)
adata.obs= meta.loc[adata.obs_names, :]
adata.obs['batch']=np.array(adata.obs['batch'],dtype=str)

# Drop spots annotated as 'Low_Quality'. The subset is copied so that `adata`
# is a real object rather than a view: later cells assign to `adata.obs` and
# `adata.obsm`, which on a view triggers an implicit copy and a warning.
idx=np.where(adata.obs['celltype']!='Low_Quality')[0]
adata=adata[idx,:].copy()

In [3]:
# Disabled alternative loader for the mouse_OB dataset (samples "BGI" and "SlideV2"); every line below is commented out.
# dirs="/data02/tguo/space_batch_effect/mouse_OB/"
# sample_name=["BGI","SlideV2"]
# IDX=np.arange(len(sample_name))
# i=0
# feat=pd.read_csv(dirs+"gtt_input/BGI_SlideV2_10X/"+str(sample_name[i])+"_mat.csv",header=0,index_col=0,sep=',')
# meta=pd.read_csv(dirs+"gtt_input/BGI_SlideV2_10X/"+str(sample_name[i])+"_meta.csv",header=0,index_col=0,sep=',')
# coord=pd.read_csv(dirs+"gtt_input/BGI_SlideV2_10X/"+str(sample_name[i])+"_coord.csv",header=0,index_col=0,sep=',')
# flags='_'+str(sample_name[i])
# for sample in np.array(sample_name)[1:len(sample_name)]:
#     feat1=pd.read_csv(dirs+"gtt_input/BGI_SlideV2_10X/"+str(sample)+"_mat.csv",header=0,index_col=0,sep=',')
#     meta1=pd.read_csv(dirs+"gtt_input/BGI_SlideV2_10X/"+str(sample)+"_meta.csv",header=0,index_col=0,sep=',')
#     coord1=pd.read_csv(dirs+"gtt_input/BGI_SlideV2_10X/"+str(sample)+"_coord.csv",header=0,index_col=0,sep=',')
#     if sample=="SlideV2":
#         used_barcodes=np.loadtxt(dirs+"origin/used_barcodes.txt",dtype=str)
#         used_barcodes=['SlideV2-'+x for x in used_barcodes]
#         cells=np.intersect1d(used_barcodes,feat1.index)
#         feat1=feat1.loc[cells,:]
#         meta1=meta1.loc[cells,:]
#         coord1=coord1.loc[cells,:]
#     feat=pd.concat((feat,feat1))
#     meta=pd.concat((meta,meta1))
#     coord=pd.concat((coord,coord1))
#     flags=flags+'_'+str(sample)
    
# adata = sc.AnnData(feat)
# adata.var_names_make_unique()
# coord = coord.loc[adata.obs_names, ['x', 'y']]
# adata.obsm["spatial"] = coord.to_numpy()
# adata.X=sp.sparse.csr_matrix(adata.X)
# adata.obs= meta.loc[adata.obs_names, :]   
# adata.obs['batch']=np.array(adata.obs['batch'],dtype=str)
# idx=np.where(adata.obs['celltype']!='Low_Quality')[0]
# adata=adata[idx,:]

In [18]:
# Human DLPFC (10x): pick the slices at positions `IDX` of `sample_name`, read
# their feature / label / position tables and stack them into one AnnData.
dirs="/data02/tguo/space_batch_effect/human_DLPFC_10x/"
sample_name=[151507,151508,151509,151510,151669,151670,151671,151672,151673,151674,151675,151676]
IDX=[9,10]

# `flags1`: the selected sample ids joined by '-'; it prefixes every input file name.
flags1='-'.join(str(sample_name[j]) for j in IDX)

i=IDX[0]
table_prefix=dirs+"gtt_input_scanpy/"+flags1+'_'
feat_parts,meta_parts,coord_parts=[],[],[]
# `flags`: "_<sample>_<sample>..." suffix used in result file names.
flags=''
for sample in np.array(sample_name)[IDX]:
    stem=table_prefix+str(sample)
    feat_parts.append(pd.read_csv(stem+"_features.txt",header=0,index_col=0,sep=','))
    meta_parts.append(pd.read_csv(stem+"_label.txt",header=0,index_col=0,sep=','))
    coord_parts.append(pd.read_csv(stem+"_positions.txt",header=0,index_col=0,sep=','))
    flags=flags+'_'+str(sample)
feat=pd.concat(feat_parts)
meta=pd.concat(meta_parts)
coord=pd.concat(coord_parts)

adata=sc.AnnData(feat)
adata.var_names_make_unique()
adata.X=sp.sparse.csr_matrix(adata.X)
# Align coordinates and metadata with the AnnData observation order.
coord=coord.loc[adata.obs_names,['x','y']]
adata.obsm["spatial"]=coord.to_numpy()
adata.obs=meta.loc[adata.obs_names,:]
adata.obs['batch']=np.asarray(adata.obs['batch'],dtype=str)

聚类类别的 SCS（spatial coherence score）

In [None]:
# Spatial coherence score (SCS) of every method's cluster labels, computed
# separately for each batch and written to <dirs>/metrics/.

# Per-method clustering type and file-name suffix, aligned with methods[1:8].
# Alternative (louvain) configuration, kept for reference:
# clust_cate=['seuratmethod','louvain','louvain','louvain','louvain','louvain','louvain','louvain']
# BS=['_refine_512','','','','','','','']
clust_cate=['mclust']*8
BS=['','','','','','','','']

# Load the cluster labels of methods[1:8] into adata.obs['<method>_clust'].
for method,cate,suffix in zip(methods[1:8],clust_cate,BS):
    clust=pd.read_csv(dirs+"metrics/"+method+flags+"_"+cate+"_clust_modify"+suffix+".csv",header=0,index_col=0,sep=',').loc[adata.obs_names,]
    adata.obs[method+'_clust']=np.array(clust.values,dtype=str)
# BASS labels live in a differently named file.
clust=pd.read_csv(dirs+'metrics/BASS_BASS_clust'+flags+'.csv',index_col=0,header=0)
adata.obs['BASS_clust']=np.array(clust.loc[adata.obs_names,:].values,dtype=str)

# NOTE(review): the scoring loop also reads 'spiral_clust', which this cell
# does not load; presumably it is set elsewhere - confirm. Fail early with a
# clear message instead of part-way through the scoring loop.
missing=[m+'_clust' for m in methods if m+'_clust' not in adata.obs.columns]
if missing:
    raise KeyError("missing cluster columns in adata.obs: "+", ".join(missing))

knn=6
ub=np.unique(adata.obs['batch'])
scs=[]
# Batch is the outer loop and method the inner one, so the scores line up
# with methods1 repeated once per batch.
for batch in ub:
    idx=np.where(adata.obs['batch']==batch)[0]
    adata1=adata[idx,:]
    for method in methods:
        g,node_dict=generate_graph_from_labels(adata1, adata1.obs[method+'_clust'],knn)
        scs.append(spatial_coherence_score(g, node_dict))

data1=pd.DataFrame({'scs':scs,'method':methods1*len(ub)})
# Fix: the table is `data1`; the previous code called `data.to_csv`, which
# raised NameError on a fresh kernel or used a stale `data` from another cell.
# NOTE(review): the first file name contains 'louvain' regardless of
# `clust_cate` - confirm that writing both files is still intended.
data1.to_csv(dirs+'metrics/spatial_coherence_score_louvain'+flags+"_clusters_modify.csv")
data1.to_csv(dirs+'metrics/spatial_coherence_score_'+clust_cate[0]+'-'+clust_cate[1]+flags+"_clusters_modify.csv")

Alignment 后的 SCS

In [12]:
######mouse OB########
# clust_cate='_louvain'
# coord1=pd.read_csv(dirs+"gtt_output/coordinate_file/gtt_new_coordinate"+flags+clust_cate+".csv",header=0,index_col=0)
# coord2=pd.read_csv(dirs+"methods/paste_new_coord"+flags+".csv",header=0,index_col=0)
# cells=np.array(coord1.index[np.where((coord1.loc[:,"celltype"]!='UN')&(coord1.loc[:,"celltype"]!='AOB')&(coord1.loc[:,"celltype"]!='AOBgr'))])
# cells=np.intersect1d(cells,adata.obs_names)
# adata=adata[cells,]
# coord1=coord1.loc[cells,:]
# coord2=coord2.loc[cells,:]
# idx=np.where((coord1.loc[:,'celltype']=='GL_1')|(coord1.loc[:,'celltype']=='GL_2'))[0]
# coord1.iloc[idx,3]='GL'
# adata.obs['celltype']=coord1.loc[:,'celltype']

#####sagittal#######
# For every rotation angle: place the spots on the SPIRAL coordinates, then on
# the PASTE coordinates, score the 'celltype' labels on each layout and save
# the absolute scores. `adata.obsm["spatial"]` is overwritten in place.
for angle in (30,90,120,150,180):
    rotate_tag="_rotate"+str(angle)
    coord1=pd.read_csv(dirs+"gtt_output/SPIRAL_alignment/new_coord"+flags+rotate_tag+"_modify.csv",header=0,index_col=0)
    coord2=pd.read_csv(dirs+"methods/paste_new_coord"+flags+rotate_tag+".csv",header=0,index_col=0)

    # SPIRAL coordinates
    knn=6
    coord1=coord1.loc[adata.obs_names,['x','y']]
    adata.obsm["spatial"]=coord1.to_numpy()
    g1,node_dict1=generate_graph_from_labels(adata,adata.obs['celltype'],knn)
    scs1=spatial_coherence_score(g1,node_dict1)

    # PASTE coordinates
    knn=6
    coord2=coord2.loc[adata.obs_names,['x','y']]
    adata.obsm["spatial"]=coord2.to_numpy()
    g2,node_dict2=generate_graph_from_labels(adata,adata.obs['celltype'],knn)
    scs2=spatial_coherence_score(g2,node_dict2)

    # Two-row table (method, |score|); hstack turns the scores into strings.
    scs=np.abs(np.array([[scs1],[scs2]]))
    method=np.array([['SPIRAL'],['PASTE']])
    data=np.hstack((method,scs))
    out_path=dirs+'metrics/spatial_coherence_score_louvain'+flags+"_celltype_rotate"+str(angle)+".csv"
    pd.DataFrame(data,columns=['method','scs']).to_csv(out_path)

In [40]:
# Score the 'celltype' labels on the STIM coordinates and merge the result
# into the existing <clust_cate> celltype score table.
# NOTE(review): every column of the STIM file is used as a coordinate here,
# whereas the other cells select ['x', 'y'] - confirm the file layout.
coord3=pd.read_csv(dirs+"methods/STIM_new_coord"+flags+".csv",header=0,index_col=0).loc[adata.obs_names,]
adata.obsm["spatial"]=coord3.to_numpy()

knn=6
# NOTE(review): this overwrites `g1`, `node_dict1` and `scs1`, names that
# other cells also use for the SPIRAL result.
g1,node_dict1=generate_graph_from_labels(adata, adata.obs['celltype'],knn)
scs1=spatial_coherence_score(g1, node_dict1)

clust_cate='louvain'
scs_path=dirs+'metrics/spatial_coherence_score_'+clust_cate+flags+"_celltype.csv"
scs=pd.read_csv(scs_path,header=0,index_col=0)
# The table is read from and written back to the same file, so drop any STIM
# row left by an earlier run; otherwise every re-run appends a duplicate.
scs=scs[scs['method']!='STIM']
# Store the absolute score, as the cells that write the SPIRAL / PASTE rows
# do, and renumber the index so the new row does not reuse the label 0.
stim_row=pd.DataFrame({'method':['STIM'],'scs':[np.abs(scs1)]})
scs=pd.concat((scs,stim_row),ignore_index=True)
# The first row of the table is relabelled as 'SPIRAL'.
scs.iloc[0,0]='SPIRAL'
scs.to_csv(scs_path)


In [None]:
# Cell without an execution count: writes the two-row (method, |score|) table
# for `scs1` / `scs2` to the <clust_cate> celltype score file.
# `scs1`, `scs2`, `clust_cate`, `dirs` and `flags` must already exist in the kernel.
# NOTE(review): the preceding cell reassigns `scs1` to the STIM score and
# writes to the same file - confirm the run order before executing this cell.
scs=np.abs(np.array([scs1,scs2])).reshape(-1,1)
method=np.array([['GraphSCIDRL'],['PASTE']])
data=np.hstack((method,scs))
out_file=dirs+'metrics/spatial_coherence_score_'+clust_cate+flags+"_celltype.csv"
pd.DataFrame(data,columns=['method','scs']).to_csv(out_file)