In [1]:
import os
import scanpy as sc
import pandas as pd
import numpy as np
import sklearn.neighbors
import scipy.sparse as sp
from sklearn.decomposition import PCA

In [2]:
def Cal_Spatial_Net(adata, rad_cutoff=None, k_cutoff=None, model='Radius', verbose=True):
    """\
    Construct the spatial neighbor network.

    Parameters
    ----------
    adata
        AnnData object of scanpy package. Coordinates are read from
        adata.obsm['spatial'] and cell names from adata.obs.index.
    rad_cutoff
        Radius cutoff; required when model='Radius', ignored when model='KNN'.
    k_cutoff
        Number of nearest neighbors; required when model='KNN', ignored when
        model='Radius'.
    model
        The network construction model. When model=='Radius', the spot is
        connected to the spots returned by a radius query with rad_cutoff.
        When model=='KNN', the spot is connected to its first k_cutoff
        nearest neighbors.
    verbose
        When True, print a progress line and edge statistics.

    Returns
    -------
    None. The edge list is saved in adata.uns['Spatial_Net'] as a DataFrame
    with columns 'Cell1', 'Cell2' (cell names) and 'Distance', one row per
    directed edge, with a unique 0..n_edges-1 index.

    Raises
    ------
    ValueError
        If model is not 'Radius' or 'KNN', or if the cutoff required by the
        selected model is None.

    Notes
    -----
    Edges with Distance == 0 are dropped. This removes each cell's match
    with itself, and also removes edges between distinct cells that share
    identical coordinates, so such cells can end up with fewer than k_cutoff
    neighbors.
    """

    # Explicit exceptions instead of `assert`, which is removed under `python -O`.
    if model not in ('Radius', 'KNN'):
        raise ValueError("model must be 'Radius' or 'KNN', got %r" % (model,))
    if model == 'Radius' and rad_cutoff is None:
        raise ValueError("rad_cutoff must be given when model='Radius'")
    if model == 'KNN' and k_cutoff is None:
        raise ValueError("k_cutoff must be given when model='KNN'")
    if verbose:
        print('------Calculating spatial graph...')

    coor = pd.DataFrame(adata.obsm['spatial'])
    coor.index = adata.obs.index
    cell_names = np.array(coor.index)
    n_cells = coor.shape[0]

    if model == 'Radius':
        nbrs = sklearn.neighbors.NearestNeighbors(radius=rad_cutoff).fit(coor)
        distances, indices = nbrs.radius_neighbors(coor, return_distance=True)
        # The radius query yields one variable-length array per cell; flatten
        # them in a single pass instead of building a DataFrame per cell.
        counts = np.fromiter((len(hits) for hits in indices), dtype=np.intp, count=n_cells)
        source = np.repeat(np.arange(n_cells), counts)
        target = np.concatenate(list(indices))
        dist = np.concatenate(list(distances))
    else:
        # One extra neighbor is requested because the query points are the
        # fitted points themselves, so each cell also matches itself.
        nbrs = sklearn.neighbors.NearestNeighbors(n_neighbors=k_cutoff+1).fit(coor)
        distances, indices = nbrs.kneighbors(coor)
        source = np.repeat(np.arange(n_cells), indices.shape[1])
        target = indices.ravel()
        dist = distances.ravel()

    # Drop zero-distance pairs (self matches), then translate row positions
    # into cell names. Building the frame once avoids assigning on a slice.
    keep = dist > 0
    Spatial_Net = pd.DataFrame({
        'Cell1': cell_names[source[keep]],
        'Cell2': cell_names[target[keep]],
        'Distance': dist[keep],
    })
    if verbose:
        print('The graph contains %d edges, %d cells.' %(Spatial_Net.shape[0], adata.n_obs))
        print('%.4f neighbors per cell on average.' %(Spatial_Net.shape[0]/adata.n_obs))

    adata.uns['Spatial_Net'] = Spatial_Net

模拟 (Simulation)

In [4]:
# Split the simulated dataset into its first two batches and write the
# per-batch expression, metadata and coordinate tables.
dirs="/data02/tguo/space_batch_effect/simulate/"
# Alternative run: batch_sim="_1_2", types="_3batch_types6"
batch_sim="_1"
types="_types7"
run_tag = str(batch_sim) + types
origin_prefix = dirs + "origin/"
out_prefix = dirs + "gtt_input/"

# The matrix is transposed after loading; the column-name file then labels
# its rows and the row-name file labels its columns.
feat = sc.read_mtx(origin_prefix + "mat" + run_tag + ".mtx").X.toarray().T
gene = np.loadtxt(origin_prefix + "mat_rownames" + run_tag + ".txt", dtype=str)
cells = np.loadtxt(origin_prefix + "mat_colnames" + run_tag + ".txt", dtype=str)
feat = pd.DataFrame(feat, index=cells, columns=gene)

meta = pd.read_csv(origin_prefix + "meta" + run_tag + ".txt", header=0, index_col=0, sep=',')
batch = meta['batch'].values
cells = np.array(meta.index)
# Only the first two sorted batch labels are split out here.
batch_labels = np.unique(batch)
cell1 = cells[batch == batch_labels[0]]
cell2 = cells[batch == batch_labels[1]]

feat1 = feat.loc[cell1]
feat2 = feat.loc[cell2]
feat1.to_csv(out_prefix + "feat1" + run_tag + ".csv")
feat2.to_csv(out_prefix + "feat2" + run_tag + ".csv")

meta1 = meta.loc[cell1]
meta2 = meta.loc[cell2]
meta1.to_csv(out_prefix + "meta1" + run_tag + ".csv")
meta2.to_csv(out_prefix + "meta2" + run_tag + ".csv")

posi = pd.read_csv(origin_prefix + "spatial_posi" + run_tag + ".txt", header=0, index_col=0, sep=',')
coord = posi[['x', 'y']]
coord1 = coord.loc[cell1]
coord2 = coord.loc[cell2]
coord1.to_csv(out_prefix + "coord1" + run_tag + ".csv")
coord2.to_csv(out_prefix + "coord2" + run_tag + ".csv")


# cell3=cells[np.where(batch==np.unique(batch)[2])[0]]
# feat3=feat.loc[cell3,:]
# feat3.to_csv(dirs+"gtt_input/feat3"+str(batch_sim)+types+".csv")
# meta3=meta.loc[cell3,:]
# meta3.to_csv(dirs+"gtt_input/meta3"+str(batch_sim)+types+".csv")
# coord3=coord.loc[cell3,:]
# coord3.to_csv(dirs+"gtt_input/coord3"+str(batch_sim)+types+".csv")

In [6]:
# Simulated data: build the KNN spatial graph for one batch and export its edge list.
knn=10
dirs="/data02/tguo/space_batch_effect/simulate/"
# Alternative run: batch_sim="_1_2", types="_3batch_types6"
batch_sim="_1"
types="_types7"
idx=2
input_dir = dirs + "gtt_input/"
run_tag = str(batch_sim) + types
batch_tag = str(idx) + run_tag

# The first CSV column of every table is used as the row index.
features = pd.read_csv(input_dir + "feat" + batch_tag + ".csv", header=0, index_col=0, sep=',')
meta = pd.read_csv(input_dir + "meta" + batch_tag + ".csv", header=0, index_col=0, sep=',')
coord = pd.read_csv(input_dir + "coord" + batch_tag + ".csv", header=0, index_col=0, sep=',')

# Optional (disabled): restrict to a subset of cell types and save the subset.
# idx=np.where((meta.loc[:,'celltype'].values=='Group5')|(meta.loc[:,'celltype'].values=='Group6')|
#              (meta.loc[:,'celltype'].values=='Group3')|(meta.loc[:,'celltype'].values=='Group4'))[0]
# features=features.iloc[idx,:]
# meta=meta.iloc[idx,:]
# coord=coord.iloc[idx,:]
# features.to_csv(dirs+"gtt_input/feat2"+str(batch_sim)+types+"_subset.csv")
# meta.to_csv(dirs+"gtt_input/meta2"+str(batch_sim)+types+"_subset.csv")
# coord.to_csv(dirs+"gtt_input/coord2"+str(batch_sim)+types+"_subset.csv")

adata = sc.AnnData(features)
adata.var_names_make_unique()
adata.X = sp.csr_matrix(adata.X)
# Coordinates are attached positionally, so coord rows must follow the matrix row order.
adata.obsm["spatial"] = coord[['x', 'y']].to_numpy()
Cal_Spatial_Net(adata, rad_cutoff=None, k_cutoff=knn, model='KNN', verbose=True)

# Restrict to highly variable genes only when that annotation is present.
adata_Vars = adata[:, adata.var['highly_variable']] if 'highly_variable' in adata.var.columns else adata
features = pd.DataFrame(adata_Vars.X.toarray(), index=adata_Vars.obs.index, columns=adata_Vars.var.index)
cells = np.array(features.index)
cells_id_tran = {name: pos for pos, name in enumerate(cells)}
if 'Spatial_Net' not in adata.uns:
    raise ValueError("Spatial_Net is not existed! Run Cal_Spatial_Net first!")

Spatial_Net = adata.uns['Spatial_Net']
G_df = Spatial_Net.copy()
# Only the two cell-name columns are written, one edge per line.
edge_pairs = G_df.values[:, :2]
np.savetxt(input_dir + "edge" + str(idx) + "_KNN_" + str(knn) + run_tag + ".csv", edge_pairs, fmt='%s')

------Calculating spatial graph...
The graph contains 60000 edges, 6000 cells.
10.0000 neighbors per cell on average.


DLPFC

In [11]:
# DLPFC: build a KNN spatial graph for each selected slice and export its edge list.
rad=150
KNN=6
dirs="/data02/tguo/space_batch_effect/human_DLPFC_10x/"
sample_name=[151507,151508,151509,151510,151669,151670,151671,151672,151673,151674,151675,151676]

IDX=[10,11]
# The selected sample ids joined with '-' form the shared file-name prefix.
flags='-'.join(str(sample_name[j]) for j in IDX)
for i in IDX:
    sample1=sample_name[i]
    sample_prefix=dirs+"gtt_input_scanpy/"+flags+'_'+str(sample1)
    # The first CSV column of every table is used as the row index.
    features=pd.read_csv(sample_prefix+"_features.txt",header=0,index_col=0,sep=',')
    meta=pd.read_csv(sample_prefix+"_label.txt",header=0,index_col=0,sep=',')
    coord=pd.read_csv(sample_prefix+"_positions.txt",header=0,index_col=0,sep=',')
    # meta=meta.iloc[:meta.shape[0]-1,:]
    adata = sc.AnnData(features)
    adata.var_names_make_unique()
    adata.X=sp.csr_matrix(adata.X)
    # Coordinates are attached positionally, so coord rows must follow the matrix row order.
    adata.obsm["spatial"] = coord.loc[:,['x','y']].to_numpy()
    # k_cutoff follows KNN so the graph always matches the "_edge_KNN_<KNN>" file
    # name written below; rad is passed through but is not used in KNN mode.
    Cal_Spatial_Net(adata, rad_cutoff=rad, k_cutoff=KNN, model='KNN', verbose=True)
    # Restrict to highly variable genes only when that annotation is present.
    if 'highly_variable' in adata.var.columns:
        adata_Vars =  adata[:, adata.var['highly_variable']]
    else:
        adata_Vars = adata
    features = pd.DataFrame(adata_Vars.X.toarray()[:, ], index=adata_Vars.obs.index, columns=adata_Vars.var.index)
    cells = np.array(features.index)
    cells_id_tran = dict(zip(cells, range(cells.shape[0])))
    if 'Spatial_Net' not in adata.uns.keys():
        raise ValueError("Spatial_Net is not existed! Run Cal_Spatial_Net first!")

    Spatial_Net = adata.uns['Spatial_Net']
    G_df = Spatial_Net.copy()
    # Only the two cell-name columns are written, one edge per line.
    np.savetxt(sample_prefix+"_edge_KNN_"+str(KNN)+".csv",G_df.values[:,:2],fmt='%s')

    # G_df['Cell1'] = G_df['Cell1'].map(cells_id_tran)
    # G_df['Cell2'] = G_df['Cell2'].map(cells_id_tran)
    # adj = sp.coo_matrix((np.ones(G_df.shape[0]), (G_df['Cell1'], G_df['Cell2'])), shape=(adata.n_obs, adata.n_obs))
    # adj+=adj.T.multiply(adj.T>adj)-adj.multiply(adj.T>adj)
    # features=torch.FloatTensor(features.values)

------Calculating spatial graph...
The graph contains 21396 edges, 3566 cells.
6.0000 neighbors per cell on average.
------Calculating spatial graph...
The graph contains 20586 edges, 3431 cells.
6.0000 neighbors per cell on average.


PDAC

In [20]:
# PDAC: build the KNN spatial graph for one sample and export its edge list.
knn=4
dirs="/data02/tguo/space_batch_effect/PDAC/"
sample="PDAC-E-ST1"
sample_prefix = dirs + "gtt_input/" + sample

# The first CSV column of each table is used as the row index.
feat = pd.read_csv(sample_prefix + "_mat.csv", header=0, index_col=0, sep=',')
coord = pd.read_csv(sample_prefix + "_coord.csv", header=0, index_col=0, sep=',')
coord.columns = ['x', 'y']

adata = sc.AnnData(feat)
adata.var_names_make_unique()
adata.X = sp.csr_matrix(adata.X)
# Coordinates are attached positionally, so coord rows must follow the matrix row order.
adata.obsm["spatial"] = coord[['x', 'y']].to_numpy()
Cal_Spatial_Net(adata, k_cutoff=knn, model='KNN', verbose=True)

# Restrict to highly variable genes only when that annotation is present.
adata_Vars = adata[:, adata.var['highly_variable']] if 'highly_variable' in adata.var.columns else adata
features = pd.DataFrame(adata_Vars.X.toarray(), index=adata_Vars.obs.index, columns=adata_Vars.var.index)
cells = np.array(features.index)
cells_id_tran = {name: pos for pos, name in enumerate(cells)}
if 'Spatial_Net' not in adata.uns:
    raise ValueError("Spatial_Net is not existed! Run Cal_Spatial_Net first!")

Spatial_Net = adata.uns['Spatial_Net']
G_df = Spatial_Net.copy()
# Only the two cell-name columns are written, one edge per line.
edge_pairs = G_df.values[:, :2]
np.savetxt(sample_prefix + "_edge_KNN_" + str(knn) + ".csv", edge_pairs, fmt='%s')



------Calculating spatial graph...
The graph contains 1436 edges, 359 cells.
4.0000 neighbors per cell on average.


mouse brain

In [4]:
# Mouse brain: build the KNN spatial graph for one sample and export its edge list.
knn=6
rad=10
dirs="/data02/tguo/space_batch_effect/mouse_brain/"
sample="pa_anterior2"
sample_prefix = dirs + "gtt_input/" + sample

# The first CSV column of each table is used as the row index.
feat = pd.read_csv(sample_prefix + "_mat.csv", header=0, index_col=0, sep=',')
coord = pd.read_csv(sample_prefix + "_coord.csv", header=0, index_col=0, sep=',')
coord.columns = ['x', 'y']

adata = sc.AnnData(feat)
adata.var_names_make_unique()
adata.X = sp.csr_matrix(adata.X)
# Coordinates are attached positionally, so coord rows must follow the matrix row order.
adata.obsm["spatial"] = coord[['x', 'y']].to_numpy()
# rad is passed through but is not used in KNN mode.
Cal_Spatial_Net(adata, rad_cutoff=rad, k_cutoff=knn, model='KNN', verbose=True)

# Restrict to highly variable genes only when that annotation is present.
adata_Vars = adata[:, adata.var['highly_variable']] if 'highly_variable' in adata.var.columns else adata
features = pd.DataFrame(adata_Vars.X.toarray(), index=adata_Vars.obs.index, columns=adata_Vars.var.index)
cells = np.array(features.index)
cells_id_tran = {name: pos for pos, name in enumerate(cells)}
if 'Spatial_Net' not in adata.uns:
    raise ValueError("Spatial_Net is not existed! Run Cal_Spatial_Net first!")

Spatial_Net = adata.uns['Spatial_Net']
G_df = Spatial_Net.copy()
# Only the two cell-name columns are written, one edge per line.
edge_pairs = G_df.values[:, :2]
np.savetxt(sample_prefix + "_edge_KNN_" + str(knn) + ".csv", edge_pairs, fmt='%s')



------Calculating spatial graph...
The graph contains 16950 edges, 2825 cells.
6.0000 neighbors per cell on average.


breast cancer

In [37]:
# Breast cancer: build the KNN spatial graph for one sample and export its edge list.
knn=6
rad=10
dirs="/data02/tguo/space_batch_effect/breast_cancer/"
sample="10X-2"
sample_prefix = dirs + "gtt_input/" + sample

# The first CSV column of each table is used as the row index.
feat = pd.read_csv(sample_prefix + "_mat.csv", header=0, index_col=0, sep=',')
coord = pd.read_csv(sample_prefix + "_coord.csv", header=0, index_col=0, sep=',')
coord.columns = ['x', 'y']

adata = sc.AnnData(feat)
adata.var_names_make_unique()
adata.X = sp.csr_matrix(adata.X)
# Coordinates are attached positionally, so coord rows must follow the matrix row order.
adata.obsm["spatial"] = coord[['x', 'y']].to_numpy()
# rad is passed through but is not used in KNN mode.
Cal_Spatial_Net(adata, rad_cutoff=rad, k_cutoff=knn, model='KNN', verbose=True)

# Restrict to highly variable genes only when that annotation is present.
adata_Vars = adata[:, adata.var['highly_variable']] if 'highly_variable' in adata.var.columns else adata
features = pd.DataFrame(adata_Vars.X.toarray(), index=adata_Vars.obs.index, columns=adata_Vars.var.index)
cells = np.array(features.index)
cells_id_tran = {name: pos for pos, name in enumerate(cells)}
if 'Spatial_Net' not in adata.uns:
    raise ValueError("Spatial_Net is not existed! Run Cal_Spatial_Net first!")

Spatial_Net = adata.uns['Spatial_Net']
G_df = Spatial_Net.copy()
# Only the two cell-name columns are written, one edge per line.
edge_pairs = G_df.values[:, :2]
np.savetxt(sample_prefix + "_edge_KNN_" + str(knn) + ".csv", edge_pairs, fmt='%s')



------Calculating spatial graph...
The graph contains 23922 edges, 3987 cells.
6.0000 neighbors per cell on average.


mouse OB

In [8]:
# Mouse OB: build the spatial graph for one sample and export its edge list.
knn=8
rad=1.5
mode='KNN'
mode_num=knn
dirs="/data02/tguo/space_batch_effect/mouse_OB/"
# Alternative input folder: extra_dirs="Stereo-seq-higher-resolution-filter/"
extra_dirs="Stereo-bin34-filter/"
sample="SlideV2"
sample_prefix = dirs + "gtt_input/" + extra_dirs + sample

# The first CSV column of each table is used as the row index.
feat = pd.read_csv(sample_prefix + "_mat.csv", header=0, index_col=0, sep=',')
coord = pd.read_csv(sample_prefix + "_coord.csv", header=0, index_col=0, sep=',')
coord.columns = ['x', 'y']

adata = sc.AnnData(feat)
adata.var_names_make_unique()
adata.X = sp.csr_matrix(adata.X)
# Coordinates are attached positionally, so coord rows must follow the matrix row order.
adata.obsm["spatial"] = coord[['x', 'y']].to_numpy()
# Both cutoffs are passed; only the one matching `mode` is used.
Cal_Spatial_Net(adata, rad_cutoff=rad, k_cutoff=knn, model=mode, verbose=True)

# Restrict to highly variable genes only when that annotation is present.
adata_Vars = adata[:, adata.var['highly_variable']] if 'highly_variable' in adata.var.columns else adata
features = pd.DataFrame(adata_Vars.X.toarray(), index=adata_Vars.obs.index, columns=adata_Vars.var.index)
cells = np.array(features.index)
cells_id_tran = {name: pos for pos, name in enumerate(cells)}
if 'Spatial_Net' not in adata.uns:
    raise ValueError("Spatial_Net is not existed! Run Cal_Spatial_Net first!")

Spatial_Net = adata.uns['Spatial_Net']
G_df = Spatial_Net.copy()

# `a` keeps the two cell-name columns in the notebook namespace; the same
# columns are written to disk, one edge per line.
a = G_df.values[:, :2]
edge_path = sample_prefix + "_edge_" + mode + "_" + str(mode_num) + ".csv"
np.savetxt(edge_path, G_df.values[:, :2], fmt='%s')

# from itertools import product
# li=['A','B','C','D','E','F','G','H','I']
# combs = [''.join(comb) for comb in product(li, repeat=len(li))]
# def multiassign(d, keys, values):
#     for k, v in zip(keys, values):
#         d[k] = v
# combs=np.array(combs)[np.arange(len(adata.obs_names))].tolist()
# cell={}
# multiassign(cell, list(adata.obs_names), combs)
# feat=pd.read_csv(dirs+"gtt_input/"+sample+"_mat.csv",header=0,index_col=0,sep=',')
# feat.index=[cell[i] for i in feat.index]
# feat.to_csv(dirs+"gtt_input/"+sample+"_mat.csv")
# meta=pd.read_csv(dirs+"gtt_input/"+sample+"_meta.csv",header=0,index_col=0,sep=',')
# meta.index=[cell[i] for i in meta.index]
# meta.to_csv(dirs+"gtt_input/"+sample+"_meta.csv")
# coord=pd.read_csv(dirs+"gtt_input/"+sample+"_coord.csv",header=0,index_col=0,sep=',')
# coord.index=[cell[i] for i in coord.index]
# coord.to_csv(dirs+"gtt_input/"+sample+"_coord.csv")



------Calculating spatial graph...
The graph contains 148296 edges, 18537 cells.
8.0000 neighbors per cell on average.


In [22]:
########expression########
# Mouse OB scRNA: build a KNN graph in PCA space of the expression matrix
# (no spatial coordinates are read) and export its edge list.
knn=10
pca_dim=30
dirs="/data02/tguo/space_batch_effect/mouse_OB/"
sample="scRNA"
# The first CSV column is used as the row index.
feat=pd.read_csv(dirs+"gtt_input/"+sample+"_mat.csv",header=0,index_col=0,sep=',')
# random_state is fixed so the embedding, and therefore the exported graph, is
# the same on every run even if PCA selects a randomized SVD solver.
coord=PCA(n_components=pca_dim, random_state=0).fit_transform(feat.values)

adata = sc.AnnData(feat)
adata.var_names_make_unique()
adata.X=sp.csr_matrix(adata.X)
# The PCA embedding is stored under the 'spatial' key because that is the
# slot Cal_Spatial_Net reads coordinates from.
adata.obsm["spatial"] = coord
Cal_Spatial_Net(adata, rad_cutoff=None, k_cutoff=knn, model='KNN', verbose=True)
# Restrict to highly variable genes only when that annotation is present.
if 'highly_variable' in adata.var.columns:
    adata_Vars =  adata[:, adata.var['highly_variable']]
else:
    adata_Vars = adata
features = pd.DataFrame(adata_Vars.X.toarray()[:, ], index=adata_Vars.obs.index, columns=adata_Vars.var.index)
cells = np.array(features.index)
cells_id_tran = dict(zip(cells, range(cells.shape[0])))
if 'Spatial_Net' not in adata.uns.keys():
    raise ValueError("Spatial_Net is not existed! Run Cal_Spatial_Net first!")

Spatial_Net = adata.uns['Spatial_Net']
G_df = Spatial_Net.copy()


# a=G_df.values[:,:2]
# pd.DataFrame(a).to_csv(dirs+"gtt_input/"+sample+"_edge_KNN_"+str(knn)+".csv")
# Only the two cell-name columns are written, one edge per line.
np.savetxt(dirs+"gtt_input/"+sample+"_edge_KNN_"+str(knn)+"_expression.csv",G_df.values[:,:2],fmt='%s')

KeyboardInterrupt: 

In [4]:
##########higher resolution of stereo-seq##########
# Build the spatial graph for the Stereo-v2 sample and export its edge list.
knn=8
rad=50
mode='KNN'
dirs="/data02/tguo/space_batch_effect/mouse_OB/"
sample="Stereo-v2"
sample_prefix = dirs + "gtt_input/" + sample

# The first CSV column of each table is used as the row index.
feat = pd.read_csv(sample_prefix + "_mat.csv", header=0, index_col=0, sep=',')
coord = pd.read_csv(sample_prefix + "_coord.csv", header=0, index_col=0, sep=',')
coord.columns = ['x', 'y']

adata = sc.AnnData(feat)
adata.var_names_make_unique()
adata.X = sp.csr_matrix(adata.X)
# Coordinates are attached positionally, so coord rows must follow the matrix row order.
adata.obsm["spatial"] = coord[['x', 'y']].to_numpy()
# Both cutoffs are passed; only the one matching `mode` is used.
Cal_Spatial_Net(adata, rad_cutoff=rad, k_cutoff=knn, model=mode, verbose=True)

# Restrict to highly variable genes only when that annotation is present.
adata_Vars = adata[:, adata.var['highly_variable']] if 'highly_variable' in adata.var.columns else adata
features = pd.DataFrame(adata_Vars.X.toarray(), index=adata_Vars.obs.index, columns=adata_Vars.var.index)
cells = np.array(features.index)
cells_id_tran = {name: pos for pos, name in enumerate(cells)}
if 'Spatial_Net' not in adata.uns:
    raise ValueError("Spatial_Net is not existed! Run Cal_Spatial_Net first!")

Spatial_Net = adata.uns['Spatial_Net']
G_df = Spatial_Net.copy()

# `a` keeps the two cell-name columns in the notebook namespace; the same
# columns are written to disk, one edge per line.
a = G_df.values[:, :2]
edge_path = sample_prefix + "_edge_" + mode + "_" + str(knn) + ".csv"
np.savetxt(edge_path, G_df.values[:, :2], fmt='%s')


------Calculating spatial graph...
The graph contains 152872 edges, 19109 cells.
8.0000 neighbors per cell on average.


MOSTA

In [10]:
# MOSTA: build the KNN spatial graph for one sample and export its edge list.
# NOTE(review): the output saved with this cell reports 8.0000 neighbors per
# cell, which does not match knn=10 -- re-run to confirm which value produced
# the exported edge file.
knn=10
rad=10
dirs="/data02/tguo/space_batch_effect/MOSTA/"
sample="E9.5"
sample_prefix = dirs + "gtt_input/" + sample

# The first CSV column of each table is used as the row index.
feat = pd.read_csv(sample_prefix + "_mat.csv", header=0, index_col=0, sep=',')
coord = pd.read_csv(sample_prefix + "_coord.csv", header=0, index_col=0, sep=',')
coord.columns = ['x', 'y']

adata = sc.AnnData(feat)
adata.var_names_make_unique()
adata.X = sp.csr_matrix(adata.X)
# Coordinates are attached positionally, so coord rows must follow the matrix row order.
adata.obsm["spatial"] = coord[['x', 'y']].to_numpy()
# rad is passed through but is not used in KNN mode.
Cal_Spatial_Net(adata, rad_cutoff=rad, k_cutoff=knn, model='KNN', verbose=True)

# Restrict to highly variable genes only when that annotation is present.
adata_Vars = adata[:, adata.var['highly_variable']] if 'highly_variable' in adata.var.columns else adata
features = pd.DataFrame(adata_Vars.X.toarray(), index=adata_Vars.obs.index, columns=adata_Vars.var.index)
cells = np.array(features.index)
cells_id_tran = {name: pos for pos, name in enumerate(cells)}
if 'Spatial_Net' not in adata.uns:
    raise ValueError("Spatial_Net is not existed! Run Cal_Spatial_Net first!")

Spatial_Net = adata.uns['Spatial_Net']
G_df = Spatial_Net.copy()
# Only the two cell-name columns are written, one edge per line.
edge_pairs = G_df.values[:, :2]
np.savetxt(sample_prefix + "_edge_KNN_" + str(knn) + ".csv", edge_pairs, fmt='%s')

------Calculating spatial graph...
The graph contains 410920 edges, 51365 cells.
8.0000 neighbors per cell on average.


Colon

In [8]:
# Colon: build the KNN spatial graph for one sample and export its edge list.
knn=6
rad=10
dirs="/data02/tguo/space_batch_effect/Colon/"
sample="2113"
sample_prefix = dirs + "gtt_input/" + sample

# The first CSV column of each table is used as the row index.
feat = pd.read_csv(sample_prefix + "_mat.csv", header=0, index_col=0, sep=',')
coord = pd.read_csv(sample_prefix + "_coord.csv", header=0, index_col=0, sep=',')
coord.columns = ['x', 'y']

adata = sc.AnnData(feat)
adata.var_names_make_unique()
adata.X = sp.csr_matrix(adata.X)
# Coordinates are attached positionally, so coord rows must follow the matrix row order.
adata.obsm["spatial"] = coord[['x', 'y']].to_numpy()
# rad is passed through but is not used in KNN mode.
Cal_Spatial_Net(adata, rad_cutoff=rad, k_cutoff=knn, model='KNN', verbose=True)

# Restrict to highly variable genes only when that annotation is present.
adata_Vars = adata[:, adata.var['highly_variable']] if 'highly_variable' in adata.var.columns else adata
features = pd.DataFrame(adata_Vars.X.toarray(), index=adata_Vars.obs.index, columns=adata_Vars.var.index)
cells = np.array(features.index)
cells_id_tran = {name: pos for pos, name in enumerate(cells)}
if 'Spatial_Net' not in adata.uns:
    raise ValueError("Spatial_Net is not existed! Run Cal_Spatial_Net first!")

Spatial_Net = adata.uns['Spatial_Net']
G_df = Spatial_Net.copy()
# Only the two cell-name columns are written, one edge per line.
edge_pairs = G_df.values[:, :2]
np.savetxt(sample_prefix + "_edge_KNN_" + str(knn) + ".csv", edge_pairs, fmt='%s')



------Calculating spatial graph...
The graph contains 26052 edges, 4342 cells.
6.0000 neighbors per cell on average.


10x hippo

In [6]:
# 10x hippocampus: build the KNN spatial graph for one sample and export its edge list.
knn=6
dirs="/data02/tguo/space_batch_effect/Hippo/"
sample="10X_Normal"
sample_prefix = dirs + "gtt_input/" + sample

# The first CSV column of each table is used as the row index.
feat = pd.read_csv(sample_prefix + "_mat.csv", header=0, index_col=0, sep=',')
coord = pd.read_csv(sample_prefix + "_coord.csv", header=0, index_col=0, sep=',')
coord.columns = ['x', 'y']

adata = sc.AnnData(feat)
adata.var_names_make_unique()
adata.X = sp.csr_matrix(adata.X)
# Coordinates are attached positionally, so coord rows must follow the matrix row order.
adata.obsm["spatial"] = coord[['x', 'y']].to_numpy()
Cal_Spatial_Net(adata, rad_cutoff=None, k_cutoff=knn, model='KNN', verbose=True)

# Restrict to highly variable genes only when that annotation is present.
adata_Vars = adata[:, adata.var['highly_variable']] if 'highly_variable' in adata.var.columns else adata
features = pd.DataFrame(adata_Vars.X.toarray(), index=adata_Vars.obs.index, columns=adata_Vars.var.index)
cells = np.array(features.index)
cells_id_tran = {name: pos for pos, name in enumerate(cells)}
if 'Spatial_Net' not in adata.uns:
    raise ValueError("Spatial_Net is not existed! Run Cal_Spatial_Net first!")

Spatial_Net = adata.uns['Spatial_Net']
G_df = Spatial_Net.copy()
# Only the two cell-name columns are written, one edge per line.
edge_pairs = G_df.values[:, :2]
np.savetxt(sample_prefix + "_edge_KNN_" + str(knn) + ".csv", edge_pairs, fmt='%s')



------Calculating spatial graph...
The graph contains 16212 edges, 2702 cells.
6.0000 neighbors per cell on average.


slide-seq v1 hippo

In [26]:
# Slide-seq v1 hippocampus: build a radius-based spatial graph for every puck
# and export each edge list.
knn=6
rad=50
dirs="/data02/tguo/space_batch_effect/slide-seq/"
sample_name=['Puck_180531_13', 'Puck_180531_16', 'Puck_180531_17','Puck_180531_18', 'Puck_180531_19', 'Puck_180531_22','Puck_180531_23']
for i in np.arange(len(sample_name)):
    sample1 = sample_name[i]
    sample_prefix = dirs + "gtt_input/" + str(sample1)

    # The first CSV column of every table is used as the row index.
    features = pd.read_csv(sample_prefix + "_scanpy_mat.csv", header=0, index_col=0, sep=',')
    meta = pd.read_csv(sample_prefix + "_STAGATE_meta.csv", header=0, index_col=0, sep=',')
    coord = pd.read_csv(sample_prefix + "_coord.csv", header=0, index_col=0, sep=',')
    # meta=meta.iloc[:meta.shape[0]-1,:]

    adata = sc.AnnData(features)
    adata.var_names_make_unique()
    adata.X = sp.csr_matrix(adata.X)
    # Coordinates are attached positionally, so coord rows must follow the matrix row order.
    adata.obsm["spatial"] = coord[['x', 'y']].to_numpy()
    # knn is passed through but is not used in Radius mode.
    Cal_Spatial_Net(adata, rad_cutoff=rad, k_cutoff=knn, model='Radius', verbose=True)

    # Restrict to highly variable genes only when that annotation is present.
    adata_Vars = adata[:, adata.var['highly_variable']] if 'highly_variable' in adata.var.columns else adata
    features = pd.DataFrame(adata_Vars.X.toarray(), index=adata_Vars.obs.index, columns=adata_Vars.var.index)
    cells = np.array(features.index)
    cells_id_tran = {name: pos for pos, name in enumerate(cells)}
    if 'Spatial_Net' not in adata.uns:
        raise ValueError("Spatial_Net is not existed! Run Cal_Spatial_Net first!")

    Spatial_Net = adata.uns['Spatial_Net']
    G_df = Spatial_Net.copy()
    # Only the two cell-name columns are written, one edge per line.
    edge_pairs = G_df.values[:, :2]
    np.savetxt(sample_prefix + "_edge_Radius_" + str(rad) + ".csv", edge_pairs, fmt='%s')

    # G_df['Cell1'] = G_df['Cell1'].map(cells_id_tran)
    # G_df['Cell2'] = G_df['Cell2'].map(cells_id_tran)
    # adj = sp.coo_matrix((np.ones(G_df.shape[0]), (G_df['Cell1'], G_df['Cell2'])), shape=(adata.n_obs, adata.n_obs))
    # adj+=adj.T.multiply(adj.T>adj)-adj.multiply(adj.T>adj)
    # features=torch.FloatTensor(features.values)

------Calculating spatial graph...
The graph contains 22974 edges, 1692 cells.
13.5780 neighbors per cell on average.
------Calculating spatial graph...
The graph contains 20546 edges, 1476 cells.
13.9201 neighbors per cell on average.
------Calculating spatial graph...
The graph contains 11794 edges, 1138 cells.
10.3638 neighbors per cell on average.
------Calculating spatial graph...
The graph contains 18278 edges, 1363 cells.
13.4101 neighbors per cell on average.
------Calculating spatial graph...
The graph contains 22344 edges, 1788 cells.
12.4966 neighbors per cell on average.
------Calculating spatial graph...
The graph contains 24544 edges, 1835 cells.
13.3755 neighbors per cell on average.
------Calculating spatial graph...
The graph contains 18922 edges, 1616 cells.
11.7092 neighbors per cell on average.
