In [None]:
from Train_STAGATE import train_STAGATE
from utils import mclust_R, Stats_Spatial_Net, Cal_Spatial_Net
import squidpy as sq
import scanpy as sc
import numpy as np
import pandas as pd
import os
from sklearn import metrics
from sklearn.preprocessing import LabelEncoder 

# Shared random seed; forwarded as `random_seed` to the train_STAGATE calls in this notebook.
seed = 2022

# 对squidpy中集成的10X Visium数据集进行处理

In [None]:
# Load the 10x Visium H&E dataset shipped with squidpy, using the given local file path.
adata = sq.datasets.visium_hne_adata(
    path='../dataset/visium_hne.h5ad',
)

In [None]:
# Flag the 3000 most variable genes, then depth-normalise each spot and log-transform.
# NOTE(review): the 'seurat_v3' flavor is documented as expecting raw counts, and the
# squidpy dataset may already be pre-processed -- confirm that .X holds counts here.
sc.pp.highly_variable_genes(
    adata,
    n_top_genes=3000,
    flavor='seurat_v3',
)
sc.pp.normalize_total(
    adata,
    target_sum=1e4,
)
sc.pp.log1p(adata)

In [None]:
# Build the spatial neighbour network (KNN model, k_cutoff=6);
# per the original note it is stored in .uns['Spatial_Net'].
Cal_Spatial_Net(
    adata,
    k_cutoff=6,
    model='KNN',
)
# Train STAGATE for 30 epochs with the shared seed.
adata = train_STAGATE(
    adata,
    n_epochs=30,
    random_seed=seed,
)
# Display the learned embedding.
adata.obsm['STAGATE']

In [None]:
# Cluster the embedding with mclust (second argument: 15);
# per the original note the result is stored in .obs['mclust'].
adata = mclust_R(adata, 15)
# Draw the clusters on the tissue image.
sc.pl.spatial(
    adata,
    color=['mclust'],
)

In [None]:
# Integer-encode the reference annotation ('cluster') and the mclust assignment.
label = LabelEncoder().fit_transform(
    adata.obs['cluster']
)
pred = LabelEncoder().fit_transform(
    adata.obs['mclust']
)
# Adjusted Rand index between reference labels and predicted clusters.
metrics.adjusted_rand_score(labels_true=label, labels_pred=pred)

In [None]:
# Rebuild the neighbour graph and UMAP from the STAGATE embedding, as the other
# sections of this notebook do. Without these two calls the plot depends on
# whatever neighbours/UMAP the loaded dataset already carries, not on the
# embedding that was just trained.
sc.pp.neighbors(adata, use_rep='STAGATE')
sc.tl.umap(adata)
sc.pl.umap(adata, color=["mclust"])
# Pass groups explicitly: with no `groups`, sc.tl.paga falls back to an existing
# 'leiden'/'louvain' key (or raises if none exists) instead of the mclust labels.
sc.tl.paga(adata, groups='mclust')
sc.pl.paga_compare(adata)

# 处理DLPFC数据

In [None]:
# DLPFC section to analyse. Named `section_id` rather than `id` so that the
# Python builtin `id()` is not shadowed for the rest of the kernel session.
section_id = '151507'
# Read the Visium output directory of that section.
adata = sc.read_visium(path=os.path.join('..', 'dataset', 'DLPFC', section_id))
# De-duplicate gene names in .var_names.
adata.var_names_make_unique()

In [None]:
# Normalization: flag the 3000 most variable genes (seurat_v3 flavor),
# scale each spot to a total of 1e4, then log1p-transform.
sc.pp.highly_variable_genes(
    adata,
    n_top_genes=3000,
    flavor='seurat_v3',
)
sc.pp.normalize_total(
    adata,
    target_sum=1e4,
)
sc.pp.log1p(adata)

In [None]:
# Read the manual annotation: a headerless, tab-separated table whose first column
# is used as the index. The section id is spelled out in this path, so keep it in
# sync with the section that was loaded by sc.read_visium.
truth_path = os.path.join('../dataset', 'DLPFC', '151507', 'ground_truth.txt')
Ann_df = pd.read_csv(
    truth_path,
    sep='\t',
    header=None,
    index_col=0,
)
Ann_df.columns = ['Ground Truth']
# Align the annotation to the spots in adata by barcode.
adata.obs['Ground Truth'] = Ann_df.loc[adata.obs_names, 'Ground Truth']
sc.pl.spatial(
    adata,
    color=['Ground Truth'],
)

In [None]:
# Build the spatial neighbour network (KNN model, k_cutoff=6);
# per the original note it is stored in .uns['Spatial_Net'].
Cal_Spatial_Net(
    adata,
    k_cutoff=6,
    model='KNN',
)
# Train STAGATE for 50 epochs with the shared seed.
adata = train_STAGATE(
    adata,
    n_epochs=50,
    random_seed=seed,
)
# Display the learned embedding.
adata.obsm['STAGATE']

In [None]:
# Cluster the embedding with mclust (second argument: 7);
# per the original note the result is stored in .obs['mclust'].
adata = mclust_R(adata, 7)
# Draw the clusters on the tissue image.
sc.pl.spatial(
    adata,
    color=['mclust'],
)

In [None]:
# Score only spots that actually have a manual annotation. pandas.read_csv parses
# empty / 'NA' fields as NaN, and feeding NaN to LabelEncoder either raises or turns
# "unannotated" into an extra class that distorts the adjusted Rand index.
# NOTE(review): assumes unannotated spots appear as NaN in 'Ground Truth' -- confirm
# against ground_truth.txt.
annotated = adata.obs['Ground Truth'].notna()
label = LabelEncoder().fit_transform(adata.obs.loc[annotated, 'Ground Truth'])
pred = LabelEncoder().fit_transform(adata.obs.loc[annotated, 'mclust'])
# Adjusted Rand index between ground-truth labels and predicted clusters.
metrics.adjusted_rand_score(label, pred)

In [None]:
# Neighbour graph and UMAP computed from the STAGATE embedding.
sc.pp.neighbors(
    adata,
    use_rep='STAGATE',
)
sc.tl.umap(adata)
sc.pl.umap(adata, color=['mclust'])
# PAGA over the mclust clusters, plotted with sc.pl.paga_compare.
sc.tl.paga(
    adata,
    groups='mclust',
)
sc.pl.paga_compare(adata)

# 处理成年老鼠大脑切片数据集

In [None]:
# Read the Visium output directory of the adult mouse brain section.
mouse_brain_dir = os.path.join('..', 'dataset', 'Adult_Mouse_Brain_Section_1')
adata = sc.read_visium(path=mouse_brain_dir)
# De-duplicate gene names in .var_names.
adata.var_names_make_unique()

In [None]:
# Normalization: flag the 3000 most variable genes (seurat_v3 flavor),
# scale each spot to a total of 1e4, then log1p-transform.
sc.pp.highly_variable_genes(
    adata,
    n_top_genes=3000,
    flavor='seurat_v3',
)
sc.pp.normalize_total(
    adata,
    target_sum=1e4,
)
sc.pp.log1p(adata)

In [None]:
# Build the spatial network with a radius cutoff of 300 (Cal_Spatial_Net's default model).
Cal_Spatial_Net(
    adata,
    rad_cutoff=300,
)
# Train STAGATE for 30 epochs with the shared seed.
adata = train_STAGATE(
    adata,
    n_epochs=30,
    random_seed=seed,
)
# Display the learned embedding.
adata.obsm['STAGATE']

In [None]:
# Cluster the embedding with mclust (second argument: 22);
# per the original note the result is stored in .obs['mclust'].
adata = mclust_R(adata, 22)
# Draw the clusters on the tissue image.
sc.pl.spatial(
    adata,
    color=['mclust'],
)

In [None]:
# Neighbour graph and UMAP computed from the STAGATE embedding.
sc.pp.neighbors(
    adata,
    use_rep='STAGATE',
)
sc.tl.umap(adata)
sc.pl.umap(adata, color=['mclust'])
# PAGA over the mclust clusters, plotted with sc.pl.paga_compare.
sc.tl.paga(
    adata,
    groups='mclust',
)
sc.pl.paga_compare(adata)

# Slide-seqV2 mouse olfactory bulb

In [None]:
# Root folder of the Slide-seqV2 mouse olfactory bulb files.
slideseq_dir = os.path.join('..', 'dataset', 'Slide-seqV2_MoB')

# Expression table and bead coordinates, both indexed by their first column.
counts = pd.read_csv(os.path.join(slideseq_dir, 'data', 'Puck_200127_15.digital_expression.txt'), sep='\t', index_col=0)
coor_df = pd.read_csv(os.path.join(slideseq_dir, 'data', 'Puck_200127_15_bead_locations.csv'), index_col=0)

# Transpose so that the columns of the expression file become observations.
# NOTE(review): presumably the file is genes x beads -- confirm.
adata = sc.AnnData(counts.T)
adata.var_names_make_unique()
# Align coordinates to the observation order before storing them.
coor_df = coor_df.loc[adata.obs_names, ['xcoord', 'ycoord']]
adata.obsm["spatial"] = coor_df.to_numpy()

# Keep only the listed barcodes (first column of a headerless file).
used_barcode = pd.read_csv(os.path.join(slideseq_dir, 'used_barcodes.txt'), sep='\t', header=None)
# .copy() turns the subset into a real AnnData instead of a view, so the in-place
# preprocessing in later cells does not trigger implicit view-to-copy conversions.
adata = adata[used_barcode[0], :].copy()

In [None]:
# Normalization: flag the 3000 most variable genes (seurat_v3 flavor),
# scale each bead to a total of 1e4, then log1p-transform.
sc.pp.highly_variable_genes(
    adata,
    n_top_genes=3000,
    flavor='seurat_v3',
)
sc.pp.normalize_total(
    adata,
    target_sum=1e4,
)
sc.pp.log1p(adata)

In [None]:
# Build the spatial network with a radius cutoff of 300 (Cal_Spatial_Net's default model).
# NOTE(review): this is the same cutoff used for the Visium mouse brain section; the
# bead coordinates here come from a different file and may be on another scale --
# confirm the resulting neighbour count is reasonable.
Cal_Spatial_Net(
    adata,
    rad_cutoff=300,
)
# Train STAGATE for 30 epochs with the shared seed.
adata = train_STAGATE(
    adata,
    n_epochs=30,
    random_seed=seed,
)
# Display the learned embedding.
adata.obsm['STAGATE']

In [None]:
# Cluster the embedding with mclust (second argument: 9);
# per the original note the result is stored in .obs['mclust'].
adata = mclust_R(adata, 9)
# This AnnData was built from a plain count table, so it has no .uns['spatial']
# image/scale metadata; sc.pl.spatial would require an explicit spot_size and
# raises without one. Plot the bead coordinates in .obsm['spatial'] directly.
sc.pl.embedding(adata, basis='spatial', color=['mclust'])

In [None]:
# Neighbour graph and UMAP computed from the STAGATE embedding.
sc.pp.neighbors(
    adata,
    use_rep='STAGATE',
)
sc.tl.umap(adata)
sc.pl.umap(adata, color=['mclust'])
# PAGA over the mclust clusters, plotted with sc.pl.paga_compare.
sc.tl.paga(
    adata,
    groups='mclust',
)
sc.pl.paga_compare(adata)