In [1]:
import scanpy as sc
import decoupler as dc

# 参数和变量定义
organism = 'human'
split_complexes = False
obsm_key_estimate = 'ulm_estimate'
obsm_key_pvals = 'ulm_pvals'
n_markers = 3
n_sources = ['PAX5', 'EBF1', 'RFXAP']
n_targets = 15
node_size = 100
figsize_network = (5, 5)
color_map = 'RdBu_r'
vcenter = 0

# 加载数据
adata = sc.datasets.pbmc3k_processed()

# 检索 CollecTRI 基因调控网络
net = dc.get_collectri(organism=organism, split_complexes=split_complexes)

# 使用单变量线性模型 (ULM) 进行活动推断
def run_ulm_inference(adata, net):
    dc.run_ulm(
        mat=adata,
        net=net,
        source='source',
        target='target',
        weight='weight',
        verbose=True
    )
    adata.obsm['collectri_ulm_estimate'] = adata.obsm[obsm_key_estimate].copy()
    adata.obsm['collectri_ulm_pvals'] = adata.obsm[obsm_key_pvals].copy()

# 提取活动分数
def get_tf_activities(adata, obsm_key):
    acts = dc.get_acts(adata, obsm_key=obsm_key)
    return acts

# 可视化活动
def visualize_tf_activities(acts, color_map, vcenter):
    sc.pl.umap(acts, color=['PAX5', 'louvain'], cmap=color_map, vcenter=vcenter)
    sc.pl.violin(acts, keys=['PAX5'], groupby='louvain', rotation=90)

# 确定每个细胞类型的顶级 TF
def rank_and_extract_top_markers(acts, groupby, n_markers):
    df = dc.rank_sources_groups(acts, groupby=groupby, reference='rest', method='t-test_overestim_var')
    source_markers = df.groupby('group').head(n_markers).groupby('group')['names'].apply(lambda x: list(x)).to_dict()
    return source_markers

# 绘制网络图
def plot_tf_network(net, n_sources, n_targets, node_size, figsize):
    dc.plot_network(
        net=net,
        n_sources=n_sources,
        n_targets=n_targets,
        node_size=node_size,
        s_cmap='white',
        t_cmap='white',
        c_pos_w='darkgreen',
        c_neg_w='darkred',
        figsize=figsize
    )

# 执行推断和可视化
run_ulm_inference(adata, net)
acts = get_tf_activities(adata, obsm_key_estimate)
visualize_tf_activities(acts, color_map, vcenter)
source_markers = rank_and_extract_top_markers(acts, groupby='louvain', n_markers=n_markers)
plot_tf_network(net, n_sources, n_targets, node_size, figsize_network)


  0%|          | 0.00/23.5M [00:00<?, ?B/s]

OSError: Unable to synchronously open file (truncated file: eof = 396024, sblock->base_addr = 0, stored_eof = 24653425)