In [1]:
import os
import scanpy as sc
import pandas as pd

# Batch differential-expression script: for every per-cell-type .h5ad file in
# `input_dir`, run pairwise Wilcoxon rank-sum tests between the groups in
# obs['BMI_group'] and write one combined CSV per cell type to `output_dir`.

# Input and output paths
input_dir = "/media/AnalysisDisk1/xuzekai/20250715_BMI/DEG/celltype_h5ad/"
output_dir = "/media/AnalysisDisk1/xuzekai/20250715_BMI/DEG/scanpy_20251112/"

# Create the output directory if it does not exist yet
os.makedirs(output_dir, exist_ok=True)

# The three pairwise comparisons (loop-invariant, so defined once).
# NOTE(review): 'normal' is lowercase while the other labels are capitalised;
# confirm this matches the actual categories stored in obs['BMI_group'].
comparisons = [
    {'groups': ['Underweight'], 'reference': 'normal', 'name': 'Underweight_vs_Normal'},
    {'groups': ['Underweight'], 'reference': 'Overweight', 'name': 'Underweight_vs_Overweight'},
    {'groups': ['Overweight'], 'reference': 'normal', 'name': 'Overweight_vs_Normal'},
]

# Iterate over all .h5ad files (sorted so the processing order is reproducible)
for filename in sorted(os.listdir(input_dir)):
    if not filename.endswith(".h5ad"):
        continue

    # Cell type name = file name without the .h5ad suffix
    cell_type = filename[:-5]
    filepath = os.path.join(input_dir, filename)

    print(f"Processing: {cell_type}")

    # Load the AnnData object
    adata = sc.read(filepath)

    # Make sure the groupby column exists
    if 'BMI_group' not in adata.obs.columns:
        print(f"  Warning: 'BMI_group' not found in {filename}, skipping.")
        continue

    # Labels actually present for this cell type; used to give a clear
    # message when a comparison cannot be run on this file.
    available = set(adata.obs['BMI_group'].astype(str))

    all_results = []

    for comp in comparisons:
        group_list = comp['groups']
        ref = comp['reference']
        # One key per comparison, used for BOTH computing and reading the
        # result (the original wrote to 'DEG' but read 'DEG_filtered', which
        # never existed, so every comparison failed).
        key_name = f"rank_genes_{comp['name']}"

        missing = [label for label in group_list + [ref] if label not in available]
        if missing:
            print(f"  Error in {comp['name']}: group(s) {missing} not found in "
                  f"'BMI_group' (available: {sorted(available)})")
            continue

        try:
            sc.tl.rank_genes_groups(
                adata,
                groupby='BMI_group',
                groups=group_list,
                reference=ref,
                method='wilcoxon',
                key_added=key_name,
            )
            # group=None -> return every tested group, i.e. all genes
            result = sc.get.rank_genes_groups_df(adata, group=None, key=key_name)
            result['comparison'] = comp['name']  # tag rows with the comparison
            all_results.append(result)
        except Exception as e:
            # Best effort: report the failing comparison and keep going
            print(f"  Error in {comp['name']}: {e}")
            continue
        finally:
            # Drop the temporary result stored under the key we actually used
            adata.uns.pop(key_name, None)

    # Merge the results of all comparisons
    if all_results:
        combined_df = pd.concat(all_results, ignore_index=True)
        # Rename columns for readability
        combined_df.rename(columns={'names': 'gene', 'scores': 'score', 'logfoldchanges': 'log2FC',
                                    'pvals': 'p_val', 'pvals_adj': 'p_val_adj'}, inplace=True)
        # Save as CSV
        output_file = os.path.join(output_dir, f"{cell_type}.csv")
        combined_df.to_csv(output_file, index=False)
        print(f"  Saved: {output_file}")
    else:
        print(f"  No results for {cell_type}, skipping save.")

print(" All files processed.")

Processing: CD8_Tem_CCR7neg



KeyboardInterrupt

