In [None]:
import warnings

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scanpy as sc
import scanpy.external as sce
warnings.filterwarnings("ignore")

In [None]:
# Load the AnnData object written by the previous step (path is machine-specific).
_INPUT_H5AD = '/mnt/f/pvn/outer/new_protocal/step2_standard/step2_all_dedoubletcancer.h5ad'
adata1 = sc.read(_INPUT_H5AD)

In [None]:
# Keep only the cells whose level-1 annotation is in the selected list.
# NOTE(review): the selection is 'Monocytes', but the level-2 labels assigned later in this
# notebook and the output filename refer to T/NK cells — confirm the intended population.
_selected_cells = adata1.obs['celltype_level1'].isin(['Monocytes'])
adata2 = adata1[_selected_cells]

In [None]:
# Rebuild a standalone AnnData from whatever is stored in `.raw` of the subset.
# NOTE(review): presumably `.raw` holds the full-gene, log-normalised matrix — confirm against the previous step.
_raw_store = adata2.raw
adata = _raw_store.to_adata()

In [None]:
# Flag highly variable genes in `adata.var['highly_variable']`:
# 4000 genes, "seurat" flavor, with the `sample` column passed as the batch key.
_hvg_settings = dict(n_top_genes=4000, flavor="seurat", batch_key="sample")
sc.pp.highly_variable_genes(adata, **_hvg_settings)

In [None]:
# Keep a copy of the current object (all genes, before the HVG subsetting,
# regression and scaling performed in the following cells) in `.raw`.
adata.raw = adata.copy()

In [None]:
def _genes_with_prefix(genes, prefixes):
    """Return, in their original order, the gene names starting with `prefixes` (a str or tuple of str)."""
    return [name for name in genes if name.startswith(prefixes)]


# Names of the genes currently flagged as highly variable.
highly_variable_genes = adata.var.loc[adata.var['highly_variable']].index

# Highly variable genes whose names start with the Hsp, mt- or Rps/Rpl prefixes.
hsp_genes = _genes_with_prefix(highly_variable_genes, 'Hsp')
mt_genes = _genes_with_prefix(highly_variable_genes, 'mt-')
rps_genes = _genes_with_prefix(highly_variable_genes, ('Rps', 'Rpl'))

print("Highly variable Hsp genes: ", hsp_genes)
print("Highly variable mt genes: ", mt_genes)
print("Highly variable rps genes: ", rps_genes)

In [None]:
# Remove the Hsp / mt- / Rps-Rpl genes collected above from the highly variable set,
# then restrict the object to the remaining highly variable genes.
# A set makes each membership test O(1) instead of scanning three lists per gene.
_excluded_genes = set(hsp_genes) | set(mt_genes) | set(rps_genes)
filtered_highly_variable_genes = [
    gene for gene in highly_variable_genes if gene not in _excluded_genes
]

# Overwrite the flag so that only the retained genes are marked.
adata.var['highly_variable'] = adata.var_names.isin(filtered_highly_variable_genes)

# From here on `highly_variable_genes` is the boolean mask, no longer the Index of names.
highly_variable_genes = adata.var['highly_variable']
print(f"Number of highly variable genes: {highly_variable_genes.sum()}")

# `.copy()` materialises the subset: the following cells modify the matrix in place
# (regress_out / scale), which should act on an independent object rather than a view.
adata = adata[:, adata.var["highly_variable"]].copy()

In [None]:
# Embedding and clustering of the HVG-restricted object.
# `sce` (scanpy.external) comes from the import cell at the top of the notebook.
N_PCS = 50                              # shared by the PCA and the neighbor graph
LEIDEN_KEY = 'leiden_res1.2'            # obs column for the first-pass clustering
LEIDEN_REFINED_KEY = 'leiden_res1.2_1'  # obs column for the refined clustering

# Regress the listed per-cell covariates out of the expression matrix (they must already
# exist as columns of `adata.obs`), then scale and clip values at 10.
sc.pp.regress_out(adata, keys=["total_counts", "pct_counts_mt", "pct_counts_hsp"])
sc.pp.scale(adata, max_value=10)

# PCA, then Harmony integration keyed on `sample`; the neighbor graph is built on the
# 'X_pca_harmony' representation rather than on the raw PCA.
sc.pp.pca(adata, n_comps=N_PCS)
sce.pp.harmony_integrate(adata, key="sample")
sc.pp.neighbors(adata, n_neighbors=30, n_pcs=N_PCS, use_rep='X_pca_harmony')
sc.tl.umap(adata)

# First-pass Leiden clustering, then a second pass restricted to clusters '8' and '10'.
# NOTE(review): the cluster -> label dictionary defined later in this notebook has keys
# '7,0', '7,1', '7,2' and plain '8' / '10', which does not look like the result of
# re-clustering ['8', '10'] — confirm which clusters are meant to be refined.
sc.tl.leiden(adata, resolution=1.2, key_added=LEIDEN_KEY)
sc.tl.leiden(
    adata,
    restrict_to=(LEIDEN_KEY, ['8', '10']),
    resolution=0.5,
    key_added=LEIDEN_REFINED_KEY,
)
sc.pl.umap(adata, color=[LEIDEN_KEY, LEIDEN_REFINED_KEY], legend_loc="on data")

In [None]:
# Cluster id (as found in the refined Leiden column) -> level-2 cell type,
# written grouped by label and then inverted into a flat lookup table.
# NOTE(review): these labels are T/NK types and the keys include '7,0'..'7,2', whereas the
# notebook subsets 'Monocytes' and re-clusters ['8', '10'] — confirm this table is current.
_clusters_by_label = {
    'CD4 T cells': ['0', '2', '7,2', '12'],
    'CD8 T cells': ['1', '3', '5', '6', '7,0', '7,1', '8', '9', '10'],
    'NK cells': ['4', '11'],
}
annotations = {
    cluster: label
    for label, clusters in _clusters_by_label.items()
    for cluster in clusters
}

In [None]:
# Create or update the 'celltype_level2' column from the refined Leiden labels.
# `Series.map` turns every cluster without a key in `annotations` into NaN, so check
# coverage first and fail loudly instead of writing silently incomplete labels.
_observed_clusters = set(adata.obs['leiden_res1.2_1'].astype(str).unique())
_unmapped_clusters = sorted(_observed_clusters - set(annotations))
if _unmapped_clusters:
    _unused_keys = sorted(set(annotations) - _observed_clusters)
    raise ValueError(
        "No cell type annotation for cluster(s) "
        f"{_unmapped_clusters} in 'leiden_res1.2_1'; "
        f"annotation keys not present in the data: {_unused_keys}"
    )

adata.obs['celltype_level2'] = adata.obs['leiden_res1.2_1'].map(annotations)
# Inspect the annotation result.
print(adata.obs[['leiden_res1.2_1', 'celltype_level2']])

In [None]:
# Persist the annotated object for the next step (path is machine-specific).
# NOTE(review): the filename refers to CD4/CD8 T and NK cells while this notebook subsets
# 'Monocytes' — confirm the target so an existing T/NK result is not overwritten.
_OUTPUT_H5AD = "/mnt/f/pvn/outer/new_protocal/step3_recluster/cd4_8_T_nk_cell.h5ad"
adata.write(_OUTPUT_H5AD)