In [None]:
import scanpy as sc
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import warnings
# Silence every Python warning for the rest of the session.
# NOTE(review): this blanket filter also hides any warnings raised by the
# preprocessing/clustering calls below — consider narrowing it to specific
# categories or modules.
warnings.filterwarnings("ignore")

In [None]:
# Read the input .h5ad file into `adata1`.
# NOTE(review): absolute, machine-specific path — consider a configurable data directory.
input_h5ad = '/mnt/f/pvn/outer/new_protocal/step2_standard/step2all_level23.h5ad'
adata1 = sc.read(input_h5ad)

In [None]:
# Build a standalone AnnData from the `.raw` slot of the loaded object and
# continue the analysis on it.
# NOTE(review): raises AttributeError if `adata1.raw` is unset — confirm the
# input file always carries `.raw`.
adata=adata1.raw.to_adata()

In [None]:
# Flag the top 2000 highly variable genes with the "seurat" flavor, using the
# `sample` column as batch key.
# NOTE(review): scanpy documents that flavor="seurat" expects log-transformed
# data — confirm that `.raw` of the input holds log-normalized values.
hvg_settings = dict(n_top_genes=2000, flavor="seurat", batch_key="sample")
sc.pp.highly_variable_genes(adata, **hvg_settings)

In [None]:
# Store a copy of the current AnnData in `.raw` before the gene subsetting,
# regression and scaling performed in the following cells.
adata.raw = adata.copy()

In [None]:
def _genes_with_prefix(gene_names, prefixes):
    """Return the entries of `gene_names` that start with any of `prefixes` (order kept)."""
    return [name for name in gene_names if name.startswith(prefixes)]


# Names of the genes currently flagged as highly variable.
hv_flag = adata.var['highly_variable']
highly_variable_genes = adata.var.loc[hv_flag].index

# Highly variable genes matched by name prefix: 'Hsp', 'mt-', and 'Rps'/'Rpl'.
hsp_genes = _genes_with_prefix(highly_variable_genes, ('Hsp',))
mt_genes = _genes_with_prefix(highly_variable_genes, ('mt-',))
rps_genes = _genes_with_prefix(highly_variable_genes, ('Rps', 'Rpl'))

# Show what was matched so the exclusion in the next cell can be inspected.
print("Highly variable Hsp genes: ", hsp_genes)
print("Highly variable mt genes: ", mt_genes)
print("Highly variable rps genes: ", rps_genes)

In [None]:
# Remove the Hsp*/mt-*/Rps*/Rpl* genes collected in the previous cell from the
# highly-variable selection.
#
# The new flag is derived from `adata.var['highly_variable']` itself rather than
# by iterating the `highly_variable_genes` variable: that name is rebound to a
# boolean Series at the end of this cell, so iterating it on a re-run would yield
# True/False values instead of gene names and silently un-flag every gene.
# Written this way the cell can be re-run safely.
excluded_genes = set(hsp_genes) | set(mt_genes) | set(rps_genes)
is_excluded = adata.var_names.isin(list(excluded_genes))
adata.var['highly_variable'] = adata.var['highly_variable'] & ~is_excluded

# Kept for backward compatibility with the original cell.
filtered_highly_variable_genes = list(adata.var_names[adata.var['highly_variable'].to_numpy()])
# Boolean per-gene flag (same final binding as the original cell).
highly_variable_genes = adata.var['highly_variable']
print(f"Number of highly variable genes: {highly_variable_genes.sum()}")

In [None]:
# Keep only the genes flagged as highly variable.
# Subsetting an AnnData returns a view of the parent object; `.copy()`
# materializes it so the in-place steps that follow (regress_out, scale, pca)
# work on an independent object instead of relying on implicit
# view-to-copy conversion.
adata = adata[:, adata.var["highly_variable"]].copy()

In [None]:
# Regress the listed per-cell covariates out of the expression matrix.
# Assumes these three columns already exist in `adata.obs` — TODO confirm they
# are computed by an upstream QC step.
covariates_to_regress = ["total_counts", "pct_counts_mt", "pct_counts_hsp"]
sc.pp.regress_out(adata, keys=covariates_to_regress)

In [None]:
# Scale the expression matrix with scanpy's `scale`, passing max_value=10
# (per the scanpy docs, values above this are clipped after scaling).
sc.pp.scale(adata, max_value=10)

In [None]:
# Compute a PCA with 50 components.
sc.pp.pca(adata, n_comps=50)

In [None]:
# Plot the variance ratio of the 50 principal components on a log scale.
sc.pl.pca_variance_ratio(adata, log=True, n_pcs=50)

In [None]:
# NOTE(review): import sits mid-notebook; consider moving it to the import cell at the top.
import scanpy.external as sce
# Run Harmony integration with the `sample` column as the batch key.
# The neighbors cell below reads `X_pca_harmony`; per the scanpy docs this is
# the default output key of `harmony_integrate` — confirm for the installed version.
sce.pp.harmony_integrate(adata, key="sample")

In [None]:
# Build the neighborhood graph (30 neighbors) on the `X_pca_harmony`
# representation instead of the uncorrected PCA.
sc.pp.neighbors(adata, n_neighbors=30, n_pcs=50,use_rep='X_pca_harmony')

In [None]:
# Compute the UMAP embedding from the neighborhood graph built above.
sc.tl.umap(adata)

In [None]:
# Leiden clustering at resolution 0.8; labels are stored under the
# `leiden_res0.8` key, which the plotting and annotation cells below read
# from `adata.obs`.
sc.tl.leiden(adata, resolution=0.8,key_added='leiden_res0.8')

In [None]:
# UMAP colored by Leiden cluster, with cluster labels drawn on the plot.
sc.pl.umap(adata, color=["leiden_res0.8"], legend_loc="on data")

In [None]:
# Marker genes to inspect per cluster, one row per group; the row comments are
# the group labels given in the original list.
# NOTE(review): `check_genes` is not referenced by any later cell in this view.
_marker_rows = [
    ['Ptprc'],
    ['Ms4a1', 'Cd19', 'Cd79a', 'Ebf1', 'Igkc'],                 # B cell
    ['Mzb1', 'Jchain', 'Xbp1'],                                 # Plasma cell
    ['Cd3d', 'Cd3e', 'Cd3g', 'Cd8a', 'Cd4'],                    # T cell
    ['Ncam1', 'Cd160', 'Ncr1'],                                 # NK cell
    ['Tpsb2', 'Cpa3', 'Tpsab1', 'Kit'],                         # Mast cell
    # Original note on this row: "FCGR3A/B=CD16"; the group itself was not
    # named — presumably neutrophil markers, TODO confirm.
    ['Csf3r', 'S100a9', 'S100a8', 'Ly6g', 'Cxcr2'],
    ['Cd14', 'Vcan'],                                           # Monocyte
    ['Cd68', 'Csf1r', 'C1qa', 'C1qb'],                          # Macrophage
    ['Flt3', 'Fcer1a', 'Lamp3', 'Cd74', 'Bst2', 'Ccl22'],       # DC
    ['Mki67', 'Tuba1b', 'Stmn1', 'Top2a'],                      # cycling
]

# Flatten the rows into one list, preserving row and within-row order.
check_genes = [marker for row in _marker_rows for marker in row]

In [None]:
# Cell-type label -> Leiden cluster numbers assigned to it.
# NOTE(review): the cluster numbers are tied to one specific clustering result;
# re-check them whenever the upstream steps or parameters change.
_clusters_by_celltype = {
    'Neutrophils': (0, 10),
    'Monocytes': (1, 2, 3, 4, 5, 6, 7, 9),
    'T cells': (8, 12),
    'DCs': (11, 13),
    'B cells': (14,),
}

# Invert into {cluster id as string: label}, with keys in ascending cluster order.
annotations = {
    str(cluster): celltype
    for cluster, celltype in sorted(
        (cluster, celltype)
        for celltype, clusters in _clusters_by_celltype.items()
        for cluster in clusters
    )
}

In [None]:
# Translate Leiden cluster ids into coarse cell-type labels.
cluster_ids = adata.obs['leiden_res0.8'].astype(str)
celltype_labels = cluster_ids.map(annotations)

# `Series.map` yields NaN for every cluster id missing from `annotations`.
# The dict is hard-coded, so a clustering that produces other ids would leave
# cells unlabeled without any error. Report that explicitly; `print` is used
# because this notebook globally ignores warnings.
unmapped_clusters = sorted(set(cluster_ids[celltype_labels.isna()]))
if unmapped_clusters:
    print(
        "WARNING: Leiden clusters without an entry in `annotations` "
        f"(cells left unlabeled): {unmapped_clusters}"
    )

# Store as categorical so the plot legend and the saved file get a proper
# category column rather than relying on implicit conversion.
adata.obs['celltype_level1'] = celltype_labels.astype('category')
print(adata.obs[['leiden_res0.8', 'celltype_level1']])
sc.pl.umap(adata, color=["celltype_level1"], legend_loc="right margin")

In [None]:
# Save the annotated AnnData to disk.
# NOTE(review): absolute, machine-specific path — consider a configurable data directory.
output_h5ad = "/mnt/f/pvn/outer/new_protocal/step2_standard/step2_all_dedoubletcancer.h5ad"
adata.write(output_h5ad)