In [None]:
import numpy as np
import pandas as pd
import scanpy as sc
import scrublet as  scr
import anndata
import scanpy.external as sce
from matplotlib import pyplot as plt
%matplotlib inline

In [None]:
# Load the processed COPD object and pull out the myeloid compartment as an
# independent AnnData (copy, not a view) for its own re-clustering.
COPD = sc.read("processed_data_py/COPD_newdata_0808.h5ad")
is_myeloid = COPD.obs["cellclass"] == "Myeloid"
mye = COPD[is_myeloid].copy()

In [None]:
# Myeloid compartment: HVG selection -> regression/scaling -> PCA ->
# batch-balanced neighbours -> UMAP -> Leiden clustering.
sc.pp.highly_variable_genes(mye, n_top_genes=2000, flavor="seurat")
# Keep the full-gene object in `.raw`; a later cell restores it with
# `mye.raw.to_adata()`.
mye.raw = mye
# Slicing an AnnData returns a view. Take an explicit copy so the in-place
# steps below (regress_out / scale) operate on a real object instead of
# relying on an implicit view-to-copy conversion.
mye = mye[:, mye.var.highly_variable].copy()
sc.pp.regress_out(mye, ['total_counts', 'pct_counts_mt'])
sc.pp.scale(mye)
sc.tl.pca(mye, n_comps=50)
# Neighbour graph balanced across the `sample` batches; the UMAP is built on it.
sc.external.pp.bbknn(mye, batch_key="sample")
sc.tl.umap(mye)
sc.pl.umap(mye, color=["Group"], size=1)
# NOTE(review): this call recomputes the neighbour graph and therefore replaces
# the BBKNN graph, so Leiden clusters come from a different graph than the one
# the UMAP was built on. Confirm this is intended. It is left unchanged here
# because the hard-coded cluster -> cell-type mapping in the annotation cell
# depends on the resulting cluster numbering.
sc.pp.neighbors(mye, n_neighbors=30, n_pcs=50)
sc.tl.leiden(mye, resolution=1)
sc.pl.umap(mye, color=["leiden"], size=1, legend_loc='on data')

In [None]:
# Marker panel used to annotate the myeloid Leiden clusters. The dict keys are
# the group labels drawn above each bracket of genes in the dot plot.
mye_marker_genes = {
    "Epithelial": ["EPCAM", "SCGB1A1", "TP63", "CHGA"],
    "Mesenchymal": ["COL1A1", "PDGFRA"],
    # Label spelling corrected (was "Endothleial" in the rendered figure).
    "Endothelial": ["PECAM1", "CDH5"],
    "Immune": [
        "PTPRC", "MS4A1", "TNFRSF17", "CD3E", "CD4", "CD8A", "IL9", "IL21",
        "IL22", "CXCR5", "GATA3", "IFNG", "FOXP3", "CTLA4", "NKG7", "GNLY",
        "CLEC9A", "CLNK", "CLEC10A", "CD1C", "CD1E", "SCT", "SMPD3", "LAMP3",
        "LAD1", "FCN1", "VCAN", "CD14", "MTSS1", "FCGR3A",
        "CD68", "F13A1", "FOLR2", "MARCO", "TREM2", "SPP1", "FABP4", "MS4A2",
        "FCGR3B", "MCTP2",
    ],
    # NOTE(review): this group is labelled "MKI67" but only plots TOP2A.
    # Confirm whether a proliferation group listing both genes was intended.
    "MKI67": ["TOP2A"],
}
sc.pl.dotplot(
    mye,
    mye_marker_genes,
    groupby="leiden",
    dendrogram=False,
    standard_scale="var",
    cmap='Spectral_r',
)

In [None]:
# Manual annotation of the myeloid Leiden clusters. Each label lists the
# cluster ids assigned to it; cells in any cluster not listed keep the empty
# string. Entries are applied in the order written.
mye_cluster_labels = {
    "Doublets": ["9", "10", "13", "17", "18", "21", "23", "25"],
    # Dendritic cells
    "cDC1": ["16"],
    "cDC2": ["6"],
    "pDC": ["15"],
    "Migratory DC": ["19"],
    # Macrophages and monocytes
    "Interstitial macrophage": ["5"],
    "Alveolar macrophage": ["2", "8", "11", "14", "22"],
    "Monocyte-derived macrophage": ["4", "20"],
    "Classical monocyte": ["1", "24"],
    "Non-classical monocyte": ["7", "12"],
    # Granulocytes
    "Mast": ["3"],
    "Neutrophil": ["0"],
}

mye.obs["celltype"] = ""
for label, cluster_ids in mye_cluster_labels.items():
    in_clusters = mye.obs["leiden"].isin(cluster_ids)
    mye.obs.loc[in_clusters, "celltype"] = label

In [None]:
# Write the per-cell myeloid labels to disk; doublets are still included at
# this point because the filter below runs after the export.
mye_meta = mye.obs.loc[:, ["celltype"]]
mye_meta.to_csv("tables/mye_meta.csv")

# Go back to the full-gene object stored in `.raw`, then drop the doublet cells.
mye = mye.raw.to_adata()
mye_keep = mye.obs["celltype"] != "Doublets"
mye = mye[mye_keep].copy()

In [None]:
# Lymphoid compartment: same workflow as the myeloid cells, i.e.
# HVG selection -> regression/scaling -> PCA -> batch-balanced neighbours ->
# UMAP -> Leiden clustering (resolution 1.2 here).
lym = COPD[COPD.obs["cellclass"] == "Lymphoid"].copy()
sc.pp.highly_variable_genes(lym, n_top_genes=2000, flavor="seurat")
# Keep the full-gene object in `.raw`; a later cell restores it with
# `lym.raw.to_adata()`.
lym.raw = lym
# Slicing an AnnData returns a view. Take an explicit copy so the in-place
# steps below (regress_out / scale) operate on a real object instead of
# relying on an implicit view-to-copy conversion.
lym = lym[:, lym.var.highly_variable].copy()
sc.pp.regress_out(lym, ['total_counts', 'pct_counts_mt'])
sc.pp.scale(lym)
sc.tl.pca(lym, n_comps=50)
# Neighbour graph balanced across the `sample` batches; the UMAP is built on it.
sc.external.pp.bbknn(lym, batch_key="sample")
sc.tl.umap(lym)
# NOTE(review): this call recomputes the neighbour graph and therefore replaces
# the BBKNN graph, so Leiden clusters come from a different graph than the one
# the UMAP was built on. Confirm this is intended. It is left unchanged here
# because the hard-coded cluster -> cell-type mapping in the annotation cell
# depends on the resulting cluster numbering.
sc.pp.neighbors(lym, n_neighbors=30, n_pcs=50)
sc.tl.leiden(lym, resolution=1.2)
sc.pl.umap(lym, color=["leiden"], size=1, legend_loc='on data')

In [None]:
# Marker panel used to annotate the lymphoid Leiden clusters. The dict keys are
# the group labels drawn above each bracket of genes in the dot plot.
lym_marker_genes = {
    "Epithelial": ["EPCAM", "SCGB1A1", "SFTPC", "CHGA"],
    "Mesenchymal": ["COL1A1", "PDGFRA"],
    # Label spelling corrected (was "Endothleial" in the rendered figure).
    "Endothelial": ["PECAM1", "CDH5"],
    # "IFNG" was listed twice in this group; the second occurrence is dropped
    # so each gene yields exactly one column.
    "Immune": [
        "PTPRC", "MS4A1", "TNFRSF17", "CD3E", "CD4", "CD8A", "CD8B", "CCR7",
        "IL17RB", "LTB", "GZMB", "LRRK2",
        "IL17A", "IL17F", "CCL4", "CCL5", "IFNG", "PPARG", "IL13", "GATA3",
        "FOXP3", "CTLA4", "NKG7", "GNLY",
        "CLEC9A", "CLNK", "CLEC10A", "CD1C", "SCT", "SMPD3", "MCTP2", "FXYD5",
        "AREG", "NFKB1", "BACH2", "ATP8B4", "IL7R", "CCR6",
        "IFIT2", "IFIT3", "KLRB1", "PTGDR2", "CSF3R", "FCGR3B",
    ],
    # Wrapped in a list for consistency with the myeloid marker panel.
    # NOTE(review): this group is labelled "MKI67" but only plots TOP2A.
    # Confirm whether a proliferation group listing both genes was intended.
    "MKI67": ["TOP2A"],
}
sc.pl.dotplot(
    lym,
    lym_marker_genes,
    groupby="leiden",
    dendrogram=False,
    standard_scale="var",
    cmap='Spectral_r',
)

In [None]:
# Manual annotation of the lymphoid Leiden clusters. Every cell starts as
# "NKT"; the clusters listed below are then relabelled, so any cluster that is
# not listed stays "NKT". Entries are applied in the order written.
lym_cluster_labels = {
    "Doublets": ["11", "16", "21", "22", "25", "26", "30"],
    "B": ["13"],
    "Plasma": ["14", "32"],
    # T cells
    "Treg": ["19"],
    "CD4+ T": ["2", "8", "10", "12"],
    "CD8+ T": ["3", "6", "15"],
    "Proliferating T": ["27"],
    # Others
    "NK": ["1", "9", "23"],
    "Basophil": ["29"],
}

lym.obs["celltype"] = "NKT"
for label, cluster_ids in lym_cluster_labels.items():
    in_clusters = lym.obs["leiden"].isin(cluster_ids)
    lym.obs.loc[in_clusters, "celltype"] = label

In [None]:
# Write the per-cell lymphoid labels to disk; doublets are still included at
# this point because the filter below runs after the export.
lym_meta = lym.obs.loc[:, ["celltype"]]
lym_meta.to_csv("tables/lym_meta.csv")

# Go back to the full-gene object stored in `.raw`, then drop the doublet cells.
lym = lym.raw.to_adata()
lym_keep = lym.obs["celltype"] != "Doublets"
lym = lym[lym_keep].copy()

In [None]:
# Merge the doublet-filtered lymphoid and myeloid objects and recompute a joint
# embedding for all immune cells, coloured by the per-compartment annotation.
immune = anndata.concat([lym, mye])
sc.pp.highly_variable_genes(immune, n_top_genes=2000, flavor="seurat")
# Keep the full-gene object in `.raw` before restricting to the HVGs.
immune.raw = immune
# Slicing an AnnData returns a view. Take an explicit copy so the in-place
# steps below (regress_out / scale) operate on a real object instead of
# relying on an implicit view-to-copy conversion.
immune = immune[:, immune.var.highly_variable].copy()
sc.pp.regress_out(immune, ['total_counts', 'pct_counts_mt'])
sc.pp.scale(immune)
sc.tl.pca(immune, n_comps=50)
# Neighbour graph balanced across the `sample` batches; the UMAP is built on it.
sc.external.pp.bbknn(immune, batch_key="sample")
sc.tl.umap(immune)
sc.pl.umap(immune, color=["celltype"], size=1)

In [None]:
# Marker genes shown per annotated immune cell type, plotted in this order.
immune_marker_genes = [
    "CLEC9A", "CLNK", "CLEC10A", "CD1C", "LAD1", "CCL19", "SCT", "SMPD3",
    "CD68", "MARCO", "FABP4", "F13A1", "FOLR2", "TREM2",
    "FCN1", "VCAN", "CD14", "FCGR3A", "MS4A2", "FCGR3B",
    "ENPP3", "IL3RA", "CD79A", "TNFRSF17", "DERL3", "IGHG1", "CD3E",
    "FOXP3", "CTLA4", "CD4", "CD5", "CD8A", "CD8B", "TOP2A", "NKG7", "GNLY",
]
sc.pl.dotplot(
    immune,
    immune_marker_genes,
    groupby="celltype",
    dendrogram=False,
    standard_scale="var",
    cmap="RdBu_r",
)

In [None]:
# Copy the two UMAP coordinates into obs so they are exported with the
# per-cell metadata table, then save the table and the AnnData object.
umap_coords = immune.obsm["X_umap"]
for axis, column in enumerate(("UMAP1", "UMAP2")):
    immune.obs[column] = umap_coords[:, axis]

immune.obs.to_csv("./tables/immune_meta_umap.csv")
immune.write("./processed_data_py/COPD_immune.h5ad")