In [None]:
import numpy as np
import pandas as pd
import scanpy as sc
import scrublet as  scr
import anndata
import scanpy.external as sce
from matplotlib import pyplot as plt
%matplotlib inline

In [None]:
# Read the processed COPD AnnData file, then take an independent copy of the
# cells whose obs["cellclass"] equals "Epithelial".
COPD = sc.read("processed_data_py/COPD_newdata_0808.h5ad")
epi = COPD[COPD.obs["cellclass"] == "Epithelial", :].copy()

In [None]:
# Epithelial sub-clustering:
# HVG selection -> regress/scale -> PCA -> BBKNN -> UMAP -> neighbors -> Leiden.
# NOTE(review): flavor="seurat" presumably expects log-normalised X — confirm
# against how the input .h5ad was produced.
sc.pp.highly_variable_genes(epi, n_top_genes=3000, flavor="seurat")

# Keep the full-gene object in .raw before restricting to the HVGs; a later
# cell restores it with epi.raw.to_adata().
epi.raw = epi

# Take an explicit copy: subsetting returns a view of the parent AnnData, and
# regress_out / scale below overwrite X. Copying here makes that intent explicit
# instead of relying on an implicit view-to-copy conversion.
epi = epi[:, epi.var.highly_variable].copy()

sc.pp.regress_out(epi, ['total_counts', 'pct_counts_mt'])
sc.pp.scale(epi)
sc.tl.pca(epi, n_comps=50)

# Neighbour graph built per "sample" batch, used by the UMAP that follows.
sc.external.pp.bbknn(epi, batch_key="sample")
sc.tl.umap(epi)

# NOTE(review): this call appears to replace the neighbour graph that bbknn
# stored, so Leiden would run on a graph built without the "sample" batch key
# while the UMAP above used the BBKNN graph. Left unchanged because the cluster
# IDs hard-coded in the annotation cell depend on this exact sequence —
# confirm whether this is intended.
sc.pp.neighbors(epi, n_neighbors=30, n_pcs=50)
sc.tl.leiden(epi, resolution=1)
sc.pl.umap(epi, color=["leiden"], size=1, legend_loc='on data')

In [None]:
# Marker genes per expected epithelial population, plus a "Doublet" group of
# additional genes, shown per Leiden cluster.
# Dict order is the order of the gene groups passed to the plot.
leiden_marker_genes = {
    "AT1": ["AGER", "HOPX", "RTKN2", "COL4A1", "GPC5"],
    "AT2": ["SFTPC", "LAMP3", "HHIP"],
    "Secretory": ["SCGB3A2", "SCGB3A1", "SCGB1A1"],
    "Basal": ["TP63", "KRT5"],
    "Goblet": ["MUC5B", "BPIFB1"],
    "Ciliated": ["FOXJ1", "PIFO"],
    "PNEC": ["CHGA", "GRP"],
    "Doublet": ["COL1A2", "COL3A1", "CDH5", "PECAM1", "PTPRC", "CD68"],
}

sc.pl.dotplot(
    epi,
    var_names=leiden_marker_genes,
    groupby="leiden",
    dendrogram=False,
    standard_scale="var",
    cmap="RdBu_r",
)

In [None]:
# Manual annotation: map Leiden cluster IDs to cell-type labels in
# obs["celltype"].
# NOTE(review): the cluster IDs are tied to one specific clustering result —
# presumably read off the marker dotplot; re-check them after any re-clustering.
celltype_to_clusters = {
    "Doublets": ["14", "17", "20", "22"],
    "AT1": ["4", "13", "16"],
    "Immature AT1": ["19"],
    "TRB Secretory": ["12"],
    "PreTB Secretory": ["5"],
    "Goblet": ["11"],
    "Basal": ["18"],
    "PNEC": ["28"],
    "Ciliated": ["7", "8", "10"],
}

# Every cluster not listed above keeps this default label.
epi.obs["celltype"] = "AT2"
for label, cluster_ids in celltype_to_clusters.items():
    in_clusters = epi.obs["leiden"].isin(cluster_ids)
    epi.obs.loc[in_clusters, "celltype"] = label

In [None]:
# UMAP coloured by the manual cell-type labels, with labels drawn on the plot.
sc.pl.umap(
    epi,
    color=["celltype"],
    size=1,
    legend_loc='on data',
)

In [None]:
# Write the per-cell labels (obs index plus the single "celltype" column) to CSV.
epi_meta = epi.obs.loc[:, ["celltype"]]
epi_meta.to_csv("tables/epi_meta.csv")

In [None]:
# Replace the HVG-restricted object with the one stored in .raw, then drop the
# cells labelled "Doublets". Relies on obs["celltype"] being present on the
# restored object.
epi = epi.raw.to_adata()
not_doublet = epi.obs["celltype"] != "Doublets"
epi = epi[not_doublet].copy()

In [None]:
# Recompute PCA, the per-"sample" BBKNN graph and the UMAP on the doublet-free
# object, then plot by cell type.
# NOTE(review): unlike the first pass, no regress_out / scale is applied before
# PCA here — confirm this is intended.
sc.tl.pca(epi, n_comps=50)
sce.pp.bbknn(epi, batch_key="sample")
sc.tl.umap(epi)
sc.pl.umap(
    epi,
    color=["celltype"],
    size=1,
    legend_loc='on data',
)

In [None]:
# Marker genes per epithelial population, shown per annotated cell type.
# Dict order is the order of the gene groups passed to the plot.
celltype_marker_genes = {
    "AT1": ["AGER", "HOPX", "RTKN2", "COL4A1", "GPC5"],
    "AT2": ["SFTPC", "LAMP3", "HHIP"],
    "Secretory": ["SCGB3A2", "SCGB3A1", "SCGB1A1"],
    "Basal": ["TP63", "KRT5"],
    "Goblet": ["MUC5B", "BPIFB1"],
    "Ciliated": ["FOXJ1", "PIFO"],
    "PNEC": ["CHGA", "GRP"],
}

sc.pl.dotplot(
    epi,
    var_names=celltype_marker_genes,
    groupby="celltype",
    dendrogram=False,
    standard_scale="var",
    cmap="RdBu_r",
)

In [None]:
# Copy the two UMAP coordinates into obs so they are exported with the
# metadata table, then save both the table and the AnnData object.
for axis, column in enumerate(("UMAP1", "UMAP2")):
    epi.obs[column] = epi.obsm['X_umap'][:, axis]

epi.obs.to_csv("./tables/epi_meta_umap.csv")
epi.write("./processed_data_py/COPD_epi.h5ad")