In [None]:
import numpy as np
import pandas as pd
import scanpy as sc
import scanpy.external as sce
import scipy
# Scanpy session setup: logging verbosity level 3, then print the scanpy header.
sc.settings.verbosity = 3
sc.logging.print_header()
# Figure defaults: 100 dpi for on-screen figures, 600 dpi for saved files.
sc.set_figure_params(
    dpi=100,
    dpi_save=600,
)

In [None]:

import matplotlib as mp
import matplotlib.pyplot as pl
from matplotlib import rcParams
import matplotlib as mpl
import os

In [None]:
# Read data: load the CracdWT and CracdKO .h5ad files

In [None]:
# Load the CracdWT dataset.
# NOTE: this is an absolute, machine-specific path; adjust it before running elsewhere.
wt_h5ad_path = '/Users/bkim6/Desktop/BJ/SZ_Cracd_KO/H5AD/For_NCBI/CracdWT.h5ad'
adata_WT = sc.read(filename=wt_h5ad_path)

In [None]:
# Load the CracdKO dataset.
# NOTE: this is an absolute, machine-specific path; adjust it before running elsewhere.
ko_h5ad_path = '/Users/bkim6/Desktop/BJ/SZ_Cracd_KO/H5AD/For_NCBI/CracdKO.h5ad'
adata_KO = sc.read(filename=ko_h5ad_path)

In [None]:
# Combine data: concatenate WT and KO into a single AnnData object

In [None]:
# Stack the WT and KO objects into one AnnData, WT first and KO second.
# A later cell renames the categories of the 'batch' obs column to
# ['CracdWT', 'CracdKO'], so this call is expected to add that column with
# one category per input, in the order given here -- confirm against the
# installed anndata version.
# NOTE(review): AnnData.concatenate is deprecated in recent anndata releases in
# favour of anndata.concat; the replacement has different merge defaults, so it
# is intentionally not swapped in here.
adata = adata_WT.concatenate(adata_KO)

In [None]:
# Quick look at the combined object: UMAP colored by the 'leiden' obs column.
leiden_key = 'leiden'
sc.pl.umap(adata, color=[leiden_key])

In [None]:
# Give the two 'batch' categories readable sample names.
# The names are applied positionally to the existing categories.
new_cluster_names = ['CracdWT', 'CracdKO']
adata.rename_categories(key='batch', categories=new_cluster_names)

In [None]:
# Seed the 'celltype' annotation from the 'leiden' labels; the categories are
# renamed to cell-type names in the following cell.
leiden_labels = adata.obs['leiden']
adata.obs['celltype'] = leiden_labels

In [None]:
# Cell-type names for the 20 'celltype' categories.
# rename_categories applies the list positionally, so the order below must
# match the current category order of adata.obs['celltype'].
new_cluster_names = [
    'AT2-1',
    'Ciliated-1',
    'AT2-2',
    'AT2-3',
    'AT2-4',
    'AT2-5',
    'AT2-6',
    'Mesenchymal-1',
    'Endothelial',
    'Club',
    'AT2-7',
    'Ciliated-2',
    'AT1',
    'Basal',
    'Proliferation',
    'AT2-8',
    'Immune-1',
    'NE',
    'Mesenchymal-2',
    'Immune-2',
]
adata.rename_categories(key='celltype', categories=new_cluster_names)

In [None]:
# Put the 'celltype' categories in the order used for plotting.
# The list must contain exactly the current categories, otherwise pandas
# raises ValueError.
celltype_order = [
    'AT1',
    'AT2-1', 'AT2-2', 'AT2-3', 'AT2-4', 'AT2-5', 'AT2-6', 'AT2-7', 'AT2-8',
    'Basal',
    'Ciliated-1', 'Ciliated-2',
    'Club',
    'NE',
    'Proliferation',
    'Endothelial',
    'Immune-1', 'Immune-2',
    'Mesenchymal-1', 'Mesenchymal-2',
]
# `inplace=True` was deprecated in pandas 1.3 and removed in pandas 2.0 (it now
# raises TypeError). Assigning the returned Series back works on old and new
# pandas alike and keeps the cell safe to re-run.
adata.obs['celltype'] = adata.obs['celltype'].cat.reorder_categories(celltype_order)

In [None]:
# UMAP colored by cell type

In [None]:
# UMAP of the combined data colored by annotated cell type; the figure is
# also written to disk through scanpy's `save` option.
celltype_umap_file = 'combined_umap.pdf'
sc.pl.umap(
    adata,
    color=['celltype'],
    save=celltype_umap_file,
)

In [None]:
# Marker genes: dot plot by cell type and per-gene UMAP feature plots

In [None]:
# Marker genes shown in the dot plot, in display order.
celltype_marker_genes = [
    'Trp63', 'Krt5', 'Calca', 'Chga', 'Reg3g', 'Scgb3a1', 'Ccdc153',
    'Foxj1', 'Scgb3a2', 'Scgb1a1', 'Krt8', 'Sftpa1', 'Sftpc', 'Pdpn',
    'Hopx', 'Ager', 'Mki67', 'Top2a', 'Epcam', 'Ptprc', 'Pecam1', 'Eln',
    'Col1a1',
]
# Dot plot of the markers grouped by 'celltype'; swap_axes=True swaps which
# axis carries the genes and which the groups.
sc.pl.dotplot(
    adata,
    celltype_marker_genes,
    groupby='celltype',
    swap_axes=True,
    save='Dotplot_celltype_marker.pdf',
)



In [None]:
# Feature plots (panel set 1): expression of four marker genes on the UMAP.
# use_raw=True requests values from adata.raw; the color scale is fixed to 0-4.
feature_genes_1 = ['Epcam', 'Ptprc', 'Pecam1', 'Col1a1']
sc.pl.umap(
    adata,
    color=feature_genes_1,
    save='Lung_marker_feature.pdf',
    legend_loc='right margin',
    frameon=False,
    add_outline=True,
    vmin=0,
    vmax=4,
    outline_width=(0.05, 0.05),
    size=50,
    use_raw=True,
    cmap='Reds',
)

In [None]:
# Feature plots (panel set 2): same styling as the other marker feature plots,
# color scale fixed to 0-4, values requested from adata.raw.
feature_genes_2 = ['Mki67', 'Ager', 'Krt8']
sc.pl.umap(
    adata,
    color=feature_genes_2,
    save='Lung_marker_feature2.pdf',
    legend_loc='right margin',
    frameon=False,
    add_outline=True,
    vmin=0,
    vmax=4,
    outline_width=(0.05, 0.05),
    size=50,
    use_raw=True,
    cmap='Reds',
)

In [None]:
# Feature plots (panel set 3): same styling as the other marker feature plots,
# color scale fixed to 0-4, values requested from adata.raw.
feature_genes_3 = ['Foxj1', 'Chga', 'Krt5']
sc.pl.umap(
    adata,
    color=feature_genes_3,
    save='Lung_marker_feature3.pdf',
    legend_loc='right margin',
    frameon=False,
    add_outline=True,
    vmin=0,
    vmax=4,
    outline_width=(0.05, 0.05),
    size=50,
    use_raw=True,
    cmap='Reds',
)

In [None]:
# Feature plots (panel set 4): same styling as the other marker feature plots,
# but with a wider color scale (0-8 instead of 0-4).
feature_genes_4 = ['Scgb1a1', 'Sftpc']
sc.pl.umap(
    adata,
    color=feature_genes_4,
    save='Lung_marker_feature4.pdf',
    legend_loc='right margin',
    frameon=False,
    add_outline=True,
    vmin=0,
    vmax=8,
    outline_width=(0.05, 0.05),
    size=50,
    use_raw=True,
    cmap='Reds',
)

In [None]:
# Cluster-specific genes: rank genes per cell type (Wilcoxon) and export the results

In [None]:
# Rank genes for each 'celltype' group with the Wilcoxon method; the results
# are read back from adata.uns['rank_genes_groups'] in the following cell.
sc.tl.rank_genes_groups(adata, groupby='celltype', method='wilcoxon')

# Plot the top 25 ranked genes per group; sharey=False gives each panel its own y-axis.
sc.pl.rank_genes_groups(
    adata,
    n_genes=25,
    sharey=False,
)

In [None]:
# Flatten the rank_genes_groups results into one wide table and export it.
result = adata.uns['rank_genes_groups']
# The group names are the field names of the structured 'names' array.
groups = result['names'].dtype.names

# For every group, emit three columns suffixed with the first letter of the
# field: <group>_n (names), <group>_l (logfoldchanges), <group>_p (pvals_adj).
deg_fields = ('names', 'logfoldchanges', 'pvals_adj')
deg_columns = {
    f'{cluster}_{field[0]}': result[field][cluster]
    for cluster in groups
    for field in deg_fields
}
degs_by_cluster = pd.DataFrame(deg_columns)
degs_by_cluster.to_csv("DEG_adata.csv")

# Preview the first ten rows.
degs_by_cluster.head(10)

In [None]:
# Heatmap of the top 10 ranked genes per cell type, without gene labels or a
# dendrogram, using the blue-white-red colormap; also saved to disk.
sc.pl.rank_genes_groups_heatmap(
    adata,
    groupby=['celltype'],
    n_genes=10,
    show_gene_labels=False,
    dendrogram=False,
    save='Heatmap.pdf',
    cmap='bwr',
)

In [None]:
# Relabel the 'batch' categories with the sample names before the final plot.
# NOTE(review): this repeats the batch renaming done in an earlier cell of this
# notebook; if that cell has already run, the labels should be unchanged by
# this call -- confirm whether this cell is still needed.
new_cluster_names = ['CracdWT', 'CracdKO']
adata.rename_categories(key='batch', categories=new_cluster_names)

In [None]:
# UMAP colored by sample ('batch') and by annotated cell type, side by side.
# NOTE(review): both color keys are obs annotations, so the continuous-scale
# arguments (vmin, vmax, cmap) and use_raw presumably have no visible effect
# here -- confirm before relying on them.
sample_and_celltype = ['batch', 'celltype']
sc.pl.umap(
    adata,
    color=sample_and_celltype,
    legend_loc='right margin',
    frameon=False,
    add_outline=True,
    vmin=0,
    vmax=1,
    outline_width=(0.05, 0.05),
    size=50,
    use_raw=True,
    cmap='Reds',
)