In [1]:
import numpy as np
import pandas as pd
import scanpy as sc
import scrublet as scr
import os
import warnings
import functools
import seaborn as sns
import scipy.stats
import anndata
import matplotlib
import diffxpy.api as de
# Use font type 42 for both the PDF and the PostScript backends
# (presumably so text in exported figures stays editable -- confirm against
# the matplotlib rcParams documentation).
matplotlib.rcParams.update({'pdf.fonttype': 42, 'ps.fonttype': 42})

# Install a global "ignore" filter: no warnings are shown from here on.
warnings.filterwarnings(action='ignore')
# Every relative path used later ('out/...') is resolved against this directory.
os.chdir(os.path.expanduser('/home/jovyan/Prostate_analysis/scanpy'))
sc.settings.verbosity = 1
# Print the versions of scanpy and its dependencies for provenance.
sc.logging.print_versions()

  data = yaml.load(f.read()) or {}
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


scanpy==1.4.5.post2 anndata==0.6.22.post1 umap==0.3.10 numpy==1.17.2 scipy==1.4.1 pandas==0.25.1 scikit-learn==0.22.1 statsmodels==0.11.0rc1 python-igraph==0.7.1 louvain==0.6.1


In [2]:
def exportDEres(adata, key, column, filename):
    """Write one group's differential-expression table to a tab-separated file.

    Parameters
    ----------
    adata
        Object whose ``uns[key]`` mapping provides the fields ``'names'``,
        ``'scores'``, ``'logfoldchanges'``, ``'pvals'`` and ``'pvals_adj'``,
        each indexable by ``column``. In this notebook ``key`` is
        ``'rank_genes_groups'`` or ``'rank_genes_groups_filtered'``.
    key
        Key into ``adata.uns`` selecting the result set to export.
    column
        Name of the group whose results are exported.
    filename
        Destination path; the parent directory must already exist.

    Returns
    -------
    None
        The table is written to ``filename`` with the names as the (unnamed)
        index column followed by ``scores``, ``logfoldchanges``, ``pvals`` and
        ``pvals_adj``.

    Notes
    -----
    Rows whose name is missing (NaN) are omitted. The table is assembled
    positionally rather than by joining four frames on their index, so a
    repeated name yields one row per occurrence, and no duplicate column
    labels are created along the way.
    """
    result = adata.uns[key]
    stat_fields = ['scores', 'logfoldchanges', 'pvals', 'pvals_adj']

    # np.asarray keeps the values positional (no index alignment) so that row
    # k of every statistic belongs to the k-th name.
    table = pd.DataFrame(
        {field: np.asarray(result[field][column]) for field in stat_fields},
        columns=stat_fields,
    )
    table.index = pd.Index(result['names'][column])

    # Drop entries without a name (NaN placeholders).
    table = table[table.index.notna()]

    table.to_csv(filename, sep='\t')

In [3]:
results_file = 'out/prostate.h5ad'
adata = sc.read_h5ad(results_file)
# Drop the listed cell types from this pass.
adata = adata[~(adata.obs['celltype'].isin(['Sperm','MNP','T cell', 'B cell', 'NK cell']))]
# Take an explicit copy: rank_genes_groups stores its results in .uns, and
# that should happen on a real object rather than on a view of `adata`.
ndata = adata[adata.obs['group'] == 'normal'].copy()

### Wilcoxon test: markers of each cell type within the normal samples
sc.tl.rank_genes_groups(ndata, groupby = 'celltype', n_genes = 30000, method = 'wilcoxon')
sc.tl.filter_rank_genes_groups(ndata, min_fold_change=1)

# Output locations: the top-level directory of each pair receives the filtered
# tables, its 'full/' subdirectory the unfiltered ones.
outpath1a = 'out/DEG/normal celltype markers/'
outpath1b = 'out/DEG/normal celltype markers/full/'
outpath2a = 'out/DEG/tumor vs normal/'
outpath2b = 'out/DEG/tumor vs normal/full/'
for outdir in (outpath1a, outpath2a, outpath1b, outpath2b):
    # exist_ok makes the cell safe to re-run.
    os.makedirs(outdir, exist_ok=True)

for x in set(ndata.obs['celltype']):
    # '/' in a label would be read as a directory separator in the file name.
    safe_name = str(x).replace('/', '-')
    exportDEres(ndata, 'rank_genes_groups_filtered', str(x), outpath1a + safe_name + '_normal.txt')
    exportDEres(ndata, 'rank_genes_groups', str(x), outpath1b + safe_name + '_normal.txt')

### Wilcoxon test: tumor vs normal within each cell type
for i in set(adata.obs['celltype']):
    adatax = adata[adata.obs['celltype'] == i].copy()
    sc.tl.rank_genes_groups(adatax, groupby = 'group', method = 'wilcoxon', n_genes=30000)
    sc.tl.filter_rank_genes_groups(adatax, min_fold_change=1, max_out_group_fraction=1)
    # Sanitise the label exactly as in the normal-marker loop above.
    safe_name = str(i).replace('/', '-')
    exportDEres(adatax, 'rank_genes_groups_filtered', 'tumor', outpath2a + safe_name + '_tumor_vs_normal.txt')
    exportDEres(adatax, 'rank_genes_groups', 'tumor', outpath2b + safe_name + '_tumor_vs_normal.txt')

In [4]:
results_file = 'out/mnp.integrated.h5ad'
adata = sc.read_h5ad(results_file)
# Take an explicit copy: rank_genes_groups stores its results in .uns, and
# that should happen on a real object rather than on a view of `adata`.
ndata = adata[adata.obs['group'] == 'normal'].copy()

### Wilcoxon test for normal cell-type markers.
### Run once: the filtered and the unfiltered export both read from this result.
sc.tl.rank_genes_groups(ndata, groupby = 'celltype-immune', n_genes = 30000, method = 'wilcoxon')
sc.tl.filter_rank_genes_groups(ndata, min_fold_change=1)

# NOTE: outpath1a / outpath1b / outpath2a / outpath2b are not assigned in this
# cell; they must already be defined in the kernel, and the directories they
# name must exist.
for x in set(ndata.obs['celltype-immune']):
    # '/' in a label would be read as a directory separator in the file name.
    safe_name = str(x).replace('/', '-')
    exportDEres(ndata, 'rank_genes_groups_filtered', str(x), outpath1a + safe_name + '_normal.txt')
    exportDEres(ndata, 'rank_genes_groups', str(x), outpath1b + safe_name + '_normal.txt')

### Wilcoxon test: tumor vs normal within each cell type
for i in set(adata.obs['celltype-immune']):
    adatax = adata[adata.obs['celltype-immune'] == i].copy()
    sc.tl.rank_genes_groups(adatax, groupby = 'group', method = 'wilcoxon', n_genes=30000)
    sc.tl.filter_rank_genes_groups(adatax, min_fold_change=1, max_out_group_fraction=1)
    # Sanitise the label exactly as in the normal-marker loop above.
    safe_name = str(i).replace('/', '-')
    exportDEres(adatax, 'rank_genes_groups_filtered', 'tumor', outpath2a + safe_name + '_tumor_vs_normal.txt')
    exportDEres(adatax, 'rank_genes_groups', 'tumor', outpath2b + safe_name + '_tumor_vs_normal.txt')

In [5]:
results_file = 'out/lymphoid.h5ad'
adata = sc.read_h5ad(results_file)
# Take an explicit copy: rank_genes_groups stores its results in .uns, and
# that should happen on a real object rather than on a view of `adata`.
ndata = adata[adata.obs['group'] == 'normal'].copy()

### Wilcoxon test for normal cell-type markers.
### Run once: the filtered and the unfiltered export both read from this result.
sc.tl.rank_genes_groups(ndata, groupby = 'celltype-immune', n_genes = 30000, method = 'wilcoxon')
sc.tl.filter_rank_genes_groups(ndata, min_fold_change=1)

# NOTE: outpath1a / outpath1b / outpath2a / outpath2b are not assigned in this
# cell; they must already be defined in the kernel, and the directories they
# name must exist.
for x in set(ndata.obs['celltype-immune']):
    # '/' in a label would be read as a directory separator in the file name.
    safe_name = str(x).replace('/', '-')
    exportDEres(ndata, 'rank_genes_groups_filtered', str(x), outpath1a + safe_name + '_normal.txt')
    exportDEres(ndata, 'rank_genes_groups', str(x), outpath1b + safe_name + '_normal.txt')

### Wilcoxon test: tumor vs normal within each cell type
for i in set(adata.obs['celltype-immune']):
    adatax = adata[adata.obs['celltype-immune'] == i].copy()
    sc.tl.rank_genes_groups(adatax, groupby = 'group', method = 'wilcoxon', n_genes=30000)
    sc.tl.filter_rank_genes_groups(adatax, min_fold_change=1, max_out_group_fraction=1)
    # Sanitise the label exactly as in the normal-marker loop above.
    safe_name = str(i).replace('/', '-')
    exportDEres(adatax, 'rank_genes_groups_filtered', 'tumor', outpath2a + safe_name + '_tumor_vs_normal.txt')
    exportDEres(adatax, 'rank_genes_groups', 'tumor', outpath2b + safe_name + '_tumor_vs_normal.txt')