# Differential cell abundance analysis

This workbook performs differential cell abundance analysis at cell type level. 
It plots cell fractions stratified according to conditions. 

In [None]:
import besca as bc
import numpy as np
import pandas as pd
import scanpy as sc
import matplotlib.pyplot as plt
from scipy import sparse, io
import scvelo as scv
import os
import time
import logging
import pkg_resources
import seaborn as sns

# Log the package versions for reproducibility
sc.logging.print_versions()

# for standard processing, set verbosity to minimum
sc.settings.verbosity = 0  # verbosity: errors (0), warnings (1), info (2), hints (3)
sc.settings.set_figure_params(dpi=80)
# NOTE(review): `version` is not read again in this notebook; presumably the workflow version - confirm
version = '2.3'
# Wall-clock start time of the run (not read again in this notebook)
start0 = time.time()

In [None]:
# --- Analysis configuration -------------------------------------------------
analysis_name = 'sw_besca24'
#analysis_name = 'standard_workflow_besca2_CLR' #use _CLR or _DSB for citeseq analysis
species = 'mouse'
conversion = None
annot_author = 'schwalip'  # replace with userid
clusters = 'leiden'
split_condition = 'readout_id'  # 'readout_id' per the study registration CVs
root_path = os.getcwd()
citeseq = False

# --- Folder holding the annotated results -----------------------------------
# Everything lives below <root_path>/analyzed/<analysis_name>; citeseq and
# non-leiden clusterings use a sub-folder of it.
analysis_root = os.path.join(root_path, 'analyzed', analysis_name)
if clusters != 'leiden':
    results_folder = os.path.join(analysis_root, 'citeseq', clusters)
elif citeseq:
    results_folder = os.path.join(analysis_root, 'citeseq', 'citeseq')
else:
    results_folder = analysis_root

In [None]:
### Create export file and folder names
# Annotated AnnData file read as input further down
results_file = os.path.join(results_folder, analysis_name + '.annotated.h5ad')
# figdir keeps its trailing separator: later cells build file names as figdir + name
figdir = os.path.join(root_path, 'analyzed', analysis_name+'/figures/')
sc.settings.figdir = figdir
# exist_ok replaces the check-then-create pair (no race, no-op on re-runs)
os.makedirs(figdir, exist_ok=True)

In [None]:
# Output folder for the abundance tables and plots; the trailing separator is
# kept because later cells build file names as results_folder_out + name
results_folder_out = os.path.join(root_path, 'analyzed', analysis_name+'/cell_abundance/')
# exist_ok replaces the check-then-create pair (no race, no-op on re-runs)
os.makedirs(results_folder_out, exist_ok=True)
    


In [None]:
results_file

In [None]:
# Load the annotated AnnData object
adata = sc.read_h5ad(results_file)
#adata
# NOTE(review): presumably a workaround for scanpy code that reads
# adata.uns['log1p']['base'] and fails when the key is missing - confirm
adata.uns['log1p'] = {'base' : None}

In [None]:
set(adata.obs['treatment_id'])

In [None]:
### Set parameters 
subsetvar='readout_id'
condition='treatment_id'


In [None]:
set(adata.obs['celltype1'])

In [None]:
# Work on a full copy of adata; the commented-out filter shows how to restrict
# the copy to selected celltype0 groups instead
pdata=adata.copy() #[adata.obs['celltype0'].isin(['hematopoietic cell','endothelial cell',
                    #                     'epithelial cell','malignant cell','fibroblast'])].copy()
# Labels appended to the names of the figures and tables written below
initialsubset='All'
fullsubset='All'

In [None]:
sc.settings.set_figure_params(dpi=80)

In [None]:
# UMAP coloured by treatment, saved once per output format
for fig_ext in ('.pdf', '.svg'):
    sc.pl.umap(adata, color='treatment_id', save='-treatment_id-'+fullsubset+fig_ext)


In [None]:
# UMAP coloured by celltype1, saved once per output format
for fig_ext in ('.pdf', '.svg'):
    sc.pl.umap(adata, color='celltype1', save='-celltype1-'+fullsubset+fig_ext)

In [None]:
# UMAP with the leiden cluster labels drawn on the data, saved once per output format
for fig_ext in ('.pdf', '.svg'):
    sc.pl.umap(adata, color='leiden', legend_loc='on data', save='-leiden-'+fullsubset+fig_ext)

In [None]:
# 'celltype' is a copy of the 'dblabel' annotation, on both objects
for annotated in (adata, pdata):
    annotated.obs['celltype'] = annotated.obs['dblabel'].copy()

In [None]:
# 'celltype_simple' is a copy of the 'celltype2' annotation, on both objects
for annotated in (adata, pdata):
    annotated.obs['celltype_simple'] = annotated.obs['celltype2'].copy()

### Some initial plots

In [None]:
# UMAP of the working copy coloured by celltype, saved once per output format
for fig_ext in ('.pdf', '.svg'):
    sc.pl.umap(pdata, color='celltype', save='-celltype-'+initialsubset+fig_ext)

In [None]:
sc.pl.umap(pdata, color='leiden', legend_loc='on data',save='-leiden-'+initialsubset+'.pdf')

In [None]:
sc.pl.umap(pdata, color='treatment_id', save='-treatmentID-'+initialsubset+'.pdf')

In [None]:
# Combined label '<leiden>-<celltype>' per cell, on both objects
for annotated in (pdata, adata):
    cluster_ids = annotated.obs['leiden'].astype('str')
    cell_labels = annotated.obs['celltype'].astype('str')
    annotated.obs['celltypelei'] = list(cluster_ids + '-' + cell_labels)

In [None]:
# Combined label '<leiden>-<celltype_simple>' per cell, on both objects
for annotated in (adata, pdata):
    cluster_ids = annotated.obs['leiden'].astype('str')
    simple_labels = annotated.obs['celltype_simple'].astype('str')
    annotated.obs['celltypeleisimp'] = list(cluster_ids + '-' + simple_labels)

In [None]:
sc.pl.umap(pdata, color='celltypelei')

In [None]:
# Variables for the stacked bar plot in the next cell: var1 is passed as the
# subset variable, var2 as the count variable.
var1='celltypelei'
# Alternative kept as a comment; it used to be assigned and then immediately
# overwritten by the line below.
#var2='treatment_id'
var2='sample_id'


In [None]:
figdir

In [None]:
# Stacked bar chart of the (transposed) table returned by
# count_occurrence_subset for var1 / var2, saved as pdf and svg
plt.rcParams["figure.figsize"] = (7,3)

tmp = bc.tl.count_occurrence_subset(pdata, var1, count_variable=var2, return_percentage=True)
tmp.T.plot.bar(stacked=True)
plt.legend(loc="upper left", bbox_to_anchor=(1.04,1))  # anchor the legend right of the axes
for axis_name in ('x', 'y'):
    plt.tick_params(axis=axis_name, labelsize=12)
barplot_base = figdir+'Barplots-'+var1+'_per_'+var2
for fig_ext in ('.pdf', '.svg'):
    plt.savefig(barplot_base+fig_ext)

#    ax.set_ylabel(ylabel='Perc '+mycell,fontsize=10)
#    ax.set_xlabel(xlabel='Response ',fontsize=10)

In [None]:
var2='celltype_simple'
var1='treatment_id'
#var2='sample_id'


In [None]:
# Stacked bar chart of the (transposed) table returned by
# count_occurrence_subset for the current var1 / var2, saved as pdf and svg
plt.rcParams["figure.figsize"] = (7,3)

tmp = bc.tl.count_occurrence_subset(pdata, var1, count_variable=var2, return_percentage=True)
tmp.T.plot.bar(stacked=True)
plt.legend(loc="upper left", bbox_to_anchor=(1.04,1))  # anchor the legend right of the axes
for axis_name in ('x', 'y'):
    plt.tick_params(axis=axis_name, labelsize=12)
barplot_base = figdir+'Barplots-'+var1+'_per_'+var2
for fig_ext in ('.pdf', '.svg'):
    plt.savefig(barplot_base+fig_ext)
#    ax.set_ylabel(ylabel='Perc '+mycell,fontsize=10)
#    ax.set_xlabel(xlabel='Response ',fontsize=10)

In [None]:
# Same stacked bar chart as above, counting celltype4 per treatment_id
var2 = 'celltype4'
var1 = 'treatment_id'
#var2='sample_id'

plt.rcParams["figure.figsize"] = (7,3)

tmp = bc.tl.count_occurrence_subset(pdata, var1, count_variable=var2, return_percentage=True)
tmp.T.plot.bar(stacked=True)
plt.legend(loc="upper left", bbox_to_anchor=(1.04,1))  # anchor the legend right of the axes
for axis_name in ('x', 'y'):
    plt.tick_params(axis=axis_name, labelsize=12)
barplot_base = figdir+'Barplots-'+var1+'_per_'+var2
for fig_ext in ('.pdf', '.svg'):
    plt.savefig(barplot_base+fig_ext)
#    ax.set_ylabel(ylabel='Perc '+mycell,fontsize=10)
#    ax.set_xlabel(xlabel='Response ',fontsize=10)

In [None]:
# Same stacked bar chart as above, counting celltype2 per treatment_id
var2 = 'celltype2'
var1 = 'treatment_id'
#var2='sample_id'

plt.rcParams["figure.figsize"] = (7,3)

tmp = bc.tl.count_occurrence_subset(pdata, var1, count_variable=var2, return_percentage=True)
tmp.T.plot.bar(stacked=True)
plt.legend(loc="upper left", bbox_to_anchor=(1.04,1))  # anchor the legend right of the axes
for axis_name in ('x', 'y'):
    plt.tick_params(axis=axis_name, labelsize=12)
barplot_base = figdir+'Barplots-'+var1+'_per_'+var2
for fig_ext in ('.pdf', '.svg'):
    plt.savefig(barplot_base+fig_ext)
#    ax.set_ylabel(ylabel='Perc '+mycell,fontsize=10)
#    ax.set_xlabel(xlabel='Response ',fontsize=10)

In [None]:
plt.rcParams["figure.figsize"] = plt.rcParamsDefault["figure.figsize"]

In [None]:
scv.tl.score_genes_cell_cycle(pdata)

In [None]:
sc.pl.umap(pdata, color=['phase','S_score','G2M_score'],ncols=1,save='-phasev2-'+initialsubset+'.pdf')

In [None]:
sc.tl.embedding_density(pdata,  basis='umap',groupby='phase')

In [None]:
sc.pl.embedding_density(pdata,  basis='umap',key='umap_density_phase',fg_dotsize=40,bg_dotsize=40,ncols=3,
                        save='-phase-'+initialsubset+'.pdf')


In [None]:
sc.tl.embedding_density(pdata,  basis='umap',groupby='treatment_id')

In [None]:
sc.pl.embedding_density(pdata,  basis='umap',key='umap_density_treatment_id',ncols=3,
                        fg_dotsize=40,bg_dotsize=40 ,save='-treatment-'+initialsubset+'.pdf')
    
#scanpy.pl.embedding_density(adata, basis='umap', key=None, groupby=None, 
#group='all', color_map='YlOrRd', bg_dotsize=80, fg_dotsize=180, vmax=1, vmin=0, vcenter=None, norm=None, ncols=4, hspace=0.25, wspace=None, title=None, show=None, save=None, ax=None, return_fig=None, **kwargs)

In [None]:
tempcols=['orangered','orange','teal','indigo', 'purple','black']
#tempcols=['black','black','black','black','black']

In [None]:
initialsubset

In [None]:
sc.settings.set_figure_params()

In [None]:
# One UMAP per treatment, highlighting that treatment in its own colour.
# sorted() makes the treatment-to-colour assignment reproducible between runs
# (the iteration order of a set of strings is not), and the modulo recycles
# tempcols instead of raising IndexError when there are more treatments than colours.
mytreats=sorted(set(pdata.obs['treatment_id']))
for i, treat in enumerate(mytreats):
    sc.pl.umap(pdata, groups=[treat],palette=[tempcols[i % len(tempcols)]],
               color='treatment_id', save='-treatmentID-'+treat+'.'+initialsubset+'.pdf')

In [None]:
#cls=['5','6','14','12','3','4','17','16','14','12','0','15','10','8','9','13','2','1','11']

In [None]:
# One UMAP per treatment, saved with the '.selclusters' suffix.
# sorted() makes the treatment-to-colour assignment reproducible between runs
# (the iteration order of a set of strings is not), and the modulo recycles
# tempcols instead of raising IndexError when there are more treatments than colours.
mytreats=sorted(set(pdata.obs['treatment_id']))
for i, treat in enumerate(mytreats):
    sc.pl.umap(pdata, groups=[treat],palette=[tempcols[i % len(tempcols)]],
               color='treatment_id', save='-treatmentID-'+treat+'.selclusters.pdf')

In [None]:
# One UMAP per treatment, every one drawn in the same colour.
# The counter the original kept was never read and is gone; sorted() gives a
# reproducible plotting order.
mytreats=sorted(set(pdata.obs['treatment_id']))
for treat in mytreats:
    sc.pl.umap(pdata, groups=[treat],palette=['dodgerblue'],
               color='treatment_id', save='-treatmentID-'+treat+'.selclusters-singlecol.pdf')

In [None]:
# Genes of interest; goi and goinew are bound to the same list object.
# NOTE(review): 'Ccl4' appears twice, and neither name is read again in this
# notebook - confirm whether the list is still needed.
goi=goinew=['Tnf', 'Il6', 'Ifna2',
'Ifng', 'Il12b', 'Il12a', 'Ccl3', 'Ccl4',
'Il10', 'Tgfb1', 'Csf1', 'Csf2', 'Gzmb',
'Prf1', 'Il2', 'Ccl8', 'Cxcl9', 'Cxcl10',
'Xcl1',
'Ccl2', 'Cxcl13', 'Ccl5', 'Cxcl12',
'Ccl4', 'Il4', 'Cxcl2', 'Il1b',  'Ccl19']



In [None]:
# sample_id = '<individual_id>-<treatment_id>', on both objects
for annotated in (pdata, adata):
    individuals = annotated.obs['individual_id'].astype('str')
    treatments = annotated.obs['treatment_id'].astype('str')
    annotated.obs['sample_id'] = individuals + "-" + treatments

#### Differential expression between the clusters

### Plotting of cell type frequencies

### Stratify per celltype1

In [None]:
subsetvar='individual_id'

In [None]:
### Plot the percentage of cell types per condition
countvar = 'celltype1'  # this is the annotation level of choice
fig = bc.pl.celllabel_quant_boxplot(
    pdata,
    count_variable=countvar,
    subset_variable=subsetvar,
    condition_identifier=condition,
    plot_percentage=True,
    condition_order=None,
    figsize=(10, 4),
)
fig.savefig(f'{results_folder_out}Percentage-{countvar}-{subsetvar}-{condition}-{initialsubset}.pdf')

In [None]:
### Or get the numbers
# Percentage table for the current countvar / subsetvar / condition
df = bc.tl.count_occurrence_subset_conditions(
    pdata,
    count_variable=countvar,
    subset_variable=subsetvar,
    condition_identifier=condition,
    return_percentage=True,
)
df

In [None]:
### Number cell types per condition
fig = bc.pl.celllabel_quant_boxplot(
    pdata,
    count_variable=countvar,
    subset_variable=subsetvar,
    condition_identifier=condition,
    plot_percentage=False,
    condition_order=None,
    figsize=(10, 5),
)
fig.savefig(f'{results_folder_out}Number-{countvar}-{subsetvar}-{condition}-{initialsubset}.pdf')

In [None]:
### Or get the numbers
# Count table (return_percentage=False) for the current countvar / subsetvar / condition
df = bc.tl.count_occurrence_subset_conditions(
    pdata,
    count_variable=countvar,
    subset_variable=subsetvar,
    condition_identifier=condition,
    return_percentage=False,
)
df

In [None]:
results_folder_out+'Percentage-'+countvar+'-'+subsetvar+'-'+condition+'-'+initialsubset+'.pdf'

### Stratify per celltype4

In [None]:
### Plot the percentage of cell types per condition
countvar = 'celltype4'  # this is the annotation level of choice
fig = bc.pl.celllabel_quant_boxplot(
    pdata,
    count_variable=countvar,
    subset_variable=subsetvar,
    condition_identifier=condition,
    plot_percentage=True,
    condition_order=None,
    figsize=(14, 4),
)
fig.savefig(f'{results_folder_out}Percentage-{countvar}-{subsetvar}-{condition}-{initialsubset}.pdf')


In [None]:
### Or get the numbers
# Percentage table for the current countvar / subsetvar / condition
df = bc.tl.count_occurrence_subset_conditions(
    pdata,
    count_variable=countvar,
    subset_variable=subsetvar,
    condition_identifier=condition,
    return_percentage=True,
)
df


In [None]:
# NOTE(review): to_csv is called without `sep`, so this .tsv file is comma-separated
df.to_csv(f'{results_folder_out}Percentage-{countvar}-{subsetvar}-{condition}-{initialsubset}.tsv')

In [None]:
### Number cell types per condition
fig = bc.pl.celllabel_quant_boxplot(
    pdata,
    count_variable=countvar,
    subset_variable=subsetvar,
    condition_identifier=condition,
    plot_percentage=False,
    condition_order=None,
    figsize=(14, 4),
)
fig.savefig(f'{results_folder_out}Number-{countvar}-{subsetvar}-{condition}-{initialsubset}.pdf')


In [None]:
### Or get the numbers
# Count table (return_percentage=False) for the current countvar / subsetvar / condition
df = bc.tl.count_occurrence_subset_conditions(
    pdata,
    count_variable=countvar,
    subset_variable=subsetvar,
    condition_identifier=condition,
    return_percentage=False,
)
df


In [None]:
# NOTE(review): to_csv is called without `sep`, so this .tsv file is comma-separated
df.to_csv(f'{results_folder_out}Number-{countvar}-{subsetvar}-{condition}-{initialsubset}.tsv')

Large differences in population frequencies across treatments. 

#### Stratify per leiden cluster 

In [None]:
### Plot the percentage of cell types per condition
countvar = 'leiden'  # this is the annotation level of choice
fig = bc.pl.celllabel_quant_boxplot(
    pdata,
    count_variable=countvar,
    subset_variable=subsetvar,
    condition_identifier=condition,
    plot_percentage=True,
    condition_order=None,
    figsize=(14, 4),
)
fig.savefig(f'{results_folder_out}Percentage-{countvar}-{subsetvar}-{condition}-{initialsubset}.pdf')


In [None]:
### Or get the numbers
# Percentage table for the current countvar / subsetvar / condition
df = bc.tl.count_occurrence_subset_conditions(
    pdata,
    count_variable=countvar,
    subset_variable=subsetvar,
    condition_identifier=condition,
    return_percentage=True,
)
df


In [None]:

# Write the table currently held in df, then plot the absolute numbers.
# NOTE(review): to_csv is called without `sep`, so this .tsv file is comma-separated
df.to_csv(f'{results_folder_out}Percentage-{countvar}-{subsetvar}-{condition}-{initialsubset}.tsv')
### Number cell types per condition
fig = bc.pl.celllabel_quant_boxplot(
    pdata,
    count_variable=countvar,
    subset_variable=subsetvar,
    condition_identifier=condition,
    plot_percentage=False,
    condition_order=None,
    figsize=(14, 5),
)
fig.savefig(f'{results_folder_out}Number-{countvar}-{subsetvar}-{condition}-{initialsubset}.pdf')


In [None]:
### Or get the numbers
# Count table (return_percentage=False) for the current countvar / subsetvar / condition
df = bc.tl.count_occurrence_subset_conditions(
    pdata,
    count_variable=countvar,
    subset_variable=subsetvar,
    condition_identifier=condition,
    return_percentage=False,
)
df


In [None]:
# NOTE(review): to_csv is called without `sep`, so this .tsv file is comma-separated
df.to_csv(f'{results_folder_out}Number-{countvar}-{subsetvar}-{condition}-{initialsubset}.tsv')


### Stratify per celltype3


In [None]:
### Plot the percentage of cell types per condition
countvar = 'celltype3'  # this is the annotation level of choice
fig = bc.pl.celllabel_quant_boxplot(
    pdata,
    count_variable=countvar,
    subset_variable=subsetvar,
    condition_identifier=condition,
    plot_percentage=True,
    condition_order=None,
    figsize=(14, 4),
)
fig.savefig(f'{results_folder_out}Percentage-{countvar}-{subsetvar}-{condition}-{initialsubset}.pdf')


In [None]:
### Or get the numbers
# Percentage table for the current countvar / subsetvar / condition
df = bc.tl.count_occurrence_subset_conditions(
    pdata,
    count_variable=countvar,
    subset_variable=subsetvar,
    condition_identifier=condition,
    return_percentage=True,
)
df


In [None]:
### Number cell types per condition
fig = bc.pl.celllabel_quant_boxplot(
    pdata,
    count_variable=countvar,
    subset_variable=subsetvar,
    condition_identifier=condition,
    plot_percentage=False,
    condition_order=None,
    figsize=(14, 5),
)
fig.savefig(f'{results_folder_out}Number-{countvar}-{subsetvar}-{condition}-{initialsubset}.pdf')


In [None]:
### Or get the numbers
# Count table (return_percentage=False) for the current countvar / subsetvar / condition
df = bc.tl.count_occurrence_subset_conditions(
    pdata,
    count_variable=countvar,
    subset_variable=subsetvar,
    condition_identifier=condition,
    return_percentage=False,
)
df




Large differences in population frequencies across treatments. 

### Stratify per celltype_simple

In [None]:
tmp=pdata.copy() #adata[adata.obs['celltype_simple']!='other'].copy()

In [None]:
### Plot the percentage of cell types per condition
countvar = 'celltype_simple'  # this is the annotation level of choice
fig = bc.pl.celllabel_quant_boxplot(
    tmp,
    count_variable=countvar,
    subset_variable=subsetvar,
    condition_identifier=condition,
    plot_percentage=True,
    condition_order=None,
    figsize=(14, 4),
)
fig.savefig(f'{results_folder_out}Percentage-{countvar}-{subsetvar}-{condition}-{initialsubset}.pdf')


In [None]:
### Or get the numbers
# Percentage table computed on tmp for the current countvar / subsetvar / condition
df = bc.tl.count_occurrence_subset_conditions(
    tmp,
    count_variable=countvar,
    subset_variable=subsetvar,
    condition_identifier=condition,
    return_percentage=True,
)
df


In [None]:
### Number cell types per condition
fig = bc.pl.celllabel_quant_boxplot(
    tmp,
    count_variable=countvar,
    subset_variable=subsetvar,
    condition_identifier=condition,
    plot_percentage=False,
    condition_order=None,
    figsize=(14, 5),
)
fig.savefig(f'{results_folder_out}Number-{countvar}-{subsetvar}-{condition}-{initialsubset}.pdf')


In [None]:
### Or get the numbers
# Computed on tmp, like the plots and the percentage table of this section.
# The original read pdata here, which would silently report unfiltered counts
# as soon as the commented-out filter on tmp is enabled.
df=bc.tl.count_occurrence_subset_conditions(tmp, count_variable = countvar, subset_variable = subsetvar, condition_identifier = condition,  return_percentage = False)
df


### Export proportions for statistical tests in R

Cells were sorted, so frequency comparisons are not meaningful. 

In [None]:
### If needed create a CONDITION variable (if not there yet)
#adata.obs['CONDITION']=adata.obs['time'].astype(str)+"-"+adata.obs['treatment_id'].astype(str)

In [None]:
# For every annotation level write two tables: percentages (CelltypeFreq_) and
# counts (CelltypeNrs_).
# NOTE(review): to_csv is called without `sep`, so the .tsv files are
# comma-separated; the pd.read_csv calls further down rely on that.
mysubs = ['celltype', 'celltype2', 'celltype3', 'celltype4']
condi = 'treatment_id'
table_kinds = (('CelltypeFreq_', True), ('CelltypeNrs_', False))  # (file prefix, return_percentage)
for what in mysubs:
    for file_prefix, as_percentage in table_kinds:
        table = bc.tl.count_occurrence_subset_conditions(
            pdata,
            subset_variable=subsetvar,
            count_variable=what,
            condition_identifier=condi,
            return_percentage=as_percentage,
        )
        table.to_csv(results_folder_out+file_prefix+condi+'_'+what+'_'+subsetvar+'.tsv')

### Additional, custom frequency plots

In [None]:
### Function to get p values (t test & wilcox)
from scipy import stats
import itertools
def getPs(cellFreqs,myconditions,name1):
    """Compute pairwise p-values between conditions.

    Every unordered pair of entries in ``myconditions`` is compared with a
    Mann-Whitney U test and an independent two-sample t-test.

    Parameters
    ----------
    cellFreqs : pandas.DataFrame
        Table with one row per sample. The values to test are taken from its
        second column (position 1); the condition labels from column ``name1``.
    myconditions : sequence
        Condition labels to compare, in the order used to build the pair names.
    name1 : str
        Name of the column in ``cellFreqs`` holding the condition labels.

    Returns
    -------
    pandas.DataFrame
        One row per pair, indexed ``'<first>-<second>'``, with the columns
        ``'MannWhitney'`` and ``'T-test'`` holding the p-values. A pair in
        which either condition has no rows gets NaN in both columns.
    """
    # Extract each condition's values once instead of once per pair and test
    values = {cond: list(cellFreqs.loc[cellFreqs[name1] == cond, :].iloc[:, 1])
              for cond in myconditions}
    pwilc = {}
    pt = {}
    for first, second in itertools.combinations(myconditions, 2):
        label = f'{first}-{second}'
        x, y = values[first], values[second]
        if not x or not y:
            # scipy raises or warns on empty samples, depending on its version;
            # report the comparison as undefined instead
            pwilc[label] = pt[label] = float('nan')
            continue
        pwilc[label] = stats.mannwhitneyu(x, y)[1]
        pt[label] = stats.ttest_ind(x, y)[1]

    myps = pd.DataFrame([pwilc, pt]).transpose()
    myps.columns = ['MannWhitney', 'T-test']
    return myps


In [None]:
condi = 'treatment_id'
#### One can also choose to plot cell types individually
what = 'celltype2'
freq_file = f'{results_folder_out}CelltypeFreq_{condi}_{what}_{subsetvar}.tsv'
myfreq = pd.read_csv(freq_file, header=0)
# Label the rows by the first CSV column; the column itself stays in the table
myfreq.index = myfreq.iloc[:, 0]


In [None]:
myfreq

In [None]:
# Sample column names are split on spaces: field 2 -> cond, field 1 -> patid
sample_names = list(myfreq.drop(columns=['Unnamed: 0']).columns)
cond = [name.split(' ')[2] for name in sample_names]
patid = [name.split(' ')[1] for name in sample_names]

In [None]:
# One row per sample, one column per cell type, plus the sample's condition
sample_table = myfreq.drop(columns=['Unnamed: 0'])
toplot = sample_table.T
toplot['CONDITION'] = cond
#toplot['tissue']=[x.split('_')[2] for x in list(toplot['CONDITION'])] ### this depends on your data
#toplot['time']=[x.split('_')[1] for x in list(toplot['CONDITION'])] ### this depends on your data

In [None]:
toplot['CONDITION']

In [None]:
### Plotting color and plotting order
ploto = ['C_t0', 'T_1', 'T_2', 'T_t3', 'T_t4', 'T_t5']
# 'C_t0' is drawn in black, every other group in red
color_dict = {group: ('black' if group == 'C_t0' else 'red') for group in ploto}

#color_dict=treatcol.copy()

In [None]:
### Axis needs to be adjusted for cell type numbers
# One panel (box plot + swarm plot) per row of myfreq; the grid below has
# 3 x 4 = 12 panels, more rows than that raise IndexError.
fig, axes = plt.subplots(3, 4,figsize=(12,8), gridspec_kw={'wspace': 0.5, 'left': 0.25})
plt.subplots_adjust(left=0.2, right=0.98, top=0.86, bottom=0.1)

axes = axes.flatten()
# p-values (Mann-Whitney, t-test) per cell type, one dict per comparison vs C_t0
pvals={}   # C_t0 vs T_1
pvals2={}  # C_t0 vs T_2
pvals3={}  # C_t0 vs T_t3
pvals4={}  # C_t0 vs T_t4
pvals5={}  # C_t0 vs T_t5
for i, mycell in enumerate(myfreq.index):
    ax=sns.boxplot(y=mycell,x='CONDITION',data=toplot,ax=axes[i],
                   palette=color_dict,orient='v',order=ploto)
    ax=sns.swarmplot(y=mycell,x='CONDITION',data=toplot,color='black',ax=axes[i],
                    order=ploto)
    ax.set_xticklabels(ax.get_xticklabels(), rotation=90,fontsize=10)
    ax.tick_params(axis='y', labelsize=10)
    ax.set_ylabel(ylabel='Perc '+mycell,fontsize=10)
    ax.set_xlabel(xlabel='Response ',fontsize=10)
    # All pairwise tests for this cell type are computed once and looked up
    # per comparison (the original repeated the identical call five times)
    cell_ps=getPs(toplot.loc[:,['CONDITION',mycell]],ploto,'CONDITION')
    pvals[mycell]=cell_ps.loc['C_t0-T_1',:]
    pvals2[mycell]=cell_ps.loc['C_t0-T_2',:]
    pvals5[mycell]=cell_ps.loc['C_t0-T_t5',:]
    pvals3[mycell]=cell_ps.loc['C_t0-T_t3',:]
    pvals4[mycell]=cell_ps.loc['C_t0-T_t4',:]

fig.savefig(figdir+'Celltypefreq-Response-TILs'+what+'.pdf')
fig.savefig(figdir+'Celltypefreq-Response-TILs'+what+'.svg')

In [None]:
toplot

In [None]:
pd.DataFrame(pvals)

In [None]:
def _significant_cells(pvalue_dict):
    """Return the keys of pvalue_dict whose smallest p-value is <= 0.05."""
    table = pd.DataFrame(pvalue_dict)
    keep = table.min(axis=0) <= 0.05
    return list(table.loc[:, keep].columns)


# One list of significant cell types per comparison dictionary
a1 = _significant_cells(pvals)
a2 = _significant_cells(pvals2)
a3 = _significant_cells(pvals3)
a4 = _significant_cells(pvals4)
a5 = _significant_cells(pvals5)


In [None]:
set(a1+a2+a3+a4+a5)

#### Celltype


In [None]:
#### One can also choose to plot cell types individually
what = 'celltype4'
myfreq = pd.read_csv(f'{results_folder_out}CelltypeFreq_{condi}_{what}_{subsetvar}.tsv', header=0)
# Label the rows by the first CSV column; the column itself stays in the table
myfreq.index = myfreq.iloc[:, 0]

# Sample columns only; their names are split on spaces:
# field 2 -> cond, field 1 -> patid
sample_table = myfreq.drop(columns=['Unnamed: 0'])
name_fields = [name.split(' ') for name in sample_table.columns]
cond = [fields[2] for fields in name_fields]
patid = [fields[1] for fields in name_fields]

toplot = sample_table.transpose()
toplot['CONDITION'] = cond
#toplot['tissue']=[x.split('_')[2] for x in list(toplot['CONDITION'])] ### this depends on your data
#toplot['time']=[x.split('_')[1] for x in list(toplot['CONDITION'])] ### this depends on your data

### subset as above
#toplot=toplot.loc[toplot['tissue']=='LN',:]
### subset as above
#toplot=toplot.loc[toplot['CONDITION']!='T_24_Tumor_C_24_Tumor',:]
toplot['CONDITION']



In [None]:



### Axis needs to be adjusted for cell type numbers
# One panel (box plot + swarm plot) per row of myfreq; the grid below has
# 5 x 5 = 25 panels, more rows than that raise IndexError.
fig, axes = plt.subplots(5, 5,figsize=(14,10), gridspec_kw={'wspace': 0.5, 'left': 0.25})
plt.subplots_adjust(left=0.2, right=0.98, top=0.86, bottom=0.1)

axes = axes.flatten()
# p-values (Mann-Whitney, t-test) per cell type, one dict per comparison vs C_t0
pvals={}   # C_t0 vs T_1
pvals2={}  # C_t0 vs T_2
pvals3={}  # C_t0 vs T_t3
pvals4={}  # C_t0 vs T_t4
pvals5={}  # C_t0 vs T_t5
for i, mycell in enumerate(myfreq.index):
    ax=sns.boxplot(y=mycell,x='CONDITION',data=toplot,ax=axes[i],
                   palette=color_dict,orient='v',order=ploto)
    ax=sns.swarmplot(y=mycell,x='CONDITION',data=toplot,color='black',ax=axes[i],
                    order=ploto)
    ax.set_xticklabels(ax.get_xticklabels(), rotation=90,fontsize=7)
    ax.tick_params(axis='y', labelsize=7)
    ax.set_ylabel(ylabel='Perc '+mycell,fontsize=7)
    #ax.set_xlabel(xlabel='Response ',fontsize=6)
    # All pairwise tests for this cell type are computed once and looked up
    # per comparison (the original repeated the identical call five times)
    cell_ps=getPs(toplot.loc[:,['CONDITION',mycell]],ploto,'CONDITION')
    pvals[mycell]=cell_ps.loc['C_t0-T_1',:]
    pvals2[mycell]=cell_ps.loc['C_t0-T_2',:]
    pvals5[mycell]=cell_ps.loc['C_t0-T_t5',:]
    pvals3[mycell]=cell_ps.loc['C_t0-T_t3',:]
    pvals4[mycell]=cell_ps.loc['C_t0-T_t4',:]

fig.savefig(figdir+'Celltypefreq-Response-TILs'+what+'.pdf')
fig.savefig(figdir+'Celltypefreq-Response-TILs'+what+'.svg')
pd.DataFrame(pvals)

In [None]:
pd.DataFrame(pvals2)

In [None]:
# Cell types whose smaller p-value (Mann-Whitney or t-test) in pvals is <= 0.05
pvals_table = pd.DataFrame(pvals)
a1 = list(pvals_table.loc[:, pvals_table.min(axis=0) <= 0.05].columns)
a1

In [None]:
# Cell types whose smaller p-value (Mann-Whitney or t-test) in pvals2 is <= 0.05
pvals2_table = pd.DataFrame(pvals2)
a2 = list(pvals2_table.loc[:, pvals2_table.min(axis=0) <= 0.05].columns)
a2

In [None]:
# Cell types whose smaller p-value (Mann-Whitney or t-test) in pvals5 is <= 0.05
pvals5_table = pd.DataFrame(pvals5)
a5 = list(pvals5_table.loc[:, pvals5_table.min(axis=0) <= 0.05].columns)
a5

In [None]:
# Cell types whose smaller p-value (Mann-Whitney or t-test) in pvals3 is <= 0.05
pvals3_table = pd.DataFrame(pvals3)
a3 = list(pvals3_table.loc[:, pvals3_table.min(axis=0) <= 0.05].columns)
a3

In [None]:
# Cell types whose smaller p-value (Mann-Whitney or t-test) in pvals4 is <= 0.05
pvals4_table = pd.DataFrame(pvals4)
a4 = list(pvals4_table.loc[:, pvals4_table.min(axis=0) <= 0.05].columns)
a4

In [None]:
# Union of the cell types significant in any comparison. sorted() fixes the
# order, so the panels of the follow-up figure are laid out identically on
# every run (the iteration order of a set of strings is not stable between runs).
toplotc=sorted(set(a1+a2+a3+a4+a5))
toplotc

In [None]:
#### One can also choose to plot cell types individually
what = 'celltype4'
myfreq = pd.read_csv(f'{results_folder_out}CelltypeFreq_{condi}_{what}_{subsetvar}.tsv', header=0)
# Label the rows by the first CSV column; the column itself stays in the table
myfreq.index = myfreq.iloc[:, 0]

# Sample columns only; their names are split on spaces:
# field 2 -> cond, field 1 -> patid
sample_table = myfreq.drop(columns=['Unnamed: 0'])
name_fields = [name.split(' ') for name in sample_table.columns]
cond = [fields[2] for fields in name_fields]
patid = [fields[1] for fields in name_fields]

toplot = sample_table.transpose()
toplot['CONDITION'] = cond

### Axis needs to be adjusted for cell type numbers
fig, axes = plt.subplots(3, 5, figsize=(14,6), gridspec_kw={'wspace': 0.5, 'left': 0.25})
plt.subplots_adjust(left=0.2, right=0.98, top=0.86, bottom=0.1)

axes = axes.flatten()
pvals = {}
# One panel per cell type listed in toplotc
for panel, mycell in enumerate(toplotc):
    ax = sns.boxplot(data=toplot, x='CONDITION', y=mycell, order=ploto,
                     palette=color_dict, orient='v', ax=axes[panel])
    ax = sns.swarmplot(data=toplot, x='CONDITION', y=mycell, order=ploto,
                       color='black', ax=axes[panel])
    ax.set_xticklabels(ax.get_xticklabels(), rotation=90, fontsize=7)
    ax.tick_params(axis='y', labelsize=7)
    ax.set_ylabel(ylabel='Perc '+mycell, fontsize=7)
    #ax.set_xlabel(xlabel='Response ',fontsize=6)
    pairwise_ps = getPs(toplot.loc[:, ['CONDITION', mycell]], ploto, 'CONDITION')
    pvals[mycell] = pairwise_ps.loc['C_t0-T_1', :]

significant_base = figdir+'Celltypefreq-Response-TILs-significants_vsComb_vsVehicle'+what
for fig_ext in ('.pdf', '.svg'):
    fig.savefig(significant_base+fig_ext)

pd.DataFrame(pvals)

#### Celltype3

In [None]:

#### One can also choose to plot cell types individually
what='celltype3'
myfreq=pd.read_csv(results_folder_out+'CelltypeFreq_'+condi+'_'+what+'_'+subsetvar+'.tsv',header=0)
# Label the rows by the first CSV column; the column itself stays in the table
myfreq.index=myfreq.iloc[:,0]

# Sample column names are split on spaces: field 2 -> cond, field 1 -> patid
cond=[x.split(' ')[2] for x in list(myfreq.drop(columns=['Unnamed: 0']).columns)]
patid=[x.split(' ')[1] for x in list(myfreq.drop(columns=['Unnamed: 0']).columns)]

toplot=myfreq.drop(columns=['Unnamed: 0']).transpose()
toplot['CONDITION']=cond

### Axis needs to be adjusted for cell type numbers
# One panel (box plot + swarm plot) per row of myfreq; the grid below has
# 4 x 6 = 24 panels, more rows than that raise IndexError.
fig, axes = plt.subplots(4, 6,figsize=(14,9), gridspec_kw={'wspace': 0.5, 'left': 0.25})
plt.subplots_adjust(left=0.2, right=0.98, top=0.86, bottom=0.1)

axes = axes.flatten()
# p-values (Mann-Whitney, t-test) per cell type, one dict per comparison vs C_t0
pvals={}   # C_t0 vs T_1
pvals2={}  # C_t0 vs T_2
pvals3={}  # C_t0 vs T_t3
pvals4={}  # C_t0 vs T_t4
pvals5={}  # C_t0 vs T_t5
for i, mycell in enumerate(myfreq.index):
    ax=sns.boxplot(y=mycell,x='CONDITION',data=toplot,ax=axes[i],
                   palette=color_dict,orient='v',order=ploto)
    ax=sns.swarmplot(y=mycell,x='CONDITION',data=toplot,color='black',ax=axes[i],
                    order=ploto)
    ax.set_xticklabels(ax.get_xticklabels(), rotation=90,fontsize=7)
    ax.tick_params(axis='y', labelsize=7)
    ax.set_ylabel(ylabel='Perc '+str(mycell),fontsize=7)
    #ax.set_xlabel(xlabel='Response ',fontsize=6)
    # All pairwise tests for this cell type are computed once and looked up
    # per comparison (the original repeated the identical call five times)
    cell_ps=getPs(toplot.loc[:,['CONDITION',mycell]],ploto,'CONDITION')
    pvals[mycell]=cell_ps.loc['C_t0-T_1',:]
    pvals2[mycell]=cell_ps.loc['C_t0-T_2',:]
    pvals5[mycell]=cell_ps.loc['C_t0-T_t5',:]
    pvals3[mycell]=cell_ps.loc['C_t0-T_t3',:]
    pvals4[mycell]=cell_ps.loc['C_t0-T_t4',:]

fig.savefig(figdir+'Celltypefreq-Response-TILs'+what+'.pdf')
fig.savefig(figdir+'Celltypefreq-Response-TILs'+what+'.svg')

In [None]:
pd.DataFrame(pvals)

In [None]:
def _significant_cells(pvalue_dict):
    """Return the keys of pvalue_dict whose smallest p-value is <= 0.05."""
    table = pd.DataFrame(pvalue_dict)  # built once instead of twice per list
    keep = table.min(axis=0) <= 0.05
    return list(table.loc[:, keep].columns)


# One list of significant cell types per comparison dictionary. The bare
# `a1` ... `a4` expressions of the original displayed nothing (only the last
# expression of a cell is shown) and are dropped.
a1 = _significant_cells(pvals)
a2 = _significant_cells(pvals2)
a5 = _significant_cells(pvals5)
a3 = _significant_cells(pvals3)
a4 = _significant_cells(pvals4)

# sorted() fixes the order, so the panels of the follow-up figure are laid out
# identically on every run (set iteration order is not stable between runs)
toplotc = sorted(set(a1+a2+a3+a4+a5))
toplotc

In [None]:
#### One can also choose to plot cell types individually
myfreq = pd.read_csv(f'{results_folder_out}CelltypeFreq_{condi}_{what}_{subsetvar}.tsv', header=0)
# Label the rows by the first CSV column; the column itself stays in the table
myfreq.index = myfreq.iloc[:, 0]

# Sample columns only; their names are split on spaces:
# field 2 -> cond, field 1 -> patid
sample_table = myfreq.drop(columns=['Unnamed: 0'])
name_fields = [name.split(' ') for name in sample_table.columns]
cond = [fields[2] for fields in name_fields]
patid = [fields[1] for fields in name_fields]

toplot = sample_table.transpose()
toplot['CONDITION'] = cond

### Axis needs to be adjusted for cell type numbers
fig, axes = plt.subplots(3, 6, figsize=(16,7), gridspec_kw={'wspace': 0.5, 'left': 0.25})
plt.subplots_adjust(left=0.2, right=0.98, top=0.86, bottom=0.1)

axes = axes.flatten()
pvals = {}
# One panel per cell type listed in toplotc
for panel, mycell in enumerate(toplotc):
    ax = sns.boxplot(data=toplot, x='CONDITION', y=mycell, order=ploto,
                     palette=color_dict, orient='v', ax=axes[panel])
    ax = sns.swarmplot(data=toplot, x='CONDITION', y=mycell, order=ploto,
                       color='black', ax=axes[panel])
    ax.set_xticklabels(ax.get_xticklabels(), rotation=90, fontsize=7)
    ax.tick_params(axis='y', labelsize=7)
    ax.set_ylabel(ylabel='Perc '+str(mycell), fontsize=7)
    #ax.set_xlabel(xlabel='Response ',fontsize=6)

significant_base = figdir+'Celltypefreq-Response-TILs-significants_vsComb_vsVehicle'+what
for fig_ext in ('.pdf', '.svg'):
    fig.savefig(significant_base+fig_ext)


In [None]:
! jupyter nbconvert --to html CellAbundance.ipynb