In [None]:
import besca as bc
import numpy as np
import pandas as pd
import scanpy as sc
import matplotlib.pyplot as plt
from scipy import sparse, io
import os
import time
import logging
import seaborn as sns
from scipy.stats import fisher_exact
sc.logging.print_versions()

# for standard processing, set verbosity to minimum
sc.settings.verbosity = 0  # verbosity: errors (0), warnings (1), info (2), hints (3)
sc.settings.set_figure_params(dpi=80)
# Free-text version tag; not referenced again in the visible part of the notebook
version = '2.8'
# Wall-clock timestamp taken when the notebook starts
start0 = time.time()

In [None]:
### Plot parameters for publication
def set_pub():
    """Configure matplotlib rcParams for publication figures.

    Sets a normal font weight, bold axes titles, axis labels and figure title,
    font sizes of 10 (ticks, legend), 12 (axis labels, legend title) and
    14 (axes and figure titles), a savefig resolution of 300 dpi and
    svg.fonttype = 'none'.
    """
    small_size, medium_size, large_size = 10, 12, 14
    resolution = 300  # in dpi

    plt.rcParams['font.weight'] = 'normal'
    #plt.rc('font', **{'family':'sans-serif','sans-serif':['Helvetica']})

    # (rc group, settings) pairs, applied in order through plt.rc
    rc_groups = [
        ('axes', dict(titlesize=large_size, titleweight="bold",      # axes title
                      labelsize=medium_size, labelweight="bold")),   # x and y labels
        ('xtick', dict(labelsize=small_size)),                        # tick labels
        ('ytick', dict(labelsize=small_size)),                        # tick labels
        ('legend', dict(fontsize=small_size, title_fontsize=medium_size)),
        ('figure', dict(titlesize=large_size, titleweight="bold")),  # figure title
        ('savefig', dict(dpi=resolution)),                            # higher res outputs
    ]
    for rc_group, rc_settings in rc_groups:
        plt.rc(rc_group, **rc_settings)

    plt.rcParams['svg.fonttype'] = 'none'


set_pub()

In [None]:
# Standardized file paths: the working directory is the analysis root and the
# besca location is taken from the imported package.
root_path = os.getcwd()
bescapath_full = os.path.dirname(bc.__file__)
bescapath = os.path.dirname(bescapath_full)   # parent directory of the besca package

# Name of the analysis; used below to build the results folder and file names
analysis_name = 'sw_besca2_immune'

# Name of the clustering used
clusters = 'leiden'


In [None]:
# Results folder of this analysis and the annotated AnnData object stored in it
results_folder = os.path.join(root_path, 'analyzed', analysis_name)
adata = sc.read_h5ad(os.path.join(results_folder, analysis_name + '.annotated.h5ad'))

# Figure output folder (keeps its trailing '/', later cells append file names directly)
figdir = os.path.join(root_path, 'analyzed', analysis_name+'/figures/publication/')
if not os.path.exists(figdir):
    os.makedirs(figdir)
sc.settings.figdir = figdir

In [None]:
os.path.join(results_folder, analysis_name + '.annotated.h5ad')

In [None]:
figdir

### Adjust nomenclature 

In [None]:
# Response category for the figures: start from 'Response3' and rename two labels.
adata.obs['RCat'] = adata.obs['Response3'].copy()
for response_old, response_new in (('NR', 'NR_nadj'), ('PD', 'NR_adj')):
    adata.obs['RCat'] = list(adata.obs['RCat'].replace(response_old, response_new))
# Renames that are currently disabled:
#adata.obs['RCat']=list(adata.obs['RCat'].replace('TF', 'R_adj'))
#adata.obs['RCat']=list(adata.obs['RCat'].replace('R', 'R_nadj'))

In [None]:
#set(adata.obs['celltype3'])

In [None]:
# Shortened level-2 cell type labels for the publication figures.
celltype2_renames = [
    ('CD4-positive, alpha-beta T cell', 'CD4-positive T cell'),
    ('CD8-positive, alpha-beta T cell', 'CD8-positive T cell'),
    ('lymphocyte of B lineage', 'B cell'),
]
newc = adata.obs['celltype2_merged'].copy()
for label_old, label_new in celltype2_renames:
    newc = newc.replace(label_old, label_new).copy()
adata.obs['celltype2_pub'] = list(newc)

In [None]:
# Shortened level-3 cell type labels for the publication figures.
# The (old, new) pairs are applied one after another, in this order.
celltype3_renames = [
    ('CD4-positive, alpha-beta T cell', 'CD4-positive T cell'),
    ('CD4-positive, alpha-beta cytotoxic T cell', 'CD4-positive, cytotoxic T cell'),
    ('CD8-positive, alpha-beta cytotoxic T cell', 'exhausted-like CD8-positive T cell'),
    ('central memory CD4-positive, alpha-beta T cell', 'central memory CD4-positive T cell'),
    ('effector memory CD4-positive, alpha-beta T cell', 'effector memory CD4-positive T cell'),
    ('effector memory CD8-positive, alpha-beta T cell', 'effector memory CD8-positive T cell'),
    ('CD8-positive, alpha-beta cytokine secreting effector T cell', 'cytokine secreting effector CD8-positive T cell'),
    ('lymphocyte of B lineage', 'B cell'),
    ('naive thymus-derived CD4-positive, alpha-beta T cell', 'naive CD4-positive T cell'),
    ('naive thymus-derived CD8-positive, alpha-beta T cell', 'naive CD8-positive T cell'),
    ('proliferating CD8-positive, alpha-beta T cell', 'proliferating CD8-positive T cell'),
    ('proliferating CD4-positive, alpha-beta T cell', 'proliferating CD4-positive T cell'),
    ('IL7R-max CD4-positive, alpha-beta cytotoxic T cell', 'IL7R-max CD4-positive T cell'),
    ('exhausted-like CD8-positive, alpha-beta T cell', 'exhausted-like CD8-positive T cell'),
    ('exhausted-like CD4-positive, alpha-beta T cell', 'exhausted-like CD4-positive T cell'),
]
newc = adata.obs['celltype3_merged'].copy()
for label_old, label_new in celltype3_renames:
    newc = newc.replace(label_old, label_new).copy()

adata.obs['celltype3_pub'] = list(newc)

In [None]:
# Shortened level-4 cell type labels for the publication figures.
# The (old, new) pairs are applied one after another, in this order.
celltype4_renames = [
    ('CD4-positive, alpha-beta T cell', 'CD4-positive T cell'),
    ('CD4-positive, alpha-beta cytotoxic T cell', 'CD4-positive, cytotoxic T cell'),
    ('CD8-positive, alpha-beta cytotoxic T cell', 'exhausted-like CD8-positive T cell'),
    ('central memory CD4-positive, alpha-beta T cell', 'central memory CD4-positive T cell'),
    ('effector memory CD4-positive, alpha-beta T cell', 'effector memory CD4-positive T cell'),
    ('effector memory CD8-positive, alpha-beta T cell', 'effector memory CD8-positive T cell'),
    ('CD8-positive, alpha-beta cytokine secreting effector T cell', 'cytokine secreting effector CD8-positive T cell'),
    ('lymphocyte of B lineage', 'B cell'),
    ('naive thymus-derived CD4-positive, alpha-beta T cell', 'naive CD4-positive T cell'),
    ('naive thymus-derived CD8-positive, alpha-beta T cell', 'naive CD8-positive T cell'),
    ('proliferating CD8-positive, alpha-beta T cell', 'proliferating CD8-positive T cell'),
    ('proliferating CD4-positive, alpha-beta T cell', 'proliferating CD4-positive T cell'),
    ('IL7R-max CD4-positive, alpha-beta cytotoxic T cell', 'IL7R-max CD4-positive T cell'),
    ('exhausted-like CD8-positive, alpha-beta T cell', 'exhausted-like CD8-positive T cell'),
    ('exhausted-like CD4-positive, alpha-beta T cell', 'exhausted-like CD4-positive T cell'),
]
newc = adata.obs['celltype4_merged'].copy()
for label_old, label_new in celltype4_renames:
    newc = newc.replace(label_old, label_new).copy()

adata.obs['celltype4_pub'] = list(newc)

In [None]:
#adata.write(os.path.join(results_folder, analysis_name + '.annotated.h5ad'))

In [None]:
#adata=sc.read(results_folder, analysis_name + '.annotated.h5ad')
# Re-read the annotated object from disk. This replaces the in-memory `adata`, so the
# 'RCat' and 'celltype*_pub' columns assigned earlier in this notebook are only
# available afterwards if they are already stored in this file.
# NOTE(review): the only visible write of this file is commented out; confirm the
# file on disk was saved after the nomenclature cells were run.
adata = sc.read_h5ad(os.path.join(results_folder, analysis_name + '.annotated.h5ad') )

### Read TMB data, plot and analyse

In [None]:
### Patients 38 and 12 should be removed from R vs. NR analysis
#ddata=ddata[ddata.obs['PatientID']!='P38'].copy() 
#ddata=ddata[ddata.obs['PatientID']!='P12'].copy() 

In [None]:
# Tumor mutational burden table; later cells read its 'SampleID' and 'TMB (Mts/Mb)' columns.
# NOTE(review): hard-coded absolute path (with a doubled slash) -- consider deriving it
# from a configurable data directory.
tmb=pd.read_csv('/Fullanalysis//TMB.csv')

In [None]:
tmb

In [None]:
### TIL samples M38, M12, M64 and M79 are removed from the R vs. NR analysis
# .copy() makes the filtered table an independent frame, so the metadata columns
# added to it in the following cells do not trigger pandas' SettingWithCopyWarning.
tmb=tmb.loc[~tmb['SampleID'].isin(['M38_TIL','M12_TIL','M64_TIL','M79_TIL']),:].copy()

In [None]:
# Per-sample metadata for every row of the TMB table, in tmb['SampleID'] order.
# For each sample the value of its first cell in adata.obs is taken. The lookup works
# on the obs DataFrame directly instead of copying the whole AnnData once per sample.
rlist=[]
patid=[]
ihc=[]
gender=[]
lesion=[]
mutation=[]
adata_obs=adata.obs
for myid in list(tmb['SampleID']):
    sample_obs=adata_obs.loc[adata_obs['SampleID']==myid]
    # list(...)[0] raises IndexError when the sample has no cells in adata
    rlist.append(list(sample_obs['RCat'])[0])
    patid.append(list(sample_obs['PatientID'])[0])
    ihc.append(list(sample_obs['CD3IHC_RICZ'])[0])
    gender.append(list(sample_obs['Gender'])[0])
    lesion.append(list(sample_obs['Lesion'])[0])
    mutation.append(list(sample_obs['Mutation'])[0])

In [None]:
# Attach the per-sample metadata lists to the TMB table, one column per list.
tmb_new_columns = ['RCat', 'PatientID', 'CD3IHC_RICZ', 'Gender', 'Lesion', 'Mutation']
tmb_new_values = [rlist, patid, ihc, gender, lesion, mutation]
for tmb_column, tmb_values in zip(tmb_new_columns, tmb_new_values):
    tmb[tmb_column] = tmb_values

In [None]:
# Adds a constant column: every row gets the literal string 'PatientID'.
# NOTE(review): this stores the text 'PatientID', not the patient identifiers -- confirm intended.
tmb['group']='PatientID'

In [None]:
tmb

In [None]:
tmb.groupby('RCat')['CD3IHC_RICZ'].value_counts()


In [None]:
# Median TMB per response category.
# The SampleID != 'M64_TIL' filter has no effect once the exclusion cell above has
# run, because that cell already drops M64_TIL.
tmb.loc[tmb['SampleID']!='M64_TIL',:].groupby('RCat')['TMB (Mts/Mb)'].median()

In [None]:
#R; TF; NR_nadj; NR_adj

In [None]:
# Colour per response category (keys are the 'RCat' labels)
color_dict = {'R': 'coral', 'TF': 'firebrick', 'NR_nadj': 'lightskyblue','NR_adj': 'royalblue'}

In [None]:
from scipy import stats
import itertools

# Pairwise comparison of TMB between all response categories.
# pwilc / pt map '<catA>-<catB>' to the Mann-Whitney U and t-test p-values.
totest=list(itertools.combinations(['R','TF','NR_nadj','NR_adj'], 2))
pwilc={}
pt={}
for pairs in totest:
    pair_label=pairs[0]+'-'+pairs[1]
    tmb_first=list(tmb.loc[tmb['RCat']==pairs[0],:]['TMB (Mts/Mb)'])
    tmb_second=list(tmb.loc[tmb['RCat']==pairs[1],:]['TMB (Mts/Mb)'])
    if not tmb_first or not tmb_second:
        # A category without samples cannot be tested; record NaN instead of failing
        pwilc[pair_label]=float('nan')
        pt[pair_label]=float('nan')
        continue
    # alternative is stated explicitly because the default of mannwhitneyu has
    # differed between scipy releases; this pins the two-sided p-value.
    pwilc[pair_label]=stats.mannwhitneyu(tmb_first, tmb_second, alternative='two-sided')[1]
    pt[pair_label]=stats.ttest_ind(tmb_first, tmb_second)[1]

In [None]:
# One row per category pair, one column per test
myps = pd.DataFrame([pwilc, pt], index=['MannWhitney', 'T-test']).T

In [None]:
# Write the p-value table as a tab-separated file into the figure folder
# (figdir ends with '/', so plain concatenation yields a valid path)
myps.to_csv(figdir+'Pvals-TMB.tsv',sep='\t')

In [None]:
myps

In [None]:
# Box plot of TMB per response category with the individual samples overlaid as points.
tmb_plot_data = tmb.loc[tmb['SampleID']!='M64_TIL',:]
fig = sns.boxplot(data=tmb_plot_data, x='RCat', y='TMB (Mts/Mb)', palette=color_dict)
fig = sns.swarmplot(data=tmb_plot_data, x='RCat', y='TMB (Mts/Mb)', color='black')
# Saving is currently disabled:
#fig.figure.savefig(figdir+'/TMB-per-response.pdf', bbox_inches="tight", dpi=300) 
#fig.figure.savefig(figdir+'/TMB-per-response.eps', format='eps', bbox_inches="tight", dpi=300)
#fig.figure.savefig(figdir+'/TMB-per-response.svg', format='svg', bbox_inches="tight", dpi=300)

In [None]:
tmb

In [None]:
# Index the TMB table by patient; the 'PatientID' column itself is kept
tmb.index=tmb['PatientID']

In [None]:
tmb

In [None]:
# Remove cells labelled 'melanocytic melanoma cell' in 'celltype0'.
# `adata` itself is overwritten, so everything below works on the reduced object.
adata=adata[adata.obs['celltype0']!='melanocytic melanoma cell'].copy()

### Score additional signatures 

In [None]:
# Gene sets stored under 'good' / 'bad', scored into the obs columns SADE_G / SADE_B.
sade = {
    'good': ['PLAC8', 'LTB', 'LY9', 'SELL', 'TCF7',  'CCR7','IL7R'],
    'bad': ['CCL3', 'CD38', 'HAVCR2', 'ENTPD1', 'WARS'],
}

for sade_key, sade_score in (('good', "SADE_G"), ('bad', "SADE_B")):
    sc.tl.score_genes(adata, sade[sade_key], score_name=sade_score, use_raw=True)

In [None]:
# Gene sets stored under 'Tnai' / 'Texh' / 'Tcyt', scored into TIR_NAI / TIR_EXH / TIR_CYT.
tirosh = {
    'Tnai': ['CCR7', 'TCF7', 'SELL', 'LEF1'],
    'Texh': ['PDCD1', 'TIGIT', 'LAG3', 'HAVCR2', 'CTLA4'],
    'Tcyt': ['NKG7', 'CCL4', 'CST7', 'PRF1', 'GZMB', 'GZMA', 'IFNG', 'CCL3'],
}

for tirosh_key, tirosh_score in (('Tnai', "TIR_NAI"), ('Texh', "TIR_EXH"), ('Tcyt', "TIR_CYT")):
    sc.tl.score_genes(adata, tirosh[tirosh_key], score_name=tirosh_score, use_raw=True)


In [None]:
# Gene set stored under 'TStr', scored into the obs column CHU_TSTR.
chu = {'TStr': ['HSPA1A', 'NR4A1', 'BAG3', 'HSPA1B']}
sc.tl.score_genes(adata, chu['TStr'], score_name="CHU_TSTR", use_raw=True)

In [None]:
# T-cell gene sets; each scored set is written to the obs column named below.
li = {
    'NaiTcell': ['CCR7','IL7R','TCF7'],
    'MemTcell': [ 'SELL', 'C1orf21', 'KLRB1', 'ARL4C'],
    'CD8Cyt': ['GZMH', 'GNLY', 'FGFBP2', 'CX3CR1','KLF2','TBX21', 'PLAC8', 'FGR','SPON2', 'MYBL1','ZNF683','KLRG1'],
    # note: 'PRDM1' is listed twice in this set
    'CD8Dys': ['PDCD1','LAG3','TIGIT', 'CXCL13','RBPJ', 'ZBED2', 'ETV1', 'ID3', 'MAF', 'PRDM1','EOMES', 'IFNG',
               'HAVCR2','PTMS','FAM3C','ICOS','TNFRSF4', 'CCL4L2', 'PRDM1','SPOCK2', 'CCL3', 'TOX', 'ENTPD1','ITGAE'],
    'CD8Trans': ['GZMK'],
    'CD4Treg': ['FOXP3','IKZF2','IL2RA'],  #ENTPD1, ITGAE, KLRG1 -- defined but not scored below
    'TExh': ['TNFRSF9', 'CSF1', 'TIGIT'],
}

li_score_names = [
    ('NaiTcell', "LI_NAI"),
    ('MemTcell', "LI_MEM"),
    ('CD8Cyt', "LI_CYT"),
    ('CD8Dys', "LI_DYS"),
    ('CD8Trans', "LI_TRANS"),
    ('TExh', "LI_EXH"),
]
for li_key, li_score in li_score_names:
    sc.tl.score_genes(adata, li[li_key], score_name=li_score, use_raw=True)


In [None]:
# T-cell state gene sets; each one is scored into the obs column named below.
wu = {
    'CD4Tcell': ['IL6ST','CRIP1'],
    'Teff': ['CX3CR1','GNLY', 'NKG7',  'GZMH', 'KLRD1', 'GZMB', 'PRF1',
             'IFITM2', 'LITAF','ITGB2','GZMA','GPR56','KLRC2','GZMM','RAP1B'],  ## KLRC2 is higher in Rs
    'TEM': ['GZMK','CCL4',   'DUSP2', 'CD74','DNAJB1','FOS','CCL3','IFNG'],  ## DNAJ1, DUSP2, GZMK is higher in NRs
    'TRM': ['CCL4', 'XCL1',   'XCL2',   'ZNF683'],
    'IL17': ['NCR3','KLRB1','LYAR','IL7R'],
}

wu_score_names = [
    ('CD4Tcell', "WU_CD4"),
    ('Teff', "WU_TEFF"),
    ('TEM', "WU_TEM"),
    ('TRM', "WU_TRM"),
    ('IL17', "WU_IL17"),
]
for wu_key, wu_score in wu_score_names:
    sc.tl.score_genes(adata, wu[wu_key], score_name=wu_score, use_raw=True)

In [None]:
# Dendritic-cell gene sets, scored into the obs columns MA-MREGDC / MA-cDC1 / MA-cDC2.
maier = {
    'aDC': ['CD80', 'CD86', 'CD40', 'RELB','CD83','CD274', 'PDCD1LG2','CD200','FAS','SOCS1','SOCS2','ALDH1A2'],
    'cDC1': ['XCR1','CLEC9A','CADM1'],
    # Itgam, Cd209a and Sirpa
    # NOTE(review): 'CD209A' is the upper-cased form of the symbol quoted above; confirm it
    # is present in the raw gene names of this dataset.
    'cDC2': ['ITGAM','CD209A','SIRPA'],
}

for maier_key, maier_score in (('aDC', "MA-MREGDC"), ('cDC1', "MA-cDC1"), ('cDC2', "MA-cDC2")):
    sc.tl.score_genes(adata, maier[maier_key], score_name=maier_score, use_raw=True)

In [None]:
# Gene list scored into the obs column 'ResCD8Tcell'; `res` is reused further down
# to score the same list on the CD8 T-cell object.
res=['S1PR5','KLRG1','SLAMF6','CXCR3','S1PR1','ITGB7','CD8A','IL7R','TCF7','PDCD1']
sc.tl.score_genes(adata,res,score_name="ResCD8Tcell",use_raw=True)


In [None]:
# Gene lists for the INHREC / EFF / MIGRATION / TF scores computed in later cells.
# Three misspelled symbols were corrected, since a misspelled symbol can never match
# a gene in the data: 'CD224A' -> 'CD244', 'CD1010' -> 'CD101', 'BATHF' -> 'BATF'.
# Inhibitory receptors.
# Original note: "Lowe antiIl2 and anti comb compared to untreated and antiPD1"
inhrec=['CD160','LAG3','CD244','BTLA','PDCD1','HAVCR2','TIGIT','CD101']
# Effector molecules
eff=['GZMA','GZMB','LAMP1']
# Migration / homing
# NOTE(review): 'LY6C2' looks like an upper-cased mouse symbol; confirm it exists in this dataset.
migration=['CCR2','CXCR3','CXCR4','CX3CR1','S1PR1','ITGA1','ITGA4','ITGAE','ITGB1','ITGB7','CD44','LY6C2','CXCR5']
# Transcription factors
tf=['KLF2','LEF1','BACH2','TBX21','TCF7','AHR','BATF','BCL6','EGR1','EGR2',
'EOMES','FOXO1','FOXO3','IKZF2','IRF4','MAF','NFATC1','NR4A1','NR4A2','NR4A3','PRDM1','TOX','TOX2']


In [None]:
# Score the four functional gene lists on the full object, one obs column per list.
functional_sets = [(inhrec, "INHREC"), (eff, "EFF"), (migration, "MIGRATION"), (tf, "TF")]
for functional_genes, functional_score in functional_sets:
    sc.tl.score_genes(adata, functional_genes, score_name=functional_score, use_raw=True)

In [None]:
# Path of a GMT gene-set file expected in the working directory; the same path is
# reused below for the other AnnData objects.
apoptosis= root_path+'/ALL_geneset_apoptosis.gmt'
# Score the signatures of that file on adata through besca (method='scanpy', raw values)
bc.tl.sig.combined_signature_score(adata, apoptosis,
                             UP_suffix='_UP', DN_suffix='_DN', method='scanpy',
                             overwrite=False, verbose=False,
                             use_raw=True, conversion=None)
# Score column names noted by the author:
#score_HALLMARK_APOPTOSIS_scanpy
#score_APOPTOTIC_PROGRAM_scanpy
#score_IND_OF_APOP_BY_EXCEL_SIGNALS_scanpy

### Further read annotations from separate analyses 

In [None]:
# Folder holding the AnnData files of the separate velocity analyses
velodir=results_folder+'/velocity/'

In [None]:
velodir

In [None]:
# CD4 T-cell object from the velocity analysis (per the file name: no Tregs, no proliferating cells)
cd4tcdata=sc.read(velodir+'Velo-subCD4TnoTregnoProlif_v2-dyn.calculatedVelo.final.h5ad')

In [None]:
#sc.pl.umap(cd4tcdata,color='celltype3_pub')

In [None]:
# velodir is assigned again with the same value as above, so this cell can run on its own
velodir=results_folder+'/velocity/'
# CD8 T-cell objects from the velocity analysis: the '_exclude' file and the full file
cd8tcdata=sc.read(velodir+'Velo-CD8Tcell-All-PBMCandTIL-dyn_after_terminal_initial_exclude.h5ad')
cd8tcdata_full=sc.read(velodir+'Velo-CD8Tcell-All-PBMCandTIL-dyn_after_terminal_initial.h5ad')

In [None]:
# Score on the full CD8 T-cell object the same gene lists that were scored on adata,
# in the same order and into the same obs column names.
cd8_signature_scores = [
    (chu['TStr'], "CHU_TSTR"),

    (sade['good'], "SADE_G"),
    (sade['bad'], "SADE_B"),

    (tirosh['Tnai'], "TIR_NAI"),
    (tirosh['Texh'], "TIR_EXH"),
    (tirosh['Tcyt'], "TIR_CYT"),

    (li['NaiTcell'], "LI_NAI"),
    (li['MemTcell'], "LI_MEM"),
    (li['CD8Cyt'], "LI_CYT"),
    (li['CD8Dys'], "LI_DYS"),
    (li['CD8Trans'], "LI_TRANS"),
    (li['TExh'], "LI_EXH"),

    (wu['CD4Tcell'], "WU_CD4"),
    (wu['Teff'], "WU_TEFF"),
    (wu['TEM'], "WU_TEM"),
    (wu['TRM'], "WU_TRM"),
    (wu['IL17'], "WU_IL17"),

    (res, "ResCD8Tcell"),

    (inhrec, "INHREC"),
    (eff, "EFF"),
    (migration, "MIGRATION"),
    (tf, "TF"),
]
for cd8_genes, cd8_score in cd8_signature_scores:
    sc.tl.score_genes(cd8tcdata_full, cd8_genes, score_name=cd8_score, use_raw=True)

# Signatures from the apoptosis GMT file, scored through besca
bc.tl.sig.combined_signature_score(
    cd8tcdata_full, apoptosis,
    UP_suffix='_UP', DN_suffix='_DN', method='scanpy',
    overwrite=False, verbose=False,
    use_raw=True, conversion=None,
)
#score_HALLMARK_APOPTOSIS_scanpy


In [None]:
# Read the signatures of the apoptosis GMT file (directed=False)
apoptosissig = bc.tl.sig.read_GMT_sign(apoptosis,directed=False)

In [None]:
# Monocyte/macrophage object from the velocity analysis (file without the '.subsampled' suffix)
macrodata_full=sc.read(velodir+'Velo-MonoMacro-All-PBMCandTIL-dyn.h5ad')

In [None]:
# Label of the velocity subset; not referenced again in the visible part of the notebook
velosubset='MonoMacro'

In [None]:
# Dendritic-cell and (subsampled) monocyte/macrophage objects from the velocity analysis
dcdata = sc.read(velodir+'Velo-DCs-All-PBMCandTIL-dyn.h5ad')
macrodata = sc.read(velodir+'Velo-MonoMacro-All-PBMCandTIL-dyn.subsampled.origmap.h5ad')

# Score the apoptosis GMT signatures on both objects, DCs first
for velo_adata in (dcdata, macrodata):
    bc.tl.sig.combined_signature_score(
        velo_adata, apoptosis,
        UP_suffix='_UP', DN_suffix='_DN', method='scanpy',
        overwrite=False, verbose=False,
        use_raw=True, conversion=None,
    )


In [None]:
# Score the four functional gene lists on the monocyte/macrophage object.
macro_functional_sets = [(inhrec, "INHREC"), (eff, "EFF"), (migration, "MIGRATION"), (tf, "TF")]
for macro_genes, macro_score in macro_functional_sets:
    sc.tl.score_genes(macrodata, macro_genes, score_name=macro_score, use_raw=True)

In [None]:
#list(cd8tcdata.obs.columns)

In [None]:
#cd8tcdata.obs['terminal_states_probs'].sort_values()

### Plot the UMAPs individually


In [None]:
# Split the full object by 'Sample type'.
# Both names are assigned again further down from the filtered object `ddata`.
tildata=adata[adata.obs['Sample type']=='TIL'].copy()
pbmcdata=adata[adata.obs['Sample type']=='PBMC'].copy()

### or read the separate analysis ###

## Plot Heatmap of marker gene expression

In [None]:
# Cell-type marker signatures from the GMT file in the besca installation folder
gmt_file_anno= bescapath + '/besca/datasets/genesets/CellNames_scseqCMs6_sigs.gmt'

# mymarkers is indexed by signature name further down (e.g. mymarkers['Bcell'])
mymarkers = bc.tl.sig.read_GMT_sign(gmt_file_anno,directed=False)
mymarkers = bc.tl.sig.filter_siggenes(adata, mymarkers) ### remove genes not present in dataset or empty signatures


In [None]:
#mymarkers

In [None]:
### Hand-picked markers ###
# Lists ending in [0:2] / [0:3] keep only their first two / three genes; the remaining
# genes stay in the source as documentation of the wider candidate set.

cDC1=['CLEC9A', 'CLNK', 'XCR1', 'ASB2', 'VAC14'][0:2]# --- should allow us to uniquely identify
cDC2=['FCER1A', 'CD1C', 'CD1E', 'PKIB', 'CLEC10A'][0:2]# --- should allow us to uniquely identify
TAMCx=['CXCL11', 'CXCL10', 'CXCL9'][0:2]# --- also expressed by other macrophages and sometimes DCs, but DCs will be identified by other markers
TMid=['MARCO', 'BAG3', 'CLEC5A', 'PPARG', 'CXCL5'][0:2] #--- should allow us to uniquely identify
TAM=['GPR34','C3', 'APOE', 'MSR1', 'F13A1'][0:2]# -- should allow us to uniquely identify
NK=['KLRC1',  'KLRF1', 'NCR1', 'IL18RAP'][0:2]# -- should allow us to identify all NKs; if one adds SELL then the two types can be distinguished
Tmem=['IL7R',  'KLRB1', 'AQP3','GPR183' ][0:2]
Tc4CM=['TNFRSF4', 'TNFRSF18' ]
macro=['FCGR1A','MRC1']
plasma=['SDC1','TNFRSF17']
tmo=['MGST1','S100A12', 'S100A8', 'S100A9', 'VCAN', 'CD300E', 'IL1B'][0:3]

#sc.pl.dotplot(tildata,var_names=list(treg)[0:15],groupby='my_type')
# T-cell, cell-cycle and B-cell marker lists (used in full)
tregc=['FOXP3','RTKN2','IL2RA','LAYN']
tcmil7=['NCR3','GRAP2','KLRG1']
tcnai=['LEF1','FOXP1','CCR7','SELL','TCF7','IL7R']
tc8em=['LYAR','GZMM','C12orf75','PIK3R1']
tc8exp=['GPR171','XCL1','XCL2','TGIF1','PDCD1']
exh=['LAG3','CD38','IFNG','ENTPD1','HAVCR2','TOX']
cytox=['KLRD1','PRF1','GZMK','GZMB']
cc=['TOP2A','MKI67','TCF19','STMN1','PCNA']
bcell=['CD19','CD79A','MS4A1']

# Marker list stored under the name dcact
dcact=['LAMP3','CLEC9A','CLEC10A','CD68','CD163','MS4A1','CD40','CD80','CD86','CD70']

In [None]:
set(adata.obs['celltype3_pub'])

In [None]:
# Compartment-level copies of the full object:
#   myelobc - myeloid leukocytes and B-lineage lymphocytes (by 'celltype1')
#   tnk     - T cells and natural killer cells (by 'celltype1')
#   cdc     - the three myeloid dendritic cell types (by 'celltype3_pub')
myelobc=adata[adata.obs['celltype1'].isin(['myeloid leukocyte','lymphocyte of B lineage'])].copy()
tnk=adata[adata.obs['celltype1'].isin(['T cell','natural killer cell'])].copy()
cdc=adata[adata.obs['celltype3_pub'].isin(['CCR7-positive myeloid dendritic cell',
                                          'CD1c-positive myeloid dendritic cell',
                                           'CD141-positive myeloid dendritic cell'])].copy()

#### B cells and Myeloids

In [None]:
# Signature names (keys of mymarkers) whose leading genes are collected in the next cell
cmarkers=['NaiBcell','MemBcell','GermCenterBcell','Plasma','Myeloid','cDC1','cDC2','cDC_CCR7','pDC',
                    'NClassMonocyte','Macrophage_MARCO','Macrophage_MSR1','Macrophage_CXCL9','ClassMonocyte']

In [None]:

# All 'Bcell' signature genes followed by the first four genes of every signature in cmarkers
markersgoi = mymarkers['Bcell'] + [
    marker_gene
    for signature_name in cmarkers
    for marker_gene in mymarkers[signature_name][0:4]
]

In [None]:
#markersgoi

In [None]:
# Dendrogram of the publication cell types in the myeloid / B-cell subset.
# scanpy is already imported as `sc` in the first cell, so it is not re-imported here.
sc.tl.dendrogram(myelobc, groupby='celltype3_pub')

### Filter for future downstream analyses

In [None]:
# Called without arguments, i.e. with scanpy's own default figure parameters.
# NOTE(review): presumably this overrides some of the rcParams set by set_pub() and the
# dpi=80 chosen in the first cell -- confirm this reset is intended.
sc.settings.set_figure_params()

In [None]:
### output for Diff Abund analysis
# Experiments left out of the downstream analyses, with the recorded reason
#adata[adata.obs['cell_group']!='Mel'].copy()
excluded_experiments = [
    'M64_TIL',  ### more dropouts
    'M67_TIL',  ### more dropouts less cells
    'M79_TIL',  ### higher mitochondria
    'M43_TIL',  ### less cells, too shallow sequencing
]
cdata = adata[~adata.obs['experiment'].isin(excluded_experiments)].copy()


In [None]:
# UMAP of the filtered object, coloured by patient
sc.pl.umap(cdata, color='PatientID')

In [None]:
# Switch for the optional per-individual frequency plots in the following cells
showallplots=False

In [None]:
# Optional: percentage of each 'celltype3_pub' per patient among the TIL cells
if showallplots:
    pct=bc.tl.count_occurrence_subset(cdata[cdata.obs['Sample type']=='TIL'], count_variable = 'celltype3_pub', 
                              subset_variable = 'PatientID',return_percentage=True)


In [None]:
# Optional: stacked bar chart of the TIL frequencies, saved as pdf, eps and svg
if showallplots:
    pct.transpose().plot.bar(stacked=True, figsize=(20, 10))
    plt.legend(bbox_to_anchor=(1.0, 1.0))
    # (file name, explicit format); None lets savefig use the file extension
    til_freq_outputs = (
        ('Frequencies-per-individual-TIL.pdf', None),
        ('Frequencies-per-individual-TIL.eps', 'eps'),
        ('Frequencies-per-individual-TIL.svg', 'svg'),
    )
    for til_freq_file, til_freq_format in til_freq_outputs:
        plt.savefig(figdir+til_freq_file, format=til_freq_format, bbox_inches="tight", dpi=300)
    plt.show()

In [None]:
# Optional: percentage of each 'celltype3_pub' per patient among the PBMC cells
# (overwrites the `pct` computed for the TIL cells)
if showallplots:
    pct=bc.tl.count_occurrence_subset(cdata[cdata.obs['Sample type']=='PBMC'], count_variable = 'celltype3_pub', 
                              subset_variable = 'PatientID',return_percentage=True)


In [None]:
# Optional: stacked bar chart of the PBMC frequencies, saved as pdf, eps and svg
if showallplots:
    pct.transpose().plot.bar(stacked=True, figsize=(20, 10))
    plt.legend(bbox_to_anchor=(1.0, 1.0))
    # (file name, explicit format); None lets savefig use the file extension
    pbmc_freq_outputs = (
        ('Frequencies-per-individual-PBMC.pdf', None),
        ('Frequencies-per-individual-PBMC.eps', 'eps'),
        ('Frequencies-per-individual-PBMC.svg', 'svg'),
    )
    for pbmc_freq_file, pbmc_freq_format in pbmc_freq_outputs:
        plt.savefig(figdir+pbmc_freq_file, format=pbmc_freq_format, bbox_inches="tight", dpi=300)
    plt.show()

In [None]:
# Remove patients 38 and 12, then split the remaining cells by sample type.
# NOTE(review): PatientID is compared with integers here; confirm the column holds
# integer identifiers, otherwise these filters remove nothing.
ddata=cdata[cdata.obs['PatientID']!=38].copy()
# .copy() so that ddata is a real object rather than a view: its obs is written to
# in a later cell.
ddata=ddata[ddata.obs['PatientID']!=12].copy()
tildata=ddata[ddata.obs['Sample type']=='TIL'].copy()
pbmcdata=ddata[ddata.obs['Sample type']=='PBMC'].copy()


In [None]:
### Remove patients 67, 68 and 87 from the PBMC object
# One combined filter instead of three chained views; .copy() yields a real object
# because pbmcdata.obs is written to in the next cell.
pbmcdata=pbmcdata[~pbmcdata.obs['PatientID'].isin([67,68,87])].copy()

In [None]:
# Drop PatientID categories that no longer occur after the filtering above
for filtered_adata in (ddata, tildata, pbmcdata):
    filtered_adata.obs['PatientID'] = filtered_adata.obs['PatientID'].cat.remove_unused_categories()

In [None]:
# Store PatientID as plain strings in all three filtered objects
for filtered_adata in (ddata, tildata, pbmcdata):
    filtered_adata.obs['PatientID'] = filtered_adata.obs['PatientID'].astype('str')

In [None]:
# Export the cell metadata (obs) of the TIL, PBMC and combined objects as TSV files.
# NOTE(review): results_folder is built with os.path.join and has no trailing separator,
# so results_folder + analysis_name puts these files next to the results folder with
# the analysis name doubled in the file name. Confirm that this is what the downstream
# scripts expect before changing it.
tildata.obs.to_csv(results_folder+ analysis_name + '_fullMeta_res2.tsv',sep='\t')
#### write adata.obs for further processing 
pbmcdata.obs.to_csv(results_folder+ analysis_name + '_fullMeta_PBMC_res2.tsv',sep='\t')
ddata.obs.to_csv(results_folder+ analysis_name + '_fullMeta_res2_all.tsv',sep='\t')


In [None]:
results_folder+ analysis_name + '_fullMeta_res2.tsv'

In [None]:
# Cells of ddata whose 'experiment' occurs in the filtered TIL or PBMC object
forvelo=ddata[ddata.obs['experiment'].isin(list(set(tildata.obs['experiment']).union(pbmcdata.obs['experiment'])))].copy()


In [None]:
# Save the filtered object next to the annotated one, with the suffix '.annotated.filtered.h5ad'
forvelo.write(os.path.join(results_folder, analysis_name + '.annotated.filtered.h5ad'))

Plot (ATF3, BIRC3, ANXA1), TGFb (TGIF1, PPP1R15A) and TNFa signaling (PPP1R15A, BTG3, BTG2, PLEK, TNFAIP3) genes for the T cell data

In [None]:
### Prepare means
# obs column holding the response category, and the order in which categories are plotted
condition='RCat'
#condlist=list(set(cdata.obs[condition]))
condlist=['R','TF','NR_nadj','NR_adj']
# Sample-type subsets of the velocity objects (taken without .copy()).
# cd4_TIL is reassigned in the next cell from tildata.
cd8_TIL=cd8tcdata[cd8tcdata.obs['Sample type'].isin(['TIL'])]
cd8_PBMC=cd8tcdata[cd8tcdata.obs['Sample type'].isin(['PBMC'])]
cd4_TIL=cd4tcdata[cd4tcdata.obs['Sample type'].isin(['TIL'])]

In [None]:
# Redefine cd4_TIL from the filtered TIL object: the listed CD4 T-cell types by
# 'celltype3_pub', including 'regulatory T cell'. Replaces the assignment made above.
cd4_TIL=tildata[tildata.obs['celltype3_pub'].isin(['CD4-positive, cytotoxic T cell','IL7R-max CD4-positive T cell',
                                                    'central memory CD4-positive T cell', 'effector memory CD4-positive T cell',
                                                           'exhausted-like CD4-positive T cell','naive CD4-positive T cell',
                                                            'proliferating CD4-positive T cell','regulatory T cell'])]

In [None]:
# Values for the CD4 TIL cells from bc.get_means, called with 'PatientID' and the condition column.
# `mean` is what the box plots below read; `fct` is not used in the visible cells.
mean,fct=bc.get_means(cd4_TIL,'PatientID', condition)


In [None]:
### Prepare means
condition='RCat'
#condlist=list(set(cdata.obs[condition]))
condlist=['R','TF','NR_nadj','NR_adj']

# Genes of interest; one figure is produced and saved per gene
goi=['ZNF331','TNFAIP3','ANXA1','DNAJA1','PPP1R15A','NR4A2','JUNB','SERTAD1']

# Uses whichever `mean` was computed last (the CD4 TIL means when cells run in order);
# the files are saved with the 'Geneplots_cd4til_' prefix.
for myg in goi:
    bc.pl.box_per_ind(mean, myg,
                  condition,order=condlist) # palette=color_dict
    #plt.savefig(figdir+'Geneplots_cd8til_'+myg+'.pdf', bbox_inches="tight", dpi=300)
    #plt.savefig(figdir+'Geneplots_cd8til_'+myg+'.eps', format='eps', bbox_inches="tight", dpi=300)
    plt.savefig(figdir+'Geneplots_cd4til_'+myg+'.svg', format='svg', bbox_inches="tight", dpi=300)

In [None]:
figdir+'Geneplots_cd4til_'+myg+'.svg'

In [None]:

# Values for the CD8 TIL cells; overwrites the `mean` / `fct` computed for CD4.
# `condition` is read here before its assignment further down in this cell, so the
# value set by an earlier cell is used.
mean,fct=bc.get_means(cd8_TIL,'PatientID', condition)
#meanmacro,fct=bc.get_means(macromacro,'PatientID', condition)
#meanmtil,fct=bc.get_means(macro_TIL,'PatientID', condition)
#meanmpbmc,fct=bc.get_means(macro_PBMC,'PatientID', condition)

### Prepare means
condition='RCat'
#condlist=list(set(cdata.obs[condition]))
condlist=['R','TF','NR_nadj','NR_adj']
#macro_TIL=macrodata[macrodata.obs['Sample type'].isin(['TIL'])]
#macro_PBMC=macrodata[macrodata.obs['Sample type'].isin(['PBMC'])]


In [None]:

#sc.pl.matrixplot(macromacro, var_names=goi, groupby='RCat', standard_scale='var')


# Genes of interest; replaces the list defined for the CD4 plots
goi=['BTG2','AMICA1','KLRC2','XCL1','ATF3','BIRC3','ANXA1','TGIF1','PPP1R15A','TNFAIP3']

# Sixth entry of the list, i.e. 'BIRC3'
myg=goi[5]


In [None]:
figdir

In [None]:
### Prepare means
condition='RCat'
#condlist=list(set(cdata.obs[condition]))
condlist=['R','TF','NR_nadj','NR_adj']
# Sample-type subsets of the monocyte/macrophage object
macro_TIL=macrodata[macrodata.obs['Sample type'].isin(['TIL'])]
macro_PBMC=macrodata[macrodata.obs['Sample type'].isin(['PBMC'])]

# Within TIL: macrophages and classical monocytes by 'celltype2_pub'
macromacro=macro_TIL[macro_TIL.obs['celltype2_pub'].isin(['macrophage'])]
monomacro=macro_TIL[macro_TIL.obs['celltype2_pub'].isin(['classical monocyte'])]
# `mean` now holds the classical-monocyte values (overwriting the CD8 ones);
# `fct` is overwritten by every call and ends up holding the PBMC result.
mean,fct=bc.get_means(monomacro,'PatientID', condition)
meanmacro,fct=bc.get_means(macromacro,'PatientID', condition)
meanmtil,fct=bc.get_means(macro_TIL,'PatientID', condition)
meanmpbmc,fct=bc.get_means(macro_PBMC,'PatientID', condition)


In [None]:
# Patient order for bar plots (20 ids) and a colour list of the same length.
# The colours are the values of color_dict; presumably entry i colours patient
# porder[i] by response category -- confirm the pairing when patients are added or removed.
porder=[63,72,40,86,33,91,29,87,13,34,67,68,69,83,79,82,11,43,64,77]
pcolor=['coral','coral','coral','lightskyblue','lightskyblue','lightskyblue','lightskyblue','lightskyblue',
                  'firebrick','firebrick','firebrick','firebrick','firebrick','firebrick','firebrick','firebrick','firebrick',
                  'royalblue','royalblue','royalblue']


In [None]:
# Convert the (PatientID) index of the TMB table to strings
tmb.index=tmb.index.astype('str')

In [None]:
#mycol=['MITFP', 'MHC1','REGIO','CITRESUP','CITRESDN']
# Per-patient summary of the PBMC cells: mean of every numeric obs column, the
# patient-level annotations and the number of cells.
pbmctest=adata[adata.obs['Sample type']=='PBMC'].copy()
pbmc_obs=pbmctest.obs
# numeric_only=True: obs also holds categorical/text annotations; recent pandas raises
# on those instead of silently dropping them.
dfPatP=pbmc_obs.groupby(['PatientID']).mean(numeric_only=True)
# Patient-level annotations: value of the patient's first cell. .iloc[0] is explicitly
# positional, and the lookup uses the obs table rather than subsetting the AnnData.
for patient_anno in ['RCat','Mutation']:
    dfPatP[patient_anno]=[pbmc_obs.loc[pbmc_obs['PatientID']==x, patient_anno].iloc[0] for x in list(dfPatP.index)]

dfPatP['nr_cells']=list(bc.tl.count_occurrence(pbmctest,count_variable='PatientID').loc[dfPatP.index,'Counts'])

dfPatP

In [None]:
# Write the per-patient PBMC table as a tab-separated file into the figure folder
dfPatP.to_csv(figdir+'ValuesPerPatient-PBMC-per-response.tsv',sep='\t')


In [None]:
from matplotlib import rcParams
# Figure size for this and all following matplotlib figures
rcParams['figure.figsize'] = 8,4
# Number of PBMC cells per patient; bars ordered by porder and coloured by pcolor
sns.barplot(x=dfPatP.index,y='nr_cells',
            data=dfPatP, order=porder, palette=pcolor)

In [None]:
# TIL cells of the three selected CD8 T-cell states from the full CD8 object
matcd8_celltypes = [
    'exhausted-like CD8-positive T cell',
    'cytokine secreting effector CD8-positive T cell',
    'proliferating CD8-positive T cell',
]
matcd8_keep = (
    cd8tcdata_full.obs['celltype3_pub'].isin(matcd8_celltypes)
    & cd8tcdata_full.obs['Sample type'].isin(['TIL'])
)
matcd8 = cd8tcdata_full[matcd8_keep].copy()
#matcd8=matcd8[matcd8.obs['leiden']!='2'].copy()
#matcd8=matcd8[matcd8.obs['leiden']!='4'].copy()

In [None]:
#mycol=['MITFP', 'MHC1','REGIO','CITRESUP','CITRESDN']
# Per-patient summary of the selected CD8 TIL cells: mean of every numeric obs column,
# the patient-level annotations and the number of cells.
matcd8_obs=matcd8.obs
# numeric_only=True: obs also holds categorical/text annotations; recent pandas raises
# on those instead of silently dropping them.
dfPatcd8=matcd8_obs.groupby(['PatientID']).mean(numeric_only=True)
# Patient-level annotations: value of the patient's first cell. .iloc[0] is explicitly
# positional, and the lookup uses the obs table rather than subsetting the AnnData.
for patient_anno in ['RCat','Mutation','Lesion']:
    dfPatcd8[patient_anno]=[matcd8_obs.loc[matcd8_obs['PatientID']==x, patient_anno].iloc[0] for x in list(dfPatcd8.index)]

dfPatcd8['nr_cells']=list(bc.tl.count_occurrence(matcd8,count_variable='PatientID').loc[dfPatcd8.index,'Counts'])

dfPatcd8

In [None]:
# Per-patient summary of the TIL cells: mean of every numeric obs column,
# plus response category, mutation status, lesion site and number of cells.
til_obs=tildata.obs
# numeric_only=True: obs mixes numeric scores with categorical/string
# annotations, and pandas >= 2.0 raises on those instead of dropping them.
dfPat=til_obs.groupby(['PatientID']).mean(numeric_only=True).copy()
# Patient-level annotations are read from the first cell of each patient;
# .iloc[0] is an explicit positional lookup (plain [0] on a label-indexed
# Series is deprecated).
for meta_col in ['RCat','Mutation','Lesion']:
    dfPat[meta_col]=[til_obs.loc[til_obs['PatientID']==x,meta_col].iloc[0] for x in list(dfPat.index)]

dfPat['nr_cells']=list(bc.tl.count_occurrence(tildata,count_variable='PatientID').loc[dfPat.index,'Counts'])

dfPat

In [None]:
color_dict_mut = {'NRAS': 'coral', 'BRAF': 'lightskyblue'}

In [None]:
color_dict_tiss = {'LN': 'orange', 'Brain': 'blue','Subc':'red'}

In [None]:
mysigs=['score_Bcell_scanpy','score_NaiTcell_scanpy','TIR_NAI','SADE_B','SADE_G','TIR_CYT','TIR_EXH',
            'score_CD4Tcell_scanpy','score_CytotoxCD8Tcell_scanpy','score_ClassMonocyte_scanpy',
            'score_Myeloid_scanpy','score_cDC_scanpy','score_cDC_CCR7_scanpy','score_cDC1_scanpy',
            'score_ExhCD8Tcell_scanpy','score_RegTcell_scanpy','score_Macrophage_scanpy'
           ,'score_Macrophage_MSR1_scanpy' ,'score_Macrophage_MARCO_scanpy', 'MA-MREGDC','MA-cDC1','MA-cDC2',
       'INHREC','MIGRATION','EFF','score_HALLMARK_APOPTOSIS_scanpy',
        'score_HALLMARK_HYPOXIA_scanpy','score_HALLMARK_ANGIOGENESIS_scanpy']

In [None]:
# BRAF vs. NRAS, one signature at a time, on the per-patient means in dfPat:
# the Mann-Whitney U p-value goes into pwilc and a box + swarm plot is
# written as '<signature>-per-mutation.svg'.
pwilc={}
mutation_col=dfPat.loc[:,'Mutation']
braf_nras_patients=dfPat.loc[mutation_col.isin(['BRAF','NRAS']),:]
for myx in mysigs:
    braf_scores=list(dfPat.loc[mutation_col.isin(['BRAF']),myx])
    nras_scores=list(dfPat.loc[mutation_col.isin(['NRAS']),myx])
    pwilc[myx]=stats.mannwhitneyu(braf_scores,nras_scores)[1]  # [1] is the p-value
    plt.figure(figsize=(2,3.5))
    sns.boxplot(x='Mutation',y=myx,data=braf_nras_patients,palette=color_dict_mut)
    # Both seaborn calls draw on the current Axes; keep the last handle for saving.
    fig=sns.swarmplot(x='Mutation',y=myx,data=braf_nras_patients,color='black')
    fig.figure.savefig(figdir+'/'+myx+'-per-mutation.svg', format='svg', bbox_inches="tight", dpi=300)

In [None]:
pd.Series(pwilc).sort_values().to_csv(figdir+'/Signatures-per-mutation-pvals.csv')
pd.Series(pwilc).sort_values()

In [None]:
# Lymph node (LN) vs. Brain + Subc lesions, one signature at a time, on the
# per-patient means in dfPat: the Mann-Whitney U p-value goes into pwilc and
# a box + swarm plot is written as '<signature>-per-lesion.svg'.
pwilc={}
lesion_col=dfPat.loc[:,'Lesion']
known_lesion_patients=dfPat.loc[lesion_col.isin(['LN','Brain','Subc']),:]
for myx in mysigs:
    ln_scores=list(dfPat.loc[lesion_col.isin(['LN']),myx])
    other_scores=list(dfPat.loc[lesion_col.isin(['Brain','Subc']),myx])
    pwilc[myx]=stats.mannwhitneyu(ln_scores,other_scores)[1]  # [1] is the p-value
    plt.figure(figsize=(2,3.5))
    sns.boxplot(x='Lesion',y=myx,data=known_lesion_patients,palette=color_dict_tiss)
    # Both seaborn calls draw on the current Axes; keep the last handle for saving.
    fig=sns.swarmplot(x='Lesion',y=myx,data=known_lesion_patients,color='black')
    fig.figure.savefig(figdir+'/'+myx+'-per-lesion.svg', format='svg', bbox_inches="tight", dpi=300)

In [None]:
# pwilc holds one p-value per signature, so the table gets a fixed name like
# the per-mutation and per-response tables. The previous name was built from
# the loop variable myx, i.e. from whichever signature was processed last.
pd.Series(pwilc).sort_values().to_csv(figdir+'/Signatures-per-lesion-pvals.csv')
pd.Series(pwilc).sort_values()

In [None]:
# Responders (R, TF) vs. non-responders (NR_adj, NR_nadj), one signature at a
# time, on the per-patient means in dfPat: the Mann-Whitney U p-value goes
# into pwilc and a box + swarm plot is saved per signature.
# NOTE(review): this cell needs `color_dict` and `stats` to exist already —
# confirm both are defined in a cell that runs before this one.
pwilc={}
rcat_order=['R','TF','NR_nadj','NR_adj']
rcat_col=dfPat.loc[:,'RCat']
known_rcat_patients=dfPat.loc[rcat_col.isin(['R','TF','NR_adj','NR_nadj']),:]
for myx in mysigs:
    pwilc[myx]=stats.mannwhitneyu(list(dfPat.loc[rcat_col.isin(['R','TF']),myx]),
                           list(dfPat.loc[rcat_col.isin(['NR_adj','NR_nadj']),myx]))[1]
    plt.figure(figsize=(3.5,3.5))
    fig=sns.boxplot(x='RCat',y=myx,data=known_rcat_patients,
            palette=color_dict,order=rcat_order)
    fig=sns.swarmplot(x='RCat',y=myx,data=known_rcat_patients,
             color='black',order=rcat_order)
    # One file per signature. A single fixed file name here made every
    # iteration overwrite the previous figure, leaving only the last signature.
    fig.figure.savefig(figdir+'/'+myx+'-per-response.svg', format='svg', bbox_inches="tight", dpi=300)

In [None]:
pd.Series(pwilc).sort_values().to_csv(figdir+'/Signatures-per-response-pvals.csv')
pd.Series(pwilc).sort_values()

In [None]:
mysigscd8=['score_NaiTcell_scanpy','TIR_NAI','SADE_B','SADE_G','TIR_CYT','TIR_EXH',
            'score_CytotoxCD8Tcell_scanpy',
            'score_ExhCD8Tcell_scanpy',
       'INHREC','MIGRATION','EFF','score_HALLMARK_APOPTOSIS_scanpy',
          'score_HALLMARK_HYPOXIA_scanpy','score_HALLMARK_ANGIOGENESIS_scanpy']

In [None]:
# Same responder vs. non-responder comparison as for all TILs, but on the
# per-patient means of the matcd8 subset (dfPatcd8) and the CD8 signature list.
# Each signature is plotted and saved as pdf, eps and svg; the sorted p-values
# are written to csv and displayed.
pwilc={}
cd8_rcat_order=['R','TF','NR_nadj','NR_adj']
cd8_rcat_col=dfPatcd8.loc[:,'RCat']
cd8_known_rcat=dfPatcd8.loc[cd8_rcat_col.isin(['R','TF','NR_adj','NR_nadj']),:]
for myx in mysigscd8:
    responder_scores=list(dfPatcd8.loc[cd8_rcat_col.isin(['R','TF']),myx])
    nonresponder_scores=list(dfPatcd8.loc[cd8_rcat_col.isin(['NR_adj','NR_nadj']),myx])
    pwilc[myx]=stats.mannwhitneyu(responder_scores,nonresponder_scores)[1]  # [1] is the p-value
    plt.figure(figsize=(3.5,3.5))
    sns.boxplot(x='RCat',y=myx,data=cd8_known_rcat,palette=color_dict,order=cd8_rcat_order)
    # Both seaborn calls draw on the current Axes; keep the last handle for saving.
    fig=sns.swarmplot(x='RCat',y=myx,data=cd8_known_rcat,color='black',order=cd8_rcat_order)
    out_stem=figdir+'/Signatures-per-response-matureCD8-'+myx
    fig.figure.savefig(out_stem+'.pdf', bbox_inches="tight", dpi=300)
    fig.figure.savefig(out_stem+'.eps', format='eps', bbox_inches="tight", dpi=300)
    fig.figure.savefig(out_stem+'.svg', format='svg', bbox_inches="tight", dpi=300)

cd8_pvals_sorted=pd.Series(pwilc).sort_values()
cd8_pvals_sorted.to_csv(figdir+'/Signatures-per-response-matureCD8-pvals.csv')
pd.Series(pwilc).sort_values()

In [None]:
pd.Series(pwilc).sort_values()

In [None]:
from matplotlib import rcParams
rcParams['figure.figsize'] = 5,3
sc.pl.violin(matcd8[matcd8.obs['leiden'].isin(['11','5','0','6'])],order=['0','6','11','5'],
             keys='score_HALLMARK_APOPTOSIS_scanpy', groupby='leiden', save='-apoptosis-per-matCD8Tcell-leiden.svg')

In [None]:
rcParams['figure.figsize'] = 5,3
sc.pl.violin(matcd8[matcd8.obs['leiden'].isin(['11','5','0','6'])],order=['0','6','11','5'],
             keys='score_HALLMARK_HYPOXIA_scanpy', groupby='leiden', save='-hypoxia-per-matCD8Tcell-leiden.svg')

In [None]:
rcParams['figure.figsize'] = 5,3
sc.pl.violin(matcd8[matcd8.obs['leiden'].isin(['11','5','0','6'])],order=['0','6','11','5'],
             keys='score_HALLMARK_ANGIOGENESIS_scanpy', groupby='leiden', save='-angiogenesis-per-matCD8Tcell-leiden.svg')

In [None]:
rcParams['figure.figsize'] = 5,3
sc.pl.violin(matcd8[matcd8.obs['leiden'].isin(['11','5','0','6','8','9'])],order=['8','9','0','6','11','5'],
             keys='MIGRATION', groupby='leiden', save='-migration-per-matCD8Tcell-leiden.svg')

In [None]:
tmp=pd.DataFrame(dfPat.loc[dfPat.loc[:,'Mutation'].isin(['BRAF','NRAS']),['Lesion','Mutation']]).groupby(['Mutation','Lesion']).size().copy()
tmp=tmp.unstack(level=0,fill_value=0).copy()
tmp.index=list(tmp.index)

In [None]:
plt.figure(figsize=(2,3.5))
ax=tmp.transpose().plot.bar(stacked=True,figsize=(3,4))
ax.set_ylabel("Nr. patients")
plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5))
plt.show()
#ax.figure.savefig(figdir+'/Mutation-vs-Tissue.pdf', bbox_inches="tight", dpi=300)
#ax.figure.savefig(figdir+'/Mutation-vs-Tissue.eps', format='eps', bbox_inches="tight", dpi=300)
ax.figure.savefig(figdir+'/Mutation-vs-Tissue.svg', format='svg', bbox_inches="tight", dpi=300)

In [None]:
tmp['BRAF']

In [None]:
tmp['NRAS']

In [None]:
# Two-sided Fisher's exact test on a hand-typed 2x2 table.
# NOTE(review): the counts are hard-coded; presumably they were transcribed from
# the Mutation x Lesion table shown in the preceding cells — TODO confirm, and
# derive them from `tmp` so the test follows the data.
stats.fisher_exact([[7,3],[2,5]], alternative='two-sided') ### Difference in proportions is not significant

In [None]:
tmp=pd.DataFrame(dfPat.loc[dfPat.loc[:,'Mutation'].isin(['BRAF','NRAS']),['RCat','Mutation']]).groupby(['Mutation','RCat']).size().copy()
tmp=tmp.unstack(level=0,fill_value=0).copy()
tmp.index=list(tmp.index)

In [None]:
plt.figure(figsize=(2,3.5))
ax=tmp.transpose().plot.bar(stacked=True,figsize=(3,4))
ax.set_ylabel("Nr. patients")
plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5))
plt.show()
#ax.figure.savefig(figdir+'/Mutation-vs-Response.pdf', bbox_inches="tight", dpi=300)
#ax.figure.savefig(figdir+'/Mutation-vs-Response.eps', format='eps', bbox_inches="tight", dpi=300)
ax.figure.savefig(figdir+'/Mutation-vs-Response.svg', format='svg', bbox_inches="tight", dpi=300)

In [None]:
dfPat_out=pd.concat([tmb.drop(columns=['RCat','Lesion','Mutation']),dfPat],axis=1)
dfPat_out.to_csv(figdir+'ValuesPerPatient-TILs-per-response_TMB.tsv',sep='\t')

In [None]:
dfPat.to_csv(figdir+'ValuesPerPatient-TILs-per-response.tsv',sep='\t')


In [None]:
dfPat.loc[:,['RCat','Lesion']]

In [None]:
tmp=pd.DataFrame(dfPat.loc[:,['RCat','Lesion']]).groupby(['RCat','Lesion']).size().copy()
tmp=tmp.unstack(level=0,fill_value=0).copy()
tmp.index=list(tmp.index)

In [None]:
plt.figure(figsize=(2,3.5))
ax=tmp.transpose().plot.bar(stacked=True,figsize=(3,4))
ax.set_ylabel("Nr. patients")
plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5))
plt.show()
#ax.figure.savefig(figdir+'/Lesion-vs-Response.pdf', bbox_inches="tight", dpi=300)
#ax.figure.savefig(figdir+'/Lesion-vs-Response.eps', format='eps', bbox_inches="tight", dpi=300)
ax.figure.savefig(figdir+'/Lesion-vs-Response.svg', format='svg', bbox_inches="tight", dpi=300)

In [None]:
color_dict = {'R': 'coral', 'TF': 'firebrick', 'NR_nadj': 'lightskyblue','NR_adj': 'royalblue'}

dfPat.loc[:,['score_Bcell_scanpy','score_Tcells_scanpy']].corr(method='pearson')



In [None]:
dfPat.loc[:,['score_Bcell_scanpy','score_Myeloid_scanpy']].corr(method='pearson')


In [None]:
dfPat.loc[:,['score_Bcell_scanpy','score_Plasma_scanpy']].corr(method='pearson')


In [None]:
dfPat.loc[:,['score_Bcell_scanpy','score_cDC_CCR7_scanpy']].corr(method='pearson')


In [None]:
dfPat.loc[:,['score_Bcell_scanpy','score_cDC1_scanpy']].corr(method='pearson')


In [None]:
dfPat.loc[:,['score_Bcell_scanpy','score_cDC2_scanpy']].corr(method='pearson')


In [None]:
dfPat.loc[:,['score_Bcell_scanpy','score_CD4_scanpy']].corr(method='pearson')


In [None]:
dfPat.loc[:,['score_Bcell_scanpy','score_CD8_scanpy']].corr(method='pearson')


In [None]:
dfPat.loc[:,['score_Bcell_scanpy','score_ExhCD8Tcell_scanpy']].corr(method='pearson')


In [None]:
dfPat=dfPat.loc[:,~dfPat.columns.duplicated()]

In [None]:
dfPat_out=dfPat_out.loc[:,~dfPat_out.columns.duplicated()]

In [None]:
dfPat_out

In [None]:
dfPat

In [None]:
# Per-patient B-cell score vs. CD8 score, coloured by response category.
rcParams['figure.figsize'] = 5,5
# Keep the Axes returned by seaborn: the savefig below must write THIS
# figure. Previously `ax` was never assigned here, so the stale `ax` from an
# earlier bar-plot cell was saved under this file name instead.
ax=sns.scatterplot(x='score_Bcell_scanpy',y='score_CD8_scanpy',hue='RCat',data=dfPat,
                hue_order=['R','TF','NR_adj','NR_nadj'],
               palette=color_dict)
ax.set_title('pp')
ax.legend(bbox_to_anchor=(1.05, 1.05),borderaxespad=0.5)
plt.tight_layout()
# Label every point with its patient ID (one pass, no reset_index per point).
for patient_id,x_val,y_val in zip(dfPat.index,dfPat['score_Bcell_scanpy'],dfPat['score_CD8_scanpy']):
    ax.text(x=x_val,y=y_val,s=patient_id,
            fontdict=dict(color='black',size=10))
ax.figure.savefig(figdir+'/Bcell_vs_CD8_Response.svg', format='svg', bbox_inches="tight", dpi=300)

In [None]:
# Per-patient B-cell score vs. exhausted CD8 score, coloured by response category.
rcParams['figure.figsize'] = 5,5
# Keep the Axes returned by seaborn: the savefig below must write THIS
# figure. Previously `ax` was never assigned here, so the stale `ax` from an
# earlier bar-plot cell was saved under this file name instead.
ax=sns.scatterplot(x='score_Bcell_scanpy',y='score_ExhCD8Tcell_scanpy',hue='RCat',data=dfPat,
                hue_order=['R','TF','NR_adj','NR_nadj'],
               palette=color_dict)
ax.set_title('pp')
ax.legend(bbox_to_anchor=(1.05, 1.05),borderaxespad=0.5)
plt.tight_layout()
# Label every point with its patient ID (one pass, no reset_index per point).
for patient_id,x_val,y_val in zip(dfPat.index,dfPat['score_Bcell_scanpy'],dfPat['score_ExhCD8Tcell_scanpy']):
    ax.text(x=x_val,y=y_val,s=patient_id,
            fontdict=dict(color='black',size=10))
ax.figure.savefig(figdir+'/Bcell_vs_ExhCD8_Response.svg', format='svg', bbox_inches="tight", dpi=300)

In [None]:
# Per-patient B-cell score vs. exhausted CD8 score, coloured by lesion site.
rcParams['figure.figsize'] = 5,5
# Keep the Axes returned by seaborn: the savefig below must write THIS
# figure. Previously `ax` was never assigned here, so the stale `ax` from an
# earlier bar-plot cell was saved under this file name instead.
ax=sns.scatterplot(x='score_Bcell_scanpy',y='score_ExhCD8Tcell_scanpy',hue='Lesion',data=dfPat)
ax.set_title('pp')
ax.legend(bbox_to_anchor=(1.05, 1.05),borderaxespad=0.5)
plt.tight_layout()
# Label every point with its patient ID (one pass, no reset_index per point).
for patient_id,x_val,y_val in zip(dfPat.index,dfPat['score_Bcell_scanpy'],dfPat['score_ExhCD8Tcell_scanpy']):
    ax.text(x=x_val,y=y_val,s=patient_id,
            fontdict=dict(color='black',size=10))
ax.figure.savefig(figdir+'/Bcell_vs_ExhCD8_Lesion.svg', format='svg', bbox_inches="tight", dpi=300)

In [None]:
# Per-patient B-cell score vs. exhausted CD8 score, coloured by mutation.
rcParams['figure.figsize'] = 5,5
# Keep the Axes returned by seaborn: the savefig below must write THIS
# figure. Previously `ax` was never assigned here, so the stale `ax` from an
# earlier bar-plot cell was saved under this file name instead.
ax=sns.scatterplot(x='score_Bcell_scanpy',y='score_ExhCD8Tcell_scanpy',hue='Mutation',data=dfPat)
ax.set_title('pp')
ax.legend(bbox_to_anchor=(1.05, 1.05),borderaxespad=0.5)
plt.tight_layout()
# Label every point with its patient ID (one pass, no reset_index per point).
for patient_id,x_val,y_val in zip(dfPat.index,dfPat['score_Bcell_scanpy'],dfPat['score_ExhCD8Tcell_scanpy']):
    ax.text(x=x_val,y=y_val,s=patient_id,
            fontdict=dict(color='black',size=10))
ax.figure.savefig(figdir+'/Bcell_vs_ExhCD8_Mutation.svg', format='svg', bbox_inches="tight", dpi=300)

In [None]:
# Per-patient B-cell score vs. CCR7+ cDC score, coloured by response category.
rcParams['figure.figsize'] = 7,5
# Keep the Axes returned by seaborn: the savefig below must write THIS
# figure. Previously `ax` was never assigned here, so the stale `ax` from an
# earlier bar-plot cell was saved under this file name instead.
ax=sns.scatterplot(x='score_Bcell_scanpy',y='score_cDC_CCR7_scanpy',hue='RCat',data=dfPat,
                hue_order=['R','TF','NR_adj','NR_nadj'],
               palette=color_dict)
ax.set_title('pp')
ax.legend(bbox_to_anchor=(1.05, 1.05),borderaxespad=0.5)
plt.tight_layout()
# Label every point with its patient ID (one pass, no reset_index per point).
for patient_id,x_val,y_val in zip(dfPat.index,dfPat['score_Bcell_scanpy'],dfPat['score_cDC_CCR7_scanpy']):
    ax.text(x=x_val,y=y_val,s=patient_id,
            fontdict=dict(color='black',size=10))
ax.figure.savefig(figdir+'/Bcell_vs_aDC_Response.svg', format='svg', bbox_inches="tight", dpi=300)

In [None]:
# Per-patient B-cell score vs. CCR7+ cDC score, coloured by mutation.
rcParams['figure.figsize'] = 7,5
# Keep the Axes returned by seaborn: the savefig below must write THIS
# figure. Previously `ax` was never assigned here, so the stale `ax` from an
# earlier bar-plot cell was saved under this file name instead.
ax=sns.scatterplot(x='score_Bcell_scanpy',y='score_cDC_CCR7_scanpy',hue='Mutation',data=dfPat)
ax.set_title('pp')
ax.legend(bbox_to_anchor=(1.05, 1.05),borderaxespad=0.5)
plt.tight_layout()
# Label every point with its patient ID (one pass, no reset_index per point).
for patient_id,x_val,y_val in zip(dfPat.index,dfPat['score_Bcell_scanpy'],dfPat['score_cDC_CCR7_scanpy']):
    ax.text(x=x_val,y=y_val,s=patient_id,
            fontdict=dict(color='black',size=10))
ax.figure.savefig(figdir+'/Bcell_vs_aDC_Mutation.svg', format='svg', bbox_inches="tight", dpi=300)

In [None]:
# Per-patient B-cell score vs. CCR7+ cDC score, coloured by lesion site.
rcParams['figure.figsize'] = 7,5
# Keep the Axes returned by seaborn: the savefig below must write THIS
# figure. Previously `ax` was never assigned here, so the stale `ax` from an
# earlier bar-plot cell was saved under this file name instead.
ax=sns.scatterplot(x='score_Bcell_scanpy',y='score_cDC_CCR7_scanpy',hue='Lesion',data=dfPat)
ax.set_title('pp')
ax.legend(bbox_to_anchor=(1.05, 1.05),borderaxespad=0.5)
plt.tight_layout()
# Label every point with its patient ID (one pass, no reset_index per point).
for patient_id,x_val,y_val in zip(dfPat.index,dfPat['score_Bcell_scanpy'],dfPat['score_cDC_CCR7_scanpy']):
    ax.text(x=x_val,y=y_val,s=patient_id,
            fontdict=dict(color='black',size=10))
ax.figure.savefig(figdir+'/Bcell_vs_aDC_Lesion.svg', format='svg', bbox_inches="tight", dpi=300)

In [None]:
sc.settings.set_figure_params()

In [None]:
sc.pl.umap(tildata, color=['score_ExhCD8Tcell_scanpy','score_Bcell_scanpy','score_cDC_CCR7_scanpy'])

In [None]:
dfPat

In [None]:
rcParams['figure.figsize'] = 8,4
fig=sns.barplot(x=dfPat.index,y='nr_cells',
            data=dfPat, order=[str(x) for x in porder], palette=pcolor)
fig.figure.savefig(figdir+'/Cells-per-patient.svg', format='svg', bbox_inches="tight", dpi=300)

#### Read pseudobulk analysis and perform pathway enrichment plots 

In [None]:
camera_in=pd.read_csv(results_folder+'/DE/edgeR_analyzes/result_mut_celltype2_pub-all/edgeR_output-mut/geneset-analysis/camera-results.txt', sep='\t')

In [None]:
camera_in=camera_in.loc[camera_in['Namespace']=='user',:]
camera_in=camera_in.loc[camera_in['FDR.cor0.01']<0.05,:]
#camera_in=camera_in.loc[np.abs(camera_in['EffectSize'])>=2,:]
camera_in=camera_in.loc[np.abs(camera_in['NGenes'])>3,:]
camera_in.index=camera_in['GeneSet']
camera_in

In [None]:
# One-column heatmap of the filtered camera scores for the mutation contrast,
# highest score on top, colour scale clipped to [-5, 5] and centred on 0.
plt.figure(figsize=(0.75,6))
sns.set(font_scale=0.8)
mutation_scores=pd.DataFrame(camera_in['Score.cor0.01'].sort_values(ascending=False))
blue_red_cmap=sns.diverging_palette(220, 20, as_cmap=True)
sns_plot=sns.heatmap(mutation_scores,center=0.00,cmap=blue_red_cmap, vmax=5, vmin=-5)
fig = sns_plot.get_figure()
fig.savefig(figdir+'Pseudobulk-mutation-signatureenrichment.pdf', bbox_inches="tight", dpi=300)
fig.savefig(figdir+'/Pseudobulk-mutation-signatureenrichment.eps', format='eps', bbox_inches="tight", dpi=300)
fig.savefig(figdir+'/Pseudobulk-mutation-signatureenrichment.svg', format='svg', bbox_inches="tight", dpi=300)

In [None]:
camera_in=pd.read_csv(results_folder+'/DE/edgeR_analyzes/result_lesion_celltype2_pub-all/edgeR_output-lesion/geneset-analysis/camera-results.txt', sep='\t')
camera_in=camera_in.loc[camera_in['Namespace']=='user',:]
camera_in=camera_in.loc[np.abs(camera_in['NGenes'])>3,:]
camera_in

In [None]:
camera_in_lnbr=camera_in.loc[camera_in['Contrast']=='LN_vs_Brain',:]
camera_in_subcbr=camera_in.loc[camera_in['Contrast']=='Subc_vs_Brain',:]
camera_in_subcln=camera_in.loc[camera_in['Contrast']=='Subc_vs_LN',:]

In [None]:
camera_in_lnbr.index=camera_in_lnbr['GeneSet']
camera_in_subcbr.index=camera_in_subcbr['GeneSet']
camera_in_subcln.index=camera_in_subcln['GeneSet']

In [None]:
camera_in_subcln

In [None]:
# Gene sets to plot: significant (FDR < 0.05) in at least one lesion contrast
# AND |score| >= 2 in at least one lesion contrast, minus 'MelMelanoma_sc'.
sig_lnbr=set(camera_in_lnbr.loc[camera_in_lnbr['FDR.cor0.01']<0.05,'GeneSet'])
sig_subcbr=set(camera_in_subcbr.loc[camera_in_subcbr['FDR.cor0.01']<0.05,'GeneSet'])
sig_subcln=set(camera_in_subcln.loc[camera_in_subcln['FDR.cor0.01']<0.05,'GeneSet'])
toplot=list(sig_lnbr.union(sig_subcbr).union(sig_subcln))

strong_lnbr=set(camera_in_lnbr.loc[np.abs(camera_in_lnbr['Score.cor0.01'])>=2,'GeneSet'])
strong_subcbr=set(camera_in_subcbr.loc[np.abs(camera_in_subcbr['Score.cor0.01'])>=2,'GeneSet'])
strong_subcln=set(camera_in_subcln.loc[np.abs(camera_in_subcln['Score.cor0.01'])>=2,'GeneSet'])
toplot2=list(strong_lnbr.union(strong_subcbr).union(strong_subcln))

excluded_sets=set(['MelMelanoma_sc'])
toplot=list(set(toplot).intersection(set(toplot2))-excluded_sets)
toplot

In [None]:
mergedpd=pd.concat([camera_in_lnbr.loc[toplot,'Score.cor0.01'],camera_in_subcln.loc[toplot,'Score.cor0.01']*(-1),camera_in_subcbr.loc[toplot,'Score.cor0.01']*(-1)], axis=1)

In [None]:
mergedpd.columns=['LN_vs_Brain','LN_vs_Subc','Brain_vs_Subc']

In [None]:
#camera_in=camera_in.loc[camera_in['FDR.cor0.01']<0.05,:]
#camera_in=camera_in.loc[np.abs(camera_in['EffectSize'])>=2,:]

#camera_in.index=camera_in['GeneSet']
mergedpd.sort_values('LN_vs_Brain')

In [None]:
# Heatmap of the three lesion contrasts for the selected gene sets, rows
# ordered by the LN_vs_Brain score, colour scale clipped to [-5, 5].
plt.figure(figsize=(3,8))
sns.set(font_scale=0.8)
lesion_scores=mergedpd.sort_values('LN_vs_Brain')
blue_red_cmap=sns.diverging_palette(220, 20, as_cmap=True)
sns_plot=sns.heatmap(lesion_scores,center=0.00,cmap=blue_red_cmap, vmax=5, vmin=-5)
fig = sns_plot.get_figure()
fig.savefig(figdir+'Pseudobulk-lesion-signatureenrichment.pdf', bbox_inches="tight", dpi=300)
fig.savefig(figdir+'/Pseudobulk-lesion-signatureenrichment.eps', format='eps', bbox_inches="tight", dpi=300)
fig.savefig(figdir+'/Pseudobulk-lesion-signatureenrichment.svg', format='svg', bbox_inches="tight", dpi=300)

### CellPhoneDB analysis


In [None]:
### Read in the Tumor data as well and write for cellphonedb analysis 

In [None]:
allcells=tildata.copy()


In [None]:
### write for cellphoneDB
mypatients=list(set(tildata.obs['PatientID']))

In [None]:
# Write the two CellPhoneDB input files for every patient in tildata:
#   CellphoneDBmeta_<patient>.txt  cell barcode -> celltype3_pub label
#   CellphoneDBvals_<patient>.txt  raw expression, genes (ENSEMBL) x cells
cpdb_counts_dir=results_folder+'/cellphonedb/counts'
os.makedirs(cpdb_counts_dir,exist_ok=True)  # to_csv does not create missing folders
for x in mypatients:
    p11=tildata[tildata.obs['PatientID']==x].copy()
    mysub=str(x)  # patient IDs may not be strings; the file names need text

    mymeta=pd.DataFrame([list(p11.obs.index),list(p11.obs['celltype3_pub'])]).transpose()
    mymeta.columns=['Cell','cell_type'] #Cell	cell_type
    mymeta.index=mymeta['Cell']
    mymeta=mymeta.drop(columns='Cell')
    mymeta.to_csv(cpdb_counts_dir+'/CellphoneDBmeta_'+mysub+'.txt', sep='\t')

    # raw.X is normally sparse; accept a dense matrix as well.
    raw_counts=p11.raw.X
    raw_counts=raw_counts.toarray() if sparse.issparse(raw_counts) else np.asarray(raw_counts)
    myvals=pd.DataFrame(raw_counts)
    myvals.columns=p11.raw.var['ENSEMBL']
    myvals.index=p11.obs.index

    myvals.transpose().to_csv(cpdb_counts_dir+'/CellphoneDBvals_'+mysub+'.txt', sep='\t')

In [None]:
sc.pl.umap(tildata, color='PatientID')

#### Read and process data

In [None]:
mypatients=['11','13','29','33','34','40','43','63','64','67','68','69','72','82','86','91']

In [None]:
# Collect CellPhoneDB results for every patient in mypatients.
# For each patient the significant_means.txt table is read and indexed by its
# second column; columns for the cell-type pairs of interest and rows for the
# two interactions of interest are added as NaN when missing, so that the
# selections below always find them.
# Per-patient results end up in:
#   allna / allpd1      dicts keyed by patient, one interaction row each
#   dfsum / dfsumpd1    one row per patient, restricted to the `tocomp` pairs
#   cdc1*               one frame per '<ofc>|<partner>' pair, one column per patient
#   resp                response category (RCat) per patient, in loop order
# NOTE(review): the cdc1* frames are filled for ofc = CCR7-positive myeloid
# dendritic cell despite the "cdc1" prefix — confirm the naming is intended.
allna={}
allpd1={}
dfsum=pd.DataFrame()
dfsumpd1=pd.DataFrame()
cdc1nknai=pd.DataFrame()
cdc1nkcyt=pd.DataFrame()
cdc1tc8exp=pd.DataFrame()
cdc1tc8exh=pd.DataFrame()
cdc1tc8ccex=pd.DataFrame()
cdc1tc8em=pd.DataFrame()
cdc1tctr=pd.DataFrame()
cdc1tc4nai=pd.DataFrame()
resp=[]
ofc='CCR7-positive myeloid dendritic cell'
for x in mypatients:
    df=pd.read_csv(results_folder+'/cellphonedb/out_'+x+'/significant_means.txt',sep='\t')
    # Index rows by the second column (presumably the interacting-pair name — TODO confirm).
    df.index=df.iloc[:,1]
    # Cell-type pair columns summarised in dfsum / dfsumpd1.
    tocomp=['CD141-positive myeloid dendritic cell|CD56-bright cytokine secreting natural killer cell',
           'CD141-positive myeloid dendritic cell|cytokine secreting effector CD8-positive T cell',
           'CD141-positive myeloid dendritic cell|proliferating CD8-positive T cell',
            'CD141-positive myeloid dendritic cell|proliferating CD4-positive T cell',
           'CCR7-positive myeloid dendritic cell|proliferating CD4-positive T cell',
            'CCR7-positive myeloid dendritic cell|exhausted-like CD8-positive T cell',
            'CCR7-positive myeloid dendritic cell|proliferating CD8-positive T cell',
            'CCR7-positive myeloid dendritic cell|CD56-bright cytokine secreting natural killer cell',
           'CD1c-positive myeloid dendritic cell|proliferating CD4-positive T cell',
           'effector memory CD4-positive T cell|CCR7-positive myeloid dendritic cell',
           'exhausted-like CD4-positive T cell|CCR7-positive myeloid dendritic cell',
            'exhausted-like CD8-positive T cell|CCR7-positive myeloid dendritic cell',
           'proliferating CD8-positive T cell|CCR7-positive myeloid dendritic cell',
           'proliferating CD4-positive T cell|CCR7-positive myeloid dendritic cell']
    # '<ofc>|<partner>' columns read into the cdc1* frames below.
    exists=[str(ofc+'|CD56-bright cytokine secreting natural killer cell'),
           str(ofc+'|cytotoxic CD56-dim natural killer cell'),
           str(ofc+'|cytokine secreting effector CD8-positive T cell'),
           str(ofc+'|exhausted-like CD8-positive T cell'),
           str(ofc+'|proliferating CD8-positive T cell'),
           str(ofc+'|regulatory T cell'),
           str(ofc+'|effector memory CD8-positive T cell'),
           str(ofc+'|naive CD4-positive T cell')]
    # Add any pair column this patient's table lacks, filled with NaN.
    for y in list(set(exists)-set(df.columns)):
        df[y]=np.nan
    for y in list(set(tocomp)-set(df.columns)):
        df[y]=np.nan
    # Add missing interaction rows by transposing, adding columns, and transposing back.
    dft=df.transpose().copy()
    for y in list(set(['XCR1_XCL1','PDCD1_CD274'])-set(dft.columns)):
        dft[y]=np.nan
    df=dft.transpose().copy()
    # iloc[0,12:] takes the first matching row and skips the first 12 columns
    # (presumably the CellPhoneDB metadata columns — TODO confirm).
    # NOTE(review): allna receives PDCD1_CD274 and allpd1 receives XCR1_XCL1,
    # whereas dfsum/dfsumpd1 below pair "pd1" with PDCD1_CD274 — the two dict
    # names look swapped; confirm against their downstream use.
    allna[x]=df.loc[df.index.str.match('PDCD1_CD274'),:].iloc[0,12:len(df.columns)].dropna()
    allpd1[x]=df.loc[df.index.str.match('XCR1_XCL1'),:].iloc[0,12:len(df.columns)].dropna()
    dfsum=pd.concat([dfsum,df.loc[df.index.str.match('XCR1_XCL1'),tocomp]])
    dfsumpd1=pd.concat([dfsumpd1,df.loc[df.index.str.match('PDCD1_CD274'),tocomp]])
    resp.append(list(set(tildata[tildata.obs['PatientID'].astype(str)==x].obs['RCat']))[0])
    #cdc1nknai=cdc1nknai.drop_duplicates(inplace=True,axis=1)
    # Outer-join this patient's column onto each running frame; rsuffix=x keeps
    # the repeated column name unique from the second patient onwards.
    cdc1nknai=cdc1nknai.join(df.loc[:,str(ofc+'|CD56-bright cytokine secreting natural killer cell')],how='outer',rsuffix=x)
    cdc1nkcyt=cdc1nkcyt.join(df.loc[:,str(ofc+'|cytotoxic CD56-dim natural killer cell')],how='outer',rsuffix=x)
    cdc1tc8exp=cdc1tc8exp.join(df.loc[:,str(ofc+'|cytokine secreting effector CD8-positive T cell')],how='outer',rsuffix=x)
    cdc1tc8exh=cdc1tc8exh.join(df.loc[:,str(ofc+'|exhausted-like CD8-positive T cell')],how='outer',rsuffix=x)
    cdc1tc8ccex=cdc1tc8ccex.join(df.loc[:,str(ofc+'|proliferating CD8-positive T cell')],how='outer',rsuffix=x)
    cdc1tctr=cdc1tctr.join(df.loc[:,str(ofc+'|regulatory T cell')],how='outer',rsuffix=x)
    cdc1tc8em=cdc1tc8em.join(df.loc[:,str(ofc+'|effector memory CD8-positive T cell')],how='outer',rsuffix=x)
    cdc1tc4nai=cdc1tc4nai.join(df.loc[:,str(ofc+'|naive CD4-positive T cell')],how='outer',rsuffix=x)
# NOTE(review): only dfsum is re-indexed by patient; dfsumpd1 keeps the
# interaction name as its index — confirm this asymmetry is intended.
dfsum.index=mypatients

In [None]:
cellsoi=['CD56-bright cytokine secreting natural killer cell','effector memory CD8-positive T cell']
toinclude=['XCR1_XCL1','PDCD1_CD274','CD40_CD40LG',
           'PDCD1_PDCD1LG2','PDCD1_CD274','LGALS9_HAVCR2','CD27_CD70','CD80_CD274','CD28_CD80']

def getcella(cellsoi, ofc='CD141-positive myeloid dendritic cell', toinclude=['XCR1_XCL1','PDCD1_CD274'], 
             mypath='/cellphonedb/', patients=None):
    """Collect CellPhoneDB significant means for the pairs '<ofc>|<cell type>'.

    For every patient the file
    results_folder + mypath + '/out_<patient>/significant_means.txt' is read and
    indexed by its second column. Pair columns that are absent from a patient's
    table, and rows listed in `toinclude` that are absent, are added as NaN so
    every patient contributes a column.

    Parameters
    ----------
    cellsoi : iterable of str
        Partner cell types; each one becomes a key of the returned dict.
    ofc : str
        Cell type placed first in the '<ofc>|<partner>' column name.
    toinclude : list of str
        Interaction names (row labels) that must be present in every table.
        The default list is never modified.
    mypath : str
        Sub-path below the global `results_folder` holding the out_* folders.
    patients : list of str, optional
        Patient IDs to read. Defaults to the global `mypatients` at call time,
        which is what the function always used before this argument existed.

    Returns
    -------
    dict
        Maps each cell type to a DataFrame with interactions as rows and one
        column per patient, labelled with the patient ID.
    """
    if patients is None:
        patients=mypatients
    patients=list(patients)
    cdc1coi={}
    for i in cellsoi:
        cdc1coi[i]=pd.DataFrame()
    for x in patients:
        df=pd.read_csv(results_folder+mypath+'/out_'+x+'/significant_means.txt',sep='\t')
        df.index=df.iloc[:,1]
        # Make sure every requested pair column exists for this patient.
        exists=[str(ofc+'|'+celltype) for celltype in cdc1coi]
        for y in list(set(exists)-set(df.columns)):
            df[y]=np.nan
        # Rows are added on the transposed frame, where they are columns.
        dft=df.transpose().copy()
        for y in list(set(toinclude)-set(dft.columns)):
            dft[y]=np.nan
        df=dft.transpose().copy()
        for i in cellsoi:
            # rsuffix keeps the repeated column name unique until the final
            # relabelling by patient ID.
            cdc1coi[i]=cdc1coi[i].join(df.loc[:,str(ofc+'|'+i)],how='outer',rsuffix=x)
    for i in cellsoi:
        cdc1coi[i].columns=patients
    return(cdc1coi)

In [None]:
def getcellb(cellsoi, ofc='CD141-positive myeloid dendritic cell', toinclude=['XCL1_XCR1','CD274_PDCD1'],
            mypath='/cellphonedb/', patients=None):
    """Collect CellPhoneDB significant means for the pairs '<cell type>|<ofc>'.

    This is the reverse orientation of getcella: the cell type of interest is
    the second member of the pair. For every patient the file
    results_folder + mypath + '/out_<patient>/significant_means.txt' is read and
    indexed by its second column. Pair columns that are absent from a patient's
    table, and rows listed in `toinclude` that are absent, are added as NaN so
    every patient contributes a column.

    Parameters
    ----------
    cellsoi : iterable of str
        Partner cell types; each one becomes a key of the returned dict.
    ofc : str
        Cell type placed second in the '<partner>|<ofc>' column name.
    toinclude : list of str
        Interaction names (row labels) that must be present in every table.
        The default list is never modified.
    mypath : str
        Sub-path below the global `results_folder` holding the out_* folders.
    patients : list of str, optional
        Patient IDs to read. Defaults to the global `mypatients` at call time,
        which is what the function always used before this argument existed.

    Returns
    -------
    dict
        Maps each cell type to a DataFrame with interactions as rows and one
        column per patient, labelled with the patient ID.
    """
    if patients is None:
        patients=mypatients
    patients=list(patients)
    cdc1coi={}
    for i in cellsoi:
        cdc1coi[i]=pd.DataFrame()
    for x in patients:
        df=pd.read_csv(results_folder+mypath+'/out_'+x+'/significant_means.txt',sep='\t')
        df.index=df.iloc[:,1]
        # Make sure every requested pair column exists for this patient.
        exists=[str(celltype+'|'+ofc) for celltype in cdc1coi]
        for y in list(set(exists)-set(df.columns)):
            df[y]=np.nan
        # Rows are added on the transposed frame, where they are columns.
        dft=df.transpose().copy()
        for y in list(set(toinclude)-set(dft.columns)):
            dft[y]=np.nan
        df=dft.transpose().copy()
        for i in cellsoi:
            # rsuffix keeps the repeated column name unique until the final
            # relabelling by patient ID.
            cdc1coi[i]=cdc1coi[i].join(df.loc[:,str(i+'|'+ofc)],how='outer',rsuffix=x)
    for i in cellsoi:
        cdc1coi[i].columns=patients
    return(cdc1coi)

In [None]:
def gettopinteract(cellsoi, cdc1_interact, minfraction,patchoice):
    """Return interactions detected in more than `minfraction` of the chosen patients.

    For each cell type in `cellsoi`, the table cdc1_interact[cell type] is
    restricted to the `patchoice` columns and the share of non-missing values
    per row is computed. Rows whose share is strictly greater than
    `minfraction` are kept; the result is the union over all cell types,
    returned as a list in no particular order.

    NOTE(review): the comparison is strict, so minfraction=1 can never select
    anything — confirm whether '>=' was intended.
    """
    n_chosen=len(patchoice)
    kept=set()
    for celltype in cellsoi:
        chosen_cols=cdc1_interact[celltype].loc[:,patchoice]
        n_detected=chosen_cols.apply(lambda row: len(row.dropna()),axis=1)
        detected_share=n_detected.sort_values()/n_chosen
        above_cut=detected_share[detected_share>minfraction]
        kept=kept.union(set(list(above_cut.index)))
    return(list(kept))

In [None]:
results_folder

#### Focus on individual cell types  

In [None]:
#### Get genes that are enriched in a specific cell type
topDE=bc.tl.dge.get_de(tildata,'celltype3_pub',topnr=5000, logfc=np.log(1.5),padj=0.1)

In [None]:
#mypatients=['11','34','68','82','83','86','91']
mypatients=['11','13','29','33','34','40','43','63','64','67','68','69','72','77','82','83','86','87','91']


In [None]:
#topDEmut=bc.tl.dge.get_de(tildata,'celltype3_pub',topnr=5000, logfc=np.log(1.5),padj=0.1)

In [None]:
#dfPatP

In [None]:
# Split the CellPhoneDB patients (mypatients) into responders (RCat R or TF)
# and non-responders (RCat NR_nadj or NR_adj).
# NOTE(review): the categories are looked up in dfPatP, the per-patient table
# built from the PBMC cells, so a patient without PBMC cells drops out of both
# lists — confirm that dfPat (the TIL table) was not intended here.
rpatients=list(set([str(x) for x in list(dfPatP.loc[dfPatP['RCat'].isin(['R','TF']),:].index)]).intersection(set(mypatients)))
nrpatients=list(set([str(x) for x in list(dfPatP.loc[dfPatP['RCat'].isin(['NR_nadj','NR_adj']),:].index)]).intersection(set(mypatients)))

In [None]:
# Split the CellPhoneDB patients (mypatients) by mutation status (BRAF / NRAS).
# NOTE(review): the status is looked up in dfPatP, the per-patient table built
# from the PBMC cells, so a patient without PBMC cells drops out of both
# lists — confirm that dfPat (the TIL table) was not intended here.
brafpatients=list(set([str(x) for x in list(dfPatP.loc[dfPatP['Mutation'].isin(['BRAF']),:].index)]).intersection(set(mypatients)))
nraspatients=list(set([str(x) for x in list(dfPatP.loc[dfPatP['Mutation'].isin(['NRAS']),:].index)]).intersection(set(mypatients)))

In [None]:
rpatients

In [None]:
nrpatients

In [None]:
brafpatients

In [None]:
nraspatients

In [None]:
ofc='CD141-positive myeloid dendritic cell'
#ofc='melanocytic melanoma cell'
#ofc='CD141-positive myeloid dendritic cell'
#ofc='CCR7-positive myeloid dendritic cell'
#ofc='CD1c-positive myeloid dendritic cell'
#ofc='CD4-positive, cytotoxic T cell'
#ofc='cytokine secreting effector CD8-positive T cell'
#ofc='effector memory CD8-positive T cell'

#ofc='classical monocyte'
#ofc='melanocytic melanoma cell'
#ofc='CXCL9-positive macrophage'
#ofc='MARCO-positive macrophage'
#ofc='MSR1-positive macrophage'
#ofc='exhausted-like CD8-positive T cell'

#cellsoi=list(set(allcells.obs['celltype3_pub'])) #-set([ofc]))
cellsoi=list(set(tildata.obs['celltype3_pub'])) # with or without self-interaction -set([ofc]))
#cdc1_interact=getcella(cellsoi,ofc, toinclude,'/cellphonedb/allcells/')

In [None]:
list(set(tildata.obs['celltype3_pub']))

In [None]:
### Only look at specific interactions - myeloids vs. CD8 T cells
cellsoi=[
       'cytokine secreting effector CD8-positive T cell',
       'CD56-bright cytokine secreting natural killer cell',
       'effector memory CD8-positive T cell',  'proliferating CD8-positive T cell',
       'exhausted-like CD8-positive T cell',  'naive CD8-positive T cell',
       'mature NK T cell', 
       'cytotoxic CD56-dim natural killer cell', 
       'proliferating NK cell','regulatory T cell', 
    'CD4-positive, cytotoxic T cell','naive CD4-positive T cell','proliferating CD4-positive T cell',
 'effector memory CD4-positive T cell','IL7R-max CD4-positive T cell','exhausted-like CD4-positive T cell']

In [None]:
cellsoi=[
 'MSR1-positive macrophage',
 'CD141-positive myeloid dendritic cell',
 'MARCO-positive macrophage',
 'non-classical monocyte',
 'immature conventional dendritic cell',
 'CD1c-positive myeloid dendritic cell',
 'CCR7-positive myeloid dendritic cell',
 'classical monocyte',
 'CXCL9-positive macrophage',
 'macrophage',
 'plasmacytoid dendritic cell']

Read in all the interactions of the cell type of interest vs. all other cell types into a dictionary; keys are cell types, and each value is a dataframe with patients as columns and interactions as rows.

In [None]:
cdc1_interact=getcella(cellsoi,ofc, toinclude,'/cellphonedb/')


In [None]:
#cdc1_interact_rev=getcellb(cellsoi,ofc, toinclude,'/cellphonedb/allcells/')
cdc1_interact_rev=getcellb(cellsoi,ofc, toinclude,'/cellphonedb/')

In [None]:
#cdc1_interact_rev['exhausted-like CD8-positive T cell']['11'].sort_values(ascending=False)[0:20]

In [None]:
cdc1_interact['exhausted-like CD8-positive T cell']['11'].sort_values(ascending=False)[0:20]

In [None]:
topcut=1
topcdc1=gettopinteract(cellsoi, cdc1_interact, topcut,mypatients)
topcdc1_rev=gettopinteract(cellsoi, cdc1_interact_rev, topcut,mypatients)


In [None]:
#topcdc1_ronly=gettopinteract(cellsoi, cdc1_interact, topcut,rpatients)
#topcdc1_nronly=gettopinteract(cellsoi, cdc1_interact, topcut,nrpatients)

In [None]:
#cdc1_interact['proliferating CD4-positive T cell']

In [None]:
#cdc1_interact['proliferating CD4-positive T cell'].loc[cdc1_interact['proliferating CD4-positive T cell'].sum(axis=1)>0,:]

In [None]:
def prep_tables(cdc1_interact,rpatients,nrpatients,mypcut=0.3):
    """Tabulate interaction detections per cell type and compare two patient groups.

    Parameters
    ----------
    cdc1_interact : dict
        Cell-type name -> DataFrame with interactions as rows and patients as
        columns. Any non-missing entry counts as "detected in that patient".
    rpatients, nrpatients : list
        Column labels of the two patient groups being compared.
    mypcut : float, default 0.3
        An interaction is selected when at least one of its per-cell-type
        Fisher p-values is <= this cut-off.

    Returns
    -------
    list
        ``[cdc1tab, cdc1tabr, cdc1tabnr, selected, pvalues]``: detection counts
        over all patients, over ``rpatients`` and over ``nrpatients``
        (interactions x cell types, restricted to interactions with more than
        one detection in total); the selected interaction names ordered by
        their smallest p-value; and a Series of two-sided Fisher p-values keyed
        by ``'<interaction>|<cell type>'``.
    """
    celltypes=list(cdc1_interact.keys())

    def _count_detected(patients=None):
        # Number of patients with a non-missing value, per interaction and cell type.
        counts=pd.concat(
            [(v if patients is None else v.loc[:,patients]).notna().sum(axis=1)
             for v in cdc1_interact.values()],
            axis=1)
        counts.columns=celltypes
        return counts

    # Keep interactions detected more than once in total (summed over cell types).
    cdc1tab=_count_detected()
    cdc1tab=cdc1tab.loc[cdc1tab.sum(axis=1)>1,:]
    cdc1tabr=_count_detected(rpatients).loc[cdc1tab.index,:]
    cdc1tabnr=_count_detected(nrpatients).loc[cdc1tab.index,:]

    n_r=len(rpatients)
    n_nr=len(nrpatients)

    # Test interactions whose summed detections reach half the group size in either group.
    frequent=(cdc1tabr.sum(axis=1)>=n_r/2)|(cdc1tabnr.sum(axis=1)>=n_nr/2)
    min_detected=min(n_r/2,n_nr/2)

    myps={}
    best_p={}
    for inter in cdc1tab.index[frequent]:
        r_row=cdc1tabr.loc[inter,:]
        nr_row=cdc1tabnr.loc[inter,:]
        for cell in r_row.index[(r_row+nr_row)>=min_detected]:
            hits_r=int(r_row[cell])
            hits_nr=int(nr_row[cell])
            # 2x2 contingency table: detected vs. NOT detected in each group.
            # The second column must be n - hits; using the group totals there
            # (as the previous version did) does not describe a valid table.
            table=np.array([[hits_r,n_r-hits_r],[hits_nr,n_nr-hits_nr]])
            pval=fisher_exact(table)[1]
            myps[inter+'|'+cell]=pval
            best_p[inter]=min(pval,best_p.get(inter,pval))

    # Unique interactions with at least one p-value below the cut-off,
    # in a deterministic order (most significant first).
    selected=[inter for inter,pval in sorted(best_p.items(),key=lambda kv: kv[1]) if pval<=mypcut]
    toreturn=[cdc1tab,cdc1tabr,cdc1tabnr,selected,pd.Series(myps,dtype=float)]
    return(toreturn)



In [None]:
cellsoi

Transform into tables that give, for each interaction and cell type, the number of patients in which the interaction was detected; keep only interactions with at least two detections in total (summed over all cell types of interest).

In [None]:
[cdc1tab,cdc1tabr,cdc1tabnr,selected,myps]=prep_tables(cdc1_interact,rpatients,nrpatients,0.3)


In [None]:
[cdc1tab2,cdc1tabbraf,cdc1tabnras,selected2,myps2]=prep_tables(cdc1_interact,brafpatients,nraspatients,0.3)

In [None]:
[cdc1tab_rev,cdc1tabr_rev,cdc1tabnr_rev,selected_rev,myps_rev]=prep_tables(cdc1_interact_rev,rpatients,nrpatients,0.3)
#cdc1tab_rev=pd.concat([v.apply(lambda x: len(x.dropna()),axis=1) for v in cdc1_interact_rev.values()],axis=1)/len(mypatients)
#cdc1tab_rev.columns=list(cdc1_interact_rev.keys())


In [None]:
[cdc1tab_rev2,cdc1tabbraf_rev,cdc1tabnras_rev,selected_rev2,myps_rev2]=prep_tables(cdc1_interact_rev,brafpatients,nraspatients,0.3)


In [None]:
cdc1tabr

In [None]:
# Fisher p-values (R vs NR, forward direction) at or below 0.3, most significant
# first: written as a tab-separated file and displayed.
fisher_p=pd.Series(myps)
fisher_hits=fisher_p[fisher_p<=0.3].sort_values()
fisher_hits.to_csv(figdir+'/CellphoneDB-'+ofc.replace(" ", "_")+'_pvals_fisher.csv', sep='\t')
fisher_hits

In [None]:
# Fisher p-values (BRAF vs NRAS, forward direction) at or below 0.3, most
# significant first: written as a tab-separated file and displayed.
fisher_p=pd.Series(myps2)
fisher_hits=fisher_p[fisher_p<=0.3].sort_values()
fisher_hits.to_csv(figdir+'/CellphoneDB-'+ofc.replace(" ", "_")+'_pvals_fisher_mutations.csv', sep='\t')
fisher_hits

In [None]:
# Fisher p-values (R vs NR, reverse direction) at or below 0.3, most significant
# first: written as a tab-separated file and displayed.
fisher_p=pd.Series(myps_rev)
fisher_hits=fisher_p[fisher_p<=0.3].sort_values()
fisher_hits.to_csv(figdir+'/CellphoneDB-'+ofc.replace(" ", "_")+'_rev_pvals_fisher.csv', sep='\t')
fisher_hits

In [None]:
# Fisher p-values (BRAF vs NRAS, reverse direction) at or below 0.3, most
# significant first: written as a tab-separated file and displayed.
fisher_p=pd.Series(myps_rev2)
fisher_hits=fisher_p[fisher_p<=0.3].sort_values()
fisher_hits.to_csv(figdir+'/CellphoneDB-'+ofc.replace(" ", "_")+'_rev_pvals_fisher_mutations.csv', sep='\t')
fisher_hits

In [None]:
# Convert detection counts into the fraction of patients of each group in which
# an interaction was detected.
# NOTE: every table is rebound in place, so this cell must be run exactly once
# after the prep_tables cells; re-running it divides the values again.
cdc1tab=cdc1tab/len(mypatients)
cdc1tabr=cdc1tabr/len(rpatients)
cdc1tabnr=cdc1tabnr/len(nrpatients)

cdc1tab_rev=cdc1tab_rev/len(mypatients)
cdc1tabr_rev=cdc1tabr_rev/len(rpatients)
cdc1tabnr_rev=cdc1tabnr_rev/len(nrpatients)

# The BRAF / NRAS tables are normalised from their OWN counts. The previous
# version divided the already-normalised R / NR tables here, which silently
# replaced the mutation tables with rescaled responder tables.
cdc1tabbraf=cdc1tabbraf/len(brafpatients)
cdc1tabnras=cdc1tabnras/len(nraspatients)

cdc1tabbraf_rev=cdc1tabbraf_rev/len(brafpatients)
cdc1tabnras_rev=cdc1tabnras_rev/len(nraspatients)

In [None]:
# Export the detection-fraction tables (forward and reverse), dropping
# interactions whose row sum is zero.
out_prefix=figdir+'/CellphoneDB-'+ofc.replace(" ", "_")
cdc1tab.loc[cdc1tab.sum(axis=1)>0,:].to_csv(out_prefix+'_maxsum_1.csv', sep='\t')
cdc1tab_rev.loc[cdc1tab_rev.sum(axis=1)>0,:].to_csv(out_prefix+'_rev_maxsum_1.csv', sep='\t')

In [None]:
#cdc1tab_rev.loc['XCR1_XCL1',:].sort_values()

In [None]:
#cdc1tab.loc['XCR1_XCL1',:].sort_values()

#### Retain only those expressed relatively highly in celltype of interest

In [None]:
selected

In [None]:
selected2

In [None]:
selected_rev

In [None]:
selected_rev2

In [None]:
# Genes listed in topDE[ofc] that are also the first partner ("A" in "A_B") of
# a selected forward interaction.
# NOTE(review): assumes pair names contain exactly one "_" separator - confirm.
de_names=set(topDE[ofc].sort_values('Log2FC', ascending=False)['Name'])
first_partners={pair.split("_")[0] for pair in selected}
specgenes=de_names & first_partners

In [None]:
# Genes listed in topDE[ofc] that are also the first partner of an interaction
# selected in the BRAF vs NRAS comparison (forward direction).
de_names=set(topDE[ofc].sort_values('Log2FC', ascending=False)['Name'])
first_partners={pair.split("_")[0] for pair in selected2}
specgenesmut=de_names & first_partners

In [None]:
# Genes listed in topDE[ofc] that are also the second partner ("B" in "A_B")
# of a selected reverse-direction interaction.
de_names=set(topDE[ofc].sort_values('Log2FC', ascending=False)['Name'])
second_partners={pair.split("_")[1] for pair in selected_rev}
specgenes_rev=de_names & second_partners

In [None]:
# Genes listed in topDE[ofc] that are also the second partner of an interaction
# selected in the BRAF vs NRAS comparison (reverse direction).
de_names=set(topDE[ofc].sort_values('Log2FC', ascending=False)['Name'])
second_partners={pair.split("_")[1] for pair in selected_rev2}
specgenes_rev_mut=de_names & second_partners

In [None]:
specgenes

In [None]:
specgenesmut

In [None]:
specgenes_rev

In [None]:
specgenes_rev_mut

In [None]:
sc.settings.set_figure_params()

In [None]:
sc.pl.dotplot(tildata,var_names=list(specgenesmut.union(specgenes_rev_mut)),groupby='celltype3_pub')

In [None]:
sc.pl.dotplot(tildata,var_names=list(specgenes.union(specgenes_rev)),groupby='celltype3_pub')

In [None]:
tildata.obs['PatRCat']=tildata.obs['PatientID'].astype(str)+'_'+tildata.obs['RCat'].astype(str)

In [None]:
myorder=['29_NR_nadj','33_NR_nadj','86_NR_nadj','87_NR_nadj',
         '91_NR_nadj','43_NR_adj','64_NR_adj','77_NR_adj','11_TF',
         '13_TF','34_TF','67_TF','68_TF','69_TF','79_TF','82_TF','83_TF','40_R','63_R','72_R']

In [None]:
specgenes_rev

In [None]:
list(specgenes.union(specgenes_rev))

In [None]:
#sc.pl.matrixplot(tildata[tildata.obs['celltype3_pub']==ofc],categories_order=myorder,
#                 var_names=list(specgenes.union(specgenes_rev)),groupby='PatRCat', standard_scale='var')

In [None]:
# Keep only the selected interactions whose partner on the side of the cell
# type of interest is in specgenes / specgenes_rev
# (first partner for forward pairs, second partner for reverse pairs).
selectedk=[pair for pair in selected if pair.split('_')[0] in specgenes]

selectedk_rev=[pair for pair in selected_rev if pair.split('_')[1] in specgenes_rev]

In [None]:
# Same filter for the BRAF vs NRAS selections: keep pairs whose first (forward)
# or second (reverse) partner is in specgenesmut / specgenes_rev_mut.
selectedkmut=[pair for pair in selected2 if pair.split('_')[0] in specgenesmut]

selectedk_revmut=[pair for pair in selected_rev2 if pair.split('_')[1] in specgenes_rev_mut]

In [None]:
selectedk_rev

In [None]:
selectedk

In [None]:
pa=7
pb=5

In [None]:
# Clustered heatmap of the all-patient detection fractions for the retained
# reverse-direction interactions (rows) across cell types (columns); saved as PDF.
#plt.figure(figsize=(6,24))
maxcut=2  # only referenced by the commented-out alternatives in this cell
sns.set(font_scale=0.7)
#fig=sns.clustermap(cdc1tab_rev.loc[cdc1tab_rev.apply(lambda x: x.sum(),axis=1)>maxcut,:],figsize=(7,18))
fig=sns.clustermap(cdc1tab_rev.loc[selectedk_rev,:],figsize=(pa,pb))
#fig.savefig(figdir+'/CellphoneDB-'+ofc.replace(" ", "_")+'_rev_selected_fisherp03.pdf') 
plt.setp(fig.ax_heatmap.yaxis.get_majorticklabels(), rotation=0)  # keep row labels horizontal
fig.savefig(figdir+'/CellphoneDB-'+ofc.replace(" ", "_")+'_rev_selected_fisherp03_alldata.pdf') 
#fig.savefig(figdir+'/CellphoneDB-'+ofc.replace(" ", "_")+'_rev_maxsum_'+str(maxcut)+'.pdf') 

In [None]:
# Export the R / NR ratio of detection fractions for the reverse and forward
# tables. A pseudocount of 0.01 on both sides avoids division by zero.
# Despite the "_selected_" file names, the full tables are written here.
ratio_prefix=figdir+'/CellphoneDB-ratio_'+ofc.replace(" ", "_")
ratio_rev=(cdc1tabr_rev+0.01)/(0.01+cdc1tabnr_rev)
ratio_fwd=(cdc1tabr+0.01)/(0.01+cdc1tabnr)
ratio_rev.to_csv(ratio_prefix+'_rev_selected_fisherp03-RvsNRonly.csv')
ratio_fwd.to_csv(ratio_prefix+'_selected_fisherp03-RvsNRonly.csv')


In [None]:
# Export the per-group detection-fraction tables (R and NR, forward and reverse).
ratio_prefix=figdir+'/CellphoneDB-ratio_'+ofc.replace(" ", "_")
for _suffix, _table in (('_selected_fisherp03-Rs.csv', cdc1tabr),
                        ('_selected_fisherp03-NRs.csv', cdc1tabnr),
                        ('_rev_selected_fisherp03-Rs.csv', cdc1tabr_rev),
                        ('_rev_selected_fisherp03-NRs.csv', cdc1tabnr_rev)):
    _table.to_csv(ratio_prefix+_suffix)

In [None]:
# Table of the retained interactions with their direction ('For' = forward,
# 'Rev' = reverse), saved next to the figures.
# Building the frames from a dict names the columns directly and also works
# when one of the selections is empty (the positional construction followed by
# a two-name column assignment raised in that case).
revt=pd.DataFrame({'Inter':list(selectedk_rev),'Dir':'Rev'})
tab=pd.DataFrame({'Inter':list(selectedk),'Dir':'For'})
pd.concat([tab,revt]).to_csv(figdir+'/CellphoneDB-selected_'+ofc.replace(" ", "_")+'_fisherp03-Interactions.csv')

In [None]:
pd.concat([tab,revt])

In [None]:
#cdc1tabr_rev

In [None]:
# R / NR ratio of detection fractions for the retained reverse-direction
# interactions (0.01 pseudocount on both sides), shown as log10 on a diverging
# colour map clipped to [-2, 2].
myratio=(cdc1tabr_rev.loc[selectedk_rev,:]+0.01)/(0.01+cdc1tabnr_rev.loc[selectedk_rev,:])

#fig=sns.clustermap(np.log10(myratio),figsize=(7,18),cmap='bwr')
#fig.savefig(figdir+'/CellphoneDB-'+ofc.replace(" ", "_")+'_maxsum_'+str(maxcut)+'-RvsNRonly.pdf') 
fig=sns.clustermap(np.log10(myratio),figsize=(pa,pb),cmap='bwr', vmax=2, vmin=-2)
#fig.savefig(figdir+'/CellphoneDB-'+ofc.replace(" ", "_")+'_rev_selected_fisherp03-RvsNRonly.pdf') 
plt.setp(fig.ax_heatmap.yaxis.get_majorticklabels(), rotation=0)  # keep row labels horizontal
fig.savefig(figdir+'/CellphoneDB-'+ofc.replace(" ", "_")+'_rev_selected_fisherp03-RvsNRonly_alldata.pdf') 


In [None]:
# Clustered heatmap of the all-patient detection fractions for the reverse-direction
# interactions retained in the BRAF vs NRAS comparison; saved as PDF.
#plt.figure(figsize=(6,24))
maxcut=2  # only referenced by the commented-out alternatives in this cell
sns.set(font_scale=0.7)
pb=4  # rebinds the global figure height used by the following plotting cells
#fig=sns.clustermap(cdc1tab_rev.loc[cdc1tab_rev.apply(lambda x: x.sum(),axis=1)>maxcut,:],figsize=(7,18))
fig=sns.clustermap(cdc1tab_rev.loc[selectedk_revmut,:],figsize=(pa,pb))
#fig.savefig(figdir+'/CellphoneDB-'+ofc.replace(" ", "_")+'_rev_selected_mut_fisherp03.pdf')
plt.setp(fig.ax_heatmap.yaxis.get_majorticklabels(), rotation=0)  # keep row labels horizontal
fig.savefig(figdir+'/CellphoneDB-'+ofc.replace(" ", "_")+'_rev_selected_mut_fisherp03_alldata.pdf')
#fig.savefig(figdir+'/CellphoneDB-'+ofc.replace(" ", "_")+'_rev_maxsum_'+str(maxcut)+'.pdf') 

In [None]:
# BRAF / NRAS ratio of detection fractions for the retained reverse-direction
# interactions (0.01 pseudocount on both sides), shown as log10 on a diverging
# colour map clipped to [-2, 2].
myratio=(cdc1tabbraf_rev.loc[selectedk_revmut,:]+0.01)/(0.01+cdc1tabnras_rev.loc[selectedk_revmut,:])

#fig=sns.clustermap(np.log10(myratio),figsize=(7,18),cmap='bwr')
#fig.savefig(figdir+'/CellphoneDB-'+ofc.replace(" ", "_")+'_maxsum_'+str(maxcut)+'-RvsNRonly.pdf') 
fig=sns.clustermap(np.log10(myratio),figsize=(pa,pb),cmap='bwr', vmax=2, vmin=-2)
#fig.savefig(figdir+'/CellphoneDB-'+ofc.replace(" ", "_")+'_rev_selected_fisherp03-mutationComp.pdf') 
plt.setp(fig.ax_heatmap.yaxis.get_majorticklabels(), rotation=0)  # keep row labels horizontal
fig.savefig(figdir+'/CellphoneDB-'+ofc.replace(" ", "_")+'_rev_selected_fisherp03-mutationComp_alldata.pdf') 


In [None]:
#cdc1tab.loc[cdc1tab.apply(lambda x: x.sum(),axis=1)>maxcut,:]['CCR7-positive myeloid dendritic cell'].sort_values()

In [None]:
# Clustered heatmap of the all-patient detection fractions for the retained
# forward-direction interactions; saved as PDF.
#plt.figure(figsize=(6,24))
maxcut=2  # only referenced by the commented-out alternatives in this cell
sns.set(font_scale=0.7)
pb=4  # rebinds the global figure height used by the following plotting cells
#fig=sns.clustermap(cdc1tab.loc[cdc1tab.apply(lambda x: x.sum(),axis=1)>maxcut,:],figsize=(7,18))
#fig.savefig(figdir+'/CellphoneDB-'+ofc.replace(" ", "_")+'_maxsum_'+str(maxcut)+'.pdf') 
fig=sns.clustermap(cdc1tab.loc[selectedk,:],figsize=(pa,pb))
#fig.savefig(figdir+'/CellphoneDB-'+ofc.replace(" ", "_")+'_selected_fisherp03.pdf') 
plt.setp(fig.ax_heatmap.yaxis.get_majorticklabels(), rotation=0)  # keep row labels horizontal
fig.savefig(figdir+'/CellphoneDB-'+ofc.replace(" ", "_")+'_selected_fisherp03_alldata.pdf') 

In [None]:
#sc.pl.dotplot(allcells[allcells.obs['PatientID'].isin(mypatients)], 
#              var_names=['CAMP', 'FPR2'],groupby=['celltype3_pub'], dot_max=0.1)

In [None]:
#myindex=fig.dendrogram_row.reordered_ind.copy()

In [None]:
# Clustered heatmap of the detection fractions among the `rpatients` group for
# the retained forward-direction interactions; saved as PDF.
#fig=sns.clustermap(cdc1tabr.loc[cdc1tab.apply(lambda x: x.sum(),axis=1)>maxcut,:].iloc[myindex,:],row_cluster=False,figsize=(7,18))
#fig.savefig(figdir+'/CellphoneDB-'+ofc.replace(" ", "_")+'_maxsum_'+str(maxcut)+'-Ronly.pdf') 
fig=sns.clustermap(cdc1tabr.loc[selectedk,:],figsize=(pa,pb))
plt.setp(fig.ax_heatmap.yaxis.get_majorticklabels(), rotation=0)  # keep row labels horizontal
fig.savefig(figdir+'/CellphoneDB-'+ofc.replace(" ", "_")+'_selected_fisherp03-Ronly.pdf') 


In [None]:
# Clustered heatmap of the detection fractions among the `nrpatients` group for
# the retained forward-direction interactions; saved as PDF.
#fig=sns.clustermap(cdc1tabnr.loc[cdc1tab.apply(lambda x: x.sum(),axis=1)>maxcut,:].iloc[myindex,:],row_cluster=False,figsize=(7,18))
#fig.savefig(figdir+'/CellphoneDB-'+ofc.replace(" ", "_")+'_maxsum_'+str(maxcut)+'-NRonly.pdf') 
fig=sns.clustermap(cdc1tabnr.loc[selectedk,:],figsize=(pa,pb))
plt.setp(fig.ax_heatmap.yaxis.get_majorticklabels(), rotation=0)  # keep row labels horizontal
fig.savefig(figdir+'/CellphoneDB-'+ofc.replace(" ", "_")+'_selected_fisherp03-NRonly.pdf') 


In [None]:
#myratio=(cdc1tabr.loc[cdc1tab.apply(lambda x: x.sum(),axis=1)>maxcut,:].iloc[myindex,:]+0.01)/(0.01+cdc1tabnr.loc[cdc1tab.apply(lambda x: x.sum(),axis=1)>maxcut,:].iloc[myindex,:])

In [None]:
# R / NR ratio of detection fractions for the retained forward-direction
# interactions (0.01 pseudocount on both sides), shown as log10 on a diverging
# colour map clipped to [-2, 2].
myratio=(cdc1tabr.loc[selectedk,:]+0.01)/(0.01+cdc1tabnr.loc[selectedk,:])

#fig=sns.clustermap(np.log10(myratio),figsize=(7,18),cmap='bwr')
#fig.savefig(figdir+'/CellphoneDB-'+ofc.replace(" ", "_")+'_maxsum_'+str(maxcut)+'-RvsNRonly.pdf') 
fig=sns.clustermap(np.log10(myratio),figsize=(pa,pb),cmap='bwr', vmax=2, vmin=-2)
plt.setp(fig.ax_heatmap.yaxis.get_majorticklabels(), rotation=0)  # keep row labels horizontal
fig.savefig(figdir+'/CellphoneDB-'+ofc.replace(" ", "_")+'_selected_fisherp03-RvsNRonly.pdf') 


In [None]:
# Clustered heatmap of the all-patient detection fractions for the forward-direction
# interactions retained in the BRAF vs NRAS comparison; saved as PDF.
#plt.figure(figsize=(6,24))
maxcut=2  # only referenced by the commented-out alternatives in this cell
sns.set(font_scale=0.7)
pb=5  # rebinds the global figure height used by the following plotting cells
#fig=sns.clustermap(cdc1tab_rev.loc[cdc1tab_rev.apply(lambda x: x.sum(),axis=1)>maxcut,:],figsize=(7,18))
fig=sns.clustermap(cdc1tab.loc[selectedkmut,:],figsize=(pa,pb))
#fig.savefig(figdir+'/CellphoneDB-'+ofc.replace(" ", "_")+'_rev_selected_mut_fisherp03.pdf')
plt.setp(fig.ax_heatmap.yaxis.get_majorticklabels(), rotation=0)  # keep row labels horizontal
fig.savefig(figdir+'/CellphoneDB-'+ofc.replace(" ", "_")+'_selected_mut_fisherp03_alldata.pdf')
#fig.savefig(figdir+'/CellphoneDB-'+ofc.replace(" ", "_")+'_rev_maxsum_'+str(maxcut)+'.pdf') 

In [None]:
# Ratio of the cdc1tabbraf and cdc1tabnras tables for the retained forward-direction
# interactions (0.01 pseudocount on both sides), shown as log10.
# NOTE(review): the colour scale is clipped to [-0.8, 0.8] here, while the other
# ratio heatmaps use [-2, 2] - confirm this is intentional. Also verify that
# cdc1tabbraf / cdc1tabnras really hold BRAF / NRAS fractions at this point
# (check the normalisation cell that assigns them).
myratio=(cdc1tabbraf.loc[selectedkmut,:]+0.01)/(0.01+cdc1tabnras.loc[selectedkmut,:])

#fig=sns.clustermap(np.log10(myratio),figsize=(7,18),cmap='bwr')
#fig.savefig(figdir+'/CellphoneDB-'+ofc.replace(" ", "_")+'_maxsum_'+str(maxcut)+'-RvsNRonly.pdf') 
fig=sns.clustermap(np.log10(myratio),figsize=(pa,pb),cmap='bwr',vmax=0.8,vmin=-0.8)
plt.setp(fig.ax_heatmap.yaxis.get_majorticklabels(), rotation=0)  # keep row labels horizontal

fig.savefig(figdir+'/CellphoneDB-'+ofc.replace(" ", "_")+'_selected_fisherp03-mutationComp.pdf') 


In [None]:
# Reload the R / NR ratio tables written earlier for the three dendritic-cell
# populations and index each by interaction name. The 'interacting_pair' column
# is kept as a column and dropped later.
# Forward direction:
mycb1=pd.read_csv(figdir+'CellphoneDB-ratio_CCR7-positive_myeloid_dendritic_cell_selected_fisherp03-RvsNRonly.csv', sep=',').set_index('interacting_pair', drop=False)
mycb2=pd.read_csv(figdir+'CellphoneDB-ratio_CD141-positive_myeloid_dendritic_cell_selected_fisherp03-RvsNRonly.csv', sep=',').set_index('interacting_pair', drop=False)
mycb3=pd.read_csv(figdir+'CellphoneDB-ratio_CD1c-positive_myeloid_dendritic_cell_selected_fisherp03-RvsNRonly.csv', sep=',').set_index('interacting_pair', drop=False)

# Reverse direction:
mycb1r=pd.read_csv(figdir+'CellphoneDB-ratio_CCR7-positive_myeloid_dendritic_cell_rev_selected_fisherp03-RvsNRonly.csv', sep=',').set_index('interacting_pair', drop=False)
mycb2r=pd.read_csv(figdir+'CellphoneDB-ratio_CD141-positive_myeloid_dendritic_cell_rev_selected_fisherp03-RvsNRonly.csv', sep=',').set_index('interacting_pair', drop=False)
mycb3r=pd.read_csv(figdir+'CellphoneDB-ratio_CD1c-positive_myeloid_dendritic_cell_rev_selected_fisherp03-RvsNRonly.csv', sep=',').set_index('interacting_pair', drop=False)

In [None]:
mycbpv1=pd.read_csv(figdir+'CellphoneDB-selected_CCR7-positive_myeloid_dendritic_cell_fisherp03-Interactions.csv', sep=',')
mycbpv2=pd.read_csv(figdir+'CellphoneDB-selected_CD141-positive_myeloid_dendritic_cell_fisherp03-Interactions.csv', sep=',')
mycbpv3=pd.read_csv(figdir+'CellphoneDB-selected_CD1c-positive_myeloid_dendritic_cell_fisherp03-Interactions.csv', sep=',')

#mycbpv1=pd.read_csv(figdir+'CellphoneDB-CCR7-positive_myeloid_dendritic_cell_rev_pvals_fisher.csv', sep='\t')
#mycbpv2=pd.read_csv(figdir+'CellphoneDB-CD141-positive_myeloid_dendritic_cell_rev_pvals_fisher.csv', sep='\t')
#mycbpv3=pd.read_csv(figdir+'CellphoneDB-CD1c-positive_myeloid_dendritic_cell_rev_pvals_fisher.csv', sep='\t')

In [None]:
oi=list(set(pd.concat([mycbpv1,mycbpv2,mycbpv3])['Inter']))
coi=oi.copy()

In [None]:
#oi=['P2RY6_NAMPT', 'TNFRSF1A_GRN', 'CD40_TNFSF13B', 'XCR1_XCL1', 'CD58_CD2', 'LGALS9_HAVCR2'
#'IL15_IL15R', 'TNFRSF14_BTLA', 'TNFRSF14_LTA']


In [None]:
#pd.concat([mycbpv1,mycbpv2,mycbpv3]).loc[:,['Inter','Dir']].to_csv(figdir+'CellphoneDB-selected_allcDC_Interactions.csv')
pd.concat([mycbpv1,mycbpv2,mycbpv3]).loc[:,['Inter','Dir']]

In [None]:
# CCR7+ DC ratio tables restricted to the interactions of interest. Row labels
# get an 'aDC: ' prefix for the forward table and ':aDC ' for the reverse table.
fwd_keep=set(mycb1.index) & set(oi)
a1=mycb1.loc[list(fwd_keep),:]
a1.index='aDC: '+a1.index

rev_keep=set(mycb1r.index) & set(oi)
a1r=mycb1r.loc[list(rev_keep),:]
a1r.index=':aDC '+a1r.index

In [None]:
# CD141+ DC ratio tables restricted to the interactions of interest. Row labels
# get a 'cDC1: ' prefix for the forward table and ':cDC1 ' for the reverse table.
fwd_keep=set(mycb2.index) & set(oi)
a2=mycb2.loc[list(fwd_keep),:]
a2.index='cDC1: '+a2.index

rev_keep=set(mycb2r.index) & set(oi)
a2r=mycb2r.loc[list(rev_keep),:]
a2r.index=':cDC1 '+a2r.index

In [None]:
# CD1c+ DC ratio tables restricted to the interactions of interest. Row labels
# get a 'cDC2: ' prefix for the forward table and ':cDC2 ' for the reverse table.
fwd_keep=set(mycb3.index) & set(oi)
a3=mycb3.loc[list(fwd_keep),:]
a3.index='cDC2: '+a3.index

rev_keep=set(mycb3r.index) & set(oi)
a3r=mycb3r.loc[list(rev_keep),:]
a3r.index=':cDC2 '+a3r.index

In [None]:
#toplot=pd.concat([a1,a2,a3, a1r,a2r, a3r ]).drop(columns='interacting_pair')
toplot=pd.concat([a1,a2,a3 ]).drop(columns='interacting_pair')
toplotr=pd.concat([a1r,a2r,a3r ]).drop(columns='interacting_pair')

In [None]:
(np.log10(toplot).max(axis=1)+np.log10(toplot).min(axis=1))[(np.log10(toplot).max(axis=1)+np.log10(toplot).min(axis=1))>1.25]

In [None]:
# Per row: largest plus smallest log10 ratio across columns. Rows above 1.25 go
# to toshow, rows below -1.25 go to toshow2.
log_fwd=np.log10(toplot)
skew_fwd=log_fwd.max(axis=1)+log_fwd.min(axis=1)
toshow=skew_fwd[skew_fwd>1.25]
toshow2=skew_fwd[skew_fwd<-1.25]

In [None]:
# Rows whose largest absolute log10 ratio, added to itself, exceeds 3
# (i.e. peak |log10 ratio| > 1.5).
# NOTE(review): max is added to max here, whereas toshow / toshow2 use max + min -
# confirm this is intended.
peak_fwd=np.abs(np.log10(toplot)).max(axis=1)
doubled_peak_fwd=peak_fwd+peak_fwd
toshow3=doubled_peak_fwd[doubled_peak_fwd>3]
#toshow4=(np.log10(toplot).max(axis=1)+np.log10(toplot).max(axis=1))[(np.log10(toplot).max(axis=1)+np.log10(toplot).max(axis=1))>2.5]


In [None]:
# Reverse direction. Per row: largest plus smallest log10 ratio across columns.
# Rows above 1.25 go to toshowr, rows below -1.25 go to toshow2r.
log_rev=np.log10(toplotr)
skew_rev=log_rev.max(axis=1)+log_rev.min(axis=1)
toshowr=skew_rev[skew_rev>1.25]
toshow2r=skew_rev[skew_rev<-1.25]

In [None]:
# Reverse direction. Rows whose largest absolute log10 ratio, added to itself,
# exceeds 3 (i.e. peak |log10 ratio| > 1.5).
peak_rev=np.abs(np.log10(toplotr)).max(axis=1)
doubled_peak_rev=peak_rev+peak_rev
toshow3r=doubled_peak_rev[doubled_peak_rev>3]
#toshow4=(np.log10(toplot).max(axis=1)+np.log10(toplot).max(axis=1))[(np.log10(toplot).max(axis=1)+np.log10(toplot).max(axis=1))>2.5]


In [None]:
# Forward rows to plot: in toshow or toshow2, and also in toshow3.
# NOTE: this rebinds `a1`, which earlier held the CCR7+ DC table used to build
# `toplot`; re-running the `toplot` cell after this one gives different input.
a1=np.log10(toplot).loc[list((set(toshow2.index).union(toshow.index)).intersection(set(toshow3.index))),:]

In [None]:
fig=sns.clustermap(a1,
                   figsize=(10,4),cmap='bwr', vmax=1.5, vmin=-1.5)

In [None]:
# Reverse rows to plot: in toshowr or toshow2r, and also in toshow3r.
# NOTE: this rebinds `a2`, which earlier held the CD141+ DC table used to build
# `toplot`; re-running the `toplot` cell after this one gives different input.
a2=np.log10(toplotr).loc[list((set(toshow2r.index).union(toshowr.index)).intersection(set(toshow3r.index))),:]

In [None]:
fig=sns.clustermap(a2,
                   figsize=(10,5),cmap='bwr', vmax=1.5, vmin=-1.5)

In [None]:
# Final dendritic-cell figure: forward (a1) and reverse (a2) log10 ratios stacked
# into one clustered heatmap, colour scale clipped to [-1.5, 1.5]; saved as
# PDF, EPS and SVG.
fig=sns.clustermap(pd.concat([a1,a2]),
                   figsize=(8,8),cmap='bwr', vmax=1.5, vmin=-1.5)
fig.savefig(figdir+'/CellphoneDB-FinalFig_cDC.pdf', bbox_inches="tight", dpi=300) 
fig.savefig(figdir+'/CellphoneDB-FinalFig_cDC.eps', format='eps', bbox_inches="tight", dpi=300)
fig.savefig(figdir+'/CellphoneDB-FinalFig_cDC.svg', format='svg', bbox_inches="tight", dpi=300)

In [None]:
ofc='MSR1-positive macrophage'

In [None]:
# Reload the R / NR ratio tables written earlier for classical monocytes and the
# three macrophage populations, and index each by interaction name. The
# 'interacting_pair' column is kept as a column and dropped later.
# Forward direction:
mycb1=pd.read_csv(figdir+'CellphoneDB-ratio_classical_monocyte_selected_fisherp03-RvsNRonly.csv', sep=',').set_index('interacting_pair', drop=False)
mycb2=pd.read_csv(figdir+'CellphoneDB-ratio_MSR1-positive_macrophage_selected_fisherp03-RvsNRonly.csv', sep=',').set_index('interacting_pair', drop=False)
mycb3=pd.read_csv(figdir+'CellphoneDB-ratio_MARCO-positive_macrophage_selected_fisherp03-RvsNRonly.csv', sep=',').set_index('interacting_pair', drop=False)
mycb4=pd.read_csv(figdir+'CellphoneDB-ratio_CXCL9-positive_macrophage_selected_fisherp03-RvsNRonly.csv', sep=',').set_index('interacting_pair', drop=False)

# Reverse direction:
mycb1r=pd.read_csv(figdir+'CellphoneDB-ratio_classical_monocyte_rev_selected_fisherp03-RvsNRonly.csv', sep=',').set_index('interacting_pair', drop=False)
mycb2r=pd.read_csv(figdir+'CellphoneDB-ratio_MSR1-positive_macrophage_rev_selected_fisherp03-RvsNRonly.csv', sep=',').set_index('interacting_pair', drop=False)
mycb3r=pd.read_csv(figdir+'CellphoneDB-ratio_MARCO-positive_macrophage_rev_selected_fisherp03-RvsNRonly.csv', sep=',').set_index('interacting_pair', drop=False)
mycb4r=pd.read_csv(figdir+'CellphoneDB-ratio_CXCL9-positive_macrophage_rev_selected_fisherp03-RvsNRonly.csv', sep=',').set_index('interacting_pair', drop=False)



# Retained-interaction tables (columns 'Inter' and 'Dir') saved earlier for each
# monocyte / macrophage population.
mycbpv1=pd.read_csv(figdir+'CellphoneDB-selected_classical_monocyte_fisherp03-Interactions.csv', sep=',')
mycbpv2=pd.read_csv(figdir+'CellphoneDB-selected_MSR1-positive_macrophage_fisherp03-Interactions.csv', sep=',')
mycbpv3=pd.read_csv(figdir+'CellphoneDB-selected_MARCO-positive_macrophage_fisherp03-Interactions.csv', sep=',')
mycbpv4=pd.read_csv(figdir+'CellphoneDB-selected_CXCL9-positive_macrophage_fisherp03-Interactions.csv', sep=',')


# Interactions of interest: union over ALL four populations. The CXCL9 table
# (mycbpv4) was loaded but previously left out, so interactions selected only
# for CXCL9-positive macrophages never reached the a4 / a4r tables below.
oi=list(set(pd.concat([mycbpv1,mycbpv2,mycbpv3,mycbpv4])['Inter']))

moi=oi.copy()
#oi=['P2RY6_NAMPT', 'TNFRSF1A_GRN', 'CD40_TNFSF13B', 'XCR1_XCL1', 'CD58_CD2', 'LGALS9_HAVCR2'
#'IL15_IL15R', 'TNFRSF14_BTLA', 'TNFRSF14_LTA']




# Restrict every ratio table to the interactions of interest and prefix the row
# labels with a short population tag ('tag: ' for forward tables, ':tag ' for
# reverse tables).
# Forward direction:
a1=mycb1.loc[list(set(mycb1.index) & set(oi)),:]
a1.index='mono: '+a1.index
a2=mycb2.loc[list(set(mycb2.index) & set(oi)),:]
a2.index='macroMSR1: '+a2.index
a3=mycb3.loc[list(set(mycb3.index) & set(oi)),:]
a3.index='macroMARCO: '+a3.index
a4=mycb4.loc[list(set(mycb4.index) & set(oi)),:]
a4.index='macroCXCL9: '+a4.index

# Reverse direction:
a1r=mycb1r.loc[list(set(mycb1r.index) & set(oi)),:]
a1r.index=':mono '+a1r.index
a2r=mycb2r.loc[list(set(mycb2r.index) & set(oi)),:]
a2r.index=':macroMSR1 '+a2r.index
a3r=mycb3r.loc[list(set(mycb3r.index) & set(oi)),:]
a3r.index=':macroMARCO '+a3r.index
a4r=mycb4r.loc[list(set(mycb4r.index) & set(oi)),:]
a4r.index=':macroCXCL9 '+a4r.index

# Stack the populations; the helper column is no longer needed once the index is set.
forward_tables=[a1,a2,a3,a4]
reverse_tables=[a1r,a2r,a3r,a4r]
toplot=pd.concat(forward_tables).drop(columns='interacting_pair')
toplotr=pd.concat(reverse_tables).drop(columns='interacting_pair')



In [None]:
# Save and display the retained interactions of all four monocyte / macrophage
# populations. The CXCL9 table (mycbpv4) is loaded above and is now included so
# the "allMacroMono" export matches its name.
mono_macro_selected=pd.concat([mycbpv1,mycbpv2,mycbpv3,mycbpv4]).loc[:,['Inter','Dir']]
mono_macro_selected.to_csv(figdir+'CellphoneDB-selected_allMacroMono_Interactions.csv')
mono_macro_selected

In [None]:
# Forward direction. Per row: largest plus smallest log10 ratio across columns
# (cut-offs of +/-1.75), and the doubled largest absolute log10 ratio (cut-off 3).
log_fwd=np.log10(toplot)
skew_fwd=log_fwd.max(axis=1)+log_fwd.min(axis=1)
toshow=skew_fwd[skew_fwd>1.75]
toshow2=skew_fwd[skew_fwd<-1.75]

peak_fwd=np.abs(log_fwd).max(axis=1)
doubled_peak_fwd=peak_fwd+peak_fwd
toshow3=doubled_peak_fwd[doubled_peak_fwd>3]

# Reverse direction, same criteria.
log_rev=np.log10(toplotr)
skew_rev=log_rev.max(axis=1)+log_rev.min(axis=1)
toshowr=skew_rev[skew_rev>1.75]
toshow2r=skew_rev[skew_rev<-1.75]

peak_rev=np.abs(log_rev).max(axis=1)
doubled_peak_rev=peak_rev+peak_rev
toshow3r=doubled_peak_rev[doubled_peak_rev>3]

# Rows to plot: in toshow or toshow2, and also in toshow3 (likewise for reverse).
keep_fwd=(set(toshow2.index) | set(toshow.index)) & set(toshow3.index)
a1=log_fwd.loc[list(keep_fwd),:]

#fig=sns.clustermap(a1,
#                   figsize=(10,4),cmap='bwr', vmax=1.5, vmin=-1.5)

keep_rev=(set(toshow2r.index) | set(toshowr.index)) & set(toshow3r.index)
a2=log_rev.loc[list(keep_rev),:]

#fig=sns.clustermap(a2,
#                   figsize=(10,5),cmap='bwr', vmax=1.5, vmin=-1.5)


In [None]:

# Final monocyte / macrophage figure: forward (a1) and reverse (a2) log10 ratios
# stacked into one clustered heatmap, colour scale clipped to [-2, 2]; saved as
# PDF, EPS and SVG.
fig=sns.clustermap(pd.concat([a1,a2]),
                   figsize=(8,9),cmap='bwr', vmax=2, vmin=-2)
fig.savefig(figdir+'/CellphoneDB-FinalFig_MonoMacro.pdf', bbox_inches="tight", dpi=300) 
fig.savefig(figdir+'/CellphoneDB-FinalFig_MonoMacro.eps', format='eps', bbox_inches="tight", dpi=300)
fig.savefig(figdir+'/CellphoneDB-FinalFig_MonoMacro.svg', format='svg', bbox_inches="tight", dpi=300)

##### Intersect with differential expression results

In [None]:
cdde=['CD8_cytseceffCD8-TILonly','CD8_EM-TILonly','CD8_Exh-TILonly',
      'CD8_NKlikeT-TILonly','CD8_Prolif-TILonly','CD8T-all-noNK-TILonly','CD8TandNK-TILonly','NK-all-TILonly',
      'NK_CD56brightcytokineNK-TILonly','NK_CD56dimcytotoxNK-TILonly','CD4T-all-TILonly',
     "CD4T-AllnoTregMAIT-TILonly","CD4T-effector-TILonly","CD4T-naiveCM-TILonly"]

In [None]:
    import gseapy
    #gseapy.get_library_name()
    
    dbs=['KEGG_2021_Human','GO_Biological_Process_2021','Reactome_2016',
     'Human_Gene_Atlas','WikiPathways_2019_Human','ChEA_2016',
     'CellMarker_Augmented_2021','Azimuth_Cell_Types_2021','COVID-19_Related_Gene_Sets_2021',
     'MSigDB_Hallmark_2020']


In [None]:
sns.set(font_scale=0.8)

In [None]:
figdir+'enrichr/'

In [None]:
    # Restrict the gene-set libraries to MSigDB Hallmark only; this replaces the
    # longer `dbs` list assigned a few cells earlier.
    dbs=[
     'MSigDB_Hallmark_2020']

In [None]:
# For every gene-set library in `dbs`: load the Enrichr reports (R-high and
# NR-high gene lists) of three CD4 T-cell comparisons, keep the terms whose
# Combined Score differs consistently between NR and R, and save a clustered
# heatmap as SVG.
# NOTE: the variable names (mono*/macro*/cdc*) do not describe their content in
# this cell. Here mono* = CD4T-AllnoTregMAIT, macro* = CD4T-effector and
# cdc* = CD4T-naiveCM.
for mydb in dbs:
    monor=pd.read_csv(figdir+'enrichr/CD4T-AllnoTregMAIT-TILonly.Rhigh/'+mydb+'.human.enrichr.reports.txt', sep='\t')
    mononr=pd.read_csv(figdir+'enrichr/CD4T-AllnoTregMAIT-TILonly.NRhigh/'+mydb+'.human.enrichr.reports.txt', sep='\t')

    macror=pd.read_csv(figdir+'enrichr/CD4T-effector-TILonly.Rhigh/'+mydb+'.human.enrichr.reports.txt', sep='\t')
    macronr=pd.read_csv(figdir+'enrichr/CD4T-effector-TILonly.NRhigh/'+mydb+'.human.enrichr.reports.txt', sep='\t')

    cdcr=pd.read_csv(figdir+'enrichr/CD4T-naiveCM-TILonly.Rhigh/'+mydb+'.human.enrichr.reports.txt', sep='\t')
    cdcnr=pd.read_csv(figdir+'enrichr/CD4T-naiveCM-TILonly.NRhigh/'+mydb+'.human.enrichr.reports.txt', sep='\t')
  
    # Keep term and score, copy the score into a uniquely named column and index
    # each report by term so the reports can be aligned column-wise below.
    monor=monor.loc[:,['Term','Combined Score']]
    mononr=mononr.loc[:,['Term','Combined Score']]
    mononr['allNR']=mononr['Combined Score']
    monor['allR']=monor['Combined Score']
    mononr.index=list(mononr['Term'])
    monor.index=list(monor['Term'])

    macror=macror.loc[:,['Term','Combined Score']]
    macronr=macronr.loc[:,['Term','Combined Score']]
    macronr['effNR']=macronr['Combined Score']
    macror['effR']=macror['Combined Score']
    macronr.index=list(macronr['Term'])
    macror.index=list(macror['Term'])
    
    
    cdcr=cdcr.loc[:,['Term','Combined Score']]
    cdcnr=cdcnr.loc[:,['Term','Combined Score']]
    cdcnr['naiNR']=cdcnr['Combined Score']
    cdcr['naiR']=cdcr['Combined Score']
    cdcnr.index=list(cdcnr['Term'])
    cdcr.index=list(cdcr['Term'])


    # Align the six score columns on term; a term missing from a report scores 0.
    toplot=pd.concat([mononr, monor, macronr,macror, cdcr, cdcnr], axis=1).loc[:,['allNR','allR','effNR','effR', 'naiR','naiNR']].fillna(0)
    # Keep terms scoring above 20 in at least one column.
    toplot=toplot.loc[toplot.max(axis=1)>20,:]
        #toplot[toplot>500]=500
    #sns.clustermap(np.log1p(toplot).transpose(), figsize=(8,1.5))
    #
    
    # Keep terms where NR - R is above 10 in all three comparisons, or below -10
    # in all three.
    toplot=toplot.loc[(((toplot['allNR']-toplot['allR'])>10) &((toplot['effNR']-toplot['effR'])>10)&((toplot['naiNR']-toplot['naiR'])>10))|(((toplot['allNR']-toplot['allR'])<-10) &((toplot['effNR']-toplot['effR'])<-10)&((toplot['naiNR']-toplot['naiR'])<-10)),:]
    # log1p-scaled scores, comparisons as rows and terms as columns.
    tmp_plot=sns.clustermap(np.log1p(toplot).transpose(), figsize=(8,2))
    #fig = tmp_plot.get_figure()
    #tmp_plot.savefig(figdir+'enrichr/CD4Tcell-leiden-velo-TIL-TILonly-'+mydb+'.human.enrichr.pdf', bbox_inches="tight", dpi=300)
    #tmp_plot.savefig(figdir+'enrichr/CD4Tcell-leiden-velo-TIL-TILonly-'+mydb+'.human.enrichr.eps', format='eps', bbox_inches="tight", dpi=300)
    tmp_plot.savefig(figdir+'enrichr/CD4Tcell-leiden-velo-TIL-TILonly-'+mydb+'.human.enrichr.svg', format='svg', bbox_inches="tight", dpi=300)


In [None]:
# For every gene-set library in `dbs`: load the Enrichr reports (R-high and
# NR-high gene lists) of three CD8 T-cell comparisons, keep the terms whose
# Combined Score differs consistently between NR and R, and save a clustered
# heatmap as SVG.
# NOTE: the variable names (mono*/macro*/cdc*) do not describe their content in
# this cell. Here mono* = CD8_cytseceffCD8, macro* = CD8_Exh and cdc* = CD8_EM.
for mydb in dbs:
    monor=pd.read_csv(figdir+'enrichr/CD8_cytseceffCD8-TILonly.Rhigh/'+mydb+'.human.enrichr.reports.txt', sep='\t')
    mononr=pd.read_csv(figdir+'enrichr/CD8_cytseceffCD8-TILonly.NRhigh/'+mydb+'.human.enrichr.reports.txt', sep='\t')

    macror=pd.read_csv(figdir+'enrichr/CD8_Exh-TILonly.Rhigh/'+mydb+'.human.enrichr.reports.txt', sep='\t')
    macronr=pd.read_csv(figdir+'enrichr/CD8_Exh-TILonly.NRhigh/'+mydb+'.human.enrichr.reports.txt', sep='\t')

    cdcr=pd.read_csv(figdir+'enrichr/CD8_EM-TILonly.Rhigh/'+mydb+'.human.enrichr.reports.txt', sep='\t')
    cdcnr=pd.read_csv(figdir+'enrichr/CD8_EM-TILonly.NRhigh/'+mydb+'.human.enrichr.reports.txt', sep='\t')
    

    # Keep term and score, copy the score into a uniquely named column and index
    # each report by term so the reports can be aligned column-wise below.
    monor=monor.loc[:,['Term','Combined Score']]
    mononr=mononr.loc[:,['Term','Combined Score']]
    mononr['cytokineNR']=mononr['Combined Score']
    monor['cytokineR']=monor['Combined Score']
    mononr.index=list(mononr['Term'])
    monor.index=list(monor['Term'])

    macror=macror.loc[:,['Term','Combined Score']]
    macronr=macronr.loc[:,['Term','Combined Score']]
    macronr['exhNR']=macronr['Combined Score']
    macror['exhR']=macror['Combined Score']
    macronr.index=list(macronr['Term'])
    macror.index=list(macror['Term'])
    
    
    cdcr=cdcr.loc[:,['Term','Combined Score']]
    cdcnr=cdcnr.loc[:,['Term','Combined Score']]
    cdcnr['emNR']=cdcnr['Combined Score']
    cdcr['emR']=cdcr['Combined Score']
    cdcnr.index=list(cdcnr['Term'])
    cdcr.index=list(cdcr['Term'])


    # Align the six score columns on term; a term missing from a report scores 0.
    toplot=pd.concat([macronr,macror, mononr, monor, cdcr, cdcnr], axis=1).loc[:,['cytokineNR','cytokineR','emNR','emR', 'exhR','exhNR']].fillna(0)
    # Keep terms scoring above 20 in at least one column.
    toplot=toplot.loc[toplot.max(axis=1)>20,:]
        #toplot[toplot>500]=500
    #sns.clustermap(np.log1p(toplot).transpose(), figsize=(8,1.5))
    #
    
    # Keep terms where NR - R is above 10 in all three comparisons, or below -10
    # in all three.
    toplot=toplot.loc[(((toplot['cytokineNR']-toplot['cytokineR'])>10) &((toplot['emNR']-toplot['emR'])>10)&((toplot['exhNR']-toplot['exhR'])>10))|(((toplot['exhNR']-toplot['exhR'])<-10) &((toplot['cytokineNR']-toplot['cytokineR'])<-10)&((toplot['emNR']-toplot['emR'])<-10)),:]
    # log1p-scaled scores, comparisons as rows and terms as columns.
    tmp_plot=sns.clustermap(np.log1p(toplot).transpose(), figsize=(8,2))
    #fig = tmp_plot.get_figure()
    #tmp_plot.savefig(figdir+'enrichr/CD8Tcell-leiden-velo-TIL-TILonly-'+mydb+'.human.enrichr.pdf', bbox_inches="tight", dpi=300)
    #tmp_plot.savefig(figdir+'enrichr/CD8Tcell-leiden-velo-TIL-TILonly-'+mydb+'.human.enrichr.eps', format='eps', bbox_inches="tight", dpi=300)
    tmp_plot.savefig(figdir+'enrichr/CD8Tcell-leiden-velo-TIL-TILonly-'+mydb+'.human.enrichr.svg', format='svg', bbox_inches="tight", dpi=300)


In [None]:
mycd='CD8_cytseceffCD8-TILonly'

In [None]:

#cdde=['CD8_cytseceffCD8-TILonly','CD8_EM-TILonly','CD8_Exh-TILonly',
#      'CD8_NKlikeT-TILonly','CD8_Prolif-TILonly','CD8TandNK-TILonly',
#      'NK_CD56brightcytokineNK-TILonly','NK_CD56dimcytotoxNK-TILonly','cd4tcells-TILonly',
#     'CD4T-effector-TILonly'] #"CD4T-AllnoTregMAIT-TILonly", ,"CD4T-naiveCM-TILonly"

In [None]:
cdde=['CD8_cytseceffCD8-TILonly','CD8_EM-TILonly','CD8_Exh-TILonly',
      'CD8_NKlikeT-TILonly','CD8_Prolif-TILonly','CD8T-all-noNK-TILonly','CD8TandNK-TILonly','NK-all-TILonly',
      'NK_CD56brightcytokineNK-TILonly','NK_CD56dimcytotoxNK-TILonly','CD4T-all-TILonly',
     "CD4T-AllnoTregMAIT-TILonly","CD4T-effector-TILonly","CD4T-naiveCM-TILonly"]

In [None]:
cdde_CD4=['CD4T-all-TILonly',
     "CD4T-AllnoTregMAIT-TILonly","CD4T-effector-TILonly","CD4T-naiveCM-TILonly"]
cdde=cdde_CD4.copy()

In [None]:
cddenk=['NK-all-TILonly',
 'NK_CD56brightcytokineNK-TILonly',
 'NK_CD56dimcytotoxNK-TILonly']
cdde=cddenk.copy()

In [None]:
cdde_no_NK=['CD8_cytseceffCD8-TILonly',
 'CD8_EM-TILonly',
 'CD8_Exh-TILonly',
 'CD8_NKlikeT-TILonly',
 'CD8_Prolif-TILonly',
 'CD8T-all-noNK-TILonly']
cdde=cdde_no_NK.copy()

In [None]:
# Comparison names whose DE tables are loaded by the loop in the next cell.
# This is the last of several consecutive `cdde` assignments, so on a top-to-bottom
# run it is the one in effect. It holds the CD4 T, NK and CD8 T comparisons,
# i.e. the earlier 14-entry list without 'CD8TandNK-TILonly'.
cdde=['CD4T-all-TILonly',
     "CD4T-AllnoTregMAIT-TILonly","CD4T-effector-TILonly","CD4T-naiveCM-TILonly",
     'NK-all-TILonly',
 'NK_CD56brightcytokineNK-TILonly',
 'NK_CD56dimcytotoxNK-TILonly','CD8_cytseceffCD8-TILonly',
 'CD8_EM-TILonly',
 'CD8_Exh-TILonly',
 'CD8_NKlikeT-TILonly',
 'CD8_Prolif-TILonly',
 'CD8T-all-noNK-TILonly']

In [None]:
# Load the R-high and NR-high DE tables of every comparison in `cdde`, tag each
# row with its comparison name and stack them into alldesR / alldesNR.
# The frames are collected in lists and concatenated once, instead of growing a
# DataFrame with pd.concat on every iteration.
de_r_frames=[]
de_nr_frames=[]
for mycd in cdde:
    cdcNRgenes=pd.read_csv(figdir+'DEanalysis_wilcoxon.'+mycd+'.NRhigh.tsv',sep='\t')
    cdcRgenes=pd.read_csv(figdir+'DEanalysis_wilcoxon.'+mycd+'.Rhigh.tsv',sep='\t')

    cdcNRgenes['Comparison']=mycd
    cdcRgenes['Comparison']=mycd
    de_r_frames.append(cdcRgenes)
    de_nr_frames.append(cdcNRgenes)

# pd.concat raises on an empty list, so fall back to an empty frame as before.
alldesR=pd.concat(de_r_frames) if de_r_frames else pd.DataFrame(None)
alldesNR=pd.concat(de_nr_frames) if de_nr_frames else pd.DataFrame(None)

In [None]:
cdde

In [None]:
# Stringent R-high gene set: values of the 'Unnamed: 0' column (presumably gene
# names - confirm) from rows flagged in 'LN-both', in 'Brain-both' OR 'Other-both',
# in 'Nadj' and in 'Adj', restricted to values occurring in more than 6 rows of
# alldesR.
strde=set(alldesR.loc[(alldesR['LN-both']==1)&((alldesR['Brain-both']==1)|(alldesR['Other-both']==1))&(alldesR['Nadj']==1)&(alldesR['Adj']==1),'Unnamed: 0']).intersection(set(alldesR['Unnamed: 0'].value_counts()[alldesR['Unnamed: 0'].value_counts()>6].index))

In [None]:
strde

In [None]:
# Stringent NR-high gene set: values of the 'Unnamed: 0' column from rows flagged
# in 'LN-both', 'Brain-both', 'Other-both', 'Nadj' and 'Adj', restricted to values
# occurring in more than 6 rows of alldesNR.
# NOTE(review): 'Brain-both' and 'Other-both' are combined with AND here, while
# the R-high counterpart (strde) combines them with OR - confirm the asymmetry
# is intended.
strdenr=set(alldesNR.loc[(alldesNR['LN-both']==1)&((alldesNR['Brain-both']==1)&(alldesNR['Other-both']==1))&(alldesNR['Nadj']==1)&(alldesNR['Adj']==1),'Unnamed: 0']).intersection(set(alldesNR['Unnamed: 0'].value_counts()[alldesNR['Unnamed: 0'].value_counts()>6].index))

In [None]:
strdenr

In [None]:
strde=list(strde)+list(strdenr)

In [None]:
#strdecd8=strde.copy()
#strdenk=strde.copy()
#strdecd4=strde.copy()

In [None]:
# List the distinct celltype3_pub labels present in tildata (used to pick subsets below)
set(tildata.obs['celltype3_pub'])

In [None]:
# Re-apply scanpy's figure parameters with their default arguments before the matrix plots
sc.settings.set_figure_params()

In [None]:
# Stringent DE genes across all CD4 T, NK and CD8 T subsets of tildata, grouped by RCat;
# saved as an SVG via scanpy's `save` argument.
sc.pl.matrixplot(
    tildata[tildata.obs['celltype3_pub'].isin([
        # CD4 T cells
        'CD4-positive, cytotoxic T cell',
        'IL7R-max CD4-positive T cell',
        'central memory CD4-positive T cell',
        'effector memory CD4-positive T cell',
        'exhausted-like CD4-positive T cell',
        'naive CD4-positive T cell',
        'proliferating CD4-positive T cell',
        'regulatory T cell',
        # NK cells
        'cytotoxic CD56-dim natural killer cell',
        'proliferating NK cell',
        'CD56-bright cytokine secreting natural killer cell',
        # CD8 T cells
        'cytokine secreting effector CD8-positive T cell',
        'effector memory CD8-positive T cell',
        'mature NK T cell',
        'naive CD8-positive T cell',
        'proliferating CD8-positive T cell',
        'exhausted-like CD8-positive T cell',
    ])],
    var_names=list(strde),
    groupby='RCat',
    standard_scale='var',
    save='Stringent_DE.allTNK-TILonly.svg',
)

In [None]:
# Stringent DE genes for the CD4 T-cell subsets only, grouped by RCat.
# NOTE(review): strdecd4 is only assigned in a commented-out line (#strdecd4=strde.copy()) in
# the visible part of this notebook — confirm it is defined before running this cell.
sc.pl.matrixplot(
    tildata[tildata.obs['celltype3_pub'].isin([
        'CD4-positive, cytotoxic T cell',
        'IL7R-max CD4-positive T cell',
        'central memory CD4-positive T cell',
        'effector memory CD4-positive T cell',
        'exhausted-like CD4-positive T cell',
        'naive CD4-positive T cell',
        'proliferating CD4-positive T cell',
        'regulatory T cell',
    ])],
    var_names=list(strdecd4),
    groupby='RCat',
    standard_scale='var',
    save='Stringent_DE.CD4s-TILonly.svg',
)

In [None]:

# Stringent DE genes for the NK subsets only, grouped by RCat.
# NOTE(review): strdenk is only assigned in a commented-out line (#strdenk=strde.copy()) in
# the visible part of this notebook — confirm it is defined before running this cell.
sc.pl.matrixplot(
    tildata[tildata.obs['celltype3_pub'].isin([
        'cytotoxic CD56-dim natural killer cell',
        'proliferating NK cell',
        'CD56-bright cytokine secreting natural killer cell',
    ])],
    var_names=list(strdenk),
    groupby='RCat',
    standard_scale='var',
    save='Stringent_DE.NKs-TILonly.svg',
)

In [None]:
# Stringent DE genes for the CD8 T-cell subsets (no NK cells), grouped by RCat.
# NOTE(review): strdecd8 is only assigned in a commented-out line (#strdecd8=strde.copy()) in
# the visible part of this notebook — confirm it is defined before running this cell.
sc.pl.matrixplot(
    tildata[tildata.obs['celltype3_pub'].isin([
        'cytokine secreting effector CD8-positive T cell',
        'effector memory CD8-positive T cell',
        'mature NK T cell',
        'naive CD8-positive T cell',
        'proliferating CD8-positive T cell',
        'exhausted-like CD8-positive T cell',
    ])],
    var_names=list(strdecd8),
    groupby='RCat',
    standard_scale='var',
    save='Stringent_DE.CD8s_noNK-TILonly.svg',
)


In [None]:
# Names on either side of the "_"-separated entries of moi that are also listed in alldesNR
macroNR=(
    {pair.split("_")[1] for pair in list(moi)}
    |{pair.split("_")[0] for pair in list(moi)}
)&set(alldesNR['Unnamed: 0'])
macroNR

In [None]:
# Names on either side of the "_"-separated entries of coi that are also listed in alldesNR
cdcNR=(
    {pair.split("_")[1] for pair in list(coi)}
    |{pair.split("_")[0] for pair in list(coi)}
)&set(alldesNR['Unnamed: 0'])
cdcNR

In [None]:
# Names on either side of the "_"-separated entries of moi that are also listed in alldesR
macroR=(
    {pair.split("_")[1] for pair in list(moi)}
    |{pair.split("_")[0] for pair in list(moi)}
)&set(alldesR['Unnamed: 0'])
macroR

In [None]:
# Names on either side of the "_"-separated entries of coi that are also listed in alldesR
cdcR=(
    {pair.split("_")[1] for pair in list(coi)}
    |{pair.split("_")[0] for pair in list(coi)}
)&set(alldesR['Unnamed: 0'])
cdcR

In [None]:
#cdde=['CD8_cytseceffCD8-TILonly','CD8_EM-TILonly','CD8_Exh-TILonly',
#      'CD8_NKlikeT-TILonly','CD8_Prolif-TILonly','CD8TandNK-TILonly',
#      'NK_CD56brightcytokineNK-TILonly','NK_CD56dimcytotoxNK-TILonly']
# Subset label; the plotting cells below reassign subsetleg before building their file names
subsetleg='CD8_cytseceffCD8-TILonly'

In [None]:
# Responder-high DE rows whose gene is in macroR or cdcR
alldesR[alldesR['Unnamed: 0'].isin(list(macroR|cdcR))]

In [None]:
# Non-responder-high DE rows whose gene is in macroNR or cdcNR
alldesNR[alldesNR['Unnamed: 0'].isin(list(macroNR|cdcNR))]

In [None]:
# Re-apply scanpy's figure parameters with their default arguments
sc.settings.set_figure_params()

In [None]:
# Hand-picked genes of interest per cell subset, plotted in the cells below.
# Trailing comments keep the candidates that are currently left out of each list.
nkgoi = ['SELL', 'XCL1', 'CD2', 'CD74', 'CD44', 'ANXA1']
cd8goi = ['XCL1', 'CD44', 'SEMA4A', 'CD74', 'NOTCH1', 'FAS', 'TNFRSF14', 'NAMPT', 'ANXA1']  # left out: 'ICAM1', 'SPP1'
cd8nkgoi = ['CD44', 'BTLA', 'SELL', 'SEMA4A', 'NAMPT', 'FAS', 'TNFRSF14', 'CD74', 'ANXA1', 'AREG']  # left out: 'ICAM1', 'SPP1'
cd8exh = ['CD44', 'SPN', 'NAMPT', 'FAS', 'TNFRSF14', 'CD74', 'ANXA1']  # left out: 'SPP1'
emgoi = ['CD74', 'ANXA1', 'HLA-DPB1']  # left out: 'SPP1'
cytcd8goi = ['XCL1', 'TNFRSF14', 'CD74', 'ANXA1', 'ITGAL', 'HLA-DPB1']  # left out: 'SPP1'
cd4goi = ['BTLA', 'TNFRSF1B', 'HAVCR2', 'ANXA1']  # left out: 'SPP1', TNF (CD4T-AllnoTregMAIT-TILonly)
cd4goiIL7R = ['BTLA', 'GRN', 'HAVCR2', 'ANXA1']  # left out: 'SPP1', TNF (CD4T-AllnoTregMAIT-TILonly)
cd4effgoi = ['HAVCR2', 'ANXA1']  # left out: TNF
cd4naigoi = ['SELPLG', 'ANXA1']  # left out: TNF

In [None]:
# NK genes of interest in the NK subsets of tildata, grouped by RCat
subsetleg='NKs'
sc.pl.matrixplot(
    tildata[tildata.obs['celltype3_pub'].isin([
        'cytotoxic CD56-dim natural killer cell',
        'proliferating NK cell',
        'CD56-bright cytokine secreting natural killer cell',
    ])],
    var_names=nkgoi,
    groupby='RCat',
    standard_scale='var',
    save='CellphoneDB_and_DE.'+subsetleg+'-TILonly.svg',
)


In [None]:
# Naive / central-memory CD4 genes of interest, grouped by RCat
subsetleg='CD4_nai'
sc.pl.matrixplot(
    tildata[tildata.obs['celltype3_pub'].isin([
        'central memory CD4-positive T cell',
        'naive CD4-positive T cell',
    ])],
    var_names=cd4naigoi,
    groupby='RCat',
    standard_scale='var',
    save='CellphoneDB_and_DE.'+subsetleg+'-TILonly.svg',
)


In [None]:
# Effector-type CD4 genes of interest, grouped by RCat
subsetleg='CD4_eff'
sc.pl.matrixplot(
    tildata[tildata.obs['celltype3_pub'].isin([
        'CD4-positive, cytotoxic T cell',
        'proliferating CD4-positive T cell',
        'exhausted-like CD4-positive T cell',
        'effector memory CD4-positive T cell',
    ])],
    var_names=cd4effgoi,
    groupby='RCat',
    standard_scale='var',
    save='CellphoneDB_and_DE.'+subsetleg+'-TILonly.svg',
)


In [None]:
# CD4 genes of interest (cd4goiIL7R) in the six CD4 subsets listed below, grouped by RCat
subsetleg='CD4_allnoTregMAIT'
sc.pl.matrixplot(
    tildata[tildata.obs['celltype3_pub'].isin([
        'CD4-positive, cytotoxic T cell',
        'proliferating CD4-positive T cell',
        'central memory CD4-positive T cell',
        'exhausted-like CD4-positive T cell',
        'effector memory CD4-positive T cell',
        'naive CD4-positive T cell',
    ])],
    var_names=cd4goiIL7R,
    groupby='RCat',
    standard_scale='var',
    save='CellphoneDB_and_DE.'+subsetleg+'-TILonly.svg',
)


In [None]:
# CD4 genes of interest (cd4goi) in all CD4 subsets, including IL7R-max and regulatory T cells
subsetleg='CD4_all'
sc.pl.matrixplot(
    tildata[tildata.obs['celltype3_pub'].isin([
        'IL7R-max CD4-positive T cell',
        'CD4-positive, cytotoxic T cell',
        'proliferating CD4-positive T cell',
        'central memory CD4-positive T cell',
        'exhausted-like CD4-positive T cell',
        'effector memory CD4-positive T cell',
        'regulatory T cell',
        'naive CD4-positive T cell',
    ])],
    var_names=cd4goi,
    groupby='RCat',
    standard_scale='var',
    save='CellphoneDB_and_DE.'+subsetleg+'-TILonly.svg',
)


In [None]:
# Genes of interest in cytokine-secreting effector CD8 T cells, grouped by RCat
subsetleg='CD8_cytseceffCD8'
sc.pl.matrixplot(
    tildata[tildata.obs['celltype3_pub'].isin(['cytokine secreting effector CD8-positive T cell'])],
    var_names=cytcd8goi,
    groupby='RCat',
    standard_scale='var',
    save='CellphoneDB_and_DE.'+subsetleg+'-TILonly.svg',
)


In [None]:
# Stringent DE genes in cytokine-secreting effector CD8 T cells.
# The file name reuses subsetleg as set by the preceding cell.
sc.pl.matrixplot(
    tildata[tildata.obs['celltype3_pub'].isin(['cytokine secreting effector CD8-positive T cell'])],
    var_names=list(strde),
    groupby='RCat',
    standard_scale='var',
    save='Stringent_DE.'+subsetleg+'-TILonly.svg',
)


In [None]:
# Genes of interest in effector memory CD8 T cells, grouped by RCat.
# Fix: subsetleg used to be 'CD8_EM-TILonly' while the save pattern appends '-TILonly' again,
# giving '...CD8_EM-TILonly-TILonly.svg'. The label now follows the other subset cells
# ('NKs', 'CD4_all', 'CD8_cytseceffCD8'), so the suffix appears once. The following
# Stringent_DE cell reuses subsetleg and therefore gets the corrected name as well.
subsetleg='CD8_EM'
sc.pl.matrixplot(
    tildata[tildata.obs['celltype3_pub'].isin(['effector memory CD8-positive T cell'])],
    var_names=emgoi,
    groupby='RCat',
    standard_scale='var',
    save='CellphoneDB_and_DE.'+subsetleg+'-TILonly.svg',
)


In [None]:
# Stringent DE genes in effector memory CD8 T cells.
# The file name reuses subsetleg as set by the preceding cell.
sc.pl.matrixplot(
    tildata[tildata.obs['celltype3_pub'].isin(['effector memory CD8-positive T cell'])],
    var_names=list(strde),
    groupby='RCat',
    standard_scale='var',
    save='Stringent_DE.'+subsetleg+'-TILonly.svg',
)


In [None]:
# Genes of interest in exhausted-like CD8 T cells, grouped by RCat.
# Fix: subsetleg used to be 'CD8_Exh-TILonly' while the save pattern appends '-TILonly' again,
# giving '...CD8_Exh-TILonly-TILonly.svg'. The suffix now appears once; the following
# Stringent_DE cell reuses subsetleg and gets the corrected name as well.
subsetleg='CD8_Exh'
sc.pl.matrixplot(
    tildata[tildata.obs['celltype3_pub'].isin(['exhausted-like CD8-positive T cell'])],
    var_names=cd8exh,
    groupby='RCat',
    standard_scale='var',
    save='CellphoneDB_and_DE.'+subsetleg+'-TILonly.svg',
)


In [None]:
# Stringent DE genes in exhausted-like CD8 T cells.
# The file name reuses subsetleg as set by the preceding cell.
sc.pl.matrixplot(
    tildata[tildata.obs['celltype3_pub'].isin(['exhausted-like CD8-positive T cell'])],
    var_names=list(strde),
    groupby='RCat',
    standard_scale='var',
    save='Stringent_DE.'+subsetleg+'-TILonly.svg',
)


In [None]:
# CD8 genes of interest across all CD8 T-cell subsets (no NK cells), grouped by RCat.
# Fix: subsetleg used to be 'CD8T-all-noNK-TILonly' while the save pattern appends '-TILonly'
# again, giving '...CD8T-all-noNK-TILonly-TILonly.svg'. The suffix now appears once.
subsetleg='CD8T-all-noNK'
sc.pl.matrixplot(
    tildata[tildata.obs['celltype3_pub'].isin([
        'cytokine secreting effector CD8-positive T cell',
        'effector memory CD8-positive T cell',
        'mature NK T cell',
        'naive CD8-positive T cell',
        'proliferating CD8-positive T cell',
        'exhausted-like CD8-positive T cell',
    ])],
    var_names=cd8goi,
    groupby='RCat',
    standard_scale='var',
    save='CellphoneDB_and_DE.'+subsetleg+'-TILonly.svg',
)


In [None]:
# Re-apply scanpy's figure parameters with their default arguments
sc.settings.set_figure_params()

In [None]:
# Stringent DE genes across the CD8 T-cell and NK subsets, grouped by RCat.
# NOTE(review): the file name is built from subsetleg as left by the preceding CD8 "noNK" cell,
# although this plot includes the NK subsets — confirm the intended output name.
sc.pl.matrixplot(
    tildata[tildata.obs['celltype3_pub'].isin([
        'cytokine secreting effector CD8-positive T cell',
        'proliferating NK cell',
        'effector memory CD8-positive T cell',
        'mature NK T cell',
        'naive CD8-positive T cell',
        'proliferating CD8-positive T cell',
        'exhausted-like CD8-positive T cell',
        'cytotoxic CD56-dim natural killer cell',
        'CD56-bright cytokine secreting natural killer cell',
    ])],
    var_names=list(strde),
    groupby='RCat',
    standard_scale='var',
    save='Stringent_DE.'+subsetleg+'-TILonly.svg',
)


In [None]:
### Prepare means
# Aggregate the cytokine-secreting effector CD8 T cells with bc.get_means, passing 'PatientID'
# and the RCat condition; condlist fixes the category order used in the box plots below.
condition='RCat'
#condlist=list(set(cdata.obs[condition]))
condlist=['R','TF','NR_nadj','NR_adj']
mean,fct=bc.get_means(
    tildata[tildata.obs['celltype3_pub'].isin(['cytokine secreting effector CD8-positive T cell'])],
    'PatientID',
    condition,
)


In [None]:

# Box plot for the first gene of cytcd8goi, split by condition in condlist order
bc.pl.box_per_ind(mean, cytcd8goi[0], condition, order=condlist)

In [None]:

# Box plot for the second gene of cytcd8goi, split by condition in condlist order
bc.pl.box_per_ind(mean, cytcd8goi[1], condition, order=condlist)

In [None]:
# Dot plot of the combined CD8/NK genes of interest across the CD8 T-cell and NK subsets.
# Fix: the comma between 'proliferating NK cell' and 'effector memory CD8-positive T cell' was
# missing, so Python joined the two adjacent literals into one label that matches no cell type
# and the effector memory CD8 T cells were silently left out of the plot.
sc.pl.dotplot(
    tildata[tildata.obs['celltype3_pub'].isin([
        'cytokine secreting effector CD8-positive T cell',
        'proliferating NK cell',
        'effector memory CD8-positive T cell',
        'mature NK T cell',
        'naive CD8-positive T cell',
        'proliferating CD8-positive T cell',
        'exhausted-like CD8-positive T cell',
        'cytotoxic CD56-dim natural killer cell',
        'CD56-bright cytokine secreting natural killer cell',
    ])],
    var_names=list(set(cd8nkgoi).union(set(cd8exh).union(set(emgoi)).union(set(cytcd8goi)))),
    groupby='celltype3_pub',
)

In [None]:


# Matrix plot and dot plot of the first 60 entries of strDE in cdata, grouped by RCat.
# NOTE(review): neither cdata nor strDE is assigned in the visible part of this notebook, and
# the saved file name carries an 'NRhigh' tag — confirm both before running.
sc.pl.matrixplot(cdata,var_names=list(strDE)[0:60], groupby='RCat', standard_scale='var',save='DEanalysis_wilcoxon.'+subsetleg+'-TILonly.NRhigh.pdf')


sc.pl.dotplot(cdata,var_names=list(strDE)[0:60], groupby='RCat')


In [None]:
# Second space-separated word of the part before "_" in each toplotr index label,
# intersected with the genes listed in cdcRgenes
{label.split('_')[0].split(" ")[1] for label in list(toplotr.index)}&set(cdcRgenes['Unnamed: 0'])

In [None]:
# Which of these candidate genes are listed in cdcNRgenes?
set(cdcNRgenes['Unnamed: 0'])&{'P2RY6','TNFRSF1A','HBEGF','CCL20'}

In [None]:
# Is NAMPT listed in cdcNRgenes?
set(cdcNRgenes['Unnamed: 0'])&{'NAMPT'}

In [None]:
#cdcNRgenes

In [None]:
# DE genes (NR-high or R-high) that are also in specgenes
b=(set(cdcNRgenes['Unnamed: 0'])|set(cdcRgenes['Unnamed: 0'])).intersection(specgenes)
#b=(set(cd8NRgenes['Unnamed: 0']).union(set(cd8Rgenes['Unnamed: 0']))).intersection(specgenes)
b

In [None]:
#set(cd8NRgenes['Unnamed: 0'])

In [None]:
# DE genes (NR-high or R-high) that are also in specgenes_rev
a=(set(cdcNRgenes['Unnamed: 0'])|set(cdcRgenes['Unnamed: 0'])).intersection(specgenes_rev)
#a=(set(cd8NRgenes['Unnamed: 0']).union(set(cd8Rgenes['Unnamed: 0']))).intersection(specgenes_rev)
a

In [None]:
# R-high genes that are also in specgenes.
# NOTE(review): cdcRgenes is unioned with itself, which is a no-op; if the NR-high table was
# meant here this duplicates `b`, otherwise the union can be dropped — confirm the intent.
(set(cdcRgenes['Unnamed: 0']).union(set(cdcRgenes['Unnamed: 0']))).intersection(specgenes)

In [None]:
# Expression of the picked candidates (a and b combined) in the cell type `ofc`,
# grouped by PatRCat with the category order given by myorder
sc.pl.matrixplot(
    tildata[tildata.obs['celltype3_pub']==ofc],
    categories_order=myorder,
    var_names=list(b.union(a)),
    groupby='PatRCat',
    standard_scale='var',
)

In [None]:
#sns.clustermap(topcdc1_rev.loc[topcdc1_rev,:],figsize=(7,18))
#fig.savefig(figdir+'/CellphoneDB-'+ofc.replace(" ", "_")+'_topcut_'+str(topcut)+'_rev.pdf') 

In [None]:
# Clustered heatmap of the topcdc1 rows of cdc1tab, saved as PDF.
# Fix: sns.clustermap creates its own figure and returns a ClusterGrid; the old code discarded
# it and called fig.savefig, which wrote whatever unrelated figure `fig` last pointed to.
# Saving through the returned grid stores the clustermap itself.
cdc1_grid=sns.clustermap(cdc1tab.loc[topcdc1,:],figsize=(7,18))
cdc1_grid.savefig(figdir+'/CellphoneDB-'+ofc.replace(" ", "_")+'_topcut_'+str(topcut)+'.pdf')

In [None]:

# Wilcoxon DE tables for CD8 T cells (no NKT), TIL only: NR-high and R-high genes
_cd8_de_prefix=figdir+'DEanalysis_wilcoxon.cd8tcells-noNKT-TILonly.'
cd8NRgenes=pd.read_csv(_cd8_de_prefix+'NRhigh.tsv',sep='\t')
cd8Rgenes=pd.read_csv(_cd8_de_prefix+'Rhigh.tsv',sep='\t')

In [None]:


# Folder holding the velocity-analysis tables read below.
# NOTE(review): hard-coded absolute path — consider deriving it from root_path/analysis_name.
inpath='/Fullanalysis/analyzed/sw_besca2_immune/velocity/publication/'


# Lineage gene tables starting at cluster 0; rows are kept where column 'to 5' exceeds 0.1.
# NOTE(review): lingenes0to5 is read from the '..._0to11.tsv' file — confirm the name/file pairing.
lingenes0to6=pd.read_csv(inpath+'Lingenes_CD8Tcell-All_PBMCandTIL_rbdata_leiden_0to6.tsv',sep='\t')
lingenes0to5=pd.read_csv(inpath+'Lingenes_CD8Tcell-All_PBMCandTIL_nrbdata_leiden_0to11.tsv',sep='\t')
lingenes0to6=lingenes0to6.loc[lingenes0to6['to 5']>0.1]
lingenes0to5=lingenes0to5.loc[lingenes0to5['to 5']>0.1]

# Lineage gene tables starting at cluster 9 (responder: 9to0, non-responder: 9to11);
# each table keeps the rows where its own 'to 5' column exceeds 0.1.
# Fix: lingenes9to0R was filtered with a boolean mask computed from the already-filtered
# lingenes9to11NR, i.e. rows were selected by another table's values (or the lookup failed on
# the mismatching index). Each table is now filtered by its own column.
lingenes9to0R=pd.read_csv(inpath+'Lingenes_CD8Tcell-All_PBMCandTIL_rbdata_leiden_9to0.tsv',sep='\t')
lingenes9to11NR=pd.read_csv(inpath+'Lingenes_CD8Tcell-All_PBMCandTIL_nrbdata_leiden_9to11.tsv',sep='\t')
lingenes9to11NR=lingenes9to11NR.loc[lingenes9to11NR['to 5']>0.1]
lingenes9to0R=lingenes9to0R.loc[lingenes9to0R['to 5']>0.1]

# Cluster 5-vs-6 DE tables (one file per cluster)
topc6genes=pd.read_csv(inpath+'DEclustergenes_5vs6_CD8Tcell-All_PBMCandTIL_subdata_leiden_6.tsv',sep='\t')
topc5genes=pd.read_csv(inpath+'DEclustergenes_5vs6_CD8Tcell-All_PBMCandTIL_subdata_leiden_5.tsv',sep='\t')

# Response DE tables within cluster 0; the NR table is read from the file tagged 'PD'
rgenesc0R=pd.read_csv(inpath+'DEresponsegenes_c0_CD8Tcell-All_PBMCandTIL_subdata_leiden_R.tsv',sep='\t')
rgenesc0NR=pd.read_csv(inpath+'DEresponsegenes_c0_CD8Tcell-All_PBMCandTIL_subdata_leiden_PD.tsv',sep='\t')

# Response DE tables within cluster 9
rgenesc9R=pd.read_csv(inpath+'DEresponsegenes_c9_CD8Tcell-All_PBMCandTIL_subdata_leiden_R.tsv',sep='\t')
rgenesc9NR=pd.read_csv(inpath+'DEresponsegenes_c9_CD8Tcell-All_PBMCandTIL_subdata_leiden_PD.tsv',sep='\t')

# Response DE tables within cluster 8
rgenesc8R=pd.read_csv(inpath+'DEresponsegenes_c8_CD8Tcell-All_PBMCandTIL_subdata_leiden_R.tsv',sep='\t')
rgenesc8NR=pd.read_csv(inpath+'DEresponsegenes_c8_CD8Tcell-All_PBMCandTIL_subdata_leiden_PD.tsv',sep='\t')

In [None]:
# Show the cluster-5 DE table
topc5genes

In [None]:
# Show the cluster-6 DE table
topc6genes

In [None]:
    # gseapy provides the Enrichr client used below; dbs lists the gene-set libraries to query
    import gseapy
    #gseapy.get_library_name()
    
    dbs=['KEGG_2021_Human','GO_Biological_Process_2021','Reactome_2016',
     'Human_Gene_Atlas','WikiPathways_2019_Human','ChEA_2016',
     'CellMarker_Augmented_2021','Azimuth_Cell_Types_2021','COVID-19_Related_Gene_Sets_2021',
     'MSigDB_Hallmark_2020']



In [None]:
    # Mean expression per gene over all cells of the raw matrix, stored as raw.var['MeanExpr'].
    # np.asarray(...).ravel() gives one value per gene whether raw.X is sparse (mean returns a
    # 1 x n_genes matrix) or dense (mean returns a 1-D array). The previous .tolist()[0] only
    # worked for the sparse case; on a dense matrix it picked a single scalar.
    cd8tcdata_full.raw.var['MeanExpr']=np.asarray(cd8tcdata_full.raw.X.mean(axis=0)).ravel()

In [None]:
# Enrichr over-representation analysis of the cluster-6-high and cluster-5-high genes against
# every library in dbs.
# The background (symbols of genes with MeanExpr > 0.01) does not depend on the library, so it
# is built once here instead of twice per loop iteration.
enrichr_background=list(cd8tcdata_full.raw.var.loc[cd8tcdata_full.raw.var['MeanExpr']>0.01]['SYMBOL'])
for j in dbs:
    gseapy.enrichr(gene_list=list(topc6genes.Name), description='DEanalysis_wilcoxon.veloCD8Tcells-6High-5vs6.Rhigh.tsv',
        gene_sets=j, background=enrichr_background,cutoff=0.05, format='png',outdir=figdir+'enrichr/veloCD8Tcells-6High-5vs6/')
    gseapy.enrichr(gene_list=list(topc5genes.Name), description='DEanalysis_wilcoxon.veloCD8Tcells-5High-5vs6.Rhigh.tsv',
        gene_sets=j, background=enrichr_background,cutoff=0.05, format='png',outdir=figdir+'enrichr/veloCD8Tcells-5High-5vs6/')


In [None]:
# Show `a` (the DE / specgenes_rev overlap computed in an earlier cell)
a

In [None]:
# Responder candidates: (cluster-0 R genes or CD8 R-high genes) that are also cluster-6 genes,
# plus every gene listed in lingenes9to0R
topR=(
    (set(rgenesc0R['Name'])|set(cd8Rgenes['Unnamed: 0']))
    &set(topc6genes['Name'])
)|set(lingenes9to0R['Unnamed: 0'])

In [None]:
# Non-responder candidates: (cluster-0 NR genes or CD8 NR-high genes) that are also cluster-5
# genes, plus every gene listed in lingenes9to11NR
topNR=(
    (set(rgenesc0NR['Name'])|set(cd8NRgenes['Unnamed: 0']))
    &set(topc5genes['Name'])
)|set(lingenes9to11NR['Unnamed: 0'])

In [None]:
# Drop genes that appear in both candidate sets so topR and topNR are disjoint
shared=topR&topNR
topNR=topNR.difference(shared)
topR=topR.difference(shared)

In [None]:
# Show the responder candidate genes
topR

In [None]:
# Non-responder candidates that are also in specgenes
topNR.intersection(specgenes)

In [None]:
# Genes of interest for the interaction lookups below, as independent copies of the
# candidate sets (previous choice: cdcNRgenes.copy() / cdcRgenes.copy())
oinr=set(topNR)
oir=set(topR)

In [None]:
# Show cdc1tab; its index entries are split on "_" by the matching cells below
cdc1tab

In [None]:
# Show cdc1tab again (same display as the previous cell)
cdc1tab

In [None]:
# NOTE(review): in the visible part of this notebook mymatchnr is first assigned in a later
# cell, so this display may fail on a fresh top-to-bottom run — confirm cell order.
mymatchnr

In [None]:
# Which of these genes are among the non-responder genes of interest?
oinr&{'MIF','TNFRSF17','CD70','CXCL13','CXCR5','CD74','CD72','APP'}

In [None]:
# Clustered heatmap of the cdc1tab rows listed in mymatchnr (figure is not saved)
sns.clustermap(cdc1tab.loc[mymatchnr],figsize=(10,8))

In [None]:
# Are CD70 / TNFRSF17 among the non-responder genes of interest?
oinr&{'CD70','TNFRSF17'}

In [None]:
# Example "_"-separated entry kept as a scratch value
x='ACKR1_CCL17'

In [None]:
# cdc1tab index entries whose part before "_" contains `i` (substring test).
# NOTE(review): `i` is not assigned in this cell; in the visible part of the notebook it is only
# bound as the loop variable of later `for i in ...` cells — confirm before running in order.
list(filter(lambda x:i in x.split('_')[0], list(cdc1tab.index)))

In [None]:
# cdc1tab entries whose part before "_" is exactly one of the non-responder genes of interest
mymatch=set()
for i in list(oinr):
    mymatch=mymatch.union({entry for entry in list(cdc1tab.index) if entry.split('_')[0]==i})
mymatchnr=list(mymatch)

In [None]:


# cdc1tab entries whose part before "_" is exactly one of the responder genes of interest
mymatch=set()
for i in list(oir):
    mymatch=mymatch.union({entry for entry in list(cdc1tab.index) if entry.split('_')[0]==i})
mymatchr=list(mymatch)



In [None]:
# Show the cdc1tab entries matched to the non-responder genes of interest
mymatchnr

In [None]:
# Show the cdc1tab entries matched to the responder genes of interest
mymatchr

In [None]:
# Clustered heatmap of the cdc1tab rows listed in mymatchr (figure is not saved)
sns.clustermap(cdc1tab.loc[mymatchr],figsize=(12,4))

In [None]:
# Clustered heatmap of the cdc1tab rows listed in mymatchnr, skipped when nothing matched.
# Fix: the guard used to test mymatchr while the plot uses mymatchnr, so the check did not
# protect the data that is actually drawn.
if len(mymatchnr)>0:
    sns.clustermap(cdc1tab.loc[mymatchnr,:],figsize=(12,5))
    #fig.savefig(figdir+'/CellphoneDB-'+ofc.replace(" ", "_")+'_topcut_'+str(topcut)+'.pdf') 

In [None]:
def _gene_symbols(genes):
    """Return the gene symbols of `genes` as a list.

    Accepts either a DE table (DataFrame, symbols taken from its 'Name' column) or a plain
    collection of symbols such as a set or list.
    """
    if isinstance(genes, pd.DataFrame):
        return list(genes['Name'])
    return list(genes)

# Fix: oinr / oir are assigned as plain sets (topNR.copy() / topR.copy()), so the previous
# oinr['Name'] / oir['Name'] lookups raised TypeError. _gene_symbols handles both the set
# form and the earlier DataFrame form.

# cdc1tab_rev entries that contain a non-responder gene of interest (substring match)
mymatch=set()
for i in _gene_symbols(oinr):
    mymatch=mymatch.union(set(list(filter(lambda x:i in x, list(cdc1tab_rev.index)))))
mymatchnr=list(mymatch).copy()

# cdc1tab_rev entries that contain a responder gene of interest (substring match)
mymatch=set()
for i in _gene_symbols(oir):
    mymatch=mymatch.union(set(list(filter(lambda x:i in x, list(cdc1tab_rev.index)))))
mymatchr=list(mymatch).copy()

sns.clustermap(cdc1tab_rev.loc[mymatchnr,:],figsize=(8,5))
#fig.savefig(figdir+'/CellphoneDB-'+ofc.replace(" ", "_")+'_topcut_'+str(topcut)+'.pdf') 

In [None]:
# Clustered heatmap of the cdc1tab_rev rows listed in mymatchr, skipped when the list is empty
if mymatchr:
    sns.clustermap(cdc1tab_rev.loc[mymatchr],figsize=(8,8))

In [None]:
#### Any of the genes of interest among the interacting partners?
# Cluster 5-vs-6 DE tables, read from the same velocity/publication folder and file names as
# topc6genes / topc5genes above (absolute path repeated here).
DEclustergenes6=pd.read_csv('/Fullanalysis/analyzed/sw_besca2_immune/velocity/publication/DEclustergenes_5vs6_CD8Tcell-All_PBMCandTIL_subdata_leiden_6.tsv',sep='\t')
DEclustergenes5=pd.read_csv('/Fullanalysis/analyzed/sw_besca2_immune/velocity/publication/DEclustergenes_5vs6_CD8Tcell-All_PBMCandTIL_subdata_leiden_5.tsv',sep='\t')

In [None]:
# Show cdc1tab before matching the cluster DE genes against its index
cdc1tab

In [None]:
# cdc1tab entries that contain a cluster-6 DE gene name.
# NOTE(review): this is a substring test (`i in entry`), so a short symbol also matches longer
# names that contain it — confirm that is acceptable here.
mymatch=set()
for i in list(DEclustergenes6['Name']):
    mymatch=mymatch.union({entry for entry in list(cdc1tab.index) if i in entry})
mymatch6=list(mymatch)

In [None]:
# cdc1tab entries that contain a cluster-5 DE gene name.
# NOTE(review): this is a substring test (`i in entry`), so a short symbol also matches longer
# names that contain it — confirm that is acceptable here.
mymatch=set()
for i in list(DEclustergenes5['Name']):
    mymatch=mymatch.union({entry for entry in list(cdc1tab.index) if i in entry})
mymatch5=list(mymatch)

In [None]:
# Clustered heatmap of the cdc1tab rows matched to cluster-6 DE genes (figure is not saved)
sns.clustermap(cdc1tab.loc[mymatch6],figsize=(8,8))

In [None]:
# Clustered heatmap of the cdc1tab rows matched to cluster-5 DE genes (figure is not saved)
sns.clustermap(cdc1tab.loc[mymatch5],figsize=(8,5))

### Proportion plots

In [None]:
# Output folder for the frequency tables
propdir=figdir+'/frequencies/'
# All CD8 T cells of ddata; leiden_velo starts as the placeholder '100' and is overwritten with
# the leiden label from cd8tcdata_full for the cells named in cd8tcdata_full.obs['CELL'].
# Cells absent from cd8tcdata_full keep '100'.
# NOTE(review): assumes allcd8.obs is indexed by the same identifiers as obs['CELL'] — confirm.
allcd8=ddata[ddata.obs['celltype2_pub']=='CD8-positive T cell'].copy()
allcd8.obs['leiden_velo']='100'
tmp=allcd8.obs['leiden_velo'].copy()
tmp[cd8tcdata_full.obs['CELL']]=list(cd8tcdata_full.obs['leiden'])
allcd8.obs['leiden_velo']=tmp.copy()

In [None]:
# Re-apply scanpy's figure parameters with their default arguments before the UMAP
sc.settings.set_figure_params()

In [None]:
# UMAP of all CD8 T cells coloured by the projected leiden_velo labels
sc.pl.umap(
    allcd8,
    color=['leiden_velo'],
    legend_loc='on data',
    legend_fontsize=8,
    save='CD8T-projected-original.svg',
)

In [None]:
#allcd8.obs['Sample type']

In [None]:
# Split the CD8 T cells by 'Sample type' into independent TIL and PBMC copies
allcd8tu=allcd8[allcd8.obs['Sample type']=='TIL'].copy()
allcd8pbmc=allcd8[allcd8.obs['Sample type']=='PBMC'].copy()

In [None]:
#mysubs=['celltype4_pub','celltype3_pub','celltype2_pub', 'celltype1']
mysubs=['leiden_velo']
# (data subset, output file prefix, condition column) for each pair of tables written below
_cd8_tables=[
    (allcd8,'AllCD8_PBMCvsTIL','Sample type'),
    (allcd8pbmc,'AllCD8_PBMCs','RCat'),
    (allcd8tu,'AllCD8_TIL','RCat'),
]
for what in mysubs:
    for _subset,_prefix,_condition in _cd8_tables:
        # percentages (celltypeFreq) first, then absolute counts (celltypeNrs)
        df1=bc.tl.count_occurrence_subset_conditions(_subset, subset_variable = 'PatientID', count_variable = what, condition_identifier = _condition,  return_percentage = True)
        df1.to_csv(propdir+_prefix+'_celltypeFreq_'+what+'.tsv')
        df2=bc.tl.count_occurrence_subset_conditions(_subset, subset_variable = 'PatientID', count_variable = what, condition_identifier = _condition,  return_percentage = False)
        df2.to_csv(propdir+_prefix+'_celltypeNrs_'+what+'.tsv')
    


In [None]:
#mysubs=['celltype4_pub','celltype3_pub','celltype2_pub', 'celltype1']
mysubs=['celltype4_pub']
# (data subset, output file prefix, condition column) for each pair of tables written below
_all_tables=[
    (ddata,'PBMCvsTIL','Sample type'),
    (pbmcdata,'PBMCs','RCat'),
    (tildata,'TIL','RCat'),
]
for what in mysubs:
    for _subset,_prefix,_condition in _all_tables:
        # percentages (celltypeFreq) first, then absolute counts (celltypeNrs)
        df1=bc.tl.count_occurrence_subset_conditions(_subset, subset_variable = 'PatientID', count_variable = what, condition_identifier = _condition,  return_percentage = True)
        df1.to_csv(propdir+_prefix+'_celltypeFreq_'+what+'.tsv')
        df2=bc.tl.count_occurrence_subset_conditions(_subset, subset_variable = 'PatientID', count_variable = what, condition_identifier = _condition,  return_percentage = False)
        df2.to_csv(propdir+_prefix+'_celltypeNrs_'+what+'.tsv')
    


In [None]:
# Re-index macrodata.obs by the values of its 'CELL' column (in-place change of macrodata)
macrodata.obs.index=list(macrodata.obs['CELL'])

In [None]:
# Macrophages and classical monocytes of ddata, and the macrodata cells that are among them.
# Note: macrodata is overwritten with the filtered subset here.
allmacro_full=ddata[ddata.obs['celltype2_pub'].isin(['macrophage','classical monocyte'])].copy()
macrodata=macrodata[macrodata.obs.index.isin(list(allmacro_full.obs.index))]

In [None]:

# leiden_velo starts as the placeholder '100' and is overwritten with the leiden label from
# macrodata for the cells named in macrodata.obs['CELL']; all other cells keep '100'.
allmacro_full.obs['leiden_velo']='100'
tmp=allmacro_full.obs['leiden_velo'].copy()
tmp[macrodata.obs['CELL']]=list(macrodata.obs['leiden'])
allmacro_full.obs['leiden_velo']=tmp.copy()

In [None]:
# Keep only cells that received a projected label (drop the '100' placeholder)
allmacro_full=allmacro_full[allmacro_full.obs['leiden_velo']!='100']

In [None]:
# Switch seaborn to the "ticks" theme for the following figures
sns.set_theme(style="ticks")

In [None]:
# UMAP of the monocyte/macrophage cells coloured by the projected leiden_velo labels
sc.pl.umap(
    allmacro_full,
    color=['leiden_velo'],
    legend_loc='on data',
    legend_fontsize=8,
    save='-MonoMacro_projected_original.svg',
)

In [None]:
# Split the monocyte/macrophage cells by 'Sample type' into independent TIL and PBMC copies.
# The PBMC variable is spelled 'allmacropmbc'; the frequency cell below uses the same spelling.
allmacrotu=allmacro_full[allmacro_full.obs['Sample type']=='TIL'].copy()
allmacropmbc=allmacro_full[allmacro_full.obs['Sample type']=='PBMC'].copy()


In [None]:

#mysubs=['celltype4_pub','celltype3_pub','celltype2_pub', 'celltype1']
mysubs=['leiden_velo']
# (data subset, output file prefix, condition column) for each pair of tables written below
_macro_tables=[
    (allmacro_full,'Macro_PBMCvsTIL','Sample type'),
    (allmacropmbc,'Macro_PBMCs','RCat'),
    (allmacrotu,'Macro_TIL','RCat'),
]
for what in mysubs:
    for _subset,_prefix,_condition in _macro_tables:
        # percentages (celltypeFreq) first, then absolute counts (celltypeNrs)
        df1=bc.tl.count_occurrence_subset_conditions(_subset, subset_variable = 'PatientID', count_variable = what, condition_identifier = _condition,  return_percentage = True)
        df1.to_csv(propdir+_prefix+'_celltypeFreq_'+what+'.tsv')
        df2=bc.tl.count_occurrence_subset_conditions(_subset, subset_variable = 'PatientID', count_variable = what, condition_identifier = _condition,  return_percentage = False)
        df2.to_csv(propdir+_prefix+'_celltypeNrs_'+what+'.tsv')
    


In [None]:
#mutdata=ddata[ddata.obs['Mutation'].isin(['NRAS','BRAF'])].copy()
# Cells whose 'Mutation' annotation is NRAS or BRAF, separately for TIL and PBMC data
mutdatatil=tildata[tildata.obs['Mutation'].isin(['NRAS','BRAF'])].copy()
mutdatapbmc=pbmcdata[pbmcdata.obs['Mutation'].isin(['NRAS','BRAF'])].copy()

In [None]:
mysubs=['celltype4_pub','celltype3_pub','celltype2_pub', 'celltype1']
#mysubs=['celltype4_pub']
# (data subset, output file prefix) for each pair of tables written below; condition: Mutation
_mutation_tables=[
    (mutdatapbmc,'PBMCs'),
    (mutdatatil,'TIL'),
]
for what in mysubs:
    for _subset,_prefix in _mutation_tables:
        # percentages (celltypeFreq) first, then absolute counts (celltypeNrs)
        df1=bc.tl.count_occurrence_subset_conditions(_subset, subset_variable = 'PatientID', count_variable = what, condition_identifier = 'Mutation',  return_percentage = True)
        df1.to_csv(propdir+_prefix+'_celltypeFreq_Mutation_'+what+'.tsv')
        df2=bc.tl.count_occurrence_subset_conditions(_subset, subset_variable = 'PatientID', count_variable = what, condition_identifier = 'Mutation',  return_percentage = False)
        df2.to_csv(propdir+_prefix+'_celltypeNrs_Mutation_'+what+'.tsv')
    


In [None]:
#mutdata=ddata[ddata.obs['Mutation'].isin(['NRAS','BRAF'])].copy()
# Cells whose 'Lesion' annotation is LN, Brain or Subc, separately for TIL and PBMC data
tissdatatil=tildata[tildata.obs['Lesion'].isin(['LN','Brain','Subc'])].copy()
tissdatapbmc=pbmcdata[pbmcdata.obs['Lesion'].isin(['LN','Brain','Subc'])].copy()


In [None]:

mysubs=['celltype4_pub','celltype3_pub','celltype2_pub', 'celltype1']
#mysubs=['celltype4_pub']
# (data subset, output file prefix) for each pair of tables written below; condition: Lesion
_lesion_tables=[
    (tissdatapbmc,'PBMCs'),
    (tissdatatil,'TIL'),
]
for what in mysubs:
    for _subset,_prefix in _lesion_tables:
        # percentages (celltypeFreq) first, then absolute counts (celltypeNrs)
        df1=bc.tl.count_occurrence_subset_conditions(_subset, subset_variable = 'PatientID', count_variable = what, condition_identifier = 'Lesion',  return_percentage = True)
        df1.to_csv(propdir+_prefix+'_celltypeFreq_Lesion_'+what+'.tsv')
        df2=bc.tl.count_occurrence_subset_conditions(_subset, subset_variable = 'PatientID', count_variable = what, condition_identifier = 'Lesion',  return_percentage = False)
        df2.to_csv(propdir+_prefix+'_celltypeNrs_Lesion_'+what+'.tsv')
    


In [None]:
from scipy import stats
import itertools

def getPs(cellFreqs,myconditions,name1):
    """Pairwise two-sample tests between condition groups.

    Parameters
    ----------
    cellFreqs : pandas.DataFrame
        Table with the grouping column `name1`; the values to compare are taken
        from the SECOND column (position 1), whatever its name.
    myconditions : list of str
        Condition labels; every unordered pair of them is tested.
    name1 : str
        Name of the column in `cellFreqs` holding the condition labels.

    Returns
    -------
    pandas.DataFrame
        One row per pair, labelled '<first>-<second>', with the p-values in the
        columns 'MannWhitney' (two-sided Mann-Whitney U) and 'T-test'
        (scipy.stats.ttest_ind with its default settings).
    """
    pwilc={}
    pt={}
    for first,second in itertools.combinations(myconditions, 2):
        label=first+'-'+second
        # Select each group's values once and reuse them for both tests.
        vals1=list(cellFreqs.loc[cellFreqs[name1]==first,:].iloc[:,1])
        vals2=list(cellFreqs.loc[cellFreqs[name1]==second,:].iloc[:,1])
        # State the alternative explicitly: the default of mannwhitneyu differs
        # between SciPy releases, which would make the p-values version dependent.
        pwilc[label]=stats.mannwhitneyu(vals1,vals2,alternative='two-sided')[1]
        pt[label]=stats.ttest_ind(vals1,vals2)[1]

    myps=pd.DataFrame([pwilc,pt]).transpose()
    myps.columns=['MannWhitney','T-test']
    return(myps)

# Switch seaborn to its "ticks" theme for the plots produced afterwards.
sns.set_theme(style="ticks")


In [None]:
# Echo the directory the figures and p-value tables are written to.
figdir

### Lesion comparison

##### Lev 1 - TIL

In [None]:
# Level 1 cell types, TIL samples: per-sample frequencies by lesion category.
what='celltype1'
tilfreq=pd.read_csv(propdir+'TIL_celltypeFreq_Lesion_'+what+'.tsv',header=0)
pbmcreq=pd.read_csv(propdir+'PBMCs_celltypeFreq_Lesion_'+what+'.tsv',header=0)

# Use the first column (read in as 'Unnamed: 0') as row index; its values are
# used below as the per-cell-type column names of `toplot`.
tilfreq.index=tilfreq.iloc[:,0]
pbmcreq.index=pbmcreq.iloc[:,0]

# Sample column labels are split on spaces: token 2 is taken as the lesion
# category, token 1 as the patient id (assumes >= 3 tokens per label -- TODO confirm).
rs=[x.split(' ')[2] for x in list(tilfreq.drop(columns=['Unnamed: 0']).columns)]
patid=[x.split(' ')[1] for x in list(tilfreq.drop(columns=['Unnamed: 0']).columns)]

# Samples become rows, cell types become columns; add the lesion label per sample.
toplot=tilfreq.drop(columns=['Unnamed: 0']).transpose()
toplot['Lesion']=rs


fig, axes = plt.subplots(1, 4,figsize=(14,3), gridspec_kw={'wspace': 0.5, 'left': 0.25})
plt.subplots_adjust(left=0.2, right=0.98, top=0.86, bottom=0.1)

axes = axes.flatten()

pvals={}
i=0
for mycell in list(tilfreq.index):
    # Box plot with the individual samples overlaid, one panel per cell type.
    ax=sns.boxplot(y=mycell,x='Lesion',data=toplot,ax=axes[i],
                   orient='v',order=['LN','Brain','Subc'])
    ax=sns.swarmplot(y=mycell,x='Lesion',data=toplot,color='black',ax=axes[i],
                    order=['LN','Brain','Subc'])
    ax.set_xticklabels(ax.get_xticklabels(), rotation=90,fontsize=10)
    ax.tick_params(axis='y', labelsize=10)
    ax.set_ylabel(ylabel='Perc '+mycell,fontsize=10)
    # The grouping variable on the x axis is the lesion category, not a response.
    ax.set_xlabel(xlabel='Lesion ',fontsize=10)
    # Only the LN vs Brain pair is tested; Subc is plotted but not tested.
    pvals[mycell]=getPs(toplot.loc[:,['Lesion',mycell]],['LN','Brain'],'Lesion').loc['LN-Brain',:]
    i=i+1

fig.savefig(figdir+'Celltypefreq-Lesion-TILs'+what+'.pdf', bbox_inches="tight")
fig.savefig(figdir+'Celltypefreq-Lesion-TILs'+what+'.eps', format='eps', bbox_inches="tight")
fig.savefig(figdir+'Celltypefreq-Lesion-TILs'+what+'.svg', format='svg', bbox_inches="tight")

# Save the p-value table (tab-separated) and show it sorted by Mann-Whitney p-value.
pd.DataFrame.from_dict(pvals, orient='index').to_csv(figdir+'Pval-CellTypeFreq-TIL-'+what+'-per-Lesion.tsv',sep='\t')
pd.DataFrame.from_dict(pvals, orient='index').sort_values(by='MannWhitney')



In tumor, in LN setting, more B lymphocytes. 


In [None]:
##### Lev 1 - PBMC
# Same comparison as for the TIL samples, run on `pbmcreq`.
# `pbmcreq` and `what` must already be defined by the Lev 1 - TIL cell.

sns.set_theme(style="ticks")


# Sample column labels are split on spaces: token 2 is taken as the lesion
# category, token 1 as the patient id (assumes >= 3 tokens per label -- TODO confirm).
rs=[x.split(' ')[2] for x in list(pbmcreq.drop(columns=['Unnamed: 0']).columns)]
patid=[x.split(' ')[1] for x in list(pbmcreq.drop(columns=['Unnamed: 0']).columns)]

# Samples become rows, cell types become columns; add the lesion label per sample.
toplot=pbmcreq.drop(columns=['Unnamed: 0']).transpose()
toplot['Lesion']=rs



fig, axes = plt.subplots(1, 4,figsize=(14,3), gridspec_kw={'wspace': 0.5, 'left': 0.25})
plt.subplots_adjust(left=0.2, right=0.98, top=0.86, bottom=0.1)

axes = axes.flatten()

pvals={}

i=0
for mycell in list(pbmcreq.index):
    ax=sns.boxplot(y=mycell,x='Lesion',data=toplot,ax=axes[i],
                   orient='v',order=['LN','Brain','Subc'])
    ax=sns.swarmplot(y=mycell,x='Lesion',data=toplot,color='black',ax=axes[i],
                    order=['LN','Brain','Subc'])
    ax.set_xticklabels(ax.get_xticklabels(), rotation=90,fontsize=10)
    ax.tick_params(axis='y', labelsize=10)
    ax.set_ylabel(ylabel='Perc '+mycell,fontsize=10)
    # The grouping variable on the x axis is the lesion category, not a response.
    ax.set_xlabel(xlabel='Lesion ',fontsize=10)
    # Only the LN vs Brain pair is tested; Subc is plotted but not tested.
    pvals[mycell]=getPs(toplot.loc[:,['Lesion',mycell]],['LN','Brain'],'Lesion').loc['LN-Brain',:]
    i=i+1
    
     
fig.savefig(figdir+'Celltypefreq-Lesion-PBMCs'+what+'.pdf', bbox_inches="tight")
fig.savefig(figdir+'Celltypefreq-Lesion-PBMCs'+what+'.eps', format='eps', bbox_inches="tight")
fig.savefig(figdir+'Celltypefreq-Lesion-PBMCs'+what+'.svg', format='svg', bbox_inches="tight")

# Save the p-value table (tab-separated) and show it sorted by Mann-Whitney p-value.
pd.DataFrame.from_dict(pvals, orient='index').to_csv(figdir+'Pval-CellTypeFreq-PBMC-'+what+'-per-Lesion.tsv',sep='\t')
pd.DataFrame.from_dict(pvals, orient='index').sort_values(by='MannWhitney')


In [None]:
##### Lev 2 - TIL
# Level 2 cell types, TIL samples: per-sample frequencies by lesion category,
# with LN vs Brain p-values and a log2 fold change per cell type.

sns.set_theme(style="ticks")
what='celltype2_pub'
tilfreq=pd.read_csv(propdir+'TIL_celltypeFreq_Lesion_'+what+'.tsv',header=0)
pbmcreq=pd.read_csv(propdir+'PBMCs_celltypeFreq_Lesion_'+what+'.tsv',header=0)

# Use the first column (read in as 'Unnamed: 0') as row index.
tilfreq.index=tilfreq.iloc[:,0]
pbmcreq.index=pbmcreq.iloc[:,0]

# Sample column labels are split on spaces: token 2 is taken as the lesion
# category, token 1 as the patient id (assumes >= 3 tokens per label -- TODO confirm).
rs=[x.split(' ')[2] for x in list(tilfreq.drop(columns=['Unnamed: 0']).columns)]
patid=[x.split(' ')[1] for x in list(tilfreq.drop(columns=['Unnamed: 0']).columns)]

# Samples become rows, cell types become columns; add the lesion label per sample.
toplot=tilfreq.drop(columns=['Unnamed: 0']).transpose()
toplot['Lesion']=rs


fig, axes = plt.subplots(2, 6,figsize=(16,7), gridspec_kw={'wspace': 0.5, 'left': 0.25})
plt.subplots_adjust(left=0.2, right=0.98, top=0.86, bottom=0.1)

axes = axes.flatten()

pvals={}
i=0
for mycell in list(tilfreq.index):
    ax=sns.boxplot(y=mycell,x='Lesion',data=toplot,ax=axes[i],
                   orient='v',order=['LN','Brain','Subc'])
    ax=sns.swarmplot(y=mycell,x='Lesion',data=toplot,color='black',ax=axes[i],
                    order=['LN','Brain','Subc'])
    ax.set_xticklabels(ax.get_xticklabels(), rotation=90,fontsize=10)
    ax.tick_params(axis='y', labelsize=10)
    ax.set_ylabel(ylabel='Perc '+mycell,fontsize=10)
    # The grouping variable on the x axis is the lesion category, not a response.
    ax.set_xlabel(xlabel='Lesion ',fontsize=10)
    # Only the LN vs Brain pair is tested; Subc is plotted but not tested.
    pvals[mycell]=getPs(toplot.loc[:,['Lesion',mycell]],['LN','Brain'],'Lesion').loc['LN-Brain',:]
    # log2 of (mean LN + 0.1) / (mean Brain + 0.1); the 0.1 offset keeps the ratio
    # finite when a group mean is 0. The result is a one-element Series labelled by
    # the cell type, so take it by position with .iloc (plain [0] on a
    # label-indexed Series is deprecated in pandas 2.x).
    grpmean=toplot.loc[:,['Lesion',mycell]].groupby('Lesion').mean()
    pvals[mycell]['log2FC']=np.log2((0.1+grpmean.loc['LN',:])/(0.1+grpmean.loc['Brain',:])).iloc[0]

    i=i+1

fig.savefig(figdir+'Celltypefreq-Lesion-TILs'+what+'.pdf', bbox_inches="tight")
fig.savefig(figdir+'Celltypefreq-Lesion-TILs'+what+'.eps', format='eps', bbox_inches="tight")
fig.savefig(figdir+'Celltypefreq-Lesion-TILs'+what+'.svg', format='svg', bbox_inches="tight")

# Save the p-value table (tab-separated) and show it sorted by Mann-Whitney p-value.
pd.DataFrame.from_dict(pvals, orient='index').to_csv(figdir+'Pval-CellTypeFreq-TIL-'+what+'-per-Lesion.tsv',sep='\t')
pd.DataFrame.from_dict(pvals, orient='index').sort_values(by='MannWhitney')




In [None]:
# Summary heatmap: log2FC of the cell types in `pvals` whose Mann-Whitney
# p-value is <= 0.1, sorted from highest to lowest fold change.
# Expects `pvals` entries to carry a 'log2FC' value.
pvaltab=pd.DataFrame.from_dict(pvals, orient='index')
toinc=list(pvaltab['MannWhitney'][pvaltab['MannWhitney']<=0.1].index)

plt.figure(figsize=(0.5,1.5))
sns.set(font_scale=0.8)
sns_plot=sns.heatmap(pd.DataFrame(pvaltab.loc[toinc,:]['log2FC'].sort_values(ascending=False)),center=0.00,cmap=sns.diverging_palette(220, 20, as_cmap=True))
fig = sns_plot.get_figure()
# All three formats share one file stem ('...-summary-' + what); the eps/svg
# names previously lacked the '-' that the pdf name has.
fig.savefig(figdir+'Celltypefreq-Lesion-summary-'+what+'.pdf', bbox_inches="tight", dpi=300)
fig.savefig(figdir+'Celltypefreq-Lesion-summary-'+what+'.eps', format='eps', bbox_inches="tight", dpi=300)
fig.savefig(figdir+'Celltypefreq-Lesion-summary-'+what+'.svg', format='svg', bbox_inches="tight", dpi=300)
# NOTE(review): the remark below mentions the BRAF setting, but this cell sits in
# the lesion comparison -- it looks copied from the mutation section; confirm.
#In tumor, in BRAF setting, more B cells, CD4+ T cells, less macrophages and CD8+ T cells. 


In [None]:
##### Lev 2 - PBMC
# Level 2 cell types, PBMC samples, by lesion category.
# Relies on `pbmcreq` and `what` as set by the Lev 2 - TIL cell.
sns.set_theme(style="ticks")

# Token 2 of each sample column label is used as the lesion category, token 1 as
# the patient id (assumes space-separated labels with >= 3 tokens -- TODO confirm).
pbmc_samples = pbmcreq.drop(columns=['Unnamed: 0'])
rs = [label.split(' ')[2] for label in pbmc_samples.columns]
patid = [label.split(' ')[1] for label in pbmc_samples.columns]

# Samples as rows, cell types as columns, plus the lesion label.
toplot = pbmcreq.drop(columns=['Unnamed: 0']).transpose()
toplot['Lesion'] = rs

fig, axes = plt.subplots(2, 6, figsize=(16, 7), gridspec_kw={'wspace': 0.5, 'left': 0.25})
plt.subplots_adjust(left=0.2, right=0.98, top=0.86, bottom=0.1)
axes = axes.flatten()

lesion_order = ['LN', 'Brain', 'Subc']
pvals = {}
i = 0
for mycell in list(pbmcreq.index):
    ax = sns.boxplot(data=toplot, x='Lesion', y=mycell, order=lesion_order,
                     orient='v', ax=axes[i])
    ax = sns.swarmplot(data=toplot, x='Lesion', y=mycell, order=lesion_order,
                       color='black', ax=axes[i])
    ax.set_xticklabels(ax.get_xticklabels(), rotation=90, fontsize=10)
    ax.tick_params(axis='y', labelsize=10)
    ax.set_ylabel(ylabel='Perc ' + mycell, fontsize=10)
    ax.set_xlabel(xlabel='Lesion ', fontsize=10)
    # Only LN vs Brain is tested.
    pvals[mycell] = getPs(toplot[['Lesion', mycell]], ['LN', 'Brain'], 'Lesion').loc['LN-Brain', :]
    i += 1

fig_stem = figdir + 'Celltypefreq-Lesion-PBMCs' + what
fig.savefig(fig_stem + '.pdf', bbox_inches="tight")
fig.savefig(fig_stem + '.eps', format='eps', bbox_inches="tight")
fig.savefig(fig_stem + '.svg', format='svg', bbox_inches="tight")

# Write the p-value table, then display it sorted by Mann-Whitney p-value.
pval_table = pd.DataFrame.from_dict(pvals, orient='index')
pval_table.to_csv(figdir + 'Pval-CellTypeFreq-PBMC-' + what + '-per-Lesion.tsv', sep='\t')
pval_table.sort_values(by='MannWhitney')




In [None]:
##### Lev 3 - TIL
# Level 3 cell types, TIL samples, by lesion category. Only the 20 cell types
# listed in `mychoice` are plotted (one per panel of the 4x5 grid).
sns.set_theme(style="ticks")


what='celltype3_pub'
mychoice=['CD141-positive myeloid dendritic cell',
 'germinal center B cell',
 'naive CD4-positive T cell',
 'naive CD8-positive T cell',
 'MSR1-positive macrophage',
 'naive B cell',
 'effector memory CD8-positive T cell',
 'IL7R-max CD4-positive T cell',
 'cytotoxic CD56-dim natural killer cell',
 'macrophage',
 'proliferating NK cell',
 'plasmacytoid dendritic cell',
 'B cell',
 'regulatory T cell',
 'CXCL9-positive macrophage',
 'central memory CD4-positive T cell',
 'memory B cell',
 'classical monocyte',
 'mature NK T cell',
 'cytokine secreting effector CD8-positive T cell']

tilfreq=pd.read_csv(propdir+'TIL_celltypeFreq_Lesion_'+what+'.tsv',header=0)
pbmcreq=pd.read_csv(propdir+'PBMCs_celltypeFreq_Lesion_'+what+'.tsv',header=0)

# Use the first column (read in as 'Unnamed: 0') as row index.
tilfreq.index=tilfreq.iloc[:,0]
pbmcreq.index=pbmcreq.iloc[:,0]

# Sample column labels are split on spaces: token 2 is taken as the lesion
# category, token 1 as the patient id (assumes >= 3 tokens per label -- TODO confirm).
rs=[x.split(' ')[2] for x in list(tilfreq.drop(columns=['Unnamed: 0']).columns)]
patid=[x.split(' ')[1] for x in list(tilfreq.drop(columns=['Unnamed: 0']).columns)]

# Samples become rows, cell types become columns; add the lesion label per sample.
toplot=tilfreq.drop(columns=['Unnamed: 0']).transpose()
toplot['Lesion']=rs


fig, axes = plt.subplots(4, 5,figsize=(16,20), gridspec_kw={'wspace': 0.5, 'left': 0.25})
plt.subplots_adjust(left=0.2, right=0.98, top=0.86, bottom=0.1)

axes = axes.flatten()

pvals={}
i=0
for mycell in mychoice:
    ax=sns.boxplot(y=mycell,x='Lesion',data=toplot,ax=axes[i],
                   orient='v',order=['LN','Brain','Subc'])
    ax=sns.swarmplot(y=mycell,x='Lesion',data=toplot,color='black',ax=axes[i],
                    order=['LN','Brain','Subc'])
    ax.set_xticklabels(ax.get_xticklabels(), rotation=90,fontsize=10)
    ax.tick_params(axis='y', labelsize=10)
    ax.set_ylabel(ylabel='Perc '+mycell,fontsize=10)
    ax.set_xlabel(xlabel='Lesion ',fontsize=10)
    # Only the LN vs Brain pair is tested; Subc is plotted but not tested.
    pvals[mycell]=getPs(toplot.loc[:,['Lesion',mycell]],['LN','Brain'],'Lesion').loc['LN-Brain',:]
    # log2 of (mean LN + 0.1) / (mean Brain + 0.1). The result is a one-element
    # Series labelled by the cell type, so take it by position with .iloc
    # (plain [0] on a label-indexed Series is deprecated in pandas 2.x).
    grpmean=toplot.loc[:,['Lesion',mycell]].groupby('Lesion').mean()
    pvals[mycell]['log2FC']=np.log2((0.1+grpmean.loc['LN',:])/(0.1+grpmean.loc['Brain',:])).iloc[0]

    i=i+1

fig.savefig(figdir+'Celltypefreq-Lesion-TILs'+what+'.pdf',bbox_inches="tight")
fig.savefig(figdir+'Celltypefreq-Lesion-TILs'+what+'.eps', format='eps',bbox_inches="tight")
fig.savefig(figdir+'Celltypefreq-Lesion-TILs'+what+'.svg', format='svg',bbox_inches="tight")

# Save the p-value table (tab-separated) and show it sorted by Mann-Whitney p-value.
pd.DataFrame.from_dict(pvals, orient='index').to_csv(figdir+'Pval-CellTypeFreq-TIL-'+what+'-per-Lesion.tsv',sep='\t')
pd.DataFrame.from_dict(pvals, orient='index').sort_values(by='MannWhitney')





In [None]:
#list(pd.DataFrame.from_dict(pvals, orient='index').sort_values(by='MannWhitney').index[0:20])


In [None]:
# Summary heatmap: log2FC of the cell types in `pvals` with a Mann-Whitney
# p-value <= 0.1, ordered from highest to lowest fold change.
pval_table = pd.DataFrame.from_dict(pvals, orient='index')
toinc = list(pval_table.loc[pval_table['MannWhitney'] <= 0.1].index)

plt.figure(figsize=(0.5, 2))
sns.set(font_scale=0.8)
log2fc_sorted = pval_table.loc[toinc, 'log2FC'].sort_values(ascending=False).to_frame()
sns_plot = sns.heatmap(log2fc_sorted, center=0.00,
                       cmap=sns.diverging_palette(220, 20, as_cmap=True))
fig = sns_plot.get_figure()
summary_stem = figdir + 'Celltypefreq-Lesion-summary-' + what
fig.savefig(summary_stem + '.pdf', bbox_inches="tight")
fig.savefig(summary_stem + '.eps', format='eps', bbox_inches="tight")
fig.savefig(summary_stem + '.svg', format='svg', bbox_inches="tight")

# NOTE(review): the remark below mentions the BRAF setting although this cell is
# part of the lesion comparison -- it looks copied from the mutation section; confirm.
#In tumor, in BRAF setting, more B cells (of different types), naive CD4+ T cells, less macrophages (of all types) and exhausted-like CD8+ T cells, regulatory T cells, . 


In [None]:
##### Lev 3 - PBMC
# Level 3 cell types, PBMC samples, by lesion category. Only the 20 cell types
# listed in `mychoice` are plotted. Relies on `pbmcreq` and `what` as set by the
# Lev 3 - TIL cell.

sns.set_theme(style="ticks")
mychoice=['immature conventional dendritic cell',
 'exhausted-like CD4-positive T cell',
 'central memory CD4-positive T cell',
 'regulatory T cell',
 'classical monocyte',
 'memory B cell',
 'IL7R-max CD4-positive T cell',
 'plasmacytoid dendritic cell',
 'effector memory CD4-positive T cell',
 'macrophage',
 'exhausted-like CD8-positive T cell',
 'CD4-positive, cytotoxic T cell',
 'cytokine secreting effector CD8-positive T cell',
 'effector memory CD8-positive T cell',
 'CD56-bright cytokine secreting natural killer cell',
 'mature NK T cell',
 'germinal center B cell',
 'CCR7-positive myeloid dendritic cell',
 'proliferating CD4-positive T cell',
 'naive B cell']

# Sample column labels are split on spaces: token 2 is taken as the lesion
# category, token 1 as the patient id (assumes >= 3 tokens per label -- TODO confirm).
rs=[x.split(' ')[2] for x in list(pbmcreq.drop(columns=['Unnamed: 0']).columns)]
patid=[x.split(' ')[1] for x in list(pbmcreq.drop(columns=['Unnamed: 0']).columns)]

# Samples become rows, cell types become columns; add the lesion label per sample.
toplot=pbmcreq.drop(columns=['Unnamed: 0']).transpose()
toplot['Lesion']=rs



fig, axes = plt.subplots(4, 5,figsize=(16,20), gridspec_kw={'wspace': 0.5, 'left': 0.25})
plt.subplots_adjust(left=0.2, right=0.98, top=0.86, bottom=0.1)
axes = axes.flatten()

pvals={}

i=0
for mycell in mychoice:
    ax=sns.boxplot(y=mycell,x='Lesion',data=toplot,ax=axes[i],
                   orient='v',order=['LN','Brain','Subc'])
    ax=sns.swarmplot(y=mycell,x='Lesion',data=toplot,color='black',ax=axes[i],
                    order=['LN','Brain','Subc'])
    ax.set_xticklabels(ax.get_xticklabels(), rotation=90,fontsize=10)
    ax.tick_params(axis='y', labelsize=10)
    ax.set_ylabel(ylabel='Perc '+mycell,fontsize=10)
    ax.set_xlabel(xlabel='Lesion ',fontsize=10)
    # Only the LN vs Brain pair is tested; Subc is plotted but not tested.
    pvals[mycell]=getPs(toplot.loc[:,['Lesion',mycell]],['LN','Brain'],'Lesion').loc['LN-Brain',:]
    # log2 of (mean LN + 0.1) / (mean Brain + 0.1). The result is a one-element
    # Series labelled by the cell type, so take it by position with .iloc
    # (plain [0] on a label-indexed Series is deprecated in pandas 2.x).
    grpmean=toplot.loc[:,['Lesion',mycell]].groupby('Lesion').mean()
    pvals[mycell]['log2FC']=np.log2((0.1+grpmean.loc['LN',:])/(0.1+grpmean.loc['Brain',:])).iloc[0]

    i=i+1
    
fig.savefig(figdir+'Celltypefreq-Lesion-PBMCs'+what+'.pdf',bbox_inches="tight")
fig.savefig(figdir+'Celltypefreq-Lesion-PBMCs'+what+'.eps', format='eps',bbox_inches="tight")
fig.savefig(figdir+'Celltypefreq-Lesion-PBMCs'+what+'.svg', format='svg',bbox_inches="tight")

# Save the p-value table (tab-separated) and show it sorted by Mann-Whitney p-value.
pd.DataFrame.from_dict(pvals, orient='index').to_csv(figdir+'Pval-CellTypeFreq-PBMC-'+what+'-per-Lesion.tsv',sep='\t')
pd.DataFrame.from_dict(pvals, orient='index').sort_values(by='MannWhitney')



#list(pd.DataFrame.from_dict(pvals, orient='index').sort_values(by='MannWhitney').index[0:20])

In [None]:
# List (at most) the 20 cell types in `pvals` with the smallest Mann-Whitney p-values.
list(pd.DataFrame.from_dict(pvals, orient='index').sort_values(by='MannWhitney').index[0:20])

In [None]:
# Summary heatmap for the PBMC lesion comparison: log2FC of the cell types in
# `pvals` with a Mann-Whitney p-value <= 0.1, highest fold change first.
pval_table = pd.DataFrame.from_dict(pvals, orient='index')
toinc = list(pval_table.loc[pval_table['MannWhitney'] <= 0.1].index)

plt.figure(figsize=(0.5, 3))
sns.set(font_scale=0.8)
log2fc_sorted = pval_table.loc[toinc, 'log2FC'].sort_values(ascending=False).to_frame()
sns_plot = sns.heatmap(log2fc_sorted, center=0.00,
                       cmap=sns.diverging_palette(220, 20, as_cmap=True))
fig = sns_plot.get_figure()
summary_stem = figdir + 'Celltypefreq-Lesion-PBMC-summary-' + what
fig.savefig(summary_stem + '.pdf', bbox_inches="tight")
fig.savefig(summary_stem + '.eps', format='eps', bbox_inches="tight")
fig.savefig(summary_stem + '.svg', format='svg', bbox_inches="tight")


### Mutation comparison

In [None]:
# Call scanpy's figure setup with no arguments, i.e. with its default values
# (this follows the sns.set(font_scale=0.8) used for the heatmaps above).
sc.settings.set_figure_params()

In [None]:
# Echo the directory the frequency tables are read from in the cells below.
propdir

##### Lev 1 - TIL



In [None]:
# Level 1 cell types, mutation comparison: load the TIL and PBMC frequency
# tables and build the plotting frame for the TIL samples.
what = 'celltype1'
tilfreq = pd.read_csv(propdir + 'TIL_celltypeFreq_Mutation_' + what + '.tsv', header=0)
pbmcreq = pd.read_csv(propdir + 'PBMCs_celltypeFreq_Mutation_' + what + '.tsv', header=0)

# Row index = first column (read in as 'Unnamed: 0').
tilfreq.index = tilfreq.iloc[:, 0]
pbmcreq.index = pbmcreq.iloc[:, 0]

# Token 2 of each sample column label is used as the mutation label, token 1 as
# the patient id (assumes space-separated labels with >= 3 tokens -- TODO confirm).
til_samples = tilfreq.drop(columns=['Unnamed: 0'])
rs = [label.split(' ')[2] for label in til_samples.columns]
patid = [label.split(' ')[1] for label in til_samples.columns]

# Samples as rows, cell types as columns, plus the mutation label.
toplot = tilfreq.drop(columns=['Unnamed: 0']).transpose()
toplot['Mut'] = rs


In [None]:
# Level 1 cell types, TIL samples: BRAF vs NRAS, one panel per cell type.
# Uses `toplot`, `tilfreq` and `what` prepared in the preceding cell.
fig, axes = plt.subplots(1, 4,figsize=(10,3), gridspec_kw={'wspace': 0.5, 'left': 0.25})
plt.subplots_adjust(left=0.2, right=0.98, top=0.86, bottom=0.1)

axes = axes.flatten()

pvals={}
i=0
for mycell in list(tilfreq.index):
    ax=sns.boxplot(y=mycell,x='Mut',data=toplot,ax=axes[i],
                   orient='v',order=['BRAF','NRAS'])
    ax=sns.swarmplot(y=mycell,x='Mut',data=toplot,color='black',ax=axes[i],
                    order=['BRAF','NRAS'])
    ax.set_xticklabels(ax.get_xticklabels(), rotation=90,fontsize=10)
    ax.tick_params(axis='y', labelsize=10)
    ax.set_ylabel(ylabel='Perc '+mycell,fontsize=10)
    # The grouping variable on the x axis is the mutation, not a response.
    ax.set_xlabel(xlabel='Mut ',fontsize=10)
    pvals[mycell]=getPs(toplot.loc[:,['Mut',mycell]],['BRAF','NRAS'],'Mut').loc['BRAF-NRAS',:]
    i=i+1

fig.savefig(figdir+'Celltypefreq-Mutation-TILs'+what+'.pdf', bbox_inches="tight", dpi=300)
fig.savefig(figdir+'Celltypefreq-Mutation-TILs'+what+'.eps', format='eps', bbox_inches="tight", dpi=300)
fig.savefig(figdir+'Celltypefreq-Mutation-TILs'+what+'.svg', format='svg', bbox_inches="tight", dpi=300)

# Save the p-value table (tab-separated) and show it sorted by Mann-Whitney p-value.
pd.DataFrame.from_dict(pvals, orient='index').to_csv(figdir+'Pval-CellTypeFreq-TIL-'+what+'-per-Mutation.tsv',sep='\t')
pd.DataFrame.from_dict(pvals, orient='index').sort_values(by='MannWhitney')



In tumor, in BRAF setting, more B lymphocytes. 


##### Lev 1 - PBMC

In [None]:
# Level 1 cell types, PBMC samples: BRAF vs NRAS.
# Relies on `pbmcreq` and `what` loaded for the Lev 1 mutation comparison.
sns.set_theme(style="ticks")

# Token 2 of each sample column label is used as the mutation label, token 1 as
# the patient id (assumes space-separated labels with >= 3 tokens -- TODO confirm).
pbmc_samples = pbmcreq.drop(columns=['Unnamed: 0'])
rs = [label.split(' ')[2] for label in pbmc_samples.columns]
patid = [label.split(' ')[1] for label in pbmc_samples.columns]

# Samples as rows, cell types as columns, plus the mutation label.
toplot = pbmcreq.drop(columns=['Unnamed: 0']).transpose()
toplot['Mut'] = rs

fig, axes = plt.subplots(1, 4, figsize=(10, 3), gridspec_kw={'wspace': 0.5, 'left': 0.25})
plt.subplots_adjust(left=0.2, right=0.98, top=0.86, bottom=0.1)
axes = axes.flatten()

mut_order = ['BRAF', 'NRAS']
pvals = {}
i = 0
for mycell in list(pbmcreq.index):
    ax = sns.boxplot(data=toplot, x='Mut', y=mycell, order=mut_order,
                     orient='v', ax=axes[i])
    ax = sns.swarmplot(data=toplot, x='Mut', y=mycell, order=mut_order,
                       color='black', ax=axes[i])
    ax.set_xticklabels(ax.get_xticklabels(), rotation=90, fontsize=10)
    ax.tick_params(axis='y', labelsize=10)
    ax.set_ylabel(ylabel='Perc ' + mycell, fontsize=10)
    ax.set_xlabel(xlabel='Mut ', fontsize=10)
    pvals[mycell] = getPs(toplot[['Mut', mycell]], ['BRAF', 'NRAS'], 'Mut').loc['BRAF-NRAS', :]
    i += 1

fig_stem = figdir + 'Celltypefreq-Mutation-PBMCs' + what
fig.savefig(fig_stem + '.pdf', bbox_inches="tight", dpi=300)
fig.savefig(fig_stem + '.eps', format='eps', bbox_inches="tight", dpi=300)
fig.savefig(fig_stem + '.svg', format='svg', bbox_inches="tight", dpi=300)

# Write the p-value table, then display it sorted by Mann-Whitney p-value.
pval_table = pd.DataFrame.from_dict(pvals, orient='index')
pval_table.to_csv(figdir + 'Pval-CellTypeFreq-PBMC-' + what + '-per-Mutation.tsv', sep='\t')
pval_table.sort_values(by='MannWhitney')



##### Lev 2 - TIL



In [None]:
# Level 2 cell types, TIL samples: BRAF vs NRAS, with p-values and a log2 fold
# change per cell type.
sns.set_theme(style="ticks")
what='celltype2_pub'
tilfreq=pd.read_csv(propdir+'TIL_celltypeFreq_Mutation_'+what+'.tsv',header=0)
pbmcreq=pd.read_csv(propdir+'PBMCs_celltypeFreq_Mutation_'+what+'.tsv',header=0)

# Use the first column (read in as 'Unnamed: 0') as row index.
tilfreq.index=tilfreq.iloc[:,0]
pbmcreq.index=pbmcreq.iloc[:,0]

# Sample column labels are split on spaces: token 2 is taken as the mutation
# label, token 1 as the patient id (assumes >= 3 tokens per label -- TODO confirm).
rs=[x.split(' ')[2] for x in list(tilfreq.drop(columns=['Unnamed: 0']).columns)]
patid=[x.split(' ')[1] for x in list(tilfreq.drop(columns=['Unnamed: 0']).columns)]

# Samples become rows, cell types become columns; add the mutation label per sample.
toplot=tilfreq.drop(columns=['Unnamed: 0']).transpose()
toplot['Mut']=rs


fig, axes = plt.subplots(2, 6,figsize=(16,7), gridspec_kw={'wspace': 0.5, 'left': 0.25})
plt.subplots_adjust(left=0.2, right=0.98, top=0.86, bottom=0.1)

axes = axes.flatten()

pvals={}
i=0
for mycell in list(tilfreq.index):
    ax=sns.boxplot(y=mycell,x='Mut',data=toplot,ax=axes[i],
                   orient='v',order=['BRAF','NRAS'])
    ax=sns.swarmplot(y=mycell,x='Mut',data=toplot,color='black',ax=axes[i],
                    order=['BRAF','NRAS'])
    ax.set_xticklabels(ax.get_xticklabels(), rotation=90,fontsize=10)
    ax.tick_params(axis='y', labelsize=10)
    ax.set_ylabel(ylabel='Perc '+mycell,fontsize=10)
    # The grouping variable on the x axis is the mutation, not a response.
    ax.set_xlabel(xlabel='Mut ',fontsize=10)
    pvals[mycell]=getPs(toplot.loc[:,['Mut',mycell]],['BRAF','NRAS'],'Mut').loc['BRAF-NRAS',:]
    # log2 of (mean BRAF + 0.1) / (mean NRAS + 0.1). The result is a one-element
    # Series labelled by the cell type, so take it by position with .iloc
    # (plain [0] on a label-indexed Series is deprecated in pandas 2.x).
    grpmean=toplot.loc[:,['Mut',mycell]].groupby('Mut').mean()
    pvals[mycell]['log2FC']=np.log2((0.1+grpmean.loc['BRAF',:])/(0.1+grpmean.loc['NRAS',:])).iloc[0]

    i=i+1

fig.savefig(figdir+'Celltypefreq-Mutation-TILs'+what+'.pdf', bbox_inches="tight", dpi=300)
fig.savefig(figdir+'Celltypefreq-Mutation-TILs'+what+'.eps', format='eps', bbox_inches="tight", dpi=300)
fig.savefig(figdir+'Celltypefreq-Mutation-TILs'+what+'.svg', format='svg', bbox_inches="tight", dpi=300)

# Save the p-value table (tab-separated) and show it sorted by Mann-Whitney p-value.
pd.DataFrame.from_dict(pvals, orient='index').to_csv(figdir+'Pval-CellTypeFreq-TIL-'+what+'-per-Mutation.tsv',sep='\t')
pd.DataFrame.from_dict(pvals, orient='index').sort_values(by='MannWhitney')


In [None]:
# Summary heatmap: log2FC of the cell types in `pvals` with a Mann-Whitney
# p-value <= 0.05, highest fold change first.
pval_table = pd.DataFrame.from_dict(pvals, orient='index')
toinc = list(pval_table.loc[pval_table['MannWhitney'] <= 0.05].index)

plt.figure(figsize=(0.5, 1.5))
sns.set(font_scale=0.8)
log2fc_sorted = pval_table.loc[toinc, 'log2FC'].sort_values(ascending=False).to_frame()
sns_plot = sns.heatmap(log2fc_sorted, center=0.00,
                       cmap=sns.diverging_palette(220, 20, as_cmap=True))
fig = sns_plot.get_figure()
summary_stem = figdir + 'Celltypefreq-Mutation-summary-' + what
fig.savefig(summary_stem + '.pdf', bbox_inches="tight", dpi=300)
fig.savefig(summary_stem + '.eps', format='eps', bbox_inches="tight", dpi=300)
fig.savefig(summary_stem + '.svg', format='svg', bbox_inches="tight", dpi=300)

In tumor samples, the BRAF setting shows more B cells and CD4+ T cells, and fewer macrophages and CD8+ T cells.


##### Lev 2 - PBMC



In [None]:
# Level 2 cell types, PBMC samples: BRAF vs NRAS.
# Relies on `pbmcreq` and `what` as set by the Lev 2 - TIL mutation cell.
sns.set_theme(style="ticks")

# Token 2 of each sample column label is used as the mutation label, token 1 as
# the patient id (assumes space-separated labels with >= 3 tokens -- TODO confirm).
pbmc_samples = pbmcreq.drop(columns=['Unnamed: 0'])
rs = [label.split(' ')[2] for label in pbmc_samples.columns]
patid = [label.split(' ')[1] for label in pbmc_samples.columns]

# Samples as rows, cell types as columns, plus the mutation label.
toplot = pbmcreq.drop(columns=['Unnamed: 0']).transpose()
toplot['Mut'] = rs

fig, axes = plt.subplots(2, 6, figsize=(16, 7), gridspec_kw={'wspace': 0.5, 'left': 0.25})
plt.subplots_adjust(left=0.2, right=0.98, top=0.86, bottom=0.1)
axes = axes.flatten()

mut_order = ['BRAF', 'NRAS']
pvals = {}
i = 0
for mycell in list(pbmcreq.index):
    ax = sns.boxplot(data=toplot, x='Mut', y=mycell, order=mut_order,
                     orient='v', ax=axes[i])
    ax = sns.swarmplot(data=toplot, x='Mut', y=mycell, order=mut_order,
                       color='black', ax=axes[i])
    ax.set_xticklabels(ax.get_xticklabels(), rotation=90, fontsize=10)
    ax.tick_params(axis='y', labelsize=10)
    ax.set_ylabel(ylabel='Perc ' + mycell, fontsize=10)
    ax.set_xlabel(xlabel='Mut ', fontsize=10)
    pvals[mycell] = getPs(toplot[['Mut', mycell]], ['BRAF', 'NRAS'], 'Mut').loc['BRAF-NRAS', :]
    i += 1

fig_stem = figdir + 'Celltypefreq-Mutation-PBMCs' + what
fig.savefig(fig_stem + '.pdf', bbox_inches="tight", dpi=300)
fig.savefig(fig_stem + '.eps', format='eps', bbox_inches="tight", dpi=300)
fig.savefig(fig_stem + '.svg', format='svg', bbox_inches="tight", dpi=300)

# Write the p-value table, then display it sorted by Mann-Whitney p-value.
pval_table = pd.DataFrame.from_dict(pvals, orient='index')
pval_table.to_csv(figdir + 'Pval-CellTypeFreq-PBMC-' + what + '-per-Mutation.tsv', sep='\t')
pval_table.sort_values(by='MannWhitney')



##### Lev 3 - TIL


In [None]:
# Level 3 cell types, TIL samples: BRAF vs NRAS. Only the 20 cell types listed
# in `mychoice` are plotted (one per panel of the 4x5 grid).
what='celltype3_pub'
mychoice=['naive CD4-positive T cell',
 'germinal center B cell',
 'memory B cell',
 'MSR1-positive macrophage',
 'naive B cell',
 'CXCL9-positive macrophage',
 'regulatory T cell',
 'exhausted-like CD8-positive T cell',
 'proliferating CD8-positive T cell',
 'MARCO-positive macrophage',
 'proliferating CD4-positive T cell',
 'non-classical monocyte',
 'plasmacytoid dendritic cell',
 'IL7R-max CD4-positive T cell',
 'naive CD8-positive T cell',
 'mature NK T cell',
 'CCR7-positive myeloid dendritic cell',
 'exhausted-like CD4-positive T cell',
 'cytotoxic CD56-dim natural killer cell',
 'CD141-positive myeloid dendritic cell']

tilfreq=pd.read_csv(propdir+'TIL_celltypeFreq_Mutation_'+what+'.tsv',header=0)
pbmcreq=pd.read_csv(propdir+'PBMCs_celltypeFreq_Mutation_'+what+'.tsv',header=0)

# Use the first column (read in as 'Unnamed: 0') as row index.
tilfreq.index=tilfreq.iloc[:,0]
pbmcreq.index=pbmcreq.iloc[:,0]

# Sample column labels are split on spaces: token 2 is taken as the mutation
# label, token 1 as the patient id (assumes >= 3 tokens per label -- TODO confirm).
rs=[x.split(' ')[2] for x in list(tilfreq.drop(columns=['Unnamed: 0']).columns)]
patid=[x.split(' ')[1] for x in list(tilfreq.drop(columns=['Unnamed: 0']).columns)]

# Samples become rows, cell types become columns; add the mutation label per sample.
toplot=tilfreq.drop(columns=['Unnamed: 0']).transpose()
toplot['Mut']=rs


fig, axes = plt.subplots(4, 5,figsize=(16,20), gridspec_kw={'wspace': 0.5, 'left': 0.25})
plt.subplots_adjust(left=0.2, right=0.98, top=0.86, bottom=0.1)

axes = axes.flatten()

pvals={}
i=0
for mycell in mychoice:
    ax=sns.boxplot(y=mycell,x='Mut',data=toplot,ax=axes[i],
                   orient='v',order=['BRAF','NRAS'])
    ax=sns.swarmplot(y=mycell,x='Mut',data=toplot,color='black',ax=axes[i],
                    order=['BRAF','NRAS'])
    ax.set_xticklabels(ax.get_xticklabels(), rotation=90,fontsize=10)
    ax.tick_params(axis='y', labelsize=10)
    ax.set_ylabel(ylabel='Perc '+mycell,fontsize=10)
    # The grouping variable on the x axis is the mutation, not a response.
    ax.set_xlabel(xlabel='Mut ',fontsize=10)
    pvals[mycell]=getPs(toplot.loc[:,['Mut',mycell]],['BRAF','NRAS'],'Mut').loc['BRAF-NRAS',:]
    # log2 of (mean BRAF + 0.1) / (mean NRAS + 0.1). The result is a one-element
    # Series labelled by the cell type, so take it by position with .iloc
    # (plain [0] on a label-indexed Series is deprecated in pandas 2.x).
    grpmean=toplot.loc[:,['Mut',mycell]].groupby('Mut').mean()
    pvals[mycell]['log2FC']=np.log2((0.1+grpmean.loc['BRAF',:])/(0.1+grpmean.loc['NRAS',:])).iloc[0]

    i=i+1

fig.savefig(figdir+'Celltypefreq-Mutation-TILs'+what+'.pdf', bbox_inches="tight", dpi=300)
fig.savefig(figdir+'Celltypefreq-Mutation-TILs'+what+'.eps', format='eps', bbox_inches="tight", dpi=300)
fig.savefig(figdir+'Celltypefreq-Mutation-TILs'+what+'.svg', format='svg', bbox_inches="tight", dpi=300)

# Save the p-value table (tab-separated) and show it sorted by Mann-Whitney p-value.
pd.DataFrame.from_dict(pvals, orient='index').to_csv(figdir+'Pval-CellTypeFreq-TIL-'+what+'-per-Mutation.tsv',sep='\t')
pd.DataFrame.from_dict(pvals, orient='index').sort_values(by='MannWhitney')



In [None]:
#list(pd.DataFrame.from_dict(pvals, orient='index').sort_values(by='MannWhitney').index[0:20])

In [None]:
# Summary heatmap: log2FC of the cell types in `pvals` with a Mann-Whitney
# p-value <= 0.1, highest fold change first.
pval_table = pd.DataFrame.from_dict(pvals, orient='index')
toinc = list(pval_table.loc[pval_table['MannWhitney'] <= 0.1].index)

plt.figure(figsize=(0.5, 3))
sns.set(font_scale=0.8)
log2fc_sorted = pval_table.loc[toinc, 'log2FC'].sort_values(ascending=False).to_frame()
sns_plot = sns.heatmap(log2fc_sorted, center=0.00,
                       cmap=sns.diverging_palette(220, 20, as_cmap=True))
fig = sns_plot.get_figure()
summary_stem = figdir + 'Celltypefreq-Mutation-summary-' + what
fig.savefig(summary_stem + '.pdf', bbox_inches="tight", dpi=300)
fig.savefig(summary_stem + '.eps', format='eps', bbox_inches="tight", dpi=300)
fig.savefig(summary_stem + '.svg', format='svg', bbox_inches="tight", dpi=300)

In tumor, in BRAF setting, more B cells (of different types), naive CD4+ T cells, fewer macrophages (of all types) and exhausted-like CD8+ T cells, regulatory T cells.


##### Lev 3 - PBMC

In [None]:
# Level 3 cell types, PBMC samples: BRAF vs NRAS. Only the 20 cell types listed
# in `mychoice` are plotted. Relies on `pbmcreq` and `what` as set by the
# Lev 3 - TIL mutation cell.
sns.set_theme(style="ticks")
mychoice=['immature conventional dendritic cell',
 'proliferating CD8-positive T cell',
 'exhausted-like CD4-positive T cell',
 'naive CD8-positive T cell',
 'MARCO-positive macrophage',
 'proliferating NK cell',
 'CD56-bright cytokine secreting natural killer cell',
 'effector memory CD8-positive T cell',
 'cytotoxic CD56-dim natural killer cell',
 'naive B cell',
 'plasmacytoid dendritic cell',
 'CCR7-positive myeloid dendritic cell',
 'germinal center B cell',
 'CXCL9-positive macrophage',
 'IL7R-max CD4-positive T cell',
 'macrophage',
 'cytokine secreting effector CD8-positive T cell',
 'mature NK T cell',
 'regulatory T cell',
 'central memory CD4-positive T cell']

# Sample column labels are split on spaces: token 2 is taken as the mutation
# label, token 1 as the patient id (assumes >= 3 tokens per label -- TODO confirm).
rs=[x.split(' ')[2] for x in list(pbmcreq.drop(columns=['Unnamed: 0']).columns)]
patid=[x.split(' ')[1] for x in list(pbmcreq.drop(columns=['Unnamed: 0']).columns)]

# Samples become rows, cell types become columns; add the mutation label per sample.
toplot=pbmcreq.drop(columns=['Unnamed: 0']).transpose()
toplot['Mut']=rs



fig, axes = plt.subplots(4, 5,figsize=(16,20), gridspec_kw={'wspace': 0.5, 'left': 0.25})
plt.subplots_adjust(left=0.2, right=0.98, top=0.86, bottom=0.1)
axes = axes.flatten()

pvals={}

i=0
for mycell in mychoice:
    ax=sns.boxplot(y=mycell,x='Mut',data=toplot,ax=axes[i],
                   orient='v',order=['BRAF','NRAS'])
    ax=sns.swarmplot(y=mycell,x='Mut',data=toplot,color='black',ax=axes[i],
                    order=['BRAF','NRAS'])
    ax.set_xticklabels(ax.get_xticklabels(), rotation=90,fontsize=10)
    ax.tick_params(axis='y', labelsize=10)
    ax.set_ylabel(ylabel='Perc '+mycell,fontsize=10)
    ax.set_xlabel(xlabel='Mut ',fontsize=10)
    
    pvals[mycell]=getPs(toplot.loc[:,['Mut',mycell]],['BRAF','NRAS'],'Mut').loc['BRAF-NRAS',:]
    # log2 of (mean BRAF + 0.1) / (mean NRAS + 0.1). The result is a one-element
    # Series labelled by the cell type, so take it by position with .iloc
    # (plain [0] on a label-indexed Series is deprecated in pandas 2.x).
    grpmean=toplot.loc[:,['Mut',mycell]].groupby('Mut').mean()
    pvals[mycell]['log2FC']=np.log2((0.1+grpmean.loc['BRAF',:])/(0.1+grpmean.loc['NRAS',:])).iloc[0]

    i=i+1
    
fig.savefig(figdir+'Celltypefreq-Mutation-PBMCs'+what+'.pdf', bbox_inches="tight", dpi=300)
fig.savefig(figdir+'Celltypefreq-Mutation-PBMCs'+what+'.eps', format='eps', bbox_inches="tight", dpi=300)
fig.savefig(figdir+'Celltypefreq-Mutation-PBMCs'+what+'.svg', format='svg', bbox_inches="tight", dpi=300)

# Save the p-value table (tab-separated) and show it sorted by Mann-Whitney p-value.
pd.DataFrame.from_dict(pvals, orient='index').to_csv(figdir+'Pval-CellTypeFreq-PBMC-'+what+'-per-Mutation.tsv',sep='\t')
pd.DataFrame.from_dict(pvals, orient='index').sort_values(by='MannWhitney')



In [None]:
#list(pd.DataFrame.from_dict(pvals, orient='index').sort_values(by='MannWhitney').index[0:20])

### Periphery vs. tumor

In [None]:
# Call scanpy's figure setup with no arguments, i.e. with its default values.
sc.settings.set_figure_params()

##### Level 1 comparison

In [None]:
# Level 1 cell types: load the TIL and PBMC frequency tables that are not split
# by a condition (file names carry no Lesion/Mutation part).
what='celltype1'
tilfreq=pd.read_csv(propdir+'TIL_celltypeFreq_'+what+'.tsv',header=0)
pbmcreq=pd.read_csv(propdir+'PBMCs_celltypeFreq_'+what+'.tsv',header=0)


In [None]:
# Use the first column as row index so rows can be looked up by cell type name.
# The column itself stays in the frame (it is skipped later via [1:] slicing).
tilfreq.index=tilfreq.iloc[:,0]
pbmcreq.index=pbmcreq.iloc[:,0]

In [None]:
# Display the TIL frequency table.
tilfreq

In [None]:
# Level 1 cell types: frequencies in TIL vs PBMC samples, one panel per cell type.
fig, axes = plt.subplots(1, 4,figsize=(10,3), gridspec_kw={'wspace': 0.5, 'left': 0.25})
plt.subplots_adjust(left=0.1, right=0.98, top=0.82, bottom=0.1)
axes = axes.flatten()

pvals={}
i=0
# Iterate in sorted order: the iteration order of a plain set of strings can
# change between kernel sessions, which would shuffle the panels of the saved
# figure from run to run.
for mycell in sorted(set(adata.obs['celltype1'])):
    # Row 0 = TIL values, row 1 = PBMC values; [1:] skips the leading label column.
    cellFreqs=pd.DataFrame([list(tilfreq.loc[mycell,:])[1:],
     list(pbmcreq.loc[mycell,:])[1:]])
    cellFreqs.index=['TIL','PBMC']
    # Long format: one row per value, with the sample type in the first column.
    cellFreqs=cellFreqs.stack().reset_index(level=0)
    cellFreqs.columns=['Type','Perc '+mycell]
    ax=sns.boxplot(x='Type',y='Perc '+mycell,data=cellFreqs,ax=axes[i])
    ax=sns.swarmplot(x='Type',y='Perc '+mycell,data=cellFreqs,color='black',ax=axes[i])
    ax.set_xticklabels(ax.get_xticklabels(), rotation=90,fontsize=10)
    ax.tick_params(axis='y', labelsize=10)
    ax.set_ylabel(ylabel='Perc '+mycell,fontsize=10)
    ax.set_xlabel(xlabel='Sample type ',fontsize=10)
    # Single pair (TIL vs PBMC), so the first row of the result is the only one.
    pvals[mycell]=getPs(cellFreqs,['TIL','PBMC'],'Type').iloc[0,:]
    i=i+1
    
fig.savefig(figdir+'Celltypefreq-PBMCvsTIL'+what+'.pdf', bbox_inches="tight", dpi=300)
fig.savefig(figdir+'Celltypefreq-PBMCvsTIL'+what+'.eps', format='eps', bbox_inches="tight", dpi=300)
fig.savefig(figdir+'Celltypefreq-PBMCvsTIL'+what+'.svg', format='svg', bbox_inches="tight", dpi=300)

In [None]:
# Condition labels and grouping column name for the TIL vs PBMC comparison.
# NOTE(review): these two names are not read by the cells visible around here -- confirm they are still needed.
myconditions=['TIL','PBMC']
name1='Type'

#myps.to_csv(figdir+'Pval-Velocitylen-'+velosubset+'-'+mysubset+'-'+subcat+'-per-response.pdf',sep='\t')

In [None]:
pd.DataFrame.from_dict(pvals, orient='index').to_csv(figdir+'Pval-CellTypeFreq-'+what+'-per-TILvsPBMC.tsv',sep='\t')

In [None]:
pd.DataFrame.from_dict(pvals, orient='index')

More NK cells (highly significant) and more myeloid cells (marginally significant) in PBMCs; 
more T cells and B lymphocytes in tumors.

##### Level 2 comparison

In [None]:
# Level-2 annotation: load both frequency tables and index them by their first column
# so rows can be selected by name.
what = 'celltype2_pub'
tilfreq = pd.read_csv(f'{propdir}TIL_celltypeFreq_{what}.tsv', header=0)
pbmcreq = pd.read_csv(f'{propdir}PBMCs_celltypeFreq_{what}.tsv', header=0)

tilfreq.index = tilfreq.iloc[:, 0]
pbmcreq.index = pbmcreq.iloc[:, 0]


In [None]:
tilfreq.T

In [None]:
c1='classical monocyte'
c2='macrophage'
c3='classical monocyte'

In [None]:
# Two-row frame: the PBMC row for c1 and the element-wise sum of the TIL rows for c2 and c3.
# Both rows still carry the 'Unnamed: 0' label column at this point; it is dropped two lines below.
cmono=pd.DataFrame([pbmcreq.loc[c1,:],tilfreq.loc[c2,:]+tilfreq.loc[c3,:]])
cmono.index=['PBMC','TIL']
cmono=cmono.drop(columns='Unnamed: 0')
# After the transpose every row corresponds to one sample column of the frequency tables.
cmono=cmono.transpose()
# Sample names are split on spaces: the third token is stored in RCat, the second becomes the row index.
# NOTE(review): presumably names look like "<prefix> <patient id> <response category>" - confirm the naming scheme.
cmono['RCat']=[x.split(' ')[2] for x in list(cmono.index)]
cmono.index=[str(x) for x in [x.split(' ')[1] for x in list(cmono.index)]]

In [None]:
cmono.columns='Myelo_'+cmono.columns

In [None]:
# Attach the cmono columns to the patient table. Columns with the same names left behind by an
# earlier execution of this cell are dropped first, so re-running does not create duplicates.
dfPat=pd.concat([dfPat.drop(columns=list(cmono.columns), errors='ignore'),cmono], axis=1)

In [None]:
dfPat

In [None]:
cmono.corr(method='spearman')

In [None]:
# Restrict cmono to patients whose dfPat 'Lesion' is 'LN' or 'Subc', keeping at most
# the first 7 and 3 rows respectively, then show the Spearman correlation for the LN subset.
ln_patients = dfPat.loc[dfPat['Lesion'] == 'LN'].index
subc_patients = dfPat.loc[dfPat['Lesion'] == 'Subc'].index
cmonoln = cmono.loc[cmono.index.isin(ln_patients)].iloc[0:7].copy()
cmonosubc = cmono.loc[cmono.index.isin(subc_patients)].iloc[0:3].copy()
cmonoln.corr(method='spearman')

In [None]:
cmonoln.corr(method='pearson')

In [None]:
cmono.columns=['ClassMono_PBMC','ClassMonoMacro_TIL','RCat']

In [None]:
stats.spearmanr(cmono['ClassMono_PBMC'][0:15],cmono['ClassMonoMacro_TIL'][0:15])

In [None]:
cmono.corr(method='pearson')

In [None]:
cmono

In [None]:
stats.spearmanr(cmonoln['Myelo_PBMC'][0:15],cmonoln['Myelo_TIL'][0:15])

In [None]:
stats.pearsonr(cmonoln['Myelo_PBMC'][0:15],cmonoln['Myelo_TIL'][0:15])

In [None]:
stats.pearsonr(cmono['ClassMono_PBMC'][0:15],cmono['ClassMonoMacro_TIL'][0:15])

In [None]:
cmono.to_csv(figdir+'Myelo_PBMC_vs_TIL.tsv', sep='\t')

In [None]:
cmono.index=cmono.index.astype('str')

In [None]:
# Scatter of ClassMono_PBMC against ClassMonoMacro_TIL, coloured by response category,
# with one text label per dfPat row; exported as eps/pdf/svg.
sns.set_theme(style="ticks")
# Use pyplot's rcParams so the cell does not depend on a separately imported bare `rcParams`.
plt.rcParams['figure.figsize'] = 7,5
fig=sns.scatterplot(x='ClassMono_PBMC',y='ClassMonoMacro_TIL',hue='RCat',data=cmono,
                hue_order=['R','TF','NR_adj','NR_nadj'],
               palette=color_dict).set_title(c1+' vs. '+c2+'/'+c3)
plt.legend(bbox_to_anchor=(1.05, 1.05),borderaxespad=0.5)
plt.tight_layout()
# Label coordinates come from dfPat's Myelo_PBMC / Myelo_TIL columns; extract them once
# instead of rebuilding dfPat.reset_index() twice per label.
label_x=dfPat['Myelo_PBMC'].to_numpy()
label_y=dfPat['Myelo_TIL'].to_numpy()
for patient, x_pos, y_pos in zip(dfPat.index, label_x, label_y):
    plt.text(x=x_pos,y=y_pos,s=patient,
             fontdict=dict(color='black',size=10))

plt.savefig(figdir+'Myelo_PBMC_vs_TIL.eps', format='eps', bbox_inches="tight", dpi=300)
plt.savefig(figdir+'Myelo_PBMC_vs_TIL.pdf', format='pdf', bbox_inches="tight", dpi=300)
plt.savefig(figdir+'Myelo_PBMC_vs_TIL.svg', format='svg', bbox_inches="tight", dpi=300)

In [None]:
# Regression plot of the same two columns, written out in eps, pdf and svg.
fig = sns.regplot(x="ClassMono_PBMC", y="ClassMonoMacro_TIL", data=cmono)
for ext in ('eps', 'pdf', 'svg'):
    fig.figure.savefig(figdir+'Myelo_PBMC_vs_TIL_regplot.'+ext, format=ext, bbox_inches="tight", dpi=300)

In [None]:
c1='plasma cell'
c2='plasma cell'

In [None]:
# PBMC row for c1 next to the TIL row for c2, reshaped to one row per sample with its
# response category (third space-separated token of the sample name) in RCat.
paired_rows = [pbmcreq.loc[c1, :], tilfreq.loc[c2, :]]
cmono = pd.DataFrame(paired_rows)
cmono.index = ['PBMC', 'TIL']
cmono = cmono.drop(columns='Unnamed: 0').transpose()
cmono['RCat'] = [sample.split(' ')[2] for sample in cmono.index]

In [None]:
cmono.corr(method='spearman')

In [None]:
sns.scatterplot(data=cmono, x="PBMC", y="TIL",hue='RCat').set_title(c1+' vs. '+c2)

In [None]:
sns.regplot(data=cmono, x="PBMC", y="TIL")

In [None]:
# Level-2 TIL vs. PBMC comparison: one box/swarm panel per cell type; `pvals` collects the
# getPs statistics plus a log2 fold change (TIL over PBMC, pseudocount 0.1) per cell type.
fig, axes = plt.subplots(2, 6,figsize=(16,10), gridspec_kw={'wspace': 0.5, 'left': 0.25})
plt.subplots_adjust(left=0.1, right=0.98, top=0.82, bottom=0.2)
axes = axes.flatten()
pvals={}

# sorted() keeps the panel order reproducible across kernel sessions (bare set order is not).
for i, mycell in enumerate(sorted(set(adata.obs['celltype2_pub']))):
    # A cell type missing from one table gets an all-zero row there (label in the first column).
    # Appending via .loc keeps the numeric columns numeric, unlike a transpose round-trip.
    if mycell not in pbmcreq.index:
        pbmcreq.loc[mycell]=[mycell]+[0] * (pbmcreq.shape[1]-1)
    if mycell not in tilfreq.index:
        tilfreq.loc[mycell]=[mycell]+[0] * (tilfreq.shape[1]-1)
    # Skip the leading label column; the remaining entries are the per-sample values.
    cellFreqs=pd.DataFrame([list(tilfreq.loc[mycell,:])[1:],
                            list(pbmcreq.loc[mycell,:])[1:]])
    cellFreqs.index=['TIL','PBMC']
    cellFreqs=cellFreqs.stack().reset_index(level=0)
    cellFreqs.columns=['Type','Perc '+mycell]
    ax=sns.boxplot(x='Type',y='Perc '+mycell,data=cellFreqs,ax=axes[i])
    ax=sns.swarmplot(x='Type',y='Perc '+mycell,data=cellFreqs,color='black',ax=axes[i])
    ax.set_xticklabels(ax.get_xticklabels(), rotation=90,fontsize=10)
    ax.tick_params(axis='y', labelsize=10)
    ax.set_ylabel(ylabel='Perc '+mycell,fontsize=10)
    ax.set_xlabel(xlabel='Sample type ',fontsize=10)
    # .copy() so adding 'log2FC' never writes into a slice of the getPs result.
    pvals[mycell]=getPs(cellFreqs,['TIL','PBMC'],'Type').iloc[0,:].copy()
    # Label-based lookup of the group means replaces the deprecated positional `[0]`.
    type_means=cellFreqs.groupby('Type')['Perc '+mycell].mean()
    pvals[mycell]['log2FC']=np.log2((0.1+type_means['TIL'])/(0.1+type_means['PBMC']))

fig.savefig(figdir+'Celltypefreq-PBMCvsTIL'+what+'.pdf', bbox_inches="tight", dpi=300)
fig.savefig(figdir+'Celltypefreq-PBMCvsTIL'+what+'.eps', format='eps', bbox_inches="tight", dpi=300)
fig.savefig(figdir+'Celltypefreq-PBMCvsTIL'+what+'.svg', format='svg', bbox_inches="tight", dpi=300)

In [None]:
# Per-sample CD4+ (a1/b1) and CD8+ (a2/b2) T-cell values, PBMC (a*) and TIL (b*),
# with the label column removed.
pbmc_values = pbmcreq.drop(columns='Unnamed: 0')
til_values = tilfreq.drop(columns='Unnamed: 0')

a1 = pbmc_values.loc['CD4-positive T cell']
a2 = pbmc_values.loc['CD8-positive T cell']

b1 = til_values.loc['CD4-positive T cell']
b2 = til_values.loc['CD8-positive T cell']

In [None]:
a1.median()

In [None]:
a2.median()

In [None]:
(a2/a1).median()

In [None]:
a2/a1

In [None]:
(b2/b1).median()

In [None]:
b1.median()

In [None]:
b2.median()

In [None]:
b2/b1

In [None]:
# Export the per-cell-type statistics, remember the cell types with MannWhitney <= 0.05
# in `toinc`, and display the table sorted by the MannWhitney column.
pval_table = pd.DataFrame.from_dict(pvals, orient='index')
pval_table.to_csv(figdir+'Pval-CellTypeFreq-'+what+'-per-TILvsPBMC.tsv',sep='\t')
toinc = list(pval_table.loc[pval_table['MannWhitney'] <= 0.05].index)

pval_table.sort_values('MannWhitney')

More pDCs, CD8+ T cells, mDCs, B cells, Plasma cells, CD56-bright NK, macrophages in tumors. More ncMonocytes, classical monocytes, CD56-dim NK in PBMCs. 

In [None]:

# One-column heatmap of log2FC for the cell types in `toinc`, largest value on top.
plt.figure(figsize=(0.5,3))
sns.set(font_scale=0.8)
log2fc_selected = pd.DataFrame.from_dict(pvals, orient='index').loc[toinc, 'log2FC'].sort_values(ascending=False)
sns_plot = sns.heatmap(log2fc_selected.to_frame(), center=0.00,
                       cmap=sns.diverging_palette(220, 20, as_cmap=True))
fig = sns_plot.get_figure()
for ext in ('pdf', 'eps', 'svg'):
    fig.savefig(figdir+'Celltypefreq-PBMCvsTIL-summary-'+what+'.'+ext, format=ext, bbox_inches="tight", dpi=300)

##### Level 3 comparison

In [None]:
# Level-3 annotation: load both frequency tables and index them by their first column.
what = 'celltype3_pub'
tilfreq = pd.read_csv(f'{propdir}TIL_celltypeFreq_{what}.tsv', header=0)
pbmcreq = pd.read_csv(f'{propdir}PBMCs_celltypeFreq_{what}.tsv', header=0)

tilfreq.index = tilfreq.iloc[:, 0]
pbmcreq.index = pbmcreq.iloc[:, 0]


In [None]:
tilfreq

In [None]:
# Level-3 TIL vs. PBMC comparison: one box/swarm panel per cell type; `pvals` collects the
# getPs statistics plus a log2 fold change (TIL over PBMC, pseudocount 0.1) per cell type.
sns.set_theme(style="ticks", font_scale=1.25)
fig, axes = plt.subplots(6, 6,figsize=(20,30), gridspec_kw={'wspace': 0.5, 'left': 0.25})
plt.subplots_adjust(left=0.2, right=0.98, top=0.82, bottom=0.1)
axes = axes.flatten()
pvals={}

# sorted() keeps the panel order reproducible across kernel sessions (bare set order is not).
for i, mycell in enumerate(sorted(set(adata.obs['celltype3_pub']))):
    # A cell type missing from one table gets an all-zero row there (label in the first column).
    # Appending via .loc keeps the numeric columns numeric, unlike a transpose round-trip.
    if mycell not in pbmcreq.index:
        pbmcreq.loc[mycell]=[mycell]+[0] * (pbmcreq.shape[1]-1)
    if mycell not in tilfreq.index:
        tilfreq.loc[mycell]=[mycell]+[0] * (tilfreq.shape[1]-1)
    # Skip the leading label column; the remaining entries are the per-sample values.
    cellFreqs=pd.DataFrame([list(tilfreq.loc[mycell,:])[1:],
                            list(pbmcreq.loc[mycell,:])[1:]])
    cellFreqs.index=['TIL','PBMC']
    cellFreqs=cellFreqs.stack().reset_index(level=0)
    cellFreqs.columns=['Type','Perc '+mycell]
    ax=sns.boxplot(x='Type',y='Perc '+mycell,data=cellFreqs,ax=axes[i])
    ax=sns.swarmplot(x='Type',y='Perc '+mycell,data=cellFreqs,color='black',ax=axes[i])
    ax.set_xticklabels(ax.get_xticklabels(), rotation=90,fontsize=10)
    ax.tick_params(axis='y', labelsize=10)
    ax.set_ylabel(ylabel='Perc '+mycell,fontsize=10)
    ax.set_xlabel(xlabel='Sample type ',fontsize=10)
    # .copy() so adding 'log2FC' never writes into a slice of the getPs result.
    pvals[mycell]=getPs(cellFreqs,['TIL','PBMC'],'Type').iloc[0,:].copy()
    # Label-based lookup of the group means replaces the deprecated positional `[0]`.
    type_means=cellFreqs.groupby('Type')['Perc '+mycell].mean()
    pvals[mycell]['log2FC']=np.log2((0.1+type_means['TIL'])/(0.1+type_means['PBMC']))

fig.savefig(figdir+'Celltypefreq-PBMCvsTIL'+what+'.pdf', bbox_inches="tight", dpi=300)
fig.savefig(figdir+'Celltypefreq-PBMCvsTIL'+what+'.eps', format='eps', bbox_inches="tight", dpi=300)
fig.savefig(figdir+'Celltypefreq-PBMCvsTIL'+what+'.svg', format='svg', bbox_inches="tight", dpi=300)

In [None]:
# Export the per-cell-type statistics, remember the cell types with MannWhitney <= 0.05
# in `toinc`, and display the table sorted by the MannWhitney column.
pval_table = pd.DataFrame.from_dict(pvals, orient='index')
pval_table.to_csv(figdir+'Pval-CellTypeFreq-'+what+'-per-TILvsPBMC.tsv',sep='\t')
toinc = list(pval_table.loc[pval_table['MannWhitney'] <= 0.05].index)

pval_table.sort_values('MannWhitney')

In [None]:
# One-column heatmap of log2FC for the cell types in `toinc`, largest value on top.
plt.figure(figsize=(1,7))
sns.set(font_scale=0.8)
log2fc_selected = pd.DataFrame.from_dict(pvals, orient='index').loc[toinc, 'log2FC'].sort_values(ascending=False)
sns_plot = sns.heatmap(log2fc_selected.to_frame(), center=0.00,
                       cmap=sns.diverging_palette(220, 20, as_cmap=True))
fig = sns_plot.get_figure()
for ext in ('pdf', 'eps', 'svg'):
    fig.savefig(figdir+'Celltypefreq-PBMCvsTIL-summary-'+what+'.'+ext, format=ext, bbox_inches="tight", dpi=300)

### Stratified per response

##### Lev 1 - TIL

In [None]:
# Reload the level-1 tables and build `toplot`: samples as rows, cell types as columns,
# plus the response category parsed from each sample name.
sns.set_theme(style="ticks")

what = 'celltype1'
tilfreq = pd.read_csv(f'{propdir}TIL_celltypeFreq_{what}.tsv', header=0)
pbmcreq = pd.read_csv(f'{propdir}PBMCs_celltypeFreq_{what}.tsv', header=0)

tilfreq.index = tilfreq.iloc[:, 0]
pbmcreq.index = pbmcreq.iloc[:, 0]

# Sample names are split on spaces: third token -> response category, second token -> patient id.
til_samples = tilfreq.drop(columns=['Unnamed: 0'])
rs = [name.split(' ')[2] for name in til_samples.columns]
patid = [name.split(' ')[1] for name in til_samples.columns]

toplot = til_samples.transpose()
toplot['RCat'] = rs


In [None]:

# Level-1 TIL frequencies per response category: one box/swarm panel per cell type.
# `pvalsn` holds the 'R-NR_nadj' contrast and `pvals` the 'TF-NR_adj' contrast from getPs,
# each extended with a log2 fold change of the group means (pseudocount 0.1).
fig, axes = plt.subplots(1, 4,figsize=(14,3), gridspec_kw={'wspace': 0.5, 'left': 0.25})
plt.subplots_adjust(left=0.2, right=0.98, top=0.86, bottom=0.1)

axes = axes.flatten()

pvals={}
pvalsn={}
rcat_order=['R','TF','NR_nadj','NR_adj']
for i, mycell in enumerate(list(tilfreq.index)):

    ax=sns.boxplot(y=mycell,x='RCat',data=toplot,ax=axes[i],
                   palette=color_dict,orient='v',order=rcat_order)
    ax=sns.swarmplot(y=mycell,x='RCat',data=toplot,color='black',ax=axes[i],
                    order=rcat_order)
    ax.set_xticklabels(ax.get_xticklabels(), rotation=90,fontsize=10)
    ax.tick_params(axis='y', labelsize=10)
    ax.set_ylabel(ylabel='Perc '+mycell,fontsize=10)
    ax.set_xlabel(xlabel='Response ',fontsize=10)
    # Both contrasts are rows of the same getPs table, so it is evaluated once per cell type.
    contrast_stats=getPs(toplot.loc[:,['RCat',mycell]],list(rcat_order),'RCat')
    # Group means looked up by label; replaces the deprecated positional `[0]` on a Series.
    rcat_means=toplot.groupby('RCat')[mycell].mean()
    # .copy() so adding 'log2FC' never writes into a slice of the getPs result.
    pvalsn[mycell]=contrast_stats.loc['R-NR_nadj',:].copy()
    pvalsn[mycell]['log2FC']=np.log2((0.1+rcat_means['R'])/(0.1+rcat_means['NR_nadj']))
    pvals[mycell]=contrast_stats.loc['TF-NR_adj',:].copy()
    pvals[mycell]['log2FC']=np.log2((0.1+rcat_means['TF'])/(0.1+rcat_means['NR_adj']))

fig.savefig(figdir+'Celltypefreq-Response-TILs'+what+'.pdf', bbox_inches="tight", dpi=300)
fig.savefig(figdir+'Celltypefreq-Response-TILs'+what+'.eps', format='eps', bbox_inches="tight", dpi=300)
fig.savefig(figdir+'Celltypefreq-Response-TILs'+what+'.svg', format='svg', bbox_inches="tight", dpi=300)

In [None]:
# Write the `pvals` table to disk and show it sorted by the MannWhitney column.
adj_table = pd.DataFrame.from_dict(pvals, orient='index')
adj_table.to_csv(figdir+'Pval-CellTypeFreq-TIL-'+what+'-per-Response-adj.tsv',sep='\t')
adj_table.sort_values(by='MannWhitney')


In [None]:
# Write the `pvalsn` table to disk and show it sorted by the MannWhitney column.
nadj_table = pd.DataFrame.from_dict(pvalsn, orient='index')
nadj_table.to_csv(figdir+'Pval-CellTypeFreq-TIL-'+what+'-per-Response-nadj.tsv',sep='\t')
nadj_table.sort_values(by='MannWhitney')


In [None]:
# Cell types with MannWhitney <= 0.1 in either contrast, and their log2FC in both
# contrasts side by side ('nadj' from pvalsn, 'adj' from pvals).
nadj_table = pd.DataFrame.from_dict(pvalsn, orient='index')
adj_table = pd.DataFrame.from_dict(pvals, orient='index')
toinc = list(nadj_table.loc[nadj_table['MannWhitney'] <= 0.1].index)
toinc2 = list(adj_table.loc[adj_table['MannWhitney'] <= 0.1].index)

toinc = list(set(toinc2).union(set(toinc)))
mypd = pd.concat([nadj_table.loc[toinc, 'log2FC'],
                  adj_table.loc[toinc, 'log2FC']], axis=1)
mypd.columns = ['nadj', 'adj']


In [None]:
# Cell types whose median value across samples is below 1.
# numeric_only=True skips the string label column 'Unnamed: 0'; without it pandas >= 2.0
# raises instead of silently ignoring non-numeric columns.
til_medians=tilfreq.median(axis=1, numeric_only=True)
toremove=list(til_medians[til_medians<1].index)

In [None]:
mypd

In [None]:
#mypd.sort_values('nadj',ascending=False)
# Heatmap of the nadj/adj log2 fold changes, rows ordered by decreasing 'nadj'.
plt.figure(figsize=(1,1))
sns.set(font_scale=0.8)
heat_data = mypd.sort_values('nadj', ascending=False)
sns_plot = sns.heatmap(heat_data, center=0.00, cmap=sns.diverging_palette(220, 20, as_cmap=True))
fig = sns_plot.get_figure()
for ext in ('pdf', 'eps', 'svg'):
    fig.savefig(figdir+'Celltypefreq-Response-summary-'+what+'.'+ext, format=ext, bbox_inches="tight", dpi=300)


In tumor, in the nadj setting, R have more myeloid cells and fewer B and T cells. 

In [None]:
sc.settings.set_figure_params()

##### Lev 1 - PBMC

In [None]:
# Build `toplot` for PBMC: samples as rows, cell types as columns, plus the response
# category (third space-separated token of the sample name). patid is the second token.
pbmc_samples = pbmcreq.drop(columns=['Unnamed: 0'])
rs = [name.split(' ')[2] for name in pbmc_samples.columns]
patid = [name.split(' ')[1] for name in pbmc_samples.columns]

toplot = pbmc_samples.transpose()
toplot['RCat'] = rs


In [None]:
pbmcreq

In [None]:

# Level-1 PBMC frequencies per response category: one box/swarm panel per cell type.
# `pvalsn` holds the 'R-NR_nadj' contrast and `pvals` the 'TF-NR_adj' contrast from getPs,
# each extended with a log2 fold change of the group means (pseudocount 0.1).
fig, axes = plt.subplots(1, 4,figsize=(14,3), gridspec_kw={'wspace': 0.5, 'left': 0.25})
plt.subplots_adjust(left=0.2, right=0.98, top=0.86, bottom=0.1)

axes = axes.flatten()
pvalsn={}
pvals={}

rcat_order=['R','TF','NR_nadj','NR_adj']
for i, mycell in enumerate(list(pbmcreq.index)):
    ax=sns.boxplot(y=mycell,x='RCat',data=toplot,ax=axes[i],
                   palette=color_dict,orient='v',order=rcat_order)
    ax=sns.swarmplot(y=mycell,x='RCat',data=toplot,color='black',ax=axes[i],
                    order=rcat_order)
    ax.set_xticklabels(ax.get_xticklabels(), rotation=90,fontsize=10)
    ax.tick_params(axis='y', labelsize=10)
    ax.set_ylabel(ylabel='Perc '+mycell,fontsize=10)
    ax.set_xlabel(xlabel='Response ',fontsize=10)

    # Both contrasts are rows of the same getPs table, so it is evaluated once per cell type.
    contrast_stats=getPs(toplot.loc[:,['RCat',mycell]],list(rcat_order),'RCat')
    # Group means looked up by label; replaces the deprecated positional `[0]` on a Series.
    rcat_means=toplot.groupby('RCat')[mycell].mean()
    # .copy() so adding 'log2FC' never writes into a slice of the getPs result.
    pvalsn[mycell]=contrast_stats.loc['R-NR_nadj',:].copy()
    pvals[mycell]=contrast_stats.loc['TF-NR_adj',:].copy()
    pvalsn[mycell]['log2FC']=np.log2((0.1+rcat_means['R'])/(0.1+rcat_means['NR_nadj']))
    pvals[mycell]['log2FC']=np.log2((0.1+rcat_means['TF'])/(0.1+rcat_means['NR_adj']))

fig.savefig(figdir+'Celltypefreq-Response-PBMCs'+what+'.pdf', bbox_inches="tight", dpi=300)
fig.savefig(figdir+'Celltypefreq-Response-PBMCs'+what+'.eps', format='eps', bbox_inches="tight", dpi=300)
fig.savefig(figdir+'Celltypefreq-Response-PBMCs'+what+'.svg', format='svg', bbox_inches="tight", dpi=300)

In [None]:
# Write the `pvals` table to disk and show it sorted by the MannWhitney column.
adj_table = pd.DataFrame.from_dict(pvals, orient='index')
adj_table.to_csv(figdir+'Pval-CellTypeFreq-PBMC-'+what+'-per-Response-adj.tsv',sep='\t')
adj_table.sort_values(by='MannWhitney')



In [None]:

# Write the `pvalsn` table to disk and show it sorted by the MannWhitney column.
nadj_table = pd.DataFrame.from_dict(pvalsn, orient='index')
nadj_table.to_csv(figdir+'Pval-CellTypeFreq-PBMC-'+what+'-per-Response-nadj.tsv',sep='\t')
nadj_table.sort_values(by='MannWhitney')


No significant differences in PBMCs, but a trend for more myeloid cells in Rs. 

In [None]:
#toinc

In [None]:
# Cell types with MannWhitney <= 0.3 in either contrast, and their log2FC in both
# contrasts side by side ('nadj' from pvalsn, 'adj' from pvals).
nadj_table = pd.DataFrame.from_dict(pvalsn, orient='index')
adj_table = pd.DataFrame.from_dict(pvals, orient='index')
toinc = list(nadj_table.loc[nadj_table['MannWhitney'] <= 0.3].index)
toinc2 = list(adj_table.loc[adj_table['MannWhitney'] <= 0.3].index)

toinc = list(set(toinc2).union(set(toinc)))
mypd = pd.concat([nadj_table.loc[toinc, 'log2FC'],
                  adj_table.loc[toinc, 'log2FC']], axis=1)
mypd.columns = ['nadj', 'adj']


In [None]:
# Cell types whose median value across samples is below 1.
# numeric_only=True skips the string label column 'Unnamed: 0'; without it pandas >= 2.0
# raises instead of silently ignoring non-numeric columns.
pbmc_medians=pbmcreq.median(axis=1, numeric_only=True)
toremove=list(pbmc_medians[pbmc_medians<1].index)
toremove

In [None]:
#mypd.sort_values('nadj',ascending=False)
plt.figure(figsize=(1,1))
sns.set(font_scale=0.8)
sns_plot=sns.heatmap(mypd.sort_values('nadj',ascending=False),center=0.00,cmap=sns.diverging_palette(220, 20, as_cmap=True))
fig = sns_plot.get_figure()
fig.savefig(figdir+'Celltypefreq-Response-summary-'+what+'-PBMC.pdf')
fig.savefig(figdir+'Celltypefreq-Response-summary-'+what+'-PBMC.eps', format='eps')
fig.savefig(figdir+'Celltypefreq-Response-summary-'+what+'-PBMC.svg', format='svg')

##### Lev 2 - TIL

In [None]:
# Reload the level-2 tables and build `toplot` for TIL: samples as rows, cell types as
# columns, plus the response category parsed from each sample name.
sns.set_style('white')
what = 'celltype2_pub'
tilfreq = pd.read_csv(f'{propdir}TIL_celltypeFreq_{what}.tsv', header=0)
pbmcreq = pd.read_csv(f'{propdir}PBMCs_celltypeFreq_{what}.tsv', header=0)

tilfreq.index = tilfreq.iloc[:, 0]
pbmcreq.index = pbmcreq.iloc[:, 0]

# Sample names are split on spaces: third token -> response category, second token -> patient id.
til_samples = tilfreq.drop(columns=['Unnamed: 0'])
rs = [name.split(' ')[2] for name in til_samples.columns]
patid = [name.split(' ')[1] for name in til_samples.columns]

toplot = til_samples.transpose()
toplot['RCat'] = rs



In [None]:
# Level-2 TIL frequencies per response category: one box/swarm panel per cell type.
# `pvalsn` holds the 'R-NR_nadj' contrast and `pvals` the 'TF-NR_adj' contrast from getPs,
# each extended with a log2 fold change of the group means (pseudocount 0.1).
fig, axes = plt.subplots(2, 6,figsize=(16,7), gridspec_kw={'wspace': 0.5, 'left': 0.25})
plt.subplots_adjust(left=0.2, right=0.98, top=0.86, bottom=0.1)

axes = axes.flatten()
pvalsn={}
pvals={}
rcat_order=['R','TF','NR_nadj','NR_adj']
for i, mycell in enumerate(list(tilfreq.index)):
    ax=sns.boxplot(y=mycell,x='RCat',data=toplot,ax=axes[i],
                   palette=color_dict,orient='v',order=rcat_order)
    ax=sns.swarmplot(y=mycell,x='RCat',data=toplot,color='black',ax=axes[i]
                    ,order=rcat_order)
    ax.set_xticklabels(ax.get_xticklabels(), rotation=90,fontsize=10)
    ax.tick_params(axis='y', labelsize=10)
    ax.set_ylabel(ylabel='Perc '+mycell,fontsize=10)
    ax.set_xlabel(xlabel='Response ',fontsize=10)
    # Both contrasts are rows of the same getPs table, so it is evaluated once per cell type.
    contrast_stats=getPs(toplot.loc[:,['RCat',mycell]],list(rcat_order),'RCat')
    # Group means looked up by label; replaces the deprecated positional `[0]` on a Series.
    rcat_means=toplot.groupby('RCat')[mycell].mean()
    # .copy() so adding 'log2FC' never writes into a slice of the getPs result.
    pvalsn[mycell]=contrast_stats.loc['R-NR_nadj',:].copy()
    pvals[mycell]=contrast_stats.loc['TF-NR_adj',:].copy()
    pvalsn[mycell]['log2FC']=np.log2((0.1+rcat_means['R'])/(0.1+rcat_means['NR_nadj']))
    pvals[mycell]['log2FC']=np.log2((0.1+rcat_means['TF'])/(0.1+rcat_means['NR_adj']))

fig.savefig(figdir+'Celltypefreq-Response-TILs'+what+'.pdf', bbox_inches="tight", dpi=300)
fig.savefig(figdir+'Celltypefreq-Response-TILs'+what+'.eps', format='eps', bbox_inches="tight", dpi=300)
fig.savefig(figdir+'Celltypefreq-Response-TILs'+what+'.svg', format='svg', bbox_inches="tight", dpi=300)

In [None]:
mycell='myeloid dendritic cell'
toplot.loc[:,['RCat',mycell]].groupby('RCat').mean()

In [None]:
mycell='macrophage'
toplot.loc[:,['RCat',mycell]].groupby('RCat').mean()

In [None]:
mycell='plasmacytoid dendritic cell'
toplot.loc[:,['RCat',mycell]].groupby('RCat').mean()

In [None]:
mycell='classical monocyte'
toplot.loc[:,['RCat',mycell]].groupby('RCat').mean()

In [None]:
# Write the `pvals` table to disk and show it sorted by the MannWhitney column.
adj_table = pd.DataFrame.from_dict(pvals, orient='index')
adj_table.to_csv(figdir+'Pval-CellTypeFreq-TIL-'+what+'-per-Response-adj.tsv',sep='\t')
adj_table.sort_values(by='MannWhitney')



In tumor, adjuvant setting - trends for more CD4+ T cells in NR. 

In [None]:



# Write the `pvalsn` table to disk and show it sorted by the MannWhitney column.
nadj_table = pd.DataFrame.from_dict(pvalsn, orient='index')
nadj_table.to_csv(figdir+'Pval-CellTypeFreq-TIL-'+what+'-per-Response-nadj.tsv',sep='\t')
nadj_table.sort_values(by='MannWhitney')


In tumor, non-adjuvant setting - trends for more plasma cells in NRs, and more macrophages, classical monocytes & myeloid DCs in Rs. 

In [None]:
# Cell types with MannWhitney <= 0.5 in either contrast, and their log2FC in both
# contrasts side by side ('nadj' from pvalsn, 'adj' from pvals).
nadj_table=pd.DataFrame.from_dict(pvalsn, orient='index')
adj_table=pd.DataFrame.from_dict(pvals, orient='index')
toinc=list(nadj_table.loc[nadj_table['MannWhitney']<=0.5].index)
toinc2=list(adj_table.loc[adj_table['MannWhitney']<=0.5].index)

toinc=list(set(toinc2).union(set(toinc)))
mypd=pd.concat([nadj_table.loc[toinc,'log2FC'],
          adj_table.loc[toinc,'log2FC']], axis=1)
mypd.columns=['nadj','adj']

# Cell types whose mean value across samples is below 0.1.
# numeric_only=True skips the string label column 'Unnamed: 0'; without it pandas >= 2.0
# raises instead of silently ignoring non-numeric columns.
til_means=tilfreq.mean(axis=1, numeric_only=True)
toremove=list(til_means[til_means<0.1].index)
toremove

In [None]:

mypd=mypd[~mypd.index.isin(toremove)]

In [None]:


#mypd.sort_values('nadj',ascending=False)
# Heatmap of the nadj/adj log2 fold changes, rows ordered by decreasing 'nadj'.
plt.figure(figsize=(2,3))
sns.set(font_scale=0.8)
heat_data = mypd.sort_values('nadj', ascending=False)
sns_plot = sns.heatmap(heat_data, center=0.00, cmap=sns.diverging_palette(220, 20, as_cmap=True))
fig = sns_plot.get_figure()
for ext in ('pdf', 'eps', 'svg'):
    fig.savefig(figdir+'Celltypefreq-Response-summary-'+what+'.'+ext, format=ext, bbox_inches="tight", dpi=300)

##### Lev 2 - PBMC

In [None]:
# Build `toplot` for PBMC: samples as rows, cell types as columns, plus the response
# category (third space-separated token of the sample name). patid is the second token.
sns.set_style('white')
pbmc_samples = pbmcreq.drop(columns=['Unnamed: 0'])
rs = [name.split(' ')[2] for name in pbmc_samples.columns]
patid = [name.split(' ')[1] for name in pbmc_samples.columns]

toplot = pbmc_samples.transpose()
toplot['RCat'] = rs


In [None]:
# Mean CD4+ T-cell value per response category, restricted to cell types whose median
# across PBMC samples exceeds 0.5.
rs1=[x.split(' ')[2] for x in list(pbmcreq.drop(columns=['Unnamed: 0']).columns)]
patid1=[x.split(' ')[1] for x in list(pbmcreq.drop(columns=['Unnamed: 0']).columns)]

# numeric_only=True skips the string label column 'Unnamed: 0'; without it pandas >= 2.0
# raises instead of silently ignoring non-numeric columns.
pbmcreq1=pbmcreq.loc[pbmcreq.median(axis=1, numeric_only=True)>0.5,:]

toplot1=pbmcreq1.drop(columns=['Unnamed: 0']).transpose()
toplot1['RCat']=rs1

toplot1.groupby('RCat').mean()['CD4-positive T cell']

In [None]:

# Level-2 PBMC frequencies per response category: one box/swarm panel per cell type.
# `pvalsn` holds the 'R-NR_nadj' contrast and `pvals` the 'TF-NR_adj' contrast from getPs,
# each extended with a log2 fold change of the group means (pseudocount 0.1).
fig, axes = plt.subplots(2, 6,figsize=(16,7), gridspec_kw={'wspace': 0.5, 'left': 0.25})
plt.subplots_adjust(left=0.2, right=0.98, top=0.86, bottom=0.1)

axes = axes.flatten()
pvals={}
pvalsn={}

rcat_order=['R','TF','NR_nadj','NR_adj']
for i, mycell in enumerate(list(pbmcreq.index)):
    ax=sns.boxplot(y=mycell,x='RCat',data=toplot,ax=axes[i],
                   palette=color_dict,orient='v',order=rcat_order)
    ax=sns.swarmplot(y=mycell,x='RCat',data=toplot,color='black',ax=axes[i],
                    order=rcat_order)
    ax.set_xticklabels(ax.get_xticklabels(), rotation=90,fontsize=10)
    ax.tick_params(axis='y', labelsize=10)
    ax.set_ylabel(ylabel='Perc '+mycell,fontsize=10)
    ax.set_xlabel(xlabel='Response ',fontsize=10)
    # Both contrasts are rows of the same getPs table, so it is evaluated once per cell type.
    contrast_stats=getPs(toplot.loc[:,['RCat',mycell]],list(rcat_order),'RCat')
    # Group means looked up by label; replaces the deprecated positional `[0]` on a Series.
    rcat_means=toplot.groupby('RCat')[mycell].mean()
    # .copy() so adding 'log2FC' never writes into a slice of the getPs result.
    pvalsn[mycell]=contrast_stats.loc['R-NR_nadj',:].copy()
    pvals[mycell]=contrast_stats.loc['TF-NR_adj',:].copy()

    pvalsn[mycell]['log2FC']=np.log2((0.1+rcat_means['R'])/(0.1+rcat_means['NR_nadj']))
    pvals[mycell]['log2FC']=np.log2((0.1+rcat_means['TF'])/(0.1+rcat_means['NR_adj']))

fig.savefig(figdir+'Celltypefreq-Response-PBMCs'+what+'.pdf', bbox_inches="tight", dpi=300)
fig.savefig(figdir+'Celltypefreq-Response-PBMCs'+what+'.eps', format='eps', bbox_inches="tight", dpi=300)
fig.savefig(figdir+'Celltypefreq-Response-PBMCs'+what+'.svg', format='svg', bbox_inches="tight", dpi=300)

In [None]:
# Write the `pvals` table to disk and show it sorted by the MannWhitney column.
adj_table = pd.DataFrame.from_dict(pvals, orient='index')
adj_table.to_csv(figdir+'Pval-CellTypeFreq-PBMC-'+what+'-per-Response-adj.tsv',sep='\t')
adj_table.sort_values(by='MannWhitney')



No significant differences in adjuvant setting. 

In [None]:
# Write the `pvalsn` table to disk and show it sorted by the MannWhitney column.
nadj_table = pd.DataFrame.from_dict(pvalsn, orient='index')
nadj_table.to_csv(figdir+'Pval-CellTypeFreq-PBMC-'+what+'-per-Response-nadj.tsv',sep='\t')
nadj_table.sort_values(by='MannWhitney')


No significant differences, but trend for more classical monocytes in non-adjuvant setting. 

In [None]:
# Cell types with MannWhitney <= 0.4 in either contrast, and their log2FC in both
# contrasts side by side ('nadj' from pvalsn, 'adj' from pvals).
nadj_table = pd.DataFrame.from_dict(pvalsn, orient='index')
adj_table = pd.DataFrame.from_dict(pvals, orient='index')
toinc = list(nadj_table.loc[nadj_table['MannWhitney'] <= 0.4].index)
toinc2 = list(adj_table.loc[adj_table['MannWhitney'] <= 0.4].index)

toinc = list(set(toinc2).union(set(toinc)))
mypd = pd.concat([nadj_table.loc[toinc, 'log2FC'],
                  adj_table.loc[toinc, 'log2FC']], axis=1)
mypd.columns = ['nadj', 'adj']


In [None]:
# Drop cell types whose mean value across PBMC samples is below 0.1 from the summary table.
# numeric_only=True skips the string label column 'Unnamed: 0'; without it pandas >= 2.0
# raises instead of silently ignoring non-numeric columns.
pbmc_means=pbmcreq.mean(axis=1, numeric_only=True)
toremove=list(pbmc_means[pbmc_means<0.1].index)
toremove
mypd=mypd[~mypd.index.isin(toremove)]

In [None]:
#mypd.sort_values('nadj',ascending=False)
# Heatmap of the nadj/adj log2 fold changes for PBMC, rows ordered by decreasing 'nadj'.
plt.figure(figsize=(1.5,2))
sns.set(font_scale=0.8)
heat_data = mypd.sort_values('nadj', ascending=False)
sns_plot = sns.heatmap(heat_data, center=0.00, cmap=sns.diverging_palette(220, 20, as_cmap=True))
fig = sns_plot.get_figure()
for ext in ('pdf', 'eps', 'svg'):
    fig.savefig(figdir+'Celltypefreq-Response-summary-'+what+'-PBMC.'+ext, format=ext, bbox_inches="tight", dpi=300)


##### Lev 3 - TIL

In [None]:
# Reload the level-3 tables, keep TIL cell types whose median across samples exceeds 0.1,
# and build `toplot`: samples as rows, cell types as columns, plus the response category.
sns.set_style('white')
what='celltype3_pub'
tilfreq=pd.read_csv(propdir+'TIL_celltypeFreq_'+what+'.tsv',header=0)
pbmcreq=pd.read_csv(propdir+'PBMCs_celltypeFreq_'+what+'.tsv',header=0)

tilfreq.index=tilfreq.iloc[:,0]
pbmcreq.index=pbmcreq.iloc[:,0]

# numeric_only=True skips the string label column 'Unnamed: 0'; without it pandas >= 2.0
# raises instead of silently ignoring non-numeric columns.
tilfreq=tilfreq.loc[tilfreq.median(axis=1, numeric_only=True)>0.1,:]

# Sample names are split on spaces: third token -> response category, second token -> patient id.
rs=[x.split(' ')[2] for x in list(tilfreq.drop(columns=['Unnamed: 0']).columns)]
patid=[x.split(' ')[1] for x in list(tilfreq.drop(columns=['Unnamed: 0']).columns)]

toplot=tilfreq.drop(columns=['Unnamed: 0']).transpose()
toplot['RCat']=rs

In [None]:
toplot.groupby('RCat').mean()['CD141-positive myeloid dendritic cell']

In [None]:
toplot.groupby('RCat').mean()['exhausted-like CD4-positive T cell']

In [None]:
# Mean classical-monocyte value per response category, restricted to cell types whose
# median across PBMC samples exceeds 0.5.
rs1=[x.split(' ')[2] for x in list(pbmcreq.drop(columns=['Unnamed: 0']).columns)]
patid1=[x.split(' ')[1] for x in list(pbmcreq.drop(columns=['Unnamed: 0']).columns)]

# numeric_only=True skips the string label column 'Unnamed: 0'; without it pandas >= 2.0
# raises instead of silently ignoring non-numeric columns.
pbmcreq1=pbmcreq.loc[pbmcreq.median(axis=1, numeric_only=True)>0.5,:]

toplot1=pbmcreq1.drop(columns=['Unnamed: 0']).transpose()
toplot1['RCat']=rs1

toplot1.groupby('RCat').mean()['classical monocyte']

##### Lev 3 - TIL

In [None]:
# Level-3 TIL frequencies per response category: one box/swarm panel per cell type.
# `pvalsn` holds the 'R-NR_nadj' contrast and `pvals` the 'TF-NR_adj' contrast from getPs,
# each extended with a log2 fold change of the group means (pseudocount 0.1).
fig, axes = plt.subplots(6, 5,figsize=(16,20), gridspec_kw={'wspace': 0.5, 'left': 0.25})
plt.subplots_adjust(left=0.2, right=0.98, top=0.86, bottom=0.1)

axes = axes.flatten()
pvals={}
pvalsn={}

rcat_order=['R','TF','NR_nadj','NR_adj']
for i, mycell in enumerate(list(tilfreq.index)):
    ax=sns.boxplot(y=mycell,x='RCat',data=toplot,ax=axes[i],
                   palette=color_dict,orient='v',order=rcat_order)
    ax=sns.swarmplot(y=mycell,x='RCat',data=toplot,color='black',ax=axes[i],order=rcat_order)
    ax.set_xticklabels(ax.get_xticklabels(), rotation=90,fontsize=10)
    ax.tick_params(axis='y', labelsize=10)
    ax.set_ylabel(ylabel='Perc '+mycell,fontsize=10)
    ax.set_xlabel(xlabel='Response ',fontsize=10)
    # Both contrasts are rows of the same getPs table, so it is evaluated once per cell type.
    contrast_stats=getPs(toplot.loc[:,['RCat',mycell]],list(rcat_order),'RCat')
    # Group means looked up by label; replaces the deprecated positional `[0]` on a Series.
    rcat_means=toplot.groupby('RCat')[mycell].mean()
    # .copy() so adding 'log2FC' never writes into a slice of the getPs result.
    pvalsn[mycell]=contrast_stats.loc['R-NR_nadj',:].copy()
    pvals[mycell]=contrast_stats.loc['TF-NR_adj',:].copy()

    pvalsn[mycell]['log2FC']=np.log2((0.1+rcat_means['R'])/(0.1+rcat_means['NR_nadj']))
    pvals[mycell]['log2FC']=np.log2((0.1+rcat_means['TF'])/(0.1+rcat_means['NR_adj']))

fig.savefig(figdir+'Celltypefreq-Response-TILs'+what+'.pdf', bbox_inches="tight", dpi=300)
fig.savefig(figdir+'Celltypefreq-Response-TILs'+what+'.eps', format='eps', bbox_inches="tight", dpi=300)
fig.savefig(figdir+'Celltypefreq-Response-TILs'+what+'.svg', format='svg', bbox_inches="tight", dpi=300)

In [None]:
# Write the `pvalsn` table to disk and show it sorted by the T-test column.
nadj_table = pd.DataFrame.from_dict(pvalsn, orient='index')
nadj_table.to_csv(figdir+'Pval-CellTypeFreq-TIL-'+what+'-per-Response-nadj.tsv',sep='\t')
nadj_table.sort_values(by='T-test')


In tumor, non-adjuvant setting - more exhausted-like CD4+ T cells in NRs. 

In [None]:
# Write the `pvals` table to disk and show it sorted by the T-test column.
adj_table = pd.DataFrame.from_dict(pvals, orient='index')
adj_table.to_csv(figdir+'Pval-CellTypeFreq-TIL-'+what+'-per-Response-adj.tsv',sep='\t')
adj_table.sort_values(by='T-test')


In [None]:
# Cell types with MannWhitney <= 0.4 in either contrast, and their log2FC in both
# contrasts side by side ('nadj' from pvalsn, 'adj' from pvals).
nadj_table = pd.DataFrame.from_dict(pvalsn, orient='index')
adj_table = pd.DataFrame.from_dict(pvals, orient='index')
toinc = list(nadj_table.loc[nadj_table['MannWhitney'] <= 0.4].index)
toinc2 = list(adj_table.loc[adj_table['MannWhitney'] <= 0.4].index)

toinc = list(set(toinc2).union(set(toinc)))
mypd = pd.concat([nadj_table.loc[toinc, 'log2FC'],
                  adj_table.loc[toinc, 'log2FC']], axis=1)
mypd.columns = ['nadj', 'adj']

In [None]:
# Drop cell types whose mean value across TIL samples is below 0.1 from the summary table.
# numeric_only=True skips the string label column 'Unnamed: 0'; without it pandas >= 2.0
# raises instead of silently ignoring non-numeric columns.
til_means=tilfreq.mean(axis=1, numeric_only=True)
toremove=list(til_means[til_means<0.1].index)
toremove
mypd=mypd[~mypd.index.isin(toremove)]

In [None]:

# Heat map of the log2 fold changes, rows ordered by the 'nadj' column (largest first),
# colour scale centred on 0; exported as pdf, eps and svg.
plt.figure(figsize=(2,5))
sns.set(font_scale=0.8)
heat_cmap=sns.diverging_palette(220, 20, as_cmap=True)
ordered_fc=mypd.sort_values('nadj',ascending=False)
sns_plot=sns.heatmap(ordered_fc,center=0.00,cmap=heat_cmap)
fig = sns_plot.get_figure()
summary_base=figdir+'Celltypefreq-Response-summary-'+what
fig.savefig(summary_base+'.pdf', bbox_inches="tight", dpi=300)
fig.savefig(summary_base+'.eps', format='eps', bbox_inches="tight", dpi=300)
fig.savefig(summary_base+'.svg', format='svg', bbox_inches="tight", dpi=300)


##### Lev 3 - PBMC

In [None]:
sns.set_style('white')
# Sample column names are split on single spaces: token 1 is used as the patient id,
# token 2 as the response category.
sample_cols=list(pbmcreq.drop(columns=['Unnamed: 0']).columns)
rs=[x.split(' ')[2] for x in sample_cols]
patid=[x.split(' ')[1] for x in sample_cols]

# Keep rows whose median over the sample columns exceeds 0.1.
# numeric_only=True leaves the string column 'Unnamed: 0' out of the row median;
# pandas >= 2.0 raises a TypeError on mixed rows instead of skipping it silently.
pbmcreq=pbmcreq.loc[pbmcreq.median(axis=1, numeric_only=True)>0.1,:]

# Samples as rows, one column per remaining pbmcreq row, plus the response category.
toplot=pbmcreq.drop(columns=['Unnamed: 0']).transpose()
toplot['RCat']=rs


In [None]:

# One box/swarm panel per PBMC cell type, split by response category, plus the
# per-cell-type statistics used for the summary tables further down.
rcat_order=['R','TF','NR_nadj','NR_adj']
# Cell types that get no panel in this figure.
pbmc_skip={'CCR7-positive myeloid dendritic cell',
           'MARCO-positive macrophage',
           'exhausted-like CD8-positive T cell',
           'MSR1-positive macrophage','macrophage'}

fig, axes = plt.subplots(4, 5,figsize=(16,12), gridspec_kw={'wspace': 0.5, 'left': 0.25})
plt.subplots_adjust(left=0.2, right=0.98, top=0.86, bottom=0.1)
pvals={}   # row 'TF-NR_adj' of the getPs result, keyed by cell type
pvalsn={}  # row 'R-NR_nadj' of the getPs result, keyed by cell type

axes = axes.flatten()

# sorted() pins the panel order: iterating the raw set yields a different layout in
# every kernel session because string hashing is randomised per process.
for i, mycell in enumerate(sorted(set(pbmcreq.index)-pbmc_skip)):
    ax=sns.boxplot(y=mycell,x='RCat',data=toplot,ax=axes[i],
                   palette=color_dict,orient='v',order=rcat_order)
    ax=sns.swarmplot(y=mycell,x='RCat',data=toplot,color='black',ax=axes[i],
                     order=rcat_order)
    ax.set_xticklabels(ax.get_xticklabels(), rotation=90,fontsize=10)
    ax.tick_params(axis='y', labelsize=10)
    ax.set_ylabel(ylabel='Perc '+mycell,fontsize=10)
    ax.set_xlabel(xlabel='Response ',fontsize=10)

    # getPs is evaluated once per cell type; both comparisons are rows of its result.
    cell_stats=getPs(toplot.loc[:,['RCat',mycell]],rcat_order,'RCat')
    pvalsn[mycell]=cell_stats.loc['R-NR_nadj',:].copy()
    pvals[mycell]=cell_stats.loc['TF-NR_adj',:].copy()

    # log2 ratio of the group means with a 0.1 pseudo-count on both sides.
    # Values are looked up by label (the later sections do the same) instead of the
    # positional [0] on a label-indexed Series, which newer pandas deprecates.
    group_mean=toplot.groupby('RCat')[mycell].mean()
    pvalsn[mycell]['log2FC']=np.log2((0.1+group_mean['R'])/(0.1+group_mean['NR_nadj']))
    pvals[mycell]['log2FC']=np.log2((0.1+group_mean['TF'])/(0.1+group_mean['NR_adj']))

fig.savefig(figdir+'Celltypefreq-Response-PBMCs'+what+'.pdf', bbox_inches="tight", dpi=300)
fig.savefig(figdir+'Celltypefreq-Response-PBMCs'+what+'.eps', format='eps', bbox_inches="tight", dpi=300)
fig.savefig(figdir+'Celltypefreq-Response-PBMCs'+what+'.svg', format='svg', bbox_inches="tight", dpi=300)

In [None]:
# PBMC statistics of the 'R-NR_nadj' comparison: saved as TSV, then shown sorted by T-test.
nadj_stats=pd.DataFrame.from_dict(pvalsn, orient='index')
nadj_stats.to_csv(figdir+'Pval-CellTypeFreq-PBMC-'+what+'-per-Response-nadj.tsv',sep='\t')
nadj_stats.sort_values(by='T-test')


In [None]:
# PBMC statistics of the 'TF-NR_adj' comparison: saved as TSV, then shown sorted by T-test.
adj_stats=pd.DataFrame.from_dict(pvals, orient='index')
adj_stats.to_csv(figdir+'Pval-CellTypeFreq-PBMC-'+what+'-per-Response-adj.tsv',sep='\t')
adj_stats.sort_values(by='T-test')


In [None]:
# Cell types with a Mann-Whitney p-value <= 0.4 in either comparison, with the
# log2 fold changes of both comparisons as columns 'nadj' and 'adj'.
nadj_stats=pd.DataFrame.from_dict(pvalsn, orient='index')
adj_stats=pd.DataFrame.from_dict(pvals, orient='index')

toinc=list(nadj_stats.loc[nadj_stats['MannWhitney']<=0.4].index)
toinc2=list(adj_stats.loc[adj_stats['MannWhitney']<=0.4].index)

toinc=list(set(toinc2).union(set(toinc)))
mypd=pd.concat([nadj_stats.loc[toinc,'log2FC'],
                adj_stats.loc[toinc,'log2FC']], axis=1)
mypd.columns=['nadj','adj']

In [None]:
# Drop rows of the summary whose mean over the pbmcreq sample columns is below 0.1.
# numeric_only=True leaves the string column 'Unnamed: 0' out of the row mean;
# pandas >= 2.0 raises a TypeError on mixed rows instead of skipping it silently.
mean_freq=pbmcreq.mean(axis=1, numeric_only=True)
toremove=list(mean_freq[mean_freq<0.1].index)
mypd=mypd[~mypd.index.isin(toremove)]

In [None]:

# PBMC heat map of the log2 fold changes, rows ordered by 'nadj' (largest first),
# colour scale centred on 0; exported as pdf, eps and svg.
plt.figure(figsize=(2,3))
sns.set(font_scale=0.8)
heat_cmap=sns.diverging_palette(220, 20, as_cmap=True)
ordered_fc=mypd.sort_values('nadj',ascending=False)
sns_plot=sns.heatmap(ordered_fc,center=0.00,cmap=heat_cmap)
fig = sns_plot.get_figure()
summary_base=figdir+'Celltypefreq-Response-summary-'+what
fig.savefig(summary_base+'-PBMC.pdf', bbox_inches="tight", dpi=300)
fig.savefig(summary_base+'-PBMC.eps', format='eps', bbox_inches="tight", dpi=300)
fig.savefig(summary_base+'-PBMC.svg', format='svg', bbox_inches="tight", dpi=300)


##### CD8Tcell clusters - TIL

In [None]:
sns.set_style('white')

# Frequency tables of the CD8 subset, per 'leiden_velo' cluster.
# Both files are parsed with read_csv's default separator despite the .tsv suffix.
what='leiden_velo'
til_path=propdir+'AllCD8_TIL_celltypeFreq_'+what+'.tsv'
pbmc_path=propdir+'AllCD8_PBMCs_celltypeFreq_'+what+'.tsv'
tilfreq=pd.read_csv(til_path,header=0)
pbmcreq=pd.read_csv(pbmc_path,header=0)

# The first column supplies the row labels and also stays in place as a column.
tilfreq.index=tilfreq.iloc[:,0]
pbmcreq.index=pbmcreq.iloc[:,0]

# Sample column names are split on single spaces: token 1 -> patient id, token 2 -> response category.
sample_cols=list(tilfreq.drop(columns=['Unnamed: 0']).columns)
rs=[name.split(' ')[2] for name in sample_cols]
patid=[name.split(' ')[1] for name in sample_cols]

# Samples as rows, one column per tilfreq row, plus the response category.
toplot=tilfreq.drop(columns=['Unnamed: 0']).transpose()
toplot['RCat']=rs


In [None]:
# Placeholder statistics (both p-values set to 1) used by the panel loop below
# for clusters that are not passed to getPs.
mydef=pd.Series([1,1], index=['MannWhitney','T-test'])

In [None]:
# One box/swarm panel per CD8 cluster (TIL samples), split by response category,
# plus the per-cluster statistics used for the summary tables further down.
fig, axes = plt.subplots(5, 4,figsize=(14,12), gridspec_kw={'wspace': 0.5, 'left': 0.25})
plt.subplots_adjust(left=0.2, right=0.98, top=0.86, bottom=0.1)

axes = axes.flatten()
rcat_order=['R','TF','NR_nadj','NR_adj']

pvals={}   # row 'TF-NR_adj' of the getPs result (or the placeholder), keyed by cluster
pvalsn={}  # row 'R-NR_nadj' of the getPs result (or the placeholder), keyed by cluster
for i, mycell in enumerate(tilfreq.index):
    ax=sns.boxplot(y=mycell,x='RCat',data=toplot,ax=axes[i],
                   palette=color_dict,orient='v',order=rcat_order)
    ax=sns.swarmplot(y=mycell,x='RCat',data=toplot,color='black',ax=axes[i],
                     order=rcat_order)
    ax.set_xticklabels(ax.get_xticklabels(), rotation=90,fontsize=10)
    ax.tick_params(axis='y', labelsize=10)
    ax.set_ylabel(ylabel='Perc '+str(mycell),fontsize=10)
    ax.set_xlabel(xlabel='Response ',fontsize=10)

    # Only the cluster column enters the median: taking the median of the two-column
    # frame also touches the string column 'RCat', which pandas >= 2.0 rejects.
    if toplot[mycell].median()>0:
        cell_stats=getPs(toplot.loc[:,['RCat',mycell]],rcat_order,'RCat')
        pvalsn[mycell]=cell_stats.loc['R-NR_nadj',:].copy()
        pvals[mycell]=cell_stats.loc['TF-NR_adj',:].copy()
    else:
        # Separate copies of the placeholder: storing `mydef` itself made pvals and
        # pvalsn (and every cluster taking this branch) share one Series, so each
        # log2FC written below overwrote the others.
        pvalsn[mycell]=mydef.copy()
        pvals[mycell]=mydef.copy()

    # log2 ratio of the group means with a 0.1 pseudo-count on both sides.
    group_mean=toplot.groupby('RCat')[mycell].mean()
    pvalsn[mycell]['log2FC']=np.log2((0.1+group_mean['R'])/(0.1+group_mean['NR_nadj']))
    pvals[mycell]['log2FC']=np.log2((0.1+group_mean['TF'])/(0.1+group_mean['NR_adj']))

fig.savefig(figdir+'Celltypefreq-Response-TILs'+what+'.pdf', bbox_inches="tight", dpi=300)
fig.savefig(figdir+'Celltypefreq-Response-TILs'+what+'.eps', format='eps', bbox_inches="tight", dpi=300)
fig.savefig(figdir+'Celltypefreq-Response-TILs'+what+'.svg', format='svg', bbox_inches="tight", dpi=300)


In [None]:
# Write both comparison tables for the CD8 clusters.
# Only the last sorted table is rendered as the cell output.
adj_stats=pd.DataFrame.from_dict(pvals, orient='index')
adj_stats.to_csv(figdir+'Pval-CellTypeFreq-TIL-'+what+'-per-Response-adj.tsv',sep='\t')
adj_stats.sort_values(by='MannWhitney')


nadj_stats=pd.DataFrame.from_dict(pvalsn, orient='index')
nadj_stats.to_csv(figdir+'Pval-CellTypeFreq-TIL-'+what+'-per-Response-nadj.tsv',sep='\t')
nadj_stats.sort_values(by='MannWhitney')


In [None]:
# Clusters with a Mann-Whitney p-value <= 0.2 in either comparison, with the
# log2 fold changes of both comparisons as columns 'nadj' and 'adj'.
nadj_stats=pd.DataFrame.from_dict(pvalsn, orient='index')
adj_stats=pd.DataFrame.from_dict(pvals, orient='index')

toinc=list(nadj_stats.loc[nadj_stats['MannWhitney']<=0.2].index)
toinc2=list(adj_stats.loc[adj_stats['MannWhitney']<=0.2].index)

toinc=list(set(toinc2).union(set(toinc)))
mypd=pd.concat([nadj_stats.loc[toinc,'log2FC'],
                adj_stats.loc[toinc,'log2FC']], axis=1)
mypd.columns=['nadj','adj']

In [None]:
# Drop clusters whose mean over the tilfreq sample columns is below 0.4.
# numeric_only=True leaves the string column 'Unnamed: 0' out of the row mean;
# pandas >= 2.0 raises a TypeError on mixed rows instead of skipping it silently.
mean_freq=tilfreq.mean(axis=1, numeric_only=True)
toremove=list(mean_freq[mean_freq<0.4].index)
mypd=mypd[~mypd.index.isin(toremove)]

In [None]:

# CD8-cluster heat map of the log2 fold changes, rows ordered by 'nadj' (largest first),
# colour scale centred on 0; exported as pdf, eps and svg.
plt.figure(figsize=(1.5,3))
sns.set(font_scale=0.8)
heat_cmap=sns.diverging_palette(220, 20, as_cmap=True)
ordered_fc=mypd.sort_values('nadj',ascending=False)
sns_plot=sns.heatmap(ordered_fc,center=0.00,cmap=heat_cmap)
fig = sns_plot.get_figure()
summary_base=figdir+'Celltypefreq-Response-summary-'+what
fig.savefig(summary_base+'.pdf', bbox_inches="tight", dpi=300)
fig.savefig(summary_base+'.eps', format='eps', bbox_inches="tight", dpi=300)
fig.savefig(summary_base+'.svg', format='svg', bbox_inches="tight", dpi=300)


##### Myeloid cell clusters - TIL


In [None]:
sns.set_style('white')
# Frequency tables of the 'Macro' subset, per 'leiden_velo' cluster.
# Both files are parsed with read_csv's default separator despite the .tsv suffix.
what='leiden_velo'
til_path=propdir+'Macro_TIL_celltypeFreq_'+what+'.tsv'
pbmc_path=propdir+'Macro_PBMCs_celltypeFreq_'+what+'.tsv'
tilfreq=pd.read_csv(til_path,header=0)
pbmcreq=pd.read_csv(pbmc_path,header=0)

# The first column supplies the row labels and also stays in place as a column.
tilfreq.index=tilfreq.iloc[:,0]
pbmcreq.index=pbmcreq.iloc[:,0]

# Sample column names are split on single spaces: token 1 -> patient id, token 2 -> response category.
sample_cols=list(tilfreq.drop(columns=['Unnamed: 0']).columns)
rs=[name.split(' ')[2] for name in sample_cols]
patid=[name.split(' ')[1] for name in sample_cols]

In [None]:
# Plot table: samples as rows, one column per tilfreq row, plus the response
# category of each sample (rs, taken from the sample column names).
toplot=tilfreq.drop(columns=['Unnamed: 0']).transpose()
toplot['RCat']=rs

#log2FC=np.log2((0.1+toplot.loc[:,['RCat',mycell]].groupby('RCat').mean().loc['R',:])/(0.1+toplot.loc[:,['RCat',mycell]].groupby('RCat').mean().loc['NR_nadj',:]))[mycell]


In [None]:
# Placeholder statistics (both p-values set to 1) for clusters not passed to getPs.
mydef=pd.Series([1,1], index=['MannWhitney','T-test'])

# One box/swarm panel per cluster of the 'Macro' TIL table, split by response category.
fig, axes = plt.subplots(3, 4,figsize=(14,8), gridspec_kw={'wspace': 0.5, 'left': 0.25})
plt.subplots_adjust(left=0.2, right=0.98, top=0.86, bottom=0.1)

axes = axes.flatten()
rcat_order=['R','TF','NR_nadj','NR_adj']

pvals={}   # row 'TF-NR_adj' of the getPs result (or the placeholder), keyed by cluster
pvalsn={}  # row 'R-NR_nadj' of the getPs result (or the placeholder), keyed by cluster
for i, mycell in enumerate(tilfreq.index):
    ax=sns.boxplot(y=mycell,x='RCat',data=toplot,ax=axes[i],
                   palette=color_dict,orient='v',order=rcat_order)
    ax=sns.swarmplot(y=mycell,x='RCat',data=toplot,color='black',ax=axes[i],
                     order=rcat_order)
    ax.set_xticklabels(ax.get_xticklabels(), rotation=90,fontsize=10)
    ax.tick_params(axis='y', labelsize=10)
    ax.set_ylabel(ylabel='Perc '+str(mycell),fontsize=10)
    ax.set_xlabel(xlabel='Response ',fontsize=10)

    # Only the cluster column enters the median: taking the median of the two-column
    # frame also touches the string column 'RCat', which pandas >= 2.0 rejects.
    if toplot[mycell].median()>0:
        cell_stats=getPs(toplot.loc[:,['RCat',mycell]],rcat_order,'RCat')
        pvalsn[mycell]=cell_stats.loc['R-NR_nadj',:].copy()
        pvals[mycell]=cell_stats.loc['TF-NR_adj',:].copy()
    else:
        # Separate copies of the placeholder: storing `mydef` itself made pvals and
        # pvalsn (and every cluster taking this branch) share one Series, so each
        # log2FC written below overwrote the others.
        pvalsn[mycell]=mydef.copy()
        pvals[mycell]=mydef.copy()

    # log2 ratio of the group means with a 0.1 pseudo-count on both sides.
    group_mean=toplot.groupby('RCat')[mycell].mean()
    pvalsn[mycell]['log2FC']=np.log2((0.1+group_mean['R'])/(0.1+group_mean['NR_nadj']))
    pvals[mycell]['log2FC']=np.log2((0.1+group_mean['TF'])/(0.1+group_mean['NR_adj']))

fig.savefig(figdir+'Celltypefreq-Response-TILs-macro-'+what+'.pdf', bbox_inches="tight", dpi=300)
fig.savefig(figdir+'Celltypefreq-Response-TILs-macro-'+what+'.eps', format='eps', bbox_inches="tight", dpi=300)
fig.savefig(figdir+'Celltypefreq-Response-TILs-macro-'+what+'.svg', format='svg', bbox_inches="tight", dpi=300)


In [None]:
# 'TF-NR_adj' statistics of the myeloid clusters.
# The file name carries 'macro-' (as the figures of this section do): the CD8 section
# above writes 'Pval-CellTypeFreq-TIL-'+what+'-per-Response-adj.tsv' with the same
# `what`, so reusing that name replaced the CD8 table on disk.
adj_stats=pd.DataFrame.from_dict(pvals, orient='index')
adj_stats.to_csv(figdir+'Pval-CellTypeFreq-TIL-macro-'+what+'-per-Response-adj.tsv',sep='\t')
adj_stats.sort_values(by='MannWhitney')


In [None]:



# 'R-NR_nadj' statistics of the myeloid clusters.
# The file name carries 'macro-' (as the figures of this section do): the CD8 section
# above writes 'Pval-CellTypeFreq-TIL-'+what+'-per-Response-nadj.tsv' with the same
# `what`, so reusing that name replaced the CD8 table on disk.
nadj_stats=pd.DataFrame.from_dict(pvalsn, orient='index')
nadj_stats.to_csv(figdir+'Pval-CellTypeFreq-TIL-macro-'+what+'-per-Response-nadj.tsv',sep='\t')
nadj_stats.sort_values(by='MannWhitney')




In [None]:
# Clusters with a Mann-Whitney p-value <= 0.2 in either comparison, with the
# log2 fold changes of both comparisons as columns 'nadj' and 'adj'.
nadj_stats=pd.DataFrame.from_dict(pvalsn, orient='index')
adj_stats=pd.DataFrame.from_dict(pvals, orient='index')

toinc=list(nadj_stats.loc[nadj_stats['MannWhitney']<=0.2].index)
toinc2=list(adj_stats.loc[adj_stats['MannWhitney']<=0.2].index)

toinc=list(set(toinc2).union(set(toinc)))
mypd=pd.concat([nadj_stats.loc[toinc,'log2FC'],
                adj_stats.loc[toinc,'log2FC']], axis=1)
mypd.columns=['nadj','adj']

In [None]:
# Drop clusters whose mean over the pbmcreq sample columns is below 0.1.
# numeric_only=True leaves the string column 'Unnamed: 0' out of the row mean;
# pandas >= 2.0 raises a TypeError on mixed rows instead of skipping it silently.
# NOTE(review): the log2FC table of this section is built from tilfreq, yet the
# abundance filter reads pbmcreq (the PBMC table); the CD8 TIL section filters on
# tilfreq. Looks like a copy-paste from the PBMC section - confirm which is intended.
mean_freq=pbmcreq.mean(axis=1, numeric_only=True)
toremove=list(mean_freq[mean_freq<0.1].index)
mypd=mypd[~mypd.index.isin(toremove)]

Remark: Add cluster 29 in the plot

In [None]:


# Myeloid-cluster heat map of the log2 fold changes, rows ordered by 'nadj' (largest
# first), colour scale centred on 0; exported as pdf, eps and svg.
plt.figure(figsize=(1.5,3))
sns.set(font_scale=0.8)
sns_plot=sns.heatmap(mypd.sort_values('nadj',ascending=False),center=0.00,cmap=sns.diverging_palette(220, 20, as_cmap=True))
# Take the figure from the heat-map axes. Without this, `fig` still refers to the
# box-plot grid created earlier in this section and that grid is what gets saved
# under the summary file names.
fig = sns_plot.get_figure()
fig.savefig(figdir+'Celltypefreq-Response-summary-TIL-macro'+what+'.pdf', bbox_inches="tight", dpi=300)
fig.savefig(figdir+'Celltypefreq-Response-summary-TIL-macro'+what+'.eps', format='eps', bbox_inches="tight", dpi=300)
fig.savefig(figdir+'Celltypefreq-Response-summary-TIL-macro'+what+'.svg', format='svg', bbox_inches="tight", dpi=300)

## Validation in other datasets

In [None]:
#myelobc=adata[adata.obs['celltype1'].isin(['myeloid leukocyte','lymphocyte of B lineage'])].copy()
# T cells (celltype1 == 'T cell'), then restricted to TIL samples; `tcell` is TIL-only from here on.
tcell=adata[adata.obs['celltype1'].isin(['T cell'])].copy()
tcell=tcell[tcell.obs['Sample type']=='TIL'].copy()
# CD8 T cells among the TIL T cells (celltype2_pub annotation).
cd8tcell=tcell[tcell.obs['celltype2_pub'].isin(['CD8-positive T cell'])].copy()

In [None]:
figdir

### Fairfax et al. Markers 

https://www.nature.com/articles/s41591-019-0734-6?proof=t

In [None]:
# Marker genes used in this Fairfax et al. section (referred to below as the
# "large clone" markers).
largeclones='GNLY FGFBP2 GZMH NKG7 PRF1 CCL4 ITGB1 GZMB KLRD1 PRSS23'.split()

In [None]:
# UMAP of the CD8 TIL T cells, one panel per gene in largeclones.
sc.pl.umap(cd8tcell,color=largeclones)

In [None]:
# Dot plot of the largeclones genes per celltype3_pub group; the figure is also saved
# through scanpy's `save` argument.
sc.pl.dotplot(cd8tcell,var_names=largeclones,groupby='celltype3_pub',
              dendrogram=True,save='Fairfax-markerplot.pdf')


In [None]:
# TIL-only copy of cd8tcdata (cd8tcdata itself is created outside this part of the notebook).
cd8tcdatatil=cd8tcdata[cd8tcdata.obs['Sample type']=='TIL'].copy()

In [None]:
# Same marker panels on the TIL subset of cd8tcdata.
sc.pl.umap(cd8tcdatatil,color=largeclones)

In [None]:
# largeclones genes per 'leiden' cluster of the TIL subset of cd8tcdata (not saved).
sc.pl.dotplot(cd8tcdatatil,var_names=largeclones,groupby='leiden',
              dendrogram=True)

Fairfax large clone markers are most highly expressed in NK-like T cells. 

### Tirosh et al. Markers

In [None]:
# Gene lists of the Tirosh et al. section, keyed by signature name.
tirosh={
    'Tnai':['CCR7', 'TCF7', 'SELL', 'LEF1'],
    'Texh':['PDCD1', 'TIGIT', 'LAG3', 'HAVCR2', 'CTLA4'],
    'Tcyt':['NKG7', 'CCL4', 'CST7', 'PRF1', 'GZMB', 'GZMA', 'IFNG', 'CCL3'],
}

# Score every signature on both objects; each score lands in obs under its score name.
tirosh_scores=(('Tnai',"TIR_NAI"),('Texh',"TIR_EXH"),('Tcyt',"TIR_CYT"))
for scored_adata in (cd8tcell, cd8tcdata_full):
    for sig_key, sig_score in tirosh_scores:
        sc.tl.score_genes(scored_adata,tirosh[sig_key],score_name=sig_score,use_raw=True)



### Sade-Feldman et al. Markers

https://pubmed.ncbi.nlm.nih.gov/30388456/

In [None]:
sc.settings.set_figure_params()

In [None]:
sc.pl.umap(cd8tcell, color='celltype3_pub')

In [None]:
# Gene lists of the Sade-Feldman et al. section: 'good' and 'bad' signatures.
sade={
    'good':['PLAC8', 'LTB', 'LY9', 'SELL', 'TCF7',  'CCR7','IL7R'],
    'bad':['CCL3', 'CD38', 'HAVCR2', 'ENTPD1', 'WARS'],
}

# Score both signatures on each of the four objects (good first, then bad).
for scored_adata in (cd8tcell, cd8tcdata, cd8tcdata_full, tildata):
    for sig_key, sig_score in (('good',"SADE_G"),('bad',"SADE_B")):
        sc.tl.score_genes(scored_adata,sade[sig_key],score_name=sig_score,use_raw=True)

# Both gene lists plus CD8A/CD8B per celltype3_pub group, saved by scanpy.
sade_panel=sade['good']+sade['bad']+['CD8A','CD8B']
sc.pl.dotplot(cd8tcell,var_names=sade_panel,groupby='celltype3_pub',
              dendrogram=True,dot_max=0.5,vmax=1,save='Sade-Feldman-markerplot.pdf')

In [None]:
# Tirosh gene lists (order: Tnai, Tcyt, Texh) plus CD8A/CD8B per celltype3_pub group, saved by scanpy.
sc.pl.dotplot(cd8tcell,var_names=tirosh['Tnai']+tirosh['Tcyt']+tirosh['Texh']+['CD8A','CD8B'],groupby='celltype3_pub',
              dendrogram=True,dot_max=0.5,vmax=1,save='Tirosh-markerplot.pdf')
#fig.savefig(figdir+'Celltypefreq-Response-PBMCs'+what+'.pdf')

In [None]:

# Sade-Feldman gene lists plus CD8A/CD8B per 'leiden' cluster of cd8tcdata_full, saved by scanpy.
sc.pl.dotplot(cd8tcdata_full,var_names=sade['good']+sade['bad']+['CD8A','CD8B'],groupby='leiden',
              dendrogram=True,dot_max=0.5,vmax=1,save='Sade-Feldman-markerplot-veloplot.pdf')


In [None]:
# Tirosh gene lists plus CD8A/CD8B per 'leiden' cluster of cd8tcdata_full, saved by scanpy.
# Note the scale limits differ from the celltype3_pub version (vmax=2, dot_max=0.8).
sc.pl.dotplot(cd8tcdata_full,var_names=tirosh['Tnai']+tirosh['Tcyt']+tirosh['Texh']+['CD8A','CD8B'],groupby='leiden',
              dendrogram=True,vmax=2,dot_max=0.8,save='Tirosh-markerplot-veloplot.pdf')

In [None]:
# Cells are drawn in a shuffled order (obs.sample(frac=1)); the shuffle is unseeded,
# so the overplotting differs between runs.
sc.pl.umap(cd8tcell[cd8tcell.obs.sample(frac=1).index],color=['SADE_G','TIR_NAI'])

In [None]:
# Same shuffled-order UMAP for the SADE_B, TIR_EXH and TIR_CYT scores (unseeded shuffle).
sc.pl.umap(cd8tcell[cd8tcell.obs.sample(frac=1).index],color=['SADE_B','TIR_EXH','TIR_CYT'])

In [None]:
# The five literature signature scores on cd8tcdata_full, saved as svg.
# Cells are drawn in a shuffled order; random_state pins that order so the saved
# figure is identical on every run (the unseeded shuffle changed it each time).
sc.pl.umap(cd8tcdata_full[cd8tcdata_full.obs.sample(frac=1, random_state=0).index],ncols=5,
           color=['SADE_G','TIR_NAI','SADE_B','TIR_EXH','TIR_CYT'],
           color_map=sns.cubehelix_palette(dark=0, light=.9, as_cmap=True),
           save='-litsignatures-cd8tcdata_full.svg')

In [None]:
# SADE_B, TIR_EXH and TIR_CYT scores on cd8tcdata_full, cells in an (unseeded) shuffled order.
sc.pl.umap(cd8tcdata_full[cd8tcdata_full.obs.sample(frac=1).index],color=['SADE_B','TIR_EXH','TIR_CYT'])

In [None]:
# 'leiden' clusters of cd8tcdata_full with the labels drawn on the embedding.
sc.pl.umap(cd8tcdata_full[cd8tcdata_full.obs.sample(frac=1).index],color=['leiden'],legend_loc='on data')

In [None]:
set(cd8tcell.obs['celltype3_pub'])

In [None]:
# celltype3_pub populations summed into the 'good' and 'bad' groups of the
# good/bad ratio computed below.
goodc=[
    'effector memory CD8-positive T cell',
    'naive CD8-positive T cell',
]
badc=[
    'exhausted-like CD8-positive T cell',
    'proliferating CD8-positive T cell',
    'cytokine secreting effector CD8-positive T cell',
]

# Cluster-number counterparts of the two groups.
goodcvelo=[16,10,3]
badcvelo=[4,14,12,13]

Good Sade-Feldman markers are expressed in naive and, to some extent, in EM samples. 
Bad Sade-Feldman markers are expressed in all exhausted-like samples: highest in proliferating ones, lowest in C8 and C9.

In [None]:
# TIL frequency table at celltype3_pub level.
what='celltype3_pub'
tilfreq=pd.read_csv(propdir+'TIL_celltypeFreq_'+what+'.tsv',header=0)
#pbmcreq=pd.read_csv(propdir+'PBMCs_celltypeFreq_'+what+'.tsv',header=0)

# The first column supplies the row labels and also stays in place as a column.
tilfreq.index=tilfreq.iloc[:,0]
# Keep rows whose median over the sample columns exceeds 0.001.
# numeric_only=True leaves the string column 'Unnamed: 0' out of the row median;
# pandas >= 2.0 raises a TypeError on mixed rows instead of skipping it silently.
tilfreq=tilfreq.loc[tilfreq.median(axis=1, numeric_only=True)>0.001,:]

# Sample column names are split on single spaces: token 1 -> patient id, token 2 -> response category.
sample_cols=list(tilfreq.drop(columns=['Unnamed: 0']).columns)
rs=[x.split(' ')[2] for x in sample_cols]
patid=[x.split(' ')[1] for x in sample_cols]

toplot=tilfreq.drop(columns=['Unnamed: 0']).transpose()
toplot['RCat']=rs

In [None]:
# Per-sample totals of the 'good' and the 'bad' populations: sum the selected
# tilfreq rows column-wise and discard the entry of the label column.
good_total=tilfreq.loc[goodc,:].sum(axis=0).drop(index=['Unnamed: 0'])
bad_total=tilfreq.loc[badc,:].sum(axis=0).drop(index=['Unnamed: 0'])
goodbad=pd.DataFrame([good_total,bad_total]).transpose()
goodbad.columns=['good','bad']

In [None]:
# Good-to-bad ratio per sample (element-wise division of the two totals).
goodbad['Ratio']=list(goodbad['good']/goodbad['bad'])

In [None]:
# The goodbad index holds the sample names; split on single spaces,
# token 1 is used as the patient id and token 2 as the response category.
sample_names=list(goodbad.index)
rs=[name.split(' ')[2] for name in sample_names]
patid=[name.split(' ')[1] for name in sample_names]

goodbad['RCat']=rs

In [None]:
goodbad

In [None]:
# Good/bad ratio per response category: box plot with the individual samples on top.
# Despite its name, `fig` holds what seaborn returns here and is used as an Axes
# (set_xticklabels, tick_params); the Figure is reached through fig.figure.
fig=sns.boxplot(y='Ratio',x='RCat',data=goodbad,
                   palette=color_dict,orient='v',order=['R','TF','NR_nadj','NR_adj'])
fig=sns.swarmplot(y='Ratio',x='RCat',data=goodbad,color='black',
                    order=['R','TF','NR_nadj','NR_adj'])
fig.set_xticklabels(fig.get_xticklabels(), rotation=90,fontsize=10)
fig.tick_params(axis='y', labelsize=10)
fig.set_ylabel(ylabel='Good/Bad ratio',fontsize=10)
fig.set_xlabel(xlabel='Response ',fontsize=10)

fig.figure.savefig(figdir+'Sade-Feldman-GoodtoBad.pdf')

In [None]:
# Statistics for the ratio: rows 'R-NR_nadj' and 'TF-NR_adj' of the getPs result.
# From here on pvalsn / pvals are single rows, no longer dicts keyed by cell type.
pvalsn=getPs(goodbad.loc[:,['RCat','Ratio']],['R','TF','NR_nadj','NR_adj'],'RCat').loc['R-NR_nadj',:]
pvals=getPs(goodbad.loc[:,['RCat','Ratio']],['R','TF','NR_nadj','NR_adj'],'RCat').loc['TF-NR_adj',:]


In [None]:
pvalsn

In [None]:
pvals

### Li et al. markers

https://www.sciencedirect.com/science/article/pii/S009286741831568X

Cell subsets:
    - naive-like subsets: IL7R, CCR7, and the transcription factor (TF) TCF7
    - memory T cell population
    - transitional CD8 effector T (GZMK+) pool
    - cytotoxic CD8 effector T (GZMH+) pool
    - dysfunctional CD8 T cells: high expression of immune checkpoint molecules such as PD1 and LAG3

In [None]:
# Gene lists of the Li et al. section, keyed by subset name.
# 'PRDM1' is listed twice in 'CD8Dys' and 'TIGIT' appears in both 'CD8Dys' and 'TExh'.
li={
    'NaiTcell':['CCR7','IL7R','TCF7'],
    'MemTcell':[ 'SELL', 'C1orf21', 'KLRB1', 'ARL4C'],
    'CD8Cyt':['GZMH', 'GNLY', 'FGFBP2', 'CX3CR1','KLF2','TBX21', 'PLAC8', 'FGR','SPON2', 'MYBL1','ZNF683','KLRG1'],
    'CD8Dys':['PDCD1','LAG3','TIGIT', 'CXCL13','RBPJ', 'ZBED2', 'ETV1', 'ID3', 'MAF', 'PRDM1','EOMES', 'IFNG',
              'HAVCR2','PTMS','FAM3C','ICOS','TNFRSF4', 'CCL4L2', 'PRDM1','SPOCK2', 'CCL3', 'TOX', 'ENTPD1','ITGAE'],
    'CD8Trans':['GZMK'],
    'CD4Treg':['FOXP3','IKZF2','IL2RA'], #ENTPD1, ITGAE, KLRG1
    'TExh':['TNFRSF9', 'CSF1', 'TIGIT'],
}

# Score six of the lists ('CD4Treg' is not scored) on both objects.
li_scores=(('NaiTcell',"LI_NAI"),('MemTcell',"LI_MEM"),('CD8Cyt',"LI_CYT"),
           ('CD8Dys',"LI_DYS"),('CD8Trans',"LI_TRANS"),('TExh',"LI_EXH"))
for scored_adata in (cd8tcell, cd8tcdata_full):
    for sig_key, sig_score in li_scores:
        sc.tl.score_genes(scored_adata,li[sig_key],score_name=sig_score,use_raw=True)



In [None]:
# Li et al. gene lists plus CD8A/CD8B per celltype3_pub group, saved by scanpy.
sc.pl.dotplot(cd8tcell,var_names=li['NaiTcell']+li['MemTcell']+li['CD8Trans']+li['CD8Cyt']+li['TExh']+li['CD8Dys']+['CD8A','CD8B'],groupby='celltype3_pub',
              dendrogram=True,dot_max=0.5,vmax=1,save='Li-markerplot.pdf')
#fig.savefig(figdir+'Celltypefreq-Response-PBMCs'+what+'.pdf')




In [None]:
# Same Li et al. gene panel per 'leiden' cluster of cd8tcdata_full, saved by scanpy.
sc.pl.dotplot(cd8tcdata_full,var_names=li['NaiTcell']+li['MemTcell']+li['CD8Trans']+li['CD8Cyt']+li['TExh']+li['CD8Dys']+['CD8A','CD8B'],groupby='leiden',
              dendrogram=True,dot_max=0.5,vmax=1,save='Li-markerplot-veloplot.pdf')


In [None]:
# The six Li et al. scores on cd8tcell, cells in an (unseeded) shuffled drawing order.
sc.pl.umap(cd8tcell[cd8tcell.obs.sample(frac=1).index],color=['LI_NAI','LI_MEM','LI_CYT','LI_DYS','LI_TRANS','LI_EXH'])

In [None]:
# The six Li et al. scores on cd8tcdata_full, cells in an (unseeded) shuffled drawing order.
sc.pl.umap(cd8tcdata_full[cd8tcdata_full.obs.sample(frac=1).index],color=['LI_NAI','LI_MEM','LI_CYT','LI_DYS','LI_TRANS','LI_EXH'])

In [None]:

# Flat gene list for the T-cell marker plots: the 'TExh' genes first, then the
# lists named in cmarkers in that order (duplicates are kept).
cmarkers=['CD4Treg','CD8Dys','CD8Trans','CD8Cyt', 'MemTcell','NaiTcell']
markersgoi=list(li['TExh'])
for x in cmarkers:
    markersgoi.extend(li[x])

In [None]:

#goi=bc+plasma+macro+cDC1+cDC2+TAMCx+TMid+TAM+tmo+NK+['CD3D','CD2','CD8A','CD8B','CD4']+
#tcnai+tregc+tcmil7+Tc4CM+Tmem+tc8em+cytox+tc8exp+exh+cc
# markersgoi genes per celltype3_pub group of `tcell`, saved by scanpy.
sc.pl.dotplot(tcell,var_names=markersgoi,groupby='celltype3_pub',dot_max=0.7,vmax=1.5,
              dendrogram=True,save='-Li-MarkerPlot-tcell-v1.pdf')

In [None]:
#tcnai+tregc+tcmil7+Tc4CM+Tmem+tc8em+cytox+tc8exp+exh+cc
# Same plot with lower scale limits (dot_max=0.2, vmax=0.7); not saved.
sc.pl.dotplot(tcell,var_names=markersgoi,groupby='celltype3_pub',dot_max=0.2,vmax=0.7,
              dendrogram=True)

In [None]:
# Heat map of the markersgoi genes on a random quarter of the cells in `tcell`.
# random_state pins the subsample so the saved figure is identical on every run
# (the unseeded sample drew a different quarter each time).
sc.pl.heatmap(tcell[tcell.obs.sample(frac=0.25, random_state=0).index],
              var_names=markersgoi,standard_scale='var',
              groupby='celltype3_pub',dendrogram=True, vmax=1,
              figsize=(18, 14), use_raw=True,show_gene_labels=True,cmap='OrRd',
              save='-Li-MarkerPlot-tnk_v2.pdf')

In [None]:
# markersgoi genes per 'leiden' cluster of the TIL subset of cd8tcdata (not saved).
sc.pl.dotplot(cd8tcdatatil,var_names=markersgoi,groupby='leiden',
              dendrogram=True)

#### Wu et al. 2020

In [None]:
# Gene lists of the Wu et al. 2020 section. 'CCL4' is in both wuem and wurm, so it
# appears twice in the var_names passed to the dot plot below.
wucd4=['IL6ST','CRIP1']
wueff=['CX3CR1','GNLY', 'NKG7',  'GZMH', 'KLRD1', 'GZMB', 'PRF1', 
       'IFITM2', 'LITAF','ITGB2','GZMA','GPR56','KLRC2','GZMM','RAP1B'] ## KLRC2 is higher in Rs
wuem=['GZMK','CCL4',   'DUSP2', 'CD74','DNAJB1','FOS','CCL3','IFNG'] ## DNAJ1, DUSP2, GZMK is higher in NRs
wurm=['CCL4', 'XCL1',   'XCL2',   'ZNF683']
wuil7=['NCR3','KLRB1','LYAR','IL7R']


### CX3CR1 + GZMH is NKT cell & cytotox T cell
### EM - GZMK+ 

# All five lists per celltype3_pub group of `tcell` (not saved).
sc.pl.dotplot(tcell,var_names=wucd4+wueff+wuem+wurm+wuil7,groupby='celltype3_pub',dot_max=0.8,vmax=2,
              dendrogram=True)


In [None]:
# Union of the literature marker genes used above (wucd4 is not included).
litgoi=largeclones+sade['good']+sade['bad']+li['NaiTcell']+li['MemTcell']+li['CD8Cyt']+li['CD8Dys']+li['CD8Trans']+li['TExh']
# sorted() gives the de-duplicated genes a fixed order; list(set(...)) returned them
# in a different order in every kernel session because string hashing is randomised.
litgoi=sorted(set(litgoi+wueff+wuem+wurm+wuil7))

In [None]:
# besca helper called on `tcell` with the celltype3_pub annotation; fraction_obs is
# indexed by gene columns in the cluster map below.
average_obs,fraction_obs=bc.get_means(tcell, 'celltype3_pub')


In [None]:
# Cluster map of fraction_obs for the literature genes plus MKI67 and STMN1;
# rows and columns are both clustered on correlation distance.
sns.set(font_scale=0.7)
a=sns.clustermap(fraction_obs.loc[:,litgoi+['MKI67','STMN1']],col_cluster=True, row_cluster=True,figsize=(16,4),
                 cmap='viridis',metric='correlation')#,vmax=1
#a.savefig(figdir+'Heatmap-litgoi-'+velosubset+'.celltype3_pub.tdata_tumorOnly.pdf')


In [None]:
#mycol=['MITFP', 'MHC1','REGIO','CITRESUP','CITRESDN']
# Per-patient mean of every numeric obs column of cd8tcell.
# numeric_only=True: pandas >= 2.0 raises on non-numeric obs columns instead of
# dropping them silently as older versions did.
dfPat1=cd8tcell.obs.groupby(['PatientID']).mean(numeric_only=True)
# Response category of each patient = RCat of that patient's first cell. Read from
# obs directly: no AnnData view per patient and no positional [0] on a
# label-indexed Series (deprecated in newer pandas).
dfPat1['RCat']=[cd8tcell.obs.loc[cd8tcell.obs['PatientID']==x,'RCat'].iloc[0] for x in list(dfPat1.index)]
dfPat1.index=dfPat1.index.astype('str')
dfPat1


In [None]:
# Append the columns of `tmb` (defined outside this part of the notebook), aligned on the
# index, and keep only the first of any duplicated column names.
# Not idempotent: re-running this cell concatenates tmb again before de-duplicating.
dfPat1=pd.concat([dfPat1,tmb],axis=1)
dfPat1=dfPat1.loc[:,~dfPat1.columns.duplicated()]
dfPat1

In [None]:
#sigchoice=['score_lymphocyte_scanpy','score_myeloid_scanpy','score_Bcell_scanpy','score_Tcells_scanpy','score_CD4_scanpy',
# 'score_CD8_scanpy']
# Columns at positions 3..99 of dfPat1 plus 'SADE_B'.
# NOTE(review): the slice is positional, so the selection shifts if the obs column order changes - confirm.
sigchoice=list(dfPat1.columns[3:100])+['SADE_B']

In [None]:
# Pearson correlation of every selected column with SADE_G across patients.
# In each 2x2 correlation matrix the 'SADE_G' column's first entry is the value for x;
# .iloc[0] takes it by position explicitly (plain [0] on a label-indexed Series is
# deprecated in newer pandas).
cors=pd.Series([dfPat1.loc[:,[x,'SADE_G']].corr(method='pearson')['SADE_G'].iloc[0] for x in sigchoice],
               index=sigchoice)

In [None]:
cors.sort_values()

In [None]:
# Colour per response category.
# NOTE(review): color_dict is already used by plotting cells above this one, so it must
# also be defined earlier in the notebook for a top-to-bottom run - confirm.
color_dict = {'R': 'coral', 'TF': 'firebrick', 'NR_nadj': 'lightskyblue','NR_adj': 'royalblue'}

# Pearson correlation matrix of the two columns across patients.
dfPat1.loc[:,['score_Bcell_scanpy','SADE_G']].corr(method='pearson')




In [None]:
# Pearson correlation matrix of score_Myeloid_scanpy and SADE_G across patients.
dfPat1.loc[:,['score_Myeloid_scanpy','SADE_G']].corr(method='pearson')


In [None]:
# Pearson correlation matrix of score_cDC1_scanpy and SADE_G across patients.
dfPat1.loc[:,['score_cDC1_scanpy','SADE_G']].corr(method='pearson')


In [None]:
# Pearson correlation matrix of score_cDC2_scanpy and SADE_G across patients.
dfPat1.loc[:,['score_cDC2_scanpy','SADE_G']].corr(method='pearson')


In [None]:
# Pearson correlation matrix of score_cDC_CCR7_scanpy and SADE_G across patients.
dfPat1.loc[:,['score_cDC_CCR7_scanpy','SADE_G']].corr(method='pearson')


In [None]:
# Patient-level scatter of SADE_B against score_cDC_CCR7_scanpy, coloured by response
# category; every point is labelled with its dfPat1 index value.
scatter_ax=sns.scatterplot(data=dfPat1,x='SADE_B',y='score_cDC_CCR7_scanpy',hue='RCat',
                           hue_order=['R','TF','NR_adj','NR_nadj'],palette=color_dict)
scatter_ax.set_title('pp')
plt.legend(bbox_to_anchor=(1.05, 1.05),borderaxespad=0.5)
plt.tight_layout()
for i, patient_label in enumerate(dfPat1.index):
    plt.text(x=dfPat1['SADE_B'].iloc[i],y=dfPat1['score_cDC_CCR7_scanpy'].iloc[i],s=patient_label,
             fontdict={'color':'black','size':10})


In [None]:
# plt.rcParams is matplotlib's rcParams object; using it through the imported `plt`
# removes the dependency on a bare `rcParams` name that the import cell does not provide.
plt.rcParams['figure.figsize'] = 7,5
# Patient-level scatter of SADE_G against score_cDC_CCR7_scanpy, coloured by response
# category; every point is labelled with its dfPat1 index value.
scatter_ax=sns.scatterplot(x='SADE_G',y='score_cDC_CCR7_scanpy',hue='RCat',data=dfPat1,
                           hue_order=['R','TF','NR_adj','NR_nadj'],
                           palette=color_dict)
scatter_ax.set_title('pp')
plt.legend(bbox_to_anchor=(1.05, 1.05),borderaxespad=0.5)
plt.tight_layout()
for i, patient_label in enumerate(dfPat1.index):
    plt.text(x=dfPat1['SADE_G'].iloc[i],y=dfPat1['score_cDC_CCR7_scanpy'].iloc[i],s=patient_label,
             fontdict=dict(color='black',size=10))


In [None]:
# Same scatter of SADE_G against score_cDC_CCR7_scanpy, coloured by the 'Lesion' column.
scatter_ax=sns.scatterplot(data=dfPat1,x='SADE_G',y='score_cDC_CCR7_scanpy',hue='Lesion')
scatter_ax.set_title('pp')
plt.legend(bbox_to_anchor=(1.05, 1.05),borderaxespad=0.5)
plt.tight_layout()
for i, patient_label in enumerate(dfPat1.index):
    plt.text(x=dfPat1['SADE_G'].iloc[i],y=dfPat1['score_cDC_CCR7_scanpy'].iloc[i],s=patient_label,
             fontdict={'color':'black','size':10})


In [None]:
# plt.rcParams is matplotlib's rcParams object; using it through the imported `plt`
# removes the dependency on a bare `rcParams` name that the import cell does not provide.
plt.rcParams['figure.figsize'] = 7,5
# Patient-level scatter of SADE_G against score_Bcell_scanpy, coloured by response
# category; every point is labelled with its dfPat1 index value.
scatter_ax=sns.scatterplot(x='SADE_G',y='score_Bcell_scanpy',hue='RCat',data=dfPat1,
                           hue_order=['R','TF','NR_adj','NR_nadj'],
                           palette=color_dict)
scatter_ax.set_title('pp')
plt.legend(bbox_to_anchor=(1.05, 1.05),borderaxespad=0.5)
plt.tight_layout()
for i, patient_label in enumerate(dfPat1.index):
    plt.text(x=dfPat1['SADE_G'].iloc[i],y=dfPat1['score_Bcell_scanpy'].iloc[i],s=patient_label,
             fontdict=dict(color='black',size=10))


In [None]:
# plt.rcParams is matplotlib's rcParams object; using it through the imported `plt`
# removes the dependency on a bare `rcParams` name that the import cell does not provide.
plt.rcParams['figure.figsize'] = 5,5
# Patient-level scatter of SADE_G against SADE_B, coloured by response category;
# every point is labelled with its dfPat1 index value.
scatter_ax=sns.scatterplot(x='SADE_G',y='SADE_B',hue='RCat',data=dfPat1,
                           hue_order=['R','TF','NR_adj','NR_nadj'],
                           palette=color_dict)
scatter_ax.set_title('pp')
plt.legend(bbox_to_anchor=(1.05, 1.05),borderaxespad=0.5)
plt.tight_layout()
for i, patient_label in enumerate(dfPat1.index):
    plt.text(x=dfPat1['SADE_G'].iloc[i],y=dfPat1['SADE_B'].iloc[i],s=patient_label,
             fontdict=dict(color='black',size=10))


In [None]:
# Same scatter of SADE_G against SADE_B, coloured by the 'Lesion' column.
scatter_ax=sns.scatterplot(data=dfPat1,x='SADE_G',y='SADE_B',hue='Lesion')
scatter_ax.set_title('pp')
plt.legend(bbox_to_anchor=(1.05, 1.05),borderaxespad=0.5)
plt.tight_layout()
for i, patient_label in enumerate(dfPat1.index):
    plt.text(x=dfPat1['SADE_G'].iloc[i],y=dfPat1['SADE_B'].iloc[i],s=patient_label,
             fontdict={'color':'black','size':10})


In [None]:
# Per-patient SADE_G by response category: box plot with the individual patients on top.
# `fig` holds what seaborn returns (used as an Axes elsewhere in this notebook).
fig=sns.boxplot(x='RCat',y='SADE_G',data=dfPat1,palette=color_dict, order=['R','TF','NR_nadj','NR_adj'])
fig=sns.swarmplot(x='RCat',y='SADE_G',data=dfPat1,color='black', order=['R','TF','NR_nadj','NR_adj'])
#fig.figure.savefig(figdir+'/TMB-per-response.pdf') 

In [None]:
# Per-patient SADE_B by response category: box plot with the individual patients on top.
fig=sns.boxplot(x='RCat',y='SADE_B',data=dfPat1,palette=color_dict, order=['R','TF','NR_nadj','NR_adj'])
fig=sns.swarmplot(x='RCat',y='SADE_B',data=dfPat1,color='black', order=['R','TF','NR_nadj','NR_adj'])
#fig.figure.savefig(figdir+'/TMB-per-response.pdf') 

In [None]:
# Sade-Feldman gene lists plus CD8A/CD8B per 'leiden' cluster of the TIL subset of cd8tcdata.
sc.pl.dotplot(cd8tcdatatil,var_names=sade['good']+sade['bad']+['CD8A','CD8B'],groupby='leiden',
              dendrogram=True)

#### Melanoma PBMC TEMRA (Iryna)

In [None]:
# Gene panel of the "Melanoma PBMC TEMRA" section; the last two entries are CD4 and CD8A.
CD4temra=[
    'PTMS','ITGB2-AS1','XCL2','XCL1',
    'IGFBP4','ZNF683','CLU','HLA-DRA','HLA-DQA1','CDCA7',
    'HLA-DRB5','LRRC2','ETV7','LAG3','FABP5','LAT2','GRAMD1C',
    'CD4','CD8A',
]

In [None]:
set(tcell.obs['Sample type'])

In [None]:
# Rebuild `tcell` from adata (it was narrowed to TIL samples earlier) and split it by sample type.
tcell=adata[adata.obs['celltype1'].isin(['T cell'])].copy()
pbmctcell=tcell[tcell.obs['Sample type']=='PBMC'].copy()
tiltcell=tcell[tcell.obs['Sample type']=='TIL'].copy()

# Dendrograms over celltype3_pub, computed per subset for the dot plots below.
sc.tl.dendrogram(pbmctcell, groupby='celltype3_pub')
sc.tl.dendrogram(tiltcell, groupby='celltype3_pub')

In [None]:
# CD4temra genes per celltype3_pub group in the PBMC T cells.
sc.pl.dotplot(pbmctcell,var_names=CD4temra,groupby='celltype3_pub',dot_max=0.1,vmax=0.5,
              dendrogram=True)


In [None]:
# CD4temra genes per celltype3_pub group in the TIL T cells.
sc.pl.dotplot(tiltcell,var_names=CD4temra,groupby='celltype3_pub',dot_max=0.1,vmax=0.5,
              dendrogram=True)


### Differential expression per cell type

In [None]:
#list(set(tildata.obs[what]))

In [None]:
#sc.pl.umap(tildata,color=split_condition)

### Get DE genes for various level annotations at single-cell level, corrected

In [None]:
# Settings for the differential-expression / pseudobulk cells below.
# Alternative values for `what`: 'celltype1', 'celltype2_pub', 'celltype3_pub'.
split_condition, condition = 'PatientID', 'RCat'
correct_nrs = True

what = 'leiden_velo'

In [None]:
results_folder_pseudobulk = os.path.join(root_path, 'analyzed', analysis_name+'/DE/')

In [None]:
# obs columns handed to bc.export.pseudobulk through its todrop= argument.
# NOTE(review): columns[37:203] selects by position, so the selection silently changes
# whenever adata.obs gains, loses or reorders columns — confirm the range or select by name.
todrop=['CELL', 'input.path', 
        'leiden', 'celltype0', 'celltype1', 'celltype2', 
        'celltype3','celltypevar','celltypevar2','celltypevar3', 'dblabel']+list(adata.obs.columns[37:203])


In [None]:
# For each annotation level: export pseudobulk profiles of tildata and write the cell
# counts per split_condition group next to them.
exportPseudobulk=True
if exportPseudobulk:
    annotation_levels=['celltype1','celltype2_pub','celltype3_pub']
    #annotation_levels=['leiden_velo']
    for labtoexport in annotation_levels:
        pseudobulk=bc.export.pseudobulk(tildata,
                                        outpath=results_folder_pseudobulk+'Pseudobulk/',
                                        column=labtoexport,
                                        label=labtoexport,
                                        split_condition=split_condition,
                                        todrop=todrop)
        cell_counts=bc.tl.count_occurrence_subset(tildata, split_condition,
                                                  count_variable=labtoexport,
                                                  return_percentage=False)
        cell_counts.to_csv(results_folder_pseudobulk+'Pseudobulk/CellCounts-TIL-'+labtoexport+'.tsv',sep='\t')

In [None]:
# Same export as for tildata, here for allcd8 grouped by leiden_velo.
# NOTE(review): the counts file keeps the 'CellCounts-TIL-' prefix although the input is
# allcd8 — confirm the name is intended.
exportPseudobulk=True
if exportPseudobulk:
    #annotation_levels=['celltype1','celltype2_pub','celltype3_pub']
    annotation_levels=['leiden_velo']
    for labtoexport in annotation_levels:
        pseudobulk=bc.export.pseudobulk(allcd8,
                                        outpath=results_folder_pseudobulk+'Pseudobulk/',
                                        column=labtoexport,
                                        label=labtoexport,
                                        split_condition=split_condition,
                                        todrop=todrop)
        cell_counts=bc.tl.count_occurrence_subset(allcd8, split_condition,
                                                  count_variable=labtoexport,
                                                  return_percentage=False)
        cell_counts.to_csv(results_folder_pseudobulk+'Pseudobulk/CellCounts-TIL-'+labtoexport+'.tsv',sep='\t')

In [None]:
def _cd8_clusters(cluster_ids):
    """Return a copy of the allcd8 cells whose leiden_velo label is in `cluster_ids`."""
    return allcd8[allcd8.obs['leiden_velo'].isin(cluster_ids)].copy()

# One AnnData per cluster combination used in the DE comparisons.
allcd8c31=_cd8_clusters(['1','3'])
allcd8c9=_cd8_clusters(['9'])
allcd8c8=_cd8_clusters(['8'])
allcd8c89=_cd8_clusters(['9','8'])
allcd8c319=_cd8_clusters(['9','1','3'])
allcd8c56110=_cd8_clusters(['5','6','11','0'])
allcd8c110=_cd8_clusters(['0','11'])
allcd8c56=_cd8_clusters(['5','6'])

In [None]:
sc.pl.umap(dcdata,color='celltype3_pub')

In [None]:
#allmacrotu.obs

In [None]:
#cdata=allcd8c31.copy()
#subsetleg='AllCD8-leiden-velo-c3andc1'

#cdata=allcd8c9.copy()
#subsetleg='AllCD8-leiden-velo-c9'

#cdata=allcd8c8.copy()
#subsetleg='AllCD8-leiden-velo-c8'

#cdata=allcd8c89.copy()
#subsetleg='AllCD8-leiden-velo-c8andc9'

#cdata=allcd8c319.copy()
#subsetleg='AllCD8-leiden-velo-c1andc3andc9'


#cdata=allcd8c56110.copy()
#subsetleg='AllCD8-leiden-velo-c0andc6andc5andc11'

#cdata=allcd8c110.copy()
#subsetleg='AllCD8-leiden-velo-c0andc11'

#cdata=allcd8c56.copy()
#subsetleg='AllCD8-leiden-velo-c5andc6'

# NOTE: both selections below are active. The macrophage subset is built and then
# immediately replaced, so only the classical-monocyte subset (and its subsetleg)
# survives this cell. Comment out the pair that is not wanted.
cdata=allmacrotu[allmacrotu.obs['celltype2_pub']=='macrophage'].copy()
subsetleg='MonoMacro-leiden-velo-TILmacro'

cdata=allmacrotu[allmacrotu.obs['celltype2_pub']=='classical monocyte'].copy()
subsetleg='MonoMacro-leiden-velo-TILcomonocyte'

#cdata=tildata[tildata.obs['celltype3_pub'].isin(['cytokine secreting effector CD8-positive T cell',
#                                                 'proliferating NK cell','effector memory CD8-positive T cell',
#                                                 'mature NK T cell','naive CD8-positive T cell',
#                                                'proliferating CD8-positive T cell',
#                                                 'exhausted-like CD8-positive T cell',
#                                                'cytotoxic CD56-dim natural killer cell',
#                                                'CD56-bright cytokine secreting natural killer cell',
#                                                'proliferating NK cell'])].copy()
#subsetleg='CD8_cytseceffCD8'
#subsetleg='CD8TandNK'
#'cytokine secreting effector CD8-positive T cell',
# 'effector memory CD8-positive T cell',
# 'exhausted-like CD8-positive T cell',
# 'mature NK T cell',
# 'naive CD8-positive T cell',
# 'proliferating CD4-positive T cell',
# 'proliferating CD8-positive T cell'

In [None]:
set(tildata.obs['celltype3_pub'])

In [None]:
# Select the TIL CD4 T-cell subset to analyse and the label (subsetleg) used in output file names.
# NOTE: cdata/subsetleg are assigned twice in this cell; the 'CD4T-effector' selection is
# overwritten, so only 'CD4T-naiveCM' survives. Comment out the pair that is not wanted.
cdata=tildata[tildata.obs['celltype3_pub'].isin(['CD4-positive, cytotoxic T cell',
                                                'effector memory CD4-positive T cell',
                                                'exhausted-like CD4-positive T cell',
                                                'proliferating CD4-positive T cell'])].copy()
subsetleg='CD4T-effector'

#cdata=tildata[tildata.obs['celltype3_pub'].isin(['naive CD4-positive T cell',
#    'central memory CD4-positive T cell',
#    'CD4-positive, cytotoxic T cell','effector memory CD4-positive T cell',
#                                                'exhausted-like CD4-positive T cell',
#                                                'proliferating CD4-positive T cell'])].copy()
#subsetleg='CD4T-AllnoTregMAIT'

cdata=tildata[tildata.obs['celltype3_pub'].isin(['naive CD4-positive T cell',
    'central memory CD4-positive T cell'])].copy()

subsetleg='CD4T-naiveCM'
#cdata=tildata[tildata.obs['celltype3_pub'].isin(['regulatory T cell'])].copy()

#subsetleg='CD4T-regT'

In [None]:
cdata=tildata[tildata.obs['celltype3_pub'].isin(['naive CD4-positive T cell',
    'central memory CD4-positive T cell','IL7R-max CD4-positive T cell','regulatory T cell',
    'CD4-positive, cytotoxic T cell','effector memory CD4-positive T cell',
                                                'exhausted-like CD4-positive T cell',
                                                'proliferating CD4-positive T cell'])].copy()
subsetleg='CD4T-all'


In [None]:
#cdata=tildata[tildata.obs['celltype3_pub'].isin(['cytokine secreting effector CD8-positive T cell',
#                                                 'proliferating NK cell','effector memory CD8-positive T cell',
#                                                 'mature NK T cell','naive CD8-positive T cell',
#                                                'proliferating CD8-positive T cell',
#                                                 'exhausted-like CD8-positive T cell',
#                                                'CD56-bright cytokine secreting natural killer cell',
#                                                'cytotoxic CD56-dim natural killer cell'])].copy()
#subsetleg='CD8TandNK'

#cdata=tildata[tildata.obs['celltype3_pub'].isin(['cytokine secreting effector CD8-positive T cell',
#                                                 'effector memory CD8-positive T cell',
#                                                 'mature NK T cell','naive CD8-positive T cell',
#                                                'proliferating CD8-positive T cell',
#                                                'exhausted-like CD8-positive T cell'])].copy()

#subsetleg='CD8T-all-noNK'
#cdata=tildata[tildata.obs['celltype3_pub'].isin(['proliferating NK cell',
#                                                 'CD56-bright cytokine secreting natural killer cell',
#                                                'cytotoxic CD56-dim natural killer cell'])].copy()

#subsetleg='NK-all'

In [None]:
#set(adata.obs['celltype3_pub'])

In [None]:
#dcdatatu=dcdata[dcdata.obs['Sample type']=='TIL'].copy()
#cdata=dcdatatu[dcdatatu.obs['celltype3_pub'].isin(['CCR7-positive myeloid dendritic cell','CD141-positive myeloid dendritic cell'])].copy()
#cdata=dcdatatu[dcdatatu.obs['celltype3_pub'].isin(['CD1c-positive myeloid dendritic cell'])].copy()

#subsetleg='cDC-leiden-velo-TILcDC1aDC'
#subsetleg='cDC-leiden-velo-TILcDC2'

In [None]:


#=bc.tl.dge.get_de(subdataa,'RCat',demethod='wilcoxon',topnr=5000, logfc=np.log(1.5),padj=0.1)
### try on individual tissues separately
# DE between the 'Respond' groups, repeated on several subsets of cdata with identical thresholds.
de_kwargs=dict(topnr=5000, logfc=np.log(1.5), padj=0.1)

# Per lesion site: lymph node, brain, and the remaining sites pooled.
b1DE=bc.tl.dge.get_de(cdata[cdata.obs['Lesion'].isin(['LN'])].copy(),'Respond',**de_kwargs)
b2DE=bc.tl.dge.get_de(cdata[cdata.obs['Lesion'].isin(['Brain'])].copy(),'Respond',**de_kwargs)
b3DE=bc.tl.dge.get_de(cdata[cdata.obs['Lesion'].isin(['Lung','Sinon','Subc'])].copy(),'Respond',**de_kwargs)

# Per Response3 pair. These subsets are now copied like the lesion subsets above, so
# get_de is never handed a view of cdata.
allDE1=bc.tl.dge.get_de(cdata[cdata.obs['Response3'].isin(['R','NR'])].copy(),'Respond',**de_kwargs)
allDE2=bc.tl.dge.get_de(cdata[cdata.obs['Response3'].isin(['TF','PD'])].copy(),'Respond',**de_kwargs)
# All cells of cdata.
allDE=bc.tl.dge.get_de(cdata,'Respond',**de_kwargs)



In [None]:
# All DE results keyed by the comparison they come from.
testlist={'LN-both':b1DE,
          'Brain-both':b2DE,
          'Other-both':b3DE,
          'Nadj':allDE1,
          'Adj':allDE2,
          'All':allDE}

### Intersection of all combinations
# Genes present in the 'PD' list (strDE) or the 'R' list (strDE2) of every comparison.
strDE=set.intersection(*[set(de['PD']['Name']) for de in testlist.values()])
strDE2=set.intersection(*[set(de['R']['Name']) for de in testlist.values()])


oiNRstr=strDE
strDE

In [None]:

oiRstr=strDE2
strDE2

In [None]:



### Complete comparison + LN only + Brain | Others + RvsNR | PDvsTF
def _lenient_de(group):
    """Genes of `group` ('PD' or 'R') found in the all-cell comparison, in at least one
    lesion-specific comparison, and in at least one of the two Response3 comparisons."""
    any_lesion=set(b1DE[group]['Name'])|set(b2DE[group]['Name'])|set(b3DE[group]['Name'])
    any_response=set(allDE1[group]['Name'])|set(allDE2[group]['Name'])
    return set(allDE[group]['Name'])&any_lesion&any_response

# Alternative definition (LN required, then Brain | Others, then RvsNR | PDvsTF). It used
# to be computed here and immediately overwritten, so it is kept only as a reference:
#strDE=set(b1DE['PD']['Name']).intersection(set(allDE['PD']['Name'])).intersection((set(b2DE['PD']['Name'])).union(set(b3DE['PD']['Name']))).intersection((set(allDE1['PD']['Name'])).union(set(allDE2['PD']['Name'])))
#strDE2=set(b1DE['R']['Name']).intersection(set(allDE['R']['Name'])).intersection((set(b2DE['R']['Name'])).union(set(b3DE['R']['Name']))).intersection((set(allDE1['R']['Name'])).union(set(allDE2['R']['Name'])))

### Complete comparison + LN only | Brain | Others + RvsNR | PDvsTF
strDE=_lenient_de('PD')
strDE2=_lenient_de('R')

oiNRlen=strDE
oiRlen=strDE2

strDE


In [None]:
strDE2

In [None]:
sc.settings.set_figure_params()

In [None]:
figdir

In [None]:
# strDE2 is a set: its iteration order depends on string hashing and changes between kernel
# sessions. Sorting makes the selection of (at most) 60 genes and their order reproducible.
sc.pl.matrixplot(cdata,var_names=sorted(strDE2)[0:60], groupby='RCat', standard_scale='var',save='DEanalysis_wilcoxon.'+subsetleg+'-TILonly.Rhigh.svg')


In [None]:
# strDE is a set: its iteration order depends on string hashing and changes between kernel
# sessions. Sorting makes the selection of (at most) 60 genes and their order reproducible.
sc.pl.matrixplot(cdata,var_names=sorted(strDE)[0:60], groupby='RCat', standard_scale='var',save='DEanalysis_wilcoxon.'+subsetleg+'-TILonly.NRhigh.svg')


In [None]:
# Sorted so the same (at most) 60 genes are shown in the same order on every run; a plain
# list(set) has no stable order across kernel sessions.
sc.pl.dotplot(cdata,var_names=sorted(strDE)[0:60], groupby='RCat')


In [None]:
# Membership matrix for the R-high genes: one column per gene in strDE2, one row per
# comparison in testlist (1 = the gene is in that comparison's 'R' list, 0 = it is not).
r_lists={}
for j in testlist.keys():
    r_lists[j]=set(testlist[j]['R']['Name'])  # build each lookup set once, not once per gene
rhigh={}
for i in sorted(strDE2):  # sorted: reproducible column order
    rhigh[i]=[int(i in r_lists[name]) for name in testlist.keys()]
# Passing the index to the constructor keeps one row per comparison even when strDE2 is
# empty (assigning .index afterwards raised a length-mismatch error in that case).
rhigh=pd.DataFrame(rhigh,index=list(testlist.keys()))



In [None]:



# Membership matrix for the NR-high genes: one column per gene in strDE, one row per
# comparison in testlist (1 = the gene is in that comparison's 'PD' list, 0 = it is not).
pd_lists={}
for j in testlist.keys():
    pd_lists[j]=set(testlist[j]['PD']['Name'])  # build each lookup set once, not once per gene
nrhigh={}
for i in sorted(strDE):  # sorted: reproducible column order
    nrhigh[i]=[int(i in pd_lists[name]) for name in testlist.keys()]
# Passing the index to the constructor keeps one row per comparison even when strDE is
# empty (assigning .index afterwards raised a length-mismatch error in that case).
nrhigh=pd.DataFrame(nrhigh,index=list(testlist.keys()))


In [None]:

# One row per NR-high gene: the membership flags plus the statistics of the all-cell
# comparison. The statistics are looked up by gene name; the previous positional
# assignment used the row order of allDE['PD'], which differs from the gene order of
# nrhigh, so values ended up on the wrong genes.
nrhighall=nrhigh.transpose().copy()
pd_stats=allDE['PD'].drop_duplicates(subset='Name').set_index('Name')
for stat in ['Score','Log2FC','P.adj']:
    nrhighall[stat]=pd_stats[stat].reindex(nrhighall.index).to_numpy()
nrhighall.to_csv(figdir+'DEanalysis_wilcoxon.'+subsetleg+'-TILonly.NRhigh.tsv',sep='\t')


In [None]:
#nrhighall

In [None]:
# One row per R-high gene: the membership flags plus the statistics of the all-cell
# comparison. The statistics are looked up by gene name; the previous positional
# assignment used the row order of allDE['R'], which differs from the gene order of
# rhigh, so values ended up on the wrong genes.
rhighall=rhigh.transpose().copy()
r_stats=allDE['R'].drop_duplicates(subset='Name').set_index('Name')
for stat in ['Score','Log2FC','P.adj']:
    rhighall[stat]=r_stats[stat].reindex(rhighall.index).to_numpy()
rhighall.to_csv(figdir+'DEanalysis_wilcoxon.'+subsetleg+'-TILonly.Rhigh.tsv',sep='\t')


In [None]:
#rhighall

In [None]:
# Macrophage and Monocytes + CD4 T cells
#rhighstr=rhighall.loc[((rhighall['LN-both']==1)&(rhighall['Nadj']==1)&(rhighall['Adj']==1)),:]
#T cells: 
rhighstr=rhighall.loc[((rhighall['LN-both']==1)&(rhighall['Nadj']==1)),:]
rhighstr

In [None]:
sc.pl.matrixplot(cdata,var_names=list(rhighstr.index), groupby='RCat', standard_scale='var',save='DEanalysis_wilcoxon.'+subsetleg+'-TILonly.Rhigh-stringent.svg')


In [None]:
nrhighstr=nrhighall.loc[((nrhighall['LN-both']==1)&(nrhighall['Nadj']==1)&(nrhighall['Adj']==1)),:]
# CD4 T cells
#nrhighstr=nrhighall.loc[((nrhighall['LN-both']==1)&(nrhighall['Nadj']==1)&(nrhighall['Adj']==1)&(nrhighall['Brain-both']==1)),:]

# macrophage and Monocytes: 
#nrhighstr=nrhighall.loc[((nrhighall['LN-both']==1)&(nrhighall['Other-both']==1)&(nrhighall['Nadj']==1)&(nrhighall['Adj']==1)&(nrhighall['Brain-both']==1)),:]
nrhighstr

In [None]:
sc.pl.matrixplot(cdata,var_names=list(nrhighstr.index), groupby='RCat', standard_scale='var',save='DEanalysis_wilcoxon.'+subsetleg+'-TILonly.NRhigh-stringent.svg')


In [None]:
    # Enrichment setup: gseapy is imported here rather than with the other imports at the
    # top of the notebook; `dbs` lists the gene-set libraries passed to gseapy.enrichr below.
    import gseapy
    #gseapy.get_library_name()
    
    # Longer library list, kept for reference:
    #dbs=['KEGG_2021_Human','GO_Biological_Process_2021','Reactome_2016',
    # 'Human_Gene_Atlas','WikiPathways_2019_Human','ChEA_2016',
    # 'CellMarker_Augmented_2021','Azimuth_Cell_Types_2021','COVID-19_Related_Gene_Sets_2021',
    # 'MSigDB_Hallmark_2020']
    dbs=['KEGG_2021_Human','GO_Biological_Process_2021','Reactome_2016',
     'MSigDB_Hallmark_2020']

In [None]:
gseapy.enrichr

In [None]:
#description='DEanalysis_wilcoxon.'+subsetleg+'-TILonly.Rhigh.tsv', 
#description='DEanalysis_wilcoxon.'+subsetleg+'-TILonly.NRhigh.tsv', 

In [None]:
j

In [None]:
    # Mean raw expression per gene. np.ravel flattens both the 1 x n_genes matrix returned
    # for a sparse raw.X and the 1-D array returned for a dense one (the previous
    # .tolist()[0] produced a single scalar in the dense case).
    cdata.raw.var['MeanExpr']=np.ravel(cdata.raw.X.mean(axis=0))

    # Inputs that do not depend on the library are computed once, outside the loop.
    # Background: symbols of all genes with mean raw expression above 0.01.
    background_genes=list(cdata.raw.var.loc[cdata.raw.var['MeanExpr']>0.01]['SYMBOL'])
    # Query lists: up to 200 genes, ranked by the number of comparisons they appear in.
    r_query=list(rhigh.sum().sort_values(ascending=False)[0:200].index)
    nr_query=list(nrhigh.sum().sort_values(ascending=False)[0:200].index)

    for j in dbs:
        # An enrichment is only run for a direction with more than 10 genes.
        if len(rhigh.columns)>10:
            gseapy.enrichr(gene_list=r_query,
                   gene_sets=j, background=background_genes,cutoff=0.1, format='png',outdir=figdir+'enrichr/'+subsetleg+'-TILonly.Rhigh/')
        if len(nrhigh.columns)>10:
            gseapy.enrichr(gene_list=nr_query,
                   gene_sets=j, background=background_genes,cutoff=0.1, format='png',outdir=figdir+'enrichr/'+subsetleg+'-TILonly.NRhigh/')

In [None]:
sns.set(font_scale=0.8)

In [None]:
dbs

In [None]:
    mydb='MSigDB_Hallmark_2020'
    #for mydb in dbs:
    # Read the Enrichr reports of the R-high and NR-high gene lists for the current subsetleg.
    report_prefix=figdir+'enrichr/'+subsetleg
    rhighhallmark=pd.read_csv(report_prefix+'-TILonly.Rhigh/'+mydb+'.human.enrichr.reports.txt', sep='\t')
    nrhighhallmark=pd.read_csv(report_prefix+'-TILonly.NRhigh/'+mydb+'.human.enrichr.reports.txt', sep='\t')

    # Keep term and combined score, copy the score into a direction-named column, index by term.
    nrhighhallmark=nrhighhallmark[['Term','Combined Score']].assign(NR=lambda d: d['Combined Score'])
    rhighhallmark=rhighhallmark[['Term','Combined Score']].assign(R=lambda d: d['Combined Score'])
    nrhighhallmark.index=nrhighhallmark['Term'].tolist()
    rhighhallmark.index=rhighhallmark['Term'].tolist()

    # Terms x (R, NR); a term missing from one report gets 0. Only terms whose best
    # combined score exceeds 20 are plotted.
    toplot=pd.concat([nrhighhallmark,rhighhallmark], axis=1)[['R','NR']].fillna(0)
    toplot=toplot[toplot.max(axis=1)>20]
    #toplot[toplot>500]=500
    fig=sns.clustermap(np.log1p(toplot).transpose(), figsize=(8,1.5))
    #fig.savefig(figdir+'enrichr/'+subsetleg+'-TILonly-'+mydb+'.human.enrichr.pdf')
    #fig.savefig(figdir+'enrichr/'+subsetleg+'-TILonly-'+mydb+'.human.enrichr.eps', format='eps')
    fig.savefig(report_prefix+'-TILonly-'+mydb+'.human.enrichr.svg', format='svg')

In [None]:
subsetleg

##### Specifically create plots for Macrophage/DC subsets

In [None]:
def _enrichr_scores(subset, direction, label, library):
    """Read one Enrichr report and return Term / Combined Score / <label>, indexed by term.

    The report is read from figdir+'enrichr/<subset>-TILonly.<direction>/<library>.human.enrichr.reports.txt';
    the combined score is duplicated into a column named `label`.
    """
    report=pd.read_csv(figdir+'enrichr/'+subset+'-TILonly.'+direction+'/'+library+'.human.enrichr.reports.txt', sep='\t')
    report=report.loc[:,['Term','Combined Score']]
    report[label]=report['Combined Score']
    report.index=list(report['Term'])
    return report


for mydb in dbs:
    monor=_enrichr_scores('MonoMacro-leiden-velo-TILcomonocyte','Rhigh','monoR',mydb)
    mononr=_enrichr_scores('MonoMacro-leiden-velo-TILcomonocyte','NRhigh','monoNR',mydb)

    macror=_enrichr_scores('MonoMacro-leiden-velo-TILmacro','Rhigh','macroR',mydb)
    macronr=_enrichr_scores('MonoMacro-leiden-velo-TILmacro','NRhigh','macroNR',mydb)

    # NOTE(review): the cDC reports are only used by the commented-out alternative below,
    # and the 'cDC-leiden-velo-TILcDC' prefix matches neither cDC subsetleg offered earlier
    # in the notebook ('...TILcDC1aDC', '...TILcDC2') — confirm these files exist.
    cdcr=_enrichr_scores('cDC-leiden-velo-TILcDC','Rhigh','cdcR',mydb)
    cdcnr=_enrichr_scores('cDC-leiden-velo-TILcDC','NRhigh','cdcNR',mydb)

    #toplot=pd.concat([macronr,macror, mononr, monor, cdcr, cdcnr], axis=1).loc[:,['monoNR','monoR','macroNR','macroR', 'cdcR','cdcNR']].fillna(0)
    #toplot=toplot.loc[toplot.max(axis=1)>20,:]
        #toplot[toplot>500]=500
    #sns.clustermap(np.log1p(toplot).transpose(), figsize=(8,1.5))
    #

    # Terms x (monoNR, monoR, macroNR, macroR); a term missing from a report gets 0.
    score_columns=['monoNR','monoR','macroNR','macroR']
    toplot=pd.concat([macronr,macror, mononr, monor], axis=1).loc[:,score_columns].fillna(0)
    toplot=toplot.loc[toplot.max(axis=1)>20,:]

    # Keep terms where monocytes and macrophages agree: the NR-minus-R difference is
    # above 10 in both, or below -10 in both.
    mono_diff=toplot['monoNR']-toplot['monoR']
    macro_diff=toplot['macroNR']-toplot['macroR']
    both_nr=(mono_diff>10)&(macro_diff>10)
    both_r=(mono_diff<-10)&(macro_diff<-10)
    toplot=toplot.loc[both_nr|both_r,:]

    #toplot=toplot.loc[(((toplot['monoNR']-toplot['monoR'])>10) &((toplot['macroNR']-toplot['macroR'])>10)&((toplot['cdcNR']-toplot['cdcR'])>10))|(((toplot['monoNR']-toplot['monoR'])<-10) &((toplot['macroNR']-toplot['macroR'])<-10)&((toplot['cdcNR']-toplot['cdcR'])<-10)),:]
    fig=sns.clustermap(np.log1p(toplot).transpose(), figsize=(10,2))
    #fig.figure.savefig(figdir+'enrichr/MonoMacrocDC-leiden-velo-TIL-TILonly-'+mydb+'.human.enrichr.pdf')
    figure_stem=figdir+'enrichr/MonoMacro-leiden-velo-TIL-TILonly-'+mydb+'.human.enrichr'
    fig.savefig(figure_stem+'.pdf')
    fig.savefig(figure_stem+'.eps', format='eps')
    fig.savefig(figure_stem+'.svg', format='svg')


In [None]:
def _read_cd8_de(clusters, direction):
    """Read a DE table previously saved for an AllCD8 leiden_velo cluster subset ('Rhigh' or 'NRhigh')."""
    return pd.read_csv(figdir+'DEanalysis_wilcoxon.AllCD8-leiden-velo-'+clusters+'-TILonly.'+direction+'.tsv',sep='\t')

# R-high tables
degenes1=_read_cd8_de('c0andc6andc5andc11','Rhigh')
degenes2=_read_cd8_de('c5andc6','Rhigh')
degenes3=_read_cd8_de('c0andc11','Rhigh')

# NR-high tables
nrdegenes1=_read_cd8_de('c0andc6andc5andc11','NRhigh')
nrdegenes2=_read_cd8_de('c5andc6','NRhigh')
nrdegenes3=_read_cd8_de('c0andc11','NRhigh')


In [None]:
# Gene names are in the first, unnamed column of the saved tables.
# denr: NR-high in any of the three subsets; denrstr: NR-high in all three; der: R-high in any.
denr=set().union(nrdegenes1['Unnamed: 0'], nrdegenes2['Unnamed: 0'], nrdegenes3['Unnamed: 0'])
denrstr=set(nrdegenes1['Unnamed: 0']).intersection(nrdegenes2['Unnamed: 0'], nrdegenes3['Unnamed: 0'])
der=set().union(degenes1['Unnamed: 0'], degenes2['Unnamed: 0'], degenes3['Unnamed: 0'])

In [None]:
# Folder with the velocity results: lineage gene tables are read from it and candidate lists
# are written to its 'candidates/' subfolder.
# NOTE(review): hard-coded absolute path — consider deriving it from results_folder so the
# notebook runs from another location.
inpath='/Fullanalysis/analyzed/sw_besca2_immune/velocity/publication/'

# Earlier lineage definitions (kept for reference):
#lingenesr=pd.read_csv(inpath+'Lingenes_CD8Tcell-All_PBMCandTIL_rbdata_leiden_8to0to6.tsv',sep='\t')
#lingenesnr=pd.read_csv(inpath+'Lingenes_CD8Tcell-All_PBMCandTIL_nrbdata_leiden_8to11and5.tsv',sep='\t')

lingenesr=pd.read_csv(inpath+'Lingenes_CD8Tcell-All_PBMCandTIL_rbdata_leiden_9to0.tsv',sep='\t')
lingenesnr=pd.read_csv(inpath+'Lingenes_CD8Tcell-All_PBMCandTIL_nrbdata_leiden_9to11.tsv',sep='\t')
# Keep only rows whose 'to 5' value exceeds 0.1.
# NOTE(review): both files are named 9to0 / 9to11 but are filtered on a column called
# 'to 5' — confirm this is the intended column.
lingenesr=lingenesr.loc[lingenesr['to 5']>0.1]
lingenesnr=lingenesnr.loc[lingenesnr['to 5']>0.1]




In [None]:
tmp=allcd8[allcd8.obs['leiden_velo'].isin(['5','6'])].copy()
tmp=tmp[tmp.obs['RCat'].isin(['R','TF'])].copy()

In [None]:
# Split allcd8 by response category. `tmp` is allcd8 itself (no copy): the cluster filter
# at the end of the line is commented out.
tmp=allcd8#[allcd8.obs['leiden_velo'].isin(['9'])].copy()
tmpr=tmp[tmp.obs['RCat'].isin(['R','TF'])].copy()            # cells with RCat R or TF
tmpnr=tmp[tmp.obs['RCat'].isin(['NR_adj','NR_nadj'])].copy()  # cells with RCat NR_adj or NR_nadj

In [None]:
# DE between leiden_velo clusters, computed separately within tmpr and tmpnr.
# Despite the variable names the results are not limited to cluster 9: later cells read
# both the '9' and the '8' entries.
c9R=bc.tl.dge.get_de(tmpr,'leiden_velo',topnr=5000, logfc=np.log(2),padj=0.05)
c9NR=bc.tl.dge.get_de(tmpnr,'leiden_velo',topnr=5000, logfc=np.log(2),padj=0.05)

In [None]:
def _write_gene_candidates(genes, filename):
    """Write a gene set to inpath+'candidates/'+filename as a tab-separated Series.

    The genes are sorted first: a set has no stable iteration order across kernel
    sessions, so the unsorted files differed from run to run.
    """
    pd.Series(sorted(genes)).to_csv(inpath+'candidates/'+filename,sep='\t')


# For clusters 9 and 8: cluster DE genes shared by both response groups, found only in the
# R-group comparison, or found only in the NR-group comparison. This replaces six
# copy-pasted one-line cells; the output file names are unchanged.
for cluster_id in ['9','8']:
    r_genes=set(c9R[cluster_id]['Name'])
    nr_genes=set(c9NR[cluster_id]['Name'])
    file_prefix='Candidates_CD8Tcell-All_c'+cluster_id+'specgenes_'
    _write_gene_candidates(r_genes&nr_genes, file_prefix+'NRandR.tsv')
    _write_gene_candidates(r_genes-nr_genes, file_prefix+'RnotNR.tsv')
    _write_gene_candidates(nr_genes-r_genes, file_prefix+'NRnotR.tsv')

In [None]:
tmp=allcd8[allcd8.obs['leiden_velo'].isin(['9'])].copy()

In [None]:
import scipy.stats as ss
import operator

allmarkers=tmp.var_names

# KLRB1 expression is the same for every comparison, so it is extracted once.
klrb1_expr=np.asarray(tmp.raw[:,'KLRB1'].X.todense()).ravel()

# Spearman correlation of every gene in tmp.var_names with KLRB1, on raw expression.
spearcorsAll = {}
for i in allmarkers:
    spearcorsAll[i]=ss.spearmanr(np.asarray(tmp.raw[:, i].X.todense()).ravel(),klrb1_expr)[0]

# A gene without variation has no defined correlation (NaN). NaN compares False against
# everything, which silently corrupts a comparison sort, so such genes are removed first.
spearcorsAll=[(gene,rho) for gene,rho in spearcorsAll.items() if not np.isnan(rho)]
spearcorsAll=sorted(spearcorsAll,key=operator.itemgetter(1))
spearcorsAll_ids = [idx for idx, val in spearcorsAll]
goiNegAll=spearcorsAll_ids[0:30]   # 30 most negatively correlated genes
goiPosAll=spearcorsAll_ids[-30:]   # 30 most positively correlated genes (KLRB1 itself included if it is in var_names)

In [None]:
pd.Series(spearcorsAll_ids[(len(spearcorsAll_ids)-30):len(spearcorsAll_ids)]).to_csv(inpath+'candidates/Candidates_CD8Tcell-All_genesCorKLRB1inC9.tsv',sep='\t')

In [None]:
import scipy.stats as ss
import operator

allmarkers=allcd8.var_names

# KLRB1 expression is the same for every comparison, so it is extracted once.
klrb1_expr=np.asarray(allcd8.raw[:,'KLRB1'].X.todense()).ravel()

# Spearman correlation of every gene in allcd8.var_names with KLRB1, on raw expression.
spearcorsAll = {}
for i in allmarkers:
    spearcorsAll[i]=ss.spearmanr(np.asarray(allcd8.raw[:, i].X.todense()).ravel(),klrb1_expr)[0]

# A gene without variation has no defined correlation (NaN). NaN compares False against
# everything, which silently corrupts a comparison sort, so such genes are removed first.
spearcorsAll=[(gene,rho) for gene,rho in spearcorsAll.items() if not np.isnan(rho)]
spearcorsAll=sorted(spearcorsAll,key=operator.itemgetter(1))
spearcorsAll_ids = [idx for idx, val in spearcorsAll]
goiNegAll=spearcorsAll_ids[0:30]   # 30 most negatively correlated genes
goiPosAll=spearcorsAll_ids[-30:]   # 30 most positively correlated genes (KLRB1 itself included if it is in var_names)

In [None]:
pd.Series(spearcorsAll_ids[(len(spearcorsAll_ids)-30):len(spearcorsAll_ids)]).to_csv(inpath+'candidates/Candidates_CD8Tcell-All_genesCorKLRB1inCD8T.tsv',sep='\t')

In [None]:
(spearcorsAll_ids[(len(spearcorsAll_ids)-30):len(spearcorsAll_ids)])

In [None]:
#degenes1=pd.read_csv(figdir+'DEanalysis_wilcoxon.AllCD8-leiden-velo-c9-TILonly.Rhigh.tsv',sep='\t')
degenesnr1=pd.read_csv(figdir+'DEanalysis_wilcoxon.AllCD8-leiden-velo-c9-TILonly.NRhigh.tsv',sep='\t')

In [None]:
goiPosAll=spearcorsAll_ids[(len(spearcorsAll_ids)-30):len(spearcorsAll_ids)]
sc.pl.dotplot(allcd8, var_names=goiPosAll,groupby='leiden_velo')

In [None]:
# DE between leiden_velo clusters 5 and 6. The input is the explicit cluster-5/6 subset:
# when the notebook is run top to bottom, `tmp` holds only cluster 9 at this point, so the
# '5' and '6' results read by later cells could not exist.
# NOTE(review): an earlier cell built `tmp` from clusters 5/6 restricted to RCat R/TF —
# confirm whether that restriction was intended here.
c5vs6=bc.tl.dge.get_de(allcd8c56.copy(),'leiden_velo',topnr=5000, logfc=np.log(2),padj=0.05)

In [None]:
#mysubcand='c8toc5vsc6'
mysubcand='c9toc11vsc0'

In [None]:
# R-lineage genes (lingenesr) that are also in the R-high DE union `der`.
# NOTE(review): the file name says '9to11' although lingenesr is read from the 9to0 table — confirm.
in_r_de=lingenesr['Unnamed: 0'].isin(der)
candRout=lingenesr.loc[in_r_de,:]
candRout.columns=['Gene','to 5']
candRout.to_csv(inpath+'candidates/Candidates_CD8Tcell-All_'+mysubcand+'.Rhigh_9to11.tsv',sep='\t')
candRout



In [None]:
# R-lineage genes (lingenesr) that are also in the cluster-6 list of the 5-vs-6 comparison.
c6_genes=set(c5vs6['6']['Name'])
candc6out=lingenesr.loc[lingenesr['Unnamed: 0'].isin(c6_genes),:]
candc6out.columns=['Gene','to 5']
candc6out.to_csv(inpath+'candidates/Candidates_CD8Tcell-All_'+mysubcand+'.6high_9to11.tsv',sep='\t')
candc6out

In [None]:
# NR-lineage genes (lingenesnr) that are also in the NR-high DE union `denr`.
in_nr_de=lingenesnr['Unnamed: 0'].isin(denr)
candNRout=lingenesnr.loc[in_nr_de,:]
candNRout.columns=['Gene','to 5']
candNRout.to_csv(inpath+'candidates/Candidates_CD8Tcell-All_'+mysubcand+'.NRhigh_9to11.tsv',sep='\t')
candNRout



In [None]:
# Stringent variant: NR-lineage genes that are NR-high in all three subsets (denrstr).
# Replaces the candNRout built from the union `denr`.
in_nr_de_strict=lingenesnr['Unnamed: 0'].isin(denrstr)
candNRout=lingenesnr.loc[in_nr_de_strict,:]
candNRout.columns=['Gene','to 5']
candNRout.to_csv(inpath+'candidates/Candidates_CD8Tcell-All_'+mysubcand+'.NRhigh_9to11.stringent.tsv',sep='\t')
candNRout

In [None]:
# NR-lineage genes (lingenesnr) that are also in the cluster-5 list of the 5-vs-6 comparison.
c5_genes=set(c5vs6['5']['Name'])
candc5out=lingenesnr.loc[lingenesnr['Unnamed: 0'].isin(c5_genes),:]
candc5out.columns=['Gene','to 5']
candc5out.to_csv(inpath+'candidates/Candidates_CD8Tcell-All_'+mysubcand+'.5high_9to11.tsv',sep='\t')
candc5out

In [None]:
# Stringent variant: NR-lineage genes that are in denrstr and in the cluster-5 list of
# the 5-vs-6 comparison. Replaces the candc5out built without the denrstr filter.
c5_nr_strict=denrstr.intersection(set(c5vs6['5']['Name']))
candc5out=lingenesnr.loc[lingenesnr['Unnamed: 0'].isin(c5_nr_strict),:]
candc5out.columns=['Gene','to 5']
candc5out.to_csv(inpath+'candidates/Candidates_CD8Tcell-All_'+mysubcand+'.NRhigh.5high_9to11.stringent.tsv',sep='\t')
candc5out

In [None]:

nrhigh.transpose()

rhigh.transpose()

In [None]:
def _til_celltype2(labels):
    """Return a copy of the tildata cells whose celltype2_pub label is one of `labels`."""
    return tildata[tildata.obs['celltype2_pub'].isin(labels)].copy()

# NOTE(review): elsewhere in this notebook the three NK labels are matched against
# celltype3_pub — confirm they also occur in celltype2_pub, otherwise nkcell is empty.
nkcell=_til_celltype2(['proliferating NK cell',
                       'cytotoxic CD56-dim natural killer cell',
                       'CD56-bright cytokine secreting natural killer cell'])
cd8tcell=_til_celltype2(['CD8-positive T cell'])
cd4tcell=_til_celltype2(['CD4-positive T cell'])

cdc=_til_celltype2(['myeloid dendritic cell'])
macro=_til_celltype2(['macrophage'])
mono=_til_celltype2(['classical monocyte'])



#cd8tcell=cd8tcell[cd8tcell.obs['celltype3_pub']!='naive CD8 positive T cell'].copy()
#cd8tcell=cd8tcell[cd8tcell.obs['celltype3_pub']!='mature NK T cell'].copy()
#cd8tcell=cd8tcell[cd8tcell.obs['celltype3_pub']!='proliferating CD8-positive T cell'].copy()


In [None]:
def _pbmc_celltype2(label):
    """Return a copy of the pbmcdata cells whose celltype2_pub label equals `label`."""
    return pbmcdata[pbmcdata.obs['celltype2_pub']==label].copy()

# PBMC counterparts of the TIL cell-type subsets.
monopbmc=_pbmc_celltype2('classical monocyte')
cd4tcellpbmc=_pbmc_celltype2('CD4-positive T cell')
cd8tcellpbmc=_pbmc_celltype2('CD8-positive T cell')
cdcpbmc=_pbmc_celltype2('myeloid dendritic cell')

In [None]:
cdc1=tildata[tildata.obs['celltype3_pub'].isin(['CD141-positive myeloid dendritic cell',
                                                'CCR7-positive myeloid dendritic cell'])].copy()


In [None]:
set(tildata.obs['celltype3_pub'])

### NK cell DE 

In [None]:
cdata=nkcell.copy()

In [None]:
#=bc.tl.dge.get_de(subdataa,'RCat',demethod='wilcoxon',topnr=5000, logfc=np.log(1.5),padj=0.1)
### try on individual tissues separately
# DE between the 'Respond' groups, repeated on several subsets of cdata with identical thresholds.
de_kwargs=dict(topnr=5000, logfc=np.log(1.5), padj=0.1)

# Per lesion site: lymph node, brain, and the remaining sites pooled.
b1DE=bc.tl.dge.get_de(cdata[cdata.obs['Lesion'].isin(['LN'])].copy(),'Respond',**de_kwargs)
b2DE=bc.tl.dge.get_de(cdata[cdata.obs['Lesion'].isin(['Brain'])].copy(),'Respond',**de_kwargs)
b3DE=bc.tl.dge.get_de(cdata[cdata.obs['Lesion'].isin(['Lung','Sinon','Subc'])].copy(),'Respond',**de_kwargs)

# Per Response3 pair. These subsets are now copied like the lesion subsets above, so
# get_de is never handed a view of cdata.
allDE1=bc.tl.dge.get_de(cdata[cdata.obs['Response3'].isin(['R','NR'])].copy(),'Respond',**de_kwargs)
allDE2=bc.tl.dge.get_de(cdata[cdata.obs['Response3'].isin(['TF','PD'])].copy(),'Respond',**de_kwargs)
# All cells of cdata.
allDE=bc.tl.dge.get_de(cdata,'Respond',**de_kwargs)


In [None]:
# All DE results keyed by the comparison they come from.
testlist={'LN-both':b1DE,
          'Brain-both':b2DE,
          'Other-both':b3DE,
          'Nadj':allDE1,
          'Adj':allDE2,
          'All':allDE}

In [None]:
### Intersection of all combinations
# Genes present in the 'PD' list (strDE) or the 'R' list (strDE2) of all six comparisons.
de_results=[b1DE,allDE,b2DE,b3DE,allDE1,allDE2]
strDE=set.intersection(*[set(de['PD']['Name']) for de in de_results])
strDE2=set.intersection(*[set(de['R']['Name']) for de in de_results])

In [None]:
strDE

In [None]:
strDE2

In [None]:
### Complete comparison + LN only + Brain | Others + RvsNR | PDvsTF
# Required: LN and all-cell comparison; plus Brain or Others; plus one of the two Response3 comparisons.
strDE=(set(b1DE['PD']['Name'])&set(allDE['PD']['Name'])
       &(set(b2DE['PD']['Name'])|set(b3DE['PD']['Name']))
       &(set(allDE1['PD']['Name'])|set(allDE2['PD']['Name'])))
strDE2=(set(b1DE['R']['Name'])&set(allDE['R']['Name'])
        &(set(b2DE['R']['Name'])|set(b3DE['R']['Name']))
        &(set(allDE1['R']['Name'])|set(allDE2['R']['Name'])))

In [None]:
### Complete comparison + LN only | Brain | Others + RvsNR | PDvsTF
# Required: all-cell comparison; plus any lesion-specific comparison; plus one of the two Response3 comparisons.
strDE=(set(allDE['PD']['Name'])
       &(set(b2DE['PD']['Name'])|set(b3DE['PD']['Name'])|set(b1DE['PD']['Name']))
       &(set(allDE1['PD']['Name'])|set(allDE2['PD']['Name'])))
strDE2=(set(allDE['R']['Name'])
        &(set(b2DE['R']['Name'])|set(b3DE['R']['Name'])|set(b1DE['R']['Name']))
        &(set(allDE1['R']['Name'])|set(allDE2['R']['Name'])))


In [None]:
nkNRlen=strDE
nkRlen=strDE2

In [None]:
strDE

In [None]:
strDE2

In [None]:
sc.pl.stacked_violin(nkcell,var_names=list(strDE2), groupby='RCat')

In [None]:
sc.pl.dotplot(nkcell,var_names=list(strDE2), groupby='RCat')

In [None]:
# Membership matrices: one column per gene, one row per comparison in testlist
# (1 = the gene is in that comparison's list, 0 = it is not).
# The lookup sets are built once per comparison instead of once per gene and comparison.
r_lists={}
pd_lists={}
for j in testlist.keys():
    r_lists[j]=set(testlist[j]['R']['Name'])
    pd_lists[j]=set(testlist[j]['PD']['Name'])

rhigh={}
for i in sorted(strDE2):  # sorted: reproducible column order
    rhigh[i]=[int(i in r_lists[name]) for name in testlist.keys()]
# Passing the index to the constructor keeps one row per comparison even for an empty
# gene set (assigning .index afterwards raised a length-mismatch error in that case).
rhigh=pd.DataFrame(rhigh,index=list(testlist.keys()))

nrhigh={}
for i in sorted(strDE):
    nrhigh[i]=[int(i in pd_lists[name]) for name in testlist.keys()]
nrhigh=pd.DataFrame(nrhigh,index=list(testlist.keys()))

In [None]:
nrhigh.transpose().to_csv(figdir+'DEanalysis_wilcoxon.NK-TILonly.NRhigh.tsv',sep='\t')
rhigh.transpose().to_csv(figdir+'DEanalysis_wilcoxon.NK-TILonly.Rhigh.tsv',sep='\t')

In [None]:
nrhigh.transpose()

In [None]:
rhigh.transpose()

### CD8 T cell DE


In [None]:
set(cd8tcell.obs['celltype3_pub'])

In [None]:
cdata=cd8tcell[cd8tcell.obs['celltype3_pub']!='mature NK T cell'].copy()

#=bc.tl.dge.get_de(subdataa,'RCat',demethod='wilcoxon',topnr=5000, logfc=np.log(1.5),padj=0.1)
### try on individual tissues separately
# DE between the 'Respond' groups, repeated on several subsets of cdata with identical thresholds.
de_kwargs=dict(topnr=5000, logfc=np.log(1.5), padj=0.1)

# Per lesion site: lymph node, brain, and the remaining sites pooled.
b1DE=bc.tl.dge.get_de(cdata[cdata.obs['Lesion'].isin(['LN'])].copy(),'Respond',**de_kwargs)
b2DE=bc.tl.dge.get_de(cdata[cdata.obs['Lesion'].isin(['Brain'])].copy(),'Respond',**de_kwargs)
b3DE=bc.tl.dge.get_de(cdata[cdata.obs['Lesion'].isin(['Lung','Sinon','Subc'])].copy(),'Respond',**de_kwargs)

### on response categories separately 
# These subsets are now copied like the lesion subsets above, so get_de is never handed a
# view of cdata.
allDE1=bc.tl.dge.get_de(cdata[cdata.obs['Response3'].isin(['R','NR'])].copy(),'Respond',**de_kwargs)
allDE2=bc.tl.dge.get_de(cdata[cdata.obs['Response3'].isin(['TF','PD'])].copy(),'Respond',**de_kwargs)
# All cells of cdata.
allDE=bc.tl.dge.get_de(cdata,'Respond',**de_kwargs)



In [None]:
# All DE results keyed by the comparison they come from.
testlist={'LN-both':b1DE,
          'Brain-both':b2DE,
          'Other-both':b3DE,
          'Nadj':allDE1,
          'Adj':allDE2,
          'All':allDE}

In [None]:
### Intersection of all combinations
# Genes present in the 'PD' list (strDE) or the 'R' list (strDE2) of all six comparisons.
de_results=[b1DE,allDE,b2DE,b3DE,allDE1,allDE2]
strDE=set.intersection(*[set(de['PD']['Name']) for de in de_results])
strDE2=set.intersection(*[set(de['R']['Name']) for de in de_results])



In [None]:
strDE



In [None]:
strDE2


In [None]:
strDE.intersection(set(litgoi))

In [None]:
strDE2.intersection(set(litgoi))

In [None]:
cd8NRstr=strDE
cd8Rstr=strDE2

In [None]:
### Complete comparison + LN only + Brain | Others + RvsNR | PDvsTF
def _lenient_de(group):
    """Genes of `group` ('PD' or 'R') found in the all-cell comparison, in at least one
    lesion-specific comparison, and in at least one of the two Response3 comparisons."""
    any_lesion=set(b1DE[group]['Name'])|set(b2DE[group]['Name'])|set(b3DE[group]['Name'])
    any_response=set(allDE1[group]['Name'])|set(allDE2[group]['Name'])
    return set(allDE[group]['Name'])&any_lesion&any_response

# Alternative definition (LN required, then Brain | Others, then RvsNR | PDvsTF). It used
# to be computed here and immediately overwritten, so it is kept only as a reference:
#strDE=set(b1DE['PD']['Name']).intersection(set(allDE['PD']['Name'])).intersection((set(b2DE['PD']['Name'])).union(set(b3DE['PD']['Name']))).intersection((set(allDE1['PD']['Name'])).union(set(allDE2['PD']['Name'])))
#strDE2=set(b1DE['R']['Name']).intersection(set(allDE['R']['Name'])).intersection((set(b2DE['R']['Name'])).union(set(b3DE['R']['Name']))).intersection((set(allDE1['R']['Name'])).union(set(allDE2['R']['Name'])))

### Complete comparison + LN only | Brain | Others + RvsNR | PDvsTF
strDE=_lenient_de('PD')
strDE2=_lenient_de('R')

strDE


In [None]:
strDE.intersection(set(litgoi))
# CD74: Wu et al. Effector-Mem
# DNAJB1: Wu et al. Effector-Mem
# DUSP2: Wu et al. Effector-Mem
# IFNG: Wu et al. Effector-Mem; Li et al. dysfunctional CD8
# GZMK: Wu et al. Effector-Mem; Li et al. transitional CD8
# ENTPD1: Sade-Felman "bad" CD8; Li et al. dysfunctional CD8
# GPR56: Wu et al. Effector
# LITAF: Wu et al. Effector
# NKG7: Wu et al. Effector
# TNFRSF9: Li et al. exhausted CD8

In [None]:
strDE2.intersection(set(litgoi))
#IL7R: Li et al. naive; Wu et al. IL7R
#ITG1: Fairfax et al. large clones
#KLRC2: Wu et al. effector
#XCL1, XCL2: Wu et al. resident-memory

In [None]:
strDE2


In [None]:
cd8NRlen=strDE
cd8Rlen=strDE2

In [None]:
# Presence/absence tables: one row per DE comparison in testlist, one column per gene;
# 1 when the gene is listed in that comparison's 'R' (rhigh) or 'PD' (nrhigh) table.
tests=list(testlist.keys())

# Build each comparison's gene set once instead of once per gene.
r_sets={t:set(testlist[t]['R']['Name']) for t in tests}
# index= is passed to the constructor so the comparison rows exist even when the gene
# set is empty; assigning .index afterwards fails on an empty frame (length mismatch).
rhigh=pd.DataFrame({g:[int(g in r_sets[t]) for t in tests] for g in cd8Rlen},index=tests)

nr_sets={t:set(testlist[t]['PD']['Name']) for t in tests}
nrhigh=pd.DataFrame({g:[int(g in nr_sets[t]) for t in tests] for g in cd8NRlen},index=tests)

In [None]:
nrhigh.transpose().to_csv(figdir+'DEanalysis_wilcoxon.cd8tcells-noNKT-TILonly.NRhigh.tsv',sep='\t')
rhigh.transpose().to_csv(figdir+'DEanalysis_wilcoxon.cd8tcells-noNKT-TILonly.Rhigh.tsv',sep='\t')

In [None]:
### Prepare means
condition='RCat'
#condlist=list(set(cdata.obs[condition]))
condlist=['R','TF','NR_nadj','NR_adj']
mean,fct=bc.get_means(cdata,'PatientID', condition)

In [None]:
bc.pl.box_per_ind(mean, ['XCL1','XCL2','MIF','HLA-DQA2','IL7R','KLRC2','AMICA1','SPRY1','PLAUR'],
                  condition,order=condlist)

In [None]:
bc.pl.box_per_ind(mean, ['ATF3','ENTPD1','GZMK','DNAJA1','ZNF331','IFNG','FAS','CRTAM'],
                  condition,order=condlist)

In [None]:
sc.pl.matrixplot(cdata,var_names=list(strDE2.intersection(set(litgoi).union(cd8Rstr)))+list(strDE.intersection(set(litgoi).union(cd8NRstr))),
                 standard_scale='var',groupby='RCat',use_raw=True)


In [None]:
sc.pl.matrixplot(cdata,var_names=list(strDE2.intersection(set(litgoi).union(cd8Rstr)))+list(strDE.intersection(set(litgoi).union(cd8NRstr))),
                 standard_scale='var',groupby='RCat',use_raw=True)


In [None]:
sc.pl.matrixplot(cdata,var_names=list(strDE2.intersection(set(litgoi)))+list(strDE.intersection(set(litgoi))),
                 standard_scale='var',groupby='RCat',use_raw=True)


In [None]:
from matplotlib import rcParams
rcParams['figure.figsize'] = 5,4
#sc.pl.violin(cd8tcell,keys=['XCL1','WDR74','YBX1','KLRC1','AMICA1','TNFRSF18'], group_by='Respond')
df = sc.get.obs_df(cd8tcell[cd8tcell.obs['Adjuvant']=='ad'], ['XCL1','WDR74','KLRC1','AMICA1','TNFRSF18','RCat'])
df = df.set_index('RCat').stack().reset_index()
df.columns = ['RCat', 'gene', 'value']
sns.violinplot(data=df, x='gene', y='value', hue="RCat",split=True, inner="quart", linewidth=1)           

In [None]:
df = sc.get.obs_df(cd8tcell[cd8tcell.obs['Adjuvant']=='nad'], ['XCL1','WDR74','KLRC1','AMICA1','TNFRSF18','RCat'])
df = df.set_index('RCat').stack().reset_index()
df.columns = ['RCat', 'gene', 'value']
sns.violinplot(data=df, x='gene', y='value', hue="RCat",split=True, inner="quart", linewidth=1)           

In [None]:
sc.pl.dotplot(cd8tcell,var_names=list(cd8Rstr),
              groupby='RCat',use_raw=True)

##### Same but only in exhausted-like populations

In [None]:
set(tildata.obs['celltype3_pub'])

In [None]:
# NOTE(review): the first selection below is overwritten by the second one before it is
# used, so cdata ends up holding the broader list (which also contains NK T and CD4
# populations). The section title and the 'exhcd8tcells-noNKT' output file names further
# down suggest the narrower selection may have been intended - confirm which one is meant.
cdata=tildata[tildata.obs['celltype3_pub'].isin(['exhausted-like CD8-positive T cell',
                                                'cytokine secreting effector CD8-positive T cell',
                                                'proliferating CD8-positive T cell'])].copy()
cdata=tildata[tildata.obs['celltype3_pub'].isin([
 'cytokine secreting effector CD8-positive T cell',
 'effector memory CD8-positive T cell',
 'exhausted-like CD8-positive T cell',
 'naive CD8-positive T cell',
 'proliferating CD8-positive T cell' ,'mature NK T cell',
'CD4-positive, cytotoxic T cell','IL7R-max CD4-positive T cell',
    'exhausted-like CD4-positive T cell',])].copy()


In [None]:
sc.pl.dotplot(cdata,var_names=['XCL1','NAMPT'],
              groupby='RCat',use_raw=True)

In [None]:
# Thresholds shared by every DE comparison in this cell.
# NOTE(review): logfc is given as a natural log, np.log(1.5), while the result tables
# expose a 'Log2FC' column - confirm whether np.log2(1.5) was intended.
de_kwargs=dict(topnr=5000, logfc=np.log(1.5), padj=0.1)

### try on individual tissues separately
b1DE=bc.tl.dge.get_de(cdata[cdata.obs['Lesion'].isin(['LN'])].copy(),'Respond',**de_kwargs)
b2DE=bc.tl.dge.get_de(cdata[cdata.obs['Lesion'].isin(['Brain'])].copy(),'Respond',**de_kwargs)
b3DE=bc.tl.dge.get_de(cdata[cdata.obs['Lesion'].isin(['Lung','Sinon','Subc'])].copy(),'Respond',**de_kwargs)

### on response categories separately 
# .copy() hands get_de an independent AnnData instead of a view, matching the
# lesion-specific calls above.
allDE1=bc.tl.dge.get_de(cdata[cdata.obs['Response3'].isin(['R','NR'])].copy(),'Respond',**de_kwargs)
allDE2=bc.tl.dge.get_de(cdata[cdata.obs['Response3'].isin(['TF','PD'])].copy(),'Respond',**de_kwargs)
allDE=bc.tl.dge.get_de(cdata,'Respond',**de_kwargs)


In [None]:
# Label -> DE result for every comparison that was run
testlist={
    'LN-both':b1DE,
    'Brain-both':b2DE,
    'Other-both':b3DE,
    'Nadj':allDE1,
    'Adj':allDE2,
    'All':allDE,
}
### Intersection of all combinations
# Strict consensus: a gene has to be reported by every one of the six comparisons.
all_comparisons=(b1DE,allDE,b2DE,b3DE,allDE1,allDE2)
strDE=set.intersection(*[set(de['PD']['Name']) for de in all_comparisons])
strDE2=set.intersection(*[set(de['R']['Name']) for de in all_comparisons])


In [None]:
strDE

In [None]:
# Keep the strict consensus under its own names before strDE/strDE2 are redefined below
cd8NRstr,cd8Rstr=strDE,strDE2

### Complete comparison +  LN only | Brain | Others + RvsNR | PDvsTF
# Lenient consensus: full comparison AND any lesion-specific comparison AND any
# response-category comparison.
any_lesion_pd=set(b2DE['PD']['Name']) | set(b3DE['PD']['Name']) | set(b1DE['PD']['Name'])
any_category_pd=set(allDE1['PD']['Name']) | set(allDE2['PD']['Name'])
strDE=set(allDE['PD']['Name']) & any_lesion_pd & any_category_pd

any_lesion_r=set(b2DE['R']['Name']) | set(b3DE['R']['Name']) | set(b1DE['R']['Name'])
any_category_r=set(allDE1['R']['Name']) | set(allDE2['R']['Name'])
strDE2=set(allDE['R']['Name']) & any_lesion_r & any_category_r


In [None]:
cd8NRlen=strDE
cd8Rlen=strDE2

# Presence/absence tables: one row per DE comparison in testlist, one column per gene;
# 1 when the gene is listed in that comparison's 'R' (rhigh) or 'PD' (nrhigh) table.
tests=list(testlist.keys())

# Build each comparison's gene set once instead of once per gene.
r_sets={t:set(testlist[t]['R']['Name']) for t in tests}
# index= is passed to the constructor so the comparison rows exist even when the gene
# set is empty; assigning .index afterwards fails on an empty frame (length mismatch).
rhigh=pd.DataFrame({g:[int(g in r_sets[t]) for t in tests] for g in cd8Rlen},index=tests)

nr_sets={t:set(testlist[t]['PD']['Name']) for t in tests}
nrhigh=pd.DataFrame({g:[int(g in nr_sets[t]) for t in tests] for g in cd8NRlen},index=tests)


In [None]:
nrhigh.transpose()

In [None]:
set(rhigh.transpose().index)

In [None]:
set(rhigh.transpose().index).intersection(['XCL1'])

In [None]:
nrhigh.transpose().to_csv(figdir+'DEanalysis_wilcoxon.exhcd8tcells-noNKT-TILonly.NRhigh.tsv',sep='\t')
rhigh.transpose().to_csv(figdir+'DEanalysis_wilcoxon.exhcd8tcells-noNKT-TILonly.Rhigh.tsv',sep='\t')

## Validation data: Sade-Feldman et al. melanoma tumors (GSE120575)


In [None]:
mdata=sc.read('/scseq/oncology/GSE120575/analyzed/GSE120575_fromTPM_authors/GSE120575_fromTPM_authors.h5ad')

#set(mdata.obs['Cluster number'])
mygoi=['AMICA1','KLRC2','NR4A2','ATF3','HLA-DRB5','IFNG','ENTPD1','TCF7','TOX','SELL','LEF1','XCL1','DNAJB1','ZNF331','DUSP2','GZMK']


In [None]:
sc.pl.umap(mdata,color='louvain', legend_loc='on data')

In [None]:
cd8=mdata[mdata.obs['louvain'].isin(['3','5','1','8','6','2'])].copy()
cd8exh=mdata[mdata.obs['louvain'].isin(['3','5','1','8'])].copy()

In [None]:
sc.pl.umap(mdata,color=['Cluster number','characteristics: response','t_treatment','Patient'])

In [None]:
sc.pl.umap(mdata,color=['CD3D','CD8A','CD4','CD8B','IL7R','KLRF1','PIK3R1','TNFRSF18',
                        'TCF7','SELL','CXCR5','ENTPD1','ZNF331'])

In [None]:
pret=cd8[cd8.obs['t_treatment']=='Pre'].copy() #,
pretexh=cd8exh[cd8exh.obs['t_treatment']=='Pre'].copy() #,
#pret=pret[pret.obs['characteristics: therapy'].isin([ 'anti-PD1'])].copy()
#set(cdata.obs['characteristics: therapy']) , 'G7 - Regulatory Tcells' ,
#myc=['G11 - Lymphocytes exhausted/ Cell cycle','G5 - Lymphocytes','G6 - Exhausted CD8 Tcells','G8 - Cytotoxicity','G9 - Exhausted/HS CD8 Tcells','G10 - Memory Tcell']
#pret=pret[pret.obs['Cluster number'].isin(myc)].copy()


In [None]:
### Equalise the number of cells per donor by subsampling
cellnrs=bc.tl.count_occurance_subset_conditions(pret, subset_variable = 'Patient', count_variable = 't_treatment', condition_identifier = 'characteristics: response',  return_percentage = False)

# Upper bound on the cells kept per patient. It is the same for every patient, so it is
# computed once instead of inside the loop.
# NOTE(review): assumes the first entry of the transposed column means is the average
# cell count per patient - confirm against the layout of cellnrs.
maxcells=cellnrs.transpose().mean()[0]

#### Subsample to account for distinct cell nrs. per patient
myindex=[]
mylen=[]
# sorted() fixes the patient order; iterating a plain set of strings can change between
# kernel sessions, which changed the cell order of the resulting object.
myp=sorted(set(pret.obs['Patient']))
for i in myp:
    temp=pret[pret.obs['Patient']==i].copy()
    mylen.append(temp.n_obs)   # cell count per patient before subsampling
    sc.pp.subsample(temp,n_obs=int(np.min([temp.n_obs,maxcells])))
    myindex.append(temp.obs.index.values)

# Concatenate the per-patient barcode arrays into one flat list
flatten=lambda l: [item for sub in l for item in sub]

pret=pret[flatten(myindex)]


In [None]:
sc.pl.dotplot(pret,mygoi, groupby='characteristics: response')

cdata=pret.copy()



In [None]:
### treatment DE
treatDE=bc.tl.dge.get_de(cd8,'t_treatment',topnr=5000, logfc=np.log(1.5),padj=0.1)
### on response categories separately 
#pretDEexh=bc.tl.dge.get_de(pretexh,'characteristics: response',topnr=5000, logfc=np.log(1.5),padj=0.1)


In [None]:
list(treatDE['Post']['Name'])

In [None]:
list(treatDE['Pre']['Name'])

In [None]:
### on response categories separately 
pretDE=bc.tl.dge.get_de(pret,'characteristics: response',topnr=5000, logfc=np.log(1.5),padj=0.1)
### on response categories separately 
pretDEexh=bc.tl.dge.get_de(pretexh,'characteristics: response',topnr=5000, logfc=np.log(1.5),padj=0.1)


In [None]:
set(pretDE['Responder']['Name']).intersection(cd8Rlen)

In [None]:
#set(pretDEexh['Responder']['Name']).intersection(cd8Rlen)

In [None]:
set(pretDE['Responder']['Name']).intersection(cd8NRlen)

In [None]:
#set(pretDEexh['Responder']['Name']).intersection(cd8NRlen)

In [None]:
len(set(pretDE['Non-responder']['Name']).intersection(cd8NRlen))

In [None]:
#len(set(pretDEexh['Non-responder']['Name']).intersection(cd8NRlen))

In [None]:
len(set(pretDE['Non-responder']['Name']).intersection(cd8Rlen))

In [None]:
#set(pretDEexh['Non-responder']['Name']).intersection(cd8Rlen)

In [None]:
set(pretDE['Non-responder']['Name']).intersection(cd8Rlen)

In [None]:
set(pretDE['Non-responder']['Name']).intersection(cd8NRlen)

In [None]:
what='characteristics: response'
bywhat='Patient'
average_obs,fraction_obs=bc.get_means(cdata,bywhat, what)

In [None]:
bc.pl.box_per_ind(average_obs, ['AMICA1', 'XCL1','KLRC2','SELL','TCF7', 'IL7R' ],what)

In [None]:
bc.pl.box_per_ind(average_obs, ['ATF3','DNAJA1','GZMK','DUSP2','HSPA1A','ENTPD1' ,'PDCD1','CD38','HAVCR2','ZNF331'],what)

### CD4 T cell DE


In [None]:
cdata=cd4tcell.copy()

# Thresholds shared by every DE comparison in this cell.
# NOTE(review): logfc is given as a natural log, np.log(1.5), while the result tables
# expose a 'Log2FC' column - confirm whether np.log2(1.5) was intended.
de_kwargs=dict(topnr=5000, logfc=np.log(1.5), padj=0.1)

### try on individual tissues separately
b1DE=bc.tl.dge.get_de(cdata[cdata.obs['Lesion'].isin(['LN'])].copy(),'Respond',**de_kwargs)
b2DE=bc.tl.dge.get_de(cdata[cdata.obs['Lesion'].isin(['Brain'])].copy(),'Respond',**de_kwargs)
b3DE=bc.tl.dge.get_de(cdata[cdata.obs['Lesion'].isin(['Lung','Sinon','Subc'])].copy(),'Respond',**de_kwargs)

### on response categories separately
# .copy() hands get_de an independent AnnData instead of a view, matching the
# lesion-specific calls above.
allDE1=bc.tl.dge.get_de(cdata[cdata.obs['Response3'].isin(['R','NR'])].copy(),'Respond',**de_kwargs)
allDE2=bc.tl.dge.get_de(cdata[cdata.obs['Response3'].isin(['TF','PD'])].copy(),'Respond',**de_kwargs)
allDE=bc.tl.dge.get_de(cdata,'Respond',**de_kwargs)


In [None]:
### Intersection of all combinations
# Strict consensus: a gene has to be reported by every one of the six comparisons.
all_comparisons=(b1DE,allDE,b2DE,b3DE,allDE1,allDE2)
strDE=set.intersection(*[set(de['PD']['Name']) for de in all_comparisons])
strDE2=set.intersection(*[set(de['R']['Name']) for de in all_comparisons])



strDE



In [None]:
strDE2


In [None]:
# Label -> DE result for every comparison that was run
testlist={
    'LN-both':b1DE,
    'Brain-both':b2DE,
    'Other-both':b3DE,
    'Nadj':allDE1,
    'Adj':allDE2,
    'All':allDE,
}

In [None]:
# Keep the strict (all-comparisons) consensus before strDE/strDE2 are redefined below
cd4NRstr=strDE
cd4Rstr=strDE2

### Complete comparison + LN only | Brain | Others + RvsNR | PDvsTF
# Lenient consensus: keep a gene when it is DE in the full comparison (allDE), in at
# least one of the lesion-specific comparisons (b1DE, b2DE, b3DE) and in at least one
# of the two response-category comparisons (allDE1, allDE2).
# The stricter "LN only + Brain | Others" variant that used to be computed first was
# overwritten before any use and has been removed as dead code.
strDE=(set(allDE['PD']['Name'])
       & (set(b1DE['PD']['Name']) | set(b2DE['PD']['Name']) | set(b3DE['PD']['Name']))
       & (set(allDE1['PD']['Name']) | set(allDE2['PD']['Name'])))
strDE2=(set(allDE['R']['Name'])
        & (set(b1DE['R']['Name']) | set(b2DE['R']['Name']) | set(b3DE['R']['Name']))
        & (set(allDE1['R']['Name']) | set(allDE2['R']['Name'])))


strDE


In [None]:
strDE2


In [None]:
cd4NRlen=strDE
cd4Rlen=strDE2
gchoice=list(cd4Rstr)

sc.pl.stacked_violin(cdata,var_names=gchoice, 
                     groupby='RCat',use_raw=True)



In [None]:
sc.pl.dotplot(cdata,var_names=gchoice,groupby='RCat',use_raw=True)


In [None]:
# Presence/absence tables: one row per DE comparison in testlist, one column per gene;
# 1 when the gene is listed in that comparison's 'R' (rhigh) or 'PD' (nrhigh) table.
tests=list(testlist.keys())

# Build each comparison's gene set once instead of once per gene.
r_sets={t:set(testlist[t]['R']['Name']) for t in tests}
# index= is passed to the constructor so the comparison rows exist even when the gene
# set is empty; assigning .index afterwards fails on an empty frame (length mismatch).
rhigh=pd.DataFrame({g:[int(g in r_sets[t]) for t in tests] for g in cd4Rlen},index=tests)

nr_sets={t:set(testlist[t]['PD']['Name']) for t in tests}
nrhigh=pd.DataFrame({g:[int(g in nr_sets[t]) for t in tests] for g in cd4NRlen},index=tests)

# Export with genes as rows and comparisons as columns
nrhigh.transpose().to_csv(figdir+'DEanalysis_wilcoxon.cd4tcells-TILonly.NRhigh.tsv',sep='\t')
rhigh.transpose().to_csv(figdir+'DEanalysis_wilcoxon.cd4tcells-TILonly.Rhigh.tsv',sep='\t')

### CD4 T cell DE (all)


In [None]:


cdata=cd4tcell.copy()

# Thresholds shared by every DE comparison in this cell.
# NOTE(review): logfc is given as a natural log, np.log(1.5), while the result tables
# expose a 'Log2FC' column - confirm whether np.log2(1.5) was intended.
de_kwargs=dict(topnr=5000, logfc=np.log(1.5), padj=0.1)

### try on individual tissues separately
b1DE=bc.tl.dge.get_de(cdata[cdata.obs['Lesion'].isin(['LN'])].copy(),'Respond',**de_kwargs)
b2DE=bc.tl.dge.get_de(cdata[cdata.obs['Lesion'].isin(['Brain'])].copy(),'Respond',**de_kwargs)
b3DE=bc.tl.dge.get_de(cdata[cdata.obs['Lesion'].isin(['Lung','Sinon','Subc'])].copy(),'Respond',**de_kwargs)

### on response categories separately
# .copy() hands get_de an independent AnnData instead of a view, matching the
# lesion-specific calls above.
allDE1=bc.tl.dge.get_de(cdata[cdata.obs['Response3'].isin(['R','NR'])].copy(),'Respond',**de_kwargs)
allDE2=bc.tl.dge.get_de(cdata[cdata.obs['Response3'].isin(['TF','PD'])].copy(),'Respond',**de_kwargs)
allDE=bc.tl.dge.get_de(cdata,'Respond',**de_kwargs)


### Intersection of all combinations
strDE=set(b1DE['PD']['Name']).intersection(set(allDE['PD']['Name'])).intersection((set(b2DE['PD']['Name'])).intersection(set(b3DE['PD']['Name']))).intersection((set(allDE1['PD']['Name'])).intersection(set(allDE2['PD']['Name'])))
strDE2=set(b1DE['R']['Name']).intersection(set(allDE['R']['Name'])).intersection((set(b2DE['R']['Name'])).intersection(set(b3DE['R']['Name']))).intersection((set(allDE1['R']['Name'])).intersection(set(allDE2['R']['Name'])))



strDE



strDE2


testlist={}
testlist['LN-both']=b1DE
testlist['Brain-both']=b2DE
testlist['Other-both']=b3DE
testlist['Nadj']=allDE1
testlist['Adj']=allDE2
testlist['All']=allDE

cd4NRstr=strDE
cd4Rstr=strDE2

### Complete comparison + LN only + Brain | Others + RvsNR | PDvsTF
strDE=set(b1DE['PD']['Name']).intersection(set(allDE['PD']['Name'])).intersection((set(b2DE['PD']['Name'])).union(set(b3DE['PD']['Name']))).intersection((set(allDE1['PD']['Name'])).union(set(allDE2['PD']['Name'])))
strDE2=set(b1DE['R']['Name']).intersection(set(allDE['R']['Name'])).intersection((set(b2DE['R']['Name'])).union(set(b3DE['R']['Name']))).intersection((set(allDE1['R']['Name'])).union(set(allDE2['R']['Name'])))

strDE


strDE2


cd4NRlen=strDE
cd4Rlen=strDE2
gchoice=list(cd4Rstr)

sc.pl.stacked_violin(cdata,var_names=gchoice, 
                     groupby='RCat',use_raw=True)



sc.pl.dotplot(cdata,var_names=gchoice,groupby='RCat',use_raw=True)


# Presence/absence tables: one row per DE comparison in testlist, one column per gene;
# 1 when the gene is listed in that comparison's 'R' (rhigh) or 'PD' (nrhigh) table.
tests=list(testlist.keys())

# Build each comparison's gene set once instead of once per gene.
r_sets={t:set(testlist[t]['R']['Name']) for t in tests}
# index= is passed to the constructor so the comparison rows exist even when the gene
# set is empty; assigning .index afterwards fails on an empty frame (length mismatch).
rhigh=pd.DataFrame({g:[int(g in r_sets[t]) for t in tests] for g in cd4Rlen},index=tests)

nr_sets={t:set(testlist[t]['PD']['Name']) for t in tests}
nrhigh=pd.DataFrame({g:[int(g in nr_sets[t]) for t in tests] for g in cd4NRlen},index=tests)

# NOTE(review): these are the same file names the previous "CD4 T cell DE" section
# writes, so running this cell overwrites those tables - confirm that is intended or
# give this export its own name.
nrhigh.transpose().to_csv(figdir+'DEanalysis_wilcoxon.cd4tcells-TILonly.NRhigh.tsv',sep='\t')
rhigh.transpose().to_csv(figdir+'DEanalysis_wilcoxon.cd4tcells-TILonly.Rhigh.tsv',sep='\t')

### cDC1 and aDC



In [None]:
cdata=cdc1.copy()

# Thresholds shared by every DE comparison in this cell.
# NOTE(review): logfc is given as a natural log, np.log(1.5), while the result tables
# expose a 'Log2FC' column - confirm whether np.log2(1.5) was intended.
de_kwargs=dict(topnr=5000, logfc=np.log(1.5), padj=0.1)

### try on individual tissues separately
b1DE=bc.tl.dge.get_de(cdata[cdata.obs['Lesion'].isin(['LN'])].copy(),'Respond',**de_kwargs)
b2DE=bc.tl.dge.get_de(cdata[cdata.obs['Lesion'].isin(['Brain'])].copy(),'Respond',**de_kwargs)
b3DE=bc.tl.dge.get_de(cdata[cdata.obs['Lesion'].isin(['Lung','Sinon','Subc'])].copy(),'Respond',**de_kwargs)

### on response categories separately
# .copy() hands get_de an independent AnnData instead of a view, matching the
# lesion-specific calls above.
allDE1=bc.tl.dge.get_de(cdata[cdata.obs['Response3'].isin(['R','NR'])].copy(),'Respond',**de_kwargs)
allDE2=bc.tl.dge.get_de(cdata[cdata.obs['Response3'].isin(['TF','PD'])].copy(),'Respond',**de_kwargs)
allDE=bc.tl.dge.get_de(cdata,'Respond',**de_kwargs)


### Intersection of all combinations
strDE=set(b1DE['PD']['Name']).intersection(set(allDE['PD']['Name'])).intersection((set(b2DE['PD']['Name'])).intersection(set(b3DE['PD']['Name']))).intersection((set(allDE1['PD']['Name'])).intersection(set(allDE2['PD']['Name'])))
strDE2=set(b1DE['R']['Name']).intersection(set(allDE['R']['Name'])).intersection((set(b2DE['R']['Name'])).intersection(set(b3DE['R']['Name']))).intersection((set(allDE1['R']['Name'])).intersection(set(allDE2['R']['Name'])))



strDE


In [None]:
strDE2


In [None]:
### Complete comparison + LN only | Brain | Others + RvsNR | PDvsTF
# Lenient consensus: keep a gene when it is DE in the full comparison (allDE), in at
# least one of the lesion-specific comparisons (b1DE, b2DE, b3DE) and in at least one
# of the two response-category comparisons (allDE1, allDE2).
# The stricter "LN only + Brain | Others" variant that used to be computed first was
# overwritten before any use and has been removed as dead code.
strDE=(set(allDE['PD']['Name'])
       & (set(b1DE['PD']['Name']) | set(b2DE['PD']['Name']) | set(b3DE['PD']['Name']))
       & (set(allDE1['PD']['Name']) | set(allDE2['PD']['Name'])))
strDE2=(set(allDE['R']['Name'])
        & (set(b1DE['R']['Name']) | set(b2DE['R']['Name']) | set(b3DE['R']['Name']))
        & (set(allDE1['R']['Name']) | set(allDE2['R']['Name'])))


strDE


In [None]:
strDE2


In [None]:
cdc1NRlen=strDE
cdc1Rlen=strDE2

sc.pl.stacked_violin(cdata,var_names=['XCR1','CLEC9A'], 
                     groupby='RCat',use_raw=True)



In [None]:
# gchoice is not assigned anywhere in the cDC1 section, so this plot silently reused
# the CD4 gene list left over from an earlier cell. Use the cDC1 responder-high genes,
# mirroring how the cDC section derives gchoice from its own gene set.
# NOTE(review): confirm this is the gene list intended for the cDC1 dot plot.
gchoice=list(cdc1Rlen)
sc.pl.dotplot(cdata,var_names=gchoice,
              groupby='RCat',use_raw=True)


In [None]:
# Label -> DE result for every comparison that was run
testlist={
    'LN-both':b1DE,
    'Brain-both':b2DE,
    'Other-both':b3DE,
    'Nadj':allDE1,
    'Adj':allDE2,
    'All':allDE,
}

In [None]:
# Presence/absence tables: one row per DE comparison in testlist, one column per gene;
# 1 when the gene is listed in that comparison's 'R' (rhigh) or 'PD' (nrhigh) table.
tests=list(testlist.keys())

# Build each comparison's gene set once instead of once per gene.
r_sets={t:set(testlist[t]['R']['Name']) for t in tests}
# index= is passed to the constructor so the comparison rows exist even when the gene
# set is empty; assigning .index afterwards fails on an empty frame (length mismatch).
rhigh=pd.DataFrame({g:[int(g in r_sets[t]) for t in tests] for g in cdc1Rlen},index=tests)

nr_sets={t:set(testlist[t]['PD']['Name']) for t in tests}
nrhigh=pd.DataFrame({g:[int(g in nr_sets[t]) for t in tests] for g in cdc1NRlen},index=tests)

#nrhigh.transpose().to_csv(figdir+'DEanalysis_wilcoxon.cdc-TILonly.NRhigh.tsv',sep='\t')
#rhigh.transpose().to_csv(figdir+'DEanalysis_wilcoxon.cdc-TILonly.Rhigh.tsv',sep='\t')


In [None]:
rhigh

In [None]:
nrhigh

In [None]:
    subsetleg='cdc1'
    import gseapy

    # Enrichr libraries that are queried one after the other
    dbs=['KEGG_2021_Human','GO_Biological_Process_2021','Reactome_2016',
     'Human_Gene_Atlas','WikiPathways_2019_Human','ChEA_2016',
     'CellMarker_Augmented_2021','Azimuth_Cell_Types_2021','COVID-19_Related_Gene_Sets_2021',
     'MSigDB_Hallmark_2020']
    # Mean raw expression per gene. np.asarray(...).ravel() gives one value per gene for
    # sparse and dense raw.X alike; .tolist()[0] returned a single scalar for dense input.
    cdata.raw.var['MeanExpr']=np.asarray(cdata.raw.X.mean(axis=0)).ravel()

    # Background = genes with mean raw expression above 0.01; it does not depend on the
    # library, so it is built once instead of once per enrichr call.
    background=list(cdata.raw.var.loc[cdata.raw.var['MeanExpr']>0.01]['SYMBOL'])
    gene_sets_to_test=(('Rhigh',list(rhigh.columns)),('NRhigh',list(nrhigh.columns)))
    for j in dbs:
        for tag,genes in gene_sets_to_test:
            # gene lists of 10 or fewer genes are not tested
            if len(genes)>10:
                gseapy.enrichr(gene_list=genes,
                               description='DEanalysis_wilcoxon.'+subsetleg+'-TILonly.'+tag+'.tsv',
                               gene_sets=j, background=background,cutoff=0.05, format='png',
                               outdir=figdir+'enrichr/'+subsetleg+'-TILonly.'+tag+'/')

### cDC DE


In [None]:
cdata=cdc.copy()

# Thresholds shared by every DE comparison in this cell.
# NOTE(review): logfc is given as a natural log, np.log(1.5), while the result tables
# expose a 'Log2FC' column - confirm whether np.log2(1.5) was intended.
de_kwargs=dict(topnr=5000, logfc=np.log(1.5), padj=0.1)

### try on individual tissues separately
b1DE=bc.tl.dge.get_de(cdata[cdata.obs['Lesion'].isin(['LN'])].copy(),'Respond',**de_kwargs)
b2DE=bc.tl.dge.get_de(cdata[cdata.obs['Lesion'].isin(['Brain'])].copy(),'Respond',**de_kwargs)
b3DE=bc.tl.dge.get_de(cdata[cdata.obs['Lesion'].isin(['Lung','Sinon','Subc'])].copy(),'Respond',**de_kwargs)

### on response categories separately
# .copy() hands get_de an independent AnnData instead of a view, matching the
# lesion-specific calls above.
allDE1=bc.tl.dge.get_de(cdata[cdata.obs['Response3'].isin(['R','NR'])].copy(),'Respond',**de_kwargs)
allDE2=bc.tl.dge.get_de(cdata[cdata.obs['Response3'].isin(['TF','PD'])].copy(),'Respond',**de_kwargs)
allDE=bc.tl.dge.get_de(cdata,'Respond',**de_kwargs)


### Intersection of all combinations
strDE=set(b1DE['PD']['Name']).intersection(set(allDE['PD']['Name'])).intersection((set(b2DE['PD']['Name'])).intersection(set(b3DE['PD']['Name']))).intersection((set(allDE1['PD']['Name'])).intersection(set(allDE2['PD']['Name'])))
strDE2=set(b1DE['R']['Name']).intersection(set(allDE['R']['Name'])).intersection((set(b2DE['R']['Name'])).intersection(set(b3DE['R']['Name']))).intersection((set(allDE1['R']['Name'])).intersection(set(allDE2['R']['Name'])))



strDE


In [None]:
strDE2


In [None]:
cdcNRstr=strDE
cdcRstr=strDE2


In [None]:
### Complete comparison + LN only | Brain | Others + RvsNR | PDvsTF
# Lenient consensus: keep a gene when it is DE in the full comparison (allDE), in at
# least one of the lesion-specific comparisons (b1DE, b2DE, b3DE) and in at least one
# of the two response-category comparisons (allDE1, allDE2).
# The stricter "LN only + Brain | Others" variant that used to be computed first was
# overwritten before any use and has been removed as dead code.
strDE=(set(allDE['PD']['Name'])
       & (set(b1DE['PD']['Name']) | set(b2DE['PD']['Name']) | set(b3DE['PD']['Name']))
       & (set(allDE1['PD']['Name']) | set(allDE2['PD']['Name'])))
strDE2=(set(allDE['R']['Name'])
        & (set(b1DE['R']['Name']) | set(b2DE['R']['Name']) | set(b3DE['R']['Name']))
        & (set(allDE1['R']['Name']) | set(allDE2['R']['Name'])))


strDE


In [None]:
strDE2


In [None]:
# Label -> DE result for every comparison that was run
testlist={
    'LN-both':b1DE,
    'Brain-both':b2DE,
    'Other-both':b3DE,
    'Nadj':allDE1,
    'Adj':allDE2,
    'All':allDE,
}

In [None]:
cdcNRlen=strDE
cdcRlen=strDE2
gchoice=list(cdcRlen)

sc.pl.stacked_violin(cdata,var_names=gchoice, 
                     groupby='RCat',use_raw=True)


In [None]:
sc.pl.dotplot(cdata,var_names=gchoice,groupby='RCat',use_raw=True)


In [None]:
allDE['PD'].index=list(allDE['PD']['Name'])
allDE['PD'].loc[list(strDE),:].sort_values('Log2FC',ascending=False)

In [None]:
allDE['R'].index=list(allDE['R']['Name'])
allDE['R'].loc[list(strDE2),:].sort_values('Log2FC',ascending=False)

In [None]:
#allDE['PD'].loc[list(strDE),:].sort_values('Log2FC',ascending=False).to_csv(figdir+'DEanalysis_wilcoxon.cdc-TILonly.NRhigh.tsv',sep='\t')
#allDE['R'].loc[list(strDE2),:].sort_values('Log2FC',ascending=False).to_csv(figdir+'DEanalysis_wilcoxon.cdc-TILonly.Rhigh.tsv',sep='\t')

In [None]:
# Presence/absence tables: one row per DE comparison in testlist, one column per gene;
# 1 when the gene is listed in that comparison's 'R' (rhigh) or 'PD' (nrhigh) table.
tests=list(testlist.keys())

# Build each comparison's gene set once instead of once per gene.
r_sets={t:set(testlist[t]['R']['Name']) for t in tests}
# index= is passed to the constructor so the comparison rows exist even when the gene
# set is empty; assigning .index afterwards fails on an empty frame (length mismatch).
rhigh=pd.DataFrame({g:[int(g in r_sets[t]) for t in tests] for g in cdcRlen},index=tests)

nr_sets={t:set(testlist[t]['PD']['Name']) for t in tests}
nrhigh=pd.DataFrame({g:[int(g in nr_sets[t]) for t in tests] for g in cdcNRlen},index=tests)

#nrhigh.transpose().to_csv(figdir+'DEanalysis_wilcoxon.cdc-TILonly.NRhigh.tsv',sep='\t')
#rhigh.transpose().to_csv(figdir+'DEanalysis_wilcoxon.cdc-TILonly.Rhigh.tsv',sep='\t')



In [None]:
subsetleg='cdc'
stat_cols=['Score','Log2FC','P.adj']

# Statistics are looked up by gene name. The previous positional assignment paired the
# rows of allDE (its own row order) with the rows of nrhigh.transpose() (set iteration
# order of the gene names), which attached statistics to the wrong genes.
nrhighall=nrhigh.transpose().copy()
pd_stats=allDE['PD'].drop_duplicates('Name').set_index('Name')
for col in stat_cols:
    nrhighall[col]=pd_stats.loc[nrhighall.index,col].to_numpy()
nrhighall.to_csv(figdir+'DEanalysis_wilcoxon.'+subsetleg+'-TILonly.NRhigh.tsv',sep='\t')

rhighall=rhigh.transpose().copy()
r_stats=allDE['R'].drop_duplicates('Name').set_index('Name')
for col in stat_cols:
    rhighall[col]=r_stats.loc[rhighall.index,col].to_numpy()
rhighall.to_csv(figdir+'DEanalysis_wilcoxon.'+subsetleg+'-TILonly.Rhigh.tsv',sep='\t')

# NR-high genes to plot: reported in every lesion-specific and both response-category comparisons
genestoplotnr=list(nrhighall.loc[(nrhighall['LN-both']==1)&(nrhighall['Other-both']==1)&(nrhighall['Brain-both']==1)&(nrhighall['Nadj']==1)&(nrhighall['Adj']==1),:].index)
# R-high genes to plot: reported in any lesion-specific comparison and in both response-category comparisons
genestoplotr=list(rhighall.loc[((rhighall['LN-both']==1)|(rhighall['Other-both']==1)|(rhighall['Brain-both']==1))&((rhighall['Nadj']==1)&(rhighall['Adj']==1)),:].index)

In [None]:
# Local import: mannwhitneyu is used below and the notebook header only imports
# fisher_exact from scipy.stats.
from scipy import stats

# Per-patient mean of every numeric obs column (the signature scores used below).
# numeric_only=True: pandas >= 2.0 raises on the non-numeric obs columns otherwise.
# observed=True: skip PatientID categories that have no cells in cdata.
dfc=cdata.obs.groupby('PatientID',observed=True).mean(numeric_only=True)

# Patient-level annotations are read from each patient's first cell, looked up in obs
# directly instead of subsetting the AnnData once per patient and column.
first_cell=cdata.obs.drop_duplicates('PatientID').set_index('PatientID')
for col in ['RCat','Mutation','Lesion']:
    dfc[col]=list(first_cell.loc[dfc.index,col])

dfc['nr_cells']=list(bc.tl.count_occurrence(cdata,count_variable='PatientID').loc[dfc.index,'Counts'])


mysigs=['score_Bcell_scanpy','score_NaiTcell_scanpy','TIR_NAI','SADE_B','SADE_G','TIR_CYT','TIR_EXH',
            'score_CD4Tcell_scanpy','score_CytotoxCD8Tcell_scanpy','score_ClassMonocyte_scanpy',
            'score_Myeloid_scanpy','score_cDC_scanpy','score_cDC_CCR7_scanpy','score_cDC1_scanpy',
            'score_ExhCD8Tcell_scanpy','score_RegTcell_scanpy','score_Macrophage_scanpy'
           ,'score_Macrophage_MSR1_scanpy' ,'score_Macrophage_MARCO_scanpy', 'MA-MREGDC','MA-cDC1','MA-cDC2']

# Patients shown in the plots (BRAF or NRAS); the subset is the same for every signature
mut_df=dfc.loc[dfc.loc[:,'Mutation'].isin(['BRAF','NRAS']),:]

# Mann-Whitney U p-value per signature, BRAF vs NRAS patients, plus one box/swarm plot each
pwilc={}
for myx in mysigs:
    # alternative is spelled out because the default differs between scipy versions
    pwilc[myx]=stats.mannwhitneyu(list(dfc.loc[dfc.loc[:,'Mutation'].isin(['BRAF']),myx]),
                           list(dfc.loc[dfc.loc[:,'Mutation'].isin(['NRAS']),myx]),
                           alternative='two-sided')[1]
    plt.figure(figsize=(2,3.5))
    sns.boxplot(x='Mutation',y=myx,data=mut_df,
            palette=color_dict_mut)
    sns.swarmplot(x='Mutation',y=myx,data=mut_df,
             color='black')
    #fig.figure.savefig(figdir+'/'+myx+'-per-mutation.pdf')

#pd.Series(pwilc).sort_values().to_csv(figdir+'/Signatures-per-mutation-pvals.csv')
pd.Series(pwilc).sort_values()

In [None]:
# Local import: mannwhitneyu is used below and the notebook header only imports
# fisher_exact from scipy.stats.
from scipy import stats

rcat_order=['R','TF','NR_nadj','NR_adj']
# Patients shown in the plots; the subset is the same for every signature
rcat_df=dfc.loc[dfc.loc[:,'RCat'].isin(['R','TF','NR_adj','NR_nadj']),:]

# Mann-Whitney U p-value per signature, (R, TF) vs (NR_adj, NR_nadj) patients,
# plus one box/swarm plot per signature
pwilc={}
for myx in mysigs:
    # alternative is spelled out because the default differs between scipy versions
    pwilc[myx]=stats.mannwhitneyu(list(dfc.loc[dfc.loc[:,'RCat'].isin(['R','TF']),myx]),
                           list(dfc.loc[dfc.loc[:,'RCat'].isin(['NR_adj','NR_nadj']),myx]),
                           alternative='two-sided')[1]
    plt.figure(figsize=(3.5,3.5))
    sns.boxplot(x='RCat',y=myx,data=rcat_df,
            palette=color_dict,order=rcat_order)
    sns.swarmplot(x='RCat',y=myx,data=rcat_df,
             color='black',order=rcat_order)
#    fig.figure.savefig(figdir+'/Signatures-per-response.pdf')

#pd.Series(pwilc).sort_values().to_csv(figdir+'/Signatures-per-response-pvals.csv')
pd.Series(pwilc).sort_values()

In [None]:
what='RCat'
bywhat='PatientID'
average_obs,fraction_obs=bc.get_means(cdata,bywhat, what)



In [None]:
bc.pl.box_per_ind(average_obs, ['MIF','GRN', 'PLAUR' ],what)

In [None]:
bc.pl.box_per_ind(average_obs, ['BTLA','LGALS9', 'CD83','AKIRIN2','WDR74'],what)

In [None]:
bc.pl.box_per_ind(average_obs, ['CD80',
  'RELB',
  'RELA',
  'CD274',
  'PDCD1LG2',
  'CD200',
  'FAS',
  'SOCS1',
  'SOCS2',
  'ALDH1A2'],what)

In [None]:

sc.pl.matrixplot(cdata,var_names=genestoplotr,
                 standard_scale='var',groupby='RCat',use_raw=True)


In [None]:
sc.pl.matrixplot(cdata,var_names=genestoplotnr,
                 standard_scale='var',groupby='RCat',use_raw=True)


In [None]:
sc.pl.umap(dcdata, color=['CD83', 'SELK','ATF3','ZNF331'], color_map='viridis')

In [None]:
sc.pl.umap(dcdata, color=['WDR74', 'IFITM1','SPP1','PLAC8'], color_map='viridis')

In [None]:
    # Enrichr setup for the subset labelled 'cdc': the libraries to query and the per-gene
    # mean expression later used to build the background gene list.
    subsetleg='cdc'
    import gseapy
    # gseapy.get_library_name() lists the available Enrichr libraries
    
    dbs=['KEGG_2021_Human','GO_Biological_Process_2021','Reactome_2016',
     'Human_Gene_Atlas','WikiPathways_2019_Human','ChEA_2016',
     'CellMarker_Augmented_2021','Azimuth_Cell_Types_2021','COVID-19_Related_Gene_Sets_2021',
     'MSigDB_Hallmark_2020']
    # Mean of every column of the raw matrix. np.asarray(...).ravel() gives one value per gene
    # whether .mean(axis=0) returns a 1 x n_genes matrix (scipy sparse matrix) or a 1-D array
    # (dense array / sparse array). The previous `.tolist()[0]` only worked for the 2-D case and
    # would otherwise assign the first gene's mean to every gene.
    cdata.raw.var['MeanExpr']=np.asarray(cdata.raw.X.mean(axis=0)).ravel()


In [None]:
    # Run Enrichr for the R-high and NR-high gene lists against every library in `dbs`.
    # A list is only submitted when it contains more than 10 genes.
    # The background (genes with MeanExpr > 0.01) does not change between calls, so it is
    # built once instead of once per library and gene list.
    expressed_genes=list(cdata.raw.var.loc[cdata.raw.var['MeanExpr']>0.01]['SYMBOL'])
    enrichr_runs=[
        (list(rhigh.columns),'DEanalysis_wilcoxon.dcdata-TILonly.Rhigh.tsv',figdir+'enrichr/dcdata-TILonly.Rhigh/'),
        (list(nrhigh.columns),'DEanalysis_wilcoxon.dcdata-TILonly.NRhigh.tsv',figdir+'enrichr/dcdata-TILonly.NRhigh/'),
    ]
    for j in dbs:
        for genes,label,outdir in enrichr_runs:
            if len(genes)>10:
                gseapy.enrichr(gene_list=genes, description=label,
                               gene_sets=j, background=expressed_genes,
                               cutoff=0.05, format='png', outdir=outdir)

### Macrophages 

In [None]:
subsetleg='macro'

In [None]:
cdata=macro.copy()

# Differential expression on the 'Respond' annotation, repeated on several subsets of
# `cdata`; every call shares the same thresholds.
de_settings=dict(topnr=5000, logfc=np.log(1.5), padj=0.1)

### try on individual tissues separately
lesion=cdata.obs['Lesion']
b1DE=bc.tl.dge.get_de(cdata[lesion.isin(['LN'])].copy(),'Respond',**de_settings)
b2DE=bc.tl.dge.get_de(cdata[lesion.isin(['Brain'])].copy(),'Respond',**de_settings)
b3DE=bc.tl.dge.get_de(cdata[lesion.isin(['Lung','Sinon','Subc'])].copy(),'Respond',**de_settings)

### restricted by Response3, and the complete comparison
response3=cdata.obs['Response3']
allDE1=bc.tl.dge.get_de(cdata[response3.isin(['R','NR'])],'Respond',**de_settings)
allDE2=bc.tl.dge.get_de(cdata[response3.isin(['TF','PD'])],'Respond',**de_settings)
allDE=bc.tl.dge.get_de(cdata,'Respond',**de_settings)

### Intersection of all combinations: genes reported in every one of the six comparisons
all_comparisons=(b1DE,allDE,b2DE,b3DE,allDE1,allDE2)
strDE=set.intersection(*[set(de['PD']['Name']) for de in all_comparisons])
strDE2=set.intersection(*[set(de['R']['Name']) for de in all_comparisons])

strDE


In [None]:
strDE2


In [None]:
# Label -> DE result for each comparison; the insertion order defines the row order of the
# membership matrices built from this dict.
testlist={
    'LN-both':b1DE,
    'Brain-both':b2DE,
    'Other-both':b3DE,
    'Nadj':allDE1,
    'Adj':allDE2,
    'All':allDE,
}

In [None]:
macroNRstr=strDE
macroRstr=strDE2


In [None]:
### Complete comparison + (LN | Brain | Others) + (RvsNR | PDvsTF)
# A gene is kept when it is reported in the complete comparison, in at least one of the
# lesion-specific comparisons and in at least one of the two Response3-restricted comparisons.
# A stricter variant (LN comparison mandatory) used to be computed here first, but it was
# overwritten on the very next lines and never used, so that dead computation is dropped.
strDE=(set(allDE['PD']['Name'])
       & (set(b1DE['PD']['Name']) | set(b2DE['PD']['Name']) | set(b3DE['PD']['Name']))
       & (set(allDE1['PD']['Name']) | set(allDE2['PD']['Name'])))
strDE2=(set(allDE['R']['Name'])
        & (set(b1DE['R']['Name']) | set(b2DE['R']['Name']) | set(b3DE['R']['Name']))
        & (set(allDE1['R']['Name']) | set(allDE2['R']['Name'])))


strDE


In [None]:
strDE2


macroNRlen=strDE
macroRlen=strDE2


In [None]:
# Membership matrices: one column per gene of strDE2 / strDE, one row per comparison in
# `testlist`; 1 where the gene appears in that comparison's 'R' / 'PD' table, else 0.
# Passing `index=` to the constructor (instead of assigning .index afterwards) also works
# when a gene set is empty; the name sets are built once per comparison, not once per gene.
comparisons=list(testlist.keys())

r_hits={name:set(testlist[name]['R']['Name']) for name in comparisons}
rhigh=pd.DataFrame({gene:[int(gene in r_hits[name]) for name in comparisons] for gene in strDE2},
                   index=comparisons)

pd_hits={name:set(testlist[name]['PD']['Name']) for name in comparisons}
nrhigh=pd.DataFrame({gene:[int(gene in pd_hits[name]) for name in comparisons] for gene in strDE},
                    index=comparisons)


# Attach the statistics of the complete comparison BY GENE NAME.
# The rows of nrhighall/rhighall follow the iteration order of a Python set, whereas the rows
# of allDE[...] are in the order returned by get_de. Copying the columns over positionally
# (as was done before) therefore paired each gene with another gene's Score/Log2FC/P.adj.
stat_cols=['Score','Log2FC','P.adj']

nrhighall=nrhigh.transpose().copy()
# drop_duplicates keeps the first row per gene name so the name index is unique for reindex
de_pd=allDE['PD'].drop_duplicates(subset='Name').set_index('Name')
for col in stat_cols:
    nrhighall[col]=de_pd[col].reindex(nrhighall.index).to_numpy()
nrhighall.to_csv(figdir+'DEanalysis_wilcoxon.'+subsetleg+'-TILonly.NRhigh.tsv',sep='\t')

rhighall=rhigh.transpose().copy()
de_r=allDE['R'].drop_duplicates(subset='Name').set_index('Name')
for col in stat_cols:
    rhighall[col]=de_r[col].reindex(rhighall.index).to_numpy()
rhighall.to_csv(figdir+'DEanalysis_wilcoxon.'+subsetleg+'-TILonly.Rhigh.tsv',sep='\t')

# NR-high genes to plot: flagged in all three lesion comparisons and in both Nadj and Adj.
nr_in_all=(nrhighall[['LN-both','Other-both','Brain-both','Nadj','Adj']]==1).all(axis=1)
genestoplotnr=list(nrhighall.index[nr_in_all])
# R-high genes to plot: flagged in at least one lesion comparison and in both Nadj and Adj.
r_any_lesion=(rhighall[['LN-both','Other-both','Brain-both']]==1).any(axis=1)
r_both_resp=(rhighall[['Nadj','Adj']]==1).all(axis=1)
genestoplotr=list(rhighall.index[r_any_lesion & r_both_resp])

In [None]:
genestoplotr

In [None]:
sc.pl.matrixplot(cdata,var_names=genestoplotnr,
                 standard_scale='var',groupby='RCat',use_raw=True)


In [None]:
sc.pl.matrixplot(cdata,var_names=genestoplotr,
                 standard_scale='var',groupby='RCat',use_raw=True)

In [None]:
sc.pl.umap(macrodata, color=['IFITM2', 'MT1F', 'FCN1', 'LILRA3', 
                             'GPR183', 'ZNF331','SIK1'], color_map='viridis')

In [None]:
what='RCat'
bywhat='PatientID'
average_obs,fraction_obs=bc.get_means(cdata,bywhat, what)


In [None]:
# Per-patient average expression of selected genes, grouped by `what`.
# NOTE(review): 'ATF' carries no family number here, whereas 'ATF3' is plotted elsewhere in
# this notebook — confirm the intended gene symbol.
bc.pl.box_per_ind(average_obs, ['ZNF331','CCL3', 'CCL4' , 'ATF', 'SIK1'],what)

In [None]:
rhighall

### Gene set enrichment analysis 

In [None]:
    # Enrichr analysis of the R-high / NR-high gene lists against every library in `dbs`,
    # using the genes with mean raw expression > 0.01 in `macro` as background.
    import gseapy
    # gseapy.get_library_name() lists the available Enrichr libraries
    
    dbs=['KEGG_2021_Human','GO_Biological_Process_2021','Reactome_2016',
     'Human_Gene_Atlas','WikiPathways_2019_Human','ChEA_2016',
     'CellMarker_Augmented_2021','Azimuth_Cell_Types_2021','COVID-19_Related_Gene_Sets_2021',
     'MSigDB_Hallmark_2020']
    # np.asarray(...).ravel() gives one value per gene whether .mean(axis=0) returns a
    # 1 x n_genes matrix or a 1-D array; the previous `.tolist()[0]` would assign a single
    # scalar to every gene in the 1-D case.
    macro.raw.var['MeanExpr']=np.asarray(macro.raw.X.mean(axis=0)).ravel()
    # The background does not change between calls, so build it once.
    expressed_genes=list(macro.raw.var.loc[macro.raw.var['MeanExpr']>0.01]['SYMBOL'])
    enrichr_runs=[
        (list(rhigh.columns),'DEanalysis_wilcoxon.macro-TILonly.Rhigh.tsv',figdir+'enrichr/macro-TILonly.Rhigh/'),
        (list(nrhigh.columns),'DEanalysis_wilcoxon.macro-TILonly.NRhigh.tsv',figdir+'enrichr/macro-TILonly.NRhigh/'),
    ]
    for j in dbs:
        for genes,label,outdir in enrichr_runs:
            # only lists with more than 10 genes are submitted
            if len(genes)>10:
                gseapy.enrichr(gene_list=genes, description=label,
                               gene_sets=j, background=expressed_genes,
                               cutoff=0.05, format='png', outdir=outdir)

### Myeloids

In [None]:
set(tildata.obs['celltype1'])

In [None]:
subsetleg='Myeloid'

In [None]:
# Myeloid subset: keep the cells annotated as 'myeloid leukocyte' in celltype1.
# The filter previously used `!=`, which selected every cell EXCEPT the myeloid ones although
# the results are labelled and exported as 'Myeloid'/'myelo'.
# NOTE(review): confirm against set(tildata.obs['celltype1']) that this is the intended label.
cdata=tildata[tildata.obs['celltype1']=='myeloid leukocyte'].copy()
### try on individual tissues separately
b1DE=bc.tl.dge.get_de(cdata[cdata.obs['Lesion'].isin(['LN'])].copy(),'Respond',topnr=5000, logfc=np.log(1.5),padj=0.1)
b2DE=bc.tl.dge.get_de(cdata[cdata.obs['Lesion'].isin(['Brain'])].copy(),'Respond',topnr=5000, logfc=np.log(1.5),padj=0.1)
b3DE=bc.tl.dge.get_de(cdata[cdata.obs['Lesion'].isin(['Lung','Sinon','Subc'])].copy(),'Respond',topnr=5000, logfc=np.log(1.5),padj=0.1)


In [None]:
### on response categories separately 
allDE1=bc.tl.dge.get_de(cdata[cdata.obs['Response3'].isin(['R','NR'])],'Respond',topnr=5000, logfc=np.log(1.5),padj=0.1)
allDE2=bc.tl.dge.get_de(cdata[cdata.obs['Response3'].isin(['TF','PD'])],'Respond',topnr=5000, logfc=np.log(1.5),padj=0.1)
allDE=bc.tl.dge.get_de(cdata,'Respond',topnr=5000, logfc=np.log(1.5),padj=0.1)


In [None]:
# Label -> DE result for each comparison (insertion order is reused as row order later).
testlist={}
testlist['LN-both']=b1DE
testlist['Brain-both']=b2DE
testlist['Other-both']=b3DE
testlist['Nadj']=allDE1
testlist['Adj']=allDE2
testlist['All']=allDE
### Complete comparison + LN only + (Brain | Others) + (RvsNR | PDvsTF)
# Not a plain intersection of all six comparisons: a gene must be in the LN and the complete
# comparison, in Brain OR Others (b2DE/b3DE), and in at least one of allDE1/allDE2.
strDE=set(b1DE['PD']['Name']).intersection(set(allDE['PD']['Name'])).intersection((set(b2DE['PD']['Name'])).union(set(b3DE['PD']['Name']))).intersection((set(allDE1['PD']['Name'])).union(set(allDE2['PD']['Name'])))
strDE2=set(b1DE['R']['Name']).intersection(set(allDE['R']['Name'])).intersection((set(b2DE['R']['Name'])).union(set(b3DE['R']['Name']))).intersection((set(allDE1['R']['Name'])).union(set(allDE2['R']['Name'])))

# Displayed as the cell output: the 'PD' gene set.
strDE


In [None]:
strDE2

In [None]:
# Keep the gene sets computed so far under their own names before recomputing strDE/strDE2.
myeloNRstr=strDE
myeloRstr=strDE2

### Complete comparison + (LN | Brain | Others) + (RvsNR | PDvsTF)
strDE=(set(allDE['PD']['Name'])
       & (set(b2DE['PD']['Name']) | set(b3DE['PD']['Name']) | set(b1DE['PD']['Name']))
       & (set(allDE1['PD']['Name']) | set(allDE2['PD']['Name'])))
strDE2=(set(allDE['R']['Name'])
        & (set(b2DE['R']['Name']) | set(b3DE['R']['Name']) | set(b1DE['R']['Name']))
        & (set(allDE1['R']['Name']) | set(allDE2['R']['Name'])))

myeloNRlen=strDE
myeloRlen=strDE2

In [None]:
# Membership matrices: one column per gene of strDE2 / strDE, one row per comparison in
# `testlist`; 1 where the gene appears in that comparison's 'R' / 'PD' table, else 0.
# Passing `index=` to the constructor (instead of assigning .index afterwards) also works
# when a gene set is empty; the name sets are built once per comparison, not once per gene.
comparisons=list(testlist.keys())

r_hits={name:set(testlist[name]['R']['Name']) for name in comparisons}
rhigh=pd.DataFrame({gene:[int(gene in r_hits[name]) for name in comparisons] for gene in strDE2},
                   index=comparisons)

pd_hits={name:set(testlist[name]['PD']['Name']) for name in comparisons}
nrhigh=pd.DataFrame({gene:[int(gene in pd_hits[name]) for name in comparisons] for gene in strDE},
                    index=comparisons)


# Attach the statistics of the complete comparison BY GENE NAME.
# The rows of nrhighall/rhighall follow the iteration order of a Python set, whereas the rows
# of allDE[...] are in the order returned by get_de. Copying the columns over positionally
# (as was done before) therefore paired each gene with another gene's Score/Log2FC/P.adj.
stat_cols=['Score','Log2FC','P.adj']

nrhighall=nrhigh.transpose().copy()
# drop_duplicates keeps the first row per gene name so the name index is unique for reindex
de_pd=allDE['PD'].drop_duplicates(subset='Name').set_index('Name')
for col in stat_cols:
    nrhighall[col]=de_pd[col].reindex(nrhighall.index).to_numpy()
nrhighall.to_csv(figdir+'DEanalysis_wilcoxon.'+subsetleg+'-TILonly.NRhigh.tsv',sep='\t')

rhighall=rhigh.transpose().copy()
de_r=allDE['R'].drop_duplicates(subset='Name').set_index('Name')
for col in stat_cols:
    rhighall[col]=de_r[col].reindex(rhighall.index).to_numpy()
rhighall.to_csv(figdir+'DEanalysis_wilcoxon.'+subsetleg+'-TILonly.Rhigh.tsv',sep='\t')


In [None]:
# NR-high genes to plot: flagged in all three lesion comparisons and in both Nadj and Adj.
nr_in_all=(nrhighall[['LN-both','Other-both','Brain-both','Nadj','Adj']]==1).all(axis=1)
genestoplotnr=list(nrhighall.index[nr_in_all])

# R-high genes to plot: flagged in at least one lesion comparison and in both Nadj and Adj.
r_any_lesion=(rhighall[['LN-both','Other-both','Brain-both']]==1).any(axis=1)
r_both_resp=(rhighall[['Nadj','Adj']]==1).all(axis=1)
genestoplotr=list(rhighall.index[r_any_lesion & r_both_resp])

# Scaled (per gene) mean raw expression of the NR-high genes per RCat group.
sc.pl.matrixplot(cdata, var_names=genestoplotnr, groupby='RCat',
                 standard_scale='var', use_raw=True)


In [None]:
sc.pl.matrixplot(cdata,var_names=genestoplotr,
                 standard_scale='var',groupby='RCat',use_raw=True)


In [None]:
sc.pl.umap(cdata, color=['IFITM2', 'MT1F', 'FCN1', 'LILRA3', 
                             'GPR183', 'ZNF331','SIK1','S100A8'], color_map='viridis')

what='RCat'
bywhat='PatientID'
average_obs,fraction_obs=bc.get_means(cdata,bywhat, what)


In [None]:


# Per-patient average expression of selected genes, grouped by `what`.
# NOTE(review): 'ATF' carries no family number here, whereas 'ATF3' is plotted elsewhere in
# this notebook — confirm the intended gene symbol.
bc.pl.box_per_ind(average_obs, ['ZNF331','CCL3', 'CCL4' , 'ATF', 'SIK1','S100A8'],what)

# Last expression of the cell: shows the R-high table.
rhighall

In [None]:
bc.pl.box_per_ind(average_obs, ['CXCL9'],what)

In [None]:
bc.pl.box_per_ind(fraction_obs, ['CXCL9'],what)

In [None]:
    # Enrichr analysis of the R-high / NR-high gene lists against every library in `dbs`,
    # using the genes with mean raw expression > 0.01 in `cdata` as background.
    import gseapy
    # gseapy.get_library_name() lists the available Enrichr libraries
    
    dbs=['KEGG_2021_Human','GO_Biological_Process_2021','Reactome_2016',
     'Human_Gene_Atlas','WikiPathways_2019_Human','ChEA_2016',
     'CellMarker_Augmented_2021','Azimuth_Cell_Types_2021','COVID-19_Related_Gene_Sets_2021',
     'MSigDB_Hallmark_2020']
    # np.asarray(...).ravel() gives one value per gene whether .mean(axis=0) returns a
    # 1 x n_genes matrix or a 1-D array; the previous `.tolist()[0]` would assign a single
    # scalar to every gene in the 1-D case.
    cdata.raw.var['MeanExpr']=np.asarray(cdata.raw.X.mean(axis=0)).ravel()
    # The background does not change between calls, so build it once.
    expressed_genes=list(cdata.raw.var.loc[cdata.raw.var['MeanExpr']>0.01]['SYMBOL'])
    enrichr_runs=[
        (list(rhigh.columns),'DEanalysis_wilcoxon.myelo-TILonly.Rhigh.tsv',figdir+'enrichr/myelo-TILonly.Rhigh/'),
        (list(nrhigh.columns),'DEanalysis_wilcoxon.myelo-TILonly.NRhigh.tsv',figdir+'enrichr/myelo-TILonly.NRhigh/'),
    ]
    for j in dbs:
        for genes,label,outdir in enrichr_runs:
            # only lists with more than 10 genes are submitted
            if len(genes)>10:
                gseapy.enrichr(gene_list=genes, description=label,
                               gene_sets=j, background=expressed_genes,
                               cutoff=0.05, format='png', outdir=outdir)

### all DE

In [None]:
cdata=tildata.copy()

# Differential expression on the 'Respond' annotation, repeated on several subsets of
# `cdata`; every call shares the same thresholds.
de_settings=dict(topnr=5000, logfc=np.log(1.5), padj=0.1)

### try on individual tissues separately
lesion=cdata.obs['Lesion']
b1DE=bc.tl.dge.get_de(cdata[lesion.isin(['LN'])].copy(),'Respond',**de_settings)
b2DE=bc.tl.dge.get_de(cdata[lesion.isin(['Brain'])].copy(),'Respond',**de_settings)
b3DE=bc.tl.dge.get_de(cdata[lesion.isin(['Lung','Sinon','Subc'])].copy(),'Respond',**de_settings)

### restricted by Response3, and the complete comparison
response3=cdata.obs['Response3']
allDE1=bc.tl.dge.get_de(cdata[response3.isin(['R','NR'])],'Respond',**de_settings)
allDE2=bc.tl.dge.get_de(cdata[response3.isin(['TF','PD'])],'Respond',**de_settings)
allDE=bc.tl.dge.get_de(cdata,'Respond',**de_settings)

### Intersection of all combinations: genes reported in every one of the six comparisons
all_comparisons=(b1DE,allDE,b2DE,b3DE,allDE1,allDE2)
strDE=set.intersection(*[set(de['PD']['Name']) for de in all_comparisons])
strDE2=set.intersection(*[set(de['R']['Name']) for de in all_comparisons])

strDE


In [None]:
strDE2

In [None]:



# Keep the current strDE/strDE2 gene sets under separate names.
# NOTE(review): despite the `cdc` prefix, in this section strDE/strDE2 are computed from
# cdata=tildata.copy(); the names look carried over from another section — confirm before reuse.
cdcNRstr=strDE
cdcRstr=strDE2

In [None]:
### Complete comparison + (LN | Brain | Others) + (RvsNR | PDvsTF)
# A gene is kept when it is reported in the complete comparison, in at least one of the
# lesion-specific comparisons and in at least one of the two Response3-restricted comparisons.
# A stricter variant (LN comparison mandatory) used to be computed here first, but it was
# overwritten on the very next lines and never used, so that dead computation is dropped.
strDE=(set(allDE['PD']['Name'])
       & (set(b1DE['PD']['Name']) | set(b2DE['PD']['Name']) | set(b3DE['PD']['Name']))
       & (set(allDE1['PD']['Name']) | set(allDE2['PD']['Name'])))
strDE2=(set(allDE['R']['Name'])
        & (set(b1DE['R']['Name']) | set(b2DE['R']['Name']) | set(b3DE['R']['Name']))
        & (set(allDE1['R']['Name']) | set(allDE2['R']['Name'])))


strDE


In [None]:
strDE2


In [None]:
# Label -> DE result for each comparison; the insertion order defines the row order of the
# membership matrices built from this dict.
testlist={
    'LN-both':b1DE,
    'Brain-both':b2DE,
    'Other-both':b3DE,
    'Nadj':allDE1,
    'Adj':allDE2,
    'All':allDE,
}

# Current gene sets under their own names; the strDE2-derived set is the one plotted.
cdcNRlen=strDE
cdcRlen=strDE2
gchoice=list(cdcRlen)

sc.pl.stacked_violin(cdata, var_names=gchoice, groupby='RCat', use_raw=True)


In [None]:
# Dot plot of the selected genes per RCat group (raw expression).
sc.pl.dotplot(cdata,var_names=gchoice,groupby='RCat',use_raw=True)


# Re-index the 'PD' table by gene name IN PLACE (allDE['PD'] itself is modified) so that
# rows can be selected by name; then sort the selected genes by decreasing Log2FC.
# This expression is not the last one of the cell and its value is not assigned.
allDE['PD'].index=list(allDE['PD']['Name'])
allDE['PD'].loc[list(strDE),:].sort_values('Log2FC',ascending=False)

# Same for the 'R' table; this is the cell's last expression.
allDE['R'].index=list(allDE['R']['Name'])
allDE['R'].loc[list(strDE2),:].sort_values('Log2FC',ascending=False)



In [None]:

# Membership matrices: one column per gene of cdcRlen / cdcNRlen, one row per comparison in
# `testlist`; 1 where the gene appears in that comparison's 'R' / 'PD' table, else 0.
# Passing `index=` to the constructor (instead of assigning .index afterwards) also works
# when a gene set is empty; the name sets are built once per comparison, not once per gene.
comparisons=list(testlist.keys())

r_hits={name:set(testlist[name]['R']['Name']) for name in comparisons}
rhigh=pd.DataFrame({gene:[int(gene in r_hits[name]) for name in comparisons] for gene in cdcRlen},
                   index=comparisons)

pd_hits={name:set(testlist[name]['PD']['Name']) for name in comparisons}
nrhigh=pd.DataFrame({gene:[int(gene in pd_hits[name]) for name in comparisons] for gene in cdcNRlen},
                    index=comparisons)

# Optional export of the bare membership tables:
#nrhigh.transpose().to_csv(figdir+'DEanalysis_wilcoxon.cdc-TILonly.NRhigh.tsv',sep='\t')
#rhigh.transpose().to_csv(figdir+'DEanalysis_wilcoxon.cdc-TILonly.Rhigh.tsv',sep='\t')




    


In [None]:
subsetleg='alltil'

# `stats.mannwhitneyu` is used below; the notebook's import cell only brings in
# `fisher_exact`, so import the module here (harmless if it is already imported).
from scipy import stats

# --- DE tables -----------------------------------------------------------------------------
# Attach the statistics of the complete comparison BY GENE NAME.
# The rows of nrhighall/rhighall follow the iteration order of a Python set, whereas the rows
# of allDE[...] are in the order returned by get_de. Copying the columns over positionally
# (as was done before) therefore paired each gene with another gene's Score/Log2FC/P.adj.
stat_cols=['Score','Log2FC','P.adj']

nrhighall=nrhigh.transpose().copy()
# drop_duplicates keeps the first row per gene name so the name index is unique for reindex
de_pd=allDE['PD'].drop_duplicates(subset='Name').set_index('Name')
for col in stat_cols:
    nrhighall[col]=de_pd[col].reindex(nrhighall.index).to_numpy()
nrhighall.to_csv(figdir+'DEanalysis_wilcoxon.'+subsetleg+'-TILonly.NRhigh.tsv',sep='\t')

rhighall=rhigh.transpose().copy()
de_r=allDE['R'].drop_duplicates(subset='Name').set_index('Name')
for col in stat_cols:
    rhighall[col]=de_r[col].reindex(rhighall.index).to_numpy()
rhighall.to_csv(figdir+'DEanalysis_wilcoxon.'+subsetleg+'-TILonly.Rhigh.tsv',sep='\t')

# NR-high genes to plot: flagged in all three lesion comparisons and in both Nadj and Adj.
nr_in_all=(nrhighall[['LN-both','Other-both','Brain-both','Nadj','Adj']]==1).all(axis=1)
genestoplotnr=list(nrhighall.index[nr_in_all])
# R-high genes to plot: flagged in at least one lesion comparison and in both Nadj and Adj.
r_any_lesion=(rhighall[['LN-both','Other-both','Brain-both']]==1).any(axis=1)
r_both_resp=(rhighall[['Nadj','Adj']]==1).all(axis=1)
genestoplotr=list(rhighall.index[r_any_lesion & r_both_resp])

# --- Per-patient table ---------------------------------------------------------------------
#mycol=['MITFP', 'MHC1','REGIO','CITRESUP','CITRESDN']
# Mean of every numeric obs column per patient. numeric_only=True is required on pandas >= 2
# (non-numeric columns raise otherwise) and matches what older versions did implicitly;
# observed=True avoids empty rows for unused PatientID categories.
dfc=cdata.obs.groupby(['PatientID'],observed=True).mean(numeric_only=True)
# Patient-level annotations: value found in the first cell of each patient. One pass over
# obs replaces building an AnnData subset per patient and the deprecated positional `[0]`.
first_rows=cdata.obs.drop_duplicates(subset='PatientID').set_index('PatientID')
for col in ['RCat','Mutation','Lesion']:
    dfc[col]=first_rows.loc[list(dfc.index),col].tolist()

dfc['nr_cells']=list(bc.tl.count_occurrence(cdata,count_variable='PatientID').loc[dfc.index,'Counts'])


mysigs=['score_Bcell_scanpy','score_NaiTcell_scanpy','TIR_NAI','SADE_B','SADE_G','TIR_CYT','TIR_EXH',
            'score_CD4Tcell_scanpy','score_CytotoxCD8Tcell_scanpy','score_ClassMonocyte_scanpy',
            'score_Myeloid_scanpy','score_cDC_scanpy','score_cDC_CCR7_scanpy','score_cDC1_scanpy',
            'score_ExhCD8Tcell_scanpy','score_RegTcell_scanpy','score_Macrophage_scanpy'
           ,'score_Macrophage_MSR1_scanpy' ,'score_Macrophage_MARCO_scanpy', 'MA-MREGDC','MA-cDC1','MA-cDC2']

# --- Signatures per mutation (BRAF vs NRAS) ------------------------------------------------
# The row masks and the plotting frame do not depend on the signature, so build them once.
is_braf=dfc['Mutation'].isin(['BRAF'])
is_nras=dfc['Mutation'].isin(['NRAS'])
dfc_mut=dfc.loc[dfc['Mutation'].isin(['BRAF','NRAS']),:]

# Kept under its own name: these p-values were previously stored in `pwilc`, never displayed,
# and then overwritten by the response comparison below.
pwilc_mutation={}
for myx in mysigs:
    # alternative is spelled out because the default of mannwhitneyu differs between SciPy versions
    pwilc_mutation[myx]=stats.mannwhitneyu(list(dfc.loc[is_braf,myx]),
                                           list(dfc.loc[is_nras,myx]),
                                           alternative='two-sided')[1]
    plt.figure(figsize=(2,3.5))
    sns.boxplot(x='Mutation',y=myx,data=dfc_mut,palette=color_dict_mut)
    sns.swarmplot(x='Mutation',y=myx,data=dfc_mut,color='black')

#pd.Series(pwilc_mutation).sort_values().to_csv(figdir+'/Signatures-per-mutation-pvals.csv')
display(pd.Series(pwilc_mutation).sort_values())

# --- Signatures per response category (R/TF vs NR_adj/NR_nadj) -----------------------------
rcat_order=['R','TF','NR_nadj','NR_adj']
in_r_tf=dfc['RCat'].isin(['R','TF'])
in_nr=dfc['RCat'].isin(['NR_adj','NR_nadj'])
dfc_rcat=dfc.loc[dfc['RCat'].isin(rcat_order),:]

pwilc={}
for myx in mysigs:
    pwilc[myx]=stats.mannwhitneyu(list(dfc.loc[in_r_tf,myx]),
                                  list(dfc.loc[in_nr,myx]),
                                  alternative='two-sided')[1]
    plt.figure(figsize=(3.5,3.5))
    sns.boxplot(x='RCat',y=myx,data=dfc_rcat,palette=color_dict,order=rcat_order)
    sns.swarmplot(x='RCat',y=myx,data=dfc_rcat,color='black',order=rcat_order)

#pd.Series(pwilc).sort_values().to_csv(figdir+'/Signatures-per-response-pvals.csv')
pd.Series(pwilc).sort_values()



In [None]:
what='RCat'
bywhat='PatientID'
average_obs,fraction_obs=bc.get_means(cdata,bywhat, what)



bc.pl.box_per_ind(average_obs, ['CXCL9','CXCL10','IFNG','ISG15','IFI6','STAT5A' ],what)



In [None]:
bc.pl.box_per_ind(average_obs, ['CXCL9','CXCL10','MARCO' ],what)

In [None]:
bc.pl.box_per_ind(average_obs, ['AMICA1' ],what)

In [None]:


sc.pl.matrixplot(cdata,var_names=genestoplotr,
                 standard_scale='var',groupby='RCat',use_raw=True)


sc.pl.matrixplot(cdata,var_names=genestoplotnr,
                 standard_scale='var',groupby='RCat',use_raw=True)


#sc.pl.umap(dcdata, color=['CD83', 'SELK','ATF3','ZNF331'], color_map='viridis')

#sc.pl.umap(dcdata, color=['WDR74', 'IFITM1','SPP1','PLAC8'], color_map='viridis')


In [None]:
    # Enrichr analysis of the R-high / NR-high gene lists against every library in `dbs`,
    # using the genes with mean raw expression > 0.01 in `cdata` as background.
    # `gseapy` is not imported in this cell; it must already be imported.
    dbs=['KEGG_2021_Human','GO_Biological_Process_2021','Reactome_2016',
     'Human_Gene_Atlas','WikiPathways_2019_Human','ChEA_2016',
     'CellMarker_Augmented_2021','Azimuth_Cell_Types_2021','COVID-19_Related_Gene_Sets_2021',
     'MSigDB_Hallmark_2020']
    # np.asarray(...).ravel() gives one value per gene whether .mean(axis=0) returns a
    # 1 x n_genes matrix or a 1-D array; the previous `.tolist()[0]` would assign a single
    # scalar to every gene in the 1-D case.
    cdata.raw.var['MeanExpr']=np.asarray(cdata.raw.X.mean(axis=0)).ravel()

    # The background does not change between calls, so build it once.
    expressed_genes=list(cdata.raw.var.loc[cdata.raw.var['MeanExpr']>0.01]['SYMBOL'])
    enrichr_runs=[
        (list(rhigh.columns),'DEanalysis_wilcoxon.alltil-TILonly.Rhigh.tsv',figdir+'enrichr/alltil-TILonly.Rhigh/'),
        (list(nrhigh.columns),'DEanalysis_wilcoxon.alltil-TILonly.NRhigh.tsv',figdir+'enrichr/alltil-TILonly.NRhigh/'),
    ]
    for j in dbs:
        for genes,label,outdir in enrichr_runs:
            # only lists with more than 10 genes are submitted
            if len(genes)>10:
                gseapy.enrichr(gene_list=genes, description=label,
                               gene_sets=j, background=expressed_genes,
                               cutoff=0.05, format='png', outdir=outdir)

#### Systematic DE per cell type

In [None]:
what

In [None]:
# Systematic DE: for every distinct value of tildata.obs[what], run get_de on the 'RCat'
# annotation separately on the TF/NR_adj cells (stored in `adj`) and on the R/NR_nadj cells
# (stored in `nadj`), both keyed by that value.
# NOTE(review): in the visible part of the notebook `what` was last assigned 'RCat' and is only
# set to 'celltype2_pub' in a later cell; `split_condition` and `condition` are likewise only
# assigned in that later cell — confirm they hold the intended values when this cell runs.
correct_nrs=False
adj={}
nadj={}
for mycell in list(set(tildata.obs[what])):
#for mycell in ['CD8-positive T cell','plasma cell']:
    subdata=tildata[tildata.obs[what]==mycell].copy()
    ### Explore the nr. cells per donor - avoid large donor-specific biases
    # Optional down-sampling (disabled while correct_nrs is False): every split_condition
    # group is subsampled to at most cellnrs.transpose().mean()[0] cells.
    if correct_nrs==True:
        cellnrs=bc.tl.count_occurrence_subset_conditions(subdata, subset_variable = split_condition, count_variable = what, condition_identifier = condition,  return_percentage = False)
        myindex=[]
        mylen=[]
        myp=list(subdata.obs[split_condition].cat.categories)
        for i in myp:
            temp=subdata[subdata.obs[split_condition]==i].copy()
            mylen.append(len(temp))
            sc.pp.subsample(temp,n_obs=int(np.min([len(temp.obs),cellnrs.transpose().mean()[0]])))
            myindex.append(temp.obs.index.values)

        # Concatenate the per-group index arrays into one flat list; the comprehension's own
        # `myindex` variable is local to it and does not touch the outer list.
        flatten=lambda l: [item for myindex in l for item in myindex]
        subdata=subdata[flatten(myindex)].copy()
    
    subdatana=subdata[subdata.obs['RCat'].isin(['R','NR_nadj'])].copy()
    subdataa=subdata[subdata.obs['RCat'].isin(['TF','NR_adj'])].copy()
    # DE is only run when the subset holds more than 50 cells; otherwise an empty list is
    # stored, so consumers must handle both a get_de result and [].
    if (len(subdataa)>50):
        adj[mycell]=bc.tl.dge.get_de(subdataa,'RCat',demethod='wilcoxon',topnr=5000, logfc=np.log(1.5),padj=0.1)
    else:
        adj[mycell]=[]
    if (len(subdatana)>50):
        nadj[mycell]=bc.tl.dge.get_de(subdatana,'RCat',demethod='wilcoxon',topnr=5000, logfc=np.log(1.5),padj=0.1)
    else:
        nadj[mycell]=[]

In [None]:
#nadj_lev1=nadj
#adj_lev1=adj

In [None]:
nadj_lev2=nadj
adj_lev2=adj

In [None]:
# NOTE(review): these names bind the current `nadj`/`adj` dicts without copying. Unless the DE
# loop was re-run (presumably with a different `what`) since the `_lev2` names were assigned,
# `_lev2` and `_lev3` refer to the very same objects — confirm the intended run order.
nadj_lev3=nadj
adj_lev3=adj

In [None]:
list(set(tildata.obs[what]))

In [None]:
# NOTE(review): the only visible assignment of `nadj_lev1`/`adj_lev1` is commented out, so this
# raises NameError on a fresh kernel unless they are defined elsewhere — confirm.
list(nadj_lev1['T cell']['R']['Name'])

In [None]:
list(adj_lev1['T cell']['TF']['Name'])

In [None]:
list(nadj_lev1['natural killer cell']['R']['Name'])

In [None]:
list(adj_lev1['natural killer cell']['TF']['Name'])

In [None]:
nadj['cytotoxic CD56-dim natural killer cell']

In [None]:
# NOTE(review): in the visible part of the notebook `tops` is only assigned in a later cell
# (built from DEgenes) — confirm it exists when the notebook is run top to bottom.
sc.pl.dotplot(adata, var_names=tops,groupby=clusters)

In [None]:
mycell='naive CD8-positive T cell'
what='celltype2_pub'
split_condition='PatientID'
condition='RCat'
subdata=tildata[tildata.obs[what]==mycell].copy()
subdata

In [None]:
nadj['plasma cell']['R']

In [None]:
nadj['plasma cell']['R']

In [None]:
nadj['plasma cell']['NR_nadj']

In [None]:
nadj['plasma cell']['NR_nadj']

In [None]:


### Select only top genes (in order of p-val) for 2 clusters and plot expression per cluster
tops=list(DEgenes['44']['Name'][0:50])+list(DEgenes['40']['Name'][0:10])
sc.pl.dotplot(adata, var_names=tops,groupby=clusters)

In [None]:
! jupyter nbconvert --to html Figures-part2.ipynb