# Annotating Cell Types

This workbook was run after the standard workflow.

In [None]:
import besca as bc
import numpy as np
import pandas as pd
import scanpy as sc
import matplotlib.pyplot as plt
from scipy import sparse, io
import os
import time
import logging
import seaborn as sns
#import cosg as cosg ### gene makrers with cosg
sc.logging.print_header()

# for standard processing, set verbosity to minimum
sc.settings.verbosity = 0  # verbosity: errors (0), warnings (1), info (2), hints (3)
sc.settings.set_figure_params(dpi=80)
version = '2.8'
start0 = time.time()

In [None]:
# Define standardized file paths based on the current working directory and the besca installation
root_path = os.getcwd()
bescapath_full = os.path.dirname(bc.__file__)
bescapath = os.path.split(bescapath_full)[0]

analysis_name = 'sw_besca_24_pub'
species='mouse' ## 'mouse' enables the mouse-specific steps further down (homolog conversion, mouse config file); presumably 'human' otherwise
conversion=None  # gene symbol conversion table; only filled in later when species == 'mouse'
#analysis_name = 'standard_workflow_besca2_CLR' #use _CLR or _DSB for citeseq analysis
annot_author = 'schwalip' ### replace with userid

# Choose the clustering to base the annotation on.
# leiden (RNA), citeseq (protein)
clusters='leiden' 

# Which UMAP basis to use for visualization
# umap or umap_citeseq
umap_basis='umap'

# Specify if Cite-seq data (changes the folder the data is read from and enables the antibody plots)
citeseq=True

# Specify if signatures should be exported to gmtx
export_sigs=True

# Specify if you are connected to a mongoDB that you want to use to explore similarity with previous data
mongodb_explore=False


The chunk of code below is useful if this is the initial installation of besca and you are running this notebook as a test. If needed, it will download the test dataset and export the labelling. 
This export is usually done at the end of the standard workflow. The exported files are necessary for the annotation.

In [None]:
# Set to True to run the notebook on besca's pbmc3k_processed example dataset
use_example_dataset = False
if use_example_dataset:
    # Override the configuration from the cell above for the example dataset
    analysis_name='pbmc3k_processed'
    results_folder = os.path.split(os.getcwd())[0] + '/besca/datasets/data/'
    clusters='leiden'
    umap_basis='umap'
    citeseq = False
    # This line will either download, or load the datasets
    adata = bc.datasets.pbmc3k_processed()
    # This line exports the annotation for the annotation.
    adata = bc.st.additional_labeling(adata, labeling_to_use= clusters, labeling_name = clusters, 
                                      labeling_description = 'Exporting a postori the labels for annotation',
                                      labeling_author = 'Testing', 
                                      results_folder= results_folder)
else:
    if clusters == 'leiden':
        # RNA-based clustering: everything lives below <root_path>/analyzed
        results_folder = os.path.join(root_path, 'analyzed')
        if citeseq:
            # CITE-seq run: results_folder points to <analysis_name>/citeseq/citeseq,
            # while the AnnData object is read from the sibling folder carrying the '_merged' suffix
            results_folder = os.path.join(results_folder, analysis_name, 'citeseq' , 'citeseq') 
            adata = sc.read_h5ad(os.path.join(results_folder + '_merged' ,analysis_name + '.h5ad') ) 
        else:
            results_folder = os.path.join(results_folder, analysis_name)
            adata = sc.read_h5ad(os.path.join(results_folder, analysis_name + '.h5ad') )
    else:
        # Any other clustering: the results folder is named after the value of `clusters`
        results_folder = os.path.join(root_path, 'analyzed', analysis_name, 'citeseq', clusters)
        adata = sc.read_h5ad(os.path.join(results_folder + '_merged' ,analysis_name + '.h5ad') )
        # From here on `clusters` holds 'leiden_citeseq', the name used by all later cells
        clusters='leiden_citeseq'

In [None]:
### Create export file and folder names
results_file = os.path.join(results_folder, analysis_name + '.annotated.h5ad')
# Keep the trailing separator: later cells build file names as figdir + "<file name>"
figdir = os.path.join(root_path, 'analyzed', analysis_name + '/figures/')
sc.settings.figdir = figdir
# exist_ok avoids the check-then-create race and keeps the cell safe to re-run
os.makedirs(figdir, exist_ok=True)

In [None]:
sc.pl.embedding(adata, color = [clusters], basis = umap_basis , legend_loc='on data')

In [None]:
sc.pl.embedding(adata, color = ['treatment_id'], basis = umap_basis )

In [None]:
if citeseq:
    # Expression pattern of the antibodies
    # Can be usefull to refine the annotations, if the clusters are RNA-based
    ab_names = adata.var[adata.var.feature_type=='Antibody Capture']
    sc.pl.embedding(adata, basis = umap_basis, color=ab_names.index.tolist(), color_map = 'viridis',vmin=-1.5)

In [None]:
# ab_names is only created when citeseq is True, so this cell must be guarded the same way
if citeseq:
    # Second '_'-separated field of each antibody SYMBOL (presumably "<protein>_<gene>",
    # i.e. the gene targeted by the antibody - verify against adata.var)
    oi = [x.split('_')[1] for x in list(ab_names['SYMBOL'])]
    sc.pl.umap(adata, color=oi, color_map='viridis')

In [None]:
sc.pl.umap( adata, color = ['Xcl1','Cd200','S1pr1','S1pr5','Esm1'] , color_map = 'viridis')

### Explore top marker genes per cluster 

In [None]:
DEgenes=bc.tl.dge.get_de(adata,clusters,demethod='wilcoxon',topnr=5000, logfc=1,padj=0.05)


In [None]:
### Marker panel for the dot plot: the 30 genes with the highest log2 fold change
### in cluster 6, followed by the 10 genes with the highest log2 fold change in cluster 1
cluster6_top = DEgenes['6'].sort_values('Log2FC', ascending=False)['Name'].iloc[:30]
cluster1_top = DEgenes['1'].sort_values('Log2FC', ascending=False)['Name'].iloc[:10]
tops = cluster6_top.tolist() + cluster1_top.tolist()
sc.pl.dotplot(adata, var_names=tops, groupby=clusters, dot_max=0.6)

In [None]:
Th17=['Cd163l1','Abi3bp','Il17a','Kcnc1','Rorc', 'Cryba4']

In [None]:
sc.pl.dotplot(adata, var_names=Th17,groupby=clusters, dot_max=0.6)

Cluster 22 - Th17 cells

In [None]:
### Marker panel for the dot plot: the 30 genes with the highest log2 fold change
### in cluster 4, followed by the 10 genes with the highest log2 fold change in cluster 3
cluster4_top = DEgenes['4'].sort_values('Log2FC', ascending=False)['Name'].iloc[:30]
cluster3_top = DEgenes['3'].sort_values('Log2FC', ascending=False)['Name'].iloc[:10]
tops = cluster4_top.tolist() + cluster3_top.tolist()
sc.pl.dotplot(adata, var_names=tops, groupby=clusters, dot_max=0.6)

### Explore distribution of various cell populations

In [None]:
# One can load besca-provided signatures using the function below
signature_dict = bc.datasets.load_immune_signatures(refined=False)

signature_dict

Additionally, it is possible to read a gmt file and compute scanpy scores using the function below.

If the gmt file is composed of combined signature (UP and DN), a common score will be computed: 
$$Total\_SCORE= Score_{UP} - Score_{DN}$$

In [None]:
# Signature collection (gmt file) located inside the besca installation
gmt_file= bescapath + '/besca/datasets/genesets/Immune.gmt'

if species=='mouse':
    # Genes converted to mouse homologs
    mousehuman_file = bescapath + '/besca/datasets/homologs/MGItoHGNC.csv'
    # NOTE: despite the .csv extension the file is parsed as tab-separated
    mousehuman=pd.read_csv(mousehuman_file,sep='\t',header='infer', encoding="unicode_escape")
    mousehuman.index=mousehuman['MGI']
    # Series indexed by the 'MGI' column with the 'HGNC' column as values;
    # handed to the signature functions below as `conversion`
    conversion=pd.Series(data=mousehuman['HGNC'], index=mousehuman.index)


In [None]:

bc.tl.sig.combined_signature_score(adata, gmt_file,
                             UP_suffix='_UP', DN_suffix='_DN', method='scanpy',
                             overwrite=False, verbose=False,
                             use_raw=True, conversion=conversion)

In [None]:
scores = [x for x in adata.obs.columns if 'scanpy' in x]

In [None]:
sc.pl.embedding(adata, basis = umap_basis, color= scores)

## Signatures for specific sub-populations

In [None]:
## Provided with besca; change this for own gmt file
gmt_file_anno= bescapath + '/besca/datasets/genesets/CellNames_scseqCMs6_sigs.gmt'


In [None]:
### Plot all signatures containing "scanpy" in name
scores = [x for x in adata.obs.columns if 'scanpy' in x]
sc.pl.embedding(adata, basis = umap_basis, color= scores, color_map = 'viridis')

In [None]:
bc.tl.sig.combined_signature_score(adata, gmt_file_anno,
                             UP_suffix='_UP', DN_suffix='_DN', method='scanpy',
                             overwrite=False, verbose=False,
                             use_raw=True, conversion=conversion)

scores = [x for x in adata.obs.columns if 'scanpy' in x]

In [None]:
### Plot all signatures containing "scanpy" in name
scores = [x for x in adata.obs.columns if 'scanpy' in x]
sc.pl.embedding(adata, basis = umap_basis, color= scores, color_map = 'viridis')

In [None]:
### Plot only selected signatures
sc.pl.embedding(adata, basis = umap_basis, color= ['score_Myeloid_scanpy','score_Bcell_scanpy','score_Tcell_scanpy','score_NKcell_scanpy'], color_map = 'viridis')

In [None]:
# Antibody feature table; it only exists when citeseq is True, so show nothing otherwise
ab_names if citeseq else None

In [None]:
# Signatures from https://www.pnas.org/content/116/28/14113
# Memory, effector, stem-like and exhausted gene lists (mouse gene symbols)
jadhav_mem=['Sell','Il7r','Bcl2','Klrg1','Il2rb']
# 'Tnfsf10' was misspelled as 'Tnsf10', which is not a gene symbol and therefore could not contribute to the score
jadhav_eff=['Ifng','Tnf','Il2','Il21','Il6','Gzma','Gzmb','Prf1','Fasl','Tnfsf10']
jadhav_stem=['Nsg2','P2rx7','Lrig1','Tcf7','Aff3','Crtam','Kbtbd11','Nt5e','Cxcr5','Cxcl10','Cd83','Ccr7','Traf1']
jadhav_exh=['Chn2','Lilr4b','Pim1','Slamf1','Prdm1','Gzmb','Fasl','Tmcc3','1700017B05Rik', 'Il10','Lgals3','Ccl3','Ccl4','Cd48']

In [None]:
# Selected (publication) and all Gzm genes 
gzmessel=['Gzma','Gzmb','Gzmc','Gzmf']
gzmesall=['Gzma','Gzmb','Gzmc','Gzme','Gzmd','Gzmf','Gzmk']

In [None]:
# Score every gene list with scanpy; each score is written under the given name.
# Insertion order of the dict keeps the original call order.
gene_lists_by_score = {
    'Memory_j': jadhav_mem,
    'Effector_j': jadhav_eff,
    'Resource_j': jadhav_stem,
    'Exhausted_j': jadhav_exh,
    'Gzms': gzmessel,
}
for score_label, score_gene_list in gene_lists_by_score.items():
    sc.tl.score_genes(adata, gene_list=score_gene_list, score_name=score_label)

In [None]:
sc.pl.umap(adata, color=gzmesall, color_map='viridis')

In [None]:
sc.pl.umap(adata, color='Gzms', color_map='viridis', vmax=3.5)

In [None]:
# Yost et al. exhaustion and acttivation signatures
yost_exh=['Krt86','Layn','Entpd1','Acp5','Galnt2','Tigit','Havcr2','Gzmb','Ahi1','Atp8b4','Itgae',
          'Vcam1','Golim4','Mtss1','Jaml','Sox4','Pde7b','Cxcr6','Csf1','Tnfrsf18','Asb2','Gem','Sla2',
          'Myo7a','Sqle']
yost_act=['Nfkbia','Junb','Jun','Tnf','Ier2','Ifng','Slc2a3','Cd69','Fos','Nr4a2','Ubc','Gadd45b','Nr4a1',
         'Tsc22d3','Hspa1b','Dusp1','Zfp36l1','Ppp1r15a','Actb','Bhlhe40','Fosb','Pim1',
         'Clic1','Hspa1a','Cdkn1a','Nfkbiz','Zc3h12a','Tmsb10','Csrnp1']

In [None]:
# Signature from https://www.nature.com/articles/nature19330
# Each signature has an 'UP' and a 'DN' gene list; entries of the form
# '<PROTEIN>_<gene>' are the antibody features, plain symbols the RNA features.
resource = {
    'UP': ['Tcf7','CXCR5_Cxcr5','PD1_Pdcd1','Pdcd1','SLAMF6_Slamf6','Slamf6','Cxcr3','CXCR3_Cxcr3','Cd28','Bcl6','Plagl1'],
    'DN': ['TIM3_Havcr2','Havcr2','Cd244a','Entpd1','CD39_Entpd1'],
}
# Extended version of the signature ('SLAMF6_Slamf6' was listed twice; the duplicate is removed)
resourceext = {
    'UP': ['Tcf7','CXCR5_Cxcr5','PD1_Pdcd1','Pdcd1','SLAMF6_Slamf6','Slamf6','Cxcr3','CXCR3_Cxcr3','Icos','Tnfsf14','Tnfrsf4','Il2','Tnf','Lag3','LAG3_Lag3', 'Ctla4','Bcl6','Plagl1'],
    'DN': ['TIM3_Havcr2','Havcr2','Cd244a','Entpd1','CD39_Entpd1','Prdm1','Id2','Il2rb','Klrg1','Ccl3','Ccl4','Ccl5','Csf1'],
}

# Signature dictionary in the {name: {'UP': [...], 'DN': [...]}} layout passed to combined_signature_score
sigs = {
    'resourceCD8Tcell': resource,
    'resourceCD8Tcellext': resourceext,
}

In [None]:
bc.tl.sig.combined_signature_score(adata, signature_dict=sigs)
sc.tl.score_genes(adata,gene_list=sigs['resourceCD8Tcell']['UP'],score_name='resourceCD8Tcell_UP')
sc.tl.score_genes(adata,gene_list=sigs['resourceCD8Tcell']['DN'],score_name='resourceCD8Tcell_DOWN')

In [None]:
sc.pl.matrixplot(adata, var_names=list(set(sigs['resourceCD8Tcellext']['UP']).union(set(sigs['resourceCD8Tcellext']['DN']))), 
                 groupby='leiden',standard_scale='var',dendrogram=True, save='Heatmap-resource-leiden.pdf')

In [None]:
sc.pl.matrixplot(adata, var_names=yost_exh, groupby='leiden',standard_scale='var',dendrogram=True, save='Heatmap-yost_exh-leiden.pdf')

In [None]:
sc.pl.matrixplot(adata, var_names=yost_act, groupby='leiden',standard_scale='var',dendrogram=True, save='Heatmap-yost_act-leiden.pdf')

In [None]:
# Score the Yost et al. activation and exhaustion gene lists; the scores are read back from adata.obs in later cells
sc.tl.score_genes(adata,gene_list=yost_act,score_name='Activation_Yost')
sc.tl.score_genes(adata,gene_list=yost_exh,score_name='Exhaustion_Yost')
# Repeats the Th17 marker list defined in an earlier cell
Th17=['Cd163l1','Abi3bp','Il17a','Kcnc1','Rorc', 'Cryba4']
# Score names containing 'scanpy' are picked up by the "'scanpy' in x" column filters used elsewhere in this notebook
sc.tl.score_genes(adata,gene_list=Th17,score_name='score_Th17_scanpy')
#https://www.cell.com/cell/pdfExtended/S0092-8674(15)01696-7
# NOTE(review): 'Ifn7' and 'Rfp4' do not look like standard mouse gene symbols - please verify against the publication
Ifna=['Ifit3','Slfn5','Ifit1','Rsad2','Oas3','Cxcl10','Ifi204','Irf7','Oas2','Rfp4','Stat1','Usp18','Socs1','Gbp4','Ifn7']
sc.tl.score_genes(adata,gene_list=Ifna,score_name='score_Ifna_scanpy')

In [None]:
sc.settings.set_figure_params()
sc.pl.umap(adata, color=['Activation_Yost','Exhaustion_Yost', 
                         'score_ExhCD8Tcell_scanpy', 'score_resourceCD8Tcell_scanpy', 
                         'score_resourceCD8Tcellext_scanpy',
                        'resourceCD8Tcell_UP','resourceCD8Tcell_DOWN',
                         'score_NaiCD8Tcell_scanpy','score_Ifna_scanpy'], ncols=3,color_map='viridis')

In [None]:
subtab=adata.obs.loc[:,['Activation_Yost','Exhaustion_Yost', 'score_ExhCD8Tcell_scanpy','sample_id','treatment_id']].copy()
subtabl=adata.obs.loc[:,['Activation_Yost','Exhaustion_Yost', 'score_ExhCD8Tcell_scanpy','score_resourceCD8Tcell_scanpy',
                         'score_resourceCD8Tcellext_scanpy','resourceCD8Tcell_UP','resourceCD8Tcell_DOWN',
                         'score_NaiCD8Tcell_scanpy','score_Ifna_scanpy','score_Th17_scanpy','score_RegTcell_scanpy',
                         'score_ProlifCD8Tcell_scanpy','score_Myeloid_scanpy','sample_id','leiden']].copy()

In [None]:
#adata.obs.loc[:,['leiden','celltype']]

In [None]:
# Mean of every numeric score per sample. numeric_only=True leaves out the
# 'treatment_id' column (presumably categorical/string), which pandas >= 2.0
# refuses to average instead of silently dropping it.
subtabMeans = subtab.groupby('sample_id').mean(numeric_only=True)
# Treatment label = part of the sample id in front of "_tum"
subtabMeans['Treat'] = [x.split("_tum")[0] for x in list(subtabMeans.index)]

In [None]:
# Mean of every score per (sample, cluster) pair
subtabMeansl = subtabl.groupby(['sample_id', 'leiden']).mean()
# Every index entry is a (sample_id, leiden) tuple; unpack it into two helper columns
sample_cluster_pairs = list(subtabMeansl.index)
subtabMeansl['Treat'] = [sample.split("_tum")[0] for sample, _ in sample_cluster_pairs]
subtabMeansl['cluster'] = [cluster for _, cluster in sample_cluster_pairs]

In [None]:
figdir

In [None]:

# Mean signature score per cluster, scaled per column (standard_scale=1).
# numeric_only=True leaves out the string column 'Treat', which cannot be
# averaged and makes pandas >= 2.0 raise.
fig = sns.clustermap(subtabMeansl.groupby('cluster').mean(numeric_only=True),
                     standard_scale=1, col_cluster=False, figsize=(6,10))
fig.savefig(figdir+"Heatmap-signatures-resourceFocus.png")

In [None]:
sns.boxplot(x="Activation_Yost",y="Treat",data=subtabMeans)
sns.swarmplot(x="Activation_Yost",y="Treat",data=subtabMeans, color='black')

In [None]:
# Order the clusters by their mean activation score, highest first. The column is
# selected before aggregating so the string column 'Treat' is never averaged
# (a plain .mean() over all columns raises on pandas >= 2.0).
sorted_index_desc = (
    subtabMeansl.groupby('cluster')['Activation_Yost']
    .mean()
    .sort_values(ascending=False)
    .index
)

sns.set(rc={'figure.figsize':(11.7,8.27)})
sns.set_style('ticks')
# Grey boxes: spread per cluster; points: one per sample/cluster mean, coloured by treatment
sns.boxplot(x="Activation_Yost",y="cluster",data=subtabMeansl, order=sorted_index_desc, color='lightgray')
sns.swarmplot(x="Activation_Yost",y="cluster",hue='Treat',data=subtabMeansl, order=sorted_index_desc)

In [None]:
sns.boxplot(x="Exhaustion_Yost",y="Treat",data=subtabMeans)
sns.swarmplot(x="Exhaustion_Yost",y="Treat",data=subtabMeans, color='black')

In [None]:
sns.set(rc={'figure.figsize':(11.7,8.27)})
sns.set_style('ticks')
sns.boxplot(x="Exhaustion_Yost",y="cluster",data=subtabMeansl, order=sorted_index_desc, color='lightgray')
sns.swarmplot(x="Exhaustion_Yost",y="cluster",hue='Treat',data=subtabMeansl, order=sorted_index_desc)

In [None]:
# Order the clusters by their mean exhaustion score, highest first. The column is
# selected before aggregating so the string column 'Treat' is never averaged
# (a plain .mean() over all columns raises on pandas >= 2.0).
sorted_index_desc = (
    subtabMeansl.groupby('cluster')['Exhaustion_Yost']
    .mean()
    .sort_values(ascending=False)
    .index
)

sns.set(rc={'figure.figsize':(11.7,8.27)})
sns.set_style('ticks')
# Grey boxes: spread per cluster; points: one per sample/cluster mean, coloured by treatment
sns.boxplot(x="Exhaustion_Yost",y="cluster",data=subtabMeansl, order=sorted_index_desc, color='lightgray')
sns.swarmplot(x="Exhaustion_Yost",y="cluster",hue='Treat',data=subtabMeansl, order=sorted_index_desc)

In [None]:
sns.boxplot(x="score_ExhCD8Tcell_scanpy",y="Treat",data=subtabMeans)
sns.swarmplot(x="score_ExhCD8Tcell_scanpy",y="Treat",data=subtabMeans, color='black')

In [None]:
sns.set(rc={'figure.figsize':(11.7,8.27)})
sns.set_style('ticks')
sns.boxplot(x="score_ExhCD8Tcell_scanpy",y="cluster",data=subtabMeansl, order=sorted_index_desc, color='lightgray')
sns.swarmplot(x="score_ExhCD8Tcell_scanpy",y="cluster",hue='Treat',data=subtabMeansl, order=sorted_index_desc)

In [None]:
# Order the clusters by their mean ExhCD8Tcell signature score, highest first. The
# column is selected before aggregating so the string column 'Treat' is never
# averaged (a plain .mean() over all columns raises on pandas >= 2.0).
sorted_index_desc = (
    subtabMeansl.groupby('cluster')['score_ExhCD8Tcell_scanpy']
    .mean()
    .sort_values(ascending=False)
    .index
)

sns.set(rc={'figure.figsize':(11.7,8.27)})
sns.set_style('ticks')
# Grey boxes: spread per cluster; points: one per sample/cluster mean, coloured by treatment
sns.boxplot(x="score_ExhCD8Tcell_scanpy",y="cluster",data=subtabMeansl, order=sorted_index_desc, color='lightgray')
sns.swarmplot(x="score_ExhCD8Tcell_scanpy",y="cluster",hue='Treat',data=subtabMeansl, order=sorted_index_desc)

#### Broad overview of gene of interest expression 

In [None]:
goi=['Ptprc','Trac','Cd3d','Cd8a','Cd44','CD44_Cd44','Cd4','Foxp3','Lrrc32','Il2ra','CD25_Il2ra',
     'Il7r','IL7RA_Il7r','Cd28','CD28_Cd28',
         'Slamf6','SLAMF6_Slamf6','Sell','CD62L_Sell',
                            'Tcf7','Cxcr5','CXCR5_Cxcr5','Cxcr3','CXCR3_Cxcr3','S1pr1','S1pr5','Esm1','Klrg1','Il18r1',
     'Pdcd1','PD1_Pdcd1','Havcr2','TIM3_Havcr2','Tnfrsf9','41BB_Tnfrsf9','Lag3','LAG3_Lag3',
      'Tigit','TIGIT_Tigit',
     'Entpd1','CD39_Entpd1','Tox','Tox2','Greb1','Gzmk',
     'Il10','Gzmb','Klrc2','Lncpint','Ifitm1','Ifitm2','Iigp1','Oas3','Klrc3','Ccl1','Ccl3','Crtam','Cd83','Gzme','Gzmd','Gzmf',
     'Ifit1','Isg15','Cxcl10','Xaf1',
     'Il21r','IL21R_Il21r','Xcl1','Igfbp7','Slc15a1','Bace2','Ltf','Mrc2','Bmp7','Fcrl6',
     'Mki67','Stmn1','Rorc',
     'Cd163l1','Il17a','Csf1r','Msr1','Apoe','Hspa1a','Dnajb1','Hspb1','Bag3']
sc.pl.matrixplot(adata, var_names=goi, groupby='leiden',standard_scale='var',dendrogram=True, save='Heatmap-goi-leiden.pdf')

# Automated annotation

A decision-tree-based annotation that reads signatures from a provided .gmt file and hierarchy as well as cutoffs and signature ordering from a configuration file and attributes each cell to a specific type according to signature enrichment. 

This is an aid to start the annotation; the annotation can then be further refined by adding further signatures or adjusting the configuration files. It was tested mainly on PBMCs and oncology (tumor biopsies) related samples.


## Loading markers and signature

In [None]:
from itertools import repeat
# Read the annotation signatures as undirected gene lists (one list per signature)
mymarkers = bc.tl.sig.read_GMT_sign(gmt_file_anno,directed=False)
if species=='mouse':
    # Translate every gene through the conversion table and drop genes
    # for which the helper returns None
    for signature, signature_genes in mymarkers.items():
        translated = map(bc.tl.sig._helper._to_geneid, repeat(conversion), signature_genes)
        mymarkers[signature] = [gene for gene in translated if gene is not None]


In [None]:
mymarkers = bc.tl.sig.filter_siggenes(adata, mymarkers) ### remove genes not present in dataset or empty signatures

In [None]:
sc.pl.embedding(adata, basis = umap_basis, color= mymarkers['Hematopoietic'])

In [None]:
sc.pl.embedding(adata, basis = umap_basis, color= mymarkers['CommonLymphoidPC'])

In [None]:
sc.pl.embedding(adata, basis = umap_basis, color= mymarkers['NKcell'])

### Select ubiquitously expressed genes for cutoff adjustment to individual datasets

In [None]:
## Cutoff in the configuration file will be calculated relative to the enrichment of this ubiquitous signature
## For best performance, choose genes that are uniformly distributed across all clusters on a given dataset 
## Ideally, expression is moderate rather than high
#mymarkers['Ubi'] = [ 'ZNF207', 'HNRNPU','SNRPD3', 'SRRM1'] # alternatives: ['B2M','ACTB', 'HNRNPK'] 
mymarkers['Ubi'] = ['B2m','Actb', 'Hnrnpk','Hnrnpu'] ### used for cutoff adjustment to individual dataset, can be modified

In [None]:
### Inspect gene expression for ubi genes, checking for uniform distribution across clusters
sc.pl.embedding(adata, basis = umap_basis, color= mymarkers['Ubi'])

In [None]:
sc.pl.dotplot(adata, var_names= mymarkers['Ubi'], groupby=clusters)

In [None]:
### Inspect gene expression for an example signature
sc.pl.embedding(adata, basis = umap_basis, color= mymarkers['NClassMonocyte'])

## Configuration of the annotation

We read the configuration file, containing hierarchy, cutoff and signature priority information. 
A new version of this file should be created and maintained with each annotation. 
The included example is optimised for the annotation of the 6.6k PBMC dataset. 

In [None]:
### Annotation config located inside the besca installation - replace this with your config.
### The mouse-specific file is used when species is 'mouse'.
configfile = (
    bescapath + '/besca/datasets/genesets/CellNames_scseqCMs6_config.mouse.tsv'
    if species == 'mouse'
    else bescapath + '/besca/datasets/genesets/CellNames_scseqCMs6_config.tsv'
)

In [None]:
sigconfig,levsk=bc.tl.sig.read_annotconfig(configfile)

In [None]:
# Optional configuatation: The order of cells at different levels can be manually changed if needed
#levsk[0]=['ColorectalCancer', 'Epithelial','Fibroblast','Endothelial','Erythrocyte','HematoStem',
#          'Hepatocyte','MelMelanoma','Neural','Adipocyte','Hematopoietic','Schwann','Chondrocyte','Glial']

#### Get an overview of the cell type hierarchy included in the configuration file

In [None]:
# Keep the return value under its own name: binding it to `plt` would shadow the
# matplotlib.pyplot alias imported at the top of the notebook for all later cells.
nomenclature_plot = bc.pl.nomenclature_network(configfile, font_size=8)
nomenclature_plot.savefig(os.path.join(figdir, "Nomenclatureplot.svg"), format="svg")

Fract_pos was exported by BESCA in the standard workflow; 
it contains the fraction of positive cells per gene per cluster.

We use these values as a basis for a wilcoxon test per signature per cluster. 

In [None]:
## Optional: For an alternative clustering to be used as annotation, one needs to export the corresponding fract_pos

#adata = bc.st.clustering(adata, results_folder, myres=2, method = clusters) ## higher clustering resolution
#bc.export.clustering(adata, outpath = os.path.join(results_folder, 'labelings', clusters+'_r2'), method = clusters)
#bc.export.labeling_info(outpath=os.path.join(results_folder, 'labelings', clusters+'_r2'), description=clusters+' clustering with r=2', method='leiden')

In [None]:
# Fraction-positive table exported for the chosen clustering (tab-separated; the first two lines of the file are skipped)
f=pd.read_csv(results_folder + '/labelings/' + clusters + '/fract_pos.gct' ,sep="\t",skiprows=2)
# Score every signature in mymarkers; rows of df are signatures (see df.loc['Ubi', :] below)
df=bc.tl.sig.score_mw(f,mymarkers)

### Set a cutoff based on Ubi and scale with values from config file
### Change the factor 0.25 to be systematically more stringent (higher, e.g. 0.5 or 1) or more lenient (lower)
myc=np.median(df.loc['Ubi',:]*0.25) ### median of the scaled Ubi scores; multiplied with the per-signature 'Cutoff' values later on


In [None]:
### Check the cutoff 
myc

In [None]:
### Check the ubiquitous signature score (should be as uniform as possible across clusters)
df.loc["Ubi",:]

In [None]:
df.iloc[0:3,0:7]

#### For an overview of highest scoring signatures, one can generate a heatmap

In [None]:
# Heatmap of all signatures whose best score exceeds twice the cutoff.
# clustermap creates its own figure, so save through the returned grid instead of
# relying on pyplot's current-figure state (and on what `plt` is bound to).
signature_grid = sns.clustermap(df.loc[df.max(axis=1)>myc*2,:].astype(float),figsize=(12, 8))
signature_grid.savefig(os.path.join(figdir, "SignatureHeatmap_all.svg"), format="svg")

For each signature, positive and negative clusters are determined. Only positive clusters are maintained. Cutoffs can be individualised based on the config file (scaling factor) and myc, which is determined based on ubiquitously expressed genes. 

In [None]:
### Remove ubiquitous signature for the scoring part
### errors='ignore' keeps the cell re-runnable once 'Ubi' has already been dropped
df = df.drop('Ubi', errors='ignore')

In [None]:
# Optional: Cutoffs can also be manually adjusted if needed
# Always adjust from lowest to highest level and remember to check priorities as well (order)
# Increase a value for being more stringent, decrease it for being more lenient, e.g.
# sigconfig.loc['Epithelial','Cutoff']=1.5
manual_cutoffs = {
    'CD8Tcell': 2,
    'EMCD8Tcell': 4.7,
    'NaiCD8Tcell': 2.5,
    'CytotoxCD8Tcell': 2.75,
    'Hematopoietic': 4,
}
for signature_name, cutoff_value in manual_cutoffs.items():
    sigconfig.loc[signature_name, 'Cutoff'] = cutoff_value

#### Cluster attribution based on cutoff (all clusters above cutoff will be attributed to a cell type)

In [None]:
# For every signature, collect what getset returns for its threshold:
# the signature's 'Cutoff' factor from the config times the Ubi-based cutoff myc
sigscores = {
    signature_name: bc.tl.sig.getset(df, signature_name, sigconfig.loc[signature_name, 'Cutoff'] * myc)
    for signature_name in list(df.index)
}

One can inspect the cluster attribution per cell type in the signature list and adjust cutoffs as required. 

In [None]:
sigscores

In [None]:
### Check to a specific cell type 
sigscores['Bcell']

Now each cluster gets annotated, according to the distinct levels specified in the config file. 
Note that in case a cluster is positive for multiple identities, only the first one is taken, 
in the order specified in the "Order" column in the config file. 

To check the given order, per levels, you can inspect levsk, and adjust above as needed

In [None]:
#levsk

#### Cell types that are not expected in the dataset or that are too fine-grained can be explicitly excluded from the annotation

In [None]:
### For instance, if no erythrocytes or pancreatic cells are expected, they can be specified here
### For instance, if plasma cells should not be subclassified, they can can be specified here
toexclude=['Erythrocyte','AlphaPancreatic', 'BetaPancreatic', 'DeltaPancreatic',
           'IgGPlasma','IgAPlasma','IgMPlasma','Chondrocyte']


### Obtain cluster assignment

In [None]:
cnames=bc.tl.sig.make_anno(df,sigscores,sigconfig,levsk, toexclude=toexclude)

We have now obtained, for each cluster, the cell type attribution at distinct levels. 

In [None]:
cnames

Export the used annotation parameters, for future reference

In [None]:
bc.tl.sig.export_annotconfig(sigconfig, levsk, results_folder, analysis_name)

## Using dblabel convention

Only short names were used in the signature naming convention in this case. 
One can easily transform this to EFO terms if preferred; a conversion table comes with besca. 

This nomenclature is quite extended, and the function 
**obtain_dblabel** can perform the conversion.

In [None]:
### Transform these short forms to dblabel - EFO standard nomenclature
cnamesDBlabel = bc.tl.sig.obtain_dblabel(bescapath+'/besca/datasets/nomenclature/CellTypes_v1.tsv', cnames )
cnamesDBlabel

Finally, one can add the new labels to adata.obs as annotation. 

In [None]:
# Add one obs column per annotation level, from coarsest (celltype0) to finest (celltype3)
for level_name in ('celltype0', 'celltype1', 'celltype2', 'celltype3'):
    adata.obs[level_name] = bc.tl.sig.add_anno(adata, cnamesDBlabel, level_name, clusters)

Inspect the labels on the umap from lowest to highest resolution

In [None]:
sc.pl.embedding(adata,color=['celltype1'], basis = umap_basis) 

In [None]:
sc.pl.embedding(adata,color=['celltype2'], basis = umap_basis) 

In [None]:
sc.pl.embedding(adata,color=['celltype3'], basis = umap_basis) 

In [None]:
sc.pl.embedding(adata,color=['leiden'], basis = umap_basis,legend_loc='on data') 

### Helper functions for additional checks

In [None]:
### Which classification has cluster 7? 
bc.tl.sig.match_cluster(adata,'leiden','7','celltype3',0.3)

In [None]:
### What cluster corresponds to CD1c-positive myeloid dendritic cell? 
bc.tl.sig.match_cluster(adata,'celltype3','CD1c-positive myeloid dendritic cell','leiden',0.3) ## lowered cutoff

## Manual refinement

Manual refinement and attribution of resource, fresh effector, better effector and exhausted 
* resource: TCF1+ (Tcf7) PD1+
* fresh effector: TCF1- PD1+ IL18R+ TIM3- (KI67+ GZMB+)
* better effector: TCF1- PD1+ IL18R++ TIM3+ (KI67+ GZMB+)
* exhausted: TCF1- PD1++ IL18R- TIM3++ CD39+ (KI67+ GZMB+)

In [None]:
### Example 2: manual attribution to specific cluster
adata.obs['celltype1']=adata.obs['celltype1'].cat.add_categories('T-helper 17 cell')
adata.obs['celltype2']=adata.obs['celltype2'].cat.add_categories('T-helper 17 cell')
adata.obs['celltype3']=adata.obs['celltype3'].cat.add_categories('T-helper 17 cell')


In [None]:

adata.obs.loc[adata.obs[clusters].isin(['22']),'celltype1']='T cell'
adata.obs.loc[adata.obs[clusters].isin(['22']),'celltype2']='T-helper 17 cell'
adata.obs.loc[adata.obs[clusters].isin(['22']),'celltype3']='T-helper 17 cell'

In [None]:
adata.obs['celltype1']=adata.obs['celltype1'].cat.add_categories('myeloid T cell doublet')
adata.obs['celltype2']=adata.obs['celltype2'].cat.add_categories('myeloid T cell doublet')
adata.obs['celltype3']=adata.obs['celltype3'].cat.add_categories('myeloid T cell doublet')

adata.obs.loc[adata.obs[clusters].isin(['19','7']),'celltype1']='myeloid T cell doublet'
adata.obs.loc[adata.obs[clusters].isin(['19','7']),'celltype2']='myeloid T cell doublet'
adata.obs.loc[adata.obs[clusters].isin(['19','7']),'celltype3']='myeloid T cell doublet'

In [None]:
# Manual attribution: cluster 20 -> regulatory T cell (finest annotation level only)
treg_label = 'regulatory T cell'
# add_categories raises when the category already exists (e.g. because the automated
# annotation already used it, or because this cell is re-run), so only add it when missing
if treg_label not in adata.obs['celltype3'].cat.categories:
    adata.obs['celltype3'] = adata.obs['celltype3'].cat.add_categories(treg_label)
adata.obs.loc[adata.obs[clusters].isin(['20']), 'celltype3'] = treg_label

In [None]:
# Manual attribution: cluster 1 -> exhausted-like CD8 T cell (finest annotation level only)
exhausted_like_label = 'exhausted-like CD8-positive, alpha-beta T cell'
# Assigning a value that is not yet a category of the categorical column raises,
# so register the label first if the automated annotation did not produce it
if exhausted_like_label not in adata.obs['celltype3'].cat.categories:
    adata.obs['celltype3'] = adata.obs['celltype3'].cat.add_categories(exhausted_like_label)
adata.obs.loc[adata.obs[clusters].isin(['1']), 'celltype3'] = exhausted_like_label

In [None]:
#### Drop categories that no longer label any cell after the manual refinement
for level_name in ('celltype3', 'celltype2', 'celltype1', 'celltype0'):
    adata.obs[level_name] = adata.obs[level_name].cat.remove_unused_categories()

In [None]:
adata.obs['celltype']=adata.obs['celltype3'].copy()

In [None]:
sc.pl.umap(adata,color='celltype')

In [None]:
sc.pl.embedding(adata,color=['leiden'], basis = umap_basis,legend_loc='on data') 

In [None]:
adata.obs['celltype']=adata.obs['celltype'].cat.add_categories('IFNa CD8-positive, alpha-beta T cell')
adata.obs.loc[adata.obs[clusters].isin(['14']),'celltype']='IFNa CD8-positive, alpha-beta T cell'

In [None]:
adata.obs['celltype']=adata.obs['celltype'].cat.add_categories('resource CD8-positive, alpha-beta T cell')
adata.obs.loc[adata.obs[clusters].isin(['6']),'celltype']='resource CD8-positive, alpha-beta T cell'

In [None]:
adata.obs['celltype']=adata.obs['celltype'].cat.add_categories('CCL3-positive effector CD8-positive, alpha-beta T cell')
adata.obs.loc[adata.obs[clusters].isin(['11']),'celltype']='CCL3-positive effector CD8-positive, alpha-beta T cell'

In [None]:
adata.obs['celltype']=adata.obs['celltype'].cat.add_categories('fresh effector CD8-positive, alpha-beta T cell')
adata.obs.loc[adata.obs[clusters].isin(['0','12', '2']),'celltype']='fresh effector CD8-positive, alpha-beta T cell'

In [None]:
adata.obs['celltype']=adata.obs['celltype'].cat.add_categories('better T effector CD8-positive, alpha-beta T cell')
adata.obs.loc[adata.obs[clusters].isin(['4','3','15']),'celltype']='better T effector CD8-positive, alpha-beta T cell'

In [None]:
adata.obs['celltype']=adata.obs['celltype'].cat.add_categories('exhausted CD8-positive, alpha-beta T cell')
adata.obs.loc[adata.obs[clusters].isin(['8','10']),'celltype']='exhausted CD8-positive, alpha-beta T cell'

In [None]:
adata.obs['celltype']=adata.obs['celltype'].cat.add_categories('terminally exhausted CD8-positive, alpha-beta T cell')
adata.obs.loc[adata.obs[clusters].isin(['1']),'celltype']='terminally exhausted CD8-positive, alpha-beta T cell'

In [None]:
adata.obs['celltype']=adata.obs['celltype'].cat.add_categories('HSP-positive CD8-positive, alpha-beta T cell')
adata.obs.loc[adata.obs[clusters].isin(['17']),'celltype']='HSP-positive CD8-positive, alpha-beta T cell'

In [None]:
# Cluster 15 was labelled 'better T effector CD8-positive, alpha-beta T cell' in an earlier cell;
# this assignment overrides that label for cluster 15
adata.obs['celltype']=adata.obs['celltype'].cat.add_categories('BMP7-positive effector CD8-positive, alpha-beta T cell')
adata.obs.loc[adata.obs[clusters].isin(['15']),'celltype']='BMP7-positive effector CD8-positive, alpha-beta T cell'

In [None]:
adata.obs['celltype']=adata.obs['celltype'].cat.add_categories('GZMs-positive effector CD8-positive, alpha-beta T cell')
adata.obs.loc[adata.obs[clusters].isin(['16']),'celltype']='GZMs-positive effector CD8-positive, alpha-beta T cell'

In [None]:
#### Make sure to remove obsolete categories
adata.obs['celltype']=adata.obs['celltype'].cat.remove_unused_categories()


In [None]:
sc.pl.umap(adata,color='celltype')

In [None]:
set(adata[adata.obs['celltype']=='exhausted-like CD8-positive, alpha-beta T cell'].obs['leiden'])

In [None]:
sc.tl.dendrogram(adata,groupby='celltype')

In [None]:
goi=['Ptprc','Trac','Cd3d','Cd8a','Cd44','CD44_Cd44','Cd4','Foxp3','Lrrc32','Il2ra','CD25_Il2ra',
     'Il7r','IL7RA_Il7r','Cd28','CD28_Cd28',
         'Slamf6','SLAMF6_Slamf6','Sell','CD62L_Sell',
                            'Tcf7','Cxcr5','CXCR5_Cxcr5','Cxcr3','CXCR3_Cxcr3','S1pr1','S1pr5','Esm1','Klrg1','Il18r1',
     'Pdcd1','PD1_Pdcd1','Havcr2','TIM3_Havcr2','Tnfrsf9','41BB_Tnfrsf9','Lag3','LAG3_Lag3',
      'Tigit','TIGIT_Tigit',
     'Entpd1','CD39_Entpd1','Tox','Tox2','Greb1','Gzmk',
     'Il10','Gzmb','Klrc2','Lncpint','Ifitm1','Ifitm2','Iigp1','Oas3','Klrc3','Ccl1','Ccl3','Crtam','Cd83','Gzme','Gzmd','Gzmf',
     'Ifit1','Isg15','Cxcl10','Xaf1',
     'Il21r','IL21R_Il21r','Xcl1','Igfbp7','Slc15a1','Bace2','Ltf','Mrc2','Bmp7','Fcrl6',
     'Mki67','Stmn1','Rorc',
     'Cd163l1','Il17a','Csf1r','Msr1','Apoe','Hspa1a','Dnajb1','Hspb1','Bag3']
sc.pl.matrixplot(adata, var_names=goi, groupby='celltype',standard_scale='var',
                 dendrogram=True, save='Heatmap-goi-celltype.pdf')

In [None]:
goi=['Ptprc','Trac','Cd3d','Cd8a','Cd44','CD44_Cd44','Cd4','Foxp3','Lrrc32','Il2ra','CD25_Il2ra',
     'Il7r','IL7RA_Il7r','Cd28','CD28_Cd28',
         'Slamf6','SLAMF6_Slamf6','Sell','CD62L_Sell',
                            'Tcf7','Cxcr5','CXCR5_Cxcr5','Cxcr3','CXCR3_Cxcr3','S1pr1','S1pr5','Esm1','Klrg1','Il18r1',
     'Pdcd1','PD1_Pdcd1','Havcr2','TIM3_Havcr2','Tnfrsf9','41BB_Tnfrsf9','Lag3','LAG3_Lag3',
      'Tigit','TIGIT_Tigit',
     'Entpd1','CD39_Entpd1','Tox','Tox2','Greb1','Gzmk',
     'Il10','Gzmb','Klrc2','Lncpint','Ifitm1','Ifitm2','Iigp1','Oas3','Klrc3','Ccl1','Ccl3','Crtam','Cd83','Gzme','Gzmd','Gzmf',
     'Ifit1','Isg15','Cxcl10','Xaf1',
     'Il21r','IL21R_Il21r','Xcl1','Igfbp7','Slc15a1']
sc.pl.matrixplot(adata, var_names=goi, groupby='celltype',
                 standard_scale='var',dendrogram=True, save='Heatmap-goi-celltype-red.pdf')

In [None]:
sc.pl.matrixplot(adata, var_names=list(set(sigs['resourceCD8Tcellext']['UP']).union(set(sigs['resourceCD8Tcellext']['DN']))), 
                 groupby='celltype',standard_scale='var',dendrogram=True, save='Heatmap-resource-celltype.pdf')



In [None]:
sc.pl.matrixplot(adata, var_names=yost_exh, 
                 groupby='celltype',standard_scale='var',dendrogram=True, save='Heatmap-yost_exh-celltype.pdf')


In [None]:
sc.pl.matrixplot(adata, var_names=yost_act, 
                 groupby='celltype',standard_scale='var',dendrogram=True, save='Heatmap-yost_act-celltype.pdf')



In [None]:
subtab=adata.obs.loc[:,['Activation_Yost','Exhaustion_Yost', 'score_ExhCD8Tcell_scanpy','sample_id','treatment_id']].copy()
subtabl=adata.obs.loc[:,['Activation_Yost','Exhaustion_Yost', 'score_ExhCD8Tcell_scanpy','score_resourceCD8Tcell_scanpy',
                         'score_resourceCD8Tcellext_scanpy','resourceCD8Tcell_UP','resourceCD8Tcell_DOWN',
                         'score_NaiCD8Tcell_scanpy','score_Ifna_scanpy','score_Th17_scanpy','score_RegTcell_scanpy',
                         'score_ProlifCD8Tcell_scanpy','score_Myeloid_scanpy','sample_id','leiden']].copy()
subtabanno=adata.obs.loc[:,['leiden','celltype']].drop_duplicates()

In [None]:

# Mean score per sample.
# numeric_only=True skips the non-numeric 'treatment_id' column: pandas >= 2.0 raises
# TypeError when asked to average it, while older versions dropped it silently.
subtabMeans = subtab.groupby('sample_id').mean(numeric_only=True)
# Treatment label = the part of the sample id before "_tum"
# (assumes ids look like "<treatment>_tum..." -- TODO confirm against the sample sheet).
subtabMeans['Treat'] = [sample.split("_tum")[0] for sample in subtabMeans.index]

# Mean score per (sample, leiden cluster); the index keys are copied into plain columns.
subtabMeansl = subtabl.groupby(['sample_id', 'leiden']).mean(numeric_only=True)
subtabMeansl['Treat'] = [sample.split("_tum")[0] for sample, _ in subtabMeansl.index]
subtabMeansl['cluster'] = [cluster for _, cluster in subtabMeansl.index]


In [None]:
# Lookup table: annotated cell type for every leiden cluster.
# The previous expression filtered on a variable `x` that is not defined at cell level
# (comprehension variables do not leak), so it raised NameError on a fresh kernel.
subtabanno.set_index('leiden')['celltype']

In [None]:
# Average the per-sample cluster means over samples -> one row per cluster.
# numeric_only=True keeps the string column 'Treat' out of the mean (TypeError on pandas >= 2.0).
tmp = subtabMeansl.groupby('cluster').mean(numeric_only=True)

# Row labels "<cluster>-<cell type>". `.iloc[0]` takes the first matching annotation by
# position; plain `[0]` on this label-indexed Series relied on a deprecated positional fallback.
tmp.index = [
    cluster + '-' + subtabanno.loc[subtabanno['leiden'] == cluster, 'celltype'].iloc[0]
    for cluster in tmp.index
]

# standard_scale=1 rescales each column (signature); only rows are clustered.
fig = sns.clustermap(tmp, standard_scale=1, col_cluster=False, figsize=(6, 10))
fig.savefig(figdir + "Heatmap-signatures-resourceFocus.png")

In [None]:
subtabl=adata.obs.loc[:,['Activation_Yost','Exhaustion_Yost', 'score_ExhCD8Tcell_scanpy','score_resourceCD8Tcell_scanpy',
                         'score_resourceCD8Tcellext_scanpy','resourceCD8Tcell_UP','resourceCD8Tcell_DOWN',
                         'score_NaiCD8Tcell_scanpy','score_Ifna_scanpy','score_Th17_scanpy','score_RegTcell_scanpy',
                         'score_ProlifCD8Tcell_scanpy','score_Myeloid_scanpy','sample_id','celltype']].copy()

subtabMeansl=subtabl.groupby(['sample_id', 'celltype']).mean()
subtabMeansl['Treat']=[x[0].split("_tum")[0] for x in list(subtabMeansl.index)]
subtabMeansl['cluster']=[x[1] for x in list(subtabMeansl.index)]


In [None]:
subtabMeansl

In [None]:
sorted_index_desc = subtabMeansl.groupby('cluster').mean()['Activation_Yost'].sort_values(ascending=False).index

sns.set(rc={'figure.figsize':(11.7,8.27)})
sns.set_style('ticks')
sns.boxplot(x="Activation_Yost",y="cluster",data=subtabMeansl, order=sorted_index_desc, color='lightgray')
sns.swarmplot(x="Activation_Yost",y="cluster",hue='Treat',data=subtabMeansl, order=sorted_index_desc)


In [None]:
sns.set(rc={'figure.figsize':(11.7,8.27)})
sns.set_style('ticks')
sns.boxplot(x="Exhaustion_Yost",y="cluster",data=subtabMeansl, order=sorted_index_desc, color='lightgray')
sns.swarmplot(x="Exhaustion_Yost",y="cluster",hue='Treat',data=subtabMeansl, order=sorted_index_desc)


In [None]:
sorted_index_desc = subtabMeansl.groupby('cluster').mean()['Exhaustion_Yost'].sort_values(ascending=False).index

sns.set(rc={'figure.figsize':(11.7,8.27)})
sns.set_style('ticks')
sns.boxplot(x="Exhaustion_Yost",y="cluster",data=subtabMeansl, order=sorted_index_desc, color='lightgray')
sns.swarmplot(x="Exhaustion_Yost",y="cluster",hue='Treat',data=subtabMeansl, order=sorted_index_desc)


In [None]:

sns.set(rc={'figure.figsize':(11.7,8.27)})
sns.set_style('ticks')
sns.boxplot(x="score_ExhCD8Tcell_scanpy",y="cluster",data=subtabMeansl, order=sorted_index_desc, color='lightgray')
sns.swarmplot(x="score_ExhCD8Tcell_scanpy",y="cluster",hue='Treat',data=subtabMeansl, order=sorted_index_desc)


In [None]:

sorted_index_desc = subtabMeansl.groupby('cluster').mean()['score_ExhCD8Tcell_scanpy'].sort_values(ascending=False).index

sns.set(rc={'figure.figsize':(11.7,8.27)})
sns.set_style('ticks')
sns.boxplot(x="score_ExhCD8Tcell_scanpy",y="cluster",data=subtabMeansl, order=sorted_index_desc, color='lightgray')
sns.swarmplot(x="score_ExhCD8Tcell_scanpy",y="cluster",hue='Treat',data=subtabMeansl, order=sorted_index_desc)

In [None]:
set(adata[adata.obs['celltype3']=='exhausted-like CD8-positive, alpha-beta T cell'].obs['leiden'])

In [None]:
set(adata[adata.obs['celltype']=='exhausted CD8-positive, alpha-beta T cell'].obs['leiden'])

In [None]:
adata.obs['celltype3']=adata.obs['celltype3'].cat.add_categories('CD8-positive, alpha-beta resource T cell')
adata.obs.loc[adata.obs[clusters].isin(['6']),'celltype3']='CD8-positive, alpha-beta resource T cell'

adata.obs['celltype3']=adata.obs['celltype3'].cat.add_categories('CD8-positive, alpha-beta cytotoxic T cell')
adata.obs.loc[adata.obs[clusters].isin(['0','14','15','16','17','2','3','4']),'celltype3']='CD8-positive, alpha-beta cytotoxic T cell'




In [None]:
adata.obs['celltype3']=adata.obs['celltype3'].cat.add_categories('CD8-positive, alpha-beta T cell')
adata.obs.loc[adata.obs[clusters].isin(['14']),'celltype3']='CD8-positive, alpha-beta T cell'



In [None]:
sc.pl.umap(adata, color=['celltype3'],ncols=1)

In [None]:

#### Make sure to remove obsolete categories
adata.obs['celltype3']=adata.obs['celltype3'].cat.remove_unused_categories()


### Save annotation result and export labelling

Chosen labels can also be exported as a new folder in labelings/

In [None]:
### Attribute the cell annotation level of choice (typically the highest resolution one, if estimated to be reliable) to dblabel
# dblabel becomes a copy of the celltype3 column.
adata.obs['dblabel']=adata.obs['celltype3']

### Export file for future reference
# Writes the annotated object to results_file (path defined earlier in the notebook).
adata.write(results_file)
#adata=sc.read(results_file)

In [None]:
#adata.write(results_file)

In [None]:
# Reload the object from disk; everything below works on the reloaded copy.
adata=sc.read(results_file)

In [None]:
# Gene sets from https://www.pnas.org/content/116/28/14113
jadhav_mem = ['Sell', 'Il7r', 'Bcl2', 'Klrg1', 'Il2rb']
# 'Tnfsf10' was misspelled 'Tnsf10', which matches no gene symbol and therefore
# never contributed to the Effector_j score.
jadhav_eff = ['Ifng', 'Tnf', 'Il2', 'Il21', 'Il6', 'Gzma', 'Gzmb', 'Prf1', 'Fasl', 'Tnfsf10']
jadhav_stem = ['Nsg2', 'P2rx7', 'Lrig1', 'Tcf7', 'Aff3', 'Crtam', 'Kbtbd11', 'Nt5e',
               'Cxcr5', 'Cxcl10', 'Cd83', 'Ccr7', 'Traf1']
jadhav_exh = ['Chn2', 'Lilr4b', 'Pim1', 'Slamf1', 'Prdm1', 'Gzmb', 'Fasl', 'Tmcc3',
              '1700017B05Rik', 'Il10', 'Lgals3', 'Ccl3', 'Ccl4', 'Cd48']

# One score column in adata.obs per Jadhav gene set.
# Note that the stem gene set is stored under the name 'Resource_j'.
for score_name, gene_set in [
    ('Memory_j', jadhav_mem),
    ('Effector_j', jadhav_eff),
    ('Resource_j', jadhav_stem),
    ('Exhausted_j', jadhav_exh),
]:
    sc.tl.score_genes(adata, gene_list=gene_set, score_name=score_name)

In [None]:
# Granzyme gene lists: a selected subset and the full list.
gzmessel = [
    'Gzma', 'Gzmb', 'Gzmc', 'Gzmf',
]
gzmesall = [
    'Gzma', 'Gzmb', 'Gzmc',
    'Gzme', 'Gzmd', 'Gzmf',
    'Gzmk',
]

In [None]:
# Score the selected granzyme genes; the result is stored in adata.obs['Gzms'].
sc.tl.score_genes(
    adata,
    score_name='Gzms',
    gene_list=gzmessel,
)


### Simplified annotation & focus on CD8 T cells and specific conditions

In [None]:
# Long score table (all signature scores) per cell, with sample and leiden cluster.
subtabl = adata.obs.loc[:, [
    'Memory_j', 'Effector_j', 'Resource_j', 'Exhausted_j',
    'Activation_Yost', 'Exhaustion_Yost',
    'score_ExhCD8Tcell_scanpy', 'score_CytotoxCD8Tcell_scanpy',
    'score_resourceCD8Tcell_scanpy', 'score_resourceCD8Tcellext_scanpy',
    'resourceCD8Tcell_UP', 'resourceCD8Tcell_DOWN',
    'score_CD4_scanpy', 'score_CD8_scanpy', 'score_EMCD8Tcell_scanpy',
    'score_NaiCD8Tcell_scanpy', 'score_Ifna_scanpy', 'score_Th17_scanpy',
    'score_RegTcell_scanpy', 'score_ProlifCD8Tcell_scanpy', 'score_Myeloid_scanpy',
    'sample_id', 'leiden',
]].copy()

# Mean score per leiden cluster.
# 'sample_id' is not a grouping key here, so it would be averaged too; numeric_only=True
# excludes it (pandas >= 2.0 raises TypeError instead of dropping it silently).
subtabMeansl = subtabl.groupby(['leiden']).mean(numeric_only=True)


In [None]:
sc.pl.umap(adata, color=['leiden'],legend_loc='on data')

In [None]:
cls=['5','6','0','14','12','11','15','10','3','2','4','17','20','16','13','9','8','1','7','22','19','18','21']

In [None]:
# Heatmap of mean scores per cluster. Rows follow `cls` and columns the list below;
# neither axis is clustered and the colour range is fixed to [-0.5, 1].
heatmap_scores = [
    'Memory_j', 'Effector_j', 'Resource_j', 'Exhausted_j',
    'score_CytotoxCD8Tcell_scanpy', 'score_EMCD8Tcell_scanpy',
    'score_ExhCD8Tcell_scanpy', 'score_resourceCD8Tcell_scanpy',
    'score_NaiCD8Tcell_scanpy', 'score_ProlifCD8Tcell_scanpy',
    'score_CD4_scanpy', 'score_CD8_scanpy',
    'score_RegTcell_scanpy', 'score_Myeloid_scanpy',
    'score_Ifna_scanpy',
]
sns.clustermap(
    subtabMeansl.loc[cls, heatmap_scores],
    row_cluster=False,
    col_cluster=False,
    figsize=(7, 12),
    vmin=-0.5,
    vmax=1,
)

In [None]:
cls=['5','6','14','12','11','3','2','4','17','16','0','13','9','15','10','8','1']
subtabMeansllim=subtabMeansl.loc[cls,:].copy()

In [None]:

sns.clustermap(subtabMeansllim.loc[cls,['Memory_j','Effector_j','Resource_j','Exhausted_j','score_CytotoxCD8Tcell_scanpy','score_EMCD8Tcell_scanpy','score_ExhCD8Tcell_scanpy','score_resourceCD8Tcell_scanpy','score_NaiCD8Tcell_scanpy','score_ProlifCD8Tcell_scanpy',
                                   'score_CD4_scanpy','score_CD8_scanpy',
                                   'score_Ifna_scanpy']],col_cluster=False,row_cluster=False,figsize=(5,12), 
               standard_scale=1)

In [None]:
# Genes / features of interest for the per-cluster heatmap.
# '41BB_Tnfrsf9' and 'LAG3_Lag3' were listed twice; each marker now appears once so the
# plot has no duplicated columns.
goi = [
    'Foxp3', 'CD25_Il2ra', 'Cd8a', 'CD62L_Sell', 'Lef1', 'IL7RA_Il7r', 'CD28_Cd28',
    'Tcf7', 'CXCR5_Cxcr5', 'SLAMF6_Slamf6', 'CXCR3_Cxcr3',
    'PD1_Pdcd1', '41BB_Tnfrsf9', 'LAG3_Lag3', 'TIM3_Havcr2', 'TIGIT_Tigit',
    'CD39_Entpd1', 'Tox', 'Tox2', 'Gzmk',
    'Il10', 'Gzmb', 'Gzma', 'Klrc2', 'Bmpr2', 'Igf2r', 'Sema6d',
    'Mki67', 'Stmn1',
]
# Expression of `goi` in the selected clusters, rows in the order given by `cls`.
sc.pl.matrixplot(
    adata[adata.obs['leiden'].isin(cls)],
    var_names=goi,
    groupby='leiden',
    categories_order=cls,
    standard_scale='var',
)

In [None]:
# Short marker panel, assembled from four groups in the original order.
goifocus = (
    ['IL7RA_Il7r', 'Il7r', 'Bcl2']
    + ['Tcf7', 'CXCR5_Cxcr5', 'S1pr1', 'Klrg1']
    + ['PD1_Pdcd1', 'Pdcd1', 'TIM3_Havcr2', 'Havcr2']
    + ['Tox', 'Tox2', 'Il10', 'Ccl3', 'Ccl4', 'Gzmb', 'Gzmf', 'Gzma']
)

In [None]:
# Expression of the short marker panel in the selected clusters.
sc.pl.matrixplot(
    adata[adata.obs['leiden'].isin(cls)],
    var_names=goifocus,
    groupby='leiden',
    categories_order=cls,
    standard_scale='var',
)

In [None]:
# Marker panel used for the figures from here on (this replaces the earlier `goi`).
goi = [
    'CD62L_Sell', 'Lef1', 'IL7RA_Il7r', 'CD28_Cd28',
    'Tcf7', 'CXCR5_Cxcr5', 'SLAMF6_Slamf6', 'CXCR3_Cxcr3',
    'PD1_Pdcd1', '41BB_Tnfrsf9', 'LAG3_Lag3', 'TIM3_Havcr2', 'TIGIT_Tigit',
    'Entpd1', 'Tox', 'Tox2', 'Nr4a2',
    'Gzmk', 'Gzmb', 'Gzmf', 'Gzmc', 'Gzme', 'Gzma',
    'Lamp1', 'Prf1',
]
# Extended panel: additional genes before and after the core markers.
goiext = [
    'Cd3d', 'Cd3e', 'Cd8a', 'Cd4', 'Cd14', 'Foxp3',
    'CD62L_Sell', 'Lef1', 'Bach2', 'IL7RA_Il7r', 'CD28_Cd28',
    'Tcf7', 'CXCR5_Cxcr5', 'SLAMF6_Slamf6', 'CXCR3_Cxcr3', 'Klf2', 'S1pr1',
    'PD1_Pdcd1', '41BB_Tnfrsf9', 'LAG3_Lag3', 'TIM3_Havcr2', 'TIGIT_Tigit',
    'Entpd1', 'Tox', 'Tox2',
    'Gzmk', 'Gzmb', 'Gzmf', 'Gzmc', 'Gzme', 'Gzma',
    'Lamp1', 'Prf1', 'Stmn1', 'Mki67',
]
sc.pl.matrixplot(
    adata[adata.obs['leiden'].isin(cls)],
    var_names=goi,
    groupby='leiden',
    categories_order=cls,
    standard_scale='var',
    save='-selmarkerexpression-leiden-selclusters.pdf',
)

In [None]:
# Extended marker panel, split over several sub-lists for readability; order is unchanged.
goinewext = (
    ['Cd3d', 'Cd3e', 'Cd8a', 'Cd4', 'Cd14', 'Foxp3', 'Il2ra', 'Il17a', 'Il23r']
    + ['Stmn1', 'Mki67', 'Klf2', 'S1pr1', 'Tcf7']
    + ['CXCR5_Cxcr5', 'Ifngr1', 'Bcl2', 'IL7RA_Il7r', 'Il7r']
    + ['Lrig1', 'PD1_Pdcd1', 'Pdcd1', 'Fasl', 'Gzma', 'Gzmb', 'Gzmf', 'Lamp1']
    + ['Tbx21', 'Itga1', 'Il2rg', 'Il18r1', 'Il18rap', 'Kit', 'Tnf', 'Ifng']
    + ['Lag3', 'Tnfrsf9', 'TIM3_Havcr2', 'Havcr2', 'TIGIT_Tigit']
    + ['Tox', 'Tox2', 'Il10', 'Ccl3', 'Ccl4']
)



In [None]:
#itgae Prdm1 Lamp1

In [None]:
# Small gene panel: three Lamp genes plus Adgrg1.
goiv = [
    'Lamp1',
    'Lamp2',
    'Lamp3',
    'Adgrg1',
]

In [None]:
sc.pl.matrixplot(adata, 
                 dendrogram=True,
                 var_names=goinewext, 
                 groupby='leiden',standard_scale='var', save='-selmarkerexpression-leiden-all.pdf')

In [None]:
sc.pl.matrixplot(adata, 
                 categories_order=['5','6','14','12','11','3','2','4','17','16','0','13','9','15','10','8','1','20','7','22','19','18','21'],var_names=goiext, 
                 groupby='leiden',standard_scale='var', save='-selmarkerexpression-leiden-all.pdf')

In [None]:
clsk=['6','14','12','11','3','2','4','17','16','0','13','9','15','10','8','1']

In [None]:
adata.obs['celltype_simple']='other'
adata.obs['celltype_new']='other'

In [None]:
#tmp.obs['celltype_simple']=tmp.obs['leiden'].copy()
sc.pl.umap(adata,color='celltype_simple')

In [None]:
sc.pl.umap(adata,color='celltype_new')

In [None]:
adata.obs['celltype_simple']=adata.obs['celltype_simple'].cat.add_categories('resource')
adata.obs['celltype_simple']=adata.obs['celltype_simple'].cat.add_categories('naive')

adata.obs['celltype_simple']=adata.obs['celltype_simple'].cat.add_categories('effcytotox')
adata.obs['celltype_simple']=adata.obs['celltype_simple'].cat.add_categories('effmem')
adata.obs['celltype_simple']=adata.obs['celltype_simple'].cat.add_categories('effexh')


In [None]:
adata.obs.loc[adata.obs[clusters].isin(['5']),'celltype_simple']='naive'
adata.obs.loc[adata.obs[clusters].isin(['6']),'celltype_simple']='resource'
adata.obs.loc[adata.obs[clusters].isin(['14','12','3','4','17','16','14','12']),'celltype_simple']='effcytotox'

adata.obs.loc[adata.obs[clusters].isin(['0','15','10','8','9','13','2']),'celltype_simple']='effmem'
adata.obs.loc[adata.obs[clusters].isin(['1','11']),'celltype_simple']='effexh'


In [None]:
# One UMAP of the simplified annotation per treatment selection:
# first the three PD1-containing arms together, then every arm on its own.
treatment_selections = [
    ['PD1', 'PD1-IL2v', 'FAP-IL2v_PD1'],
    ['Vehicle'],
    ['PD1'],
    ['FAP-IL2v'],
    ['PD1-IL2v'],
    ['FAP-IL2v_PD1'],
]
for selection in treatment_selections:
    sc.pl.umap(adata[adata.obs['treatment_id'].isin(selection)] ,color='celltype_simple')

In [None]:
tmp=adata[adata.obs['leiden'].isin(cls)].copy()

In [None]:
sc.pl.umap(adata,color='celltype_simple', save='-annotation-celltype_simple-adata.pdf')

In [None]:
sc.pl.umap(adata,groups='effcytotox',color='celltype_simple', save='-annotation-celltype_simple-adata.svg')

In [None]:
sc.pl.umap(tmp,color='celltype_simple', save='-annotation-celltype_simple-selclusters.pdf')

In [None]:
sc.pl.umap(tmp,groups='effcytotox', color='celltype_simple', save='-annotation-celltype_simple-selclusters.svg')

In [None]:
tmp.obs['celltype_simple']=tmp.obs['celltype_simple'].cat.remove_unused_categories()

In [None]:
sc.pl.umap(tmp,color='leiden',legend_loc='on data', save='-annotation-leiden-selclusters.pdf')

In [None]:
coiorder=['naive','resource','effmem','effcytotox','effexh']

In [None]:
sc.pl.matrixplot(tmp, var_names=goi, categories_order=coiorder,
                 groupby='celltype_simple',standard_scale='var', save='-selmarkerexpression-celltype_simple-selclusters.pdf')

In [None]:
sc.pl.matrixplot(tmp, 
                 categories_order=coiorder,
                 var_names=goiv, 
                 groupby='celltype_simple',standard_scale='var')

In [None]:
sc.pl.matrixplot(tmp[tmp.obs['celltype_simple']!='naive'], var_names=goi, categories_order=['resource','effmem','effcytotox','effexh'],
                 groupby='celltype_simple',standard_scale='var')

In [None]:
tmp.obs['celltype_treatment']=tmp.obs['treatment_id'].astype(str)+' | '+tmp.obs['celltype_simple'].astype(str)

In [None]:
tmp.obs['leiden_treatment']=tmp.obs['treatment_id'].astype(str)+' | '+tmp.obs['leiden'].astype(str)

In [None]:
sc.pl.matrixplot(tmp, var_names=goiv, dendrogram=True,
                 groupby='celltype_treatment',standard_scale='var', vmax=0.8)

In [None]:
sc.pl.matrixplot(tmp, var_names=goi, dendrogram=True,
                 groupby='celltype_treatment',standard_scale='var', vmax=0.8)

In [None]:
mysub=tmp[tmp.obs['celltype_simple'].isin(['effmem','effcytotox','effexh'])].copy()

In [None]:
sc.tl.dendrogram(mysub, groupby='celltype_treatment')

In [None]:
sc.pl.matrixplot(tmp, var_names=goiext,dendrogram=True,
                 groupby='celltype_treatment',standard_scale='var')

In [None]:
sc.pl.matrixplot(tmp, var_names=goiext, dendrogram=True,
                 groupby='leiden_treatment',standard_scale='var')

In [None]:
sc.pl.matrixplot(tmp, var_names=goiext, 
                 groupby='leiden',standard_scale='var')

In [None]:
# One UMAP per treatment, highlighting only that treatment's cells.
# The treatments are sorted so the figures appear in the same order on every run
# (iterating a set of strings is not order-stable between kernel restarts).
mytreats = sorted(set(tmp.obs['treatment_id']))
for treat in mytreats:
    sc.pl.umap(tmp, groups=[treat],
               color='treatment_id', save='-treatmentID-'+treat+'.selclusters.pdf')

In [None]:
sc.pl.umap(tmp, color=goiv)

In [None]:
sc.pl.dotplot(tmp, var_names=goiv+['Gzmb','Gzma','Gzmk','Gzmc','Gzmd','Gzme','Gzmf'], groupby='celltype_simple')

In [None]:
sc.pl.umap(tmp, color=['Gzmb','Gzma','Gzmk','Gzmc','Gzmd','Gzme','Gzmf','Tox','Pdcd1', 'PD1_Pdcd1','CXCR5_Cxcr5'], 
           save='-gzms-individual.selclusters.pdf')

In [None]:
sc.pl.umap(tmp[tmp.obs['treatment_id'].isin(['PD1',  'FAP-IL2v_PD1', 'PD1-IL2v'])], 
           color='celltype_simple')

In [None]:
toplot=tmp[tmp.obs['treatment_id'].isin(['PD1',  'FAP-IL2v_PD1', 'PD1-IL2v'])].copy()

In [None]:
sc.pl.umap(toplot,groups='effcytotox',color='celltype_simple', save='-annotation-celltype_simple-selclusters-3Tonly.svg')

In [None]:
jadhav_mem

In [None]:
jadhav_eff

In [None]:
jadhav_stem

In [None]:
DEgenes=bc.tl.dge.get_de(tmp[tmp.obs['treatment_id'].isin(['Vehicle'])],'leiden',demethod='wilcoxon',topnr=5000, logfc=1,padj=0.05)


In [None]:
DEgenes2=bc.tl.dge.get_de(tmp[tmp.obs['treatment_id'].isin(['Vehicle'])],'celltype_simple',demethod='wilcoxon',topnr=5000, logfc=1,padj=0.05)


#### Create list of genes specific for cluster 6 (stem-like cells).  

In [None]:
topsa=list(DEgenes['6'].sort_values('Log2FC',ascending=False)['Name'][0:50])
topsb=list(DEgenes2['resource'].sort_values('Log2FC',ascending=False)['Name'][0:50])

In [None]:
n1=list(DEgenes2['effmem'].sort_values('Log2FC',ascending=False)['Name'][0:100])
n2=list(DEgenes2['effexh'].sort_values('Log2FC',ascending=False)['Name'][0:100])
n3=list(DEgenes2['effcytotox'].sort_values('Log2FC',ascending=False)['Name'][0:100])
n4=list(DEgenes2['naive'].sort_values('Log2FC',ascending=False)['Name'][0:50])

In [None]:
tops=list(set(topsa).intersection(set(topsb))-set(n1)-set(n2)-set(n3)-set(n4))

In [None]:
tops

In [None]:
pd.Series(tops).to_csv(figdir+"res_c6_specific_genes.tsv", sep='\t')

In [None]:
#tops=list(DEgenes['6'].sort_values('Log2FC',ascending=False)['Name'][0:50])
sc.pl.dotplot(tmp, var_names=tops,groupby=clusters, dot_max=0.6)

In [None]:
# Marker panel for the treatment comparisons, split into sub-lists; order is unchanged.
goinew = (
    ['Klf2', 'S1pr1', 'Tcf7', 'CXCR5_Cxcr5', 'Ifngr1', 'Bcl2', 'IL7RA_Il7r', 'Il7r']
    + ['Lrig1', 'PD1_Pdcd1', 'Pdcd1', 'Fasl', 'Gzma', 'Gzmb', 'Gzmf', 'Lamp1']
    + ['Tbx21', 'Itga1', 'Il2rg', 'Il18r1', 'Il18rap', 'Kit', 'Tnf', 'Ifng']
    + ['Lag3', 'Tnfrsf9', 'TIM3_Havcr2', 'Havcr2', 'TIGIT_Tigit']
    + ['Tox', 'Tox2', 'Il10', 'Ccl3', 'Ccl4']
)



In [None]:
sc.pl.matrixplot(toplot, var_names=tops, 
                 categories_order=['naive','resource','effmem','effcytotox','effexh'],
                 groupby='celltype_simple',standard_scale='var')

In [None]:
sc.pl.umap(toplot, color='leiden')

In [None]:
tst=adata[adata.obs['celltype_simple']!='naive'].copy()
tst=tst[tst.obs['celltype_simple']!='other'].copy()

In [None]:
sc.pl.matrixplot(tst, var_names=goinew, 
                 groupby='treatment_id',standard_scale='var')

In [None]:
pd1plus=tst[tst.raw[:,'PD1_Pdcd1'].X>0.5].copy()
pd1plus=pd1plus[pd1plus.raw[:,'Pdcd1'].X>0].copy()

In [None]:
sc.pl.dotplot(pd1plus, var_names=goinew, 
                 groupby='treatment_id')

In [None]:
sc.pl.matrixplot(pd1plus, var_names=goinew, 
                 groupby='treatment_id',standard_scale='var')

In [None]:
sc.pl.matrixplot(toplot, var_names=goinew, 
                 categories_order=['naive','resource','effmem','effcytotox','effexh'],
                 groupby='celltype_simple',standard_scale='var', 
                 save='-selmarkerexpression-celltype_simple-selclusters_3Tonly.pdf')

In [None]:
set(toplot.obs['celltype_treatment'])

In [None]:
sc.pl.matrixplot(toplot, var_names=goinew, 
                 categories_order=['PD1-IL2v | naive','FAP-IL2v_PD1 | naive','PD1 | naive','PD1-IL2v | resource',
                                   'FAP-IL2v_PD1 | resource', 'PD1 | resource','PD1-IL2v | effmem',
                                    'FAP-IL2v_PD1 | effmem','PD1 | effmem', 'PD1-IL2v | effcytotox', 'FAP-IL2v_PD1 | effcytotox',
                                    'PD1 | effcytotox','PD1-IL2v | effexh','FAP-IL2v_PD1 | effexh', 'PD1 | effexh'],
                 groupby='celltype_treatment',standard_scale='var', 
                 save='-selmarkerexpression-celltype_treatment-selclusters_3Tonly.pdf')

In [None]:
# Functional gene groups used for the per-treatment signature scores below.

# Inhibitory receptors. Original note, presumably "lower in anti-IL2 and the combination
# compared to untreated and anti-PD1" -- TODO confirm the intended wording.
inhrec = ['Cd160', 'Lag3', 'Cd244a', 'Btla', 'Pdcd1', 'Havcr2', 'Tigit', 'Cd101']
# Effector molecules
eff = ['Gzma', 'Gzmb', 'Lamp1']
# Co-stimulation (Kit is CD117)
costim = ['Cd28', 'Cd226', 'Icos', 'Cd7', 'Kit', 'Tnfrsf9']
# Cytokines and cytokine receptors
cyto = [
    'Il2ra', 'Ifngr1', 'Il7r', 'Il2rg', 'Il12rb2', 'Il18r1', 'Il18rap', 'Il1rl1',
    'Il2rb', 'Il6st', 'Il10ra', 'Il10', 'Il12rb1', 'Il21r', 'Il21',
]
# Migration / adhesion
migration = [
    'Ccr2', 'Cxcr3', 'Cxcr4', 'Cx3cr1', 'S1pr1', 'Itga1', 'Itga4',
    'Itgae', 'Itgb1', 'Itgb7', 'Cd44', 'Ly6c2', 'Cxcr5',
]
# Transcription factors
tf = [
    'Klf2', 'Lef1', 'Bach2', 'Tbx21', 'Tcf7', 'Ahr', 'Batf', 'Bcl6', 'Egr1', 'Egr2',
    'Eomes', 'Foxo1', 'Foxo3', 'Ikzf2', 'Irf4', 'Maf', 'Nfatc1',
    'Nr4a1', 'Nr4a2', 'Nr4a3', 'Prdm1', 'Tox', 'Tox2',
]
res=tops

In [None]:
pd.Series(mymarkers['ExhCD8Tcell']).to_csv(figdir+"ExhCD8Tcell_genes.tsv", sep='\t')

In [None]:
pd.Series(inhrec).to_csv(figdir+"inrec_genes.tsv", sep='\t')
pd.Series(migration).to_csv(figdir+"migration_genes.tsv", sep='\t')

In [None]:
# Remove a 'res_c6' score left over from an earlier run before it is recomputed below.
# errors='ignore' makes this a no-op when the column does not exist yet, which is the case
# on a fresh kernel (without it the cell raised KeyError).
adata.obs = adata.obs.drop(columns='res_c6', errors='ignore')
tmp.obs = tmp.obs.drop(columns='res_c6', errors='ignore')

In [None]:
# Score every functional gene group, first on the full object and then on the working
# subset; each score is stored in .obs under the given name.
signature_sets = {
    'inhrec_l': inhrec,
    'eff_l': eff,
    'costim_l': costim,
    'cyto_l': cyto,
    'migration_l': migration,
    'tf_l': tf,
    'res_c6': res,
}
for target in (adata, tmp):
    for score_name, gene_list in signature_sets.items():
        sc.tl.score_genes(target, gene_list=gene_list, score_name=score_name)

In [None]:
# Three heatmaps per simplified cell type: receptor/effector/co-stimulation/cytokine genes,
# migration genes plus transcription factors, and the cluster-6 specific genes.
celltype_order = ['naive', 'resource', 'effmem', 'effcytotox', 'effexh']
for gene_panel in (inhrec + eff + costim + cyto, migration + tf, tops):
    sc.pl.matrixplot(
        toplot,
        var_names=gene_panel,
        groupby='celltype_simple',
        categories_order=celltype_order,
        standard_scale='var',
    )

In [None]:
adata.write(results_file)

In [None]:
pd1plus=tmp[tmp.raw[:,'PD1_Pdcd1'].X>0.5].copy()
pd1plus=pd1plus[pd1plus.raw[:,'Pdcd1'].X>0].copy()

In [None]:
sc.pl.umap(pd1plus,color='celltype_simple')

In [None]:
sc.pl.dotplot(tmp, var_names=goiext, dendrogram=True, 
                 groupby='treatment_id')

In [None]:
sc.pl.dotplot(tmp,var_names=goi,
              categories_order=coiorder,dot_max=0.5, groupby='celltype_simple')

In [None]:
sc.pl.dotplot(tmp,var_names=goi,dot_max=0.6, groupby='celltype_treatment', dendrogram=True)

In [None]:
sc.pl.dotplot(adata[adata.obs['leiden'].isin(cls)], categories_order=cls,var_names=goi, groupby='leiden')

In [None]:


sns.clustermap(subtabMeansllim.loc[cls,['score_CytotoxCD8Tcell_scanpy','score_EMCD8Tcell_scanpy','score_ExhCD8Tcell_scanpy','score_resourceCD8Tcell_scanpy','score_NaiCD8Tcell_scanpy','score_ProlifCD8Tcell_scanpy',
                                   'score_CD4_scanpy','score_CD8_scanpy',
                                   'score_Ifna_scanpy']],col_cluster=False,row_cluster=False,figsize=(5,12), 
               standard_scale=1,
               vmin=-0.5, vmax=1)

In [None]:
set(tmp.obs['celltype_simple'])

In [None]:
# Effector groups only (effexh, effcytotox, effmem); all treatments are kept in `toplot`.
effector_mask = tmp.obs['celltype_simple'].isin(['effexh', 'effcytotox', 'effmem'])
toplot = tmp[effector_mask].copy()
# Same cells restricted to the three PD1-containing arms
three_arm_mask = toplot.obs['treatment_id'].isin(['PD1-IL2v', 'FAP-IL2v_PD1', 'PD1'])
toplot2 = toplot[three_arm_mask].copy()

In [None]:
subtabm=toplot2.obs.loc[:,['res_c6','migration_l','cyto_l','eff_l','inhrec_l','Memory_j','Effector_j','Resource_j','Exhausted_j','Activation_Yost','Exhaustion_Yost', 'score_ExhCD8Tcell_scanpy','score_ProlifCD8Tcell_scanpy','sample_id','celltype_simple']].copy()

subtab=toplot.obs.loc[:,['res_c6','migration_l','cyto_l','eff_l','inhrec_l','Memory_j','Effector_j','Resource_j','Exhausted_j','Activation_Yost','Exhaustion_Yost', 'score_ExhCD8Tcell_scanpy','score_ProlifCD8Tcell_scanpy','sample_id','treatment_id']].copy()
subtabl=toplot.obs.loc[:,['res_c6','migration_l','cyto_l','eff_l','inhrec_l','Memory_j','Effector_j','Resource_j','Exhausted_j','Activation_Yost','Exhaustion_Yost', 'score_ExhCD8Tcell_scanpy','score_resourceCD8Tcell_scanpy',
                         'score_resourceCD8Tcellext_scanpy','resourceCD8Tcell_UP','resourceCD8Tcell_DOWN',
                         'score_NaiCD8Tcell_scanpy','score_Ifna_scanpy','score_Th17_scanpy','score_RegTcell_scanpy',
                         'score_ProlifCD8Tcell_scanpy','score_Myeloid_scanpy','sample_id','leiden']].copy()

#adata.obs.loc[:,['leiden','celltype']]

subtabMeans=subtab.groupby('sample_id').mean()


In [None]:
subtabMeans['Treat']=[x.split("_tum")[0] for x in list(subtabMeans.index)]

subtabMeansl=subtabl.groupby(['sample_id', 'leiden']).mean()
subtabMeansl['Treat']=[x[0].split("_tum")[0] for x in list(subtabMeansl.index)]
subtabMeansl['cluster']=[x[1] for x in list(subtabMeansl.index)]

subtabMeansM=subtabm.groupby(['sample_id', 'celltype_simple']).mean()
subtabMeansM['Treat']=[x[0].split("_tum")[0] for x in list(subtabMeansM.index)]
subtabMeansM['celltype']=[x[1] for x in list(subtabMeansM.index)]


In [None]:
# Treatment -> colour, taken from the palette scanpy stored for 'treatment_id'.
# scanpy keeps one colour per category, in category order, so pairing the two gives the
# same mapping as the former hard-coded positions for the five alphabetically ordered
# treatments, without relying on those positions.
treatment_categories = tmp.obs['treatment_id'].astype('category').cat.categories
treatcol = dict(zip(treatment_categories, tmp.uns['treatment_id_colors']))

In [None]:
# Cell type -> colour, picked by position from the stored 'celltype_simple' palette.
# NOTE(review): the positions are the same as in `treatcol` and do not follow the order in
# which the categories were added, so these colours probably differ from the ones on the
# UMAPs -- confirm whether that is intended.
celltype_palette = list(tmp.uns['celltype_simple_colors'])
treatcol2 = {
    'naive': celltype_palette[4],
    'resource': celltype_palette[2],
    'effcytotox': celltype_palette[3],
    'effmem': celltype_palette[0],
    'effexh': celltype_palette[1],
}

In [None]:
subtabMeansM.to_csv(figdir+"Signatures_per_CelltypeAndTreatment_eff.tsv", sep='\t')

In [None]:
subtabMeans.to_csv(figdir+"Signatures_per_Treatment_eff.tsv", sep='\t')

In [None]:
sns.set(rc={'figure.figsize':(2,1.25)})
sns.set_style("white")

plt.figure()
sns.boxplot(x="res_c6",y="celltype",data=subtabMeansM, palette=treatcol2)
#sns.swarmplot(x="res_c6",y="celltype",data=subtabMeansM, hue='Treat')
sns.swarmplot(x="res_c6",y="celltype",data=subtabMeansM, color='black')
plt.savefig(figdir+"Boxplot-res_c6_celltypes.pdf")

In [None]:
sns.set(rc={'figure.figsize':(3.5,2.5)})
sns.set_style("white")

plt.figure()
sns.boxplot(x="res_c6",y="Treat",data=subtabMeans, palette=treatcol, 
                order=['Vehicle','FAP-IL2v','PD1-IL2v','FAP-IL2v_PD1','PD1'])
sns.swarmplot(x="res_c6",y="Treat",data=subtabMeans,
              order=['Vehicle','FAP-IL2v','PD1-IL2v','FAP-IL2v_PD1','PD1'], color='black')
plt.savefig(figdir+"Boxplot-res_c6_effexh_CD8.pdf")

In [None]:
sns.set(rc={'figure.figsize':(2,1.25)})
sns.set_style("white")

plt.figure()
sns.boxplot(x="res_c6",y="Treat",data=subtabMeans.loc[subtabMeans['Treat'].isin(['PD1-IL2v','FAP-IL2v_PD1','PD1']),:], palette=treatcol, 
                order=['PD1-IL2v','FAP-IL2v_PD1','PD1'])
sns.swarmplot(x="res_c6",y="Treat",data=subtabMeans.loc[subtabMeans['Treat'].isin(['PD1-IL2v','FAP-IL2v_PD1','PD1']),:],
              order=['PD1-IL2v','FAP-IL2v_PD1','PD1'], color='black')
plt.savefig(figdir+"Boxplot-res_c6_3Tonly.pdf")

In [None]:
sns.set(rc={'figure.figsize':(2,1.25)})
sns.set_style("white")

plt.figure()
sns.boxplot(x="score_ExhCD8Tcell_scanpy",y="celltype",data=subtabMeansM, palette=treatcol2)
sns.swarmplot(x="score_ExhCD8Tcell_scanpy",y="celltype",data=subtabMeansM, color='black')
#sns.swarmplot(x="score_ExhCD8Tcell_scanpy",y="celltype",data=subtabMeansM, hue='Treat')
plt.savefig(figdir+"Boxplot-score_ExhCD8Tcell_scanpy_celltypes.pdf")

In [None]:
sns.set(rc={'figure.figsize':(3.5,2.5)})
sns.set_style("white")

plt.figure()
sns.boxplot(x="score_ExhCD8Tcell_scanpy",y="Treat",data=subtabMeans, palette=treatcol, 
                order=['Vehicle','FAP-IL2v','PD1-IL2v','FAP-IL2v_PD1','PD1'])
sns.swarmplot(x="score_ExhCD8Tcell_scanpy",y="Treat",data=subtabMeans,
              order=['Vehicle','FAP-IL2v','PD1-IL2v','FAP-IL2v_PD1','PD1'], color='black')
plt.savefig(figdir+"Boxplot-score_ExhCD8Tcell_scanpy_effexh_CD8.pdf")

In [None]:
sns.set(rc={'figure.figsize':(2,1.25)})
sns.set_style("white")

plt.figure()
sns.boxplot(x="score_ExhCD8Tcell_scanpy",y="Treat",data=subtabMeans.loc[subtabMeans['Treat'].isin(['PD1-IL2v','FAP-IL2v_PD1','PD1']),:], palette=treatcol, 
                order=['PD1-IL2v','FAP-IL2v_PD1','PD1'])
sns.swarmplot(x="score_ExhCD8Tcell_scanpy",y="Treat",data=subtabMeans.loc[subtabMeans['Treat'].isin(['PD1-IL2v','FAP-IL2v_PD1','PD1']),:],
              order=['PD1-IL2v','FAP-IL2v_PD1','PD1'], color='black')
plt.savefig(figdir+"Boxplot-score_ExhCD8Tcell_scanpy_3Tonly.pdf")

In [None]:
sns.set(rc={'figure.figsize':(3.5,2.5)})
sns.set_style("white")

plt.figure()
sns.boxplot(x="Memory_j",y="Treat",data=subtabMeans, palette=treatcol, 
                order=['Vehicle','FAP-IL2v','PD1-IL2v','FAP-IL2v_PD1','PD1'])
sns.swarmplot(x="Memory_j",y="Treat",data=subtabMeans,
              order=['Vehicle','FAP-IL2v','PD1-IL2v','FAP-IL2v_PD1','PD1'], color='black')
plt.savefig(figdir+"Boxplot-Memory_j.pdf")

In [None]:
sns.set(rc={'figure.figsize':(3.5,2.5)})
sns.set_style("white")

plt.figure()
sns.boxplot(x="migration_l",y="Treat",data=subtabMeans, palette=treatcol, 
                order=['Vehicle','FAP-IL2v','PD1-IL2v','FAP-IL2v_PD1','PD1'])
sns.swarmplot(x="migration_l",y="Treat",data=subtabMeans,
              order=['Vehicle','FAP-IL2v','PD1-IL2v','FAP-IL2v_PD1','PD1'], color='black')
plt.savefig(figdir+"Boxplot-migration_l.pdf")

In [None]:
sns.set(rc={'figure.figsize':(2,1.25)})
sns.set_style("white")

plt.figure()
sns.boxplot(x="migration_l",y="celltype",data=subtabMeansM, palette=treatcol2)
sns.swarmplot(x="migration_l",y="celltype",data=subtabMeansM, color='black')
#sns.swarmplot(x="migration_l",y="celltype",data=subtabMeansM, hue='Treat')
plt.savefig(figdir+"Boxplot-migration_l_celltypes.pdf")

In [None]:
sns.set(rc={'figure.figsize':(2,1.25)})
sns.set_style("white")

plt.figure()
sns.boxplot(x="migration_l",y="Treat",data=subtabMeans.loc[subtabMeans['Treat'].isin(['PD1-IL2v','FAP-IL2v_PD1','PD1']),:], palette=treatcol, 
                order=['PD1-IL2v','FAP-IL2v_PD1','PD1'])
sns.swarmplot(x="migration_l",y="Treat",data=subtabMeans.loc[subtabMeans['Treat'].isin(['PD1-IL2v','FAP-IL2v_PD1','PD1']),:],
              order=['PD1-IL2v','FAP-IL2v_PD1','PD1'], color='black')
plt.savefig(figdir+"Boxplot-migration_l_3Tonly.pdf")

In [None]:
sns.set(rc={'figure.figsize':(3.5,2.5)})
sns.set_style("white")

plt.figure()
sns.boxplot(x="inhrec_l",y="Treat",data=subtabMeans, palette=treatcol, 
                order=['Vehicle','FAP-IL2v','PD1-IL2v','FAP-IL2v_PD1','PD1'])
sns.swarmplot(x="inhrec_l",y="Treat",data=subtabMeans,
              order=['Vehicle','FAP-IL2v','PD1-IL2v','FAP-IL2v_PD1','PD1'], color='black')
plt.savefig(figdir+"Boxplot-inhrec_l.pdf")

In [None]:
sns.set(rc={'figure.figsize':(2,1.25)})
sns.set_style("white")

plt.figure()
sns.boxplot(x="inhrec_l",y="celltype",data=subtabMeansM, palette=treatcol2)
sns.swarmplot(x="inhrec_l",y="celltype",data=subtabMeansM, color='black')
#sns.swarmplot(x="migration_l",y="celltype",data=subtabMeansM, hue='Treat')
plt.savefig(figdir+"Boxplot-inhrec_l_celltypes.pdf")

In [None]:
sns.set(rc={'figure.figsize':(2,1.25)})
sns.set_style("white")

plt.figure()
sns.boxplot(x="inhrec_l",y="Treat",data=subtabMeans.loc[subtabMeans['Treat'].isin(['PD1-IL2v','FAP-IL2v_PD1','PD1']),:], palette=treatcol, 
                order=['PD1-IL2v','FAP-IL2v_PD1','PD1'])
sns.swarmplot(x="inhrec_l",y="Treat",data=subtabMeans.loc[subtabMeans['Treat'].isin(['PD1-IL2v','FAP-IL2v_PD1','PD1']),:],
              order=['PD1-IL2v','FAP-IL2v_PD1','PD1'], color='black')
plt.savefig(figdir+"Boxplot-inhrec_l_3Tonly.pdf")

In [None]:
sns.set(rc={'figure.figsize':(3.5,2.5)})
sns.set_style("white")

plt.figure()
sns.boxplot(x="eff_l",y="Treat",data=subtabMeans, palette=treatcol, 
                order=['Vehicle','FAP-IL2v','PD1-IL2v','FAP-IL2v_PD1','PD1'])
sns.swarmplot(x="eff_l",y="Treat",data=subtabMeans,
              order=['Vehicle','FAP-IL2v','PD1-IL2v','FAP-IL2v_PD1','PD1'], color='black')
plt.savefig(figdir+"Boxplot-eff_l.pdf")

In [None]:
sns.set(rc={'figure.figsize':(3.5,2.5)})
sns.set_style("white")

plt.figure()
sns.boxplot(x="cyto_l",y="Treat",data=subtabMeans, palette=treatcol, 
                order=['Vehicle','FAP-IL2v','PD1-IL2v','FAP-IL2v_PD1','PD1'])
sns.swarmplot(x="cyto_l",y="Treat",data=subtabMeans,
              order=['Vehicle','FAP-IL2v','PD1-IL2v','FAP-IL2v_PD1','PD1'], color='black')
plt.savefig(figdir+"Boxplot-cyto_l.pdf")

In [None]:
# Box plot with overlaid swarm of `eff_l` per `celltype` (table: subtabMeansM),
# written to a small PDF panel in figdir.
sns.set(rc={'figure.figsize': (2, 1.25)})
sns.set_style("white")

plt.figure()
plot_args = dict(x="eff_l", y="celltype", data=subtabMeansM)
sns.boxplot(palette=treatcol2, **plot_args)
sns.swarmplot(color='black', **plot_args)
plt.savefig(figdir + "Boxplot-eff_l_celltypes.pdf")

In [None]:
# Box plot with overlaid swarm of `Exhausted_j` per treatment, all five treatments in a fixed order.
sns.set(rc={'figure.figsize': (3.5, 2.5)})
sns.set_style("white")

plt.figure()
plot_args = dict(
    x="Exhausted_j",
    y="Treat",
    data=subtabMeans,
    order=['Vehicle', 'FAP-IL2v', 'PD1-IL2v', 'FAP-IL2v_PD1', 'PD1'],
)
sns.boxplot(palette=treatcol, **plot_args)
sns.swarmplot(color='black', **plot_args)
plt.savefig(figdir + "Boxplot-Exhausted_j.pdf")

In [None]:
# Quick look (not saved): `score_ProlifCD8Tcell_scanpy` per treatment, boxes plus individual points.
plot_args = dict(x="score_ProlifCD8Tcell_scanpy", y="Treat", data=subtabMeans)
sns.boxplot(**plot_args)
sns.swarmplot(color='black', **plot_args)

In [None]:

#figdir


# Heatmap of the per-`cluster` means of subtabMeansl; every column is rescaled to
# 0-1 (standard_scale=1) and only the rows are clustered.
# numeric_only=True: pandas >= 2.0 raises a TypeError when GroupBy.mean() meets a
# non-numeric column (subtabMeansl carries at least the `Treat` labels), whereas
# older versions silently dropped such columns. Being explicit keeps both working.
fig=sns.clustermap(subtabMeansl.groupby('cluster').mean(numeric_only=True), standard_scale=1, col_cluster=False,figsize=(6,10))
#fig.savefig(figdir+"Heatmap-signatures-resourceFocus.png")

In [None]:
# Columns of subtabMeansl used for the per-treatment heatmap: the signature
# scores plus the `Treat` column that serves as grouping key.
tosel = [
    'res_c6',
    'migration_l',
    'cyto_l',
    'eff_l',
    'inhrec_l',
    'score_ExhCD8Tcell_scanpy',
    'score_ProlifCD8Tcell_scanpy',
    'Treat',
]

In [None]:
# Heatmap of the mean of each selected signature per treatment; columns are
# rescaled to 0-1 and only the rows (treatments) are clustered.
treat_means = subtabMeansl.loc[:, tosel].groupby('Treat').mean()
fig = sns.clustermap(treat_means, standard_scale=1, col_cluster=False, figsize=(5, 4))
fig.savefig(figdir + "Heatmap_signatures_effexh_CD8.pdf")

In [None]:
# Re-apply scanpy's figure parameters with their defaults before the UMAP plots
# (the seaborn cells above changed the global figure size and style).
sc.settings.set_figure_params()

In [None]:
# UMAP of the cells in `tmp` whose treatment_id is one of the three listed
# treatments, coloured by the `migration_l` and `inhrec_l` scores on a fixed
# colour range so that both panels are comparable.
three_treatments = ['PD1', 'FAP-IL2v_PD1', 'PD1-IL2v']
sc.pl.umap(
    tmp[tmp.obs['treatment_id'].isin(three_treatments)],
    color=['migration_l', 'inhrec_l'],
    color_map='viridis',
    vmin=-0.5,
    vmax=1,
    save='-signatures-selclusters_3Tonly.pdf',
)

In [None]:
# UMAP of the cells in `tmp` whose treatment_id is one of the three listed
# treatments, coloured by Il7r, Havcr2 and Pdcd1.
three_treatments = ['PD1', 'FAP-IL2v_PD1', 'PD1-IL2v']
sc.pl.umap(
    tmp[tmp.obs['treatment_id'].isin(three_treatments)],
    color=['Il7r', 'Havcr2', 'Pdcd1'],
    color_map='viridis',
    save='-Il7r_Havcr2_Pdcd1-selclusters_3Tonly.pdf',
)

In [None]:
# UMAP of the cells in `tmp` whose treatment_id is one of the three listed
# treatments, coloured by `Sell` and `CD62L_Sell`, with the colour scale starting at 0.
three_treatments = ['PD1', 'FAP-IL2v_PD1', 'PD1-IL2v']
sc.pl.umap(
    tmp[tmp.obs['treatment_id'].isin(three_treatments)],
    color=['Sell', 'CD62L_Sell'],
    vmin=0,
    color_map='viridis',
    save='-Sell-selclusters_3Tonly.pdf',
)

In [None]:
# UMAP of the cells in `tmp` whose treatment_id is one of the three listed
# treatments, one panel per entry of `gzmesall` (defined earlier in the notebook).
three_treatments = ['PD1', 'FAP-IL2v_PD1', 'PD1-IL2v']
sc.pl.umap(
    tmp[tmp.obs['treatment_id'].isin(three_treatments)],
    color=gzmesall,
    color_map='viridis',
    save='-Gzmesall-selclusters_3Tonly.pdf',
)

In [None]:
# Display `gzmessel` (defined earlier in the notebook) for reference.
gzmessel

In [None]:
# UMAP of the cells in `tmp` whose treatment_id is one of the three listed
# treatments, coloured by `Gzms` with the colour scale capped at 3.
three_treatments = ['PD1', 'FAP-IL2v_PD1', 'PD1-IL2v']
sc.pl.umap(
    tmp[tmp.obs['treatment_id'].isin(three_treatments)],
    color='Gzms',
    vmax=3,
    color_map='viridis',
    save='-GzmesSig-Gzma-b-c-f-selclusters_3Tonly.pdf',
)

In [None]:
# One UMAP panel per annotation column, stacked vertically (ncols=1).
label_columns = ['celltype1', 'celltype2', 'dblabel', 'celltype']
sc.pl.umap(adata, color=label_columns, ncols=1)

In [None]:
# Which leiden clusters carry the 'fresh effector' celltype label?
is_fresh_effector = adata.obs['celltype'].isin(['fresh effector CD8-positive, alpha-beta T cell'])
set(adata.obs.loc[is_fresh_effector, 'leiden'])

In [None]:
### Save every annotation level through bc.st.additional_labeling.
# Each entry is (obs column, description); the column name is passed for both the
# second and third argument of the call, and the calls run in the listed order.
labellings_to_save = [
    ('celltype1', 'Major cell types'),
    ('celltype2', 'Intermediate cell types'),
    # dblabel is the labelling used as reference
    ('dblabel', 'Cell types attributed according to CL nomenclature, based on own annotation (celltype3)'),
    ('celltype', 'Manually annotated cell types'),
]
for label_column, label_description in labellings_to_save:
    adata = bc.st.additional_labeling(adata, label_column, label_column, label_description, annot_author, results_folder)



### Follow-up analyses for marker generation and inspecting the annotation

#### Cell-centered analysis

In [None]:
### Breakdown of the `celltype3` labels per sample (`sample_id`)
# NOTE(review): the labellings saved earlier in this notebook are celltype1, celltype2,
# dblabel and celltype — confirm that adata.obs really has a `celltype3` column.
bc.pl.celllabel_quant_stackedbar(adata, count_variable='celltype3', subset_variable = 'sample_id');


In [None]:
### Stacked-bar quantification of the `celltype` labels, split by `sample_id`
bc.pl.celllabel_quant_stackedbar(
    adata,
    count_variable='celltype',
    subset_variable='sample_id',
);


In [None]:
### Stacked-bar quantification of the `celltype` labels, split by `treatment_id`
bc.pl.celllabel_quant_stackedbar(
    adata,
    count_variable='celltype',
    subset_variable='treatment_id',
);


In case two annotation versions are present, one can perform a riverplot to compare them.

In [None]:
# Riverplot linking the `celltype` labels to `treatment_id` (label vs. treatment,
# not two annotation versions).
bc.pl.riverplot_2categories(adata, ['celltype','treatment_id'])

In [None]:
# Riverplot comparing the two annotation columns `celltype` and `dblabel`.
bc.pl.riverplot_2categories(adata, ['celltype','dblabel'])

#### Gene-centered analysis

If one is interested in new markers, one can perform DE at the cell type annotation level of choice. Markers can then also be exported to a .gmtx file for subsequent import into GeMS (signatureDB), following specific conventions. 

In [None]:
# NOTE: this assignment overrides the `export_sigs` switch from the configuration
# cell at the top of the notebook; set it to True here to export signatures.
export_sigs=False
if export_sigs:
    ### Metadata setup for cell type signatures
    # The configuration cell defines `annot_author`; the previously referenced
    # name `author` is not set there and raised a NameError once export was enabled.
    User=annot_author
    Source='internal scseq'
    Subtype='all' # or healthy, onc, ...
    domain='cell marker'
    studyID='mongodb-study-id' # replace with your studyID
    analysisID=analysis_name
    genesetname=studyID+'_dblabel'
    suffix='_model_user' # replace with specific info, e.g. _mc38_pcs
    # Filled with the signatures to export; written out by the export cell further down.
    signature_dict={}

In [None]:
### Perform DE of the cells of each `celltype` label vs. all other cells
DEgenes = bc.tl.dge.get_de(
    adata,
    'celltype',
    demethod='wilcoxon',
    topnr=5000,
    logfc=1,
    padj=0.05,
)

In [None]:
# List the keys of DEgenes; the cells below index it by cell type label.
DEgenes.keys()

##### Example 1: resource T cells

In [None]:
### Top 35 DE genes of one cell type (ranked by decreasing Log2FC), plotted per cell type
coi = 'resource CD8-positive, alpha-beta T cell'  # key into DEgenes
ranked_markers = DEgenes[coi].sort_values('Log2FC', ascending=False)
tops = list(ranked_markers['Name'].iloc[:35])
sc.pl.dotplot(adata, var_names=tops, groupby='celltype')

##### Example 2: fresh effector T cells

In [None]:
### Top 35 DE genes of one cell type (ranked by decreasing Log2FC), plotted per cell type
coi = 'fresh effector CD8-positive, alpha-beta T cell'  # key into DEgenes
ranked_markers = DEgenes[coi].sort_values('Log2FC', ascending=False)
tops = list(ranked_markers['Name'].iloc[:35])
sc.pl.dotplot(adata, var_names=tops, groupby='celltype')

##### Further examples

In [None]:
### Top 35 DE genes of one cell type (ranked by decreasing Log2FC), plotted per cell type
coi = 'better T effector CD8-positive, alpha-beta T cell'  # key into DEgenes
ranked_markers = DEgenes[coi].sort_values('Log2FC', ascending=False)
tops = list(ranked_markers['Name'].iloc[:35])
sc.pl.dotplot(adata, var_names=tops, groupby='celltype')

In [None]:
### Top 35 DE genes of one cell type (ranked by decreasing Log2FC), plotted per cell type
coi = 'terminally exhausted CD8-positive, alpha-beta T cell'  # key into DEgenes
ranked_markers = DEgenes[coi].sort_values('Log2FC', ascending=False)
tops = list(ranked_markers['Name'].iloc[:35])
sc.pl.dotplot(adata, var_names=tops, groupby='celltype')

In [None]:
### Top 35 DE genes of one cell type (ranked by decreasing Log2FC), plotted per cell type
coi = 'CCL3-positive effector CD8-positive, alpha-beta T cell'  # key into DEgenes
ranked_markers = DEgenes[coi].sort_values('Log2FC', ascending=False)
tops = list(ranked_markers['Name'].iloc[:35])
sc.pl.dotplot(adata, var_names=tops, groupby='celltype')

In [None]:
### Export sigs if relevant
# Writes `signature_dict` to <results_folder>/Celltypemarkers.gmtx.
# NOTE(review): `signature_dict` is only created when export_sigs is True in the
# metadata-setup cell, and no cell shown in between adds entries to it — confirm
# the dictionary is populated before relying on the exported file.
if export_sigs:
    outgmtfile=results_folder+'/Celltypemarkers.gmtx'
    bc.tl.sig.write_gmtx_forgems(signature_dict, outgmtfile)

If one is interested in additional markers correlated with the marker of interest, 
one can calculate the Spearman correlation. 

In [None]:
import scipy.stats as ss
import operator

# Rank every gene in adata.var_names by the Spearman correlation of its
# adata.raw expression with one marker of interest, then keep the 30 most
# negatively and the 30 most positively correlated genes.
goi_marker = 'CXCR5_Cxcr5'
allmarkers = adata.var_names


def _raw_expression(var_name):
    """Return the adata.raw values of `var_name` as a flat 1-D numpy array."""
    values = adata.raw[:, var_name].X
    if sparse.issparse(values):
        values = values.todense()
    return np.asarray(values).ravel()


# The reference vector is the same for every gene, so extract it only once.
goi_expression = _raw_expression(goi_marker)

spearcorsAll = {}
for i in allmarkers:
    rho = ss.spearmanr(_raw_expression(i), goi_expression)[0]
    # Genes with constant expression have an undefined (NaN) correlation. NaN
    # compares False to everything, which makes the order returned by sorted()
    # arbitrary and can push wrong genes into the top/bottom lists, so skip them.
    if not np.isnan(rho):
        spearcorsAll[i] = rho

# Ascending order: most negative correlations first, most positive last.
spearcorsAll = sorted(spearcorsAll.items(), key=operator.itemgetter(1))
spearcorsAll_ids = [idx for idx, val in spearcorsAll]
goiNegAll = spearcorsAll_ids[:30]
goiPosAll = spearcorsAll_ids[-30:]

In [None]:
# The 30 genes with the highest correlation (tail of the ascending ranking).
goiPosAll = spearcorsAll_ids[-30:]

In [None]:
# Expression of the most positively correlated genes, grouped by `celltype`.
sc.pl.dotplot(adata, var_names=goiPosAll,groupby='celltype')

In [None]:
# Same genes, grouped by `leiden` cluster instead of cell type.
sc.pl.dotplot(adata, var_names=goiPosAll,groupby='leiden')

### Read clonality (analysed with vdj-scirpy-pub) for a quick check

In [None]:
# Load the VDJ AnnData object; the path is relative to the current working directory.
vdata=sc.read('sw_besca_24_final-vdj.h5ad')

In [None]:
# List the per-cell annotation columns available in the VDJ object.
vdata.obs.columns

In [None]:
# Keep only the cells of adata whose CELL value also occurs in the VDJ object.
# The selected CELL values themselves are used to index adata, so this relies on
# them matching adata's obs names — TODO confirm.
has_vdj = adata.obs['CELL'].isin(vdata.obs['CELL'])
subdata = adata[adata.obs['CELL'][has_vdj]].copy()

In [None]:
# Show the VDJ annotations of the cells shared with adata (looked up by adata's obs index).
shared_cells = adata.obs['CELL'][adata.obs['CELL'].isin(vdata.obs['CELL'])].index
vdata[shared_cells].obs

In [None]:
# Copy the clonotype annotations from the VDJ object onto subdata for the cells
# present in both objects. The shared-cell lookup is identical for every column,
# so it is done once and reused.
shared_cells = adata.obs['CELL'][adata.obs['CELL'].isin(vdata.obs['CELL'])].index
vdj_obs = vdata[shared_cells].obs
for vdj_column in [
    'clonal_expansion',
    'clone_id',
    'chain_pairing',
    'clone_id_size',
    'alpha_diversity_clone_id',
    'alpha_diversity_ind_leiden',
]:
    subdata.obs[vdj_column] = vdj_obs[vdj_column]


In [None]:
# log1p of the clone size taken from the VDJ object. The values are assigned as a
# plain list, i.e. by position, so subdata must hold the shared cells in this same order.
shared_cells = adata.obs['CELL'][adata.obs['CELL'].isin(vdata.obs['CELL'])].index
vdj_clone_sizes = vdata[shared_cells].obs['clone_id_size']
subdata.obs['clone_id_size_log1p'] = list(np.log1p(vdj_clone_sizes))


In [None]:
# Extend raw/metadata.tsv with the clonotype columns of subdata and write the
# result to raw/metadata_ext.tsv. Cells absent from subdata keep an empty value.
metadata=pd.read_csv('raw/metadata.tsv', sep='\t')
metadata.index=list(metadata['CELL'])
for x in ['chain_pairing','clone_id_size','clone_id','clone_id_size_log1p','clonal_expansion','alpha_diversity_clone_id','alpha_diversity_ind_leiden']:
    metadata[x]=None
    # Fill a detached copy of the empty column by cell label, then write it back.
    # The former chained assignment `metadata.loc[...][x] = ...` only modified a
    # temporary copy and has been removed. The scratch Series is no longer called
    # `tmp`, so the `tmp` object used by earlier plotting cells is not overwritten.
    column_values=metadata[x].copy()
    column_values[subdata.obs.index]=list(subdata.obs[x])
    metadata[x]=list(column_values)
metadata.to_csv('raw/metadata_ext.tsv',sep='\t', index=False)
    

In [None]:
# Extend the citeseq metadata.tsv with the clonotype columns of subdata and write
# the result next to it as metadata_ext.tsv. Cells absent from subdata keep an empty value.
metadata=pd.read_csv('citeseq/citeseq/normalized_counts/metadata.tsv', sep='\t')
metadata.index=list(metadata['CELL'])
for x in ['chain_pairing','clone_id_size','clone_id','clone_id_size_log1p','clonal_expansion','alpha_diversity_clone_id','alpha_diversity_ind_leiden']:
    metadata[x]=None
    # Fill a detached copy of the empty column by cell label, then write it back.
    # The former chained assignment `metadata.loc[...][x] = ...` only modified a
    # temporary copy and has been removed. The scratch Series is no longer called
    # `tmp`, so the `tmp` object used by earlier plotting cells is not overwritten.
    column_values=metadata[x].copy()
    column_values[subdata.obs.index]=list(subdata.obs[x])
    metadata[x]=list(column_values)
metadata.to_csv('citeseq/citeseq/normalized_counts/metadata_ext.tsv',sep='\t', index=False)


In [None]:
# Reset scanpy's figure parameters to their defaults, then colour the UMAP of the
# cells with VDJ information by `chain_pairing`.
sc.settings.set_figure_params()
sc.pl.umap(subdata, color=["chain_pairing"])

In [None]:
# Clone size on the UMAP with the colour scale capped at 5.
sc.pl.umap(subdata, color=["clone_id_size"], vmax=5,color_map='viridis')

In [None]:
# Same plot with the colour scale capped at 50.
sc.pl.umap(subdata, color=["clone_id_size"], vmax=50,color_map='viridis')

In [None]:
# Same plot with the colour scale capped at 200.
sc.pl.umap(subdata, color=["clone_id_size"], vmax=200,color_map='viridis')

In [None]:
# Restrict the VDJ-annotated cells to the leiden clusters listed in `cls`
# (defined earlier in the notebook).
tmps=subdata[subdata.obs['leiden'].isin(cls)].copy()

In [None]:
# log1p clone size on the UMAP of the selected clusters, limited to the three listed treatments.
three_treatments = ['FAP-IL2v_PD1', 'PD1', 'PD1-IL2v']
sc.pl.umap(
    tmps[tmps.obs['treatment_id'].isin(three_treatments)],
    color=["clone_id_size_log1p"],
    color_map='viridis',
    save='Clone_id_size_log1p_selclusters_3Tonly.pdf',
)

In [None]:
# Clone size on the UMAP without an upper cap on the colour scale.
sc.pl.umap(subdata, color=["clone_id_size"],color_map='viridis')

In [None]:
# Per-cell table with the clone size and the grouping columns used below.
clone_table_columns = ['clone_id_size', 'sample_id', 'treatment_id', 'leiden', 'celltype_simple']
subtab = subdata.obs[clone_table_columns].copy()


In [None]:
# Mean clone size per (sample_id, celltype_simple) combination.
# numeric_only=True: subtab also holds label columns (treatment_id, leiden);
# pandas >= 2.0 raises a TypeError when GroupBy.mean() meets non-numeric columns,
# whereas older versions silently dropped them.
subtabMeans=subtab.groupby(['sample_id', 'celltype_simple']).mean(numeric_only=True)
# The treatment name is the part of the sample id before "_tum".
subtabMeans['Treat']=[x[0].split("_tum")[0] for x in list(subtabMeans.index)]
# Second level of the group index = celltype_simple.
subtabMeans['cluster']=[x[1] for x in list(subtabMeans.index)]
subtabMeans['clone_id_size_log']=np.log10(subtabMeans['clone_id_size'])

In [None]:
# Display the per-sample, per-cell-type means.
subtabMeans

In [None]:
# Save the per-sample, per-cell-type table as a tab-separated file in figdir.
subtabMeans.to_csv(figdir+"CloneIDSize_per_CelltypeAndTreatment_eff.tsv", sep='\t')

In [None]:
# Average the per-sample means once more per `cluster` (= celltype_simple).
# numeric_only=True skips the string columns (`Treat`); without it pandas >= 2.0
# raises a TypeError.
# NOTE: this re-uses the name `tmp`, which earlier cells use for a different object.
tmp=subtabMeans.groupby('cluster').mean(numeric_only=True)


In [None]:
# Display the per-cluster averages.
tmp

In [None]:

# Single-column heatmap of the log clone size for five cell types in a fixed
# order; clustering is switched off on both axes so the order is kept.
celltype_order = ['naive', 'resource', 'effcytotox', 'effmem', 'effexh']
fig = sns.clustermap(
    tmp['clone_id_size_log'].loc[celltype_order],
    col_cluster=False,
    row_cluster=False,
    figsize=(0.75, 2.5),
)
fig.savefig(figdir + "Heatmap-clone_id_size_log.pdf")

In [None]:
# Display the per-sample, per-cell-type means again before aggregating them.
subtabMeans

In [None]:
# Average the per-cell-type means within each sample and save them. The grouping
# key is `sample_id` even though the file name says "per_Treatment".
# numeric_only=True skips the string columns (`Treat`, `cluster`); without it
# pandas >= 2.0 raises a TypeError.
subtabMeans.groupby(['sample_id']).mean(numeric_only=True).to_csv(figdir+"CloneIDSize_per_Treatment.tsv", sep='\t')

In [None]:
# Cluster names ordered from largest to smallest mean log clone size. The column
# is selected before aggregating, so the string columns of subtabMeans are never
# passed to mean() (which raises a TypeError on pandas >= 2.0).
sorted_index_desc = subtabMeans.groupby('cluster')['clone_id_size_log'].mean().sort_values(ascending=False).index


In [None]:

# Log clone size per cluster, clusters ordered by `sorted_index_desc`;
# grey boxes with the individual values coloured by treatment on top.
sns.set(rc={'figure.figsize': (6, 6)})
sns.set_style('ticks')
plot_args = dict(x="clone_id_size_log", y="cluster", data=subtabMeans, order=sorted_index_desc)
sns.boxplot(color='lightgray', **plot_args)
sns.swarmplot(hue='Treat', **plot_args)

In [None]:
# Mean clone size per sample over all cells.
# NOTE: this replaces the per-sample, per-cell-type table of the same name, so the
# cells above must not be re-run after this one without recomputing it.
# numeric_only=True: subtab holds label columns (treatment_id, leiden,
# celltype_simple) on which GroupBy.mean() raises a TypeError in pandas >= 2.0.
subtabMeans=subtab.groupby(['sample_id']).mean(numeric_only=True)
# The treatment name is the part of the sample id before "_tum".
subtabMeans['Treat']=[x.split("_tum")[0] for x in list(subtabMeans.index)]
subtabMeans['clone_id_size_log']=np.log10(subtabMeans['clone_id_size'])

In [None]:
# Log clone size per treatment: boxes plus the individual per-sample values.
sns.set(rc={'figure.figsize': (4, 3)})
sns.set_style('ticks')
plot_args = dict(x="clone_id_size_log", y="Treat", data=subtabMeans)
sns.boxplot(**plot_args)
sns.swarmplot(color='black', **plot_args)
plt.savefig(figdir + 'CloneID_per_treatment.pdf')

In [None]:
# Same plot restricted to the three listed treatments.
sns.set(rc={'figure.figsize': (3, 2)})
sns.set_style('ticks')
three_treatments = ['FAP-IL2v_PD1', 'PD1', 'PD1-IL2v']
plot_args = dict(
    x="clone_id_size_log",
    y="Treat",
    data=subtabMeans.loc[subtabMeans['Treat'].isin(three_treatments), :],
)
sns.boxplot(**plot_args)
sns.swarmplot(color='black', **plot_args)
plt.savefig(figdir + 'CloneID_per_treatment_3Tonly.pdf')

### Convert to html

In [None]:
%%javascript
// Ask the kernel to store this notebook's file name in the Python variable `nb_name`.
// NOTE(review): relies on the `IPython.notebook` front-end object — presumably only
// available in the classic Notebook interface; confirm before running elsewhere.

IPython.notebook.kernel.execute('nb_name = "' + IPython.notebook.notebook_name + '"')

In [None]:
# Turn the notebook file name (set by the JavaScript cell) into a path inside the
# current working directory.
nb_name = os.path.join(os.getcwd(), nb_name)

In [None]:
! jupyter nbconvert --to html {nb_name}