# Annotating Cell Types

This workbook was run after the standard workflow.

In [None]:
import besca as bc
import numpy as np
import pandas as pd
# `scanpy.api` is a deprecated alias that later scanpy releases removed; the
# top-level package exposes the `pl`, `settings`, `logging` and `read*` entry
# points used in this notebook.
import scanpy as sc
import matplotlib.pyplot as plt
from scipy import sparse, io
import os
import time
import logging
import seaborn as sns
# Print the versions of scanpy and its dependencies for reproducibility
sc.logging.print_versions()

# for standard processing, set verbosity to minimum
sc.settings.verbosity = 0  # verbosity: errors (0), warnings (1), info (2), hints (3)
sc.settings.set_figure_params(dpi=80)
version = '2.8'
start0 = time.time()  # wall-clock start of the notebook run

In [None]:
### Plot parameters for publication 
def set_pub(small_size=10, medium_size=12, large_size=14, resolution=300):
    """Apply matplotlib rcParams suited for publication figures.

    Parameters
    ----------
    small_size : int
        Font size for tick labels and legend entries.
    medium_size : int
        Font size for axis labels and the legend title.
    large_size : int
        Font size for axes titles and the figure title.
    resolution : int
        Resolution, in dpi, used when saving figures.

    Calling ``set_pub()`` without arguments applies 10/12/14 pt fonts at
    300 dpi. The settings are written to the global matplotlib rcParams.
    """
    plt.rcParams['font.weight'] = 'normal'
    #plt.rc('font', **{'family':'sans-serif','sans-serif':['Helvetica']})
    plt.rc('axes', titlesize=large_size, titleweight="bold")           # axes title
    plt.rc('axes', labelsize=medium_size, labelweight="bold")          # x and y labels
    plt.rc('xtick', labelsize=small_size)                              # x tick labels
    plt.rc('ytick', labelsize=small_size)                              # y tick labels
    plt.rc('legend', fontsize=small_size, title_fontsize=medium_size)  # legend entries / title
    plt.rc('figure', titlesize=large_size, titleweight="bold")         # figure title
    plt.rc('savefig', dpi=resolution)                                  # higher res outputs

    # 'none' makes the SVG backend write text as text rather than as paths
    plt.rcParams['svg.fonttype'] = 'none'


set_pub()

In [None]:
# Standardized file paths: the working directory and the folder that
# contains the installed besca package.
root_path = os.getcwd()
bescapath_full = os.path.dirname(bc.__file__)
bescapath = os.path.dirname(bescapath_full)

# Name of the standard-workflow analysis whose results are annotated here
analysis_name = 'sw_besca2_immune'

# adata.obs column that holds the cluster labels
clusters = 'leiden'


The chunk of code below is useful if this is the initial installation of besca and you are running this notebook as a test. If needed, it will download the test dataset and export the labelling. 
This export is usually done at the end of the standard workflow. The exported files are necessary for the annotation.

In [None]:
use_example_dataset = False
if not use_example_dataset:
    # Regular case: load the object written by the standard workflow
    results_folder = os.path.join(root_path, 'analyzed', analysis_name)
    adata = sc.read_h5ad(os.path.join(results_folder, analysis_name + '.h5ad'))
else:
    analysis_name = 'pbmc3k_processed'
    results_folder = os.path.dirname(os.getcwd()) + '/besca/datasets/data/'
    clusters = 'leiden'
    # Downloads the example dataset if needed, otherwise loads it
    adata = bc.datasets.pbmc3k_processed()
    # Export the cluster labelling so that the files required by the
    # annotation steps exist in results_folder
    adata = bc.st.additional_labeling(
        adata,
        labeling_to_use=clusters,
        labeling_name=clusters,
        labeling_description='Exporting a postori the labels for annotation',
        labeling_author='Testing',
        results_folder=results_folder,
    )

In [None]:
# File that will hold the annotated object, and the folder for saved figures
results_file = os.path.join(results_folder, analysis_name + '.annotated.h5ad')
figdir=os.path.join(root_path, 'analyzed', analysis_name+'/figures/')
# exist_ok=True creates the folder only when missing, without a separate
# existence check that could race with another process
os.makedirs(figdir, exist_ok=True)
sc.settings.figdir = figdir

In [None]:
sc.pl.umap(adata, color= [clusters], legend_loc='on data')

In [None]:
adata.obs['PatientID']=adata.obs['PatientID'].astype('category')

In [None]:
sc.pl.umap(adata,color=['PatientID','Sample type','Adjuvant','Lesion','Stage','CD3IHC','Age','Gender'],
           save='-variables.svg')

### Explore top marker genes per cluster 

In [None]:
DEgenes=bc.tl.dge.get_de(adata,clusters,demethod='wilcoxon',topnr=5000, logfc=1,padj=0.05)


In [None]:
### Select only top genes (in order of p-val) for 2 clusters and plot expression per cluster
tops=list(DEgenes['44']['Name'][0:50])+list(DEgenes['40']['Name'][0:10])
sc.pl.dotplot(adata, var_names=tops,groupby=clusters)

In [None]:
sc.pl.umap(adata,color=['C1QA','CCL3','IFI27','CXCL10','CST3','APOE'])

### Explore distribution of various cell populations

In [None]:
# One can load besca-provided signatures using the function below
signature_dict = bc.datasets.load_immune_signatures(refined=False)

signature_dict

Additionally, it is possible to read a gmt file and compute scanpy scores using the function below.

If the gmt file is composed of combined signature (UP and DN), a common score will be computed: 
$$Total\_SCORE= Score_{UP} - Score_{DN}$$

In [None]:

gmt_file= bescapath + '/besca/datasets/genesets/Immune.gmt'
bc.tl.sig.combined_signature_score(adata, gmt_file,
                             UP_suffix='_UP', DN_suffix='_DN', method='scanpy',
                             overwrite=False, verbose=False,
                             use_raw=True, conversion=None)

In [None]:
scores = [x for x in adata.obs.columns if 'scanpy' in x]

In [None]:
sc.pl.umap(adata, color= scores)

# Immune signatures for specific sub-populations

In [None]:
## PROVIDED WITH BESCA
gmt_file_anno= bescapath + '/besca/datasets/genesets/CellNames_scseqCMs6_sigs.gmt'
bc.tl.sig.combined_signature_score(adata, gmt_file_anno)


In [None]:
scores = [x for x in adata.obs.columns if 'scanpy' in x]


In [None]:
sc.pl.umap(adata, color= scores, color_map = 'viridis',save='-all_CellAnnot.svg')

In [None]:
sc.pl.umap(adata, color= ['score_Myeloid_scanpy','score_Bcell_scanpy',
                          'score_Tcell_scanpy','score_CD8Tcell_scanpy',
                          'score_CD4Tcell_scanpy','score_gdTcell_scanpy','score_NKcell_scanpy',
                         'score_Macrophage_scanpy','score_ClassMonocyte_scanpy',
                          'score_cDC_scanpy','score_pDC_scanpy'], 
           color_map = 'viridis',save='-all_CellAnnot_reduced.svg')

# Automated annotation

A decision-tree-based annotation that reads signatures from a provided .gmt file and hierarchy as well as cutoffs and signature ordering from a configuration file and attributes each cell to a specific type according to signature enrichment. 

This is an aid to start the annotation; the annotation can then be further refined by adding further signatures or adjusting the configuration files. It was tested mainly on PBMCs and oncology (tumor biopsies) related samples.


## Loading markers and signature

In [None]:
# Signature file shipped with besca that defines the cell-type marker sets
gmt_file_anno= bescapath + '/besca/datasets/genesets/CellNames_scseqCMs6_sigs.gmt'

# Read the signatures into a dict keyed by signature name
# NOTE(review): directed=False presumably ignores any UP/DN direction — confirm
mymarkers = bc.tl.sig.read_GMT_sign(gmt_file_anno,directed=False)
mymarkers = bc.tl.sig.filter_siggenes(adata, mymarkers) ### remove genes not present in dataset or empty signatures
mymarkers['Ubi'] = ['B2M','ACTB', 'ZNF207', 'HNRNPK','HNRNPU'] ### used for cutoff adjustment to individual dataset, can be modified

In [None]:
#mymarkers['Ubi'] = ['B2M','ACTB', 'GAPDH']

In [None]:
### Inspect gene expression for an example population
sc.pl.umap(adata, color= mymarkers['NClassMonocyte'])

In [None]:
#sc.pl.umap(adata, color= mymarkers['cDC'])

## Configuration of the annotation

We read the configuration file, containing hierarchy, cutoff and signature priority information. 
A new version of this file should be created and maintained with each annotation. 
The included example is optimised for the annotation of the 6.6k PBMC dataset. 

In [None]:
configfile=bescapath + '/besca/datasets/genesets/CellNames_scseqCMs6_config.tsv' ### replace this with your config

In [None]:
sigconfig,levsk=bc.tl.sig.read_annotconfig(configfile)

In [None]:
# The order of cells at different levels can be manually changed if needed
#levsk[0]=['ColorectalCancer', 'Epithelial','Fibroblast','Endothelial','Erythrocyte','HematoStem',
#          'Hepatocyte','MelMelanoma','Neural','Adipocyte','Hematopoietic','Schwann','Chondrocyte','Glial']

#### Get an overview of the cell type hierarchy included in the configuration file

In [None]:
# Store the returned plot handle under its own name: assigning it to `plt`
# would overwrite the matplotlib.pyplot alias used by the later cells.
nomenclature_plot = bc.pl.nomenclature_network(configfile, font_size=8)
nomenclature_plot.savefig(figdir+"Nomenclatureplot.svg", format="svg")

Fract_pos was exported by BESCA in the standard workflow test; 
it contains the fraction of positive cells per gene per cluster.

We use these values as a basis for a wilcoxon test per signature per cluster. 

In [None]:

# Load the fract_pos table exported for the chosen clustering
# (the first two lines of the .gct file are skipped)
f=pd.read_csv(results_folder + "/labelings/"+clusters+"/fract_pos.gct",sep="\t",skiprows=2)
# Score the signatures in mymarkers; rows of df are indexed by signature name
df=bc.tl.sig.score_mw(f,mymarkers)
myc=np.median(df.loc['Ubi',:]*1/3) ### Set a cutoff based on Ubi and scale with values from config file


#### For an overview of highest scoring signatures, one can generate a heatmap

In [None]:
# Keep only the rows (signatures) whose maximum score exceeds 3*myc
sns.clustermap(df.loc[df.max(axis=1)>myc*3,:].astype(float),figsize=(14, 10))
plt.savefig(figdir+"SignatureHeatmap_all.svg", format="svg")

For each signature, positive and negative clusters are determined. Only positive clusters are maintained. Cutoffs can be individualised based on the config file (scaling factor) and myc, which is determined based on ubiquitously expressed genes. 

In [None]:
df=df.drop('Ubi')

In [None]:
# Manual cutoff overrides, applied to sigconfig in the listed order
manual_cutoffs = {
    'Blymphocyte': 3,
    'Tcell': 3,
    'CD8Tcell': 1.4,
    'NKcell': 3,
    'Macrophage': 0.95,
    'cDC': 2.5,
    'CD56dimNK': 3,
    'CD56brightNK': 0.75,
    'CytotoxCD8Tcell': 4.75,
    'NaiCD4Tcell': 4.5,
    'EMCD8Tcell': 4,
    'Macrophage_MARCO': 1.75,
    'Macrophage_CXCL9': 0.75,
    'Macrophage_MSR1': 5,
    'ProlifBcell': 1.5,
    'NaiBcell': 2.5,
    'MemBcell': 1.5,
    'ExhCD8Tcell': 6,
    'CMCD4Tcell': 2.5,
    'Hematopoietic': 0.5,
    'ClassMonocyte': 2,
    'ExhBcell': 3,
}
for signature_name, cutoff_value in manual_cutoffs.items():
    sigconfig.loc[signature_name, 'Cutoff'] = cutoff_value
#sigconfig.loc['Tcell','Cutoff']

In [None]:
# Cluster attribution: for every signature, collect what getset returns
# when given the config cutoff scaled by myc

sigscores = {}
for mysig in df.index.tolist():
    scaled_cutoff = sigconfig.loc[mysig, 'Cutoff'] * myc
    sigscores[mysig] = bc.tl.sig.getset(df, mysig, scaled_cutoff)
    #sigscores[mysig]=bc.tl.sig.getset(df,mysig,10)

One can inspect the cluster attribution per cell type in the signature list and adjust cutoffs as required. 

In [None]:
sigscores['cDC']

In [None]:
sc.pl.umap(adata, color= [clusters], legend_loc='on data')

In [None]:
#sc.pl.umap(adata, color=['LRMP','VPREB3','LYZ','C1QA', 'APOC1','CXCL10', 'IFI27'],color_map='viridis')

Now each cluster gets annotated, according to the distinct levels specified in the config file. 
Note that in case a cluster is positive for multiple identities, only the first one is taken, 
in the order specified in the "Order" column in the config file. 

To check the given order, per levels, you can inspect levsk, and adjust above as needed

In [None]:
levsk

## Obtained cluster assignment

In [None]:
### Cell types that are not expected in the dataset can be explicitly excluded from the annotation 
toexclude=['Erythrocyte','AlphaPancreatic', 'BetaPancreatic', 'DeltaPancreatic',
           'FollicularBcell','Neural', 'ProlifBcell']


In [None]:
cnames=bc.tl.sig.make_anno(df,sigscores,sigconfig,levsk, toexclude=toexclude)

We have now obtained a cell type attribution for each cluster at distinct levels. 

In [None]:
cnames

Export the used annotation parameters, for future reference

In [None]:
bc.tl.sig.export_annotconfig(sigconfig, levsk, results_folder, analysis_name)

## Using db label convention

Only short names were used in the signature naming convention in this case. 
One can easily transform this to EFO terms if preferred; a conversion table comes with besca. 

This nomenclature is quite extended, and the function 
**obtain_dblabel** can perform the conversion.

In [None]:
### transform these short forms to dblabel - EFO standard nomenclature
cnamesDBlabel = bc.tl.sig.obtain_dblabel(bescapath+'/besca/datasets/nomenclature/CellTypes_v1.tsv', cnames )
cnamesDBlabel

In [None]:
list(pd.read_csv(bescapath+'/besca/datasets/nomenclature/CellTypes_v1.tsv',sep='\t')['short_dblabel'])

Finally, one can add the new labels to adata.obs as annotation. 

In [None]:
# Short-name annotation: one 'scelltypeN' column per hierarchy level
for level in ('celltype0', 'celltype1', 'celltype2', 'celltype3'):
    adata.obs['s' + level] = bc.tl.sig.add_anno(adata, cnames, level, clusters)

In [None]:
# dblabel annotation: one 'celltypeN' column per hierarchy level
for level in ('celltype0', 'celltype1', 'celltype2', 'celltype3'):
    adata.obs[level] = bc.tl.sig.add_anno(adata, cnamesDBlabel, level, clusters)

In [None]:
sc.pl.umap(adata,color=['celltype0']) #,'celltype2'

Melanoma cells are present because one sample was a mix of CD45- and CD45+ cells.

In [None]:
sc.pl.umap(adata,color=['celltype1']) #,'celltype2'

In [None]:
sc.pl.umap(adata,color=['celltype2']) #,'celltype2'

In [None]:
sc.pl.umap(adata,color=['celltype3']) #,'celltype3'

In [None]:
sc.pl.umap(adata,color=['leiden'],legend_loc='on data', legend_fontsize=6) #,'celltype3'

## Manual refinement

In some cases, the annotation does not produce the optimal result. Manual adjustments can be made, 
by replacing cell type names or by manually labeling clusters. Note that the second option is not 
stable across reruns if any adjustments are made to the clustering. 

In [None]:
### Example 1: unclear patient-specific myeloid cluster, set to macrophage
#adata.obs['celltype2']=adata.obs['celltype2'].cat.add_categories('myeloid leukocyte')
adata.obs['celltype3']=adata.obs['celltype3'].cat.add_categories('macrophage')
#adata.obs.loc[adata.obs[clusters].isin(['44']),'celltype2']='myeloid leukocyte'
adata.obs.loc[adata.obs[clusters].isin(['44']),'celltype3']='macrophage'

In [None]:
### Example 1: unclear patient-specific myeloid cluster
#adata.obs['scelltype2']=adata.obs['scelltype2'].cat.add_categories('Myeloid')
adata.obs['scelltype3']=adata.obs['scelltype3'].cat.add_categories('Macrophage')
#adata.obs.loc[adata.obs[clusters].isin(['44']),'scelltype2']='Myeloid'
adata.obs.loc[adata.obs[clusters].isin(['44']),'scelltype3']='Macrophage'

In [None]:
allcells=pd.read_csv(bescapath+'/besca/datasets/nomenclature/CellTypes_v1.tsv',sep='\t')

In [None]:
#list(allcells['dblabel'])

In [None]:
allcells.loc[allcells['short_dblabel']=='MyeloTcell',:]

In [None]:
### Example 2: CD8 T CELLS
adata.obs['celltype3']=adata.obs['celltype3'].cat.add_categories('mature NK T cell')
adata.obs.loc[adata.obs[clusters].isin(['3','45','39']),'celltype3']='mature NK T cell'


In [None]:
adata.obs.loc[adata.obs[clusters].isin(['3','45','39']),'celltype2']='CD8-positive, alpha-beta T cell'

In [None]:
### Example 2: CD8 T CELLS
adata.obs['scelltype3']=adata.obs['scelltype3'].cat.add_categories('NKTcell')
adata.obs.loc[adata.obs[clusters].isin(['3','45','39']),'scelltype3']='NKTcell'


In [None]:
adata.obs.loc[adata.obs[clusters].isin(['3','45','39']),'scelltype2']='CD8Tcell'

In [None]:
sc.pl.umap(adata,color=['leiden','scelltype3','celltype2', 'celltype1'], legend_loc='on data',legend_fontsize=6)

In [None]:
# everything that was done so far goes to the .h5ad file for later use
# NOTE(review): the write is commented out, so the read below replaces the
# in-memory object (including the manual refinements made above) with whatever
# was last saved to results_file — re-enable the write on a fresh run.
#adata.write(results_file)
adata=sc.read(results_file)

This is the first basic annotation. Downstream, analysis was redone with PBMC, TIL, T cells, myeloids separately and annotation adjusted according to these results to obtain finer grained populations. 

# Reclustering per various subpopulations 


The main steps are:
+ saving previous clustering and annotation for comparison purpose (advised)
+ recluster 
+ Export the new labelling (see function additional_labeling)
+ Read the new labelling information including fract_pos files.
+ Recompute signatures/markers values
+ Reannotate
+ Convert annotation to dblabel
+ Export all for the data subset to the larger adata object.

In [None]:
recluster = True
if recluster:
    # Keep the clustering and annotation obtained so far under '*_original'
    # names so they can be compared with the reclustering results
    for column in ('leiden', 'celltype3', 'celltype2', 'celltype1', 'celltype0'):
        adata.obs[column + '_original'] = adata.obs[column].copy()

In [None]:
# NOTE(review): reloading replaces the in-memory adata, so the '*_original'
# columns added in the previous cell are lost unless results_file already
# contains them — confirm the intended cell order.
adata=sc.read(results_file)

### PBMC reclustering

In [None]:
# Recluster the cells whose 'Sample type' is PBMC.
# Parentheses without a trailing comma do not build a tuple, so the value
# has always been the plain string below.
celltype_label = 'Sample type'
to_recluster = 'PBMC'
cluster_renamed = 'Leiden_reclustering_PBMC'

In [None]:

    # Calling reclustering
    adata_rc = bc.tl.rc.recluster ( adata, celltype_label = celltype_label, 
                              celltype=to_recluster, resolution=3, batch_key='experiment')
    # Leiden reclustering have to be exported to use the annotation function 
    adata_rc.write(os.path.join(results_folder, analysis_name + '.annotated.PBMC.h5ad'))
    

In [None]:
    adata_rc=sc.read(os.path.join(results_folder, analysis_name + '.annotated.PBMC.h5ad'))
    adata_rc = bc.st.additional_labeling(adata_rc, 'leiden', cluster_renamed, 
                                         'Leiden reclustering on PBMCs', 'PCS', 
                                         results_folder)


In [None]:
    sigconfig,levsk=bc.tl.sig.read_annotconfig(configfile)
    # Reading additional labelling
    f=pd.read_csv(results_folder + "/labelings/"+cluster_renamed+"/fract_pos.gct",sep="\t",skiprows=2)
    df=bc.tl.sig.score_mw(f,mymarkers)
    myc=np.median(df.loc['Ubi',:]*1/3) ### Set a cutoff based on Ubi and scale with values from config file
    df=df.drop('Ubi')

In [None]:

    # Cutoff overrides for the PBMC subset, applied to sigconfig in the listed order
    pbmc_cutoffs = {
        'Bcell': 1.5,
        'Blymphocyte': 2.75,
        'Tcell': 2.9,
        'CD8Tcell': 1.35,
        'NKcell': 3,
        'Macrophage': 1.5,
        'cDC': 1.5,
        'CD56dimNK': 3.5,
        'CD56brightNK': 0.65,
        'CytotoxCD8Tcell': 4.75,
        'NaiCD4Tcell': 4.5,
        'EMCD8Tcell': 4,
        'Macrophage_MARCO': 1.75,
        'Macrophage_CXCL9': 0.75,
        'Macrophage_MSR1': 5,
        'ProlifBcell': 1.5,
        'NaiBcell': 2.5,
        'MemBcell': 1.5,
        'ExhCD8Tcell': 6,
        'CMCD4Tcell': 2.5,
        'Hematopoietic': 0.5,
        'ClassMonocyte': 2,
        'ExhBcell': 3,
        #sigconfig.loc['Tcell','Cutoff']
        'Myeloid': 0.75,
        'NaiCD8Tcell': 4.5,
    }
    for signature_name, cutoff_value in pbmc_cutoffs.items():
        sigconfig.loc[signature_name, 'Cutoff'] = cutoff_value

    ### Cell types that are not expected in the dataset can be explicitly excluded from the annotation
    toexclude = [
        'Erythrocyte', 'AlphaPancreatic', 'BetaPancreatic', 'DeltaPancreatic',
        'FollicularBcell', 'Neural', 'ProlifBcell', 'GermCenterBcell',
    ]

    # Recompute the per-signature cluster sets with the adjusted cutoffs
    sigscores = {}
    for mysig in df.index.tolist():
        scaled_cutoff = sigconfig.loc[mysig, 'Cutoff'] * myc
        sigscores[mysig] = bc.tl.sig.getset(df, mysig, scaled_cutoff)

    # Annotate the clusters, then convert the short names to dblabel terms
    cnames = bc.tl.sig.make_anno(df, sigscores, sigconfig, levsk, toexclude=toexclude)
    nomenclature_file = bescapath + '/besca/datasets/nomenclature/CellTypes_v1.tsv'
    cnamesDBlabel = bc.tl.sig.obtain_dblabel(nomenclature_file, cnames)

    # Attach every annotation level to the reclustered subset
    for level in ('celltype0', 'celltype1', 'celltype2', 'celltype3'):
        adata_rc.obs[level] = bc.tl.sig.add_anno(adata_rc, cnamesDBlabel, level, 'leiden')


In [None]:
sc.pl.umap(adata_rc,color=['celltype0'])

In [None]:
sc.pl.umap(adata_rc,color=['leiden'])

In [None]:
sc.pl.umap(adata_rc,color=['celltype1'])

In [None]:
sc.pl.umap(adata_rc, color= ['score_Myeloid_scanpy','score_Bcell_scanpy',
                          'score_Tcell_scanpy','score_CD8Tcell_scanpy',
                          'score_CD4Tcell_scanpy','score_gdTcell_scanpy','score_NKcell_scanpy',
                         'score_Macrophage_scanpy','score_ClassMonocyte_scanpy',
                          'score_cDC_scanpy','score_pDC_scanpy','score_GermCenterBcell_scanpy'], 
           color_map = 'viridis') #save='-all_CellAnnot_reduced.PBMC.svg'

In [None]:
sc.pl.umap(adata_rc,color=['celltype2'])

In [None]:
sc.pl.umap(adata_rc,color=['celltype3'])

In [None]:
sc.pl.umap(adata_rc,color=['celltype3_original'])

In [None]:
    # Names are collected in cluster-index order '0', '1', ...
    # NOTE(review): assumes annotate_new_cellnames maps `names` positionally
    # onto the leiden categories of adata_rc — confirm.
    leiden_rc = []
    cluster_ids = [str(idx) for idx in range(cnamesDBlabel.shape[0])]
    names_1 = [cnamesDBlabel['celltype1'][cid] for cid in cluster_ids]
    names_2 = [cnamesDBlabel['celltype2'][cid] for cid in cluster_ids]
    names_3 = [cnamesDBlabel['celltype3'][cid] for cid in cluster_ids]

    # Transfer the subset annotation to new columns of the full object
    bc.tl.rc.annotate_new_cellnames(adata, adata_rc, names=names_1,
                                    new_label='celltype1_PBMC', method='leiden')
    bc.tl.rc.annotate_new_cellnames(adata, adata_rc, names=names_2,
                                    new_label='celltype2_PBMC', method='leiden')
    bc.tl.rc.annotate_new_cellnames(adata, adata_rc, names=names_3,
                                    new_label='celltype3_PBMC', method='leiden')
    rc_categories = list(adata_rc.obs['leiden'].cat.categories)
    bc.tl.rc.annotate_new_cellnames(adata, adata_rc, names=rc_categories,
                                    new_label='leiden_PBMC', method='leiden')
    #sc.pl.umap(adata,color=['celltype2', 'celltype2_original',
    #                   'celltype3'], ncols=1)

    # Rebuild 'leiden_PBMC': cells outside the subset are 'not_labeled',
    # cells inside it get their reclustered leiden id
    adata.obs['leiden_PBMC'] = 'not_labeled'
    adata.obs['leiden_PBMC'] = adata.obs['leiden_PBMC'].astype("category")
    leidennk = adata.obs['leiden_PBMC'].copy()
    leidennk = leidennk.cat.add_categories(list(adata_rc.obs['leiden'].cat.categories)).copy()
    leidennk.loc[adata_rc.obs.index] = list(adata_rc.obs['leiden'])
    adata.obs['leiden_PBMC'] = list(leidennk)

In [None]:
#sigscores['Blymphocyte']

In [None]:
# df.loc['Blymphocyte',:].sort_values()

In [None]:
# everything that was done so far goes to the .h5ad file for later use
adata_rc.write(os.path.join(results_folder, analysis_name + '.annotated.PBMC.h5ad'))

In [None]:
sc.pl.umap(adata_rc,color=['leiden'],legend_loc='on data', legend_fontsize=8)

In [None]:
DEgenesPBMC=bc.tl.dge.get_de(adata_rc[adata_rc.obs['celltype3']=='classical monocyte'],clusters,demethod='wilcoxon',topnr=5000, logfc=1,padj=0.05)


In [None]:
### Select only top genes (in order of p-val) for 2 clusters and plot expression per cluster
tops=list(DEgenesPBMC['31']['Name'][0:50])+list(DEgenesPBMC['9']['Name'][0:20])
sc.pl.dotplot(adata_rc[adata_rc.obs['celltype3']=='classical monocyte'], var_names=tops,groupby=clusters)

In [None]:
sc.pl.umap(adata_rc,color=['Lesion', 'celltype2','PatientID'])

In [None]:
#sc.pl.umap(adata,color=['celltype1','celltype1_PBMC', 'celltype1_TNK', 'celltype1_BMy'])

In [None]:
sc.pl.umap(adata_rc,color=['celltype3'])

In [None]:
sc.pl.umap(adata_rc,color=['leiden'],legend_loc='on data', legend_fontsize=7)

In [None]:
sc.pl.umap(adata_rc,color=['leiden'])

In [None]:
sc.pl.umap(adata_rc,color=['CD3D','CD8A','CD8B','CD4'])

In [None]:
sc.pl.umap(adata, color='celltype3_PBMC')

In [None]:
expandedAnnot=adata.obs['celltype3_PBMC'].copy()
expandedAnnotrc=adata_rc.obs['celltype3'].copy()

In [None]:
expandedAnnot=expandedAnnot.cat.add_categories(['myeloid B cell','myeloid T cell', 'classical monocyte, brain lesions'])
expandedAnnotrc=expandedAnnotrc.cat.add_categories(['myeloid B cell', 'myeloid T cell', 'classical monocyte, brain lesions'])

In [None]:
expandedAnnot[adata.obs['leiden_PBMC'].isin(['30'])]='myeloid T cell'
expandedAnnotrc[adata_rc.obs['leiden'].isin(['30'])]='myeloid T cell'

expandedAnnot[adata.obs['celltype3_PBMC'].isin(['B cell'])]='myeloid B cell'
expandedAnnotrc[adata_rc.obs['celltype3'].isin(['B cell'])]='myeloid B cell'

In [None]:
expandedAnnot[adata.obs['leiden_PBMC'].isin(['29','15','18'])]='classical monocyte, brain lesions'
expandedAnnotrc[adata_rc.obs['leiden'].isin(['29','15','18'])]='classical monocyte, brain lesions'

In [None]:
#expandedAnnot[adata.obs['leiden_PBMC'].isin(['31'])]='classical monocyte subpop'
#expandedAnnotrc[adata_rc.obs['leiden'].isin(['31'])]='classical monocyte subpop'

In [None]:
adata.obs['celltype3_expanded']=list(expandedAnnot)
adata_rc.obs['celltype3_expanded']=list(expandedAnnotrc)

In [None]:
adata_rc.write(os.path.join(results_folder, analysis_name + '.annotated.PBMC.h5ad'))
#adata_rc=sc.read(os.path.join(results_folder, analysis_name + '.annotated.PBMC.h5ad'))
adata.write(results_file)

### TIL reclustering

In [None]:
# Recluster the cells whose 'Sample type' is TIL.
# Parentheses without a trailing comma do not build a tuple, so the value
# has always been the plain string below.
celltype_label = 'Sample type'
to_recluster = 'TIL'
cluster_renamed = 'Leiden_reclustering_TIL'


In [None]:
    #Calling reclustering
    adata_rc = bc.tl.rc.recluster ( adata, celltype_label = celltype_label, 
                               celltype=to_recluster, resolution=2.5, batch_key='experiment')
    # Leiden reclustering have to be exported to use the annotation function 
    adata_rc.write(os.path.join(results_folder, analysis_name + '.annotated.TIL.h5ad'))
    


In [None]:
    adata_rc=sc.read(os.path.join(results_folder, analysis_name + '.annotated.TIL.h5ad'))


    adata_rc = bc.st.additional_labeling(adata_rc, 'leiden', cluster_renamed, 
                                         'Leiden reclustering on TILs', 'PCS', 
                                         results_folder)


In [None]:
    sigconfig,levsk=bc.tl.sig.read_annotconfig(configfile)
    # Reading additional labelling
    f=pd.read_csv(results_folder + "/labelings/"+cluster_renamed+"/fract_pos.gct",sep="\t",skiprows=2)
    df=bc.tl.sig.score_mw(f,mymarkers)
    myc=np.median(df.loc['Ubi',:]*1/3) ### Set a cutoff based on Ubi and scale with values from config file
    df=df.drop('Ubi')



In [None]:

    # Cutoff overrides for the TIL subset, applied to sigconfig in the listed order
    til_cutoffs = {
        'Blymphocyte': 2.75,
        'RegTcell': 2.15,
        'Tcell': 3.1,
        'CD8Tcell': 1.5,
        'NKcell': 3,
        'Macrophage': 1.5,
        'cDC': 1.5,
        'CD56dimNK': 3.5,
        'CD56brightNK': 0.65,
        'CytotoxCD8Tcell': 5,
        'NaiCD4Tcell': 4.5,
        'EMCD8Tcell': 4,
        'Macrophage_MARCO': 1.7,
        'Macrophage_CXCL9': 1,
        'Macrophage_MSR1': 4.75,
        'ProlifBcell': 1.5,
        'NaiBcell': 2.5,
        'MemBcell': 1.5,
        'ExhCD8Tcell': 6,
        'CMCD4Tcell': 2,
        'Hematopoietic': 0.5,
        'ClassMonocyte': 2.025,
        'ExhBcell': 3,
        #sigconfig.loc['Tcell','Cutoff']
        'Myeloid': 0.75,
        'NaiCD8Tcell': 4.5,
        'cDC2': 2,
        'cDC_CCR7': 2,
    }
    for signature_name, cutoff_value in til_cutoffs.items():
        sigconfig.loc[signature_name, 'Cutoff'] = cutoff_value

    # Recompute the per-signature cluster sets with the adjusted cutoffs
    sigscores = {}
    for mysig in df.index.tolist():
        scaled_cutoff = sigconfig.loc[mysig, 'Cutoff'] * myc
        sigscores[mysig] = bc.tl.sig.getset(df, mysig, scaled_cutoff)

    ### Cell types that are not expected in the dataset can be explicitly excluded from the annotation
    toexclude = [
        'Erythrocyte', 'AlphaPancreatic', 'BetaPancreatic', 'DeltaPancreatic',
        'FollicularBcell', 'Neural', 'ProlifBcell', 'GermCenterBcell',
    ]

    # Annotate the clusters, then convert the short names to dblabel terms
    cnames = bc.tl.sig.make_anno(df, sigscores, sigconfig, levsk, toexclude=toexclude)
    nomenclature_file = bescapath + '/besca/datasets/nomenclature/CellTypes_v1.tsv'
    cnamesDBlabel = bc.tl.sig.obtain_dblabel(nomenclature_file, cnames)

    # Attach every annotation level to the reclustered subset
    for level in ('celltype0', 'celltype1', 'celltype2', 'celltype3'):
        adata_rc.obs[level] = bc.tl.sig.add_anno(adata_rc, cnamesDBlabel, level, 'leiden')


sc.pl.umap(adata_rc,color=['celltype0'])


In [None]:
sigscores['Macrophage_MARCO']

In [None]:
sc.pl.umap(adata_rc,color=['leiden'], legend_loc='on data', legend_fontsize=6)

In [None]:
# Marker panel; each gene appears once so that no UMAP panel is drawn twice
sc.pl.umap(adata_rc,color=['CXCL9','MARCO','XCL1','TOX','MSR1','CD4','CD3D','CD8B',
                           'MKI67','PDCD1','IL7R','LAG3','HAVCR2','ENTPD1'])

In [None]:
sc.pl.umap(adata_rc,color=['celltype3'])

In [None]:
sc.pl.umap(adata_rc,color=['celltype3_original'])

In [None]:
sc.pl.umap(adata_rc[adata_rc.obs['celltype1']=='T cell'],color=['celltype3'])

In [None]:
sc.pl.umap(adata_rc[adata_rc.obs['celltype1']=='T cell'],color=['celltype3_original'])

In [None]:
sc.pl.umap(adata_rc,color=['CCR7','IL7R','LEF1','CD1C','CD1D', 'CLEC10A','PDCD1','CD38'])

In [None]:
#sc.pl.umap(adata_rc[adata_rc.obs['celltype3']=='CD8-positive, alpha-beta cytotoxic T cell'],
#           color=['leiden'], )


In [None]:
sc.pl.umap(adata_rc,color=['celltype2'])


In [None]:
sc.pl.umap(adata_rc,color=['celltype1_original'])

In [None]:
sc.pl.umap(adata_rc,color=['celltype3'])


In [None]:
sc.pl.umap(adata_rc,color=['celltype3_original'])


In [None]:
    # Names are collected in cluster-index order '0', '1', ...
    # NOTE(review): assumes annotate_new_cellnames maps `names` positionally
    # onto the leiden categories of adata_rc — confirm.
    leiden_rc = []
    cluster_ids = [str(idx) for idx in range(cnamesDBlabel.shape[0])]
    names_1 = [cnamesDBlabel['celltype1'][cid] for cid in cluster_ids]
    names_2 = [cnamesDBlabel['celltype2'][cid] for cid in cluster_ids]
    names_3 = [cnamesDBlabel['celltype3'][cid] for cid in cluster_ids]

    # Transfer the subset annotation to new columns of the full object
    bc.tl.rc.annotate_new_cellnames(adata, adata_rc, names=names_1,
                                    new_label='celltype1_TIL', method='leiden')
    bc.tl.rc.annotate_new_cellnames(adata, adata_rc, names=names_2,
                                    new_label='celltype2_TIL', method='leiden')
    bc.tl.rc.annotate_new_cellnames(adata, adata_rc, names=names_3,
                                    new_label='celltype3_TIL', method='leiden')
    rc_categories = list(adata_rc.obs['leiden'].cat.categories)
    bc.tl.rc.annotate_new_cellnames(adata, adata_rc, names=rc_categories,
                                    new_label='leiden_TIL', method='leiden')
    #sc.pl.umap(adata,color=['celltype2', 'celltype2_original',
    #                   'celltype3'], ncols=1)

    # Rebuild 'leiden_TIL': cells outside the subset are 'not_labeled',
    # cells inside it get their reclustered leiden id
    adata.obs['leiden_TIL'] = 'not_labeled'
    adata.obs['leiden_TIL'] = adata.obs['leiden_TIL'].astype("category")
    leidennk = adata.obs['leiden_TIL'].copy()
    leidennk = leidennk.cat.add_categories(list(adata_rc.obs['leiden'].cat.categories)).copy()
    leidennk.loc[adata_rc.obs.index] = list(adata_rc.obs['leiden'])
    adata.obs['leiden_TIL'] = list(leidennk)

In [None]:
sc.pl.umap(adata_rc,color=['celltype3'])


In [None]:
#sc.pl.umap(adata_rc,color=['celltype3_original'])


In [None]:
sc.pl.umap(adata, color='celltype3_TIL')

In [None]:
expandedAnnot=adata.obs['celltype3_TIL'].copy()
expandedAnnotrc=adata_rc.obs['celltype3'].copy()

expandedAnnot=expandedAnnot.cat.add_categories(['central-memory CD8-positive, alpha-beta T cell'])
expandedAnnotrc=expandedAnnotrc.cat.add_categories(['central-memory CD8-positive, alpha-beta T cell'])

expandedAnnot[adata.obs['leiden_TIL'].isin(['2'])]='central-memory CD8-positive, alpha-beta T cell'
expandedAnnotrc[adata_rc.obs['leiden'].isin(['2'])]='central-memory CD8-positive, alpha-beta T cell'

#expandedAnnot[adata.obs['leiden_PBMC'].isin(['31'])]='classical monocyte subpop'
#expandedAnnotrc[adata_rc.obs['leiden'].isin(['31'])]='classical monocyte subpop'

adata.obs['celltype3_expanded_TIL']=list(expandedAnnot)
adata_rc.obs['celltype3_expanded_TIL']=list(expandedAnnotrc)

In [None]:
adata_rc.write(os.path.join(results_folder, analysis_name + '.annotated.TIL.h5ad'))
#adata_rc=sc.read(os.path.join(results_folder, analysis_name + '.annotated.TIL.h5ad'))
adata.write(results_file)

### Myeloid and B-lineage reclustering

In [None]:
# Selection for the next reclustering round: cells whose `celltype_label`
# annotation is one of `to_recluster` (B-lineage lymphocytes and myeloid leukocytes).
celltype_label='celltype1'
to_recluster=('lymphocyte of B lineage', 'myeloid leukocyte')
# Name under which the Leiden labelling of the reclustered subset is exported.
cluster_renamed = 'Leiden_reclustering_myeloids'


In [None]:
        
    # Recluster only the populations selected above, then cache the result on
    # disk so that the following cells can start from the saved file.
    adata_rc = bc.tl.rc.recluster(
        adata,
        celltype_label=celltype_label,
        celltype=to_recluster,
        resolution=2.5,
        batch_key='experiment',
    )

    myeloids_h5ad = os.path.join(results_folder, analysis_name + '.annotated.Myeloids.h5ad')
    adata_rc.write(myeloids_h5ad)
    


In [None]:
    adata_rc=sc.read(os.path.join(results_folder, analysis_name + '.annotated.Myeloids.h5ad'))
    # Leiden reclustering have to be exported to use the annotation function 
    adata_rc = bc.st.additional_labeling(adata_rc, 'leiden', cluster_renamed, 
                                         'Leiden reclustering on myeloids', 'PCS', 
                                         results_folder)


In [None]:
    # Reading additional labelling
    # The fract_pos.gct table lives under <results_folder>/labelings/<cluster_renamed>/;
    # the path is assembled with os.path.join like the other paths in this notebook.
    fract_pos_path=os.path.join(results_folder, 'labelings', cluster_renamed, 'fract_pos.gct')
    f=pd.read_csv(fract_pos_path,sep="\t",skiprows=2)
    # Score every signature in `mymarkers` against the table.
    df=bc.tl.sig.score_mw(f,mymarkers)
    myc=np.median(df.loc['Ubi',:]*1/3) ### Set a cutoff based on Ubi and scale with values from config file
    # 'Ubi' is only needed to derive `myc`; remove it before signatures are thresholded.
    df=df.drop('Ubi')


In [None]:
   
    # Adjust cutoffs if needed
    # Hand-tuned per-signature values written into the 'Cutoff' column of
    # `sigconfig`; each one is multiplied by `myc` when the sets are computed below.
    manual_cutoffs={
        'NClassMonocyte':1.75,
        'Myeloid':0.5,
        'Blymphocyte':2.75,
        'Tcell':3.5,
        'NKcell':3,
        'Macrophage':2.3,
        'cDC':1.5,
        'Macrophage_MARCO':1.6,
        'Macrophage_CXCL9':0.9,
        'Macrophage_MSR1':4.75,
        'ProlifBcell':1.5,
        'NaiBcell':2.5,
        'MemBcell':1.5,
        'Hematopoietic':0.5,
        'ClassMonocyte':2,
        'cDC2':2,
        'cDC_CCR7':2,
        'pDC':1.5,
        'ExhBcell':4,
        'GermCenterBcell':2,
    }
    for signame, cutoff in manual_cutoffs.items():
        sigconfig.loc[signame,'Cutoff']=cutoff

    # One entry per signature row of `df`, thresholded at cutoff * myc.
    sigscores={}
    for mysig in list(df.index):
        scaled_cutoff=sigconfig.loc[mysig,'Cutoff']*myc
        sigscores[mysig]=bc.tl.sig.getset(df,mysig,scaled_cutoff)

    ### Cell types that are not expected in the dataset can be explicity excluded from the annotation 
    toexclude=[
        'Erythrocyte','AlphaPancreatic', 'BetaPancreatic', 'DeltaPancreatic',
        'FollicularBcell','Neural', 'ProlifBcell', 'ImmaturecDC',
    ]

    cnames=bc.tl.sig.make_anno(df,sigscores,sigconfig,levsk, toexclude=toexclude)
    nomenclature_file=bescapath+'/besca/datasets/nomenclature/CellTypes_v1.tsv'
    cnamesDBlabel = bc.tl.sig.obtain_dblabel(nomenclature_file, cnames )

    # Attach every annotation level to the reclustered object, keyed on 'leiden'.
    for level in ('celltype0','celltype1','celltype2','celltype3'):
        adata_rc.obs[level]=bc.tl.sig.add_anno(adata_rc,cnamesDBlabel,level,'leiden')
    


In [None]:
# everything that was done so far goes to the .h5ad file for later use
#adata_rc.write(os.path.join(results_folder, analysis_name + '.annotated.Myeloids.h5ad'))

In [None]:
sc.pl.umap(adata_rc,color=['leiden'], legend_loc='on data')

In [None]:
sc.pl.umap(adata_rc,color=['celltype2'])

In [None]:
sc.pl.umap(adata_rc,color=['celltype3'])

In [None]:
sc.pl.umap(adata_rc,color=['celltype3_original'])

In [None]:
sc.pl.umap(adata_rc,color=mymarkers['ProlifBcell'])

In [None]:
sc.pl.umap(adata_rc,color=mymarkers['Macrophage'])

In [None]:
sc.pl.umap(adata_rc,color=mymarkers['ClassMonocyte'])

In [None]:
sc.pl.umap(adata_rc,color=['PatientID','Sample type','Lesion'])

In [None]:
sc.pl.umap(adata_rc,color=['celltype3'])


In [None]:
sc.pl.umap(adata_rc,color=['celltype2_original'])


In [None]:
### Example 2: unclear patient-specific myeloid cluster
# Cells still carrying the generic celltype3 label 'myeloid leukocyte' become
# 'immature conventional dendritic cell' (celltype2: 'myeloid dendritic cell').
# The mask is taken once, before celltype3 is modified.
generic_myeloid=adata_rc.obs['celltype3'].isin(['myeloid leukocyte'])
adata_rc.obs['celltype3']=adata_rc.obs['celltype3'].cat.add_categories(['immature conventional dendritic cell'])
adata_rc.obs.loc[generic_myeloid,'celltype2']='myeloid dendritic cell'
adata_rc.obs.loc[generic_myeloid,'celltype3']='immature conventional dendritic cell'


In [None]:
### Example 2: unclear patient-specific myeloid cluster
#adata_rc.obs['celltype2']=adata_rc.obs['celltype2'].cat.add_categories('myeloid leukocyte')
#adata_rc.obs['celltype3']=adata_rc.obs['celltype3'].cat.add_categories('myeloid leukocyte')
#adata_rc.obs.loc[adata_rc.obs[clusters].isin(['33']),'celltype2']='myeloid leukocyte'
#adata_rc.obs.loc[adata_rc.obs[clusters].isin(['33']),'celltype3']='myeloid leukocyte'
# Cells whose celltype3 is the unspecific 'myeloid dendritic cell' fall back to
# 'myeloid leukocyte' at both levels; the mask is evaluated once up front.
unspecific_dc=adata_rc.obs['celltype3'].isin(['myeloid dendritic cell'])
for level in ('celltype2','celltype3'):
    adata_rc.obs.loc[unspecific_dc,level]='myeloid leukocyte'


In [None]:
# Drop categories that no longer label any cell after the manual edits above.
for level in ('celltype3','celltype2','celltype1'):
    adata_rc.obs[level]=adata_rc.obs[level].cat.remove_unused_categories()

In [None]:
sc.pl.umap(adata_rc,color=['celltype3'])


In [None]:
sc.pl.umap(adata_rc,color=['celltype2'])


In [None]:
    # Build one name per cluster, in the order of the cluster ids '0'..'n-1'.
    # NOTE(review): the original remark here was "lexical order needed" - confirm
    # which ordering annotate_new_cellnames expects for `names`.
    cluster_ids=[str(k) for k in range(cnamesDBlabel.shape[0])]
    names_1=[cnamesDBlabel['celltype1'][cid] for cid in cluster_ids]
    names_2=[cnamesDBlabel['celltype2'][cid] for cid in cluster_ids]
    names_3=[cnamesDBlabel['celltype3'][cid] for cid in cluster_ids]
    leiden_rc=[]

    # Write the reclustered labels back onto the full object under *_BMy columns.
    for level_names, level_label in ((names_1,'celltype1_BMy'),
                                     (names_2,'celltype2_BMy'),
                                     (names_3,'celltype3_BMy')):
        bc.tl.rc.annotate_new_cellnames(adata, adata_rc, names=level_names,
                                        new_label=level_label, method='leiden')
    rc_cluster_names=list(adata_rc.obs['leiden'].cat.categories)
    bc.tl.rc.annotate_new_cellnames(adata, adata_rc, names=rc_cluster_names,
                                    new_label='leiden_BMy', method='leiden')
    #sc.pl.umap(adata,color=['celltype2', 'celltype2_original',
    #                   'celltype3'], ncols=1) 
    #sc.pl.umap(adata,color=['celltype2', 'celltype2_original',
    #                   'celltype3'], ncols=1) 
    


In [None]:
# Copy the reclustering's Leiden ids onto the full object: every cell starts as
# 'not_labeled', cells present in adata_rc receive their 'leiden' value.
rc_leiden=adata_rc.obs['leiden']
adata.obs['leiden_BMy']='not_labeled'
leidennk=adata.obs['leiden_BMy'].astype("category")
leidennk=leidennk.cat.add_categories(list(rc_leiden.cat.categories))
leidennk.loc[rc_leiden.index]=list(rc_leiden)
# Stored as a plain list of strings.
adata.obs['leiden_BMy']=list(leidennk)

In [None]:
sc.pl.umap(adata_rc,color=['celltype2'])


In [None]:
sc.pl.umap(adata_rc,color=['celltype3'])

In [None]:
sc.pl.umap(adata_rc,color=['leiden'])

In [None]:
sc.pl.umap(adata_rc,color=['leiden_original'])

In [None]:
adata_rc.write(os.path.join(results_folder, analysis_name + '.annotated.Myeloids.h5ad'))
#adata_rc=sc.read(os.path.join(results_folder, analysis_name + '.annotated.Myeloids.h5ad'))
adata.write(results_file)

In [None]:
adatamy=sc.read(os.path.join(results_folder, analysis_name + '.annotated.Myeloids.h5ad'))

### T and NK cell reclustering

In [None]:
set(adata.obs['celltype1'])

In [None]:
    #if recluster:
    # Selection for the next reclustering round: cells whose `celltype_label`
    # annotation is one of `to_recluster` (NK cells and T cells).
    celltype_label='celltype1'
    to_recluster=('natural killer cell', 'T cell')
    # Name under which the Leiden labelling of the reclustered subset is exported.
    cluster_renamed = 'Leiden_reclustering_TNK'


In [None]:
    # Calling reclustering
    # Recluster the populations selected above and cache the result on disk.
    # The argument line of the call must stay active: with it commented out the
    # parenthesis is left open and the cell does not parse.
    # NOTE(review): n_shared=2.5 is taken over as written - confirm the value
    # against the besca recluster signature.
    adata_rc = bc.tl.rc.recluster ( adata, celltype_label = celltype_label, 
                               celltype=to_recluster, resolution=3.5, batch_key='experiment',n_shared=2.5)
    adata_rc.write(os.path.join(results_folder, analysis_name + '.annotated.TNK.h5ad'))
    

In [None]:
    adata_rc=sc.read(os.path.join(results_folder, analysis_name + '.annotated.TNK.h5ad'))
    
    # Leiden reclustering have to be exported to use the annotation function 
    adata_rc = bc.st.additional_labeling(adata_rc, 'leiden', cluster_renamed, 
                                         'Leiden reclustering on TNK', 'PCS', 
                                         results_folder)
   


In [None]:
    # Reading additional labelling
    # The fract_pos.gct table lives under <results_folder>/labelings/<cluster_renamed>/;
    # the path is assembled with os.path.join like the other paths in this notebook.
    fract_pos_path=os.path.join(results_folder, 'labelings', cluster_renamed, 'fract_pos.gct')
    f=pd.read_csv(fract_pos_path,sep="\t",skiprows=2)
    # Score every signature in `mymarkers` against the table.
    df=bc.tl.sig.score_mw(f,mymarkers)
    myc=np.median(df.loc['Ubi',:]*1/3) ### Set a cutoff based on Ubi and scale with values from config file
    # 'Ubi' is only needed to derive `myc`; remove it before signatures are thresholded.
    df=df.drop('Ubi')

In [None]:
    # Adjust cutoffs if needed
    # Hand-tuned per-signature values written into the 'Cutoff' column of
    # `sigconfig`; each one is multiplied by `myc` when the sets are computed below.
    manual_cutoffs={
        'Blymphocyte':3,
        'Tcell':2.75,
        'CD8Tcell':1.375,
        'NKcell':3,
        'CD56dimNK':3,
        'CD56brightNK':0.75,
        'CytotoxCD8Tcell':4.75,
        'NaiCD4Tcell':4.5,
        'NaiCD8Tcell':4.5,
        'EMCD8Tcell':4,
        'Macrophage_MARCO':1.75,
        'Macrophage_CXCL9':0.75,
        'Macrophage_MSR1':5,
        'ProlifBcell':1.5,
        'NaiBcell':2.5,
        'MemBcell':1.5,
        'ExhCD8Tcell':6,
        'CMCD4Tcell':2,
        'CMCD8Tcell':2,
        'Hematopoietic':0.5,
        'RegTcell':1.5,
        'Myeloid':1,
    }
    for signame, cutoff in manual_cutoffs.items():
        sigconfig.loc[signame,'Cutoff']=cutoff
    #sigconfig.loc['Tcell','Cutoff']

    # Signatures that are not expected in this dataset and are left out of the annotation.
    toexclude=[
        'Erythrocyte','AlphaPancreatic', 'BetaPancreatic', 'DeltaPancreatic',
        'FollicularBcell','Neural', 'ProlifBcell',
    ]

    # RECOMPUTING SIG SCORE WITH NEW CUTOFF
    sigscores={}
    for mysig in list(df.index):
        scaled_cutoff=sigconfig.loc[mysig,'Cutoff']*myc
        sigscores[mysig]=bc.tl.sig.getset(df,mysig,scaled_cutoff)

    cnames=bc.tl.sig.make_anno(df,sigscores,sigconfig,levsk, toexclude=toexclude)
    nomenclature_file=bescapath+'/besca/datasets/nomenclature/CellTypes_v1.tsv'
    cnamesDBlabel = bc.tl.sig.obtain_dblabel(nomenclature_file, cnames )

    # Attach every annotation level to the reclustered object, keyed on 'leiden'.
    for level in ('celltype0','celltype1','celltype2','celltype3'):
        adata_rc.obs[level]=bc.tl.sig.add_anno(adata_rc,cnamesDBlabel,level,'leiden')

    sc.pl.umap(adata_rc,color=['celltype0','celltype1'])

In [None]:
sc.pl.umap(adata_rc,color=['PatientID','Sample type','Lesion'])

In [None]:
sc.pl.umap(adata_rc,color=mymarkers['Tcell'])

In [None]:
sc.pl.umap(adata_rc,color=['CD4','CD8A','CD8B', 'CD3D', 'MKI67','PDCD1', 'CD38','ENTPD1','TCF7',
                               'XCL1', 'TOX', 'IL7R','LAG3','GNLY', 'GZMB','GZMH', 'FOXP3','IL2RA'])

In [None]:
allcells=pd.read_csv(bescapath+'/besca/datasets/nomenclature/CellTypes_v1.tsv',sep='\t')

In [None]:
list(allcells['dblabel'])

In [None]:
sc.pl.umap(adata_rc,color=['celltype2'])

In [None]:
sc.pl.umap(adata_rc,color=['celltype2_original'])

In [None]:
sc.pl.umap(adata_rc,color=['celltype3'])

In [None]:
sc.pl.umap(adata_rc,color=['celltype3_original'])

In [None]:
sc.pl.umap(adata_rc,color=['leiden'], legend_loc='on data', legend_fontsize=10)

In [None]:
sc.pl.umap(adata_rc,color=['leiden_original'])

In [None]:
set(adata_rc.obs['celltype3'])

In [None]:
adata_rc.obs['celltype3']=adata_rc.obs['celltype3'].cat.add_categories(['mature NK T cell', 'effector memory CD4-positive, alpha-beta T cell',
                            'CD8-positive, alpha-beta memory T cell','CD8-positive, alpha-beta cytokine secreting effector T cell'])


In [None]:
# Manual curation of celltype3 on the reclustered object; statements run in order.
# 1) Leiden clusters 26, 19, 6, 35 and 15 are labelled 'mature NK T cell'.
adata_rc.obs.loc[adata_rc.obs['leiden'].isin(['26','19','6','35','15']),'celltype3']='mature NK T cell'
# 2) Leiden cluster 10 is labelled 'CD8-positive, alpha-beta memory T cell'.
adata_rc.obs.loc[adata_rc.obs['leiden'].isin(['10']),'celltype3']='CD8-positive, alpha-beta memory T cell'
# 3) Whatever is still 'effector memory CD8-positive, alpha-beta T cell' after
#    steps 1-2 is renamed to the cytokine-secreting effector label.
adata_rc.obs.loc[adata_rc.obs['celltype3'].isin(['effector memory CD8-positive, alpha-beta T cell']),'celltype3']='CD8-positive, alpha-beta cytokine secreting effector T cell'


In [None]:
# Order matters: first every cell currently labelled
# 'CD8-positive, alpha-beta cytotoxic T cell' is moved to the memory label ...
adata_rc.obs.loc[adata_rc.obs['celltype3'].isin(['CD8-positive, alpha-beta cytotoxic T cell']),'celltype3']='CD8-positive, alpha-beta memory T cell'
# ... then the cytotoxic label is re-assigned to Leiden clusters 27 and 22 only.
# NOTE(review): assumes the cytotoxic label is already a celltype3 category - confirm.
adata_rc.obs.loc[adata_rc.obs['leiden'].isin(['27','22']),'celltype3']='CD8-positive, alpha-beta cytotoxic T cell'
#adata_rc.obs.loc[adata_rc.obs['celltype3'].isin(['myeloid leukocyte']),'celltype3']='immature conventional dendritic cell'


In [None]:
adata_rc.obs.loc[adata_rc.obs['celltype3'].isin(['CD4-positive, alpha-beta cytotoxic T cell']),'celltype3']='effector memory CD4-positive, alpha-beta T cell'


In [None]:
adata_rc.obs.loc[adata_rc.obs['celltype3'].isin(['CD4-positive, alpha-beta T cell']),'celltype3']='effector memory CD4-positive, alpha-beta T cell'
adata_rc.obs.loc[adata_rc.obs['celltype3'].isin(['CD8-positive, alpha-beta T cell']),'celltype3']='CD8-positive, alpha-beta memory T cell'


In [None]:
sc.pl.umap(adata_rc,color=['celltype3'])

In [None]:
# Drop categories that no longer label any cell after the manual edits above.
for level in ('celltype3','celltype2','celltype1'):
    adata_rc.obs[level]=adata_rc.obs[level].cat.remove_unused_categories()

In [None]:
#adata.obs=adata.obs.drop(columns=['celltype1_TNK','celltype2_TNK','celltype3_TNK','leiden_TNK']).copy()

In [None]:
# Quick check: celltype1 label of the first cell in Leiden cluster `i`.
# .iloc[0] is used because the obs index holds cell labels; a bare [0] on such a
# Series relies on deprecated positional fallback. Selecting on .obs directly
# also avoids building an AnnData view just to read one value.
i='3'
adata_rc.obs.loc[adata_rc.obs['leiden']==i,'celltype1'].iloc[0]

In [None]:
# Rebuild the cluster -> cell-type table from the labels currently stored in
# adata_rc.obs: for each cluster id in cnamesDBlabel, take the label of the
# first cell of that cluster at every annotation level.
obs_rc=adata_rc.obs
c1={}
c2={}
c3={}
for i in list(cnamesDBlabel.index):
    # Subset the obs table once per cluster (no AnnData views needed) and read
    # the first row positionally with .iloc; a bare [0] on a label-indexed
    # Series is deprecated positional fallback.
    cluster_obs=obs_rc.loc[obs_rc['leiden']==i]
    c1[i]=cluster_obs['celltype1'].iloc[0]
    c2[i]=cluster_obs['celltype2'].iloc[0]
    c3[i]=cluster_obs['celltype3'].iloc[0]

# Same layout as cnamesDBlabel, with the three levels replaced by the labels
# collected above (aligned on the cluster id index).
cnamesDBlabelnew=cnamesDBlabel.copy()
cnamesDBlabelnew['celltype1']=pd.Series(c1)
cnamesDBlabelnew['celltype2']=pd.Series(c2)
cnamesDBlabelnew['celltype3']=pd.Series(c3)

In [None]:
    # Build one name per cluster, in the order of the cluster ids '0'..'n-1'.
    # NOTE(review): the original remark here was "lexical order needed" - confirm
    # which ordering annotate_new_cellnames expects for `names`.
    cluster_ids=[str(k) for k in range(cnamesDBlabelnew.shape[0])]
    names_1=[cnamesDBlabelnew['celltype1'][cid] for cid in cluster_ids]
    names_2=[cnamesDBlabelnew['celltype2'][cid] for cid in cluster_ids]
    names_3=[cnamesDBlabelnew['celltype3'][cid] for cid in cluster_ids]
    leiden_rc=[]

    # Write the reclustered labels back onto the full object under *_TNK columns.
    for level_names, level_label in ((names_1,'celltype1_TNK'),
                                     (names_2,'celltype2_TNK'),
                                     (names_3,'celltype3_TNK')):
        bc.tl.rc.annotate_new_cellnames(adata, adata_rc, names=level_names,
                                        new_label=level_label, method='leiden')
    rc_leiden=adata_rc.obs['leiden']
    bc.tl.rc.annotate_new_cellnames(adata, adata_rc, names=list(rc_leiden.cat.categories),
                                    new_label='leiden_TNK', method='leiden')
    #sc.pl.umap(adata,color=['celltype2', 'celltype2_original',
    #                   'celltype3'], ncols=1)     

    # Overwrite 'leiden_TNK' with the raw Leiden ids: every cell starts as
    # 'not_labeled', cells present in adata_rc receive their 'leiden' value.
    adata.obs['leiden_TNK']='not_labeled'
    leidennk=adata.obs['leiden_TNK'].astype("category")
    leidennk=leidennk.cat.add_categories(list(rc_leiden.cat.categories))
    leidennk.loc[rc_leiden.index]=list(rc_leiden)
    adata.obs['leiden_TNK']=list(leidennk)

In [None]:
#adata_rc=sc.read(os.path.join(results_folder, analysis_name + '.annotated.TNK.h5ad'))

In [None]:
adata_rc.write(os.path.join(results_folder, analysis_name + '.annotated.TNK.h5ad'))
#adata_rc=sc.read(os.path.join(results_folder, analysis_name + '.annotated.TNK.h5ad'))
adata.write(results_file)

In [None]:
sc.pl.umap(adata,color=['celltype3_TNK'], ncols=1) 

In [None]:
sc.pl.umap(adata,color=['celltype3_TNK'], ncols=1) 

In [None]:
sc.pl.umap(adata,color=['leiden_TNK'], ncols=1) 

In [None]:
sc.pl.umap(adata,color=['celltype2_TNK'], ncols=1) 

In [None]:
sc.pl.umap(adata,color=['celltype2_original'], ncols=1) 

In [None]:
sc.pl.umap(adata_rc,color=['leiden','Sample type','PatientID'], ncols=1) 

In [None]:
sc.pl.umap(adata_rc,color=['score_ExhCD8Tcell_scanpy','score_EMCD8Tcell_scanpy'])

In [None]:
# Two alternative gene panels are kept side by side; only the one bound to
# `rgoi` is plotted. Naming both makes the selection explicit instead of
# silently overwriting the first list with the second.
rgoi_panel_a=['CCR7','LAMP3','CD1C','XCR1','CXCL9','MARCO','MSR1','CLEC9A','TCF7L2']
rgoi_panel_b=['LEF1','CCR7','SELL','IL7R']
rgoi=rgoi_panel_b
sc.pl.umap(adata_rc,color=rgoi, ncols=1) 

In [None]:
sc.pl.umap(adata,color=['celltype1','celltype3', 'celltype3_original'], ncols=1) 

## Adjust main annotation based on partial ones

In [None]:
# Combine the two cell-population reclusterings into one column per level:
# start from the *_TNK labels and fill every cell that is 'not_labeled' there
# with its *_BMy label. Results are stored as plain lists in *_rc_cells.
for level in ('celltype1','celltype2','celltype3','leiden'):
    fallback=adata.obs[level+'_BMy']
    sub1=adata.obs[level+'_TNK'].copy()
    # Only categories the primary column does not know yet may be added.
    extra_categories=list(set(fallback.cat.categories)-set((sub1.cat.categories)))
    sub1=sub1.cat.add_categories(extra_categories)
    unlabeled=sub1=='not_labeled'
    sub1[unlabeled]=list(fallback[unlabeled])
    adata.obs[level+'_rc_cells']=list(sub1)


In [None]:
# Combine the two sample-type reclusterings into one column per level:
# start from the *_PBMC labels and fill every cell that is 'not_labeled' there
# with its *_TIL label. Results are stored as plain lists in *_rc_sample.
for level in ('celltype1','celltype2','celltype3','leiden'):
    fallback=adata.obs[level+'_TIL']
    sub1=adata.obs[level+'_PBMC'].copy()
    # Only categories the primary column does not know yet may be added.
    extra_categories=list(set(fallback.cat.categories)-set((sub1.cat.categories)))
    sub1=sub1.cat.add_categories(extra_categories)
    unlabeled=sub1=='not_labeled'
    sub1[unlabeled]=list(fallback[unlabeled])
    adata.obs[level+'_rc_sample']=list(sub1)


In [None]:
sc.pl.umap(adata[adata.obs['celltype1']=='myeloid leukocyte'], color=['leiden'], 
           legend_loc='on data', legend_fontsize=8)

In [None]:
sc.pl.umap(adata[adata.obs['celltype1']=='myeloid leukocyte'], color=['leiden_rc_cells'], 
           legend_loc='on data', legend_fontsize=7)

In [None]:
sc.pl.umap(adata[adata.obs['celltype1']=='myeloid leukocyte'], color=['leiden_rc_sample'], 
           legend_loc='on data', legend_fontsize=7)

In [None]:
sc.pl.umap(adata[adata.obs['celltype1']=='myeloid leukocyte'], color=['celltype3'])

In [None]:
sc.pl.umap(adata[adata.obs['celltype1']=='myeloid leukocyte'], color=['celltype3_rc_sample'])

In [None]:
sc.pl.umap(adata[adata.obs['celltype1']=='myeloid leukocyte'], color=['celltype3_rc_cells'])

In [None]:
sc.pl.umap(adata[adata.obs['celltype1']=='myeloid leukocyte'], color=['celltype3_expanded'])

In [None]:
sc.pl.umap(adata[adata.obs['celltype1']=='myeloid leukocyte'], color=['celltype3_expanded_TIL'])

In [None]:
adata.obs['celltype4']=adata.obs['celltype3'].copy()

In [None]:
sub3=adata.obs['celltype3'].copy()
sub4=adata.obs['celltype4'].copy()
sub2=adata.obs['celltype2'].copy()

In [None]:
# Labels that the merge rules may assign and that must therefore exist as
# categories. add_categories raises if a label is already present (e.g. when
# this cell is run twice, or the label already occurs in the source column),
# so only the missing ones are added - the same approach as the set-difference
# used when the partial annotations are combined.
sub4_new_labels=['CCR7-positive myeloid dendritic cell',
                 'classical monocyte, brain lesions', 
                 'myeloid T cell', 'immature conventional dendritic cell',
                 'transitional monocyte']
sub3_new_labels=['CCR7-positive myeloid dendritic cell', 'immature conventional dendritic cell']

sub4_missing=[lab for lab in sub4_new_labels if lab not in sub4.cat.categories]
if sub4_missing:
    sub4=sub4.cat.add_categories(sub4_missing)
sub3_missing=[lab for lab in sub3_new_labels if lab not in sub3.cat.categories]
if sub3_missing:
    sub3=sub3.cat.add_categories(sub3_missing)
#sub2=sub2.cat.add_categories(['myeloid leukocyte'])

In [None]:
# Myeloid merge rules. sub2/sub3/sub4 are working copies of the celltype2/3/4
# columns; each rule builds a boolean mask from the partial (reclustered)
# annotations and overwrites the matching cells. Rules run top to bottom, so a
# later rule wins where masks overlap.
# NOTE(review): several target labels used below (e.g. 'CD141-positive myeloid
# dendritic cell', the '*-positive macrophage' labels) are not added as
# categories in this cell - presumably they already exist in celltype3; confirm.

# Dendritic-cell subtypes taken from the cell-population reclustering.
imono=(adata.obs['celltype3_rc_cells'].isin(['CD141-positive myeloid dendritic cell']))&(adata.obs['celltype2']=='myeloid dendritic cell')

sub4[imono]='CD141-positive myeloid dendritic cell'
sub3[imono]='CD141-positive myeloid dendritic cell'

imono=(adata.obs['celltype3_rc_cells'].isin(['CCR7-positive myeloid dendritic cell']))&(adata.obs['celltype2']=='myeloid dendritic cell')
sub4[imono]='CCR7-positive myeloid dendritic cell'
sub3[imono]='CCR7-positive myeloid dendritic cell'

# Same CCR7 rule, this time driven by the sample-type reclustering.
imono=(adata.obs['celltype3_rc_sample'].isin(['CCR7-positive myeloid dendritic cell']))&(adata.obs['celltype2']=='myeloid dendritic cell')
sub4[imono]='CCR7-positive myeloid dendritic cell'
sub3[imono]='CCR7-positive myeloid dendritic cell'

# Macrophage subtypes, restricted to cells whose celltype2 is 'macrophage'.
sub4[(adata.obs['celltype3_rc_cells'].isin(['MSR1-positive macrophage']))&(adata.obs['celltype2']=='macrophage')]='MSR1-positive macrophage'
sub3[(adata.obs['celltype3_rc_cells'].isin(['MSR1-positive macrophage']))&(adata.obs['celltype2']=='macrophage')]='MSR1-positive macrophage'

sub4[(adata.obs['celltype3_rc_cells'].isin(['CXCL9-positive macrophage']))&(adata.obs['celltype2']=='macrophage')]='CXCL9-positive macrophage'
sub3[(adata.obs['celltype3_rc_cells'].isin(['CXCL9-positive macrophage']))&(adata.obs['celltype2']=='macrophage')]='CXCL9-positive macrophage'

# Dendritic cells that either reclustering left as generic 'myeloid leukocyte'
# are labelled 'immature conventional dendritic cell'.
imono=(adata.obs['celltype3_rc_cells'].isin(['myeloid leukocyte']))&(adata.obs['celltype2']=='myeloid dendritic cell')
sub4[imono]='immature conventional dendritic cell'
sub3[imono]='immature conventional dendritic cell'


imono=(adata.obs['celltype3_rc_sample'].isin(['myeloid leukocyte']))&(adata.obs['celltype2']=='myeloid dendritic cell')
sub4[imono]='immature conventional dendritic cell'
sub3[imono]='immature conventional dendritic cell'

# Keep celltype2 in line with celltype3 for cells annotated as 'macrophage'.
sub2[(adata.obs['celltype3'].isin(['macrophage']))]='macrophage'

#sub4[(adata.obs['celltype3'].isin(['macrophage']))]='myeloid leukocyte'
#sub3[(adata.obs['celltype3'].isin(['macrophage']))]='myeloid leukocyte'
#sub2[(adata.obs['celltype3'].isin(['macrophage']))]='myeloid leukocyte'

# Monocyte sub-populations from the expanded annotation; only the finest level
# (sub4) is changed by these two rules.
imono=(adata.obs['celltype3_expanded'].isin(['classical monocyte, brain lesions']))&(adata.obs['celltype2']=='classical monocyte')
sub4[imono]='classical monocyte, brain lesions'

imono=(adata.obs['celltype3_expanded'].isin(['myeloid T cell']))&(adata.obs['celltype2']=='classical monocyte')
sub4[imono]='myeloid T cell'


# Cluster-driven overrides keyed on the merged Leiden ids in 'leiden_rc_cells'.
# Clusters 13/28: macrophage-labelled cells (except non-classical monocytes)
# become classical monocytes, 'transitional monocyte' at the finest level.
imono=(adata.obs['leiden_rc_cells'].isin(['13','28']))&(adata.obs['celltype2']=='macrophage')&(adata.obs['celltype3']!='non-classical monocyte')
sub4[imono]='transitional monocyte'
sub3[imono]='classical monocyte'
sub2[imono]='classical monocyte'

# Clusters 29/19: MARCO-positive macrophages.
imono=(adata.obs['leiden_rc_cells'].isin(['29','19']))&(adata.obs['celltype2']=='macrophage')
sub4[imono]='MARCO-positive macrophage'
sub3[imono]='MARCO-positive macrophage'
#adata.obs['celltype3']=list(sub1)

# Cluster 23: reset to the plain 'macrophage' label.
imono=(adata.obs['leiden_rc_cells'].isin(['23']))&(adata.obs['celltype2']=='macrophage')
sub4[imono]='macrophage'
sub3[imono]='macrophage'

In [None]:
adata.obs['celltype2_merged']=list(sub2)
adata.obs['celltype3_merged']=list(sub3)
adata.obs['celltype4_merged']=list(sub4)

In [None]:
#imono=(adata.obs['celltype3'].isin(['CD141-positive myeloid dendritic cell']))&(adata.obs['celltype3_merged']=='immature conventional dendritic cell')
#sub4[imono]='CD141-positive myeloid dendritic cell'
#sub3[imono]='CD141-positive myeloid dendritic cell'
#adata.obs['celltype3_merged']=list(sub3)
#adata.obs['celltype4_merged']=list(sub4)

In [None]:
sc.pl.umap(adata[adata.obs['celltype2_merged']=='myeloid dendritic cell'], color=['celltype4_merged'])

In [None]:
sc.pl.umap(adata[adata.obs['celltype1']=='myeloid leukocyte'], color=['celltype3_merged'])

In [None]:
sc.pl.umap(adata[adata.obs['celltype1']=='myeloid leukocyte'], color=['celltype3_rc_sample'])

Immature DCs are transitional DCs, which in turn are Axl+ DCs.

In [None]:
sc.pl.umap(adata[adata.obs['celltype1']=='myeloid leukocyte'], color=['CLEC4C','TCF4','IRF8','FAM105A','NBPF10','LILRB4','SIGLEC6'])

In [None]:
sc.pl.umap(adata[adata.obs['celltype1']=='myeloid leukocyte'], color=mymarkers['NClassMonocyte'])

In [None]:
sc.pl.umap(adata[adata.obs['celltype1']=='myeloid leukocyte'], color=mymarkers['ClassMonocyte'])

In [None]:
sc.pl.umap(adata[adata.obs['celltype1']=='myeloid leukocyte'], color=mymarkers['Macrophage'])

In [None]:
sc.pl.umap(adata[adata.obs['celltype1']=='myeloid leukocyte'], color=mymarkers['Macrophage_CXCL9'])

In [None]:
sc.pl.umap(adata[adata.obs['celltype1']=='myeloid leukocyte'], color=mymarkers['Macrophage_MARCO'])

In [None]:
sc.pl.umap(adata[adata.obs['celltype1']=='myeloid leukocyte'], color=mymarkers['Macrophage_MSR1'])

In [None]:
sc.pl.umap(adata[adata.obs['celltype1']=='myeloid leukocyte'], color=mymarkers['cDC'])

In [None]:
#sc.pl.umap(adata[adata.obs['celltype1']=='myeloid leukocyte'], color=['Lesion'])

In [None]:
sc.pl.umap(adata[adata.obs['celltype1']=='T cell'], color=['leiden'], legend_loc='on data', legend_fontsize=7)

In [None]:
sc.pl.umap(adata, color=['celltype3_merged'])

In [None]:
sc.pl.umap(adata[adata.obs['celltype1']=='T cell'], color=['celltype3_rc_cells'])

In [None]:
set(adata.obs['celltype2'])

In [None]:
# Working copies of the merged columns for the T/NK merge rules.
# The *_merged columns were written from plain lists, so they are only
# categorical if some intermediate step (plotting, writing) converted them.
# astype('category') returns a new categorical Series either way, which makes
# the .cat calls below independent of that side effect.
sub3=adata.obs['celltype3_merged'].astype('category')
sub4=adata.obs['celltype4_merged'].astype('category')
sub2=adata.obs['celltype2_merged'].astype('category')

# Labels the rules below may assign. add_categories raises on labels that are
# already present, so only the missing ones are added.
tnk_new_labels=['CD8-positive, alpha-beta cytokine secreting effector T cell', 
                'proliferating CD4-positive, alpha-beta T cell',
                'exhausted-like CD4-positive, alpha-beta T cell','proliferating NK cell']

sub4_missing=[lab for lab in tnk_new_labels if lab not in sub4.cat.categories]
if sub4_missing:
    sub4=sub4.cat.add_categories(sub4_missing)
sub3_missing=[lab for lab in tnk_new_labels if lab not in sub3.cat.categories]
if sub3_missing:
    sub3=sub3.cat.add_categories(sub3_missing)
if 'proliferating NK cell' not in sub2.cat.categories:
    sub2=sub2.cat.add_categories(['proliferating NK cell'])


In [None]:
# Cytokine-secreting effector CD8 T cells: taken from the cell-population
# reclustering (all three levels) and from Leiden clusters 33/36 of the main
# clustering (levels 3 and 4 only).
cd8_effector='CD8-positive, alpha-beta cytokine secreting effector T cell'
rc_is_cd8_effector=adata.obs['celltype3_rc_cells'].isin([cd8_effector])
sub4[rc_is_cd8_effector]=cd8_effector
sub3[rc_is_cd8_effector]=cd8_effector
sub2[rc_is_cd8_effector]='CD8-positive, alpha-beta T cell'

imono=(adata.obs['leiden'].isin(['33','36']))
sub4[imono]=cd8_effector
sub3[imono]=cd8_effector


In [None]:
idx=(adata.obs['celltype3_rc_cells'].isin(['proliferating CD4-positive, alpha-beta T cell']))&(adata.obs['celltype1'].isin(['T cell']))
sub4[idx]='proliferating CD4-positive, alpha-beta T cell'
sub3[idx]='proliferating CD4-positive, alpha-beta T cell'
sub2[idx]='CD4-positive, alpha-beta T cell'
sub2[sub3=='proliferating CD4-positive, alpha-beta T cell']='CD4-positive, alpha-beta T cell'

In [None]:
idx=(adata.obs['celltype3_rc_cells'].isin(['proliferating CD4-positive, alpha-beta T cell']))&(adata.obs['celltype1'].isin(['natural killer cell']))
sub4[idx]='proliferating NK cell'
sub3[idx]='proliferating NK cell'
sub2[idx]='proliferating NK cell'


In [None]:
sub4[(adata.obs['celltype3_rc_cells'].isin(['naive thymus-derived CD8-positive, alpha-beta T cell']))]='naive thymus-derived CD8-positive, alpha-beta T cell'
sub3[(adata.obs['celltype3_rc_cells'].isin(['naive thymus-derived CD8-positive, alpha-beta T cell']))]='naive thymus-derived CD8-positive, alpha-beta T cell'
sub2[(adata.obs['celltype3_rc_cells'].isin(['naive thymus-derived CD8-positive, alpha-beta T cell']))]='CD8-positive, alpha-beta T cell'


In [None]:
# Central memory CD4 T cells: the reclustering says 'central memory' and the
# main celltype3 annotation is one of the CD4 labels listed below.
cm_cd4='central memory CD4-positive, alpha-beta T cell'
compatible_celltype3=[
    'central memory CD4-positive, alpha-beta T cell',
    'naive thymus-derived CD4-positive, alpha-beta T cell',
    'CD4-positive, alpha-beta cytotoxic T cell',
]
rc_is_cm_cd4=adata.obs['celltype3_rc_cells'].isin([cm_cd4])
idx=rc_is_cm_cd4&(adata.obs['celltype3'].isin(compatible_celltype3))
sub4[idx]=cm_cd4
sub3[idx]=cm_cd4
sub2[idx]='CD4-positive, alpha-beta T cell'


In [None]:
idx=(adata.obs['celltype3_rc_cells'].isin(['central memory CD4-positive, alpha-beta T cell']))&(adata.obs['celltype3'].isin(['effector memory CD4-positive, alpha-beta T cell']))
sub4[idx]='exhausted-like CD4-positive, alpha-beta T cell'
sub3[idx]='exhausted-like CD4-positive, alpha-beta T cell'
sub2[idx]='CD4-positive, alpha-beta T cell'


In [None]:
# Generic CD4 T cells of the main annotation become naive CD4 T cells.
naive_cd4='naive thymus-derived CD4-positive, alpha-beta T cell'
generic_cd4=adata.obs['celltype3'].isin(['CD4-positive, alpha-beta T cell'])
sub4[generic_cd4]=naive_cd4
sub3[generic_cd4]=naive_cd4

# Cells that are effector memory CD4 in sub3 at this point (i.e. after the
# assignment above) are relabelled as cytotoxic CD4 at levels 3 and 4.
cytotoxic_cd4='CD4-positive, alpha-beta cytotoxic T cell'
sub3_is_em_cd4=sub3.isin(['effector memory CD4-positive, alpha-beta T cell'])
sub4[sub3_is_em_cd4]=cytotoxic_cd4
sub3[sub3_is_em_cd4]=cytotoxic_cd4

#sub2[(adata.obs['celltype3'].isin(['IL7R-max CD3-positive, ']))]='naive thymus-derived CD4-positive, alpha-beta T cell'


In [None]:
# Cells labelled 'mature NK T cell' in celltype3 get the CD8 T cell parent label in sub2.
ct3_is_nkt = adata.obs['celltype3'].isin(['mature NK T cell'])
sub2[ct3_is_nkt] = 'CD8-positive, alpha-beta T cell'


In [None]:
# Cells labelled cytotoxic CD4 in celltype3 but effector memory CD4 in celltype3_rc_cells
# take the effector memory label in sub4/sub3 and the CD4 parent label in sub2.
ct3_is_cytotoxic_cd4 = adata.obs['celltype3'].isin(['CD4-positive, alpha-beta cytotoxic T cell'])
rc_is_em_cd4 = adata.obs['celltype3_rc_cells'].isin(['effector memory CD4-positive, alpha-beta T cell'])
idx = ct3_is_cytotoxic_cd4 & rc_is_em_cd4
sub4[idx] = 'effector memory CD4-positive, alpha-beta T cell'
sub3[idx] = 'effector memory CD4-positive, alpha-beta T cell'
sub2[idx] = 'CD4-positive, alpha-beta T cell'

In [None]:
# Cells labelled CD8 memory T cell in celltype3_rc_cells that are T cells at celltype1 level
# become effector memory CD8 in sub4/sub3, with the CD8 parent label in sub2.
rc_is_cd8_memory = adata.obs['celltype3_rc_cells'].isin(['CD8-positive, alpha-beta memory T cell'])
ct1_is_t_cell = adata.obs['celltype1'].isin(['T cell'])
idx = rc_is_cd8_memory & ct1_is_t_cell
sub4[idx] = 'effector memory CD8-positive, alpha-beta T cell'
sub3[idx] = 'effector memory CD8-positive, alpha-beta T cell'
sub2[idx] = 'CD8-positive, alpha-beta T cell'


In [None]:
# Store the edited label vectors as the *_merged columns of adata.obs.
# list() hands over plain values, so the assignment is positional rather than index-aligned.
for merged_column, merged_labels in (
    ('celltype2_merged', sub2),
    ('celltype3_merged', sub3),
    ('celltype4_merged', sub4),
):
    adata.obs[merged_column] = list(merged_labels)

In [None]:
# Work on a copy of the level-2 labels; cells that are proliferating CD4 T cells at level 3
# get the CD4 T cell parent label at level 2.
sub2 = adata.obs['celltype2_merged'].copy()
is_proliferating_cd4 = adata.obs['celltype3_merged'] == 'proliferating CD4-positive, alpha-beta T cell'
sub2[is_proliferating_cd4] = 'CD4-positive, alpha-beta T cell'

In [None]:
# Write the corrected level-2 labels back to adata.obs; list() passes plain values,
# so the assignment is positional rather than index-aligned.
adata.obs['celltype2_merged']=list(sub2)

In [None]:
# Display the distinct labels present in celltype3_merged (sanity check of the merge).
set(adata.obs['celltype3_merged'])

In [None]:
# UMAP restricted to cells labelled 'CCR7-positive myeloid dendritic cell' in celltype3_merged,
# coloured by their celltype2_merged label.
ccr7_dc_mask = adata.obs['celltype3_merged'] == 'CCR7-positive myeloid dendritic cell'
sc.pl.umap(adata[ccr7_dc_mask], color=['celltype2_merged'])

In [None]:
# UMAP of the celltype1 'T cell' subset, coloured by celltype4_merged.
t_cell_mask = adata.obs['celltype1'] == 'T cell'
sc.pl.umap(adata[t_cell_mask], color=['celltype4_merged'])

In [None]:
# UMAP of the celltype1 'T cell' subset, coloured by celltype3_merged.
t_cell_mask = adata.obs['celltype1'] == 'T cell'
sc.pl.umap(adata[t_cell_mask], color=['celltype3_merged'])

In [None]:
# UMAP restricted to cells labelled as cytokine secreting effector CD8 T cells in celltype3_merged,
# coloured by their celltype2_merged label.
cd8_cytokine_effector_mask = (
    adata.obs['celltype3_merged'] == 'CD8-positive, alpha-beta cytokine secreting effector T cell'
)
sc.pl.umap(adata[cd8_cytokine_effector_mask], color=['celltype2_merged'])

In [None]:
# UMAP of the celltype1 'T cell' subset, coloured by the 'Sample type' column.
t_cell_mask = adata.obs['celltype1'] == 'T cell'
sc.pl.umap(adata[t_cell_mask], color=['Sample type'])

In [None]:
# Count the celltype3_TNK labels among cells whose celltype4_merged label is still the
# generic 'CD4-positive, alpha-beta T cell'.
adata[adata.obs['celltype4_merged']=='CD4-positive, alpha-beta T cell'].obs['celltype3_TNK'].value_counts()

In [None]:
# Count the celltype3_rc_sample labels among cells whose celltype4_merged label is still the
# generic 'CD4-positive, alpha-beta T cell'.
adata[adata.obs['celltype4_merged']=='CD4-positive, alpha-beta T cell'].obs['celltype3_rc_sample'].value_counts()

In [None]:
# Count the original celltype3 labels among cells whose celltype4_merged label is still the
# generic 'CD4-positive, alpha-beta T cell'.
adata[adata.obs['celltype4_merged']=='CD4-positive, alpha-beta T cell'].obs['celltype3'].value_counts()

In [None]:
# Cast the three merged annotation columns to pandas categoricals.
for merged_column in ("celltype2_merged", "celltype3_merged", "celltype4_merged"):
    adata.obs[merged_column] = adata.obs[merged_column].astype("category")

In [None]:
# Drop categories that no longer occur in any cell from the three merged annotation columns.
for merged_column in ('celltype2_merged', 'celltype3_merged', 'celltype4_merged'):
    adata.obs[merged_column] = adata.obs[merged_column].cat.remove_unused_categories()

In [None]:
# UMAP restricted to cells labelled CD8 T cell in celltype2_merged, coloured by celltype3_merged.
cd8_mask = adata.obs['celltype2_merged'] == 'CD8-positive, alpha-beta T cell'
sc.pl.umap(adata[cd8_mask], color=['celltype3_merged'])

In [None]:
# UMAP restricted to cells labelled CD4 T cell in celltype2_merged, coloured by celltype3_merged.
cd4_mask = adata.obs['celltype2_merged'] == 'CD4-positive, alpha-beta T cell'
sc.pl.umap(adata[cd4_mask], color=['celltype3_merged'])

In [None]:
# Display the distinct celltype1 labels (used to pick the lineage subsets plotted below).
set(adata.obs['celltype1'])

In [None]:
# UMAP of the celltype1 'lymphocyte of B lineage' subset, coloured by celltype3_merged.
b_lineage_mask = adata.obs['celltype1'] == 'lymphocyte of B lineage'
sc.pl.umap(adata[b_lineage_mask], color=['celltype3_merged'])

In [None]:
# UMAP of the celltype1 'lymphocyte of B lineage' subset, coloured by celltype3_rc_cells.
b_lineage_mask = adata.obs['celltype1'] == 'lymphocyte of B lineage'
sc.pl.umap(adata[b_lineage_mask], color=['celltype3_rc_cells'])

In [None]:
# UMAP of the celltype1 'lymphocyte of B lineage' subset, coloured by celltype3_rc_sample.
b_lineage_mask = adata.obs['celltype1'] == 'lymphocyte of B lineage'
sc.pl.umap(adata[b_lineage_mask], color=['celltype3_rc_sample'])

In [None]:
# UMAP of the celltype1 'lymphocyte of B lineage' subset, one panel per gene in the list below.
b_lineage_mask = adata.obs['celltype1'] == 'lymphocyte of B lineage'
b_lineage_genes = ['CD19', 'MS4A1', 'MKI67']
sc.pl.umap(adata[b_lineage_mask], color=b_lineage_genes)

In [None]:
# UMAP of the celltype1 'T cell' subset, coloured by the original (unmerged) celltype3 labels.
t_cell_mask = adata.obs['celltype1'] == 'T cell'
sc.pl.umap(adata[t_cell_mask], color=['celltype3'])

In [None]:
# UMAP of the celltype1 'T cell' subset, coloured by the original (unmerged) celltype2 labels.
t_cell_mask = adata.obs['celltype1'] == 'T cell'
sc.pl.umap(adata[t_cell_mask], color=['celltype2'])

In [None]:
# UMAP of the celltype1 'T cell' subset, coloured by the entries stored under mymarkers['CD4Tcell'].
t_cell_mask = adata.obs['celltype1'] == 'T cell'
cd4_marker_panel = mymarkers['CD4Tcell']
sc.pl.umap(adata[t_cell_mask], color=cd4_marker_panel)

In [None]:
# UMAP of the celltype1 'T cell' subset, one panel per gene of the fixed list below.
t_cell_mask = adata.obs['celltype1'] == 'T cell'
t_cell_gene_panel = [
    'IL7R', 'GZMB', 'GZMH', 'GZMA', 'NKG7', 'GNLY', 'CLU', 'XCL1',
    'LEF1', 'TCF7', 'SELL', 'PDCD1', 'LAG3',
]
sc.pl.umap(adata[t_cell_mask], color=t_cell_gene_panel)

In [None]:
# dblabel is set to a copy of the merged level-3 annotation (exported below as 'dblabel').
adata.obs['dblabel']=adata.obs['celltype3_merged'].copy()

### Export labelling

Chosen labels can also be exported as a new folder in labelings/

In [None]:
### Save labelling: pass the celltype1 column to besca's additional_labeling under the name 'celltype1'
# NOTE(review): argument meanings (column, labeling name, description, author, folder) are
# presumed from besca's additional_labeling signature - confirm against the besca docs.
celltype1_description = 'Major cell types attributed based on HumanCD45p_scseqCMs8'
adata = bc.st.additional_labeling(
    adata,
    'celltype1',
    'celltype1',
    celltype1_description,
    'schwalip',
    results_folder,
)


In [None]:
### Save labelling: pass the celltype2_merged column to besca's additional_labeling under the name 'celltype2'
# NOTE(review): argument meanings (column, labeling name, description, author, folder) are
# presumed from besca's additional_labeling signature - confirm against the besca docs.
celltype2_description = 'Higher level cell types attributed based on HumanCD45p_scseqCMs8'
adata = bc.st.additional_labeling(
    adata,
    'celltype2_merged',
    'celltype2',
    celltype2_description,
    'schwalip',
    results_folder,
)


In [None]:
### Save labelling: pass the dblabel column to besca's additional_labeling under the name 'dblabel'
# NOTE(review): argument meanings (column, labeling name, description, author, folder) are
# presumed from besca's additional_labeling signature - confirm against the besca docs.
dblabel_description = 'Highest level cell types attributed based on HumanCD45p_scseqCMs8'
adata = bc.st.additional_labeling(
    adata,
    'dblabel',
    'dblabel',
    dblabel_description,
    'schwalip',
    results_folder,
)


In [None]:
### Save labelling: pass the celltype4_merged column to besca's additional_labeling under the name 'celltype3_detailed'
# NOTE(review): argument meanings (column, labeling name, description, author, folder) are
# presumed from besca's additional_labeling signature - confirm against the besca docs.
celltype3_detailed_description = 'Highest level cell types attributed based on HumanCD45p_scseqCMs8, further details upon reclustering'
adata = bc.st.additional_labeling(
    adata,
    'celltype4_merged',
    'celltype3_detailed',
    celltype3_detailed_description,
    'schwalip',
    results_folder,
)


In [None]:
# Persist the annotated AnnData object (including the merged label columns) to results_file.
adata.write(results_file)
# To resume from the saved object instead of re-running the cells above, uncomment:
#adata=sc.read(results_file)

### Follow-up analyses for marker generation

If new markers are of interest, differential expression (DE) analysis can be performed at the cell type annotation level of choice. 

In [None]:
### Perform DE of the cells of each celltype4_merged group vs. all other cells
# Wilcoxon method with topnr=5000, logfc=1 and padj=0.05 passed to besca's get_de;
# the result is indexed by group label in the cells below.
DEgenes=bc.tl.dge.get_de(adata,'celltype4_merged',demethod='wilcoxon',topnr=5000, logfc=1,padj=0.05)

In [None]:
# Display the group labels for which DE results are available.
DEgenes.keys()

In [None]:
### Take the first 40 gene names listed for 'brain macrophage' in DEgenes and plot their expression per cell type
# NOTE(review): DEgenes was computed on 'celltype4_merged' but the dot plot groups by
# 'celltype3_detailed' - confirm that this column exists in adata.obs.
tops=list(DEgenes['brain macrophage']['Name'][0:40])
sc.pl.dotplot(adata, var_names=tops,groupby='celltype3_detailed')

In [None]:
### Take the first 40 gene names listed for naive CD8 T cells in DEgenes and plot their expression per cell type
# NOTE(review): DEgenes was computed on 'celltype4_merged' but the dot plot groups by
# 'celltype3_detailed' - confirm that this column exists in adata.obs.
tops=list(DEgenes['naive thymus-derived CD8-positive, alpha-beta T cell']['Name'][0:40])
sc.pl.dotplot(adata, var_names=tops,groupby='celltype3_detailed')

### Convert to html

In [None]:
# Shell escape: convert the notebook file pub_celltype_annotation_besca.ipynb to HTML with nbconvert.
! jupyter nbconvert --to html pub_celltype_annotation_besca.ipynb