In [1]:
# Location of the project data and the tag of the data version in use.
# NOTE(review): absolute, machine-specific path; neither constant is referenced by
# the cells visible in this notebook - confirm whether they are still needed.
DATA_DIR = "/storage/groups/ml01/workspace/louis.kuemmerle/projects/A1/data2/"
DATA_VERSION = "april21"

# Mappings
This notebook lists all 'leiden cluster → cell type' mappings.

In [2]:
import os
import pandas as pd

import numpy as np
import itertools
import seaborn as sns
import matplotlib
import matplotlib.pyplot as plt

import scanpy as sc
import anndata as ann
import scvelo as scv
import cellrank as cr

import scipy as sci
from scipy import sparse
from scipy.sparse import csr_matrix, issparse
from matplotlib import rcParams
from matplotlib import colors

# Logging setup, followed by a record of the package versions used for this run.
sc.settings.verbosity = 3
sc.logging.print_header()  # alternative: sc.logging.print_versions()
print(f"scvelo=={scv.__version__}")
print(f"cellrank=={cr.__version__}")

# Default size for all matplotlib figures.
rcParams.update({'figure.figsize': (10, 10)})

scanpy==1.6.1 anndata==0.7.5 umap==0.4.6 numpy==1.19.5 scipy==1.6.0 pandas==1.2.0 scikit-learn==0.24.0 statsmodels==0.12.1 python-igraph==0.8.3 louvain==0.7.0 leidenalg==0.8.3
scvelo==0.2.2
cellrank==1.1.0


In [3]:
# Selects which results file is used: with SHAM set, the file name carries the
# '_wSham' suffix.
SHAM = True

########################################################################################
# Results table that the cell-type sections below read, update and write back.
if SHAM:
    sham_str = '_wSham'
else:
    sham_str = ''
RESULTS_FILE_PATH = f'./final_annotations{sham_str}.csv'

#### For each celltype apply the following:
- Set `FINE_ANNOTATIONS` to define the level2 cell states. The lower-resolution clusters are always annotated first; their labels are then (possibly only partially) overwritten by the mappings for higher-resolution clusters.
- Optionally also reannotate coarse celltype annotations (obs_key `'cell_types'`) via `COARSE_REANNOTATIONS`. Same procedure as above.
- Ideally all cells are mapped wrt level2!    


#### stick to the following structure for the next cells:
```
FINE_ANNOTATIONS = {
    # keys are the resolution suffixes of the `leiden_<res>` columns, e.g. '0.25'
    '0.25': {
        '0': 'cell_state1',
        '1': 'cell_state2',
        #.... till last leiden cluster (if res=0.25 is your "go-to resolution")
    },
    '0.5': {
        '1': 'cell_state3',
    },
}
COARSE_REANNOTATIONS = {
    '0.5': {
        '4': 'erythrocytes',
    },
}
```

# B cells

In [4]:
CT_OI = 'B cells'

# level2 (fine) labels: leiden resolution -> {leiden cluster id -> cell state}.
FINE_ANNOTATIONS = {
    '1': {
        '0': 'mature B cell',
        '1': 'mature B cell',
        '2': 'mature B cell',
        '3': 'mature B cell',
        '4': 'pre B cell',
        '5': 'immature B cell',
        '6': 'mature B cell',
        '7': 'pro B cell',
        '8': 'mature B cell',
        '9': 'mature B cell',
        '10': 'mature B cell',
        '11': 'pro B cell',
        '12': 'mature B cell',
        '13': 'plasma cell',
        '14': 'immature B cell',
    },
}

# level1 (coarse) labels: every annotated cluster of this subset becomes 'B cell'.
COARSE_REANNOTATIONS = {
    resolution: dict.fromkeys(cluster_labels, 'B cell')
    for resolution, cluster_labels in FINE_ANNOTATIONS.items()
}
########################################################################################
# Leiden clusterings of this cell type; values are cast to str so that cluster ids
# compare equal to the (string) keys of the annotation mappings.
df_leiden = pd.read_csv(f'./{CT_OI}.csv', index_col=0).astype(str)
# Resolution = text after the last '_' of each column name (columns are read back
# below as f'leiden_{resolution}').
resolutions = [column.split('_')[-1] for column in df_leiden.columns]

# Shared results table: loaded, updated and written back to the same path.
df = pd.read_csv(RESULTS_FILE_PATH, index_col=0)

# level2 receives the fine labels, level1 the coarse ones. Resolutions are applied in
# the column order of the cluster file, so later columns overwrite earlier ones.
annotation_targets = (('level2', FINE_ANNOTATIONS), ('level1', COARSE_REANNOTATIONS))
for resolution in resolutions:
    for target_column, annotations in annotation_targets:
        for cluster_id, label in annotations.get(resolution, {}).items():
            clusters = df_leiden[f'leiden_{resolution}']
            df.loc[clusters[clusters == cluster_id].index, target_column] = label

df.to_csv(RESULTS_FILE_PATH)

# T cells

In [5]:
CT_OI = 'T cells'

# level2 (fine) labels: leiden resolution -> {leiden cluster id -> cell state}.
FINE_ANNOTATIONS = {
    '1': {
        '0': 'Cd8 T cell',
        '1': 'NK cell',
        '2': 'NK-T cell',
        '3': 'Cd4 T cell',
        '4': 'Cd4 T cell',
        '5': 'NK-T cell',
        '6': 'Cd8 T cell',
        '7': 'NK cell',
        '8': 'Cd8 T cell',
        '9': 'gdT cell',
        '10': 'innate lymphoid cell',
        '11': 'NK cell',
        '12': 'NK cell',
    },
}

# level1 (coarse) label of every level2 label used above; the coarse table is built
# from it so that clusters sharing a level2 label also share their level1 label.
_level1_of_level2 = {
    'Cd8 T cell': 'T cell',
    'Cd4 T cell': 'T cell',
    'gdT cell': 'T cell',
    'NK cell': 'NK cell',
    'NK-T cell': 'NK-T cell',
    'innate lymphoid cell': 'innate lymphoid cell',
}
COARSE_REANNOTATIONS = {
    resolution: {cluster: _level1_of_level2[label] for cluster, label in cluster_labels.items()}
    for resolution, cluster_labels in FINE_ANNOTATIONS.items()
}

########################################################################################
# Leiden clusterings of this cell type; values are cast to str so that cluster ids
# compare equal to the (string) keys of the annotation mappings.
df_leiden = pd.read_csv(f'./{CT_OI}.csv', index_col=0).astype(str)
# Resolution = text after the last '_' of each column name (columns are read back
# below as f'leiden_{resolution}').
resolutions = [column.split('_')[-1] for column in df_leiden.columns]

# Shared results table: loaded, updated and written back to the same path.
df = pd.read_csv(RESULTS_FILE_PATH, index_col=0)

# level2 receives the fine labels, level1 the coarse ones. Resolutions are applied in
# the column order of the cluster file, so later columns overwrite earlier ones.
annotation_targets = (('level2', FINE_ANNOTATIONS), ('level1', COARSE_REANNOTATIONS))
for resolution in resolutions:
    for target_column, annotations in annotation_targets:
        for cluster_id, label in annotations.get(resolution, {}).items():
            clusters = df_leiden[f'leiden_{resolution}']
            df.loc[clusters[clusters == cluster_id].index, target_column] = label

df.to_csv(RESULTS_FILE_PATH)

# Dendritic cells

In [6]:
CT_OI = 'dendritic cells'

# level2 (fine) labels: leiden resolution -> {leiden cluster id -> cell state}.
FINE_ANNOTATIONS = {
    '0.5': {
        '0': 'plasmacytoid DC',
        '1': 'plasmacytoid DC',
        '2': 'conventional DC2',
        '3': 'monocyte-derived DC',
        '4': 'common DC progenitor (CDP)',
        '5': 'macrophage-DC progenitor (MDP)',
        '6': 'conventional DC2',
        '7': 'conventional DC1',
        '8': 'plasmacytoid DC',
        # No supporting literature was found for this population.
        '9': 'B cell-DC hybrid',
        # There is a paper about this population, but it says these DCs
        # differentiate from neutrophils.
        '10': 'neutrophil-DC hybrid',
    },
}

# level1 (coarse) labels: 'dendritic cell' unless the level2 label is listed here.
_level1_exceptions = {'macrophage-DC progenitor (MDP)': 'progenitors'}
COARSE_REANNOTATIONS = {
    resolution: {
        cluster: _level1_exceptions.get(label, 'dendritic cell')
        for cluster, label in cluster_labels.items()
    }
    for resolution, cluster_labels in FINE_ANNOTATIONS.items()
}

########################################################################################
# Leiden clusterings of this cell type; values are cast to str so that cluster ids
# compare equal to the (string) keys of the annotation mappings.
df_leiden = pd.read_csv(f'./{CT_OI}.csv', index_col=0).astype(str)
# Resolution = text after the last '_' of each column name (columns are read back
# below as f'leiden_{resolution}').
resolutions = [column.split('_')[-1] for column in df_leiden.columns]

# Shared results table: loaded, updated and written back to the same path.
df = pd.read_csv(RESULTS_FILE_PATH, index_col=0)

# level2 receives the fine labels, level1 the coarse ones. Resolutions are applied in
# the column order of the cluster file, so later columns overwrite earlier ones.
annotation_targets = (('level2', FINE_ANNOTATIONS), ('level1', COARSE_REANNOTATIONS))
for resolution in resolutions:
    for target_column, annotations in annotation_targets:
        for cluster_id, label in annotations.get(resolution, {}).items():
            clusters = df_leiden[f'leiden_{resolution}']
            df.loc[clusters[clusters == cluster_id].index, target_column] = label

df.to_csv(RESULTS_FILE_PATH)

# Monocytes

In [7]:
CT_OI = 'monocytes'

# level2 (fine) labels: leiden resolution -> {leiden cluster id -> cell state}.
# Open points from the annotation:
# - intermediate monocytes could not be found (no specific expression of their
#   markers in the UMAP);
# - this resolution is considered good enough for this subset;
# - MDP could not be found as a separate population (can be checked again).
FINE_ANNOTATIONS = {
    '0.25': {
        '0': 'classical monocyte',
        # Interesting: almost exclusively in the naive condition,
        # https://www.frontiersin.org/articles/10.3389/fimmu.2020.01117/full
        '1': 'non-classical monocyte',
        # "Single-Cell Analyses Inform Mechanisms of Myeloid-Targeted Therapies in Colon Cancer"
        '2': 'monocyte progenitor',
        # Gene numbers are very low; clusters with the high-mito-content cluster.
        '3': 'classical monocyte',
        # Ischemia related? https://www.biorxiv.org/content/10.1101/2020.04.14.040451v1.full.pdf
        '4': 'non-classical monocyte',
        # Interesting: only found in sham?!
        '5': 'monocyte-derived DC',
    },
}

# level1 (coarse) labels: 'monocyte' unless the level2 label is listed here.
_level1_exceptions = {'monocyte-derived DC': 'dendritic cell'}
COARSE_REANNOTATIONS = {
    resolution: {
        cluster: _level1_exceptions.get(label, 'monocyte')
        for cluster, label in cluster_labels.items()
    }
    for resolution, cluster_labels in FINE_ANNOTATIONS.items()
}

########################################################################################
# Leiden clusterings of this cell type; values are cast to str so that cluster ids
# compare equal to the (string) keys of the annotation mappings.
df_leiden = pd.read_csv(f'./{CT_OI}.csv', index_col=0).astype(str)
# Resolution = text after the last '_' of each column name (columns are read back
# below as f'leiden_{resolution}').
resolutions = [column.split('_')[-1] for column in df_leiden.columns]

# Shared results table: loaded, updated and written back to the same path.
df = pd.read_csv(RESULTS_FILE_PATH, index_col=0)

# level2 receives the fine labels, level1 the coarse ones. Resolutions are applied in
# the column order of the cluster file, so later columns overwrite earlier ones.
annotation_targets = (('level2', FINE_ANNOTATIONS), ('level1', COARSE_REANNOTATIONS))
for resolution in resolutions:
    for target_column, annotations in annotation_targets:
        for cluster_id, label in annotations.get(resolution, {}).items():
            clusters = df_leiden[f'leiden_{resolution}']
            df.loc[clusters[clusters == cluster_id].index, target_column] = label

df.to_csv(RESULTS_FILE_PATH)

# Eosinophil/basophil progenitor

In [8]:
CT_OI = 'eosinophil_basophil progenitor'

# level2 (fine) labels: leiden resolution -> {leiden cluster id -> cell state}.
FINE_ANNOTATIONS = {
    '0.25': {
        '0': 'basophil',
        '1': 'basophil',
        '2': 'basophil',
        '3': 'basophil progenitor',
    },
}

# level1 (coarse) labels: every annotated cluster of this subset becomes 'basophil'.
COARSE_REANNOTATIONS = {
    resolution: dict.fromkeys(cluster_labels, 'basophil')
    for resolution, cluster_labels in FINE_ANNOTATIONS.items()
}

########################################################################################
# Leiden clusterings of this cell type; values are cast to str so that cluster ids
# compare equal to the (string) keys of the annotation mappings.
df_leiden = pd.read_csv(f'./{CT_OI}.csv', index_col=0).astype(str)
# Resolution = text after the last '_' of each column name (columns are read back
# below as f'leiden_{resolution}').
resolutions = [column.split('_')[-1] for column in df_leiden.columns]

# Shared results table: loaded, updated and written back to the same path.
df = pd.read_csv(RESULTS_FILE_PATH, index_col=0)

# level2 receives the fine labels, level1 the coarse ones. Resolutions are applied in
# the column order of the cluster file, so later columns overwrite earlier ones.
annotation_targets = (('level2', FINE_ANNOTATIONS), ('level1', COARSE_REANNOTATIONS))
for resolution in resolutions:
    for target_column, annotations in annotation_targets:
        for cluster_id, label in annotations.get(resolution, {}).items():
            clusters = df_leiden[f'leiden_{resolution}']
            df.loc[clusters[clusters == cluster_id].index, target_column] = label

df.to_csv(RESULTS_FILE_PATH)

# Microglia

In [9]:
CT_OI = 'microglia'

# level2 (fine) labels: leiden resolution -> {leiden cluster id -> cell state}.
FINE_ANNOTATIONS = {
    '1': {
        '0': 'perivascular macrophage',
        '1': 'microglia',
        # Mac3 population in https://jvi.asm.org/content/jvi/early/2020/09/24/JVI.01295-20.full.pdf
        '2': 'antigen-presenting macrophage',
        '3': 'perivascular macrophage',
        '4': 'microglia',
        # Not at all specific.
        '5': 'perivascular macrophage',
        '6': 'perivascular macrophage',
        '7': 'antigen-presenting macrophage',
        '8': 'monocyte-derived macrophage',
        '9': 'monocyte-derived macrophage',
        '10': 'macrophage',
        # Expression very similar to cluster 2; bone-specific cluster!
        '11': 'antigen-presenting macrophage',
        # Mono2 population in https://jvi.asm.org/content/jvi/early/2020/09/24/JVI.01295-20.full.pdf
        '12': 'macrophage',
    },
}

# level1 (coarse) labels: 'macrophage' unless the level2 label is listed here.
_level1_exceptions = {'microglia': 'microglia'}
COARSE_REANNOTATIONS = {
    resolution: {
        cluster: _level1_exceptions.get(label, 'macrophage')
        for cluster, label in cluster_labels.items()
    }
    for resolution, cluster_labels in FINE_ANNOTATIONS.items()
}

########################################################################################
# Leiden clusterings of this cell type; values are cast to str so that cluster ids
# compare equal to the (string) keys of the annotation mappings.
df_leiden = pd.read_csv(f'./{CT_OI}.csv', index_col=0).astype(str)
# Resolution = text after the last '_' of each column name (columns are read back
# below as f'leiden_{resolution}').
resolutions = [column.split('_')[-1] for column in df_leiden.columns]

# Shared results table: loaded, updated and written back to the same path.
df = pd.read_csv(RESULTS_FILE_PATH, index_col=0)

# level2 receives the fine labels, level1 the coarse ones. Resolutions are applied in
# the column order of the cluster file, so later columns overwrite earlier ones.
annotation_targets = (('level2', FINE_ANNOTATIONS), ('level1', COARSE_REANNOTATIONS))
for resolution in resolutions:
    for target_column, annotations in annotation_targets:
        for cluster_id, label in annotations.get(resolution, {}).items():
            clusters = df_leiden[f'leiden_{resolution}']
            df.loc[clusters[clusters == cluster_id].index, target_column] = label

df.to_csv(RESULTS_FILE_PATH)

# Fibroblasts

In [10]:
CT_OI = 'fibroblasts'

# level2 (fine) labels: leiden resolution -> {leiden cluster id -> cell state}.
FINE_ANNOTATIONS = {
    '1': {
        '0': 'dural fibroblast',
        '1': 'meningeal-Choroid Plexus cell',
        '2': 'astrocyte',
        '3': 'astrocyte',
        '4': 'neuron',
        '5': 'meningeal-Choroid Plexus cell',
        # Neural photoreceptor cells??? https://www.ncbi.nlm.nih.gov/pmc/articles/PMC2781860/
        '6': 'Gnb3+ cell',
        '7': 'meningeal-Choroid Plexus cell',
        '8': 'meningeal-Choroid Plexus cell',
        '9': 'dural fibroblast',
        '10': 'dural fibroblast',
        '11': 'astrocyte',
        '12': 'Omp+ cell',
        '13': 'fibroblast',
        '14': 'oligodendrocyte',
        # Uncertain (marked '??????' by the annotator).
        '15': 'endothelial cell',
        '16': 'dural fibroblast',
        '17': 'dural fibroblast',
        # Same as clusters 6 and 24.
        '18': 'Gnb3+ cell',
        # https://www.biorxiv.org/content/10.1101/2020.06.23.166066v1.full.pdf
        '19': 'adipose-derived stromal cell',
        '20': 'oligodendrocyte',
        '21': 'oligodendrocyte',
        # Only in the brain!! https://www.nature.com/articles/nature25739?proof=t (Ly6a, Lyc1, ptprb)
        '22': 'endothelial cell',
        '23': 'dural fibroblast',
        # TODO: correct this.
        '24': 'Gnb3+ cell',
        # No cells in the brain; interesting distribution among bones.
        '25': 'megakaryocyte',
        # NOTE(review): 'Chroid' is probably a typo for 'Choroid' (compare
        # 'meningeal-Choroid Plexus cell'); left unchanged because the label is
        # written to the results file and may be matched elsewhere.
        '26': 'brain-Chroid Plexus endothelial cell',
    },
}

# level1 (coarse) labels: 'structural cell' unless the level2 label is listed here.
_level1_exceptions = {
    'astrocyte': 'brain cell',
    'neuron': 'brain cell',
    'oligodendrocyte': 'brain cell',
    'megakaryocyte': 'megakaryocyte',
}
COARSE_REANNOTATIONS = {
    resolution: {
        cluster: _level1_exceptions.get(label, 'structural cell')
        for cluster, label in cluster_labels.items()
    }
    for resolution, cluster_labels in FINE_ANNOTATIONS.items()
}

########################################################################################
# Leiden clusterings of this cell type; values are cast to str so that cluster ids
# compare equal to the (string) keys of the annotation mappings.
df_leiden = pd.read_csv(f'./{CT_OI}.csv', index_col=0).astype(str)
# Resolution = text after the last '_' of each column name (columns are read back
# below as f'leiden_{resolution}').
resolutions = [column.split('_')[-1] for column in df_leiden.columns]

# Shared results table: loaded, updated and written back to the same path.
df = pd.read_csv(RESULTS_FILE_PATH, index_col=0)

# level2 receives the fine labels, level1 the coarse ones. Resolutions are applied in
# the column order of the cluster file, so later columns overwrite earlier ones.
annotation_targets = (('level2', FINE_ANNOTATIONS), ('level1', COARSE_REANNOTATIONS))
for resolution in resolutions:
    for target_column, annotations in annotation_targets:
        for cluster_id, label in annotations.get(resolution, {}).items():
            clusters = df_leiden[f'leiden_{resolution}']
            df.loc[clusters[clusters == cluster_id].index, target_column] = label

df.to_csv(RESULTS_FILE_PATH)

# Progenitors

In [11]:
CT_OI = 'myeloid progenitors'

# level2 (fine) labels: leiden resolution -> {leiden cluster id -> cell state}.
FINE_ANNOTATIONS = {
    '0.25': {
        '0': 'monocyte-primed GMP',
        '1': 'granulocyte-monocyte progenitor',
        '2': 'hematopoietic stem cell',
        '3': 'granulocyte-monocyte progenitor',
        # Myeloid lineage, but not sure what to call it.
        '4': 'granulocyte-monocyte progenitor',
        '5': 'monocyte-primed GMP',
        '6': 'common myeloid progenitor',
        # Erythrocyte/neutrophil related.
        '7': 'erythroid progenitor',
    },
}

# level1 (coarse) labels: 'progenitors' unless the level2 label is listed here.
_level1_exceptions = {'erythroid progenitor': 'erythroid precursor'}
COARSE_REANNOTATIONS = {
    resolution: {
        cluster: _level1_exceptions.get(label, 'progenitors')
        for cluster, label in cluster_labels.items()
    }
    for resolution, cluster_labels in FINE_ANNOTATIONS.items()
}

########################################################################################
# Leiden clusterings of this cell type; values are cast to str so that cluster ids
# compare equal to the (string) keys of the annotation mappings.
df_leiden = pd.read_csv(f'./{CT_OI}.csv', index_col=0).astype(str)
# Resolution = text after the last '_' of each column name (columns are read back
# below as f'leiden_{resolution}').
resolutions = [column.split('_')[-1] for column in df_leiden.columns]

# Shared results table: loaded, updated and written back to the same path.
df = pd.read_csv(RESULTS_FILE_PATH, index_col=0)

# level2 receives the fine labels, level1 the coarse ones. Resolutions are applied in
# the column order of the cluster file, so later columns overwrite earlier ones.
annotation_targets = (('level2', FINE_ANNOTATIONS), ('level1', COARSE_REANNOTATIONS))
for resolution in resolutions:
    for target_column, annotations in annotation_targets:
        for cluster_id, label in annotations.get(resolution, {}).items():
            clusters = df_leiden[f'leiden_{resolution}']
            df.loc[clusters[clusters == cluster_id].index, target_column] = label

df.to_csv(RESULTS_FILE_PATH)

# Erythrocytes

In [12]:
CT_OI = 'erythrocytes'

# level2 (fine) labels: leiden resolution -> {leiden cluster id -> cell state}.
FINE_ANNOTATIONS = {
    '0.25': {
        '0': 'erythrocyte',
        '1': 'erythroblast',
    },
}

# level1 (coarse) labels: every annotated cluster of this subset becomes
# 'erythroid cell'.
# NOTE(review): other sections of this notebook use 'erythroid cell' as a level2
# label (with level1 'erythroid precursor') - confirm the naming is intended.
COARSE_REANNOTATIONS = {
    resolution: dict.fromkeys(cluster_labels, 'erythroid cell')
    for resolution, cluster_labels in FINE_ANNOTATIONS.items()
}

########################################################################################
# Leiden clusterings of this cell type; values are cast to str so that cluster ids
# compare equal to the (string) keys of the annotation mappings.
df_leiden = pd.read_csv(f'./{CT_OI}.csv', index_col=0).astype(str)
# Resolution = text after the last '_' of each column name (columns are read back
# below as f'leiden_{resolution}').
resolutions = [column.split('_')[-1] for column in df_leiden.columns]

# Shared results table: loaded, updated and written back to the same path.
df = pd.read_csv(RESULTS_FILE_PATH, index_col=0)

# level2 receives the fine labels, level1 the coarse ones. Resolutions are applied in
# the column order of the cluster file, so later columns overwrite earlier ones.
annotation_targets = (('level2', FINE_ANNOTATIONS), ('level1', COARSE_REANNOTATIONS))
for resolution in resolutions:
    for target_column, annotations in annotation_targets:
        for cluster_id, label in annotations.get(resolution, {}).items():
            clusters = df_leiden[f'leiden_{resolution}']
            df.loc[clusters[clusters == cluster_id].index, target_column] = label

df.to_csv(RESULTS_FILE_PATH)

# Erythroblast

In [13]:
CT_OI = 'erythroblasts'

# level2 (fine) labels: leiden resolution -> {leiden cluster id -> cell state}.
FINE_ANNOTATIONS = {
    '0.5': {
        '0': 'erythroid cell',
        '1': 'erythroid cell',
        '2': 'erythroid progenitor',
        '3': 'erythroid progenitor',
        '4': 'granulocyte-monocyte progenitor',
        '5': 'erythroid progenitor',
        '6': 'erythroid progenitor',
        '7': 'erythroid progenitor',
        '8': 'erythroid cell',
        '9': 'granulocyte-monocyte progenitor',
        '10': 'erythroid progenitor',
        '11': 'granulocyte-monocyte progenitor',
        '12': 'erythroid progenitor',
    },
}

# level1 (coarse) labels: 'erythroid precursor' unless the level2 label is listed
# here. The table is derived from the level2 labels so that clusters sharing a
# level2 label always share their level1 label: cluster '11' is a
# 'granulocyte-monocyte progenitor' like clusters '4' and '9' and therefore gets
# 'progenitors' too (it was hand-listed as 'erythroid precursor' before).
_level1_exceptions = {'granulocyte-monocyte progenitor': 'progenitors'}
COARSE_REANNOTATIONS = {
    resolution: {
        cluster: _level1_exceptions.get(label, 'erythroid precursor')
        for cluster, label in cluster_labels.items()
    }
    for resolution, cluster_labels in FINE_ANNOTATIONS.items()
}

########################################################################################
# Leiden clusterings of this cell type; values are cast to str so that cluster ids
# compare equal to the (string) keys of the annotation mappings.
df_leiden = pd.read_csv(f'./{CT_OI}.csv', index_col=0).astype(str)
# Resolution = text after the last '_' of each column name (columns are read back
# below as f'leiden_{resolution}').
resolutions = [column.split('_')[-1] for column in df_leiden.columns]

# Shared results table: loaded, updated and written back to the same path.
df = pd.read_csv(RESULTS_FILE_PATH, index_col=0)

# level2 receives the fine labels, level1 the coarse ones. Resolutions are applied in
# the column order of the cluster file, so later columns overwrite earlier ones.
annotation_targets = (('level2', FINE_ANNOTATIONS), ('level1', COARSE_REANNOTATIONS))
for resolution in resolutions:
    for target_column, annotations in annotation_targets:
        for cluster_id, label in annotations.get(resolution, {}).items():
            clusters = df_leiden[f'leiden_{resolution}']
            df.loc[clusters[clusters == cluster_id].index, target_column] = label

df.to_csv(RESULTS_FILE_PATH)

# Pro_neutrophils

In [14]:
# Cell-type subset handled by this cell; also the stem of the cluster CSV read below.
CT_OI = 'pro neutrophils'

# level2 (fine) labels: leiden resolution -> {leiden cluster id -> cell state}.
FINE_ANNOTATIONS = {
    # mappings for clusters of (different) leiden resolution 
    '0.25': {
        '0': 'pro neutrophil',# similar to cluster 6
        '1': 'pro neutrophil',
        '2': 'mature neutrophil',
        '3': 'immature neutrophil',
        '4': 'pre neutrophil', # no expression
        '5': 'pre neutrophil', # uncertain (marked '??' by the annotator)
        '6': 'pro neutrophil', # similar to cluster 0
        '7': 'mature neutrophil',
    }
}
# level1 (coarse) labels for the same clusters.
# NOTE(review): cluster '1' is 'pro neutrophil' at level2 like clusters '0' and '6',
# but is the only one mapped to 'progenitors' here - confirm this is intended.
COARSE_REANNOTATIONS = {
    '0.25': {
        '0': 'neutrophil',# similar to cluster 6
        '1': 'progenitors',
        '2': 'neutrophil',
        '3': 'neutrophil',
        '4': 'neutrophil', # no expression
        '5': 'neutrophil', # uncertain (marked '??' by the annotator)
        '6': 'neutrophil', # similar to cluster 0
        '7': 'neutrophil',
    }
}  

########################################################################################
# Leiden clusterings of this cell type; values are cast to str so that cluster ids
# compare equal to the (string) keys of the annotation mappings.
df_leiden = pd.read_csv(f'./{CT_OI}.csv', index_col=0).astype(str)
# Resolution = text after the last '_' of each column name (columns are read back
# below as f'leiden_{resolution}').
resolutions = [column.split('_')[-1] for column in df_leiden.columns]

# Shared results table: loaded, updated and written back to the same path.
df = pd.read_csv(RESULTS_FILE_PATH, index_col=0)

# level2 receives the fine labels, level1 the coarse ones. Resolutions are applied in
# the column order of the cluster file, so later columns overwrite earlier ones.
annotation_targets = (('level2', FINE_ANNOTATIONS), ('level1', COARSE_REANNOTATIONS))
for resolution in resolutions:
    for target_column, annotations in annotation_targets:
        for cluster_id, label in annotations.get(resolution, {}).items():
            clusters = df_leiden[f'leiden_{resolution}']
            df.loc[clusters[clusters == cluster_id].index, target_column] = label

df.to_csv(RESULTS_FILE_PATH)

# Neutrophils

In [15]:
CT_OI = 'neutrophils'

# level2 (fine) labels: leiden resolution -> {leiden cluster id -> cell state}.
FINE_ANNOTATIONS = {
    '0.25': {
        '0': 'immature neutrophil',
        '1': 'mature neutrophil',
        '2': 'mature neutrophil',
        '3': 'mature neutrophil',
        '4': 'immature neutrophil',
    },
}

# level1 (coarse) labels: every annotated cluster of this subset becomes 'neutrophil'.
COARSE_REANNOTATIONS = {
    resolution: dict.fromkeys(cluster_labels, 'neutrophil')
    for resolution, cluster_labels in FINE_ANNOTATIONS.items()
}

########################################################################################
# Leiden clusterings of this cell type; values are cast to str so that cluster ids
# compare equal to the (string) keys of the annotation mappings.
df_leiden = pd.read_csv(f'./{CT_OI}.csv', index_col=0).astype(str)
# Resolution = text after the last '_' of each column name (columns are read back
# below as f'leiden_{resolution}').
resolutions = [column.split('_')[-1] for column in df_leiden.columns]

# Shared results table: loaded, updated and written back to the same path.
df = pd.read_csv(RESULTS_FILE_PATH, index_col=0)

# level2 receives the fine labels, level1 the coarse ones. Resolutions are applied in
# the column order of the cluster file, so later columns overwrite earlier ones.
annotation_targets = (('level2', FINE_ANNOTATIONS), ('level1', COARSE_REANNOTATIONS))
for resolution in resolutions:
    for target_column, annotations in annotation_targets:
        for cluster_id, label in annotations.get(resolution, {}).items():
            clusters = df_leiden[f'leiden_{resolution}']
            df.loc[clusters[clusters == cluster_id].index, target_column] = label

df.to_csv(RESULTS_FILE_PATH)

# Granulocyte monocyte progenitor - revisit

In [16]:
CT_OI = 'granulocyte-monocyte progenitor'

# level2 (fine) labels: leiden resolution -> {leiden cluster id -> cell state}.
FINE_ANNOTATIONS = {
    '0.25': {
        '0': 'erythroid progenitor',
        '1': 'granulocyte-monocyte progenitor',
        '2': 'neutrophil-primed GMP',
        '3': 'neutrophil-primed GMP',
        '4': 'erythroid cell',
        '5': 'erythroid cell',
        '6': 'monocyte-primed GMP',
        '7': 'granulocyte-monocyte progenitor',
        '8': 'erythroid cell',
    },
}

# level1 (coarse) labels: 'progenitors' unless the level2 label is listed here.
_level1_exceptions = {
    'erythroid progenitor': 'erythroid precursor',
    'erythroid cell': 'erythroid precursor',
}
COARSE_REANNOTATIONS = {
    resolution: {
        cluster: _level1_exceptions.get(label, 'progenitors')
        for cluster, label in cluster_labels.items()
    }
    for resolution, cluster_labels in FINE_ANNOTATIONS.items()
}

########################################################################################
# Leiden clusterings of this cell type; values are cast to str so that cluster ids
# compare equal to the (string) keys of the annotation mappings.
df_leiden = pd.read_csv(f'./{CT_OI}.csv', index_col=0).astype(str)
# Resolution = text after the last '_' of each column name (columns are read back
# below as f'leiden_{resolution}').
resolutions = [column.split('_')[-1] for column in df_leiden.columns]

# Shared results table: loaded, updated and written back to the same path.
df = pd.read_csv(RESULTS_FILE_PATH, index_col=0)

# level2 receives the fine labels, level1 the coarse ones. Resolutions are applied in
# the column order of the cluster file, so later columns overwrite earlier ones.
annotation_targets = (('level2', FINE_ANNOTATIONS), ('level1', COARSE_REANNOTATIONS))
for resolution in resolutions:
    for target_column, annotations in annotation_targets:
        for cluster_id, label in annotations.get(resolution, {}).items():
            clusters = df_leiden[f'leiden_{resolution}']
            df.loc[clusters[clusters == cluster_id].index, target_column] = label

df.to_csv(RESULTS_FILE_PATH)