This notebook (mouse_adult_brain.Saunder_2018_annotation.ipynb) takes the subcluster_stats.csv file (emailed by Dr. Saunders) and reformats each subcluster name to match the format used in the annotation file, thereby mapping the number of cells to metacell names.

May 17th, 2022: use enhanced_subcluster_stats.csv instead of subcluster_stats.csv. The original file was missing the cell number for one of the subcluster rows.

May 19th, 2022: added a master_class column that separates clusters into Neuron, Glia, and Vasculature; the one subcluster that is mitotic is kept. This reduces executions in the mouse_adult_brain.Saunders_2018_Cell_Type.ipynb script.

In [1]:
import numpy as np
import pandas as pd

In [2]:
# Translates the tissue identifiers found in the stats file's `tissue` column
# into the short region prefixes used to build the `tissue_subcluster` key.
tissue_dict = {
    'P60Cerebellum_ALT': 'CB',                # cerebellum
    'P60Cortex_noRep5_FRONTALonly': 'FC',     # frontal cortex
    'P60Cortex_noRep5_POSTERIORonly': 'PC',   # posterior cortex
    'P60EntoPeduncular': 'ENT',               # entopeduncular
    'P60GlobusPallidus': 'GP',                # globus pallidus
    'P60Hippocampus': 'HC',                   # hippocampus
    'P60Striatum': 'STR',                     # striatum
    'P60SubstantiaNigra': 'SN',               # substantia nigra
    'P60Thalamus': 'TH',                      # thalamus
}

# Lookup from an annotation `class_marker` value to a neurotransmitter label.
# Labels used: 'GABA', 'Glutamate', 'Dopamine', 'Acetylcholine' and 'Mix'.
# Markers that are not listed here fall back to a default chosen by the caller.
class_marker_dict = {
    'Gad1Gad2': 'GABA',
    'Slc17a7': 'Glutamate',
    'Slc17a6': 'Glutamate',
    'Slc17a7-Slc17a6': 'Glutamate',
    'Th': 'Dopamine',
    'Gad1Gad2_Htr3a': 'GABA',  # Htr3a+ GABAergic neurons
    'Slc17a7Slc17a6': 'Glutamate',
    'Gad1Gad2_Sst': 'GABA',  # OLM neurons; SST is an OLM marker
    'Gad1Gad2-Th': 'Dopamine',  # MGE-derived cortical interneurons, Striatum Th+ SPN
    'Gad1Gad2_Id2': 'GABA',
    'Gad1Gad2Slc17a8_Htr3a': 'Mix',
    'Gad1Gad2_Pvalb': 'GABA',
    'Slc17a7-Slc17a8': 'Glutamate',
    'Gad1Gad2-Chat': 'Acetylcholine',
    'Chat-Gad1Gad2-Slc17a8': 'Acetylcholine',
    'Gad1Gad2-Slc17a6': 'Mix',  # e.g., Entopeduncular Nucleus (Ent), GABA/Glu co-releasing
    'Slc17a6-Slc17a7': 'Glutamate',
    'Gad1Gad2_Cplx3': 'GABA',
    'Gad1Gad2-Slc17a8': 'Mix',
    'Slc17a6-Slc17a7-Slc17a8': 'Glutamate',
    'Gad1Gad2Chat_Htr3a': 'Acetylcholine',
    'Gad1Gad2Slc17a8_Sst': 'Mix',  # OLM neurons with co-releasing? (open question)
    'Gad1Gad2-Chat-Slc17a8': 'Acetylcholine',
    'Slc17a6-Gad2-Th': 'Dopamine',
    'Th-Slc17a6': 'Dopamine',
    'Slc17a6-Gad1Gad2': 'Mix',
}

# Lookup from an annotation `class_marker` value to an excitatory/inhibitory
# label: 'exc', 'inh', or 'both'. It has the same keys, in the same order, as
# class_marker_dict.
exc_inh_dict = {
    'Gad1Gad2': 'inh',
    'Slc17a7': 'exc',
    'Slc17a6': 'exc',
    'Slc17a7-Slc17a6': 'exc',
    'Th': 'both',
    'Gad1Gad2_Htr3a': 'inh',
    'Slc17a7Slc17a6': 'exc',
    'Gad1Gad2_Sst': 'inh',  # GABAergic OLM neurons
    'Gad1Gad2-Th': 'both',
    'Gad1Gad2_Id2': 'inh',
    'Gad1Gad2Slc17a8_Htr3a': 'both',
    'Gad1Gad2_Pvalb': 'inh',
    'Slc17a7-Slc17a8': 'exc',
    'Gad1Gad2-Chat': 'both',
    'Chat-Gad1Gad2-Slc17a8': 'both',
    'Gad1Gad2-Slc17a6': 'both',
    'Slc17a6-Slc17a7': 'exc',
    'Gad1Gad2_Cplx3': 'inh',
    'Gad1Gad2-Slc17a8': 'both',
    'Slc17a6-Slc17a7-Slc17a8': 'exc',
    'Gad1Gad2Chat_Htr3a': 'both',
    'Gad1Gad2Slc17a8_Sst': 'both',  # co-releasing OLM? (open question)
    'Gad1Gad2-Chat-Slc17a8': 'both',
    'Slc17a6-Gad2-Th': 'both',
    'Th-Slc17a6': 'both',
    'Slc17a6-Gad1Gad2': 'both',
}

# Collapses each value of the annotation's `class` column into a coarse
# master class: 'Neuron', 'Glia', 'Vasculature', or 'Mitotic'.
class_dict = {
    'NEURON': 'Neuron',
    'ENDOTHELIAL_STALK': 'Vasculature',
    'ENDOTHELIAL_TIP': 'Vasculature',
    'MURAL': 'Vasculature',
    'MICROGLIA': 'Glia',
    'OLIGODENDROCYTE': 'Glia',
    'POLYDENDROCYTE': 'Glia',
    'ASTROCYTE': 'Glia',
    'CHOROID_PLEXUS': 'Vasculature',
    'MACROPHAGE': 'Glia',
    'MITOTIC': 'Mitotic',  # kept under its own label rather than folded into another class
    'EPENDYMAL': 'Vasculature',
    'NEUROGENESIS': 'Neuron',
}

In [3]:
# Load the per-subcluster statistics table (the "enhanced" CSV).
stats_df = pd.read_csv(
    '../data/single-cell/saunders_2018/enhanced_subcluster_stats.csv'
)

In [4]:
# Discard the leading column of the CSV, selected by position rather than name.
# NOTE(review): this mutates stats_df in place, so re-running this cell without
# re-reading the CSV removes one more column each time.
stats_df.drop(columns=stats_df.columns[0], inplace=True)

In [5]:
# Abbreviate each tissue name via tissue_dict; a tissue that is not a key of
# tissue_dict becomes None.
stats_df['tissue_abbrev'] = stats_df['tissue'].apply(tissue_dict.get)

In [6]:
# Build the '<tissue_abbrev>_<subcluster>' key that is later used to join the
# statistics onto the annotation table.
stats_df['tissue_subcluster'] = (
    stats_df['tissue_abbrev'] + '_' + stats_df['subcluster']
)

In [7]:
# Load the annotation spreadsheet (version 2018.04.01).
annotation_df = pd.read_excel(
    '../data/single-cell/saunders_2018/annotation.BrainCellAtlas_Saunders_version_2018.04.01.xlsx'
)

  warn(msg)


In [9]:
# Derive three annotation columns by dictionary lookup.
# 'neurotransmitter' and 'exc_inh' are keyed on `class_marker`; a marker that
# is absent from the lookup is labelled 'Non-neuronal'.
# NOTE(review): that fallback also applies to any neuronal marker that was
# never added to the dictionaries — confirm the dictionaries cover every
# neuronal class_marker in the spreadsheet.
annotation_df['neurotransmitter'] = annotation_df['class_marker'].apply(
    lambda marker: class_marker_dict.get(marker, 'Non-neuronal')
)
annotation_df['exc_inh'] = annotation_df['class_marker'].apply(
    lambda marker: exc_inh_dict.get(marker, 'Non-neuronal')
)
# 'master_class' is keyed on `class`; values missing from class_dict become NaN.
annotation_df['master_class'] = annotation_df['class'].map(class_dict)

In [10]:
# Remove annotation columns that are not carried into the merged output.
# stats_df still has its own 'tissue' column at this point, so dropping it here
# presumably also avoids a suffixed duplicate after the merge.
annotation_df.drop(
    labels=['tissue', 'subcluster', 'full_name'],
    axis='columns',
    inplace=True,
)

In [11]:
# 'subcluster' is already folded into the 'tissue_subcluster' key, so it and
# 'cluster' are removed from the statistics table before merging.
stats_df.drop(
    labels=['cluster', 'subcluster'],
    axis='columns',
    inplace=True,
)

In [12]:
# Attach the cell-count statistics from Dr. Saunders' file to the DropViz
# annotation, joining on the shared 'tissue_subcluster' key.
# An earlier one-off check (exporting this merge to
# ../results/single-cell/saunders_2018/check_match.xlsx) confirmed that the
# two sources match.
# `validate='one_to_one'` makes pandas raise a MergeError if the key is
# duplicated on either side; without it, duplicate keys would silently
# multiply rows (and their cell counts) in the output.
# The join is inner: a row whose key is absent from the other table is left out.
merge_df = pd.merge(
    left=stats_df,
    right=annotation_df,
    on='tissue_subcluster',
    how='inner',
    validate='one_to_one',
)

In [13]:
# Write the merged annotation to disk, leaving out the DataFrame index.
merge_df.to_excel(
    '../results/single-cell/saunders_2018/enhanced_annotation.xlsx',
    index=False,
)