This script merges cell features, cell specimen IDs, and the corresponding cell set nomenclature and cell type information. It outputs a dataframe that helps categorize individual cells for analysis of firing rate and cell volume.

Jun 14th, 2021: Add aliases from Miller et al 2020 for the cell sets

In [19]:
import numpy as np
import pandas as pd

In [20]:
# Show up to 50 columns when a DataFrame is displayed.
pd.options.display.max_columns = 50

In [21]:
# Load the per-specimen feature table from CSV.
# Its specimen identifier column is 'specimen__id' (double underscore).
feature_df = pd.read_csv('../data/cell_types_specimen_details.csv')

In [None]:
feature_df.shape # (rows, columns); noted as 2333 cells x 54 features when this was written

In [None]:
# Show every row whose specimen__id occurs more than once.
# Expected to be empty, i.e. no duplicated specimen IDs in this table.
feature_df.loc[feature_df['specimen__id'].duplicated(keep=False)]

In [22]:
# Load the per-cell type classifications from Gouwens et al., 2019,
# "Classification of electrophysiological and morphological neuron types
# in the mouse visual cortex", Supplementary Dataset 3
# (type classifications and morphological adjustment parameters by cell).
#   e-types:  17 electrophysiological types
#   m-types:  38 morphological types
#   me-types: 46 morpho-electric types
# Only the 'Data' sheet of the workbook is read.
type_df = pd.read_excel('../docs/Allen_Classification_Paper_Sup_3.xlsx',sheet_name='Data')

In [7]:
type_df.shape # (rows, columns): 1947 cells x 8 features

(1947, 8)

In [8]:
# Show every row whose specimen_id occurs more than once.
# An empty result means the table has no duplicated specimen IDs.
type_df.loc[type_df['specimen_id'].duplicated(keep=False)]

Unnamed: 0,specimen_id,e-type,m-type,me-type,upright_angle,soma_distance_from_pia,estimated_shrinkage_factor,estimated_slice_angle


In [36]:
# Join the type classifications to the cell features on specimen ID
# ('specimen_id' in type_df, 'specimen__id' in feature_df).
# The inner join keeps only specimens present in both tables.
# Each table is expected to hold at most one row per specimen, so
# validate='one_to_one' makes pandas raise a MergeError if either key column
# contains duplicates, instead of silently multiplying rows.
df = pd.merge(
    left=type_df,
    right=feature_df,
    left_on='specimen_id',
    right_on='specimen__id',
    how='inner',
    validate='one_to_one',
)

In [37]:
# Show every merged row whose specimen_id occurs more than once.
# An empty result means the merge produced no duplicated specimen IDs.
df.loc[df['specimen_id'].duplicated(keep=False)]

Unnamed: 0,specimen_id,e-type,m-type,me-type,upright_angle,soma_distance_from_pia,estimated_shrinkage_factor,estimated_slice_angle,line_name,specimen__id,specimen__name,specimen__hemisphere,structure__id,structure__name,structure__acronym,structure_parent__id,structure_parent__acronym,structure__layer,nr__max_euclidean_distance,nr__number_stems,nr__number_bifurcations,nr__average_contraction,nr__average_parent_daughter_ratio,nr__reconstruction_type,nrwkf__id,...,si__height,si__width,si__path,csl__x,csl__y,csl__z,csl__normalized_depth,cell_reporter_status,m__glif,m__biophys,m__biophys_perisomatic,m__biophys_all_active,tag__apical,tag__dendrite_type,morph_thumb_path,ephys_thumb_path,ephys_inst_thresh_thumb_path,donor__age,donor__sex,donor__disease_state,donor__race,donor__years_of_seizure_history,donor__species,donor__id,donor__name


In [11]:
# Write the full merged table as a tab-separated file, without the index column.
df.to_csv('../result/cell_type.tsv',sep='\t',index=False)

In [38]:
# Lite info table: keep only the cells that have an me-type assigned.
# .copy() yields an independent frame, so in-place edits to lite_df
# do not touch df.
lite_df = df.loc[df['me-type'].notna()].copy()

In [39]:
# Columns to remove from the lite table.
drop_list = [
    'specimen__id',
    'donor__species',
    'donor__name',
    'specimen__name',
    'structure__id',
    'structure__name',
    'ephys_thumb_path',
    'ephys_inst_thresh_thumb_path',
    'structure_parent__id',
    'morph_thumb_path',
    'donor__id',
    'nr__reconstruction_type',
    'nrwkf__id',
    'erwkf__id',
    'si__path',
]

In [40]:
# Remove the columns named in drop_list from lite_df, modifying it in place.
# Raises KeyError if any listed column is absent (e.g. when the cell is re-run).
lite_df.drop(columns=drop_list, inplace=True)

In [18]:
# Write the lite table as a tab-separated file, without the index column.
lite_df.to_csv('../result/cell_type_lite.tsv',sep='\t',index=False)

In [41]:
# Display the lite table for visual inspection.
lite_df

Unnamed: 0,specimen_id,e-type,m-type,me-type,upright_angle,soma_distance_from_pia,estimated_shrinkage_factor,estimated_slice_angle,line_name,specimen__hemisphere,structure__acronym,structure_parent__acronym,structure__layer,nr__max_euclidean_distance,nr__number_stems,nr__number_bifurcations,nr__average_contraction,nr__average_parent_daughter_ratio,ef__fast_trough_v_long_square,ef__upstroke_downstroke_ratio_long_square,ef__adaptation,ef__f_i_curve_slope,ef__threshold_i_long_square,ef__tau,ef__avg_isi,ef__avg_firing_rate,ef__ri,ef__peak_t_ramp,ef__vrest,si__height,si__width,csl__x,csl__y,csl__z,csl__normalized_depth,cell_reporter_status,m__glif,m__biophys,m__biophys_perisomatic,m__biophys_all_active,tag__apical,tag__dendrite_type,donor__age,donor__sex,donor__disease_state,donor__race,donor__years_of_seizure_history
8,313862022,Exc_3,Spiny_8,ME_Exc_6,150.319955,492.100275,2.977636,-25.453107,Scnn1a-Tg2-Cre,right,VISp4,VISp,4,488.514987,8.0,39.0,0.878773,1.122363,-49.187500,5.261166,0.288858,0.162139,190.0,17.060247,147.790000,6.766358,58.281288,3.574770,-63.575584,7570.0,5762.0,9489.242575,1423.652390,2785.443072,0.461634,positive,0,1,1,0,intact,spiny,,,,,
10,313862167,Inh_13,Aspiny_1,ME_Inh_15,119.629705,408.467273,2.030599,-21.213269,Sst-IRES-Cre,right,VISl4,VISl,4,247.737791,5.0,19.0,0.930364,0.858529,-58.156250,1.663913,0.003120,0.413095,130.0,12.185559,30.758833,32.510986,181.874961,6.302612,-79.635635,5748.0,7583.0,9279.539485,1831.035020,2151.244320,0.340213,positive,5,2,1,1,,aspiny,,,,,
13,313862373,Inh_2,Aspiny_4,ME_Inh_7,249.183146,926.652428,1.614714,-29.985693,Pvalb-IRES-Cre,left,VISl6a,VISl,6a,251.498887,6.0,21.0,0.888561,1.106200,-62.468754,1.285196,,0.054545,630.0,6.523529,,,67.500003,22.145867,-76.893822,5740.0,5738.0,9465.942232,2210.363516,2482.082983,0.841597,positive,0,0,0,0,,aspiny,,,,,
16,314642645,Exc_3,Spiny_3,ME_Exc_17,233.914695,372.685943,2.251238,24.773125,Rorb-IRES2-Cre,right,VISp4,VISp,4,420.596408,8.0,19.0,0.873577,0.896965,-51.718754,3.573322,0.036117,0.242548,50.0,20.371942,59.855312,16.706955,230.468705,2.778125,-75.896225,5728.0,5737.0,9396.041202,1466.394217,2676.181200,0.352524,positive,5,2,1,1,intact,spiny,,,,,
19,314804042,Exc_3,Spiny_4,ME_Exc_16,238.336909,492.354331,2.874654,-12.595367,Rorb-IRES2-Cre,left,VISp5,VISp,5,422.170626,4.0,28.0,0.890831,0.837658,-49.718754,3.387520,0.092932,0.154762,90.0,31.553622,101.343333,9.867447,215.624928,3.744635,-71.105034,7582.0,5744.0,8766.931931,1245.064860,8245.147191,0.494978,positive,0,1,1,0,intact,spiny,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1847,609435731,Inh_8,Aspiny_5,ME_Inh_21,203.329521,161.374566,2.544643,6.803797,Slc32a1-T2A-FlpO|Vipr2-IRES2-Cre,right,VISp2/3,VISp,2/3,182.184613,5.0,10.0,0.874339,0.998800,-51.187504,1.373199,0.004752,1.256024,230.0,5.893217,12.221333,81.824133,126.406327,11.881320,-74.926704,9434.0,9427.0,8253.000000,593.839011,3532.394271,0.161134,positive,2,0,0,0,,aspiny,,,,,
1852,614767057,Inh_11,Aspiny_5,ME_Inh_21,128.359888,145.730763,2.910187,3.077278,Pvalb-T2A-FlpO|Vipr2-IRES2-Cre,left,VISrl2/3,VISrl,2/3,201.927056,7.0,27.0,0.847959,0.997689,-54.437504,1.603262,0.045264,0.665566,190.0,8.242097,66.424000,15.054799,109.218784,9.300680,-75.454056,7588.0,7578.0,8186.000000,1179.841807,9118.530636,0.159576,positive,2,0,0,0,,aspiny,,,,,
1853,614777438,Inh_12,Aspiny_7,ME_Inh_16,229.880212,191.903363,2.546375,19.518115,Pvalb-T2A-FlpO|Vipr2-IRES2-Cre,right,VISp2/3,VISp,2/3,213.221228,5.0,19.0,0.861621,1.000000,-58.781250,1.296216,0.002116,0.724734,530.0,5.224834,19.722286,50.704062,46.406135,18.271527,-79.500710,5742.0,7574.0,9088.307843,895.315355,3073.085594,0.192573,positive,2,0,0,0,,aspiny,,,,,
1855,623185845,Inh_4,Aspiny_3,ME_Inh_22,178.976579,683.768376,2.377664,3.547938,Nos1-CreERT2|Sst-IRES-FlpO,right,VISp5,VISp,5,296.215491,6.0,41.0,0.850527,0.990384,-57.031250,2.505891,,0.027381,70.0,35.495663,60.500000,16.528926,198.437676,1.912420,-60.638649,3901.0,5739.0,8184.000000,1123.795427,3446.625649,0.740827,positive,0,0,0,0,,sparsely spiny,,,,,


In [25]:
# Load the cell set nomenclature table (sheet 'cell set nomenclature') from
# the Miller et al. 2020 supplementary workbook.
# Mouse cells have further information about their cell types in this table.
alias_df = pd.read_excel('../docs/Miller_et_al_2020_Sup_1/cell_set_nomenclature.xlsx',sheet_name='cell set nomenclature')

In [45]:
# Prefix the generic nomenclature columns with "nomenclature_" (in place),
# so their origin stays clear once merged with the cell table.
alias_df.rename(
    columns={
        'species': 'nomenclature_species',
        'modality': 'nomenclature_modality',
        'taxonomy_id': 'nomenclature_taxonomy_id',
    },
    inplace=True,
)

In [48]:
# Remove the alias-assignee column in place. The column name contains a
# space after "cell_set_" and must be spelled exactly like that.
alias_df.drop('cell_set_ alias_assignee', axis=1, inplace=True)

In [49]:
# Display the nomenclature table for visual inspection.
alias_df

Unnamed: 0,cell_set_preferred_alias,cell_set_ label,cell_set_ accession,cell_set_ aligned_alias,cell_set_ additional_alias,cell_set_ alias_citation,cell_set_ structure,cell_set_ ontology_tag,nomenclature_taxonomy_id,nomenclature_species,nomenclature_modality
0,Inh L1 LAMP5 PVRL2,RNAseq 001,CS201912131_1,,Lamp5_1,10.1101/2020.03.31.016972,primary motor cortex,UBERON:0001384,CCN201912131,Human,RNAseq
1,Inh L1 LAMP5 RAB11FIP1,RNAseq 002,CS201912131_2,,,10.1101/2020.03.31.016972,primary motor cortex,UBERON:0001384,CCN201912131,Human,RNAseq
2,Inh L1-6 LAMP5 AARD,RNAseq 003,CS201912131_3,,,10.1101/2020.03.31.016972,primary motor cortex,UBERON:0001384,CCN201912131,Human,RNAseq
3,Inh L1-6 LAMP5 NES,RNAseq 004,CS201912131_4,,,10.1101/2020.03.31.016972,primary motor cortex,UBERON:0001384,CCN201912131,Human,RNAseq
4,Inh L1-6 LAMP5 CA1,RNAseq 005,CS201912131_5,,,10.1101/2020.03.31.016972,primary motor cortex,UBERON:0001384,CCN201912131,Human,RNAseq
...,...,...,...,...,...,...,...,...,...,...,...
1526,L4,ME 28-29,CS201906170_44,L4,,10.1038/s41593-019-0417-0,primary visual cortex,UBERON_0002436,CCN201906170,Mouse,Morphology|Electrophysiology
1527,L6 IT,ME 33-34,CS201906170_45,L6 IT,,10.1038/s41593-019-0417-0,primary visual cortex,UBERON_0002436,CCN201906170,Mouse,Morphology|Electrophysiology
1528,L6 CT,ME 35-36,CS201906170_46,L6 CT,,10.1038/s41593-019-0417-0,primary visual cortex,UBERON_0002436,CCN201906170,Mouse,Morphology|Electrophysiology
1529,Sst (Martinotti),"ME 15, 24-25",CS201906170_47,,Martinotti,10.1038/s41593-019-0417-0,primary visual cortex,UBERON_0002436,CCN201906170,Mouse,Morphology|Electrophysiology


In [51]:
# Attach the nomenclature columns to each cell by matching its me-type
# against cell_set_preferred_alias. The left join keeps every row of lite_df;
# cells with no matching alias get missing values in the alias columns.
# NOTE(review): a left join repeats a cell once per matching alias row, so
# the result can have more rows than lite_df if cell_set_preferred_alias is
# not unique -- confirm against the spreadsheet.
# NOTE(review): the sample rows displayed in this notebook show me-type
# values such as 'ME_Exc_6' but preferred aliases such as 'L4' (label
# 'ME 28-29'); verify that this key pair actually produces matches.
cell_alias_df = lite_df.merge(
    alias_df,
    how='left',
    left_on='me-type',
    right_on='cell_set_preferred_alias',
)

In [53]:
# Write the cell/alias table as a tab-separated file, without the index column.
cell_alias_df.to_csv('../result/cell_alias.tsv',sep='\t',index=False)