This script merges cell features, cell specimen IDs, and the corresponding cell set nomenclature and cell type information. It outputs a dataframe that helps categorize individual cells for analysis of firing rate and cell volume.

In [1]:
import os

import numpy as np
import pandas as pd

In [2]:
# Allow up to 50 columns to be shown when a dataframe is displayed.
pd.options.display.max_columns = 50

In [None]:
# Load the per-specimen feature table (one row per cell specimen).
feature_df = pd.read_csv(
    filepath_or_buffer='../data/cell_types_specimen_details.csv',
)

In [None]:
# Dimensions of the feature table as (rows, columns).
# Author's note: 2333 cells x 54 features -- this cell has no saved output
# here, so re-run to confirm against the current CSV.
feature_df.shape

In [None]:
# Sanity check: display every row whose 'specimen__id' occurs more than once
# (keep=False marks all members of a duplicate group, not just the repeats).
# An empty result means each specimen ID is unique in feature_df.
feature_df.loc[feature_df.duplicated(subset='specimen__id', keep=False)]

In [None]:
# Load the per-cell type classifications from Gouwens et al., 2019,
# "Classification of electrophysiological and morphological neuron types in
# the mouse visual cortex", Supplementary Dataset 3
# (type classifications and morphological adjustment parameters by cell):
#   e-types:  17 electrophysiological types
#   m-types:  38 morphological types
#   me-types: 46 morpho-electric types
type_df = pd.read_excel(
    io='../docs/Allen_Classification_Paper_Sup_3.xlsx',
    sheet_name='Data',
)

In [7]:
# Dimensions of the classification table as (rows, columns):
# 1947 cells x 8 features.
type_df.shape

(1947, 8)

In [8]:
# Sanity check: display every row whose 'specimen_id' occurs more than once
# (keep=False marks all members of a duplicate group, not just the repeats).
# The empty result shows each specimen ID is unique in type_df.
type_df.loc[type_df.duplicated(subset='specimen_id', keep=False)]

Unnamed: 0,specimen_id,e-type,m-type,me-type,upright_angle,soma_distance_from_pia,estimated_shrinkage_factor,estimated_slice_angle


In [9]:
# Inner-join the classification table with the feature table on specimen ID.
# The key column is 'specimen_id' on the left and 'specimen__id' on the right;
# only specimens present in both tables are kept, and both key columns remain
# in the result.
df = type_df.merge(
    feature_df,
    how='inner',
    left_on='specimen_id',
    right_on='specimen__id',
)

In [10]:
# Sanity check on the merged table: display every row whose 'specimen_id'
# occurs more than once. The empty result shows the join did not multiply rows.
df.loc[df.duplicated(subset='specimen_id', keep=False)]

Unnamed: 0,specimen_id,e-type,m-type,me-type,upright_angle,soma_distance_from_pia,estimated_shrinkage_factor,estimated_slice_angle,line_name,specimen__id,specimen__name,specimen__hemisphere,structure__id,structure__name,structure__acronym,structure_parent__id,structure_parent__acronym,structure__layer,nr__max_euclidean_distance,nr__number_stems,nr__number_bifurcations,nr__average_contraction,nr__average_parent_daughter_ratio,nr__reconstruction_type,nrwkf__id,...,si__height,si__width,si__path,csl__x,csl__y,csl__z,csl__normalized_depth,cell_reporter_status,m__glif,m__biophys,m__biophys_perisomatic,m__biophys_all_active,tag__apical,tag__dendrite_type,morph_thumb_path,ephys_thumb_path,ephys_inst_thresh_thumb_path,donor__age,donor__sex,donor__disease_state,donor__race,donor__years_of_seizure_history,donor__species,donor__id,donor__name


In [11]:
# Write the full merged table as a tab-separated file, without the row index.
# to_csv does not create missing parent directories, so make sure the output
# directory exists first (e.g. on a fresh checkout without '../result').
os.makedirs('../result', exist_ok=True)
df.to_csv('../result/cell_type.tsv', sep='\t', index=False)

In [15]:
# Start the "lite" info table: keep only cells that have an 'me-type' label,
# taken as an independent copy so later in-place edits do not touch df.
lite_df = df[df['me-type'].notna()].copy()

In [16]:
# Columns to leave out of the lite table.
drop_list = [
    'specimen__id',  # right-hand merge key; 'specimen_id' is kept instead
    'donor__species',
    'donor__name',
    'specimen__name',
    'structure__id',
    'structure__name',
    'ephys_thumb_path',
    'ephys_inst_thresh_thumb_path',
    'structure_parent__id',
    'morph_thumb_path',
    'donor__id',
    'nr__reconstruction_type',
    'nrwkf__id',
    'erwkf__id',
    'si__path',
]

In [17]:
# Remove the unwanted columns from the lite table in place.
# Raises KeyError if any listed column is absent (e.g. when this cell is re-run).
lite_df.drop(columns=drop_list, inplace=True)

In [18]:
# Write the lite table as a tab-separated file, without the row index.
# to_csv does not create missing parent directories, so make sure the output
# directory exists first (e.g. on a fresh checkout without '../result').
os.makedirs('../result', exist_ok=True)
lite_df.to_csv('../result/cell_type_lite.tsv', sep='\t', index=False)