# Extracting the morphometric statistics


**Please preprocess the raw reconstructions with `preprocess-morph-SWC-files.ipynb` before running this notebook.**


In [1]:
import pandas as pd
import numpy as np
import copy
import os

from neurontree import NeuronTree as nt
from helpers.morph_features import get_morphometrics

#PLOTTING
import seaborn as sns
import matplotlib.pyplot as plt

In [2]:
# Root of the data directory. The default is the original hard-coded location;
# set the MOUSE_V1_ROOT environment variable to run the notebook on another
# machine without editing this cell.
root_path = os.environ.get('MOUSE_V1_ROOT', 'D:/mpinb/data/mouse_v1')
# Folder the per-cell morphometric CSV files are written to and read back from.
save_path = root_path + '/processed/morph/features/'
# Folder holding the reconstructions that are loaded as '<cell id>.swc'.
path_to_reconstructions = root_path + '/processed/morph/nt/'

In [3]:
# Location of the patch-seq metadata table inside the data root.
meta_data_file_path = '%s/20200711_patchseq_metadata_mouse.csv' % root_path

# Read the comma-separated table; the CSV's second column (index_col=1)
# becomes the row index.
all_cells = pd.read_csv(meta_data_file_path, index_col=1)

# Keep only the rows whose reconstruction type is 'full'.
has_full_reconstruction = all_cells['neuron_reconstruction_type'] == 'full'
cells = all_cells[has_full_reconstruction]

In [8]:
# Show the metadata table after filtering to rows whose
# 'neuron_reconstruction_type' is 'full'.
cells

Unnamed: 0_level_0,project,cell_specimen_name,hemisphere,structure,donor_id,donor_name,biological_sex,age,ethnicity,medical_conditions,...,dendrite_type,apical_dendrite_status,neuron_reconstruction_type,cell_soma_normalized_depth,depth_from_pia_um,ephys_session_id,transcriptomics_sample_id,transcriptomics_batch,Tree_first_cl_label,Unnamed: 21
cell_specimen_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
601506507,mIVSCC-MET,Vip-IRES-Cre;Ai14-331294.04.01.01,left,VISpm4,331294,Vip-IRES-Cre;Ai14-331294,M,P55,,,...,sparsely spiny,,full,0.361015,,601506492,PS0810_E1-50_S88,BT035,CS180626100018,Vip Gpc3 Slc18a3
601790961,mIVSCC-MET,Sst-IRES-Cre;Ai14-331663.04.01.01,left,VISpm5,331663,Sst-IRES-Cre;Ai14-331663,F,P57,,,...,aspiny,,full,0.621829,,601790945,PS0817_E1-50_S19,BT036,CS180626100041,Sst Tac2 Myh4
601803754,mIVSCC-MET,Sst-IRES-Cre;Ai14-331663.04.02.02,right,VISp2/3,331663,Sst-IRES-Cre;Ai14-331663,F,P57,,,...,sparsely spiny,,full,0.164240,,601803733,PS0817_E1-50_S25,BT036,CS180626100035,Sst Calb2 Pdlim5
601808698,mIVSCC-MET,Sst-IRES-Cre;Ai14-331663.04.02.03,right,VISp5,331663,Sst-IRES-Cre;Ai14-331663,F,P57,,,...,aspiny,,full,0.483030,,601808619,PS0817_E1-50_S26,BT036,CS180626100043,Sst Hpse Cbln4
601810307,mIVSCC-MET,Sst-IRES-Cre;Ai14-331663.04.02.04,right,VISp2/3,331663,Sst-IRES-Cre;Ai14-331663,F,P57,,,...,aspiny,,full,0.295656,,601810292,PS0817_E1-50_S27,BT036,CS180626100034,Sst Calb2 Necab1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
992386952,mIVSCC-MET,Sncg-IRES2-FlpO-neo;Ai65F-499189.04.02.02,right,VISl1,499189,Sncg-IRES2-FlpO-neo;Ai65F-499189,F,P60,,,...,aspiny,,full,0.143579,,992386928,SM-J39ZH_S505_E1-50,RSC-240,CS180626100011,Sncg Vip Itih5
992830261,mIVSCC-MET,Sncg-IRES2-FlpO-neo;Ai65F-499191.05.02.02,right,VISp5,499191,Sncg-IRES2-FlpO-neo;Ai65F-499191,F,P62,,,...,aspiny,,full,0.691884,,992830126,SM-J39ZH_S566_E1-50,RSC-240,CS180626100008,Sncg Slc17a8
993243528,mIVSCC-MET,Sncg-IRES2-FlpO-neo;Ai65F-500414.03.01.01,left,VISp1,500414,Sncg-IRES2-FlpO-neo;Ai65F-500414,M,P55,,,...,aspiny,,full,0.118258,,993243447,SM-J39TJ_S267_E1-50,RSC-242,CS180626100002,Lamp5 Fam19a1 Pax6
993245688,mIVSCC-MET,Sncg-IRES2-FlpO-neo;Ai65F-500414.05.01.01,left,VISp2/3,500414,Sncg-IRES2-FlpO-neo;Ai65F-500414,M,P55,,,...,aspiny,,full,0.270298,,993245664,SM-J39TJ_S275_E1-50,RSC-242,CS180626100010,Sncg Gpr50


In [4]:
# Thickness value handed to get_morphometrics and used to convert the
# normalized soma depth into an absolute depth. Constant for all cells, so it
# is defined once outside the loop.
# NOTE(review): presumably the cortical thickness in micrometers -- confirm
# against helpers.morph_features.get_morphometrics.
thickness = 1500

# Create the output folder if needed; without it to_csv fails on a fresh
# data tree.
os.makedirs(save_path, exist_ok=True)

i = 0  # counts only the cells that are actually processed in this run
for file_name in cells.index:

    out_file = save_path + str(file_name) + '.csv'

    # Results are cached per cell: skip cells whose feature file already exists.
    if os.path.exists(out_file):
        continue

    print('%i: Calculating morphometric statistics for %s' % (i,file_name))

    # load in data
    # sep=r'\s+' replaces delim_whitespace=True, which is deprecated in
    # recent pandas versions; both split on runs of whitespace.
    swc = pd.read_csv(path_to_reconstructions + str(file_name) + '.swc',
                      sep=r'\s+', comment='#',
                      names=['n', 'type', 'x', 'y', 'z', 'radius', 'parent'],
                      index_col=False)
    # create a neurontree
    N = nt.NeuronTree(swc=swc)

    z = dict()
    z['cell id'] = file_name

    # get the morphometrics
    norm_depth = cells.loc[file_name, 'cell_soma_normalized_depth']
    depth = norm_depth * thickness
    d = get_morphometrics(N, depth, thickness)

    z.update(d)

    # save data
    morphometry_data = pd.DataFrame(z)
    morphometry_data.to_csv(out_file)

    i += 1

0: Calculating morphometric statistics for 601506507
1: Calculating morphometric statistics for 601790961
2: Calculating morphometric statistics for 601803754
3: Calculating morphometric statistics for 601808698
4: Calculating morphometric statistics for 601810307
5: Calculating morphometric statistics for 601811091
6: Calculating morphometric statistics for 602231708
7: Calculating morphometric statistics for 602535278
8: Calculating morphometric statistics for 604646725
9: Calculating morphometric statistics for 605060256
10: Calculating morphometric statistics for 605160536
11: Calculating morphometric statistics for 606098116
12: Calculating morphometric statistics for 606271263
13: Calculating morphometric statistics for 606308411
14: Calculating morphometric statistics for 614728680
15: Calculating morphometric statistics for 623326230
16: Calculating morphometric statistics for 623415197
17: Calculating morphometric statistics for 623434306
18: Calculating morphometric statistic

# Explore morphometric features



In [5]:
data_path = save_path

# load in all morphometrics files into one data frame
# Only the top level of data_path is scanned, and only regular *.csv files
# are read, so stray files in the folder do not break the load.
csv_files = [name for name in os.listdir(data_path)
             if name.endswith('.csv')
             and os.path.isfile(os.path.join(data_path, name))]
if not csv_files:
    raise FileNotFoundError('No morphometric CSV files found in %s' % data_path)

# Read every per-cell file (its first column is the saved row index) and
# concatenate once. DataFrame.append was removed in pandas 2.0 and copied the
# whole frame on every iteration.
frames = [pd.read_csv(os.path.join(data_path, name), index_col=0)
          for name in csv_files]

# ignore_index=True yields a fresh 0..n-1 index, replacing the former
# reset_index() followed by deleting the resulting 'index' column.
morphometrics = pd.concat(frames, ignore_index=True)

In [6]:
# Start from every column of the morphometrics table ...
full_idx = morphometrics.columns.tolist()

# ... and take out the cell identifier and the four z-profile columns.
# list.remove raises ValueError if one of them is missing from the table.
for excluded_column in ('cell id',
                        'dendrite z-profile',
                        'axon z-profile',
                        'axon soma-centered z-profile',
                        'dendrite soma-centered z-profile'):
    full_idx.remove(excluded_column)

# Number of remaining feature columns.
len(full_idx)

75

## Inhibitory cells

In [21]:
# get only inhibitory cells
inhibitory_cells = morphometrics.set_index('cell id')
len(full_idx)

75

In [22]:
# Work on a copy so that full_idx itself stays untouched.
idx_inh_morphometrics = copy.copy(full_idx)

# Locate every missing value among the candidate features, then count how
# many missing values each affected column (by position in full_idx) has.
missing_mask = inhibitory_cells[full_idx].isnull()
missing_rows, missing_cols = np.where(missing_mask)
indices, counts = np.unique(missing_cols, return_counts=True)

# Features to drop:
# 1) those with more than 100 missing values,
to_remove = [idx_inh_morphometrics[position] for position in indices[counts > 100]]
# 2) every column whose name contains '"apical"',
to_remove.extend(name for name in morphometrics.columns if '"apical"' in name)
# 3) the four depth features.
to_remove.extend(['axon depth', 'axon robust depth', 'dendrite depth', 'dendrite robust depth'])

# A set removes duplicates so each feature is deleted exactly once.
for feature in set(to_remove):
    print('deleting %s'%feature)
    idx_inh_morphometrics.remove(feature)

deleting "apical" mean bifurcation distance
deleting "apical" branch points
deleting "apical" log1p number of outer bifurcations
deleting axon robust depth
deleting "apical" std bifurcation distance
deleting axon depth
deleting "apical" height
deleting "apical" width
deleting dendrite depth
deleting "apical" robust width
deleting "apical" robust height
deleting "apical" total length
deleting dendrite robust depth


In [23]:
# Restrict the table to the features that survived the exclusion step.
inhibitory_cells = inhibitory_cells.loc[:, idx_inh_morphometrics]

# Disabled: optional exclusion of features with little variation
# (ratio of std to mean of the absolute values below 0.25).
# # morphometrics to be excluded due to little variation
# print('Features excluded due to little variation: \n', np.array(idx_inh_morphometrics)[(inhibitory_cells.abs().std()/inhibitory_cells.abs().mean() < 0.25).values])

# idx_after_cv = np.array(idx_inh_morphometrics)[(inhibitory_cells.abs().std()/inhibitory_cells.abs().mean() >= 0.25).values]
# print('remaining features: \n', idx_after_cv, ' \n number of features: ', len(idx_after_cv))

# Number of features that remain.
len(idx_inh_morphometrics)

62

In [24]:
# final_inh_idx = list(idx_after_cv)

# exclude features after visual inspection
# to_remove = [ 'Log1p fraction of dendrite below axon',
#              'dendrite max branch angle', 'dendrite min branch angle']

# for z in to_remove:
#     final_inh_idx.remove(z)

# print('Final number of inhibitory features: ', len(final_inh_idx))

# Concatenate and store the features

## Morphometric statistics

In [25]:
# Write the selected morphometric features to a single CSV located one level
# above the per-cell feature folder.
features_csv_path = save_path + '/../v1_patchseq_morph_features.csv'
morphometric_used = inhibitory_cells.loc[:, idx_inh_morphometrics]
morphometric_used.to_csv(features_csv_path)

## z-profiles

In [26]:
# Start again from the full table (including the z-profile columns), indexed
# by 'cell id'.
inhibitory_cells = morphometrics.set_index('cell id')

# Length of the all-zero placeholder used when a cell has no stored profile.
# NOTE(review): assumes stored z-profiles have 20 bins -- confirm against the
# code that produces 'axon z-profile'.
n_profile_bins = 20

# first load them assigned to their index/name. Then put them in an array. To make sure they correspond.
profile_dict_i = dict()
for rn, item in inhibitory_cells.iterrows():
    profile_dict_i[rn] = np.array([])
    for profile in item[['axon z-profile']]:
        # Profiles come back from the CSV as text such as "[0.1 0.2\n 0.3]";
        # anything that is not a string (i.e. a missing value) gets zeros.
        # A type check is used because an identity test against np.nan misses
        # NaN objects that are not the np.nan singleton.
        if isinstance(profile, str):
            s = profile.replace('\n', '').replace('[', '').replace(']', '')
            temp = np.array([float(x) for x in s.split(' ') if x != ''])
        else:
            # 1-D like the parsed profiles; a (1, 20) placeholder would make
            # the per-cell arrays inconsistent in shape.
            temp = np.zeros(n_profile_bins)

        # The first profile is stored as is; any further ones are stacked
        # row-wise underneath it.
        if profile_dict_i[rn].size == 0:
            profile_dict_i[rn] = temp
        else:
            profile_dict_i[rn] = np.vstack((profile_dict_i[rn], temp))


In [27]:
# One row per cell: the dict keys (cell ids) become the row index after the
# transpose, and the index is labelled 'cell id'.
z_profiles = pd.DataFrame(profile_dict_i).T
z_profiles = z_profiles.rename_axis('cell id')

# Store the z-profiles one level above the per-cell feature folder.
zprofiles_csv_path = save_path + '/../v1_patchseq_morph_zprofiles.csv'
z_profiles.to_csv(zprofiles_csv_path)