# Extracting the morphometric statistics


**Please make sure to preprocess the raw reconstructions with the `preprocess-morph-SWC-files.ipynb` notebook before running this notebook.**


In [6]:
import pandas as pd
import numpy as np
import copy
import os

from neurontree import NeuronTree as nt
from helpers.morph_features import get_morphometrics

In [7]:
# Folder layout: everything lives below `root_path`.
root_path = 'D:/mpinb/data/mouse_m1'
# where the per-cell morphometric CSV files are written
save_path = '{}/processed/morph/features/'.format(root_path)
# where the SWC reconstructions are read from
path_to_reconstructions = '{}/processed/morph/nt/'.format(root_path)

In [8]:
# Read the per-cell metadata table (tab-separated, first column used as the index)
# and keep only the rows whose 'Traced' column equals 'y'.
meta_data_file_path = ''.join([root_path, '/m1_patchseq_meta_data.csv'])
cells = pd.read_csv(meta_data_file_path, index_col=0, sep='\t')
is_traced = cells['Traced'] == 'y'
cells = cells[is_traced]

FileNotFoundError: [Errno 2] No such file or directory: 'D:/mpinb/data/mouse_m1/m1_patchseq_meta_data.csv'

In [None]:
# Build a boolean mask over the rows of `cells` that selects the inhibitory cells.
# (Only an inhibitory mask is created here; no excitatory mask is built.)
inhibitory_families = ['Lamp5', 'Pvalb', 'Sncg', 'Sst', 'Vip']
# cells that are included by id, independent of their 'RNA family' entry
extra_inhibitory_cells = ['20190606_sample_7', '20190905_sample_1']

inhibitory_index = cells['RNA family'].isin(inhibitory_families) | \
                   cells['Cell'].isin(extra_inhibitory_cells)

# True for every cell except '20180921_sample_3', which is excluded from the selection
no_dendrite_index = (cells['Cell'] != '20180921_sample_3')

# combine the masks with the logical AND operator rather than arithmetic `*` on booleans
inhibitory_index = inhibitory_index & no_dendrite_index

In [None]:
# Keep only the rows selected by the boolean mask `inhibitory_index`.
cells = cells.loc[inhibitory_index]

In [None]:
# Show the metadata table as it stands after the filtering above.
cells

In [None]:
# Compute the morphometric statistics of every cell in `cells` and write them to
# one CSV file per cell in `save_path`. Cells whose CSV file already exists are
# skipped, so the loop can be re-run without recomputing finished cells.
os.makedirs(save_path, exist_ok=True)  # the output folder must exist before to_csv is called

for rn, item in cells.iterrows():

    file_name = item['Cell']

    # result already on disk -> nothing to do for this cell
    if os.path.exists(save_path + file_name + '.csv'):
        continue

    print('%i: Calculating morphometric statistics for %s' % (rn,file_name))

    # load in data: whitespace-separated columns, lines starting with '#' are comments.
    # sep=r'\s+' is the documented equivalent of the deprecated delim_whitespace=True.
    swc = pd.read_csv(path_to_reconstructions + file_name + '.swc',
                      sep=r'\s+', comment='#',
                      names=['n', 'type', 'x', 'y', 'z', 'radius', 'parent'],
                      index_col=False)
    # create a neurontree
    N = nt.NeuronTree(swc=swc)

    z = dict()
    z['cell id'] = file_name

    # get the morphometrics; soma depth and cortical thickness come from the metadata row
    depth = float(item['Soma depth (µm)'])
    thickness = float(item['Cortical thickness (µm)'])
    d = get_morphometrics(N, depth, thickness)

    z.update(d)

    # save data
    morphometry_data = pd.DataFrame(z)
    morphometry_data.to_csv(save_path+ file_name + ".csv")

# Explore morphometric features



In [None]:
data_path = save_path

# Load all per-cell morphometrics files into one data frame.
# Only the top level of `data_path` is read; next() stops os.walk after the first
# entry instead of walking (and listing) the whole directory tree.
root, _, files = next(os.walk(data_path), (data_path, [], []))

# Read only CSV files so that stray files in the folder do not break the load.
per_cell_frames = [pd.read_csv(data_path + f, index_col=0)
                   for f in files if f.endswith('.csv')]

# DataFrame.append was removed in pandas 2.0; a single concat replaces it.
# ignore_index=True yields the same fresh 0..n-1 index the original obtained
# by resetting the index and deleting the resulting 'index' column.
if per_cell_frames:
    morphometrics = pd.concat(per_cell_frames, ignore_index=True)
else:
    morphometrics = pd.DataFrame()

In [None]:
# Feature names: every column of `morphometrics` except the cell id and the
# four z-profile columns.
full_idx = morphometrics.columns.tolist()
for excluded_column in ('cell id',
                        'dendrite z-profile',
                        'axon z-profile',
                        'axon soma-centered z-profile',
                        'dendrite soma-centered z-profile'):
    full_idx.remove(excluded_column)
len(full_idx)

## Inhibitory cells

In [None]:
# get only inhibitory cells
# `inhibitory_index` may carry more rows than `cells` (the mask was built before
# `cells` was filtered with it), so align it to the current rows explicitly instead
# of relying on implicit reindexing inside the boolean lookup.
aligned_inhibitory_mask = inhibitory_index.reindex(cells.index)
inhibitory_cell_ids = cells.loc[aligned_inhibitory_mask, 'Cell'].values

# rows of the morphometrics table for exactly those cells, indexed by cell id
inhibitory_cells = morphometrics.set_index('cell id').loc[inhibitory_cell_ids]
len(full_idx)

In [None]:
# Work on a shallow copy of the full feature list and strip unwanted names from it.
idx_inh_morphometrics = list(full_idx)

# Column positions (within `full_idx`) of every missing value, then the number of
# missing values per column position.
null_mask = inhibitory_cells[full_idx].isnull()
null_column_positions = np.where(null_mask)[1]
indices, counts = np.unique(null_column_positions, return_counts=True)

# features that are missing in more than 100 cells
to_remove = [idx_inh_morphometrics[position] for position in indices[counts > 100]]
# NOTE(review): the search pattern contains literal double quotes, so it only matches
# column names that include the quoted text "apical" -- confirm this is intended.
to_remove.extend(column for column in morphometrics.columns if '"apical"' in column)
# depth features that are always dropped
to_remove.extend(['axon depth', 'axon robust depth', 'dendrite depth', 'dendrite robust depth'])

# a set removes duplicate names so each feature is removed only once
for z in set(to_remove):
    print('deleting %s'%z)
    idx_inh_morphometrics.remove(z)

In [None]:
# Restrict the inhibitory table to the feature columns that survived the removal step.
inhibitory_cells = inhibitory_cells.loc[:, idx_inh_morphometrics]

# The coefficient-of-variation filter below is currently disabled.
# # morphometrics to be excluded due to little variation
# print('Features excluded due to little variation: \n', np.array(idx_inh_morphometrics)[(inhibitory_cells.abs().std()/inhibitory_cells.abs().mean() < 0.25).values])

# idx_after_cv = np.array(idx_inh_morphometrics)[(inhibitory_cells.abs().std()/inhibitory_cells.abs().mean() >= 0.25).values]
# print('remaining features: \n', idx_after_cv, ' \n number of features: ', len(idx_after_cv))
len(idx_inh_morphometrics)

In [None]:
# final_inh_idx = list(idx_after_cv)

# exclude features after visual inspection
# to_remove = [ 'Log1p fraction of dendrite below axon',
#              'dendrite max branch angle', 'dendrite min branch angle']

# for z in to_remove:
#     final_inh_idx.remove(z)

# print('Final number of inhibitory features: ', len(final_inh_idx))

# Concatenate and store the features

## Morphometric statistics

In [None]:
# Select the final feature columns and write them to a CSV one level above `save_path`.
morphometric_used = inhibitory_cells.loc[:, idx_inh_morphometrics]
features_csv_path = save_path + '/../m1_patchseq_morph_features.csv'
morphometric_used.to_csv(features_csv_path)

## z-profiles

In [None]:
# Re-select the inhibitory rows with all columns (the z-profile columns are needed here).
# The mask is aligned to the current rows of `cells` explicitly, because it may have
# been built before `cells` was filtered.
inhibitory_cells = morphometrics.set_index('cell id').loc[
    cells.loc[inhibitory_index.reindex(cells.index), 'Cell'].values]

# length of the all-zero profile used for cells without a stored profile
N_PROFILE_BINS = 20
# profile columns to collect for every cell
profile_columns = ['axon z-profile']


def _parse_profile(profile):
    """Turn one z-profile table entry into a 1-D float array.

    `profile` is either a missing value or the text form of an array,
    e.g. '[0.1 0.2\\n 0.3]'. Missing values become a zero vector of length
    N_PROFILE_BINS.
    """
    # pd.isnull catches every NaN object; an identity test against np.nan only
    # catches the np.nan singleton itself.
    if pd.isnull(profile):
        # 1-D like the parsed profiles, so all dictionary entries share one shape
        return np.zeros(N_PROFILE_BINS)
    tokens = profile.replace('\n', ' ').replace('[', ' ').replace(']', ' ').split()
    return np.array([float(token) for token in tokens])


# first load them assigned to their index/name. Then put them in an array. To make sure they correspond.
profile_dict_i = dict()
for rn, item in inhibitory_cells.iterrows():
    parsed_profiles = [_parse_profile(profile) for profile in item[profile_columns]]
    # a single profile is stored as is; several profiles are stacked row-wise
    if len(parsed_profiles) == 1:
        profile_dict_i[rn] = parsed_profiles[0]
    else:
        profile_dict_i[rn] = np.vstack(parsed_profiles)


In [None]:
# One row per cell: the dictionary keys become the row labels after transposing.
z_profiles = pd.DataFrame(profile_dict_i).transpose()

# Name the row labels 'cell id' by moving them to a column, renaming it and
# turning it back into the index.
z_profiles = z_profiles.reset_index()
z_profiles = z_profiles.rename(columns={'index':'cell id'})
z_profiles = z_profiles.set_index('cell id')

# Write the table to a CSV one level above `save_path`.
zprofiles_csv_path = save_path + '/../m1_patchseq_morph_zprofiles.csv'
z_profiles.to_csv(zprofiles_csv_path)