In [42]:
from Cell.morpho_utils import plot_and_save_swcs
from pathlib import Path
import pandas as pd
from tqdm.notebook import tqdm
import pickle
import numpy as np

# Plot morphology

In [2]:
# Hand every entry of `swc_own` to plot_and_save_swcs together with the output folder.
# NOTE(review): presumably one figure per morphology is written into `swc_own_fig`
# -- confirm in Cell.morpho_utils.
fig_path = Path('swc_own_fig')
fig_path.mkdir(exist_ok=True)  # tolerate re-runs when the folder already exists
swcs = [swc for swc in Path('swc_own').iterdir()]
plot_and_save_swcs(swcs, fig_path)

  0%|          | 0/392 [00:00<?, ?it/s]



# Extract features

In [6]:
# Load the layer-boundary table and average every column over its rows.
boundary_df = pd.read_csv(filepath_or_buffer='Layer_boundary.csv')
avg_boundary = boundary_df.mean(axis=0)

In [8]:
# Reference layer boundaries (tab-separated file), averaged column-wise like the table above.
standard_boundary_df = pd.read_csv(
    '../Nature/m1_nissl_layer_boundaries.csv',
    delimiter='\t',
)
avg_standard_boundary = standard_boundary_df.mean(axis=0)

In [11]:
# Take the L6 boundary from the reference table's 'L6/total' mean.
# NOTE: this mutates `avg_boundary` in place, so later cells see the L6 entry.
avg_boundary.loc['L6'] = avg_standard_boundary.loc['L6/total']

In [14]:
# Display the averaged layer boundaries (L6 comes from the reference table's 'L6/total').
avg_boundary

L1       129.233333
L2/3     449.333333
L4       627.466667
L5       982.333333
L6      1181.133333
dtype: float64

In [27]:
# Soma depth per cell, indexed by CellID so it can be joined against the SWC file table.
depth_df = pd.read_csv(
    filepath_or_buffer='Soma_depth.csv',
    index_col='CellID',
)

In [34]:
# One record per entry in `swc_own`. The CellID is the file name up to the first '.',
# with dashes removed (e.g. '2018-06-13cell5.DAT.swc' -> '20180613cell5').
swc_records = [
    {'CellID': swc.stem.split('.')[0].replace('-', ''), 'path': swc}
    for swc in Path('swc_own').iterdir()
]
swc_files_df = pd.DataFrame(swc_records).set_index('CellID')

# Inner join: keep only cells that have both a soma depth and an SWC file.
meta_df = depth_df.join(swc_files_df, how='inner')

In [35]:
# Display the joined metadata: one row per CellID with its Soma_depth and SWC path.
meta_df

Unnamed: 0_level_0,Soma_depth,path
CellID,Unnamed: 1_level_1,Unnamed: 2_level_1
20180613cell5,947.9,swc_own/2018-06-13cell5.DAT.swc
20180614cell2,1141.7,swc_own/2018-06-14cell2.DAT.swc
20180628cell6,1112.0,swc_own/2018-06-28cell6.DAT.swc
20180711cell4,966.4,swc_own/2018-07-11cell4.DAT.swc
20180711cell8,967.8,swc_own/2018-07-11cell8.DAT.swc
...,...,...
20190621cell3,900.7,swc_own/2019-06-21cell3.DAT.swc
20190621cell6,985.1,swc_own/2019-06-21cell6.DAT.swc
20190622cell4,835.6,swc_own/2019-06-22cell4.DAT.swc
20190622cell6,947.4,swc_own/2019-06-22cell6.DAT.swc


Export the SWC paths and cell IDs that could not be matched to each other

In [72]:
# Export what the inner join in `meta_df` left out:
#   sheet 'swc'     -> SWC files whose derived CellID has no entry in the depth table
#   sheet 'cell_id' -> depth-table CellIDs that have no SWC file
is_unmatched_swc = ~swc_files_df.index.isin(meta_df.index)
is_unmatched_depth = ~depth_df.index.isin(meta_df.index)

unmatched_path = pd.DataFrame({
    # Path objects are stored as POSIX strings so they are written as plain text.
    'path': swc_files_df.loc[is_unmatched_swc, 'path'].map(lambda p: p.as_posix()).tolist()
})
unmatched_cellid = pd.DataFrame({'cell_id': depth_df.index[is_unmatched_depth].tolist()})

with pd.ExcelWriter('unmatched_cell_ids.xlsx') as writer:
    # `sheet_name` is passed by keyword: positional use is deprecated since pandas 2.1
    # and becomes keyword-only in pandas 3.0.
    unmatched_path.to_excel(writer, sheet_name='swc', index=False)
    unmatched_cellid.to_excel(writer, sheet_name='cell_id', index=False)

Add the manually adjusted metadata (cell IDs remapped to the provider's IDs)

In [81]:
# ID mapping table; the next cell reads its 'CellID' and 'CellID_provider' columns.
unmatched_df = pd.read_csv(filepath_or_buffer='unmatched.cell.id.csv')

In [80]:
# Re-key the SWC files by the provider's cell ID, then attach the soma depth.
meta_df_2 = (
    swc_files_df
    .reset_index()
    # 'CellID' is the only column shared by both frames, so it is the merge key.
    .merge(unmatched_df[['CellID', 'CellID_provider']], on='CellID')
    # Replace the file-name-derived ID with the provider's ID.
    .drop(columns='CellID')
    .rename(columns={'CellID_provider': 'CellID'})
    .set_index('CellID')
    # Left join: rows without a depth entry are kept with a NaN Soma_depth.
    .join(depth_df, how='left')
)
meta_df_2

Unnamed: 0_level_0,path,Soma_depth
CellID,Unnamed: 1_level_1,Unnamed: 2_level_1
20190524cell4,swc_own/2018-10-26cell9.DAT.swc,852.3
20180917cell1,swc_own/2018-11-28cell4.DAT.swc,870.0
20190317cell10,swc_own/2018-11-28cell5.DAT.swc,754.3
20180710cell9,swc_own/2018-11-28cell6.DAT.swc,977.5
20190420cell6,swc_own/2018-12-03cell3.DAT.swc,897.6
20190408cell4,swc_own/2018-12-04cell3.DAT.swc,764.9
20190306cell6,swc_own/2018-12-05cell1.DAT.swc,1000.5
20190429cell3,swc_own/2019-01-06cell9.DAT.swc,755.6
20191118cell6,swc_own/2019-04-18cell5.DAT.swc,709.9
20190407cell1,swc_own/2019-04-20cell3.DAT.swc,723.0


In [107]:
# Second export of leftovers, after the manual ID remapping in `meta_df_2`:
#   sheet 'swc'     -> SWC files used by neither `meta_df` nor `meta_df_2`
#   sheet 'cell_id' -> depth-table CellIDs present in neither frame
# `DataFrame.append` was removed in pandas 2.0, so the two frames are combined with
# `pd.concat`, and only once instead of once per lookup.
matched_df = pd.concat([meta_df, meta_df_2])

is_unmatched_swc = ~swc_files_df['path'].isin(matched_df['path'])
is_unmatched_depth = ~depth_df.index.isin(matched_df.index)

unmatched_path_2 = pd.DataFrame({
    # Path objects are stored as POSIX strings so they are written as plain text.
    'path': swc_files_df.loc[is_unmatched_swc, 'path'].map(lambda p: p.as_posix()).tolist()
})
unmatched_cellid_2 = pd.DataFrame({'cell_id': depth_df.index[is_unmatched_depth].tolist()})

with pd.ExcelWriter('unmatched_cell_ids_2.xlsx') as writer:
    # `sheet_name` is passed by keyword: positional use is deprecated since pandas 2.1
    # and becomes keyword-only in pandas 3.0.
    unmatched_path_2.to_excel(writer, sheet_name='swc', index=False)
    unmatched_cellid_2.to_excel(writer, sheet_name='cell_id', index=False)

Use the updated SWC files from `modified_swc`

In [113]:
# Same file-name -> CellID scheme as for `swc_own`, applied to the `modified_swc` folder:
# text before the first '.', dashes removed.
modified_swc_records = [
    {'CellID': swc.stem.split('.')[0].replace('-', ''), 'path': swc}
    for swc in Path('modified_swc').iterdir()
]
modified_swc_files_df = pd.DataFrame(modified_swc_records).set_index('CellID')

# Inner join: keep only the modified SWCs that have a soma depth.
meta_df_3 = modified_swc_files_df.join(depth_df, how='inner')
meta_df_3

Unnamed: 0_level_0,path,Soma_depth
CellID,Unnamed: 1_level_1,Unnamed: 2_level_1
20180613cell5,modified_swc/2018-06-13cell5.DAT.swc,947.9
20180628cell6,modified_swc/2018-06-28cell6.DAT.swc,1112.0
20180903cell6,modified_swc/2018-09-03cell6.DAT.swc,729.2
20180921cell6,modified_swc/2018-09-21cell6.DAT.swc,745.8
20181025cell10,modified_swc/2018-10-25cell10.DAT.swc,905.4
20181026cell8,modified_swc/2018-10-26cell8.DAT.swc,709.9
20181217cell2,modified_swc/2018-12-17cell2.DAT.swc,878.0
20190106cell2,modified_swc/2019-01-06cell2.DAT.swc,869.7
20190106cell5,modified_swc/2019-01-06cell5.DAT.swc,646.7
20190416cell7,modified_swc/2019-04-16cell7.DAT.swc,621.1


## Layer features

In [39]:
from neuron_morphology.feature_extractor.feature_extractor import FeatureExtractor
from CJW.morpho_utils import layer_features
from Cell.morpho_utils import distribution_features, across_cortex_distribution_features, updown_and_move_soma_to_depth
from neuron_morphology.feature_extractor.feature_writer import FeatureWriter
from neuron_morphology.features.layer.reference_layer_depths import ReferenceLayerDepths
from neuron_morphology.swc_io import morphology_from_swc
from neuron_morphology.feature_extractor.data import Data

In [40]:
# Extractor with the layer, distribution and across-cortex feature sets registered.
layer_dist_extractor = FeatureExtractor()
layer_dist_extractor.register_features(
    layer_features + distribution_features + across_cortex_distribution_features
)
writer = FeatureWriter("me_layer_distribution_features.h5")

# Reference depths: each layer spans from the previous layer's boundary (0 for L1)
# down to its own averaged boundary.
layer_names = ['L1', 'L2/3', 'L4', 'L5', 'L6']
upper_edges = [0] + [avg_boundary[name] for name in layer_names[:-1]]
ref = {
    name: ReferenceLayerDepths(upper, avg_boundary[name])
    for name, upper in zip(layer_names, upper_edges)
}
# White matter starts at the L6 boundary and extends 1500 further, with scale=False.
ref['wm'] = ReferenceLayerDepths(avg_boundary['L6'], avg_boundary['L6'] + 1500, scale=False)
depth_range = (0, avg_boundary['L6'])

n_cells = len(meta_df)
for cell_id, sample in tqdm(meta_df.iterrows(), total=n_cells, desc='Extracting'):
    morpho = morphology_from_swc(sample['path'])
    # Return value is ignored, so this presumably repositions `morpho` in place
    # -- confirm in Cell.morpho_utils.
    updown_and_move_soma_to_depth(morpho, sample['Soma_depth'])
    data = Data(
        morpho,
        reference_layer_depths=ref,
        cortex_depth_range=depth_range,
    )
    extract_run = layer_dist_extractor.extract(data)
    writer.add_run(cell_id, extract_run.serialize())

# Collect all runs into a single table.
res_layer_dist_df = writer.build_output_table()

  self.heavy_file = h5py.File(self.heavy_path, driver="core")


Extracting:   0%|          | 0/338 [00:00<?, ?it/s]

  nodes = swc_data.to_dict('record')
2021-08-08 09:17:33,770 root         INFO     selected marks: <bound method FeatureExtractionRun.select_marks of <neuron_morphology.feature_extractor.feature_extraction_run.FeatureExtractionRun object at 0x7fa209c09f70>>
2021-08-08 09:17:33,772 root         INFO     selected features: ['axon.above_soma.nodes_ratio', 'axon.below_soma.nodes_ratio', 'axon.all_layers.nodes_ratio', 'axon.above_soma.length_ratio', 'axon.below_soma.length_ratio', 'axon.all_layers.length_ratio', 'axon.nodes_distribution', 'axon.length_distribution', 'axon.across_cortex.length_distribution', 'axon.across_cortex.nodes_distribution']
2021-08-08 09:17:51,054 root         INFO     selected marks: <bound method FeatureExtractionRun.select_marks of <neuron_morphology.feature_extractor.feature_extraction_run.FeatureExtractionRun object at 0x7fa209c09a90>>
2021-08-08 09:17:51,056 root         INFO     selected features: ['axon.above_soma.nodes_ratio', 'axon.below_soma.nodes_ratio', 

In [53]:
# Columns that hold one array per cell; they are written to separate CSV files.
array_columns = [
    'axon.nodes_distribution',
    'axon.length_distribution',
    'axon.across_cortex.length_distribution',
    'axon.across_cortex.nodes_distribution',
]

# Remaining columns go to one CSV; the complete table is pickled.
res_layer_dist_df.drop(columns=array_columns).to_csv('layer_features_0808.csv')
with open('me_layer_features_0808.pkl', 'wb') as pkl_file:
    pickle.dump(res_layer_dist_df, pkl_file)

# Drop cells with a missing value in any array column, then stack each column's
# per-cell arrays into a 2-D table (one row per cell) and write it to its own CSV.
res_array_df = res_layer_dist_df[array_columns].dropna()
dist_tables = {}
for col in array_columns:
    dist_tables[col] = pd.DataFrame(np.stack(res_array_df[col].values),
                                    index=res_array_df.index)
    dist_tables[col].to_csv(f'{col}_0808.csv')

# Keep the individual tables available under their original names.
res_node_dist_df = dist_tables['axon.nodes_distribution']
res_length_dist_df = dist_tables['axon.length_distribution']
res_across_length_dist_df = dist_tables['axon.across_cortex.length_distribution']
res_across_node_dist_df = dist_tables['axon.across_cortex.nodes_distribution']

In [108]:
# Run the same extractor over the cells in `meta_df_2`, using a separate HDF5 file.
writer = FeatureWriter("me_layer_distribution_features_unmatched.h5")
n_cells_2 = len(meta_df_2)
for cell_id, sample in tqdm(meta_df_2.iterrows(), total=n_cells_2, desc='Extracting'):
    morpho = morphology_from_swc(sample['path'])
    # Return value is ignored, so this presumably repositions `morpho` in place
    # -- confirm in Cell.morpho_utils.
    updown_and_move_soma_to_depth(morpho, sample['Soma_depth'])
    data = Data(
        morpho,
        reference_layer_depths=ref,
        cortex_depth_range=depth_range,
    )
    extract_run = layer_dist_extractor.extract(data)
    writer.add_run(cell_id, extract_run.serialize())

# Collect all runs into a single table.
res_layer_dist_df_2 = writer.build_output_table()

  self.heavy_file = h5py.File(self.heavy_path, driver="core")


Extracting:   0%|          | 0/47 [00:00<?, ?it/s]

  nodes = swc_data.to_dict('record')
2021-08-08 16:28:16,311 root         INFO     selected marks: <bound method FeatureExtractionRun.select_marks of <neuron_morphology.feature_extractor.feature_extraction_run.FeatureExtractionRun object at 0x7fa1f956bdc0>>
2021-08-08 16:28:16,313 root         INFO     selected features: ['axon.above_soma.nodes_ratio', 'axon.below_soma.nodes_ratio', 'axon.all_layers.nodes_ratio', 'axon.above_soma.length_ratio', 'axon.below_soma.length_ratio', 'axon.all_layers.length_ratio', 'axon.nodes_distribution', 'axon.length_distribution', 'axon.across_cortex.length_distribution', 'axon.across_cortex.nodes_distribution']
2021-08-08 16:28:36,203 root         INFO     selected marks: <bound method FeatureExtractionRun.select_marks of <neuron_morphology.feature_extractor.feature_extraction_run.FeatureExtractionRun object at 0x7fa1f956ba30>>
2021-08-08 16:28:36,206 root         INFO     selected features: ['axon.above_soma.nodes_ratio', 'axon.below_soma.nodes_ratio', 

In [109]:
# Non-array columns go to one CSV; the complete table is pickled.
scalar_features_2 = res_layer_dist_df_2.drop(columns=array_columns)
scalar_features_2.to_csv('layer_features_0809.csv')
with open('me_layer_features_0809.pkl', 'wb') as pkl_file:
    pickle.dump(res_layer_dist_df_2, pkl_file)

# Drop cells with a missing value in any array column, then write one CSV per
# array column with the per-cell arrays stacked into rows.
res_array_df_2 = res_layer_dist_df_2[array_columns].dropna()
for col in array_columns:
    stacked = np.stack(res_array_df_2[col].values)
    res_part_df_2 = pd.DataFrame(stacked, index=res_array_df_2.index)
    res_part_df_2.to_csv(f'{col}_0809.csv')

In [115]:
# Run the same extractor over the updated SWCs in `meta_df_3`.
# This run gets its own HDF5 file. The original cell reused
# "me_layer_distribution_features_unmatched.h5" (copied from the `meta_df_2` run),
# so both runs targeted the same heavy file.
writer = FeatureWriter("me_layer_distribution_features_modified.h5")
for cell_id, sample in tqdm(meta_df_3.iterrows(), total=meta_df_3.shape[0], desc='Extracting'):
    morpho = morphology_from_swc(sample['path'])
    # Return value is ignored, so this presumably repositions `morpho` in place
    # -- confirm in Cell.morpho_utils.
    updown_and_move_soma_to_depth(morpho, sample['Soma_depth'])
    data = Data(morpho, reference_layer_depths=ref, cortex_depth_range=depth_range)
    extract_run = layer_dist_extractor.extract(data)
    writer.add_run(cell_id, extract_run.serialize())

# Collect all runs into a single table.
res_layer_dist_df_3 = writer.build_output_table()

  self.heavy_file = h5py.File(self.heavy_path, driver="core")


Extracting:   0%|          | 0/46 [00:00<?, ?it/s]

  nodes = swc_data.to_dict('record')
2021-08-10 14:29:39,389 root         INFO     selected marks: <bound method FeatureExtractionRun.select_marks of <neuron_morphology.feature_extractor.feature_extraction_run.FeatureExtractionRun object at 0x7fa1f95e0490>>
2021-08-10 14:29:39,391 root         INFO     selected features: ['axon.above_soma.nodes_ratio', 'axon.below_soma.nodes_ratio', 'axon.all_layers.nodes_ratio', 'axon.above_soma.length_ratio', 'axon.below_soma.length_ratio', 'axon.all_layers.length_ratio', 'axon.nodes_distribution', 'axon.length_distribution', 'axon.across_cortex.length_distribution', 'axon.across_cortex.nodes_distribution']
2021-08-10 14:29:46,438 root         INFO     selected marks: <bound method FeatureExtractionRun.select_marks of <neuron_morphology.feature_extractor.feature_extraction_run.FeatureExtractionRun object at 0x7fa1fa730fd0>>
2021-08-10 14:29:46,439 root         INFO     selected features: ['axon.above_soma.nodes_ratio', 'axon.below_soma.nodes_ratio', 

In [116]:
# Non-array columns go to one CSV; the complete table is pickled.
scalar_features_3 = res_layer_dist_df_3.drop(columns=array_columns)
scalar_features_3.to_csv('layer_features_0810.csv')
with open('me_layer_features_0810.pkl', 'wb') as pkl_file:
    pickle.dump(res_layer_dist_df_3, pkl_file)

# Drop cells with a missing value in any array column, then write one CSV per
# array column with the per-cell arrays stacked into rows.
res_array_df_3 = res_layer_dist_df_3[array_columns].dropna()
for col in array_columns:
    stacked = np.stack(res_array_df_3[col].values)
    res_part_df_3 = pd.DataFrame(stacked, index=res_array_df_3.index)
    res_part_df_3.to_csv(f'{col}_0810.csv')