### Step 1 - NeuroMorpho access and cell acquisition

#### This Python notebook is used to find and generate.pkl and .csv files which have contain information regarding queried cells from the NeuroMorpho database. The query is done by using 3 search criteria:
- Brain region
- Species (Animal)
- Cell type

#### A total of 63 columns/parameters are created for each queried cell

#### The entire code is organized into two cells. The first one is used to make a selection of the three previosly mentioned criteria. The second one calls the NeuroMorpho API and generates the output files based on the selected parameters
#### Additional filtering criteria can be applied to the downloaded .pkl or .csv file (See Step 2 notebook)

#### If the query reports:
- status': 404, 'error': 'Not Found', 'message': 'Requested neuron(s) not found'

#### please contact NeuroMorpho administrators: nmoadmin@gmu.edu

In [1]:
# Module importing

import requests
import json
import csv
import pandas as pd
from pandas import DataFrame
import pickle
import ipywidgets as widget
from urllib3._collections import HTTPHeaderDict

# Widget where users select three search criterias (Brain region, Species, Cell type)

widg1 = widget.Dropdown(options=['All', 'abdominal ganglion', 'accessory lobe', 'accessory olfactory bulb', 'adult subesophageal zone', 'amygdala'
                                'antenna', 'antennal lobe', 'anterior olfactory nucleus', 'basal forbrain', 'basal ganglia',
                                'brainstem', 'Central complex', 'Central nervous system', 'cerebellum', 'cerebral ganglion',
                                'Cochlea', 'corpus callosum', 'cortex', 'electrosensory lobe', 'endocrine system', 'enthorinal cortex',
                                'eye circuit', 'forebrain', 'fornix', 'ganglion', 'hippocampus', 'hypothalamus', 'lateral complex',
                                'lateral horn', 'lateral line organ', 'left', 'Left Adult Central Complex', 'Left Mushroom Body', 'main olfactory bulb'
                                'meninges', 'mesencephalon', 'myelencephalon', 'neocortex', 'nuchal organs', 'olfactory cortex', 'olfactory pit', 'optic lobe',
                                'pallium', 'parasubiculum', ' peptidergic circuit', 'peripheral nervous system', 'pharyngeal nervous system', 'pons', 'Pro-subiculum', 
                                'protocerebrum', 'retina', 'retinorecipient mesencephalon and diencephalon', 'Right Adult Central Complex',
                                'Right Mushroom Body', 'somatic nervous system', 'spinal cord', 'stomatogastric ganglion', 'subesophageal ganglion',
                                'subesophageal zone-(SEZ)', 'subiculum', 'subpallium', 'Subventricular zone', 'thalamus', 'ventral nerve cord',
                                'ventral striatum', 'ventral thalamus', 'ventrolateral neuropils'], 
                        value=  'cerebellum', description='Brain Region:')
display(widg1)

widg2 = widget.Dropdown(options=['All', 'African wild dog', 'agouti', 'Apis mellifera', 'Aplysia', 'Axolotl', 'Baboon',
                                'Blind mole-rat', 'blowfly', 'Blue wildebeest', 'Bonobo', 'bottlenose dolphin', 'C. elegans',
                                'Calango lizard', 'capuchin monkey', 'Caracal', 'cat', 'cheetah', 'chicken', 'chimpanzee', 'Clam worm', 'clouded leopard', 'Crab', 'cricket',
                                'Crisia eburnea', 'Domestic dog', 'domestic pig', 'dragonfly', 'drosophila melanogaster', 'drosophila sechellia',
                                'elephant', 'ferret', 'giraffe', 'goldfish', 'grasshopper', 'Greater kudu', 'guinea pig', 'Hamster', 'human', 'humpback whale', 
                                'Lemur', 'leopard', 'Lion', 'locust', 'manatee', 'minke whale', 'Mongoose', 'monkey', 'Mormyrid fish', 'moth',
                                 'mouse', 'pouched lamprey', 'Praying mantis (Hierodula membranacea)', 'Praying mantis (Hierodula membranacea)',
                                 'proechimys', 'rabbit', 'Rana esculenta', 'Ranitomeya imitator', 'rat', 'Rhinella arenarum', 'Ruddy turnstone', 'salamander',
                                 'Scinax granulatus', 'Sea lamprey', 'Semipalmated plover', 'Semipalmated sandpiper', 'sheep', 'Silkmoth', 'spiny lobster', 'Stellers Sculpin',
                                 'Tiger', 'Toadfish', 'Treeshrew', 'turtle', 'Wallaby', 'Xenopus laevis', 'Xenopus tropicalis', 'Zebra', 'zebra finch', 'zebrafish'], 
                       value=  'mouse', description='Species:')
display(widg2)

widg3 = widget.Dropdown(options=['All', 'Glia', 'interneuron', 'principal cell', 'sensory'], 
                        value=  'principal cell', description='Cell Type:')
display(widg3)

Dropdown(description='Brain Region:', index=13, options=('All', 'abdominal ganglion', 'accessory lobe', 'acces…

Dropdown(description='Species:', index=50, options=('All', 'African wild dog', 'agouti', 'Apis mellifera', 'Ap…

Dropdown(description='Cell Type:', index=3, options=('All', 'Glia', 'interneuron', 'principal cell', 'sensory'…

In [17]:
# Make selections for Brain region, Animal and Cell type and run the second next cell
# This will run the code and generate the .pkl and .csv files based on the search criteria

In [2]:
brain_region = widg1.value
species = widg2.value
cell_type = widg3.value

params_widg = {}
if brain_region != 'All':
  params_widg['brain_region'] = 'brain_region:' + brain_region
if species != 'All':
  params_widg['species'] = 'species:' + species
if cell_type != 'All':
  params_widg['cell_type'] = 'cell_type:' + cell_type

params = {}
params['page'] =  0
fq = []
first = 0
for key, value in params_widg.items():
  if first == 0:
    first = 1
    params['q'] = value
  else:
    fq.append(value)
    params['fq'] = fq

# based on the previous criteria the url link is created and json is called
# in the next cell the returned info using json is transferred into a dictionary

if brain_region == 'All' and species == 'All' and cell_type == 'All':
  url = 'http://neuromorpho.org/api/neuron'
else:
  url = 'http://neuromorpho.org/api/neuron/select'


first_page_response = requests.get(url, params)

print(first_page_response.request.url)
print(first_page_response.request.body)
print(first_page_response.request.headers)


if first_page_response.status_code == 404 or first_page_response.status_code == 500:
  exit (1)

print (first_page_response.json())
totalPages = first_page_response.json()['page']['totalPages']


df_dict = {
        'NeuronID' : list(),
        'Neuron Name' : list(),
        'Archive' : list(),
        'Note' : list(),
        'Age Scale' : list(),
        'Gender' :  list(),
        'Age Classification' :  list(), 
        'Brain Region' :  list(),
        'Cell Type' : list(),
        'Species' :  list(),
        'Strain' :  list(),
        'Scientific Name' :  list(),
        'Stain' :  list(),
        'Experiment Condition' :  list(),
        'Protocol' :  list(),
        'Slicing Direction' :  list(),
        'Reconstruction Software' : list(),
        'Objective Type' :  list(),
        'Original Format' :  list(),
        'Domain' :  list(),
        'Attributes' :  list(),
        'Magnification' :  list(),
        'Upload Date' :  list(),
        'Deposition Date' :  list(),
        'Shrinkage Reported' :  list(),
        'Shrinkage Corrected' :  list(),
        'Reported Value' :  list(),
        'Reported XY' :  list(),
        'Reported Z' :  list(),
        'Corrected Value' :  list(),
        'Corrected XY' :  list(),
        'Corrected Z' :  list(),
        'Slicing Thickness' :  list(),
        'Min Age' :  list(),
        'Max Age' :  list(),
        'Min Weight' :  list(),
        'Max Weight' :  list(),
        'Png URL' :  list(),
        'Reference PMID' :  list(),
        'Reference DOI' :  list(),
        'Physical Integrity' :  list() }

for pageNum in range(totalPages):
    params['page'] = pageNum
    response = requests.get(url, params)
    print('Querying page {} -> status code: {}'.format(
        pageNum, response.status_code))
    if (response.status_code == 200):    #only parse successful requests
        data = response.json()
        for row in data['_embedded']['neuronResources']:
            df_dict['NeuronID'].append(str(row['neuron_id']))
            df_dict['Neuron Name'].append(str(row['neuron_name']))
            df_dict['Archive'].append(str(row['archive']))
            df_dict['Note'].append(str(row['note']))
            df_dict['Age Scale'].append(str(row['age_scale']))
            df_dict['Gender'].append(str(row['gender']))
            df_dict['Age Classification'].append(str(row['age_classification']))
            df_dict['Brain Region'].append(str(row['brain_region']))
            df_dict['Cell Type'].append(str(row['cell_type']))
            df_dict['Species'].append(str(row['species']))
            df_dict['Strain'].append(str(row['strain']))
            df_dict['Scientific Name'].append(str(row['scientific_name']))
            df_dict['Stain'].append(str(row['stain']))
            df_dict['Experiment Condition'].append(str(row['experiment_condition']))
            df_dict['Protocol'].append(str(row['protocol']))
            df_dict['Slicing Direction'].append(str(row['slicing_direction']))
            df_dict['Reconstruction Software'].append(str(row['reconstruction_software']))
            df_dict['Objective Type'].append(str(row['objective_type']))
            df_dict['Original Format'].append(str(row['original_format']))
            df_dict['Domain'].append(str(row['domain']))
            df_dict['Attributes'].append(str(row['attributes']))
            df_dict['Magnification'].append(str(row['magnification']))
            df_dict['Upload Date'].append(str(row['upload_date']))
            df_dict['Deposition Date'].append(str(row['deposition_date']))
            df_dict['Shrinkage Reported'].append(str(row['shrinkage_reported']))
            df_dict['Shrinkage Corrected'].append(str(row['shrinkage_corrected']))
            df_dict['Reported Value'].append(str(row['reported_value']))
            df_dict['Reported XY'].append(str(row['reported_xy']))
            df_dict['Reported Z'].append(str(row['reported_z']))
            df_dict['Corrected Value'].append(str(row['corrected_value']))
            df_dict['Corrected XY'].append(str(row['corrected_xy']))
            df_dict['Corrected Z'].append(str(row['corrected_z']))
            df_dict['Slicing Thickness'].append(str(row['slicing_thickness']))
            df_dict['Min Age'].append(str(row['min_age']))
            df_dict['Max Age'].append(str(row['max_age']))
            df_dict['Min Weight'].append(str(row['min_weight']))
            df_dict['Max Weight'].append(str(row['max_weight']))
            df_dict['Png URL'].append(str(row['png_url']))
            df_dict['Reference PMID'].append(str(row['reference_pmid']))
            df_dict['Reference DOI'].append(str(row['reference_doi']))
            df_dict['Physical Integrity'].append(str(row['physical_Integrity']))
           
neurons_df = pd.DataFrame(df_dict)

neurons_df.to_pickle("./neurons.pkl")

# the ID number of previously obtained neurons is used to obtain their morphometric details

n = neurons_df['NeuronID'].to_numpy()
morphometry = []
for i in n:
    url = "http://neuromorpho.org/api/morphometry/id/" + str(i)
    response = requests.get(url)
    json_data = response.json()
    morphometry.append(json_data)

df_dict = {}
df_dict['Neuron ID'] = []
df_dict['Surface'] = []
df_dict['Volume'] = []
df_dict['Soma surface'] = []
df_dict['Number of stems'] = []
df_dict['Number of bifurcations'] = []
df_dict['Number of branches'] = []
df_dict['Width'] = []
df_dict['Height'] = []
df_dict['Depth'] = []
df_dict['Diameter'] = []
df_dict['Euclidian distance'] = []
df_dict['Path distance'] = []
df_dict['Branching order'] = []
df_dict['Contraction'] = []
df_dict['Fragmentation'] = []
df_dict['Partition asymmetry'] = []
df_dict['Pk classic'] = []
df_dict['Bifurcation angle local'] = []
df_dict['Fractal dimension'] = []
df_dict['Bifurcation angle remote'] = []
df_dict['Length'] = []
for row in morphometry:
    df_dict['Neuron ID'].append(str(row['neuron_id']))
    df_dict['Surface'].append(str(row['surface']))
    df_dict['Volume'].append(str(row['volume']))
    df_dict['Soma surface'].append(str(row['soma_Surface']))
    df_dict['Number of stems'].append(str(row['n_stems']))
    df_dict['Number of bifurcations'].append(str(row['n_bifs']))
    df_dict['Number of branches'].append(str(row['n_branch']))
    df_dict['Width'].append(str(row['width']))
    df_dict['Height'].append(str(row['height']))
    df_dict['Depth'].append(str(row['depth']))
    df_dict['Diameter'].append(str(row['diameter']))
    df_dict['Euclidian distance'].append(str(row['eucDistance']))
    df_dict['Path distance'].append(str(row['pathDistance']))
    df_dict['Branching order'].append(str(row['branch_Order']))
    df_dict['Contraction'].append(str(row['contraction']))
    df_dict['Fragmentation'].append(str(row['fragmentation']))
    df_dict['Partition asymmetry'].append(str(row['partition_asymmetry']))
    df_dict['Pk classic'].append(str(row['pk_classic']))
    df_dict['Bifurcation angle local'].append(str(row['bif_ampl_local']))
    df_dict['Fractal dimension'].append(str(row['fractal_Dim']))
    df_dict['Bifurcation angle remote'].append(str(row['bif_ampl_remote']))
    df_dict['Length'].append(str(row['length']))
    morphometry_df = pd.DataFrame(df_dict)

morphometry_df.to_pickle("./morphometry.pkl")

# the following is a list of steps used to currate the morphometric data
# and merge the two obtained dataframes (general neuron parameters and morphometric data)
# this results in the creation of final .pkl and .csv files at the end of the notebook

neurons = open("morphometry.pkl","rb")
neurons_df = pickle.load(neurons)
neurons.close()
neurons_df

neurons_df = neurons_df.replace({'Soma surface': {'None': ''}}, regex=True)

neurons_df["Surface"] = pd.to_numeric(neurons_df["Surface"], downcast="float")
neurons_df["Volume"] = pd.to_numeric(neurons_df["Volume"], downcast="float")
neurons_df["Soma surface"] = pd.to_numeric(neurons_df["Soma surface"], downcast="float")
neurons_df["Number of stems"] = pd.to_numeric(neurons_df["Number of stems"], downcast="float")
neurons_df["Number of bifurcations"] = pd.to_numeric(neurons_df["Number of bifurcations"], downcast="float")
neurons_df["Number of branches"] = pd.to_numeric(neurons_df["Number of branches"], downcast="float")
neurons_df["Width"] = pd.to_numeric(neurons_df["Width"], downcast="float")
neurons_df["Height"] = pd.to_numeric(neurons_df["Height"], downcast="float")
neurons_df["Depth"] = pd.to_numeric(neurons_df["Depth"], downcast="float")
neurons_df["Diameter"] = pd.to_numeric(neurons_df["Diameter"], downcast="float")
neurons_df["Euclidian distance"] = pd.to_numeric(neurons_df["Euclidian distance"], downcast="float")
neurons_df["Path distance"] = pd.to_numeric(neurons_df["Path distance"], downcast="float")
neurons_df["Branching order"] = pd.to_numeric(neurons_df["Branching order"], downcast="float")
neurons_df["Contraction"] = pd.to_numeric(neurons_df["Contraction"], downcast="float")
neurons_df["Fragmentation"] = pd.to_numeric(neurons_df["Fragmentation"], downcast="float")
neurons_df["Partition asymmetry"] = pd.to_numeric(neurons_df["Partition asymmetry"], downcast="float")
neurons_df["Pk classic"] = pd.to_numeric(neurons_df["Pk classic"], downcast="float")
neurons_df["Bifurcation angle local"] = pd.to_numeric(neurons_df["Bifurcation angle local"], downcast="float")
neurons_df["Fractal dimension"] = pd.to_numeric(neurons_df["Fractal dimension"], downcast="float")
neurons_df["Number of branches"] = pd.to_numeric(neurons_df["Number of branches"], downcast="float")
neurons_df["Bifurcation angle remote"] = pd.to_numeric(neurons_df["Bifurcation angle remote"], downcast="float")
neurons_df["Length"] = pd.to_numeric(neurons_df["Length"], downcast="float")

neurons_df.to_pickle("./neurons_float.pkl")

neurons = open("neurons.pkl","rb")
neurons_id_df = pickle.load(neurons)
neurons.close()
neurons_id_df

neuron_morphometry = open("neurons_float.pkl","rb")
neuron_morphometry_df = pickle.load(neuron_morphometry)
neuron_morphometry.close()
neuron_morphometry_df

final_df = neurons_id_df.join(neuron_morphometry_df)
final_df

# excess NeuronID column left when joinging two dataframes
final_df = final_df.drop(columns=['NeuronID'])

file_name="NM_"+str(brain_region)+"_"+str(species)+"_"+str(cell_type)+".csv"

final_df.to_pickle(file_name)

final_df.to_csv(file_name, index=False)

http://neuromorpho.org/api/neuron/select?page=0&q=brain_region%3Ahippocampus&fq=cell_type%3Aprincipal+cell
None
{'User-Agent': 'python-requests/2.26.0', 'Accept-Encoding': 'gzip, deflate, br', 'Accept': '*/*', 'Connection': 'keep-alive'}
{'_embedded': {'neuronResources': [{'neuron_id': 100, 'neuron_name': 'n419', 'archive': 'Turner', 'note': 'When originally released, this reconstruction had been incompletely processed, and this issue was fixed in release 6.3 (February 2016). The pre-6.3 version of the processed file is available for download <a href=" dableFiles/previous/v6.3/turner/n419.CNG.swc ">here</a>.', 'age_scale': 'Month', 'gender': 'Male/Female', 'age_classification': 'young', 'brain_region': ['hippocampus', 'CA1'], 'cell_type': ['pyramidal', 'principal cell'], 'species': 'rat', 'strain': 'Fischer 344', 'scientific_name': 'rattus norvegicus', 'stain': 'biocytin', 'experiment_condition': ['Control'], 'protocol': 'in vivo', 'slicing_direction': 'coronal', 'reconstruction_softwa