In [1]:
import numpy as np
import pandas as pd
import json
import os

In [2]:
# Neurons
f = open('Data/neuromorpho_neuron.json', 'r')
neurons = json.loads(f.read())
f.close()

# Morphology Data
f = open('Data/neuromorpho_morphology.json', 'r')
morpho = json.loads(f.read())
f.close()

# Models
f = open('Data/modeldb_models.json', 'r')
models = json.loads(f.read())
f.close()

# Celltypes
f = open('Data/modeldb_celltype.json', 'r')
celltypes = json.loads(f.read())
f.close()

# Regions
f = open('Data/modeldb_regions.json', 'r')
regions = json.loads(f.read())
f.close()

# Papers
f = open('Data/modeldb_papers.json', 'r')
papers = json.loads(f.read())
f.close()

# NeuroElectro
electro = pd.read_csv('Data/neuro_electro.csv', sep = '\t')

# Database Tables (brainstorming)
* neurons
    * **neuron_id**
    * doi
    * pmid
    * brain_region
    * cell_type
        * connects to publication on doi or pmid
* neuron_cell_type
    * **neuron_id**
    * cell_type
* neuron_exp_condition
    * **neuron_id**
    * experiment_condition
* neuron_pmid
    * **neuron_id**
    * reference_pmid
* neuron_doi
    * **neuron_id**
    * reference_doi
* electro_pub
    * **Index**
    * Pmid
* models
    * **id** (name this model id)
    * model_paper_id
    * neurons.value.id
    * 
        * Connects to modeldb_papers which can connect to publications (model_paper_id)
        * Connects to celltype on neurons.values.id
* publications
    * **pmid**
    * doi
* regions
    * **id** (name this region_id)
    * name
        * not sure how this is connecting so far
* celltypes
    * **id** (name this celltype_id)
    * Connects to neuroelectro (id to Index)
    * Can connect to neuron (region to url, going to be difficult)
* electro
    * **Index**
    * Pmid
    * NeuronName
    * ArticleID
        * Connects to publications on pmid
        * connects to neurons on brain_region or neuronname or publication

# Neurons

In [3]:
neuron_df = pd.json_normalize(neurons)

In [4]:
neuron_df.head(3).T

Unnamed: 0,0,1,2
neuron_id,1,10,100
neuron_name,cnic_001,cnic_041,n419
archive,Wearne_Hof,Wearne_Hof,Turner
note,"When originally released, this reconstruction ...","When originally released, this reconstruction ...","When originally released, this reconstruction ..."
age_scale,Year,Year,Month
gender,Male/Female,Male/Female,Male/Female
age_classification,old,old,young
brain_region,"[neocortex, prefrontal, layer 3]","[neocortex, prefrontal, layer 3]","[hippocampus, CA1]"
cell_type,"[Local projecting, pyramidal, principal cell]","[Local projecting, pyramidal, principal cell]","[pyramidal, principal cell]"
species,monkey,monkey,rat


### Notes on normal forms

There is some data that is not atomic so far, need to fix that. List of non-atomic data:
* brain_region
* cell_type
* experiment_condition
* reference_pmid
* rederence_doi

### Questions to think about:
* Cell type table for non-atomic
* Can I just drop certain columns I don't expect to use? 

### Non-atomic data

#### Brain Regions
The brain regions can just be different columns, named region, subregion, subsubregion

In [5]:
# got an error with NoneType, check if some have region as empty
sum(neuron_df['brain_region'].isnull())
# there are 2,512 with no brain region

2512

In [6]:
# check which are null
idx = neuron_df['brain_region'].isnull()
neuron_df[idx]

Unnamed: 0,neuron_id,neuron_name,archive,note,age_scale,gender,age_classification,brain_region,cell_type,species,...,max_age,min_weight,max_weight,png_url,reference_pmid,reference_doi,physical_Integrity,_links.self.href,_links.measurements.href,_links.persistence_vector.href
46504,144732,cs36_roi4_stacked,Brennand,This reconstruction belongs to C2 cell line. A...,Day,Male,fetus,,"[principal cell, induced pluripotent stem cell...",human,...,24.0,Not Reported,Not Reported,http://neuromorpho.org/images/imageFiles/Brenn...,[31548722],[10.1038/s41588-019-0497-5],Neurites Moderate,http://neuromorpho.org/api/neuron/id/144732,http://neuromorpho.org/api/morphometry/id/144732,http://neuromorpho.org/api/pvec/id/144732
46505,144733,cs11_roi1_stacked,Brennand,This reconstruction belongs to C2 cell line. A...,Day,Male,fetus,,"[principal cell, induced pluripotent stem cell...",human,...,24.0,Not Reported,Not Reported,http://neuromorpho.org/images/imageFiles/Brenn...,[31548722],[10.1038/s41588-019-0497-5],Neurites Moderate,http://neuromorpho.org/api/neuron/id/144733,http://neuromorpho.org/api/morphometry/id/144733,http://neuromorpho.org/api/pvec/id/144733
46506,144734,cs17_roi1_stacked,Brennand,This reconstruction belongs to C2 cell line. A...,Day,Male,fetus,,"[principal cell, induced pluripotent stem cell...",human,...,24.0,Not Reported,Not Reported,http://neuromorpho.org/images/imageFiles/Brenn...,[31548722],[10.1038/s41588-019-0497-5],Neurites Moderate,http://neuromorpho.org/api/neuron/id/144734,http://neuromorpho.org/api/morphometry/id/144734,http://neuromorpho.org/api/pvec/id/144734
46507,144735,cs10_roi5_stacked,Brennand,This reconstruction belongs to C2 cell line. A...,Day,Male,fetus,,"[principal cell, induced pluripotent stem cell...",human,...,24.0,Not Reported,Not Reported,http://neuromorpho.org/images/imageFiles/Brenn...,[31548722],[10.1038/s41588-019-0497-5],Neurites Moderate,http://neuromorpho.org/api/neuron/id/144735,http://neuromorpho.org/api/morphometry/id/144735,http://neuromorpho.org/api/pvec/id/144735
46508,144736,cs30_roi3_stacked,Brennand,This reconstruction belongs to C2 cell line. A...,Day,Male,fetus,,"[principal cell, induced pluripotent stem cell...",human,...,24.0,Not Reported,Not Reported,http://neuromorpho.org/images/imageFiles/Brenn...,[31548722],[10.1038/s41588-019-0497-5],Neurites Moderate,http://neuromorpho.org/api/neuron/id/144736,http://neuromorpho.org/api/morphometry/id/144736,http://neuromorpho.org/api/pvec/id/144736
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
235737,77489,298_Cyst2_3,Koshy,,Month,Male/Female,adult,,[principal cell],mouse,...,3.0,Not Reported,Not Reported,http://neuromorpho.org/images/imageFiles/Koshy...,"[26895155, 25549001]","[10.1371/journal.ppat.1005447, 10.3791/52237]",Dendrites Moderate,http://neuromorpho.org/api/neuron/id/77489,http://neuromorpho.org/api/morphometry/id/77489,http://neuromorpho.org/api/pvec/id/77489
235739,77490,298_Cyst4,Koshy,,Month,Male/Female,adult,,[principal cell],mouse,...,3.0,Not Reported,Not Reported,http://neuromorpho.org/images/imageFiles/Koshy...,"[26895155, 25549001]","[10.1371/journal.ppat.1005447, 10.3791/52237]",Dendrites Moderate,http://neuromorpho.org/api/neuron/id/77490,http://neuromorpho.org/api/morphometry/id/77490,http://neuromorpho.org/api/pvec/id/77490
235740,77491,298_Cyst6,Koshy,,Month,Male/Female,adult,,[principal cell],mouse,...,3.0,Not Reported,Not Reported,http://neuromorpho.org/images/imageFiles/Koshy...,"[26895155, 25549001]","[10.1371/journal.ppat.1005447, 10.3791/52237]",Dendrites Moderate,http://neuromorpho.org/api/neuron/id/77491,http://neuromorpho.org/api/morphometry/id/77491,http://neuromorpho.org/api/pvec/id/77491
235741,77492,298_Cyst9,Koshy,,Month,Male/Female,adult,,[principal cell],mouse,...,3.0,Not Reported,Not Reported,http://neuromorpho.org/images/imageFiles/Koshy...,"[26895155, 25549001]","[10.1371/journal.ppat.1005447, 10.3791/52237]",Dendrites Moderate,http://neuromorpho.org/api/neuron/id/77492,http://neuromorpho.org/api/morphometry/id/77492,http://neuromorpho.org/api/pvec/id/77492


In [7]:
# I can just make the null values have None in each of the 3 rows
neuron_df['brain_region_len'] = [len(x) if x != None else None for x in neuron_df['brain_region']]
#set([len(x) if x != None else None for x in neuron_df['brain_region']])

In [8]:
# there is sometimes more than 3 rows
neuron_df['brain_region_len'].value_counts()

brain_region_len
2.0    90535
3.0    76320
1.0    42354
4.0    39674
5.0     7529
6.0      784
7.0      300
8.0       70
Name: count, dtype: int64

**Looking at the website, they appear to keep the brain regions to 3 and just combine the rest of the regions in the brain region 3. I can combined it into a string. This at least allows for querying of the primary brain region. [An example neuron where there is more than 3 brain regions on the NeuroMorpho website](https://neuromorpho.org/neuron_info.jsp?neuron_name=TA-40x)**

In [9]:
# start by making 3 columns and putting an None list for each 
none_col = [None] * len(neuron_df)
# pbr = primary brain region
# sbr = seconary ...
# tbr = tertiary ...
pbr = pd.Series(none_col)
sbr = pd.Series(none_col)
tbr = pd.Series(none_col)

In [10]:
test = pd.Series(none_col)
test[0] = neuron_df['brain_region'][0][0]

In [11]:
for i in range(len(neuron_df)):
    region_list = neuron_df['brain_region'][i]
    if region_list != None:
        if len(region_list) == 1:
            pbr[i] = region_list[0]
        elif len(region_list) == 2:
            pbr[i] = region_list[0]
            sbr[i] = region_list[1]
        else:
            pbr[i] = region_list[0]
            sbr[i] = region_list[1]
            tbr[i] = ' '.join(region_list[2:])

In [12]:
neuron_df['primary_brain_region'] = pbr
neuron_df['secondary_brain_region'] = sbr
neuron_df['tertiary_brain_region'] = tbr

In [13]:
neuron_df.head(3).T

Unnamed: 0,0,1,2
neuron_id,1,10,100
neuron_name,cnic_001,cnic_041,n419
archive,Wearne_Hof,Wearne_Hof,Turner
note,"When originally released, this reconstruction ...","When originally released, this reconstruction ...","When originally released, this reconstruction ..."
age_scale,Year,Year,Month
gender,Male/Female,Male/Female,Male/Female
age_classification,old,old,young
brain_region,"[neocortex, prefrontal, layer 3]","[neocortex, prefrontal, layer 3]","[hippocampus, CA1]"
cell_type,"[Local projecting, pyramidal, principal cell]","[Local projecting, pyramidal, principal cell]","[pyramidal, principal cell]"
species,monkey,monkey,rat


#### Cell Type
Cell type table for non-atomic
* opting to just make a seperate table

In [14]:
neuron_df['cell_type'].value_counts()

cell_type
[Glia, microglia, Iba1-positive]                     59673
[principal cell, pyramidal]                          34029
[principal cell]                                     19146
[pyramidal, principal cell]                          10978
[Glia, astrocyte, GFAP-positive]                      5551
                                                     ...  
[principal cell, LPT23a]                                 1
[principal cell, LPT3b]                                  1
[sensory, horizontal-motion-sensitive neuron, H2]        1
[principal cell, LPT34b]                                 1
[principal cell, output neuron, AV4f1, AV4f]             1
Name: count, Length: 2786, dtype: int64

In [15]:
# a large number of elements in the cell type value, so make it a seperate table
set([len(x) if x != None else None for x in neuron_df['cell_type']])

{1, 10, 11, 15, 16, 2, 3, 4, 5, 6, 7, 8, 9, None}

In [16]:
# going to make these vlaues just their own rows, it will be a neuron_cell_type table that has id and cell type
neuron_cell_type = neuron_df[['neuron_id', 'cell_type']].explode('cell_type')
neuron_cell_type.head()

Unnamed: 0,neuron_id,cell_type
0,1,Local projecting
0,1,pyramidal
0,1,principal cell
1,10,Local projecting
1,10,pyramidal


#### Experiment Condition

In [17]:
# experiment condition can have 1, 2, or 3 values in the list of conditions
set([len(x) for x in neuron_df['experiment_condition']])

{1, 2, 3}

In [18]:
# 909 have greater than 1 experiment condition
len([x for x in neuron_df['experiment_condition'] if len(x) > 1])

909

In [19]:
[x for x in neuron_df['experiment_condition'] if len(x) > 1][0]

['Human APP with Indiana(V717F)',
 'Swedish(K670N+M671L)',
 'Arctic(E22G) mutations + C5a receptor 1 knockout']

In [20]:
# opting to do the same thing for experiment condition as I did for cell type
neuron_exp_condition = neuron_df[['neuron_id', 'experiment_condition']].explode('experiment_condition')

#### Domain

In [21]:
# could make a boolean value for dendrite/soma/axon/processes/neurites/
# do 0 for false and 1 for true
neuron_df['domain'].value_counts()

domain
Dendrites, Soma, No Axon       77532
Processes, Soma                60482
No Dendrites, No Soma, Axon    29936
Neurites, Soma                 24368
Dendrites, Soma, Axon          20901
Dendrites, No Soma, No Axon    19220
Neurites, No Soma              15108
Processes, No Soma             10576
No Dendrites, Soma, Axon        1313
Dendrites, No Soma, Axon         642
Name: count, dtype: int64

In [22]:
# dendrite, soma, axon, processes, neurites
dendrite = pd.Series(none_col)
soma = pd.Series(none_col)
axon = pd.Series(none_col)
processes = pd.Series(none_col)
neurites = pd.Series(none_col)

In [23]:
for i in range(len(neuron_df['domain'])):
    domains = neuron_df['domain'][i].split(', ')
    if 'Dendrites' in domains:
        dendrite[i] = 1
    else:
        dendrite[i] = 0
    if 'Soma' in domains:
        soma[i] = 1
    else:
        soma[i] = 0
    if 'Axon' in domains:
        axon[i] = 1
    else:
        axon[i] = 0
    if 'Processes' in domains:
        processes[i] = 1
    else:
        processes[i] = 0
    if 'Neurites' in domains:
        neurites[i] = 1
    else:
        neurites[i] = 0

In [24]:
neuron_df['dendrites'] = dendrite
neuron_df['soma'] = soma
neuron_df['axon'] = axon
neuron_df['processes'] = processes
neuron_df['neurites'] = neurites

#### Attributes

In [25]:
# could also make boolean values for diameter/3d/2d/angles/
neuron_df['attributes'].value_counts()

attributes
No Diameter, 3D, Angles    144045
Diameter, 3D, Angles        65550
No Diameter, 2D, Angles     42635
Diameter, 2D, Angles         7550
Diameter, 3D, No Angles       297
Diameter, 2D, No Angles         1
Name: count, dtype: int64

In [26]:
# same thing for attributes as domain
# Diameter, 3D, Angles
att_diameter = pd.Series(none_col)
att_angles = pd.Series(none_col)
att_3D = pd.Series(none_col)

In [27]:
for i in range(len(neuron_df['attributes'])):
    domains = neuron_df['attributes'][i].split(', ')
    if 'Diameter' in domains:
        att_diameter[i] = 1
    else:
        att_diameter[i] = 0
    if '3D' in domains:
        att_3D[i] = 1
    else:
        att_3D[i] = 0
    if 'Angles' in domains:
        att_angles[i] = 1
    else:
        att_angles[i] = 0

In [28]:
neuron_df['att_diameter'] = att_diameter
neuron_df['att_angles'] = att_angles
neuron_df['att_3D'] = att_3D

#### Reference pmid
* How can I express doi/pmid as non-atomic:
    * Publications table with the pmids and dois

In [29]:
neuron_df['reference_pmid']

0         [12204204, 12902394]
1         [12902394, 12204204]
2                    [9492204]
3                   [15054049]
4                   [19910365]
                  ...         
260073              [30013046]
260074              [30013046]
260075              [30013046]
260076              [30013046]
260077              [30013046]
Name: reference_pmid, Length: 260078, dtype: object

In [30]:
# most only have 1 pmid
pd.Series([len(x) for x in neuron_df['reference_pmid']]).value_counts()

1    256530
2      2178
4      1073
3       297
Name: count, dtype: int64

In [31]:
neuron_df['reference_doi']

0         [10.1016/S0306-4522(02)00305-6, 10.1093/cercor...
1         [10.1093/cercor/13.9.950, 10.1016/S0306-4522(0...
2         [10.1002/(SICI)1096-9861(19980216)391:3<335::A...
3                                   [10.1093/cercor/bhh029]
4                                      [10.1093/nar/gkp952]
                                ...                        
260073                          [10.1038/s41592-018-0049-4]
260074                          [10.1038/s41592-018-0049-4]
260075                          [10.1038/s41592-018-0049-4]
260076                          [10.1038/s41592-018-0049-4]
260077                          [10.1038/s41592-018-0049-4]
Name: reference_doi, Length: 260078, dtype: object

In [32]:
# some have 0 doi
pd.Series([len(x) if x != None else 0 for x in neuron_df['reference_doi']]).value_counts()

1    256515
2      2178
4      1073
3       297
0        15
Name: count, dtype: int64

In [33]:
# some don't have a doi but have a pmid
# not sure I can confirm that a gien doi matches to the pmid
# might be able to use an api to pull pmid using the doi
neuron_pmid = neuron_df[['neuron_id', 'reference_pmid']].explode('reference_pmid')
neuron_pmid

Unnamed: 0,neuron_id,reference_pmid
0,1,12204204
0,1,12902394
1,10,12902394
1,10,12204204
2,100,9492204
...,...,...
260073,99995,30013046
260074,99996,30013046
260075,99997,30013046
260076,99998,30013046


#### Reference doi

In [34]:
neuron_doi = neuron_df[['neuron_id', 'reference_doi']].explode('reference_doi')
neuron_doi

Unnamed: 0,neuron_id,reference_doi
0,1,10.1016/S0306-4522(02)00305-6
0,1,10.1093/cercor/13.9.950
1,10,10.1093/cercor/13.9.950
1,10,10.1016/S0306-4522(02)00305-6
2,100,10.1002/(SICI)1096-9861(19980216)391:3<335::AI...
...,...,...
260073,99995,10.1038/s41592-018-0049-4
260074,99996,10.1038/s41592-018-0049-4
260075,99997,10.1038/s41592-018-0049-4
260076,99998,10.1038/s41592-018-0049-4


In [35]:
# now remove all of the data that is now located on other tables
neuron_df = neuron_df.drop(['brain_region', 'cell_type', 'experiment_condition', 
                 'domain', 'attributes', 'reference_pmid', 'reference_doi'], axis = 1)
neuron_df.columns

Index(['neuron_id', 'neuron_name', 'archive', 'note', 'age_scale', 'gender',
       'age_classification', 'species', 'strain', 'scientific_name', 'stain',
       'protocol', 'slicing_direction', 'reconstruction_software',
       'objective_type', 'original_format', 'magnification', 'upload_date',
       'deposition_date', 'shrinkage_reported', 'shrinkage_corrected',
       'reported_value', 'reported_xy', 'reported_z', 'corrected_value',
       'corrected_xy', 'corrected_z', 'soma_surface', 'surface', 'volume',
       'slicing_thickness', 'min_age', 'max_age', 'min_weight', 'max_weight',
       'png_url', 'physical_Integrity', '_links.self.href',
       '_links.measurements.href', '_links.persistence_vector.href',
       'brain_region_len', 'primary_brain_region', 'secondary_brain_region',
       'tertiary_brain_region', 'dendrites', 'soma', 'axon', 'processes',
       'neurites', 'att_diameter', 'att_angles', 'att_3D'],
      dtype='object')

# morphology

In [36]:
morpho_df = pd.json_normalize(morpho)

In [37]:
morpho_df.head(3).T

Unnamed: 0,0,1,2
neuron_name,cnic_001,cnic_002,cnic_003
surface,8842.91,7257.42,10130.4
volume,4725.89,3468.16,4742.74
soma_Surface,834.0,669.819,843.96
n_stems,6.0,7.0,6.0
n_bifs,47.0,43.0,48.0
n_branch,100.0,93.0,102.0
width,230.779,201.5,330.96
height,330.4,298.69,343.73
depth,84.73,70.62,80.13


**This data follows the normal forms. It goes with the neuron table so we can merge the two**

In [38]:
len(morpho_df)

260078

In [39]:
len(neuron_df)

260078

In [40]:
test = pd.merge(neuron_df, morpho_df,
        on = ['neuron_id', 'neuron_name'],
        how = 'outer',
        validate = 'one_to_one',
        indicator = True)

In [41]:
test.query("_merge == 'right_only'")

Unnamed: 0,neuron_id,neuron_name,archive,note,age_scale,gender,age_classification,species,strain,scientific_name,...,branch_Order,contraction,fragmentation,partition_asymmetry,pk_classic,bif_ampl_local,fractal_Dim,bif_ampl_remote,length,_merge
260078,51958,605706,,,,,,,,,...,67.0,0.92313,2134.0,0.675423,1.41085,93.3333,1.09581,91.9344,547.766,right_only
260079,54499,P6-animal1-2,,,,,,,,,...,7.0,0.904933,352.0,0.398196,2.0,29.0535,1.03612,54.3384,446.434,right_only
260080,78668,cell-4ca3_2,,,,,,,,,...,6.0,0.831715,313.0,0.462963,1.58199,89.8503,1.07486,63.6774,524.177,right_only
260081,105715,20180701_Pair2_pre,,,,,,,,,...,11.0,0.901207,4916.0,0.429722,1.64435,85.5518,1.03231,65.8053,5696.87,right_only
260082,105716,20180711_Pair5_pre,,,,,,,,,...,14.0,0.92687,4248.0,0.480893,3.22361,78.5712,1.02378,66.935,5747.59,right_only
260083,105717,20180712_Pair7_post,,,,,,,,,...,12.0,0.900483,2878.0,0.593584,2.83422,90.5629,1.03773,79.2214,4530.21,right_only
260084,105718,20180702_Pair3_post,,,,,,,,,...,7.0,0.910768,9635.0,0.444841,1.24858,87.9668,1.02338,78.7374,4642.28,right_only
260085,105719,20180701_Pair2_post,,,,,,,,,...,13.0,0.906014,4265.0,0.582992,2.79937,87.5139,1.0301,72.9552,4994.12,right_only
260086,105720,20180711_Pair5_post,,,,,,,,,...,16.0,0.911843,2689.0,0.591481,3.10785,82.6465,1.03016,73.1698,3134.67,right_only
260087,105721,20180712_Pair7_pre,,,,,,,,,...,13.0,0.914613,3079.0,0.501014,1.63845,77.9696,1.02907,65.2235,4244.61,right_only


In [42]:
test.query("_merge == 'left_only'")

Unnamed: 0,neuron_id,neuron_name,archive,note,age_scale,gender,age_classification,species,strain,scientific_name,...,branch_Order,contraction,fragmentation,partition_asymmetry,pk_classic,bif_ampl_local,fractal_Dim,bif_ampl_remote,length,_merge
6230,105715,KW20180701_Pair2_pre,Yousheng,Synaptic contact from Pyramidal cell to Pyrami...,Day,Male/Female,young adult,mouse,C57B6,,...,,,,,,,,,,left_only
6231,105716,KW20180711_Pair5_pre,Yousheng,Synaptic contact from Pyramidal cell to Pyrami...,Day,Male/Female,young adult,mouse,C57B6,,...,,,,,,,,,,left_only
6232,105717,KW20180712_Pair7_post,Yousheng,Synaptic contact from Pyramidal cell to Pyrami...,Day,Male/Female,young adult,mouse,C57B6,,...,,,,,,,,,,left_only
6233,105718,KW20180702_Pair3_post,Yousheng,Synaptic contact from Pyramidal cell to Pyrami...,Day,Male/Female,young adult,mouse,C57B6,,...,,,,,,,,,,left_only
6234,105719,KW20180701_Pair2_post,Yousheng,Synaptic contact from Pyramidal cell to Pyrami...,Day,Male/Female,young adult,mouse,C57B6,,...,,,,,,,,,,left_only
6236,105720,KW20180711_Pair5_post,Yousheng,Synaptic contact from Pyramidal cell to Pyrami...,Day,Male/Female,young adult,mouse,C57B6,,...,,,,,,,,,,left_only
6237,105721,KW20180712_Pair7_pre,Yousheng,Synaptic contact from Pyramidal cell to Pyrami...,Day,Male/Female,young adult,mouse,C57B6,,...,,,,,,,,,,left_only
6238,105722,KW20180702_Pair3_pre,Yousheng,Synaptic contact from Pyramidal cell to Pyrami...,Day,Male/Female,young adult,mouse,C57B6,,...,,,,,,,,,,left_only
6239,105723,KW20180712_Pair8_post,Yousheng,Synaptic contact from Pyramidal cell to Pyrami...,Day,Male/Female,young adult,mouse,C57B6,,...,,,,,,,,,,left_only
6240,105724,KW20180712_Pair8_pre,Yousheng,Synaptic contact from Pyramidal cell to Pyrami...,Day,Male/Female,young adult,mouse,C57B6,,...,,,,,,,,,,left_only


In [43]:
# this is a case where the neuron_ids match but the names don't
morpho_df.query("neuron_id == 105716")
# KW20180701_Pair2_pre
# 20180701_Pair2_pre

Unnamed: 0,neuron_name,surface,volume,soma_Surface,n_stems,n_bifs,n_branch,width,height,depth,...,branch_Order,contraction,fragmentation,partition_asymmetry,pk_classic,bif_ampl_local,fractal_Dim,neuron_id,bif_ampl_remote,length
104354,20180711_Pair5_pre,19739.4,7323.27,507.638,9.0,60.0,129.0,342.16,573.02,33.3,...,14.0,0.92687,4248.0,0.480893,3.22361,78.5712,1.02378,105716,66.935,5747.59


In [44]:
# look to just merge on neuron_id
id_only = pd.merge(neuron_df, morpho_df,
        on = ['neuron_id'],
        how = 'outer',
        validate = 'one_to_one',
        indicator = True)

In [45]:
# this worked
id_only.query("_merge == 'right_only'")

Unnamed: 0,neuron_id,neuron_name_x,archive,note,age_scale,gender,age_classification,species,strain,scientific_name,...,branch_Order,contraction,fragmentation,partition_asymmetry,pk_classic,bif_ampl_local,fractal_Dim,bif_ampl_remote,length,_merge


In [46]:
# there's only 37 where there is not a correct name match, I am going to drop the neuron names for y and use just the x ones
len(neuron_df) - sum(id_only['neuron_name_x'] == id_only['neuron_name_y'])

37

In [47]:
# do the merge without validation now
neuron_df = pd.merge(neuron_df, morpho_df,
        on = ['neuron_id'],
        how = 'inner')

In [48]:
# drop the y neuron name and change the neuron_name_x to just neuron_name
neuron_df = neuron_df.drop(['neuron_name_y'], axis = 1)
neuron_df = neuron_df.rename(columns = {'neuron_name_x': 'neuron_name'})
neuron_df.head(3)

Unnamed: 0,neuron_id,neuron_name,archive,note,age_scale,gender,age_classification,species,strain,scientific_name,...,pathDistance,branch_Order,contraction,fragmentation,partition_asymmetry,pk_classic,bif_ampl_local,fractal_Dim,bif_ampl_remote,length
0,1,cnic_001,Wearne_Hof,"When originally released, this reconstruction ...",Year,Male/Female,old,monkey,Rhesus,Macaca mulatta,...,253.921,8.0,0.934755,1274.0,0.413619,1.52521,33.3799,1.01989,50.461,4911.5
1,10,cnic_041,Wearne_Hof,"When originally released, this reconstruction ...",Year,Male/Female,old,monkey,Rhesus,Macaca mulatta,...,264.03,7.0,0.909303,1038.0,0.410981,1.99859,66.5523,1.02661,52.7609,3450.3
2,100,n419,Turner,"When originally released, this reconstruction ...",Month,Male/Female,young,rat,Fischer 344,rattus norvegicus,...,1078.42,19.0,0.772968,4223.0,0.447733,1.68693,79.1183,1.04104,57.4892,13143.0


In [49]:
with pd.option_context('display.max_rows', None, 'display.max_columns', None):
    display(neuron_df.head(3).T)

Unnamed: 0,0,1,2
neuron_id,1,10,100
neuron_name,cnic_001,cnic_041,n419
archive,Wearne_Hof,Wearne_Hof,Turner
note,"When originally released, this reconstruction ...","When originally released, this reconstruction ...","When originally released, this reconstruction ..."
age_scale,Year,Year,Month
gender,Male/Female,Male/Female,Male/Female
age_classification,old,old,young
species,monkey,monkey,rat
strain,Rhesus,Rhesus,Fischer 344
scientific_name,Macaca mulatta,Macaca mulatta,rattus norvegicus


**neuron_df is now set to be put in the database**

# electro

In [50]:
with pd.option_context('display.max_rows', None, 'display.max_columns', None):
    display(electro.head().T)

Unnamed: 0,0,1,2,3,4
Index,150,149,151,152,88
Title,Cell type-specific effects of adenosine on cor...,Cell type-specific effects of adenosine on cor...,Cell type-specific effects of adenosine on cor...,Cell type-specific effects of adenosine on cor...,Age-related changes to layer 3 pyramidal cells...
Pmid,24108800,24108800,24108800,24108800,24323499
PubYear,2015,2015,2015,2015,2015
LastAuthor,Feldmeyer D,Feldmeyer D,Feldmeyer D,Feldmeyer D,Peters A
ArticleID,88449,88449,88449,88449,88469
TableID,35984.0,35984.0,35984.0,35984.0,36033.0
NeuronName,Neocortex basket cell,Neocortex Martinotti cell,Neocortex basket cell,Neocortex interneuron deep,Neocortex pyramidal cell layer 2-3
NeuronLongName,medial prefrontal cortex fast spiking interne...,medial prefrontal cortex low-threshold spiking...,somatosensory cortex fast-firing interneuron,somatosensory cortex non-fast firing interneuron,Primary visual cortex layer 3 pyramidal aged n...
NeuronPrefName,medial prefrontal cortex fast spiking interne...,medial prefrontal cortex low-threshold spiking...,somatosensory cortex fast-firing interneuron,somatosensory cortex non-fast firing interneuron,Primary visual cortex layer 3 pyramidal aged n...


## Notes on Normal Forms
* Atomic form violated
    * NeuroNERAnnots
* Can remove the values for the paper because they do not depend on the index

#### NeuroNERAnnots

In [51]:
# the lists are stored as string, I am not sure what these values mean and there is not documentation
electro['NeuroNERAnnots'][0]

'[ABA_REGION:714, HBP_EPHYS:0000080, HBP_EPHYS_TRIGGER:0000003, NeuronTrigger:interneuron]'

In [52]:
# find columns that are entirely nan values
empty = []
for i in electro.columns:
    # checking if fully null
    if sum(electro[i].isnull()) == len(electro):
        empty += [i]

In [53]:
empty

['FlagSoln',
 'sagamp_note',
 'surfarea_note',
 'mahpdur_note',
 'adpctinv_note',
 'adpdur_note',
 'mahpamprest_note',
 'sahpamprest_note',
 'fahpamprest_note',
 'sahpvolt',
 'sahpvolt_raw',
 'sahpvolt_err',
 'sahpvolt_n',
 'sahpvolt_sd',
 'sahpvolt_note',
 'mahpvolt',
 'mahpvolt_raw',
 'mahpvolt_err',
 'mahpvolt_n',
 'mahpvolt_sd',
 'mahpvolt_note']

In [54]:
drop_cols = ['NeuroNERAnnots'] + empty

In [55]:
# drop the NeuroNERAnnots and mahpvolt_note
electro = electro.drop(drop_cols, axis = 1)

In [56]:
electro[['Pmid', 'ArticleID']].drop_duplicates(['Pmid', 'ArticleID'])

Unnamed: 0,Pmid,ArticleID
0,24108800,88449
4,24323499,88469
6,24554724,88596
8,24904071,88491
10,25450961,92581
...,...,...
1510,7807199,32143
1511,8229799,39223
1512,2558172,85547
1513,3309264,44985


In [57]:
# table is not unique to the Pmid, keep on the electro table

In [58]:
electro_pub = electro[['Pmid', 'ArticleID','Title', 'PubYear', 'LastAuthor']].drop_duplicates(['Pmid', 'ArticleID'])
electro_pub

Unnamed: 0,Pmid,ArticleID,Title,PubYear,LastAuthor
0,24108800,88449,Cell type-specific effects of adenosine on cor...,2015,Feldmeyer D
4,24323499,88469,Age-related changes to layer 3 pyramidal cells...,2015,Peters A
6,24554724,88596,Transient Hearing Loss Within a Critical Perio...,2015,Sanes DH
8,24904071,88491,Functional Maturation of GABA Synapses During ...,2015,Lewis DA
10,25450961,92581,Developmental increase in hyperpolarization-ac...,2015,Kitamura K
...,...,...,...,...,...
1510,7807199,32143,Electrophysiology of globus pallidus neurons i...,1994,LlinÃ¡s R
1511,8229799,39223,Membrane properties of rat suprachiasmatic nuc...,1993,Dudek FE
1512,2558172,85547,Intrinsic properties of nucleus reticularis th...,1989,Spreafico R
1513,3309264,44985,The ventral and dorsal lateral geniculate nucl...,1987,Pirchio M


In [59]:
electro_pub['Pmid'] = electro_pub['Pmid'].astype('object')

In [60]:
electro = electro.drop(['Title', 'PubYear', 'LastAuthor', 'ArticleID'], axis = 1)
electro.head(3).T

Unnamed: 0,0,1,2
Index,150,149,151
Pmid,24108800,24108800,24108800
TableID,35984.0,35984.0,35984.0
NeuronName,Neocortex basket cell,Neocortex Martinotti cell,Neocortex basket cell
NeuronLongName,medial prefrontal cortex fast spiking interne...,medial prefrontal cortex low-threshold spiking...,somatosensory cortex fast-firing interneuron
...,...,...,...
fahpamprest,,,
fahpamprest_raw,,,
fahpamprest_err,,,
fahpamprest_n,,,


In [61]:
electro['BrainRegion'].value_counts()

BrainRegion
Isocortex                380
Hippocampal formation    279
Striatum                 110
Medulla                   73
Olfactory areas           61
Midbrain                  53
Thalamus                  36
Cerebellum                35
Pallidum                  34
Cortical subplate         22
Hypothalamus              17
Name: count, dtype: int64

In [62]:
# make BrainRegion lowercase
def lowercase(x):
    try:
        return x.lower()
    except:
        return None
electro['BrainRegion'] = electro['BrainRegion'].apply(lambda x: lowercase(x))
electro['Species'] = electro['Species'].apply(lambda x: lowercase(x))

In [63]:
unique_electro_regions = [x for x in pd.unique(electro['BrainRegion'])]

In [64]:
unique_neuron_regions = [x for x in pd.unique(neuron_df['primary_brain_region'])]

In [65]:
unique_neuron_regions

['neocortex',
 'hippocampus',
 'somatic nervous system',
 'basal ganglia',
 'ventral striatum',
 'cerebellum',
 'peripheral nervous system',
 'lateral line organ',
 'ventral nerve cord',
 'main olfactory bulb',
 'spinal cord',
 'mesencephalon',
 'retina',
 'forebrain',
 'optic lobe',
 'posterior',
 'antennal lobe',
 'lateral',
 'accessory lobe',
 'subesophageal ganglion',
 'protocerebrum',
 'subesophageal zone-(SEZ)',
 'basal forebrain',
 'brainstem',
 'amygdala',
 'subiculum',
 'pallium',
 'myelencephalon',
 'anterior olfactory nucleus',
 'pons',
 'Central nervous system',
 'adult subesophageal zone',
 'hypothalamus',
 'thalamus',
 'Central complex',
 'Right Adult Central Complex',
 'Left Adult Central Complex',
 'Pro-subiculum',
 'stomatogastric ganglion',
 'Right Mushroom Body',
 'ventrolateral neuropils',
 'Left Mushroom Body',
 'lateral horn',
 'olfactory cortex',
 'ventral thalamus',
 'parasubiculum',
 'entorhinal cortex',
 'columns of the fornix',
 'fornix',
 'corpus callosum',


In [66]:
results = []
for i in unique_electro_regions:
    data = {'region': i, 'in_neuron_df': (i in neuron_df)}
    results += [data]

In [67]:
results

[{'region': 'isocortex', 'in_neuron_df': False},
 {'region': None, 'in_neuron_df': False},
 {'region': 'midbrain', 'in_neuron_df': False},
 {'region': 'hippocampal formation', 'in_neuron_df': False},
 {'region': 'thalamus', 'in_neuron_df': False},
 {'region': 'striatum', 'in_neuron_df': False},
 {'region': 'pallidum', 'in_neuron_df': False},
 {'region': 'cortical subplate', 'in_neuron_df': False},
 {'region': 'olfactory areas', 'in_neuron_df': False},
 {'region': 'medulla', 'in_neuron_df': False},
 {'region': 'cerebellum', 'in_neuron_df': False},
 {'region': 'hypothalamus', 'in_neuron_df': False}]

In [68]:
# check if the neuron_df merges on the electro dataset with species
# can match on zebrafish and goldfish, not very helpful
test_merge = pd.merge(neuron_df, electro,
        left_on = 'species',
        right_on = 'Species',
        how = 'left',
        indicator = True)
test_merge.query("_merge == 'both'")['species'].value_counts()

species
zebrafish    6021
goldfish      117
Name: count, dtype: int64

In [69]:
# check if the neuron_df merges on the electro dataset with brain region
# keeps killing my kernel
#test_merge = pd.merge(neuron_df, electro,
        #left_on = 'primary_brain_region',
        #right_on = 'BrainRegion',
        #how = 'inner')

In [70]:
electro.rename({'Index': 'electro_id'}, axis = 1, inplace = True)

In [71]:
electro_pub.rename({'Index': 'electro_id'}, axis = 1, inplace = True)

# models

In [72]:
models_df = pd.json_normalize(models)

In [73]:
with pd.option_context('display.max_rows', None, 'display.max_columns', None):
    display(models_df.head(3).T)

Unnamed: 0,0,1,2
id,279,2487,2488
name,Low Threshold Calcium Currents in TC cells (De...,Olfactory Mitral Cell (Davison et al 2000),Influence of dendritic structure on neocortica...
created,2001-01-01T00:00:00,2001-04-05T22:35:35,2001-04-05T22:36:18
ver_number,24,14,24
ver_date,2015-01-02T22:01:45,2022-05-27T18:13:12.8,2018-11-16T15:29:16.747
class_id,19,19,19
gitrepo,True,False,True
notes.value,"In Destexhe, Neubig, Ulrich, and Huguenard (19...",A four-compartment model of a mammalian olfact...,This package contains compartmental models of ...
notes.attr_id,24.0,24.0,24.0
neurons.value,"[{'object_id': 262, 'object_name': 'Thalamus g...","[{'object_id': 267, 'object_name': 'Olfactory ...","[{'object_id': 265, 'object_name': 'Neocortex ..."


Looking at this data frame, it is just a list of json objects that allows it to combine to other values from different API calls. I should only keep the values for the data I have or am interested in. The table doesn't make a lot of sense without joining onto the other tables from other endpoints. I want to keep the models table as just the ID and unique information though.

Right now I have data for the cell types, regions, and papers. I could get more data for the other tables I find interesting though.

#### List of additional data to get:
*  neurons
*  concetps
*  implemented
*  views
*  currents

In [74]:
def normalize_json_objects(df, column, column_names):
    '''
    df = data frame to expand
    column = column to expand
    column_names = names for new data frame to be returned
    '''
    values = []
    for index, row in df.iterrows():
        try:
            id = row['id']
            data = pd.json_normalize(row[column])
            data['id'] = pd.Series([id] * len(data))
            values += data.values.tolist()
        except:
            pass
    return pd.DataFrame(values, columns = column_names)

In [75]:
model_currents_df = normalize_json_objects(models_df, 'currents.value', ['model_current_id', 'model_current_name', 'model_id'])

model_neurons_df = normalize_json_objects(models_df, 'neurons.value', ['model_neuron_ids', 'neuron_name', 'model_id'])

model_types_df = normalize_json_objects(models_df, 'model_type.value', ['model_type_id', 'model_type_name', 'model_id'])

model_concepts_df = normalize_json_objects(models_df, 'model_concept.value', ['model_concept_ids', 'model_concept_name', 'model_id'])

model_applications_df = normalize_json_objects(models_df, 'modeling_application.value', ['model_application_ids', 'model_application_name', 'model_id'])

model_paper_df = normalize_json_objects(models_df, 'model_paper.value', ['model_paper_id', 'model_paper_name', 'model_id'])

model_runprotocols_df = normalize_json_objects(models_df, 'runprotocols.value', ['model_runprotocols_id', 'model_runprotocols_name', 'model_id'])

model_implemented_df = normalize_json_objects(models_df, 'implemented_by.value', ['model_implemented_id', 'model_implemented_name', 'model_id'])

model_views_df = normalize_json_objects(models_df, 'modelviews.all', ['model_views_name', 'model_id'])

model_region_df = normalize_json_objects(models_df, 'region.value', ['model_region_id', 'model_region_name', 'model_id'])

model_transmitter_df = normalize_json_objects(models_df, 'neurotransmitters.value', ['model_transmitter_id', 'model_transmitter_name', 'model_id'])

model_species_df = normalize_json_objects(models_df, 'species.value', ['model_species_id', 'model_species_name', 'model_id'])

In [76]:
# model_runprotocols_name was just the model_id with subscripts
model_runprotocols_df = model_runprotocols_df[['model_runprotocols_id', 'model_id']]

In [77]:
# remove the columns from the models_df, most are just reminants of the json

models_df = models_df[['id', 'name', 'created', 'ver_number', 'ver_date', 'class_id', 'gitrepo']]

In [78]:
with pd.option_context('display.max_rows', None, 'display.max_columns', None):
    display(models_df.head(3).T)

Unnamed: 0,0,1,2
id,279,2487,2488
name,Low Threshold Calcium Currents in TC cells (De...,Olfactory Mitral Cell (Davison et al 2000),Influence of dendritic structure on neocortica...
created,2001-01-01T00:00:00,2001-04-05T22:35:35,2001-04-05T22:36:18
ver_number,24,14,24
ver_date,2015-01-02T22:01:45,2022-05-27T18:13:12.8,2018-11-16T15:29:16.747
class_id,19,19,19
gitrepo,True,False,True


In [79]:
model_currents_df

Unnamed: 0,model_current_id,model_current_name,model_id
0,242,"I Na,t",279
1,245,I T low threshold,279
2,248,I K,279
3,242,"I Na,t",2487
4,243,I L high threshold,2487
...,...,...,...
4729,253,"I K,Ca",2015414
4730,248,I K,2014996
4731,241,"I Na,p",2014996
4732,137264,I trp,2015413


# celltypes

**has neuro_electro data and url to search for neuro_morpho**

In [80]:
celltypes_df = pd.json_normalize(celltypes)

In [81]:
with pd.option_context('display.max_rows', None, 'display.max_columns', None):
    display(celltypes_df.head(3).T)

Unnamed: 0,0,1,2
id,257,258,259
name,Dentate gyrus granule GLU cell,Hippocampus CA1 pyramidal GLU cell,Hippocampus CA3 pyramidal GLU cell
created,2000-06-23T00:00:00,2000-06-23T00:00:00,2000-06-23T00:00:00
ver_number,13.0,17.0,13.0
ver_date,2015-12-18T11:13:46.103,2015-12-18T11:11:51.99,2015-12-18T11:12:17.647
class_id,18,18,18
Picture.value,"[{'file_name': '', 'file_ext': 'gif', 'file_mi...","[{'file_name': 'newCA1.gif', 'file_ext': 'gif'...","[{'file_name': '', 'file_ext': 'gif', 'file_mi..."
Picture.attr_id,22.0,22.0,22.0
Can_Form_type.value,"[{'object_id': 285, 'object_name': 'Canonical ...","[{'object_id': 86121, 'object_name': 'Canonica...","[{'object_id': 291, 'object_name': 'Canonical ..."
Can_Form_type.attr_id,48.0,48.0,48.0


In [82]:
len(celltypes_df)

278

In [83]:
# neuroelectro.value may be the link
celltypes_df['neuroelectro.value'].value_counts()

neuroelectro.value
99         3
209        2
111        2
110        2
66         1
177        1
233        1
78         1
184        1
21         1
115        1
148        1
128        1
130        1
40         1
175        1
85         1
37         1
18         1
150        1
135        1
129        1
190        1
194        1
183        1
117        1
89         1
passive    1
Name: count, dtype: int64

In [84]:
electro.query("electro_id == 507")

Unnamed: 0,electro_id,Pmid,TableID,NeuronName,NeuronLongName,NeuronPrefName,BrainRegion,MetadataCurated,Species,Strain,...,sahpamprest,sahpamprest_raw,sahpamprest_err,sahpamprest_n,sahpamprest_sd,fahpamprest,fahpamprest_raw,fahpamprest_err,fahpamprest_n,fahpamprest_sd
115,507,24592213,27442.0,Dentate gyrus granule cell,Dentate gyrus more mature granule cell with ov...,Dentate gyrus more mature granule cell with ov...,hippocampal formation,True,rats,Wistar,...,,,,,,,,,,


In [85]:
celltypes_df['neuromorpho_url.value']

0                                                    NaN
1      https://neuromorpho.org/MetaDataResult.jsp?cou...
2                                                    NaN
3                                                    NaN
4                                                    NaN
                             ...                        
273                                                  NaN
274                                                  NaN
275                                                  NaN
276                                                  NaN
277                                                  NaN
Name: neuromorpho_url.value, Length: 278, dtype: object

In [86]:
# these names are too specific to match on with the neurons
celltypes_df['name']

0                         Dentate gyrus granule GLU cell
1                     Hippocampus CA1 pyramidal GLU cell
2                     Hippocampus CA3 pyramidal GLU cell
3      Neostriatum medium spiny direct pathway GABA cell
4                 Substantia nigra pars compacta DA cell
                             ...                        
273                           Spinal cord Ia interneuron
274                 Dopaminergic substantia nigra neuron
275                            Vestibular nucleus neuron
276                    Barrel cortex L2/3 pyramidal cell
277                                 Pancreatic Beta Cell
Name: name, Length: 278, dtype: object

In [87]:
# columns to keep
# id, name, neuroelectro.attr_id, description.value, neurolex.value
celltypes_df = celltypes_df[['id', 'name', 'neuroelectro.attr_id', 'description.value', 'neurolex.value']]

In [88]:
celltypes_df = celltypes_df.rename({'id': 'model_celltypes_id',
                    'neuroelectro.attr_id': 'electro_id',
                    'description.value': 'description',
                    'neurolex.value': 'neurolex_name'}, axis = 1)

# regions

In [89]:
regions_df = pd.json_normalize(regions)

In [90]:
with pd.option_context('display.max_rows', None, 'display.max_columns', None):
    display(regions_df.head(3).T)

Unnamed: 0,0,1,2
id,115945,115946,115947
name,Neocortex,Hippocampus,Dentate gyrus
created,2008-10-06T17:06:41,2008-10-06T17:07:19,2008-10-06T17:07:49
ver_number,1,1,1
ver_date,2008-10-06T17:06:41,2008-10-06T17:07:19,2008-10-06T17:07:49
class_id,144,144,144
parent.object_name,Vertebrate regions,Vertebrate regions,Vertebrate regions


In [91]:
regions_df

Unnamed: 0,id,name,created,ver_number,ver_date,class_id,parent.object_name
0,115945,Neocortex,2008-10-06T17:06:41,1,2008-10-06T17:06:41,144,Vertebrate regions
1,115946,Hippocampus,2008-10-06T17:07:19,1,2008-10-06T17:07:19,144,Vertebrate regions
2,115947,Dentate gyrus,2008-10-06T17:07:49,1,2008-10-06T17:07:49,144,Vertebrate regions
3,115948,Turtle cortex,2008-10-06T17:08:14,1,2008-10-06T17:08:14,144,Vertebrate regions
4,115949,Olfactory cortex,2008-10-06T17:09:18,1,2008-10-06T17:09:18,144,Vertebrate regions
5,115950,Olfactory bulb,2008-10-06T17:09:51,1,2008-10-06T17:09:51,144,Vertebrate regions
6,115951,Thalamus,2008-10-06T17:10:15,1,2008-10-06T17:10:15,144,Vertebrate regions
7,115952,Basal ganglia,2008-10-06T17:10:36,1,2008-10-06T17:10:36,144,Vertebrate regions
8,115953,Cerebellum,2008-10-06T17:11:05,1,2008-10-06T17:11:05,144,Vertebrate regions
9,115954,Spinal motoneuron,2008-10-06T17:11:40,2,2008-10-06T17:41:01,144,Vertebrate regions


In [92]:
regions_df = regions_df.rename(columns = {'name': 'primary_brain_region'})
regions_df['primary_brain_region'] = regions_df['primary_brain_region'].apply(lambda x: x.lower())

In [93]:
# merge the regions on the neuron_df data set to see if there is overlap
test_merge = pd.merge(neuron_df, regions_df,
        on = 'primary_brain_region',
        validate = 'many_to_one',
        how = 'left',
        indicator = True)

In [94]:
# they can be merged on the brain region which is helpful
test_merge.query("_merge == 'both'")

Unnamed: 0,neuron_id,neuron_name,archive,note,age_scale,gender,age_classification,species,strain,scientific_name,...,fractal_Dim,bif_ampl_remote,length,id,created,ver_number,ver_date,class_id,parent.object_name,_merge
0,1,cnic_001,Wearne_Hof,"When originally released, this reconstruction ...",Year,Male/Female,old,monkey,Rhesus,Macaca mulatta,...,1.01989,50.4610,4911.50,115945.0,2008-10-06T17:06:41,1.0,2008-10-06T17:06:41,144.0,Vertebrate regions,both
1,10,cnic_041,Wearne_Hof,"When originally released, this reconstruction ...",Year,Male/Female,old,monkey,Rhesus,Macaca mulatta,...,1.02661,52.7609,3450.30,115945.0,2008-10-06T17:06:41,1.0,2008-10-06T17:06:41,144.0,Vertebrate regions,both
2,100,n419,Turner,"When originally released, this reconstruction ...",Month,Male/Female,young,rat,Fischer 344,rattus norvegicus,...,1.04104,57.4892,13143.00,115946.0,2008-10-06T17:07:19,1.0,2008-10-06T17:07:19,144.0,Vertebrate regions,both
3,1000,DS3_030701,Staiger,"When originally released, this reconstruction ...",Day,Male,young,rat,Wistar,rattus norvegicus,...,1.04026,57.4939,16972.20,115945.0,2008-10-06T17:06:41,1.0,2008-10-06T17:06:41,144.0,Vertebrate regions,both
5,100000,test_set_skeleton_246,Kornfeld,,Day,Male,adult,zebra finch,Not reported,,...,1.10664,95.8647,1592.93,115952.0,2008-10-06T17:10:36,1.0,2008-10-06T17:10:36,144.0,Vertebrate regions,both
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
260073,99995,test_set_skeleton_2,Kornfeld,,Day,Male,adult,zebra finch,Not reported,,...,1.10539,80.5452,1948.35,115952.0,2008-10-06T17:10:36,1.0,2008-10-06T17:10:36,144.0,Vertebrate regions,both
260074,99996,test_set_skeleton_214,Kornfeld,,Day,Male,adult,zebra finch,Not reported,,...,1.06018,99.6512,2196.69,115952.0,2008-10-06T17:10:36,1.0,2008-10-06T17:10:36,144.0,Vertebrate regions,both
260075,99997,test_set_skeleton_227,Kornfeld,,Day,Male,adult,zebra finch,Not reported,,...,1.07523,96.1616,1813.50,115952.0,2008-10-06T17:10:36,1.0,2008-10-06T17:10:36,144.0,Vertebrate regions,both
260076,99998,test_set_skeleton_233,Kornfeld,,Day,Male,adult,zebra finch,Not reported,,...,1.07932,91.0878,2962.11,115952.0,2008-10-06T17:10:36,1.0,2008-10-06T17:10:36,144.0,Vertebrate regions,both


In [95]:
regions_df = regions_df.rename({'id': 'model_region_id',
                  'parent.object_name': 'parent'}, axis = 1)

# papers

In [96]:
papers_df = pd.json_normalize(papers)

In [97]:
with pd.option_context('display.max_rows', None, 'display.max_columns', None):
    display(papers_df.head(3).T)

Unnamed: 0,0,1,2
id,4161,4036,4252
name,"Destexhe A, Neubig M, Ulrich D, Huguenard J (1...","Davison AP, Feng J, Brown D (2000)","Mainen ZF, Sejnowski TJ (1996)"
created,2002-03-15T11:18:19,2002-03-15T11:06:36,2002-03-15T11:23:56
ver_number,5,5,5
ver_date,2023-05-08T16:49:08.358398,2023-05-08T16:49:08.328236,2023-05-08T16:49:10.952346
class_id,42.0,42.0,42.0
title.value,Dendritic low-threshold calcium currents in th...,A reduced compartmental model of the mitral ce...,Influence of dendritic structure on firing pat...
title.attr_id,139.0,139.0,139.0
references.value,"[{'object_id': 4170, 'object_name': 'Destexhe ...","[{'object_id': 4373, 'object_name': 'Bush PC, ...","[{'object_id': 4290, 'object_name': 'Mainen ZF..."
references.attr_id,140.0,140.0,140.0


In [98]:
papers_df = papers_df[['id', 'name', 'created', 'title.value', 'pubmed_id.value', 
                       'year.value', 'journal.value', 'doi.value', 'doi.value_lower']]

In [99]:
papers_df = papers_df.rename({'id': 'model_paper_id',
                 'title.value': 'title',
                 'pubmed_id.value': 'pubmed',
                 'year.value': 'year',
                 'journal.value': 'journal',
                 'doi.value': 'doi',
                 'doi.value_lower': 'doi_lower'}, axis = 1)

In [100]:
# check if the doi match 
test_merge = pd.merge(neuron_doi, papers_df,
        left_on = 'reference_doi',
        right_on = 'doi',
        how = 'outer',
        indicator = True)

In [101]:
# 2388 rows match on the doi
test_merge.query("_merge == 'both'")

Unnamed: 0,neuron_id,reference_doi,model_paper_id,name,created,title,pubmed,year,journal,doi,doi_lower,_merge
2102,10033.0,10.1152/jn.00891.2012,155603.0,"Yu J, Proddutur A, Elgammal FS, Ito T, Santhak...",2014-06-30T09:13:37,Status epilepticus enhances tonic GABA current...,23324316,2013,Journal of neurophysiology,10.1152/jn.00891.2012,10.1152/jn.00891.2012,both
2103,10034.0,10.1152/jn.00891.2012,155603.0,"Yu J, Proddutur A, Elgammal FS, Ito T, Santhak...",2014-06-30T09:13:37,Status epilepticus enhances tonic GABA current...,23324316,2013,Journal of neurophysiology,10.1152/jn.00891.2012,10.1152/jn.00891.2012,both
2104,10035.0,10.1152/jn.00891.2012,155603.0,"Yu J, Proddutur A, Elgammal FS, Ito T, Santhak...",2014-06-30T09:13:37,Status epilepticus enhances tonic GABA current...,23324316,2013,Journal of neurophysiology,10.1152/jn.00891.2012,10.1152/jn.00891.2012,both
2105,10036.0,10.1152/jn.00891.2012,155603.0,"Yu J, Proddutur A, Elgammal FS, Ito T, Santhak...",2014-06-30T09:13:37,Status epilepticus enhances tonic GABA current...,23324316,2013,Journal of neurophysiology,10.1152/jn.00891.2012,10.1152/jn.00891.2012,both
2347,101622.0,10.1371/journal.pone.0200626,248831.0,Goodliffe JW et al. (2018),2019-01-08T11:28:07.057,Differential changes to D1 and D2 medium spiny...,30118496,2018,PloS one,10.1371/journal.pone.0200626,10.1371/journal.pone.0200626,both
...,...,...,...,...,...,...,...,...,...,...,...,...
264702,9422.0,10.1523/JNEUROSCI.2581-12.2012,144558.0,Amatrudo JM et al. (2012),2012-07-06T11:26:38,Influence of highly distinctive structural pro...,23035077,2012,The Journal of neuroscience : the official jou...,10.1523/JNEUROSCI.2581-12.2012,10.1523/jneurosci.2581-12.2012,both
264703,9423.0,10.1523/JNEUROSCI.2581-12.2012,144558.0,Amatrudo JM et al. (2012),2012-07-06T11:26:38,Influence of highly distinctive structural pro...,23035077,2012,The Journal of neuroscience : the official jou...,10.1523/JNEUROSCI.2581-12.2012,10.1523/jneurosci.2581-12.2012,both
264704,9424.0,10.1523/JNEUROSCI.2581-12.2012,144558.0,Amatrudo JM et al. (2012),2012-07-06T11:26:38,Influence of highly distinctive structural pro...,23035077,2012,The Journal of neuroscience : the official jou...,10.1523/JNEUROSCI.2581-12.2012,10.1523/jneurosci.2581-12.2012,both
264705,9425.0,10.1523/JNEUROSCI.2581-12.2012,144558.0,Amatrudo JM et al. (2012),2012-07-06T11:26:38,Influence of highly distinctive structural pro...,23035077,2012,The Journal of neuroscience : the official jou...,10.1523/JNEUROSCI.2581-12.2012,10.1523/jneurosci.2581-12.2012,both


In [102]:
# checking on pubmed_id
test_merge = pd.merge(neuron_pmid, papers_df,
        left_on = 'reference_pmid',
        right_on = 'pubmed',
        how = 'outer',
        indicator = True)
test_merge.query("_merge == 'both'")
# 671 rows match

Unnamed: 0,neuron_id,reference_pmid,model_paper_id,name,created,title,pubmed,year,journal,doi,doi_lower,_merge
2102,10033.0,23324316,155603.0,"Yu J, Proddutur A, Elgammal FS, Ito T, Santhak...",2014-06-30T09:13:37,Status epilepticus enhances tonic GABA current...,23324316,2013,Journal of neurophysiology,10.1152/jn.00891.2012,10.1152/jn.00891.2012,both
2103,10034.0,23324316,155603.0,"Yu J, Proddutur A, Elgammal FS, Ito T, Santhak...",2014-06-30T09:13:37,Status epilepticus enhances tonic GABA current...,23324316,2013,Journal of neurophysiology,10.1152/jn.00891.2012,10.1152/jn.00891.2012,both
2104,10035.0,23324316,155603.0,"Yu J, Proddutur A, Elgammal FS, Ito T, Santhak...",2014-06-30T09:13:37,Status epilepticus enhances tonic GABA current...,23324316,2013,Journal of neurophysiology,10.1152/jn.00891.2012,10.1152/jn.00891.2012,both
2105,10036.0,23324316,155603.0,"Yu J, Proddutur A, Elgammal FS, Ito T, Santhak...",2014-06-30T09:13:37,Status epilepticus enhances tonic GABA current...,23324316,2013,Journal of neurophysiology,10.1152/jn.00891.2012,10.1152/jn.00891.2012,both
2347,101622.0,30118496,248831.0,Goodliffe JW et al. (2018),2019-01-08T11:28:07.057,Differential changes to D1 and D2 medium spiny...,30118496,2018,PloS one,10.1371/journal.pone.0200626,10.1371/journal.pone.0200626,both
...,...,...,...,...,...,...,...,...,...,...,...,...
262947,9422.0,23035077,144558.0,Amatrudo JM et al. (2012),2012-07-06T11:26:38,Influence of highly distinctive structural pro...,23035077,2012,The Journal of neuroscience : the official jou...,10.1523/JNEUROSCI.2581-12.2012,10.1523/jneurosci.2581-12.2012,both
262948,9423.0,23035077,144558.0,Amatrudo JM et al. (2012),2012-07-06T11:26:38,Influence of highly distinctive structural pro...,23035077,2012,The Journal of neuroscience : the official jou...,10.1523/JNEUROSCI.2581-12.2012,10.1523/jneurosci.2581-12.2012,both
262949,9424.0,23035077,144558.0,Amatrudo JM et al. (2012),2012-07-06T11:26:38,Influence of highly distinctive structural pro...,23035077,2012,The Journal of neuroscience : the official jou...,10.1523/JNEUROSCI.2581-12.2012,10.1523/jneurosci.2581-12.2012,both
262950,9425.0,23035077,144558.0,Amatrudo JM et al. (2012),2012-07-06T11:26:38,Influence of highly distinctive structural pro...,23035077,2012,The Journal of neuroscience : the official jou...,10.1523/JNEUROSCI.2581-12.2012,10.1523/jneurosci.2581-12.2012,both


### modelconcepts

In [103]:
f = open('Data/modeldb_modelconcepts.json', 'r')
modelconcepts = json.loads(f.read())
f.close()

In [104]:
modelconcepts_df = pd.json_normalize(modelconcepts)

In [105]:
modelconcepts_df = modelconcepts_df[['id', 'name', 'description.value']]
modelconcepts_df.rename({'id': 'model_concept_id',
                        'description.value': 'description'}, axis = 1, inplace = True)

In [106]:
modelconcepts_df

Unnamed: 0,model_concept_id,name,description
0,3541,Action Potential Initiation,The model is used to investigate factors affec...
1,3542,Pattern Recognition,The model is able to recognize spatial and/or ...
2,3543,Activity Patterns,Spatial and/or temporal patterns of spiking ac...
3,3629,Dendritic Action Potentials,The model is used to investigate action potent...
4,3630,Bursting,The model is used to investigate mechanisms of...
...,...,...,...
209,267152,Dynamic extracellular concentrations,
210,267153,Ramping,
211,267345,Effective Optokinetic Response (OKR),
212,267348,Eyeblink Conditioning (EBC),


### modeltypes

In [107]:
f = open('Data/modeldb_modeltypes.json', 'r')
modeltypes = json.loads(f.read())
f.close()

In [108]:
modeltypes_df = pd.json_normalize(modeltypes)

In [109]:
modeltypes_df = modeltypes_df[['id', 'name']]

In [110]:
modeltypes_df.rename({'id': 'model_type_id'}, axis = 1, inplace = True)

### receptors

In [111]:
f = open('Data/modeldb_receptors.json', 'r')
receptors = json.loads(f.read())
f.close()

In [112]:
receptors = pd.json_normalize(receptors)

In [113]:
receptors.head(3).T

Unnamed: 0,0,1,2
id,178,179,180
name,Nicotinic,M1,M3
created,2002-04-02T00:00:02,2002-04-02T00:00:02,2002-04-02T00:00:02
ver_number,1,1,1
ver_date,2002-04-02T00:00:02,2002-04-02T00:00:02,2002-04-02T00:00:02
class_id,9,9,9
parent_receptor.value,"[{'object_id': 218, 'object_name': 'Cholinergi...","[{'object_id': 204, 'object_name': 'Muscarinic'}]","[{'object_id': 204, 'object_name': 'Muscarinic'}]"
parent_receptor.attr_id,66.0,66.0,66.0


In [114]:
parent_receptor_df = normalize_json_objects(receptors, 'parent_receptor.value', ['parent_id', 'parent_receptor_name', 'model_receptor_id'])

In [115]:
receptors.rename({'id': 'model_receptor_id'}, axis = 1, inplace = True)

In [116]:
parent_receptor_types_df = parent_receptor_df[['parent_id', 'parent_receptor_name']].drop_duplicates(['parent_id', 'parent_receptor_name'])

In [117]:
parent_receptor_types_df

Unnamed: 0,parent_id,parent_receptor_name
0,218,Cholinergic Receptors
1,204,Muscarinic
6,207,mGluR
14,215,Adrenergic
15,192,Alpha
18,223,Dopaminergic Receptor
20,216,Serotonin
23,217,Histamine
24,213,Gaba
27,212,Glutamate


### Transmitters

In [118]:
f = open('Data/modeldb_transmitters.json', 'r')
transmitters = json.loads(f.read())
f.close()

In [119]:
transmitters_df = pd.json_normalize(transmitters)

In [120]:
transmitters_df.head(3).T

Unnamed: 0,0,1,2
id,209,214,224
name,Acetylcholine,Glycine,Dopamine
created,2004-01-08T13:19:48,2002-04-02T00:00:02,2001-02-15T11:38:07
ver_number,1,1,1
ver_date,2004-01-08T13:19:48,2002-04-02T00:00:02,2001-02-15T11:38:07
class_id,7,7,7
Parent_Neurotrans.value,,"[{'object_id': 231, 'object_name': 'Amino Acid...","[{'object_id': 234, 'object_name': 'Monoamines'}]"
Parent_Neurotrans.attr_id,,67.0,67.0
color.value,,,
color.attr_id,,,


In [121]:
model_neurotrans_classes = normalize_json_objects(transmitters_df, 'Parent_Neurotrans.value', ['neurotrans_class_id', 'neurotrans_class_name', 'model_id'])

In [122]:
neurotrans_classes = model_neurotrans_classes[['neurotrans_class_id', 'neurotrans_class_name']].drop_duplicates(['neurotrans_class_id', 'neurotrans_class_name'])

In [123]:
neurotrans_classes

Unnamed: 0,neurotrans_class_id,neurotrans_class_name
0,231,Amino Acids
1,234,Monoamines
2,236,Ions
3,237,Gases
5,235,Peptides


# Looking Through Tables and Inserting to Data Base

In [124]:
POSTGRES_PASSWORD = os.getenv('POSTGRES_PASSWORD')
import psycopg2
from sqlalchemy import create_engine

In [125]:
dbserver = psycopg2.connect(
    host = 'postgres',
    user = 'postgres',
    password = POSTGRES_PASSWORD,
    port = 5432
)
dbserver.autocommit = True

In [126]:
cursor = dbserver.cursor()

In [127]:
try:
    cursor.execute('CREATE DATABASE neurodash')
except:
    cursor.execute('DROP DATABASE neurodash')
    cursor.execute('CREATE DATABASE neurodash')

In [128]:
engine = create_engine('postgresql+psycopg2://{user}:{password}@{host}:{port}/{db}'.format(
    user = 'postgres',
    password = POSTGRES_PASSWORD,
    host = 'postgres',
    port = 5432,
    db = 'neurodash'
))

### Neuron

In [129]:
neuron_df.drop(['soma_surface'], axis = 1, inplace = True)

In [130]:
neuron_df.columns = neuron_df.columns.str.lower()

In [131]:
neuron_df.head(2)

Unnamed: 0,neuron_id,neuron_name,archive,note,age_scale,gender,age_classification,species,strain,scientific_name,...,pathdistance,branch_order,contraction,fragmentation,partition_asymmetry,pk_classic,bif_ampl_local,fractal_dim,bif_ampl_remote,length
0,1,cnic_001,Wearne_Hof,"When originally released, this reconstruction ...",Year,Male/Female,old,monkey,Rhesus,Macaca mulatta,...,253.921,8.0,0.934755,1274.0,0.413619,1.52521,33.3799,1.01989,50.461,4911.5
1,10,cnic_041,Wearne_Hof,"When originally released, this reconstruction ...",Year,Male/Female,old,monkey,Rhesus,Macaca mulatta,...,264.03,7.0,0.909303,1038.0,0.410981,1.99859,66.5523,1.02661,52.7609,3450.3


In [132]:
print(neuron_df.shape[0])
neuron_df.to_sql('neuron', con=engine, index=False, chunksize=100, if_exists = 'replace')

260078


260078

In [133]:
neuron_cell_type.head(2)

Unnamed: 0,neuron_id,cell_type
0,1,Local projecting
0,1,pyramidal


In [134]:
print(neuron_cell_type.shape[0])
neuron_cell_type.to_sql('neuron_cell_type', con=engine, index=False, 
                        chunksize=100, if_exists = 'replace')

664383


664383

In [135]:
neuron_exp_condition.head(2)

Unnamed: 0,neuron_id,experiment_condition
0,1,Control
1,10,Control


In [136]:
print(neuron_exp_condition.shape[0])
neuron_exp_condition.to_sql('neuron_exp_condition', con=engine, index=False, 
                        chunksize=100, if_exists = 'replace')

261100


261100

In [137]:
neuron_pmid.head(2)

Unnamed: 0,neuron_id,reference_pmid
0,1,12204204
0,1,12902394


In [138]:
print(neuron_pmid.shape[0])
neuron_pmid.to_sql('neuron_pmid', con=engine, index=False, 
                        chunksize=100, if_exists = 'replace')

266069


266069

In [139]:
neuron_doi.head(2)

Unnamed: 0,neuron_id,reference_doi
0,1,10.1016/S0306-4522(02)00305-6
0,1,10.1093/cercor/13.9.950


In [140]:
print(neuron_doi.shape[0])
neuron_doi.to_sql('neuron_doi', con=engine, index=False, 
                        chunksize=100, if_exists = 'replace')

266069


266069

### Electro

In [141]:
electro.columns = electro.columns.str.lower()
electro.head(2)

Unnamed: 0,electro_id,pmid,tableid,neuronname,neuronlongname,neuronprefname,brainregion,metadatacurated,species,strain,...,sahpamprest,sahpamprest_raw,sahpamprest_err,sahpamprest_n,sahpamprest_sd,fahpamprest,fahpamprest_raw,fahpamprest_err,fahpamprest_n,fahpamprest_sd
0,150,24108800,35984.0,Neocortex basket cell,medial prefrontal cortex fast spiking interne...,medial prefrontal cortex fast spiking interne...,isocortex,True,rats,"Rats, Wistar",...,,,,,,,,,,
1,149,24108800,35984.0,Neocortex Martinotti cell,medial prefrontal cortex low-threshold spiking...,medial prefrontal cortex low-threshold spiking...,isocortex,True,rats,"Rats, Wistar",...,,,,,,,,,,


In [142]:
print(electro.shape[0])
electro.to_sql('electro', con=engine, index=False, 
                        chunksize=100, if_exists = 'replace')

1515


1515

In [143]:
electro_pub.columns = electro_pub.columns.str.lower()
electro_pub['pmid'] = electro_pub['pmid'].astype('object')
electro_pub.head(2)

Unnamed: 0,pmid,articleid,title,pubyear,lastauthor
0,24108800,88449,Cell type-specific effects of adenosine on cor...,2015,Feldmeyer D
4,24323499,88469,Age-related changes to layer 3 pyramidal cells...,2015,Peters A


In [144]:
print(electro_pub.shape[0])
electro_pub.to_sql('electro_pub', con=engine, index=False, 
                        chunksize=100, if_exists = 'replace')

749


749

### Models

In [145]:
models_df.rename({'id': 'model_id'}, axis = 1, inplace = True)
models_df.head(2)

Unnamed: 0,model_id,name,created,ver_number,ver_date,class_id,gitrepo
0,279,Low Threshold Calcium Currents in TC cells (De...,2001-01-01T00:00:00,24,2015-01-02T22:01:45,19,True
1,2487,Olfactory Mitral Cell (Davison et al 2000),2001-04-05T22:35:35,14,2022-05-27T18:13:12.8,19,False


In [146]:
print(models_df.shape[0])
models_df.to_sql('models', con=engine, index=False, 
                        chunksize=100, if_exists = 'replace')

1836


1836

In [147]:
currents = model_currents_df[['model_current_id', 'model_current_name']].drop_duplicates(['model_current_id', 'model_current_name'])

In [148]:
currents.head(2)

Unnamed: 0,model_current_id,model_current_name
0,242,"I Na,t"
1,245,I T low threshold


In [149]:
print(currents.shape[0])
currents.to_sql('model_currents', con=engine, index=False, 
                        chunksize=100, if_exists = 'replace')

64


64

In [150]:
model_currents_df.drop(['model_current_name'], axis = 1, inplace = True)
model_currents_df.head(2)

Unnamed: 0,model_current_id,model_id
0,242,279
1,245,279


In [151]:
print(model_currents_df.shape[0])
model_currents_df.to_sql('model_to_current', con=engine, index=False, 
                        chunksize=100, if_exists = 'replace')

4734


4734

In [152]:
model_to_celltypes = model_neurons_df[['model_id', 'model_neuron_ids']].rename({'model_neuron_ids': 'model_celltypes_id'}, axis = 1)
model_to_celltypes.head(2)

Unnamed: 0,model_id,model_celltypes_id
0,279,262
1,2487,267


In [153]:
print(model_to_celltypes.shape[0])
model_to_celltypes.to_sql('model_to_celltypes', con=engine, index=False, 
                        chunksize=100, if_exists = 'replace')

2112


2112

In [154]:
celltypes_df.head(2)

Unnamed: 0,model_celltypes_id,name,electro_id,description,neurolex_name
0,257,Dentate gyrus granule GLU cell,507.0,The principal neuron of the fascia dentata of ...,
1,258,Hippocampus CA1 pyramidal GLU cell,507.0,The principal neuron of region CA1 of the hipp...,Hippocampus_CA1_pyramidal_cell


In [155]:
print(celltypes_df.shape[0])
celltypes_df.to_sql('model_celltypes', con=engine, index=False, 
                        chunksize=100, if_exists = 'replace')

278


278

In [156]:
model_types_df.drop(['model_type_name'], axis = 1, inplace = True)
model_types_df.head(2)

Unnamed: 0,model_type_id,model_id
0,3537,279
1,3537,2487


In [157]:
print(model_types_df.shape[0])
model_types_df.to_sql('model_to_types', con=engine, index=False, 
                        chunksize=100, if_exists = 'replace')

2157


2157

In [158]:
modeltypes_df.head(2)

Unnamed: 0,model_type_id,name
0,3536,Realistic Network
1,3537,Neuron or other electrically excitable cell


In [159]:
print(modeltypes_df.shape[0])
modeltypes_df.to_sql('model_types', con=engine, index=False, 
                        chunksize=100, if_exists = 'replace')

21


21

In [160]:
#receptors.head(2)

In [161]:
#print(receptors.shape[0])
#receptors.to_sql('model_types', con=engine, index=False, 
                        #chunksize=100, if_exists = 'replace')

In [162]:
#parent_receptor_types_df.head(2)

In [163]:
model_concepts_df.rename({'model_concept_ids': 'model_concept_id'}, axis = 1,
                        inplace = True)
model_to_concept = model_concepts_df[['model_id', 'model_concept_id']]
model_to_concept.head(2)

Unnamed: 0,model_id,model_concept_id
0,279,3629
1,279,3630


In [164]:
print(model_to_concept.shape[0])
model_to_concept.to_sql('model_to_concept', con=engine, index=False, 
                        chunksize=100, if_exists = 'replace')

5352


5352

In [165]:
modelconcepts_df.head(2)

Unnamed: 0,model_concept_id,name,description
0,3541,Action Potential Initiation,The model is used to investigate factors affec...
1,3542,Pattern Recognition,The model is able to recognize spatial and/or ...


In [166]:
print(modelconcepts_df.shape[0])
modelconcepts_df.to_sql('model_concepts', con=engine, index=False, 
                        chunksize=100, if_exists = 'replace')

214


214

In [167]:
# don't have the model applications data from API
# model_applications_df.head(2)

In [168]:
model_to_paper = model_paper_df[['model_id', 'model_paper_id']]
model_to_paper.head(2)

Unnamed: 0,model_id,model_paper_id
0,279,4161
1,2487,4036


In [169]:
print(model_to_paper.shape[0])
model_to_paper.to_sql('model_to_papers', con=engine, index=False, 
                        chunksize=100, if_exists = 'replace')

2100


2100

In [170]:
papers_df.head(2)

Unnamed: 0,model_paper_id,name,created,title,pubmed,year,journal,doi,doi_lower
0,4161,"Destexhe A, Neubig M, Ulrich D, Huguenard J (1...",2002-03-15T11:18:19,Dendritic low-threshold calcium currents in th...,9570789,1998,The Journal of neuroscience : the official jou...,10.1523/JNEUROSCI.18-10-03574.1998,10.1523/jneurosci.18-10-03574.1998
1,4036,"Davison AP, Feng J, Brown D (2000)",2002-03-15T11:06:36,A reduced compartmental model of the mitral ce...,10715559,2000,Brain research bulletin,10.1016/s0361-9230(99)00256-7,10.1016/s0361-9230(99)00256-7


In [171]:
print(papers_df.shape[0])
papers_df.to_sql('model_papers', con=engine, index=False, 
                        chunksize=100, if_exists = 'replace')

1817


1817

In [172]:
# not sure I need this, don't have run protocols from api
# model_runprotocols_df.head(2)

In [173]:
model_implemented = model_implemented_df[['model_implemented_id', 'model_implemented_name']].drop_duplicates()
model_implemented.head(2)

Unnamed: 0,model_implemented_id,model_implemented_name
0,33744,"Destexhe, Alain [Destexhe at iaf.cnrs-gif.fr]"
1,33739,"Davison, Andrew [Andrew.Davison at iaf.cnrs-gi..."


In [174]:
print(model_implemented.shape[0])
model_implemented.to_sql('model_implemented', con=engine, index=False, 
                        chunksize=100, if_exists = 'replace')

1030


1030

In [175]:
model_implemented_df = model_implemented_df[['model_id', 'model_implemented_id']]

In [176]:
model_implemented_df.head(2)

Unnamed: 0,model_id,model_implemented_id
0,279,33744
1,2487,33739


In [177]:
print(model_implemented_df.shape[0])
model_implemented_df.to_sql('model_to_implemented', con=engine, index=False, 
                        chunksize=100, if_exists = 'replace')

1972


1972

In [178]:
model_views_df.head(2)

Unnamed: 0,model_views_name,model_id
0,Burst behavior in single-compartment model,279
1,Burst behavior in 3-compartment model,279


In [179]:
print(model_views_df.shape[0])
model_views_df.to_sql('model_views', con=engine, index=False, 
                        chunksize=100, if_exists = 'replace')

594


594

In [180]:
model_to_region = model_region_df[['model_id', 'model_region_id']]
model_to_region.head(2)

Unnamed: 0,model_id,model_region_id
0,2730,115950
1,2798,115949


In [181]:
print(model_to_region.shape[0])
model_to_region.to_sql('model_to_region', con=engine, index=False, 
                        chunksize=100, if_exists = 'replace')

933


933

In [182]:
regions_df.rename({'region_id': 'model_region_id'}, axis = 1, inplace = True)
regions_df.head(2)

Unnamed: 0,model_region_id,primary_brain_region,created,ver_number,ver_date,class_id,parent
0,115945,neocortex,2008-10-06T17:06:41,1,2008-10-06T17:06:41,144,Vertebrate regions
1,115946,hippocampus,2008-10-06T17:07:19,1,2008-10-06T17:07:19,144,Vertebrate regions


In [183]:
print(regions_df.shape[0])
regions_df.to_sql('model_regions', con=engine, index=False, 
                        chunksize=100, if_exists = 'replace')

47


47

In [184]:
model_to_transmitter = model_transmitter_df[['model_id', 'model_transmitter_id']]
model_to_transmitter.head(2)

Unnamed: 0,model_id,model_transmitter_id
0,2730,232
1,2730,233


In [185]:
print(model_to_transmitter.shape[0])
model_to_transmitter.to_sql('model_to_transmitter', con=engine, index=False, 
                        chunksize=100, if_exists = 'replace')

677


677

In [186]:
transmitters_df.rename({'id':'model_transmitter_id',
                       'Parent_Neurotrans.attr_id': 'neurotrans_class_id'}, axis = 1, inplace = True)
transmitters_df.head(2)

Unnamed: 0,model_transmitter_id,name,created,ver_number,ver_date,class_id,Parent_Neurotrans.value,neurotrans_class_id,color.value,color.attr_id
0,209,Acetylcholine,2004-01-08T13:19:48,1,2004-01-08T13:19:48,7,,,,
1,214,Glycine,2002-04-02T00:00:02,1,2002-04-02T00:00:02,7,"[{'object_id': 231, 'object_name': 'Amino Acid...",67.0,,


In [187]:
model_transmitters = transmitters_df.drop(['Parent_Neurotrans.value', 'color.value', 'color.attr_id'], axis = 1)
model_transmitters.rename({'neutrans_class_id': 'neurotrans_class_id'}, axis = 1, inplace = True)
model_transmitters.head(2)

Unnamed: 0,model_transmitter_id,name,created,ver_number,ver_date,class_id,neurotrans_class_id
0,209,Acetylcholine,2004-01-08T13:19:48,1,2004-01-08T13:19:48,7,
1,214,Glycine,2002-04-02T00:00:02,1,2002-04-02T00:00:02,7,67.0


In [188]:
print(model_transmitters.shape[0])
model_transmitters.to_sql('model_transmitters', con=engine, index=False, 
                        chunksize=100, if_exists = 'replace')

25


25

In [189]:
neurotrans_classes.head(2)

Unnamed: 0,neurotrans_class_id,neurotrans_class_name
0,231,Amino Acids
1,234,Monoamines


In [190]:
print(neurotrans_classes.shape[0])
neurotrans_classes.to_sql('neurotrans_classes', con=engine, index=False, 
                        chunksize=100, if_exists = 'replace')

5


5

In [191]:
model_species_df.head(2)

Unnamed: 0,model_species_id,model_species_name,model_id
0,114272,Hirudinea (leech),19698
1,249921,Aplysia,34168


In [192]:
model_species = model_species_df[['model_species_id', 'model_species_name']].drop_duplicates(['model_species_id', 'model_species_name'])
model_species.head(2)

Unnamed: 0,model_species_id,model_species_name
0,114272,Hirudinea (leech)
1,249921,Aplysia


In [193]:
print(model_species.shape[0])
model_species.to_sql('model_species', con=engine, index=False, 
                        chunksize=100, if_exists = 'replace')

12


12

In [194]:
model_to_species = model_species_df[['model_id', 'model_species_id']]
model_to_species.head(2)

Unnamed: 0,model_id,model_species_id
0,19698,114272
1,34168,249921


In [195]:
print(model_to_species.shape[0])
model_to_species.to_sql('model_to_species', con=engine, index=False, 
                        chunksize=100, if_exists = 'replace')

40


40