In [1]:
import requests
import json
import pandas as pd
import math

# Data Collection

In [2]:
# Build the URL of the first page of results.
# `limit` and `offset` are the two query parameters sent to the API.
limit = 1000
offset = 0
url = f"https://neuroelectro.org/api/1/nedm/?limit={limit}&offset={offset}"
print(url)

https://neuroelectro.org/api/1/nedm/?limit=1000&offset=0


In [None]:
# Get total count of all data.
# The response's "meta" object carries a "total_count" field, which the
# following cells use to work out how many pages to request.
url = "https://neuroelectro.org/api/1/nedm/"
# Bound the wait and fail loudly on an HTTP error instead of trying to parse
# an error response as JSON.
response = requests.get(url, timeout=60)
response.raise_for_status()
total_count = response.json()["meta"]["total_count"]
print("total_count =", total_count)

In [None]:
# Number of pages needed to cover every record; uses `limit` (the page size
# defined above) rather than repeating the literal 1000.
math.ceil(total_count / limit)

In [None]:
# Generate list of urls: one per page of `limit` records.
# The offsets come from range() instead of incrementing the global `offset`,
# so re-running this cell always produces the same list.
urls = [
    "https://neuroelectro.org/api/1/nedm/?limit={}&offset={}".format(limit, page_offset)
    for page_offset in range(0, total_count, limit)
]
print(urls)

In [None]:
# API has a cap of 1000 per call, so each page URL is fetched separately.
# This code takes forever to run
neuro_jsons = []
# One Session is shared by all requests so connections can be reused.
with requests.Session() as session:
    for url in urls:
        # Bound each wait and stop on an HTTP error rather than storing a bad page.
        response = session.get(url, timeout=60)
        response.raise_for_status()
        neuro_jsons.append(response.json())
        print("added json to list")

In [None]:
# measurement type of the first record on the first downloaded page
neuro_jsons[0]["objects"][0]["ecm"]["e"]["name"]

In [None]:
# neuron type entry of the first record on the first downloaded page
neuro_jsons[0]["objects"][0]["ncm"]

In [None]:
# value of the first record on the first downloaded page
neuro_jsons[0]["objects"][0]["val"]

In [None]:
# Flatten every downloaded page into three parallel lists: for each record,
# its measurement name, its neuron (cell) name and its value.
measurements = []
cells = []
values = []
# The loop variable is named `page` rather than `json` so that it does not
# shadow the `json` module imported at the top of the notebook.
for page in neuro_jsons:
    for record in page["objects"]:
        measurements.append(record["ecm"]["e"]["name"])
        cells.append(record["ncm"]["n"]["name"])
        values.append(record["val"])

In [None]:
# Number of collected records. `measurements` and `cells` are appended to in
# the same loop as `values`, so either name can be swapped in to inspect it:
# measurements
# cells
len(values)

In [None]:
# Pair up the parallel lists into one (cell, value, measurement) tuple per record.
data = list(zip(cells, values, measurements))
# Preview only the first few tuples; echoing the whole list floods the output.
data[:5]

In [None]:
# Tabulate the (cell, value, measurement) tuples, one row per tuple.
column_names = ["Cell Type", "Value", "Measurement"]
df = pd.DataFrame(data, columns=column_names)
df

# Data Preprocessing (Start running cells here)

In [3]:
# pandas is imported here as well because this is the cell to start running from.
import pandas as pd

# Load the dataset from the local CSV file and show its row count.
neuro = pd.read_csv("all_neuron_data.csv")
neuro.shape[0]

16676

In [2]:
# Distinct cell-type names, in order of first appearance.
neuro.loc[:, "Cell Type"].unique()

array(['Dorsal root ganglion cell',
       'Spinal cord intermediate horn motor neuron sympathetic',
       'Hippocampus CA1 pyramidal cell', 'Cerebellar nucleus cell',
       'Hippocampus CA3 pyramidal cell',
       'Basalis nucleus cholinergic neuron', 'Neocortex basket cell',
       'Spinal cord ventral horn motor neuron alpha',
       'Neocortex pyramidal cell layer 5-6',
       'Neostriatum medium spiny neuron',
       'Amygdaloid nucleus paracapsular intercalated cell',
       'Spinal cord ventral horn interneuron V2',
       'Suprachiasmatic nucleus neuron', 'Dentate gyrus granule cell',
       'Neocortex Martinotti cell', 'Neocortex pyramidal cell layer 2-3',
       'Other', 'Thalamic reticular nucleus cell',
       'Neocortex uncharacterized cell',
       'Locus coeruleus noradrenergic neuron',
       'Medial Nucleus of Trapezoid Body neuron',
       'Hypoglossal nucleus motor neuron',
       'Trigeminal nucleus principal cell',
       'Spinal cord ventral horn interneuron Ren

### Data Filtering section

In [1]:
# Lambda function for assigning areas to individual neurons
# can also take in a "words" list like below here
# neuro["Area"] = neuro["Cell Type"].map(lambda x: "motor" if "motor" in x.lower() \
#                                        else("Hippocampus" if "CA1" in x else "other"))

In [2]:
# Testing out using a list in lambda function
# hippo = ["CA1", "CA3"]
# neuro["Area"] = neuro["Cell Type"].map(lambda x: "motor" if "motor" in x.lower() \
#                                        else("Hippocampus" if any(word in x for word in hippo) else "other"))

In [None]:
# hippo = ["CA1", "CA3"]
# neuro["Area"] = neuro["Cell Type"].map(lambda x: "motor", filter(lambda u: u in u.lower(), x) \
#                                        else("Hippocampus" if any(word for word in hippo) in x else "other"))

Key areas of hippocampus include:
    - CA1
    - CA2
    - CA3
    - CA4 (no cells in dataset)
    - Subiculum
    - Dentate gyrus
    - Parahippocampal gyrus (no cells)
    - Fimbria (no cells)

In [24]:
# Search through dataframe for cells part of hippocampus:
# list the distinct cell types whose name contains "gyrus".
gyrus_mask = neuro["Cell Type"].str.contains("gyrus")
neuro.loc[gyrus_mask, "Cell Type"].unique()

array(['Dentate gyrus granule cell', 'Dentate gyrus hilar cell',
       'Dentate gyrus mossy cell', 'Dentate gyrus basket cell',
       'Dentate gyrus axo-axonic cell', 'Dentate gyrus HICAP cell',
       'Dentate gyrus HIPP cell'], dtype=object)

In [3]:
# Assign each neuron to a structure: "Hippocampus" when its "Cell Type" name
# contains any of the keywords below (case-insensitive substring match),
# otherwise "other".
hippos = ["CA1", "CA2", "CA3", "Subiculum", "Dentate"]
hippo_keywords = [hippo.lower() for hippo in hippos]

# The column is computed in one pass and assigned once. The earlier row loop
# wrote cell by cell through .loc and also set row["Area"], which only changed
# the temporary row yielded by iterrows() and never reached `neuro`.
is_hippocampus = neuro["Cell Type"].str.lower().map(
    lambda cell_type: any(keyword in cell_type for keyword in hippo_keywords)
)
neuro["Structure"] = is_hippocampus.map({True: "Hippocampus", False: "other"})

In [None]:
# Other searches tried while exploring the cell-type names:
# neuro[neuro["Cell Type"].str.contains("CA1|CA3")]
# neuro[neuro["Cell Type"].str.contains("Entorhinal|entorhinal")]
# neuro[neuro["Cell Type"].str.contains("denta")]

# len(neuro[neuro["Area"].str.contains("Hippocampus")])

# Distinct cell types whose name contains "spin" (the match is case-sensitive).
spin_mask = neuro["Cell Type"].str.contains("spin")
neuro.loc[spin_mask, "Cell Type"].unique()

In [None]:
# Keyword lists intended for adding a brain lobe column
# (this cell only defines the lists; no column is created here).
temporal = [
    "CA1",
    "CA3",
    "Subiculum",
    "Entorhinal",
]
spinal = [
    "Dorsal root ganglion",
]
eye = [
    "Retina",
]


In [4]:
# One-hot encode "Measurement": every distinct value in that column becomes
# its own indicator column; the remaining columns are carried over unchanged.
df = pd.get_dummies(data=neuro, columns=["Measurement"])
df

Unnamed: 0,Cell Type,Value,Structure,Measurement_ADP amplitude,Measurement_ADP duration,Measurement_AHP amplitude,Measurement_AHP amplitude from resting,Measurement_AHP duration,Measurement_AHP voltage,Measurement_FI slope,...,Measurement_spike amplitude from trough,Measurement_spike decay time,Measurement_spike half-width,Measurement_spike max decay slope,Measurement_spike max rise slope,Measurement_spike peak,Measurement_spike rise time,Measurement_spike threshold,Measurement_spike width,Measurement_spontaneous firing rate
0,Dorsal root ganglion cell,-54.30,other,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,Dorsal root ganglion cell,-27.40,other,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0
2,Dorsal root ganglion cell,101.30,other,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,Dorsal root ganglion cell,2.00,other,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0
4,Dorsal root ganglion cell,0.39,other,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
5,Dorsal root ganglion cell,192.00,other,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
6,Dorsal root ganglion cell,-16.90,other,0,0,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
7,Spinal cord intermediate horn motor neuron sym...,-59.80,other,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
8,Spinal cord intermediate horn motor neuron sym...,1.14,other,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
9,Spinal cord intermediate horn motor neuron sym...,92.40,other,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


# Machine Learning

## For all data (skip this section if filtering by brain area/structure)

In [None]:
# Features: every column except the label. "Structure" holds text labels
# ("Hippocampus"/"other") when the structure-assignment cell has been run,
# and the classifier cannot fit on a text column, so it is dropped as well;
# errors="ignore" keeps this cell working when that column is absent.
x = df.drop(columns=["Cell Type", "Structure"], errors="ignore")
# Label: the cell type to predict.
y = df["Cell Type"]

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for testing; the fixed random_state makes the
# split repeatable.
split_parts = train_test_split(x, y, test_size=0.20, random_state=42)
x_train, x_test, y_train, y_test = split_parts

In [None]:
from sklearn.ensemble import RandomForestClassifier

# cls = classifier: a random forest of 500 trees.
# random_state is fixed so that repeated runs build the same forest and
# therefore report the same predictions and accuracy.
cls = RandomForestClassifier(n_estimators=500, random_state=42)
cls.fit(x_train, y_train)
# Predicted cell types for the held-out rows.
y_predict = cls.predict(x_test)

In [None]:
from sklearn.metrics import accuracy_score

# Accuracy: number of correct predictions divided by the total number.
accuracy = accuracy_score(y_test, y_predict)
print(accuracy)

# Just for hippocampus cells

In [5]:
# Keep only the rows labelled "Hippocampus", then separate features and label.
hippo_rows = df["Structure"].eq("Hippocampus")
hippo_df = df.loc[hippo_rows]
# Features: everything except the label and the (now constant) structure column.
x = hippo_df.drop(columns=["Cell Type", "Structure"])
# Label: the cell type to predict.
y = hippo_df["Cell Type"]

In [6]:
# Distinct cell types present in the hippocampus subset.
hippo_df.loc[:, "Cell Type"].unique()

array(['Hippocampus CA1 pyramidal cell', 'Hippocampus CA3 pyramidal cell',
       'Subiculum pyramidal cell', 'Hippocampus CA1 radiatum giant cell',
       'Hippocampus CA3 lacunosum moleculare neuron',
       'Hippocampus CA3 trilaminar neuron', 'Hippocampus CA3 basket cell',
       'Hippocampus CA3 stratum radiatum giant cell',
       'Hippocampus CA1 basket cell',
       'Hippocampus CA1 oriens lacunosum moleculare neuron',
       'Hippocampus CA1 ivy neuron', 'Hippocampus CA1 neurogliaform cell',
       'Hippocampus CA3 oriens interneuron',
       'Hippocampus CA1 IS-I neuron'], dtype=object)

In [7]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the hippocampus rows for testing; the fixed random_state
# makes the split repeatable.
split_parts = train_test_split(x, y, test_size=0.20, random_state=42)
x_train, x_test, y_train, y_test = split_parts

In [8]:
from sklearn.metrics import accuracy_score
from sklearn.ensemble import RandomForestClassifier

# Classifier: a random forest of 500 trees.
# random_state is fixed so that repeated runs build the same forest and
# therefore report the same predictions and accuracy.
cls = RandomForestClassifier(n_estimators=500, random_state=42)
cls.fit(x_train, y_train)
# Predicted cell types for the held-out hippocampus rows.
y_predict = cls.predict(x_test)

In [9]:
# Accuracy: number of correct predictions divided by the total number.
accuracy = accuracy_score(y_test, y_predict)
print(accuracy)

0.48623853211009177
