# Machine Learning on top 6 measurements
## Missing data is filled in with mean values of each column

In [1]:
import pandas as pd

In [2]:
neuro = pd.read_csv("all_neuron_data.csv")

In [3]:
# Jacob saturday
# Filter down our dataset to only use top 6 measurements
neuro_filtered = neuro.loc[(neuro["Measurement"] == "input resistance") |
                          (neuro["Measurement"] == "resting membrane potential") |
                          (neuro["Measurement"] == "spike threshold") |
                          (neuro["Measurement"] == "spike half-width") |
                          (neuro["Measurement"] == "spike amplitude") |
                          (neuro["Measurement"] == "membrane time constant")]

neuro_filtered

Unnamed: 0,Cell Type,Value,Measurement
0,Dorsal root ganglion cell,-54.30,resting membrane potential
1,Dorsal root ganglion cell,-27.40,spike threshold
2,Dorsal root ganglion cell,101.30,spike amplitude
5,Dorsal root ganglion cell,192.00,input resistance
7,Spinal cord intermediate horn motor neuron sym...,-59.80,resting membrane potential
8,Spinal cord intermediate horn motor neuron sym...,1.14,input resistance
9,Spinal cord intermediate horn motor neuron sym...,92.40,membrane time constant
11,Spinal cord intermediate horn motor neuron sym...,57.10,spike amplitude
13,Spinal cord intermediate horn motor neuron sym...,-45.30,spike threshold
18,Hippocampus CA1 pyramidal cell,-51.50,spike threshold


In [4]:
current_cell = ""
current_dict = {}
big_list = []

for index, row in neuro_filtered.iterrows():
    if current_cell != row["Cell Type"]:
        # if the current cell and next cell are different, make a new empty row
        big_list.append(current_dict)
        current_dict = {}
        current_cell = row["Cell Type"]
        current_dict["Cell Type"] = current_cell
        current_dict[row["Measurement"]] = row["Value"]
    else:
        # else add the measurement value into the appropriate measurement column
        current_dict[row["Measurement"]] = row["Value"]

del big_list[0]
print(big_list)

[{'Cell Type': 'Dorsal root ganglion cell', 'resting membrane potential': -54.3, 'spike threshold': -27.4, 'spike amplitude': 101.3, 'input resistance': 192.0}, {'Cell Type': 'Spinal cord intermediate horn motor neuron sympathetic', 'resting membrane potential': -59.8, 'input resistance': 1.14, 'membrane time constant': 92.4, 'spike amplitude': 57.1, 'spike threshold': -45.3}, {'Cell Type': 'Hippocampus CA1 pyramidal cell', 'spike threshold': -51.5, 'spike half-width': 1.6, 'membrane time constant': 27.9, 'resting membrane potential': -64.8, 'input resistance': 100.6}, {'Cell Type': 'Cerebellar nucleus cell', 'spike threshold': -68.58, 'spike half-width': 2.13, 'input resistance': 59.26, 'resting membrane potential': -58.84}, {'Cell Type': 'Hippocampus CA3 pyramidal cell', 'input resistance': 164.0, 'membrane time constant': 61.0, 'resting membrane potential': -76.0, 'spike threshold': -58.0, 'spike half-width': 0.79}, {'Cell Type': 'Basalis nucleus cholinergic neuron', 'resting membra

In [5]:
# Saturday
cleaned = pd.DataFrame(big_list)

mean_input_resist = cleaned["input resistance"].mean()
mean_mem_const = cleaned["membrane time constant"].mean()
mean_resting_mem = cleaned["resting membrane potential"].mean()
mean_spike_amp = cleaned["spike amplitude"].mean()
mean_spike_half = cleaned["spike half-width"].mean()
mean_spike_thresh = cleaned["spike threshold"].mean()

cleaned["input resistance"].fillna(mean_input_resist, inplace=True)
cleaned["membrane time constant"].fillna(mean_mem_const, inplace=True)
cleaned["resting membrane potential"].fillna(mean_resting_mem, inplace=True)
cleaned["spike amplitude"].fillna(mean_spike_amp, inplace=True)
cleaned["spike half-width"].fillna(mean_spike_half, inplace=True)
cleaned["spike threshold"].fillna(mean_spike_thresh, inplace=True)

cleaned

Unnamed: 0,Cell Type,input resistance,membrane time constant,resting membrane potential,spike amplitude,spike half-width,spike threshold
0,Dorsal root ganglion cell,192.000000,22.980182,-54.300000,101.300000,8.531983,-27.400000
1,Spinal cord intermediate horn motor neuron sym...,1.140000,92.400000,-59.800000,57.100000,8.531983,-45.300000
2,Hippocampus CA1 pyramidal cell,100.600000,27.900000,-64.800000,72.142382,1.600000,-51.500000
3,Cerebellar nucleus cell,59.260000,22.980182,-58.840000,72.142382,2.130000,-68.580000
4,Hippocampus CA3 pyramidal cell,164.000000,61.000000,-76.000000,72.142382,0.790000,-58.000000
5,Basalis nucleus cholinergic neuron,268.000000,28.200000,-48.000000,66.700000,0.520000,-31.900000
6,Neocortex basket cell,142.000000,4.800000,-59.000000,87.000000,0.300000,-40.000000
7,Spinal cord ventral horn motor neuron alpha,11.100000,4.900000,-55.000000,72.142382,8.531983,-34.683831
8,Neocortex basket cell,182.000000,7.700000,-67.000000,52.000000,0.380000,-34.000000
9,Dorsal root ganglion cell,392.000000,27.000000,-59.000000,105.000000,8.531983,-36.000000


In [6]:
# Run machine learning on this narrowed down dataset
X=cleaned.drop("Cell Type", axis=1)
y=cleaned["Cell Type"]

In [7]:
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(X,y,test_size=0.20, random_state=42)

### Random forest model

In [8]:
from sklearn.ensemble import RandomForestClassifier
#cls = classifier
cls= RandomForestClassifier(n_estimators=500)
cls.fit(x_train, y_train)
y_predict = cls.predict(x_test)

In [9]:
from sklearn.metrics import accuracy_score
# how many right divided by the total
print(accuracy_score(y_test, y_predict))

0.168246445498


### Deep learning model

In [10]:
# Machine learning model will not work on data categories with only 1 row
# dropping all rows that have fewer than 5 data points associated with a category

counts = cleaned['Cell Type'].value_counts()
greater_than_10 = cleaned[cleaned['Cell Type'].isin(counts[counts >= 10].index)]

# Run machine learning on this narrowed down dataset
X=greater_than_10.drop("Cell Type", axis=1)
y=greater_than_10["Cell Type"]

In [11]:
from sklearn.preprocessing import LabelEncoder, StandardScaler
from keras.utils import to_categorical

# This code works in machine_learning2 notebook but not here. y is the same.  How come?
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42, stratify=y)

Using TensorFlow backend.


In [13]:
from sklearn.preprocessing import LabelEncoder, StandardScaler
from keras.utils import to_categorical

X_scaler = StandardScaler().fit(X_train)
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

# Step 1: Label-encode data set
label_encoder = LabelEncoder()
label_encoder.fit(y_train)
encoded_y_train = label_encoder.transform(y_train)
encoded_y_test = label_encoder.transform(y_test)

# Step 2: Convert encoded labels to one-hot-encoding
y_train_categorical = to_categorical(encoded_y_train)
y_test_categorical = to_categorical(encoded_y_test)

# Filter data, run machine learning on filtered data

In [None]:
words = ["CA1", "CA3"]
for i, row in cleaned.iterrows():
    if any(word in row["Cell Type"] for word in words):
#         print("true")0
        cleaned.loc[i, "Area"] = "Hippocampus"
#         row["Area"] = "Hippocampus"
    else:
        cleaned.loc[i, "Area"] = "other"
        row["Area"] = "other"

In [None]:
hippo_df = cleaned.loc[cleaned["Area"] == "Hippocampus"]
X=hippo_df.drop(["Cell Type", "Area"], axis=1)
y=hippo_df["Cell Type"]

In [None]:
print(y)

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from keras.utils import to_categorical

y=y.tolist()

X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1)

X_scaler = StandardScaler().fit(X_train)
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

# Step 1: Label-encode data set
encoded_y_train = label_encoder.transform(y_train)
encoded_y_test = label_encoder.transform(y_test)

# Step 2: Convert encoded labels to one-hot-encoding
y_train_categorical = to_categorical(encoded_y_train)
y_test_categorical = to_categorical(encoded_y_test)

In [None]:
y_train

In [None]:
y_train_categorical