# Machine Learning on top 6 measurements
## Missing data is filled in with mean values of each column

In [1]:
import pandas as pd
import numpy as np

In [2]:
neuro = pd.read_csv("all_neuron_data.csv")

In [3]:
# Jacob saturday
# Filter down our dataset to only use top 6 measurements
neuro_filtered = neuro.loc[(neuro["Measurement"] == "input resistance") |
                          (neuro["Measurement"] == "resting membrane potential") |
                          (neuro["Measurement"] == "spike threshold") |
                          (neuro["Measurement"] == "spike half-width") |
                          (neuro["Measurement"] == "spike amplitude") |
                          (neuro["Measurement"] == "membrane time constant")]

neuro_filtered

Unnamed: 0,Cell Type,Value,Measurement
0,Dorsal root ganglion cell,-54.30,resting membrane potential
1,Dorsal root ganglion cell,-27.40,spike threshold
2,Dorsal root ganglion cell,101.30,spike amplitude
5,Dorsal root ganglion cell,192.00,input resistance
7,Spinal cord intermediate horn motor neuron sym...,-59.80,resting membrane potential
8,Spinal cord intermediate horn motor neuron sym...,1.14,input resistance
9,Spinal cord intermediate horn motor neuron sym...,92.40,membrane time constant
11,Spinal cord intermediate horn motor neuron sym...,57.10,spike amplitude
13,Spinal cord intermediate horn motor neuron sym...,-45.30,spike threshold
18,Hippocampus CA1 pyramidal cell,-51.50,spike threshold


In [4]:
current_cell = ""
current_dict = {}
big_list = []

for index, row in neuro_filtered.iterrows():
    if current_cell != row["Cell Type"]:
        # if the current cell and next cell are different, make a new empty row
        big_list.append(current_dict)
        current_dict = {}
        current_cell = row["Cell Type"]
        current_dict["Cell Type"] = current_cell
        current_dict[row["Measurement"]] = row["Value"]
    else:
        # else add the measurement value into the appropriate measurement column
        current_dict[row["Measurement"]] = row["Value"]

del big_list[0]

In [5]:
# Fill in NaN values with mean values of that cell type's measurement (mean value of all values for a particular cell's
# input resistance column will be filled in for that cell)

mean_clean = pd.DataFrame(big_list)
mean_clean["input resistance"] = mean_clean.groupby("Cell Type")["input resistance"].transform(lambda x: x.fillna(x.mean()))
mean_clean["membrane time constant"] = mean_clean.groupby("Cell Type")["membrane time constant"].transform(lambda x: x.fillna(x.mean()))
mean_clean["resting membrane potential"] = mean_clean.groupby("Cell Type")["resting membrane potential"].transform(lambda x: x.fillna(x.mean()))
mean_clean["spike amplitude"] = mean_clean.groupby("Cell Type")["spike amplitude"].transform(lambda x: x.fillna(x.mean()))
mean_clean["spike half-width"] = mean_clean.groupby("Cell Type")["spike half-width"].transform(lambda x: x.fillna(x.mean()))
mean_clean["spike threshold"] = mean_clean.groupby("Cell Type")["spike threshold"].transform(lambda x: x.fillna(x.mean()))

mean_clean = mean_clean.dropna()
mean_clean = mean_clean.loc[mean_clean["Cell Type"] != "Other"]
mean_clean

Unnamed: 0,Cell Type,input resistance,membrane time constant,resting membrane potential,spike amplitude,spike half-width,spike threshold
0,Dorsal root ganglion cell,192.000000,14.630000,-54.300000,101.300000,3.122222,-27.400000
2,Hippocampus CA1 pyramidal cell,100.600000,27.900000,-64.800000,84.865093,1.600000,-51.500000
3,Cerebellar nucleus cell,59.260000,58.175000,-58.840000,72.225000,2.130000,-68.580000
4,Hippocampus CA3 pyramidal cell,164.000000,61.000000,-76.000000,78.985714,0.790000,-58.000000
5,Basalis nucleus cholinergic neuron,268.000000,28.200000,-48.000000,66.700000,0.520000,-31.900000
6,Neocortex basket cell,142.000000,4.800000,-59.000000,87.000000,0.300000,-40.000000
7,Spinal cord ventral horn motor neuron alpha,11.100000,4.900000,-55.000000,66.413333,0.961667,-43.965556
8,Neocortex basket cell,182.000000,7.700000,-67.000000,52.000000,0.380000,-34.000000
9,Dorsal root ganglion cell,392.000000,27.000000,-59.000000,105.000000,3.122222,-36.000000
10,Neocortex pyramidal cell layer 5-6,23.310000,11.430000,-72.690000,87.460000,0.630000,-42.560000


In [6]:
# Saturday (obsolete, do not run)
cleaned = pd.DataFrame(big_list)

mean_input_resist = cleaned["input resistance"].mean()
mean_mem_const = cleaned["membrane time constant"].mean()
mean_resting_mem = cleaned["resting membrane potential"].mean()
mean_spike_amp = cleaned["spike amplitude"].mean()
mean_spike_half = cleaned["spike half-width"].mean()
mean_spike_thresh = cleaned["spike threshold"].mean()

cleaned["input resistance"].fillna(mean_input_resist, inplace=True)
cleaned["membrane time constant"].fillna(mean_mem_const, inplace=True)
cleaned["resting membrane potential"].fillna(mean_resting_mem, inplace=True)
cleaned["spike amplitude"].fillna(mean_spike_amp, inplace=True)
cleaned["spike half-width"].fillna(mean_spike_half, inplace=True)
cleaned["spike threshold"].fillna(mean_spike_thresh, inplace=True)

cleaned

Unnamed: 0,Cell Type,input resistance,membrane time constant,resting membrane potential,spike amplitude,spike half-width,spike threshold
0,Dorsal root ganglion cell,192.000000,22.980182,-54.300000,101.300000,8.531983,-27.400000
1,Spinal cord intermediate horn motor neuron sym...,1.140000,92.400000,-59.800000,57.100000,8.531983,-45.300000
2,Hippocampus CA1 pyramidal cell,100.600000,27.900000,-64.800000,72.142382,1.600000,-51.500000
3,Cerebellar nucleus cell,59.260000,22.980182,-58.840000,72.142382,2.130000,-68.580000
4,Hippocampus CA3 pyramidal cell,164.000000,61.000000,-76.000000,72.142382,0.790000,-58.000000
5,Basalis nucleus cholinergic neuron,268.000000,28.200000,-48.000000,66.700000,0.520000,-31.900000
6,Neocortex basket cell,142.000000,4.800000,-59.000000,87.000000,0.300000,-40.000000
7,Spinal cord ventral horn motor neuron alpha,11.100000,4.900000,-55.000000,72.142382,8.531983,-34.683831
8,Neocortex basket cell,182.000000,7.700000,-67.000000,52.000000,0.380000,-34.000000
9,Dorsal root ganglion cell,392.000000,27.000000,-59.000000,105.000000,8.531983,-36.000000


In [7]:
cleaned = cleaned.loc[cleaned["Cell Type"] != "Other"]
cleaned

Unnamed: 0,Cell Type,input resistance,membrane time constant,resting membrane potential,spike amplitude,spike half-width,spike threshold
0,Dorsal root ganglion cell,192.000000,22.980182,-54.300000,101.300000,8.531983,-27.400000
1,Spinal cord intermediate horn motor neuron sym...,1.140000,92.400000,-59.800000,57.100000,8.531983,-45.300000
2,Hippocampus CA1 pyramidal cell,100.600000,27.900000,-64.800000,72.142382,1.600000,-51.500000
3,Cerebellar nucleus cell,59.260000,22.980182,-58.840000,72.142382,2.130000,-68.580000
4,Hippocampus CA3 pyramidal cell,164.000000,61.000000,-76.000000,72.142382,0.790000,-58.000000
5,Basalis nucleus cholinergic neuron,268.000000,28.200000,-48.000000,66.700000,0.520000,-31.900000
6,Neocortex basket cell,142.000000,4.800000,-59.000000,87.000000,0.300000,-40.000000
7,Spinal cord ventral horn motor neuron alpha,11.100000,4.900000,-55.000000,72.142382,8.531983,-34.683831
8,Neocortex basket cell,182.000000,7.700000,-67.000000,52.000000,0.380000,-34.000000
9,Dorsal root ganglion cell,392.000000,27.000000,-59.000000,105.000000,8.531983,-36.000000


In [8]:
dropped = pd.DataFrame(big_list)
dropped = dropped.dropna(axis=0)
dropped

Unnamed: 0,Cell Type,input resistance,membrane time constant,resting membrane potential,spike amplitude,spike half-width,spike threshold
5,Basalis nucleus cholinergic neuron,268.00,28.20,-48.00,66.70,0.520,-31.90
6,Neocortex basket cell,142.00,4.80,-59.00,87.00,0.300,-40.00
8,Neocortex basket cell,182.00,7.70,-67.00,52.00,0.380,-34.00
10,Neocortex pyramidal cell layer 5-6,23.31,11.43,-72.69,87.46,0.630,-42.56
17,Neocortex pyramidal cell layer 5-6,363.40,9.09,-71.20,85.90,1.580,-40.70
48,Neocortex basket cell,246.00,13.90,-68.40,66.00,0.700,-32.90
79,Hippocampus CA1 pyramidal cell,260.00,40.00,-68.00,89.60,0.900,-47.50
183,Nucleus of the solitary tract principal cell,0.70,41.00,-56.00,61.00,1.700,-39.00
189,Olfactory cortex semilunar cell,221.20,26.00,-67.20,76.20,0.740,-36.00
190,Olfactory cortex pyramidal cell,55.00,10.10,-76.30,92.90,0.760,-41.50


In [9]:
# Run machine learning on this narrowed down dataset
X=mean_clean.drop("Cell Type", axis=1)
y=mean_clean["Cell Type"]

In [10]:
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(X,y,test_size=0.20, random_state=42)

In [11]:
mean_clean["spike threshold"].max()

763.0

### Random forest model

In [12]:
from sklearn.ensemble import RandomForestClassifier
#cls = classifier
cls= RandomForestClassifier(n_estimators=500, random_state=42)
cls.fit(x_train, y_train)
y_predict = cls.predict(x_test)

In [13]:
from sklearn.metrics import accuracy_score
# how many right divided by the total
print(accuracy_score(y_test, y_predict))

0.848101265823


In [14]:
# Save the random forest model
import pickle

filename = 'machine_learning3_forest.sav'
pickle.dump(cls, open(filename, 'wb'))

In [15]:
loaded_model = pickle.load(open(filename, 'rb'))
result = loaded_model.score(x_test, y_test)
print(result)

0.848101265823


### Boosting model (takes forever to run)

In [16]:
from sklearn.ensemble import GradientBoostingClassifier
#cls = classifier
cls= GradientBoostingClassifier(n_estimators=500, random_state=42)
cls.fit(x_train, y_train)
y_predict = cls.predict(x_test)

In [17]:
# Test accuracy of model
print(accuracy_score(y_test, y_predict))

0.860759493671


In [18]:
# Save the model
filename = 'machine_learning3_gradboost.sav'
pickle.dump(cls, open(filename, 'wb'))

### Load boosting model, take in user input

In [19]:
# load model
filename = 'machine_learning3_gradboost.sav'
gradboost_model = pickle.load(open(filename, 'rb'))
result = gradboost_model.score(x_test, y_test)
print(result)

0.860759493671


In [20]:
# Pick any row from our train or test data as input
user_input = np.array(x_train.iloc[42]).reshape(1,-1)

In [21]:
# pd.DataFrame({"true": y_test, "pred": y_predict})

#predicts probabilities for that class
# round(gradboost_model.predict_proba(user_input).max()*100,2)

# jsonify and pass to front end
output = {"predictions": gradboost_model.predict(user_input)[0], "probability": round(gradboost_model.predict_proba(user_input).max()*100,2)}
#jsonify this output
output

{'predictions': 'Neocortex pyramidal cell layer 5-6', 'probability': 100.0}

In [22]:
# Validate that the prediction was actually correct
y_train.iloc[42]

'Neocortex pyramidal cell layer 5-6'

In [23]:
# Number of classifications
y.nunique()

65

### Deep learning model

In [99]:
y_predict

array(['Neocortex basket cell', 'Neocortex pyramidal cell layer 2-3',
       'Neostriatum medium spiny neuron', 'Neocortex basket cell',
       'Neocortex Martinotti cell',
       'Substantia nigra pars compacta dopaminergic cell',
       'Neocortex pyramidal cell layer 5-6', 'Cerebellum granule cell',
       'Hippocampus CA1 pyramidal cell',
       'Neocortex pyramidal cell layer 5-6',
       'Neocortex pyramidal cell layer 5-6', 'Neocortex basket cell',
       'Neocortex uncharacterized cell', 'Neocortex basket cell',
       'Hippocampus CA3 stratum radiatum giant cell',
       'Neocortex Martinotti cell', 'Neostriatum gabaergic interneuron',
       'Dentate gyrus granule cell', 'Neocortex interneuron deep',
       'Dorsal root ganglion cell', 'Inferior colliculus neuron',
       'Thalamus relay cell', 'Neocortex pyramidal cell layer 5-6',
       'Trigeminal nucleus principal cell',
       'Spinal cord ventral horn motor neuron alpha',
       'Hippocampus CA1 pyramidal cell', 'Neocor

In [42]:
# Machine learning model will not work on data categories with only 1 row
# dropping all rows that have fewer than 5 data points associated with a category

counts = mean_clean['Cell Type'].value_counts()
greater_than_five = mean_clean[mean_clean['Cell Type'].isin(counts[counts >= 40].index)]

# Run machine learning on this narrowed down dataset
X=greater_than_five.drop("Cell Type", axis=1)
y=greater_than_five["Cell Type"]

In [43]:
greater_than_five

Unnamed: 0,Cell Type,input resistance,membrane time constant,resting membrane potential,spike amplitude,spike half-width,spike threshold
0,Dorsal root ganglion cell,192.000000,14.630000,-54.300000,101.300000,3.122222,-27.400000
2,Hippocampus CA1 pyramidal cell,100.600000,27.900000,-64.800000,84.865093,1.600000,-51.500000
4,Hippocampus CA3 pyramidal cell,164.000000,61.000000,-76.000000,78.985714,0.790000,-58.000000
6,Neocortex basket cell,142.000000,4.800000,-59.000000,87.000000,0.300000,-40.000000
8,Neocortex basket cell,182.000000,7.700000,-67.000000,52.000000,0.380000,-34.000000
9,Dorsal root ganglion cell,392.000000,27.000000,-59.000000,105.000000,3.122222,-36.000000
10,Neocortex pyramidal cell layer 5-6,23.310000,11.430000,-72.690000,87.460000,0.630000,-42.560000
11,Neostriatum medium spiny neuron,139.000000,12.198462,-83.500000,74.800000,1.040000,-33.900000
12,Dorsal root ganglion cell,225.000000,14.630000,-60.500000,100.200000,3.122222,-29.600000
13,Neocortex basket cell,163.920000,11.830000,-67.880000,65.474706,0.460000,-30.570000


In [44]:
greater_than_five["Cell Type"].value_counts()

Hippocampus CA1 pyramidal cell        164
Neocortex pyramidal cell layer 5-6    151
Neocortex pyramidal cell layer 2-3    141
Neocortex basket cell                 138
Neocortex Martinotti cell              83
Neocortex uncharacterized cell         73
Neostriatum medium spiny neuron        67
Dentate gyrus granule cell             57
Neocortex interneuron deep             53
Dorsal root ganglion cell              49
Hippocampus CA3 pyramidal cell         49
Name: Cell Type, dtype: int64

In [45]:
from sklearn.preprocessing import LabelEncoder, StandardScaler
from keras.utils import to_categorical

# This code works in machine_learning2 notebook but not here. y is the same.  How come?
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42, stratify=y)

In [46]:
from sklearn.preprocessing import LabelEncoder, StandardScaler
from keras.utils import to_categorical

X_scaler = StandardScaler().fit(X_train)
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

# Step 1: Label-encode data set
label_encoder = LabelEncoder()
label_encoder.fit(y_train)
encoded_y_train = label_encoder.transform(y_train)
encoded_y_test = label_encoder.transform(y_test)

# Step 2: Convert encoded labels to one-hot-encoding
y_train_categorical = to_categorical(encoded_y_train)
y_test_categorical = to_categorical(encoded_y_test)

In [47]:
y_train_list = y_train.tolist()

for i in range(0,len(y_train_list)):
    print(y_train_list[i] + " is now " + str(y_train_categorical[i]))

Hippocampus CA1 pyramidal cell is now [0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0.]
Neocortex interneuron deep is now [0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0.]
Neocortex pyramidal cell layer 2-3 is now [0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0.]
Neocortex pyramidal cell layer 5-6 is now [0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0.]
Neocortex pyramidal cell layer 2-3 is now [0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0.]
Neostriatum medium spiny neuron is now [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1.]
Neocortex pyramidal cell layer 2-3 is now [0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0.]
Hippocampus CA1 pyramidal cell is now [0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0.]
Hippocampus CA3 pyramidal cell is now [0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0.]
Neocortex pyramidal cell layer 2-3 is now [0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0.]
Neocortex pyramidal cell layer 5-6 is now [0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0.]
Neocortex Martinotti cell is now [0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0.]
Neostriatum medium spiny neuron is now [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1.]
Neocortex basket cell is now [0. 0

Neocortex basket cell is now [0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0.]
Neocortex pyramidal cell layer 2-3 is now [0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0.]
Hippocampus CA1 pyramidal cell is now [0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0.]
Neostriatum medium spiny neuron is now [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1.]
Neostriatum medium spiny neuron is now [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1.]
Neostriatum medium spiny neuron is now [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1.]
Neocortex pyramidal cell layer 2-3 is now [0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0.]
Neocortex pyramidal cell layer 2-3 is now [0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0.]
Neocortex pyramidal cell layer 2-3 is now [0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0.]
Neocortex uncharacterized cell is now [0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0.]
Neocortex pyramidal cell layer 2-3 is now [0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0.]
Hippocampus CA1 pyramidal cell is now [0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0.]
Dentate gyrus granule cell is now [1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
Neocortex pyramidal cell layer 5-6 is now

Neostriatum medium spiny neuron is now [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1.]
Hippocampus CA1 pyramidal cell is now [0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0.]
Hippocampus CA1 pyramidal cell is now [0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0.]
Neocortex pyramidal cell layer 5-6 is now [0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0.]
Neocortex pyramidal cell layer 5-6 is now [0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0.]
Neocortex uncharacterized cell is now [0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0.]
Hippocampus CA3 pyramidal cell is now [0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0.]
Hippocampus CA3 pyramidal cell is now [0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0.]
Dorsal root ganglion cell is now [0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
Neocortex basket cell is now [0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0.]
Neocortex interneuron deep is now [0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0.]
Neocortex pyramidal cell layer 5-6 is now [0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0.]
Dentate gyrus granule cell is now [1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
Neocortex basket cell is now [0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 

In [51]:
from keras.models import Sequential
from keras.layers import Dense

# Create model and add layers
model = Sequential()
model.add(Dense(units=18, activation='relu', input_dim=6))
model.add(Dense(units=20, activation='relu'))
model.add(Dense(units=11, activation='softmax'))

In [52]:
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_7 (Dense)              (None, 18)                126       
_________________________________________________________________
dense_8 (Dense)              (None, 20)                380       
_________________________________________________________________
dense_9 (Dense)              (None, 11)                231       
Total params: 737
Trainable params: 737
Non-trainable params: 0
_________________________________________________________________


In [50]:
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])
model.fit(
    X_train_scaled,
    y_train_categorical,
    epochs=100,
    shuffle=True,
    verbose=2
)

Epoch 1/100
 - 1s - loss: 2.4397 - acc: 0.0443
Epoch 2/100
 - 0s - loss: 2.3592 - acc: 0.1003
Epoch 3/100
 - 0s - loss: 2.2890 - acc: 0.2487
Epoch 4/100
 - 0s - loss: 2.2159 - acc: 0.3815
Epoch 5/100
 - 0s - loss: 2.1298 - acc: 0.4635
Epoch 6/100
 - 0s - loss: 2.0252 - acc: 0.4818
Epoch 7/100
 - 0s - loss: 1.9143 - acc: 0.4857
Epoch 8/100
 - 0s - loss: 1.8126 - acc: 0.4857
Epoch 9/100
 - 0s - loss: 1.7247 - acc: 0.4844
Epoch 10/100
 - 0s - loss: 1.6502 - acc: 0.4922
Epoch 11/100
 - 0s - loss: 1.5857 - acc: 0.5065
Epoch 12/100
 - 0s - loss: 1.5267 - acc: 0.5091
Epoch 13/100
 - 0s - loss: 1.4720 - acc: 0.5221
Epoch 14/100
 - 0s - loss: 1.4233 - acc: 0.5469
Epoch 15/100
 - 0s - loss: 1.3813 - acc: 0.5508
Epoch 16/100
 - 0s - loss: 1.3414 - acc: 0.5768
Epoch 17/100
 - 0s - loss: 1.3062 - acc: 0.5911
Epoch 18/100
 - 0s - loss: 1.2741 - acc: 0.6094
Epoch 19/100
 - 0s - loss: 1.2435 - acc: 0.6185
Epoch 20/100
 - 0s - loss: 1.2183 - acc: 0.6419
Epoch 21/100
 - 0s - loss: 1.1938 - acc: 0.6458
E

<keras.callbacks.History at 0x214a7d00780>

In [22]:
model_loss, model_accuracy = model.evaluate(
    X_test_scaled, y_test_categorical, verbose=2)
print(
    f"Normal Neural Network - Loss: {model_loss}, Accuracy: {model_accuracy}")

Normal Neural Network - Loss: 1.1111063689921152, Accuracy: 0.7626459143968871


In [None]:
# Save the model
model.save("machine_learning3_seq.h5")

# Getting model to predict a neuron (user input is hard-coded)

In [None]:
data = np.array([192,14,-54,101,3,-27]).reshape(6,)
# print(data.shape)
prediction = model.predict(np.array([data]))
print(prediction.shape)
# prediction=prediction.astype(int)
# prediction_string = prediction.argmax(axis=-1)
# type(prediction_string)
print(prediction)
# print(list(label_encoder.classes_))
# list(label_encoder.inverse_transform(prediction))

# prediction_string = keras.np_utils.probas_to_classes(prediction)
# prediction_string
# prediction_string = model.predict_classes(np.array([data]))
# prediction_string
print(type(prediction))

In [None]:
print(prediction.tolist())
the_pred = prediction.tolist()
flat_pred= [y for x in the_pred for y in x]
print(flat_pred)
count = 0
for i in flat_pred:
    count +=1
    #print(i)
    #print(flat_pred[i])
    if i == 1:
        the_index = i
        print(label_encoder.inverse_transform(count))

In [None]:
y_train_list = y_train.tolist()

for i in range(0,len(y_train_list)):
    print(y_train_list[i] + " is now " + str(y_train_categorical[i]))

In [None]:
prediction
np.unique(y_train_categorical)
type(y_train_categorical)
unique_rows = np.unique(y_train_categorical, axis=0)
unique_rows

In [None]:
# I can't zip this together with unique_rows because they're not in the same order
my_set = set(y_train_list)
my_set

In [None]:
y_train_categorical

In [None]:
np.array(y_train)

In [None]:
list(prediction)

In [None]:
# zip pre-encoded and post-encoded classifications together
categories = list(zip(y_train_categorical, np.array(y_train)))
categories[1]

my_lists = []
for i in categories:
    my_lists.append(list(i))
my_lists[1]

In [None]:
# if the prediction array is the same as the encoded classification, print the corresponding label
for i in my_lists:
#     print(i[0])
#     print(prediction[0])
    if all(i[0] == prediction[0]):
        result = i[1]
print(result)

In [None]:
y_train_categorical

# Load model, predict using dynamic user data

In [53]:
from keras.models import load_model
seq_model=load_model("machine_learning3_seq.h5")

In [54]:
# Saving minimum and maximum values for each measurement into variables

# Min/ max input resistance values
min_ir = greater_than_five["input resistance"].min()
max_ir = greater_than_five["input resistance"].max()
# Min/max membrane time constant values
min_mtc = greater_than_five["membrane time constant"].min()
max_mtc = greater_than_five["membrane time constant"].max()
# Min/ max resting membrane potential values
min_rmp = greater_than_five["resting membrane potential"].min()
max_rmp = greater_than_five["resting membrane potential"].max()
# Min/max spike amplitude values
min_sa = greater_than_five["spike amplitude"].min()
max_sa = greater_than_five["spike amplitude"].max()
# Min/max spike half-width values
min_shw = greater_than_five["spike half-width"].min()
max_shw = greater_than_five["spike half-width"].max()
# Min/max spike threshold values
min_st = greater_than_five["spike threshold"].min()
max_st = greater_than_five["spike threshold"].max()

In [55]:
# Attempt to pass in user input into "data" below using input()
input_resistance = input("input_resistance value (between {} and {}): ".format(min_ir, max_ir))
membrane_time_constant = input("membrane_time_constant value (between {} and {}): ".format(min_mtc, max_mtc))
resting_membrane_potential = input("resting_membrane_potential value (between {} and {}): ".format(min_rmp, max_rmp))
spike_amplitude = input("spike_amplitude value (between {} and {}): ".format(min_sa, max_sa))
spike_halfwidth = input("spike_half-width value (between {} and {}): ".format(min_shw, max_shw))
spike_threshold = input("spike_threshold value (between {} and {}): ".format(min_st, max_st))

input_resistance
membrane_time_constant
resting_membrane_potential
spike_amplitude
spike_halfwidth
spike_threshold

input_resistance value (between -191.1 and 1435.0): 40
membrane_time_constant value (between -38.0 and 283.0): 200
resting_membrane_potential value (between -95.3 and 88.7): 80
spike_amplitude value (between -7.21 and 116.9): 10
spike_half-width value (between 0.19 and 810.0): 500
spike_threshold value (between -63.5 and 763.0): -40


'-40'

In [57]:
# Place user data into numpy array in same order as dataframe's columns
user_data = np.array([input_resistance, membrane_time_constant, resting_membrane_potential, spike_amplitude, spike_halfwidth, spike_threshold]).reshape(6,).astype(np.float32)
user_data

array([ 40., 200.,  80.,  10., 500., -40.], dtype=float32)

In [58]:
prediction = seq_model.predict(np.array([user_data]))
prediction

array([[0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0.]], dtype=float32)

In [78]:
categories = list(zip(y_train_categorical, np.array(y_train)))
categories[1]

my_lists = []
for i in categories:
    my_lists.append(list(i))
my_lists[1]

[array([0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0.], dtype=float32),
 'Hippocampus CA1 pyramidal cell']

In [83]:
y_train.unique()

array(['Hippocampus CA1 pyramidal cell',
       'Medial entorhinal cortex layer II stellate cell',
       'Neocortex basket cell', 'Neostriatum cholinergic cell',
       'Neocortex pyramidal cell layer 2-3', 'Thalamus relay cell',
       'Neostriatum medium spiny neuron', 'Subiculum pyramidal cell',
       'Olfactory bulb (main) Blanes cell',
       'Neocortex uncharacterized cell',
       'Spinal cord ventral horn motor neuron alpha',
       'Locus coeruleus noradrenergic neuron',
       'Neocortex pyramidal cell layer 5-6',
       'Neocortex bouquet double cell',
       'Medial entorhinal cortex layer III pyramidal cell',
       'Nucleus accumbens shell neuron', 'Dorsal root ganglion cell',
       'Lateral amygdala projection neuron',
       'Nucleus of the solitary tract principal cell',
       'Trigeminal nucleus principal cell', 'Dentate gyrus granule cell',
       'Neocortex interneuron deep', 'Neocortex Martinotti cell',
       'Trigeminal nucleus motor neuron',
       'Olfactor

In [82]:
for i in my_lists:
    print(i[1])

Hippocampus CA1 pyramidal cell
Hippocampus CA1 pyramidal cell
Medial entorhinal cortex layer II stellate cell
Neocortex basket cell
Neostriatum cholinergic cell
Neocortex pyramidal cell layer 2-3
Thalamus relay cell
Neostriatum medium spiny neuron
Subiculum pyramidal cell
Olfactory bulb (main) Blanes cell
Neocortex uncharacterized cell
Hippocampus CA1 pyramidal cell
Neostriatum medium spiny neuron
Spinal cord ventral horn motor neuron alpha
Locus coeruleus noradrenergic neuron
Neocortex pyramidal cell layer 5-6
Neocortex basket cell
Subiculum pyramidal cell
Neocortex uncharacterized cell
Neocortex bouquet double cell
Neocortex pyramidal cell layer 2-3
Neocortex pyramidal cell layer 5-6
Neocortex basket cell
Neocortex basket cell
Thalamus relay cell
Neocortex pyramidal cell layer 2-3
Medial entorhinal cortex layer III pyramidal cell
Neostriatum medium spiny neuron
Nucleus accumbens shell neuron
Dorsal root ganglion cell
Lateral amygdala projection neuron
Neostriatum medium spiny neuron


In [79]:
for i in my_lists:
#     print(i[0])
#     print(prediction[0])
    if all(i[0] == prediction[0]):
        result = i[1]
print(result + )

Neocortex pyramidal cell layer 5-6


# Filter data, run machine learning on filtered data (probably won't use this)

In [None]:
words = ["CA1", "CA3"]
for i, row in cleaned.iterrows():
    if any(word in row["Cell Type"] for word in words):
#         print("true")0
        cleaned.loc[i, "Area"] = "Hippocampus"
#         row["Area"] = "Hippocampus"
    else:
        cleaned.loc[i, "Area"] = "other"
        row["Area"] = "other"

In [None]:
hippo_df = cleaned.loc[cleaned["Area"] == "Hippocampus"]
X=hippo_df.drop(["Cell Type", "Area"], axis=1)
y=hippo_df["Cell Type"]

In [None]:
print(y)

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from keras.utils import to_categorical

y=y.tolist()

X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1)

X_scaler = StandardScaler().fit(X_train)
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

# Step 1: Label-encode data set
encoded_y_train = label_encoder.transform(y_train)
encoded_y_test = label_encoder.transform(y_test)

# Step 2: Convert encoded labels to one-hot-encoding
y_train_categorical = to_categorical(encoded_y_train)
y_test_categorical = to_categorical(encoded_y_test)

In [None]:
y_train

In [None]:
y_train_categorical