# Dummy variable machine learning and filtering down measurement types to top 6 using all neuron types

In [1]:
import pandas as pd
import numpy as np

In [2]:
neuro = pd.read_csv("all_neuron_data.csv")
neuro.head()

Unnamed: 0,Cell Type,Value,Measurement
0,Dorsal root ganglion cell,-54.3,resting membrane potential
1,Dorsal root ganglion cell,-27.4,spike threshold
2,Dorsal root ganglion cell,101.3,spike amplitude
3,Dorsal root ganglion cell,2.0,spike width
4,Dorsal root ganglion cell,0.39,rheobase


In [3]:
# neuro = neuro[["input resistance", "membrane time constant", "resting membrane potential", "spike amplitude", \
#               "spike half-width", "spike threshold"]]
# neuro
neuro_filtered = neuro.loc[(neuro["Measurement"] == "input resistance") |
                          (neuro["Measurement"] == "resting membrane potential") |
                          (neuro["Measurement"] == "spike threshold") |
                          (neuro["Measurement"] == "spike half-width") |
                          (neuro["Measurement"] == "spike amplitude") |
                          (neuro["Measurement"] == "membrane time constant")]

neuro_filtered.head()


Unnamed: 0,Cell Type,Value,Measurement
0,Dorsal root ganglion cell,-54.3,resting membrane potential
1,Dorsal root ganglion cell,-27.4,spike threshold
2,Dorsal root ganglion cell,101.3,spike amplitude
5,Dorsal root ganglion cell,192.0,input resistance
7,Spinal cord intermediate horn motor neuron sym...,-59.8,resting membrane potential


In [4]:
# Saving minimum and maximum values for each measurement into variables

# Min/ max input resistance values
min_ir = neuro[neuro["Measurement"] == "input resistance"]["Value"].min()
max_ir = neuro[neuro["Measurement"] == "input resistance"]["Value"].max()
# Min/ max resting membrane potential values
min_rmp = neuro[neuro["Measurement"] == "resting membrane potential"]["Value"].min()
max_rmp = neuro[neuro["Measurement"] == "resting membrane potential"]["Value"].max()
# Min/max spike threshold values
min_st = neuro[neuro["Measurement"] == "spike threshold"]["Value"].min()
max_st = neuro[neuro["Measurement"] == "spike threshold"]["Value"].max()
# Min/max spike half-width values
min_shw = neuro[neuro["Measurement"] == "spike half-width"]["Value"].min()
max_shw = neuro[neuro["Measurement"] == "spike half-width"]["Value"].max()
# Min/max spike amplitude values
min_sa = neuro[neuro["Measurement"] == "spike amplitude"]["Value"].min()
max_sa = neuro[neuro["Measurement"] == "spike amplitude"]["Value"].max()
# Min/max membrane time constant values
min_mtc = neuro[neuro["Measurement"] == "membrane time constant"]["Value"].min()
max_mtc = neuro[neuro["Measurement"] == "membrane time constant"]["Value"].max()

In [5]:
categorized_df = pd.get_dummies(neuro_filtered, columns=["Measurement"])
categorized_df.head()

Unnamed: 0,Cell Type,Value,Measurement_input resistance,Measurement_membrane time constant,Measurement_resting membrane potential,Measurement_spike amplitude,Measurement_spike half-width,Measurement_spike threshold
0,Dorsal root ganglion cell,-54.3,0,0,1,0,0,0
1,Dorsal root ganglion cell,-27.4,0,0,0,0,0,1
2,Dorsal root ganglion cell,101.3,0,0,0,1,0,0
5,Dorsal root ganglion cell,192.0,1,0,0,0,0,0
7,Spinal cord intermediate horn motor neuron sym...,-59.8,0,0,1,0,0,0


In [6]:
counts = categorized_df['Cell Type'].value_counts()
greater_than_five = categorized_df[categorized_df['Cell Type'].isin(counts[counts >= 5].index)]
greater_than_five["Cell Type"].value_counts()
greater_than_five[greater_than_five['Cell Type'] != "Other"]["Cell Type"].value_counts()

# Remove "Other" cells from data
greater_than_five = greater_than_five[greater_than_five['Cell Type'] != "Other"]

In [7]:
# categorize and filter greater_than_5 b/c model sucks at predicting
hippos = ["CA1", "CA2", "CA3", "Subiculum", "Dentate"]

for i, row in greater_than_five.iterrows():
    if any(hippo.lower() in row["Cell Type"].lower() for hippo in hippos):
#         print(hippo)
        greater_than_five.loc[i, "Structure"] = "Hippocampus"
#         row["Area"] = "Hippocampus"
    else:
        greater_than_five.loc[i, "Structure"] = "other"
#         row["Area"] = "other"
#     print(row["Cell Type"])
greater_than_five.head(10)

Unnamed: 0,Cell Type,Value,Measurement_input resistance,Measurement_membrane time constant,Measurement_resting membrane potential,Measurement_spike amplitude,Measurement_spike half-width,Measurement_spike threshold,Structure
0,Dorsal root ganglion cell,-54.3,0,0,1,0,0,0,other
1,Dorsal root ganglion cell,-27.4,0,0,0,0,0,1,other
2,Dorsal root ganglion cell,101.3,0,0,0,1,0,0,other
5,Dorsal root ganglion cell,192.0,1,0,0,0,0,0,other
7,Spinal cord intermediate horn motor neuron sym...,-59.8,0,0,1,0,0,0,other
8,Spinal cord intermediate horn motor neuron sym...,1.14,1,0,0,0,0,0,other
9,Spinal cord intermediate horn motor neuron sym...,92.4,0,1,0,0,0,0,other
11,Spinal cord intermediate horn motor neuron sym...,57.1,0,0,0,1,0,0,other
13,Spinal cord intermediate horn motor neuron sym...,-45.3,0,0,0,0,0,1,other
18,Hippocampus CA1 pyramidal cell,-51.5,0,0,0,0,0,1,Hippocampus


# Begin machine learning

In [8]:
hippo_df = greater_than_five.loc[greater_than_five["Structure"] == "Hippocampus"]
X=hippo_df.drop(["Cell Type", "Structure"], axis=1)
y=hippo_df["Cell Type"]

In [9]:
X.head()

Unnamed: 0,Value,Measurement_input resistance,Measurement_membrane time constant,Measurement_resting membrane potential,Measurement_spike amplitude,Measurement_spike half-width,Measurement_spike threshold
18,-51.5,0,0,0,0,0,1
19,1.6,0,0,0,0,1,0
21,27.9,0,1,0,0,0,0
22,-64.8,0,0,1,0,0,0
23,100.6,1,0,0,0,0,0


In [10]:
X.shape

(1633, 7)

In [11]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from keras.utils import to_categorical

X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=1, stratify=y)
X_scaler = StandardScaler().fit(X_train)
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

# Step 1: Label-encode data set
label_encoder = LabelEncoder()
label_encoder.fit(y_train)
encoded_y_train = label_encoder.transform(y_train)
encoded_y_test = label_encoder.transform(y_test)

# Step 2: Convert encoded labels to one-hot-encoding
y_train_categorical = to_categorical(encoded_y_train)
y_test_categorical = to_categorical(encoded_y_test)

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [12]:
# What are these numbers and why are there 7 of them??
# May have a bias term
X_test_scaled[0]

array([-0.25976585, -0.56854485, -0.38778337, -0.53831098, -0.38638811,
        2.64575131, -0.41251919])

In [13]:
np.array(X_test_scaled).shape

(409, 7)

In [14]:
from keras.models import Sequential
from keras.layers import Dense

# Create model and add layers
model = Sequential()
model.add(Dense(units=100, activation='relu', input_dim=7))
model.add(Dense(units=100, activation='relu'))
model.add(Dense(units=19, activation='softmax'))

In [15]:
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])
model.fit(
    X_train_scaled,
    y_train_categorical,
    epochs=60,
    shuffle=True,
    verbose=2
)

Epoch 1/60
 - 0s - loss: 2.4402 - acc: 0.3905
Epoch 2/60
 - 0s - loss: 1.9729 - acc: 0.4632
Epoch 3/60
 - 0s - loss: 1.9339 - acc: 0.4673
Epoch 4/60
 - 0s - loss: 1.9217 - acc: 0.4673
Epoch 5/60
 - 0s - loss: 1.9172 - acc: 0.4706
Epoch 6/60
 - 0s - loss: 1.9087 - acc: 0.4706
Epoch 7/60
 - 0s - loss: 1.9056 - acc: 0.4690
Epoch 8/60
 - 0s - loss: 1.9026 - acc: 0.4706
Epoch 9/60
 - 0s - loss: 1.9029 - acc: 0.4722
Epoch 10/60
 - 0s - loss: 1.8993 - acc: 0.4706
Epoch 11/60
 - 0s - loss: 1.8981 - acc: 0.4698
Epoch 12/60
 - 0s - loss: 1.8984 - acc: 0.4690
Epoch 13/60
 - 0s - loss: 1.8931 - acc: 0.4714
Epoch 14/60
 - 0s - loss: 1.8890 - acc: 0.4714
Epoch 15/60
 - 0s - loss: 1.8856 - acc: 0.4706
Epoch 16/60
 - 0s - loss: 1.8849 - acc: 0.4698
Epoch 17/60
 - 0s - loss: 1.8840 - acc: 0.4714
Epoch 18/60
 - 0s - loss: 1.8811 - acc: 0.4690
Epoch 19/60
 - 0s - loss: 1.8847 - acc: 0.4673
Epoch 20/60
 - 0s - loss: 1.8858 - acc: 0.4714
Epoch 21/60
 - 0s - loss: 1.8847 - acc: 0.4706
Epoch 22/60
 - 0s - lo

<keras.callbacks.History at 0x23c8db6e780>

In [16]:
model_loss, model_accuracy = model.evaluate(
    X_test_scaled, y_test_categorical, verbose=2)
print(
    f"Normal Neural Network - Loss: {model_loss}, Accuracy: {model_accuracy}")

Normal Neural Network - Loss: 1.9660031323327996, Accuracy: 0.4694376526660033


In [None]:
# Save the model
model.save("machine_learning4.h5")

In [17]:
print(model.summary())

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_1 (Dense)              (None, 100)               800       
_________________________________________________________________
dense_2 (Dense)              (None, 100)               10100     
_________________________________________________________________
dense_3 (Dense)              (None, 19)                1919      
Total params: 12,819
Trainable params: 12,819
Non-trainable params: 0
_________________________________________________________________
None


# Take in user input into saved model

In [None]:
input_resistance = input("input_resistance value (between {} and {}): ".format(min_ir, max_ir))
membrane_time_constant = input("membrane_time_constant value (between {} and {}): ".format(min_mtc, max_mtc))
resting_membrane_potential = input("resting_membrane_potential value (between {} and {}): ".format(min_rmp, max_rmp))
spike_amplitude = input("spike_amplitude value (between {} and {}): ".format(min_sa, max_sa))
spike_halfwidth = input("spike_half-width value (between {} and {}): ".format(min_shw, max_shw))
spike_threshold = input("spike_threshold value (between {} and {}): ".format(min_st, max_st))

input_resistance
membrane_time_constant
resting_membrane_potential
spike_amplitude
spike_halfwidth
spike_threshold

In [None]:
# Place user data into numpy array in same order as dataframe's columns
user_data = np.array([input_resistance, membrane_time_constant, resting_membrane_potential, spike_amplitude, spike_halfwidth, spike_threshold]).reshape(6,)
user_data

In [43]:
# # model takes in x_test_scaled, y_test_categorical data types
# data = []
# data.extend([input_resistance, resting_membrane_potential, spike_threshold, spike_halfwidth, spike_amplitude, \
#             membrane_time_constant])
# data = list(map(int, data))
# # np.array(data).shape
# data

[200, 50, 700, 1000, 100, 50]

# Format input data into a dummy variable df to match training data format

In [35]:
measurement_list = ["input_resistance", "resting_membrane_potential", "spike_threshold", "spike_halfwidth", "spike_amplitude", \
            "membrane_time_constant"]
formatted_input = list(zip(data, measurement_list))
formatted_input_df = pd.DataFrame(formatted_input, columns=["Value", "Measurement"]) 
formatted_input_df

Unnamed: 0,Value,Measurement
0,200,input_resistance
1,50,resting_membrane_potential
2,700,spike_threshold
3,1000,spike_halfwidth
4,100,spike_amplitude
5,50,membrane_time_constant


In [36]:
categorized_input_df = pd.get_dummies(formatted_input_df, columns=["Measurement"])
categorized_input_df

Unnamed: 0,Value,Measurement_input_resistance,Measurement_membrane_time_constant,Measurement_resting_membrane_potential,Measurement_spike_amplitude,Measurement_spike_halfwidth,Measurement_spike_threshold
0,200,1,0,0,0,0,0
1,50,0,0,1,0,0,0
2,700,0,0,0,0,0,1
3,1000,0,0,0,0,1,0
4,100,0,0,0,1,0,0
5,50,0,1,0,0,0,0


In [38]:
# Mimicking the process of creating X_test_scaled from above
# Don't want to fit user data
# input_scaler = StandardScaler().fit(categorized_input_df)

input_scaled = StandardScaler().transform(categorized_input_df)
input_scaled

NotFittedError: This StandardScaler instance is not fitted yet. Call 'fit' with appropriate arguments before using this method.

In [33]:
np.array(input_scaled).shape

(6, 7)

In [60]:
data= np.array(data).reshape(6,)
print(data.shape)
print(neuro_model)

(6,)
<keras.engine.sequential.Sequential object at 0x000001FC892664A8>


In [64]:
from keras.models import load_model
neuro_model = load_model("machine_learning4.h5")
neuro_model.summary()


_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_4 (Dense)              (None, 100)               800       
_________________________________________________________________
dense_5 (Dense)              (None, 100)               10100     
_________________________________________________________________
dense_6 (Dense)              (None, 19)                1919      
Total params: 12,819
Trainable params: 12,819
Non-trainable params: 0
_________________________________________________________________


In [77]:
print(data)

# Change the model being used
# Maybe transpose data later
neuro_model.predict(np.array([[200,1,0,0,0,0,0]]))

[ 200   50  700 1000  100   50]


array([[0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0.]], dtype=float32)

In [71]:
np.array([data])

array([[ 200,   50,  700, 1000,  100,   50]])