# Dummy variable machine learning and filtering down measurement types to top 6 using all neuron types

In [1]:
import pandas as pd
import numpy as np

In [2]:
neuro = pd.read_csv("all_neuron_data.csv")
neuro.head()

Unnamed: 0,Cell Type,Value,Measurement
0,Dorsal root ganglion cell,-54.3,resting membrane potential
1,Dorsal root ganglion cell,-27.4,spike threshold
2,Dorsal root ganglion cell,101.3,spike amplitude
3,Dorsal root ganglion cell,2.0,spike width
4,Dorsal root ganglion cell,0.39,rheobase


In [3]:
# neuro = neuro[["input resistance", "membrane time constant", "resting membrane potential", "spike amplitude", \
#               "spike half-width", "spike threshold"]]
# neuro
neuro_filtered = neuro.loc[(neuro["Measurement"] == "input resistance") |
                          (neuro["Measurement"] == "resting membrane potential") |
                          (neuro["Measurement"] == "spike threshold") |
                          (neuro["Measurement"] == "spike half-width") |
                          (neuro["Measurement"] == "spike amplitude") |
                          (neuro["Measurement"] == "membrane time constant")]

neuro_filtered.head()


Unnamed: 0,Cell Type,Value,Measurement
0,Dorsal root ganglion cell,-54.3,resting membrane potential
1,Dorsal root ganglion cell,-27.4,spike threshold
2,Dorsal root ganglion cell,101.3,spike amplitude
5,Dorsal root ganglion cell,192.0,input resistance
7,Spinal cord intermediate horn motor neuron sym...,-59.8,resting membrane potential


In [4]:
# Saving minimum and maximum values for each measurement into variables

# Min/ max input resistance values
min_ir = neuro[neuro["Measurement"] == "input resistance"]["Value"].min()
max_ir = neuro[neuro["Measurement"] == "input resistance"]["Value"].max()
# Min/ max resting membrane potential values
min_rmp = neuro[neuro["Measurement"] == "resting membrane potential"]["Value"].min()
max_rmp = neuro[neuro["Measurement"] == "resting membrane potential"]["Value"].max()
# Min/max spike threshold values
min_st = neuro[neuro["Measurement"] == "spike threshold"]["Value"].min()
max_st = neuro[neuro["Measurement"] == "spike threshold"]["Value"].max()
# Min/max spike half-width values
min_shw = neuro[neuro["Measurement"] == "spike half-width"]["Value"].min()
max_shw = neuro[neuro["Measurement"] == "spike half-width"]["Value"].max()
# Min/max spike amplitude values
min_sa = neuro[neuro["Measurement"] == "spike amplitude"]["Value"].min()
max_sa = neuro[neuro["Measurement"] == "spike amplitude"]["Value"].max()
# Min/max membrane time constant values
min_mtc = neuro[neuro["Measurement"] == "membrane time constant"]["Value"].min()
max_mtc = neuro[neuro["Measurement"] == "membrane time constant"]["Value"].max()

In [11]:
categorized_df = pd.get_dummies(neuro_filtered, columns=["Measurement"])
categorized_df.head()

Unnamed: 0,Cell Type,Value,Measurement_input resistance,Measurement_membrane time constant,Measurement_resting membrane potential,Measurement_spike amplitude,Measurement_spike half-width,Measurement_spike threshold
0,Dorsal root ganglion cell,-54.3,0,0,1,0,0,0
1,Dorsal root ganglion cell,-27.4,0,0,0,0,0,1
2,Dorsal root ganglion cell,101.3,0,0,0,1,0,0
5,Dorsal root ganglion cell,192.0,1,0,0,0,0,0
7,Spinal cord intermediate horn motor neuron sym...,-59.8,0,0,1,0,0,0


In [19]:
counts = categorized_df['Cell Type'].value_counts()
greater_than_five = categorized_df[categorized_df['Cell Type'].isin(counts[counts >= 5].index)]
greater_than_five["Cell Type"].value_counts()
greater_than_five[greater_than_five['Cell Type'] != "Other"]["Cell Type"].value_counts()

# Remove "Other" cells from data
greater_than_five = greater_than_five[greater_than_five['Cell Type'] != "Other"]

In [20]:
# categorize and filter greater_than_5 b/c model sucks at predicting
hippos = ["CA1", "CA2", "CA3", "Subiculum", "Dentate"]

for i, row in greater_than_five.iterrows():
    if any(hippo.lower() in row["Cell Type"].lower() for hippo in hippos):
#         print(hippo)
        greater_than_five.loc[i, "Structure"] = "Hippocampus"
#         row["Area"] = "Hippocampus"
    else:
        greater_than_five.loc[i, "Structure"] = "other"
#         row["Area"] = "other"
#     print(row["Cell Type"])
greater_than_five.head(10)

Unnamed: 0,Cell Type,Value,Measurement_input resistance,Measurement_membrane time constant,Measurement_resting membrane potential,Measurement_spike amplitude,Measurement_spike half-width,Measurement_spike threshold,Structure
0,Dorsal root ganglion cell,-54.3,0,0,1,0,0,0,other
1,Dorsal root ganglion cell,-27.4,0,0,0,0,0,1,other
2,Dorsal root ganglion cell,101.3,0,0,0,1,0,0,other
5,Dorsal root ganglion cell,192.0,1,0,0,0,0,0,other
7,Spinal cord intermediate horn motor neuron sym...,-59.8,0,0,1,0,0,0,other
8,Spinal cord intermediate horn motor neuron sym...,1.14,1,0,0,0,0,0,other
9,Spinal cord intermediate horn motor neuron sym...,92.4,0,1,0,0,0,0,other
11,Spinal cord intermediate horn motor neuron sym...,57.1,0,0,0,1,0,0,other
13,Spinal cord intermediate horn motor neuron sym...,-45.3,0,0,0,0,0,1,other
18,Hippocampus CA1 pyramidal cell,-51.5,0,0,0,0,0,1,Hippocampus


# Begin machine learning

In [21]:
hippo_df = greater_than_five.loc[greater_than_five["Structure"] == "Hippocampus"]
X=hippo_df.drop(["Cell Type", "Structure"], axis=1)
y=hippo_df["Cell Type"]

In [29]:
X.shape

(1633, 7)

In [23]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from keras.utils import to_categorical

X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=1, stratify=y)
X_scaler = StandardScaler().fit(X_train)
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

# Step 1: Label-encode data set
label_encoder = LabelEncoder()
label_encoder.fit(y_train)
encoded_y_train = label_encoder.transform(y_train)
encoded_y_test = label_encoder.transform(y_test)

# Step 2: Convert encoded labels to one-hot-encoding
y_train_categorical = to_categorical(encoded_y_train)
y_test_categorical = to_categorical(encoded_y_test)

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [27]:
np.array(X_test_scaled).shape

(409, 7)

In [None]:
from keras.models import Sequential
from keras.layers import Dense

# Create model and add layers
model = Sequential()
model.add(Dense(units=100, activation='relu', input_dim=6))
model.add(Dense(units=100, activation='relu'))
model.add(Dense(units=19, activation='softmax'))

In [None]:
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])
model.fit(
    X_train_scaled,
    y_train_categorical,
    epochs=60,
    shuffle=True,
    verbose=2
)

In [None]:
model_loss, model_accuracy = model.evaluate(
    X_test_scaled, y_test_categorical, verbose=2)
print(
    f"Normal Neural Network - Loss: {model_loss}, Accuracy: {model_accuracy}")

In [None]:
# Save the model
model.save("machine_learning4.h5")

# Take in user input into saved model

In [7]:
input_resistance = input("input_resistance value (between {} and {}): ".format(min_ir, max_ir))
resting_membrane_potential = input("resting_membrane_potential value (between {} and {}): ".format(min_rmp, max_rmp))
spike_threshold = input("spike_threshold value (between {} and {}): ".format(min_st, max_st))
spike_halfwidth = input("spike_half-width value (between {} and {}): ".format(min_shw, max_shw))
spike_amplitude = input("spike_amplitude value (between {} and {}): ".format(min_sa, max_sa))
membrane_time_constant = input("membrane_time_constant value (between {} and {}): ".format(min_mtc, max_mtc))
resting_membrane_potential
spike_threshold
spike_halfwidth
spike_amplitude
membrane_time_constant

input_resistance value (between -330.3 and 2250.0): 2200
resting_membrane_potential value (between -95.3 and 88.7): 50
spike_threshold value (between -75.0 and 763.0): 700
spike_half-width value (between 0.026000000000000002 and 1081.0): 100
spike_amplitude value (between -84.5 and 160.0): -30
membrane_time_constant value (between -38.0 and 285.0): -30


'-30'

In [12]:
# model takes in x_test_scaled, y_test_categorical data types
data = []
data.extend([input_resistance, resting_membrane_potential, spike_threshold, spike_halfwidth, spike_amplitude, \
            membrane_time_constant])
data
# np.array(data).shape

['2200', '50', '700', '100', '-30', '-30']

# Format input data into a dummy variable df to match training data format

In [17]:
measurement_list = ["input_resistance", "resting_membrane_potential", "spike_threshold", "spike_halfwidth", "spike_amplitude", \
            "membrane_time_constant"]
formatted_input = list(zip(data, measurement_list))
formatted_input_df = pd.DataFrame(formatted_input, columns=["Value", "Measurement"]) 
formatted_input_df

Unnamed: 0,Value,Measurement
0,2200,input_resistance
1,50,resting_membrane_potential
2,700,spike_threshold
3,100,spike_halfwidth
4,-30,spike_amplitude
5,-30,membrane_time_constant


In [18]:
categorized_input_df = pd.get_dummies(formatted_input_df, columns=["Measurement"])
categorized_input_df

Unnamed: 0,Value,Measurement_input_resistance,Measurement_membrane_time_constant,Measurement_resting_membrane_potential,Measurement_spike_amplitude,Measurement_spike_halfwidth,Measurement_spike_threshold
0,2200,1,0,0,0,0,0
1,50,0,0,1,0,0,0
2,700,0,0,0,0,0,1
3,100,0,0,0,0,1,0
4,-30,0,0,0,1,0,0
5,-30,0,1,0,0,0,0


In [32]:
input_scaler = StandardScaler().fit(categorized_input_df)
input_scaled = input_scaler.transform(categorized_input_df)
input_scaled

array([[ 2.12313837,  2.23606798, -0.4472136 , -0.4472136 , -0.4472136 ,
        -0.4472136 , -0.4472136 ],
       [-0.5593773 , -0.4472136 , -0.4472136 ,  2.23606798, -0.4472136 ,
        -0.4472136 , -0.4472136 ],
       [ 0.25161581, -0.4472136 , -0.4472136 , -0.4472136 , -0.4472136 ,
        -0.4472136 ,  2.23606798],
       [-0.49699321, -0.4472136 , -0.4472136 , -0.4472136 , -0.4472136 ,
         2.23606798, -0.4472136 ],
       [-0.65919183, -0.4472136 , -0.4472136 , -0.4472136 ,  2.23606798,
        -0.4472136 , -0.4472136 ],
       [-0.65919183, -0.4472136 ,  2.23606798, -0.4472136 , -0.4472136 ,
        -0.4472136 , -0.4472136 ]])

In [33]:
np.array(input_scaled).shape

(6, 7)

In [None]:
np.array(data).reshape(6,)

In [34]:
from keras.models import load_model
neuro_model = load_model("machine_learning4.h5")

In [35]:
# Change the model being used
# Maybe transpose data later
neuro_model.predict(np.asarray(input_scaled))

array([[1.31051810e-02, 1.39935777e-01, 8.32704529e-02, 4.38429788e-02,
        2.99768038e-02, 1.01049446e-01, 9.77032760e-05, 2.39625573e-02,
        5.56015819e-02, 2.36228421e-01, 2.78919964e-04, 2.82385328e-04,
        4.85998616e-02, 5.14394604e-04, 2.54105981e-02, 1.81414932e-02,
        1.16347477e-01, 6.82547688e-03, 5.65285869e-02],
       [6.86983345e-03, 1.18030459e-01, 1.97575875e-02, 1.35275153e-02,
        2.98026055e-02, 1.85450949e-02, 3.04186344e-03, 5.35214553e-03,
        2.80936025e-02, 5.53721845e-01, 1.30286301e-03, 4.16880986e-03,
        1.17547866e-02, 2.48245383e-03, 6.29363768e-03, 1.03995120e-02,
        5.97769283e-02, 2.82349228e-03, 1.04255058e-01],
       [1.02361720e-02, 1.02179401e-01, 2.18372960e-02, 1.02593238e-02,
        3.46559472e-02, 1.88357141e-02, 5.45118842e-03, 7.10972073e-03,
        1.97138004e-02, 3.02818924e-01, 1.53715315e-03, 1.33453612e-03,
        1.18038245e-02, 7.32732797e-03, 5.72213437e-03, 1.61630530e-02,
        4.57321294e-02