In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import keras.models
from keras.layers.convolutional import Conv1D, ZeroPadding1D
from keras.layers.recurrent import Recurrent, LSTM, GRU
from keras.utils import plot_model
from sklearn.preprocessing import LabelBinarizer
from keras.utils import np_utils
from math import floor, ceil
from sklearn.metrics import accuracy_score
% matplotlib inline

Using TensorFlow backend.


# preprocess

In [176]:
def load_and_format_data(train_ids=range(1, 10), test_id=10, test_column='9'):
    """Load the calcium/spike CSVs and shape them for a 1-D convolutional model.

    For every dataset number in ``train_ids`` the files
    ``train/<i>.train.calcium.csv`` and ``train/<i>.train.spikes.csv`` are
    read, NaNs are dropped from every column, and all columns are joined end
    to end into one long training trace. A single column of dataset
    ``test_id`` is held out as the test trace.

    Parameters
    ----------
    train_ids : iterable of int
        Dataset numbers used for training (default: 1..9).
    test_id : int
        Dataset number the held-out trace is read from (default: 10).
    test_column : str
        Column of the held-out dataset used as the test trace (default: '9').

    Returns
    -------
    x_train : ndarray, shape (1, n_train, 1)
        Concatenated calcium samples.
    y_train : ndarray, shape (1, n_train, n_label_columns)
        Binarized spike labels produced by ``LabelBinarizer``.
    x_test : ndarray, shape (1, n_test, 1)
        Held-out calcium samples.
    y_test : pandas.Series
        Held-out spike values, returned raw (NOT binarized or reshaped).
    num_y : int
        Largest training label plus one.
    """
    # Collect the per-column pieces and concatenate once at the end; calling
    # np.concatenate inside the loop re-copies everything accumulated so far
    # on every column. The leading empty float array keeps the original
    # behavior (empty float64 result) when no columns are found.
    x_parts = [np.array([])]
    y_parts = [np.array([])]
    for i in train_ids:
        dataset = 'train/' + str(i)
        x_data = pd.read_csv(dataset + '.train.calcium.csv')
        y_data = pd.read_csv(dataset + '.train.spikes.csv')
        for key in x_data:
            # NaNs are dropped independently from x and y.
            # NOTE(review): this assumes both files have NaNs in the same
            # rows of a column; confirm, otherwise x and y fall out of step.
            x_parts.append(x_data[key].dropna().values)
            y_parts.append(y_data[key].dropna().values)
    x_train = np.concatenate(x_parts)
    y_train = np.concatenate(y_parts)

    # Held-out trace: one column of one dataset.
    test_prefix = 'train/' + str(test_id)
    x_test = pd.read_csv(test_prefix + '.train.calcium.csv')[test_column].dropna()
    y_test = pd.read_csv(test_prefix + '.train.spikes.csv')[test_column].dropna()

    # Must be computed before y_train is replaced by its binarized form.
    num_y = int(np.max(y_train + 1))

    # Binarize labels. With more than two distinct labels this maps
    # 0 -> [1,0,0,0,0], 1 -> [0,1,0,0,0], etc. y_test is deliberately left
    # as raw values; the evaluation code compares it with argmax predictions.
    lb = LabelBinarizer()
    lb.fit(y_train)
    y_train = lb.transform(y_train)

    # Reshape to (batch=1, time, channels): the whole trace is one sample.
    x_train = x_train.reshape((1, len(x_train), 1))
    y_train = y_train.reshape((1, len(y_train), y_train.shape[1]))
    x_test = x_test.values.reshape((1, len(x_test), 1))
    return x_train, y_train, x_test, y_test, num_y
# Load everything once; these module-level names (x_train, y_train, x_test,
# y_test, num_y) are read by the model, fit and evaluation cells below.
x_train, y_train, x_test, y_test, num_y = load_and_format_data()
# Sanity check: both arrays should be (1, n_samples, 1).
print("train, test sizes",x_train.shape,x_test.shape)

train, test sizes (1, 4447004, 1) (1, 9698, 1)


# create model

In [177]:
# Fully convolutional per-timestep classifier: three (zero-pad -> Conv1D)
# stages. Each stage pads by kernel_size // 2 on both sides so that, for odd
# kernels, the output has the same number of timesteps as the input.
max_features = 10     # NOTE(review): not used in this cell; kept in case later cells read it
embedding_dims = 10   # NOTE(review): not used in this cell; kept in case later cells read it
kernel_sizes = [21,13,5] # should be odd
activations = ["linear", "linear", "softmax"]  # one per stage; softmax yields per-timestep class scores
pad_sizes = [s // 2 for s in kernel_sizes]  # integer division: padding must be an int

model = keras.models.Sequential()
for stage, (kernel_size, pad_size, activation) in enumerate(zip(kernel_sizes, pad_sizes, activations)):
    if stage == 0:
        # Only the first layer of a Sequential model needs an input shape:
        # variable-length sequences with a single channel.
        model.add(ZeroPadding1D(padding=pad_size, input_shape=(None, 1)))
    else:
        model.add(ZeroPadding1D(padding=pad_size))
    # num_y filters: one output channel per label class.
    model.add(Conv1D(filters=num_y, kernel_size=kernel_size, activation=activation))

# NOTE(review): mse on a softmax output over one-hot targets works, but
# 'categorical_crossentropy' is the conventional loss here; left unchanged so
# results stay comparable with earlier runs.
model.compile(loss='mse', optimizer='adam', metrics=['accuracy'])

# fit the model

In [178]:
# Train on the single concatenated trace. x_train is reshaped to
# (1, n_samples, 1) by load_and_format_data, i.e. one sample, so each epoch
# presumably amounts to a single gradient step -- TODO confirm this is intended.
epochs = 10
model.fit(x=x_train, y=y_train, epochs=epochs, verbose=1)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x12d765dd8>

# evaluate

In [None]:
def size_of_all_data():
    """Print the row count of every column in every calcium CSV.

    Walks ``train/1..10`` and ``test/1..5``, reading
    ``<folder>/<n>.<folder>.calcium.csv`` for each. For every file it prints
    the dataset prefix, then one tab-indented line per column holding the
    column name and its length (NaN padding included).
    """
    # (folder, number of datasets in that folder)
    dataset_counts = (('train', 10), ('test', 5))
    for folder, n_datasets in dataset_counts:
        for num in range(1, n_datasets + 1):
            dataset = '{}/{}'.format(folder, num)
            print(dataset)
            # todo: concat these, trim trailing zeros
            data = pd.read_csv('{}.{}.calcium.csv'.format(dataset, folder))
            for column in data:
                print('\t', column, len(data[column]))
# Expects the train/ and test/ folders relative to the working directory.
size_of_all_data()

In [57]:
def visualize(calcium, spikes, spikes2=None):
    """Plot a calcium trace with ground-truth (and optionally predicted) spikes.

    Draws onto the current matplotlib figure and does not call ``plt.show()``;
    the caller creates the figure and shows it.

    Parameters
    ----------
    calcium : 1-D array-like
        Calcium signal, drawn unscaled.
    spikes : 1-D array-like
        Ground-truth spikes, drawn halved and shifted below the calcium trace.
    spikes2 : 1-D array-like, optional
        Predicted spikes. When given, the prediction and the difference
        ``spikes - spikes2`` are drawn as two further offset traces.
    """
    # Sample index -> x coordinate; assumes 100 samples per time unit
    # (presumably 100 Hz -> seconds; TODO confirm).
    t = np.arange(len(calcium)) / 100.0
    # No plt.hold(True): successive plot calls already draw on the same axes,
    # and pyplot.hold was removed in matplotlib 3.0.
    plt.plot(t, calcium, color='#348ABD')
    plt.plot(t, spikes / 2.0 - 3.2, color='black',label='gt')
    # Identity test: `spikes2 == None` compares element-wise on arrays and has
    # no single truth value.
    if spikes2 is not None:
        plt.plot(t, spikes2 / 2.0 - 2, color='g',label='pred')
        plt.plot(t, (spikes-spikes2) / 2.0 - 5, color='r',label='diff')
    # The vertical offsets are arbitrary, so tick values carry no meaning.
    plt.yticks([])
    plt.xticks([])
    plt.ylim([-6, 3])
    plt.xlim([0,100])  # only the first 100 time units are shown
    plt.tight_layout()
    plt.grid()
    plt.legend()

def _report_predictions(y_true, y_pred):
    """Print the non-zero fraction, accuracy and correlation of predictions.

    y_true and y_pred are 1-D sequences of class indices of equal length.
    """
    # Fraction of timesteps predicted as any non-zero class. np.sum would
    # report the mean class index instead, overstating the fraction whenever
    # a predicted class is greater than 1.
    print('pred nonzeros\t', np.count_nonzero(y_pred) / y_pred.size)
    print("acc\t\t", accuracy_score(y_true, y_pred))
    print("corr\t\t", np.corrcoef(y_pred, y_true)[0,1])


def _predict_classes(x):
    """Return the arg-max class index per timestep for a (1, time, 1) input."""
    return np.argmax(model.predict(x)[0,:,:], axis=1)


print('----------training----------')
y_pred = _predict_classes(x_train)
# y_train is binarized, so recover class indices the same way as for predictions.
_report_predictions(np.argmax(y_train[0,:,:], axis=1), y_pred)

print('----------testing----------')
y_pred = _predict_classes(x_test)
# y_test holds raw spike values; comparing them with class indices assumes the
# training labels were exactly 0..num_y-1 -- TODO confirm.
_report_predictions(y_test, y_pred)
plt.figure(figsize=(18, 6))
visualize(x_test[0,:,0],y_test,y_pred)
plt.show()

----------training----------


NameError: name 'model' is not defined