# Try some quick setup

In [1]:
import pandas as pd
import pickle as pkl
import numpy as np
import matplotlib.pyplot as plt
import keras
from sklearn.model_selection import train_test_split

Using TensorFlow backend.


In [2]:
from keras.layers import Input, Dense, Lambda, Layer
from keras.initializers import Constant
from keras.models import Model
from keras import backend as K

# Custom loss layer
class CustomMultiLossLayer(Layer):
    """Loss-only layer that mixes several squared-error losses with learned weights.

    The layer owns one trainable scalar ``log_var`` per output.  For each
    output the squared error is multiplied by ``exp(-log_var)`` and
    ``log_var`` itself is added; the terms are summed over the last axis,
    accumulated over all outputs, and the mean is registered via ``add_loss``.
    """

    def __init__(self, nb_outputs=2, **kwargs):
        """Record how many outputs are combined; other kwargs go to ``Layer``."""
        self.nb_outputs = nb_outputs
        self.is_placeholder = True
        super().__init__(**kwargs)

    def build(self, input_shape=None):
        """Create one trainable ``log_var`` weight per output, initialised to 0."""
        self.log_vars = [
            self.add_weight(name='log_var' + str(idx), shape=(1,),
                            initializer=Constant(0.), trainable=True)
            for idx in range(self.nb_outputs)
        ]
        super().build(input_shape)

    def multi_loss(self, ys_true, ys_pred):
        """Return the scalar combined loss for paired lists of targets and predictions."""
        assert len(ys_true) == self.nb_outputs and len(ys_pred) == self.nb_outputs
        per_output = []
        for target, estimate, log_var in zip(ys_true, ys_pred, self.log_vars):
            precision = K.exp(-log_var[0])
            weighted_error = precision * (target - estimate)**2. + log_var[0]
            per_output.append(K.sum(weighted_error, -1))
        # Built-in sum starts from 0, matching the original running total.
        return K.mean(sum(per_output))

    def call(self, inputs):
        """Register the combined loss; ``inputs`` is the targets followed by the predictions."""
        split_at = self.nb_outputs
        targets, estimates = inputs[:split_at], inputs[split_at:]
        self.add_loss(self.multi_loss(targets, estimates), inputs=inputs)
        # The concatenated tensor is only returned so the layer has an output.
        return K.concatenate(inputs, -1)

# Pull in raw data

In [3]:
# Load the REAL-PD training labels CSV into a DataFrame.
labels = pd.concat(
    [
        pd.read_csv(
            "/home/ms994/beat_pd/data/real-pd/data_labels/REAL-PD_Training_Data_IDs_Labels.csv"
        )
    ]
)

In [4]:
labels.shape

(591, 5)

In [5]:
labels.dropna().shape

(64, 5)

In [6]:
# Per-subject mean of the label columns; subjects whose mean contains NaN are dropped.
# numeric_only=True keeps non-numeric columns (presumably measurement_id -- TODO confirm)
# out of the mean; pandas >= 2.0 raises a TypeError on them otherwise.
subject_specific = labels.groupby("subject_id").mean(numeric_only=True).dropna()
all_m_id = labels.measurement_id.unique().tolist()

# Split *positions* into all_m_id: 10% test, then 10% of the remainder for validation.
# random_state is fixed so the split is reproducible.
all_positions = list(range(len(all_m_id)))
train_ind, test_ind = train_test_split(all_positions, test_size=0.1, random_state=1)
train_ind, valid_ind = train_test_split(train_ind, test_size=0.1, random_state=1)

In [7]:
import pickle as pkl
# NOTE: unpickling executes arbitrary code from the file -- only load trusted data.
# The context manager closes the file handle, which the bare open() call leaked.
with open("/n/scratch2/ms994/realPd.pkl", "rb") as results_file:
    allResults = pkl.load(results_file)

In [8]:
len(allResults)

63262

In [9]:
# Keep a recording only when its first three entries (the sensor streams) each
# have at least 1500 rows and each has some column whose std exceeds 0.01.
# The length checks come first so std() runs only on recordings that pass them.
allResults = [
    rec
    for rec in allResults
    if rec[0].shape[0] >= 1500
    and rec[1].shape[0] >= 1500
    and rec[2].shape[0] >= 1500
    and (rec[0].std(0) > 0.01).any()
    and (rec[1].std(0) > 0.01).any()
    and (rec[2].std(0) > 0.01).any()
]

  after removing the cwd from sys.path.


In [10]:
len(allResults)

41079

In [11]:
# Collect the label record (fourth entry) of every surviving recording.
labels = [recording[3] for recording in allResults]

In [12]:
# Place the label records side by side, then flip so each record becomes a row.
labels = pd.concat(labels, axis=1).transpose()

In [13]:
# Renumber rows 0..n-1 and discard the "index" column that reset_index() adds.
labels = labels.reset_index().drop(columns=["index"])

In [14]:
# Distinct subject ids, in order of first appearance in the label table.
all_subjects = labels.subject_id.unique()

In [15]:
def get_model(num_cnn_layers=5, num_lin_layers=5, dropout=0.5, lin_h=64,
              window=1500, max_value=1, lr=0.001):
    """Build and compile the three-branch 1-D CNN regressor.

    Each of the three inputs (``watch_accel_in``, ``watch_gyro_in``,
    ``phone_accel_in``; shape ``(window, 3)``) passes through its own stack of
    ``num_cnn_layers`` blocks of Conv1D -> LeakyReLU -> MaxPool1D(2) ->
    BatchNormalization and is flattened.  The branches are concatenated, fed
    through Dense -> LeakyReLU -> Dropout, then ``num_lin_layers`` blocks of
    BatchNormalization -> Dense -> LeakyReLU -> Dropout, and finally a single
    unit whose ReLU activation (layer name ``"out"``) is capped at ``max_value``.

    Calling ``get_model()`` with no arguments builds the same network as the
    original hard-coded version.

    Args:
        num_cnn_layers: Conv/pool blocks per sensor branch.
        num_lin_layers: Dense blocks after the shared dense layer.
        dropout: Rate used by every Dropout layer.
        lin_h: Width of every hidden Dense layer.
        window: Number of time steps per input.
        max_value: Upper cap of the output ReLU.
            NOTE(review): with the default of 1 the model cannot predict
            targets above 1 -- confirm this suits every label it is trained on.
        lr: Adam learning rate.

    Returns:
        A ``keras.Model`` compiled with Adam and mean-squared-error loss.

    Raises:
        ValueError: If ``window`` is too short to survive ``num_cnn_layers``
            halvings by the pooling layers.
    """
    # Each MaxPool1D((2,)) halves the time axis (floor division).
    if window >> num_cnn_layers < 1:
        raise ValueError(
            f"window={window} is too short for {num_cnn_layers} pooling layers"
        )

    watch_accel = keras.layers.Input((window, 3), name="watch_accel_in")
    watch_gyro = keras.layers.Input((window, 3), name="watch_gyro_in")
    phone_accel = keras.layers.Input((window, 3), name="phone_accel_in")
    all_inputs = [watch_accel, watch_gyro, phone_accel]

    all_cnn_outputs = []
    for input_layer in all_inputs:
        x = input_layer
        for i in range(num_cnn_layers):
            # Filter count doubles per block (3, 6, 12, 24, 48) and then stays at 48.
            n_filters = 3 * 2 ** min(i, 4)
            x = keras.layers.Conv1D(n_filters, (3,), padding="same")(x)
            x = keras.layers.LeakyReLU()(x)
            x = keras.layers.MaxPool1D((2,))(x)
            x = keras.layers.BatchNormalization()(x)
        all_cnn_outputs.append(keras.layers.Flatten()(x))

    # Shared dense layer on top of the concatenated branch features.
    x = keras.layers.Concatenate()(all_cnn_outputs)
    x = keras.layers.Dense(lin_h)(x)
    x = keras.layers.LeakyReLU()(x)
    # Previously hard-coded to 0.5, so it silently ignored ``dropout``.
    x = keras.layers.Dropout(dropout)(x)

    # Regression head.
    for _ in range(num_lin_layers):
        x = keras.layers.BatchNormalization()(x)
        x = keras.layers.Dense(lin_h)(x)
        x = keras.layers.LeakyReLU()(x)
        x = keras.layers.Dropout(dropout)(x)
    x = keras.layers.Dense(1)(x)
    x_on_off = keras.layers.ReLU(name="out", max_value=max_value)(x)

    model = keras.Model(inputs=all_inputs, outputs=[x_on_off])
    model.compile(keras.optimizers.Adam(lr=lr), loss="mean_squared_error")
    return model

In [16]:
from addict import Dict

In [None]:
# Train one model per (subject, label column) and record its test score.
m_id = []
testScores = Dict()
for subject in all_subjects[:1]:
    # Rows of the global label table for this subject; their index values are
    # used as positions into allResults.
    subject_rows = labels[labels["subject_id"] == subject]
    subject_results = [allResults[i] for i in subject_rows.index]

    # Stack every sensor stream once, truncated to the 1500 steps the model expects.
    watchAccel = np.array([res[0][:1500] for res in subject_results])
    watchGyro = np.array([res[1][:1500] for res in subject_results])
    phoneAccel = np.array([res[2][:1500] for res in subject_results])

    allLabels = pd.concat([res[3] for res in subject_results], axis=1).T
    # Renumber 0..n-1 so the label index doubles as a position into the arrays above.
    allLabels = allLabels.reset_index().drop(["index"], axis=1)
    assert (allLabels.subject_id == subject).all()
    m_id.append(allLabels.measurement_id.unique())

    # Split by measurement id: 10% test, then 10% of the remainder for validation.
    train, test = train_test_split(subject_rows.measurement_id.unique(), test_size=0.1, random_state=1)
    train, valid = train_test_split(train, test_size=0.1, random_state=1)

    train_labels = allLabels[allLabels.measurement_id.isin(train)]
    valid_labels = allLabels[allLabels.measurement_id.isin(valid)]
    test_labels = allLabels[allLabels.measurement_id.isin(test)]

    sensor_arrays = (watchAccel, watchGyro, phoneAccel)
    train_data = [arr[train_labels.index] for arr in sensor_arrays]
    valid_data = [arr[valid_labels.index] for arr in sensor_arrays]
    test_data = [arr[test_labels.index] for arr in sensor_arrays]

    # NOTE(review): prep_data elsewhere in this notebook drops records whose
    # label is NaN; no such filter is applied here.  Confirm each label column
    # is complete for this subject, otherwise the MSE loss becomes NaN.
    for output in ["on_off", "dyskinesia", "tremor"]:
        model = get_model()
        modelCheckpoint = keras.callbacks.ModelCheckpoint(f"/n/scratch2/ms994/cnn_on_off_s_id_{subject}_{output}.h5", save_best_only=True, verbose=True)
        earlyStopping = keras.callbacks.EarlyStopping(patience=20)
        reduce_lr = keras.callbacks.ReduceLROnPlateau(factor=0.5, patience=2)
        history = model.fit(
            train_data,
            train_labels[output],
            epochs=500,
            batch_size=32,
            validation_data=(valid_data, valid_labels[output]),
            callbacks=[modelCheckpoint, reduce_lr, earlyStopping],
        )
        plt.plot(history.history["loss"])
        plt.plot(history.history["val_loss"])
        plt.legend(["Train", "Valid"])
        plt.show()
        # evaluate() needs the targets to compute the loss; they were missing before.
        testScores[subject][output] = model.evaluate(test_data, test_labels[output])
        

W0510 17:21:18.477278 139989104256832 module_wrapper.py:139] From /home/ms994/miniconda3/envs/keras-redo-cpu/lib/python3.7/site-packages/keras/backend/tensorflow_backend.py:517: The name tf.placeholder is deprecated. Please use tf.compat.v1.placeholder instead.

W0510 17:21:18.480878 139989104256832 module_wrapper.py:139] From /home/ms994/miniconda3/envs/keras-redo-cpu/lib/python3.7/site-packages/keras/backend/tensorflow_backend.py:74: The name tf.get_default_graph is deprecated. Please use tf.compat.v1.get_default_graph instead.

W0510 17:21:18.481714 139989104256832 module_wrapper.py:139] From /home/ms994/miniconda3/envs/keras-redo-cpu/lib/python3.7/site-packages/keras/backend/tensorflow_backend.py:4138: The name tf.random_uniform is deprecated. Please use tf.random.uniform instead.

W0510 17:21:18.496351 139989104256832 module_wrapper.py:139] From /home/ms994/miniconda3/envs/keras-redo-cpu/lib/python3.7/site-packages/keras/backend/tensorflow_backend.py:3976: The name tf.nn.max_pool 

Train on 3688 samples, validate on 462 samples
Epoch 1/500


In [None]:
train_data[1].shape

In [None]:
# train_data is a sequence of arrays (one per model input), so it has no
# .shape of its own; show the shape of each member instead.
[part.shape for part in train_data]

In [None]:
model.fit(train_data, train_labels[output])

In [None]:
train_data[0].shape

In [None]:
# Count missing values per column (the dangling "." made this a syntax error).
pd.isnull(allLabels).sum()

In [None]:
# Show the label rows belonging to one subject.
# NOTE(review): `a` is not assigned in any cell visible in this notebook --
# presumably a subject id left over from a deleted cell; define it before running.
labels[labels["subject_id"] == a]

In [None]:
all_mid = labels.measurement_id.unique()

In [None]:
all_mid = all_mid.tolist()

In [None]:
def prep_data(all_data, indices, std_threshold, column, window=1500, measurement_ids=None):
    """Select usable recordings for one split and stack them into arrays.

    A record ``(watch_accel, watch_gyro, phone_accel, meta)`` is kept when:

    * the position of ``meta.measurement_id`` in ``measurement_ids`` is one of
      ``indices``;
    * each sensor stream has at least ``window`` rows;
    * each sensor stream has at least one column whose standard deviation
      exceeds ``std_threshold``;
    * ``meta[column]`` is not NaN.

    Args:
        all_data: Iterable of 4-item records as described above.
        indices: Positions (into ``measurement_ids``) that belong to the split.
        std_threshold: Minimum per-column std that counts as signal.
        column: Key of the label to extract from ``meta``.
        window: Number of leading rows kept from each stream.
        measurement_ids: Ordered measurement ids that ``indices`` refer to.
            Defaults to the notebook-level ``all_mid`` list.  Ids are assumed
            to be hashable.

    Returns:
        Tuple ``(watch_accel, watch_gyro, phone_accel, labels)`` of numpy
        arrays with one entry per kept record.

    Raises:
        ValueError: If a record's measurement id is not in ``measurement_ids``
            (the same exception type ``list.index`` raised before).
    """
    if measurement_ids is None:
        measurement_ids = all_mid

    # One-off lookup tables instead of a linear list scan per record.
    # setdefault keeps the first position of a repeated id, like list.index.
    position_of = {}
    for position, mid in enumerate(measurement_ids):
        position_of.setdefault(mid, position)
    wanted = set(indices)

    watch_accel, watch_gyro, phone_accel, targets = [], [], [], []
    for datum in all_data:
        streams = (datum[0], datum[1], datum[2])
        meta = datum[3]

        mid = meta.measurement_id
        if mid not in position_of:
            raise ValueError(f"{mid!r} is not in list")
        if position_of[mid] not in wanted:
            continue
        # Length check comes before std() so short or empty streams are never reduced.
        if any(stream.shape[0] < window for stream in streams):
            continue
        if not all((stream.std(0) > std_threshold).any() for stream in streams):
            continue
        if np.isnan(meta[column]):
            continue

        watch_accel.append(streams[0][:window])
        watch_gyro.append(streams[1][:window])
        phone_accel.append(streams[2][:window])
        targets.append(meta[column])

    return np.array(watch_accel), np.array(watch_gyro), np.array(phone_accel), np.array(targets)

In [None]:
# Build the "on_off" arrays for each split with the same 0.01 std threshold.
# NOTE(review): train_ind/valid_ind/test_ind were drawn over positions in
# all_m_id, while prep_data looks positions up in all_mid -- confirm both lists
# hold the same ids in the same order.
train_data, valid_data, test_data = [
    prep_data(allResults, split_ind, 0.01, "on_off")
    for split_ind in (train_ind, valid_ind, test_ind)
]

In [None]:
datum[3]["on_off"]

In [None]:
len(train_data[0]), len(valid_data[0]), len(test_data[0])

In [None]:
train_data[3].shape

In [None]:
# Collect the truncated sensor streams and the three label values per record.
# NOTE(review): this loop treats every item of train_data as a raw record
# (three streams plus a label object with on_off/dyskinesia/tremor attributes).
# prep_data in this notebook returns a tuple of four arrays instead, so if
# train_data holds that result this cell looks stale -- confirm before running.
watch_accel = []
watch_gyro = []
phone_accel = []
on_off = []
dyskinesia = []
tremor = []
# NOTE(review): `subject` is initialised but never filled in this cell.
subject = []
for datum in train_data:
    # Keep only the first 1500 rows of each stream.
    watch_accel.append(datum[0][:1500])
    watch_gyro.append(datum[1][:1500])
    phone_accel.append(datum[2][:1500])
    on_off.append(datum[3].on_off)
    dyskinesia.append(datum[3].dyskinesia)
    tremor.append(datum[3].tremor)

In [None]:
watch_accel[0].shape

In [None]:
np.array(watch_accel).shape

In [None]:
np.array(watch_gyro).shape

# set up network

In [None]:
# Hyper-parameters of the three-branch CNN regressor built in this cell.
num_cnn_layers = 5
num_lstm_layers = 0
num_lin_layers = 5
dropout = 0.5
lin_h=128

# One (1500, 3) input per sensor stream.
watch_accel = keras.layers.Input((1500, 3), name="watch_accel_in")
watch_gyro = keras.layers.Input((1500, 3), name="watch_gyro_in")
phone_accel = keras.layers.Input((1500, 3), name="phone_accel_in")
all_inputs = [watch_accel, watch_gyro, phone_accel]

# Per-sensor branch: input noise, then conv/pool blocks, then flatten.
all_cnn_outputs = []
for inputLayer in all_inputs:
    x = keras.layers.GaussianNoise(0.1)(inputLayer)
    for i in range(num_cnn_layers):
        # Filter count doubles per block (3, 6, 12, 24) and then stays at 24.
        n_filters = 2**int(min(i, 3))*3
        x = keras.layers.Conv1D(n_filters, (3,), padding="same")(x)
        x = keras.layers.LeakyReLU()(x)
        x = keras.layers.MaxPool1D((2,))(x)
        x = keras.layers.BatchNormalization()(x)
    x = keras.layers.Flatten()(x)
    all_cnn_outputs.append(x)

# Shared dense layer over the concatenated branch features.
x = keras.layers.Concatenate()(all_cnn_outputs)
x = keras.layers.Dense(lin_h)(x)
x = keras.layers.LeakyReLU()(x)
x = keras.layers.Dropout(0.5)(x)
x_shared_flattened = x

# on_off head: dense blocks, then one unit whose ReLU is capped at 1.
x = x_shared_flattened
for k in range(num_lin_layers):
    x = keras.layers.BatchNormalization()(x)
    x = keras.layers.Dense(lin_h)(x)
    x = keras.layers.LeakyReLU()(x)
    x = keras.layers.Dropout(dropout)(x)
x = keras.layers.Dense(1)(x)
x_on_off = keras.layers.ReLU(name="on_off", max_value=1)(x)

model = keras.Model(
    inputs=[watch_accel, watch_gyro, phone_accel],
    outputs=[x_on_off],
)
model.compile(keras.optimizers.Adam(lr=0.001), loss=["mean_squared_error"])
model.summary()