In [1]:
import glob
import os
import numpy as np
import pandas as pd

from pathlib import Path
import sys

source_dir = Path.cwd().parent / 'source'
sys.path.append(str(source_dir))

import data_import

In [4]:
file_name = 'deepheaven2saizure_pure'

data = data_import.load_pyarrow(file_name='segmented_data_300')
data.head()

data = data.drop(columns=['is_seizure', 'before_seizure', 'file', 'segment_id', 'seizure_start'])
data['target'] = (data['target'] > pd.Timedelta('0s')).astype(int)
data_import.save_pyarrow(data, file_name=file_name)
data_1 = data[data['target'] == 1].copy()
data_0 = data[data['target'] == 0].copy()
data_0 = data_0.iloc[:data_1.shape[0], :]
data_0.shape, data_1.shape

/home/weasel/reps/ai-seizure-detectives/source/../data/segmented_data_300.arrow was loaded.
/home/weasel/reps/ai-seizure-detectives/source/../data/deepheaven2saizure_pure.arrow was successfully written.


((4377600, 12), (4377600, 12))

In [5]:
# print(data_1['target'].sum())
# print(data_0['target'].sum())

data_0 = data_0.drop(columns=['target'])
data_1 = data_1.drop(columns=['target'])

# D = data.values
# np.save('data/'+ file_name + '.npy', D)
# D = D.reshape(data['epoch'].nunique(), -1)
data_0.shape, data_1.shape

((4377600, 11), (4377600, 11))

In [6]:
import os
import numpy as np
from scipy.io import loadmat

# Construct LSTM sequences from one segment
def lstm_sequence(input_segment, target, sampling_freq, window, stride, block_s = 60):
    """ Function for generating blocks of LSTM input tensors
        input_segment : The EEG segment
        target        : 1/0 (preictal/interictial); None for test
        sampling_freq : Samplig frequency
        window        : Window size for 1d convolutions on each block
        stride        : Stride size of the 1d convolution
        block_s       : Size of the block in seconds (default = 60)
    """

    # Dimensions
    n_channels, T_segment = input_segment.shape

    # Determine block dimensions
    block_len = sampling_freq * block_s   # Length of each block
    n_blocks = (T_segment-1) // block_len # Number of blocks
    blocks = [block for block in range(0,(n_blocks+1)*block_len,block_len)]

    # Determine the sequence length for LSTM
    div = (block_len - window)%stride
    if (div != 0):
        pad = stride - div # Size of padding neded
    else:
        pad = 0

    seq_len = (block_len + pad - window) // stride

    # Initiate tensor
    X = np.zeros((n_blocks, seq_len, n_channels))

    # Loop over blocks and fill X
    for ib in range(n_blocks):
        # Get block
        data_block = input_segment[:, blocks[ib]:blocks[ib+1]]

        # Pad if necessary
        if (pad !=0):
            data_block = np.concatenate((data_block, np.zeros((n_channels, pad))), axis=1)

        # 1d convolution by mean
        index = 0
        for j in range(seq_len):
            X[ib, j, :] = np.mean(data_block[:, (index+j):(index+j+seq_len)], axis = 1)

    # Fill in the target
    if (target == 1):
        Y = np.ones(n_blocks)
    elif(target == 0):
        Y = np.zeros(n_blocks)
    else:
        Y = None

    return X, Y, n_blocks


# Collect all the segments to build a tesnsor input for LSTM
def lstm_build_input(df, target, window, stride, block_s = 60):
    """ Collect all the data and build sequences for LSTM
        clips              : List of clips
        target             : 1/0 (preictal/interictial); None for test set
        window             : Window size for 1d convolutions
        stride             : Length of the stride in 1d convolution
        block_s            : Size of the block in seconds (default = 60)
    """
    epochs = [df[df['epoch'] == epoch] for epoch in df['epoch'].unique()]
    # Number of clips
    n_epochs = len(epochs)

    # Loop over all clips and store data
    iepoch = 0
    for epoch in epochs:
        # segment_name = list(clip.keys())[3] # Get segment name

        input_segment = epoch.drop(columns=['epoch']).values.T
        sampling_freq = 256

        # Get number of channels
        n_channels = epoch.shape[1] - 2

        # Get tensor input and targets from blocks
        X, Y, n_blocks = lstm_sequence(input_segment, target, sampling_freq, window, stride, block_s)

        # Concatenate the tensor and target vector
        if (iepoch == 0):
            X_train = X
            Y_train = Y[:,None] if Y is not None else None
        else:
            X_train = np.vstack((X_train,X))
            Y_train = np.vstack((Y_train,Y[:,None])) if Y is not None else None

        iepoch +=1

    return X_train, Y_train


In [7]:
# Window, stride and block_s
# window = 16000
# stride = 100
# block_s = 60
window = 5 * 256
stride = 5
block_s = 60

X_1, Y_1 = lstm_build_input(data_1, 1, window, stride)
X_0, Y_0 = lstm_build_input(data_0, 0, window, stride)

# Scale the data
X_1 = X_1 / np.max(np.abs(X_1), axis=1)[:,None,:]
X_0 = X_0 / np.max(np.abs(X_0), axis=1)[:,None,:]

# Combine the data
X = np.concatenate((X_0, X_1), axis = 0)
Y = np.concatenate((Y_0, Y_1), axis = 0)
Y = np.squeeze(Y)
n_channels = X.shape[2]

print("Data shape = ", X.shape)

Data shape =  (456, 2816, 10)


In [8]:
X.shape, Y.shape, Y.sum()

((456, 2816, 10), (456,), 228.0)

In [9]:
# Normalize
X = X / np.max(np.abs(X), axis=1)[:,None,:]

# Shuffle
np.random.seed(1)
shuffle = np.random.choice(np.arange(len(Y)), size=len(Y), replace=False)
X = X[shuffle]
Y = Y[shuffle]

In [10]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

model = keras.Sequential()
model.add(layers.Input(shape=(X.shape[1], X.shape[2])))
model.add(layers.LSTM(64))
model.add(layers.BatchNormalization())
model.add(layers.Dense(1, activation='sigmoid'))

2023-11-10 16:08:17.511491: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2023-11-10 16:08:17.660403: E tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:9342] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2023-11-10 16:08:17.660427: E tensorflow/compiler/xla/stream_executor/cuda/cuda_fft.cc:609] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2023-11-10 16:08:17.661141: E tensorflow/compiler/xla/stream_executor/cuda/cuda_blas.cc:1518] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2023-11-10 16:08:17.738227: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2023-11-10 16:08:17.739395: I tensorflow/core/platform/cpu_feature_guard.cc:182] This Tens

In [20]:
import numpy
from keras.callbacks import Callback

from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score


class RocCallback(Callback):
    def __init__(self,training_data,validation_data):
        self.x = training_data[0]
        self.y = training_data[1]
        self.x_val = validation_data[0]
        self.y_val = validation_data[1]

    def on_train_begin(self, logs={}):
        return

    def on_train_end(self, logs={}):
        return

    def on_epoch_begin(self, epoch, logs={}):
        return

    def on_epoch_end(self, epoch, logs={}):
        y_pred_train = model.predict(self.x)
        roc_train = roc_auc_score(self.y, y_pred_train)
        y_pred_val = model.predict(self.x_val)
        roc_val = roc_auc_score(self.y_val, y_pred_val)
        print('roc-auc_train: ', roc_train)
        print('roc-auc_val: ', roc_val)
        return

    def on_batch_begin(self, batch, logs={}):
        return

    def on_batch_end(self, batch, logs={}):
        return

# Function that trains the model
def train_model(X, Y):
    X = X.reshape(X.shape[0], -1, n_channels)   # reshape DH table to 3d numpy array
    X_train, X_valid, Y_train, Y_valid = train_test_split(X, Y, stratify=Y, test_size = 0.1)
    roc = RocCallback(training_data=(X_train, Y_train), validation_data=(X_valid, Y_valid))
    model.compile(optimizer="adam", loss='binary_crossentropy', metrics=["recall"])
    
    model.fit(X_train, Y_train, validation_data=(X_valid, Y_valid), callbacks=[roc], batch_size = 200, epochs=100)

    # from sklearn.utils import class_weight
    # class_weights = class_weight.compute_class_weight('balanced', classes=np.unique(Y), y=Y)
    # model.fit(X_train, Y_train, validation_data=(X_valid, Y_valid), callbacks=[roc], batch_size = 200, epochs=100, class_weight=dict(enumerate(class_weights)))

# Function that gets the model's predictions on input data
def predict_with_model(X):
    X = X.reshape(X.shape[0], -1, n_channels)  # reshape DH table to 3d numpy array
    Y_pred = model.predict(X, batch_size=200)
    return Y_pred

# Function to extract a list element at a given index
def get_predicted_class(data, idx):
    return data[idx]

# Split the data into training and test datasets
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, stratify=Y, test_size = 0.2)

# Convert numpy arrays X_train and Y_train to DH table X_table
n_rows = X_train.shape[0]
n_cols = X_train.shape[1] * X_train.shape[2]
column_names = ['Col_'+str(i) for i in range(n_cols)]
X_reshaped = X_train.reshape(n_rows, n_cols)
# X_table = numpy.to_table(X_reshaped, cols=column_names)

def add_class_col(index):
    y_class = [int(i) for i in Y_train.tolist()]
    return y_class[index]

# X_table = X_table.update(["Class = (int)add_class_col(i)"])


train_model(X_train, Y_train)

# # Train the model
# learn.learn(
#     table=X_table,
#     model_func=train_model,
#     inputs=[learn.Input(column_names, table_to_array_double), learn.Input(["Class"], table_to_array_int)],
#     outputs=None,
#     batch_size=200
# )


# Convert numpy array X_test to DH table X_table_test
X_reshaped_test = X_test.reshape(X_test.shape[0], n_cols)
# X_table_test = numpy.to_table(X_reshaped_test, cols=column_names)

# Use the learn function to create a new table that contains predicted values
y_pred = predict_with_model(X_test)

# predicted = learn.learn(
#     table=X_table_test,
#     model_func=predict_with_model,
#     inputs=[learn.Input(column_names, table_to_array_double)],
#     outputs=[learn.Output("PredictedClass", get_predicted_class, "int")],
#     batch_size=200
# )


Epoch 1/100


TypeError: in user code:

    File "/home/weasel/reps/ai-seizure-detectives/.venv/lib/python3.11/site-packages/keras/src/engine/training.py", line 1377, in train_function  *
        return step_function(self, iterator)
    File "/home/weasel/reps/ai-seizure-detectives/.venv/lib/python3.11/site-packages/keras/src/engine/training.py", line 1360, in step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "/home/weasel/reps/ai-seizure-detectives/.venv/lib/python3.11/site-packages/keras/src/engine/training.py", line 1349, in run_step  **
        outputs = model.train_step(data)
    File "/home/weasel/reps/ai-seizure-detectives/.venv/lib/python3.11/site-packages/keras/src/engine/training.py", line 1131, in train_step
        return self.compute_metrics(x, y, y_pred, sample_weight)
    File "/home/weasel/reps/ai-seizure-detectives/.venv/lib/python3.11/site-packages/keras/src/engine/training.py", line 1225, in compute_metrics
        self.compiled_metrics.update_state(y, y_pred, sample_weight)
    File "/home/weasel/reps/ai-seizure-detectives/.venv/lib/python3.11/site-packages/keras/src/engine/compile_utils.py", line 620, in update_state
        metric_obj.update_state(y_t, y_p, sample_weight=mask)
    File "/home/weasel/reps/ai-seizure-detectives/.venv/lib/python3.11/site-packages/keras/src/utils/metrics_utils.py", line 77, in decorated
        result = update_state_fn(*args, **kwargs)
    File "/home/weasel/reps/ai-seizure-detectives/.venv/lib/python3.11/site-packages/keras/src/metrics/base_metric.py", line 140, in update_state_fn
        return ag_update_state(*args, **kwargs)
    File "/home/weasel/reps/ai-seizure-detectives/.venv/lib/python3.11/site-packages/keras/src/metrics/base_metric.py", line 723, in update_state  **
        matches = ag_fn(y_true, y_pred, **self._fn_kwargs)

    TypeError: 'str' object is not callable


: 

In [18]:
y_pred = predict_with_model(X_test)
y_test = Y_test

from sklearn.metrics import precision_score, recall_score
y_pred_c = y_pred.round().astype(int)
precision = precision_score(y_test, y_pred_c)
recall = recall_score(y_test, y_pred_c)

loss, accuracy = model.evaluate(X_test, y_test)

print('Loss:', loss)
print('Accuracy:', accuracy)
print('Precision:', precision)
print('Recall:', recall)
Y_test.sum(), y_pred_c.sum()

Loss: 1.1476995944976807
Accuracy: 0.510869562625885
Precision: 0.5111111111111111
Recall: 0.5


(46.0, 45)

In [19]:
y_pred_c.sum(), y_test.sum(), (y_pred_c[:,0].astype(bool) & y_test.astype(bool)).sum()

(45, 46.0, 23)

In [None]:
y_pred = predict_with_model(X_test)
y_test = Y_test

from sklearn.metrics import precision_score, recall_score
y_pred_c = y_pred.round().astype(int)
precision = precision_score(y_test, y_pred_c)
recall = recall_score(y_test, y_pred_c)

loss, accuracy = model.evaluate(X_test, y_test)

print('Loss:', loss)
print('Accuracy:', accuracy)
print('Precision:', precision)
print('Recall:', recall)
Y_test.sum(), y_pred_c.sum()

Loss: 0.8820559978485107
Accuracy: 0.7398374080657959
Precision: 0.5454545454545454
Recall: 0.18181818181818182


(33.0, 11)