# Real-Time Seizure Classification Model

#### Imports

##### Import libraries

In [11]:
''' Import libraries'''
# Built-in Python libraries
import os

# Custom Functions
from feature_extraction import *
from log_reg_funcs import *

# 3rd-Party Libraries
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from scipy import signal
import seaborn as sns
# from seaborn import heatmap
# from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split
import tensorflow as tf


##### Import dataset

In [12]:
'''Import dataset'''
# Path of the preprocessed EEG recording (kept separate from the per-sample
# `filename` list created below so the two meanings never share a name)
dataset_path = 'datasets/dataset_all_filt.csv'
dataset = pd.read_csv(dataset_path)

# Metadata columns stored alongside the signal
state = dataset['state'].to_numpy()          # label column, one entry per row of the dataset
filename = dataset['filename'].tolist()      # source file name, one entry per row of the dataset

# Everything that is not metadata is a signal channel -- Voltage [uV]
data = dataset.drop(['state', 'filename'], axis=1)

# Sampling Frequency [Hz]
fs = 256

# Channel names are the remaining column labels
channels = data.columns.to_list()
num_channels = len(channels)

# Classes to be predicted for each time segment
class_labels = {0:'Not Seizure', 1:'Seizure'}

#### Feature Extraction

In [13]:
num_features = 9                              # length of the vector stored per (channel, window)
num_segments = int(len(data) / fs)            # number of whole 1-second windows (fs rows each)
time = np.arange(num_segments, dtype=int)     # window indices 0 .. num_segments-1

# Feature array laid out as (num_channels, num_segments, num_features)
x_data = np.zeros((num_channels, num_segments, num_features), dtype=np.float64)
print(x_data.shape)

# Walk over every channel and every 1-second window, storing that window's features
for ch_num, ch in enumerate(channels):
    channel_signal = data[ch]
    for t in time:
        window = slice(t * fs, (t + 1) * fs)  # rows belonging to window t
        x_data[ch_num, t] = extractFeatures(channel_signal[window])

# One label per window, taken from the first row of the window:
# 0 stays 0, any other state value becomes 1
y_data = [int(state[t * fs] != 0) for t in time]

(2, 22030, 9)


In [14]:
# Column labels for the feature vector, in the order they are stored in x_data
feature_names = [
    "Area",
    "Normalized Decay",
    "Line Length",
    "Mean Energy",
    "Average Peak Amplitude",
    "Average Valley Amplitude",
    "Normalized Peak Amplitude",
    "Peak Variation",
    "Root Mean Square",
]

# Summary statistics of the features stored for the first channel (x_data[0])
x_df = pd.DataFrame(data=x_data[0], columns=feature_names)
x_df.describe()

Unnamed: 0,Area,Normalized Decay,Line Length,Mean Energy,Average Peak Amplitude,Average Valley Amplitude,Normalized Peak Amplitude,Peak Variation,Root Mean Square
count,22030.0,22030.0,22030.0,22030.0,22030.0,22030.0,22030.0,22030.0,22030.0
mean,43.671584,0.0,2511.263459,6967.685701,2.828552,2.820109,1.683976,0.0,55.0202
std,50.313845,0.0,3157.510281,20651.014009,0.788422,0.780746,1.241062,0.0,62.337018
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,15.0,0.0,781.25,382.0,2.0,2.0,1.0,0.0,19.0
50%,26.0,0.0,1395.0,1121.0,3.0,3.0,2.0,0.0,33.0
75%,50.0,0.0,2974.0,4127.0,3.0,3.0,3.0,0.0,64.0
max,678.0,0.0,35827.0,573564.0,5.0,5.0,6.0,0.0,757.0


In [15]:
# Positional indices of the columns of x_df whose mean and standard deviation
# are both exactly zero
to_remove = [
    idx
    for idx, feature in enumerate(x_df.columns)
    if x_df[feature].mean() == 0 and x_df[feature].std() == 0
]

In [25]:
# Column labels for the feature vector, in the order they are stored in x_data
feature_names = [
    "Area",
    "Normalized Decay",
    "Line Length",
    "Mean Energy",
    "Average Peak Amplitude",
    "Average Valley Amplitude",
    "Normalized Peak Amplitude",
    "Peak Variation",
    "Root Mean Square",
]

# For every channel, record the index of each feature whose mean and standard
# deviation are both exactly zero for that channel
zero_stat_hits = []
for ch in x_data:
    x_df = pd.DataFrame(ch, columns=feature_names)
    zero_stat_hits.extend(
        idx
        for idx, feature in enumerate(x_df.columns)
        if x_df[feature].mean() == 0 and x_df[feature].std() == 0
    )

# Collapse repeats (a feature flagged in several channels is listed once),
# then translate the indices back into feature names
to_remove_idx = list(set(zero_stat_hits))
to_remove = x_df.columns[to_remove_idx].to_list()

# NOTE: this cell only reports the flagged features; x_data itself is not
# modified here, so the columns are still present afterwards.
for feature in to_remove:
    print(f"{feature} was removed.")

Normalized Decay was removed.
Peak Variation was removed.


#### Dataset Preparation

In [None]:
# Reshape into rank-2 array with one row per time segment.
# x_data is built as (num_channels, num_segments, num_features). Reshaping that
# layout directly to (num_segments, ...) would pack consecutive segments of the
# same channel into one row, so row t would no longer match y_data[t]. The
# segment axis is therefore moved to the front first; each row then holds the
# features of channel 0, followed by channel 1, ... for a single segment.
# The ndim guard keeps this cell safe to re-run once x_data is already rank 2.
if x_data.ndim == 3:
    x_data = np.transpose(x_data, (1, 0, 2)).reshape((num_segments, num_channels*num_features))
print(f"New Shape = {x_data.shape}")

# Split into training/validation/test sets
x_train, x_tv, y_train, y_tv = train_test_split(
    x_data, y_data,
    train_size=0.80, test_size=0.20,      # 80:20 split between training and test/validation sets
    shuffle=True, random_state=42         # shuffles data using same random seed every time
)

x_test, x_val, y_test, y_val = train_test_split(
    x_tv, y_tv,
    train_size = 0.50, test_size = 0.50,   # Even split btw test and validation sets
    shuffle=True, random_state=42         # same as above
)

In [None]:
# Store data in tensors: every split is converted to a float32 tensor,
# features first and then labels, in train / test / validation order
x_train, x_test, x_val = (
    tf.convert_to_tensor(features, dtype=np.float32)
    for features in (x_train, x_test, x_val)
)

y_train, y_test, y_val = (
    tf.convert_to_tensor(labels, dtype=np.float32)
    for labels in (y_train, y_test, y_val)
)

In [None]:
# Normalize
# One Normalizer object is constructed from each split.
# (Normalizer is a project helper brought in by the star-imports; presumably it
#  derives its statistics from the array passed to the constructor -- confirm.)
x_norm_train = Normalizer(x_train)
x_norm_test = Normalizer(x_test)
x_norm_val = Normalizer(x_val)

# Apply each normalizer to its own split. The test split previously went
# through x_norm_val, which left x_norm_test unused and scaled the test data
# with the validation split's normalizer.
# NOTE(review): consider applying x_norm_train to all three splits so that
# evaluation uses the same scaling the model was trained with -- confirm intent.
x_train_norm = x_norm_train.norm(x_train)
x_test_norm = x_norm_test.norm(x_test)
x_val_norm = x_norm_val.norm(x_val)

In [None]:
# Number of time segments per batch (train 400 time segments at a time)
batch_size = 400


def _shuffled_batches(features, labels):
    """Wrap (features, labels) in a tf.data.Dataset, shuffle it, and batch it.

    The shuffle buffer spans the whole split (features.shape[0] elements) and
    uses a fixed seed of 42; batches contain `batch_size` elements.
    """
    slices = tf.data.Dataset.from_tensor_slices((features, labels))
    shuffled = slices.shuffle(buffer_size=features.shape[0], seed=42)
    return shuffled.batch(batch_size=batch_size)


# Load each normalized split into a shuffled, batched Dataset object
train_data = _shuffled_batches(x_train_norm, y_train)
test_data = _shuffled_batches(x_test_norm, y_test)
val_data = _shuffled_batches(x_val_norm, y_val)

#### Training

In [None]:
# Fresh model and the loop object that trains it
log_reg_model = LogRegModel()
loop = TrainingLoop()

# Settings handed to TrainingLoop.train
training_kwargs = dict(
    train_data=train_data,
    val_data=val_data,
    test_data=test_data,
    num_epochs=300,
    learn_rate=0.05,
    model=log_reg_model,
    output=True,
)

# The value returned by loop.train replaces log_reg_model
log_reg_model = loop.train(**training_kwargs)

#### Performance Evaluation

In [None]:
# Plot loss and accuracy
# Uses the same TrainingLoop instance that ran loop.train(...) in the training
# cell; what exactly is drawn is defined by TrainingLoop.plot (not shown here).
loop.plot()

In [None]:
# Create one confusion matrix per split (training, then test).
# For each split: run the model with train=False, turn its output into class
# predictions with predict(), and pass predictions plus true labels to confusion().

# --- Training split ---
y_pred_train = log_reg_model(x_train_norm, train=False)
y_class_train = predict(y_pred_train)
con_mat_train = confusion(
    y_pred=y_class_train,
    y_true=y_train,
    class_labels=class_labels,
    title='Training',
)

# --- Test split ---
y_pred_test = log_reg_model(x_test_norm, train=False)
y_class_test = predict(y_pred_test)
con_mat_test = confusion(
    y_pred=y_class_test,
    y_true=y_test,
    class_labels=class_labels,
    title='test',
)

In [None]:
# Unpack the 2x2 test confusion matrix.
# NOTE(review): this indexing assumes con_mat_test[true_class][predicted_class]
# -- confirm against the confusion() helper.
TN = con_mat_test[0][0]
TP = con_mat_test[1][1]
FN = con_mat_test[1][0]
FP = con_mat_test[0][1]


def _safe_ratio(numerator, denominator):
    """Return numerator / denominator as a float, or 0.0 when the denominator is zero.

    Keeps the metrics well defined when the test split has no positive samples
    or the model predicts no positives, instead of producing NaN or raising.
    """
    denominator = float(denominator)
    return float(numerator) / denominator if denominator else 0.0


recall = _safe_ratio(TP, TP + FN)        # How many seizures were predicted correctly
precision = _safe_ratio(TP, TP + FP)     # How many predicted seizures were actually positive
# Harmonic mean of recall and precision; evaluates whether the two are balanced
f_score = _safe_ratio(2 * (recall * precision), recall + precision)

print(f"Recall = {recall:.3}\nPrecision = {precision:.3}\nF-Score = {f_score:.3}")

#### Saving the Model

In [None]:
# Bundle the trained model, a normalizer and the class-prediction function,
# then write the bundle to disk as a TensorFlow SavedModel.
# The normalizer passed here is x_norm_train, the Normalizer object built from
# the training split that produced the model's training inputs. The previous
# argument, x_test_norm, is the already-normalized test tensor, not a
# Normalizer object.
export_module = ExportModule(model=log_reg_model, normalizer=x_norm_train, class_pred=predict)
save_path = "output/models/export_module"
tf.saved_model.save(export_module, save_path)


In [None]:
# Read the SavedModel back from save_path to check that the export can be loaded
loaded_model = tf.saved_model.load(save_path)
# Bare expression: displays the loaded object's repr as the cell output
loaded_model