Download database.

In [1]:
!wget -r -N -c -np -q https://physionet.org/files/slpdb/1.0.0/

Check if database exists.

In [8]:
![ ! -d "physionet.org" ] && echo "Error: database not downloaded properly."

Download libraries.

In [11]:
import sys
!{sys.executable} -m pip install -q wfdb tinymlgen --user
!{sys.executable} -m pip install matplotlib==3.1.3 --user

You should consider upgrading via the '/usr/local/opt/python@3.9/bin/python3.9 -m pip install --upgrade pip' command.[0m[33m
You should consider upgrading via the '/usr/local/opt/python@3.9/bin/python3.9 -m pip install --upgrade pip' command.[0m[33m
[0m

Import libraries and set random seed.

In [4]:
# For reading database
import wfdb
import os
import copy
import numpy as np
import scipy as sp
import tensorflow as tf
from tensorflow.keras import models, layers
from tensorflow.keras import utils
import matplotlib.pyplot as plt
import librosa
import librosa.display

np.random.seed(1)
tf.random.set_seed(1)

## **1. Import Database**
Accessing data and basic data processing.


In [5]:
class PatientData (object):
    ECG_signal = None
    EEG_signal = None
    sleep_stages = None

    record_length = None
    sampling_frequency = None

    def __init__ (self, patient_name):
        self.patient_name = patient_name

In [6]:
DATABASE_PATH = 'physionet.org/files/slpdb/1.0.0'

with open(os.path.join(DATABASE_PATH, 'RECORDS'), 'r') as file:
    PATIENT_NAMES = file.read().split('\n')[:-1]
  
PATIENTS = {
    patient_name: PatientData(patient_name)
    for patient_name in PATIENT_NAMES
}

In [7]:
# 0 Awake
# 1 NREM stage 1
# 2 NREM stage 2
# 3 NREM stage 3 and 4
# 4 REM
def annotation_sleep_stage (annotation):
    if annotation.startswith('W'):
        return 0
    elif annotation.startswith('1'):
        return 1
    elif annotation.startswith('2'):
        return 2
    elif annotation.startswith('3') or annotation.startswith('4'):
        return 3
    elif annotation.startswith('R'):
        return 4
    # Ideally, all annotations are classified into one of the above 5
    else:
        return -1

# Since annotations only have labels and the time at which they occur,
# interpolate all the data so there's always a label at each time step
def step_interpolation (data, locations, total_length):
    step_interpolated_data = np.zeros(total_length)

    for i in range(len(locations) - 1):
        start_range = locations[i]
        end_range = locations[i + 1]

        # Convert string annotation into sleep stage
        step_interpolated_data[(start_range - 1) : end_range] = annotation_sleep_stage(data[i])

    return step_interpolated_data

In [9]:
for patient_name in PATIENT_NAMES:
    patient = PATIENTS[patient_name]

    # Retrieve raw signals and annotations
    record_path = os.path.join(DATABASE_PATH, patient_name)
    record = wfdb.io.rdrecord(record_path)
    annotation = wfdb.rdann(record_path, extension='st')

    # Sampling frequency
    # This might differ for each record
    patient.sampling_frequency = record.fs

    # 0 ECG
    # 1 BP
    # 2 EEG
    # 3 Resp (not available for all)
    patient.ECG_signal = record.p_signal[:, 0]
    patient.EEG_signal = record.p_signal[:, 2]
    patient.record_length = record.sig_len

    patient.sleep_stages = step_interpolation(annotation.aux_note, annotation.sample, patient.record_length)

In [12]:
# Example patient
patient_slp01a = PATIENTS['slp01a']
print(patient_slp01a.ECG_signal)
print(patient_slp01a.EEG_signal)
print(patient_slp01a.sleep_stages)

[0.085 0.08  0.125 ... 0.23  0.235 0.225]
[-0.03919129 -0.03888025 -0.03856921 ...  0.14727838  0.14681182
  0.14261275]
[3. 3. 3. ... 0. 0. 0.]


In [13]:
# Splitting the dataset into train, validation, test set
# Will be split in terms of patients, not sleep data
num_patients = len(PATIENTS)

# Shuffle patients
randomized_patients = copy.deepcopy(list(PATIENTS.keys()))
np.random.shuffle(randomized_patients)

# 80 / 10 / 10 split of 18 patients will be roughly 14 / 2 / 2
# Don't need test_end, since it'll be until the end of data
train_end = 14
valid_end = train_end + 2

# Split data using keys
train_patients = randomized_patients[ : train_end]
valid_patients = randomized_patients[train_end : valid_end]
test_patients = randomized_patients[valid_end : ]

## **2. Building the Neural Network**

In [73]:
def create_dataset(patient_set, window_size=15, batch_size=10):
    inputs = np.array([])
    labels = np.array([])
    
    for patient_name in patient_set:
        # Get training data for one patient
        print("Gathering {} patient data...".format(patient_name))
        patient = PATIENTS[patient_name]

        # Represents the number of samples (individual numbers) in one window of time (measured in seconds)
        num_samples_per_window = int(window_size * patient.sampling_frequency)
        
        # Represents how many data points is generated after division with windows
        # If the window size is larger, there will be less data points (but more samples per data point)
        num_data_points = int(patient.EEG_signal.shape[0] / num_samples_in_one_datum)
        print("Samples per window size of {} seconds = {}".format(window_size, num_samples_in_one_datum))
        print("Number of data points = {}".format(num_data_points))
        
        # Calculate the number of iterations, dependent on the number of data points and batch size
        num_iterations = np.ceil(num_data_points / batch_size)
        
        for _ in range(num_iterations):
            current_batch_inputs = np.array([])
            current_batch_labels = np.array([])
            
            for datum in range(batch_size):
                # Determine start and end of current batch
                start = (datum * num_samples_in_one_datum)
                end = (start + num_samples_in_one_datum)
                '''
                EEG_MFCC = librosa.feature.melspectrogram(
                    y=patient.EEG_signal[start : end], 
                    sr=patient.sampling_frequency)
                ECG_MFCC = librosa.feature.melspectrogram(
                    y=patient.ECG_signal[start : end], 
                    sr=patient.sampling_frequency)

                sample = np.expand_dims(
                    np.stack([ EEG_MFCC, ECG_MFCC ], axis=2), 
                    axis=0)
                '''
                
                sample = np.array([ patient.EEG_signal[start : end] ])
                
                # Only grab the label at the end of the current batch
                # This is such that we're using all of the data in the current batch
                # in order to predict the sleep stage by the end of the batch
                sample_labels = np.array([ patient.sleep_stages[end - 1] ])
                
                current_batch_inputs = np.append(current_batch_inputs, sample, axis=0)
                current_batch_labels = np.append(current_batch_labels, sample, axis=0)

            inputs = np.append(inputs, current_batch_inputs, axis=0)
            labels = np.append(labels, current_batch_labels, axis=0)
      
        break

    return data, labels

In [74]:
# Create training dataset
train_data, train_labels = create_dataset(train_patients)

print(train_data.shape)
print(train_labels.shape)

Gathering slp14 patient data...
Samples per window size of 15 seconds = 3750
Number of data points = 1440
(1440, 3750)
(1440,)


In [47]:
print(train_data[0])

[-0.00451128 -0.00032223  0.00440387 ...  0.00622986  0.00708915
  0.00816327]


In [51]:
filter_size = 3
pool_size = 2

width, height = 1440, 3750

model = models.Sequential([
    layers.Conv1D(32, filter_size, activation='relu', input_shape=(height, 1)),

    layers.Flatten(), 
    layers.Dense(64, activation='relu'),
    layers.Dense(32, activation='relu'), 
    layers.Dense(5, activation='softmax')
])

optimizer = tf.keras.optimizers.Nadam(
    learning_rate=0.007,
    epsilon=1e-7,
    name="Nadam"
)

# Compile the model
model.compile(
    optimizer=optimizer,
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

model.summary()

Model: "sequential_9"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv1d_9 (Conv1D)            (None, 3748, 32)          128       
_________________________________________________________________
flatten_9 (Flatten)          (None, 119936)            0         
_________________________________________________________________
dense_27 (Dense)             (None, 64)                7675968   
_________________________________________________________________
dense_28 (Dense)             (None, 32)                2080      
_________________________________________________________________
dense_29 (Dense)             (None, 5)                 165       
Total params: 7,678,341
Trainable params: 7,678,341
Non-trainable params: 0
_________________________________________________________________


In [52]:
# Train the neural network

history = model.fit(
  train_data,
  utils.to_categorical(train_labels),
  epochs=10
)

Epoch 1/10


ValueError: in user code:

    /usr/local/lib/python3.9/site-packages/keras/engine/training.py:853 train_function  *
        return step_function(self, iterator)
    /usr/local/lib/python3.9/site-packages/keras/engine/training.py:842 step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    /usr/local/lib/python3.9/site-packages/tensorflow/python/distribute/distribute_lib.py:1286 run
        return self._extended.call_for_each_replica(fn, args=args, kwargs=kwargs)
    /usr/local/lib/python3.9/site-packages/tensorflow/python/distribute/distribute_lib.py:2849 call_for_each_replica
        return self._call_for_each_replica(fn, args, kwargs)
    /usr/local/lib/python3.9/site-packages/tensorflow/python/distribute/distribute_lib.py:3632 _call_for_each_replica
        return fn(*args, **kwargs)
    /usr/local/lib/python3.9/site-packages/keras/engine/training.py:835 run_step  **
        outputs = model.train_step(data)
    /usr/local/lib/python3.9/site-packages/keras/engine/training.py:787 train_step
        y_pred = self(x, training=True)
    /usr/local/lib/python3.9/site-packages/keras/engine/base_layer.py:1020 __call__
        input_spec.assert_input_compatibility(self.input_spec, inputs, self.name)
    /usr/local/lib/python3.9/site-packages/keras/engine/input_spec.py:229 assert_input_compatibility
        raise ValueError('Input ' + str(input_index) + ' of layer ' +

    ValueError: Input 0 of layer sequential_9 is incompatible with the layer: : expected min_ndim=3, found ndim=2. Full shape received: (32, 3750)


In [None]:
# Graph training accuracy
plt.plot(history.history['accuracy'], label='accuracy')
plt.xlabel('epoch')
plt.ylabel('accuracy')
plt.ylim([0, 1])
plt.legend(loc='lower right')

In [None]:
# Get accuracy of the network
model.evaluate(validation_data)

In [None]:
# implementing the class of conv net -- incompleted
class CNN_sleep_stages(tf.Module):
  def __init__(self, number_of_channels = , number_of_classes =):
    self.conv1 = models.Sequential(layers.Conv2D(, (, ), activation='relu', input_shape=(, , )))
    self.conv2 = models.Sequential(layers.Conv2D(, (, ), activation='relu'))
    self.pool =  models.Sequential(layers.MaxPool2D( 2 , 2 ))
    self.flat = models.Sequential(layers.Flatten())
    self.dense = models.Sequential(layers.Dense(, activation = 'Sigmoid'))
  
  def __call__(self, x):
    x = self.pool(self.conv1(x))
    return x

# Could improve the accuracy by:
# 1. Increasing the depth (adding more convolutional layer)
# 2. Adding dropout layer
# 3. Add fully-connected layers
# 4. Strides? Padding?