In [15]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, LSTM, Dense, Dropout, BatchNormalization
from tensorflow.keras.optimizers import Adam
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score, confusion_matrix

import os
from tqdm.notebook import tqdm
import numpy as np
import matplotlib.pyplot as plt

import mne
from mne.datasets.sleep_physionet.age import fetch_data

from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import FunctionTransformer
import antropy as ant
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
import pandas as pd
from scipy.stats import skew, kurtosis


In [4]:
def process_edf(subject):
    EEG_path = "C:/Users/utkar/Desktop/Research_Paper/EEG Analysis/Sleep_Stage_Detection/sleep-edf-database-expanded-1.0.0/sleep-edf-database-expanded-1.0.0/sleep-telemetry/"
    raw=None
    annot_train=None
    files = os.listdir(EEG_path+subject)
    for file in os.listdir(EEG_path+subject):
        if (file.split('-')[1].split('.')[0] == 'PSG'):
            raw = mne.io.read_raw_edf(EEG_path+subject+'/'+file, stim_channel='Marker', preload=True, verbose=False)
        else:
            annot_train = mne.read_annotations(EEG_path+subject+'/'+file)
            raw.set_annotations(annot_train, emit_warning=False)

    raw.set_channel_types({'EOG horizontal': 'eog', 'EEG Fpz-Cz': 'eeg', 'EEG Pz-Oz': 'eeg', 'EMG submental': 'misc'})
    raw.drop_channels(['Marker'])

    raw.filter(0.3, 40, n_jobs=4)  # Filter the data with a 0.3-40 Hz bandpass
    annot_events = {
    'Sleep stage W': 0,
    'Sleep stage 1': 1,
    'Sleep stage 2': 2,
    'Sleep stage 3': 3,
    'Sleep stage 4': 3,
    'Sleep stage R': 4,
    }


    # keep last 30-min wake events before sleep and first 30-min wake events after
    # sleep and redefine annotations on raw data
    annot_train.crop(annot_train[0]['onset'] - 30 * 60,
                    annot_train[-2]['onset'] + 30 * 60)
    raw_train = raw.copy()
    raw_train.set_annotations(annot_train, emit_warning=False)
    events_train, _ = mne.events_from_annotations(raw_train, event_id = annot_events, chunk_duration=30.)

    # create a new event_id that unifies stages 3 and 4
    event_id = {'Sleep stage W': 0,
                'Sleep stage 1': 1,
                'Sleep stage 2': 2,
                'Sleep stage 3/4': 3,
                'Sleep stage R': 4}

    # plot events
    # fig = mne.viz.plot_events(events_train, event_id=event_id,
    #                         sfreq=raw_train.info['sfreq'],
    #                         first_samp=events_train[0, 0])

    # keep the color-code for further plotting
    stage_colors = plt.rcParams['axes.prop_cycle'].by_key()['color']
    # Create epochs from the event data with 30sec time window
    tmax = 30. - 1. / raw_train.info['sfreq']  # tmax in included

    epochs_train = mne.Epochs(raw=raw_train, events=events_train,
                            event_id=event_id, tmin=0., tmax=tmax, baseline=None, preload=True)
   
    
    
    n_epochs = len(epochs_train)
    n_channels = len(epochs_train.ch_names)
    epoch_length = tmax

    svd_entropies = np.zeros((n_epochs, n_channels))
    hfd = np.zeros((n_epochs, n_channels))
    perm_ent = np.zeros((n_epochs, n_channels))
    det_fluc = np.zeros((n_epochs, n_channels))


    for i, epoch in enumerate(epochs_train.get_data()):
        for j, channel_data in enumerate(epoch):
            svd_entropies[i, j] = ant.svd_entropy(channel_data, normalize=True)
            hfd[i, j] = ant.higuchi_fd(channel_data)
            perm_ent[i, j] = ant.perm_entropy(channel_data, order=3, normalize=True)
            det_fluc[i, j] = ant.detrended_fluctuation(channel_data)

    X_svd = svd_entropies
    X_hfd = hfd
    X_perm = perm_ent
    X_det = det_fluc
    Y = epochs_train.events[:, -1]

    return X_svd, X_hfd, X_perm, X_det, Y

In [6]:
import logging
import sys
mne.set_log_level(logging.WARNING)

In [38]:
EEG_path = "C:/Users/utkar/Desktop/Research_Paper/EEG Analysis/Sleep_Stage_Detection/sleep-edf-database-expanded-1.0.0/sleep-edf-database-expanded-1.0.0/sleep-telemetry/"

X_all_svd, X_all_hfd, X_all_perm, X_all_det_fluc, Y_all = [], [], [], [], []

for subject in tqdm(os.listdir(EEG_path)):
    tqdm.write(subject, end='\r')
    X_svd, X_hfd, X_perm, X_det, Y = process_edf(subject)
    X_all_svd.append(X_svd)
    X_all_hfd.append(X_hfd)
    X_all_perm.append(X_perm)
    X_all_det_fluc.append(X_det)
    Y_all.append(Y)

  0%|          | 0/43 [00:00<?, ?it/s]

subject922

In [39]:
len(X_svd)

923

In [40]:
len(X_all_svd)

43

In [41]:
X_all_svd = np.vstack(X_all_svd)
X_all_hfd = np.vstack(X_all_hfd)
X_all_perm = np.vstack(X_all_perm)
X_all_det_fluc = np.vstack(X_all_det_fluc)

Y_all = np.concatenate(Y_all)

In [42]:
df_svd = pd.DataFrame(X_all_svd)
df_hfd = pd.DataFrame(X_all_hfd)
df_perm = pd.DataFrame(X_all_perm)
df_det = pd.DataFrame(X_all_det_fluc)

df_Y = pd.DataFrame(Y_all)

In [43]:
df_svd.columns = ['EEG Fpz-Cz', 'EEG Pz-Oz', 'EOG horizontal', 'EMG submental']
df_hfd.columns = ['EEG Fpz-Cz', 'EEG Pz-Oz', 'EOG horizontal', 'EMG submental']
df_perm.columns = ['EEG Fpz-Cz', 'EEG Pz-Oz', 'EOG horizontal', 'EMG submental']
df_det.columns = ['EEG Fpz-Cz', 'EEG Pz-Oz', 'EOG horizontal', 'EMG submental']

X_together = pd.concat([df_svd, df_hfd, df_perm, df_det, df_Y], axis=1)
X_together.rename(columns={0: "Sleep Stage"}, inplace=True)
X_together.columns = ['EEG Fpz-Cz_svd', 'EEG Pz-Oz_svd', 'EOG horizontal_svd', 'EMG submental_svd', 'EEG Fpz-Cz_hfd', 'EEG Pz-Oz_hfd', 'EOG horizontal_hfd', 'EMG submental_hfd', 'EEG Fpz-Cz_perm', 'EEG Pz-Oz_perm', 'EOG horizontal_perm', 'EMG submental_perm', 'EEG Fpz-Cz_det', 'EEG Pz-Oz_det', 'EOG horizontal_det', 'EMG submental_det', 'Sleep Stage']
X_together

Unnamed: 0,EEG Fpz-Cz_svd,EEG Pz-Oz_svd,EOG horizontal_svd,EMG submental_svd,EEG Fpz-Cz_hfd,EEG Pz-Oz_hfd,EOG horizontal_hfd,EMG submental_hfd,EEG Fpz-Cz_perm,EEG Pz-Oz_perm,EOG horizontal_perm,EMG submental_perm,EEG Fpz-Cz_det,EEG Pz-Oz_det,EOG horizontal_det,EMG submental_det,Sleep Stage
0,0.987039,0.981167,0.793655,0.999226,1.729953,1.892506,1.756279,1.954969,0.946917,0.955711,0.953098,0.963475,0.624881,0.887048,1.148590,0.444626,0
1,0.525045,0.530804,0.367215,0.998277,1.527066,1.838841,1.621904,2.022486,0.971058,0.983652,0.995925,0.999538,1.345350,1.382187,1.501120,0.681195,0
2,0.575597,0.671199,0.439653,0.987337,1.478273,1.847214,1.835012,2.008475,0.966203,0.989973,0.999774,0.998407,1.206655,1.287611,1.395749,0.794200,0
3,0.526120,0.614145,0.596802,0.999513,1.576452,1.838328,1.883132,2.011644,0.978298,0.987571,0.999406,0.998735,1.402531,1.441773,1.307505,0.678181,0
4,0.376618,0.498464,0.423870,0.908455,1.520067,1.809161,1.756443,2.015401,0.985030,0.960600,0.999392,0.997599,1.593217,1.468738,1.387261,0.777732,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
41485,0.460974,0.942977,0.348455,0.388256,1.422695,1.915871,1.389126,1.768020,0.968626,0.993327,0.987921,0.998817,1.307973,0.926740,1.468160,1.555474,0
41486,0.513377,0.888967,0.426211,0.522939,1.453040,1.914228,1.500767,1.903348,0.975148,0.993540,0.982838,0.999357,1.284875,1.077147,1.462729,1.388786,0
41487,0.517469,0.919033,0.337180,0.633221,1.406252,1.899414,1.299624,1.946126,0.956023,0.993835,0.971756,0.998730,1.254892,1.014080,1.547991,1.175895,0
41488,0.529215,0.796246,0.455389,0.897802,1.514632,1.884125,1.552587,1.969740,0.978405,0.995693,0.994796,0.999592,1.319479,1.212552,1.471581,1.068173,0


In [44]:
X = X_together.iloc[:, :-1]
X

Unnamed: 0,EEG Fpz-Cz_svd,EEG Pz-Oz_svd,EOG horizontal_svd,EMG submental_svd,EEG Fpz-Cz_hfd,EEG Pz-Oz_hfd,EOG horizontal_hfd,EMG submental_hfd,EEG Fpz-Cz_perm,EEG Pz-Oz_perm,EOG horizontal_perm,EMG submental_perm,EEG Fpz-Cz_det,EEG Pz-Oz_det,EOG horizontal_det,EMG submental_det
0,0.987039,0.981167,0.793655,0.999226,1.729953,1.892506,1.756279,1.954969,0.946917,0.955711,0.953098,0.963475,0.624881,0.887048,1.148590,0.444626
1,0.525045,0.530804,0.367215,0.998277,1.527066,1.838841,1.621904,2.022486,0.971058,0.983652,0.995925,0.999538,1.345350,1.382187,1.501120,0.681195
2,0.575597,0.671199,0.439653,0.987337,1.478273,1.847214,1.835012,2.008475,0.966203,0.989973,0.999774,0.998407,1.206655,1.287611,1.395749,0.794200
3,0.526120,0.614145,0.596802,0.999513,1.576452,1.838328,1.883132,2.011644,0.978298,0.987571,0.999406,0.998735,1.402531,1.441773,1.307505,0.678181
4,0.376618,0.498464,0.423870,0.908455,1.520067,1.809161,1.756443,2.015401,0.985030,0.960600,0.999392,0.997599,1.593217,1.468738,1.387261,0.777732
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
41485,0.460974,0.942977,0.348455,0.388256,1.422695,1.915871,1.389126,1.768020,0.968626,0.993327,0.987921,0.998817,1.307973,0.926740,1.468160,1.555474
41486,0.513377,0.888967,0.426211,0.522939,1.453040,1.914228,1.500767,1.903348,0.975148,0.993540,0.982838,0.999357,1.284875,1.077147,1.462729,1.388786
41487,0.517469,0.919033,0.337180,0.633221,1.406252,1.899414,1.299624,1.946126,0.956023,0.993835,0.971756,0.998730,1.254892,1.014080,1.547991,1.175895
41488,0.529215,0.796246,0.455389,0.897802,1.514632,1.884125,1.552587,1.969740,0.978405,0.995693,0.994796,0.999592,1.319479,1.212552,1.471581,1.068173


In [45]:
Y = X_together.iloc[:, -1]
Y

0        0
1        0
2        0
3        0
4        0
        ..
41485    0
41486    0
41487    0
41488    0
41489    0
Name: Sleep Stage, Length: 41490, dtype: int32

In [17]:
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.2, random_state=42)

# Define the deep learning model
def create_model():
    model = Sequential([
        Dense(64, activation='relu', input_shape=(16,)),
        BatchNormalization(),
        Dropout(0.2),
        Dense(128, activation='relu'),
        BatchNormalization(),
        Dropout(0.3),
        Dense(256, activation='relu'),
        BatchNormalization(),
        Dropout(0.4),
        Dense(128, activation='relu'),
        BatchNormalization(),
        Dropout(0.3),
        Dense(64, activation='relu'),
        BatchNormalization(),
        Dropout(0.2),
        Dense(5, activation='softmax')  # Assuming 5 sleep stages
    ])
    return model

# Create and compile the model
model = create_model()
model.compile(optimizer=tf.keras.optimizers.Adam(lr=0.001), loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Train the model
history = model.fit(X_train, y_train, epochs=100, batch_size=32, validation_split=0.1, callbacks=[tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=5)])

# Evaluate the model
y_pred = np.argmax(model.predict(X_test), axis=-1)
accuracy = accuracy_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred, average='weighted')
confusion = confusion_matrix(y_test, y_pred)

print(f'Accuracy: {accuracy}, F1: {f1}, Confusion Matrix: {confusion}')

Epoch 1/100


  super(Adam, self).__init__(name, **kwargs)


Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Accuracy: 0.09472161966738973, F1: 0.016391720180222003, Confusion Matrix: [[ 786    0    0    0    0]
 [ 713    0    0    0    0]
 [3818    0    0    0    0]
 [1310    0    0    0    0]
 [1671    0    0    0    0]]


In [49]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from kerastuner import RandomSearch

# Assuming X and y contain the preprocessed features and labels, respectively
# X shape: (41490, 16), y shape: (41490,)

# Split data into training, validation, and testing sets
X_train_val, X_test, y_train_val, y_test = train_test_split(X, Y, test_size=0.15, random_state=42)
X_train, X_val, y_train, y_val = train_test_split(X_train_val, y_train_val, test_size=0.1765, random_state=42)
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
# Transform the validation and test data using the same scaler (do not fit the scaler again)
X_val = scaler.transform(X_val)
X_test = scaler.transform(X_test)

# Define the deep learning model
def create_dense_model(hp):
    model = tf.keras.Sequential()
    
    # Tune the number of units in the first Dense layer
    hp_units1 = hp.Int('units1', min_value=32, max_value=256, step=32)
    model.add(Dense(units=hp_units1, activation='relu', input_shape=(16,)))
    model.add(BatchNormalization())
    model.add(Dropout(0.3))
    
    # Tune the number of units in the second Dense layer
    hp_units2 = hp.Int('units2', min_value=32, max_value=256, step=32)
    model.add(Dense(units=hp_units2, activation='relu'))
    model.add(BatchNormalization())
    model.add(Dropout(0.3))
    
    # Tune the number of units in the third Dense layer
    hp_units3 = hp.Int('units3', min_value=32, max_value=256, step=32)
    model.add(Dense(units=hp_units3, activation='relu'))
    model.add(BatchNormalization())
    model.add(Dropout(0.3))
    
    model.add(Dense(5, activation='softmax'))

    # Tune the learning rate for the optimizer
    hp_learning_rate = hp.Choice('learning_rate', values=[1e-3, 1e-4, 1e-5])

    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=hp_learning_rate),
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])
    
    return model

tuner = RandomSearch(
    create_dense_model,
    objective='val_accuracy',
    max_trials=20,
    executions_per_trial=2,
    directory='kerastuner',
    project_name='sleep_stage'
)

tuner.search_space_summary()

tuner.search(X_train, y_train, epochs=100, batch_size=32, validation_data=(X_val, y_val),
             callbacks=[tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=5)])

tuner.results_summary()

# Retrieve the best model and evaluate it on the test data
best_model = tuner.get_best_models(num_models=1)[0]

_, accuracy = best_model.evaluate(X_test, y_test)
print(f'Test accuracy: {accuracy:.4f}')

# Optional: classification report for detailed metrics
y_pred = np.argmax(best_model.predict(X_test), axis=-1)
print(classification_report(y_test, y_pred))


Trial 20 Complete [00h 00m 27s]
val_accuracy: 0.10554216802120209

Best val_accuracy So Far: 0.10554216802120209
Total elapsed time: 00h 03m 52s
INFO:tensorflow:Oracle triggered exit


Results summary
Results in kerastuner\sleep_stage
Showing 10 best trials
Objective(name="val_accuracy", direction="max")

Trial 10 summary
Hyperparameters:
units1: 64
units2: 96
units3: 96
learning_rate: 0.0001
Score: 0.10554216802120209

Trial 02 summary
Hyperparameters:
units1: 192
units2: 192
units3: 64
learning_rate: 0.001
Score: 0.10554216802120209

Trial 07 summary
Hyperparameters:
units1: 192
units2: 32
units3: 64
learning_rate: 0.0001
Score: 0.10554216802120209

Trial 00 summary
Hyperparameters:
units1: 160
units2: 160
units3: 192
learning_rate: 0.01
Score: 0.10554216802120209

Trial 03 summary
Hyperparameters:
units1: 160
units2: 32
units3: 224
learning_rate: 0.01
Score: 0.10554216802120209

Trial 06 summary
Hyperparameters:
units1: 32
units2: 64
units3: 160
learning_rate: 0.0001
Score: 0.10554216802120209

Trial 04 summary
Hyperparameters:
units1: 64
units2: 224
units3: 32
learning_rate: 0.001
Score: 0.10554216802120209

Trial 09 summary
Hyperparameters:
units1: 64
units2: 25

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
