### Imports

In [1]:
# !pip install mne
# !pip install mne-connectivity

In [2]:
import os

import numpy as np

import mne
from mne.time_frequency import psd_welch

from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import FunctionTransformer
from sklearn.model_selection import cross_val_score

## Preprocessing

### Loading edf

In [3]:
file = "..\dataverse_files\h01.edf"
edfs_path = "..\dataverse_files"
manifest_path = "..\dataverse_files\MANIFEST.txt"

In [4]:
def load_patients_data(edfs_path):
    raw_patients_data = []
    
    edfs_file_names = [f for f in os.listdir(edfs_path) if f.endswith('.edf')]
    
    for file_name in edfs_file_names:
        path = edfs_path + '\\' + file_name 
        raw_data = mne.io.read_raw_edf(path, preload=True, verbose=False)
        raw_patients_data.append(raw_data)

    return raw_patients_data

In [5]:
raw_patients_data = load_patients_data(edfs_path)

In [6]:
len(raw_patients_data)

28

### EEG signals filtration

In [7]:
# low pass
filtered_patients_data = [raw_patient_data.copy()
                          .filter(l_freq=None, h_freq=40., fir_design='firwin', n_jobs=-1, verbose=False) 
                          for raw_patient_data in raw_patients_data]

In [8]:
raw_patients_data[0].to_data_frame().head(5)

Unnamed: 0,time,Fp2,F8,T4,T6,O2,Fp1,F7,T3,T5,O1,F4,C4,P4,F3,C3,P3,Fz,Cz,Pz
0,0,0.0025,0.0025,0.0025,0.0025,0.0025,0.0025,0.0025,0.0025,0.0025,0.0025,0.0025,0.0025,0.0025,0.0025,0.0025,0.0025,0.0025,0.0025,0.0025
1,4,0.0025,0.0025,0.0025,0.0025,0.0025,0.0025,0.0025,0.0025,0.0025,0.0025,0.0025,0.0025,0.0025,0.0025,0.0025,0.0025,0.0025,0.0025,0.0025
2,8,0.0025,0.0025,0.0025,0.0025,0.0025,0.0025,0.0025,0.0025,0.0025,0.0025,0.0025,0.0025,0.0025,0.0025,0.0025,0.0025,0.0025,0.0025,0.0025
3,12,0.461215,0.461215,0.30831,0.30831,0.155405,0.0025,0.0025,-0.150405,-0.150405,0.0025,0.0025,0.0025,0.0025,0.0025,-0.150405,-0.30331,0.0025,0.0025,-0.30331
4,16,0.461215,0.461215,0.461215,0.30831,0.155405,0.0025,0.0025,-0.150405,-0.150405,-0.30331,0.0025,0.155405,0.0025,0.0025,-0.150405,-0.30331,0.0025,0.0025,-0.150405


In [9]:
filtered_patients_data[0].to_data_frame().head(5)

Unnamed: 0,time,Fp2,F8,T4,T6,O2,Fp1,F7,T3,T5,O1,F4,C4,P4,F3,C3,P3,Fz,Cz,Pz
0,0,0.0025,0.0025,0.0025,0.0025,0.0025,0.0025,0.0025,0.0025,0.0025,0.0025,0.0025,0.0025,0.0025,0.0025,0.0025,0.0025,0.0025,0.0025,0.0025
1,4,-0.013165,-0.051168,-0.141553,-0.04569,-0.019511,-0.137629,-0.016629,0.144165,0.078764,0.061897,-0.1369,-0.178747,-0.161818,0.035682,0.086381,0.036654,-0.042886,-0.050014,-0.082717
2,8,0.19341,0.171581,0.050402,0.108617,0.07995,-0.104558,-0.018838,0.025467,0.025338,0.042487,-0.086834,-0.10569,-0.101325,0.008695,0.00499,-0.068254,-0.042575,-0.058942,-0.156065
3,12,0.414561,0.456421,0.410248,0.32405,0.192855,0.064007,-0.002592,-0.219646,-0.135143,-0.071373,0.096946,0.15312,0.116469,-0.036361,-0.164928,-0.239526,0.007202,0.001351,-0.158983
4,16,0.215639,0.302485,0.418006,0.256428,0.095735,0.152239,0.020006,-0.183716,-0.225747,-0.194935,0.164545,0.265504,0.192684,0.007435,-0.168781,-0.244139,0.059761,0.106119,-0.03952


### Filtered EEG signals segmentation

In [10]:
def get_label(edf):
    patient_edf_file_name = edf.filenames[0].split('\\')[-1]
    isSick = patient_edf_file_name.lower().startswith('s')
    return int(isSick == True) # 1 - is sick, 0 is healthy

In [11]:
def print_info(epochs_num_per_patient, labels):
    print('\nEpochs number per patient: ', epochs_num_per_patient)
    
    class_0_num = sum(labels) 
    class_1_num = len(labels)-sum(labels)

    print('\nnegative: ', class_0_num)
    print('positive: ', class_1_num)

In [16]:
def transform_patients_data_into_X_y_sets(patients_data, info=True):
    epochs_per_patient = []
    labels = []
    
    epochs_num_per_patient = []
    for edf in raw_patients_data:
        epochs = mne.make_fixed_length_epochs(edf, duration=25, preload=True, verbose=False)
        
        epochs_per_patient.append(epochs)
        epochs_num_per_patient.append(len(epochs))
        
        label = get_label(edf)
        labels.extend([label for epoch in epochs])
    
    epochs = mne.concatenate_epochs(epochs_per_patient)
    
    if info:
        print_info(epochs_num_per_patient, labels)

    return (epochs, labels) # (X, y)

In [17]:
X, y = transform_patients_data_into_X_y_sets(filtered_patients_data)

Not setting metadata
1142 matching events found
No baseline correction applied
0 bad epochs dropped

Epochs number per patient:  [37, 36, 36, 37, 37, 37, 36, 36, 36, 44, 36, 36, 38, 34, 33, 45, 38, 48, 35, 29, 53, 36, 47, 34, 54, 43, 45, 86]

negative:  626
positive:  516


### Feature extraction

In [18]:
def eeg_power_band(epochs):
    """EEG relative power band feature extraction.

    This function takes an ``mne.Epochs`` object and creates EEG features based
    on relative power in specific frequency bands that are compatible with
    scikit-learn.

    Parameters
    ----------
    epochs : Epochs
        The data.

    Returns
    -------
    X : numpy array of shape [n_samples, 5]
        Transformed data.
    """
    # specific frequency bands
    FREQ_BANDS = {"delta": [0.5, 4.5],
                  "theta": [4.5, 8.5],
                  "alpha": [8.5, 11.5],
                  "sigma": [11.5, 15.5],
                  "beta": [15.5, 30]}

    psds, freqs = psd_welch(epochs, picks='eeg', fmin=0.5, fmax=30.)
    # Normalize the PSDs
    psds /= np.sum(psds, axis=-1, keepdims=True)

    X = []
    for fmin, fmax in FREQ_BANDS.values():
        psds_band = psds[:, :, (freqs >= fmin) & (freqs < fmax)].mean(axis=-1)
        X.append(psds_band.reshape(len(psds), -1))

    return np.concatenate(X, axis=1)

In [19]:
features = eeg_power_band(X)

Effective window size : 1.024 (s)


In [20]:
features.shape

(1142, 95)

In [21]:
len(features[0])

95

### Classification

In [22]:
X_train, X_test, y_train, y_test = train_test_split(features, y, test_size=0.33, shuffle=True, random_state=42)

pipe = make_pipeline(RandomForestClassifier(n_estimators=100, random_state=42))

# pipe = make_pipeline(FunctionTransformer(eeg_power_band, validate=False),
#                      RandomForestClassifier(n_estimators=100, random_state=42))

# Train
pipe.fit(X_train, y_train)

# Test
y_pred = pipe.predict(X_test)

# Assess the results
acc = accuracy_score(y_test, y_pred)

print("Accuracy score: {}".format(acc))

Accuracy score: 0.9257294429708223


### Cross Validatated Classification

In [23]:
clf = RandomForestClassifier(n_estimators=100, random_state=42)
scores = cross_val_score(clf, features, y, cv=20)

print("%0.2f accuracy with a standard deviation of %0.2f" % (scores.mean(), scores.std()))

0.85 accuracy with a standard deviation of 0.09
