In [1]:
import numpy as np
import mne
from scipy import signal
from scipy.interpolate import RectBivariateSpline
from mne.filter import resample, filter_data
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from lspopt import spectrogram_lspopt
from matplotlib.colors import Normalize, ListedColormap

import logging
# Lookup table from level name to the matching numeric constant of ``logging``
LOGGING_TYPES = {level_name: getattr(logging, level_name)
                 for level_name in ('DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL')}
# Logger named 'yasa'; used by the band-power helper to emit warnings
logger = logging.getLogger('yasa')

%matplotlib qt

In [2]:
# NOTE(review): absolute, machine-specific path -- consider a configurable data directory.
location = "/Users/amirhosseindaraie/Desktop/data/autoscoring-material/data/Zmax Donders/P8_N3"
raw = mne.io.read_raw_edf(f'{location}/EEG L.edf', preload=True, verbose=0)
# Keep EEG channels only
raw.pick_types(eeg=True)
# Optional visual inspection: raw.plot(use_opengl=False)

# Apply a bandpass filter between 0.5 - 45 Hz
raw.filter(0.5, 45)

# Extract the data through the public accessor (rather than the private
# ``raw._data`` attribute) and convert from V to uV
data = raw.get_data() * 1e6
sf = raw.info['sfreq']
chan = raw.ch_names

# Let's have a look at the data
print('Chan =', chan)
print('Sampling frequency =', sf, 'Hz')
print('Data shape =', data.shape)

def format_seconds_to_hhmmss(seconds):
    """Render a duration given in seconds as a zero-padded ``HH:MM:SS`` string.

    Parameters
    ----------
    seconds : int or float
        Duration in seconds. Fractional seconds are truncated by the ``%i``
        formatting.

    Returns
    -------
    str
        The formatted duration. Hours are not wrapped at 24, so long
        durations simply yield a larger hour field.
    """
    hours, remainder = divmod(seconds, 60 * 60)
    minutes, secs = divmod(remainder, 60)
    return "%02i:%02i:%02i" % (hours, minutes, secs)

# Recording length: number of samples divided by the sampling frequency
duration_sec = data.shape[1] / sf
print(f'Duration: {duration_sec} (sec) OR {format_seconds_to_hhmmss(duration_sec)}')

Filtering raw data in 1 contiguous segment
Setting up band-pass filter from 0.5 - 45 Hz

FIR filter parameters
---------------------
Designing a one-pass, zero-phase, non-causal bandpass filter:
- Windowed time-domain design (firwin) method
- Hamming window with 0.0194 passband ripple and 53 dB stopband attenuation
- Lower passband edge: 0.50
- Lower transition bandwidth: 0.50 Hz (-6 dB cutoff frequency: 0.25 Hz)
- Upper passband edge: 45.00 Hz
- Upper transition bandwidth: 11.25 Hz (-6 dB cutoff frequency: 50.62 Hz)
- Filter length: 1691 samples (6.605 sec)

Chan = ['EEG L']
Sampling frequency = 256.0 Hz
Data shape = (1, 7608320)
Duration: 29720.0 (sec) OR 08:15:20


In [3]:
import antropy as ant
import scipy.signal as sp_sig
import scipy.stats as sp_stats
from numpy import apply_along_axis as apply
# Show every column of wide feature tables on a single line
pd.set_option('display.max_columns', None)
pd.set_option('display.expand_frame_repr', False)
# ``None`` means "do not truncate cell contents"; the legacy value -1 is
# deprecated since pandas 1.0 and rejected by recent releases
pd.set_option('display.max_colwidth', None)


# Per-sample time vector, in seconds (sample index divided by sampling frequency)
times = np.divide(np.arange(data.size), sf)

def sliding_window(data, sf, window, step=None, axis=-1):
    """Calculate a sliding window of a 1D or 2D EEG signal.
    .. versionadded:: 0.1.7
    Parameters
    ----------
    data : numpy array
        The 1D or 2D EEG data.
    sf : int or float
        The sampling frequency of ``data``. Must be a whole number.
    window : int or float
        The sliding window length, in seconds. ``window * sf`` must be a
        whole number of samples and may not exceed the length of ``axis``.
    step : int, float or None
        The sliding window step length, in seconds.
        If None (default), ``step`` is set to ``window``,
        which results in no overlap between the sliding windows.
    axis : int
        The axis to slide over. Defaults to the last axis. Negative values
        count from the end.
    Returns
    -------
    times : numpy array
        Time vector, in seconds, corresponding to the START of each sliding
        epoch in ``strided``.
    strided : numpy array
        A matrix where row in last dimension consists of one instance
        of the sliding window, shape (n_epochs, ..., n_samples).
    Notes
    -----
    This is a wrapper around the
    :py:func:`numpy.lib.stride_tricks.as_strided` function. The returned
    array is a strided view of ``data``: no samples are copied, and
    overlapping windows refer to the same memory, so it should be treated as
    read-only.
    Examples
    --------
    With a 1-D array
    >>> import numpy as np
    >>> from yasa import sliding_window
    >>> data = np.arange(20)
    >>> times, epochs = sliding_window(data, sf=1, window=5)
    >>> times
    array([ 0.,  5., 10., 15.])
    >>> epochs
    array([[ 0,  1,  2,  3,  4],
           [ 5,  6,  7,  8,  9],
           [10, 11, 12, 13, 14],
           [15, 16, 17, 18, 19]])
    >>> sliding_window(data, sf=1, window=5, step=2)[1]
    array([[ 0,  1,  2,  3,  4],
           [ 2,  3,  4,  5,  6],
           [ 4,  5,  6,  7,  8],
           [ 6,  7,  8,  9, 10],
           [ 8,  9, 10, 11, 12],
           [10, 11, 12, 13, 14],
           [12, 13, 14, 15, 16],
           [14, 15, 16, 17, 18]])
    >>> sliding_window(data, sf=1, window=11)[1]
    array([[ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10]])
    With a N-D array
    >>> np.random.seed(42)
    >>> # 4 channels x 20 samples
    >>> data = np.random.randint(-100, 100, size=(4, 20))
    >>> epochs = sliding_window(data, sf=1, window=10)[1]
    >>> epochs.shape  # shape (n_epochs, n_channels, n_samples)
    (2, 4, 10)
    >>> epochs
    array([[[  2,  79,  -8, -86,   6, -29,  88, -80,   2,  21],
            [-13,  57, -63,  29,  91,  87, -80,  60, -43, -79],
            [-50,   7, -46, -37,  30, -50,  34, -80, -28,  66],
            [ -9,  10,  87,  98,  71, -93,  74, -66, -20,  63]],
           [[-26, -13,  16,  -1,   3,  51,  30,  49, -48, -99],
            [-12, -52, -42,  69,  87, -86,  89,  89,  74,  89],
            [-83,  31, -12, -41, -87, -92, -11, -48,  29, -17],
            [-51,   3,  31, -99,  33, -47,   5, -97, -47,  90]]])
    """
    from numpy.lib.stride_tricks import as_strided

    data = np.asarray(data)

    # Type checks
    assert isinstance(sf, (int, float)), 'sf must be int or float'
    assert isinstance(window, (int, float)), 'window must be int or float'
    assert isinstance(step, (int, float, type(None))), ('step must be int, '
                                                        'float or None.')
    assert isinstance(axis, int), 'axis must be int.'
    # Valid indices are -ndim .. ndim - 1 (``axis == ndim`` is out of range)
    assert -data.ndim <= axis < data.ndim, "Axis value out of range."
    if isinstance(sf, float):
        assert sf.is_integer(), 'sf must be a whole number.'
        sf = int(sf)

    # Use a non-negative axis index: the output gains one trailing dimension,
    # so a negative index would point at a different axis of the result.
    axis %= data.ndim

    # window and step in samples instead of seconds
    window *= sf
    step = window if step is None else step * sf

    if isinstance(window, float):
        assert window.is_integer(), 'window * sf must be a whole number.'
        window = int(window)

    if isinstance(step, float):
        assert step.is_integer(), 'step * sf must be a whole number.'
        step = int(step)

    assert step >= 1, "Stepsize may not be zero or negative."
    assert window <= data.shape[axis], ("Sliding window size may not exceed "
                                        "size of selected axis")

    # Number of complete windows, computed with integer arithmetic so that
    # floating-point rounding can never drop or add an epoch
    n_epochs = (data.shape[axis] - window) // step + 1

    # Define output shape: ``axis`` now indexes epochs, samples go last
    shape = list(data.shape)
    shape[axis] = n_epochs
    shape.append(window)

    # Calculate strides and time vector
    strides = list(data.strides)
    strides[axis] *= step                # jump ``step`` samples between epochs
    strides.append(data.strides[axis])   # one sample between window elements
    strided = as_strided(data, shape=shape, strides=strides)
    t = np.arange(n_epochs) * (step / sf)

    # Move the epoch axis to the front: n_epochs, ..., n_samples
    if strided.ndim > 2:
        strided = np.moveaxis(strided, axis, 0)
    return t, strided

# Cut channel 0 of the recording into consecutive, non-overlapping 30-sec epochs
times, data_win = sliding_window(data[0], sf, window=30)

# Express the epoch start times in minutes instead of seconds
times = times / 60

# Hjorth mobility and complexity, one value per epoch
hmob, hcomp = ant.hjorth_params(data_win, axis=1)

def lziv(x):
    """Lempel-Ziv complexity of a signal binarized around its mean.

    Samples strictly greater than the mean of ``x`` become True, all others
    False; the resulting boolean sequence is passed to
    ``ant.lziv_complexity`` with ``normalize=True``.
    """
    above_mean = x > x.mean()
    return ant.lziv_complexity(above_mean, normalize=True)

# Every entry below is evaluated along axis 1 of ``data_win``, i.e. it yields
# one value per epoch (row).
epoch_features = {
    # Statistical
    'std': apply(np.std, 1, data_win, ddof=1),
    'mean': apply(np.mean, 1, data_win),
    'median': apply(np.median, 1, data_win),
    'iqr': apply(sp_stats.iqr, 1, data_win, rng=(25, 75)),
    'skew': apply(sp_stats.skew, 1, data_win),
    'kurt': apply(sp_stats.kurtosis, 1, data_win),
    'nzc': apply(ant.num_zerocross, 1, data_win),
    'hmob': hmob,
    'hcomp': hcomp,
    # Entropy
    'perm_entropy': apply(ant.perm_entropy, 1, data_win, normalize=True),
    'svd_entropy': apply(ant.svd_entropy, 1, data_win, normalize=True),
    'sample_entropy': apply(ant.sample_entropy, 1, data_win),
    # Fractal dimension
    'dfa': apply(ant.detrended_fluctuation, 1, data_win),
    'petrosian': apply(ant.petrosian_fd, 1, data_win),
    'katz': apply(ant.katz_fd, 1, data_win),
    'higuchi': apply(ant.higuchi_fd, 1, data_win),
    # Lempel-Ziv complexity
    'lziv': apply(lziv, 1, data_win),
}

# One row per epoch, one column per feature
df_feat = pd.DataFrame(epoch_features)
df_feat.head()


  pd.set_option('max_colwidth', -1)


Unnamed: 0,std,mean,median,iqr,skew,kurt,nzc,hmob,hcomp,perm_entropy,svd_entropy,sample_entropy,dfa,petrosian,katz,higuchi,lziv
0,50.72824,-0.458406,0.610417,42.633953,0.580565,2.600725,205,0.059614,12.427032,0.775805,0.211023,0.141362,1.549203,1.009895,1.942383,1.322072,0.161336
1,36.40965,0.385982,-0.291012,32.186688,0.114546,2.405869,272,0.083842,9.601425,0.79736,0.271151,0.203777,1.489831,1.010768,1.945935,1.413717,0.20167
2,41.266214,0.059199,2.663321,48.64983,0.124992,0.945467,128,0.081633,10.302285,0.784943,0.267274,0.180985,1.545471,1.010286,2.090015,1.416221,0.109238
3,53.069446,0.372208,5.369971,34.552458,-2.218553,9.56239,209,0.094153,8.400673,0.799514,0.293044,0.139526,1.490483,1.010866,1.822021,1.455203,0.164697
4,60.364001,-0.25408,0.494972,34.939698,-0.358552,2.604883,248,0.049412,14.853697,0.767596,0.183659,0.072636,1.613247,1.009585,1.825622,1.307613,0.189906


In [4]:
try:
    # ``simps`` was removed in SciPy 1.14; ``simpson`` is the same routine
    # under its current name, bound here to the name used below.
    from scipy.integrate import simpson as simps
except ImportError:
    # Very old SciPy releases only provide the legacy name
    from scipy.integrate import simps
from scipy.signal import welch

# Welch PSD of every epoch, estimated from 4-second segments (nperseg = 4 * sf)
freqs, psd = welch(data_win, sf, nperseg=int(4 * sf))

def bandpower_from_psd_ndarray(psd, freqs, bands=[(0.5, 4, 'Delta'),
                               (4, 8, 'Theta'), (8, 12, 'Alpha'),
                               (12, 16, 'Sigma'), (16, 30, 'Beta'),
                               (30, 40, 'Gamma')], relative=True):
    """Compute bandpowers in N-dimensional PSD.
    This is a NumPy-only implementation of the :py:func:`yasa.bandpower_from_psd` function,
    which supports 1-D arrays of shape (n_freqs), or N-dimensional arrays (e.g. 2-D (n_chan,
    n_freqs) or 3-D (n_chan, n_epochs, n_freqs))
    .. versionadded:: 0.2.0
    Parameters
    ----------
    psd : :py:class:`numpy.ndarray`
        Power spectral density of data, in uV^2/Hz. Must be a N-D array of shape (..., n_freqs).
        See :py:func:`scipy.signal.welch` for more details.
    freqs : :py:class:`numpy.ndarray`
        Array of frequencies. Must be a 1-D array of shape (n_freqs,). The
        spacing between the first two retained frequencies is used as the
        integration step for all bands.
    bands : list of tuples
        List of frequency bands of interests. Each tuple must contain the lower and upper
        frequencies, as well as the band name (e.g. (0.5, 4, 'Delta')). Both
        band edges are inclusive. The default list is only read, never modified.
    relative : boolean
        If True, bandpower is divided by the total power between the min and
        max frequencies defined in ``bands`` (default 0.5 to 40 Hz).
    Returns
    -------
    bandpowers : :py:class:`numpy.ndarray`
        Bandpower array of shape *(n_bands, ...)*, in the order of ``bands``.
    """
    # ``simps`` was removed in SciPy 1.14; import the current name locally and
    # fall back to the legacy one on very old SciPy releases.
    try:
        from scipy.integrate import simpson
    except ImportError:
        from scipy.integrate import simps as simpson

    # Type checks
    assert isinstance(bands, list), 'bands must be a list of tuple(s)'
    assert isinstance(relative, bool), 'relative must be a boolean'

    # Safety checks
    freqs = np.asarray(freqs)
    psd = np.asarray(psd)
    assert freqs.ndim == 1, 'freqs must be a 1-D array of shape (n_freqs,)'
    assert psd.shape[-1] == freqs.shape[-1], 'n_freqs must be last axis of psd'

    # Extract frequencies of interest
    all_freqs = np.hstack([[b[0], b[1]] for b in bands])
    fmin, fmax = min(all_freqs), max(all_freqs)
    idx_good_freq = np.logical_and(freqs >= fmin, freqs <= fmax)
    freqs = freqs[idx_good_freq]
    # The frequency resolution needs at least two retained bins
    assert freqs.size >= 2, ('At least two frequency bins must fall within '
                             'the range covered by bands')
    res = freqs[1] - freqs[0]

    # Trim PSD to frequencies of interest
    psd = psd[..., idx_good_freq]

    # Check if there are negative values in PSD
    if (psd < 0).any():
        msg = (
            "There are negative values in PSD. This will result in incorrect "
            "bandpower values. We highly recommend working with an "
            "all-positive PSD. For more details, please refer to: "
            "https://github.com/raphaelvallat/yasa/issues/29")
        logger.warning(msg)

    # Calculate total power; the leading axis lets it broadcast over bands
    total_power = simpson(psd, dx=res, axis=-1)
    total_power = total_power[np.newaxis, ...]

    # Initialize empty array
    bp = np.zeros((len(bands), *psd.shape[:-1]), dtype=np.float64)

    # Integrate the PSD over each frequency band (band name is not needed here)
    for i, (b0, b1, _) in enumerate(bands):
        idx_band = np.logical_and(freqs >= b0, freqs <= b1)
        bp[i] = simpson(psd[..., idx_band], dx=res, axis=-1)

    if relative:
        bp /= total_power
    return bp

# Relative band power per epoch; transposed so that rows are epochs and
# columns follow the default band order of ``bandpower_from_psd_ndarray``
bp = pd.DataFrame(bandpower_from_psd_ndarray(psd, freqs).T,
                  columns=['delta', 'theta', 'alpha', 'sigma', 'beta', 'gamma'])
# Discard band columns left over from an earlier run of this cell, so that
# re-running it replaces them instead of creating duplicate column names
df_feat = pd.concat([df_feat.drop(columns=bp.columns, errors='ignore'), bp], axis=1)
df_feat.head()


Unnamed: 0,std,mean,median,iqr,skew,kurt,nzc,hmob,hcomp,perm_entropy,svd_entropy,sample_entropy,dfa,petrosian,katz,higuchi,lziv,delta,theta,alpha,sigma,beta,gamma
0,50.72824,-0.458406,0.610417,42.633953,0.580565,2.600725,205,0.059614,12.427032,0.775805,0.211023,0.141362,1.549203,1.009895,1.942383,1.322072,0.161336,0.947087,0.03299,0.009982,0.002576,0.005049,0.002316
1,36.40965,0.385982,-0.291012,32.186688,0.114546,2.405869,272,0.083842,9.601425,0.79736,0.271151,0.203777,1.489831,1.010768,1.945935,1.413717,0.20167,0.954627,0.024764,0.008382,0.002574,0.005836,0.003815
2,41.266214,0.059199,2.663321,48.64983,0.124992,0.945467,128,0.081633,10.302285,0.784943,0.267274,0.180985,1.545471,1.010286,2.090015,1.416221,0.109238,0.954055,0.020523,0.010761,0.003061,0.007351,0.00425
3,53.069446,0.372208,5.369971,34.552458,-2.218553,9.56239,209,0.094153,8.400673,0.799514,0.293044,0.139526,1.490483,1.010866,1.822021,1.455203,0.164697,0.958963,0.012552,0.004894,0.003369,0.013351,0.006872
4,60.364001,-0.25408,0.494972,34.939698,-0.358552,2.604883,248,0.049412,14.853697,0.767596,0.183659,0.072636,1.613247,1.009585,1.825622,1.307613,0.189906,0.989613,0.00668,0.001312,0.000443,0.001215,0.000737


In [5]:
# Ratio of spectral power: new column -> (numerator band, denominator band)
band_ratios = {
    'dt': ('delta', 'theta'),
    'db': ('delta', 'beta'),
    'ds': ('delta', 'sigma'),
    'at': ('alpha', 'theta'),
}
for ratio_name, (numerator, denominator) in band_ratios.items():
    df_feat[ratio_name] = df_feat[numerator] / df_feat[denominator]

df_feat.head()

Unnamed: 0,std,mean,median,iqr,skew,kurt,nzc,hmob,hcomp,perm_entropy,svd_entropy,sample_entropy,dfa,petrosian,katz,higuchi,lziv,delta,theta,alpha,sigma,beta,gamma,dt,db,ds,at
0,50.72824,-0.458406,0.610417,42.633953,0.580565,2.600725,205,0.059614,12.427032,0.775805,0.211023,0.141362,1.549203,1.009895,1.942383,1.322072,0.161336,0.947087,0.03299,0.009982,0.002576,0.005049,0.002316,28.70867,187.573489,367.638309,0.302586
1,36.40965,0.385982,-0.291012,32.186688,0.114546,2.405869,272,0.083842,9.601425,0.79736,0.271151,0.203777,1.489831,1.010768,1.945935,1.413717,0.20167,0.954627,0.024764,0.008382,0.002574,0.005836,0.003815,38.548423,163.564392,370.812011,0.33848
2,41.266214,0.059199,2.663321,48.64983,0.124992,0.945467,128,0.081633,10.302285,0.784943,0.267274,0.180985,1.545471,1.010286,2.090015,1.416221,0.109238,0.954055,0.020523,0.010761,0.003061,0.007351,0.00425,46.48809,129.784474,311.704843,0.524337
3,53.069446,0.372208,5.369971,34.552458,-2.218553,9.56239,209,0.094153,8.400673,0.799514,0.293044,0.139526,1.490483,1.010866,1.822021,1.455203,0.164697,0.958963,0.012552,0.004894,0.003369,0.013351,0.006872,76.400467,71.827131,284.671687,0.389866
4,60.364001,-0.25408,0.494972,34.939698,-0.358552,2.604883,248,0.049412,14.853697,0.767596,0.183659,0.072636,1.613247,1.009585,1.825622,1.307613,0.189906,0.989613,0.00668,0.001312,0.000443,0.001215,0.000737,148.152248,814.342235,2235.510571,0.196406


In [10]:
from sklearn.feature_selection import f_classif

# Load hypnogram: the first column of the text file holds one stage per epoch
# NOTE(review): absolute, machine-specific path -- consider a configurable data directory.
location_hypno = "/Users/amirhosseindaraie/Desktop/data/synced-hypnos"
hypno_path = f'{location_hypno}/p8n3_synced.txt'
hypno_30s = np.loadtxt(hypno_path)[:, 0]

# F-statistic of every feature against the sleep stages, sorted ascending
f_stat = f_classif(X=df_feat, y=hypno_30s)[0]
fvals = pd.Series(f_stat, index=df_feat.columns).sort_values()

# Horizontal bar chart of the F-values, one bar per feature
plt.figure(figsize=(6, 6))
sns.barplot(x=fvals, y=fvals.index, palette='RdYlGn')
plt.xticks(rotation=20)
plt.xlabel('F-values')
plt.tight_layout()
plt.show()

In [9]:
# Plot hypnogram and relative delta power
fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(12, 6), sharex=True)

# Re-code the stages so that the value drawn at level 1 (labelled 'R' below)
# sits between wake (0) and the levels labelled N1-N3 (2-4)
hypno = pd.Series(hypno_30s).map({-1: -1, 0: 0, 1: 2, 2: 3, 3: 4, 4: 1}).values
# Keep only the epochs at level 1 so they can be overdrawn in red
hypno_rem = np.ma.masked_not_equal(hypno, 1)

# Plot the hypnogram (negated so that deeper stages are drawn lower)
ax1.step(times, -1 * hypno, color='k', lw=1.5)
ax1.step(times, -1 * hypno_rem, color='r', lw=2.5)
ax1.set_yticks([0, -1, -2, -3, -4])
ax1.set_yticklabels(['W', 'R', 'N1', 'N2', 'N3'])
ax1.set_ylim(-4.5, 0.5)
ax1.set_ylabel('Sleep stage')

# Plot the spectral feature; the axis label describes the column that is drawn
ax2.plot(times, df_feat['delta'])
ax2.set_ylabel('Relative delta power')
ax2.set_xlabel('Time [minutes]')

ax2.set_xlim(0, times[-1])

plt.tight_layout()
plt.show()

In [None]:
# Preview the band-power table instead of dumping every epoch
bp.head()