# Data for one participant 

In [110]:
import numpy as np
import mne
from scipy import signal
from scipy.interpolate import RectBivariateSpline
from mne.filter import resample, filter_data
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from lspopt import spectrogram_lspopt
from matplotlib.colors import Normalize, ListedColormap

import logging
LOGGING_TYPES = dict(DEBUG=logging.DEBUG, INFO=logging.INFO, WARNING=logging.WARNING,
                     ERROR=logging.ERROR, CRITICAL=logging.CRITICAL)
logger = logging.getLogger('yasa')

%matplotlib qt

In [111]:
# Load the EDF file
fname = "P20_N3"  # define here
lr = "L"  # define here
location = f"/Users/amirhosseindaraie/Desktop/data/autoscoring-material/data/Zmax Donders/{fname}"
raw = mne.io.read_raw_edf(f"{location}/EEG {lr}.edf", preload=True, verbose=0)
raw.pick_types(eeg=True)
# fig = raw.plot(use_opengl=False)

# Apply a zero-phase bandpass filter between 0.5 ~ 45 Hz
raw.filter(0.5, 45)

# Plot properties of the filter
filt = mne.filter.create_filter(raw._data, 256, 0.5, 40)
mne.viz.plot_filter(filt, 256)
plt.savefig("filter shape.png", dpi=100, bbox_inches="tight")

# Extract the data and convert from V to uV
data = raw._data * 1e6
sf = raw.info["sfreq"]
chan = raw.ch_names

# Let's have a look at the data
print("Chan =", chan)
print("Sampling frequency =", sf, "Hz")
print("Data shape =", data.shape)


def format_seconds_to_hhmmss(seconds):
    # Return hhmmss of total seconds parameter
    hours = seconds // (60 * 60)
    seconds %= 60 * 60
    minutes = seconds // 60
    seconds %= 60
    return "%02i:%02i:%02i" % (hours, minutes, seconds)


print(
    f"Duration: {data.shape[1]/sf} (sec) OR {format_seconds_to_hhmmss(data.shape[1]/sf)}"
)


Filtering raw data in 1 contiguous segment
Setting up band-pass filter from 0.5 - 45 Hz

FIR filter parameters
---------------------
Designing a one-pass, zero-phase, non-causal bandpass filter:
- Windowed time-domain design (firwin) method
- Hamming window with 0.0194 passband ripple and 53 dB stopband attenuation
- Lower passband edge: 0.50
- Lower transition bandwidth: 0.50 Hz (-6 dB cutoff frequency: 0.25 Hz)
- Upper passband edge: 45.00 Hz
- Upper transition bandwidth: 11.25 Hz (-6 dB cutoff frequency: 50.62 Hz)
- Filter length: 1691 samples (6.605 sec)

Setting up band-pass filter from 0.5 - 40 Hz

FIR filter parameters
---------------------
Designing a one-pass, zero-phase, non-causal bandpass filter:
- Windowed time-domain design (firwin) method
- Hamming window with 0.0194 passband ripple and 53 dB stopband attenuation
- Lower passband edge: 0.50
- Lower transition bandwidth: 0.50 Hz (-6 dB cutoff frequency: 0.25 Hz)
- Upper passband edge: 40.00 Hz
- Upper transition bandwidth

In [112]:
import antropy as ant
import scipy.signal as sp_sig
import scipy.stats as sp_stats
from numpy import apply_along_axis as apply

pd.set_option("display.max_columns", None)
pd.set_option("display.expand_frame_repr", False)
pd.set_option("max_colwidth", -1)

# Time vector in seconds
times = np.arange(data.size) / sf


def sliding_window(data, sf, window, step=None, axis=-1):
    """Calculate a sliding window of a 1D or 2D EEG signal.
    .. versionadded:: 0.1.7
    Parameters
    ----------
    data : numpy array
        The 1D or 2D EEG data.
    sf : float
        The sampling frequency of ``data``.
    window : int
        The sliding window length, in seconds.
    step : int
        The sliding window step length, in seconds.
        If None (default), ``step`` is set to ``window``,
        which results in no overlap between the sliding windows.
    axis : int
        The axis to slide over. Defaults to the last axis.
    Returns
    -------
    times : numpy array
        Time vector, in seconds, corresponding to the START of each sliding
        epoch in ``strided``.
    strided : numpy array
        A matrix where row in last dimension consists of one instance
        of the sliding window, shape (n_epochs, ..., n_samples).
    Notes
    -----
    This is a wrapper around the
    :py:func:`numpy.lib.stride_tricks.as_strided` function.
    Examples
    --------
    With a 1-D array
    >>> import numpy as np
    >>> from yasa import sliding_window
    >>> data = np.arange(20)
    >>> times, epochs = sliding_window(data, sf=1, window=5)
    >>> times
    array([ 0.,  5., 10., 15.])
    >>> epochs
    array([[ 0,  1,  2,  3,  4],
           [ 5,  6,  7,  8,  9],
           [10, 11, 12, 13, 14],
           [15, 16, 17, 18, 19]])
    >>> sliding_window(data, sf=1, window=5, step=1)[1]
    array([[ 0,  1,  2,  3,  4],
           [ 2,  3,  4,  5,  6],
           [ 4,  5,  6,  7,  8],
           [ 6,  7,  8,  9, 10],
           [ 8,  9, 10, 11, 12],
           [10, 11, 12, 13, 14],
           [12, 13, 14, 15, 16],
           [14, 15, 16, 17, 18]])
    >>> sliding_window(data, sf=1, window=11)[1]
    array([[ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10]])
    With a N-D array
    >>> np.random.seed(42)
    >>> # 4 channels x 20 samples
    >>> data = np.random.randint(-100, 100, size=(4, 20))
    >>> epochs = sliding_window(data, sf=1, window=10)[1]
    >>> epochs.shape  # shape (n_epochs, n_channels, n_samples)
    (2, 4, 10)
    >>> epochs
    array([[[  2,  79,  -8, -86,   6, -29,  88, -80,   2,  21],
            [-13,  57, -63,  29,  91,  87, -80,  60, -43, -79],
            [-50,   7, -46, -37,  30, -50,  34, -80, -28,  66],
            [ -9,  10,  87,  98,  71, -93,  74, -66, -20,  63]],
           [[-26, -13,  16,  -1,   3,  51,  30,  49, -48, -99],
            [-12, -52, -42,  69,  87, -86,  89,  89,  74,  89],
            [-83,  31, -12, -41, -87, -92, -11, -48,  29, -17],
            [-51,   3,  31, -99,  33, -47,   5, -97, -47,  90]]])
    """
    from numpy.lib.stride_tricks import as_strided

    assert axis <= data.ndim, "Axis value out of range."
    assert isinstance(sf, (int, float)), "sf must be int or float"
    assert isinstance(window, (int, float)), "window must be int or float"
    assert isinstance(step, (int, float, type(None))), (
        "step must be int, " "float or None."
    )
    if isinstance(sf, float):
        assert sf.is_integer(), "sf must be a whole number."
        sf = int(sf)
    assert isinstance(axis, int), "axis must be int."

    # window and step in samples instead of points
    window *= sf
    step = window if step is None else step * sf

    if isinstance(window, float):
        assert window.is_integer(), "window * sf must be a whole number."
        window = int(window)

    if isinstance(step, float):
        assert step.is_integer(), "step * sf must be a whole number."
        step = int(step)

    assert step >= 1, "Stepsize may not be zero or negative."
    assert window < data.shape[axis], (
        "Sliding window size may not exceed " "size of selected axis"
    )

    # Define output shape
    shape = list(data.shape)
    shape[axis] = np.floor(data.shape[axis] / step - window / step + 1).astype(int)
    shape.append(window)

    # Calculate strides and time vector
    strides = list(data.strides)
    strides[axis] *= step
    strides.append(data.strides[axis])
    strided = as_strided(data, shape=shape, strides=strides)
    t = np.arange(strided.shape[-2]) * (step / sf)

    # Swap axis: n_epochs, ..., n_samples
    if strided.ndim > 2:
        strided = np.rollaxis(strided, -2, 0)
    return t, strided


# Convert the EEG data to 30-sec data
times, data_win = sliding_window(data[0], sf, window=30)

# Convert times to minutes
times /= 60


def lziv(x):
    """Binarize the EEG signal and calculate the Lempel-Ziv complexity."""
    return ant.lziv_complexity(x > x.mean(), normalize=True)


  pd.set_option("max_colwidth", -1)


In [113]:
# generate a plot for manuscript

begin = 3.5
end = 3.6

plt.figure(figsize=(8, 3))

raw = mne.io.read_raw_edf(f"{location}/EEG L.edf", preload=True, verbose=0)
raw.pick_types(eeg=True)
data = raw._data * 1e6
y = data[0]
t = np.arange(data.size) / sf / 60 / 60
plt.plot(
    t[np.where(t == begin)[0][0] : np.where(t == end)[0][0]] * 60,
    y[np.where(t == begin)[0][0] : np.where(t == end)[0][0]],
    c="mediumblue",
    label="EEG L"
)

raw = mne.io.read_raw_edf(f"{location}/EEG R.edf", preload=True, verbose=0)
raw.pick_types(eeg=True)
data = raw._data * 1e6
y = data[0]
plt.plot(
    t[np.where(t == begin)[0][0] : np.where(t == end)[0][0]] * 60,
    y[np.where(t == begin)[0][0] : np.where(t == end)[0][0]] + 130,
    c="dodgerblue",
    label="EEG R"
)
plt.xlim([t[np.where(t == begin)[0][0]] * 60, t[np.where(t == end)[0][0]] * 60])
plt.yticks([])
plt.xlabel("Time ($min$)")
plt.ylabel("EEG ($mV$)")
plt.title("Sleep EEG Wave")
plt.legend()
plt.tight_layout()
plt.savefig("sleep_EEG_wave.svg")
plt.show()

# raw.filter(0.5, 45)
# data = raw._data * 1e6
# y = data[0]
# plt.plot(
#     t[np.where(t == begin)[0][0] : np.where(t == end)[0][0]] * 60,
#     y[np.where(t == begin)[0][0] : np.where(t == end)[0][0]],
# )


In [114]:
# Feature extraction
df_feat = {
    # Statistical
    "std": apply(np.std, arr=data_win, axis=1, ddof=1),
    "mean": apply(np.mean, arr=data_win, axis=1),
    "median": apply(np.median, arr=data_win, axis=1),
    "iqr": apply(sp_stats.iqr, arr=data_win, axis=1, rng=(25, 75)),
    "skew": apply(sp_stats.skew, arr=data_win, axis=1),
    "kurt": apply(sp_stats.kurtosis, arr=data_win, axis=1),
    "nzc": apply(ant.num_zerocross, arr=data_win, axis=1),
}


df_feat = pd.DataFrame(df_feat)
df_feat.head()


Unnamed: 0,std,mean,median,iqr,skew,kurt,nzc
0,75.927057,2.657841,8.410885,98.100541,0.083062,0.523938,167
1,102.776359,-0.986946,-7.63529,75.261315,1.071312,6.501378,186
2,114.790454,-2.047461,-4.301004,97.610991,0.724141,1.036594,121
3,69.575593,0.432048,-2.007351,40.823823,1.576895,9.094225,208
4,85.334876,0.289089,-2.830345,80.135352,0.384282,1.34796,118


In [115]:
from scipy.integrate import simps
from scipy.signal import welch

# Estimate power spectral density using Welch's method
freqs, psd = welch(data_win, sf, nperseg=int(4 * sf))


def bandpower_from_psd_ndarray(
    psd,
    freqs,
    bands=[
        (0.5, 4, "Delta"),
        (4, 8, "Theta"),
        (8, 12, "Alpha"),
        (12, 16, "Sigma"),
        (16, 30, "Beta"),
        (30, 40, "Gamma"),
    ],
    relative=True,
):
    """Compute bandpowers in N-dimensional PSD.
    This is a np-only implementation of the :py:func:`yasa.bandpower_from_psd` function,
    which supports 1-D arrays of shape (n_freqs), or N-dimensional arays (e.g. 2-D (n_chan,
    n_freqs) or 3-D (n_chan, n_epochs, n_freqs))
    .. versionadded:: 0.2.0
    Parameters
    ----------
    psd : :py:class:`np.ndarray`
        Power spectral density of data, in uV^2/Hz. Must be a N-D array of shape (..., n_freqs).
        See :py:func:`scipy.signal.welch` for more details.
    freqs : :py:class:`np.ndarray`
        Array of frequencies. Must be a 1-D array of shape (n_freqs,)
    bands : list of tuples
        List of frequency bands of interests. Each tuple must contain the lower and upper
        frequencies, as well as the band name (e.g. (0.5, 4, 'Delta')).
    relative : boolean
        If True, bandpower is divided by the total power between the min and
        max frequencies defined in ``band`` (default 0.5 to 40 Hz).
    Returns
    -------
    bandpowers : :py:class:`np.ndarray`
        Bandpower array of shape *(n_bands, ...)*.
    """
    # Type checks
    assert isinstance(bands, list), "bands must be a list of tuple(s)"
    assert isinstance(relative, bool), "relative must be a boolean"

    # Safety checks
    freqs = np.asarray(freqs)
    psd = np.asarray(psd)
    assert freqs.ndim == 1, "freqs must be a 1-D array of shape (n_freqs,)"
    assert psd.shape[-1] == freqs.shape[-1], "n_freqs must be last axis of psd"

    # Extract frequencies of interest
    all_freqs = np.hstack([[b[0], b[1]] for b in bands])
    fmin, fmax = min(all_freqs), max(all_freqs)
    idx_good_freq = np.logical_and(freqs >= fmin, freqs <= fmax)
    freqs = freqs[idx_good_freq]
    res = freqs[1] - freqs[0]

    # Trim PSD to frequencies of interest
    psd = psd[..., idx_good_freq]

    # Check if there are negative values in PSD
    if (psd < 0).any():
        msg = (
            "There are negative values in PSD. This will result in incorrect "
            "bandpower values. We highly recommend working with an "
            "all-positive PSD. For more details, please refer to: "
            "https://github.com/raphaelvallat/yasa/issues/29"
        )
        logger.warning(msg)

    # Calculate total power
    total_power = simps(psd, dx=res, axis=-1)
    total_power = total_power[np.newaxis, ...]

    # Initialize empty array
    bp = np.zeros((len(bands), *psd.shape[:-1]), dtype=np.float64)

    # Enumerate over the frequency bands
    labels = []
    for i, band in enumerate(bands):
        b0, b1, la = band
        labels.append(la)
        idx_band = np.logical_and(freqs >= b0, freqs <= b1)
        bp[i] = simps(psd[..., idx_band], dx=res, axis=-1)

    if relative:
        bp /= total_power
    return bp


# Compute bandpowers in N-dimensional PSD
bp = bandpower_from_psd_ndarray(psd, freqs)
bp = pd.DataFrame(bp.T, columns=["delta", "theta", "alpha", "sigma", "beta", "gamma"])
df_feat = pd.concat([df_feat, bp], axis=1)
df_feat.head()


Unnamed: 0,std,mean,median,iqr,skew,kurt,nzc,delta,theta,alpha,sigma,beta,gamma
0,75.927057,2.657841,8.410885,98.100541,0.083062,0.523938,167,0.960825,0.019146,0.006508,0.002389,0.005207,0.005926
1,102.776359,-0.986946,-7.63529,75.261315,1.071312,6.501378,186,0.957676,0.030717,0.005549,0.000923,0.003878,0.001258
2,114.790454,-2.047461,-4.301004,97.610991,0.724141,1.036594,121,0.993764,0.003204,0.000948,0.000456,0.001097,0.000531
3,69.575593,0.432048,-2.007351,40.823823,1.576895,9.094225,208,0.992665,0.003559,0.000923,0.000407,0.001635,0.000812
4,85.334876,0.289089,-2.830345,80.135352,0.384282,1.34796,118,0.991569,0.003488,0.001348,0.000594,0.002048,0.000953


In [116]:
# Ratio of spectral power
df_feat.eval("dt = delta / theta", inplace=True)
df_feat.eval("da = delta / alpha", inplace=True)
df_feat.eval("ds = delta / sigma", inplace=True)
df_feat.eval("db = delta / beta", inplace=True)
df_feat.eval("dg = delta / gamma", inplace=True)

df_feat.eval("td = theta / delta", inplace=True)
df_feat.eval("ta = theta / alpha", inplace=True)
df_feat.eval("ts = theta / sigma", inplace=True)
df_feat.eval("tb = theta / beta", inplace=True)
df_feat.eval("tg = theta / gamma", inplace=True)

df_feat.eval("ad = alpha / delta", inplace=True)
df_feat.eval("at = alpha / theta", inplace=True)
df_feat.eval("asi = alpha / sigma", inplace=True)
df_feat.eval("ab = alpha / beta", inplace=True)
df_feat.eval("ag = alpha / gamma", inplace=True)

df_feat.eval("sd = sigma / delta", inplace=True)
df_feat.eval("st = sigma / theta", inplace=True)
df_feat.eval("sa = sigma / alpha", inplace=True)
df_feat.eval("sb = sigma / beta", inplace=True)
df_feat.eval("sg = sigma / gamma", inplace=True)

df_feat.eval("bd = beta / delta", inplace=True)
df_feat.eval("bt = beta / theta", inplace=True)
df_feat.eval("ba = beta / alpha", inplace=True)
df_feat.eval("bs = beta / sigma", inplace=True)
df_feat.eval("bg = beta / gamma", inplace=True)

df_feat.eval("gd = gamma / delta", inplace=True)
df_feat.eval("gt = gamma / theta", inplace=True)
df_feat.eval("ga = gamma / alpha", inplace=True)
df_feat.eval("gs = gamma / sigma", inplace=True)
df_feat.eval("gb = gamma / beta", inplace=True)

df_feat.eval("ta_b = (theta + alpha)/beta", inplace=True)
df_feat.eval("ta_ab = (theta + alpha)/(alpha + beta)", inplace=True)
df_feat.eval("gb_da = (gamma + beta)/(delta + alpha)", inplace=True)

df_feat.head()


Unnamed: 0,std,mean,median,iqr,skew,kurt,nzc,delta,theta,alpha,sigma,beta,gamma,dt,da,ds,db,dg,td,ta,ts,tb,tg,ad,at,asi,ab,ag,sd,st,sa,sb,sg,bd,bt,ba,bs,bg,gd,gt,ga,gs,gb,ta_b,ta_ab,gb_da
0,75.927057,2.657841,8.410885,98.100541,0.083062,0.523938,167,0.960825,0.019146,0.006508,0.002389,0.005207,0.005926,50.183989,147.646495,402.186441,184.540427,162.140431,0.019927,2.942104,8.014238,3.677277,3.23092,0.006773,0.339893,2.723982,1.24988,1.098166,0.002486,0.124778,0.36711,0.458843,0.403147,0.005419,0.27194,0.800077,2.179395,0.878617,0.006167,0.309509,0.910609,2.480482,1.138152,4.927157,2.189964,0.011508
1,102.776359,-0.986946,-7.63529,75.261315,1.071312,6.501378,186,0.957676,0.030717,0.005549,0.000923,0.003878,0.001258,31.177594,172.587514,1038.089297,246.944016,761.307084,0.032074,5.535627,33.296004,7.92056,24.418404,0.005794,0.180648,6.014857,1.430834,4.411137,0.000963,0.030034,0.166255,0.237883,0.733373,0.00405,0.126254,0.698893,4.203743,3.082914,0.001314,0.040953,0.226699,1.363562,0.324368,9.351394,3.846991,0.005332
2,114.790454,-2.047461,-4.301004,97.610991,0.724141,1.036594,121,0.993764,0.003204,0.000948,0.000456,0.001097,0.000531,310.115402,1047.909459,2181.252282,906.019414,1871.909792,0.003225,3.379095,7.033679,2.921556,6.036172,0.000954,0.295937,2.081527,0.864597,1.786328,0.000458,0.142173,0.480416,0.415367,0.858181,0.001104,0.342283,1.156608,2.407512,2.066081,0.000534,0.165668,0.559808,1.165255,0.484008,3.786153,2.030547,0.001636
3,69.575593,0.432048,-2.007351,40.823823,1.576895,9.094225,208,0.992665,0.003559,0.000923,0.000407,0.001635,0.000812,278.935303,1075.914485,2439.582838,607.261451,1222.695085,0.003585,3.857219,8.746053,2.177069,4.383436,0.000929,0.259254,2.267451,0.564414,1.136424,0.00041,0.114337,0.441024,0.24892,0.50119,0.001647,0.459333,1.771748,4.017352,2.013457,0.000818,0.228132,0.879953,1.99525,0.496658,2.741483,1.752402,0.002462
4,85.334876,0.289089,-2.830345,80.135352,0.384282,1.34796,118,0.991569,0.003488,0.001348,0.000594,0.002048,0.000953,284.270097,735.464342,1670.221287,484.17263,1040.032573,0.003518,2.587203,5.875473,1.703213,3.658607,0.00136,0.386518,2.270975,0.658322,1.414117,0.000599,0.170199,0.440339,0.289885,0.622691,0.002065,0.587126,1.519013,3.44964,2.148061,0.000962,0.273328,0.707155,1.605932,0.465536,2.361536,1.424051,0.003023


In [117]:
# Write feature object to a comma-separated values (csv) file
df_feat.to_csv(f"feature {fname} {lr}.csv", index=False)


In [118]:
# Load feature object as a dataframe
df_feat = pd.read_csv(f"feature {fname} {lr}.csv", index_col=False)


In [119]:
def normalize(v):
    norm = np.linalg.norm(v)
    if norm == 0:
        return v
    return v / norm


In [120]:
plt.figure(figsize=(8, 6))
plt.plot(normalize(df_feat["gamma"]), label="Signal Gamma Power")
plt.xlabel("Epoch")
plt.ylabel("STD")
plt.title("Power of Gamma Band from Sleep Signal [P8_N3 L]")
plt.legend()
plt.tight_layout()
plt.savefig("gamma plot.svg", format="svg")
plt.show()


In [121]:
# Load hypnogram
location_hypno = "/Users/amirhosseindaraie/Desktop/data/synced-hypnos"
hypno_30s = np.loadtxt(f"{location_hypno}/p8n3_synced.txt")[:, 0]

# Plot hypnogram and a feature
fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(12, 6), sharex=True)

hypno = pd.Series(hypno_30s).map({-1: -1, 0: 0, 1: 2, 2: 3, 3: 4, 4: 1}).values
hypno_rem = np.ma.masked_not_equal(hypno, 1)

# Plot the hypnogram
ax1.step(times, -1 * hypno, color="k", lw=1.5)
ax1.step(times, -1 * hypno_rem, color="r", lw=2.5)
ax1.set_yticks([0, -1, -2, -3, -4])
ax1.set_yticklabels(["W", "R", "N1", "N2", "N3"])
ax1.set_ylim(-4.5, 0.5)
ax1.set_ylabel("Sleep stage")

# Plot the non-linear feature
ax2.plot(times, df_feat["gamma"])
ax2.set_ylabel("Gamma Band Power")
# ax2.set_ylabel('Higuchi Fractal Dimension')
ax2.set_xlabel("Time [minutes]")
 
ax2.set_xlim(0, times[-1])

plt.tight_layout()
plt.show()


ValueError: x and y must have same first dimension, but have shapes (933,) and (990,)