In [152]:
# import mne
import pywt
import numpy as np
import pandas as pd
import antropy as ant

from os import listdir
# from entropy import *
from tqdm import tqdm
from scipy.stats import entropy

from sklearn.decomposition import PCA
from sklearn.utils import shuffle
from scipy.stats import entropy
from multiprocessing import Pool
from pyrqa.settings import Settings
from pyrqa.analysis_type import Classic
from pyrqa.time_series import TimeSeries
from pyrqa.neighbourhood import FixedRadius
from pyrqa.metric import EuclideanMetric
from pyrqa.computation import RQAComputation
from dit.other import renyi_entropy

In [120]:
def load_data_mindbig(data, channel_num, id_num, random_state=None):
    """Load labelled EEG events from a MindBigData text file in ``./data/``.

    Each line of the file is split on tabs. As used here, field 1 is the
    event identifier, field 4 the code ("-1" for rest, otherwise the digit),
    field 5 the number of samples and field 6 the comma-separated samples of
    one channel. Only rows whose sample count is "256" are kept, and only
    events that have exactly ``channel_num`` such rows are returned.

    Args:
        data (str): file name stem; the file read is ``./data/<data>.txt``.
        channel_num (int): number of channel rows an event must have to be
            kept.
        id_num (str | int): digit code to load ("0".."9"), or "all" for every
            non-rest code. Integers are accepted and converted to strings.
        random_state: optional seed forwarded to ``sklearn.utils.shuffle`` so
            the returned order can be reproduced. Defaults to None (unseeded).
    Returns:
        all_data (list): one entry per event, each a list of ``channel_num``
            lists of int samples.
        all_label (list): -1 for rest events, 1 for digit events, in the same
            (shuffled) order as ``all_data``.
    """
    data_path = "./data/" + data + ".txt"

    # The context manager closes the file even if parsing fails.
    with open(data_path, "r") as data_file:
        rows = [line.split("\t") for line in data_file]

    # The file stores codes as text, so an int such as 4 must become "4".
    id_num = str(id_num)

    # Group the raw sample strings by event in a single pass instead of
    # rescanning every row once per event.
    rest_groups = {}
    math_groups = {}
    for row in rows:
        # Skip blank/truncated lines and rows that are not 256 samples long.
        if len(row) < 7 or row[5] != "256":
            continue
        code = row[4]
        if code == "-1":
            rest_groups.setdefault(row[1], []).append(row[6])
        is_math = code != "-1" if id_num == "all" else code == id_num
        if is_math:
            math_groups.setdefault(row[1], []).append(row[6])

    # Sorted unique event identifiers (same ordering np.unique gives strings).
    rest_event = sorted(rest_groups)
    math_event = sorted(math_groups)

    print(len(rest_event))
    print(len(math_event))

    # Cap one class by the size of the other: digit events are capped for
    # "all", rest events are capped for a single digit.
    if id_num == "all":
        math_event = math_event[0 : len(rest_event)]
    else:
        rest_event = rest_event[0 : len(math_event)]

    all_data = []
    all_label = []
    labelled_groups = ((-1, rest_event, rest_groups), (1, math_event, math_groups))
    for label, events, groups in labelled_groups:
        for event in tqdm(events):
            channel_rows = groups[event]
            # Drop events with missing or duplicated channel rows.
            if len(channel_rows) != channel_num:
                continue
            all_data.append(
                [[int(float(k)) for k in samples.split(",")] for samples in channel_rows]
            )
            all_label.append(label)

    # Nothing to shuffle when no event survived the filters.
    if not all_data:
        return all_data, all_label

    all_data, all_label = shuffle(all_data, all_label, random_state=random_state)

    return all_data, all_label

In [121]:
def wavelet_decompose(data, level, wavelet):
    """Wavelet-decompose every channel and keep only the detail coefficients.

    Args:
        data: iterable of channels, each a 1-D sequence of samples.
        level (int): decomposition level passed to ``pywt.wavedec``.
        wavelet (str): name of the wavelet to use, e.g. "db1".
    Returns:
        list: one flat list per channel holding that channel's coefficient
            arrays concatenated in the order ``pywt.wavedec`` returns them,
            with the first array (the approximation) left out.
    """
    decomposed = []

    for signal in data:
        coefficient_sets = pywt.wavedec(signal, wavelet, level=level)
        # Index 0 is skipped; everything after it is flattened into one list.
        flattened = [
            value
            for detail in coefficient_sets[1:]
            for value in detail
        ]
        decomposed.append(flattened)

    return decomposed

In [138]:
def _renyi_entropy(band):
    """Compute Renyi entropy.
    Renyi entropy is a super class of Shannon entropy when alpha approaches 1.
    Here we compute Shannon entropy instead.
    Args:
        band (numpy.ndarray): one band of samples [band frequency x channel].
    Returns:
        ren_en: calculated features from Renyi entropy.
    """

    pd_series = pd.Series(band)
    counts = pd_series.value_counts()
    ren_en = entropy(counts)

    return ren_en


def _rqa_entropy(band):
    """Run a recurrence quantification analysis (RQA) on one band.

    The band is embedded with dimension 2 and delay 2, and recurrences are
    detected with a fixed radius of 0.65 under the Euclidean metric.

    Args:
        band: 1-D sequence of samples handed to pyrqa's ``TimeSeries``.
    Returns:
        tuple: ``(rr, det, entr, lam, tt, vmax, lmax)`` read from the result's
            recurrence_rate, determinism, entropy_diagonal_lines, laminarity,
            trapping_time, longest_vertical_line and longest_diagonal_line
            attributes.
    """
    series = TimeSeries(band, embedding_dimension=2, time_delay=2)
    rqa_settings = Settings(
        series,
        analysis_type=Classic,
        neighbourhood=FixedRadius(0.65),
        similarity_measure=EuclideanMetric,
        theiler_corrector=1,
    )

    rqa = RQAComputation.create(rqa_settings, verbose=False).run()

    # The minimum line lengths are set on the result before any measure is read.
    for length_attr in (
        "min_diagonal_line_length",
        "min_vertical_line_length",
        "min_white_vertical_line_length",
    ):
        setattr(rqa, length_attr, 2)

    return (
        rqa.recurrence_rate,
        rqa.determinism,
        rqa.entropy_diagonal_lines,
        rqa.laminarity,
        rqa.trapping_time,
        rqa.longest_vertical_line,
        rqa.longest_diagonal_line,
    )

In [142]:
def compute_entropy(all_bands):
    """Build a flat entropy feature vector from a collection of bands.

    For every band, NaNs are replaced first, then nine features are
    computed: approximate entropy, sample entropy, and the seven RQA
    measures returned by ``_rqa_entropy``. Renyi entropy is currently not
    part of the feature vector.

    Args:
        all_bands: iterable of 1-D sample sequences, one per band.
    Returns:
        list: the features of all bands concatenated band after band
            (nine values per band), with NaNs replaced via ``np.nan_to_num``.
    """
    features = []

    for raw_band in all_bands:
        cleaned = np.nan_to_num(raw_band)

        approx_en = ant.app_entropy(cleaned, order=2, metric="chebyshev")
        sample_en = ant.sample_entropy(cleaned, order=2, metric="chebyshev")
        rr, det, entr, lam, tt, vmax, lmax = _rqa_entropy(cleaned)

        band_features = [approx_en, sample_en, rr, det, entr, lam, tt, vmax, lmax]
        # Some measures can come back as NaN; sanitise them before storing.
        features.extend(np.nan_to_num(band_features))

    return features

In [150]:
def compute_pca(data, n_components=None):
    """Fit a PCA on ``data`` and return the transformed samples.

    Args:
        data: 2-D array-like with one row per sample and one column per
            feature.
        n_components: forwarded to ``sklearn.decomposition.PCA``. The default
            None matches the previous ``PCA()`` call; pass a value to reduce
            dimensionality.
    Returns:
        numpy.ndarray: ``data`` projected onto the fitted principal
            components.
    """
    pca = PCA(n_components=n_components)
    return pca.fit_transform(data)

In [128]:
# Load ./data/EP.txt, keeping events that have exactly 14 channel rows:
# rest rows (code "-1") are labelled -1 and rows with code "4" are labelled 1.
all_data, all_label = load_data_mindbig("EP", 14, "4")

 27%|██▋       | 43/159 [00:00<00:00, 423.53it/s]

159
1191


100%|██████████| 159/159 [00:00<00:00, 382.23it/s]
100%|██████████| 1191/1191 [00:05<00:00, 217.91it/s]


In [134]:
# Number of events returned by the loader (rest and digit events combined).
len(all_data)

1350

In [143]:
# Build one entropy feature vector per event from its wavelet detail bands.
eeg_data = []

for batch in tqdm(all_data):
    all_bands = wavelet_decompose(batch, 6, "db1")
    # Deliberately not named `entropy`: that would overwrite the imported
    # scipy.stats.entropy function that _renyi_entropy calls.
    entropy_features = compute_entropy(all_bands)
    eeg_data.append(entropy_features)

100%|██████████| 1350/1350 [07:43<00:00,  2.92it/s]


In [144]:
# One feature vector is appended per element of all_data, so this should
# equal len(all_data).
len(eeg_data)

1350

In [153]:
# Run PCA over the per-event entropy feature vectors.
eeg_data_pca = compute_pca(eeg_data)