In [2]:
import pandas as pd
import seaborn as sns
from scipy import signal
from scipy.io import loadmat
import matplotlib.pyplot as plt
import numpy as np

In [10]:
# Sampling frequency of the recordings, in Hz (passed as `fs` to signal.welch).
FREQ = 250

# Read one subject's recording and wrap the array stored under 'data' in a Panel.
# Panel axes:
#   items axis -> epoch
#   major axis -> electrode
#   minor axis -> sample
mat = loadmat('data/S06.mat')
panel = pd.Panel(mat['data'])

# Last expression of the cell: show the Panel summary.
panel

<class 'pandas.core.panel.Panel'>
Dimensions: 769 (items) x 256 (major_axis) x 201 (minor_axis)
Items axis: 0 to 768
Major_axis axis: 0 to 255
Minor_axis axis: 0 to 200

# Extracción de features por sujeto

In [53]:
import gc

# Release memory still held by objects from earlier cells.
gc.collect()

# Frequency bands in Hz, keyed by band name. Each band is used as a half-open
# interval [low, high) by the band lookup in extract_frequency_power.
LIMITS = {
    'delta': (0, 4),
    'tita': (4, 8),
    'alpha': (8, 13),
    'beta': (13, 30),
    'gamma': (30, 45),
}

def extract_frequency_power(panel):
    """Sum the Welch power spectrum inside every band of LIMITS.

    The panel is first averaged along axis 1 (documented as the electrode axis
    where the panel is loaded); the resulting frame is then processed column
    by column.

    Parameters
    ----------
    panel : pandas.Panel
        Recording to analyse.

    Returns
    -------
    pandas.DataFrame
        Band names in the index and one column per column of the averaged
        frame (the original note describes the result as epochs x bands).
    """
    def band_for(frequency):
        # First band in LIMITS whose half-open range [low, high) holds the frequency.
        for band_name, limits in LIMITS.items():
            if limits[0] <= frequency < limits[1]:
                return band_name
        return None

    def compute_welch(series):
        # The whole series is used as one segment (nperseg=len(series));
        # nfft=2048 sets the FFT length.
        frequencies, power = signal.welch(series, fs=FREQ, nperseg=len(series), nfft=2048)

        band_totals = dict.fromkeys(('delta', 'tita', 'alpha', 'beta', 'gamma'), 0)
        for frequency, value in zip(frequencies, power):
            band_name = band_for(frequency)
            # Frequencies outside every band are simply ignored.
            if band_name is not None:
                band_totals[band_name] += value
        return pd.Series(band_totals)

    averaged = panel.mean(1)
    return averaged.apply(compute_welch)

def frequency_features(dataframe, normalize=True, mean=True):
    """Collapse a band-indexed power table into one summary value per band.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Table whose index holds band names (keys of LIMITS when ``normalize``
        is true). The input is not modified; a copy is used.
    normalize : bool
        When true, every band row is divided by the band's width
        (upper limit minus lower limit taken from LIMITS).
    mean : bool
        When true each row is summarised by its mean, otherwise by its
        standard deviation.

    Returns
    -------
    pandas.Series
        One value per band, labelled ``<band>[_normalized]_mean`` or
        ``<band>[_normalized]_std``.
    """
    bands = dataframe.copy()
    if normalize:
        for band_name in bands.index:
            low_limit, upper_limit = LIMITS[band_name]
            bands.loc[band_name] = bands.loc[band_name] / (upper_limit - low_limit)

    summary = bands.mean(1) if mean else bands.std(1)

    # Suffix appended to every band name to identify this feature variant.
    suffix = ('_normalized' if normalize else '') + ('_mean' if mean else '_std')

    return pd.Series({band_name + suffix: summary.loc[band_name] for band_name in summary.index})

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,759,760,761,762,763,764,765,766,767,768
alpha,2.628992e-15,7.684495e-15,5.80559e-15,5.597188e-15,5.276057e-15,1.10378e-14,4.980598e-15,1.34863e-14,2.266736e-14,1.214454e-14,...,7.047034e-15,5.811933e-15,5.967935e-14,8.159302e-14,3.519343e-14,6.19464e-15,1.487251e-14,3.766392e-15,8.935297e-15,1.959162e-14
beta,2.24536e-15,2.045302e-15,2.613849e-15,1.374779e-15,3.378039e-15,4.01093e-15,2.320653e-15,3.020353e-15,1.397085e-15,1.97438e-15,...,1.814542e-15,5.289004e-15,4.606392e-15,1.091766e-14,8.095353e-15,4.424833e-15,4.422172e-15,2.868096e-15,6.492074e-15,4.651751e-15
delta,6.162635e-15,3.827108e-15,7.323667e-15,2.591838e-15,6.167803e-15,8.868549e-15,4.729536e-15,5.816139e-15,2.037311e-15,2.052723e-15,...,6.467649e-15,4.235336e-14,3.562014e-15,2.338017e-15,4.088139e-15,2.520822e-15,2.564985e-15,1.926385e-15,2.8918e-15,2.026559e-15
gamma,4.414909e-16,7.127991e-16,6.575873e-16,9.889878e-16,8.263826e-16,4.566659e-16,2.584855e-16,7.191908e-16,2.415051e-16,7.691159e-16,...,2.476363e-16,3.025788e-16,3.8789e-16,8.117243e-16,4.70849e-16,4.276378e-16,4.914445e-16,3.987315e-16,3.885467e-16,2.077082e-16
tita,2.699798e-15,3.050222e-15,2.146012e-15,4.428164e-15,2.695573e-15,1.432334e-15,4.80703e-15,2.489284e-15,6.088615e-15,4.28389e-15,...,1.084e-14,4.948983e-15,1.007462e-15,3.009715e-15,1.225484e-15,1.806605e-15,1.058176e-15,1.648803e-15,1.508606e-15,1.184176e-15


In [None]:
import scipy
from collections import defaultdict

# We take these electrodes as representative of the subject
# for the intra- and inter-electrode information measures.
ALPHA_RELEVANT_ELECTRODES = [8, 44, 80, 131, 185]

def get_intra(panel):
    """Per-epoch histogram entropy, averaged over ALPHA_RELEVANT_ELECTRODES.

    For every epoch and every electrode in ALPHA_RELEVANT_ELECTRODES the signal
    ``panel[epoch, electrode]`` is binned with numpy's 'fd' bin estimator and
    the entropy of the resulting relative frequencies is computed.

    Parameters
    ----------
    panel : pandas.Panel
        Recording whose shape unpacks into three dimensions, the first being
        the number of epochs.

    Returns
    -------
    pandas.Series
        Indexed by epoch; each value is the mean entropy across the electrodes.
    """
    def histogram_entropy(samples):
        counts, _ = np.histogram(samples, 'fd')
        n_samples = len(samples)
        return scipy.stats.entropy([count / n_samples for count in counts])

    n_epochs, _, _ = panel.shape
    per_epoch = {}
    for epoch in range(n_epochs):
        for electrode in ALPHA_RELEVANT_ELECTRODES:
            per_epoch.setdefault(epoch, {})[electrode] = histogram_entropy(panel[epoch, electrode])

    # Columns are epochs and rows are electrodes, so mean(0) averages across electrodes.
    return pd.DataFrame(per_epoch).mean(0)

In [88]:
from itertools import combinations

def joint_entropy(signal_1, signal_2):
    """Estimate the joint Shannon entropy of two signals sampled in parallel.

    Both signals are discretised with one shared set of bin edges (numpy's 'fd'
    estimator applied to the pooled samples). The entropy is then computed from
    the relative frequency of each *distinct* pair of bin labels.

    The previous implementation built one probability entry per sample rather
    than per distinct pair, so repeated pairs were counted several times and
    the value returned after scipy's renormalisation was not the joint entropy.
    It also cost O(n^2) because of ``list.count``.

    Parameters
    ----------
    signal_1, signal_2 : array-like
        One-dimensional sequences (pandas Series, numpy arrays or lists).
        Samples are paired positionally; if the lengths differ, the extra
        samples of the longer signal are ignored when pairing.

    Returns
    -------
    float
        Entropy as computed by ``scipy.stats.entropy`` with its default base
        (natural logarithm).
    """
    from collections import Counter  # local import keeps this cell self-contained

    # Shared bin edges so that both signals are quantised on the same grid.
    pooled = np.concatenate([np.ravel(signal_1), np.ravel(signal_2)])
    _, bins = np.histogram(pooled, 'fd')

    labels_1 = np.digitize(signal_1, bins)
    labels_2 = np.digitize(signal_2, bins)

    # Count each distinct (label_1, label_2) pair exactly once.
    pair_counts = Counter(zip(labels_1.tolist(), labels_2.tolist()))
    total = sum(pair_counts.values())
    probabilities = [count / total for count in pair_counts.values()]
    return scipy.stats.entropy(probabilities)
    

def inter_electrode_analysis(panel, epoch, electrode_1, electrode_2):
    """Joint entropy between the signals of two electrodes within one epoch.

    Each signal is selected as ``panel[epoch, electrode]`` and the pair is
    handed to ``joint_entropy``, whose result is returned unchanged.
    """
    first_signal, second_signal = (
        panel[epoch, electrode] for electrode in (electrode_1, electrode_2)
    )
    return joint_entropy(first_signal, second_signal)


def joint_entropy_for_subject(panel):
    """Per-epoch mean joint entropy over all pairs of ALPHA_RELEVANT_ELECTRODES.

    For each epoch, ``inter_electrode_analysis`` is evaluated on every
    2-combination of the electrodes in ALPHA_RELEVANT_ELECTRODES.

    Returns
    -------
    pandas.Series
        Indexed by epoch; each value is the mean over the electrode pairs.
    """
    n_epochs, _, _ = panel.shape
    per_epoch = {}
    for epoch in range(n_epochs):
        for first, second in combinations(ALPHA_RELEVANT_ELECTRODES, 2):
            pair_entropy = inter_electrode_analysis(panel, epoch, first, second)
            per_epoch.setdefault(epoch, []).append(pair_entropy)

    # One column per epoch, one row per electrode pair: mean(0) averages the pairs.
    return pd.DataFrame(per_epoch).mean(0)

In [None]:
import gc
from os import listdir
from os.path import isfile, join

# Every .mat file found in the 'data' directory is treated as one subject.
subjects = [file_name for file_name in listdir('data') if file_name.endswith('.mat')]

# Maps subject file name -> Series holding all of that subject's features.
all_subject_features = {}

for subject_name in subjects:
    print('Processing subject {}'.format(subject_name))

    # Load this subject's recording into a Panel.
    panel = pd.Panel(loadmat(join('data', subject_name))['data'])

    # ---- Feature extraction ----
    print('\tIntra entropy')
    intra_entropy = get_intra(panel)
    intra_mean, intra_std = intra_entropy.mean(), intra_entropy.std()

    print('\tInter entropy')
    inter_entropy = joint_entropy_for_subject(panel)
    inter_mean, inter_std = inter_entropy.mean(), inter_entropy.std()

    print('\tBands')
    df = extract_frequency_power(panel)
    mean_normalized = frequency_features(df, normalize=True, mean=True)
    std_normalized = frequency_features(df, normalize=True, mean=False)
    mean_not_normalized = frequency_features(df, normalize=False, mean=True)
    std_not_normalized = frequency_features(df, normalize=False, mean=False)

    print('\tAppending all together')
    subject_features = (
        mean_normalized
        .append(std_normalized)
        .append(mean_not_normalized)
        .append(std_not_normalized)
    )

    # 'type' is the first character of the file name (e.g. the 'S' / 'P' prefix
    # seen in the processed file names); the rest are the entropy summaries.
    extra_features = (
        ('type', subject_name[0]),
        ('intra_mean', intra_mean),
        ('intra_std', intra_std),
        ('inter_mean', inter_mean),
        ('inter_std', inter_std),
    )
    for feature_name, feature_value in extra_features:
        subject_features.loc[feature_name] = feature_value

    all_subject_features[subject_name] = subject_features

    # Drop the Panel before loading the next subject to keep memory bounded.
    del panel
    gc.collect()

# One column per subject, one row per feature.
all_subject_features = pd.DataFrame(all_subject_features)
all_subject_features

Processing subject S02.mat
	Intra entropy
	Inter entropy
	Bands
	Appending all together
Processing subject P04.mat
	Intra entropy
	Inter entropy
	Bands
	Appending all together
Processing subject P08.mat
	Intra entropy
	Inter entropy
	Bands
	Appending all together
Processing subject P03.mat
	Intra entropy
	Inter entropy
	Bands
	Appending all together
Processing subject S01.mat
	Intra entropy
	Inter entropy
	Bands
	Appending all together
Processing subject P05.mat
	Intra entropy
	Inter entropy
	Bands
	Appending all together
Processing subject P09.mat
	Intra entropy
	Inter entropy
	Bands
	Appending all together
Processing subject S10.mat
	Intra entropy
	Inter entropy
	Bands
	Appending all together
Processing subject P01.mat
	Intra entropy
	Inter entropy
	Bands
	Appending all together
Processing subject P07.mat
	Intra entropy
	Inter entropy
	Bands
	Appending all together
Processing subject S08.mat
	Intra entropy
	Inter entropy
	Bands
	Appending all together
Processing subject S03.mat
	Intr

In [None]:
# Persist the per-subject feature table to 'features.csv' so it can be reloaded
# without re-running the extraction loop.
all_subject_features.to_csv('features.csv')

In [None]:
# Reload the saved feature table. pandas exposes `read_csv` (there is no
# `pd.load_csv`, which raised AttributeError); index_col=0 restores the row
# labels that `to_csv` wrote as the first column.
features = pd.read_csv('features.csv', index_col=0)