# Import everything

In [1]:
import glob
import pandas as pd
import mne
import matplotlib.pyplot as plt
from io import StringIO
import mne
from mne.io import read_raw_eeglab, read_epochs_eeglab
import numpy as np
from scipy import signal
from scipy import fftpack
import seaborn as sns
from sklearn.linear_model import LinearRegression, LogisticRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
# from tqdm import tqdm, tqdm_notebook
from tqdm.notebook import tqdm
import math
from sklearn import preprocessing
from sklearn.linear_model import SGDClassifier
from sklearn.ensemble import GradientBoostingRegressor, RandomForestRegressor
from sklearn.svm import SVR
from sklearn.svm import SVC
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score
from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor
from keras_tqdm import TQDMNotebookCallback
from tensorboard.plugins.hparams import api as hp
from livelossplot.tf_keras import PlotLossesCallback

import sklearn.model_selection
import sklearn.datasets
import sklearn.metrics
import multiprocessing
from tpot import TPOTRegressor
from oct2py import octave
from joblib import Parallel, delayed
import multiprocessing
from joblib import wrap_non_picklable_objects
import json
import pickle
import os.path
from mpl_toolkits.mplot3d import axes3d
import timeit
from skimage.transform import resize
from timeit import default_timer as timer
from datetime import timedelta
import json
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers, optimizers, callbacks
from tensorflow.keras.models import Model, load_model
from tensorboard.plugins.hparams import api as hp
from tensorflow.keras.utils import plot_model
import statsmodels.api as sm
import statsmodels.formula.api as smf
from statsmodels.genmod import bayes_mixed_glm as glm
from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import chi2
from sklearn.feature_selection import RFECV
from spectrum import arburg, arma2psd, pburg
import pylab
from scipy.signal import find_peaks, butter
from scipy.integrate import simps
from scipy.io import loadmat
from numpy import trapz
import gzip

Using TensorFlow backend.


# Power and phase calculations

In [2]:
def blackman_harris_filter(channel, time, cutoffs, fs, numtaps=801):
    """Filter `channel` with a Blackman-Harris windowed FIR and shift the time axis.

    Parameters
    ----------
    channel : array-like
        Samples to filter.
    time : array-like
        Time stamp of every sample; must support subtraction of a scalar.
    cutoffs : float or sequence of float
        Cutoff frequency/frequencies handed straight to `signal.firwin`.
    fs : float
        Sampling rate, forwarded to `signal.firwin`.
    numtaps : int, optional
        Number of FIR taps (default 801).

    Returns
    -------
    pandas.DataFrame
        Columns `time` (input time minus the filter delay) and `channel`
        (the filtered samples).
    """
    taps = signal.firwin(numtaps, cutoffs, window='blackmanharris', fs=fs)
    # Delay of a linear-phase FIR: half the filter span divided by fs.
    # NOTE(review): this is expressed in 1/fs units (seconds when fs is in Hz);
    # confirm it matches the unit of `time` supplied by callers.
    half_span = 0.5 * (numtaps - 1)
    group_delay = half_span / fs
    return pd.DataFrame({
        'time': time - group_delay,
        'channel': signal.lfilter(taps, 1, channel),
    })

def butter_bandpass_filter(data, lowcut, highcut, fs, btype='bandpass', order=2):
    """Run `data` through a Butterworth filter defined by two band edges.

    Parameters
    ----------
    data : array-like
        Samples to filter.
    lowcut, highcut : float
        Band edges, in the same unit as `fs`.
    fs : float
        Sampling rate; the edges are normalised by fs / 2 before design.
    btype : str, optional
        Filter type forwarded to `scipy.signal.butter` (default 'bandpass').
    order : int, optional
        Filter order forwarded to `scipy.signal.butter` (default 2).

    Returns
    -------
    numpy.ndarray
        Output of `signal.lfilter` (a single forward pass).
    """
    nyquist = fs / 2
    normalised_edges = [lowcut / nyquist, highcut / nyquist]
    numerator, denominator = butter(order, normalised_edges, btype=btype)
    return signal.lfilter(numerator, denominator, data)

def get_phase(channel, time, band, filter_type='butter', start_time_ms=0, stop_time_ms=1000, fs=256):
    """Return the phase (degrees, shifted by +180) of the last sample in a window.

    The signal is first restricted to `band`, then the analytic signal of the
    samples with `start_time_ms < time < stop_time_ms` is taken with
    `signal.hilbert`, and the angle of its final sample is returned.

    Parameters
    ----------
    channel : array-like
        Samples of one channel.
    time : array-like
        Time stamp of every sample.
    band : sequence of two floats
        `band[0]` and `band[1]` are the lower and upper band edges.
    filter_type : str, optional
        'butter' uses `butter_bandpass_filter`; any other value runs two
        `blackman_harris_filter` passes.
    start_time_ms, stop_time_ms : float, optional
        Exclusive bounds of the analysis window on the (possibly shifted)
        time axis.
    fs : float, optional
        Sampling rate passed to the filters.

    Returns
    -------
    float
        `np.angle(..., deg=True)` of the last windowed sample plus 180.
    """
    if filter_type == 'butter':
        band_limited = butter_bandpass_filter(channel, band[0], band[1], fs)
        df_filtered = pd.DataFrame({'channel': band_limited, 'time': time})
    else:
        # Two FIR passes: cutoffs [~0, band[0]] first, then the single cutoff
        # band[1]. NOTE(review): with firwin's defaults this presumably acts
        # as a high-pass followed by a low-pass - confirm.
        first_pass = blackman_harris_filter(channel, time, [0.000001, band[0]], fs)
        df_filtered = blackman_harris_filter(
            first_pass['channel'].values, first_pass['time'].values, band[1], fs
        )

    # Strict inequalities: samples exactly on either bound are excluded.
    in_window = (df_filtered['time'] > start_time_ms) & (df_filtered['time'] < stop_time_ms)
    analytic = signal.hilbert(df_filtered.loc[in_window, 'channel'])
    phase_deg = np.angle(analytic, deg=True)
    return phase_deg[-1] + 180

def get_power(channel, time_millis, crop_start=-150, crop_end=-1, fs=256, method='pburg', filter_type='butter'):
    """Estimate the power spectrum (in dB) of a cropped, notch-filtered channel.

    Parameters
    ----------
    channel : pandas.Series
        Samples of one channel (boolean-indexed and read through `.values`).
    time_millis : pandas.Series
        Time stamp of every sample, aligned with `channel`.
    crop_start, crop_end : float, optional
        Exclusive bounds; only samples with `crop_start < time < crop_end`
        are analysed.
    fs : float, optional
        Sampling rate.
    method : {'pburg', 'welch', 'fft'}, optional
        Spectral estimator.
    filter_type : str, optional
        'butter' removes 48-52 with a Butterworth band-stop; any other value
        uses `blackman_harris_filter` with cutoffs [48, 52] and 101 taps.

    Returns
    -------
    pandas.DataFrame
        Columns `freq` and `power` (10 * log10 of the estimate).

    Raises
    ------
    ValueError
        If `method` is not one of the supported estimators.
    """
    # Fail early with a clear message instead of an unbound-local error at
    # the final return.
    if method not in ('welch', 'fft', 'pburg'):
        raise ValueError(
            "Unknown method {!r}; expected 'welch', 'fft' or 'pburg'".format(method)
        )

    # Crop: build the window mask once and apply it to both series.
    in_window = (time_millis > crop_start) & (time_millis < crop_end)
    channel = channel[in_window].values
    time = time_millis[in_window].values

    # Remove the 48-52 band (presumably mains interference - TODO confirm).
    if filter_type == 'butter':
        channel = butter_bandpass_filter(channel, 48, 52, fs, 'bandstop')
    else:
        df_filtered = blackman_harris_filter(channel, time, [48, 52], fs, numtaps=101)
        channel = df_filtered['channel']
        time = df_filtered['time']

    # PSD.
    if method == 'welch':
        freq, power = signal.welch(channel, fs, nfft=256)
    elif method == 'fft':
        # NOTE(review): this is the FFT magnitude, not its square - confirm
        # that 10*log10(|X|) is the intended scale.
        power = np.abs(np.fft.fft(channel, n=256))
        # Bin k of an N-point FFT sits at k * fs / N, so the axis must not
        # include the endpoint fs.
        freq = np.arange(len(power)) * fs / len(power)
    else:
        # Burg's method, model order = a quarter of the cropped length.
        p = pburg(channel, int(len(channel) * 0.25), sampling=fs, NFFT=256)
        power = p.psd
        # NOTE(review): this axis assumes `p.psd` spans 0..fs. If the
        # estimator returns a one-sided spectrum (0..fs/2) the labels are
        # stretched by a factor of two - verify against `p.frequencies()`.
        freq = np.linspace(0, fs, len(power))

    df_power = pd.DataFrame({'freq': freq, 'power': 10 * np.log10(power)})
    if method == 'pburg':
        # Only the Burg estimate is truncated below fs / 2.
        df_power = df_power[df_power['freq'] < fs / 2]

    return df_power

In [3]:
# Read hjorth file.
def get_hjorth_trial(trial_file_raw):
    """Load the C3 signal from the `-hjorth.mat` file that accompanies a trial CSV.

    Parameters
    ----------
    trial_file_raw : str
        Path of the raw trial CSV; every '.csv' in it is replaced by
        '-hjorth.mat' to obtain the MATLAB file name.

    Returns
    -------
    pandas.DataFrame
        Column `C3` with the loaded samples and column `time` with as many
        evenly spaced values from 0 to 1000 (both ends included).
    """
    mat_path = trial_file_raw.replace('.csv', '-hjorth.mat')
    mat = loadmat(mat_path)
    # Fixed path into the nested MATLAB structure stored under 'dat'.
    # NOTE(review): presumably the fourth struct field holds a cell whose
    # first row is the Hjorth-transformed C3 trace - confirm against the
    # MATLAB export script.
    c3 = mat['dat'][0][0][3][0][0][0]
    time = np.linspace(0, 1000, len(c3))
    return pd.DataFrame({'C3': c3, 'time': time})

In [4]:
# Calculate powers, phases and save to excel.
def calculate_power_phase(trial_file):
    """Build one wide row of band powers and phases for a single trial file.

    For each EEG derivation ('hjorth', 'raw', 'avg') and each filter
    ('butter', 'blackmanharris') the function stores:

    * the mean of `get_power` output inside every band, for each estimator
      ('pburg', 'welch', 'fft') and each window start (-750, -150), under
      `<eeg>_<filter>_<band>_<method>_<start>_power`;
    * the `get_phase` result for every band under
      `<eeg>_<filter>_<band>_phase`.

    Parameters
    ----------
    trial_file : str
        '/'-separated path whose third component is the subject and whose
        fourth component is a file name of the form `<x>-<trial>.<ext>`.

    Returns
    -------
    dict
        The row, including the keys 'sub', 'trial', 'alcholic', 'condition'.
    """
    # (band name, [low, high]) in the order the columns must be created.
    bands = (
        ('theta', [3.5, 8]),
        ('mu', [8, 12]),
        ('beta', [13, 30]),
        ('gamma', [30, 80]),
    )
    averaged_channels = ['C3', 'FC6', 'FC4', 'FC2', 'C6', 'C4', 'C2', 'CP6', 'CP4', 'CP2']

    df_trial_raw = pd.read_csv(trial_file)
    df_trial_hjorth = get_hjorth_trial(trial_file)

    path_parts = trial_file.split('/')
    subject = path_parts[2]
    trial = int(path_parts[3].split('-')[1].split('.')[0])
    df_sub = pd.read_csv('data/alcoholism-01-parsed/' + subject + '.csv')

    power_row = {
        'sub': subject,
        'trial': trial,
        'alcholic': df_sub.iloc[0]['alcoholic'],
        'condition': df_sub[df_sub['trial'] == trial].iloc[0]['condition'],
    }

    # Every derivation uses the time axis generated for the Hjorth data.
    # NOTE(review): assumes the raw CSV has the same number of samples as the
    # Hjorth trace - confirm.
    time_millis = df_trial_hjorth['time']

    for eeg_type in ['hjorth', 'raw', 'avg']:
        if eeg_type == 'hjorth':
            channel = df_trial_hjorth['C3']
        elif eeg_type == 'raw':
            channel = df_trial_raw['C3']
        else:
            channel = df_trial_raw[averaged_channels].mean(axis=1)

        for filter_name in ['butter', 'blackmanharris']:
            # Power: the spectrum is computed once per (method, window) and
            # then averaged inside each band.
            for method in ['pburg', 'welch', 'fft']:
                for window_start in [-750, -150]:
                    df_power = get_power(
                        channel,
                        time_millis,
                        crop_start=1000 + window_start,
                        crop_end=1000,
                        fs=256,
                        method=method,
                        filter_type=filter_name,
                    )
                    freq = df_power['freq']
                    for band, (low, high) in bands:
                        in_band = (freq >= low) & (freq < high)
                        col = "{}_{}_{}_{}_{}_power".format(
                            eeg_type, filter_name, band, method, window_start
                        )
                        power_row[col] = df_power.loc[in_band, 'power'].mean()

            # Phase
            for band, (low, high) in bands:
                col = "{}_{}_{}_phase".format(eeg_type, filter_name, band)
                power_row[col] = get_phase(
                    channel,
                    time_millis,
                    [low, high],
                    filter_type=filter_name,
                    start_time_ms=0,
                    stop_time_ms=1000,
                    fs=256,
                )
    return power_row

# Process every trial CSV in parallel and collect one wide row per trial.
trial_files = sorted(glob.glob('data/alcoholism-02-for-matlab/*/*.csv'))
# Keep two cores free, but never drop below one worker: joblib rejects
# n_jobs=0 and gives negative values a different meaning.
num_cores = max(1, multiprocessing.cpu_count() - 2)
power_rows = Parallel(n_jobs=num_cores)(delayed(calculate_power_phase)(trial_file) for trial_file in tqdm(trial_files))

# Save the dataframe
df_power_phase = pd.DataFrame(power_rows)
df_power_phase.to_excel('157-alc-power-phase-wide.xlsx')
print(df_power_phase.shape)
df_power_phase.head()

HBox(children=(FloatProgress(value=0.0, max=10962.0), HTML(value='')))


(10962, 172)


Unnamed: 0,alcholic,avg_blackmanharris_beta_fft_-150_power,avg_blackmanharris_beta_fft_-750_power,avg_blackmanharris_beta_pburg_-150_power,avg_blackmanharris_beta_pburg_-750_power,avg_blackmanharris_beta_phase,avg_blackmanharris_beta_welch_-150_power,avg_blackmanharris_beta_welch_-750_power,avg_blackmanharris_gamma_fft_-150_power,avg_blackmanharris_gamma_fft_-750_power,...,raw_butter_mu_welch_-750_power,raw_butter_theta_fft_-150_power,raw_butter_theta_fft_-750_power,raw_butter_theta_pburg_-150_power,raw_butter_theta_pburg_-750_power,raw_butter_theta_phase,raw_butter_theta_welch_-150_power,raw_butter_theta_welch_-750_power,sub,trial
0,True,-11.468717,12.805543,-89.932061,-20.0296,58.054118,-91.204433,-18.238747,-9.675166,10.660229,...,-3.461843,17.23457,17.927064,5.89591,-4.599229,54.08673,-10.815935,-11.00862,co2a0000364,0
1,True,-15.450877,12.000055,-115.683826,-20.543644,239.878412,-94.248568,-20.083139,-11.196195,9.633099,...,-10.909409,14.062405,16.459217,-6.944317,-6.586857,27.600111,-8.258216,-8.608602,co2a0000364,10
2,True,-13.153814,10.864661,-98.069371,-19.060627,198.595784,-93.474298,-20.809894,-11.301472,7.561442,...,-16.978828,13.570934,15.762492,-9.206106,-8.633393,22.07745,-15.276492,-11.980632,co2a0000364,100
3,True,-13.554193,10.772188,-101.676695,-16.734944,18.426839,-92.890408,-19.800385,-10.348795,7.050986,...,-15.259972,11.505653,15.790075,-4.016998,-4.178292,283.441901,-16.096598,-16.270484,co2a0000364,101
4,True,-14.788449,10.998339,-96.492311,-18.84517,59.854078,-87.817222,-19.143747,-8.786717,7.797171,...,-15.443537,16.566051,18.544656,-3.0217,-9.81395,75.080759,-9.900659,-9.814489,co2a0000364,102


In [7]:
# Long format - powers
# Names of every power column, in the order they should be melted.
value_vars = [
    "{}_{}_{}_{}_{}_power".format(eeg_type, filter_type, band, method, time)
    for eeg_type in tqdm(['hjorth', 'raw', 'avg'])
    for filter_type in ['butter', 'blackmanharris']
    for band in ['theta', 'mu', 'beta', 'gamma']
    for time in ['-750', '-150']
    for method in ['fft', 'welch', 'pburg']
]

df_melt = pd.melt(df_power_phase, id_vars=['sub', 'trial', 'alcholic', 'condition'], value_vars=value_vars)

# Display labels for the tokens encoded in the column names.
method_map = {
    'fft': 'FFT',
    'welch': 'Welch',
    'pburg': 'Burg'
}
eeg_type_map = {
    'raw': 'Raw',
    'hjorth': 'Hjorth',
    'avg': 'Average'
}
filter_map = {
    'butter': 'Butterworth',
    'blackmanharris': 'Blackman-Harris'
}
df_melt = df_melt.reset_index()

# Column names look like <eeg>_<filter>_<band>_<method>_<time>_power; split
# them once instead of re-splitting for every derived column.
name_parts = df_melt['variable'].str.split('_', expand=True)
df_melt['Method'] = name_parts[3].map(method_map)
df_melt['Band'] = name_parts[2].str.capitalize()
df_melt['Filter'] = name_parts[1].map(filter_map)
df_melt['Time'] = name_parts[4]
df_melt['EEG'] = name_parts[0].map(eeg_type_map)

# Drop rows without a usable value (log10 of zero power gives -inf).
df_melt = df_melt[(df_melt['value'] != -np.inf) & ~df_melt['value'].isnull()]
print(df_melt.shape)

# An xlsx sheet holds at most 1,048,576 rows (header included) and to_excel
# raises beyond that, so fall back to CSV for tables that do not fit.
excel_max_rows = 1048576
if len(df_melt) + 1 > excel_max_rows:
    df_melt.to_csv('157-alc-power-long.csv')
else:
    df_melt.to_excel('157-alc-power-long.xlsx')

# Last expression of the cell so the preview is actually displayed.
df_melt.head()

HBox(children=(FloatProgress(value=0.0, max=3.0), HTML(value='')))


(1574928, 12)


Unnamed: 0,index,sub,trial,alcholic,condition,variable,value,Method,Band,Filter,Time,EEG
0,0,co2a0000364,0,True,S1 obj,hjorth_butter_theta_fft_-750_power,15.879046,FFT,Theta,Butterworth,-750,Hjorth
1,1,co2a0000364,10,True,S1 obj,hjorth_butter_theta_fft_-750_power,18.234469,FFT,Theta,Butterworth,-750,Hjorth
2,2,co2a0000364,100,True,S1 obj,hjorth_butter_theta_fft_-750_power,13.372383,FFT,Theta,Butterworth,-750,Hjorth
3,3,co2a0000364,101,True,S2 match,hjorth_butter_theta_fft_-750_power,17.409102,FFT,Theta,Butterworth,-750,Hjorth
4,4,co2a0000364,102,True,S1 obj,hjorth_butter_theta_fft_-750_power,16.529428,FFT,Theta,Butterworth,-750,Hjorth


In [6]:
# Long format - phases
# Uses `filter_map` and `eeg_type_map` defined in the long-format powers cell.
value_vars = [
    "{}_{}_{}_phase".format(eeg_type, filter_type, band)
    for eeg_type in tqdm(['hjorth', 'raw', 'avg'])
    for filter_type in ['butter', 'blackmanharris']
    for band in ['theta', 'mu', 'beta', 'gamma']
]

# Kept under its own name: the cells below filter `df_melt` on the power-only
# columns 'Method' and 'Time', so the power table must not be overwritten.
df_phase_long = pd.melt(df_power_phase, id_vars=['sub', 'trial', 'alcholic', 'condition'], value_vars=value_vars)

df_phase_long = df_phase_long.reset_index()

# Column names look like <eeg>_<filter>_<band>_phase.
name_parts = df_phase_long['variable'].str.split('_', expand=True)
df_phase_long['Band'] = name_parts[2].str.capitalize()
df_phase_long['Filter'] = name_parts[1].map(filter_map)
df_phase_long['EEG'] = name_parts[0].map(eeg_type_map)
df_phase_long = df_phase_long[~df_phase_long['value'].isnull()]
print(df_phase_long.shape)
df_phase_long.to_excel('157-alc-phase-long.xlsx')

# Last expression of the cell so the preview is actually displayed.
df_phase_long.head()

HBox(children=(FloatProgress(value=0.0, max=3.0), HTML(value='')))


(263088, 10)


In [8]:
# One subject's Welch beta power at the -750 window, Butterworth-filtered.
selection = (
    (df_melt['Filter'] == 'Butterworth')
    & (df_melt['Method'] == 'Welch')
    & (df_melt['sub'] == 'co2c0000340')
    & (df_melt['Band'] == 'Beta')
    & (df_melt['Time'] == '-750')
)
df = df_melt[selection]
df.shape

(249, 12)

In [None]:
# Value per trial, one line per EEG derivation.
sns.lineplot(data=df, x="trial", y="value", hue="EEG")

# Plot

In [None]:
# Welch theta power of the raw derivation at the -150 window, Butterworth-filtered.
selection = (
    (df_melt['Method'] == 'Welch')
    & (df_melt['Filter'] == 'Butterworth')
    & (df_melt['EEG'] == 'Raw')
    & (df_melt['Time'] == '-150')
    & (df_melt['Band'] == 'Theta')
)
df = df_melt[selection]

In [None]:
# Histogram of the `value` column for the rows currently selected in `df`.
df['value'].hist()