In [1]:
import pandas as pd
import numpy as np
from PyEMD import EMD
import os
from tqdm.notebook import tqdm
from sklearn.decomposition import FastICA
from scipy.signal import butter, lfilter, freqz, hilbert

# Set Path

In [2]:
# Common root of every input used by this notebook.
data_root = os.path.join('..', 'Data')

# Directory with the per-subject EEG recordings (one CSV per subject).
data_path = os.path.join(data_root, 'EEG')

# Experiment log, indexed by its 'Subject' column.
log_path = os.path.join(data_root, 'Log.xlsx')
df_log = pd.read_excel(log_path, index_col='Subject')

**[Ignore] Path Check**

In [3]:
# Sanity check: show the entries found in the EEG data directory.
os.listdir(data_path)

['23.csv',
 '7.csv',
 '16.csv',
 '21.csv',
 '14.csv',
 '15.csv',
 '9.csv',
 '5.csv',
 '12.csv',
 '3.csv',
 '10.csv',
 '22.csv',
 '6.csv',
 '4.csv',
 '8.csv',
 '2.csv',
 '1.csv',
 '13.csv',
 '17.csv',
 '20.csv',
 '11.csv',
 '18.csv',
 '19.csv']

In [4]:
# Preview the first rows of the subject log loaded above.
df_log.head()

Unnamed: 0_level_0,Date,TestStartTime(IST),EEGStartTime(IST),EEGDuration,WatchStartTime(IST),WatchDuration,Form,Remarks
Subject,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
1,2020-02-20,15:09:00,15:07:10,48m 32s,,,,"EDA, HR, SKT data not available"
2,2020-02-21,14:58:42,14:59:35,46m 23s,14:58:46,48m 3s,,
3,2020-02-24,11:02:52,11:03:42,49m 12s,11:02:53,50m 12s,,
4,2020-12-22,11:32:10,11:30:04,48m 40s,11:30:55,48m 24s,Available,
5,2020-02-24,16:54:47,16:56:58,49m 01s,,,,"EDA, HR, SKT data not available"


# Preprocessing Functions

In [5]:
# Passed as `max_imf` to the EMD decomposition in df_IMF.
MAX_IMF = 4
# Lower and upper band-pass edges handed to butter_bandpass_filter by df_filter
# (same unit as SAMPLING_FREQUENCY; presumably Hz -- confirm).
LOWCUT = 0.5
HIGHCUT = 30
# Sampling rate used to normalise the band edges in butter_bandpass.
SAMPLING_FREQUENCY = 128

def butter_bandpass(lowcut, highcut, fs, order=5):
    """Design a Butterworth band-pass filter.

    Both cut-off frequencies are divided by the Nyquist frequency (fs / 2)
    before being passed to scipy.signal.butter.

    Returns the (b, a) coefficient pair produced by butter.
    """
    nyquist = fs * 0.5
    band_edges = [lowcut / nyquist, highcut / nyquist]
    return butter(order, band_edges, btype='band')


def butter_bandpass_filter(eeg, lowcut, highcut, fs, order=5):
    """Band-pass filter a raw EEG signal.

    The filter coefficients come from butter_bandpass and are applied to
    `eeg` with a single scipy.signal.lfilter pass.

    Returns the filtered samples as produced by lfilter.
    """
    numerator, denominator = butter_bandpass(lowcut, highcut, fs, order=order)
    return lfilter(numerator, denominator, eeg)

def ICA(signal):
    """Run one signal through a FastICA fit / transform / inverse_transform round trip.

    The signal is stacked into a single-column matrix, a default FastICA model
    is fitted on it, and the estimated sources are mapped straight back to the
    signal space. The first (only) reconstructed column is returned.

    NOTE(review): no component is dropped or altered between transform and
    inverse_transform, so this looks like a plain reconstruction of the input
    rather than artefact removal -- confirm the intended behaviour.
    """
    column = np.stack([signal]).T  # one column, one row per sample
    model = FastICA()
    model.fit(column)
    sources = model.transform(column)
    reconstructed = model.inverse_transform(sources)
    return reconstructed.T[0]

def imf(eeg, max_imf):
    """Decompose an EEG signal with PyEMD's EMD.

    The samples are paired with an evenly spaced time axis running from 0 to 1
    and handed to EMD().emd together with `max_imf`.

    Returns whatever EMD.emd returns; df_IMF indexes the result row by row.
    """
    samples = np.array(eeg)
    n_samples = samples.shape[0]
    time_axis = np.linspace(0, 1, n_samples)
    decomposer = EMD()
    return decomposer.emd(samples, time_axis, max_imf=max_imf)

def avg_instantaneous_freq_imf(imf_arr, fs=128):
    """Return the mean instantaneous frequency of an IMF segment.

    The analytic signal is obtained with scipy.signal.hilbert, its phase is
    unwrapped, and the sample-to-sample phase differences are converted to a
    frequency via ``diff / (2 * pi) * fs``. The mean of those values is
    returned as a single number (not an array).

    Parameters
    ----------
    imf_arr : array-like
        Samples of one IMF segment.
    fs : number, optional
        Sampling frequency used for the conversion. Defaults to 128, the same
        value as the notebook's SAMPLING_FREQUENCY constant.

    Returns
    -------
    float
        Mean instantaneous frequency, or NaN when the segment has fewer than
        two samples (no phase difference can be formed).
    """
    samples = np.asarray(imf_arr)
    # A frequency needs at least one phase difference; return NaN explicitly
    # instead of taking the mean of an empty array (which warns).
    if samples.ndim == 0 or samples.shape[-1] < 2:
        return np.nan

    analytic_signal = hilbert(samples)
    phase = np.unwrap(np.angle(analytic_signal))
    return np.mean(np.diff(phase) / (2.0 * np.pi) * fs)

def variance_imf(imf_arr):
    """Return the variance of an IMF segment, computed by np.var with its default arguments."""
    spread = np.var(imf_arr)
    return spread


# Helper Functions

def df_filter(df, cols):
    """Band-pass filter every column listed in `cols`.

    Each column is replaced by the output of butter_bandpass_filter using the
    LOWCUT, HIGHCUT and SAMPLING_FREQUENCY constants. `df` is modified in place
    and also returned. Progress is shown with a tqdm bar.
    """
    progress = tqdm(cols[:])
    for channel in progress:
        filtered = butter_bandpass_filter(df[channel], LOWCUT, HIGHCUT, SAMPLING_FREQUENCY)
        df[channel] = filtered
    return df

def df_ica(df, cols):
    """Replace every column listed in `cols` with the result of ICA() on it.

    Columns are processed one at a time. `df` is modified in place and also
    returned. Progress is shown with a tqdm bar.
    """
    progress = tqdm(cols[:])
    for channel in progress:
        cleaned = ICA(df[channel])
        df[channel] = cleaned
    return df

def df_IMF(df, cols):
    """Build a frame of IMF columns for every channel in `cols`.

    For each channel, imf() is called with max_imf=MAX_IMF. Rows 0..MAX_IMF-1
    of the result are stored as '<channel>.IM<k>' and row MAX_IMF is stored as
    '<channel>.IMR' (presumably the residue -- confirm against PyEMD). The
    'Timestamp' column of `df` is carried over to the new frame.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing the columns in `cols` and a 'Timestamp' column.
    cols : sequence of str
        Channel column names to decompose.

    Returns
    -------
    pandas.DataFrame
        New frame with MAX_IMF + 1 columns per channel plus 'Timestamp'.

    Raises
    ------
    ValueError
        If the decomposition of a channel yields fewer than MAX_IMF + 1 rows,
        since the fixed column layout could not be filled.
    """
    rows_needed = MAX_IMF + 1
    imfs_dic = dict()

    for ch in tqdm(cols):
        IMF = imf(df[ch], max_imf=MAX_IMF)

        # Fail with a clear message instead of an IndexError on a row lookup.
        if len(IMF) < rows_needed:
            raise ValueError(
                "EMD of column %r returned %d rows, expected at least %d"
                % (ch, len(IMF), rows_needed)
            )

        for k in range(MAX_IMF):
            imfs_dic[ch + ".IM" + str(k)] = IMF[k]
        imfs_dic[ch + ".IMR"] = IMF[MAX_IMF]

    new_df = pd.DataFrame(imfs_dic)
    new_df['Timestamp'] = df['Timestamp']
    return new_df

def df_features(df, imf_cols, window=8):
    """Add windowed time-frequency features for every IMF column.

    Each column in `imf_cols` is cut into consecutive, non-overlapping windows
    of `window` samples (the final window may be shorter). For every window
    the average instantaneous frequency and the variance are computed, and the
    value is repeated for all samples of that window. The results are stored
    in the new columns '<col>.Avg_Inst_Freq' and '<col>.Variance'.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame holding the IMF columns; it is modified in place.
    imf_cols : iterable of str
        Names of the IMF columns to process.
    window : int, optional
        Number of samples per feature window. Defaults to 8.

    Returns
    -------
    pandas.DataFrame
        The same `df` object with the feature columns added.

    Raises
    ------
    ValueError
        If `window` is smaller than 1.
    """
    if window < 1:
        raise ValueError("window must be a positive number of samples")

    for col in tqdm(imf_cols):
        # Convert once per column instead of once per window.
        values = np.array(df[col])
        n_samples = values.shape[0]

        avg_inst_freq = np.zeros(n_samples)
        var = np.zeros(n_samples)

        for start in range(0, n_samples, window):
            stop = start + window
            segment = values[start:stop]

            # Broadcast the per-window value over the whole window.
            avg_inst_freq[start:stop] = avg_instantaneous_freq_imf(segment)
            var[start:stop] = variance_imf(segment)

        df[col + '.' + 'Avg_Inst_Freq'] = avg_inst_freq
        df[col + '.' + 'Variance'] = var

    return df

# Main

In [6]:
# EEG Channels
CHANNELS = ["AF3", "F7", "F3", "FC5", "T7", "P7", "O1", "O2", "P8", "T8", "FC6", "F4", "F8", "AF4"]

# EEG Data: subject number -> path of that subject's CSV file
data_dic = {}
for file in os.listdir(data_path):
    stem, ext = os.path.splitext(file)
    # Only '<subject number>.csv' entries are recordings; anything else
    # (hidden files, checkpoint folders, ...) would make int() raise.
    if ext.lower() != '.csv' or not stem.isdecimal():
        continue
    sub = int(stem)  # Subject no.
    data_dic[sub] = os.path.join(data_path, file)

# Subjects to process, taken from the log index
SUB = list(df_log.index.values)

# Raw EEG Cols
raw_cols = ['EEG.' + x for x in CHANNELS]

# IMF Cols: one entry per (channel, IMF label), channels in the outer loop
ims = ['IM0', 'IM1', 'IM2', 'IM3', 'IMR']
imf_cols = ['EEG.' + ch + '.' + im for ch in CHANNELS for im in ims]

In [7]:
# Create the output directory; exist_ok=True keeps the cell safe to re-run.
os.makedirs(os.path.join('..', 'Preprocessed_Data'), exist_ok=True)

In [8]:
# Directory that receives one preprocessed CSV per subject.
store_path = os.path.join('..', 'Preprocessed_Data')

for sub in SUB[:]:
    print("Subject : " + str(sub))
    # skiprows=1 drops the first line of the file before the header is read
    # (presumably a metadata line -- confirm against the raw export).
    df = pd.read_csv(data_dic[sub], skiprows = 1)

    # Selecting Timestamp and Raw EEG signals only
    print("\tSelecting Timestamp and Raw EEG signals only: ")
    # Explicit copy: the helpers below assign columns into this frame, which
    # can trigger SettingWithCopyWarning on a bare column selection.
    df = df[raw_cols + ['Timestamp']].copy()

    # Filtering the Raw EEG signals
    print("\tFiltering the raw eeg signals:")
    df = df_filter(df, raw_cols)

    # Applying ICA for artefact removal
    print("\tApplying ICA for artefact removal:")
    df = df_ica(df, raw_cols)

    # Calculating IMFs
    print("\tCalculating IMFs:")
    df = df_IMF(df, raw_cols)

    # Extracting Features
    print("\tCalculating TimeFrequency Features:")
    df = df_features(df, imf_cols)

    # Storing Data
    # Join the file name onto the directory; plain string concatenation wrote
    # '../Preprocessed_Data<sub>.csv' next to the directory instead of inside it.
    df.to_csv(os.path.join(store_path, str(sub) + ".csv"))

    print('\n-> Complete\n')

Subject : 1
	Selecting Timestamp and Raw EEG signals only: 
	Filtering the raw eeg signals:


HBox(children=(FloatProgress(value=0.0, max=14.0), HTML(value='')))


	Applying ICA for artefact removal:


HBox(children=(FloatProgress(value=0.0, max=14.0), HTML(value='')))


	Calculating IMFs:


HBox(children=(FloatProgress(value=0.0, max=14.0), HTML(value='')))


	Calculating TimeFrequency Features:


HBox(children=(FloatProgress(value=0.0, max=70.0), HTML(value='')))



-> Complete

Subject : 2
	Selecting Timestamp and Raw EEG signals only: 
	Filtering the raw eeg signals:


HBox(children=(FloatProgress(value=0.0, max=14.0), HTML(value='')))


	Applying ICA for artefact removal:


HBox(children=(FloatProgress(value=0.0, max=14.0), HTML(value='')))


	Calculating IMFs:


HBox(children=(FloatProgress(value=0.0, max=14.0), HTML(value='')))


	Calculating TimeFrequency Features:


HBox(children=(FloatProgress(value=0.0, max=70.0), HTML(value='')))



-> Complete

Subject : 3
	Selecting Timestamp and Raw EEG signals only: 
	Filtering the raw eeg signals:


HBox(children=(FloatProgress(value=0.0, max=14.0), HTML(value='')))


	Applying ICA for artefact removal:


HBox(children=(FloatProgress(value=0.0, max=14.0), HTML(value='')))


	Calculating IMFs:


HBox(children=(FloatProgress(value=0.0, max=14.0), HTML(value='')))


	Calculating TimeFrequency Features:


HBox(children=(FloatProgress(value=0.0, max=70.0), HTML(value='')))



-> Complete

Subject : 4
	Selecting Timestamp and Raw EEG signals only: 
	Filtering the raw eeg signals:


HBox(children=(FloatProgress(value=0.0, max=14.0), HTML(value='')))


	Applying ICA for artefact removal:


HBox(children=(FloatProgress(value=0.0, max=14.0), HTML(value='')))


	Calculating IMFs:


HBox(children=(FloatProgress(value=0.0, max=14.0), HTML(value='')))


	Calculating TimeFrequency Features:


HBox(children=(FloatProgress(value=0.0, max=70.0), HTML(value='')))



-> Complete

Subject : 5
	Selecting Timestamp and Raw EEG signals only: 
	Filtering the raw eeg signals:


HBox(children=(FloatProgress(value=0.0, max=14.0), HTML(value='')))


	Applying ICA for artefact removal:


HBox(children=(FloatProgress(value=0.0, max=14.0), HTML(value='')))


	Calculating IMFs:


HBox(children=(FloatProgress(value=0.0, max=14.0), HTML(value='')))


	Calculating TimeFrequency Features:


HBox(children=(FloatProgress(value=0.0, max=70.0), HTML(value='')))



-> Complete

Subject : 6
	Selecting Timestamp and Raw EEG signals only: 
	Filtering the raw eeg signals:


HBox(children=(FloatProgress(value=0.0, max=14.0), HTML(value='')))


	Applying ICA for artefact removal:


HBox(children=(FloatProgress(value=0.0, max=14.0), HTML(value='')))


	Calculating IMFs:


HBox(children=(FloatProgress(value=0.0, max=14.0), HTML(value='')))


	Calculating TimeFrequency Features:


HBox(children=(FloatProgress(value=0.0, max=70.0), HTML(value='')))



-> Complete

Subject : 7
	Selecting Timestamp and Raw EEG signals only: 
	Filtering the raw eeg signals:


HBox(children=(FloatProgress(value=0.0, max=14.0), HTML(value='')))


	Applying ICA for artefact removal:


HBox(children=(FloatProgress(value=0.0, max=14.0), HTML(value='')))


	Calculating IMFs:


HBox(children=(FloatProgress(value=0.0, max=14.0), HTML(value='')))


	Calculating TimeFrequency Features:


HBox(children=(FloatProgress(value=0.0, max=70.0), HTML(value='')))



-> Complete

Subject : 8
	Selecting Timestamp and Raw EEG signals only: 
	Filtering the raw eeg signals:


HBox(children=(FloatProgress(value=0.0, max=14.0), HTML(value='')))


	Applying ICA for artefact removal:


HBox(children=(FloatProgress(value=0.0, max=14.0), HTML(value='')))


	Calculating IMFs:


HBox(children=(FloatProgress(value=0.0, max=14.0), HTML(value='')))


	Calculating TimeFrequency Features:


HBox(children=(FloatProgress(value=0.0, max=70.0), HTML(value='')))



-> Complete

Subject : 9
	Selecting Timestamp and Raw EEG signals only: 
	Filtering the raw eeg signals:


HBox(children=(FloatProgress(value=0.0, max=14.0), HTML(value='')))


	Applying ICA for artefact removal:


HBox(children=(FloatProgress(value=0.0, max=14.0), HTML(value='')))


	Calculating IMFs:


HBox(children=(FloatProgress(value=0.0, max=14.0), HTML(value='')))


	Calculating TimeFrequency Features:


HBox(children=(FloatProgress(value=0.0, max=70.0), HTML(value='')))



-> Complete

Subject : 10
	Selecting Timestamp and Raw EEG signals only: 
	Filtering the raw eeg signals:


HBox(children=(FloatProgress(value=0.0, max=14.0), HTML(value='')))


	Applying ICA for artefact removal:


HBox(children=(FloatProgress(value=0.0, max=14.0), HTML(value='')))


	Calculating IMFs:


HBox(children=(FloatProgress(value=0.0, max=14.0), HTML(value='')))


	Calculating TimeFrequency Features:


HBox(children=(FloatProgress(value=0.0, max=70.0), HTML(value='')))



-> Complete

Subject : 11
	Selecting Timestamp and Raw EEG signals only: 
	Filtering the raw eeg signals:


HBox(children=(FloatProgress(value=0.0, max=14.0), HTML(value='')))


	Applying ICA for artefact removal:


HBox(children=(FloatProgress(value=0.0, max=14.0), HTML(value='')))


	Calculating IMFs:


HBox(children=(FloatProgress(value=0.0, max=14.0), HTML(value='')))


	Calculating TimeFrequency Features:


HBox(children=(FloatProgress(value=0.0, max=70.0), HTML(value='')))



-> Complete

Subject : 12
	Selecting Timestamp and Raw EEG signals only: 
	Filtering the raw eeg signals:


HBox(children=(FloatProgress(value=0.0, max=14.0), HTML(value='')))


	Applying ICA for artefact removal:


HBox(children=(FloatProgress(value=0.0, max=14.0), HTML(value='')))


	Calculating IMFs:


HBox(children=(FloatProgress(value=0.0, max=14.0), HTML(value='')))


	Calculating TimeFrequency Features:


HBox(children=(FloatProgress(value=0.0, max=70.0), HTML(value='')))



-> Complete

Subject : 13
	Selecting Timestamp and Raw EEG signals only: 
	Filtering the raw eeg signals:


HBox(children=(FloatProgress(value=0.0, max=14.0), HTML(value='')))


	Applying ICA for artefact removal:


HBox(children=(FloatProgress(value=0.0, max=14.0), HTML(value='')))


	Calculating IMFs:


HBox(children=(FloatProgress(value=0.0, max=14.0), HTML(value='')))


	Calculating TimeFrequency Features:


HBox(children=(FloatProgress(value=0.0, max=70.0), HTML(value='')))



-> Complete

Subject : 14
	Selecting Timestamp and Raw EEG signals only: 
	Filtering the raw eeg signals:


HBox(children=(FloatProgress(value=0.0, max=14.0), HTML(value='')))


	Applying ICA for artefact removal:


HBox(children=(FloatProgress(value=0.0, max=14.0), HTML(value='')))


	Calculating IMFs:


HBox(children=(FloatProgress(value=0.0, max=14.0), HTML(value='')))


	Calculating TimeFrequency Features:


HBox(children=(FloatProgress(value=0.0, max=70.0), HTML(value='')))



-> Complete

Subject : 15
	Selecting Timestamp and Raw EEG signals only: 
	Filtering the raw eeg signals:


HBox(children=(FloatProgress(value=0.0, max=14.0), HTML(value='')))


	Applying ICA for artefact removal:


HBox(children=(FloatProgress(value=0.0, max=14.0), HTML(value='')))


	Calculating IMFs:


HBox(children=(FloatProgress(value=0.0, max=14.0), HTML(value='')))


	Calculating TimeFrequency Features:


HBox(children=(FloatProgress(value=0.0, max=70.0), HTML(value='')))



-> Complete

Subject : 16
	Selecting Timestamp and Raw EEG signals only: 
	Filtering the raw eeg signals:


HBox(children=(FloatProgress(value=0.0, max=14.0), HTML(value='')))


	Applying ICA for artefact removal:


HBox(children=(FloatProgress(value=0.0, max=14.0), HTML(value='')))


	Calculating IMFs:


HBox(children=(FloatProgress(value=0.0, max=14.0), HTML(value='')))


	Calculating TimeFrequency Features:


HBox(children=(FloatProgress(value=0.0, max=70.0), HTML(value='')))



-> Complete

Subject : 17
	Selecting Timestamp and Raw EEG signals only: 
	Filtering the raw eeg signals:


HBox(children=(FloatProgress(value=0.0, max=14.0), HTML(value='')))


	Applying ICA for artefact removal:


HBox(children=(FloatProgress(value=0.0, max=14.0), HTML(value='')))


	Calculating IMFs:


HBox(children=(FloatProgress(value=0.0, max=14.0), HTML(value='')))


	Calculating TimeFrequency Features:


HBox(children=(FloatProgress(value=0.0, max=70.0), HTML(value='')))



-> Complete

Subject : 18
	Selecting Timestamp and Raw EEG signals only: 
	Filtering the raw eeg signals:


HBox(children=(FloatProgress(value=0.0, max=14.0), HTML(value='')))


	Applying ICA for artefact removal:


HBox(children=(FloatProgress(value=0.0, max=14.0), HTML(value='')))


	Calculating IMFs:


HBox(children=(FloatProgress(value=0.0, max=14.0), HTML(value='')))


	Calculating TimeFrequency Features:


HBox(children=(FloatProgress(value=0.0, max=70.0), HTML(value='')))



-> Complete

Subject : 19
	Selecting Timestamp and Raw EEG signals only: 
	Filtering the raw eeg signals:


HBox(children=(FloatProgress(value=0.0, max=14.0), HTML(value='')))


	Applying ICA for artefact removal:


HBox(children=(FloatProgress(value=0.0, max=14.0), HTML(value='')))


	Calculating IMFs:


HBox(children=(FloatProgress(value=0.0, max=14.0), HTML(value='')))


	Calculating TimeFrequency Features:


HBox(children=(FloatProgress(value=0.0, max=70.0), HTML(value='')))



-> Complete

Subject : 20
	Selecting Timestamp and Raw EEG signals only: 
	Filtering the raw eeg signals:


HBox(children=(FloatProgress(value=0.0, max=14.0), HTML(value='')))


	Applying ICA for artefact removal:


HBox(children=(FloatProgress(value=0.0, max=14.0), HTML(value='')))


	Calculating IMFs:


HBox(children=(FloatProgress(value=0.0, max=14.0), HTML(value='')))


	Calculating TimeFrequency Features:


HBox(children=(FloatProgress(value=0.0, max=70.0), HTML(value='')))



-> Complete

Subject : 21
	Selecting Timestamp and Raw EEG signals only: 
	Filtering the raw eeg signals:


HBox(children=(FloatProgress(value=0.0, max=14.0), HTML(value='')))


	Applying ICA for artefact removal:


HBox(children=(FloatProgress(value=0.0, max=14.0), HTML(value='')))


	Calculating IMFs:


HBox(children=(FloatProgress(value=0.0, max=14.0), HTML(value='')))


	Calculating TimeFrequency Features:


HBox(children=(FloatProgress(value=0.0, max=70.0), HTML(value='')))



-> Complete

Subject : 22
	Selecting Timestamp and Raw EEG signals only: 
	Filtering the raw eeg signals:


HBox(children=(FloatProgress(value=0.0, max=14.0), HTML(value='')))


	Applying ICA for artefact removal:


HBox(children=(FloatProgress(value=0.0, max=14.0), HTML(value='')))


	Calculating IMFs:


HBox(children=(FloatProgress(value=0.0, max=14.0), HTML(value='')))


	Calculating TimeFrequency Features:


HBox(children=(FloatProgress(value=0.0, max=70.0), HTML(value='')))



-> Complete

Subject : 23
	Selecting Timestamp and Raw EEG signals only: 
	Filtering the raw eeg signals:


HBox(children=(FloatProgress(value=0.0, max=14.0), HTML(value='')))


	Applying ICA for artefact removal:


HBox(children=(FloatProgress(value=0.0, max=14.0), HTML(value='')))


	Calculating IMFs:


HBox(children=(FloatProgress(value=0.0, max=14.0), HTML(value='')))


	Calculating TimeFrequency Features:


HBox(children=(FloatProgress(value=0.0, max=70.0), HTML(value='')))



-> Complete

