# Process of Feature Extraction

### Features are Extracted from each Signal

#### Apply DWT ('db4' family) on each Signal and get the coefficients.
#### Coefficients: 5 : One Approximate(cA), Four Detailed (cD1 to cD4)
#### Create a DataFrame of coefficients by concatenating each coefficient
#### Extract Following Features
##### Mean, Energy, Standard Deviation, Variance, Mean Absolute Value, Average Power
##### Save the Extracted Features in a CSV file

In [2]:
"""
Load the necessory packages and libraries
Need to install all the libraries that are not in your system
"""

'\nLoad the necessory packages and libraries\nNeed to install all the libraries that are not in your system\n'

In [3]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import pywt
from scipy import signal
from sklearn.metrics import accuracy_score
import random
import pyswarms as ps
%matplotlib inline

In [4]:
# Load the raw EEG table from data.csv; nothing is dropped here
# (the 'Signal' name column is removed in a later cell).
eeg_data = pd.read_csv("data.csv")

In [5]:
# Summarise the loaded frame: number of rows (signals), number of columns,
# column dtypes and the in-memory size of the DataFrame.
eeg_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 11500 entries, 0 to 11499
Columns: 180 entries, Signal to y
dtypes: int64(179), object(1)
memory usage: 15.8+ MB


In [6]:
# Convert the 5 classes into binary class
def convert_binary_class(y):
    """Collapse a multi-class label into a binary label.

    Parameters
    ----------
    y : int
        Original class label (2, 3, 4 or 5 map to 0) or an already
        converted binary label.

    Returns
    -------
    int
        0 when ``y`` is 0, 2, 3, 4 or 5; 1 for any other value.
    """
    # 0 is treated as negative too, so a label that was already converted
    # stays 0 if the conversion is applied again (e.g. the cell is re-run).
    if y in (0, 2, 3, 4, 5):
        return 0
    return 1

In [7]:
# Overwrite the 'y' column in place with the binary label produced by
# convert_binary_class for each row.
eeg_data['y'] = eeg_data['y'].apply(convert_binary_class)

In [8]:
# Remove the unnecessary 'Signal' column, which holds the name of each signal
del eeg_data['Signal']

In [9]:
# EEG signal samples: every remaining column except the label 'y'
eeg_X = eeg_data.drop('y',axis=1)
# Preview the first five signals
eeg_X.head(5)

Unnamed: 0,X1,X2,X3,X4,X5,X6,X7,X8,X9,X10,...,X169,X170,X171,X172,X173,X174,X175,X176,X177,X178
0,135,190,229,223,192,125,55,-9,-33,-38,...,8,-17,-15,-31,-77,-103,-127,-116,-83,-51
1,386,382,356,331,320,315,307,272,244,232,...,168,164,150,146,152,157,156,154,143,129
2,-32,-39,-47,-37,-32,-36,-57,-73,-85,-94,...,29,57,64,48,19,-12,-30,-35,-35,-36
3,-105,-101,-96,-92,-89,-95,-102,-100,-87,-79,...,-80,-82,-81,-80,-77,-85,-77,-72,-69,-65
4,-9,-65,-98,-102,-78,-48,-16,0,-21,-59,...,10,4,2,-12,-32,-41,-65,-83,-89,-73


In [10]:
# EEG signal labels: the binary 'y' column, one entry per signal
eeg_Y = eeg_data['y']

In [12]:
eeg_Y.head()

0    0
1    1
2    0
3    0
4    0
Name: y, dtype: int64

In [13]:
# Functions for Calculating Features
#feature extraction
#mean, variance, standard deviation, average power, mean absolute value, energy
#############################################################################################
# Mean
def mean(x):
    """Return the arithmetic mean of every row of ``x``.

    Parameters
    ----------
    x : sequence of array-like
        One entry per signal, e.g. a 2-D array of wavelet coefficients.

    Returns
    -------
    list
        ``np.mean`` of each row, in input order; empty when ``x`` is empty.
    """
    # Walk the rows that were actually supplied instead of a fixed 11500.
    return [np.mean(row) for row in x]

#Energy
def energy(x):
    """Return the energy (sum of squared values) of every row of ``x``.

    Parameters
    ----------
    x : sequence of array-like
        One entry per signal, e.g. a 2-D array of wavelet coefficients.

    Returns
    -------
    list
        ``np.sum(np.square(row))`` for each row, in input order; empty when
        ``x`` is empty.
    """
    # Walk the rows that were actually supplied instead of a fixed 11500.
    return [np.sum(np.square(row)) for row in x]


#Standard Deviation
def std(x):
    """Return the standard deviation of every row of ``x``.

    Parameters
    ----------
    x : sequence of array-like
        One entry per signal, e.g. a 2-D array of wavelet coefficients.

    Returns
    -------
    list
        ``np.std`` (called with its default arguments) of each row, in input
        order; empty when ``x`` is empty.
    """
    # Walk the rows that were actually supplied instead of a fixed 11500.
    return [np.std(row) for row in x]

#Variance
def variance(x):
    """Return the variance of every row of ``x``.

    Parameters
    ----------
    x : sequence of array-like
        One entry per signal, e.g. a 2-D array of wavelet coefficients.

    Returns
    -------
    list
        ``np.var`` (called with its default arguments) of each row, in input
        order; empty when ``x`` is empty.
    """
    # Walk the rows that were actually supplied instead of a fixed 11500.
    return [np.var(row) for row in x]

#Mean Absolute Value
def absolute_value(x):
    """Return the mean absolute value of every row of ``x``.

    Parameters
    ----------
    x : sequence of array-like
        One entry per signal, e.g. a 2-D array of wavelet coefficients.

    Returns
    -------
    list
        ``np.mean(np.absolute(row))`` for each row, in input order; empty
        when ``x`` is empty.
    """
    # Walk the rows that were actually supplied instead of a fixed 11500.
    return [np.mean(np.absolute(row)) for row in x]

#Average Power
def average_power(x):
    """Return the average power (mean of squared values) of every row of ``x``.

    Parameters
    ----------
    x : sequence of array-like
        One entry per signal, e.g. a 2-D array of wavelet coefficients.

    Returns
    -------
    list
        ``np.mean(np.square(row))`` for each row, in input order; empty when
        ``x`` is empty.
    """
    # Walk the rows that were actually supplied instead of a fixed 11500.
    return [np.mean(np.square(row)) for row in x]

#####################################################################

In [14]:
# Creating Dataframes for coefficients
# Integer column labels 0..width-1 for each coefficient table; the widths
# (17, 17, 28, 49, 92) are the number of columns each table will hold.
cA, cD1, cD2, cD3, cD4 = (
    list(range(width)) for width in (17, 17, 28, 49, 92)
)

# One empty DataFrame per coefficient set; rows are added by later cells.
df_cA, df_cD1, df_cD2, df_cD3, df_cD4 = (
    pd.DataFrame(columns=labels) for labels in (cA, cD1, cD2, cD3, cD4)
)

def c1(x):
    """Return the first array of the 4-level 'db4' decomposition of ``x``.

    This is the array the notebook labels cA (approximation coefficients).
    """
    approx, _, _, _, _ = pywt.wavedec(x, 'db4', level=4)
    return approx

def c2(x):
    """Return the second array of the 4-level 'db4' decomposition of ``x``.

    This is the array the notebook labels cD1.
    NOTE(review): PyWavelets documents the return order of ``wavedec`` as
    [cA_n, cD_n, ..., cD_1], so in pywt's numbering this would be the
    level-4 detail -- confirm the intended naming.
    """
    _, detail, _, _, _ = pywt.wavedec(x, 'db4', level=4)
    return detail

def c3(x):
    """Return the third array of the 4-level 'db4' decomposition of ``x``.

    This is the array the notebook labels cD2.
    NOTE(review): PyWavelets documents the return order of ``wavedec`` as
    [cA_n, cD_n, ..., cD_1], so in pywt's numbering this would be the
    level-3 detail -- confirm the intended naming.
    """
    _, _, detail, _, _ = pywt.wavedec(x, 'db4', level=4)
    return detail

def c4(x):
    """Return the fourth array of the 4-level 'db4' decomposition of ``x``.

    This is the array the notebook labels cD3.
    NOTE(review): PyWavelets documents the return order of ``wavedec`` as
    [cA_n, cD_n, ..., cD_1], so in pywt's numbering this would be the
    level-2 detail -- confirm the intended naming.
    """
    _, _, _, detail, _ = pywt.wavedec(x, 'db4', level=4)
    return detail

def c5(x):
    """Return the fifth array of the 4-level 'db4' decomposition of ``x``.

    This is the array the notebook labels cD4.
    NOTE(review): PyWavelets documents the return order of ``wavedec`` as
    [cA_n, cD_n, ..., cD_1], so in pywt's numbering this would be the
    level-1 (finest) detail -- confirm the intended naming.
    """
    _, _, _, _, detail = pywt.wavedec(x, 'db4', level=4)
    return detail

## Start of Process Pipeline

In [15]:
#Creating cA dataframe for each signal
# Iterate over the index of eeg_X rather than a hard-coded 11500 so the cell
# processes exactly the signals that were loaded.
for i in eeg_X.index:
    row = eeg_X.loc[i]
    a = c1(row)
    # Store this signal's coefficients as row i of the table.
    df_cA.loc[i] = a

In [16]:
#Creating cD1 dataframe for each signal
# Iterate over the index of eeg_X rather than a hard-coded 11500 so the cell
# processes exactly the signals that were loaded.
for i in eeg_X.index:
    row = eeg_X.loc[i]
    p = c2(row)
    # Store this signal's coefficients as row i of the table.
    df_cD1.loc[i] = p

In [17]:
#Creating cD2 dataframe for each signal
# Iterate over the index of eeg_X rather than a hard-coded 11500 so the cell
# processes exactly the signals that were loaded.
for i in eeg_X.index:
    row = eeg_X.loc[i]
    q = c3(row)
    # Store this signal's coefficients as row i of the table.
    df_cD2.loc[i] = q

In [18]:
#Creating cD3 dataframe for each signal
# Iterate over the index of eeg_X rather than a hard-coded 11500 so the cell
# processes exactly the signals that were loaded.
for i in eeg_X.index:
    row = eeg_X.loc[i]
    r = c4(row)
    # Store this signal's coefficients as row i of the table.
    df_cD3.loc[i] = r

In [19]:
#Creating cD4 dataframe for each signal
# Iterate over the index of eeg_X rather than a hard-coded 11500 so the cell
# processes exactly the signals that were loaded.
for i in eeg_X.index:
    row = eeg_X.loc[i]
    s = c5(row)
    # Store this signal's coefficients as row i of the table.
    df_cD4.loc[i] = s

In [20]:
df_cA.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16
0,732.394456,759.641408,740.091372,723.449162,846.019372,414.866539,-48.830467,-145.248562,108.609829,-151.216527,-437.582298,-224.161392,-19.015142,333.243642,-228.434024,-194.470518,-342.174839
1,1447.688448,1410.195701,1406.972766,1380.90244,1731.219864,226.529476,-698.766092,1839.568598,-865.447868,-488.612159,1758.633067,1410.554964,-2753.187862,-556.583218,619.989521,521.913207,565.688503
2,-155.255244,-150.828158,-150.732842,-159.779832,-117.128242,-280.569629,-210.090841,-153.466544,-142.526643,-66.495571,-239.067149,-198.576806,-341.2281,-300.727982,-65.354128,6.920532,-87.965342
3,-387.620301,-385.543111,-390.670889,-384.158663,-400.65197,-341.09841,-309.792986,-275.051828,-295.871642,-213.157565,-220.018782,-274.164195,-295.872285,-284.105645,-242.700159,-280.585375,-275.655691
4,-263.433261,-282.264524,-255.283369,-275.166311,-272.458644,-249.697528,-81.635139,-127.531266,-33.75672,-79.855665,5.788142,126.59226,32.485208,74.8461,67.094208,1.208347,-332.923897


In [21]:
df_cD1.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16
0,0.252147,9.552755,166.142329,112.742925,57.607552,46.675211,36.673628,240.774484,142.734978,-14.756858,-113.366906,60.56856,-49.779386,-44.255964,5.233921,81.601483,-8.474992
1,0.39004,10.390683,-115.802831,-1389.74844,1553.333536,-573.522621,-2010.096756,1364.946389,-541.149153,-411.809278,-506.059703,342.8178,-414.381261,118.299296,-18.570344,25.670071,180.871145
2,-0.906177,-1.98561,29.583571,-44.434483,4.506924,-193.011226,-11.51245,-87.233594,-50.737754,-14.125363,-11.508281,-198.727841,60.484583,-18.656041,7.851747,-5.245497,0.722108
3,-0.483301,-2.16986,-6.34364,41.293217,-9.110389,8.802419,69.059463,2.265281,22.221957,20.916516,-28.513226,53.291942,51.348795,-4.121937,2.855852,-15.176476,-64.003466
4,-0.577633,-1.672057,-13.467008,-88.312223,-118.39227,-50.295091,12.476511,-44.874754,-32.190405,47.479947,41.702877,-52.741412,-39.806086,-83.16049,28.561984,94.748441,13.638243


In [22]:
df_cD2.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,18,19,20,21,22,23,24,25,26,27
0,12.661658,46.936455,97.828924,-185.695815,-92.808246,199.451437,-214.310194,179.748135,-19.195658,-88.37367,...,-95.001503,-88.663885,-88.086851,-34.394708,-53.505101,61.659382,77.513872,-124.654154,-5.718993,70.467077
1,-29.97734,-96.159416,50.765921,232.131557,-1006.5972,1112.917886,-536.494547,124.058388,21.5502,130.715104,...,-724.940822,-692.310854,137.175483,-192.798354,110.52269,-9.497376,-34.36952,41.610969,-32.167679,-20.588677
2,1.196131,3.83979,7.884588,-34.733488,20.351289,-103.61813,61.140309,-64.360578,32.031595,48.361819,...,23.437977,-31.520346,60.136294,35.020886,-114.837619,165.62301,-47.006003,10.571694,-129.470917,134.608495
3,5.921091,19.355631,3.844411,23.603332,25.02714,-2.429176,3.730483,19.059348,9.442787,-17.123486,...,37.042023,20.490088,28.695277,-47.252833,-17.337436,1.567468,16.131987,-20.108353,7.504318,34.072257
4,-20.896864,-70.772611,-50.712662,-88.457068,-92.699354,102.512087,25.706375,8.351885,-34.383829,-98.086953,...,45.582547,37.394206,-29.010668,-66.178899,29.306758,10.993325,0.987749,-1.587358,-19.084345,-9.876078


In [23]:
df_cD3.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,39,40,41,42,43,44,45,46,47,48
0,-27.543784,-84.279825,64.476596,-69.998503,52.079663,30.080567,-83.949407,85.968819,-24.652191,-15.781984,...,17.703951,7.432134,-8.720315,17.084925,30.734667,-6.399003,3.925819,4.667098,-8.915494,0.114553
1,5.005767,10.853867,-32.942237,-2.97614,137.354654,-238.120807,67.246537,157.070445,-125.277786,-50.26347,...,-45.144787,58.520459,-1.09595,-42.835397,46.968987,-25.391574,10.057275,-2.609561,-0.355044,8.878645
2,4.236695,14.253789,5.598081,-15.658996,17.161941,6.004977,-10.57072,-16.754175,26.703243,-23.358519,...,30.63185,-24.411351,-24.443255,27.347314,10.83669,-15.023703,4.561,4.319773,-6.769692,11.169744
3,-2.051976,-5.386123,11.2329,-8.07624,6.689206,1.268496,3.487,-13.303956,8.175256,-8.753157,...,-5.15677,-4.357963,0.032203,-4.574684,2.473774,4.491891,3.392439,-1.391884,-5.173314,-2.525881
4,25.661837,78.191033,-49.737295,47.926606,-48.180947,48.524193,-45.129768,54.893599,-59.70829,22.410108,...,-9.321056,21.528318,-17.692511,18.496635,-5.704037,-1.240224,-20.394786,9.383345,17.408014,-3.109483


In [24]:
df_cD4.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,82,83,84,85,86,87,88,89,90,91
0,4.317069,9.896589,-5.393892,-1.75,8.84442,3.553129,-10.4955,5.631303,-6.387504,-9.852465,...,-3.588941,19.393409,-6.747552,11.21167,-14.48233,-2.712687,8.692073,-4.248188,-6.812255,12.937688
1,-3.35118,-4.805112,4.009157,5.349031,-5.08742,3.381239,20.407118,-13.330014,8.060179,-10.058303,...,1.389392,4.411215,-6.318527,-4.506468,5.288075,-2.503819,-1.354959,0.727061,2.142207,-0.546237
2,-1.108808,-4.824302,3.26061,-7.069765,2.537499,-1.321478,0.208929,-0.779183,4.933688,-7.431515,...,-2.471033,-4.913217,4.041457,-2.441967,-3.528846,2.426567,2.279262,-1.258328,-1.363927,0.848707
3,0.418703,0.604837,1.386213,-3.186404,5.480014,-1.31134,-2.643412,-1.392639,-0.425561,-0.280714,...,3.619723,-3.643596,-1.518909,1.375888,2.68946,3.406894,-1.555405,0.138935,0.518115,-4.471692
4,-2.363872,-4.681856,10.942293,1.160232,-7.962433,-0.886132,5.845228,2.47609,-9.122354,-3.868168,...,-0.774373,4.095398,1.24012,3.164378,-5.379119,-3.002318,-0.903666,1.023418,0.853193,5.839393


#### Feature Extraction from each band

In [25]:
# Feature extraction from cA: convert the coefficient table to an array once,
# then compute the six per-signal statistics from it.
coeffs_cA = np.array(df_cA)
(mean_cA, energy_cA, std_cA,
 var_cA, absvalue_cA, avgpower_cA) = (
    extract(coeffs_cA)
    for extract in (mean, energy, std, variance, absolute_value, average_power)
)

In [26]:
# Feature extraction from cD1: convert the coefficient table to an array once,
# then compute the six per-signal statistics from it.
coeffs_cD1 = np.array(df_cD1)
(mean_cD1, energy_cD1, std_cD1,
 var_cD1, absvalue_cD1, avgpower_cD1) = (
    extract(coeffs_cD1)
    for extract in (mean, energy, std, variance, absolute_value, average_power)
)

In [27]:
# Feature extraction from cD2: convert the coefficient table to an array once,
# then compute the six per-signal statistics from it.
coeffs_cD2 = np.array(df_cD2)
(mean_cD2, energy_cD2, std_cD2,
 var_cD2, absvalue_cD2, avgpower_cD2) = (
    extract(coeffs_cD2)
    for extract in (mean, energy, std, variance, absolute_value, average_power)
)

In [28]:
# Feature extraction from cD3: convert the coefficient table to an array once,
# then compute the six per-signal statistics from it.
coeffs_cD3 = np.array(df_cD3)
(mean_cD3, energy_cD3, std_cD3,
 var_cD3, absvalue_cD3, avgpower_cD3) = (
    extract(coeffs_cD3)
    for extract in (mean, energy, std, variance, absolute_value, average_power)
)

In [29]:
# Feature extraction from cD4: convert the coefficient table to an array once,
# then compute the six per-signal statistics from it.
coeffs_cD4 = np.array(df_cD4)
(mean_cD4, energy_cD4, std_cD4,
 var_cD4, absvalue_cD4, avgpower_cD4) = (
    extract(coeffs_cD4)
    for extract in (mean, energy, std, variance, absolute_value, average_power)
)

#### Creating Feature vector

In [30]:
# Ordered (column name, values) pairs: six statistics per coefficient set,
# with the sets in the order cA, cD1, cD2, cD3, cD4.
feature_columns = [
    ('mean_cA', mean_cA), ('energy_cA', energy_cA), ('std_cA', std_cA),
    ('var_cA', var_cA), ('absvalue_cA', absvalue_cA), ('avgpower_cA', avgpower_cA),

    ('mean_cD1', mean_cD1), ('energy_cD1', energy_cD1), ('std_cD1', std_cD1),
    ('var_cD1', var_cD1), ('absvalue_cD1', absvalue_cD1), ('avgpower_cD1', avgpower_cD1),

    ('mean_cD2', mean_cD2), ('energy_cD2', energy_cD2), ('std_cD2', std_cD2),
    ('var_cD2', var_cD2), ('absvalue_cD2', absvalue_cD2), ('avgpower_cD2', avgpower_cD2),

    ('mean_cD3', mean_cD3), ('energy_cD3', energy_cD3), ('std_cD3', std_cD3),
    ('var_cD3', var_cD3), ('absvalue_cD3', absvalue_cD3), ('avgpower_cD3', avgpower_cD3),

    ('mean_cD4', mean_cD4), ('energy_cD4', energy_cD4), ('std_cD4', std_cD4),
    ('var_cD4', var_cD4), ('absvalue_cD4', absvalue_cD4), ('avgpower_cD4', avgpower_cD4),
]

# One row per signal, one column per (statistic, coefficient set) pair.
features = pd.DataFrame(dict(feature_columns))

In [31]:
features.head()

Unnamed: 0,mean_cA,energy_cA,std_cA,var_cA,absvalue_cA,avgpower_cA,mean_cD1,energy_cD1,std_cD1,var_cD1,...,std_cD3,var_cD3,absvalue_cD3,avgpower_cD3,mean_cD4,energy_cD4,std_cD4,var_cD4,absvalue_cD4,avgpower_cD4
0,168.657765,3690802.0,434.350715,188660.5,379.379385,217106.0,42.936816,153526.3,84.778474,7187.389589,...,46.674148,2178.476128,36.189001,2184.247992,-0.101826,7569.491844,9.0701,82.266717,7.226628,82.277085
1,526.897609,29828450.0,1215.316304,1476994.0,1157.791397,1754615.0,-140.260084,11645730.0,815.702345,665370.315473,...,215.666286,46511.947059,106.431487,46516.016342,-0.642356,74217.980651,28.395502,806.30456,12.574428,806.717181
2,-165.463089,595484.0,87.466787,7650.439,166.27727,35028.47,-31.466787,94365.42,67.533312,4560.748192,...,19.472596,379.181988,16.762068,379.790965,-0.263038,1716.899579,4.311933,18.592763,3.476872,18.661952
3,-309.218794,1686014.0,59.674572,3561.055,309.218794,99177.32,8.360773,18258.1,31.687591,1004.103428,...,7.148773,51.104956,5.85184,51.254145,-0.0406,813.688912,2.973684,8.842796,2.419046,8.844445
4,-114.470121,595041.7,147.98326,21899.05,150.707093,35002.45,-16.875378,52989.87,53.219103,2832.27295,...,27.717026,768.233538,20.67428,770.06384,0.208534,2503.114777,5.211937,27.164283,4.083667,27.207769


In [32]:
features.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 11500 entries, 0 to 11499
Data columns (total 30 columns):
mean_cA         11500 non-null float64
energy_cA       11500 non-null float64
std_cA          11500 non-null float64
var_cA          11500 non-null float64
absvalue_cA     11500 non-null float64
avgpower_cA     11500 non-null float64
mean_cD1        11500 non-null float64
energy_cD1      11500 non-null float64
std_cD1         11500 non-null float64
var_cD1         11500 non-null float64
absvalue_cD1    11500 non-null float64
avgpower_cD1    11500 non-null float64
mean_cD2        11500 non-null float64
energy_cD2      11500 non-null float64
std_cD2         11500 non-null float64
var_cD2         11500 non-null float64
absvalue_cD2    11500 non-null float64
avgpower_cD2    11500 non-null float64
mean_cD3        11500 non-null float64
energy_cD3      11500 non-null float64
std_cD3         11500 non-null float64
var_cD3         11500 non-null float64
absvalue_cD3    11500 non-null fl

In [39]:
# Add the labels to the features dataframe: eeg_Y is appended as one extra
# column (axis=1) to the right of the existing feature columns.
# NOTE(review): running this cell more than once would append the label
# column again each time -- confirm it is only executed once per session.
features = pd.concat([features, eeg_Y], axis=1, sort=False)

In [41]:
features.head(2)

Unnamed: 0,mean_cA,energy_cA,std_cA,var_cA,absvalue_cA,avgpower_cA,mean_cD1,energy_cD1,std_cD1,var_cD1,...,var_cD3,absvalue_cD3,avgpower_cD3,mean_cD4,energy_cD4,std_cD4,var_cD4,absvalue_cD4,avgpower_cD4,y
0,168.657765,3690802.0,434.350715,188660.5,379.379385,217106.0,42.936816,153526.3,84.778474,7187.389589,...,2178.476128,36.189001,2184.247992,-0.101826,7569.491844,9.0701,82.266717,7.226628,82.277085,0
1,526.897609,29828450.0,1215.316304,1476994.0,1157.791397,1754615.0,-140.260084,11645730.0,815.702345,665370.315473,...,46511.947059,106.431487,46516.016342,-0.642356,74217.980651,28.395502,806.30456,12.574428,806.717181,1


In [42]:
# Write the feature table (including the label column added above) to
# features.csv; index=False keeps the row index out of the file.
features.to_csv("features.csv", sep=',',index=False, encoding='utf-8')

In [43]:
# Read the saved features.csv back in and display its first five rows
# to check what was written.
extracted_features = pd.read_csv('features.csv')
extracted_features.head()

Unnamed: 0,mean_cA,energy_cA,std_cA,var_cA,absvalue_cA,avgpower_cA,mean_cD1,energy_cD1,std_cD1,var_cD1,...,var_cD3,absvalue_cD3,avgpower_cD3,mean_cD4,energy_cD4,std_cD4,var_cD4,absvalue_cD4,avgpower_cD4,y
0,168.657765,3690802.0,434.350715,188660.5,379.379385,217106.0,42.936816,153526.3,84.778474,7187.389589,...,2178.476128,36.189001,2184.247992,-0.101826,7569.491844,9.0701,82.266717,7.226628,82.277085,0
1,526.897609,29828450.0,1215.316304,1476994.0,1157.791397,1754615.0,-140.260084,11645730.0,815.702345,665370.315473,...,46511.947059,106.431487,46516.016342,-0.642356,74217.980651,28.395502,806.30456,12.574428,806.717181,1
2,-165.463089,595484.0,87.466787,7650.439,166.27727,35028.47,-31.466787,94365.42,67.533312,4560.748192,...,379.181988,16.762068,379.790965,-0.263038,1716.899579,4.311933,18.592763,3.476872,18.661952,0
3,-309.218794,1686014.0,59.674572,3561.055,309.218794,99177.32,8.360773,18258.1,31.687591,1004.103428,...,51.104956,5.85184,51.254145,-0.0406,813.688912,2.973684,8.842796,2.419046,8.844445,0
4,-114.470121,595041.7,147.98326,21899.05,150.707093,35002.45,-16.875378,52989.87,53.219103,2832.27295,...,768.233538,20.67428,770.06384,0.208534,2503.114777,5.211937,27.164283,4.083667,27.207769,0
