# Stress Classification Based on EEG Data using SVM

In [12]:
# Imports

import os
import numpy as np
import pandas as pd
from mne_features.feature_extraction import FeatureExtractor

## 1. ETL

In [13]:
# Explore one recorded csv file: load it into `df` and preview the first rows
df = pd.read_csv("../data/recorded_csv/exp02.csv")
df.head()

Unnamed: 0.1,Unnamed: 0,0,1,2,3,4,5,6,7,8,...,22,23,24,25,26,27,28,29,30,31
0,0,144.0,-10544.144874,-9603.918744,-16893.269645,19412.154783,8298.48746,-44.323509,-8133.956269,5991.698026,...,88.0,0.0,112.0,15.5,8.0,0.0,0.0,0.0,1667842000.0,0.0
1,1,146.0,-10536.858205,-9611.071302,-16871.566101,19500.958264,8247.927814,-39.71905,-8109.011723,6010.339381,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1667842000.0,0.0
2,2,148.0,-10516.294601,-9590.708863,-16854.936404,19063.422866,8267.530294,-24.050477,-8095.712435,6026.85732,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1667842000.0,0.0
3,3,150.0,-10554.627842,-9619.877889,-16905.138422,19647.36219,8284.070585,-51.051384,-8147.41202,5979.58338,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1667842000.0,0.0
4,4,152.0,-10505.454004,-9592.742871,-16839.670162,19133.853213,8233.667402,-22.307041,-8072.958359,6039.821331,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1667842000.0,0.0


In [14]:
# Column labels of the recording currently held in `df`
df.columns

Index(['Unnamed: 0', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '10',
       '11', '12', '13', '14', '15', '16', '17', '18', '19', '20', '21', '22',
       '23', '24', '25', '26', '27', '28', '29', '30', '31'],
      dtype='object')

In [15]:
# Get the list of csv file names
def recorded_csv_files(dir_path):
    """Return the sorted names of the recording files found in ``dir_path``.

    An entry is kept when its name begins with ``exp`` and ends with ``csv``.
    Only the bare names are returned, not paths joined with ``dir_path``.
    """
    return sorted(
        entry
        for entry in os.listdir(dir_path)
        if entry.startswith('exp') and entry.endswith('csv')
    )

In [16]:
#working_dir_path = os.getcwd()
# Directory holding the recorded CSV files (relative to the notebook)
dir_path = "../data/recorded_csv"
# `file_names` is reused by the channel-extraction cell further down
file_names = recorded_csv_files(dir_path)
file_names

['exp01.csv',
 'exp02.csv',
 'exp03.csv',
 'exp04.csv',
 'exp05.csv',
 'exp06.csv',
 'exp07.csv',
 'exp08.csv',
 'exp09.csv',
 'exp10.csv',
 'exp11.csv',
 'exp12.csv',
 'exp13.csv',
 'exp14.csv',
 'exp15.csv',
 'exp16.csv',
 'exp17.csv']

In [17]:
# Utility functions

# Make directory
def create_dir(dir_path):
    """Create the directory ``dir_path`` unless it already exists.

    Only the final path component is created (``os.mkdir``). When the path is
    already present a notice naming ``dir_path`` is printed and nothing else
    happens. Any other ``OSError`` (for example a missing parent directory)
    propagates, because reporting it as "already exist" would be misleading.
    """
    try:
        os.mkdir(dir_path)
    except FileExistsError:
        # Report the argument itself rather than a notebook-level global.
        print(f"Path already exist - {dir_path}")
    

In [20]:
# Extract the 16 EEG channel data we use and save them to csv files
def channel_16_csv_files(in_dir_path, out_dir_path, file_names):
    """Write a 16-channel copy of every recording listed in ``file_names``.

    Each entry of ``file_names`` is read from ``in_dir_path``, stripped of the
    columns ``'Unnamed: 0'``, ``'0'`` and ``'17'`` .. ``'31'``, relabelled
    with the 16 channel names and saved under the same file name in
    ``out_dir_path``. The output directory is created first if needed.

    Raises:
        KeyError: if a recording lacks one of the columns to drop.
        ValueError: if the number of remaining columns is not 16.
    """
    # Everything except source columns '1'..'16' is discarded.
    non_eeg_columns = ['Unnamed: 0', '0'] + [str(col) for col in range(17, 32)]
    channel_names = ['Fp1', 'Fp2', 'F7', 'F3', 'F4', 'F8', 'T3', 'C3',
                     'C4', 'T4', 'T5', 'P3', 'P4', 'T6', 'O1', 'O2']

    # Make the output dir. if not exist
    create_dir(out_dir_path)

    for file_name in file_names:
        recording = pd.read_csv(os.path.join(in_dir_path, file_name))
        eeg = recording.drop(columns=non_eeg_columns)
        eeg.columns = channel_names
        # index=False writes exactly the 16 channel columns; this replaces the
        # earlier trick of promoting 'Fp1' to the index to hide the row index.
        eeg.to_csv(os.path.join(out_dir_path, file_name), index=False)

In [21]:
#working_dir_path = os.getcwd()
# Source directory of the recordings and destination for the 16-channel copies
in_dir_path = "../data/recorded_csv"
out_dir_path = "../data/ch_16_csv"
# Echo both paths so the cell output records what was processed
print("in_dir_path", in_dir_path)
print("out_dir_path", out_dir_path)
# `file_names` comes from the earlier recorded_csv_files() cell
channel_16_csv_files(in_dir_path, out_dir_path, file_names)

in_dir_path ../data/recorded_csv
out_dir_path ../data/ch_16_csv
Path already exist - ../data/ch_16_csv


In [22]:
# Count the entries in the output directory and show the first one listed
# (os.listdir returns names in arbitrary order, so "first" is not sorted).
out_dir_entries = os.listdir(out_dir_path)
print(len(out_dir_entries))
print(out_dir_entries[0])

17
exp10.csv


In [23]:
# Check one 16 channel csv file: read it back from disk and preview the first rows
# NOTE: this rebinds `df`, which earlier held the exp02 recording.
test_16_channel_file_path = "../data/ch_16_csv/exp07.csv"
df = pd.read_csv(test_16_channel_file_path)
df.head()

Unnamed: 0,Fp1,Fp2,F7,F3,F4,F8,T3,C3,C4,T4,T5,P3,P4,T6,O1,O2
0,15404.241133,16658.531625,-2366.513296,32225.605217,11255.377383,6998.174727,15453.996116,3754.98131,-692.211174,8447.75241,21554.547138,8821.451225,3756.545932,16666.555901,7820.227184,11473.03867
1,15399.748433,16660.543282,-2363.942845,32232.355444,11236.75838,6995.693683,15453.906709,3741.302042,-693.999314,8450.009936,21558.771617,8800.328827,3762.111516,16668.858131,7834.822873,11487.656711
2,15401.827145,16654.351849,-2371.743604,32228.577999,11339.755218,6990.39632,15443.691962,3775.589618,-695.809805,8441.918605,21551.462597,8812.309362,3752.321452,16658.08459,7825.859824,11473.083374
3,15389.824258,16651.624936,-2374.179944,32220.39726,11191.317283,6987.468241,15443.490797,3729.522673,-699.140215,8437.582366,21548.199242,8807.839013,3750.175685,16655.804712,7821.009495,11471.608159
4,15391.120659,16651.356715,-2373.375281,32227.57217,11326.925317,6989.010512,15438.595764,3762.267978,-697.665,8438.677602,21537.939791,8789.644693,3754.020185,16654.530663,7813.991048,11470.669385


In [24]:
# Column labels of the 16-channel file read back in the previous cell
df.columns

Index(['Fp1', 'Fp2', 'F7', 'F3', 'F4', 'F8', 'T3', 'C3', 'C4', 'T4', 'T5',
       'P3', 'P4', 'T6', 'O1', 'O2'],
      dtype='object')

In [25]:
# Row count, dtypes and non-null counts of the 16-channel file held in `df`
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 45000 entries, 0 to 44999
Data columns (total 16 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   Fp1     45000 non-null  float64
 1   Fp2     45000 non-null  float64
 2   F7      45000 non-null  float64
 3   F3      45000 non-null  float64
 4   F4      45000 non-null  float64
 5   F8      45000 non-null  float64
 6   T3      45000 non-null  float64
 7   C3      45000 non-null  float64
 8   C4      45000 non-null  float64
 9   T4      45000 non-null  float64
 10  T5      45000 non-null  float64
 11  P3      45000 non-null  float64
 12  P4      45000 non-null  float64
 13  T6      45000 non-null  float64
 14  O1      45000 non-null  float64
 15  O2      45000 non-null  float64
dtypes: float64(16)
memory usage: 5.5 MB


In [26]:
def channel_16_csv_files_with_target(in_dir_path, out_dir_path, target_input_file):
    """Copy every CSV in ``in_dir_path`` to ``out_dir_path`` with target columns added.

    ``target_input_file`` is a CSV with ``id`` and ``label`` columns. The id of
    a recording is the run of digits that ends its file-name stem, so both
    ``exp07.csv`` and ``conv_exp07.csv`` map to id 7. Each output file gains a
    ``label_names`` column (``'low'`` when the label equals 0, ``'high'``
    otherwise) and a ``lablel`` column holding the label itself. Entries of
    ``in_dir_path`` whose name does not end in ``.csv`` are ignored.

    Raises:
        ValueError: if a CSV file name stem does not end in digits.
        IndexError: if ``target_input_file`` has no row for a recording's id.
    """
    # Read the target_input_file
    target_df = pd.read_csv(target_input_file)

    # Input file name list; skip anything that is not a CSV file
    ch_16_file_list = sorted(
        name for name in os.listdir(in_dir_path) if name.endswith('.csv')
    )

    # Make the output dir. if not exist
    create_dir(out_dir_path)

    for f_name in ch_16_file_list:
        # File index num.: trailing digits of the stem, independent of the
        # prefix length (a fixed slice offset only fits one naming scheme).
        stem = os.path.splitext(f_name)[0]
        f_idx = stem[len(stem.rstrip('0123456789')):]
        if not f_idx:
            raise ValueError(
                f"cannot extract a numeric id from file name {f_name!r}"
            )
        f_id = int(f_idx)

        df = pd.read_csv(os.path.join(in_dir_path, f_name))

        # Target row
        labels = target_df.loc[target_df['id'] == f_id, 'label']
        if labels.empty:
            raise IndexError(
                f"no label found for id {f_id} (file {f_name!r}) "
                f"in {target_input_file!r}"
            )
        target = labels.iloc[0]

        # A scalar assignment is broadcast to every row of the recording.
        df['label_names'] = 'low' if target == 0 else 'high'
        # NOTE(review): 'lablel' looks like a typo for 'label'. It is kept
        # unchanged because it is the column name written to the output files;
        # rename it together with whatever code reads those files.
        df['lablel'] = target

        # Written with the default row index, as before.
        df.to_csv(os.path.join(out_dir_path, f_name))

In [None]:
# in_dir_path = working_dir_path + "/ch_16_csv"
# out_dir_path = working_dir_path + "/ch_16_csv_with_target"
# target_input_file = "./targets/PSS10-Sheet1.csv"
# channel_16_csv_files_with_target(in_dir_path, out_dir_path, target_input_file)

In [None]:
# Convert one ch_16 CSV file into PSD
# df = pd.read_csv("../ch_16_csv/conv_exp02.csv")
# df.head()


In [None]:
# X = df[['Fp1', 'Fp2', 'F7', 'F3', 'F4', 'F8', 'T3', 'C3', 'C4', 'T4', 'T5',
#        'P3', 'P4', 'T6', 'O1', 'O2']]
# X.shape

In [None]:
# X_transp = X.T
# X_transp.shape

In [None]:
# X_transp = X_transp.to_numpy()
# X_transp.shape, type(X_transp)

In [None]:
# X_transp = X_transp.reshape(1,16,-1)
# X_transp.shape

In [None]:
# X = X.to_numpy()
# X = X.reshape(45000,16,-1)

In [None]:
#n_epochs, n_channels, n_times

In [None]:
# bands = [(4,8), (8,12), (12,30), (30,45)]
# params = dict({
#     'pow_freq_bands__log':True,
#     'pow_freq_bands__normalize':False,
#     'pow_freq_bands__freq_bands':bands,
# })
# fe = FeatureExtractor(sfreq=250, selected_funcs=['pow_freq_bands'],params=params)
# X = fe.fit_transform(X=X_transp[:,:16,:])