# Stress Classification Based on EEG Data using SVM

In [119]:
# Imports

import os
import numpy as np
import pandas as pd

## 1. ETL

In [120]:
# Explore one recorded csv file: load exp01.csv from the recordings folder
# (path is relative to the notebook's working directory) and preview the
# first rows to see which columns the recorder produced.
df = pd.read_csv("./recorded_csv/exp01.csv")
df.head()


Unnamed: 0.1,Unnamed: 0,0,1,2,3,4,5,6,7,8,...,22,23,24,25,26,27,28,29,30,31
0,0,116.0,-3203.25085,-7278.934989,-22030.236963,-9343.319755,-3711.41776,2255.693347,-12633.653001,3538.079982,...,72.0,126.5,96.0,15.0,72.0,0.0,0.0,0.0,1667839000.0,0.0
1,1,118.0,-3201.261544,-7280.209038,-22023.464384,-9339.251738,-3717.095103,2254.843981,-12623.482957,3549.121743,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1667839000.0,0.0
2,2,120.0,-3196.21005,-7273.592922,-22024.492565,-9336.636583,-3709.696676,2258.263798,-12628.355638,3541.052764,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1667839000.0,0.0
3,3,122.0,-3200.680399,-7275.045785,-22027.152422,-9340.548139,-3711.887146,2257.839114,-12625.986353,3540.337508,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1667839000.0,0.0
4,4,124.0,-3200.43453,-7280.454907,-22023.263219,-9337.888281,-3719.866719,2250.083059,-12625.360504,3544.740801,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1667839000.0,0.0


In [121]:
# Show the column labels of `df` (the raw recording read from exp01.csv).
df.columns

Index(['Unnamed: 0', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '10',
       '11', '12', '13', '14', '15', '16', '17', '18', '19', '20', '21', '22',
       '23', '24', '25', '26', '27', '28', '29', '30', '31'],
      dtype='object')

In [122]:
# Get the list of csv file names
def recorded_csv_files(dir_path):
    """Return the sorted names of the recording CSV files in ``dir_path``.

    A directory entry is kept when its name starts with ``exp`` and ends
    with ``csv``. Only the file names are returned, not full paths.
    """
    return sorted(
        entry
        for entry in os.listdir(dir_path)
        if entry.startswith('exp') and entry.endswith('csv')
    )

In [123]:
# Locate the recordings folder under the current working directory and
# collect the names of the experiment CSV files it contains.
working_dir_path = os.getcwd()
dir_path = working_dir_path + "/recorded_csv"
file_names = recorded_csv_files(dir_path)
# Display the collected file names as the cell output.
file_names

['exp01.csv',
 'exp02.csv',
 'exp03.csv',
 'exp04.csv',
 'exp05.csv',
 'exp06.csv',
 'exp07.csv',
 'exp08.csv',
 'exp09.csv',
 'exp10.csv',
 'exp11.csv',
 'exp12.csv',
 'exp13.csv',
 'exp14.csv',
 'exp15.csv',
 'exp16.csv',
 'exp17.csv']

In [124]:
# Utility functions

# Make directory
def create_dir(dir_path):
    """Create the directory ``dir_path``, reporting instead of raising on failure.

    Parameters
    ----------
    dir_path : str
        Path of the directory to create. Parent directories are not created.

    Notes
    -----
    If the path already exists a message is printed and the function returns
    normally. Any other ``OSError`` (for example a missing parent directory or
    insufficient permissions) is also reported with a message rather than
    raised, so the function never raises ``OSError``.
    """
    try:
        os.mkdir(dir_path)
    except FileExistsError:
        # Report the path that was actually requested (the parameter), not a
        # notebook-level global that may be undefined or point elsewhere.
        print("Path already exist - " + dir_path)
    except OSError as error:
        # Do not mislabel other failures as "already exist".
        print("Could not create directory - " + dir_path + " (" + str(error) + ")")
    

In [125]:
# Extract the 16 EEG channels we use and save them to CSV files
def channel_16_csv_files(in_dir_path, out_dir_path, file_names):
    """Write a 16-channel copy of each recorded CSV into ``out_dir_path``.

    For every name in ``file_names`` the file is read from ``in_dir_path``,
    the columns listed in ``unused_columns`` are dropped, the remaining
    columns are relabelled with the 16 electrode names below, and the result
    is saved in ``out_dir_path`` as ``conv_<name>``.
    """
    unused_columns = [
        'Unnamed: 0', '0', '17', '18', '19', '20', '21', '22',
        '23', '24', '25', '26', '27', '28', '29', '30', '31',
    ]
    electrode_names = [
        'Fp1', 'Fp2', 'F7', 'F3', 'F4', 'F8', 'T3', 'C3',
        'C4', 'T4', 'T5', 'P3', 'P4', 'T6', 'O1', 'O2',
    ]

    # The output directory must exist before any file is written.
    create_dir(out_dir_path)

    for name in file_names:
        source_path = in_dir_path + '/' + name
        destination_path = out_dir_path + '/conv_' + name

        channels = pd.read_csv(source_path).drop(columns=unused_columns)
        channels.columns = electrode_names
        # Use the first channel as the index so that to_csv writes it as the
        # leading column instead of adding a separate positional index.
        channels.set_index(channels.columns[0]).to_csv(destination_path)

In [126]:
# Convert every recorded file to its 16-channel version.
# Input and output folders are both resolved under the current working directory.
working_dir_path = os.getcwd()
in_dir_path = working_dir_path + "/recorded_csv"
out_dir_path = working_dir_path + "/ch_16_csv"
# Echo the resolved paths so the run can be checked against the expected folders.
print("in_dir_path", in_dir_path)
print("out_dir_path", out_dir_path)
# `file_names` is the list produced earlier by recorded_csv_files().
channel_16_csv_files(in_dir_path, out_dir_path, file_names)

in_dir_path /home/anjana/Courses/Aug_2022/Python/Project/CP_Project/recorded_csv
out_dir_path /home/anjana/Courses/Aug_2022/Python/Project/CP_Project/ch_16_csv
Path already exist - /home/anjana/Courses/Aug_2022/Python/Project/CP_Project/ch_16_csv


In [127]:
# Sanity check: number of entries in the output folder and the name of one of them.
# os.listdir returns entries in arbitrary order, so the name printed for
# index 0 is not guaranteed to be the same on every run or machine.
print(len(os.listdir(out_dir_path)))
print(os.listdir(out_dir_path)[0])

17
conv_exp07.csv


In [128]:
# Check one 16 channel csv file
test_16_channel_file_path = out_dir_path + "/conv_exp07.csv"
df = pd.read_csv(test_16_channel_file_path)
df.head()

Unnamed: 0,Fp1,Fp2,F7,F3,F4,F8,T3,C3,C4,T4,T5,P3,P4,T6,O1,O2
0,15404.241133,16658.531625,-2366.513296,32225.605217,11255.377383,6998.174727,15453.996116,3754.98131,-692.211174,8447.75241,21554.547138,8821.451225,3756.545932,16666.555901,7820.227184,11473.03867
1,15399.748433,16660.543282,-2363.942845,32232.355444,11236.75838,6995.693683,15453.906709,3741.302042,-693.999314,8450.009936,21558.771617,8800.328827,3762.111516,16668.858131,7834.822873,11487.656711
2,15401.827145,16654.351849,-2371.743604,32228.577999,11339.755218,6990.39632,15443.691962,3775.589618,-695.809805,8441.918605,21551.462597,8812.309362,3752.321452,16658.08459,7825.859824,11473.083374
3,15389.824258,16651.624936,-2374.179944,32220.39726,11191.317283,6987.468241,15443.490797,3729.522673,-699.140215,8437.582366,21548.199242,8807.839013,3750.175685,16655.804712,7821.009495,11471.608159
4,15391.120659,16651.356715,-2373.375281,32227.57217,11326.925317,6989.010512,15438.595764,3762.267978,-697.665,8438.677602,21537.939791,8789.644693,3754.020185,16654.530663,7813.991048,11470.669385


In [129]:
# Show the column labels of `df` (the converted file read from conv_exp07.csv).
df.columns

Index(['Fp1', 'Fp2', 'F7', 'F3', 'F4', 'F8', 'T3', 'C3', 'C4', 'T4', 'T5',
       'P3', 'P4', 'T6', 'O1', 'O2'],
      dtype='object')

In [130]:
# Summarise `df`: row count, per-column non-null counts, dtypes and memory use.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 45000 entries, 0 to 44999
Data columns (total 16 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   Fp1     45000 non-null  float64
 1   Fp2     45000 non-null  float64
 2   F7      45000 non-null  float64
 3   F3      45000 non-null  float64
 4   F4      45000 non-null  float64
 5   F8      45000 non-null  float64
 6   T3      45000 non-null  float64
 7   C3      45000 non-null  float64
 8   C4      45000 non-null  float64
 9   T4      45000 non-null  float64
 10  T5      45000 non-null  float64
 11  P3      45000 non-null  float64
 12  P4      45000 non-null  float64
 13  T6      45000 non-null  float64
 14  O1      45000 non-null  float64
 15  O2      45000 non-null  float64
dtypes: float64(16)
memory usage: 5.5 MB


In [143]:
def channel_16_csv_files_with_target(in_dir_path, out_dir_path, target_input_file):
    """Copy every file in ``in_dir_path`` to ``out_dir_path`` with label columns added.

    ``target_input_file`` is a CSV that must provide an ``id`` column and a
    ``label`` column. For each input file, the integer id is parsed from the
    file name (the text left after removing the first 8 and the last 4
    characters), the label of the first row with that id is looked up, and
    two constant columns are appended to the data:

    * ``label_names`` - ``'low'`` when the label equals 0, ``'high'`` otherwise
    * ``lablel``      - the label value itself

    The result is written to ``out_dir_path`` under the same file name, with
    the frame's index written as well (``to_csv`` default).
    """
    targets = pd.read_csv(target_input_file)

    # Every entry of the input directory is processed, in sorted name order.
    input_names = sorted(os.listdir(in_dir_path))

    # The output directory must exist before any file is written.
    create_dir(out_dir_path)

    for name in input_names:
        # e.g. 'conv_exp07.csv' -> '07' -> 7
        recording_id = int(name[8:-4])

        frame = pd.read_csv(in_dir_path + '/' + name)

        # First label registered for this recording id.
        matching_rows = targets.loc[targets['id'] == recording_id]
        target = matching_rows['label'].tolist()[0]
        target_name = 'low' if target == 0 else 'high'

        row_count = len(frame)
        frame['label_names'] = [target_name] * row_count
        # NOTE(review): 'lablel' looks like a misspelling of 'label'. It is
        # kept unchanged because it is the column name written to the output
        # files; confirm no reader depends on it before renaming.
        frame['lablel'] = [target] * row_count

        frame.to_csv(out_dir_path + '/' + name)

In [144]:
# Disabled invocation of channel_16_csv_files_with_target(): uncomment the four
# lines below to write the labelled copies of the 16-channel files.
# in_dir_path = working_dir_path + "/ch_16_csv"
# out_dir_path = working_dir_path + "/ch_16_csv_with_target"
# target_input_file = "./targets/PSS10-Sheet1.csv"
# channel_16_csv_files_with_target(in_dir_path, out_dir_path, target_input_file)