<a href="https://colab.research.google.com/github/avocadopelvis/mental-load/blob/main/mental_load_data.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
from scipy.signal import butter, lfilter

In [None]:
# import os
# root = "/content/drive/MyDrive/MENTAL-LOAD-DATASET"
# folders = [ f.name for f in os.scandir(root) if f.is_dir() ]
# folders

In [None]:
# Build `final_df`: one row per fixed-length window of band-pass filtered ECG,
# plus a `label` column, for every subject folder listed below.

folders = ["002", "004", "005", "006", "008", "010", "011", "012", "013", "014", "015", "016", "017", "018", "019", "020", "021", "022", "023", "024", "025"]

# FILTER SETTINGS
# fs is the sampling rate used to normalise the cut-offs (presumably Hz, like the cut-offs)
fs = 256
lowcut = 0.1
highcut = 45

# SEGMENT SETTINGS
segment_len = 3840   # samples per output row (3840 / 256 = 15 s worth of samples)
signal_len = 76800   # samples consumed per column -> 76800 / 3840 = 20 rows per column
segments_per_col = signal_len // segment_len

# one label per column, in the column order produced by the concat below, i.e.
# ["Resting_ECG", "Trial 1:0back", "Trial 3:3back", "Trial 5:3back", "Trial 6:0back"]
# -> 0 = resting, 1 = 0-back, 2 = 3-back
column_labels = [0, 1, 2, 2, 1]


def bandpass(lowcut, highcut, order=5, fs=fs):
  """Return Butterworth band-pass coefficients (b, a).

  lowcut / highcut are the band edges in the same units as fs; they are
  normalised by the Nyquist frequency (fs / 2) before being given to
  scipy.signal.butter. order is passed to butter unchanged.

  NOTE(review): scipy's documentation recommends second-order sections
  (output='sos' with sosfilt) for band-pass designs because the (b, a) form
  can be numerically fragile; the (b, a) form is kept here so that the
  generated dataset does not change.
  """
  nyq = 0.5 * fs
  low = lowcut / nyq
  high = highcut / nyq
  return butter(order, [low, high], btype='band')


# the coefficients depend only on the settings above, so design the filter once
b, a = bandpass(lowcut, highcut)

# per-subject frames; concatenated once after the loop
# (DataFrame.append was removed in pandas 2.0 and re-copied the frame on every call)
subject_frames = []

# iterate through the folder of each subject
for folder in folders:
  # LOAD DATA
  data = pd.read_csv(f"/content/drive/MyDrive/MENTAL-LOAD-DATASET/{folder}/inf_ecg.csv")
  rest_data = pd.read_csv(f"/content/drive/MyDrive/MENTAL-LOAD-DATASET/{folder}/inf_resting.csv")

  # drop the 2-back trials
  data = data.drop(["Trial 2:2back", "Trial 4:2back"], axis = 1)

  # get resting ecg column
  rest_data = rest_data["Resting_ECG"]

  # put the resting ecg in front of the remaining trial columns
  # pd.concat will add NaN values if the size of the dataframes do not match
  # so subject 002 & 006 will have NaN values since their resting ecg dataset does not contain 76800 samples
  data = pd.concat([rest_data, data], axis = 1)

  # initialize the list of windows for this subject
  final = []

  # iterate through data's columns
  for col in data.columns:
    # FILTER THE SIGNAL
    ecg_raw = data[col]
    ecg_filter = lfilter(b, a, ecg_raw)

    # BREAK THE SIGNAL into consecutive, non-overlapping windows
    for start in range(0, signal_len, segment_len):
      final.append(ecg_filter[start:start + segment_len])

  # final now holds segments_per_col windows for each column

  # load final as a df
  df = pd.DataFrame(final)
  # assign labels: every window inherits the label of the column it came from
  df = df.assign(label = [lab for lab in column_labels for _ in range(segments_per_col)])

  subject_frames.append(df)

# stack all subjects; row indices restart at 0 for every subject, as before
final_df = pd.concat(subject_frames) if subject_frames else pd.DataFrame()

In [None]:
# display the assembled dataset (the rendered table is truncated for large frames)
final_df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,3831,3832,3833,3834,3835,3836,3837,3838,3839,label
0,-0.000937,-0.007203,-0.025761,-0.057455,-0.090132,-0.106145,-0.099081,-0.081342,-0.071394,-0.073434,...,-0.195488,-0.209389,-0.221418,-0.231474,-0.239308,-0.245136,-0.249153,-0.251306,-0.251726,0
1,-0.250747,-0.248100,-0.242925,-0.235485,-0.228200,-0.223662,-0.221579,-0.218735,-0.212500,-0.203598,...,0.053954,0.035626,0.022383,0.010377,-0.002666,-0.014822,-0.023016,-0.026749,-0.028513,0
2,-0.031275,-0.035827,-0.040436,-0.042807,-0.042064,-0.039083,-0.035664,-0.033251,-0.031587,-0.028869,...,0.242360,0.206749,0.176540,0.166375,0.182852,0.217199,0.252381,0.275658,0.284019,0
3,0.282774,0.282530,0.291160,0.303156,0.302636,0.282632,0.257437,0.250007,0.268365,0.298846,...,-0.000216,0.002085,0.007211,0.012925,0.016449,0.016999,0.016249,0.017634,0.027781,0
4,0.058582,0.123291,0.227564,0.364535,0.516889,0.659293,0.750784,0.724379,0.508810,0.090546,...,-0.283535,-0.277764,-0.270363,-0.263406,-0.257478,-0.252091,-0.246567,-0.240262,-0.233057,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,-0.131565,-0.136414,-0.139860,-0.132463,-0.113560,-0.094689,-0.088723,-0.095477,-0.099772,-0.086283,...,-0.105362,-0.102184,-0.089493,-0.075865,-0.068164,-0.067680,-0.068634,-0.062618,-0.048078,1
96,-0.034381,-0.032947,-0.044263,-0.057791,-0.064437,-0.063073,-0.055331,-0.042823,-0.030440,-0.022912,...,-0.052154,-0.052397,-0.053673,-0.050650,-0.042439,-0.033684,-0.029129,-0.029195,-0.031468,1
97,-0.032855,-0.030765,-0.025271,-0.018512,-0.011355,-0.004243,-0.000105,-0.001208,-0.004484,-0.004197,...,-0.182481,-0.195720,-0.195329,-0.201010,-0.220380,-0.243190,-0.255585,-0.255883,-0.252987,1
98,-0.253310,-0.255877,-0.257402,-0.256288,-0.252513,-0.246039,-0.236466,-0.225157,-0.216629,-0.214664,...,-0.024017,-0.017511,-0.012621,-0.012055,-0.013178,-0.011728,-0.007674,-0.004027,-0.002103,1


In [None]:
# list the rows in which at least one column is missing
has_nan = final_df.isnull().any(axis="columns")
final_df[has_nan]

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,3831,3832,3833,3834,3835,3836,3837,3838,3839,label
19,0.095832,0.123349,0.122974,0.090498,0.0475,0.025901,0.034169,0.048359,0.041598,0.013798,...,,,,,,,,,,0
19,0.050985,0.051758,0.053814,0.058446,0.065257,0.07393,0.086052,0.103441,0.124335,0.14304,...,,,,,,,,,,0


In [None]:
# discard every row that has a missing value in any column
# (two rows contain NaN values)
final_df = final_df.dropna(axis="index", how="any")
final_df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,3831,3832,3833,3834,3835,3836,3837,3838,3839,label
0,-0.000937,-0.007203,-0.025761,-0.057455,-0.090132,-0.106145,-0.099081,-0.081342,-0.071394,-0.073434,...,-0.195488,-0.209389,-0.221418,-0.231474,-0.239308,-0.245136,-0.249153,-0.251306,-0.251726,0
1,-0.250747,-0.248100,-0.242925,-0.235485,-0.228200,-0.223662,-0.221579,-0.218735,-0.212500,-0.203598,...,0.053954,0.035626,0.022383,0.010377,-0.002666,-0.014822,-0.023016,-0.026749,-0.028513,0
2,-0.031275,-0.035827,-0.040436,-0.042807,-0.042064,-0.039083,-0.035664,-0.033251,-0.031587,-0.028869,...,0.242360,0.206749,0.176540,0.166375,0.182852,0.217199,0.252381,0.275658,0.284019,0
3,0.282774,0.282530,0.291160,0.303156,0.302636,0.282632,0.257437,0.250007,0.268365,0.298846,...,-0.000216,0.002085,0.007211,0.012925,0.016449,0.016999,0.016249,0.017634,0.027781,0
4,0.058582,0.123291,0.227564,0.364535,0.516889,0.659293,0.750784,0.724379,0.508810,0.090546,...,-0.283535,-0.277764,-0.270363,-0.263406,-0.257478,-0.252091,-0.246567,-0.240262,-0.233057,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,-0.131565,-0.136414,-0.139860,-0.132463,-0.113560,-0.094689,-0.088723,-0.095477,-0.099772,-0.086283,...,-0.105362,-0.102184,-0.089493,-0.075865,-0.068164,-0.067680,-0.068634,-0.062618,-0.048078,1
96,-0.034381,-0.032947,-0.044263,-0.057791,-0.064437,-0.063073,-0.055331,-0.042823,-0.030440,-0.022912,...,-0.052154,-0.052397,-0.053673,-0.050650,-0.042439,-0.033684,-0.029129,-0.029195,-0.031468,1
97,-0.032855,-0.030765,-0.025271,-0.018512,-0.011355,-0.004243,-0.000105,-0.001208,-0.004484,-0.004197,...,-0.182481,-0.195720,-0.195329,-0.201010,-0.220380,-0.243190,-0.255585,-0.255883,-0.252987,1
98,-0.253310,-0.255877,-0.257402,-0.256288,-0.252513,-0.246039,-0.236466,-0.225157,-0.216629,-0.214664,...,-0.024017,-0.017511,-0.012621,-0.012055,-0.013178,-0.011728,-0.007674,-0.004027,-0.002103,1


In [None]:
# convert df to csv
# final_df.to_csv("/content/drive/MyDrive/MENTAL-LOAD-DATASET/final.csv")

In [None]:
#drop first column in rest data
# rest_data = rest_data.drop(rest_data.columns[0], axis = 1)

In [None]:
# features: the first 3840 columns (the signal samples); target: the "label" column
X = final_df.iloc[:, :3840]
y = final_df.loc[:, "label"]

0     0
1     0
2     0
3     0
4     0
     ..
95    1
96    1
97    1
98    1
99    1
Name: label, Length: 2098, dtype: int64

In [None]:
# train_test_split lives in sklearn.model_selection; there is no `sklearn.model` module
from sklearn.model_selection import train_test_split

# hold out 33% of the rows for testing; the seed is fixed so a re-run gives the same split
# NOTE(review): rows are split at random, so windows of one subject can end up in both
# the train and the test set - confirm that this is intended
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)

ImportError: ignored