In [102]:
import os
import pandas as pd
import numpy as np

class dataset():
    """Index of per-participant EEG recordings paired with PSS-based labels.

    On construction the PSS table is read, every row listed in it is checked
    to have a matching EEG file named ``<No.:03d>.csv``, and each row is
    assigned one of three labels derived from its 'PSS Score'
    (see ``_calculate_label``).

    Attributes set by ``__init__``:
        files:   list of EEG csv paths, one per PSS row, in PSS order.
        attrs:   list of dicts with the keys 'Gender', 'MBTI' and 'Age'.
        scores:  list of 'PSS Score' values, aligned with ``files``.
        labels:  np.ndarray of LABEL_* values, aligned with ``files``.
        data:    empty list (not populated by this class as shown).
        segment: initialised to 1 (not otherwise used by this class as shown).
    """
    LABEL_NON_STRESSED = 0
    LABEL_NEUTRAL = 2
    LABEL_STRESSED = 1

    def __init__(self, dataset_path: str, EEG_folder:str="EEG/", PSS_name:str="PSS.csv"):
        """
            dataset_path: path to the dataset. Inside it there must be an `EEG` folder and a `PSS.csv` file.
            EEG_folder: if you want to change the default `EEG` folder to something else.
                        A trailing path separator is optional.
            PSS_name: if you want to change the default `PSS.csv` filename to something else.

            Raises:
                AssertionError: if the EEG folder, the PSS file or any per-row EEG
                                file is missing, or if an EEG file is listed twice.
                ValueError: if the PSS table has no rows or a missing 'PSS Score'.
        """
        #### Assert the files ####
        # os.path.join (instead of string concatenation) keeps the paths valid
        # whether or not the folder arguments end with a separator.
        EEG_path = os.path.join(dataset_path, EEG_folder)
        PSS_path = os.path.join(dataset_path, PSS_name)
        assert os.path.exists(EEG_path), f"{EEG_path} does not exist."
        assert os.path.exists(PSS_path), f"{PSS_path} does not exist."

        #### Load PSS files ####
        PSS = pd.read_csv(PSS_path, index_col="No.")

        #### Check EEG records ####
        self.files = []
        self.attrs = []
        self.scores = []
        seen_files = set()  # O(1) duplicate detection instead of scanning self.files
        for index in PSS.index:
            file = os.path.join(EEG_path, f"{index:03d}.csv")
            assert os.path.exists(file), f"{file} is not exist."
            assert file not in seen_files, f"{index:03d} is duplicated."
            seen_files.add(file)
            self.files.append(file)
            self.attrs.append({
                'Gender': PSS.loc[index, 'Gender'],
                'MBTI': PSS.loc[index, 'MBTI'],
                'Age': PSS.loc[index, 'Age'],
            })
            self.scores.append(PSS.loc[index, 'PSS Score'])

        print(f"Found: {len(self.files)} files")

        #### Init Attribute ####
        self.data = []
        self.labels = self._calculate_label()
        self.segment = 1

        print(f"Non-stressed:{np.count_nonzero(self.labels == self.LABEL_NON_STRESSED)}")
        print(f"Stressed:{np.count_nonzero(self.labels == self.LABEL_STRESSED)}")
        print(f"Neutral:{np.count_nonzero(self.labels == self.LABEL_NEUTRAL)}")

    def _calculate_label(self) -> np.ndarray:
        """Turn ``self.scores`` into one LABEL_* value per score.

        With ``mu`` the mean and ``std`` the population standard deviation
        (divided by N) of the scores:

            score <= mu - std/2  -> LABEL_NON_STRESSED
            score >= mu + std/2  -> LABEL_STRESSED
            otherwise            -> LABEL_NEUTRAL

        The lower-threshold test takes precedence, so when ``std`` is 0 every
        score is labelled LABEL_NON_STRESSED.

        Prints the mean and standard deviation. Does not modify ``self``.

        Returns:
            np.ndarray of labels with the same length and order as ``self.scores``.

        Raises:
            ValueError: if there are no scores, or if any score is NaN (a NaN
                        would match no threshold and misalign labels and files).
        """
        N = len(self.scores)
        if N == 0:
            raise ValueError("Cannot calculate labels: no PSS scores were loaded.")

        scores = np.asarray(self.scores, dtype=float)
        if np.isnan(scores).any():
            raise ValueError("Cannot calculate labels: PSS scores contain missing values.")

        # Built-in sum() is kept on purpose: it accumulates left to right, so
        # the printed mean/std stay identical to the previous implementation.
        mu = sum(scores) / N
        std = (sum((scores - mu) ** 2) / N) ** 0.5
        print(f"Mean:{mu}, Std:{std}")
        Tu = mu + (std / 2)
        Tl = mu - (std / 2)

        # Nested where(): the first matching condition wins, mirroring an
        # if/elif chain that tests the lower threshold first.
        return np.where(
            scores <= Tl,
            self.LABEL_NON_STRESSED,
            np.where(scores >= Tu, self.LABEL_STRESSED, self.LABEL_NEUTRAL),
        )

# Build the dataset index from the local `data` directory, using the default
# `EEG/` folder and `PSS.csv` file names. The constructor prints the number
# of files found and the per-label counts.
a = dataset(dataset_path='data')
# Bare expression: shows the object's repr as the cell output.
a

Found: 55 files
Mean:20.509090909090908, Std:6.149272617066855
Non-stressed:16
Stressed:19
Neutral:20


<__main__.dataset at 0x7f439c22f580>

In [97]:
# Re-count how many entries of `a.labels` fall in each class, printed in the
# order: stressed, non-stressed, neutral.
labels = np.array(a.labels)
for class_id in (a.LABEL_STRESSED, a.LABEL_NON_STRESSED, a.LABEL_NEUTRAL):
    print(sum(labels == class_id))

19
16
20


In [66]:
# Manual cross-check of the population standard deviation (divide by N, not
# N - 1) of the PSS scores held by `a`.
# The sample size is taken from the data instead of being hard-coded, and the
# scores are converted to an array explicitly rather than relying on a NumPy
# scalar coercing the list during subtraction.
pss_scores = np.asarray(a.scores, dtype=float)
n_scores = len(pss_scores)
mu = sum(pss_scores) / n_scores

(sum((pss_scores - mu) ** 2) / n_scores) ** 0.5


6.149272617066855

In [40]:
# Load the PSS table, using its "No." column as the index, and display it.
import pandas as pd
df = pd.read_csv('data/PSS.csv', index_col='No.')
# Disabled one-off steps, kept for reference: drop the 'Time', 'Name' and
# 'Called' columns, inspect row 1, and (last line of the cell) write the
# result to data/PSS-masked.csv.
# df = df.drop(columns=['Time', 'Name', 'Called'])
# df.loc[1]
# Bare expression: renders the whole table as the cell output.
df
# df.to_csv('data/PSS-masked.csv')

Unnamed: 0_level_0,Date,Gender,MBTI,Age,PSS Score
No.,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1,2021-10-26,Male,ENFP-A,25,12
2,2021-10-28,Male,ISFJ-T,24,37
3,2021-10-28,Female,INFP-T,25,22
4,2021-11-03,Female,INFJ-T,26,21
5,2021-11-08,Male,ENFJ-T,25,25
6,2021-11-09,Female,INFP-T,23,25
7,2021-11-14,Male,INFP-T,24,22
8,2021-11-16,Male,ISFP-T,23,28
9,2021-11-17,Male,ISTP-T,39,18
10,2021-11-23,Male,INTJ-T,25,22


In [17]:
# Re-load the PSS table and recode the 'Gender' column from numeric codes to
# text labels: 0 -> 'Male', 1 -> 'Female'.
# A single Series.replace is used instead of masked .loc assignments: writing
# strings into a numeric column through .loc is deprecated upcasting in
# recent pandas (presumably 'Gender' is integer-coded on first run - confirm).
# Values other than 0/1 are left untouched, so re-running on an already
# recoded file is a no-op.
df = pd.read_csv('data/PSS.csv', index_col='No.')
df['Gender'] = df['Gender'].replace({0: 'Male', 1: 'Female'})

In [19]:
# Write the current `df` back to data/PSS.csv.
# NOTE: this is the same path the table is read from, so the original file on
# disk is overwritten by this cell.
df.to_csv('data/PSS.csv')