In [1]:
import os

import neurokit2 as nk
import numpy as np
import pandas as pd

In [2]:
# Root directory of the dataset. Set the VA_DATA_DIR environment variable to run the
# notebook on another machine; the default is the original local checkout location.
data_root = os.environ.get("VA_DATA_DIR", "/home/ashish/Documents/github/VA/data")
# Cognitive-fatigue and physical-fatigue recordings live in sibling sub-directories.
cog_data_dir = os.path.join(data_root, "cognitive_data")
phy_data_dir = os.path.join(data_root, "physical_data")

- Currently, in Varun's data, there are 10 users with IDs from 0-9
- Each user has either one or two study sessions (session_1 and/or session_2)
- Each session contains multiple blocks of 0-back and 2-back tasks that the user performed.
    - Each block has three sets of data collected in different CSV files
        1. Score obtained (0-100) for the N-back task played in that block
            - Inside the block dir with ".csv" extension
        2. biosignalsplux data (BSP) - ECG, GSR, Breathing, EMG, PulOxR, PulOxIR
            - Inside "block_x_x/bsp" dir with ".csv" extension
        3. EEG data
            - File inside "block_x_x/eeg" with no extension

## Collect all Cognitive Fatigue data into one Pandas DataFrame for all users

In [3]:
import os
from csv import reader

In [4]:
def get_n_back_score(block_dir):
    """Return the final N-back score of a block as a fraction (0.0-1.0).

    The score is read from a ``.csv`` file located directly inside ``block_dir``.
    The second-to-last column of the last non-empty row holds the final game
    score on a 0-100 scale; it is divided by 100 and rounded to two decimals.

    Example row:
        ['3_Letter_C.png', '', '11', '0', '67', '1', '97.46835443037975', '0.0']

    Args:
        block_dir: Path of the block directory to inspect.

    Returns:
        The score as a float, or 0.0 when the directory has no ``.csv`` file
        or the file contains no data rows.
    """
    # Match on the real extension (not a substring, which would also accept e.g.
    # "x.csv.bak") and sort so the chosen file does not depend on the arbitrary
    # order in which os.listdir() returns entries.
    csv_names = sorted(
        name
        for name in os.listdir(block_dir)
        if name.endswith(".csv") and os.path.isfile(os.path.join(block_dir, name))
    )
    if not csv_names:
        return 0.0

    # Stream the file and remember only the last non-empty row; csv.reader yields
    # [] for blank lines, so a trailing newline must not be taken as the last row.
    last_row = None
    with open(os.path.join(block_dir, csv_names[0]), "r", newline="") as file:
        for row in reader(file):
            if row:
                last_row = row
    if last_row is None:
        # Score file exists but holds no rows: treat like a missing score.
        return 0.0

    return round(float(last_row[-2]) / 100, 2)

In [5]:
def extract_eeg_features(block_dir):
    """Placeholder for EEG feature extraction of one block.

    Intended to return a Pandas DataFrame with the relevant EEG features.
    For now it only resolves the path of the first entry listed in
    ``<block_dir>/eeg`` and returns None.
    """
    eeg_root = os.path.join(block_dir, "eeg")
    first_entry = os.listdir(eeg_root)[0]
    recording_path = os.path.join(eeg_root, first_entry)
    # TODO: read `recording_path` and compute the EEG features.
    return None

In [6]:
def extract_bsp_features(block_dir):
    """Placeholder for BSP feature extraction of one block.

    Intended to return a Pandas DataFrame with the relevant BSP features
    (ECG, GSR, Breathing, EMG, PulOxR, PulOxIR). For now it only resolves the
    path of the first entry listed in ``<block_dir>/bsp`` and returns None.
    """
    bsp_root = os.path.join(block_dir, "bsp")
    first_entry = os.listdir(bsp_root)[0]
    recording_path = os.path.join(bsp_root, first_entry)
    # TODO: read `recording_path` and compute the BSP features.
    return None

In [7]:
# Walk user -> session -> block and extract the three data sets of every block.
# Listings are sorted (lexicographically) because os.listdir() returns entries in
# arbitrary order, which would make the numbered output differ between runs.
session_counter = 0  # running count of processed blocks
for user_id in range(10):
    user_dir = os.path.join(cog_data_dir, f"user_{user_id}")
    for session in sorted(os.listdir(user_dir)):
        session_dir = os.path.join(user_dir, session)
        if not os.path.isdir(session_dir):
            # Stray files next to the session directories cannot be listed.
            continue
        for block in sorted(os.listdir(session_dir)):
            # Only real task blocks are of interest; skip practice rounds and
            # anything whose name does not contain "block".
            if "block" not in block or "practice" in block.lower():
                continue
            block_dir = os.path.join(session_dir, block)
            if not os.path.isdir(block_dir):
                # A file that merely has "block" in its name is not a block.
                continue
            # For each block, we want to extract three different sets of data
            score = get_n_back_score(block_dir)
            session_counter += 1
            # session[-1] is the last character of the session directory name.
            print(f"{session_counter}. Score: {score} | Session: {session[-1]} | User_ID: {user_id}")
            eeg_features = extract_eeg_features(block_dir)
            bsp_features = extract_bsp_features(block_dir)


1. Score: 0.75 | Session: 1 | User_ID: 0
2. Score: 0.97 | Session: 1 | User_ID: 1
3. Score: 1.0 | Session: 1 | User_ID: 1
4. Score: 0.92 | Session: 1 | User_ID: 1
5. Score: 1.0 | Session: 1 | User_ID: 1
6. Score: 1.0 | Session: 1 | User_ID: 1
7. Score: 0.97 | Session: 1 | User_ID: 1
8. Score: 1.0 | Session: 1 | User_ID: 1
9. Score: 0.9 | Session: 1 | User_ID: 1
10. Score: 0.95 | Session: 1 | User_ID: 1
11. Score: 0.92 | Session: 1 | User_ID: 1
12. Score: 0.97 | Session: 2 | User_ID: 1
13. Score: 1.0 | Session: 2 | User_ID: 1
14. Score: 1.0 | Session: 2 | User_ID: 1
15. Score: 0.9 | Session: 2 | User_ID: 1
16. Score: 0.97 | Session: 2 | User_ID: 1
17. Score: 1.0 | Session: 2 | User_ID: 1
18. Score: 1.0 | Session: 2 | User_ID: 1
19. Score: 0.92 | Session: 2 | User_ID: 1
20. Score: 0.9 | Session: 2 | User_ID: 1
21. Score: 0.85 | Session: 2 | User_ID: 1
22. Score: 1.0 | Session: 1 | User_ID: 2
23. Score: 1.0 | Session: 1 | User_ID: 2
24. Score: 0.97 | Session: 1 | User_ID: 2
25. Score: 0.9