In [4]:
import pyxdf
import pandas as pd
import numpy as np
from datetime import datetime, timedelta, timezone
from scipy import stats
import os
from IPython.display import display
import matplotlib.pyplot as plt

## Load the XDF file

In [5]:
from pathlib import Path
import re

# ---- Notebook configuration ----
# Root folder holding the per-subject directories (relative to the notebook's working directory).
BASE_DIR = Path(r".")
# Subject folder name; also used as the filename prefix when globbing for recordings.
SUBJECT  = "sub-P014"
# ============================

# Session suffixes scanned under <subject>/ses-<cond>/eeg/.
# The load cells label them v = Visual, va = Visual + Auditory, vt = Visual + Thermal;
# vat is presumably all three combined — TODO confirm.
CONDITIONS = ["v", "va", "vt", "vat"]

# Captures the numeric run index from file names ending in "_run-<N>_eeg.xdf" (case-insensitive).
run_re = re.compile(r"_run-(\d+)_eeg\.xdf$", re.IGNORECASE)

def pick_latest_run(paths):
    """Choose a single recording out of ``paths``.

    Files whose name matches ``run_re`` are ranked by their run number and the
    highest one wins. If no name carries a run number, the most recently
    modified file is returned instead. An empty (or otherwise falsy) ``paths``
    yields ``None``.
    """
    if not paths:
        return None

    # Pair every numbered file with its parsed run index.
    numbered = [
        (int(hit.group(1)), candidate)
        for candidate in paths
        for hit in [run_re.search(candidate.name)]
        if hit
    ]
    if numbered:
        _, winner = max(numbered, key=lambda pair: pair[0])
        return winner

    # No run numbers anywhere: fall back to filesystem modification time.
    return max(paths, key=lambda candidate: candidate.stat().st_mtime)

def find_condition_paths(base_dir: Path, subject: str):
    """Map each condition in ``CONDITIONS`` to the recording chosen for it.

    For every condition the folder ``<base_dir>/<subject>/ses-<cond>/eeg`` is
    globbed, first with the strict ``..._task-Default_run-*_eeg.xdf`` naming
    pattern and, if that finds nothing, with a plain ``*.xdf``. The winner
    among the matches is decided by ``pick_latest_run``. Conditions without
    any matching file are left out of the returned dict.
    """
    found = {}
    for cond in CONDITIONS:
        session_eeg = base_dir / subject / f"ses-{cond}" / "eeg"
        strict_pattern = f"{subject}_ses-{cond}_task-Default_run-*_eeg.xdf"
        # Strict naming first; any .xdf in the folder is accepted as a fallback.
        matches = list(session_eeg.glob(strict_pattern)) or list(session_eeg.glob("*.xdf"))
        best = pick_latest_run(matches)
        if not best:
            continue
        found[cond] = best
    return found

# -------- header helpers (reuse your logic) --------
def get_datetime_from_header(header):
    """Return the first entry of ``header['info']['datetime']``.

    The sentinel string ``"Datetime not found"`` is returned when either key
    is missing, or when the stored value is not a non-empty list.
    """
    missing = "Datetime not found"
    if 'info' not in header:
        return missing
    info = header['info']
    if 'datetime' not in info:
        return missing
    values = info['datetime']
    if not isinstance(values, list) or not values:
        return missing
    return values[0]

def print_stream_info(streams, condition_name, datetime_str):
    """Print a short summary of every stream in a loaded recording.

    Emits a header naming the condition, the recording datetime, and then one
    line per stream (numbered from 1) with its name and channel count, both
    read from the first element of the matching ``stream['info']`` entry.
    """
    print(f"\n--- Stream Information ({condition_name}) ---")
    print(f"Datetime from header: {datetime_str}")
    for position, stream in enumerate(streams, start=1):
        info = stream['info']
        stream_name = info['name'][0]
        channel_count = info['channel_count'][0]
        print(f"Stream {position} Name: {stream_name}, Channel Count: {channel_count}, Datetime: {datetime_str}")

# Discover one .xdf recording per condition for the configured subject.
# Conditions for which no file was found are simply absent from the dict.
paths = find_condition_paths(BASE_DIR, SUBJECT)

In [6]:
# ---- Map discovered paths back to your original variable names ----
def require_found(paths_dict, cond_key, var_name):
    """Return the path stored for ``cond_key`` as a string.

    Raises ``FileNotFoundError`` with a message naming the expected folder
    layout and the variable (``var_name``) that could not be assigned when the
    condition is absent from ``paths_dict`` or mapped to ``None``.
    """
    found = paths_dict.get(cond_key)
    if found is not None:
        # str() of a Path uses the separators of the OS the notebook runs on.
        return str(found)

    message = (
        f"Missing file for condition '{cond_key}'. "
        f"Please check folder structure: sub-<ID>/ses-{cond_key}/eeg/*.xdf "
        f"and ensure it exists so we can assign `{var_name}`."
    )
    raise FileNotFoundError(message)

# Assign variables exactly as before
# Each call raises FileNotFoundError when that condition's recording was not
# discovered, so the notebook stops here instead of failing later in the load cells.
visual_file_path = require_found(paths, "v",   "visual_file_path")
va_file_path     = require_found(paths, "va",  "va_file_path")
vt_file_path     = require_found(paths, "vt",  "vt_file_path")
vat_file_path    = require_found(paths, "vat", "vat_file_path")

# (Optional) print to verify
print("\n== Assigned variables ==")
print("visual_file_path:", visual_file_path)
print("va_file_path    :", va_file_path)
print("vt_file_path    :", vt_file_path)
print("vat_file_path   :", vat_file_path)



== Assigned variables ==
visual_file_path: sub-P014\ses-v\eeg\sub-P014_ses-v_task-Default_run-001_eeg.xdf
va_file_path    : sub-P014\ses-va\eeg\sub-P014_ses-va_task-Default_run-001_eeg.xdf
vt_file_path    : sub-P014\ses-vt\eeg\sub-P014_ses-vt_task-Default_run-001_eeg.xdf
vat_file_path   : sub-P014\ses-vat\eeg\sub-P014_ses-vat_task-Default_run-001_eeg.xdf


## Load visual only data

In [7]:
# Load XDF file for Visual condition
# pyxdf.load_xdf returns (list of stream dicts, file header dict).
# NOTE(review): the load warns that several streams' effective sampling rate differs
# widely from the declared 250 Hz (see the cell output) — confirm these streams are
# not assumed to be uniformly sampled downstream.
visual_streams, visual_header = pyxdf.load_xdf(visual_file_path)
# Recording datetime string from the file header (or the "Datetime not found" sentinel).
visual_datetime = get_datetime_from_header(visual_header)
print("Visual condition datetime:", visual_datetime)

# Print stream info for Visual condition with datetime
print_stream_info(visual_streams, "Visual", visual_datetime)

Stream 2: Calculated effective sampling rate 40.3620 Hz is different from specified rate 250.0000 Hz.
Stream 7: Calculated effective sampling rate 40.3945 Hz is different from specified rate 250.0000 Hz.
Stream 3: Calculated effective sampling rate 40.8761 Hz is different from specified rate 250.0000 Hz.
Stream 8: Calculated effective sampling rate 379.9766 Hz is different from specified rate 250.0000 Hz.
Stream 5: Calculated effective sampling rate 25.9551 Hz is different from specified rate 250.0000 Hz.
Stream 11: Calculated effective sampling rate 443.8161 Hz is different from specified rate 250.0000 Hz.
Stream 10: Calculated effective sampling rate 38.2778 Hz is different from specified rate 250.0000 Hz.


Visual condition datetime: 2025-10-10T13:59:34+1100

--- Stream Information (Visual) ---
Datetime from header: 2025-10-10T13:59:34+1100
Stream 1 Name: obci_stream_8, Channel Count: 4, Datetime: 2025-10-10T13:59:34+1100
Stream 2 Name: obci_stream_5, Channel Count: 2, Datetime: 2025-10-10T13:59:34+1100
Stream 3 Name: obci_stream_6, Channel Count: 1, Datetime: 2025-10-10T13:59:34+1100
Stream 4 Name: obci_stream_0, Channel Count: 18, Datetime: 2025-10-10T13:59:34+1100
Stream 5 Name: obci_stream_4, Channel Count: 18, Datetime: 2025-10-10T13:59:34+1100
Stream 6 Name: obci_stream_2, Channel Count: 6, Datetime: 2025-10-10T13:59:34+1100
Stream 7 Name: obci_stream_7, Channel Count: 1, Datetime: 2025-10-10T13:59:34+1100
Stream 8 Name: obci_stream_1, Channel Count: 5, Datetime: 2025-10-10T13:59:34+1100
Stream 9 Name: obci_stream_3, Channel Count: 125, Datetime: 2025-10-10T13:59:34+1100
Stream 10 Name: obci_stream_11, Channel Count: 3, Datetime: 2025-10-10T13:59:34+1100
Stream 11 Name: obci_stream_

## Load VA data

In [8]:
# Load XDF file for Visual + Auditory condition (VA)
# pyxdf.load_xdf returns (list of stream dicts, file header dict).
# The stream order in this file differs from the Visual file (compare the printed
# listings), so streams must be looked up by name, never by position.
va_streams, va_header = pyxdf.load_xdf(va_file_path)
# Recording datetime string from the file header (or the "Datetime not found" sentinel).
va_datetime = get_datetime_from_header(va_header)
print("Visual + Auditory condition datetime:", va_datetime)

# Print stream info for Visual + Auditory condition with datetime
print_stream_info(va_streams, "Visual + Auditory", va_datetime)

Stream 10: Calculated effective sampling rate 32.5189 Hz is different from specified rate 250.0000 Hz.
Stream 2: Calculated effective sampling rate 38.3058 Hz is different from specified rate 250.0000 Hz.
Stream 4: Calculated effective sampling rate 38.1692 Hz is different from specified rate 250.0000 Hz.
Stream 8: Calculated effective sampling rate 307.7353 Hz is different from specified rate 250.0000 Hz.
Stream 5: Calculated effective sampling rate 22.6840 Hz is different from specified rate 250.0000 Hz.
Stream 11: Calculated effective sampling rate 326.0299 Hz is different from specified rate 250.0000 Hz.
Stream 9: Calculated effective sampling rate 192.5431 Hz is different from specified rate 250.0000 Hz.
Stream 7: Calculated effective sampling rate 38.8304 Hz is different from specified rate 250.0000 Hz.


Visual + Auditory condition datetime: 2025-10-10T14:46:52+1100

--- Stream Information (Visual + Auditory) ---
Datetime from header: 2025-10-10T14:46:52+1100
Stream 1 Name: obci_stream_8, Channel Count: 4, Datetime: 2025-10-10T14:46:52+1100
Stream 2 Name: obci_stream_11, Channel Count: 3, Datetime: 2025-10-10T14:46:52+1100
Stream 3 Name: obci_stream_9, Channel Count: 9, Datetime: 2025-10-10T14:46:52+1100
Stream 4 Name: obci_stream_7, Channel Count: 1, Datetime: 2025-10-10T14:46:52+1100
Stream 5 Name: obci_stream_0, Channel Count: 18, Datetime: 2025-10-10T14:46:52+1100
Stream 6 Name: obci_stream_4, Channel Count: 18, Datetime: 2025-10-10T14:46:52+1100
Stream 7 Name: obci_stream_5, Channel Count: 2, Datetime: 2025-10-10T14:46:52+1100
Stream 8 Name: obci_stream_2, Channel Count: 6, Datetime: 2025-10-10T14:46:52+1100
Stream 9 Name: obci_stream_1, Channel Count: 5, Datetime: 2025-10-10T14:46:52+1100
Stream 10 Name: obci_stream_3, Channel Count: 125, Datetime: 2025-10-10T14:46:52+1100
Stream

## Load VT data

In [9]:
# Load XDF file for Visual + Thermal condition (VT)
# pyxdf.load_xdf returns (list of stream dicts, file header dict).
vt_streams, vt_header = pyxdf.load_xdf(vt_file_path)
# Recording datetime string from the file header (or the "Datetime not found" sentinel).
vt_datetime = get_datetime_from_header(vt_header)
print("Visual + Thermal condition datetime:", vt_datetime)

# Print stream info for Visual + Thermal condition with datetime
print_stream_info(vt_streams, "Visual + Thermal", vt_datetime)

Stream 2: Calculated effective sampling rate 39.0305 Hz is different from specified rate 250.0000 Hz.
Stream 8: Calculated effective sampling rate 192.2784 Hz is different from specified rate 250.0000 Hz.
Stream 6: Calculated effective sampling rate 39.6434 Hz is different from specified rate 250.0000 Hz.
Stream 9: Calculated effective sampling rate 35.2238 Hz is different from specified rate 250.0000 Hz.
Stream 12: Calculated effective sampling rate 21.7044 Hz is different from specified rate 250.0000 Hz.
Stream 3: Calculated effective sampling rate 38.8067 Hz is different from specified rate 250.0000 Hz.
Stream 13: Calculated effective sampling rate 343.4047 Hz is different from specified rate 250.0000 Hz.
Stream 7: Calculated effective sampling rate 309.4436 Hz is different from specified rate 250.0000 Hz.


Visual + Thermal condition datetime: 2025-10-10T14:33:35+1100

--- Stream Information (Visual + Thermal) ---
Datetime from header: 2025-10-10T14:33:35+1100
Stream 1 Name: obci_stream_9, Channel Count: 9, Datetime: 2025-10-10T14:33:35+1100
Stream 2 Name: obci_stream_5, Channel Count: 2, Datetime: 2025-10-10T14:33:35+1100
Stream 3 Name: obci_stream_8, Channel Count: 4, Datetime: 2025-10-10T14:33:35+1100
Stream 4 Name: obci_stream_0, Channel Count: 18, Datetime: 2025-10-10T14:33:35+1100
Stream 5 Name: obci_stream_10, Channel Count: 2, Datetime: 2025-10-10T14:33:35+1100
Stream 6 Name: obci_stream_6, Channel Count: 1, Datetime: 2025-10-10T14:33:35+1100
Stream 7 Name: obci_stream_11, Channel Count: 3, Datetime: 2025-10-10T14:33:35+1100
Stream 8 Name: obci_stream_1, Channel Count: 5, Datetime: 2025-10-10T14:33:35+1100
Stream 9 Name: obci_stream_4, Channel Count: 18, Datetime: 2025-10-10T14:33:35+1100
Stream 10 Name: obci_stream_3, Channel Count: 125, Datetime: 2025-10-10T14:33:35+1100
Stream 

## Load VAT data

In [10]:
# Load XDF file for the VAT condition (presumably Visual + Auditory + Thermal — TODO confirm)
# pyxdf.load_xdf returns (list of stream dicts, file header dict).
vat_streams, vat_header = pyxdf.load_xdf(vat_file_path)
# Recording datetime string from the file header (or the "Datetime not found" sentinel).
vat_datetime = get_datetime_from_header(vat_header)
print("VAT condition datetime:", vat_datetime)

# Print stream info for the VAT condition with datetime
print_stream_info(vat_streams, "VAT", vat_datetime)

Stream 3: Calculated effective sampling rate 41.0675 Hz is different from specified rate 250.0000 Hz.
Stream 8: Calculated effective sampling rate 338.9286 Hz is different from specified rate 250.0000 Hz.
Stream 5: Calculated effective sampling rate 23.1637 Hz is different from specified rate 250.0000 Hz.
Stream 11: Calculated effective sampling rate 373.5739 Hz is different from specified rate 250.0000 Hz.
Stream 9: Calculated effective sampling rate 202.7190 Hz is different from specified rate 250.0000 Hz.
Stream 10: Calculated effective sampling rate 35.2274 Hz is different from specified rate 250.0000 Hz.
Stream 6: Calculated effective sampling rate 41.6620 Hz is different from specified rate 250.0000 Hz.
Stream 7: Calculated effective sampling rate 39.9000 Hz is different from specified rate 250.0000 Hz.


VAT condition datetime: 2025-10-10T14:18:03+1100

--- Stream Information (VAT) ---
Datetime from header: 2025-10-10T14:18:03+1100
Stream 1 Name: obci_stream_4, Channel Count: 18, Datetime: 2025-10-10T14:18:03+1100
Stream 2 Name: obci_stream_2, Channel Count: 6, Datetime: 2025-10-10T14:18:03+1100
Stream 3 Name: obci_stream_1, Channel Count: 5, Datetime: 2025-10-10T14:18:03+1100
Stream 4 Name: GSR_PPG, Channel Count: 10, Datetime: 2025-10-10T14:18:03+1100
Stream 5 Name: obci_stream_0, Channel Count: 18, Datetime: 2025-10-10T14:18:03+1100
Stream 6 Name: obci_stream_8, Channel Count: 4, Datetime: 2025-10-10T14:18:03+1100
Stream 7 Name: obci_stream_3, Channel Count: 125, Datetime: 2025-10-10T14:18:03+1100
Stream 8 Name: obci_stream_10, Channel Count: 2, Datetime: 2025-10-10T14:18:03+1100
Stream 9 Name: obci_stream_9, Channel Count: 9, Datetime: 2025-10-10T14:18:03+1100
Stream 10 Name: obci_stream_11, Channel Count: 3, Datetime: 2025-10-10T14:18:03+1100
Stream 11 Name: obci_stream_6, Channel

## Extract data streams for EEG Band Power

In [11]:
# Function to parse datetime string from header
def parse_header_datetime(dt_str):
    """Parse the recording datetime string taken from an XDF file header.

    Accepted forms are ``YYYY-MM-DDTHH:MM:SS[.ffffff]`` followed by an optional
    UTC offset written as ``+HHMM``, ``-HHMM``, ``+HH:MM`` or ``-HH:MM``.

    Parameters
    ----------
    dt_str : str
        Datetime text, e.g. ``"2025-10-10T13:59:34+1100"``.

    Returns
    -------
    datetime.datetime
        Timezone-aware when the text carries an offset, naive otherwise.

    Raises
    ------
    ValueError
        If the text matches none of the accepted forms (this includes the
        ``"Datetime not found"`` sentinel produced by the header helper).

    Notes
    -----
    This function is defined again in the next notebook cell; the later
    definition shadows this one, so keep the two in sync (or delete one).
    """
    text = dt_str.strip()

    # strptime's %z understands both signs and both the HHMM and HH:MM
    # spellings, which hand-splitting on '+' did not.
    for fmt in ("%Y-%m-%dT%H:%M:%S.%f%z", "%Y-%m-%dT%H:%M:%S%z"):
        try:
            return datetime.strptime(text, fmt)
        except ValueError:
            continue

    # No offset present (or another ISO-8601 spelling): defer to fromisoformat.
    try:
        return datetime.fromisoformat(text)
    except ValueError:
        raise ValueError(f"Unrecognised header datetime: {dt_str!r}") from None

# Function to extract a specific stream as a DataFrame
def extract_stream_df(streams, start_time_str, stream_name_to_extract):
    """Turn one named stream of a loaded XDF recording into a DataFrame.

    Parameters
    ----------
    streams : list of dict
        Stream dicts as returned by ``pyxdf.load_xdf``; each is read through
        ``['info']['name'][0]``, ``['info']['channel_count'][0]``,
        ``['time_series']`` and ``['time_stamps']``.
    start_time_str : str
        Recording datetime from the file header, parsed with
        ``parse_header_datetime``.
    stream_name_to_extract : str
        Name of the stream to extract; the first stream with this name is used.

    Returns
    -------
    pandas.DataFrame or None
        ``None`` (after printing a message) when no stream has that name.
        Otherwise a frame with columns ``Channel_1..Channel_N``, ``Timestamp``
        (raw stream time stamps), ``Relative_Seconds`` (seconds since the
        stream's first sample) and ``Datetime_Local`` (a string formatted as
        ``'%Y-%m-%d %H:%M:%S.%f %z'``). A stream without samples gives an empty
        frame with the same columns instead of raising ``IndexError``.

    Notes
    -----
    ``Datetime_Local`` pins the stream's first sample to the header datetime.
    NOTE(review): this assumes the stream started at the moment the recording
    header was written — confirm, since a late-starting stream would be shifted.

    This function is defined again in the next notebook cell; the later
    definition shadows this one, so keep the two in sync (or delete one).
    """
    start_dt = parse_header_datetime(start_time_str)

    # First stream whose declared name matches; None when there is no match.
    target_stream = next(
        (s for s in streams if s['info']['name'][0] == stream_name_to_extract),
        None,
    )
    if target_stream is None:
        print(f"Stream '{stream_name_to_extract}' not found.")
        return None

    n_channels = int(target_stream['info']['channel_count'][0])
    df_stream = pd.DataFrame(
        target_stream['time_series'],
        columns=[f"Channel_{i+1}" for i in range(n_channels)]
    )
    df_stream['Timestamp'] = target_stream['time_stamps']

    if df_stream.empty:
        # No first sample to anchor on: return the empty frame with the full
        # column schema rather than failing on .iloc[0].
        print(f"Stream '{stream_name_to_extract}' contains no samples.")
        df_stream['Relative_Seconds'] = pd.Series(dtype='float64')
        df_stream['Datetime_Local'] = pd.Series(dtype='object')
        return df_stream

    df_stream['Relative_Seconds'] = df_stream['Timestamp'] - df_stream['Timestamp'].iloc[0]

    # Build the formatted wall-clock strings in a single pass.
    stamp_format = '%Y-%m-%d %H:%M:%S.%f %z'
    df_stream['Datetime_Local'] = [
        (start_dt + timedelta(seconds=s)).strftime(stamp_format)
        for s in df_stream['Relative_Seconds']
    ]
    return df_stream

# Set the stream name to extract
# NOTE(review): this value is overwritten with "obci_stream_1" in the next cell
# before any visible call to extract_stream_df, so "obci_stream_2" is never used.
stream_name_to_extract = "obci_stream_2"


## Extract EEG for Average Band Power

In [12]:
# Function to parse datetime string from header
def parse_header_datetime(dt_str):
    """Parse the recording datetime string taken from an XDF file header.

    Accepted forms are ``YYYY-MM-DDTHH:MM:SS[.ffffff]`` followed by an optional
    UTC offset written as ``+HHMM``, ``-HHMM``, ``+HH:MM`` or ``-HH:MM``.

    Parameters
    ----------
    dt_str : str
        Datetime text, e.g. ``"2025-10-10T13:59:34+1100"``.

    Returns
    -------
    datetime.datetime
        Timezone-aware when the text carries an offset, naive otherwise.

    Raises
    ------
    ValueError
        If the text matches none of the accepted forms (this includes the
        ``"Datetime not found"`` sentinel produced by the header helper).

    Notes
    -----
    A function of the same name is also defined in the previous notebook cell;
    this later definition is the one in effect for the cells that follow.
    """
    text = dt_str.strip()

    # strptime's %z understands both signs and both the HHMM and HH:MM
    # spellings, which hand-splitting on '+' did not.
    for fmt in ("%Y-%m-%dT%H:%M:%S.%f%z", "%Y-%m-%dT%H:%M:%S%z"):
        try:
            return datetime.strptime(text, fmt)
        except ValueError:
            continue

    # No offset present (or another ISO-8601 spelling): defer to fromisoformat.
    try:
        return datetime.fromisoformat(text)
    except ValueError:
        raise ValueError(f"Unrecognised header datetime: {dt_str!r}") from None

# Function to extract a specific stream as a DataFrame
def extract_stream_df(streams, start_time_str, stream_name_to_extract):
    """Turn one named stream of a loaded XDF recording into a DataFrame.

    Parameters
    ----------
    streams : list of dict
        Stream dicts as returned by ``pyxdf.load_xdf``; each is read through
        ``['info']['name'][0]``, ``['info']['channel_count'][0]``,
        ``['time_series']`` and ``['time_stamps']``.
    start_time_str : str
        Recording datetime from the file header, parsed with
        ``parse_header_datetime``.
    stream_name_to_extract : str
        Name of the stream to extract; the first stream with this name is used.

    Returns
    -------
    pandas.DataFrame or None
        ``None`` (after printing a message) when no stream has that name.
        Otherwise a frame with columns ``Channel_1..Channel_N``, ``Timestamp``
        (raw stream time stamps), ``Relative_Seconds`` (seconds since the
        stream's first sample) and ``Datetime_Local`` (a string formatted as
        ``'%Y-%m-%d %H:%M:%S.%f %z'``). A stream without samples gives an empty
        frame with the same columns instead of raising ``IndexError``.

    Notes
    -----
    ``Datetime_Local`` pins the stream's first sample to the header datetime.
    NOTE(review): this assumes the stream started at the moment the recording
    header was written — confirm, since a late-starting stream would be shifted.

    A function of the same name is also defined in the previous notebook cell;
    this later definition is the one in effect for the cells that follow.
    """
    start_dt = parse_header_datetime(start_time_str)

    # First stream whose declared name matches; None when there is no match.
    target_stream = next(
        (s for s in streams if s['info']['name'][0] == stream_name_to_extract),
        None,
    )
    if target_stream is None:
        print(f"Stream '{stream_name_to_extract}' not found.")
        return None

    n_channels = int(target_stream['info']['channel_count'][0])
    df_stream_0 = pd.DataFrame(
        target_stream['time_series'],
        columns=[f"Channel_{i+1}" for i in range(n_channels)]
    )
    df_stream_0['Timestamp'] = target_stream['time_stamps']

    if df_stream_0.empty:
        # No first sample to anchor on: return the empty frame with the full
        # column schema rather than failing on .iloc[0].
        print(f"Stream '{stream_name_to_extract}' contains no samples.")
        df_stream_0['Relative_Seconds'] = pd.Series(dtype='float64')
        df_stream_0['Datetime_Local'] = pd.Series(dtype='object')
        return df_stream_0

    df_stream_0['Relative_Seconds'] = df_stream_0['Timestamp'] - df_stream_0['Timestamp'].iloc[0]

    # Build the formatted wall-clock strings in a single pass.
    stamp_format = '%Y-%m-%d %H:%M:%S.%f %z'
    df_stream_0['Datetime_Local'] = [
        (start_dt + timedelta(seconds=s)).strftime(stamp_format)
        for s in df_stream_0['Relative_Seconds']
    ]
    return df_stream_0

# Set the stream name to extract
# "obci_stream_1" is listed with 5 channels in the stream summaries printed above;
# the following cells rename those channels to Delta/Theta/Alpha/Beta/Gamma.
stream_name_to_extract = "obci_stream_1" 


In [13]:
# visual only
# Pull the selected stream out of the Visual recording; the result is None
# (after a printed message) when no stream carries that name.
df_visual_a_eeg = extract_stream_df(visual_streams, visual_datetime, stream_name_to_extract)

if df_visual_a_eeg is not None:
    print("==== Visual Only ====")
    # NOTE(review): assumes the stream's channel order is Delta, Theta, Alpha,
    # Beta, Gamma — confirm against whatever produces this stream.
    df_visual_a_eeg = df_visual_a_eeg.rename(columns={
        'Channel_1': 'Delta',
        'Channel_2': 'Theta',
        'Channel_3': 'Alpha',
        'Channel_4': 'Beta',
        'Channel_5': 'Gamma',
    })

    # Show the first and last four rows as a quick sanity check.
    display(pd.concat([df_visual_a_eeg.head(4), df_visual_a_eeg.tail(4)]))


==== Visual Only ====


Unnamed: 0,Delta,Theta,Alpha,Beta,Gamma,Timestamp,Relative_Seconds,Datetime_Local
0,0.023354,0.148457,0.071923,0.212482,0.543784,187976.13989,0.0,2025-10-10 13:59:34.000000 +1100
1,0.023736,0.150557,0.071635,0.212954,0.541117,187976.176975,0.037085,2025-10-10 13:59:34.037085 +1100
2,0.024533,0.154749,0.070825,0.213377,0.536516,187976.214059,0.074169,2025-10-10 13:59:34.074169 +1100
3,0.025359,0.15882,0.069847,0.213349,0.532625,187976.251144,0.111254,2025-10-10 13:59:34.111254 +1100
19414,0.057988,0.500222,0.123278,0.09662,0.221891,188739.078665,762.938774,2025-10-10 14:12:16.938774 +1100
19415,0.057843,0.509645,0.11678,0.095459,0.220273,188739.120231,762.980341,2025-10-10 14:12:16.980341 +1100
19416,0.057759,0.514367,0.113331,0.095215,0.219328,188739.161798,763.021908,2025-10-10 14:12:17.021908 +1100
19417,0.0573,0.530563,0.10168,0.096055,0.214402,188739.203365,763.063475,2025-10-10 14:12:17.063475 +1100


In [14]:
# va only
# Pull the selected stream out of the Visual + Auditory recording; the result is
# None (after a printed message) when no stream carries that name.
df_va_a_eeg = extract_stream_df(va_streams, va_datetime, stream_name_to_extract)

if df_va_a_eeg is not None:
    print("==== va Only ====")
    # NOTE(review): assumes the stream's channel order is Delta, Theta, Alpha,
    # Beta, Gamma — confirm against whatever produces this stream.
    df_va_a_eeg = df_va_a_eeg.rename(columns={
        'Channel_1': 'Delta',
        'Channel_2': 'Theta',
        'Channel_3': 'Alpha',
        'Channel_4': 'Beta',
        'Channel_5': 'Gamma',
    })

    # Show the first and last four rows as a quick sanity check.
    display(pd.concat([df_va_a_eeg.head(4), df_va_a_eeg.tail(4)]))


==== va Only ====


Unnamed: 0,Delta,Theta,Alpha,Beta,Gamma,Timestamp,Relative_Seconds,Datetime_Local
0,0.066348,0.493109,0.170355,0.179506,0.090682,190828.397951,0.0,2025-10-10 14:46:52.000000 +1100
1,0.060903,0.487786,0.169603,0.183952,0.097756,190828.442039,0.044088,2025-10-10 14:46:52.044088 +1100
2,0.057736,0.484845,0.168514,0.186639,0.102266,190828.486127,0.088175,2025-10-10 14:46:52.088175 +1100
3,0.052903,0.482213,0.165789,0.190644,0.108451,190828.530214,0.132263,2025-10-10 14:46:52.132263 +1100
10962,0.009839,0.286736,0.126335,0.24673,0.330359,191311.68578,483.287829,2025-10-10 14:54:55.287829 +1100
10963,0.009771,0.293234,0.126844,0.254849,0.315303,191311.729868,483.331917,2025-10-10 14:54:55.331917 +1100
10964,0.009713,0.298504,0.127586,0.262049,0.302148,191311.773956,483.376004,2025-10-10 14:54:55.376004 +1100
10965,0.009642,0.302635,0.128762,0.267851,0.291111,191311.818043,483.420092,2025-10-10 14:54:55.420092 +1100


In [15]:
# vt only
# Pull the selected stream out of the Visual + Thermal recording; the result is
# None (after a printed message) when no stream carries that name.
df_vt_a_eeg = extract_stream_df(vt_streams, vt_datetime, stream_name_to_extract)

if df_vt_a_eeg is not None:
    print("==== vt Only ====")
    # NOTE(review): assumes the stream's channel order is Delta, Theta, Alpha,
    # Beta, Gamma — confirm against whatever produces this stream.
    df_vt_a_eeg = df_vt_a_eeg.rename(columns={
        'Channel_1': 'Delta',
        'Channel_2': 'Theta',
        'Channel_3': 'Alpha',
        'Channel_4': 'Beta',
        'Channel_5': 'Gamma',
    })

    # Show the first and last four rows as a quick sanity check.
    display(pd.concat([df_vt_a_eeg.head(4), df_vt_a_eeg.tail(4)]))


==== vt Only ====


Unnamed: 0,Delta,Theta,Alpha,Beta,Gamma,Timestamp,Relative_Seconds,Datetime_Local
0,0.00572,0.041067,0.20389,0.357959,0.391364,190027.741655,0.0,2025-10-10 14:33:35.000000 +1100
1,0.00575,0.041302,0.202465,0.357745,0.392738,190027.787733,0.046077,2025-10-10 14:33:35.046077 +1100
2,0.005786,0.042042,0.201025,0.356294,0.394853,190027.83381,0.092155,2025-10-10 14:33:35.092155 +1100
3,0.005742,0.042896,0.202212,0.354145,0.395005,190027.879887,0.138232,2025-10-10 14:33:35.138232 +1100
11115,0.060058,0.551085,0.194525,0.184167,0.010166,190539.89169,512.150034,2025-10-10 14:42:07.150034 +1100
11116,0.05928,0.550498,0.194841,0.185178,0.010202,190539.937767,512.196112,2025-10-10 14:42:07.196112 +1100
11117,0.057769,0.549396,0.195451,0.187113,0.010271,190539.983844,512.242189,2025-10-10 14:42:07.242189 +1100
11118,0.056781,0.549373,0.195415,0.188117,0.010314,190540.029922,512.288266,2025-10-10 14:42:07.288266 +1100


In [16]:
# vat
# Pull the selected stream out of the VAT recording; the result is None
# (after a printed message) when no stream carries that name.
df_vat_a_eeg = extract_stream_df(vat_streams, vat_datetime, stream_name_to_extract)

if df_vat_a_eeg is not None:
    print("==== vat Only ====")
    # NOTE(review): assumes the stream's channel order is Delta, Theta, Alpha,
    # Beta, Gamma — confirm against whatever produces this stream.
    df_vat_a_eeg = df_vat_a_eeg.rename(columns={
        'Channel_1': 'Delta',
        'Channel_2': 'Theta',
        'Channel_3': 'Alpha',
        'Channel_4': 'Beta',
        'Channel_5': 'Gamma',
    })

    # Show the first and last four rows as a quick sanity check.
    display(pd.concat([df_vat_a_eeg.head(4), df_vat_a_eeg.tail(4)]))

==== vat Only ====


Unnamed: 0,Delta,Theta,Alpha,Beta,Gamma,Timestamp,Relative_Seconds,Datetime_Local
0,0.03914,0.616095,0.156416,0.091466,0.096884,189081.844405,0.0,2025-10-10 14:18:03.000000 +1100
1,0.039036,0.617152,0.152499,0.091754,0.099559,189081.887579,0.043174,2025-10-10 14:18:03.043174 +1100
2,0.039188,0.618413,0.147308,0.09119,0.103902,189081.930753,0.086348,2025-10-10 14:18:03.086348 +1100
3,0.03928,0.618603,0.146421,0.090915,0.104781,189081.973927,0.129522,2025-10-10 14:18:03.129522 +1100
13871,0.020261,0.10649,0.350117,0.284847,0.238286,189680.709709,598.865304,2025-10-10 14:28:01.865304 +1100
13872,0.020104,0.106287,0.353012,0.282933,0.237664,189680.752883,598.908478,2025-10-10 14:28:01.908478 +1100
13873,0.01995,0.106078,0.355892,0.281059,0.237022,189680.796057,598.951652,2025-10-10 14:28:01.951652 +1100
13874,0.019797,0.105865,0.358755,0.279224,0.23636,189680.839231,598.994826,2025-10-10 14:28:01.994826 +1100


## Experiment Time

In [17]:
import pandas as pd

# Read the log files (updated file names)
# These paths are relative to the notebook's working directory; unlike the .xdf
# files they are not resolved through BASE_DIR / SUBJECT.
log_v   = pd.read_csv('ButtonToggleLog_v.csv')
log_va  = pd.read_csv('ButtonToggleLog_va.csv')
log_vt  = pd.read_csv('ButtonToggleLog_vt.csv')
log_vat = pd.read_csv('ButtonToggleLog_vat.csv')

# Utility to fetch time for a given Group and DataFrame
def get_time(df, element_name, col='UTC+10_Time', add_hours=1, as_str=True):
    """Look up the logged time of one marker and shift it by ``add_hours``.

    The first row whose ``Group`` equals ``element_name`` supplies the value of
    column ``col``. That value is parsed, moved forward by ``add_hours`` hours
    and rounded to the nearest 10 ms.

    Returns ``None`` when no row matches or the value cannot be parsed.
    Otherwise returns the timestamp itself when ``as_str`` is false, or a
    ``'YYYY-MM-DD HH:MM:SS.cc'`` string (two-digit hundredths of a second).
    """
    matches = df.loc[df['Group'] == element_name, col]
    if matches.empty:
        return None

    parsed = pd.to_datetime(matches.iloc[0], errors='coerce')
    if pd.isna(parsed):
        return None

    shifted = parsed + pd.Timedelta(hours=add_hours)
    rounded = shifted.round('10ms')

    if as_str:
        # After rounding, the microseconds are a whole number of hundredths.
        hundredths = rounded.microsecond // 10000
        return rounded.strftime('%Y-%m-%d %H:%M:%S') + f".{hundredths:02d}"
    return rounded

# For every condition, look up five markers in its button log by `Group` value.
# With the defaults, get_time shifts each logged time by +1 hour and returns a
# 'YYYY-MM-DD HH:MM:SS.cc' string (or None if the marker is missing/unparseable).
# NOTE(review): the marker codes presumably mean S01_S = state 1 start,
# S02_S = state 2 start, S03_S / S03_M / S03_E = state 3 start / mid / end — confirm.
# ---- Visual ----
start_time_v   = get_time(log_v, 'S01_S')
state02_start_time_v = get_time(log_v, 'S02_S')
state03_start_time_v = get_time(log_v, 'S03_S')
state03_m_time_v     = get_time(log_v, 'S03_M')
end_time_v     = get_time(log_v, 'S03_E')

# ---- VA ----
start_time_va   = get_time(log_va, 'S01_S')
state02_start_time_va = get_time(log_va, 'S02_S')
state03_start_time_va = get_time(log_va, 'S03_S')
state03_m_time_va     = get_time(log_va, 'S03_M')
end_time_va     = get_time(log_va, 'S03_E')

# ---- VT ----
start_time_vt   = get_time(log_vt, 'S01_S')
state02_start_time_vt = get_time(log_vt, 'S02_S')
state03_start_time_vt = get_time(log_vt, 'S03_S')
state03_m_time_vt     = get_time(log_vt, 'S03_M')
end_time_vt     = get_time(log_vt, 'S03_E')

# ---- VAT ----
start_time_vat   = get_time(log_vat, 'S01_S')
state02_start_time_vat = get_time(log_vat, 'S02_S')
state03_start_time_vat = get_time(log_vat, 'S03_S')
state03_m_time_vat     = get_time(log_vat, 'S03_M')
end_time_vat     = get_time(log_vat, 'S03_E')

# ---- Print results ----
print("Visual condition start:", start_time_v)
print("Visual State 02 start:", state02_start_time_v)
print("Visual State 03 start:", state03_start_time_v)
print("Visual State 03 m_time:", state03_m_time_v)
print("Visual end:", end_time_v)
print("---")
print("VA condition start:", start_time_va)
print("VA State 02 start:", state02_start_time_va)
print("VA State 03 start:", state03_start_time_va)
print("VA State 03 m_time:", state03_m_time_va)
print("VA end:", end_time_va)
print("---")
print("VT condition start:", start_time_vt)
print("VT State 02 start:", state02_start_time_vt)
print("VT State 03 start:", state03_start_time_vt)
print("VT State 03 m_time:", state03_m_time_vt)
print("VT end:", end_time_vt)
print("---")
print("VAT condition start:", start_time_vat)
print("VAT State 02 start:", state02_start_time_vat)
print("VAT State 03 start:", state03_start_time_vat)
print("VAT State 03 m_time:", state03_m_time_vat)
print("VAT end:", end_time_vat)


Visual condition start: 2025-10-10 14:06:41.28
Visual State 02 start: 2025-10-10 14:09:46.52
Visual State 03 start: 2025-10-10 14:10:51.83
Visual State 03 m_time: 2025-10-10 14:11:05.96
Visual end: 2025-10-10 14:11:24.30
---
VA condition start: 2025-10-10 14:49:28.21
VA State 02 start: 2025-10-10 14:52:32.16
VA State 03 start: 2025-10-10 14:53:33.81
VA State 03 m_time: 2025-10-10 14:53:44.99
VA end: 2025-10-10 14:53:55.33
---
VT condition start: 2025-10-10 14:37:01.43
VT State 02 start: 2025-10-10 14:40:02.04
VT State 03 start: 2025-10-10 14:41:04.22
VT State 03 m_time: 2025-10-10 14:41:19.62
VT end: 2025-10-10 14:41:32.79
---
VAT condition start: 2025-10-10 14:22:41.10
VAT State 02 start: 2025-10-10 14:25:43.50
VAT State 03 start: 2025-10-10 14:26:48.91
VAT State 03 m_time: 2025-10-10 14:27:04.78
VAT end: 2025-10-10 14:27:19.17


## Baseline calculation

In [18]:
import pandas as pd
from pandas import Timedelta

# Make copies (avoid modifying source DFs)
# get_interval_theta_alpha_beta overwrites the 'Datetime_Local' column of the
# frame it receives, so it is given these copies rather than the extracted frames.
# If an extraction cell returned None (stream not found), .copy() fails here.
df_v   = df_visual_a_eeg.copy()
df_va  = df_va_a_eeg.copy()
df_vt  = df_vt_a_eeg.copy()
df_vat = df_vat_a_eeg.copy()

def get_interval_theta_alpha_beta(df, interval_start, interval_end):
    """Mean Theta, Alpha and Beta over the half-open window [start, end).

    ``df['Datetime_Local']`` is converted to datetimes in place (the caller's
    frame is modified). Each bound is parsed and then localized to the data's
    timezone when naive, or converted to it when already timezone-aware.

    Returns a ``(theta, alpha, beta)`` tuple of means rounded to two decimals;
    a band with no usable samples in the window yields ``np.nan``.
    """
    df['Datetime_Local'] = pd.to_datetime(df['Datetime_Local'])
    stamps = df['Datetime_Local']
    data_tz = stamps.dt.tz

    def _align(bound):
        # Bring a window bound into the same timezone as the data column.
        ts = pd.to_datetime(bound)
        if ts.tzinfo is None:
            return ts.tz_localize(data_tz)
        return ts.tz_convert(data_tz)

    window_start = _align(interval_start)
    window_end = _align(interval_end)

    inside = (stamps >= window_start) & (stamps < window_end)
    window = df[inside]

    rounded = []
    for band in ('Theta', 'Alpha', 'Beta'):
        average = window[band].mean()
        rounded.append(round(average, 2) if pd.notnull(average) else np.nan)
    return tuple(rounded)

# ----- Baseline windows -----
# Two one-minute windows are defined relative to each condition's S01_S start time:
#   baseline_00  = [start - 1 min, start)
#   baseline_pre = [start - 2 min, start - 1 min)
# The start times are the naive strings produced by get_time;
# get_interval_theta_alpha_beta localizes them to the EEG data's timezone.
# V
baseline_00_start_v  = pd.to_datetime(start_time_v)  - Timedelta(minutes=1)
baseline_00_end_v    = pd.to_datetime(start_time_v)
baseline_pre_start_v = pd.to_datetime(start_time_v)  - Timedelta(minutes=2)
baseline_pre_end_v   = pd.to_datetime(start_time_v)  - Timedelta(minutes=1)

# VA
baseline_00_start_va  = pd.to_datetime(start_time_va)  - Timedelta(minutes=1)
baseline_00_end_va    = pd.to_datetime(start_time_va)
baseline_pre_start_va = pd.to_datetime(start_time_va)  - Timedelta(minutes=2)
baseline_pre_end_va   = pd.to_datetime(start_time_va)  - Timedelta(minutes=1)

# VT
baseline_00_start_vt  = pd.to_datetime(start_time_vt)  - Timedelta(minutes=1)
baseline_00_end_vt    = pd.to_datetime(start_time_vt)
baseline_pre_start_vt = pd.to_datetime(start_time_vt)  - Timedelta(minutes=2)
baseline_pre_end_vt   = pd.to_datetime(start_time_vt)  - Timedelta(minutes=1)

# VAT
baseline_00_start_vat  = pd.to_datetime(start_time_vat) - Timedelta(minutes=1)
baseline_00_end_vat    = pd.to_datetime(start_time_vat)
baseline_pre_start_vat = pd.to_datetime(start_time_vat) - Timedelta(minutes=2)
baseline_pre_end_vat   = pd.to_datetime(start_time_vat) - Timedelta(minutes=1)

# ----- Compute means -----
# Each call returns (theta, alpha, beta) rounded to two decimals; a window
# containing no samples yields NaN for every band.
theta_00_v,  alpha_00_v,  beta_00_v  = get_interval_theta_alpha_beta(df_v,   baseline_00_start_v,  baseline_00_end_v)
theta_pre_v, alpha_pre_v, beta_pre_v = get_interval_theta_alpha_beta(df_v,   baseline_pre_start_v, baseline_pre_end_v)

theta_00_va,  alpha_00_va,  beta_00_va  = get_interval_theta_alpha_beta(df_va,  baseline_00_start_va,  baseline_00_end_va)
theta_pre_va, alpha_pre_va, beta_pre_va = get_interval_theta_alpha_beta(df_va,  baseline_pre_start_va, baseline_pre_end_va)

theta_00_vt,  alpha_00_vt,  beta_00_vt  = get_interval_theta_alpha_beta(df_vt,  baseline_00_start_vt,  baseline_00_end_vt)
theta_pre_vt, alpha_pre_vt, beta_pre_vt = get_interval_theta_alpha_beta(df_vt,  baseline_pre_start_vt, baseline_pre_end_vt)

theta_00_vat,  alpha_00_vat,  beta_00_vat  = get_interval_theta_alpha_beta(df_vat, baseline_00_start_vat,  baseline_00_end_vat)
theta_pre_vat, alpha_pre_vat, beta_pre_vat = get_interval_theta_alpha_beta(df_vat, baseline_pre_start_vat, baseline_pre_end_vat)

# ----- Print -----
# The "pre" baselines for VA / VT / VAT are computed above but their print
# lines are disabled; only V reports both windows.
print(f"V   Baseline_00 (-1~0 min)    theta: {theta_00_v:.2f}, alpha: {alpha_00_v:.2f}, beta: {beta_00_v:.2f}")
print(f"V   Baseline_pre (-2~-1 min)  theta: {theta_pre_v:.2f}, alpha: {alpha_pre_v:.2f}, beta: {beta_pre_v:.2f}")
print('---')
print(f"VA  Baseline_00 (-1~0 min)    theta: {theta_00_va:.2f}, alpha: {alpha_00_va:.2f}, beta: {beta_00_va:.2f}")
# print(f"VA  Baseline_pre (-2~-1 min)  theta: {theta_pre_va:.2f}, alpha: {alpha_pre_va:.2f}, beta: {beta_pre_va:.2f}")
print('---')
print(f"VT  Baseline_00 (-1~0 min)    theta: {theta_00_vt:.2f}, alpha: {alpha_00_vt:.2f}, beta: {beta_00_vt:.2f}")
# print(f"VT  Baseline_pre (-2~-1 min)  theta: {theta_pre_vt:.2f}, alpha: {alpha_pre_vt:.2f}, beta: {beta_pre_vt:.2f}")
print('---')
print(f"VAT Baseline_00 (-1~0 min)    theta: {theta_00_vat:.2f}, alpha: {alpha_00_vat:.2f}, beta: {beta_00_vat:.2f}")
# print(f"VAT Baseline_pre (-2~-1 min)  theta: {theta_pre_vat:.2f}, alpha: {alpha_pre_vat:.2f}, beta: {beta_pre_vat:.2f}")


V   Baseline_00 (-1~0 min)    theta: 0.37, alpha: 0.24, beta: 0.24
V   Baseline_pre (-2~-1 min)  theta: 0.37, alpha: 0.25, beta: 0.24
---
VA  Baseline_00 (-1~0 min)    theta: 0.31, alpha: 0.27, beta: 0.26
---
VT  Baseline_00 (-1~0 min)    theta: 0.32, alpha: 0.29, beta: 0.23
---
VAT Baseline_00 (-1~0 min)    theta: 0.33, alpha: 0.27, beta: 0.23


## Calculate mean values in 30-second windows for Average EEG BP

In [19]:
# Global pandas display option: every float shown from here on is rendered with
# two decimals. This changes display only, not the stored values.
pd.set_option('display.float_format', lambda x: f"{x:.2f}")

def calc_rolling_band_means_eeg(
    df_eeg,
    start_time,
    end_time,
    interval_s=30,
    min_total_s=270,
    max_total_s=270
):
    df = df_eeg.copy()

    # Ensure timezone-aware datetimes aligned to the data column
    df['Datetime_Local'] = pd.to_datetime(df['Datetime_Local'])
    tz = df['Datetime_Local'].dt.tz

    start_time = pd.to_datetime(start_time)
    end_time   = pd.to_datetime(end_time)
    start_time = start_time.tz_localize(tz) if start_time.tzinfo is None else start_time.tz_convert(tz)
    end_time   = end_time.tz_localize(tz)   if end_time.tzinfo   is None else end_time.tz_convert(tz)

    # Hard cap: never go beyond start_time + max_total_s (default 270s)
    cap_end_time = min(end_time, start_time + pd.Timedelta(seconds=max_total_s))

    interval = pd.Timedelta(seconds=interval_s)
    total_duration = cap_end_time - start_time  # duration after capping

    results = []
    idx = 1

    # Case A: whole interval shorter than one window -> single mean over the (capped) interval
    if total_duration < interval:
        seg_full = df[(df['Datetime_Local'] >= start_time) & (df['Datetime_Local'] < cap_end_time)]
        mT = seg_full['Theta'].mean()
        mA = seg_full['Alpha'].mean()
        mB = seg_full['Beta'].mean()
        seconds_marker = total_duration.total_seconds()  # < interval_s
        return pd.DataFrame([[round(seconds_marker, 2),
                              round(mT, 2) if pd.notnull(mT) else None,
                              round(mA, 2) if pd.notnull(mA) else None,
                              round(mB, 2) if pd.notnull(mB) else None]],
                            columns=['Seconds', 'theta', 'alpha', 'beta'])

    # Case B: full 30s (interval_s) windows up to the capped end
    current_start = start_time
    while current_start + interval <= cap_end_time:
        current_end = current_start + interval
        seg = df[(df['Datetime_Local'] >= current_start) & (df['Datetime_Local'] < current_end)]
        mT = seg['Theta'].mean()
        mA = seg['Alpha'].mean()
        mB = seg['Beta'].mean()
        results.append([idx * interval_s,
                        round(mT, 2) if pd.notnull(mT) else None,
                        round(mA, 2) if pd.notnull(mA) else None,
                        round(mB, 2) if pd.notnull(mB) else None])
        current_start = current_end
        idx += 1

    # Case C: trailing segment < interval_s (won't run if cap_end_time hits an exact multiple)
    if current_start < cap_end_time:
        # If capped duration < min_total_s (e.g., < 270s), use whole-interval mean; else use tail-only mean
        if total_duration.total_seconds() < min_total_s:
            seg_last = df[(df['Datetime_Local'] >= start_time) & (df['Datetime_Local'] < cap_end_time)]
        else:
            seg_last = df[(df['Datetime_Local'] >= current_start) & (df['Datetime_Local'] < cap_end_time)]

        mT = seg_last['Theta'].mean()
        mA = seg_last['Alpha'].mean()
        mB = seg_last['Beta'].mean()
        seconds_marker = (idx - 1) * interval_s + (cap_end_time - current_start).total_seconds()
        results.append([round(seconds_marker, 2),
                        round(mT, 2) if pd.notnull(mT) else None,
                        round(mA, 2) if pd.notnull(mA) else None,
                        round(mB, 2) if pd.notnull(mB) else None])

    return pd.DataFrame(results, columns=['Seconds', 'theta', 'alpha', 'beta'])



# --- compute for all four groups ---
# One (frame, start, end) triple per condition, evaluated in V / VA / VT / VAT order.
(df_v_means_eeg,
 df_va_means_eeg,
 df_vt_means_eeg,
 df_vat_means_eeg) = [
    calc_rolling_band_means_eeg(cond_frame, cond_start, cond_end)
    for cond_frame, cond_start, cond_end in (
        (df_v,   start_time_v,   end_time_v),
        (df_va,  start_time_va,  end_time_va),
        (df_vt,  start_time_vt,  end_time_vt),
        (df_vat, start_time_vat, end_time_vat),
    )
]

# --- align to common minimum length ---
# Truncate every table to the shortest one so all conditions expose the same windows.
min_len_eeg = min(
    len(means_table)
    for means_table in (df_v_means_eeg, df_va_means_eeg, df_vt_means_eeg, df_vat_means_eeg)
)
(df_v_means_aligned_eeg,
 df_va_means_aligned_eeg,
 df_vt_means_aligned_eeg,
 df_vat_means_aligned_eeg) = [
    means_table.iloc[:min_len_eeg].reset_index(drop=True)
    for means_table in (df_v_means_eeg, df_va_means_eeg, df_vt_means_eeg, df_vat_means_eeg)
]

# --- display ---
# display() renders its arguments one after another: caption, table, caption, table, ...
display(
    'V EEG 30s Interval Means',   df_v_means_aligned_eeg,
    'VA EEG 30s Interval Means',  df_va_means_aligned_eeg,
    'VT EEG 30s Interval Means',  df_vt_means_aligned_eeg,
    'VAT EEG 30s Interval Means', df_vat_means_aligned_eeg,
)


'V EEG 30s Interval Means'

Unnamed: 0,Seconds,theta,alpha,beta
0,30,0.33,0.26,0.25
1,60,0.39,0.24,0.22
2,90,0.37,0.24,0.26
3,120,0.37,0.24,0.24
4,150,0.34,0.25,0.26
5,180,0.39,0.26,0.22
6,210,0.35,0.24,0.25
7,240,0.28,0.29,0.27
8,270,0.37,0.25,0.23


'VA EEG 30s Interval Means'

Unnamed: 0,Seconds,theta,alpha,beta
0,30.0,0.4,0.21,0.23
1,60.0,0.36,0.24,0.25
2,90.0,0.34,0.22,0.27
3,120.0,0.3,0.23,0.29
4,150.0,0.37,0.24,0.23
5,180.0,0.31,0.26,0.27
6,210.0,0.23,0.37,0.22
7,240.0,0.34,0.26,0.24
8,267.12,0.34,0.25,0.25


'VT EEG 30s Interval Means'

Unnamed: 0,Seconds,theta,alpha,beta
0,30,0.37,0.23,0.24
1,60,0.32,0.27,0.26
2,90,0.39,0.23,0.25
3,120,0.33,0.24,0.27
4,150,0.39,0.24,0.23
5,180,0.4,0.23,0.22
6,210,0.28,0.27,0.25
7,240,0.39,0.23,0.23
8,270,0.4,0.22,0.23


'VAT EEG 30s Interval Means'

Unnamed: 0,Seconds,theta,alpha,beta
0,30,0.24,0.3,0.26
1,60,0.27,0.19,0.3
2,90,0.19,0.26,0.3
3,120,0.19,0.24,0.39
4,150,0.2,0.2,0.39
5,180,0.23,0.23,0.35
6,210,0.2,0.34,0.28
7,240,0.22,0.37,0.27
8,270,0.16,0.36,0.29


## Export the cleaned average EEG data to a CSV file

In [20]:
import os
import pandas as pd

# ========== Participant metadata (change for each participant) ==========
participant_id, gender = "P14", "M"

# ========== Output location ==========
output_dir = r"G:\USYD_PhD_OneDrive\OneDrive - The University of Sydney (Students)\PhD_USYD\2025\Prototype B\EEG"
filename = "B_Participant_EEG.csv"
filepath = os.path.join(output_dir, filename)
# Create the export folder if it does not exist yet
os.makedirs(output_dir, exist_ok=True)

# ========== Build Output Row Function for EEG ==========
def build_output_row_eeg(
    participant_id, gender, condition,
    theta_pre, alpha_pre, beta_pre,
    theta_00, alpha_00, beta_00,
    means_df,
    skip_pre=False
):
    """
    Assemble one flat export row for a condition.

    Layout: participant id, gender, condition, the three Baseline_Pre values,
    the three Baseline_00 values, then every windowed theta value, every alpha
    value and every beta value taken from `means_df` (columns 'theta', 'alpha',
    'beta'). With `skip_pre=True` the three Baseline_Pre cells are empty strings
    and the `*_pre` arguments are ignored.
    """
    # Blank placeholders keep the column positions stable when there is no pre-baseline
    pre_cells = ["", "", ""] if skip_pre else [theta_pre, alpha_pre, beta_pre]

    # All theta windows first, then alpha, then beta
    window_cells = [
        value
        for band in ('theta', 'alpha', 'beta')
        for value in means_df[band].tolist()
    ]

    return [
        participant_id, gender, condition,
        *pre_cells,
        theta_00, alpha_00, beta_00,
        *window_cells,
    ]

# ========== Using Aligned DataFrames ==========
# Baseline_Pre values are exported for V only: VA and VT pass None for them,
# and VAT blanks the fields through skip_pre=True.
row_v = build_output_row_eeg(
    participant_id, gender, "V",
    theta_pre_v, alpha_pre_v, beta_pre_v,
    theta_00_v, alpha_00_v, beta_00_v,
    means_df=df_v_means_aligned_eeg,
)
row_va = build_output_row_eeg(
    participant_id, gender, "VA",
    None, None, None,
    theta_00_va, alpha_00_va, beta_00_va,
    means_df=df_va_means_aligned_eeg,
)
row_vt = build_output_row_eeg(
    participant_id, gender, "VT",
    None, None, None,
    theta_00_vt, alpha_00_vt, beta_00_vt,
    means_df=df_vt_means_aligned_eeg,
)
row_vat = build_output_row_eeg(
    participant_id, gender, "VAT",
    None, None, None,                 # not used when skip_pre=True
    theta_00_vat, alpha_00_vat, beta_00_vat,
    means_df=df_vat_means_aligned_eeg,
    skip_pre=True,
)

# ========== Build Columns Based on Number of Windows ==========
# All aligned tables share one length, so the V table is used to count windows.
n_windows = len(df_v_means_aligned_eeg)
columns = [
    "Participant ID", "Gender", "Condition",
    "Baseline_Pre_theta", "Baseline_Pre_alpha", "Baseline_Pre_beta",
    "Baseline_00_theta", "Baseline_00_alpha", "Baseline_00_beta",
]
columns.extend(f"theta_{(i+1)*30}s" for i in range(n_windows))
columns.extend(f"alpha_{(i+1)*30}s" for i in range(n_windows))
columns.extend(f"beta_{(i+1)*30}s" for i in range(n_windows))

# ========== Save or Append ==========
rows = [row_v, row_va, row_vt, row_vat]
df_out = pd.DataFrame(rows, columns=columns)

# A new file gets a header row; an existing file only receives the data rows.
csv_already_there = os.path.exists(filepath)
df_out.to_csv(
    filepath,
    mode='a' if csv_already_there else 'w',
    index=False,
    header=not csv_already_there,
)

print(f"File saved at: {filepath}")


File saved at: G:\USYD_PhD_OneDrive\OneDrive - The University of Sydney (Students)\PhD_USYD\2025\Prototype B\EEG\B_Participant_EEG.csv


## Calculate mean band values in the 2 s before and after S3_S

In [21]:
import pandas as pd

# Parse the timestamp column of every condition frame (unparseable entries become NaT)
for _eeg_frame in (df_visual_a_eeg, df_va_a_eeg, df_vt_a_eeg, df_vat_a_eeg):
    _eeg_frame['Datetime_Local'] = pd.to_datetime(_eeg_frame['Datetime_Local'], errors='coerce')

# Reference timezone from Visual DF
tz = df_visual_a_eeg['Datetime_Local'].dt.tz

def _to_reference_tz(moment):
    """Return `moment` as a Timestamp in `tz`: naive values are localized, aware ones converted."""
    stamp = pd.to_datetime(moment)
    if stamp.tzinfo is None:
        return stamp.tz_localize(tz)
    return stamp.tz_convert(tz)

# --- align S3_S timestamps to the same timezone (V / VA / VT / VAT) ---
s3_start_time_v   = _to_reference_tz(state03_start_time_v)
s3_start_time_va  = _to_reference_tz(state03_start_time_va)
s3_start_time_vt  = _to_reference_tz(state03_start_time_vt)
s3_start_time_vat = _to_reference_tz(state03_start_time_vat)

# Window lengths on each side of S3_S
interval_before = pd.Timedelta(seconds=2)
interval_after  = pd.Timedelta(seconds=2)

def before_after_windows(df, s3_start):
    """
    Mean band values just before and just after `s3_start`.

    Uses the module-level `interval_before` / `interval_after` window lengths.
    Both windows are half-open: [s3_start - interval_before, s3_start) and
    [s3_start, s3_start + interval_after). Returns a two-row DataFrame
    ('Before', 'After') with columns Window, Delta, Theta, Alpha, Beta, Gamma.
    """
    bands = ['Delta','Theta','Alpha','Beta','Gamma']
    stamps = df['Datetime_Local']

    in_before = (stamps >= (s3_start - interval_before)) & (stamps < s3_start)
    in_after  = (stamps >= s3_start) & (stamps < (s3_start + interval_after))

    summaries = []
    for window_name, mask in (('Before', in_before), ('After', in_after)):
        # Column means as a single-row frame, tagged with the window name
        summary = df[mask][bands].mean().to_frame().T
        summary['Window'] = window_name
        summaries.append(summary)

    stacked = pd.concat(summaries, ignore_index=True)
    return stacked[['Window'] + bands]

# --- V / VA / VT / VAT ---
# For each condition: compute the before/after table, print its banner, show the table.
_s3_window_tables = []
for _cond_frame, _cond_s3_start, _cond_banner in (
    (df_visual_a_eeg, s3_start_time_v,   "=== V Condition (Mean band power 2s before and after S3_S) ==="),
    (df_va_a_eeg,     s3_start_time_va,  "=== VA Condition (Mean band power 2s before and after S3_S) ==="),
    (df_vt_a_eeg,     s3_start_time_vt,  "=== VT Condition (Mean band power 2s before and after S3_S) ==="),
    (df_vat_a_eeg,    s3_start_time_vat, "=== VAT Condition (Mean band power 2s before and after S3_S) ==="),
):
    _cond_table = before_after_windows(_cond_frame, _cond_s3_start)
    print(_cond_banner)
    display(_cond_table)
    _s3_window_tables.append(_cond_table)

# Named tables used by the export cell
df_v_window, df_va_window, df_vt_window, df_vat_window = _s3_window_tables


=== V Condition (Mean band power 2s before and after S3_S) ===


Unnamed: 0,Window,Delta,Theta,Alpha,Beta,Gamma
0,Before,0.06,0.4,0.25,0.19,0.1
1,After,0.06,0.42,0.25,0.19,0.08


=== VA Condition (Mean band power 2s before and after S3_S) ===


Unnamed: 0,Window,Delta,Theta,Alpha,Beta,Gamma
0,Before,0.04,0.41,0.2,0.26,0.09
1,After,0.07,0.44,0.23,0.18,0.08


=== VT Condition (Mean band power 2s before and after S3_S) ===


Unnamed: 0,Window,Delta,Theta,Alpha,Beta,Gamma
0,Before,0.03,0.42,0.24,0.24,0.08
1,After,0.02,0.43,0.26,0.21,0.07


=== VAT Condition (Mean band power 2s before and after S3_S) ===


Unnamed: 0,Window,Delta,Theta,Alpha,Beta,Gamma
0,Before,0.01,0.15,0.37,0.3,0.17
1,After,0.04,0.2,0.38,0.23,0.15


## Export clean average S3 2s Before and After Data to CSV files

In [22]:
import os
import pandas as pd

# ========== Participant metadata (change for each participant) ==========
participant_id, gender = "P14", "M"

# ========== Output location ==========
output_dir = r"G:\USYD_PhD_OneDrive\OneDrive - The University of Sydney (Students)\PhD_USYD\2025\Prototype B\EEG"
filename = "B_Participant_EEG_S3.csv"
filepath = os.path.join(output_dir, filename)
# Create the export folder if it does not exist yet
os.makedirs(output_dir, exist_ok=True)

# ========== Build Output Row Function ==========
def build_output_row_eeg_S3(
    participant_id, gender, condition,
    theta_pre, alpha_pre, beta_pre,
    theta_00, alpha_00, beta_00,
    theta_before, alpha_before, beta_before,
    theta_after, alpha_after, beta_after,
    skip_pre=False
):
    """
    Assemble one flat export row: identifiers, Baseline_Pre, Baseline_00,
    Before-window and After-window theta/alpha/beta values, in that order.

    With `skip_pre=True` the three Baseline_Pre cells are empty strings and
    the `*_pre` arguments are ignored.
    """
    # Empty placeholders keep the column positions stable when there is no pre-baseline
    pre_cells = ["", "", ""] if skip_pre else [theta_pre, alpha_pre, beta_pre]
    return [
        participant_id, gender, condition,
        *pre_cells,
        theta_00, alpha_00, beta_00,
        theta_before, alpha_before, beta_before,
        theta_after, alpha_after, beta_after,
    ]

def pick_band(df_window, window_label, band):
    """Return, as a float, the `band` value of the first row whose 'Window' equals `window_label`."""
    matching_rows = df_window[df_window['Window'] == window_label]
    first_value = matching_rows[band].to_numpy()[0]
    return float(first_value)

# ========== Fill in from calculations ==========
def _s3_window_values(df_window):
    """Return Before theta/alpha/beta followed by After theta/alpha/beta from one window table."""
    return [
        pick_band(df_window, window_label, band)
        for window_label in ('Before', 'After')
        for band in ('Theta', 'Alpha', 'Beta')
    ]

# V (the only condition that exports Baseline_Pre values)
row_v = build_output_row_eeg_S3(
    participant_id, gender, "V",
    theta_pre_v, alpha_pre_v, beta_pre_v,
    theta_00_v, alpha_00_v, beta_00_v,
    *_s3_window_values(df_v_window),
    skip_pre=False,
)

# VA
row_va = build_output_row_eeg_S3(
    participant_id, gender, "VA",
    None, None, None,
    theta_00_va, alpha_00_va, beta_00_va,
    *_s3_window_values(df_va_window),
    skip_pre=False,
)

# VT
row_vt = build_output_row_eeg_S3(
    participant_id, gender, "VT",
    None, None, None,
    theta_00_vt, alpha_00_vt, beta_00_vt,
    *_s3_window_values(df_vt_window),
    skip_pre=False,
)

# VAT (Pre left blank)
row_vat = build_output_row_eeg_S3(
    participant_id, gender, "VAT",
    None, None, None,
    theta_00_vat, alpha_00_vat, beta_00_vat,
    *_s3_window_values(df_vat_window),
    skip_pre=True,
)

# ========== Columns ==========
columns = [
    "Participant ID", "Gender", "Condition",
    "Baseline_Pre_theta", "Baseline_Pre_alpha", "Baseline_Pre_beta",
    "Baseline_00_theta", "Baseline_00_alpha", "Baseline_00_beta",
    "Before_theta", "Before_alpha", "Before_beta",
    "After_theta",  "After_alpha",  "After_beta"
]

# ========== Save or Append ==========
rows = [row_v, row_va, row_vt, row_vat]
df_out = pd.DataFrame(rows, columns=columns)

# A new file gets a header row; an existing file only receives the data rows.
csv_already_there = os.path.exists(filepath)
df_out.to_csv(
    filepath,
    mode='a' if csv_already_there else 'w',
    index=False,
    header=not csv_already_there,
)

print(f"File saved at: {filepath}")

File saved at: G:\USYD_PhD_OneDrive\OneDrive - The University of Sydney (Students)\PhD_USYD\2025\Prototype B\EEG\B_Participant_EEG_S3.csv


## Extract nearest-sample band values at the three S3 time points (S3_S, S3_M, S3_E)

In [23]:
import pandas as pd

# Helper: pick nearest value at ts (align tz to df)
def val_at_timestamp(df, ts, ts_col='Datetime_Local', val_col='Theta'):
    """
    Return the `val_col` value (rounded to 2 decimals) of the row whose `ts_col`
    timestamp is nearest to `ts`.

    `df` is never modified: a non-datetime `ts_col` is parsed into a temporary
    Series (unparseable entries become NaT and are ignored). When the column is
    timezone-aware, a naive `ts` is localized to that timezone and an aware `ts`
    is converted to it.

    Returns None when `val_col` is not a column of `df`, when there is no valid
    timestamp at all (empty frame or all NaT), or when the selected value is missing.
    """
    if val_col not in df.columns:
        return None

    # Work on a local Series so the caller's frame keeps its original column
    times = df[ts_col]
    if not pd.api.types.is_datetime64_any_dtype(times):
        times = pd.to_datetime(times, errors='coerce')

    ts = pd.to_datetime(ts)

    # align timezone
    tz = times.dt.tz
    if tz is not None:
        ts = ts.tz_localize(tz) if ts.tzinfo is None else ts.tz_convert(tz)

    # guard empty / all-NaT
    if times.isna().all():
        return None

    # Select by position rather than by index label: with duplicate labels a
    # label lookup returns several rows instead of one scalar.
    nearest_pos = (times - ts).abs().reset_index(drop=True).idxmin()
    val = df[val_col].iloc[nearest_pos]
    return None if pd.isna(val) else round(float(val), 2)

# Candidate band columns; each frame contributes only the ones it actually has
candidate_bands = ['Delta', 'Theta', 'Alpha', 'Beta', 'Gamma']

# Build a row for one condition
def build_band_row(label, df, t_s, t_m, t_e):
    """
    Collect the nearest-sample value of every available band at three time points.

    Returns `(row, bands_present)`: `row` maps 'Condition' to `label` and, for
    each band found in `df`, the keys S3_S_<band>, S3_M_<band>, S3_E_<band> to
    the values looked up at `t_s`, `t_m`, `t_e`; `bands_present` lists the bands
    of `candidate_bands` that exist as columns of `df`.
    """
    bands_present = [name for name in candidate_bands if name in df.columns]
    row = {'Condition': label}
    for band in bands_present:
        # One lookup per time point, in S3_S / S3_M / S3_E order
        row.update({
            f"S3_S_{band}": val_at_timestamp(df, t_s, val_col=band),
            f"S3_M_{band}": val_at_timestamp(df, t_m, val_col=band),
            f"S3_E_{band}": val_at_timestamp(df, t_e, val_col=band),
        })
    return row, bands_present

# Build one row per condition from the existing time variables and frames.
# end_time_* supplies the S3_E time point of each condition.
row_v, bands_v = build_band_row(
    "V", df_visual_a_eeg, state03_start_time_v, state03_m_time_v, end_time_v)
row_va, bands_va = build_band_row(
    "VA", df_va_a_eeg, state03_start_time_va, state03_m_time_va, end_time_va)
row_vt, bands_vt = build_band_row(
    "VT", df_vt_a_eeg, state03_start_time_vt, state03_m_time_vt, end_time_vt)
row_vat, bands_vat = build_band_row(
    "VAT", df_vat_a_eeg, state03_start_time_vat, state03_m_time_vat, end_time_vat)

rows = [row_v, row_va, row_vt, row_vat]

# Union of the bands seen in any condition
all_bands = set()
for _bands_found in (bands_v, bands_va, bands_vt, bands_vat):
    all_bands.update(_bands_found)

# Create a tidy column order: Condition, then per-band S3_S/M/E
ordered_cols = ['Condition']
for band in candidate_bands:
    if band in all_bands:
        ordered_cols.extend([f"S3_S_{band}", f"S3_M_{band}", f"S3_E_{band}"])

df_eeg_points = pd.DataFrame(rows)[ordered_cols]

display(df_eeg_points)


Unnamed: 0,Condition,S3_S_Delta,S3_M_Delta,S3_E_Delta,S3_S_Theta,S3_M_Theta,S3_E_Theta,S3_S_Alpha,S3_M_Alpha,S3_E_Alpha,S3_S_Beta,S3_M_Beta,S3_E_Beta,S3_S_Gamma,S3_M_Gamma,S3_E_Gamma
0,V,0.05,0.04,0.04,0.41,0.43,0.38,0.27,0.22,0.26,0.18,0.23,0.23,0.09,0.08,0.09
1,VA,0.06,0.04,0.03,0.45,0.43,0.36,0.19,0.21,0.27,0.21,0.23,0.25,0.09,0.1,0.09
2,VT,0.02,0.03,0.05,0.4,0.41,0.28,0.25,0.27,0.27,0.25,0.21,0.3,0.08,0.07,0.1
3,VAT,0.01,0.07,0.01,0.16,0.26,0.1,0.44,0.34,0.25,0.25,0.23,0.42,0.14,0.1,0.21


In [24]:
import os
import pandas as pd

# ========== Participant metadata ==========
participant_id, gender = "P14", "M"

# ========== Output location ==========
output_dir = r"G:\USYD_PhD_OneDrive\OneDrive - The University of Sydney (Students)\PhD_USYD\2025\Prototype B\EEG"
filename = "B_Participant_EEG_S3_Task.csv"
filepath = os.path.join(output_dir, filename)
# Create the export folder if it does not exist yet
os.makedirs(output_dir, exist_ok=True)

# ========== Helper: nearest-sample value at a timestamp ==========
def val_at_timestamp(df, ts, ts_col='Datetime_Local', val_col='Theta'):
    """
    Return the `val_col` value (rounded to 2 decimals) of the row whose `ts_col`
    timestamp is nearest to `ts`.

    `df` is never modified: a non-datetime `ts_col` is parsed into a temporary
    Series (unparseable entries become NaT and are ignored). When the column is
    timezone-aware, a naive `ts` is localized to that timezone and an aware `ts`
    is converted to it.

    Returns None when `val_col` is not a column of `df`, when there is no valid
    timestamp at all (empty frame or all NaT), or when the selected value is missing.
    """
    if val_col not in df.columns:
        return None

    # Work on a local Series so the caller's frame keeps its original column
    times = df[ts_col]
    if not pd.api.types.is_datetime64_any_dtype(times):
        times = pd.to_datetime(times, errors='coerce')

    ts = pd.to_datetime(ts)

    tz = times.dt.tz
    if tz is not None:
        ts = ts.tz_localize(tz) if ts.tzinfo is None else ts.tz_convert(tz)

    if times.isna().all():
        return None

    # Select by position rather than by index label: with duplicate labels a
    # label lookup returns several rows instead of one scalar.
    nearest_pos = (times - ts).abs().reset_index(drop=True).idxmin()
    val = df[val_col].iloc[nearest_pos]
    return None if pd.isna(val) else round(float(val), 2)

def fmt2(x):
    """
    Format `x` as a string with two decimals for CSV export.

    Returns "" for None, for anything that cannot be converted to float, and for
    any value that converts to NaN (Python or NumPy floats of any width, or the
    string "nan"), so missing data is always written as a blank cell.
    """
    if x is None:
        return ""
    try:
        value = float(x)
    except (TypeError, ValueError, OverflowError):
        # Not numeric (e.g. pd.NA, arbitrary text) or too large for a float: export as blank
        return ""
    # Test the converted value so NaN is caught whatever the original type was
    return "" if pd.isna(value) else f"{value:.2f}"

# ========== Build one row for a condition (Theta/Alpha/Beta) ==========
def build_row_eeg_points(condition_label, df,
                         pre_theta, pre_alpha, pre_beta,
                         zero_theta, zero_alpha, zero_beta,
                         t_s3s, t_s3m, t_s3e,
                         skip_pre=False):
    """
    Assemble one export row of formatted strings for a condition.

    Layout: the module-level `participant_id` and `gender`, `condition_label`,
    three Baseline_Pre cells, three Baseline_00 cells, then Theta/Alpha/Beta
    looked up with `val_at_timestamp` at `t_s3s`, `t_s3m` and `t_s3e`. Numeric
    cells go through `fmt2`. With `skip_pre=True` the Baseline_Pre cells are
    empty strings and the `pre_*` arguments are ignored.
    """
    # Nearest-sample lookups, ordered time point by time point, Theta/Alpha/Beta within each
    point_values = [
        val_at_timestamp(df, time_point, val_col=band)
        for time_point in (t_s3s, t_s3m, t_s3e)
        for band in ('Theta', 'Alpha', 'Beta')
    ]

    if skip_pre:
        pre_cells = ["", "", ""]
    else:
        pre_cells = [fmt2(pre_theta), fmt2(pre_alpha), fmt2(pre_beta)]

    zero_cells = [fmt2(zero_theta), fmt2(zero_alpha), fmt2(zero_beta)]
    point_cells = [fmt2(value) for value in point_values]

    return [participant_id, gender, condition_label] + pre_cells + zero_cells + point_cells

# ========== Build rows using EXISTING variables & DFs ==========
# Only V carries Baseline_Pre values; VA, VT and VAT leave those cells blank.
# end_time_* supplies the S3_E time point of each condition.

# V:
row_v = build_row_eeg_points(
    "V", df_visual_a_eeg,
    theta_pre_v, alpha_pre_v, beta_pre_v,
    theta_00_v, alpha_00_v, beta_00_v,
    state03_start_time_v, state03_m_time_v, end_time_v,
    skip_pre=False,
)

# VA:
row_va = build_row_eeg_points(
    "VA", df_va_a_eeg,
    None, None, None,                         # ignored when skip_pre=True
    theta_00_va, alpha_00_va, beta_00_va,
    state03_start_time_va, state03_m_time_va, end_time_va,
    skip_pre=True,
)

# VT:
row_vt = build_row_eeg_points(
    "VT", df_vt_a_eeg,
    None, None, None,
    theta_00_vt, alpha_00_vt, beta_00_vt,
    state03_start_time_vt, state03_m_time_vt, end_time_vt,
    skip_pre=True,
)

# VAT:
row_vat = build_row_eeg_points(
    "VAT", df_vat_a_eeg,
    None, None, None,
    theta_00_vat, alpha_00_vat, beta_00_vat,
    state03_start_time_vat, state03_m_time_vat, end_time_vat,
    skip_pre=True,
)

# ========== Columns ==========
columns = [
    "Participant ID", "Gender", "Condition",
    "Baseline_Pre_theta", "Baseline_Pre_alpha", "Baseline_Pre_beta",
    "Baseline_00_theta", "Baseline_00_alpha", "Baseline_00_beta",
    "S3_S_Theta", "S3_S_Alpha", "S3_S_Beta",
    "S3_M_Theta", "S3_M_Alpha", "S3_M_Beta",
    "S3_E_Theta", "S3_E_Alpha", "S3_E_Beta",
]

# ========== Save or Append ==========
rows = [row_v, row_va, row_vt, row_vat]

# A new file gets a header row; an existing file only receives the data rows.
csv_already_there = os.path.exists(filepath)
pd.DataFrame(rows, columns=columns).to_csv(
    filepath,
    mode='a' if csv_already_there else 'w',
    index=False,
    header=not csv_already_there,
)

print(f"File saved at: {filepath}")


File saved at: G:\USYD_PhD_OneDrive\OneDrive - The University of Sydney (Students)\PhD_USYD\2025\Prototype B\EEG\B_Participant_EEG_S3_Task.csv
