In [19]:
import pyxdf
import pandas as pd
import numpy as np
from datetime import datetime, timedelta, timezone
from scipy import stats
import os
from IPython.display import display
import matplotlib.pyplot as plt

## Load the XDF file

In [20]:
from pathlib import Path
import re

BASE_DIR = Path(r".")       
SUBJECT  = "sub-P007"      
# ============================

CONDITIONS = ["v", "va", "vt", "vat"]

run_re = re.compile(r"_run-(\d+)_eeg\.xdf$", re.IGNORECASE)

def pick_latest_run(paths):
    """Pick the highest run-N file; fall back to newest modified time if no run number."""
    if not paths:
        return None
    # Try by run number
    with_run = []
    for p in paths:
        m = run_re.search(p.name)
        if m:
            with_run.append((int(m.group(1)), p))
    if with_run:
        with_run.sort(key=lambda x: x[0], reverse=True)
        return with_run[0][1]
    # Fallback: latest modified
    return sorted(paths, key=lambda p: p.stat().st_mtime, reverse=True)[0]

def find_condition_paths(base_dir: Path, subject: str):
    """Return dict condition->Path by scanning sub-<ID>/ses-<cond>/eeg/*.xdf."""
    result = {}
    subj_dir = base_dir / subject
    for cond in CONDITIONS:
        eeg_dir = subj_dir / f"ses-{cond}" / "eeg"
        candidates = list(eeg_dir.glob(f"{subject}_ses-{cond}_task-Default_run-*_eeg.xdf"))
        # If strict pattern fails, accept any *.xdf under eeg
        if not candidates:
            candidates = list(eeg_dir.glob("*.xdf"))
        chosen = pick_latest_run(candidates)
        if chosen:
            result[cond] = chosen
    return result

# -------- header helpers (reuse your logic) --------
def get_datetime_from_header(header):
    if 'info' in header and 'datetime' in header['info']:
        dt_list = header['info']['datetime']
        if isinstance(dt_list, list) and len(dt_list) > 0:
            return dt_list[0]
    return "Datetime not found"

def print_stream_info(streams, condition_name, datetime_str):
    print(f"\n--- Stream Information ({condition_name}) ---")
    print(f"Datetime from header: {datetime_str}")
    for i, stream in enumerate(streams):
        stream_name = stream['info']['name'][0]
        channel_count = stream['info']['channel_count'][0]
        print(f"Stream {i+1} Name: {stream_name}, Channel Count: {channel_count}, Datetime: {datetime_str}")

paths = find_condition_paths(BASE_DIR, SUBJECT)

In [21]:
# ---- Map discovered paths back to your original variable names ----
def require_found(paths_dict, cond_key, var_name):
    """Ensure a condition path exists; raise a clear error if missing."""
    p = paths_dict.get(cond_key)
    if p is None:
        raise FileNotFoundError(
            f"Missing file for condition '{cond_key}'. "
            f"Please check folder structure: sub-<ID>/ses-{cond_key}/eeg/*.xdf "
            f"and ensure it exists so we can assign `{var_name}`."
        )
    return str(p)  # keep as Windows-style path string

# Assign variables exactly as before
visual_file_path = require_found(paths, "v",   "visual_file_path")
va_file_path     = require_found(paths, "va",  "va_file_path")
vt_file_path     = require_found(paths, "vt",  "vt_file_path")
vat_file_path    = require_found(paths, "vat", "vat_file_path")

# (Optional) print to verify
print("\n== Assigned variables ==")
print("visual_file_path:", visual_file_path)
print("va_file_path    :", va_file_path)
print("vt_file_path    :", vt_file_path)
print("vat_file_path   :", vat_file_path)



== Assigned variables ==
visual_file_path: sub-P007\ses-v\eeg\sub-P007_ses-v_task-Default_run-001_eeg.xdf
va_file_path    : sub-P007\ses-va\eeg\sub-P007_ses-va_task-Default_run-001_eeg.xdf
vt_file_path    : sub-P007\ses-vt\eeg\sub-P007_ses-vt_task-Default_run-001_eeg.xdf
vat_file_path   : sub-P007\ses-vat\eeg\sub-P007_ses-vat_task-Default_run-001_eeg.xdf


## Load visual only data

In [22]:
# Load XDF file for Visual condition
visual_streams, visual_header = pyxdf.load_xdf(visual_file_path)
visual_datetime = get_datetime_from_header(visual_header)
print("Visual condition datetime:", visual_datetime)

# Print stream info for Visual condition with datetime
print_stream_info(visual_streams, "Visual", visual_datetime)

Stream 3: Calculated effective sampling rate 10.2037 Hz is different from specified rate 100.0000 Hz.
Stream 5: Calculated effective sampling rate 607.4623 Hz is different from specified rate 250.0000 Hz.
Stream 12: Calculated effective sampling rate 34.8861 Hz is different from specified rate 250.0000 Hz.
Stream 11: Calculated effective sampling rate 29.4068 Hz is different from specified rate 250.0000 Hz.
Stream 6: Calculated effective sampling rate 35.2033 Hz is different from specified rate 250.0000 Hz.
Stream 9: Calculated effective sampling rate 33.3579 Hz is different from specified rate 250.0000 Hz.
Stream 4: Calculated effective sampling rate 311.9492 Hz is different from specified rate 250.0000 Hz.
Stream 8: Calculated effective sampling rate 648.6850 Hz is different from specified rate 250.0000 Hz.
Stream 10: Calculated effective sampling rate 29.2983 Hz is different from specified rate 250.0000 Hz.


Visual condition datetime: 2025-10-02T10:29:44+1000

--- Stream Information (Visual) ---
Datetime from header: 2025-10-02T10:29:44+1000
Stream 1 Name: obci_stream_9, Channel Count: 9, Datetime: 2025-10-02T10:29:44+1000
Stream 2 Name: GSR_PPG, Channel Count: 10, Datetime: 2025-10-02T10:29:44+1000
Stream 3 Name: obci_stream_2, Channel Count: 6, Datetime: 2025-10-02T10:29:44+1000
Stream 4 Name: obci_stream_7, Channel Count: 1, Datetime: 2025-10-02T10:29:44+1000
Stream 5 Name: obci_stream_8, Channel Count: 4, Datetime: 2025-10-02T10:29:44+1000
Stream 6 Name: obci_stream_11, Channel Count: 3, Datetime: 2025-10-02T10:29:44+1000
Stream 7 Name: obci_stream_5, Channel Count: 2, Datetime: 2025-10-02T10:29:44+1000
Stream 8 Name: obci_stream_1, Channel Count: 5, Datetime: 2025-10-02T10:29:44+1000
Stream 9 Name: obci_stream_0, Channel Count: 18, Datetime: 2025-10-02T10:29:44+1000
Stream 10 Name: obci_stream_4, Channel Count: 18, Datetime: 2025-10-02T10:29:44+1000
Stream 11 Name: obci_stream_10, Cha

## Load VA data

In [23]:
# Load XDF file for Visual + Auditory condition (VA)
va_streams, va_header = pyxdf.load_xdf(va_file_path)
va_datetime = get_datetime_from_header(va_header)
print("Visual + Auditory condition datetime:", va_datetime)

# Print stream info for Visual + Auditory condition with datetime
print_stream_info(va_streams, "Visual + Auditory", va_datetime)

Stream 5: Calculated effective sampling rate 20.1419 Hz is different from specified rate 250.0000 Hz.
Stream 7: Calculated effective sampling rate 47.7352 Hz is different from specified rate 250.0000 Hz.
Stream 1: Calculated effective sampling rate 22.8322 Hz is different from specified rate 250.0000 Hz.
Stream 4: Calculated effective sampling rate 192.1785 Hz is different from specified rate 250.0000 Hz.
Stream 10: Calculated effective sampling rate 29.3699 Hz is different from specified rate 250.0000 Hz.
Stream 13: Calculated effective sampling rate 306.7551 Hz is different from specified rate 250.0000 Hz.
Stream 8: Calculated effective sampling rate 10.2044 Hz is different from specified rate 100.0000 Hz.
Stream 12: Calculated effective sampling rate 314.7820 Hz is different from specified rate 250.0000 Hz.
Stream 6: Calculated effective sampling rate 23.1527 Hz is different from specified rate 250.0000 Hz.


Visual + Auditory condition datetime: 2025-10-02T11:00:05+1000

--- Stream Information (Visual + Auditory) ---
Datetime from header: 2025-10-02T11:00:05+1000
Stream 1 Name: obci_stream_11, Channel Count: 3, Datetime: 2025-10-02T11:00:05+1000
Stream 2 Name: obci_stream_6, Channel Count: 1, Datetime: 2025-10-02T11:00:05+1000
Stream 3 Name: obci_stream_4, Channel Count: 18, Datetime: 2025-10-02T11:00:05+1000
Stream 4 Name: obci_stream_9, Channel Count: 9, Datetime: 2025-10-02T11:00:05+1000
Stream 5 Name: obci_stream_10, Channel Count: 2, Datetime: 2025-10-02T11:00:05+1000
Stream 6 Name: obci_stream_8, Channel Count: 4, Datetime: 2025-10-02T11:00:05+1000
Stream 7 Name: obci_stream_1, Channel Count: 5, Datetime: 2025-10-02T11:00:05+1000
Stream 8 Name: obci_stream_2, Channel Count: 6, Datetime: 2025-10-02T11:00:05+1000
Stream 9 Name: obci_stream_7, Channel Count: 1, Datetime: 2025-10-02T11:00:05+1000
Stream 10 Name: GSR_PPG, Channel Count: 10, Datetime: 2025-10-02T11:00:05+1000
Stream 11 Nam

## Load VT data

In [24]:
# Load XDF file for Visual + Thermal condition (VT)
vt_streams, vt_header = pyxdf.load_xdf(vt_file_path)
vt_datetime = get_datetime_from_header(vt_header)
print("Visual + Thermal condition datetime:", vt_datetime)

# Print stream info for Visual + Thermal condition with datetime
print_stream_info(vt_streams, "Visual + Thermal", vt_datetime)

Stream 1: Calculated effective sampling rate 23.5504 Hz is different from specified rate 250.0000 Hz.
Stream 8: Calculated effective sampling rate 45.0638 Hz is different from specified rate 250.0000 Hz.
Stream 9: Calculated effective sampling rate 335.7725 Hz is different from specified rate 250.0000 Hz.
Stream 7: Calculated effective sampling rate 10.2043 Hz is different from specified rate 100.0000 Hz.
Stream 11: Calculated effective sampling rate 207.5193 Hz is different from specified rate 250.0000 Hz.
Stream 12: Calculated effective sampling rate 23.5342 Hz is different from specified rate 250.0000 Hz.
Stream 4: Calculated effective sampling rate 20.0107 Hz is different from specified rate 250.0000 Hz.
Stream 13: Calculated effective sampling rate 323.8675 Hz is different from specified rate 250.0000 Hz.
Stream 10: Calculated effective sampling rate 30.0159 Hz is different from specified rate 250.0000 Hz.


Visual + Thermal condition datetime: 2025-10-02T11:12:18+1000

--- Stream Information (Visual + Thermal) ---
Datetime from header: 2025-10-02T11:12:18+1000
Stream 1 Name: obci_stream_9, Channel Count: 9, Datetime: 2025-10-02T11:12:18+1000
Stream 2 Name: obci_stream_8, Channel Count: 4, Datetime: 2025-10-02T11:12:18+1000
Stream 3 Name: obci_stream_4, Channel Count: 18, Datetime: 2025-10-02T11:12:18+1000
Stream 4 Name: obci_stream_6, Channel Count: 1, Datetime: 2025-10-02T11:12:18+1000
Stream 5 Name: obci_stream_2, Channel Count: 6, Datetime: 2025-10-02T11:12:18+1000
Stream 6 Name: obci_stream_0, Channel Count: 18, Datetime: 2025-10-02T11:12:18+1000
Stream 7 Name: GSR_PPG, Channel Count: 10, Datetime: 2025-10-02T11:12:18+1000
Stream 8 Name: obci_stream_10, Channel Count: 2, Datetime: 2025-10-02T11:12:18+1000
Stream 9 Name: obci_stream_5, Channel Count: 2, Datetime: 2025-10-02T11:12:18+1000
Stream 10 Name: obci_stream_11, Channel Count: 3, Datetime: 2025-10-02T11:12:18+1000
Stream 11 Name

## Load VAT data

In [25]:
# Load XDF file for Visual + Auditory condition
vat_streams, vat_header = pyxdf.load_xdf(vat_file_path)
vat_datetime = get_datetime_from_header(vat_header)
print("VAT condition datetime:", vat_datetime)

# Print stream info for Visual + Auditory condition with datetime
print_stream_info(vat_streams, "VAT", vat_datetime)

Stream 4: Calculated effective sampling rate 41.3696 Hz is different from specified rate 250.0000 Hz.
Stream 3: Calculated effective sampling rate 628.2684 Hz is different from specified rate 250.0000 Hz.
Stream 7: Calculated effective sampling rate 24.4776 Hz is different from specified rate 250.0000 Hz.
Stream 5: Calculated effective sampling rate 183.2153 Hz is different from specified rate 250.0000 Hz.
Stream 8: Calculated effective sampling rate 41.0945 Hz is different from specified rate 250.0000 Hz.
Stream 2: Calculated effective sampling rate 321.4891 Hz is different from specified rate 250.0000 Hz.
Stream 11: Calculated effective sampling rate 24.0566 Hz is different from specified rate 250.0000 Hz.
Stream 6: Calculated effective sampling rate 40.9352 Hz is different from specified rate 250.0000 Hz.
Stream 12: Calculated effective sampling rate 10.2044 Hz is different from specified rate 100.0000 Hz.


VAT condition datetime: 2025-10-02T11:44:49+1000

--- Stream Information (VAT) ---
Datetime from header: 2025-10-02T11:44:49+1000
Stream 1 Name: obci_stream_6, Channel Count: 1, Datetime: 2025-10-02T11:44:49+1000
Stream 2 Name: obci_stream_3, Channel Count: 125, Datetime: 2025-10-02T11:44:49+1000
Stream 3 Name: obci_stream_1, Channel Count: 5, Datetime: 2025-10-02T11:44:49+1000
Stream 4 Name: obci_stream_10, Channel Count: 2, Datetime: 2025-10-02T11:44:49+1000
Stream 5 Name: obci_stream_8, Channel Count: 4, Datetime: 2025-10-02T11:44:49+1000
Stream 6 Name: obci_stream_5, Channel Count: 2, Datetime: 2025-10-02T11:44:49+1000
Stream 7 Name: obci_stream_7, Channel Count: 1, Datetime: 2025-10-02T11:44:49+1000
Stream 8 Name: obci_stream_0, Channel Count: 18, Datetime: 2025-10-02T11:44:49+1000
Stream 9 Name: obci_stream_2, Channel Count: 6, Datetime: 2025-10-02T11:44:49+1000
Stream 10 Name: obci_stream_9, Channel Count: 9, Datetime: 2025-10-02T11:44:49+1000
Stream 11 Name: obci_stream_11, Cha

## Extract data streams for EEG Band Power

In [26]:
# Function to parse datetime string from header
def parse_header_datetime(dt_str):
    if '+' in dt_str:
        base, zone = dt_str.split('+')
        if '.' in base:
            dt_fmt = "%Y-%m-%dT%H:%M:%S.%f"
        else:
            dt_fmt = "%Y-%m-%dT%H:%M:%S"
        dt_obj = datetime.strptime(base, dt_fmt)
        tz = timezone(timedelta(hours=int(zone[:2]), minutes=int(zone[2:])))
        dt_obj = dt_obj.replace(tzinfo=tz)
    else:
        dt_obj = datetime.fromisoformat(dt_str)
    return dt_obj

# Function to extract a specific stream as a DataFrame
def extract_stream_df(streams, start_time_str, stream_name_to_extract):
    start_dt = parse_header_datetime(start_time_str)
    # Find the target stream by name
    target_stream = None
    for stream in streams:
        if stream['info']['name'][0] == stream_name_to_extract:
            target_stream = stream
            break
    if target_stream is None:
        print(f"Stream '{stream_name_to_extract}' not found.")
        return None
    n_channels = int(target_stream['info']['channel_count'][0])
    df_stream = pd.DataFrame(
        target_stream['time_series'],
        columns=[f"Channel_{i+1}" for i in range(n_channels)]
    )
    df_stream['Timestamp'] = target_stream['time_stamps']
    df_stream['Relative_Seconds'] = df_stream['Timestamp'] - df_stream['Timestamp'].iloc[0]
    df_stream['Datetime_Local'] = [start_dt + timedelta(seconds=s) for s in df_stream['Relative_Seconds']]
    df_stream['Datetime_Local'] = df_stream['Datetime_Local'].map(lambda x: x.strftime('%Y-%m-%d %H:%M:%S.%f %z'))
    return df_stream

# Set the stream name to extract
stream_name_to_extract = "obci_stream_2"


## Extract EEG for Average Band Power

In [27]:
# Function to parse datetime string from header
def parse_header_datetime(dt_str):
    if '+' in dt_str:
        base, zone = dt_str.split('+')
        if '.' in base:
            dt_fmt = "%Y-%m-%dT%H:%M:%S.%f"
        else:
            dt_fmt = "%Y-%m-%dT%H:%M:%S"
        dt_obj = datetime.strptime(base, dt_fmt)
        tz = timezone(timedelta(hours=int(zone[:2]), minutes=int(zone[2:])))
        dt_obj = dt_obj.replace(tzinfo=tz)
    else:
        dt_obj = datetime.fromisoformat(dt_str)
    return dt_obj

# Function to extract a specific stream as a DataFrame
def extract_stream_df(streams, start_time_str, stream_name_to_extract):
    start_dt = parse_header_datetime(start_time_str)
    # Find the target stream by name
    target_stream = None
    for stream in streams:
        if stream['info']['name'][0] == stream_name_to_extract:
            target_stream = stream
            break
    if target_stream is None:
        print(f"Stream '{stream_name_to_extract}' not found.")
        return None
    n_channels = int(target_stream['info']['channel_count'][0])
    df_stream_0 = pd.DataFrame(
        target_stream['time_series'],
        columns=[f"Channel_{i+1}" for i in range(n_channels)]
    )
    df_stream_0['Timestamp'] = target_stream['time_stamps']
    df_stream_0['Relative_Seconds'] = df_stream_0['Timestamp'] - df_stream_0['Timestamp'].iloc[0]
    df_stream_0['Datetime_Local'] = [start_dt + timedelta(seconds=s) for s in df_stream_0['Relative_Seconds']]
    df_stream_0['Datetime_Local'] = df_stream_0['Datetime_Local'].map(lambda x: x.strftime('%Y-%m-%d %H:%M:%S.%f %z'))
    return df_stream_0

# Set the stream name to extract
stream_name_to_extract = "obci_stream_1" 


In [28]:
# visual only
df_visual_a_eeg = extract_stream_df(visual_streams, visual_datetime, stream_name_to_extract)

if df_visual_a_eeg is not None:
    print("==== Visual Only ====")
    df_visual_a_eeg = df_visual_a_eeg.rename(columns={
        'Channel_1': 'Delta',
        'Channel_2': 'Theta',
        'Channel_3': 'Alpha',
        'Channel_4': 'Beta',
        'Channel_5': 'Gamma',
    })

    display(pd.concat([df_visual_a_eeg.head(4), df_visual_a_eeg.tail(4)]))


==== Visual Only ====


Unnamed: 0,Delta,Theta,Alpha,Beta,Gamma,Timestamp,Relative_Seconds,Datetime_Local
0,0.126342,0.580643,0.219405,0.064346,0.009263,432.585449,0.0,2025-10-02 10:29:44.000000 +1000
1,0.127653,0.576235,0.221832,0.064922,0.009358,432.613857,0.028408,2025-10-02 10:29:44.028408 +1000
2,0.128828,0.571087,0.225221,0.065371,0.009493,432.642264,0.056815,2025-10-02 10:29:44.056815 +1000
3,0.12939,0.568538,0.226902,0.065609,0.009561,432.670672,0.085223,2025-10-02 10:29:44.085223 +1000
18641,0.03585,0.185939,0.083494,0.105462,0.589255,962.133583,529.548134,2025-10-02 10:38:33.548134 +1000
18642,0.035573,0.186785,0.085387,0.111939,0.580317,962.16199,529.576542,2025-10-02 10:38:33.576542 +1000
18643,0.03549,0.187117,0.086055,0.114228,0.577111,962.190398,529.604949,2025-10-02 10:38:33.604949 +1000
18644,0.035229,0.18886,0.088455,0.121714,0.565741,962.218806,529.633357,2025-10-02 10:38:33.633357 +1000


In [29]:
# va only
df_va_a_eeg = extract_stream_df(va_streams, va_datetime, stream_name_to_extract)

if df_va_a_eeg is not None:
    print("==== va Only ====")
    df_va_a_eeg = df_va_a_eeg.rename(columns={
        'Channel_1': 'Delta',
        'Channel_2': 'Theta',
        'Channel_3': 'Alpha',
        'Channel_4': 'Beta',
        'Channel_5': 'Gamma',
    })

    display(pd.concat([df_va_a_eeg.head(4), df_va_a_eeg.tail(4)]))


==== va Only ====


Unnamed: 0,Delta,Theta,Alpha,Beta,Gamma,Timestamp,Relative_Seconds,Datetime_Local
0,0.082642,0.356967,0.376226,0.137237,0.046928,831.016981,0.0,2025-10-02 11:00:05.000000 +1000
1,0.08275,0.356545,0.37598,0.137597,0.047128,831.051032,0.034051,2025-10-02 11:00:05.034051 +1000
2,0.0837,0.356093,0.374067,0.138434,0.047705,831.085083,0.068102,2025-10-02 11:00:05.068102 +1000
3,0.08583,0.357934,0.368759,0.139158,0.048319,831.119134,0.102154,2025-10-02 11:00:05.102154 +1000
10769,0.006266,0.159871,0.48853,0.327536,0.017796,1197.714735,366.697754,2025-10-02 11:06:11.697754 +1000
10770,0.005753,0.160028,0.499359,0.3169,0.017959,1197.748786,366.731805,2025-10-02 11:06:11.731805 +1000
10771,0.005606,0.161024,0.501806,0.313605,0.017959,1197.782837,366.765857,2025-10-02 11:06:11.765857 +1000
10772,0.005565,0.161413,0.502424,0.312638,0.01796,1197.816888,366.799908,2025-10-02 11:06:11.799908 +1000


In [30]:
# vt only
df_vt_a_eeg = extract_stream_df(vt_streams, vt_datetime, stream_name_to_extract)

if df_vt_a_eeg is not None:
    print("==== vt Only ====")
    df_vt_a_eeg = df_vt_a_eeg.rename(columns={
        'Channel_1': 'Delta',
        'Channel_2': 'Theta',
        'Channel_3': 'Alpha',
        'Channel_4': 'Beta',
        'Channel_5': 'Gamma',
    })

    display(pd.concat([df_vt_a_eeg.head(4), df_vt_a_eeg.tail(4)]))


==== vt Only ====


Unnamed: 0,Delta,Theta,Alpha,Beta,Gamma,Timestamp,Relative_Seconds,Datetime_Local
0,0.060542,0.389058,0.329155,0.183789,0.037457,1551.152619,0.0,2025-10-02 11:12:18.000000 +1000
1,0.060226,0.389703,0.328879,0.183733,0.037459,1551.185937,0.033318,2025-10-02 11:12:18.033318 +1000
2,0.057776,0.392032,0.329463,0.1831,0.037629,1551.219255,0.066636,2025-10-02 11:12:18.066636 +1000
3,0.057027,0.392292,0.330216,0.182732,0.037734,1551.252573,0.099955,2025-10-02 11:12:18.099955 +1000
11738,0.007403,0.097944,0.254593,0.389361,0.250699,1942.241684,391.089065,2025-10-02 11:18:49.089065 +1000
11739,0.007164,0.096737,0.258191,0.386717,0.251191,1942.275002,391.122383,2025-10-02 11:18:49.122383 +1000
11740,0.007062,0.096182,0.259911,0.385594,0.25125,1942.30832,391.155702,2025-10-02 11:18:49.155702 +1000
11741,0.006965,0.09562,0.261614,0.384501,0.251301,1942.341638,391.18902,2025-10-02 11:18:49.189020 +1000


In [31]:
# vat
df_vat_a_eeg = extract_stream_df(vat_streams, vat_datetime, stream_name_to_extract)

if df_vat_a_eeg is not None:
    print("==== vat Only ====")
    df_vat_a_eeg = df_vat_a_eeg.rename(columns={
        'Channel_1': 'Delta',
        'Channel_2': 'Theta',
        'Channel_3': 'Alpha',
        'Channel_4': 'Beta',
        'Channel_5': 'Gamma',
    })

    display(pd.concat([df_vat_a_eeg.head(4), df_vat_a_eeg.tail(4)]))

==== vat Only ====


Unnamed: 0,Delta,Theta,Alpha,Beta,Gamma,Timestamp,Relative_Seconds,Datetime_Local
0,0.041171,0.380924,0.311906,0.225774,0.040225,632.602307,0.0,2025-10-02 11:44:49.000000 +1000
1,0.04158,0.378249,0.313556,0.226468,0.040147,632.643165,0.040858,2025-10-02 11:44:49.040858 +1000
2,0.041719,0.377348,0.314476,0.226417,0.04004,632.684023,0.081715,2025-10-02 11:44:49.081715 +1000
3,0.042072,0.37496,0.317208,0.226097,0.039663,632.72488,0.122573,2025-10-02 11:44:49.122573 +1000
9425,0.050991,0.318148,0.207537,0.389449,0.033875,1017.685374,385.083067,2025-10-02 11:51:14.083067 +1000
9426,0.049769,0.306643,0.20714,0.403417,0.033031,1017.726232,385.123925,2025-10-02 11:51:14.123925 +1000
9427,0.049208,0.301231,0.206923,0.410057,0.03258,1017.76709,385.164782,2025-10-02 11:51:14.164782 +1000
9428,0.044101,0.243773,0.202708,0.484747,0.024671,1017.807947,385.20564,2025-10-02 11:51:14.205640 +1000


## Experiment Time

In [32]:
import pandas as pd

# Read the log files (updated file names)
log_v   = pd.read_csv('ButtonToggleLog_v.csv')
log_va  = pd.read_csv('ButtonToggleLog_va.csv')
log_vt  = pd.read_csv('ButtonToggleLog_vt.csv')
log_vat = pd.read_csv('ButtonToggleLog_vat.csv')

# Utility to fetch time for a given Group and DataFrame
def get_time(df, element_name):
    row = df[df['Group'] == element_name]
    return row['UTC+10_Time'].iloc[0] if not row.empty else None

# ---- Visual ----
start_time_v   = get_time(log_v, 'S01_S')
state02_start_time_v = get_time(log_v, 'S02_S')
state03_start_time_v = get_time(log_v, 'S03_S')
state03_m_time_v     = get_time(log_v, 'S03_M')
end_time_v     = get_time(log_v, 'S03_E')

# ---- VA ----
start_time_va   = get_time(log_va, 'S01_S')
state02_start_time_va = get_time(log_va, 'S02_S')
state03_start_time_va = get_time(log_va, 'S03_S')
state03_m_time_va     = get_time(log_va, 'S03_M')
end_time_va     = get_time(log_va, 'S03_E')

# ---- VT ----
start_time_vt   = get_time(log_vt, 'S01_S')
state02_start_time_vt = get_time(log_vt, 'S02_S')
state03_start_time_vt = get_time(log_vt, 'S03_S')
state03_m_time_vt     = get_time(log_vt, 'S03_M')
end_time_vt     = get_time(log_vt, 'S03_E')

# ---- VAT ----
start_time_vat   = get_time(log_vat, 'S01_S')
state02_start_time_vat = get_time(log_vat, 'S02_S')
state03_start_time_vat = get_time(log_vat, 'S03_S')
state03_m_time_vat     = get_time(log_vat, 'S03_M')
end_time_vat     = get_time(log_vat, 'S03_E')

# ---- Print results ----
print("Visual condition start:", start_time_v)
print("Visual State 02 start:", state02_start_time_v)
print("Visual State 03 start:", state03_start_time_v)
print("Visual State 03 m_time:", state03_m_time_v)
print("Visual end:", end_time_v)
print("---")
print("VA condition start:", start_time_va)
print("VA State 02 start:", state02_start_time_va)
print("VA State 03 start:", state03_start_time_va)
print("VA State 03 m_time:", state03_m_time_va)
print("VA end:", end_time_va)
print("---")
print("VT condition start:", start_time_vt)
print("VT State 02 start:", state02_start_time_vt)
print("VT State 03 start:", state03_start_time_vt)
print("VT State 03 m_time:", state03_m_time_vt)
print("VT end:", end_time_vt)
print("---")
print("VAT condition start:", start_time_vat)
print("VAT State 02 start:", state02_start_time_vat)
print("VAT State 03 start:", state03_start_time_vat)
print("VAT State 03 m_time:", state03_m_time_vat)
print("VAT end:", end_time_vat)

Visual condition start: 2025/10/2 10:32:51.99
Visual State 02 start: 2025/10/2 10:36:00.48
Visual State 03 start: 2025/10/2 10:37:06.26
Visual State 03 m_time: 2025/10/2 10:37:19.60
Visual end: 2025/10/2 10:37:29.48
---
VA condition start: 2025/10/2 11:01:02.71
VA State 02 start: 2025/10/2 11:04:07.00
VA State 03 start: 2025/10/2 11:05:08.63
VA State 03 m_time: 2025/10/2 11:05:19.17
VA end: 2025/10/2 11:05:30.54
---
VT condition start: 2025-10-02 11:13:30.165
VT State 02 start: 2025-10-02 11:16:30.414
VT State 03 start: 2025-10-02 11:17:35.226
VT State 03 m_time: 2025-10-02 11:17:46.209
VT end: 2025-10-02 11:17:54.285
---
VAT condition start: 2025-10-02 11:46:20.637
VAT State 02 start: 2025-10-02 11:49:26.071
VAT State 03 start: 2025-10-02 11:50:26.516
VAT State 03 m_time: 2025-10-02 11:50:37.765
VAT end: 2025-10-02 11:50:46.133


## Basline calculation

In [33]:
import pandas as pd
from pandas import Timedelta

# Make copies (avoid modifying source DFs)
df_v   = df_visual_a_eeg.copy()
df_va  = df_va_a_eeg.copy()
df_vt  = df_vt_a_eeg.copy()
df_vat = df_vat_a_eeg.copy()

def get_interval_theta_alpha_beta(df, interval_start, interval_end):
    df['Datetime_Local'] = pd.to_datetime(df['Datetime_Local'])
    tz = df['Datetime_Local'].dt.tz

    interval_start = pd.to_datetime(interval_start)
    interval_end   = pd.to_datetime(interval_end)
    interval_start = interval_start.tz_localize(tz) if interval_start.tzinfo is None else interval_start.tz_convert(tz)
    interval_end   = interval_end.tz_localize(tz)   if interval_end.tzinfo   is None else interval_end.tz_convert(tz)

    df_interval = df[(df['Datetime_Local'] >= interval_start) & (df['Datetime_Local'] < interval_end)]

    mean_theta = df_interval['Theta'].mean()
    mean_alpha = df_interval['Alpha'].mean()
    mean_beta  = df_interval['Beta'].mean()

    mean_theta = round(mean_theta, 2) if pd.notnull(mean_theta) else None
    mean_alpha = round(mean_alpha, 2) if pd.notnull(mean_alpha) else None
    mean_beta  = round(mean_beta,  2) if pd.notnull(mean_beta)  else None
    return mean_theta, mean_alpha, mean_beta

# ----- Baseline windows -----
# V
baseline_00_start_v  = pd.to_datetime(start_time_v)  - Timedelta(minutes=1)
baseline_00_end_v    = pd.to_datetime(start_time_v)
baseline_pre_start_v = pd.to_datetime(start_time_v)  - Timedelta(minutes=2)
baseline_pre_end_v   = pd.to_datetime(start_time_v)  - Timedelta(minutes=1)

# VA
baseline_00_start_va  = pd.to_datetime(start_time_va)  - Timedelta(minutes=1)
baseline_00_end_va    = pd.to_datetime(start_time_va)
baseline_pre_start_va = pd.to_datetime(start_time_va)  - Timedelta(minutes=2)
baseline_pre_end_va   = pd.to_datetime(start_time_va)  - Timedelta(minutes=1)

# VT
baseline_00_start_vt  = pd.to_datetime(start_time_vt)  - Timedelta(minutes=1)
baseline_00_end_vt    = pd.to_datetime(start_time_vt)
baseline_pre_start_vt = pd.to_datetime(start_time_vt)  - Timedelta(minutes=2)
baseline_pre_end_vt   = pd.to_datetime(start_time_vt)  - Timedelta(minutes=1)

# VAT
baseline_00_start_vat  = pd.to_datetime(start_time_vat) - Timedelta(minutes=1)
baseline_00_end_vat    = pd.to_datetime(start_time_vat)
baseline_pre_start_vat = pd.to_datetime(start_time_vat) - Timedelta(minutes=2)
baseline_pre_end_vat   = pd.to_datetime(start_time_vat) - Timedelta(minutes=1)

# ----- Compute means -----
theta_00_v,  alpha_00_v,  beta_00_v  = get_interval_theta_alpha_beta(df_v,   baseline_00_start_v,  baseline_00_end_v)
theta_pre_v, alpha_pre_v, beta_pre_v = get_interval_theta_alpha_beta(df_v,   baseline_pre_start_v, baseline_pre_end_v)

theta_00_va,  alpha_00_va,  beta_00_va  = get_interval_theta_alpha_beta(df_va,  baseline_00_start_va,  baseline_00_end_va)
theta_pre_va, alpha_pre_va, beta_pre_va = get_interval_theta_alpha_beta(df_va,  baseline_pre_start_va, baseline_pre_end_va)

theta_00_vt,  alpha_00_vt,  beta_00_vt  = get_interval_theta_alpha_beta(df_vt,  baseline_00_start_vt,  baseline_00_end_vt)
theta_pre_vt, alpha_pre_vt, beta_pre_vt = get_interval_theta_alpha_beta(df_vt,  baseline_pre_start_vt, baseline_pre_end_vt)

theta_00_vat,  alpha_00_vat,  beta_00_vat  = get_interval_theta_alpha_beta(df_vat, baseline_00_start_vat,  baseline_00_end_vat)
theta_pre_vat, alpha_pre_vat, beta_pre_vat = get_interval_theta_alpha_beta(df_vat, baseline_pre_start_vat, baseline_pre_end_vat)

# ----- Print -----
print(f"V   Baseline_00 (-1~0 min)    theta: {theta_00_v:.2f}, alpha: {alpha_00_v:.2f}, beta: {beta_00_v:.2f}")
print(f"V   Baseline_pre (-2~-1 min)  theta: {theta_pre_v:.2f}, alpha: {alpha_pre_v:.2f}, beta: {beta_pre_v:.2f}")
print('---')
print(f"VA  Baseline_00 (-1~0 min)    theta: {theta_00_va:.2f}, alpha: {alpha_00_va:.2f}, beta: {beta_00_va:.2f}")
# print(f"VA  Baseline_pre (-2~-1 min)  theta: {theta_pre_va:.2f}, alpha: {alpha_pre_va:.2f}, beta: {beta_pre_va:.2f}")
print('---')
print(f"VT  Baseline_00 (-1~0 min)    theta: {theta_00_vt:.2f}, alpha: {alpha_00_vt:.2f}, beta: {beta_00_vt:.2f}")
# print(f"VT  Baseline_pre (-2~-1 min)  theta: {theta_pre_vt:.2f}, alpha: {alpha_pre_vt:.2f}, beta: {beta_pre_vt:.2f}")
print('---')
print(f"VAT Baseline_00 (-1~0 min)    theta: {theta_00_vat:.2f}, alpha: {alpha_00_vat:.2f}, beta: {beta_00_vat:.2f}")
# print(f"VAT Baseline_pre (-2~-1 min)  theta: {theta_pre_vat:.2f}, alpha: {alpha_pre_vat:.2f}, beta: {beta_pre_vat:.2f}")


V   Baseline_00 (-1~0 min)    theta: 0.42, alpha: 0.27, beta: 0.18
V   Baseline_pre (-2~-1 min)  theta: 0.39, alpha: 0.29, beta: 0.20
---
VA  Baseline_00 (-1~0 min)    theta: 0.48, alpha: 0.28, beta: 0.11
---
VT  Baseline_00 (-1~0 min)    theta: 0.43, alpha: 0.21, beta: 0.19
---
VAT Baseline_00 (-1~0 min)    theta: 0.44, alpha: 0.21, beta: 0.20


## Calculate mean values in 30-second windows for Average EEG BP

In [43]:
pd.set_option('display.float_format', lambda x: f"{x:.2f}")

def calc_rolling_band_means_eeg(df_eeg, start_time, end_time, interval_s=30, min_total_s=270):

    df = df_eeg.copy()
    # Ensure timezone-aligned datetimes
    df['Datetime_Local'] = pd.to_datetime(df['Datetime_Local'])
    tz = df['Datetime_Local'].dt.tz
    start_time = pd.to_datetime(start_time)
    end_time   = pd.to_datetime(end_time)
    start_time = start_time.tz_localize(tz) if start_time.tzinfo is None else start_time.tz_convert(tz)
    end_time   = end_time.tz_localize(tz)   if end_time.tzinfo   is None else end_time.tz_convert(tz)

    interval = pd.Timedelta(seconds=interval_s)
    total_duration = end_time - start_time

    results = []
    idx = 1

    # Case A: whole interval shorter than one window -> single mean over the whole interval
    if total_duration < interval:
        seg_full = df[(df['Datetime_Local'] >= start_time) & (df['Datetime_Local'] < end_time)]
        mT = seg_full['Theta'].mean()
        mA = seg_full['Alpha'].mean()
        mB = seg_full['Beta'].mean()
        seconds_marker = total_duration.total_seconds()
        return pd.DataFrame([[round(seconds_marker, 2),
                              round(mT, 2) if pd.notnull(mT) else None,
                              round(mA, 2) if pd.notnull(mA) else None,
                              round(mB, 2) if pd.notnull(mB) else None]],
                            columns=['Seconds', 'theta', 'alpha', 'beta'])

    # Case B: full 30s windows
    current_start = start_time
    while current_start + interval <= end_time:
        current_end = current_start + interval
        seg = df[(df['Datetime_Local'] >= current_start) & (df['Datetime_Local'] < current_end)]
        mT = seg['Theta'].mean()
        mA = seg['Alpha'].mean()
        mB = seg['Beta'].mean()
        results.append([idx * interval_s,
                        round(mT, 2) if pd.notnull(mT) else None,
                        round(mA, 2) if pd.notnull(mA) else None,
                        round(mB, 2) if pd.notnull(mB) else None])
        current_start = current_end
        idx += 1

    # Case C: trailing segment < 30s
    if current_start < end_time:
        # Decide whether to use whole-interval average or the tail's own average
        if total_duration.total_seconds() < min_total_s:
            seg_last = df[(df['Datetime_Local'] >= start_time) & (df['Datetime_Local'] < end_time)]  # whole interval
        else:
            seg_last = df[(df['Datetime_Local'] >= current_start) & (df['Datetime_Local'] < end_time)]  # tail only

        mT = seg_last['Theta'].mean()
        mA = seg_last['Alpha'].mean()
        mB = seg_last['Beta'].mean()
        seconds_marker = (idx - 1) * interval_s + (end_time - current_start).total_seconds()  # cumulative seconds
        results.append([round(seconds_marker, 2),
                        round(mT, 2) if pd.notnull(mT) else None,
                        round(mA, 2) if pd.notnull(mA) else None,
                        round(mB, 2) if pd.notnull(mB) else None])

    return pd.DataFrame(results, columns=['Seconds', 'theta', 'alpha', 'beta'])


# --- compute for all four groups ---
df_v_means_eeg   = calc_rolling_band_means_eeg(df_v,   start_time_v,   end_time_v)
df_va_means_eeg  = calc_rolling_band_means_eeg(df_va,  start_time_va,  end_time_va)
df_vt_means_eeg  = calc_rolling_band_means_eeg(df_vt,  start_time_vt,  end_time_vt)
df_vat_means_eeg = calc_rolling_band_means_eeg(df_vat, start_time_vat, end_time_vat)

# --- align to common minimum length ---
min_len_eeg = min(len(df_v_means_eeg), len(df_va_means_eeg), len(df_vt_means_eeg), len(df_vat_means_eeg))
df_v_means_aligned_eeg   = df_v_means_eeg.iloc[:min_len_eeg].reset_index(drop=True)
df_va_means_aligned_eeg  = df_va_means_eeg.iloc[:min_len_eeg].reset_index(drop=True)
df_vt_means_aligned_eeg  = df_vt_means_eeg.iloc[:min_len_eeg].reset_index(drop=True)
df_vat_means_aligned_eeg = df_vat_means_eeg.iloc[:min_len_eeg].reset_index(drop=True)

# --- display ---
display('V EEG 30s Interval Means',   df_v_means_aligned_eeg)
display('VA EEG 30s Interval Means',  df_va_means_aligned_eeg)
display('VT EEG 30s Interval Means',  df_vt_means_aligned_eeg)
display('VAT EEG 30s Interval Means', df_vat_means_aligned_eeg)


'V EEG 30s Interval Means'

Unnamed: 0,Seconds,theta,alpha,beta
0,30.0,0.43,0.22,0.15
1,60.0,0.44,0.21,0.18
2,90.0,0.51,0.22,0.12
3,120.0,0.55,0.19,0.09
4,150.0,0.58,0.17,0.08
5,180.0,0.53,0.17,0.11
6,210.0,0.53,0.15,0.11
7,240.0,0.47,0.24,0.14
8,270.0,0.36,0.23,0.22


'VA EEG 30s Interval Means'

Unnamed: 0,Seconds,theta,alpha,beta
0,30.0,0.53,0.18,0.14
1,60.0,0.45,0.18,0.19
2,90.0,0.45,0.27,0.14
3,120.0,0.5,0.3,0.12
4,150.0,0.46,0.33,0.1
5,180.0,0.48,0.23,0.15
6,210.0,0.34,0.34,0.21
7,240.0,0.28,0.32,0.21
8,267.83,0.44,0.27,0.15


'VT EEG 30s Interval Means'

Unnamed: 0,Seconds,theta,alpha,beta
0,30.0,0.42,0.19,0.21
1,60.0,0.43,0.18,0.2
2,90.0,0.37,0.23,0.19
3,120.0,0.41,0.18,0.22
4,150.0,0.38,0.22,0.23
5,180.0,0.36,0.19,0.25
6,210.0,0.33,0.22,0.26
7,240.0,0.33,0.24,0.22
8,264.12,0.38,0.2,0.22


'VAT EEG 30s Interval Means'

Unnamed: 0,Seconds,theta,alpha,beta
0,30.0,0.44,0.19,0.19
1,60.0,0.44,0.2,0.19
2,90.0,0.47,0.25,0.15
3,120.0,0.44,0.25,0.16
4,150.0,0.38,0.23,0.23
5,180.0,0.38,0.22,0.23
6,210.0,0.35,0.18,0.27
7,240.0,0.37,0.18,0.23
8,265.5,0.41,0.21,0.2


## Export clean average eeg data to CSV files

In [44]:
import os
import pandas as pd

# ========== Define Participant ID and Gender ==========
participant_id = "P07"   # Change for each participant
gender = "F"             

# ========== Define Output Directory and File ==========
output_dir = r"G:\USYD_PhD_OneDrive\OneDrive - The University of Sydney (Students)\PhD_USYD\2025\Prototype B\EEG"
os.makedirs(output_dir, exist_ok=True)
filename = "B_Participant_EEG.csv"
filepath = os.path.join(output_dir, filename)

# ========== Build Output Row Function for EEG ==========
def build_output_row_eeg(
    participant_id, gender, condition,
    theta_pre, alpha_pre, beta_pre,
    theta_00, alpha_00, beta_00,
    means_df,
    skip_pre=False
):
    theta_list = means_df['theta'].tolist()
    alpha_list = means_df['alpha'].tolist()
    beta_list  = means_df['beta'].tolist()
    if skip_pre:
        row = [
            participant_id, gender, condition,
            "", "", "",                      # no pre values (e.g., VAT)
            theta_00, alpha_00, beta_00
        ]
    else:
        row = [
            participant_id, gender, condition,
            theta_pre, alpha_pre, beta_pre,
            theta_00,  alpha_00,  beta_00
        ]
    row.extend(theta_list)
    row.extend(alpha_list)
    row.extend(beta_list)
    return row

# ========== Using Aligned DataFrames ==========
# V
row_v = build_output_row_eeg(
    participant_id, gender, "V",
    theta_pre_v, alpha_pre_v, beta_pre_v,
    theta_00_v,  alpha_00_v,  beta_00_v,
    df_v_means_aligned_eeg
)

# VA
row_va = build_output_row_eeg(
    participant_id, gender, "VA",
    None, None, None,
    theta_00_va,  alpha_00_va,  beta_00_va,
    df_va_means_aligned_eeg
)

# VT
row_vt = build_output_row_eeg(
    participant_id, gender, "VT",
    None, None, None,
    theta_00_vt,  alpha_00_vt,  beta_00_vt,
    df_vt_means_aligned_eeg
)

# VAT (pre left empty)
row_vat = build_output_row_eeg(
    participant_id, gender, "VAT",
    None, None, None,                 # not used when skip_pre=True
    theta_00_vat, alpha_00_vat, beta_00_vat,
    df_vat_means_aligned_eeg,
    skip_pre=True
)

# ========== Build Columns Based on Number of Windows ==========
n_windows = len(df_v_means_aligned_eeg)
columns = [
    "Participant ID", "Gender", "Condition",
    "Baseline_Pre_theta", "Baseline_Pre_alpha", "Baseline_Pre_beta",
    "Baseline_00_theta", "Baseline_00_alpha", "Baseline_00_beta",
]
columns += [f"theta_{(i+1)*30}s" for i in range(n_windows)]
columns += [f"alpha_{(i+1)*30}s" for i in range(n_windows)]
columns += [f"beta_{(i+1)*30}s"  for i in range(n_windows)]

# ========== Save or Append ==========
rows = [row_v, row_va, row_vt, row_vat]

if not os.path.exists(filepath):
    df_out = pd.DataFrame(rows, columns=columns)
    df_out.to_csv(filepath, index=False)
else:
    df_out = pd.DataFrame(rows, columns=columns)
    df_out.to_csv(filepath, mode='a', index=False, header=False)

print(f"File saved at: {filepath}")


File saved at: G:\USYD_PhD_OneDrive\OneDrive - The University of Sydney (Students)\PhD_USYD\2025\Prototype B\EEG\B_Participant_EEG.csv


## Calculate mean values in 2s pre and post the S3_S

In [36]:
import pandas as pd

# Ensure datetime for all four conditions
df_visual_a_eeg['Datetime_Local'] = pd.to_datetime(df_visual_a_eeg['Datetime_Local'], errors='coerce')
df_va_a_eeg['Datetime_Local']     = pd.to_datetime(df_va_a_eeg['Datetime_Local'], errors='coerce')
df_vt_a_eeg['Datetime_Local']     = pd.to_datetime(df_vt_a_eeg['Datetime_Local'], errors='coerce')
df_vat_a_eeg['Datetime_Local']    = pd.to_datetime(df_vat_a_eeg['Datetime_Local'], errors='coerce')

# Reference timezone from Visual DF
tz = df_visual_a_eeg['Datetime_Local'].dt.tz

# --- align S3_S timestamps to the same timezone (V / VA / VT / VAT) ---
s3_start_time_v = pd.to_datetime(state03_start_time_v)
s3_start_time_v = s3_start_time_v.tz_localize(tz) if s3_start_time_v.tzinfo is None else s3_start_time_v.tz_convert(tz)

s3_start_time_va = pd.to_datetime(state03_start_time_va)
s3_start_time_va = s3_start_time_va.tz_localize(tz) if s3_start_time_va.tzinfo is None else s3_start_time_va.tz_convert(tz)

s3_start_time_vt = pd.to_datetime(state03_start_time_vt)
s3_start_time_vt = s3_start_time_vt.tz_localize(tz) if s3_start_time_vt.tzinfo is None else s3_start_time_vt.tz_convert(tz)

s3_start_time_vat = pd.to_datetime(state03_start_time_vat)
s3_start_time_vat = s3_start_time_vat.tz_localize(tz) if s3_start_time_vat.tzinfo is None else s3_start_time_vat.tz_convert(tz)

# Window
interval_before = pd.Timedelta(seconds=2)
interval_after  = pd.Timedelta(seconds=2)

def before_after_windows(df, s3_start):
    df_before = df[(df['Datetime_Local'] >= (s3_start - interval_before)) &
                   (df['Datetime_Local'] <  s3_start)]
    df_after  = df[(df['Datetime_Local'] >=  s3_start) &
                   (df['Datetime_Local'] <  (s3_start + interval_after))]

    mean_before = df_before[['Delta','Theta','Alpha','Beta','Gamma']].mean().to_frame().T
    mean_before['Window'] = 'Before'
    mean_after  = df_after[['Delta','Theta','Alpha','Beta','Gamma']].mean().to_frame().T
    mean_after['Window']  = 'After'

    out = pd.concat([mean_before, mean_after], ignore_index=True)
    return out[['Window','Delta','Theta','Alpha','Beta','Gamma']]

# --- V ---
df_v_window = before_after_windows(df_visual_a_eeg, s3_start_time_v)
print("=== V Condition (Mean band power 2s before and after S3_S) ===")
display(df_v_window)

# --- VA ---
df_va_window = before_after_windows(df_va_a_eeg, s3_start_time_va)
print("=== VA Condition (Mean band power 2s before and after S3_S) ===")
display(df_va_window)

# --- VT ---
df_vt_window = before_after_windows(df_vt_a_eeg, s3_start_time_vt)
print("=== VT Condition (Mean band power 2s before and after S3_S) ===")
display(df_vt_window)

# --- VAT ---
df_vat_window = before_after_windows(df_vat_a_eeg, s3_start_time_vat)
print("=== VAT Condition (Mean band power 2s before and after S3_S) ===")
display(df_vat_window)


=== V Condition (Mean band power 2s before and after S3_S) ===


Unnamed: 0,Window,Delta,Theta,Alpha,Beta,Gamma
0,Before,0.05,0.27,0.28,0.3,0.09
1,After,0.13,0.43,0.29,0.14,0.02


=== VA Condition (Mean band power 2s before and after S3_S) ===


Unnamed: 0,Window,Delta,Theta,Alpha,Beta,Gamma
0,Before,0.06,0.5,0.28,0.11,0.04
1,After,0.06,0.5,0.26,0.13,0.05


=== VT Condition (Mean band power 2s before and after S3_S) ===


Unnamed: 0,Window,Delta,Theta,Alpha,Beta,Gamma
0,Before,0.05,0.35,0.22,0.22,0.16
1,After,0.06,0.4,0.22,0.22,0.11


=== VAT Condition (Mean band power 2s before and after S3_S) ===


Unnamed: 0,Window,Delta,Theta,Alpha,Beta,Gamma
0,Before,0.04,0.31,0.27,0.23,0.14
1,After,0.05,0.35,0.26,0.24,0.11


## Export clean average S3 2s Before and After Data to CSV files

In [37]:
import os
import pandas as pd

# ========== Define Participant ID and Gender ==========
participant_id = "P07"   # Change for each participant
gender = "F"             

# ========== Define Output Directory and File ==========
output_dir = r"G:\USYD_PhD_OneDrive\OneDrive - The University of Sydney (Students)\PhD_USYD\2025\Prototype B\EEG"
os.makedirs(output_dir, exist_ok=True)
filename = "B_Participant_EEG_S3.csv"
filepath = os.path.join(output_dir, filename)

# ========== Build Output Row Function ==========
def build_output_row_eeg_S3(
    participant_id, gender, condition,
    theta_pre, alpha_pre, beta_pre,
    theta_00, alpha_00, beta_00,
    theta_before, alpha_before, beta_before,
    theta_after, alpha_after, beta_after,
    skip_pre=False
):
    if skip_pre:
        row = [
            participant_id, gender, condition,
            "", "", "",       
            theta_00, alpha_00, beta_00,
            theta_before, alpha_before, beta_before,
            theta_after, alpha_after, beta_after
        ]
    else:
        row = [
            participant_id, gender, condition,
            theta_pre, alpha_pre, beta_pre,
            theta_00, alpha_00, beta_00,
            theta_before, alpha_before, beta_before,
            theta_after, alpha_after, beta_after
        ]
    return row

def pick_band(df_window, window_label, band):
    return float(df_window.loc[df_window['Window'] == window_label, band].values[0])

# ========== Fill in from calculations ==========
row_v = build_output_row_eeg_S3(
    participant_id, gender, "V",
    theta_pre_v, alpha_pre_v, beta_pre_v,
    theta_00_v, alpha_00_v, beta_00_v,
    pick_band(df_v_window, 'Before', 'Theta'),
    pick_band(df_v_window, 'Before', 'Alpha'),
    pick_band(df_v_window, 'Before', 'Beta'),
    pick_band(df_v_window, 'After', 'Theta'),
    pick_band(df_v_window, 'After', 'Alpha'),
    pick_band(df_v_window, 'After', 'Beta'),
    skip_pre=False
)

# VA
row_va = build_output_row_eeg_S3(
    participant_id, gender, "VA",
    None, None, None,
    theta_00_va,  alpha_00_va,  beta_00_va,
    pick_band(df_va_window, 'Before', 'Theta'),
    pick_band(df_va_window, 'Before', 'Alpha'),
    pick_band(df_va_window, 'Before', 'Beta'),
    pick_band(df_va_window, 'After', 'Theta'),
    pick_band(df_va_window, 'After', 'Alpha'),
    pick_band(df_va_window, 'After', 'Beta'),
    skip_pre=False
)

# VT
row_vt = build_output_row_eeg_S3(
    participant_id, gender, "VT",
    None, None, None,    theta_00_vt,  alpha_00_vt,  beta_00_vt,
    pick_band(df_vt_window, 'Before', 'Theta'),
    pick_band(df_vt_window, 'Before', 'Alpha'),
    pick_band(df_vt_window, 'Before', 'Beta'),
    pick_band(df_vt_window, 'After', 'Theta'),
    pick_band(df_vt_window, 'After', 'Alpha'),
    pick_band(df_vt_window, 'After', 'Beta'),
    skip_pre=False
)

# VAT（Pre 留空）
row_vat = build_output_row_eeg_S3(
    participant_id, gender, "VAT",
    None, None, None,
    theta_00_vat, alpha_00_vat, beta_00_vat,
    pick_band(df_vat_window, 'Before', 'Theta'),
    pick_band(df_vat_window, 'Before', 'Alpha'),
    pick_band(df_vat_window, 'Before', 'Beta'),
    pick_band(df_vat_window, 'After', 'Theta'),
    pick_band(df_vat_window, 'After', 'Alpha'),
    pick_band(df_vat_window, 'After', 'Beta'),
    skip_pre=True
)

# ========== Columns ==========
columns = [
    "Participant ID", "Gender", "Condition",
    "Baseline_Pre_theta", "Baseline_Pre_alpha", "Baseline_Pre_beta",
    "Baseline_00_theta", "Baseline_00_alpha", "Baseline_00_beta",
    "Before_theta", "Before_alpha", "Before_beta",
    "After_theta",  "After_alpha",  "After_beta"
]

# ========== Save or Append ==========
rows = [row_v, row_va, row_vt, row_vat]

if not os.path.exists(filepath):
    df_out = pd.DataFrame(rows, columns=columns)
    df_out.to_csv(filepath, index=False)
else:
    df_out = pd.DataFrame(rows, columns=columns)
    df_out.to_csv(filepath, mode='a', index=False, header=False)

print(f"File saved at: {filepath}")

File saved at: G:\USYD_PhD_OneDrive\OneDrive - The University of Sydney (Students)\PhD_USYD\2025\Prototype B\EEG\B_Participant_EEG_S3.csv


## Calculate mean values in the three time points of S3

In [38]:
import pandas as pd

# Helper: pick nearest value at ts (align tz to df)
def val_at_timestamp(df, ts, ts_col='Datetime_Local', val_col='Theta'):
    # ensure ts column is datetime
    if not pd.api.types.is_datetime64_any_dtype(df[ts_col]):
        df[ts_col] = pd.to_datetime(df[ts_col], errors='coerce')

    ts = pd.to_datetime(ts)

    # align timezone
    tz = df[ts_col].dt.tz
    if tz is not None:
        if ts.tzinfo is None:
            ts = ts.tz_localize(tz)
        else:
            ts = ts.tz_convert(tz)

    # guard empty / all-NaT
    if df[ts_col].isna().all():
        return None
    idx = (df[ts_col] - ts).abs().idxmin()
    val = df.loc[idx, val_col] if val_col in df.columns else None
    return None if pd.isna(val) else round(float(val), 2)

# List candidate bands; we will only use those that actually exist in each DF
candidate_bands = ['Delta', 'Theta', 'Alpha', 'Beta', 'Gamma']

# Build a row for one condition
def build_band_row(label, df, t_s, t_m, t_e):
    bands_present = [b for b in candidate_bands if b in df.columns]
    row = {'Condition': label}
    # S3_S / S3_M / S3_E for each band
    for band in bands_present:
        row[f"S3_S_{band}"] = val_at_timestamp(df, t_s, val_col=band)
        row[f"S3_M_{band}"] = val_at_timestamp(df, t_m, val_col=band)
        row[f"S3_E_{band}"] = val_at_timestamp(df, t_e, val_col=band)
    return row, bands_present

# Build rows using your EXISTING time variables & DFs:
rows = []
all_bands = set()

row_v, bands_v = build_band_row("V",
    df_visual_a_eeg, state03_start_time_v,  state03_m_time_v,  end_time_v)
rows.append(row_v); all_bands.update(bands_v)

row_va, bands_va = build_band_row("VA",
    df_va_a_eeg,     state03_start_time_va, state03_m_time_va, end_time_va)
rows.append(row_va); all_bands.update(bands_va)

row_vt, bands_vt = build_band_row("VT",
    df_vt_a_eeg,     state03_start_time_vt, state03_m_time_vt, end_time_vt)
rows.append(row_vt); all_bands.update(bands_vt)

row_vat, bands_vat = build_band_row("VAT",
    df_vat_a_eeg,    state03_start_time_vat, state03_m_time_vat, end_time_vat)
rows.append(row_vat); all_bands.update(bands_vat)

# Create a tidy column order: Condition, then per-band S3_S/M/E
ordered_cols = ['Condition']
for band in [b for b in candidate_bands if b in all_bands]:
    ordered_cols += [f"S3_S_{band}", f"S3_M_{band}", f"S3_E_{band}"]

df_eeg_points = pd.DataFrame(rows)[ordered_cols]

display(df_eeg_points)


Unnamed: 0,Condition,S3_S_Delta,S3_M_Delta,S3_E_Delta,S3_S_Theta,S3_M_Theta,S3_E_Theta,S3_S_Alpha,S3_M_Alpha,S3_E_Alpha,S3_S_Beta,S3_M_Beta,S3_E_Beta,S3_S_Gamma,S3_M_Gamma,S3_E_Gamma
0,V,0.07,0.12,0.15,0.39,0.49,0.63,0.27,0.27,0.18,0.22,0.1,0.03,0.04,0.03,0.01
1,VA,0.07,0.09,0.06,0.52,0.58,0.52,0.28,0.23,0.23,0.09,0.08,0.14,0.03,0.02,0.05
2,VT,0.06,0.03,0.08,0.4,0.45,0.44,0.19,0.17,0.19,0.23,0.25,0.2,0.12,0.1,0.08
3,VAT,0.05,0.23,0.09,0.38,0.49,0.53,0.27,0.17,0.13,0.2,0.08,0.14,0.1,0.02,0.1


In [None]:
import os
import pandas as pd

# ========== Define Participant ID and Gender ==========
participant_id = "P07"
gender = "F"

# ========== Define Output Directory and File ==========
output_dir = r"G:\USYD_PhD_OneDrive\OneDrive - The University of Sydney (Students)\PhD_USYD\2025\Prototype B\EEG"
os.makedirs(output_dir, exist_ok=True)
filename = "B_Participant_EEG_S3_Task.csv"
filepath = os.path.join(output_dir, filename)

# ========== Helper: nearest-sample value at a timestamp ==========
def val_at_timestamp(df, ts, ts_col='Datetime_Local', val_col='Theta'):
    """
    Return the value (rounded to 2 decimals) at the row whose timestamp is nearest to `ts`.
    Converts df[ts_col] to datetime if needed, and aligns `ts` timezone to df's tz if present.
    """
    if not pd.api.types.is_datetime64_any_dtype(df[ts_col]):
        df[ts_col] = pd.to_datetime(df[ts_col], errors='coerce')

    ts = pd.to_datetime(ts)

    tz = df[ts_col].dt.tz
    if tz is not None:
        if ts.tzinfo is None:
            ts = ts.tz_localize(tz)
        else:
            ts = ts.tz_convert(tz)

    if df[ts_col].isna().all():
        return None
    idx = (df[ts_col] - ts).abs().idxmin()
    val = df.loc[idx, val_col] if val_col in df.columns else None
    return None if pd.isna(val) else round(float(val), 2)

def fmt2(x):
    try:
        if x is None or (isinstance(x, float) and pd.isna(x)):
            return ""
        return f"{float(x):.2f}"
    except Exception:
        return ""

# ========== Build one row for a condition (Theta/Alpha/Beta) ==========
def build_row_eeg_points(condition_label, df,
                         pre_theta, pre_alpha, pre_beta,
                         zero_theta, zero_alpha, zero_beta,
                         t_s3s, t_s3m, t_s3e,
                         skip_pre=False):
    s3s_theta = val_at_timestamp(df, t_s3s, val_col='Theta')
    s3s_alpha = val_at_timestamp(df, t_s3s, val_col='Alpha')
    s3s_beta  = val_at_timestamp(df, t_s3s, val_col='Beta')

    s3m_theta = val_at_timestamp(df, t_s3m, val_col='Theta')
    s3m_alpha = val_at_timestamp(df, t_s3m, val_col='Alpha')
    s3m_beta  = val_at_timestamp(df, t_s3m, val_col='Beta')

    s3e_theta = val_at_timestamp(df, t_s3e, val_col='Theta')
    s3e_alpha = val_at_timestamp(df, t_s3e, val_col='Alpha')
    s3e_beta  = val_at_timestamp(df, t_s3e, val_col='Beta')

    if skip_pre:
        return [
            participant_id, gender, condition_label,
            "", "", "",
            fmt2(zero_theta), fmt2(zero_alpha), fmt2(zero_beta),
            fmt2(s3s_theta), fmt2(s3s_alpha), fmt2(s3s_beta),
            fmt2(s3m_theta), fmt2(s3m_alpha), fmt2(s3m_beta),
            fmt2(s3e_theta), fmt2(s3e_alpha), fmt2(s3e_beta),
        ]
    else:
        return [
            participant_id, gender, condition_label,
            fmt2(pre_theta), fmt2(pre_alpha), fmt2(pre_beta),
            fmt2(zero_theta), fmt2(zero_alpha), fmt2(zero_beta),
            fmt2(s3s_theta), fmt2(s3s_alpha), fmt2(s3s_beta),
            fmt2(s3m_theta), fmt2(s3m_alpha), fmt2(s3m_beta),
            fmt2(s3e_theta), fmt2(s3e_alpha), fmt2(s3e_beta),
        ]

# ========== Build rows using EXISTING variables & DFs ==========
# V:
row_v = build_row_eeg_points(
    "V",  df_visual_a_eeg,
    theta_pre_v, alpha_pre_v, beta_pre_v,
    theta_00_v,  alpha_00_v,  beta_00_v,
    state03_start_time_v, state03_m_time_v, end_time_v,
    skip_pre=False
)

# VA:
row_va = build_row_eeg_points(
    "VA", df_va_a_eeg,
    None, None, None,                         # ignored when skip_pre=True
    theta_00_va, alpha_00_va, beta_00_va,
    state03_start_time_va, state03_m_time_va, end_time_va,
    skip_pre=True
)

# VT：
row_vt = build_row_eeg_points(
    "VT", df_vt_a_eeg,
    None, None, None,
    theta_00_vt, alpha_00_vt, beta_00_vt,
    state03_start_time_vt, state03_m_time_vt, end_time_vt,
    skip_pre=True
)

# VAT
row_vat = build_row_eeg_points(
    "VAT", df_vat_a_eeg,
    None, None, None,
    theta_00_vat, alpha_00_vat, beta_00_vat,
    state03_start_time_vat, state03_m_time_vat, end_time_vat,
    skip_pre=True
)

# ========== Columns ==========
columns = [
    "Participant ID", "Gender", "Condition",
    "Baseline_Pre_theta", "Baseline_Pre_alpha", "Baseline_Pre_beta",
    "Baseline_00_theta", "Baseline_00_alpha", "Baseline_00_beta",
    "S3_S_Theta", "S3_S_Alpha", "S3_S_Beta",
    "S3_M_Theta", "S3_M_Alpha", "S3_M_Beta",
    "S3_E_Theta", "S3_E_Alpha", "S3_E_Beta",
]

# ========== Save or Append ==========
rows = [row_v, row_va, row_vt, row_vat]

if not os.path.exists(filepath):
    pd.DataFrame(rows, columns=columns).to_csv(filepath, index=False)
else:
    pd.DataFrame(rows, columns=columns).to_csv(filepath, mode='a', index=False, header=False)

print(f"File saved at: {filepath}")


File saved at: G:\USYD_PhD_OneDrive\OneDrive - The University of Sydney (Students)\PhD_USYD\2025\Prototype B\EEG\B_Participant_EEG_S3_Task.csv
