In [4]:
import pyxdf
import pandas as pd
import numpy as np
from datetime import datetime, timedelta, timezone
from scipy import stats
import os
from IPython.display import display
import matplotlib.pyplot as plt

In [5]:
from pathlib import Path
import re

BASE_DIR = Path(r".")       
SUBJECT  = "sub-P014"      
# ============================

CONDITIONS = ["v", "va", "vt", "vat"]

run_re = re.compile(r"_run-(\d+)_eeg\.xdf$", re.IGNORECASE)

def pick_latest_run(paths):
    """Pick the highest run-N file; fall back to newest modified time if no run number."""
    if not paths:
        return None
    # Try by run number
    with_run = []
    for p in paths:
        m = run_re.search(p.name)
        if m:
            with_run.append((int(m.group(1)), p))
    if with_run:
        with_run.sort(key=lambda x: x[0], reverse=True)
        return with_run[0][1]
    # Fallback: latest modified
    return sorted(paths, key=lambda p: p.stat().st_mtime, reverse=True)[0]

def find_condition_paths(base_dir: Path, subject: str):
    """Return dict condition->Path by scanning sub-<ID>/ses-<cond>/eeg/*.xdf."""
    result = {}
    subj_dir = base_dir / subject
    for cond in CONDITIONS:
        eeg_dir = subj_dir / f"ses-{cond}" / "eeg"
        candidates = list(eeg_dir.glob(f"{subject}_ses-{cond}_task-Default_run-*_eeg.xdf"))
        # If strict pattern fails, accept any *.xdf under eeg
        if not candidates:
            candidates = list(eeg_dir.glob("*.xdf"))
        chosen = pick_latest_run(candidates)
        if chosen:
            result[cond] = chosen
    return result

# -------- header helpers (reuse your logic) --------
def get_datetime_from_header(header):
    if 'info' in header and 'datetime' in header['info']:
        dt_list = header['info']['datetime']
        if isinstance(dt_list, list) and len(dt_list) > 0:
            return dt_list[0]
    return "Datetime not found"

def print_stream_info(streams, condition_name, datetime_str):
    print(f"\n--- Stream Information ({condition_name}) ---")
    print(f"Datetime from header: {datetime_str}")
    for i, stream in enumerate(streams):
        stream_name = stream['info']['name'][0]
        channel_count = stream['info']['channel_count'][0]
        print(f"Stream {i+1} Name: {stream_name}, Channel Count: {channel_count}, Datetime: {datetime_str}")

paths = find_condition_paths(BASE_DIR, SUBJECT)

In [6]:
# ---- Map discovered paths back to your original variable names ----
def require_found(paths_dict, cond_key, var_name):
    """Ensure a condition path exists; raise a clear error if missing."""
    p = paths_dict.get(cond_key)
    if p is None:
        raise FileNotFoundError(
            f"Missing file for condition '{cond_key}'. "
            f"Please check folder structure: sub-<ID>/ses-{cond_key}/eeg/*.xdf "
            f"and ensure it exists so we can assign `{var_name}`."
        )
    return str(p)  # keep as Windows-style path string

# Assign variables exactly as before
visual_file_path = require_found(paths, "v",   "visual_file_path")
va_file_path     = require_found(paths, "va",  "va_file_path")
vt_file_path     = require_found(paths, "vt",  "vt_file_path")
vat_file_path    = require_found(paths, "vat", "vat_file_path")

# (Optional) print to verify
print("\n== Assigned variables ==")
print("visual_file_path:", visual_file_path)
print("va_file_path    :", va_file_path)
print("vt_file_path    :", vt_file_path)
print("vat_file_path   :", vat_file_path)



== Assigned variables ==
visual_file_path: sub-P014\ses-v\eeg\sub-P014_ses-v_task-Default_run-001_eeg.xdf
va_file_path    : sub-P014\ses-va\eeg\sub-P014_ses-va_task-Default_run-001_eeg.xdf
vt_file_path    : sub-P014\ses-vt\eeg\sub-P014_ses-vt_task-Default_run-001_eeg.xdf
vat_file_path   : sub-P014\ses-vat\eeg\sub-P014_ses-vat_task-Default_run-001_eeg.xdf


## Load visual only data

In [7]:
# Load XDF file for Visual condition
visual_streams, visual_header = pyxdf.load_xdf(visual_file_path)
visual_datetime = get_datetime_from_header(visual_header)
print("Visual condition datetime:", visual_datetime)
    
# Print stream info for Visual condition with datetime
print_stream_info(visual_streams, "Visual", visual_datetime)

Stream 2: Calculated effective sampling rate 40.3620 Hz is different from specified rate 250.0000 Hz.
Stream 7: Calculated effective sampling rate 40.3945 Hz is different from specified rate 250.0000 Hz.
Stream 3: Calculated effective sampling rate 40.8761 Hz is different from specified rate 250.0000 Hz.
Stream 8: Calculated effective sampling rate 379.9766 Hz is different from specified rate 250.0000 Hz.
Stream 5: Calculated effective sampling rate 25.9551 Hz is different from specified rate 250.0000 Hz.
Stream 11: Calculated effective sampling rate 443.8161 Hz is different from specified rate 250.0000 Hz.
Stream 10: Calculated effective sampling rate 38.2778 Hz is different from specified rate 250.0000 Hz.


Visual condition datetime: 2025-10-10T13:59:34+1100

--- Stream Information (Visual) ---
Datetime from header: 2025-10-10T13:59:34+1100
Stream 1 Name: obci_stream_8, Channel Count: 4, Datetime: 2025-10-10T13:59:34+1100
Stream 2 Name: obci_stream_5, Channel Count: 2, Datetime: 2025-10-10T13:59:34+1100
Stream 3 Name: obci_stream_6, Channel Count: 1, Datetime: 2025-10-10T13:59:34+1100
Stream 4 Name: obci_stream_0, Channel Count: 18, Datetime: 2025-10-10T13:59:34+1100
Stream 5 Name: obci_stream_4, Channel Count: 18, Datetime: 2025-10-10T13:59:34+1100
Stream 6 Name: obci_stream_2, Channel Count: 6, Datetime: 2025-10-10T13:59:34+1100
Stream 7 Name: obci_stream_7, Channel Count: 1, Datetime: 2025-10-10T13:59:34+1100
Stream 8 Name: obci_stream_1, Channel Count: 5, Datetime: 2025-10-10T13:59:34+1100
Stream 9 Name: obci_stream_3, Channel Count: 125, Datetime: 2025-10-10T13:59:34+1100
Stream 10 Name: obci_stream_11, Channel Count: 3, Datetime: 2025-10-10T13:59:34+1100
Stream 11 Name: obci_stream_

## Load VA data

In [8]:
# Load XDF file for Visual + Auditory condition (VA)
va_streams, va_header = pyxdf.load_xdf(va_file_path)
va_datetime = get_datetime_from_header(va_header)
print("Visual + Auditory condition datetime:", va_datetime)

# Print stream info for Visual + Auditory condition with datetime
print_stream_info(va_streams, "Visual + Auditory", va_datetime)

Stream 10: Calculated effective sampling rate 32.5189 Hz is different from specified rate 250.0000 Hz.
Stream 2: Calculated effective sampling rate 38.3058 Hz is different from specified rate 250.0000 Hz.
Stream 4: Calculated effective sampling rate 38.1692 Hz is different from specified rate 250.0000 Hz.
Stream 8: Calculated effective sampling rate 307.7353 Hz is different from specified rate 250.0000 Hz.
Stream 5: Calculated effective sampling rate 22.6840 Hz is different from specified rate 250.0000 Hz.
Stream 11: Calculated effective sampling rate 326.0299 Hz is different from specified rate 250.0000 Hz.
Stream 9: Calculated effective sampling rate 192.5431 Hz is different from specified rate 250.0000 Hz.
Stream 7: Calculated effective sampling rate 38.8304 Hz is different from specified rate 250.0000 Hz.


Visual + Auditory condition datetime: 2025-10-10T14:46:52+1100

--- Stream Information (Visual + Auditory) ---
Datetime from header: 2025-10-10T14:46:52+1100
Stream 1 Name: obci_stream_8, Channel Count: 4, Datetime: 2025-10-10T14:46:52+1100
Stream 2 Name: obci_stream_11, Channel Count: 3, Datetime: 2025-10-10T14:46:52+1100
Stream 3 Name: obci_stream_9, Channel Count: 9, Datetime: 2025-10-10T14:46:52+1100
Stream 4 Name: obci_stream_7, Channel Count: 1, Datetime: 2025-10-10T14:46:52+1100
Stream 5 Name: obci_stream_0, Channel Count: 18, Datetime: 2025-10-10T14:46:52+1100
Stream 6 Name: obci_stream_4, Channel Count: 18, Datetime: 2025-10-10T14:46:52+1100
Stream 7 Name: obci_stream_5, Channel Count: 2, Datetime: 2025-10-10T14:46:52+1100
Stream 8 Name: obci_stream_2, Channel Count: 6, Datetime: 2025-10-10T14:46:52+1100
Stream 9 Name: obci_stream_1, Channel Count: 5, Datetime: 2025-10-10T14:46:52+1100
Stream 10 Name: obci_stream_3, Channel Count: 125, Datetime: 2025-10-10T14:46:52+1100
Stream

## Load VT data

In [9]:
# Load XDF file for Visual + Thermal condition (VT)
vt_streams, vt_header = pyxdf.load_xdf(vt_file_path)
vt_datetime = get_datetime_from_header(vt_header)
print("Visual + Thermal condition datetime:", vt_datetime)

# Print stream info for Visual + Thermal condition with datetime
print_stream_info(vt_streams, "Visual + Thermal", vt_datetime)

Stream 2: Calculated effective sampling rate 39.0305 Hz is different from specified rate 250.0000 Hz.
Stream 8: Calculated effective sampling rate 192.2784 Hz is different from specified rate 250.0000 Hz.
Stream 6: Calculated effective sampling rate 39.6434 Hz is different from specified rate 250.0000 Hz.
Stream 9: Calculated effective sampling rate 35.2238 Hz is different from specified rate 250.0000 Hz.
Stream 12: Calculated effective sampling rate 21.7044 Hz is different from specified rate 250.0000 Hz.
Stream 3: Calculated effective sampling rate 38.8067 Hz is different from specified rate 250.0000 Hz.
Stream 13: Calculated effective sampling rate 343.4047 Hz is different from specified rate 250.0000 Hz.
Stream 7: Calculated effective sampling rate 309.4436 Hz is different from specified rate 250.0000 Hz.


Visual + Thermal condition datetime: 2025-10-10T14:33:35+1100

--- Stream Information (Visual + Thermal) ---
Datetime from header: 2025-10-10T14:33:35+1100
Stream 1 Name: obci_stream_9, Channel Count: 9, Datetime: 2025-10-10T14:33:35+1100
Stream 2 Name: obci_stream_5, Channel Count: 2, Datetime: 2025-10-10T14:33:35+1100
Stream 3 Name: obci_stream_8, Channel Count: 4, Datetime: 2025-10-10T14:33:35+1100
Stream 4 Name: obci_stream_0, Channel Count: 18, Datetime: 2025-10-10T14:33:35+1100
Stream 5 Name: obci_stream_10, Channel Count: 2, Datetime: 2025-10-10T14:33:35+1100
Stream 6 Name: obci_stream_6, Channel Count: 1, Datetime: 2025-10-10T14:33:35+1100
Stream 7 Name: obci_stream_11, Channel Count: 3, Datetime: 2025-10-10T14:33:35+1100
Stream 8 Name: obci_stream_1, Channel Count: 5, Datetime: 2025-10-10T14:33:35+1100
Stream 9 Name: obci_stream_4, Channel Count: 18, Datetime: 2025-10-10T14:33:35+1100
Stream 10 Name: obci_stream_3, Channel Count: 125, Datetime: 2025-10-10T14:33:35+1100
Stream 

## Load VAT data

In [10]:
# Load XDF file for Visual + Auditory condition
vat_streams, vat_header = pyxdf.load_xdf(vat_file_path)
vat_datetime = get_datetime_from_header(vat_header)
print("VAT condition datetime:", vat_datetime)

# Print stream info for Visual + Auditory condition with datetime
print_stream_info(vat_streams, "VAT", vat_datetime)

Stream 3: Calculated effective sampling rate 41.0675 Hz is different from specified rate 250.0000 Hz.
Stream 8: Calculated effective sampling rate 338.9286 Hz is different from specified rate 250.0000 Hz.
Stream 5: Calculated effective sampling rate 23.1637 Hz is different from specified rate 250.0000 Hz.
Stream 11: Calculated effective sampling rate 373.5739 Hz is different from specified rate 250.0000 Hz.
Stream 9: Calculated effective sampling rate 202.7190 Hz is different from specified rate 250.0000 Hz.
Stream 10: Calculated effective sampling rate 35.2274 Hz is different from specified rate 250.0000 Hz.
Stream 6: Calculated effective sampling rate 41.6620 Hz is different from specified rate 250.0000 Hz.
Stream 7: Calculated effective sampling rate 39.9000 Hz is different from specified rate 250.0000 Hz.


VAT condition datetime: 2025-10-10T14:18:03+1100

--- Stream Information (VAT) ---
Datetime from header: 2025-10-10T14:18:03+1100
Stream 1 Name: obci_stream_4, Channel Count: 18, Datetime: 2025-10-10T14:18:03+1100
Stream 2 Name: obci_stream_2, Channel Count: 6, Datetime: 2025-10-10T14:18:03+1100
Stream 3 Name: obci_stream_1, Channel Count: 5, Datetime: 2025-10-10T14:18:03+1100
Stream 4 Name: GSR_PPG, Channel Count: 10, Datetime: 2025-10-10T14:18:03+1100
Stream 5 Name: obci_stream_0, Channel Count: 18, Datetime: 2025-10-10T14:18:03+1100
Stream 6 Name: obci_stream_8, Channel Count: 4, Datetime: 2025-10-10T14:18:03+1100
Stream 7 Name: obci_stream_3, Channel Count: 125, Datetime: 2025-10-10T14:18:03+1100
Stream 8 Name: obci_stream_10, Channel Count: 2, Datetime: 2025-10-10T14:18:03+1100
Stream 9 Name: obci_stream_9, Channel Count: 9, Datetime: 2025-10-10T14:18:03+1100
Stream 10 Name: obci_stream_11, Channel Count: 3, Datetime: 2025-10-10T14:18:03+1100
Stream 11 Name: obci_stream_6, Channel

## Extract data streams for SCL

In [11]:
# Target stream name for Shimmer GSR_PPG
target_stream_name = "GSR_PPG"

def extract_shimmer_data_from_xdf(file_path):
    # Load the XDF file
    streams, header = pyxdf.load_xdf(file_path)

    # Get start time
    datetime_list = header['info'].get('datetime', [])
    if isinstance(datetime_list, list) and len(datetime_list) > 0:
        datetime_str = datetime_list[0]
    else:
        raise ValueError("Start time not found in header.")
    start_time = datetime.strptime(datetime_str, "%Y-%m-%dT%H:%M:%S%z")

    # Find target stream
    selected_stream = None
    for stream in streams:
        if stream['info']['name'][0].lower() == target_stream_name.lower():
            selected_stream = stream
            break
    if not selected_stream:
        raise ValueError(f"Stream '{target_stream_name}' not found in file.")

    # Custom shimmer channel names
    custom_channel_names = [
        "Timestamp (RAW)", "Timestamp (CAL)", "System Timestamp (LSL)", 
        "Internal ADC A13 (RAW)", "Internal ADC A13 (CAL)", 
        "GSR (RAW)", "GSR (CAL)", "GSR Conductance (CAL)","Heart Rate PPG", "IBI PPG"
    ]

    # Load raw data and validate shape
    raw_data = np.array(selected_stream['time_series'])
    if raw_data.shape[1] == len(custom_channel_names):
        pass
    elif raw_data.shape[0] == len(custom_channel_names):
        raw_data = raw_data.T
    else:
        raise ValueError(f"Unexpected shape: {raw_data.shape}. Cannot match {len(custom_channel_names)} channels.")

    # Create DataFrame
    df = pd.DataFrame(raw_data, columns=custom_channel_names)
    df["Timestamp"] = np.array(selected_stream['time_stamps'])

    # Sampling rate
    sampling_rate = float(selected_stream['info']['nominal_srate'][0])
    
    # Relative time in milliseconds
    df["Relative Timestamp (ms)"] = df["Timestamp (CAL)"] - df["Timestamp (CAL)"].iloc[0]

    # Convert to datetime with milliseconds
    df["Timestamp (UTC+10)"] = df["Relative Timestamp (ms)"].apply(
        lambda x: start_time + timedelta(milliseconds=x)
    )

    # Remove timezone info for consistency in later processing
    df["Timestamp (UTC+10)"] = df["Timestamp (UTC+10)"].dt.tz_localize(None)

    # Formatted time for plotting/debug
    df["Time (HH:MM:SS)"] = df["Timestamp (UTC+10)"].apply(
        lambda x: x.strftime('%H:%M:%S.%f')[:-3]
    )

    # Print metadata
    print("\n✅ Extracted Data from Selected Stream:")
    print("Stream Name:", selected_stream['info']['name'][0])
    print("Stream Type:", selected_stream['info']['type'][0])
    print("Number of Channels:", selected_stream['info']['channel_count'][0])
    print("Sampling Rate:", sampling_rate)
    print("Number of Samples:", len(df))
    print("Channel Names:", custom_channel_names)

    return df


## Experiment time and State

In [12]:
import pandas as pd

# Read the log files (updated file names)
log_v   = pd.read_csv('ButtonToggleLog_v.csv')
log_va  = pd.read_csv('ButtonToggleLog_va.csv')
log_vt  = pd.read_csv('ButtonToggleLog_vt.csv')
log_vat = pd.read_csv('ButtonToggleLog_vat.csv')

# Utility to fetch time for a given Group and DataFrame
def get_time(df, element_name, col='UTC+10_Time', add_hours=1, as_str=True):
    row = df.loc[df['Group'] == element_name, col]
    if row.empty:
        return None
    ts = pd.to_datetime(row.iloc[0], errors='coerce')
    if pd.isna(ts):
        return None

    ts = (ts + pd.Timedelta(hours=add_hours)).round('10ms')

    if not as_str:
        return ts

    centisec = ts.microsecond // 10000  # 0..99
    return f"{ts:%Y-%m-%d %H:%M:%S}.{centisec:02d}"

# ---- Visual ----
start_time_v   = get_time(log_v, 'S01_S')
state02_start_time_v = get_time(log_v, 'S02_S')
state03_start_time_v = get_time(log_v, 'S03_S')
state03_m_time_v     = get_time(log_v, 'S03_M')
end_time_v     = get_time(log_v, 'S03_E')

# ---- VA ----
start_time_va   = get_time(log_va, 'S01_S')
state02_start_time_va = get_time(log_va, 'S02_S')
state03_start_time_va = get_time(log_va, 'S03_S')
state03_m_time_va     = get_time(log_va, 'S03_M')
end_time_va     = get_time(log_va, 'S03_E')

# ---- VT ----
start_time_vt   = get_time(log_vt, 'S01_S')
state02_start_time_vt = get_time(log_vt, 'S02_S')
state03_start_time_vt = get_time(log_vt, 'S03_S')
state03_m_time_vt     = get_time(log_vt, 'S03_M')
end_time_vt     = get_time(log_vt, 'S03_E')

# ---- VAT ----
start_time_vat   = get_time(log_vat, 'S01_S')
state02_start_time_vat = get_time(log_vat, 'S02_S')
state03_start_time_vat = get_time(log_vat, 'S03_S')
state03_m_time_vat     = get_time(log_vat, 'S03_M')
end_time_vat     = get_time(log_vat, 'S03_E')

# ---- Print results ----
print("Visual condition start:", start_time_v)
print("Visual State 02 start:", state02_start_time_v)
print("Visual State 03 start:", state03_start_time_v)
print("Visual State 03 m_time:", state03_m_time_v)
print("Visual end:", end_time_v)
print("---")
print("VA condition start:", start_time_va)
print("VA State 02 start:", state02_start_time_va)
print("VA State 03 start:", state03_start_time_va)
print("VA State 03 m_time:", state03_m_time_va)
print("VA end:", end_time_va)
print("---")
print("VT condition start:", start_time_vt)
print("VT State 02 start:", state02_start_time_vt)
print("VT State 03 start:", state03_start_time_vt)
print("VT State 03 m_time:", state03_m_time_vt)
print("VT end:", end_time_vt)
print("---")
print("VAT condition start:", start_time_vat)
print("VAT State 02 start:", state02_start_time_vat)
print("VAT State 03 start:", state03_start_time_vat)
print("VAT State 03 m_time:", state03_m_time_vat)
print("VAT end:", end_time_vat)


Visual condition start: 2025-10-10 14:06:41.28
Visual State 02 start: 2025-10-10 14:09:46.52
Visual State 03 start: 2025-10-10 14:10:51.83
Visual State 03 m_time: 2025-10-10 14:11:05.96
Visual end: 2025-10-10 14:11:24.30
---
VA condition start: 2025-10-10 14:49:28.21
VA State 02 start: 2025-10-10 14:52:32.16
VA State 03 start: 2025-10-10 14:53:33.81
VA State 03 m_time: 2025-10-10 14:53:44.99
VA end: 2025-10-10 14:53:55.33
---
VT condition start: 2025-10-10 14:37:01.43
VT State 02 start: 2025-10-10 14:40:02.04
VT State 03 start: 2025-10-10 14:41:04.22
VT State 03 m_time: 2025-10-10 14:41:19.62
VT end: 2025-10-10 14:41:32.79
---
VAT condition start: 2025-10-10 14:22:41.10
VAT State 02 start: 2025-10-10 14:25:43.50
VAT State 03 start: 2025-10-10 14:26:48.91
VAT State 03 m_time: 2025-10-10 14:27:04.78
VAT end: 2025-10-10 14:27:19.17


## Load Shimmer Data

In [13]:
# Load GSR data for all four conditions
df_visual_gsr = extract_shimmer_data_from_xdf(visual_file_path)
df_va_gsr     = extract_shimmer_data_from_xdf(va_file_path)
df_vt_gsr     = extract_shimmer_data_from_xdf(vt_file_path)
df_vat_gsr    = extract_shimmer_data_from_xdf(vat_file_path)

Stream 2: Calculated effective sampling rate 40.3620 Hz is different from specified rate 250.0000 Hz.
Stream 7: Calculated effective sampling rate 40.3945 Hz is different from specified rate 250.0000 Hz.
Stream 3: Calculated effective sampling rate 40.8761 Hz is different from specified rate 250.0000 Hz.
Stream 8: Calculated effective sampling rate 379.9766 Hz is different from specified rate 250.0000 Hz.
Stream 5: Calculated effective sampling rate 25.9551 Hz is different from specified rate 250.0000 Hz.
Stream 11: Calculated effective sampling rate 443.8161 Hz is different from specified rate 250.0000 Hz.
Stream 10: Calculated effective sampling rate 38.2778 Hz is different from specified rate 250.0000 Hz.



✅ Extracted Data from Selected Stream:
Stream Name: GSR_PPG
Stream Type: GSR
Number of Channels: 10
Sampling Rate: 10.2
Number of Samples: 6335
Channel Names: ['Timestamp (RAW)', 'Timestamp (CAL)', 'System Timestamp (LSL)', 'Internal ADC A13 (RAW)', 'Internal ADC A13 (CAL)', 'GSR (RAW)', 'GSR (CAL)', 'GSR Conductance (CAL)', 'Heart Rate PPG', 'IBI PPG']


Stream 10: Calculated effective sampling rate 32.5189 Hz is different from specified rate 250.0000 Hz.
Stream 2: Calculated effective sampling rate 38.3058 Hz is different from specified rate 250.0000 Hz.
Stream 4: Calculated effective sampling rate 38.1692 Hz is different from specified rate 250.0000 Hz.
Stream 8: Calculated effective sampling rate 307.7353 Hz is different from specified rate 250.0000 Hz.
Stream 5: Calculated effective sampling rate 22.6840 Hz is different from specified rate 250.0000 Hz.
Stream 11: Calculated effective sampling rate 326.0299 Hz is different from specified rate 250.0000 Hz.
Stream 9: Calculated effective sampling rate 192.5431 Hz is different from specified rate 250.0000 Hz.
Stream 7: Calculated effective sampling rate 38.8304 Hz is different from specified rate 250.0000 Hz.



✅ Extracted Data from Selected Stream:
Stream Name: GSR_PPG
Stream Type: GSR
Number of Channels: 10
Sampling Rate: 10.2
Number of Samples: 4891
Channel Names: ['Timestamp (RAW)', 'Timestamp (CAL)', 'System Timestamp (LSL)', 'Internal ADC A13 (RAW)', 'Internal ADC A13 (CAL)', 'GSR (RAW)', 'GSR (CAL)', 'GSR Conductance (CAL)', 'Heart Rate PPG', 'IBI PPG']


Stream 2: Calculated effective sampling rate 39.0305 Hz is different from specified rate 250.0000 Hz.
Stream 8: Calculated effective sampling rate 192.2784 Hz is different from specified rate 250.0000 Hz.
Stream 6: Calculated effective sampling rate 39.6434 Hz is different from specified rate 250.0000 Hz.
Stream 9: Calculated effective sampling rate 35.2238 Hz is different from specified rate 250.0000 Hz.
Stream 12: Calculated effective sampling rate 21.7044 Hz is different from specified rate 250.0000 Hz.
Stream 3: Calculated effective sampling rate 38.8067 Hz is different from specified rate 250.0000 Hz.
Stream 13: Calculated effective sampling rate 343.4047 Hz is different from specified rate 250.0000 Hz.
Stream 7: Calculated effective sampling rate 309.4436 Hz is different from specified rate 250.0000 Hz.



✅ Extracted Data from Selected Stream:
Stream Name: GSR_PPG
Stream Type: GSR
Number of Channels: 10
Sampling Rate: 10.2
Number of Samples: 5222
Channel Names: ['Timestamp (RAW)', 'Timestamp (CAL)', 'System Timestamp (LSL)', 'Internal ADC A13 (RAW)', 'Internal ADC A13 (CAL)', 'GSR (RAW)', 'GSR (CAL)', 'GSR Conductance (CAL)', 'Heart Rate PPG', 'IBI PPG']


Stream 3: Calculated effective sampling rate 41.0675 Hz is different from specified rate 250.0000 Hz.
Stream 8: Calculated effective sampling rate 338.9286 Hz is different from specified rate 250.0000 Hz.
Stream 5: Calculated effective sampling rate 23.1637 Hz is different from specified rate 250.0000 Hz.
Stream 11: Calculated effective sampling rate 373.5739 Hz is different from specified rate 250.0000 Hz.
Stream 9: Calculated effective sampling rate 202.7190 Hz is different from specified rate 250.0000 Hz.
Stream 10: Calculated effective sampling rate 35.2274 Hz is different from specified rate 250.0000 Hz.
Stream 6: Calculated effective sampling rate 41.6620 Hz is different from specified rate 250.0000 Hz.
Stream 7: Calculated effective sampling rate 39.9000 Hz is different from specified rate 250.0000 Hz.



✅ Extracted Data from Selected Stream:
Stream Name: GSR_PPG
Stream Type: GSR
Number of Channels: 10
Sampling Rate: 10.2
Number of Samples: 6110
Channel Names: ['Timestamp (RAW)', 'Timestamp (CAL)', 'System Timestamp (LSL)', 'Internal ADC A13 (RAW)', 'Internal ADC A13 (CAL)', 'GSR (RAW)', 'GSR (CAL)', 'GSR Conductance (CAL)', 'Heart Rate PPG', 'IBI PPG']


In [14]:
# Visual
display(pd.concat([df_visual_gsr.head(2), df_visual_gsr.tail(2)]))

# VA
display(pd.concat([df_va_gsr.head(2), df_va_gsr.tail(2)]))

# VT
display(pd.concat([df_vt_gsr.head(2), df_vt_gsr.tail(2)]))

# VAT
display(pd.concat([df_vat_gsr.head(2), df_vat_gsr.tail(2)]))


Unnamed: 0,Timestamp (RAW),Timestamp (CAL),System Timestamp (LSL),Internal ADC A13 (RAW),Internal ADC A13 (CAL),GSR (RAW),GSR (CAL),GSR Conductance (CAL),Heart Rate PPG,IBI PPG,Timestamp,Relative Timestamp (ms),Timestamp (UTC+10),Time (HH:MM:SS)
0,8548045.0,0.0,1760065000000.0,1523.0,1115.750977,35033.0,431.279633,2.318681,-1.0,-1.0,188116.749243,0.0,2025-10-10 13:59:34.000000,13:59:34.000
1,8551258.0,98.052979,1760065000000.0,3599.0,2636.630127,35032.0,431.552338,2.317216,-1.0,-1.0,188116.847294,98.052979,2025-10-10 13:59:34.098053,13:59:34.098
6333,12118758.0,620969.5,1760066000000.0,1861.0,1363.369995,18363.0,151.081757,6.618933,87.0,-1.0,188737.706956,620969.5,2025-10-10 14:09:54.969500,14:09:54.969
6334,12121971.0,621067.5625,1760066000000.0,2184.0,1600.0,18361.0,151.315186,6.608722,87.0,-1.0,188737.805007,621067.5625,2025-10-10 14:09:55.067562,14:09:55.067


Unnamed: 0,Timestamp (RAW),Timestamp (CAL),System Timestamp (LSL),Internal ADC A13 (RAW),Internal ADC A13 (CAL),GSR (RAW),GSR (CAL),GSR Conductance (CAL),Heart Rate PPG,IBI PPG,Timestamp,Relative Timestamp (ms),Timestamp (UTC+10),Time (HH:MM:SS)
0,13337552.0,2706164.25,1760068000000.0,2038.0,1493.040283,18709.0,119.255707,8.385343,67.0,-1.0,190822.842373,0.0,2025-10-10 14:46:52.000,14:46:52.000
1,13340765.0,2706262.25,1760068000000.0,1879.0,1376.556763,18709.0,119.255707,8.385343,67.0,-1.0,190822.940424,98.0,2025-10-10 14:46:52.098,14:46:52.098
4889,12268693.0,3185545.25,1760069000000.0,1901.0,1392.67395,49864.0,76347.460938,0.013098,-1.0,-1.0,191302.211333,479381.0,2025-10-10 14:54:51.381,14:54:51.381
4890,12271906.0,3185643.25,1760069000000.0,1899.0,1391.20874,49864.0,76347.460938,0.013098,-1.0,-1.0,191302.309383,479479.0,2025-10-10 14:54:51.479,14:54:51.479


Unnamed: 0,Timestamp (RAW),Timestamp (CAL),System Timestamp (LSL),Internal ADC A13 (RAW),Internal ADC A13 (CAL),GSR (RAW),GSR (CAL),GSR Conductance (CAL),Heart Rate PPG,IBI PPG,Timestamp,Relative Timestamp (ms),Timestamp (UTC+10),Time (HH:MM:SS)
0,4002717.0,1909287.625,1760067000000.0,1862.0,1364.102539,18731.0,117.679482,8.497658,101.0,-1.0,190025.98588,0.0,2025-10-10 14:33:35.000000,14:33:35.000
1,4005930.0,1909385.625,1760067000000.0,1819.0,1332.600708,18733.0,117.538254,8.507869,101.0,-1.0,190026.08393,98.0,2025-10-10 14:33:35.098000,14:33:35.098
5220,3997361.0,2421124.25,1760068000000.0,1767.0,1294.505493,18770.0,114.985321,8.696762,101.0,-1.0,190537.809108,511836.625,2025-10-10 14:42:06.836625,14:42:06.836
5221,4000574.0,2421222.25,1760068000000.0,1746.0,1279.12085,18766.0,115.255959,8.676341,101.0,-1.0,190537.907159,511934.625,2025-10-10 14:42:06.934625,14:42:06.934


Unnamed: 0,Timestamp (RAW),Timestamp (CAL),System Timestamp (LSL),Internal ADC A13 (RAW),Internal ADC A13 (CAL),GSR (RAW),GSR (CAL),GSR Conductance (CAL),Heart Rate PPG,IBI PPG,Timestamp,Relative Timestamp (ms),Timestamp (UTC+10),Time (HH:MM:SS)
0,7001519.0,976803.8,1760066000000.0,1865.0,1366.300415,18717.0,118.677673,8.426185,76.0,-1.0,189093.525993,0.0,2025-10-10 14:18:03.000000,14:18:03.000
1,7004732.0,976901.8,1760066000000.0,1822.0,1334.798584,18714.0,118.893776,8.410869,76.0,-1.0,189093.624044,98.0625,2025-10-10 14:18:03.098062,14:18:03.098
6108,9849307.0,1575711.0,1760067000000.0,2113.0,1547.985352,18646.0,124.012344,8.063713,76.0,-1.0,189692.418238,598907.625,2025-10-10 14:28:01.907625,14:28:01.907
6109,9852520.0,1575809.0,1760067000000.0,1884.0,1380.219727,18639.0,124.564384,8.027977,87.0,686.37085,189692.516289,599005.625,2025-10-10 14:28:02.005625,14:28:02.005


## Calculate Baseline SCL

In [15]:
import pandas as pd
from pandas import Timedelta

# Make copies of the original DataFrames to avoid modifying the source data
df_v   = df_visual_gsr.copy()
df_va  = df_va_gsr.copy()
df_vt  = df_vt_gsr.copy()
df_vat = df_vat_gsr.copy()

def get_interval_scl(df, interval_start, interval_end):
    df['Timestamp (UTC+10)'] = pd.to_datetime(df['Timestamp (UTC+10)'])
    tz = df['Timestamp (UTC+10)'].dt.tz
    interval_start = pd.to_datetime(interval_start).tz_localize(tz) if pd.to_datetime(interval_start).tzinfo is None else pd.to_datetime(interval_start)
    interval_end   = pd.to_datetime(interval_end).tz_localize(tz) if pd.to_datetime(interval_end).tzinfo is None else pd.to_datetime(interval_end)
    df_interval = df[(df['Timestamp (UTC+10)'] >= interval_start) & (df['Timestamp (UTC+10)'] < interval_end)]
    mean_scl = df_interval['GSR Conductance (CAL)'].mean()
    mean_scl = round(mean_scl, 2) if pd.notnull(mean_scl) else np.nan
    return mean_scl

# ----- Visual -----
baseline_00_start_v = pd.to_datetime(start_time_v) - Timedelta(minutes=1)
baseline_00_end_v   = pd.to_datetime(start_time_v)
baseline_pre_start_v = pd.to_datetime(start_time_v) - Timedelta(minutes=2)
baseline_pre_end_v   = pd.to_datetime(start_time_v) - Timedelta(minutes=1)

# ----- VA -----
baseline_00_start_va = pd.to_datetime(start_time_va) - Timedelta(minutes=1)
baseline_00_end_va   = pd.to_datetime(start_time_va)
baseline_pre_start_va = pd.to_datetime(start_time_va) - Timedelta(minutes=2)
baseline_pre_end_va   = pd.to_datetime(start_time_va) - Timedelta(minutes=1)

# ----- VT -----
baseline_00_start_vt = pd.to_datetime(start_time_vt) - Timedelta(minutes=1)
baseline_00_end_vt   = pd.to_datetime(start_time_vt)
baseline_pre_start_vt = pd.to_datetime(start_time_vt) - Timedelta(minutes=2)
baseline_pre_end_vt   = pd.to_datetime(start_time_vt) - Timedelta(minutes=1)

# ----- VAT -----
baseline_00_start_vat = pd.to_datetime(start_time_vat) - Timedelta(minutes=1)
baseline_00_end_vat   = pd.to_datetime(start_time_vat)
baseline_pre_start_vat = pd.to_datetime(start_time_vat) - Timedelta(minutes=2)
baseline_pre_end_vat   = pd.to_datetime(start_time_vat) - Timedelta(minutes=1)

# Compute means
scl_00_v   = get_interval_scl(df_v, baseline_00_start_v, baseline_00_end_v)
scl_pre_v  = get_interval_scl(df_v, baseline_pre_start_v, baseline_pre_end_v)

scl_00_va  = get_interval_scl(df_va, baseline_00_start_va, baseline_00_end_va)
scl_pre_va = get_interval_scl(df_va, baseline_pre_start_va, baseline_pre_end_va)

scl_00_vt  = get_interval_scl(df_vt, baseline_00_start_vt, baseline_00_end_vt)
scl_pre_vt = get_interval_scl(df_vt, baseline_pre_start_vt, baseline_pre_end_vt)

scl_00_vat  = get_interval_scl(df_vat, baseline_00_start_vat, baseline_00_end_vat)
scl_pre_vat = get_interval_scl(df_vat, baseline_pre_start_vat, baseline_pre_end_vat)

# Print results
print(f"Visual group Baseline_00 (-1min~0min) scl: {scl_00_v:.2f}")
print(f"Visual group Baseline_pre (-2min~-1min) scl: {scl_pre_v:.2f}")
print("---")
print(f"VA group Baseline_00 (-1min~0min) scl: {scl_00_va:.2f}")
print(f"VA group Baseline_pre (-2min~-1min) scl: {scl_pre_va:.2f}")
print("---")
print(f"VT group Baseline_00 (-1min~0min) scl: {scl_00_vt:.2f}")
print(f"VT group Baseline_pre (-2min~-1min) scl: {scl_pre_vt:.2f}")
print("---")
print(f"VAT group Baseline_00 (-1min~0min) scl: {scl_00_vat:.2f}")
print(f"VAT group Baseline_pre (-2min~-1min) scl: {scl_pre_vat:.2f}")

Visual group Baseline_00 (-1min~0min) scl: 5.14
Visual group Baseline_pre (-2min~-1min) scl: 4.96
---
VA group Baseline_00 (-1min~0min) scl: 9.03
VA group Baseline_pre (-2min~-1min) scl: 8.85
---
VT group Baseline_00 (-1min~0min) scl: 7.97
VT group Baseline_pre (-2min~-1min) scl: 8.57
---
VAT group Baseline_00 (-1min~0min) scl: 7.94
VAT group Baseline_pre (-2min~-1min) scl: 7.80


## Calculate mean values in 30-second windows for SCL

In [16]:
import pandas as pd
pd.set_option('display.float_format', lambda x: f"{x:.2f}")

def calc_rolling_means(
    df,
    start_time,
    end_time,
    interval_s=30,
    min_total_s=270,
    max_total_s=270
):
    d = df.copy()

    # Ensure timezone-aware datetimes aligned to the data column
    d['Timestamp (UTC+10)'] = pd.to_datetime(d['Timestamp (UTC+10)'])
    tz = d['Timestamp (UTC+10)'].dt.tz

    start_time = pd.to_datetime(start_time)
    end_time   = pd.to_datetime(end_time)
    start_time = start_time.tz_localize(tz) if start_time.tzinfo is None else start_time.tz_convert(tz)
    end_time   = end_time.tz_localize(tz)   if end_time.tzinfo   is None else end_time.tz_convert(tz)

    # Hard cap: do not compute beyond start_time + 270s (or max_total_s)
    cap_end_time = min(end_time, start_time + pd.Timedelta(seconds=max_total_s))

    interval = pd.Timedelta(seconds=interval_s)
    total_duration = cap_end_time - start_time  # duration after capping

    results = []
    idx = 1

    # Case A: whole interval shorter than one window -> single mean over the whole (capped) interval
    if total_duration < interval:
        seg_full = d[(d['Timestamp (UTC+10)'] >= start_time) & (d['Timestamp (UTC+10)'] < cap_end_time)]
        mSCL = seg_full['GSR Conductance (CAL)'].mean()
        seconds_marker = total_duration.total_seconds()  # < interval_s
        return pd.DataFrame([[round(seconds_marker, 2),
                              round(mSCL, 2) if pd.notnull(mSCL) else None]],
                            columns=['Seconds', 'scl'])

    # Case B: full 30s windows (or interval_s windows) up to the capped end
    current_start = start_time
    while current_start + interval <= cap_end_time:
        current_end = current_start + interval
        seg = d[(d['Timestamp (UTC+10)'] >= current_start) & (d['Timestamp (UTC+10)'] < current_end)]
        mSCL = seg['GSR Conductance (CAL)'].mean()
        results.append([idx * interval_s,
                        round(mSCL, 2) if pd.notnull(mSCL) else None])
        current_start = current_end
        idx += 1

    # Case C: trailing segment < interval_s (will not run if cap_end_time hits exact multiple)
    if current_start < cap_end_time:
        # If capped total duration < min_total_s (e.g., <270s), use whole-interval mean; else use tail-only mean
        if total_duration.total_seconds() < min_total_s:
            seg_last = d[(d['Timestamp (UTC+10)'] >= start_time) & (d['Timestamp (UTC+10)'] < cap_end_time)]
        else:
            seg_last = d[(d['Timestamp (UTC+10)'] >= current_start) & (d['Timestamp (UTC+10)'] < cap_end_time)]

        mSCL = seg_last['GSR Conductance (CAL)'].mean()
        seconds_marker = (idx - 1) * interval_s + (cap_end_time - current_start).total_seconds()  # cumulative seconds
        results.append([round(seconds_marker, 2),
                        round(mSCL, 2) if pd.notnull(mSCL) else None])

    return pd.DataFrame(results, columns=['Seconds', 'scl'])


In [17]:
df_v   = df_visual_gsr.copy()
df_va  = df_va_gsr.copy()
df_vt  = df_vt_gsr.copy()
df_vat = df_vat_gsr.copy()

# --- compute 30s rolling means for each condition ---
df_v_means   = calc_rolling_means(df_v,   start_time_v,   end_time_v).round(2)
df_va_means  = calc_rolling_means(df_va,  start_time_va,  end_time_va).round(2)
df_vt_means  = calc_rolling_means(df_vt,  start_time_vt,  end_time_vt).round(2)
df_vat_means = calc_rolling_means(df_vat, start_time_vat, end_time_vat).round(2)

# --- align to common minimum length so rows correspond across conditions ---
min_len = min(len(df_v_means), len(df_va_means), len(df_vt_means), len(df_vat_means))

df_v_means_aligned   = df_v_means.iloc[:min_len].reset_index(drop=True)
df_va_means_aligned  = df_va_means.iloc[:min_len].reset_index(drop=True)
df_vt_means_aligned  = df_vt_means.iloc[:min_len].reset_index(drop=True)
df_vat_means_aligned = df_vat_means.iloc[:min_len].reset_index(drop=True)

# --- display ---
display('Visual 30s Interval Means', df_v_means_aligned)
display('VA 30s Interval Means', df_va_means_aligned)
display('VT 30s Interval Means', df_vt_means_aligned)
display('VAT 30s Interval Means', df_vat_means_aligned)

'Visual 30s Interval Means'

Unnamed: 0,Seconds,scl
0,30,5.24
1,60,5.41
2,90,5.54
3,120,5.55
4,150,5.32
5,180,5.86
6,210,6.68
7,240,
8,270,


'VA 30s Interval Means'

Unnamed: 0,Seconds,scl
0,30.0,8.68
1,60.0,8.3
2,90.0,7.88
3,120.0,8.19
4,150.0,8.25
5,180.0,8.25
6,210.0,7.92
7,240.0,7.51
8,267.12,8.08


'VT 30s Interval Means'

Unnamed: 0,Seconds,scl
0,30,7.75
1,60,7.39
2,90,7.08
3,120,7.3
4,150,7.48
5,180,7.32
6,210,7.14
7,240,6.9
8,270,6.55


'VAT 30s Interval Means'

Unnamed: 0,Seconds,scl
0,30,7.72
1,60,7.42
2,90,7.18
3,120,6.89
4,150,6.55
5,180,6.69
6,210,7.15
7,240,7.3
8,270,7.29


## Export clean scl data to CSV files

In [20]:
import os

# ========== Define Participant ID and Gender ==========
participant_id = "P14"  # Change for each participant
gender = "M"            # Change for each participant

# ========== Define Output Directory and File ==========
output_dir = r"G:\USYD_PhD_OneDrive\OneDrive - The University of Sydney (Students)\PhD_USYD\2025\Prototype B\SCL"
os.makedirs(output_dir, exist_ok=True)
filename = "B_Participant_SCL.csv"
filepath = os.path.join(output_dir, filename)

# ========== Function to Build Output Row ==========
def build_output_row(participant_id, gender, condition, scl_pre, scl_00, means_df):
    scl_list = means_df['scl'].tolist()
    row = [participant_id, gender, condition, scl_pre, scl_00]
    row.extend(scl_list)
    return row

# ========== Use aligned DataFrames ==========
# Visual: with pre values
row_v   = build_output_row(participant_id, gender, "V", scl_pre_v, scl_00_v,   df_v_means_aligned)
# VA: with pre values
row_va  = build_output_row(participant_id, gender, "VA",     "", scl_00_va, df_va_means_aligned)
# VT: with pre values
row_vt  = build_output_row(participant_id, gender, "VT",     "", scl_00_vt, df_vt_means_aligned)
# VAT: pre column left empty
row_vat = build_output_row(participant_id, gender, "VAT",    "",          scl_00_vat, df_vat_means_aligned)

# ========== Build Columns Based on Number of Windows ==========
n_windows = len(df_v_means_aligned)
columns = ["Participant ID", "Gender", "Condition", "Baseline_Pre_scl", "Baseline_00_scl"]
columns += [f"scl_{(i+1)*30}s" for i in range(n_windows)]

# ========== Save or Append ==========
rows = [row_v, row_va, row_vt, row_vat]

if not os.path.exists(filepath):
    # Write with header
    df_out = pd.DataFrame(rows, columns=columns)
    df_out.to_csv(filepath, index=False)
else:
    # Append without header
    df_out = pd.DataFrame(rows, columns=columns)
    df_out.to_csv(filepath, mode='a', index=False, header=False)

print(f"File saved at: {filepath}")


File saved at: G:\USYD_PhD_OneDrive\OneDrive - The University of Sydney (Students)\PhD_USYD\2025\Prototype B\SCL\B_Participant_SCL.csv


## Calculate mean values in 2s pre and post the S3_S

In [21]:
# Confirm the timezone from Visual DF
tz = df_visual_gsr['Timestamp (UTC+10)'].dt.tz

# --- timezone-normalized S3_S for V / VA / VT / VAT ---
def norm_to_tz(t, tz):
    t = pd.to_datetime(t)
    return t.tz_localize(tz) if t.tzinfo is None else t.tz_convert(tz)

s3_start_time_v   = norm_to_tz(state03_start_time_v, tz)
s3_start_time_va  = norm_to_tz(state03_start_time_va, tz)
s3_start_time_vt  = norm_to_tz(state03_start_time_vt, tz)
s3_start_time_vat = norm_to_tz(state03_start_time_vat, tz)

# Ensure datetime dtype
for _df in (df_visual_gsr, df_va_gsr, df_vt_gsr, df_vat_gsr):
    _df['Timestamp (UTC+10)'] = pd.to_datetime(_df['Timestamp (UTC+10)'])

# Window settings
interval_before = pd.Timedelta(seconds=2)
interval_after  = pd.Timedelta(seconds=2)

def before_after_window(df, s3_start):
    df_before = df[(df['Timestamp (UTC+10)'] >= (s3_start - interval_before)) &
                   (df['Timestamp (UTC+10)'] <  s3_start)]
    df_after  = df[(df['Timestamp (UTC+10)'] >=  s3_start) &
                   (df['Timestamp (UTC+10)'] <  (s3_start + interval_after))]
    mean_before = df_before[['GSR Conductance (CAL)']].mean().to_frame().T
    mean_after  = df_after[['GSR Conductance (CAL)']].mean().to_frame().T
    mean_before['Window'] = 'Before'
    mean_after['Window']  = 'After'
    out = pd.concat([mean_before, mean_after], ignore_index=True)
    return out[['Window', 'GSR Conductance (CAL)']]

# --- Visual ---
df_v_window = before_after_window(df_visual_gsr, s3_start_time_v)
print("=== Visual Condition (Mean SCL 2s before/after S3_S) ===")
display(df_v_window)

# --- VA ---
df_va_window = before_after_window(df_va_gsr, s3_start_time_va)
print("=== VA Condition (Mean SCL 2s before/after S3_S) ===")
display(df_va_window)

# --- VT ---
df_vt_window = before_after_window(df_vt_gsr, s3_start_time_vt)
print("=== VT Condition (Mean SCL 2s before/after S3_S) ===")
display(df_vt_window)

# --- VAT ---
df_vat_window = before_after_window(df_vat_gsr, s3_start_time_vat)
print("=== VAT Condition (Mean SCL 2s before/after S3_S) ===")
display(df_vat_window)


=== Visual Condition (Mean SCL 2s before/after S3_S) ===


Unnamed: 0,Window,GSR Conductance (CAL)
0,Before,
1,After,


=== VA Condition (Mean SCL 2s before/after S3_S) ===


Unnamed: 0,Window,GSR Conductance (CAL)
0,Before,7.38
1,After,7.37


=== VT Condition (Mean SCL 2s before/after S3_S) ===


Unnamed: 0,Window,GSR Conductance (CAL)
0,Before,6.71
1,After,6.69


=== VAT Condition (Mean SCL 2s before/after S3_S) ===


Unnamed: 0,Window,GSR Conductance (CAL)
0,Before,7.23
1,After,7.25


## Export clean average S3 2s Before and After Data to CSV files

In [22]:
import os
import pandas as pd

# ========== Define Participant ID and Gender ==========
participant_id = "P14"
gender = "M"

# ========== Define Output Directory and File ==========
output_dir = r"G:\USYD_PhD_OneDrive\OneDrive - The University of Sydney (Students)\PhD_USYD\2025\Prototype B\SCL"
os.makedirs(output_dir, exist_ok=True)
filename = "B_Participant_SCL_S3.csv"
filepath = os.path.join(output_dir, filename)

# ========== Function to Build Output Row ==========
def build_output_row_gsr_S3(
    participant_id, gender, condition,
    scl_pre, scl_00,
    scl_before, scl_after,
    skip_pre=False
):
    """
    Build a row for the output CSV file with all numeric values rounded to 2 decimals.
    """
    def fmt(x):
        # Round to 2 decimals if numeric, otherwise keep as-is
        try:
            return f"{float(x):.2f}"
        except (ValueError, TypeError):
            return x

    if skip_pre:
        row = [
            participant_id, gender, condition,
            "",                   # leave Baseline_Pre_SCL empty if skip_pre
            fmt(scl_00),
            fmt(scl_before),
            fmt(scl_after)
        ]
    else:
        row = [
            participant_id, gender, condition,
            fmt(scl_pre),
            fmt(scl_00),
            fmt(scl_before),
            fmt(scl_after)
        ]
    return row

def pick_val(df_window, label):
    """Helper to pick 'Before' or 'After' S3_S mean SCL value from a window DataFrame."""
    return df_window.loc[df_window['Window'] == label, 'GSR Conductance (CAL)'].values[0]

# ========== Rows for each condition ==========
row_v = build_output_row_gsr_S3(
    participant_id, gender, "V",
    scl_pre_v, scl_00_v,
    pick_val(df_v_window, 'Before'),
    pick_val(df_v_window, 'After'),
    skip_pre=False
)

row_va = build_output_row_gsr_S3(
    participant_id, gender, "VA",
    "", scl_00_va,
    pick_val(df_va_window, 'Before'),
    pick_val(df_va_window, 'After'),
    skip_pre=False
)

row_vt = build_output_row_gsr_S3(
    participant_id, gender, "VT",
    "", scl_00_vt,
    pick_val(df_vt_window, 'Before'),
    pick_val(df_vt_window, 'After'),
    skip_pre=False
)

row_vat = build_output_row_gsr_S3(
    participant_id, gender, "VAT",
    "", scl_00_vat,
    pick_val(df_vat_window, 'Before'),
    pick_val(df_vat_window, 'After'),
    skip_pre=True
)

# ========== Column names ==========
columns = [
    "Participant ID", "Gender", "Condition",
    "Baseline_Pre_SCL", "Baseline_00_SCL",
    "Before_S3_S_SCL", "After_S3_S_SCL"
]

# ========== Save or Append ==========
rows = [row_v, row_va, row_vt, row_vat]

if not os.path.exists(filepath):
    pd.DataFrame(rows, columns=columns).to_csv(filepath, index=False)
else:
    pd.DataFrame(rows, columns=columns).to_csv(filepath, mode='a', index=False, header=False)

print(f"File saved at: {filepath}")


File saved at: G:\USYD_PhD_OneDrive\OneDrive - The University of Sydney (Students)\PhD_USYD\2025\Prototype B\SCL\B_Participant_SCL_S3.csv


## Calculate mean values in the three time points of S3

In [23]:
import pandas as pd

# Safer helper: convert `ts` to pandas Timestamp, align tz to df column, then pick nearest sample
def scl_at_timestamp(df, ts, ts_col='Timestamp (UTC+10)', val_col='GSR Conductance (CAL)'):
    # ensure df timestamp column is datetime
    if not pd.api.types.is_datetime64_any_dtype(df[ts_col]):
        df[ts_col] = pd.to_datetime(df[ts_col])

    # convert ts to pandas Timestamp
    ts = pd.to_datetime(ts)

    # align timezone of ts to df's tz (if df has tz)
    tz = df[ts_col].dt.tz
    if tz is not None:
        if ts.tzinfo is None:
            ts = ts.tz_localize(tz)
        else:
            ts = ts.tz_convert(tz)

    # find nearest index
    idx = (df[ts_col] - ts).abs().idxmin()
    val = df.loc[idx, val_col]

    return None if pd.isna(val) else round(float(val), 2)

# Build a wide table using your EXISTING time variables (no re-definitions)
rows = [
    ["Visual",
     scl_at_timestamp(df_visual_gsr, state03_start_time_v),
     scl_at_timestamp(df_visual_gsr, state03_m_time_v),
     scl_at_timestamp(df_visual_gsr, end_time_v)],

    ["VA",
     scl_at_timestamp(df_va_gsr, state03_start_time_va),
     scl_at_timestamp(df_va_gsr, state03_m_time_va),
     scl_at_timestamp(df_va_gsr, end_time_va)],

    ["VT",
     scl_at_timestamp(df_vt_gsr, state03_start_time_vt),
     scl_at_timestamp(df_vt_gsr, state03_m_time_vt),
     scl_at_timestamp(df_vt_gsr, end_time_vt)],

    ["VAT",
     scl_at_timestamp(df_vat_gsr, state03_start_time_vat),
     scl_at_timestamp(df_vat_gsr, state03_m_time_vat),
     scl_at_timestamp(df_vat_gsr, end_time_vat)],
]

df_scl_points = pd.DataFrame(rows, columns=["Condition", "S3_S_SCL", "S3_M_SCL", "S3_E_SCL"])
display(df_scl_points)


Unnamed: 0,Condition,S3_S_SCL,S3_M_SCL,S3_E_SCL
0,Visual,6.61,6.61,6.61
1,VA,7.38,7.8,7.51
2,VT,6.71,6.49,6.33
3,VAT,7.2,7.46,7.34


## Export clean average Task Data to CSV files

In [24]:
import os
import pandas as pd

# ========== Define Participant ID and Gender ==========
participant_id = "P14"
gender = "M"

# ========== Define Output Directory and File ==========
output_dir = r"G:\USYD_PhD_OneDrive\OneDrive - The University of Sydney (Students)\PhD_USYD\2025\Prototype B\SCL"
os.makedirs(output_dir, exist_ok=True)
filename = "B_Participant_SCL_S3_Task.csv"  # same file name as before; change if you want a separate file
filepath = os.path.join(output_dir, filename)

# ========== Helper: nearest-sample SCL at a given timestamp ==========
def scl_at_timestamp(df, ts, ts_col='Timestamp (UTC+10)', val_col='GSR Conductance (CAL)'):
    """
    Return the SCL value (rounded to 2 decimals) at the row whose timestamp is nearest to `ts`.
    This function converts df[ts_col] to datetime if needed and aligns `ts` timezone to df's tz.
    """
    # Ensure datetime for the timestamp column
    if not pd.api.types.is_datetime64_any_dtype(df[ts_col]):
        df[ts_col] = pd.to_datetime(df[ts_col])

    # Convert ts to pandas Timestamp
    ts = pd.to_datetime(ts)

    # Align timezone if df has tz info
    tz = df[ts_col].dt.tz
    if tz is not None:
        if ts.tzinfo is None:
            ts = ts.tz_localize(tz)
        else:
            ts = ts.tz_convert(tz)

    idx = (df[ts_col] - ts).abs().idxmin()
    val = df.loc[idx, val_col]
    return None if pd.isna(val) else round(float(val), 2)

# ========== Build one row for a condition ==========
def build_row_points(condition_label, df, s_pre, s_00, t_s3s, t_s3m, t_s3e):
    """
    Build a row [participant, gender, condition, baseline_pre, baseline_00, S3_S, S3_M, S3_E]
    with all numeric values formatted to 2 decimals as strings.
    """
    def fmt(x):
        try:
            return f"{float(x):.2f}"
        except (TypeError, ValueError):
            return x

    s3s = scl_at_timestamp(df, t_s3s)
    s3m = scl_at_timestamp(df, t_s3m)
    s3e = scl_at_timestamp(df, t_s3e)

    return [
        participant_id, gender, condition_label,
        fmt(s_pre), fmt(s_00),
        fmt(s3s), fmt(s3m), fmt(s3e)
    ]

# ========== Build rows using EXISTING variables ==========
row_v = build_row_points(
    "V",  df_visual_gsr,
    scl_pre_v,  scl_00_v,
    state03_start_time_v, state03_m_time_v, end_time_v
)

row_va = build_row_points(
    "VA", df_va_gsr,
    "", scl_00_va,
    state03_start_time_va, state03_m_time_va, end_time_va
)

row_vt = build_row_points(
    "VT", df_vt_gsr,
    "", scl_00_vt,
    state03_start_time_vt, state03_m_time_vt, end_time_vt
)

# Note: you previously left VAT pre empty; you asked now to include baseline pre & 00 for each group,
# so we use scl_pre_vat here.
row_vat = build_row_points(
    "VAT", df_vat_gsr,
    "", scl_00_vat,
    state03_start_time_vat, state03_m_time_vat, end_time_vat
)

# ========== Column names ==========
columns = [
    "Participant ID", "Gender", "Condition",
    "Baseline_Pre_SCL", "Baseline_00_SCL",
    "S3_S_SCL", "S3_M_SCL", "S3_E_SCL"
]

# ========== Save or Append ==========
rows = [row_v, row_va, row_vt, row_vat]

if not os.path.exists(filepath):
    pd.DataFrame(rows, columns=columns).to_csv(filepath, index=False)
else:
    pd.DataFrame(rows, columns=columns).to_csv(filepath, mode='a', index=False, header=False)

print(f"File saved at: {filepath}")


File saved at: G:\USYD_PhD_OneDrive\OneDrive - The University of Sydney (Students)\PhD_USYD\2025\Prototype B\SCL\B_Participant_SCL_S3_Task.csv
