In [5]:
import pyxdf
import pandas as pd
import numpy as np
from datetime import datetime, timedelta, timezone
from scipy import stats
import os
from IPython.display import display
import matplotlib.pyplot as plt

## Load the XDF file

In [6]:
from pathlib import Path
import re

# Root folder that holds the per-subject directories (relative to the working directory).
BASE_DIR = Path(r".")
# Subject folder name; also used as the file-name prefix when globbing for recordings.
SUBJECT  = "sub-P014"
# ============================

# Condition keys; each one corresponds to a `ses-<cond>` folder under the subject directory.
CONDITIONS = ["v", "va", "vt", "vat"]

# Captures the numeric run index from names ending in "_run-<N>_eeg.xdf" (case-insensitive).
run_re = re.compile(r"_run-(\d+)_eeg\.xdf$", re.IGNORECASE)

def pick_latest_run(paths):
    """Select a single recording from ``paths``.

    Names carrying a ``_run-<N>_eeg.xdf`` suffix are ranked by N and the highest
    one wins. When no name matches, the most recently modified file is returned.
    An empty input yields ``None``.
    """
    if not paths:
        return None

    searched = ((run_re.search(candidate.name), candidate) for candidate in paths)
    numbered = [(int(hit.group(1)), candidate) for hit, candidate in searched if hit]
    if numbered:
        # max() returns the first entry among equal run numbers.
        return max(numbered, key=lambda pair: pair[0])[1]

    # No run number anywhere: fall back to the modification time.
    return max(paths, key=lambda candidate: candidate.stat().st_mtime)

def find_condition_paths(base_dir: Path, subject: str):
    """Map each condition in CONDITIONS to the recording chosen for it.

    Searches ``<base_dir>/<subject>/ses-<cond>/eeg`` for files following the
    ``<subject>_ses-<cond>_task-Default_run-*_eeg.xdf`` naming scheme and, if none
    match, for any ``*.xdf`` file in that folder. Conditions without a usable
    file are simply absent from the returned dict.
    """
    subject_root = base_dir / subject
    found = {}
    for cond in CONDITIONS:
        eeg_dir = subject_root / f"ses-{cond}" / "eeg"
        strict_pattern = f"{subject}_ses-{cond}_task-Default_run-*_eeg.xdf"
        # Prefer the strict naming scheme; otherwise accept any .xdf in the folder.
        candidates = list(eeg_dir.glob(strict_pattern)) or list(eeg_dir.glob("*.xdf"))
        latest = pick_latest_run(candidates)
        if latest:
            found[cond] = latest
    return found

# -------- header helpers (reuse your logic) --------
def get_datetime_from_header(header):
    """Return the first entry of ``header['info']['datetime']``.

    Falls back to the literal string "Datetime not found" when the keys are
    missing or the value is not a non-empty list.
    """
    if 'info' not in header:
        return "Datetime not found"
    info = header['info']
    if 'datetime' not in info:
        return "Datetime not found"
    values = info['datetime']
    if isinstance(values, list) and values:
        return values[0]
    return "Datetime not found"

def print_stream_info(streams, condition_name, datetime_str):
    """Print a banner followed by one summary line per stream.

    Each line shows the 1-based position, the stream name, its channel count
    and the header datetime passed in.
    """
    print(f"\n--- Stream Information ({condition_name}) ---")
    print(f"Datetime from header: {datetime_str}")
    for position, stream in enumerate(streams, start=1):
        info = stream['info']
        name = info['name'][0]
        n_channels = info['channel_count'][0]
        print(f"Stream {position} Name: {name}, Channel Count: {n_channels}, Datetime: {datetime_str}")

# Resolve one .xdf path per condition for the configured subject (conditions without a file are omitted).
paths = find_condition_paths(BASE_DIR, SUBJECT)

In [7]:
# ---- Map discovered paths back to your original variable names ----
def require_found(paths_dict, cond_key, var_name):
    """Return the path stored for ``cond_key`` as a string.

    Raises FileNotFoundError, naming the expected folder layout and the variable
    that could not be assigned, when the condition has no entry.
    """
    found = paths_dict.get(cond_key)
    if found is not None:
        # str() of a Path gives the platform's native path spelling.
        return str(found)

    message = (
        f"Missing file for condition '{cond_key}'. "
        f"Please check folder structure: sub-<ID>/ses-{cond_key}/eeg/*.xdf "
        f"and ensure it exists so we can assign `{var_name}`."
    )
    raise FileNotFoundError(message)

# Assign variables exactly as before
# Each call raises FileNotFoundError if that condition has no discovered recording.
visual_file_path = require_found(paths, "v",   "visual_file_path")
va_file_path     = require_found(paths, "va",  "va_file_path")
vt_file_path     = require_found(paths, "vt",  "vt_file_path")
vat_file_path    = require_found(paths, "vat", "vat_file_path")

# (Optional) print to verify
print("\n== Assigned variables ==")
print("visual_file_path:", visual_file_path)
print("va_file_path    :", va_file_path)
print("vt_file_path    :", vt_file_path)
print("vat_file_path   :", vat_file_path)



== Assigned variables ==
visual_file_path: sub-P014\ses-v\eeg\sub-P014_ses-v_task-Default_run-001_eeg.xdf
va_file_path    : sub-P014\ses-va\eeg\sub-P014_ses-va_task-Default_run-001_eeg.xdf
vt_file_path    : sub-P014\ses-vt\eeg\sub-P014_ses-vt_task-Default_run-001_eeg.xdf
vat_file_path   : sub-P014\ses-vat\eeg\sub-P014_ses-vat_task-Default_run-001_eeg.xdf


## Load visual only data

In [8]:
# Load XDF file for Visual condition
# load_xdf returns the list of streams plus the file header.
visual_streams, visual_header = pyxdf.load_xdf(visual_file_path)
# Header datetime string; reused later as the time anchor when extracting streams.
visual_datetime = get_datetime_from_header(visual_header)
print("Visual condition datetime:", visual_datetime)

# Print stream info for Visual condition with datetime
print_stream_info(visual_streams, "Visual", visual_datetime)

Stream 2: Calculated effective sampling rate 40.3620 Hz is different from specified rate 250.0000 Hz.
Stream 7: Calculated effective sampling rate 40.3945 Hz is different from specified rate 250.0000 Hz.
Stream 3: Calculated effective sampling rate 40.8761 Hz is different from specified rate 250.0000 Hz.
Stream 8: Calculated effective sampling rate 379.9766 Hz is different from specified rate 250.0000 Hz.
Stream 5: Calculated effective sampling rate 25.9551 Hz is different from specified rate 250.0000 Hz.
Stream 11: Calculated effective sampling rate 443.8161 Hz is different from specified rate 250.0000 Hz.
Stream 10: Calculated effective sampling rate 38.2778 Hz is different from specified rate 250.0000 Hz.


Visual condition datetime: 2025-10-10T13:59:34+1100

--- Stream Information (Visual) ---
Datetime from header: 2025-10-10T13:59:34+1100
Stream 1 Name: obci_stream_8, Channel Count: 4, Datetime: 2025-10-10T13:59:34+1100
Stream 2 Name: obci_stream_5, Channel Count: 2, Datetime: 2025-10-10T13:59:34+1100
Stream 3 Name: obci_stream_6, Channel Count: 1, Datetime: 2025-10-10T13:59:34+1100
Stream 4 Name: obci_stream_0, Channel Count: 18, Datetime: 2025-10-10T13:59:34+1100
Stream 5 Name: obci_stream_4, Channel Count: 18, Datetime: 2025-10-10T13:59:34+1100
Stream 6 Name: obci_stream_2, Channel Count: 6, Datetime: 2025-10-10T13:59:34+1100
Stream 7 Name: obci_stream_7, Channel Count: 1, Datetime: 2025-10-10T13:59:34+1100
Stream 8 Name: obci_stream_1, Channel Count: 5, Datetime: 2025-10-10T13:59:34+1100
Stream 9 Name: obci_stream_3, Channel Count: 125, Datetime: 2025-10-10T13:59:34+1100
Stream 10 Name: obci_stream_11, Channel Count: 3, Datetime: 2025-10-10T13:59:34+1100
Stream 11 Name: obci_stream_

## Load VA data

In [9]:
# Load XDF file for Visual + Auditory condition (VA)
# load_xdf returns the list of streams plus the file header.
va_streams, va_header = pyxdf.load_xdf(va_file_path)
# Header datetime string; reused later as the time anchor when extracting streams.
va_datetime = get_datetime_from_header(va_header)
print("Visual + Auditory condition datetime:", va_datetime)

# Print stream info for Visual + Auditory condition with datetime
print_stream_info(va_streams, "Visual + Auditory", va_datetime)

Stream 10: Calculated effective sampling rate 32.5189 Hz is different from specified rate 250.0000 Hz.
Stream 2: Calculated effective sampling rate 38.3058 Hz is different from specified rate 250.0000 Hz.
Stream 4: Calculated effective sampling rate 38.1692 Hz is different from specified rate 250.0000 Hz.
Stream 8: Calculated effective sampling rate 307.7353 Hz is different from specified rate 250.0000 Hz.
Stream 5: Calculated effective sampling rate 22.6840 Hz is different from specified rate 250.0000 Hz.
Stream 11: Calculated effective sampling rate 326.0299 Hz is different from specified rate 250.0000 Hz.
Stream 9: Calculated effective sampling rate 192.5431 Hz is different from specified rate 250.0000 Hz.
Stream 7: Calculated effective sampling rate 38.8304 Hz is different from specified rate 250.0000 Hz.


Visual + Auditory condition datetime: 2025-10-10T14:46:52+1100

--- Stream Information (Visual + Auditory) ---
Datetime from header: 2025-10-10T14:46:52+1100
Stream 1 Name: obci_stream_8, Channel Count: 4, Datetime: 2025-10-10T14:46:52+1100
Stream 2 Name: obci_stream_11, Channel Count: 3, Datetime: 2025-10-10T14:46:52+1100
Stream 3 Name: obci_stream_9, Channel Count: 9, Datetime: 2025-10-10T14:46:52+1100
Stream 4 Name: obci_stream_7, Channel Count: 1, Datetime: 2025-10-10T14:46:52+1100
Stream 5 Name: obci_stream_0, Channel Count: 18, Datetime: 2025-10-10T14:46:52+1100
Stream 6 Name: obci_stream_4, Channel Count: 18, Datetime: 2025-10-10T14:46:52+1100
Stream 7 Name: obci_stream_5, Channel Count: 2, Datetime: 2025-10-10T14:46:52+1100
Stream 8 Name: obci_stream_2, Channel Count: 6, Datetime: 2025-10-10T14:46:52+1100
Stream 9 Name: obci_stream_1, Channel Count: 5, Datetime: 2025-10-10T14:46:52+1100
Stream 10 Name: obci_stream_3, Channel Count: 125, Datetime: 2025-10-10T14:46:52+1100
Stream

## Load VT data

In [10]:
# Load XDF file for Visual + Thermal condition (VT)
# load_xdf returns the list of streams plus the file header.
vt_streams, vt_header = pyxdf.load_xdf(vt_file_path)
# Header datetime string; reused later as the time anchor when extracting streams.
vt_datetime = get_datetime_from_header(vt_header)
print("Visual + Thermal condition datetime:", vt_datetime)

# Print stream info for Visual + Thermal condition with datetime
print_stream_info(vt_streams, "Visual + Thermal", vt_datetime)

Stream 2: Calculated effective sampling rate 39.0305 Hz is different from specified rate 250.0000 Hz.
Stream 8: Calculated effective sampling rate 192.2784 Hz is different from specified rate 250.0000 Hz.
Stream 6: Calculated effective sampling rate 39.6434 Hz is different from specified rate 250.0000 Hz.
Stream 9: Calculated effective sampling rate 35.2238 Hz is different from specified rate 250.0000 Hz.
Stream 12: Calculated effective sampling rate 21.7044 Hz is different from specified rate 250.0000 Hz.
Stream 3: Calculated effective sampling rate 38.8067 Hz is different from specified rate 250.0000 Hz.
Stream 13: Calculated effective sampling rate 343.4047 Hz is different from specified rate 250.0000 Hz.
Stream 7: Calculated effective sampling rate 309.4436 Hz is different from specified rate 250.0000 Hz.


Visual + Thermal condition datetime: 2025-10-10T14:33:35+1100

--- Stream Information (Visual + Thermal) ---
Datetime from header: 2025-10-10T14:33:35+1100
Stream 1 Name: obci_stream_9, Channel Count: 9, Datetime: 2025-10-10T14:33:35+1100
Stream 2 Name: obci_stream_5, Channel Count: 2, Datetime: 2025-10-10T14:33:35+1100
Stream 3 Name: obci_stream_8, Channel Count: 4, Datetime: 2025-10-10T14:33:35+1100
Stream 4 Name: obci_stream_0, Channel Count: 18, Datetime: 2025-10-10T14:33:35+1100
Stream 5 Name: obci_stream_10, Channel Count: 2, Datetime: 2025-10-10T14:33:35+1100
Stream 6 Name: obci_stream_6, Channel Count: 1, Datetime: 2025-10-10T14:33:35+1100
Stream 7 Name: obci_stream_11, Channel Count: 3, Datetime: 2025-10-10T14:33:35+1100
Stream 8 Name: obci_stream_1, Channel Count: 5, Datetime: 2025-10-10T14:33:35+1100
Stream 9 Name: obci_stream_4, Channel Count: 18, Datetime: 2025-10-10T14:33:35+1100
Stream 10 Name: obci_stream_3, Channel Count: 125, Datetime: 2025-10-10T14:33:35+1100
Stream 

## Load VAT data

In [11]:
# Load XDF file for the VAT condition (presumably Visual + Auditory + Thermal — confirm)
# load_xdf returns the list of streams plus the file header.
vat_streams, vat_header = pyxdf.load_xdf(vat_file_path)
# Header datetime string; reused later as the time anchor when extracting streams.
vat_datetime = get_datetime_from_header(vat_header)
print("VAT condition datetime:", vat_datetime)

# Print stream info for the VAT condition with datetime
print_stream_info(vat_streams, "VAT", vat_datetime)

Stream 3: Calculated effective sampling rate 41.0675 Hz is different from specified rate 250.0000 Hz.
Stream 8: Calculated effective sampling rate 338.9286 Hz is different from specified rate 250.0000 Hz.
Stream 5: Calculated effective sampling rate 23.1637 Hz is different from specified rate 250.0000 Hz.
Stream 11: Calculated effective sampling rate 373.5739 Hz is different from specified rate 250.0000 Hz.
Stream 9: Calculated effective sampling rate 202.7190 Hz is different from specified rate 250.0000 Hz.
Stream 10: Calculated effective sampling rate 35.2274 Hz is different from specified rate 250.0000 Hz.
Stream 6: Calculated effective sampling rate 41.6620 Hz is different from specified rate 250.0000 Hz.
Stream 7: Calculated effective sampling rate 39.9000 Hz is different from specified rate 250.0000 Hz.


VAT condition datetime: 2025-10-10T14:18:03+1100

--- Stream Information (VAT) ---
Datetime from header: 2025-10-10T14:18:03+1100
Stream 1 Name: obci_stream_4, Channel Count: 18, Datetime: 2025-10-10T14:18:03+1100
Stream 2 Name: obci_stream_2, Channel Count: 6, Datetime: 2025-10-10T14:18:03+1100
Stream 3 Name: obci_stream_1, Channel Count: 5, Datetime: 2025-10-10T14:18:03+1100
Stream 4 Name: GSR_PPG, Channel Count: 10, Datetime: 2025-10-10T14:18:03+1100
Stream 5 Name: obci_stream_0, Channel Count: 18, Datetime: 2025-10-10T14:18:03+1100
Stream 6 Name: obci_stream_8, Channel Count: 4, Datetime: 2025-10-10T14:18:03+1100
Stream 7 Name: obci_stream_3, Channel Count: 125, Datetime: 2025-10-10T14:18:03+1100
Stream 8 Name: obci_stream_10, Channel Count: 2, Datetime: 2025-10-10T14:18:03+1100
Stream 9 Name: obci_stream_9, Channel Count: 9, Datetime: 2025-10-10T14:18:03+1100
Stream 10 Name: obci_stream_11, Channel Count: 3, Datetime: 2025-10-10T14:18:03+1100
Stream 11 Name: obci_stream_6, Channel

## Extract data streams for heart rates

In [12]:
# Function to parse datetime string from header
def parse_header_datetime(dt_str):
    """Parse the recording datetime string taken from an XDF header.

    Accepts ``YYYY-MM-DDTHH:MM:SS`` with optional fractional seconds, followed by
    an optional UTC offset written as ``+HHMM``, ``-HHMM``, ``+HH:MM`` or
    ``-HH:MM``.

    Returns
    -------
    datetime
        Timezone-aware when the string carries an offset, naive otherwise.

    Raises
    ------
    ValueError
        If the string matches none of the supported layouts.
    """
    text = dt_str.strip()
    # %z handles both signs and both offset spellings; the previous split on '+'
    # missed negative offsets and failed on "+HH:MM".
    for fmt in ("%Y-%m-%dT%H:%M:%S.%f%z", "%Y-%m-%dT%H:%M:%S%z"):
        try:
            return datetime.strptime(text, fmt)
        except ValueError:
            pass
    # No offset (or another ISO layout): defer to the ISO parser.
    return datetime.fromisoformat(text)

# Function to extract a specific stream as a DataFrame
def extract_stream_df(streams, start_time_str, stream_name_to_extract):
    """Return the named stream as a DataFrame, or None (after printing a notice) if absent.

    Columns: ``Channel_1`` .. ``Channel_N`` from the stream's time series,
    ``Timestamp`` (the stream's time stamps), ``Relative_Seconds`` (offset from
    the stream's first sample) and ``Datetime_Local`` (header datetime plus
    ``Relative_Seconds``, formatted as text with a UTC offset).
    """
    start_dt = parse_header_datetime(start_time_str)

    # First stream whose declared name matches.
    target = next(
        (candidate for candidate in streams
         if candidate['info']['name'][0] == stream_name_to_extract),
        None,
    )
    if target is None:
        print(f"Stream '{stream_name_to_extract}' not found.")
        return None

    channel_total = int(target['info']['channel_count'][0])
    channel_names = [f"Channel_{k}" for k in range(1, channel_total + 1)]
    frame = pd.DataFrame(target['time_series'], columns=channel_names)
    frame['Timestamp'] = target['time_stamps']
    frame['Relative_Seconds'] = frame['Timestamp'] - frame['Timestamp'].iloc[0]
    # The header datetime is treated as the wall-clock time of this stream's first sample.
    wall_clock = [start_dt + timedelta(seconds=offset) for offset in frame['Relative_Seconds']]
    frame['Datetime_Local'] = wall_clock
    frame['Datetime_Local'] = frame['Datetime_Local'].map(
        lambda stamp: stamp.strftime('%Y-%m-%d %H:%M:%S.%f %z')
    )
    return frame

# Set the stream name to extract
# NOTE(review): the inline comment lists the channels as [bpm, signal, ibi], but the cells below
# rename Channel_1..3 to heart-rate / hrv / spo2 — confirm which labelling is correct.
stream_name_to_extract = "obci_stream_11"  # streams for [bpm, signal, ibi]


## Extract heart rates for Visual condition

In [13]:
# visual only
# Returns None (after printing a notice) when the stream is absent from this recording.
df_visual = extract_stream_df(visual_streams, visual_datetime, stream_name_to_extract)

if df_visual is not None:
    print("==== Visual Only ====")
    # Readable column names; the baseline and window cells rely on 'heart-rate' and 'hrv'.
    df_visual = df_visual.rename(columns={
        'Channel_1': 'heart-rate',
        'Channel_2': 'hrv',
        'Channel_3': 'spo2'
    })

    # Preview: first and last four rows.
    display(pd.concat([df_visual.head(4), df_visual.tail(4)]))


==== Visual Only ====


Unnamed: 0,heart-rate,hrv,spo2,Timestamp,Relative_Seconds,Datetime_Local
0,0.0,0.0,0.0,187984.552794,0.0,2025-10-10 13:59:34.000000 +1100
1,0.0,0.0,0.0,187984.579708,0.026914,2025-10-10 13:59:34.026914 +1100
2,0.0,0.0,0.0,187984.606622,0.053828,2025-10-10 13:59:34.053828 +1100
3,0.0,0.0,0.0,187984.633536,0.080742,2025-10-10 13:59:34.080742 +1100
29058,94.0,378.100006,81.0,188737.491115,752.938321,2025-10-10 14:12:06.938321 +1100
29059,94.0,378.100006,81.0,188737.515862,752.963068,2025-10-10 14:12:06.963068 +1100
29060,119.0,422.399994,84.0,188737.540608,752.987814,2025-10-10 14:12:06.987814 +1100
29061,119.0,422.399994,84.0,188737.565355,753.012561,2025-10-10 14:12:07.012561 +1100


## Extract heart rates for VA condition

In [14]:
# va only
# Returns None (after printing a notice) when the stream is absent from this recording.
df_va = extract_stream_df(va_streams, va_datetime, stream_name_to_extract)

if df_va is not None:
    print("==== va Only ====")
    # Readable column names; the baseline and window cells rely on 'heart-rate' and 'hrv'.
    df_va = df_va.rename(columns={
        'Channel_1': 'heart-rate',
        'Channel_2': 'hrv',
        'Channel_3': 'spo2'
    })

    # Preview: first and last four rows.
    display(pd.concat([df_va.head(4), df_va.tail(4)]))


==== va Only ====


Unnamed: 0,heart-rate,hrv,spo2,Timestamp,Relative_Seconds,Datetime_Local
0,0.0,0.0,77.900002,190818.362744,0.0,2025-10-10 14:46:52.000000 +1100
1,0.0,0.0,77.300003,190818.393497,0.030753,2025-10-10 14:46:52.030753 +1100
2,0.0,0.0,77.300003,190818.42425,0.061506,2025-10-10 14:46:52.061506 +1100
3,0.0,0.0,77.099998,190818.455003,0.092259,2025-10-10 14:46:52.092259 +1100
15924,108.0,633.899963,90.400002,191308.073973,489.711229,2025-10-10 14:55:01.711229 +1100
15925,113.0,465.700012,79.800003,191308.104726,489.741982,2025-10-10 14:55:01.741982 +1100
15926,113.0,545.099976,80.699997,191308.135479,489.772735,2025-10-10 14:55:01.772735 +1100
15927,113.0,545.099976,80.699997,191308.166232,489.803488,2025-10-10 14:55:01.803488 +1100


## Extract heart rates for VT condition

In [15]:
# vt only
# Returns None (after printing a notice) when the stream is absent from this recording.
df_vt = extract_stream_df(vt_streams, vt_datetime, stream_name_to_extract)

if df_vt is not None:
    print("==== vt Only ====")
    # Readable column names; the baseline and window cells rely on 'heart-rate' and 'hrv'.
    df_vt = df_vt.rename(columns={
        'Channel_1': 'heart-rate',
        'Channel_2': 'hrv',
        'Channel_3': 'spo2'
    })

    # Preview: first and last four rows.
    display(pd.concat([df_vt.head(4), df_vt.tail(4)]))


==== vt Only ====


Unnamed: 0,heart-rate,hrv,spo2,Timestamp,Relative_Seconds,Datetime_Local
0,0.0,0.0,86.599998,190029.697115,0.0,2025-10-10 14:33:35.000000 +1100
1,0.0,0.0,86.900002,190029.725506,0.028391,2025-10-10 14:33:35.028391 +1100
2,0.0,0.0,86.900002,190029.753897,0.056782,2025-10-10 14:33:35.056782 +1100
3,0.0,0.0,86.400002,190029.782289,0.085174,2025-10-10 14:33:35.085174 +1100
18044,78.0,459.0,77.099998,190541.988638,512.291524,2025-10-10 14:42:07.291524 +1100
18045,78.0,459.0,77.099998,190542.01703,512.319915,2025-10-10 14:42:07.319915 +1100
18046,108.0,505.399994,82.300003,190542.045421,512.348306,2025-10-10 14:42:07.348306 +1100
18047,110.0,477.100006,83.199997,190542.073812,512.376697,2025-10-10 14:42:07.376697 +1100


## Extract heart rates for VAT condition

In [16]:
# VAT condition
# Returns None (after printing a notice) when the stream is absent from this recording.
df_vat = extract_stream_df(vat_streams, vat_datetime, stream_name_to_extract)
if df_vat is not None:
    print("==== VAT ====")
    # Readable column names; the baseline and window cells rely on 'heart-rate' and 'hrv'.
    df_vat = df_vat.rename(columns={
    'Channel_1': 'heart-rate',
    'Channel_2': 'hrv',
    'Channel_3': 'spo2'
    })

    # Preview: first and last four rows.
    display(pd.concat([df_vat.head(4), df_vat.tail(4)]))

==== VAT ====


Unnamed: 0,heart-rate,hrv,spo2,Timestamp,Relative_Seconds,Datetime_Local
0,0.0,0.0,83.699997,189098.475943,0.0,2025-10-10 14:18:03.000000 +1100
1,0.0,0.0,83.5,189098.504331,0.028388,2025-10-10 14:18:03.028388 +1100
2,0.0,0.0,83.5,189098.532719,0.056776,2025-10-10 14:18:03.056776 +1100
3,0.0,0.0,83.900002,189098.561107,0.085165,2025-10-10 14:18:03.085165 +1100
21010,0.0,0.0,70.0,189694.911392,596.435449,2025-10-10 14:27:59.435449 +1100
21011,0.0,0.0,70.0,189694.93978,596.463837,2025-10-10 14:27:59.463837 +1100
21012,0.0,0.0,70.0,189694.968168,596.492225,2025-10-10 14:27:59.492225 +1100
21013,0.0,0.0,70.0,189694.996556,596.520613,2025-10-10 14:27:59.520613 +1100


## Experiment Time

In [17]:
import pandas as pd

# Read the log files (updated file names)
# Paths are relative to the working directory; get_time reads the 'Group' column and,
# by default, the 'UTC+10_Time' column from these frames.
log_v   = pd.read_csv('ButtonToggleLog_v.csv')
log_va  = pd.read_csv('ButtonToggleLog_va.csv')
log_vt  = pd.read_csv('ButtonToggleLog_vt.csv')
log_vat = pd.read_csv('ButtonToggleLog_vat.csv')

# Utility to fetch time for a given Group and DataFrame
def get_time(df, element_name, col='UTC+10_Time', add_hours=1, as_str=True):
    """Look up the logged time of one event and shift it by ``add_hours``.

    The first row whose 'Group' equals ``element_name`` is used. The shifted
    time is rounded to 10 ms and returned either as a Timestamp
    (``as_str=False``) or as ``'YYYY-MM-DD HH:MM:SS.cc'`` text. Returns None when
    the event is missing or its value cannot be parsed as a datetime.
    """
    matches = df.loc[df['Group'] == element_name, col]
    stamp = None if matches.empty else pd.to_datetime(matches.iloc[0], errors='coerce')
    if stamp is None or pd.isna(stamp):
        return None

    shifted = (stamp + pd.Timedelta(hours=add_hours)).round('10ms')
    if as_str:
        hundredths = shifted.microsecond // 10000  # 0..99 after rounding to 10 ms
        return f"{shifted:%Y-%m-%d %H:%M:%S}.{hundredths:02d}"
    return shifted

# For every condition the same five 'Group' labels are looked up: S01_S, S02_S, S03_S, S03_M, S03_E.
# Values are the strings produced by get_time (default +1 h shift applied), or None if a label is missing.
# ---- Visual ----
start_time_v   = get_time(log_v, 'S01_S')
state02_start_time_v = get_time(log_v, 'S02_S')
state03_start_time_v = get_time(log_v, 'S03_S')
state03_m_time_v     = get_time(log_v, 'S03_M')
end_time_v     = get_time(log_v, 'S03_E')

# ---- VA ----
start_time_va   = get_time(log_va, 'S01_S')
state02_start_time_va = get_time(log_va, 'S02_S')
state03_start_time_va = get_time(log_va, 'S03_S')
state03_m_time_va     = get_time(log_va, 'S03_M')
end_time_va     = get_time(log_va, 'S03_E')

# ---- VT ----
start_time_vt   = get_time(log_vt, 'S01_S')
state02_start_time_vt = get_time(log_vt, 'S02_S')
state03_start_time_vt = get_time(log_vt, 'S03_S')
state03_m_time_vt     = get_time(log_vt, 'S03_M')
end_time_vt     = get_time(log_vt, 'S03_E')

# ---- VAT ----
start_time_vat   = get_time(log_vat, 'S01_S')
state02_start_time_vat = get_time(log_vat, 'S02_S')
state03_start_time_vat = get_time(log_vat, 'S03_S')
state03_m_time_vat     = get_time(log_vat, 'S03_M')
end_time_vat     = get_time(log_vat, 'S03_E')

# ---- Print results ----
print("Visual condition start:", start_time_v)
print("Visual State 02 start:", state02_start_time_v)
print("Visual State 03 start:", state03_start_time_v)
print("Visual State 03 m_time:", state03_m_time_v)
print("Visual end:", end_time_v)
print("---")
print("VA condition start:", start_time_va)
print("VA State 02 start:", state02_start_time_va)
print("VA State 03 start:", state03_start_time_va)
print("VA State 03 m_time:", state03_m_time_va)
print("VA end:", end_time_va)
print("---")
print("VT condition start:", start_time_vt)
print("VT State 02 start:", state02_start_time_vt)
print("VT State 03 start:", state03_start_time_vt)
print("VT State 03 m_time:", state03_m_time_vt)
print("VT end:", end_time_vt)
print("---")
print("VAT condition start:", start_time_vat)
print("VAT State 02 start:", state02_start_time_vat)
print("VAT State 03 start:", state03_start_time_vat)
print("VAT State 03 m_time:", state03_m_time_vat)
print("VAT end:", end_time_vat)


Visual condition start: 2025-10-10 14:06:41.28
Visual State 02 start: 2025-10-10 14:09:46.52
Visual State 03 start: 2025-10-10 14:10:51.83
Visual State 03 m_time: 2025-10-10 14:11:05.96
Visual end: 2025-10-10 14:11:24.30
---
VA condition start: 2025-10-10 14:49:28.21
VA State 02 start: 2025-10-10 14:52:32.16
VA State 03 start: 2025-10-10 14:53:33.81
VA State 03 m_time: 2025-10-10 14:53:44.99
VA end: 2025-10-10 14:53:55.33
---
VT condition start: 2025-10-10 14:37:01.43
VT State 02 start: 2025-10-10 14:40:02.04
VT State 03 start: 2025-10-10 14:41:04.22
VT State 03 m_time: 2025-10-10 14:41:19.62
VT end: 2025-10-10 14:41:32.79
---
VAT condition start: 2025-10-10 14:22:41.10
VAT State 02 start: 2025-10-10 14:25:43.50
VAT State 03 start: 2025-10-10 14:26:48.91
VAT State 03 m_time: 2025-10-10 14:27:04.78
VAT end: 2025-10-10 14:27:19.17


## Baseline calculation

In [18]:
import pandas as pd
from pandas import Timedelta

# Make copies of the original DataFrames to avoid modifying the source data
# NOTE(review): only df_visual keeps an untouched original (df_v is a new name); df_va / df_vt / df_vat
# are rebound to their own copies, so the pre-copy frames are no longer reachable under those names.
df_v   = df_visual.copy()
df_va  = df_va.copy()
df_vt  = df_vt.copy()
df_vat = df_vat.copy()

def get_interval_bpm_hrv(df, interval_start, interval_end):
    """Mean heart rate and HRV of the samples inside ``[interval_start, interval_end)``.

    Parameters
    ----------
    df : DataFrame
        Must provide 'Datetime_Local', 'heart-rate' and 'hrv' columns.
    interval_start, interval_end
        Anything ``pd.to_datetime`` accepts. Naive values are interpreted in the
        timezone of ``df['Datetime_Local']``; aware values are used as given.

    Returns
    -------
    tuple
        ``(mean_bpm, mean_hrv)`` rounded to two decimals; ``np.nan`` for a value
        that cannot be computed (for example when no sample falls in the interval).

    Notes
    -----
    The input frame is left untouched: timestamps are parsed into a local Series
    rather than written back into ``df['Datetime_Local']``.
    """
    stamps = pd.to_datetime(df['Datetime_Local'])
    tz = stamps.dt.tz

    def _bound(value):
        # Localize naive bounds to the data's timezone so they compare cleanly.
        ts = pd.to_datetime(value)
        return ts.tz_localize(tz) if ts.tzinfo is None else ts

    lower = _bound(interval_start)
    upper = _bound(interval_end)
    in_window = df[(stamps >= lower) & (stamps < upper)]

    mean_bpm = in_window['heart-rate'].mean()
    mean_hrv = in_window['hrv'].mean()
    mean_bpm = round(mean_bpm, 2) if pd.notnull(mean_bpm) else np.nan
    mean_hrv = round(mean_hrv, 2) if pd.notnull(mean_hrv) else np.nan
    return mean_bpm, mean_hrv

# Two one-minute baseline windows are defined per condition, relative to the condition start:
#   Baseline_00  = [start - 1 min, start)
#   Baseline_pre = [start - 2 min, start - 1 min)
# ----- Visual -----
baseline_00_start_v  = pd.to_datetime(start_time_v)  - Timedelta(minutes=1)
baseline_00_end_v    = pd.to_datetime(start_time_v)
baseline_pre_start_v = pd.to_datetime(start_time_v)  - Timedelta(minutes=2)
baseline_pre_end_v   = pd.to_datetime(start_time_v)  - Timedelta(minutes=1)

# ----- VA -----
baseline_00_start_va  = pd.to_datetime(start_time_va)  - Timedelta(minutes=1)
baseline_00_end_va    = pd.to_datetime(start_time_va)
baseline_pre_start_va = pd.to_datetime(start_time_va)  - Timedelta(minutes=2)
baseline_pre_end_va   = pd.to_datetime(start_time_va)  - Timedelta(minutes=1)

# ----- VT -----
baseline_00_start_vt  = pd.to_datetime(start_time_vt)  - Timedelta(minutes=1)
baseline_00_end_vt    = pd.to_datetime(start_time_vt)
baseline_pre_start_vt = pd.to_datetime(start_time_vt)  - Timedelta(minutes=2)
baseline_pre_end_vt   = pd.to_datetime(start_time_vt)  - Timedelta(minutes=1)

# ----- VAT -----
baseline_00_start_vat  = pd.to_datetime(start_time_vat) - Timedelta(minutes=1)
baseline_00_end_vat    = pd.to_datetime(start_time_vat)
baseline_pre_start_vat = pd.to_datetime(start_time_vat) - Timedelta(minutes=2)
baseline_pre_end_vat   = pd.to_datetime(start_time_vat) - Timedelta(minutes=1)

# Compute means for each group and interval
bpm_00_v,  hrv_00_v  = get_interval_bpm_hrv(df_v,   baseline_00_start_v,  baseline_00_end_v)
bpm_pre_v, hrv_pre_v = get_interval_bpm_hrv(df_v,   baseline_pre_start_v, baseline_pre_end_v)

bpm_00_va,  hrv_00_va  = get_interval_bpm_hrv(df_va,  baseline_00_start_va,  baseline_00_end_va)
bpm_pre_va, hrv_pre_va = get_interval_bpm_hrv(df_va,  baseline_pre_start_va, baseline_pre_end_va)

bpm_00_vt,  hrv_00_vt  = get_interval_bpm_hrv(df_vt,  baseline_00_start_vt,  baseline_00_end_vt)
bpm_pre_vt, hrv_pre_vt = get_interval_bpm_hrv(df_vt,  baseline_pre_start_vt, baseline_pre_end_vt)

bpm_00_vat,  hrv_00_vat  = get_interval_bpm_hrv(df_vat, baseline_00_start_vat,  baseline_00_end_vat)
bpm_pre_vat, hrv_pre_vat = get_interval_bpm_hrv(df_vat, baseline_pre_start_vat, baseline_pre_end_vat)

# Print results in a readable format
# Baseline_pre is computed for every condition but only printed for Visual; the other prints are disabled.
# NOTE(review): the value labelled HRV(RMSSD) is the plain mean of the 'hrv' column over the window —
# confirm that column already holds RMSSD.
print(f"Visual group Baseline_00 (-1min~0min) bpm: {bpm_00_v:.2f}, HRV(RMSSD): {hrv_00_v:.2f} ms")
print(f"Visual group Baseline_pre (-2min~-1min) bpm: {bpm_pre_v:.2f}, HRV(RMSSD): {hrv_pre_v:.2f} ms")
print('---')
print(f"VA group Baseline_00 (-1min~0min) bpm: {bpm_00_va:.2f}, HRV(RMSSD): {hrv_00_va:.2f} ms")
# print(f"VA group Baseline_pre (-2min~-1min) bpm: {bpm_pre_va:.2f}, HRV(RMSSD): {hrv_pre_va:.2f} ms")
print('---')
print(f"VT group Baseline_00 (-1min~0min) bpm: {bpm_00_vt:.2f}, HRV(RMSSD): {hrv_00_vt:.2f} ms")
# print(f"VT group Baseline_pre (-2min~-1min) bpm: {bpm_pre_vt:.2f}, HRV(RMSSD): {hrv_pre_vt:.2f} ms")
print('---')
print(f"VAT group Baseline_00 (-1min~0min) bpm: {bpm_00_vat:.2f}, HRV(RMSSD): {hrv_00_vat:.2f} ms")
# print(f"VAT group Baseline_pre (-2min~-1min) bpm: {bpm_pre_vat:.2f}, HRV(RMSSD): {hrv_pre_vat:.2f} ms")


Visual group Baseline_00 (-1min~0min) bpm: 63.81, HRV(RMSSD): 25.12 ms
Visual group Baseline_pre (-2min~-1min) bpm: 82.02, HRV(RMSSD): 38.66 ms
---
VA group Baseline_00 (-1min~0min) bpm: 80.30, HRV(RMSSD): 44.59 ms
---
VT group Baseline_00 (-1min~0min) bpm: 77.17, HRV(RMSSD): 49.76 ms
---
VAT group Baseline_00 (-1min~0min) bpm: 78.06, HRV(RMSSD): 50.92 ms


## Calculate mean values in 30-second windows for HR

In [19]:
# Render floats with two decimals in DataFrame displays (global pandas option, so it also affects later cells).
pd.set_option('display.float_format', lambda x: f"{x:.2f}")

def calc_rolling_means(df, start_time, end_time, interval_s=30, min_total_s=270, max_total_s=270):
    """Average heart rate and HRV over consecutive fixed-length windows.

    Parameters
    ----------
    df : DataFrame
        Must provide 'Datetime_Local', 'heart-rate' and 'hrv' columns. A copy is
        used internally, so the caller's frame is not modified.
    start_time, end_time
        Bounds of the session; naive values are interpreted in the timezone of
        ``df['Datetime_Local']``, aware values are converted to it.
    interval_s : int
        Window length in seconds.
    min_total_s : int
        Sessions shorter than this get a final row holding the mean of the whole
        (capped) session instead of the mean of the leftover tail only.
    max_total_s : int
        Nothing after ``start_time + max_total_s`` contributes to any row.

    Returns
    -------
    DataFrame
        Columns 'Seconds' (end of the window, relative to ``start_time``), 'bpm'
        and 'hrv'. Means are rounded to two decimals; a window with no samples
        has missing values.

    Notes
    -----
    The full-window loop is bounded by the capped end time. It used to run up to
    ``end_time``, so sessions longer than ``max_total_s + interval_s`` produced
    windows beyond the cap.
    """
    df = df.copy()
    # Ensure datetime with tz alignment
    df['Datetime_Local'] = pd.to_datetime(df['Datetime_Local'])
    tz = df['Datetime_Local'].dt.tz

    def _align(value):
        # Bring a bound into the data's timezone so comparisons are well-defined.
        ts = pd.to_datetime(value)
        return ts.tz_localize(tz) if ts.tzinfo is None else ts.tz_convert(tz)

    def _means(win_start, win_end):
        # Rounded means over the half-open window [win_start, win_end).
        seg = df[(df['Datetime_Local'] >= win_start) & (df['Datetime_Local'] < win_end)]
        mean_bpm = seg['heart-rate'].mean()
        mean_hrv = seg['hrv'].mean()
        return [round(mean_bpm, 2) if pd.notnull(mean_bpm) else None,
                round(mean_hrv, 2) if pd.notnull(mean_hrv) else None]

    start_time = _align(start_time)
    end_time = _align(end_time)

    interval = pd.Timedelta(seconds=interval_s)
    total_duration = end_time - start_time
    cap_end_time = min(end_time, start_time + pd.Timedelta(seconds=max_total_s))

    # A) Session shorter than one window: a single row covering all of it.
    if total_duration < interval:
        seconds_marker = total_duration.total_seconds()
        return pd.DataFrame([[round(seconds_marker, 2)] + _means(start_time, end_time)],
                            columns=['Seconds', 'bpm', 'hrv'])

    # B) Full windows, never extending past the cap.
    results = []
    idx = 1
    current_start = start_time
    while current_start + interval <= cap_end_time:
        current_end = current_start + interval
        results.append([idx * interval_s] + _means(current_start, current_end))
        current_start = current_end
        idx += 1

    # C) Leftover shorter than one window.
    if current_start < cap_end_time:
        tail_seconds = (cap_end_time - current_start).total_seconds()

        if total_duration.total_seconds() < min_total_s:
            # Short session: the final row is the mean of the whole capped session.
            tail_means = _means(start_time, cap_end_time)
        else:
            # Long enough session: the final row covers only the leftover tail.
            tail_means = _means(current_start, cap_end_time)

        seconds_marker = (idx - 1) * interval_s + tail_seconds
        results.append([round(seconds_marker, 2)] + tail_means)

    return pd.DataFrame(results, columns=['Seconds', 'bpm', 'hrv'])

# --- copies ---
# df_v is a fresh copy of df_visual; the other three names are rebound to copies of themselves.
df_v   = df_visual.copy()
df_va  = df_va.copy()
df_vt  = df_vt.copy()
df_vat = df_vat.copy()

# --- compute rolling means ---
# One table per condition, from that condition's S01_S time to its S03_E time.
df_v_means   = calc_rolling_means(df_v,   start_time_v,   end_time_v).round(2)
df_va_means  = calc_rolling_means(df_va,  start_time_va,  end_time_va).round(2)
df_vt_means  = calc_rolling_means(df_vt,  start_time_vt,  end_time_vt).round(2)
df_vat_means = calc_rolling_means(df_vat, start_time_vat, end_time_vat).round(2)

# --- align to common minimum length ---
# Truncate every table to the shortest one so all conditions have the same number of rows.
min_len = min(len(df_v_means), len(df_va_means), len(df_vt_means), len(df_vat_means))

df_v_means_aligned   = df_v_means.iloc[:min_len].reset_index(drop=True)
df_va_means_aligned  = df_va_means.iloc[:min_len].reset_index(drop=True)
df_vt_means_aligned  = df_vt_means.iloc[:min_len].reset_index(drop=True)
df_vat_means_aligned = df_vat_means.iloc[:min_len].reset_index(drop=True)

# --- display ---
display('Visual 30s Interval Means', df_v_means_aligned)
display('VA 30s Interval Means', df_va_means_aligned)
display('VT 30s Interval Means', df_vt_means_aligned)
display('VAT 30s Interval Means', df_vat_means_aligned)


'Visual 30s Interval Means'

Unnamed: 0,Seconds,bpm,hrv
0,30,79.61,37.94
1,60,76.35,38.66
2,90,79.59,31.54
3,120,83.21,30.29
4,150,80.51,29.03
5,180,82.9,27.92
6,210,79.52,33.92
7,240,77.38,38.34
8,270,79.42,40.67


'VA 30s Interval Means'

Unnamed: 0,Seconds,bpm,hrv
0,30.0,63.46,44.15
1,60.0,64.6,38.7
2,90.0,73.12,39.54
3,120.0,50.33,33.11
4,150.0,79.58,51.93
5,180.0,80.73,45.82
6,210.0,76.85,49.29
7,240.0,78.91,57.28
8,267.12,72.01,45.37


'VT 30s Interval Means'

Unnamed: 0,Seconds,bpm,hrv
0,30,74.51,51.87
1,60,49.74,29.66
2,90,78.75,40.5
3,120,80.1,42.57
4,150,76.87,49.43
5,180,77.25,48.87
6,210,78.06,42.06
7,240,76.66,40.12
8,270,48.09,20.49


'VAT 30s Interval Means'

Unnamed: 0,Seconds,bpm,hrv
0,30,77.95,42.86
1,60,76.14,52.95
2,90,75.92,44.78
3,120,76.01,46.95
4,150,79.45,44.99
5,180,81.58,42.33
6,210,81.06,45.09
7,240,76.93,42.99
8,270,50.39,19.92


## Export clean bpm and hrv data to CSV files

In [20]:
import os
import pandas as pd

# ========== Define Participant ID and Gender ==========
# Both values are written into every row exported by this cell.
# NOTE(review): they are set by hand here, independently of SUBJECT ("sub-P014")
# near the top of the notebook - keep the two in sync when switching participant.
participant_id = "P14"  # Change for each participant
gender = "M"            # Change for each participant

# ========== Define Output Directory and File ==========
# NOTE(review): absolute, machine-specific path; the same participant/output
# settings are repeated in the two later export cells. Consider defining them
# once in a configuration cell.
output_dir = r"G:\USYD_PhD_OneDrive\OneDrive - The University of Sydney (Students)\PhD_USYD\2025\Prototype B\HR"
# Create the folder if it is missing; an existing folder is not an error
os.makedirs(output_dir, exist_ok=True)
filename = "B_Participant_HR.csv"
filepath = os.path.join(output_dir, filename)

# ========== Function to Build Output Row ==========
def build_output_row(participant_id, gender, condition, bpm_pre, hrv_pre, bpm_00, hrv_00, means_df):
    """
    Flatten one condition's summary into a single CSV row.

    Layout: the three identifiers, the four baseline values, then every value
    of means_df['bpm'] followed by every value of means_df['hrv'].
    """
    return [
        participant_id, gender, condition,
        bpm_pre, hrv_pre, bpm_00, hrv_00,
        *means_df['bpm'].tolist(),
        *means_df['hrv'].tolist(),
    ]

# ========== Build one row per condition from the aligned tables ==========
# Only V carries the pre-baseline values; VA, VT and VAT leave both pre columns blank.
row_v = build_output_row(
    participant_id, gender, "V",
    bpm_pre_v, hrv_pre_v, bpm_00_v, hrv_00_v,
    df_v_means_aligned,
)
row_va = build_output_row(
    participant_id, gender, "VA",
    "", "", bpm_00_va, hrv_00_va,
    df_va_means_aligned,
)
row_vt = build_output_row(
    participant_id, gender, "VT",
    "", "", bpm_00_vt, hrv_00_vt,
    df_vt_means_aligned,
)
row_vat = build_output_row(
    participant_id, gender, "VAT",
    "", "", bpm_00_vat, hrv_00_vat,
    df_vat_means_aligned,
)

# ========== Column names: fixed part, then all bpm windows, then all hrv windows ==========
n_windows = len(df_v_means_aligned)
columns = (
    ["Participant ID", "Gender", "Condition",
     "Baseline_Pre_bpm", "Baseline_Pre_hrv", "Baseline_00_bpm", "Baseline_00_hrv"]
    + [f"bpm_{(i+1)*30}s" for i in range(n_windows)]
    + [f"hrv_{(i+1)*30}s" for i in range(n_windows)]
)

# ========== Write a new file with header, or append header-less rows to an existing one ==========
rows = [row_v, row_va, row_vt, row_vat]
df_out = pd.DataFrame(rows, columns=columns)

file_exists = os.path.exists(filepath)
df_out.to_csv(filepath, mode='a' if file_exists else 'w', index=False, header=not file_exists)

print(f"File saved at: {filepath}")


File saved at: G:\USYD_PhD_OneDrive\OneDrive - The University of Sydney (Students)\PhD_USYD\2025\Prototype B\HR\B_Participant_HR.csv


## Calculate mean HR and HRV in the 2 s before and after S3_S

In [21]:
import pandas as pd

# Parse the timestamp column of every condition frame in place;
# entries that cannot be parsed become NaT (errors='coerce')
for frame in (df_visual, df_va, df_vt, df_vat):
    frame['Datetime_Local'] = pd.to_datetime(frame['Datetime_Local'], errors='coerce')

# The Visual frame's timezone serves as the reference for the S3_S timestamps
tz = df_visual['Datetime_Local'].dt.tz

# Align S3_S timestamps to the same timezone
def to_tz(ts, tz):
    """Return `ts` as a pandas Timestamp expressed in `tz`.

    A timezone-naive timestamp is localized to `tz`; a timezone-aware one is
    converted to `tz`.
    """
    stamp = pd.to_datetime(ts)
    if stamp.tzinfo is None:
        return stamp.tz_localize(tz)
    return stamp.tz_convert(tz)

# S3_S start of every condition, expressed in the reference timezone
s3_start_time_v, s3_start_time_va, s3_start_time_vt, s3_start_time_vat = [
    to_tz(raw_start, tz)
    for raw_start in (
        state03_start_time_v,
        state03_start_time_va,
        state03_start_time_vt,
        state03_start_time_vat,
    )
]

# Window settings
interval_before = pd.Timedelta(seconds=2)
interval_after  = pd.Timedelta(seconds=2)

def before_after(df, s3_start):
    df_before = df[(df['Datetime_Local'] >= (s3_start - interval_before)) &
                   (df['Datetime_Local'] <  s3_start)]
    df_after  = df[(df['Datetime_Local'] >=  s3_start) &
                   (df['Datetime_Local'] <  (s3_start + interval_after))]
    mean_before = df_before[['heart-rate','hrv']].mean().to_frame().T
    mean_before['Window'] = 'Before'
    mean_after  = df_after[['heart-rate','hrv']].mean().to_frame().T
    mean_after['Window']  = 'After'
    out = pd.concat([mean_before, mean_after], ignore_index=True)
    return out[['Window','heart-rate','hrv']]

# Before/after tables around S3_S, one per condition
df_v_window = before_after(df_visual, s3_start_time_v)
df_va_window = before_after(df_va, s3_start_time_va)
df_vt_window = before_after(df_vt, s3_start_time_vt)
df_vat_window = before_after(df_vat, s3_start_time_vat)

# Print each banner followed by its table, in V, VA, VT, VAT order
for banner, window_table in (
    ("=== V Condition (Mean 2s before/after S3_S) ===", df_v_window),
    ("=== VA Condition (Mean 2s before/after S3_S) ===", df_va_window),
    ("=== VT Condition (Mean 2s before/after S3_S) ===", df_vt_window),
    ("=== VAT Condition (Mean 2s before/after S3_S) ===", df_vat_window),
):
    print(banner)
    display(window_table)


=== V Condition (Mean 2s before/after S3_S) ===


Unnamed: 0,Window,heart-rate,hrv
0,Before,82.11,55.3
1,After,82.05,56.64


=== VA Condition (Mean 2s before/after S3_S) ===


Unnamed: 0,Window,heart-rate,hrv
0,Before,79.63,63.27
1,After,79.34,51.04


=== VT Condition (Mean 2s before/after S3_S) ===


Unnamed: 0,Window,heart-rate,hrv
0,Before,75.5,32.07
1,After,75.77,30.61


=== VAT Condition (Mean 2s before/after S3_S) ===


Unnamed: 0,Window,heart-rate,hrv
0,Before,70.23,25.16
1,After,0.0,0.0


## Export the mean HR and HRV for the 2 s before and after S3_S to a CSV file

In [22]:
import os
import pandas as pd

# ========== Define Participant ID and Gender ==========
# Both values are written into every row exported by this cell.
# NOTE(review): same hand-set values as in the previous export cell and
# independent of SUBJECT ("sub-P014") near the top of the notebook - keep in sync.
participant_id = "P14"   # Change for each participant
gender = "M"             # Change for each participant

# ========== Define Output Directory and File ==========
# NOTE(review): absolute, machine-specific path repeated from the previous
# export cell; consider defining it once in a configuration cell.
output_dir = r"G:\USYD_PhD_OneDrive\OneDrive - The University of Sydney (Students)\PhD_USYD\2025\Prototype B\HR"
# Create the folder if it is missing; an existing folder is not an error
os.makedirs(output_dir, exist_ok=True)
filename = "B_Participant_HR_S3.csv"
filepath = os.path.join(output_dir, filename)

# ========== Build Output Row Function ==========
def build_output_row_hr_S3(
    participant_id, gender, condition,
    hr_pre, hrv_pre, hr_00, hrv_00,
    hr_before, hrv_before, hr_after, hrv_after,
    skip_pre=False
):
    """
    Assemble one CSV row: identifiers, pre-baseline pair, 00-baseline pair,
    then the HR/HRV pair before S3_S and the HR/HRV pair after S3_S.

    When skip_pre is true the two pre-baseline cells are written as empty
    strings regardless of hr_pre / hrv_pre.
    """
    pre_cells = ("", "") if skip_pre else (hr_pre, hrv_pre)
    return [
        participant_id, gender, condition,
        *pre_cells,
        hr_00, hrv_00,
        hr_before, hrv_before,
        hr_after, hrv_after,
    ]

# ========== Fill in from calculations ==========
def _s3_window_cells(window_df):
    """Return [Before HR, Before HRV, After HR, After HRV] read from a before/after table."""
    return [
        window_df.loc[window_df['Window'] == window, metric].values[0]
        for window in ('Before', 'After')
        for metric in ('heart-rate', 'hrv')
    ]

# Visual: pre-baseline values are used when they exist in the namespace, otherwise left blank.
# NOTE(review): this export labels the condition "Visual" while the other two
# exports in this notebook label it "V" - confirm which label is intended.
row_v = build_output_row_hr_S3(
    participant_id, gender, "Visual",
    bpm_pre_v if 'bpm_pre_v' in locals() else "",
    hrv_pre_v if 'hrv_pre_v' in locals() else "",
    bpm_00_v, hrv_00_v,
    *_s3_window_cells(df_v_window),
    skip_pre=False
)

# VA: blank pre-baseline cells are passed in directly
row_va = build_output_row_hr_S3(
    participant_id, gender, "VA",
    "", "",
    bpm_00_va, hrv_00_va,
    *_s3_window_cells(df_va_window),
    skip_pre=False
)

# VT: blank pre-baseline cells are passed in directly
row_vt = build_output_row_hr_S3(
    participant_id, gender, "VT",
    "", "",
    bpm_00_vt, hrv_00_vt,
    *_s3_window_cells(df_vt_window),
    skip_pre=False
)

# VAT: pre-baseline cells blanked via skip_pre
row_vat = build_output_row_hr_S3(
    participant_id, gender, "VAT",
    "", "",
    bpm_00_vat, hrv_00_vat,
    *_s3_window_cells(df_vat_window),
    skip_pre=True
)

# ========== Columns ==========
columns = [
    "Participant ID", "Gender", "Condition",
    "Baseline_Pre_HR", "Baseline_Pre_HRV",
    "Baseline_00_HR", "Baseline_00_HRV",
    "Before_S3_S_HR", "Before_S3_S_HRV",
    "After_S3_S_HR",  "After_S3_S_HRV"
]

# ========== Write a new file with header, or append header-less rows to an existing one ==========
rows = [row_v, row_va, row_vt, row_vat]
df_out = pd.DataFrame(rows, columns=columns)

file_exists = os.path.exists(filepath)
df_out.to_csv(filepath, mode='a' if file_exists else 'w', index=False, header=not file_exists)

print(f"File saved at: {filepath}")


File saved at: G:\USYD_PhD_OneDrive\OneDrive - The University of Sydney (Students)\PhD_USYD\2025\Prototype B\HR\B_Participant_HR_S3.csv


## Sample HR and HRV at the three S3 time points (S3_S, S3_M, S3_E)

In [23]:
import pandas as pd

# Helper: convert ts to pandas Timestamp, align tz to df's ts_col, then pick nearest sample
def val_at_timestamp(df, ts, ts_col='Datetime_Local', val_col='heart-rate'):
    """
    Return df[val_col] at the row whose df[ts_col] is nearest to `ts`, rounded to 2 decimals.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain `ts_col` and `val_col`. The frame is not modified.
    ts : anything accepted by pd.to_datetime
        Target time. If the timestamp column is timezone-aware, a naive `ts`
        is localized to that timezone and an aware `ts` is converted to it.
    ts_col, val_col : str
        Names of the timestamp column and of the column to read.

    Returns
    -------
    float or None
        The rounded value, or None when the nearest row holds a missing value
        or when there is no usable timestamp to match against (empty frame,
        all timestamps NaT, or `ts` itself is NaT).
    """
    # Parse into a local series so the caller's frame is never rewritten
    times = df[ts_col]
    if not pd.api.types.is_datetime64_any_dtype(times):
        times = pd.to_datetime(times)

    ts = pd.to_datetime(ts)

    # Bring the target into the column's timezone so the subtraction is valid
    tz = times.dt.tz
    if tz is not None:
        ts = ts.tz_localize(tz) if ts.tzinfo is None else ts.tz_convert(tz)

    # Positional lookup: stays correct even when the frame's index has duplicate labels
    gaps = (times - ts).abs().reset_index(drop=True)
    if gaps.isna().all():  # also true for an empty frame
        return None

    pos = gaps.idxmin()  # NaT gaps are skipped; first row wins on ties
    val = df[val_col].iloc[pos]
    return None if pd.isna(val) else round(float(val), 2)

# Build a wide table for HR/HRV at S3_S, S3_M, S3_E
def _s3_point_row(label, df, t_start, t_mid, t_end):
    """Return [label, HR, HRV at t_start, HR, HRV at t_mid, HR, HRV at t_end]."""
    row = [label]
    for moment in (t_start, t_mid, t_end):
        for column in ('heart-rate', 'hrv'):
            row.append(val_at_timestamp(df, moment, val_col=column))
    return row

# One row per condition; the end time of each condition is used as S3_E
rows = [
    _s3_point_row("V", df_visual, state03_start_time_v, state03_m_time_v, end_time_v),
    _s3_point_row("VA", df_va, state03_start_time_va, state03_m_time_va, end_time_va),
    _s3_point_row("VT", df_vt, state03_start_time_vt, state03_m_time_vt, end_time_vt),
    _s3_point_row("VAT", df_vat, state03_start_time_vat, state03_m_time_vat, end_time_vat),
]

point_columns = ["Condition"]
for point in ("S3_S", "S3_M", "S3_E"):
    point_columns += [point + "_HR", point + "_HRV"]

df_hr_hrv_points = pd.DataFrame(rows, columns=point_columns)

display(df_hr_hrv_points)


Unnamed: 0,Condition,S3_S_HR,S3_S_HRV,S3_M_HR,S3_M_HRV,S3_E_HR,S3_E_HRV
0,V,82.0,55.8,78.0,25.1,77.0,25.1
1,VA,79.0,56.5,83.0,49.2,83.0,35.9
2,VT,76.0,33.3,0.0,0.0,77.0,29.2
3,VAT,0.0,0.0,76.0,28.7,75.0,39.8


In [24]:
import os
import pandas as pd

# ========== Define Participant ID and Gender ==========
# Both values are read by build_row_hr_points below and written into every exported row.
# NOTE(review): same hand-set values as in the earlier export cells and
# independent of SUBJECT ("sub-P014") near the top of the notebook - keep in sync.
participant_id = "P14"
gender = "M"

# ========== Define Output Directory and File ==========
# NOTE(review): absolute, machine-specific path repeated from the earlier
# export cells; consider defining it once in a configuration cell.
output_dir = r"G:\USYD_PhD_OneDrive\OneDrive - The University of Sydney (Students)\PhD_USYD\2025\Prototype B\HR"
# Create the folder if it is missing; an existing folder is not an error
os.makedirs(output_dir, exist_ok=True)
filename = "B_Participant_HR_S3_Task.csv"
filepath = os.path.join(output_dir, filename)

# ========== Helper: nearest-sample value at a timestamp ==========
def val_at_timestamp(df, ts, ts_col='Datetime_Local', val_col='heart-rate'):
    """
    Return the value (rounded to 2 decimals) at the row whose timestamp is nearest to `ts`.

    The timestamp column is parsed to datetime if needed (on a local copy; `df`
    itself is not modified), and `ts` is aligned to the column's timezone when
    the column is timezone-aware: a naive `ts` is localized, an aware `ts` is
    converted.

    Returns None when the nearest row holds a missing value, or when there is
    no usable timestamp to match against (empty frame, all timestamps NaT, or
    `ts` itself is NaT).
    """
    # Parse into a local series so the caller's frame is never rewritten
    times = df[ts_col]
    if not pd.api.types.is_datetime64_any_dtype(times):
        times = pd.to_datetime(times)

    ts = pd.to_datetime(ts)

    # Bring the target into the column's timezone so the subtraction is valid
    tz = times.dt.tz
    if tz is not None:
        ts = ts.tz_localize(tz) if ts.tzinfo is None else ts.tz_convert(tz)

    # Positional lookup: stays correct even when the frame's index has duplicate labels
    gaps = (times - ts).abs().reset_index(drop=True)
    if gaps.isna().all():  # also true for an empty frame
        return None

    pos = gaps.idxmin()  # NaT gaps are skipped; first row wins on ties
    val = df[val_col].iloc[pos]
    return None if pd.isna(val) else round(float(val), 2)

# ========== Build one row for a condition ==========
def build_row_hr_points(condition_label, df, bpm_pre, hrv_pre, bpm_00, hrv_00, t_s3s, t_s3m, t_s3e, skip_pre=False):
    """
    Assemble the CSV row for one condition.

    Layout: participant_id, gender (both read from the notebook namespace),
    condition_label, the pre-baseline pair, the 00-baseline pair, then the
    HR/HRV pair looked up with val_at_timestamp at t_s3s, t_s3m and t_s3e.

    Numeric cells are formatted as strings with two decimals; anything that
    float() rejects (e.g. "" or None) is passed through unchanged. When
    skip_pre is true the pre-baseline pair is written as two empty strings.
    """
    def two_decimals(x):
        try:
            return f"{float(x):.2f}"
        except (TypeError, ValueError):
            return x

    # HR then HRV for each of the three time points, in S3_S, S3_M, S3_E order
    point_cells = [
        two_decimals(val_at_timestamp(df, moment, val_col=column))
        for moment in (t_s3s, t_s3m, t_s3e)
        for column in ('heart-rate', 'hrv')
    ]
    pre_cells = ["", ""] if skip_pre else [two_decimals(bpm_pre), two_decimals(hrv_pre)]

    return [
        participant_id, gender, condition_label,
        *pre_cells,
        two_decimals(bpm_00), two_decimals(hrv_00),
        *point_cells,
    ]

# ========== Build rows using EXISTING variables ==========
# Only V supplies pre-baseline values; VA and VT pass blanks, VAT blanks them via skip_pre.
row_v = build_row_hr_points(
    "V", df_visual,
    bpm_pre_v, hrv_pre_v, bpm_00_v, hrv_00_v,
    t_s3s=state03_start_time_v, t_s3m=state03_m_time_v, t_s3e=end_time_v,
    skip_pre=False,
)

row_va = build_row_hr_points(
    "VA", df_va,
    "", "", bpm_00_va, hrv_00_va,
    t_s3s=state03_start_time_va, t_s3m=state03_m_time_va, t_s3e=end_time_va,
    skip_pre=False,
)

row_vt = build_row_hr_points(
    "VT", df_vt,
    "", "", bpm_00_vt, hrv_00_vt,
    t_s3s=state03_start_time_vt, t_s3m=state03_m_time_vt, t_s3e=end_time_vt,
    skip_pre=False,
)

row_vat = build_row_hr_points(
    "VAT", df_vat,
    "", "", bpm_00_vat, hrv_00_vat,
    t_s3s=state03_start_time_vat, t_s3m=state03_m_time_vat, t_s3e=end_time_vat,
    skip_pre=True,
)

# ========== Columns ==========
columns = [
    "Participant ID", "Gender", "Condition",
    "Baseline_Pre_bpm", "Baseline_Pre_hrv",
    "Baseline_00_bpm", "Baseline_00_hrv",
    "S3_S_HR", "S3_S_HRV",
    "S3_M_HR", "S3_M_HRV",
    "S3_E_HR", "S3_E_HRV",
]

# ========== Write a new file with header, or append header-less rows to an existing one ==========
rows = [row_v, row_va, row_vt, row_vat]

file_exists = os.path.exists(filepath)
pd.DataFrame(rows, columns=columns).to_csv(
    filepath, mode='a' if file_exists else 'w', index=False, header=not file_exists
)

print(f"File saved at: {filepath}")


File saved at: G:\USYD_PhD_OneDrive\OneDrive - The University of Sydney (Students)\PhD_USYD\2025\Prototype B\HR\B_Participant_HR_S3_Task.csv
