In [3]:
import pyxdf
import pandas as pd
import numpy as np
from datetime import datetime, timedelta, timezone
from scipy import stats
import os
from IPython.display import display
import matplotlib.pyplot as plt

## Load the XDF file

In [6]:
from pathlib import Path
import re

BASE_DIR = Path(r".")       # root folder holding the sub-<ID>/ses-<cond>/eeg directory tree
SUBJECT  = "sub-P007"      # subject folder name; also the filename prefix used when globbing
# ============================

# Condition codes; each one corresponds to a ses-<cond> folder of the subject.
# Later cells label va as "Visual + Auditory" and vt as "Visual + Thermal";
# presumably v = visual only and vat = all three combined — confirm.
CONDITIONS = ["v", "va", "vt", "vat"]

run_re = re.compile(r"_run-(\d+)_eeg\.xdf$", re.IGNORECASE)

def pick_latest_run(paths):
    """Choose a single recording out of ``paths``.

    Files whose name ends in ``_run-<N>_eeg.xdf`` (case-insensitive) are ranked
    by ``N`` and the highest one wins. If no name carries a run number, the
    most recently modified file is returned. An empty input yields ``None``.
    """
    if not paths:
        return None

    # Pair every candidate with its regex match, keep only those that matched.
    searched = ((run_re.search(candidate.name), candidate) for candidate in paths)
    numbered = [(int(hit.group(1)), candidate) for hit, candidate in searched if hit]
    if numbered:
        # max() returns the first of several equal maxima, which is the same
        # element a stable descending sort would put in front.
        return max(numbered, key=lambda pair: pair[0])[1]

    # No run numbers anywhere: fall back to the newest modification time.
    return max(paths, key=lambda candidate: candidate.stat().st_mtime)

def find_condition_paths(base_dir: Path, subject: str):
    """Locate one XDF recording per condition under ``<base_dir>/<subject>/ses-<cond>/eeg``.

    For every code in ``CONDITIONS`` the strictly named files
    (``<subject>_ses-<cond>_task-Default_run-*_eeg.xdf``) are preferred; if none
    exist, any ``*.xdf`` in the folder is considered. ``pick_latest_run`` then
    selects one file. Conditions without any candidate are left out of the
    returned ``{condition: Path}`` dict.
    """
    subject_root = base_dir / subject
    found = {}
    for cond in CONDITIONS:
        session_eeg = subject_root / f"ses-{cond}" / "eeg"
        strict_pattern = f"{subject}_ses-{cond}_task-Default_run-*_eeg.xdf"
        # `or` only evaluates the loose glob when the strict one found nothing.
        matches = list(session_eeg.glob(strict_pattern)) or list(session_eeg.glob("*.xdf"))
        best = pick_latest_run(matches)
        if best:
            found[cond] = best
    return found

# -------- header helpers (reuse your logic) --------
def get_datetime_from_header(header):
    """Return the first entry of ``header['info']['datetime']``.

    The literal string ``"Datetime not found"`` is returned when the keys are
    missing, or when the value is not a non-empty list.
    """
    missing = "Datetime not found"
    if not ('info' in header and 'datetime' in header['info']):
        return missing
    values = header['info']['datetime']
    if isinstance(values, list) and values:
        return values[0]
    return missing

def print_stream_info(streams, condition_name, datetime_str):
    """Print a banner for ``condition_name`` followed by one summary line per stream.

    Each line shows the 1-based position of the stream, its name, its channel
    count and the supplied ``datetime_str``.
    """
    banner = (
        f"\n--- Stream Information ({condition_name}) ---",
        f"Datetime from header: {datetime_str}",
    )
    for line in banner:
        print(line)

    for position, entry in enumerate(streams, start=1):
        info = entry['info']
        name = info['name'][0]
        n_channels = info['channel_count'][0]
        print(f"Stream {position} Name: {name}, Channel Count: {n_channels}, Datetime: {datetime_str}")

# Resolve one recording per condition for the configured subject.
# Result is a dict {condition code: Path}; conditions with no file are absent.
paths = find_condition_paths(BASE_DIR, SUBJECT)

In [7]:
# ---- Map discovered paths back to your original variable names ----
def require_found(paths_dict, cond_key, var_name):
    """Return the path stored for ``cond_key`` as a string, or fail loudly.

    ``var_name`` is only used to make the error message point at the notebook
    variable that could not be assigned.

    Raises:
        FileNotFoundError: if ``paths_dict`` has no (non-None) entry for ``cond_key``.
    """
    located = paths_dict.get(cond_key)
    if located is not None:
        # str() on a Path gives the native separator of the running OS.
        return str(located)

    message = (
        f"Missing file for condition '{cond_key}'. "
        f"Please check folder structure: sub-<ID>/ses-{cond_key}/eeg/*.xdf "
        f"and ensure it exists so we can assign `{var_name}`."
    )
    raise FileNotFoundError(message)

# Assign variables exactly as before.
# Each call raises FileNotFoundError if the condition is missing from `paths`,
# so reaching the prints below means all four recordings were located.
visual_file_path = require_found(paths, "v",   "visual_file_path")
va_file_path     = require_found(paths, "va",  "va_file_path")
vt_file_path     = require_found(paths, "vt",  "vt_file_path")
vat_file_path    = require_found(paths, "vat", "vat_file_path")

# (Optional) print to verify which file was chosen for each condition
print("\n== Assigned variables ==")
print("visual_file_path:", visual_file_path)
print("va_file_path    :", va_file_path)
print("vt_file_path    :", vt_file_path)
print("vat_file_path   :", vat_file_path)



== Assigned variables ==
visual_file_path: sub-P007\ses-v\eeg\sub-P007_ses-v_task-Default_run-001_eeg.xdf
va_file_path    : sub-P007\ses-va\eeg\sub-P007_ses-va_task-Default_run-001_eeg.xdf
vt_file_path    : sub-P007\ses-vt\eeg\sub-P007_ses-vt_task-Default_run-001_eeg.xdf
vat_file_path   : sub-P007\ses-vat\eeg\sub-P007_ses-vat_task-Default_run-001_eeg.xdf


## Load visual only data

In [8]:
# Load XDF file for Visual condition.
# load_xdf returns the list of streams and the file header.
# NOTE(review): the loader output below reports effective sampling rates far from the
# nominal ones for several streams — confirm this is expected for these streams.
visual_streams, visual_header = pyxdf.load_xdf(visual_file_path)
# Recording datetime string from the header ("Datetime not found" if absent).
visual_datetime = get_datetime_from_header(visual_header)
print("Visual condition datetime:", visual_datetime)

# Print stream info for Visual condition with datetime
print_stream_info(visual_streams, "Visual", visual_datetime)

Stream 3: Calculated effective sampling rate 10.2037 Hz is different from specified rate 100.0000 Hz.
Stream 5: Calculated effective sampling rate 607.4623 Hz is different from specified rate 250.0000 Hz.
Stream 12: Calculated effective sampling rate 34.8861 Hz is different from specified rate 250.0000 Hz.
Stream 11: Calculated effective sampling rate 29.4068 Hz is different from specified rate 250.0000 Hz.
Stream 6: Calculated effective sampling rate 35.2033 Hz is different from specified rate 250.0000 Hz.
Stream 9: Calculated effective sampling rate 33.3579 Hz is different from specified rate 250.0000 Hz.
Stream 4: Calculated effective sampling rate 311.9492 Hz is different from specified rate 250.0000 Hz.
Stream 8: Calculated effective sampling rate 648.6850 Hz is different from specified rate 250.0000 Hz.
Stream 10: Calculated effective sampling rate 29.2983 Hz is different from specified rate 250.0000 Hz.


Visual condition datetime: 2025-10-02T10:29:44+1000

--- Stream Information (Visual) ---
Datetime from header: 2025-10-02T10:29:44+1000
Stream 1 Name: obci_stream_9, Channel Count: 9, Datetime: 2025-10-02T10:29:44+1000
Stream 2 Name: GSR_PPG, Channel Count: 10, Datetime: 2025-10-02T10:29:44+1000
Stream 3 Name: obci_stream_2, Channel Count: 6, Datetime: 2025-10-02T10:29:44+1000
Stream 4 Name: obci_stream_7, Channel Count: 1, Datetime: 2025-10-02T10:29:44+1000
Stream 5 Name: obci_stream_8, Channel Count: 4, Datetime: 2025-10-02T10:29:44+1000
Stream 6 Name: obci_stream_11, Channel Count: 3, Datetime: 2025-10-02T10:29:44+1000
Stream 7 Name: obci_stream_5, Channel Count: 2, Datetime: 2025-10-02T10:29:44+1000
Stream 8 Name: obci_stream_1, Channel Count: 5, Datetime: 2025-10-02T10:29:44+1000
Stream 9 Name: obci_stream_0, Channel Count: 18, Datetime: 2025-10-02T10:29:44+1000
Stream 10 Name: obci_stream_4, Channel Count: 18, Datetime: 2025-10-02T10:29:44+1000
Stream 11 Name: obci_stream_10, Cha

## Load VA data

In [9]:
# Load XDF file for Visual + Auditory condition (VA).
# load_xdf returns the list of streams and the file header.
va_streams, va_header = pyxdf.load_xdf(va_file_path)
# Recording datetime string from the header ("Datetime not found" if absent).
va_datetime = get_datetime_from_header(va_header)
print("Visual + Auditory condition datetime:", va_datetime)

# Print stream info for Visual + Auditory condition with datetime
print_stream_info(va_streams, "Visual + Auditory", va_datetime)

Stream 5: Calculated effective sampling rate 20.1419 Hz is different from specified rate 250.0000 Hz.
Stream 7: Calculated effective sampling rate 47.7352 Hz is different from specified rate 250.0000 Hz.
Stream 1: Calculated effective sampling rate 22.8322 Hz is different from specified rate 250.0000 Hz.
Stream 4: Calculated effective sampling rate 192.1785 Hz is different from specified rate 250.0000 Hz.
Stream 10: Calculated effective sampling rate 29.3699 Hz is different from specified rate 250.0000 Hz.
Stream 13: Calculated effective sampling rate 306.7551 Hz is different from specified rate 250.0000 Hz.
Stream 8: Calculated effective sampling rate 10.2044 Hz is different from specified rate 100.0000 Hz.
Stream 12: Calculated effective sampling rate 314.7820 Hz is different from specified rate 250.0000 Hz.
Stream 6: Calculated effective sampling rate 23.1527 Hz is different from specified rate 250.0000 Hz.


Visual + Auditory condition datetime: 2025-10-02T11:00:05+1000

--- Stream Information (Visual + Auditory) ---
Datetime from header: 2025-10-02T11:00:05+1000
Stream 1 Name: obci_stream_11, Channel Count: 3, Datetime: 2025-10-02T11:00:05+1000
Stream 2 Name: obci_stream_6, Channel Count: 1, Datetime: 2025-10-02T11:00:05+1000
Stream 3 Name: obci_stream_4, Channel Count: 18, Datetime: 2025-10-02T11:00:05+1000
Stream 4 Name: obci_stream_9, Channel Count: 9, Datetime: 2025-10-02T11:00:05+1000
Stream 5 Name: obci_stream_10, Channel Count: 2, Datetime: 2025-10-02T11:00:05+1000
Stream 6 Name: obci_stream_8, Channel Count: 4, Datetime: 2025-10-02T11:00:05+1000
Stream 7 Name: obci_stream_1, Channel Count: 5, Datetime: 2025-10-02T11:00:05+1000
Stream 8 Name: obci_stream_2, Channel Count: 6, Datetime: 2025-10-02T11:00:05+1000
Stream 9 Name: obci_stream_7, Channel Count: 1, Datetime: 2025-10-02T11:00:05+1000
Stream 10 Name: GSR_PPG, Channel Count: 10, Datetime: 2025-10-02T11:00:05+1000
Stream 11 Nam

## Load VT data

In [10]:
# Load XDF file for Visual + Thermal condition (VT).
# load_xdf returns the list of streams and the file header.
vt_streams, vt_header = pyxdf.load_xdf(vt_file_path)
# Recording datetime string from the header ("Datetime not found" if absent).
vt_datetime = get_datetime_from_header(vt_header)
print("Visual + Thermal condition datetime:", vt_datetime)

# Print stream info for Visual + Thermal condition with datetime
print_stream_info(vt_streams, "Visual + Thermal", vt_datetime)

Stream 1: Calculated effective sampling rate 23.5504 Hz is different from specified rate 250.0000 Hz.
Stream 8: Calculated effective sampling rate 45.0638 Hz is different from specified rate 250.0000 Hz.
Stream 9: Calculated effective sampling rate 335.7725 Hz is different from specified rate 250.0000 Hz.
Stream 7: Calculated effective sampling rate 10.2043 Hz is different from specified rate 100.0000 Hz.
Stream 11: Calculated effective sampling rate 207.5193 Hz is different from specified rate 250.0000 Hz.
Stream 12: Calculated effective sampling rate 23.5342 Hz is different from specified rate 250.0000 Hz.
Stream 4: Calculated effective sampling rate 20.0107 Hz is different from specified rate 250.0000 Hz.
Stream 13: Calculated effective sampling rate 323.8675 Hz is different from specified rate 250.0000 Hz.
Stream 10: Calculated effective sampling rate 30.0159 Hz is different from specified rate 250.0000 Hz.


Visual + Thermal condition datetime: 2025-10-02T11:12:18+1000

--- Stream Information (Visual + Thermal) ---
Datetime from header: 2025-10-02T11:12:18+1000
Stream 1 Name: obci_stream_9, Channel Count: 9, Datetime: 2025-10-02T11:12:18+1000
Stream 2 Name: obci_stream_8, Channel Count: 4, Datetime: 2025-10-02T11:12:18+1000
Stream 3 Name: obci_stream_4, Channel Count: 18, Datetime: 2025-10-02T11:12:18+1000
Stream 4 Name: obci_stream_6, Channel Count: 1, Datetime: 2025-10-02T11:12:18+1000
Stream 5 Name: obci_stream_2, Channel Count: 6, Datetime: 2025-10-02T11:12:18+1000
Stream 6 Name: obci_stream_0, Channel Count: 18, Datetime: 2025-10-02T11:12:18+1000
Stream 7 Name: GSR_PPG, Channel Count: 10, Datetime: 2025-10-02T11:12:18+1000
Stream 8 Name: obci_stream_10, Channel Count: 2, Datetime: 2025-10-02T11:12:18+1000
Stream 9 Name: obci_stream_5, Channel Count: 2, Datetime: 2025-10-02T11:12:18+1000
Stream 10 Name: obci_stream_11, Channel Count: 3, Datetime: 2025-10-02T11:12:18+1000
Stream 11 Name

## Load VAT data

In [11]:
# Load XDF file for the VAT condition
# (presumably Visual + Auditory + Thermal — confirm the expansion of the code).
# load_xdf returns the list of streams and the file header.
vat_streams, vat_header = pyxdf.load_xdf(vat_file_path)
# Recording datetime string from the header ("Datetime not found" if absent).
vat_datetime = get_datetime_from_header(vat_header)
print("VAT condition datetime:", vat_datetime)

# Print stream info for the VAT condition with datetime
print_stream_info(vat_streams, "VAT", vat_datetime)

Stream 4: Calculated effective sampling rate 41.3696 Hz is different from specified rate 250.0000 Hz.
Stream 3: Calculated effective sampling rate 628.2684 Hz is different from specified rate 250.0000 Hz.
Stream 7: Calculated effective sampling rate 24.4776 Hz is different from specified rate 250.0000 Hz.
Stream 5: Calculated effective sampling rate 183.2153 Hz is different from specified rate 250.0000 Hz.
Stream 8: Calculated effective sampling rate 41.0945 Hz is different from specified rate 250.0000 Hz.
Stream 2: Calculated effective sampling rate 321.4891 Hz is different from specified rate 250.0000 Hz.
Stream 11: Calculated effective sampling rate 24.0566 Hz is different from specified rate 250.0000 Hz.
Stream 6: Calculated effective sampling rate 40.9352 Hz is different from specified rate 250.0000 Hz.
Stream 12: Calculated effective sampling rate 10.2044 Hz is different from specified rate 100.0000 Hz.


VAT condition datetime: 2025-10-02T11:44:49+1000

--- Stream Information (VAT) ---
Datetime from header: 2025-10-02T11:44:49+1000
Stream 1 Name: obci_stream_6, Channel Count: 1, Datetime: 2025-10-02T11:44:49+1000
Stream 2 Name: obci_stream_3, Channel Count: 125, Datetime: 2025-10-02T11:44:49+1000
Stream 3 Name: obci_stream_1, Channel Count: 5, Datetime: 2025-10-02T11:44:49+1000
Stream 4 Name: obci_stream_10, Channel Count: 2, Datetime: 2025-10-02T11:44:49+1000
Stream 5 Name: obci_stream_8, Channel Count: 4, Datetime: 2025-10-02T11:44:49+1000
Stream 6 Name: obci_stream_5, Channel Count: 2, Datetime: 2025-10-02T11:44:49+1000
Stream 7 Name: obci_stream_7, Channel Count: 1, Datetime: 2025-10-02T11:44:49+1000
Stream 8 Name: obci_stream_0, Channel Count: 18, Datetime: 2025-10-02T11:44:49+1000
Stream 9 Name: obci_stream_2, Channel Count: 6, Datetime: 2025-10-02T11:44:49+1000
Stream 10 Name: obci_stream_9, Channel Count: 9, Datetime: 2025-10-02T11:44:49+1000
Stream 11 Name: obci_stream_11, Cha

## Extract data streams for heart rates

In [12]:
# Function to parse datetime string from header
def parse_header_datetime(dt_str):
    """Convert an XDF header datetime string into a ``datetime`` object.

    Recognised form: ``YYYY-MM-DDTHH:MM:SS[.ffffff]`` followed by a UTC offset
    written as ``+HHMM``, ``+HH:MM`` or ``+HH`` (likewise with ``-``), or by
    ``Z`` for UTC. Anything else, including strings without an offset, is
    handed unchanged to ``datetime.fromisoformat``.

    Args:
        dt_str: datetime string, e.g. ``"2025-10-02T10:29:44+1000"``.

    Returns:
        A timezone-aware ``datetime`` when an offset suffix was recognised,
        otherwise whatever ``datetime.fromisoformat`` produces (naive for
        strings without an offset).

    Raises:
        ValueError: if the string cannot be parsed by either route.
    """
    import re  # local import so the function does not depend on another cell's imports

    # base = everything up to and including the seconds (plus optional fraction);
    # the suffix is either Z or a signed offset with optional minutes.
    match = re.match(
        r"^(?P<base>.+?:\d{2}(?:\.\d+)?)"
        r"(?:(?P<utc>[Zz])|(?P<sign>[+-])(?P<hh>\d{2})(?::?(?P<mm>\d{2}))?)$",
        dt_str,
    )
    if match is not None:
        base = match.group('base')
        dt_fmt = "%Y-%m-%dT%H:%M:%S.%f" if '.' in base else "%Y-%m-%dT%H:%M:%S"
        try:
            naive = datetime.strptime(base, dt_fmt)
        except ValueError:
            # Base is not in the expected layout; let fromisoformat have a go below.
            naive = None
        if naive is not None:
            if match.group('utc'):
                tz = timezone.utc
            else:
                offset = timedelta(
                    hours=int(match.group('hh')),
                    minutes=int(match.group('mm') or 0),
                )
                if match.group('sign') == '-':
                    offset = -offset
                tz = timezone(offset)
            return naive.replace(tzinfo=tz)

    return datetime.fromisoformat(dt_str)

# Function to extract a specific stream as a DataFrame
def extract_stream_df(streams, start_time_str, stream_name_to_extract):
    """Build a DataFrame from the stream named ``stream_name_to_extract``.

    Args:
        streams: list of stream dicts; each is read via ``['info']['name'][0]``,
            ``['info']['channel_count'][0]``, ``['time_series']`` and ``['time_stamps']``.
        start_time_str: header datetime string, parsed with ``parse_header_datetime``.
        stream_name_to_extract: name of the stream to pull out (first match wins).

    Returns:
        ``None`` (after printing a message) if no stream has that name. Otherwise a
        DataFrame with columns ``Channel_1..Channel_N``, ``Timestamp`` (raw stream
        timestamps), ``Relative_Seconds`` (seconds since the stream's first sample)
        and ``Datetime_Local`` (string, ``'%Y-%m-%d %H:%M:%S.%f %z'``). A stream
        without samples yields an empty DataFrame carrying the same columns.

    NOTE(review): ``Datetime_Local`` places the stream's first sample exactly at the
    header datetime; any delay between recording start and first sample is not
    accounted for — confirm this is acceptable for the analysis.
    """
    start_dt = parse_header_datetime(start_time_str)

    # First stream whose name matches, or None.
    target_stream = next(
        (stream for stream in streams if stream['info']['name'][0] == stream_name_to_extract),
        None,
    )
    if target_stream is None:
        print(f"Stream '{stream_name_to_extract}' not found.")
        return None

    n_channels = int(target_stream['info']['channel_count'][0])
    df_stream = pd.DataFrame(
        target_stream['time_series'],
        columns=[f"Channel_{i+1}" for i in range(n_channels)]
    )
    df_stream['Timestamp'] = target_stream['time_stamps']

    if len(df_stream) == 0:
        # No samples were recorded: `.iloc[0]` below would raise IndexError,
        # so return an empty frame that still has the expected columns.
        print(f"Stream '{stream_name_to_extract}' contains no samples.")
        df_stream['Relative_Seconds'] = df_stream['Timestamp']
        df_stream['Datetime_Local'] = pd.Series(dtype=object)
        return df_stream

    df_stream['Relative_Seconds'] = df_stream['Timestamp'] - df_stream['Timestamp'].iloc[0]
    # Wall-clock label per sample, kept as a formatted string (offset included).
    df_stream['Datetime_Local'] = [
        (start_dt + timedelta(seconds=s)).strftime('%Y-%m-%d %H:%M:%S.%f %z')
        for s in df_stream['Relative_Seconds']
    ]
    return df_stream

# Set the stream name to extract (the same name is used for all four conditions).
# NOTE(review): the inline note lists the channels as [bpm, signal, ibi], while the
# extraction cells rename Channel_1..3 to heart-rate / hrv / spo2 — confirm which
# channel meaning is correct.
stream_name_to_extract = "obci_stream_11"  # streams for [bpm, signal, ibi]


## Extract heart rates for Visual condition

In [13]:
# Visual only: pull the heart-rate stream out of the Visual recording.
df_visual = extract_stream_df(visual_streams, visual_datetime, stream_name_to_extract)

# extract_stream_df returns None when the stream is missing, hence the guard.
if df_visual is not None:
    print("==== Visual Only ====")
    # Give the generic channel columns the names used by the later cells.
    # NOTE(review): channel meaning is assumed here — confirm against the device's channel order.
    df_visual = df_visual.rename(columns={
        'Channel_1': 'heart-rate',
        'Channel_2': 'hrv',
        'Channel_3': 'spo2'
    })

    # Preview: first and last four rows only.
    display(pd.concat([df_visual.head(4), df_visual.tail(4)]))


==== Visual Only ====


Unnamed: 0,heart-rate,hrv,spo2,Timestamp,Relative_Seconds,Datetime_Local
0,84.0,37.700001,99.599998,432.972057,0.0,2025-10-02 10:29:44.000000 +1000
1,84.0,37.700001,99.900002,433.000723,0.028666,2025-10-02 10:29:44.028666 +1000
2,84.0,37.700001,99.900002,433.029389,0.057332,2025-10-02 10:29:44.057332 +1000
3,84.0,37.700001,99.900002,433.058055,0.085998,2025-10-02 10:29:44.085998 +1000
18579,152.0,342.899994,79.099998,965.557726,532.585669,2025-10-02 10:38:36.585669 +1000
18580,152.0,342.899994,79.099998,965.586392,532.614335,2025-10-02 10:38:36.614335 +1000
18581,155.0,284.200012,88.599998,965.615058,532.643001,2025-10-02 10:38:36.643001 +1000
18582,155.0,284.200012,88.599998,965.643724,532.671667,2025-10-02 10:38:36.671667 +1000


## Extract heart rates for VA condition

In [14]:
# VA: pull the heart-rate stream out of the Visual + Auditory recording.
df_va = extract_stream_df(va_streams, va_datetime, stream_name_to_extract)

# extract_stream_df returns None when the stream is missing, hence the guard.
if df_va is not None:
    print("==== va Only ====")
    # Give the generic channel columns the names used by the later cells.
    # NOTE(review): channel meaning is assumed here — confirm against the device's channel order.
    df_va = df_va.rename(columns={
        'Channel_1': 'heart-rate',
        'Channel_2': 'hrv',
        'Channel_3': 'spo2'
    })

    # Preview: first and last four rows only.
    display(pd.concat([df_va.head(4), df_va.tail(4)]))


==== va Only ====


Unnamed: 0,heart-rate,hrv,spo2,Timestamp,Relative_Seconds,Datetime_Local
0,82.0,46.5,98.300003,826.58222,0.0,2025-10-02 11:00:05.000000 +1000
1,82.0,46.5,97.800003,826.631873,0.049654,2025-10-02 11:00:05.049654 +1000
2,82.0,46.5,97.800003,826.681527,0.099308,2025-10-02 11:00:05.099308 +1000
3,82.0,46.5,98.400002,826.731181,0.148962,2025-10-02 11:00:05.148962 +1000
7571,114.0,277.200012,70.0,1202.511499,375.92928,2025-10-02 11:06:20.929280 +1000
7572,113.0,335.599976,70.0,1202.561153,375.978934,2025-10-02 11:06:20.978934 +1000
7573,127.0,336.200012,70.0,1202.610807,376.028587,2025-10-02 11:06:21.028587 +1000
7574,117.0,321.5,70.0,1202.660461,376.078241,2025-10-02 11:06:21.078241 +1000


## Extract heart rates for VT condition

In [15]:
# VT: pull the heart-rate stream out of the Visual + Thermal recording.
df_vt = extract_stream_df(vt_streams, vt_datetime, stream_name_to_extract)

# extract_stream_df returns None when the stream is missing, hence the guard.
if df_vt is not None:
    print("==== vt Only ====")
    # Give the generic channel columns the names used by the later cells.
    # NOTE(review): channel meaning is assumed here — confirm against the device's channel order.
    df_vt = df_vt.rename(columns={
        'Channel_1': 'heart-rate',
        'Channel_2': 'hrv',
        'Channel_3': 'spo2'
    })

    # Preview: first and last four rows only.
    display(pd.concat([df_vt.head(4), df_vt.tail(4)]))


==== vt Only ====


Unnamed: 0,heart-rate,hrv,spo2,Timestamp,Relative_Seconds,Datetime_Local
0,90.0,39.400002,96.199997,1552.615594,0.0,2025-10-02 11:12:18.000000 +1000
1,90.0,39.400002,97.199997,1552.665573,0.049979,2025-10-02 11:12:18.049979 +1000
2,90.0,39.400002,97.199997,1552.715552,0.099958,2025-10-02 11:12:18.099958 +1000
3,90.0,39.400002,97.199997,1552.765531,0.149937,2025-10-02 11:12:18.149937 +1000
8006,115.0,433.5,88.400002,1952.747918,400.132325,2025-10-02 11:18:58.132325 +1000
8007,149.0,362.5,86.5,1952.797897,400.182304,2025-10-02 11:18:58.182304 +1000
8008,136.0,306.5,99.900002,1952.847877,400.232283,2025-10-02 11:18:58.232283 +1000
8009,133.0,392.800018,77.099998,1952.897856,400.282262,2025-10-02 11:18:58.282262 +1000


## Extract heart rates for VAT condition

In [16]:
# VAT: pull the heart-rate stream out of the VAT recording.
df_vat = extract_stream_df(vat_streams, vat_datetime, stream_name_to_extract)
# extract_stream_df returns None when the stream is missing, hence the guard.
if df_vat is not None:
    print("==== VAT ====")
    # Give the generic channel columns the names used by the later cells.
    # NOTE(review): channel meaning is assumed here — confirm against the device's channel order.
    df_vat = df_vat.rename(columns={
    'Channel_1': 'heart-rate',
    'Channel_2': 'hrv',
    'Channel_3': 'spo2'
    })

    # Preview: first and last four rows only.
    # NOTE(review): the displayed tail rows show heart-rate and hrv of 0.0; such rows
    # are not filtered anywhere and enter the later .mean() calls — confirm intended.
    display(pd.concat([df_vat.head(4), df_vat.tail(4)]))

==== VAT ====


Unnamed: 0,heart-rate,hrv,spo2,Timestamp,Relative_Seconds,Datetime_Local
0,77.0,37.400002,95.5,634.278953,0.0,2025-10-02 11:44:49.000000 +1000
1,77.0,32.699997,95.400002,634.320526,0.041573,2025-10-02 11:44:49.041573 +1000
2,77.0,32.699997,96.199997,634.362099,0.083146,2025-10-02 11:44:49.083146 +1000
3,77.0,32.699997,96.199997,634.403672,0.124718,2025-10-02 11:44:49.124718 +1000
9205,0.0,0.0,83.599998,1016.956474,382.677521,2025-10-02 11:51:11.677521 +1000
9206,0.0,0.0,83.599998,1016.998047,382.719094,2025-10-02 11:51:11.719094 +1000
9207,0.0,0.0,83.800003,1017.03962,382.760666,2025-10-02 11:51:11.760666 +1000
9208,0.0,0.0,83.900002,1017.081192,382.802239,2025-10-02 11:51:11.802239 +1000


## Experiment Time

In [17]:
import pandas as pd

# Read the log files (updated file names).
# Paths are relative to the current working directory. The later lookups use the
# columns 'Group' (marker label) and 'UTC+10_Time' (timestamp text).
log_v   = pd.read_csv('ButtonToggleLog_v.csv')
log_va  = pd.read_csv('ButtonToggleLog_va.csv')
log_vt  = pd.read_csv('ButtonToggleLog_vt.csv')
log_vat = pd.read_csv('ButtonToggleLog_vat.csv')

# Utility to fetch time for a given Group and DataFrame
def get_time(df, element_name):
    """Return the 'UTC+10_Time' value of the first row whose 'Group' equals ``element_name``.

    ``None`` is returned when no row matches.
    """
    selected = df.loc[df['Group'] == element_name]
    if selected.empty:
        return None
    return selected['UTC+10_Time'].iloc[0]

# Marker lookup per condition. Going by the variable names: S01_S = condition start,
# S02_S = state 02 start, S03_S = state 03 start, S03_M = a mid-state-03 marker,
# S03_E = end — presumably; confirm against the logging application.
# get_time returns None for a marker that is absent from the log, and the values
# are kept as the raw text from the CSV (they are parsed with pd.to_datetime later).

# ---- Visual ----
start_time_v   = get_time(log_v, 'S01_S')
state02_start_time_v = get_time(log_v, 'S02_S')
state03_start_time_v = get_time(log_v, 'S03_S')
state03_m_time_v     = get_time(log_v, 'S03_M')
end_time_v     = get_time(log_v, 'S03_E')

# ---- VA ----
start_time_va   = get_time(log_va, 'S01_S')
state02_start_time_va = get_time(log_va, 'S02_S')
state03_start_time_va = get_time(log_va, 'S03_S')
state03_m_time_va     = get_time(log_va, 'S03_M')
end_time_va     = get_time(log_va, 'S03_E')

# ---- VT ----
start_time_vt   = get_time(log_vt, 'S01_S')
state02_start_time_vt = get_time(log_vt, 'S02_S')
state03_start_time_vt = get_time(log_vt, 'S03_S')
state03_m_time_vt     = get_time(log_vt, 'S03_M')
end_time_vt     = get_time(log_vt, 'S03_E')

# ---- VAT ----
start_time_vat   = get_time(log_vat, 'S01_S')
state02_start_time_vat = get_time(log_vat, 'S02_S')
state03_start_time_vat = get_time(log_vat, 'S03_S')
state03_m_time_vat     = get_time(log_vat, 'S03_M')
end_time_vat     = get_time(log_vat, 'S03_E')

# ---- Print results ----
# NOTE(review): the printed values show two different timestamp layouts across the
# logs (slash-separated vs ISO-like) — confirm both are parsed as intended downstream.
print("Visual condition start:", start_time_v)
print("Visual State 02 start:", state02_start_time_v)
print("Visual State 03 start:", state03_start_time_v)
print("Visual State 03 m_time:", state03_m_time_v)
print("Visual end:", end_time_v)
print("---")
print("VA condition start:", start_time_va)
print("VA State 02 start:", state02_start_time_va)
print("VA State 03 start:", state03_start_time_va)
print("VA State 03 m_time:", state03_m_time_va)
print("VA end:", end_time_va)
print("---")
print("VT condition start:", start_time_vt)
print("VT State 02 start:", state02_start_time_vt)
print("VT State 03 start:", state03_start_time_vt)
print("VT State 03 m_time:", state03_m_time_vt)
print("VT end:", end_time_vt)
print("---")
print("VAT condition start:", start_time_vat)
print("VAT State 02 start:", state02_start_time_vat)
print("VAT State 03 start:", state03_start_time_vat)
print("VAT State 03 m_time:", state03_m_time_vat)
print("VAT end:", end_time_vat)

Visual condition start: 2025/10/2 10:32:51.99
Visual State 02 start: 2025/10/2 10:36:00.48
Visual State 03 start: 2025/10/2 10:37:06.26
Visual State 03 m_time: 2025/10/2 10:37:19.60
Visual end: 2025/10/2 10:37:29.48
---
VA condition start: 2025/10/2 11:01:02.71
VA State 02 start: 2025/10/2 11:04:07.00
VA State 03 start: 2025/10/2 11:05:08.63
VA State 03 m_time: 2025/10/2 11:05:19.17
VA end: 2025/10/2 11:05:30.54
---
VT condition start: 2025-10-02 11:13:30.165
VT State 02 start: 2025-10-02 11:16:30.414
VT State 03 start: 2025-10-02 11:17:35.226
VT State 03 m_time: 2025-10-02 11:17:46.209
VT end: 2025-10-02 11:17:54.285
---
VAT condition start: 2025-10-02 11:46:20.637
VAT State 02 start: 2025-10-02 11:49:26.071
VAT State 03 start: 2025-10-02 11:50:26.516
VAT State 03 m_time: 2025-10-02 11:50:37.765
VAT end: 2025-10-02 11:50:46.133


## Baseline calculation

In [18]:
import pandas as pd
from pandas import Timedelta

# Make copies of the extracted DataFrames for the baseline step.
# NOTE(review): only df_v is a new name (a copy of df_visual). df_va / df_vt / df_vat
# are rebound to copies of themselves, so the pre-copy objects are no longer
# reachable under those names — distinct names would make the intent clearer.
df_v   = df_visual.copy()
df_va  = df_va.copy()
df_vt  = df_vt.copy()
df_vat = df_vat.copy()

def get_interval_bpm_hrv(df, interval_start, interval_end):
    """Mean heart rate and HRV of the samples inside ``[interval_start, interval_end)``.

    Args:
        df: DataFrame with columns 'Datetime_Local' (datetime-like or parseable
            strings), 'heart-rate' and 'hrv'. It is not modified.
        interval_start: inclusive lower bound; a timezone-naive value is
            localized to the timezone of 'Datetime_Local'.
        interval_end: exclusive upper bound; handled like ``interval_start``.

    Returns:
        ``(mean_bpm, mean_hrv)``, each rounded to two decimals, or ``None`` in
        place of a mean that is undefined (e.g. no samples in the interval).
    """
    # Parse into a local Series; writing back into `df` would silently change
    # the caller's column dtype.
    timestamps = pd.to_datetime(df['Datetime_Local'])
    tz = timestamps.dt.tz

    def _as_bound(value):
        # Naive bounds take the data's timezone; aware bounds are used as given.
        bound = pd.to_datetime(value)
        return bound.tz_localize(tz) if bound.tzinfo is None else bound

    lower = _as_bound(interval_start)
    upper = _as_bound(interval_end)

    df_interval = df.loc[(timestamps >= lower) & (timestamps < upper)]
    mean_bpm = df_interval['heart-rate'].mean()
    mean_hrv = df_interval['hrv'].mean()
    mean_bpm = round(mean_bpm, 2) if pd.notnull(mean_bpm) else None
    mean_hrv = round(mean_hrv, 2) if pd.notnull(mean_hrv) else None
    return mean_bpm, mean_hrv

# Two one-minute baseline windows per condition, both anchored on the condition's
# S01_S start time:
#   Baseline_00  = [start - 1 min, start)
#   Baseline_pre = [start - 2 min, start - 1 min)

# ----- Visual -----
baseline_00_start_v  = pd.to_datetime(start_time_v)  - Timedelta(minutes=1)
baseline_00_end_v    = pd.to_datetime(start_time_v)
baseline_pre_start_v = pd.to_datetime(start_time_v)  - Timedelta(minutes=2)
baseline_pre_end_v   = pd.to_datetime(start_time_v)  - Timedelta(minutes=1)

# ----- VA -----
baseline_00_start_va  = pd.to_datetime(start_time_va)  - Timedelta(minutes=1)
baseline_00_end_va    = pd.to_datetime(start_time_va)
baseline_pre_start_va = pd.to_datetime(start_time_va)  - Timedelta(minutes=2)
baseline_pre_end_va   = pd.to_datetime(start_time_va)  - Timedelta(minutes=1)

# ----- VT -----
baseline_00_start_vt  = pd.to_datetime(start_time_vt)  - Timedelta(minutes=1)
baseline_00_end_vt    = pd.to_datetime(start_time_vt)
baseline_pre_start_vt = pd.to_datetime(start_time_vt)  - Timedelta(minutes=2)
baseline_pre_end_vt   = pd.to_datetime(start_time_vt)  - Timedelta(minutes=1)

# ----- VAT -----
baseline_00_start_vat  = pd.to_datetime(start_time_vat) - Timedelta(minutes=1)
baseline_00_end_vat    = pd.to_datetime(start_time_vat)
baseline_pre_start_vat = pd.to_datetime(start_time_vat) - Timedelta(minutes=2)
baseline_pre_end_vat   = pd.to_datetime(start_time_vat) - Timedelta(minutes=1)

# Compute means for each group and interval.
# get_interval_bpm_hrv returns None for a mean when the window holds no samples.
bpm_00_v,  hrv_00_v  = get_interval_bpm_hrv(df_v,   baseline_00_start_v,  baseline_00_end_v)
bpm_pre_v, hrv_pre_v = get_interval_bpm_hrv(df_v,   baseline_pre_start_v, baseline_pre_end_v)

bpm_00_va,  hrv_00_va  = get_interval_bpm_hrv(df_va,  baseline_00_start_va,  baseline_00_end_va)
bpm_pre_va, hrv_pre_va = get_interval_bpm_hrv(df_va,  baseline_pre_start_va, baseline_pre_end_va)

bpm_00_vt,  hrv_00_vt  = get_interval_bpm_hrv(df_vt,  baseline_00_start_vt,  baseline_00_end_vt)
bpm_pre_vt, hrv_pre_vt = get_interval_bpm_hrv(df_vt,  baseline_pre_start_vt, baseline_pre_end_vt)

bpm_00_vat,  hrv_00_vat  = get_interval_bpm_hrv(df_vat, baseline_00_start_vat,  baseline_00_end_vat)
bpm_pre_vat, hrv_pre_vat = get_interval_bpm_hrv(df_vat, baseline_pre_start_vat, baseline_pre_end_vat)

# Print results in a readable format.
# NOTE(review): the `:.2f` format specs raise TypeError when a value is None, which is
# presumably why the Baseline_pre prints for VA / VT / VAT are commented out (their
# recordings may start less than two minutes before S01_S) — confirm, and consider a
# None-safe formatter instead of disabling lines.
print(f"Visual group Baseline_00 (-1min~0min) bpm: {bpm_00_v:.2f}, HRV(RMSSD): {hrv_00_v:.2f} ms")
print(f"Visual group Baseline_pre (-2min~-1min) bpm: {bpm_pre_v:.2f}, HRV(RMSSD): {hrv_pre_v:.2f} ms")
print('---')
print(f"VA group Baseline_00 (-1min~0min) bpm: {bpm_00_va:.2f}, HRV(RMSSD): {hrv_00_va:.2f} ms")
# print(f"VA group Baseline_pre (-2min~-1min) bpm: {bpm_pre_va:.2f}, HRV(RMSSD): {hrv_pre_va:.2f} ms")
print('---')
print(f"VT group Baseline_00 (-1min~0min) bpm: {bpm_00_vt:.2f}, HRV(RMSSD): {hrv_00_vt:.2f} ms")
# print(f"VT group Baseline_pre (-2min~-1min) bpm: {bpm_pre_vt:.2f}, HRV(RMSSD): {hrv_pre_vt:.2f} ms")
print('---')
print(f"VAT group Baseline_00 (-1min~0min) bpm: {bpm_00_vat:.2f}, HRV(RMSSD): {hrv_00_vat:.2f} ms")
# print(f"VAT group Baseline_pre (-2min~-1min) bpm: {bpm_pre_vat:.2f}, HRV(RMSSD): {hrv_pre_vat:.2f} ms")


Visual group Baseline_00 (-1min~0min) bpm: 86.20, HRV(RMSSD): 36.11 ms
Visual group Baseline_pre (-2min~-1min) bpm: 66.03, HRV(RMSSD): 49.83 ms
---
VA group Baseline_00 (-1min~0min) bpm: 68.07, HRV(RMSSD): 29.22 ms
---
VT group Baseline_00 (-1min~0min) bpm: 80.54, HRV(RMSSD): 65.72 ms
---
VAT group Baseline_00 (-1min~0min) bpm: 78.15, HRV(RMSSD): 47.57 ms


## Calculate mean values in 30-second windows for HR

In [26]:
# Render floats with two decimals. This is a global pandas display option, so it
# applies to every DataFrame shown after this cell has run (display only, not the data).
pd.set_option('display.float_format', lambda x: f"{x:.2f}")

def calc_rolling_means(df, start_time, end_time, interval_s=30, min_total_s=270):
    """Average 'heart-rate' and 'hrv' over consecutive, non-overlapping windows.

    The span ``[start_time, end_time)`` is cut into windows of ``interval_s``
    seconds. Naive bounds are localized to the timezone of 'Datetime_Local';
    aware bounds are converted to it. The input frame is copied, not modified.

    Rows of the result (columns 'Seconds', 'bpm', 'hrv'):
      * span shorter than one window: a single row holding the mean of the whole
        span, labelled with the span length in seconds;
      * otherwise one row per full window, labelled 30, 60, ... (multiples of
        ``interval_s``);
      * if a partial window remains at the end, one more row labelled with the
        true elapsed seconds. Its means cover only that remainder when the span
        is at least ``min_total_s`` seconds long, and the WHOLE span otherwise.

    Means are rounded to two decimals; ``None`` stands in for an undefined mean.
    """
    frame = df.copy()
    # Make sure the time column is datetime typed and learn its timezone.
    frame['Datetime_Local'] = pd.to_datetime(frame['Datetime_Local'])
    zone = frame['Datetime_Local'].dt.tz

    def _align(stamp):
        # Naive -> attach the data's zone; aware -> convert into it.
        if stamp.tzinfo is None:
            return stamp.tz_localize(zone)
        return stamp.tz_convert(zone)

    def _window_means(lower, upper):
        # Rounded (bpm, hrv) means of the samples in [lower, upper).
        chunk = frame[(frame['Datetime_Local'] >= lower) & (frame['Datetime_Local'] < upper)]
        avg_bpm = chunk['heart-rate'].mean()
        avg_hrv = chunk['hrv'].mean()
        return (
            round(avg_bpm, 2) if pd.notnull(avg_bpm) else None,
            round(avg_hrv, 2) if pd.notnull(avg_hrv) else None,
        )

    begin, finish = pd.to_datetime(start_time), pd.to_datetime(end_time)
    begin = _align(begin)
    finish = _align(finish)

    step = pd.Timedelta(seconds=interval_s)
    span = finish - begin
    labels = ['Seconds', 'bpm', 'hrv']

    # A) Shorter than one window: average the whole span directly.
    if span < step:
        bpm, hrv = _window_means(begin, finish)
        return pd.DataFrame([[round(span.total_seconds(), 2), bpm, hrv]], columns=labels)

    # B) Full windows.
    rows = []
    n_full = 0
    cursor = begin
    while cursor + step <= finish:
        upper = cursor + step
        bpm, hrv = _window_means(cursor, upper)
        n_full += 1
        rows.append([n_full * interval_s, bpm, hrv])
        cursor = upper

    # C) Remainder shorter than one window.
    if cursor < finish:
        leftover_s = (finish - cursor).total_seconds()
        if span.total_seconds() < min_total_s:
            # Short recording: the last row reports the mean of the entire span.
            bpm, hrv = _window_means(begin, finish)
        else:
            bpm, hrv = _window_means(cursor, finish)
        # Label with the actual cumulative seconds, not a multiple of the window size.
        rows.append([round(n_full * interval_s + leftover_s, 2), bpm, hrv])

    return pd.DataFrame(rows, columns=labels)
# --- copies ---
# NOTE(review): only df_v is a new name (copy of df_visual); df_va / df_vt / df_vat are
# rebound to copies of themselves.
df_v   = df_visual.copy()
df_va  = df_va.copy()
df_vt  = df_vt.copy()
df_vat = df_vat.copy()

# --- compute window means between each condition's S01_S start and S03_E end ---
df_v_means   = calc_rolling_means(df_v,   start_time_v,   end_time_v).round(2)
df_va_means  = calc_rolling_means(df_va,  start_time_va,  end_time_va).round(2)
df_vt_means  = calc_rolling_means(df_vt,  start_time_vt,  end_time_vt).round(2)
df_vat_means = calc_rolling_means(df_vat, start_time_vat, end_time_vat).round(2)

# --- align to common minimum length ---
# Truncate every table to the shortest one so all conditions have the same row count.
# NOTE(review): alignment is by row index only. The displayed last rows carry different
# 'Seconds' labels across conditions (e.g. 270.0 vs 267.83), so the final row does not
# describe the same window in every condition — confirm this is acceptable.
min_len = min(len(df_v_means), len(df_va_means), len(df_vt_means), len(df_vat_means))

df_v_means_aligned   = df_v_means.iloc[:min_len].reset_index(drop=True)
df_va_means_aligned  = df_va_means.iloc[:min_len].reset_index(drop=True)
df_vt_means_aligned  = df_vt_means.iloc[:min_len].reset_index(drop=True)
df_vat_means_aligned = df_vat_means.iloc[:min_len].reset_index(drop=True)

# --- display ---
# Each call shows a title string followed by the table.
display('Visual 30s Interval Means', df_v_means_aligned)
display('VA 30s Interval Means', df_va_means_aligned)
display('VT 30s Interval Means', df_vt_means_aligned)
display('VAT 30s Interval Means', df_vat_means_aligned)


'Visual 30s Interval Means'

Unnamed: 0,Seconds,bpm,hrv
0,30.0,81.47,30.77
1,60.0,78.6,28.66
2,90.0,83.15,31.48
3,120.0,82.84,25.97
4,150.0,82.59,28.17
5,180.0,83.83,38.33
6,210.0,81.46,40.56
7,240.0,54.65,22.55
8,270.0,89.36,33.19


'VA 30s Interval Means'

Unnamed: 0,Seconds,bpm,hrv
0,30.0,84.72,35.34
1,60.0,82.23,31.19
2,90.0,83.15,42.9
3,120.0,50.18,18.52
4,150.0,73.67,25.6
5,180.0,85.88,46.07
6,210.0,82.61,36.87
7,240.0,78.34,113.23
8,267.83,78.79,41.7


'VT 30s Interval Means'

Unnamed: 0,Seconds,bpm,hrv
0,30.0,83.35,35.34
1,60.0,83.31,33.1
2,90.0,83.61,32.34
3,120.0,80.12,39.07
4,150.0,84.0,39.77
5,180.0,76.6,129.08
6,210.0,81.7,52.08
7,240.0,63.24,72.11
8,264.12,78.11,51.99


'VAT 30s Interval Means'

Unnamed: 0,Seconds,bpm,hrv
0,30.0,76.14,38.34
1,60.0,72.95,147.09
2,90.0,80.56,94.57
3,120.0,69.52,64.92
4,150.0,60.43,30.74
5,180.0,75.7,36.46
6,210.0,79.55,32.83
7,240.0,77.93,39.45
8,265.5,74.74,58.9


## Export clean bpm and hrv data to CSV files

In [27]:
import os
import pandas as pd

# ========== Participant settings ==========
# Change both values for each participant.
participant_id, gender = "P07", "F"

# ========== Output location ==========
# NOTE(review): absolute, machine-specific path — consider a configurable base directory.
filename = "B_Participant_HR.csv"
output_dir = r"G:\USYD_PhD_OneDrive\OneDrive - The University of Sydney (Students)\PhD_USYD\2025\Prototype B\HR"
filepath = os.path.join(output_dir, filename)
os.makedirs(output_dir, exist_ok=True)  # create the folder if it is missing

# ========== Function to Build Output Row ==========
def build_output_row(participant_id, gender, condition, bpm_pre, hrv_pre, bpm_00, hrv_00, means_df):
    """
    Flatten one condition's values into a single list suitable for a CSV row.

    Layout: the seven leading fields in the order they are passed
    (participant_id, gender, condition, bpm_pre, hrv_pre, bpm_00, hrv_00),
    then every value of means_df['bpm'], then every value of means_df['hrv'].
    All bpm values therefore come before any hrv value.

    Returns:
        list: 7 + 2 * len(means_df) entries.
    """
    leading_fields = [participant_id, gender, condition, bpm_pre, hrv_pre, bpm_00, hrv_00]
    bpm_values = means_df['bpm'].tolist()
    hrv_values = means_df['hrv'].tolist()
    return leading_fields + bpm_values + hrv_values

# ========== Use aligned DataFrames ==========
# Only the V row carries pre-baseline values; VA, VT and VAT pass empty
# strings so their two Baseline_Pre cells stay blank in the CSV.
# V: with pre values (label changed from "Visual" -> "V")
row_v  = build_output_row(participant_id, gender, "V",
                          bpm_pre_v, hrv_pre_v, bpm_00_v, hrv_00_v, df_v_means_aligned)

# VA: pre columns left empty
row_va = build_output_row(participant_id, gender, "VA",
                          "", "", bpm_00_va, hrv_00_va, df_va_means_aligned)

# VT: pre columns left empty
row_vt = build_output_row(participant_id, gender, "VT",
                          "", "", bpm_00_vt, hrv_00_vt, df_vt_means_aligned)

# VAT: pre columns left empty
row_vat = build_output_row(participant_id, gender, "VAT",
                           "", "", bpm_00_vat, hrv_00_vat, df_vat_means_aligned)

# ========== Build Columns Based on Number of Windows ==========
# The aligned tables all have the same length, so V's length gives the
# number of windows. Labels are nominal multiples of 30 s.
n_windows = len(df_v_means_aligned)
columns = ["Participant ID", "Gender", "Condition",
           "Baseline_Pre_bpm", "Baseline_Pre_hrv", "Baseline_00_bpm", "Baseline_00_hrv"]
# Add bpm columns first
columns += [f"bpm_{(i+1)*30}s" for i in range(n_windows)]
# Then add hrv columns
columns += [f"hrv_{(i+1)*30}s" for i in range(n_windows)]

# ========== Save or Append ==========
rows = [row_v, row_va, row_vt, row_vat]
df_out = pd.DataFrame(rows, columns=columns)

# A header is written only when the file is new or still empty.
write_header = not os.path.exists(filepath) or os.path.getsize(filepath) == 0
if not write_header:
    # Appending header-less rows to a file with a different column layout
    # (e.g. a participant with another number of windows) would silently
    # shift values under the wrong headers, so check the layout first.
    existing_columns = pd.read_csv(filepath, nrows=0).columns.tolist()
    if existing_columns != columns:
        raise ValueError(
            f"Column mismatch between {filepath} and the rows to append: "
            f"file has {len(existing_columns)} columns, new rows have {len(columns)}."
        )
df_out.to_csv(filepath, mode='a', index=False, header=write_header)

print(f"File saved at: {filepath}")


File saved at: G:\USYD_PhD_OneDrive\OneDrive - The University of Sydney (Students)\PhD_USYD\2025\Prototype B\HR\B_Participant_HR.csv


## Calculate mean values in the 2 s before and after S3_S

In [21]:
import pandas as pd

# Parse the timestamp column of every condition frame in place;
# entries that cannot be parsed become NaT (errors='coerce').
for _frame in (df_visual, df_va, df_vt, df_vat):
    _frame['Datetime_Local'] = pd.to_datetime(_frame['Datetime_Local'], errors='coerce')

# The Visual frame's timezone is the reference for the S3_S timestamps
tz = df_visual['Datetime_Local'].dt.tz

# Align S3_S timestamps to the same timezone
def to_tz(ts, tz):
    """
    Return `ts` as a pandas Timestamp expressed in timezone `tz`.

    A timezone-naive input is localized to `tz` (wall-clock time unchanged);
    a timezone-aware input is converted to `tz` (same instant).
    """
    stamp = pd.to_datetime(ts)
    if stamp.tzinfo is not None:
        return stamp.tz_convert(tz)
    return stamp.tz_localize(tz)

# S3_S start time of each condition, expressed in the reference timezone
s3_start_time_v, s3_start_time_va, s3_start_time_vt, s3_start_time_vat = (
    to_tz(raw_start, tz)
    for raw_start in (
        state03_start_time_v,
        state03_start_time_va,
        state03_start_time_vt,
        state03_start_time_vat,
    )
)

# Window settings
interval_before = pd.Timedelta(seconds=2)
interval_after  = pd.Timedelta(seconds=2)

def before_after(df, s3_start):
    """
    Mean 'heart-rate' and 'hrv' just before and just after `s3_start`.

    Rows are selected on df['Datetime_Local']:
      * 'Before': s3_start - interval_before <= t < s3_start
      * 'After' : s3_start <= t < s3_start + interval_after

    Returns a two-row DataFrame with columns ['Window', 'heart-rate', 'hrv'],
    'Before' first. A window containing no rows yields NaN means.

    NOTE(review): every sample in a window is averaged as-is, including zero
    readings (the VT 'Before' output is 0.0 / 0.0) — confirm whether zeros
    are sensor dropouts that should be excluded.
    """
    stamps = df['Datetime_Local']
    window_masks = {
        'Before': (stamps >= (s3_start - interval_before)) & (stamps < s3_start),
        'After':  (stamps >= s3_start) & (stamps < (s3_start + interval_after)),
    }

    summaries = []
    for label, mask in window_masks.items():
        summary = df.loc[mask, ['heart-rate', 'hrv']].mean().to_frame().T
        summary['Window'] = label
        summaries.append(summary)

    combined = pd.concat(summaries, ignore_index=True)
    return combined[['Window', 'heart-rate', 'hrv']]

# Before/after summary for each condition (note: V reads from df_visual)
df_v_window   = before_after(df_visual, s3_start_time_v)
df_va_window  = before_after(df_va,     s3_start_time_va)
df_vt_window  = before_after(df_vt,     s3_start_time_vt)
df_vat_window = before_after(df_vat,    s3_start_time_vat)

# Show each table under its heading, in the order V, VA, VT, VAT
for _heading, _window in (
    ("=== V Condition (Mean 2s before/after S3_S) ===", df_v_window),
    ("=== VA Condition (Mean 2s before/after S3_S) ===", df_va_window),
    ("=== VT Condition (Mean 2s before/after S3_S) ===", df_vt_window),
    ("=== VAT Condition (Mean 2s before/after S3_S) ===", df_vat_window),
):
    print(_heading)
    display(_window)


=== V Condition (Mean 2s before/after S3_S) ===


Unnamed: 0,Window,heart-rate,hrv
0,Before,91.53,32.95
1,After,89.9,34.7


=== VA Condition (Mean 2s before/after S3_S) ===


Unnamed: 0,Window,heart-rate,hrv
0,Before,86.07,23.64
1,After,86.12,23.83


=== VT Condition (Mean 2s before/after S3_S) ===


Unnamed: 0,Window,heart-rate,hrv
0,Before,0.0,0.0
1,After,62.33,30.6


=== VAT Condition (Mean 2s before/after S3_S) ===


Unnamed: 0,Window,heart-rate,hrv
0,Before,79.1,29.61
1,After,80.9,35.94


## Export clean average S3 2s Before and After Data to CSV files

In [22]:
import os
import pandas as pd

# ========== Participant settings ==========
# Change both values for each participant.
participant_id, gender = "P07", "F"

# ========== Output location ==========
# NOTE(review): absolute, machine-specific path — consider a configurable base directory.
filename = "B_Participant_HR_S3.csv"
output_dir = r"G:\USYD_PhD_OneDrive\OneDrive - The University of Sydney (Students)\PhD_USYD\2025\Prototype B\HR"
filepath = os.path.join(output_dir, filename)
os.makedirs(output_dir, exist_ok=True)  # create the folder if it is missing

# ========== Build Output Row Function ==========
def build_output_row_hr_S3(
    participant_id, gender, condition,
    hr_pre, hrv_pre, hr_00, hrv_00,
    hr_before, hrv_before, hr_after, hrv_after,
    skip_pre=False
):
    """
    Assemble one 11-element row for the S3 before/after CSV.

    Order: participant_id, gender, condition, the two pre-baseline cells,
    hr_00, hrv_00, hr_before, hrv_before, hr_after, hrv_after.

    When skip_pre is True the two pre-baseline cells are written as empty
    strings and hr_pre / hrv_pre are ignored; otherwise they are passed
    through unchanged.
    """
    pre_cells = ["", ""] if skip_pre else [hr_pre, hrv_pre]
    return [
        participant_id, gender, condition,
        *pre_cells,
        hr_00, hrv_00,
        hr_before, hrv_before,
        hr_after, hrv_after,
    ]

# ========== Fill in from calculations ==========
# Only the Visual row can carry pre-baseline values; VA, VT and VAT all end
# up with empty Baseline_Pre cells (VA/VT by passing "", VAT via skip_pre).
# NOTE(review): this file labels the first condition "Visual", whereas the
# other export cells in this notebook write "V" — confirm which is intended.
# Visual
row_v = build_output_row_hr_S3(
    participant_id, gender, "Visual",
    bpm_pre_v if 'bpm_pre_v' in locals() else "",
    hrv_pre_v if 'hrv_pre_v' in locals() else "",
    bpm_00_v, hrv_00_v,
    df_v_window.loc[df_v_window['Window'] == 'Before', 'heart-rate'].values[0],
    df_v_window.loc[df_v_window['Window'] == 'Before', 'hrv'].values[0],
    df_v_window.loc[df_v_window['Window'] == 'After',  'heart-rate'].values[0],
    df_v_window.loc[df_v_window['Window'] == 'After',  'hrv'].values[0],
    skip_pre=False
)

# VA (pre left empty)
row_va = build_output_row_hr_S3(
    participant_id, gender, "VA",
    "", "",
    bpm_00_va, hrv_00_va,
    df_va_window.loc[df_va_window['Window'] == 'Before', 'heart-rate'].values[0],
    df_va_window.loc[df_va_window['Window'] == 'Before', 'hrv'].values[0],
    df_va_window.loc[df_va_window['Window'] == 'After',  'heart-rate'].values[0],
    df_va_window.loc[df_va_window['Window'] == 'After',  'hrv'].values[0],
    skip_pre=False
)

# VT (pre left empty)
row_vt = build_output_row_hr_S3(
    participant_id, gender, "VT",
    "", "",
    bpm_00_vt, hrv_00_vt,
    df_vt_window.loc[df_vt_window['Window'] == 'Before', 'heart-rate'].values[0],
    df_vt_window.loc[df_vt_window['Window'] == 'Before', 'hrv'].values[0],
    df_vt_window.loc[df_vt_window['Window'] == 'After',  'heart-rate'].values[0],
    df_vt_window.loc[df_vt_window['Window'] == 'After',  'hrv'].values[0],
    skip_pre=False
)

# VAT (pre left empty)
row_vat = build_output_row_hr_S3(
    participant_id, gender, "VAT",
    "", "",
    bpm_00_vat, hrv_00_vat,
    df_vat_window.loc[df_vat_window['Window'] == 'Before', 'heart-rate'].values[0],
    df_vat_window.loc[df_vat_window['Window'] == 'Before', 'hrv'].values[0],
    df_vat_window.loc[df_vat_window['Window'] == 'After',  'heart-rate'].values[0],
    df_vat_window.loc[df_vat_window['Window'] == 'After',  'hrv'].values[0],
    skip_pre=True
)

# ========== Columns ==========
columns = [
    "Participant ID", "Gender", "Condition",
    "Baseline_Pre_HR", "Baseline_Pre_HRV",
    "Baseline_00_HR", "Baseline_00_HRV",
    "Before_S3_S_HR", "Before_S3_S_HRV",
    "After_S3_S_HR",  "After_S3_S_HRV"
]

# ========== Write CSV ==========
rows = [row_v, row_va, row_vt, row_vat]
df_out = pd.DataFrame(rows, columns=columns)

# A header is written only when the file is new or still empty.
write_header = not os.path.exists(filepath) or os.path.getsize(filepath) == 0
if not write_header:
    # Header-less appends to a file with a different column layout would
    # silently put values under the wrong headers, so check the layout first.
    existing_columns = pd.read_csv(filepath, nrows=0).columns.tolist()
    if existing_columns != columns:
        raise ValueError(
            f"Column mismatch between {filepath} and the rows to append: "
            f"file has {len(existing_columns)} columns, new rows have {len(columns)}."
        )
df_out.to_csv(filepath, mode='a', index=False, header=write_header)

print(f"File saved at: {filepath}")


File saved at: G:\USYD_PhD_OneDrive\OneDrive - The University of Sydney (Students)\PhD_USYD\2025\Prototype B\HR\B_Participant_HR_S3.csv


## Extract HR and HRV values at the three S3 time points (S3_S, S3_M, S3_E)

In [23]:
import pandas as pd

# Helper: convert ts to pandas Timestamp, align tz to df's ts_col, then pick nearest sample
def val_at_timestamp(df, ts, ts_col='Datetime_Local', val_col='heart-rate'):
    """
    Return df[val_col] (rounded to 2 decimals) at the row whose df[ts_col] is nearest to `ts`.

    Parameters:
        df: DataFrame holding the timestamp and value columns.
        ts: target time; anything pd.to_datetime accepts.
        ts_col: name of the timestamp column.
        val_col: name of the value column to read.

    Returns:
        float rounded to 2 decimals, or None when the nearest row's value is missing.

    Side effect: if df[ts_col] is not already a datetime column it is converted
    and written back into `df`.

    The nearest row is located by position rather than by index label, so a
    frame with duplicate index labels still yields a single value.
    """
    # ensure ts column is datetime
    if not pd.api.types.is_datetime64_any_dtype(df[ts_col]):
        df[ts_col] = pd.to_datetime(df[ts_col])

    ts = pd.to_datetime(ts)

    # align timezone: naive targets are localized, aware targets converted
    tz = df[ts_col].dt.tz
    if tz is not None:
        ts = ts.tz_localize(tz) if ts.tzinfo is None else ts.tz_convert(tz)

    # After reset_index the label returned by idxmin equals the row position.
    gaps = (df[ts_col] - ts).abs().reset_index(drop=True)
    val = df[val_col].iloc[gaps.idxmin()]
    return None if pd.isna(val) else round(float(val), 2)

# Build a wide table for HR/HRV at S3_S, S3_M, S3_E
# Each entry: (condition label, source frame, (S3_S time, S3_M time, S3_E time)).
_s3_point_sources = (
    ("V",   df_visual, (state03_start_time_v,   state03_m_time_v,   end_time_v)),
    ("VA",  df_va,     (state03_start_time_va,  state03_m_time_va,  end_time_va)),
    ("VT",  df_vt,     (state03_start_time_vt,  state03_m_time_vt,  end_time_vt)),
    ("VAT", df_vat,    (state03_start_time_vat, state03_m_time_vat, end_time_vat)),
)

# For every time point read the heart-rate first, then the hrv.
rows = [
    [label] + [
        val_at_timestamp(frame, moment, val_col=column)
        for moment in moments
        for column in ('heart-rate', 'hrv')
    ]
    for label, frame, moments in _s3_point_sources
]

df_hr_hrv_points = pd.DataFrame(
    rows,
    columns=[
        "Condition",
        "S3_S_HR", "S3_S_HRV",
        "S3_M_HR", "S3_M_HRV",
        "S3_E_HR", "S3_E_HRV"
    ]
)

display(df_hr_hrv_points)


Unnamed: 0,Condition,S3_S_HR,S3_S_HRV,S3_M_HR,S3_M_HRV,S3_E_HR,S3_E_HRV
0,V,90.0,33.3,94.0,14.8,95.0,15.4
1,VA,87.0,22.9,90.0,23.6,91.0,27.1
2,VT,0.0,0.0,83.0,40.6,85.0,32.7
3,VAT,80.0,31.9,81.0,69.3,78.0,47.2


In [24]:
import os
import pandas as pd

# ========== Participant settings ==========
participant_id, gender = "P07", "F"

# ========== Output location ==========
# NOTE(review): absolute, machine-specific path — consider a configurable base directory.
filename = "B_Participant_HR_S3_Task.csv"
output_dir = r"G:\USYD_PhD_OneDrive\OneDrive - The University of Sydney (Students)\PhD_USYD\2025\Prototype B\HR"
filepath = os.path.join(output_dir, filename)
os.makedirs(output_dir, exist_ok=True)  # create the folder if it is missing

# ========== Helper: nearest-sample value at a timestamp ==========
# NOTE(review): a function with this name is also defined in an earlier cell;
# this definition replaces it when the cell runs. Consider keeping one copy.
def val_at_timestamp(df, ts, ts_col='Datetime_Local', val_col='heart-rate'):
    """
    Return the value (rounded to 2 decimals) at the row whose timestamp is nearest to `ts`.
    Converts df[ts_col] to datetime if needed, and aligns `ts` timezone to df's tz if present.

    Parameters:
        df: DataFrame holding the timestamp and value columns.
        ts: target time; anything pd.to_datetime accepts.
        ts_col: name of the timestamp column.
        val_col: name of the value column to read.

    Returns:
        float rounded to 2 decimals, or None when the nearest row's value is missing.

    Side effect: a non-datetime df[ts_col] is converted and written back into `df`.

    The nearest row is located by position rather than by index label, so a
    frame with duplicate index labels still yields a single value.
    """
    if not pd.api.types.is_datetime64_any_dtype(df[ts_col]):
        df[ts_col] = pd.to_datetime(df[ts_col])

    ts = pd.to_datetime(ts)

    # Naive targets are localized to the column's tz; aware targets are converted.
    tz = df[ts_col].dt.tz
    if tz is not None:
        ts = ts.tz_localize(tz) if ts.tzinfo is None else ts.tz_convert(tz)

    # After reset_index the label returned by idxmin equals the row position.
    gaps = (df[ts_col] - ts).abs().reset_index(drop=True)
    val = df[val_col].iloc[gaps.idxmin()]
    return None if pd.isna(val) else round(float(val), 2)

# ========== Build one row for a condition ==========
def build_row_hr_points(condition_label, df, bpm_pre, hrv_pre, bpm_00, hrv_00, t_s3s, t_s3m, t_s3e, skip_pre=False):
    """
    Assemble one 13-element CSV row for a condition.

    Order: participant_id, gender (both read from the notebook's globals),
    condition_label, the two Baseline_Pre cells, the two Baseline_00 cells,
    then heart-rate and hrv at t_s3s, at t_s3m and at t_s3e, each looked up
    in `df` with val_at_timestamp.

    Values that float() accepts are written as strings with two decimals;
    anything else (e.g. "" or None) is passed through unchanged. With
    skip_pre=True the Baseline_Pre cells are empty strings and
    bpm_pre / hrv_pre are ignored.
    """
    def two_decimals(x):
        try:
            return f"{float(x):.2f}"
        except (TypeError, ValueError):
            return x

    # heart-rate first, then hrv, for each of the three time points
    point_values = [
        val_at_timestamp(df, moment, val_col=column)
        for moment in (t_s3s, t_s3m, t_s3e)
        for column in ('heart-rate', 'hrv')
    ]

    if skip_pre:
        pre_cells = ["", ""]
    else:
        pre_cells = [two_decimals(bpm_pre), two_decimals(hrv_pre)]

    return [
        participant_id, gender, condition_label,
        *pre_cells,
        two_decimals(bpm_00), two_decimals(hrv_00),
        *[two_decimals(value) for value in point_values],
    ]

# ========== Build rows using EXISTING variables ==========
# Only V carries pre-baseline values; VA and VT pass empty strings and VAT
# uses skip_pre, so all three end up with blank Baseline_Pre cells.
row_v = build_row_hr_points(
    "V",  df_visual,
    bpm_pre_v, hrv_pre_v, bpm_00_v, hrv_00_v,
    state03_start_time_v, state03_m_time_v, end_time_v,
    skip_pre=False
)

row_va = build_row_hr_points(
    "VA", df_va,
    "", "", bpm_00_va, hrv_00_va,
    state03_start_time_va, state03_m_time_va, end_time_va,
    skip_pre=False
)

row_vt = build_row_hr_points(
    "VT", df_vt,
    "", "", bpm_00_vt, hrv_00_vt,
    state03_start_time_vt, state03_m_time_vt, end_time_vt,
    skip_pre=False
)

# VAT: keep Pre empty per your prior convention
row_vat = build_row_hr_points(
    "VAT", df_vat,
    "", "", bpm_00_vat, hrv_00_vat,
    state03_start_time_vat, state03_m_time_vat, end_time_vat,
    skip_pre=True
)

# ========== Columns ==========
columns = [
    "Participant ID", "Gender", "Condition",
    "Baseline_Pre_bpm", "Baseline_Pre_hrv",
    "Baseline_00_bpm", "Baseline_00_hrv",
    "S3_S_HR", "S3_S_HRV",
    "S3_M_HR", "S3_M_HRV",
    "S3_E_HR", "S3_E_HRV",
]

# ========== Save or Append ==========
rows = [row_v, row_va, row_vt, row_vat]

# A header is written only when the file is new or still empty.
write_header = not os.path.exists(filepath) or os.path.getsize(filepath) == 0
if not write_header:
    # Header-less appends to a file with a different column layout would
    # silently put values under the wrong headers, so check the layout first.
    existing_columns = pd.read_csv(filepath, nrows=0).columns.tolist()
    if existing_columns != columns:
        raise ValueError(
            f"Column mismatch between {filepath} and the rows to append: "
            f"file has {len(existing_columns)} columns, new rows have {len(columns)}."
        )
pd.DataFrame(rows, columns=columns).to_csv(filepath, mode='a', index=False, header=write_header)

print(f"File saved at: {filepath}")


File saved at: G:\USYD_PhD_OneDrive\OneDrive - The University of Sydney (Students)\PhD_USYD\2025\Prototype B\HR\B_Participant_HR_S3_Task.csv
