In [1]:
import matplotlib.pyplot as plt
import numpy as np

import pyxdf

In [24]:
# Relative paths to the two XDF recordings inspected throughout this notebook
# (one file per lab: WPI HCI Lab and CSL).
test_WPI_xdf_path = "data/Group 1/LabRecorder/HCILab1-Group1.xdf"
test_CSL_xdf_path = "data/Group 1/LabRecorder/CSL_01_10.xdf"

### View XDF Streams

In [None]:
def view_xdf_streams(xdf_path):
    """Print the file header of an XDF recording, then one line per stream with its name.

    Parameters:
    - xdf_path: Path of the XDF file to load with pyxdf.
    """
    streams, file_header = pyxdf.load_xdf(xdf_path)
    print(file_header)

    # The stream name is stored as a one-element list under info -> name.
    names = (entry["info"]["name"][0] for entry in streams)
    for name in names:
        print(f'Stream {name}')

In [None]:
# Print the header and the stream names of the WPI HCI Lab recording.
view_xdf_streams(test_WPI_xdf_path)

Stream 6: Calculated effective sampling rate 14.5166 Hz is different from specified rate 30.0000 Hz.
Stream 3: Calculated effective sampling rate 215.7338 Hz is different from specified rate 250.0000 Hz.


{'info': defaultdict(<class 'list'>, {'version': ['1.0'], 'datetime': ['2024-10-01T10:11:12-0400']})}
Stream OBS_HCILab1
Stream Unicorn_HCILab1
Stream OBS_HCILab2
Stream Unicorn_HCILab2
Stream Unicorn_CSL_LabPC
Stream OBS_CSL_Laptop


In [None]:
# Print the header and the stream names of the CSL recording.
view_xdf_streams(test_CSL_xdf_path)

Stream 5: Calculated effective sampling rate 14.5717 Hz is different from specified rate 30.0000 Hz.


{'info': defaultdict(<class 'list'>, {'version': ['1.0'], 'datetime': ['2024-10-01T16:10:18+0200']})}
Stream OBS_CSL_Laptop
Stream Unicorn_CSL_Laptop
Stream Unicorn_CSL_LabPC
Stream OBS_CSL_LabPC
Stream OBS_HCILab2
Stream Unicorn_HCILab1


In [40]:
def view_stream_keys(xdf_path):
    """Print the top-level keys of the first stream in an XDF file.

    Only the first stream is inspected; nothing here checks that the other
    streams expose the same keys.

    Parameters:
    - xdf_path: Path of the XDF file to load with pyxdf.
    """
    data, _ = pyxdf.load_xdf(xdf_path)

    # A file without any stream has no data[0]; report it instead of raising IndexError.
    if len(data) == 0:
        print(f"No streams found in {xdf_path}")
        return

    # Print all keys in the stream to inspect the structure
    print("Keys in a stream:")
    for key in data[0]:
        print(f"- {key}")

In [48]:
# Show which keys a stream dict exposes, using the WPI HCI Lab recording.
view_stream_keys(test_WPI_xdf_path)

Stream 6: Calculated effective sampling rate 14.5166 Hz is different from specified rate 30.0000 Hz.
Stream 3: Calculated effective sampling rate 215.7338 Hz is different from specified rate 250.0000 Hz.


Keys in a stream:
- info
- footer
- time_series
- time_stamps
- clock_times
- clock_values


From running the code above, we get that each stream contains:

- info
- footer
- time_series (contains UNIX time stamps for OBS streams)
- time_stamps (contains LabRecorder time stamps)
- clock_times
- clock_values

Also, from examining the streams of both WPI HCI Lab and CSL xdf files, we get that:

The WPI HCI Lab Stream Names:

- Stream OBS_HCILab1
- Stream Unicorn_HCILab1 (shared)
- Stream OBS_HCILab2 (shared)
- Stream Unicorn_HCILab2
- Stream Unicorn_CSL_LabPC
- Stream OBS_CSL_Laptop

The CSL Lab Stream Names:

- Stream OBS_CSL_Laptop
- Stream Unicorn_CSL_Laptop
- Stream Unicorn_CSL_LabPC
- Stream OBS_CSL_LabPC
- Stream OBS_HCILab2 (shared)
- Stream Unicorn_HCILab1 (shared)

We see that both lab xdf files share OBS_HCILab2 and Unicorn_HCILab1! Since the same OBS stream is in both files, below I will find the index in the earlier recording where the UNIX timestamp at the start of the later recording occurs. Once I get the LabRecorder timestamp at this index, I will cut all the EEG data with timestamps lower than this value to sync all streams.

In [63]:
def examine_timestamps_length(xdf_path):
    """Print the number of time stamps of every stream in an XDF file, with the first and last one.

    Time stamps are printed exactly as loaded; no offset between recordings is applied.

    Parameters:
    - xdf_path: Path of the XDF file to load with pyxdf.
    """
    data, _ = pyxdf.load_xdf(xdf_path)

    for stream in data:
        stream_name = stream["info"]["name"][0]
        time_stamps = stream["time_stamps"]

        # A stream without samples has no first/last time stamp to index.
        if len(time_stamps) == 0:
            print(f'Stream {stream_name} has 0 time stamps')
            continue

        print(f'Stream {stream_name} has {len(time_stamps)} time stamps, starting with {time_stamps[0]} and ending with {time_stamps[-1]}')

# Examine the length of all time stamps in each stream for both XDF files.
# Each call reloads its file from disk, so this cell does not depend on earlier loads.
print("WPI HCI Lab Streams:")
examine_timestamps_length(test_WPI_xdf_path)

print("\nCSL Lab Streams:")
examine_timestamps_length(test_CSL_xdf_path)

WPI HCI Lab Streams:


Stream 6: Calculated effective sampling rate 14.5166 Hz is different from specified rate 30.0000 Hz.
Stream 3: Calculated effective sampling rate 215.7338 Hz is different from specified rate 250.0000 Hz.


Stream OBS_HCILab1 has 120921 time stamps, starting with 56793.94837631806 and ending with 60826.57269000918
Stream Unicorn_HCILab1 has 1009144 time stamps, starting with 56794.38196600928 and ending with 60826.86284536544
Stream OBS_HCILab2 has 61069 time stamps, starting with 56683.57279925809 and ending with 60890.37793320787
Stream Unicorn_HCILab2 has 857742 time stamps, starting with 56823.3321161977 and ending with 60799.25638114239
Stream Unicorn_CSL_LabPC has 985886 time stamps, starting with 56793.539689118654 and ending with 60750.5087719986
Stream OBS_CSL_Laptop has 120966 time stamps, starting with 56794.65596391625 and ending with 60826.8442674134

CSL Lab Streams:


Stream 5: Calculated effective sampling rate 14.5717 Hz is different from specified rate 30.0000 Hz.


Stream OBS_CSL_Laptop has 119338 time stamps, starting with 4858783.043931104 and ending with 4862760.94388939
Stream Unicorn_CSL_Laptop has 995227 time stamps, starting with 4858782.925259427 and ending with 4862760.846540175
Stream Unicorn_CSL_LabPC has 992670 time stamps, starting with 4858781.894095449 and ending with 4862760.177315814
Stream OBS_CSL_LabPC has 7868 time stamps, starting with 4858783.037595432 and ending with 4858914.1542559145
Stream OBS_HCILab2 has 60141 time stamps, starting with 4858628.17170826 and ending with 4862755.37086901
Stream Unicorn_HCILab1 has 995460 time stamps, starting with 4858783.100935575 and ending with 4862760.864878031


### Examining OBS Data Structure

In [None]:
# Load the WPI HCI Lab recording once for the inspection below.
WPI_HCI_data, WPI_HCI_header = pyxdf.load_xdf(test_WPI_xdf_path)

# Position 2 is OBS_HCILab2 in the stream listing printed for this file above.
# NOTE(review): the position is hard-coded; re-check it if the recording changes.
OBS_HCILab2_steam_at_WPI = WPI_HCI_data[2]

# Dump every top-level field of the stream to see its structure.
for key in OBS_HCILab2_steam_at_WPI.keys():
    print(f"{key}: {OBS_HCILab2_steam_at_WPI[key]}")

Stream 6: Calculated effective sampling rate 14.5166 Hz is different from specified rate 30.0000 Hz.
Stream 3: Calculated effective sampling rate 215.7338 Hz is different from specified rate 250.0000 Hz.


info: defaultdict(<class 'list'>, {'name': ['OBS_HCILab2'], 'type': ['OBS frame numbers'], 'channel_count': ['3'], 'channel_format': ['double64'], 'source_id': ['ID2'], 'nominal_srate': ['30.00000000000000'], 'version': ['1.100000000000000'], 'created_at': ['55467.06533450000'], 'uid': ['99ea131f-4f5e-438c-bf56-7a4c96d85552'], 'session_id': ['default'], 'hostname': ['DESKTOP-1PS5GJD'], 'v4address': [None], 'v4data_port': ['16572'], 'v4service_port': ['16572'], 'v6address': [None], 'v6data_port': ['16572'], 'v6service_port': ['16572'], 'desc': [defaultdict(<class 'list'>, {'channels': [defaultdict(<class 'list'>, {'channel': [defaultdict(<class 'list'>, {'label': ['frame_time'], 'unit': ['seconds'], 'type': ['timestamp']}), defaultdict(<class 'list'>, {'label': ['frame_num'], 'unit': ['integer'], 'type': ['frameID']}), defaultdict(<class 'list'>, {'label': ['frame_dtime'], 'unit': ['seconds'], 'type': ['time_diff']})]})]})], 'stream_id': 6, 'effective_srate': np.float64(14.5166020694490

In [None]:
# Load the CSL recording; CSL_data is reused by the following inspection cells.
CSL_data, CSL_header = pyxdf.load_xdf(test_CSL_xdf_path)

# Position 4 is OBS_HCILab2 in the stream listing printed for this file above.
# NOTE(review): the position is hard-coded; re-check it if the recording changes.
OBS_HCILab2_steam_at_CSL = CSL_data[4]

# Dump every top-level field of the stream to see its structure.
for key in OBS_HCILab2_steam_at_CSL.keys():
    print(f"{key}: {OBS_HCILab2_steam_at_CSL[key]}")

info: defaultdict(<class 'list'>, {'name': ['OBS_HCILab2'], 'type': ['OBS frame numbers'], 'channel_count': ['3'], 'channel_format': ['double64'], 'source_id': ['ID2'], 'nominal_srate': ['30.00000000000000'], 'version': ['1.100000000000000'], 'created_at': ['55467.06533450000'], 'uid': ['99ea131f-4f5e-438c-bf56-7a4c96d85552'], 'session_id': ['default'], 'hostname': ['DESKTOP-1PS5GJD'], 'v4address': [None], 'v4data_port': ['16572'], 'v4service_port': ['16572'], 'v6address': [None], 'v6data_port': ['16572'], 'v6service_port': ['16572'], 'desc': [defaultdict(<class 'list'>, {'channels': [defaultdict(<class 'list'>, {'channel': [defaultdict(<class 'list'>, {'label': ['frame_time'], 'unit': ['seconds'], 'type': ['timestamp']}), defaultdict(<class 'list'>, {'label': ['frame_num'], 'unit': ['integer'], 'type': ['frameID']}), defaultdict(<class 'list'>, {'label': ['frame_dtime'], 'unit': ['seconds'], 'type': ['time_diff']})]})]})], 'stream_id': 5, 'effective_srate': np.float64(14.5717499768228

In [85]:
# Relies on CSL_data loaded in an earlier cell; position 0 is hard-coded.
OOBS_CSL_Laptop_at_CSL = CSL_data[0]  # Stream OBS_CSL_Laptop

# Print the length and the content of every top-level field of the stream.
for key in OOBS_CSL_Laptop_at_CSL.keys():
    print(f"\nLength of {key}: {len(OOBS_CSL_Laptop_at_CSL[key])}")
    print(f"{key}: {OOBS_CSL_Laptop_at_CSL[key]}")


Length of info: 20
info: defaultdict(<class 'list'>, {'name': ['OBS_CSL_Laptop'], 'type': ['OBS frame numbers'], 'channel_count': ['3'], 'channel_format': ['double64'], 'source_id': ['17864872365732'], 'nominal_srate': ['30.00000000000000'], 'version': ['1.100000000000000'], 'created_at': ['4851613.236379700'], 'uid': ['0abd1bea-ca0d-4e4f-891f-632519d3e98f'], 'session_id': ['default'], 'hostname': ['eyetracker-laptop'], 'v4address': [None], 'v4data_port': ['16572'], 'v4service_port': ['16572'], 'v6address': [None], 'v6data_port': ['16572'], 'v6service_port': ['16572'], 'desc': [defaultdict(<class 'list'>, {'channels': [defaultdict(<class 'list'>, {'channel': [defaultdict(<class 'list'>, {'label': ['frame_time'], 'unit': ['seconds'], 'type': ['timestamp']}), defaultdict(<class 'list'>, {'label': ['frame_num'], 'unit': ['integer'], 'type': ['frameID']}), defaultdict(<class 'list'>, {'label': ['frame_dtime'], 'unit': ['seconds'], 'type': ['time_diff']})]})]})], 'stream_id': 1, 'effective

In [86]:
# Relies on CSL_data loaded in an earlier cell; position 1 is hard-coded.
Unicorn_CSL_Laptop_at_CSL = CSL_data[1]  # Stream Unicorn_CSL_Laptop

# Print the length and the content of every top-level field of the stream.
for key in Unicorn_CSL_Laptop_at_CSL.keys():
    print(f"\nLength of {key}: {len(Unicorn_CSL_Laptop_at_CSL[key])}")
    print(f"{key}: {Unicorn_CSL_Laptop_at_CSL[key]}")


Length of info: 20
info: defaultdict(<class 'list'>, {'name': ['Unicorn_CSL_Laptop'], 'type': ['EEG'], 'channel_count': ['17'], 'channel_format': ['float32'], 'source_id': ['UN-2021.12.36'], 'nominal_srate': ['250.0000000000000'], 'version': ['1.100000000000000'], 'created_at': ['4857634.229646700'], 'uid': ['07b3ee41-2e4a-4c0b-a203-c0a604f3777f'], 'session_id': ['default'], 'hostname': ['eyetracker-laptop'], 'v4address': [None], 'v4data_port': ['16573'], 'v4service_port': ['16573'], 'v6address': [None], 'v6data_port': ['16573'], 'v6service_port': ['16573'], 'desc': [defaultdict(<class 'list'>, {'channels': [defaultdict(<class 'list'>, {'channel': [defaultdict(<class 'list'>, {'label': ['Fz'], 'unit': ['microvolts'], 'type': ['EEG']}), defaultdict(<class 'list'>, {'label': ['C3'], 'unit': ['microvolts'], 'type': ['EEG']}), defaultdict(<class 'list'>, {'label': ['Cz'], 'unit': ['microvolts'], 'type': ['EEG']}), defaultdict(<class 'list'>, {'label': ['C4'], 'unit': ['microvolts'], 'type

### Sync Streams!!

In [None]:
def _find_stream(data_streams, stream_name):
    """Return the first stream whose info name equals stream_name, or raise ValueError."""
    for stream in data_streams:
        if stream["info"]["name"][0] == stream_name:
            return stream
    raise ValueError(f"Stream {stream_name!r} not found in the given recording")


def _unix_frame_times(stream):
    """Return the first time_series channel of a stream as a 1-D float array."""
    series = np.asarray(stream["time_series"], dtype=float)
    if series.ndim != 2 or series.shape[0] == 0:
        name = stream["info"]["name"][0]
        raise ValueError(f"Stream {name!r} has no samples to read UNIX time stamps from")
    return series[:, 0]


def sync_streams(data_streams_1, data_streams_2, shared_stream_name):
    """
    Sync two recordings by trimming the one that started earlier, in place.

    The first time_series channel of the shared stream is treated as a UNIX time
    stamp, which is the only clock used to compare the two files. The position in
    the earlier recording's shared stream where the later recording starts is
    converted to that recording's own time_stamps value, and every stream of the
    earlier recording is cut to samples at or after that value. Cutting by time
    stamp (rather than by sample position) keeps streams with different sampling
    rates aligned. The later recording is left untouched.

    Parameters:
    - data_streams_1: Stream data from one file (i.e. WPI HCI Lab)
    - data_streams_2: Stream data from another file (i.e. CSL Lab)
    - shared_stream_name: The shared stream (i.e. OBS_HCILab2)

    Returns:
    - None. The streams of the earlier recording are modified in place.

    Raises:
    - ValueError: if the shared stream is missing from either recording, has no
      samples, or the later recording starts after the earlier one has ended.
    """
    shared_1 = _find_stream(data_streams_1, shared_stream_name)
    shared_2 = _find_stream(data_streams_2, shared_stream_name)

    # Extract UNIX time stamps for the shared stream in both files
    unix_timestamps_1 = _unix_frame_times(shared_1)
    unix_timestamps_2 = _unix_frame_times(shared_2)

    # Decide which recording started first according to the shared UNIX clock
    if unix_timestamps_1[0] < unix_timestamps_2[0]:
        earlier_data_streams = data_streams_1
        earlier_shared = shared_1
        earlier_unix = unix_timestamps_1
        later_start = unix_timestamps_2[0]
    else:
        earlier_data_streams = data_streams_2
        earlier_shared = shared_2
        earlier_unix = unix_timestamps_2
        later_start = unix_timestamps_1[0]

    # Position in the earlier shared stream where the later recording starts
    # (searchsorted assumes the UNIX time stamps are non-decreasing).
    index_in_earlier = int(np.searchsorted(earlier_unix, later_start))
    if index_in_earlier >= len(earlier_unix):
        raise ValueError(
            f"Recordings do not overlap on stream {shared_stream_name!r}: "
            "the later one starts after the earlier one ends"
        )

    # Translate that position into the earlier file's own clock. Read it before
    # the loop below, which also trims the shared stream itself.
    cutoff = earlier_shared["time_stamps"][index_in_earlier]

    # Drop every sample recorded before the cutoff, stream by stream
    for stream in earlier_data_streams:
        stamps = np.asarray(stream["time_stamps"])
        keep = stamps >= cutoff
        stream["time_stamps"] = stamps[keep]

        series = stream["time_series"]
        if isinstance(series, np.ndarray):
            stream["time_series"] = series[keep]
        else:
            # Non-array sample containers (e.g. plain lists) are filtered element-wise.
            stream["time_series"] = [sample for sample, kept in zip(series, keep) if kept]

    return


In [89]:
# Load the XDF data for both labs
# (sync_streams later trims these lists in place; re-run this cell to get the untrimmed data back)
WPI_data_streams, WPI_header = pyxdf.load_xdf(test_WPI_xdf_path)
CSL_data_streams, CSL_header = pyxdf.load_xdf(test_CSL_xdf_path)

# Set the shared stream name
shared_stream_name = "OBS_HCILab2"

Stream 6: Calculated effective sampling rate 14.5166 Hz is different from specified rate 30.0000 Hz.
Stream 3: Calculated effective sampling rate 215.7338 Hz is different from specified rate 250.0000 Hz.
Stream 5: Calculated effective sampling rate 14.5717 Hz is different from specified rate 30.0000 Hz.


In [94]:
# Before syncing the streams, let's examine the time_stamps and time_series in both files
def print_time_stamps_and_series(data_streams):
    """Print sample counts and the first three time stamps of every stream.

    For streams whose name starts with "OBS", the first three time_series rows
    are printed as well.

    Parameters:
    - data_streams: Iterable of stream dicts with "info", "time_stamps" and "time_series".
    """
    head = slice(None, 3)
    for entry in data_streams:
        name = entry["info"]["name"][0]
        stamps, series = entry["time_stamps"], entry["time_series"]

        print(f'\nStream {name} has {len(stamps)} time stamps and {len(series)} time series')
        print(f'Time stamps: {stamps[head]} ...')

        # Only OBS streams get a preview of their samples.
        if not name.startswith("OBS"):
            continue
        print(f'Time series: {series[head]} ...')
        
# Snapshot of both recordings before trimming, to compare against the printout made after syncing.
print("WPI HCI Lab Streams before syncing:")
print_time_stamps_and_series(WPI_data_streams)

print("\nCSL Lab Streams before syncing:")
print_time_stamps_and_series(CSL_data_streams)

WPI HCI Lab Streams before syncing:

Stream OBS_HCILab1 has 120921 time stamps and 120921 time series
Time stamps: [56793.94837632 56793.98172584 56794.01507536] ...
Time series: [[1.72779187e+09 0.00000000e+00 3.33333313e-02]
 [1.72779187e+09 0.00000000e+00 3.33333313e-02]
 [1.72779187e+09 0.00000000e+00 3.33333313e-02]] ...

Stream Unicorn_HCILab1 has 1009144 time stamps and 1009144 time series
Time stamps: [56794.38196601 56794.38596196 56794.3899579 ] ...

Stream OBS_HCILab2 has 61069 time stamps and 61069 time series
Time stamps: [56683.57279926 56683.64168648 56683.71057371] ...
Time series: [[1.72779189e+09 0.00000000e+00 3.33333313e-02]
 [1.72779189e+09 0.00000000e+00 3.33333313e-02]
 [1.72779189e+09 0.00000000e+00 3.33333313e-02]] ...

Stream Unicorn_HCILab2 has 857742 time stamps and 857742 time series
Time stamps: [56823.3321162  56823.33675154 56823.34138688] ...

Stream Unicorn_CSL_LabPC has 985886 time stamps and 985886 time series
Time stamps: [56793.53968912 56793.54369

In [95]:
# Sync the streams based on the shared stream's timestamp
# (modifies the stream lists in place and returns nothing, so the cell shows no output)
sync_streams(WPI_data_streams, CSL_data_streams, shared_stream_name)

In [96]:
# Same printout as before syncing; differences in counts / first time stamps show what was trimmed.
print("WPI HCI Lab Streams AFTER syncing:")
print_time_stamps_and_series(WPI_data_streams)

print("\nCSL Lab Streams AFTER syncing:")
print_time_stamps_and_series(CSL_data_streams)

WPI HCI Lab Streams AFTER syncing:

Stream OBS_HCILab1 has 120921 time stamps and 120921 time series
Time stamps: [56793.94837632 56793.98172584 56794.01507536] ...
Time series: [[1.72779187e+09 0.00000000e+00 3.33333313e-02]
 [1.72779187e+09 0.00000000e+00 3.33333313e-02]
 [1.72779187e+09 0.00000000e+00 3.33333313e-02]] ...

Stream Unicorn_HCILab1 has 1009144 time stamps and 1009144 time series
Time stamps: [56794.38196601 56794.38596196 56794.3899579 ] ...

Stream OBS_HCILab2 has 61069 time stamps and 61069 time series
Time stamps: [56683.57279926 56683.64168648 56683.71057371] ...
Time series: [[1.72779189e+09 0.00000000e+00 3.33333313e-02]
 [1.72779189e+09 0.00000000e+00 3.33333313e-02]
 [1.72779189e+09 0.00000000e+00 3.33333313e-02]] ...

Stream Unicorn_HCILab2 has 857742 time stamps and 857742 time series
Time stamps: [56823.3321162  56823.33675154 56823.34138688] ...

Stream Unicorn_CSL_LabPC has 985886 time stamps and 985886 time series
Time stamps: [56793.53968912 56793.543696

### Below are test functions that might be helpful later

In [12]:
def visualize_eeg_data(xdf_path):
    """Load an XDF file and draw one line plot per stream (time stamps on x, time series on y).

    Every stream in the file is plotted, whatever its type, and each figure is
    shown as soon as it is drawn.

    Parameters:
    - xdf_path: Path of the XDF file to load with pyxdf.
    """
    streams, _ = pyxdf.load_xdf(xdf_path)

    for entry in streams:
        label = entry["info"]["name"][0]
        print(f'Visualizing {label}')

        samples = entry["time_series"]
        plt.figure(figsize=(12, 6))
        plt.plot(entry["time_stamps"], samples)
        plt.xlabel('Time (s)')
        plt.ylabel('EEG Signal')
        plt.title(f'EEG Data from {label}')
        plt.show()

In [26]:
def print_starting_timestamps(xdf_path):
    """Print the file header, then the first/last time stamp and duration of every stream.

    Streams without any time stamp are reported with a short notice instead.

    Parameters:
    - xdf_path: Path of the XDF file to load with pyxdf.
    """
    streams, file_header = pyxdf.load_xdf(xdf_path)
    print(file_header)

    for entry in streams:
        name = entry["info"]["name"][0]
        print(f'Visualizing {name}')

        stamps = entry["time_stamps"]

        # Nothing to measure on an empty stream.
        if len(stamps) == 0:
            print(f"No time stamps available for {name}")
            continue

        # Duration is simply last minus first time stamp, in seconds.
        first, last = stamps[0], stamps[-1]
        elapsed = last - first

        print(f"Starting timestamp for {name}: {first}")
        print(f"Ending timestamp for {name}: {last}")
        print(f"Duration of {name}: {elapsed} seconds")
