In [1]:
import matplotlib.pyplot as plt
import numpy as np

import pyxdf

In [24]:
# Relative paths to the two LabRecorder XDF recordings inspected in this notebook:
# one from the WPI HCI Lab and one from the CSL lab.
test_WPI_xdf_path = "data/Group 1/LabRecorder/HCILab1-Group1.xdf"
test_CSL_xdf_path = "data/Group 1/LabRecorder/CSL_01_10.xdf"

### View XDF Streams

In [None]:
def view_xdf_streams(xdf_path):
    """Print an XDF file's header followed by the name of every stream in it.

    Parameters:
    - xdf_path: Path of the XDF file to load with pyxdf.
    """
    streams, file_header = pyxdf.load_xdf(xdf_path)
    print(file_header)

    # The stream name is the first element of the stream's info["name"] entry.
    names = (entry["info"]["name"][0] for entry in streams)
    for name in names:
        print(f'Stream {name}')

In [None]:
# Print the header and stream names of the WPI HCI Lab recording.
view_xdf_streams(test_WPI_xdf_path)

Stream 6: Calculated effective sampling rate 14.5166 Hz is different from specified rate 30.0000 Hz.
Stream 3: Calculated effective sampling rate 215.7338 Hz is different from specified rate 250.0000 Hz.


{'info': defaultdict(<class 'list'>, {'version': ['1.0'], 'datetime': ['2024-10-01T10:11:12-0400']})}
Stream OBS_HCILab1
Stream Unicorn_HCILab1
Stream OBS_HCILab2
Stream Unicorn_HCILab2
Stream Unicorn_CSL_LabPC
Stream OBS_CSL_Laptop


In [None]:
# Print the header and stream names of the CSL lab recording.
view_xdf_streams(test_CSL_xdf_path)

Stream 5: Calculated effective sampling rate 14.5717 Hz is different from specified rate 30.0000 Hz.


{'info': defaultdict(<class 'list'>, {'version': ['1.0'], 'datetime': ['2024-10-01T16:10:18+0200']})}
Stream OBS_CSL_Laptop
Stream Unicorn_CSL_Laptop
Stream Unicorn_CSL_LabPC
Stream OBS_CSL_LabPC
Stream OBS_HCILab2
Stream Unicorn_HCILab1


In [40]:
def view_stream_keys(xdf_path):
    """Print the top-level keys of the first stream found in an XDF file.

    Parameters:
    - xdf_path: Path of the XDF file to load with pyxdf.
    """
    streams, _ = pyxdf.load_xdf(xdf_path)

    # Only the first stream is inspected.
    print("Keys in a stream:")
    for field_name in streams[0]:
        print(f"- {field_name}")

In [48]:
# List the keys of the first stream in the WPI HCI Lab recording.
view_stream_keys(test_WPI_xdf_path)

Stream 6: Calculated effective sampling rate 14.5166 Hz is different from specified rate 30.0000 Hz.
Stream 3: Calculated effective sampling rate 215.7338 Hz is different from specified rate 250.0000 Hz.


Keys in a stream:
- info
- footer
- time_series
- time_stamps
- clock_times
- clock_values


From running the code above, we get that each stream contains:

- info
- footer
- time_series
- time_stamps
- clock_times
- clock_values

Also, from examining the streams of both WPI HCI Lab and CSL xdf files, we get that:

The WPI HCI Lab Stream Names:

- Stream OBS_HCILab1
- Stream Unicorn_HCILab1 (shared)
- Stream OBS_HCILab2 (shared)
- Stream Unicorn_HCILab2
- Stream Unicorn_CSL_LabPC
- Stream OBS_CSL_Laptop

The CSL Lab Stream Names:

- Stream OBS_CSL_Laptop
- Stream Unicorn_CSL_Laptop
- Stream Unicorn_CSL_LabPC
- Stream OBS_CSL_LabPC
- Stream OBS_HCILab2 (shared)
- Stream Unicorn_HCILab1 (shared)

We see that both lab xdf files share OBS_HCILab2 and Unicorn_HCILab1! (Unicorn_CSL_LabPC and OBS_CSL_Laptop also appear in both listings above.) Since the same OBS stream is present in multiple files, below I will find the index in the earlier recording at which the UNIX timestamp from the start of the later recording occurs. Once I have the LabRecorder timestamp at this index, I will cut all the EEG data with timestamps lower than this value to sync all streams.

In [61]:
def examine_timestamps_length(xdf_path, stream_of_interest="OBS_HCILab2", csl_clock_offset=4801944.59891):
    """Print the number of time stamps and the first/last time stamp of one stream.

    Parameters:
    - xdf_path: Path of the XDF file to load with pyxdf.
    - stream_of_interest: Name of the stream to report on (defaults to the
      OBS_HCILab2 stream that both recordings contain).
    - csl_clock_offset: Seconds subtracted from every time stamp when
      xdf_path is the CSL recording (test_CSL_xdf_path). The default is the
      difference between the OBS_HCILab2 starting time stamps of the two
      files; it is NOT a 6-hour time-zone shift (6 * 60 * 60 = 21600 s).

    Streams with no samples are reported as empty instead of raising.
    """
    data, _ = pyxdf.load_xdf(xdf_path)

    # Only the CSL file's time stamps are shifted, to line them up with the
    # WPI file's time stamps.
    apply_offset = xdf_path == test_CSL_xdf_path

    for stream in data:
        stream_name = stream["info"]["name"][0]
        if stream_name != stream_of_interest:
            continue

        time_stamps = np.asarray(stream["time_stamps"], dtype=float)
        if apply_offset:
            # Vectorised shift instead of a Python-level loop over every sample.
            time_stamps = time_stamps - csl_clock_offset

        if time_stamps.size == 0:
            # Indexing [0] / [-1] below would fail on an empty stream.
            print(f'Stream {stream_name} has 0 time stamps')
            continue

        print(f'Stream {stream_name} has {len(time_stamps)} time stamps, starting with {time_stamps[0]} and ending with {time_stamps[-1]}')

# Examine the length of all time stamps in each stream for both XDF files
# Report the time stamp count and first/last time stamp of the shared
# OBS_HCILab2 stream for both XDF files (the CSL values are printed after
# the offset applied inside examine_timestamps_length).
print("WPI HCI Lab Streams:")
examine_timestamps_length(test_WPI_xdf_path)

print("\nCSL Lab Streams:")
examine_timestamps_length(test_CSL_xdf_path)

WPI HCI Lab Streams:


Stream 6: Calculated effective sampling rate 14.5166 Hz is different from specified rate 30.0000 Hz.
Stream 3: Calculated effective sampling rate 215.7338 Hz is different from specified rate 250.0000 Hz.


Stream OBS_HCILab2 has 61069 time stamps, starting with 56683.57279925809 and ending with 60890.37793320787

CSL Lab Streams:


Stream 5: Calculated effective sampling rate 14.5717 Hz is different from specified rate 30.0000 Hz.


Stream OBS_HCILab2 has 60141 time stamps, starting with 56683.572798259556 and ending with 60810.77195900958


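Offset subtracted from the CSL time stamps above (raw CSL start of OBS_HCILab2 minus WPI start of OBS_HCILab2): 4858628.17170826 - 56683.57279925809 = 4801944.59891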

### Below are test functions that might be helpful later

In [None]:
def load_stream_data(xdf_path):
    """Load an XDF file and index its streams by name.

    Parameters:
    - xdf_path: Path of the XDF file to load with pyxdf.

    Returns:
    - A dict mapping each stream name to a dict with that stream's
      'time_stamps' and 'time_series'. If two streams share a name, the one
      that appears later in the file replaces the earlier one.
    """
    streams, _header = pyxdf.load_xdf(xdf_path)

    return {
        entry["info"]["name"][0]: {
            'time_stamps': entry["time_stamps"],
            'time_series': entry["time_series"],
        }
        for entry in streams
    }

def sync_streams(data_streams_1, data_streams_2, shared_stream_name):
    """
    Sync the data by trimming streams so that they align based on the shared stream's timestamp.

    The recording whose shared stream starts first is the "earlier" one. Every
    stream of the earlier recording is trimmed so that it begins at the first
    sample whose time stamp is not before the start of the later recording's
    shared stream. The cut is located by time stamp separately for each
    stream, because streams have different sampling rates and a sample index
    taken from the shared stream does not point at the same instant in
    another stream.

    No clock-offset correction is applied here: the time stamps of both
    inputs are compared as-is, so they must already be on a common clock.

    Parameters:
    - data_streams_1: Stream data from one file (i.e. WPI HCI Lab)
    - data_streams_2: Stream data from another file (i.e. CSL Lab)
    - shared_stream_name: The shared stream (i.e. OBS_HCILab2)

    Returns:
    - Synchronized stream data: a dict mapping stream name to a dict with
      'time_stamps' and 'time_series'. When a stream name exists in both
      inputs, the entry from the later recording is the one returned.

    Raises:
    - KeyError: if shared_stream_name is missing from either input.
    - ValueError: if the shared stream has no time stamps in either input.
    """
    # Extract time_stamps for the shared stream in both files
    time_stamps_1 = data_streams_1[shared_stream_name]['time_stamps']
    time_stamps_2 = data_streams_2[shared_stream_name]['time_stamps']

    if len(time_stamps_1) == 0 or len(time_stamps_2) == 0:
        raise ValueError(
            f"Shared stream {shared_stream_name!r} has no time stamps in at least one recording"
        )

    # Decide which recording started first; the later start is the cut-off.
    if time_stamps_1[0] < time_stamps_2[0]:
        earlier_stream_data, later_stream_data = data_streams_1, data_streams_2
        sync_start = time_stamps_2[0]
    else:
        earlier_stream_data, later_stream_data = data_streams_2, data_streams_1
        sync_start = time_stamps_1[0]

    synchronized_data = {}

    for stream_name, stream in earlier_stream_data.items():
        # First sample of THIS stream at or after the cut-off time stamp.
        first_kept = int(np.searchsorted(stream['time_stamps'], sync_start))
        synchronized_data[stream_name] = {
            'time_stamps': stream['time_stamps'][first_kept:],
            'time_series': stream['time_series'][first_kept:]
        }

    # The later recording already starts at the cut-off, so it is kept whole.
    # Names present in both recordings are overwritten by these entries.
    for stream_name, stream in later_stream_data.items():
        synchronized_data[stream_name] = {
            'time_stamps': stream['time_stamps'],
            'time_series': stream['time_series']
        }

    return synchronized_data


In [None]:
# Load the XDF data for both labs
# NOTE(review): these file names differ from test_WPI_xdf_path / test_CSL_xdf_path
# defined near the top of the notebook; confirm they point at real files.
earlier_xdf_path = "WPI_HCI_Lab_Data.xdf"
later_xdf_path = "CSL_Lab_Data.xdf"

# Both results are named the way sync_streams() is called below; assigning the
# first one to a differently named variable raises NameError on a fresh kernel.
earlier_stream_data = load_stream_data(earlier_xdf_path)
later_stream_data = load_stream_data(later_xdf_path)

# Set the shared stream name (e.g., OBS_HCILab2)
shared_stream_name = "OBS_HCILab2"

# Sync the streams based on the shared stream's timestamp
synchronized_data = sync_streams(earlier_stream_data, later_stream_data, shared_stream_name)

# Print out the synchronized data for verification
for stream_name in synchronized_data:
    print(f"Stream: {stream_name}")
    print(f"  Time stamps: {synchronized_data[stream_name]['time_stamps'][:10]}...")  # First 10 timestamps for preview
    # np.shape also works when time_series is a plain list rather than an ndarray.
    print(f"  Time series shape: {np.shape(synchronized_data[stream_name]['time_series'])}")

In [12]:
def visualize_eeg_data(xdf_path):
    """Plot every stream of an XDF file against its time stamps, one figure per stream.

    Parameters:
    - xdf_path: Path of the XDF file to load with pyxdf.

    Note: all streams in the file are plotted and labelled as EEG, whatever
    their names are.
    """
    streams, _ = pyxdf.load_xdf(xdf_path)

    for entry in streams:
        name = entry["info"]["name"][0]
        print(f'Visualizing {name}')

        samples = entry["time_series"]
        fig, ax = plt.subplots(figsize=(12, 6))
        ax.plot(entry["time_stamps"], samples)
        ax.set_xlabel('Time (s)')
        ax.set_ylabel('EEG Signal')
        ax.set_title(f'EEG Data from {name}')
        plt.show()

In [26]:
def print_starting_timestamps(xdf_path):
    """Print the file header, then each stream's first/last time stamp and duration.

    Parameters:
    - xdf_path: Path of the XDF file to load with pyxdf.

    Streams without any time stamps are reported as such instead of being
    indexed.
    """
    streams, file_header = pyxdf.load_xdf(xdf_path)
    print(file_header)

    for entry in streams:
        name = entry["info"]["name"][0]
        print(f'Visualizing {name}')

        stamps = entry["time_stamps"]

        # Nothing to index when the stream recorded no samples.
        if len(stamps) == 0:
            print(f"No time stamps available for {name}")
            continue

        first_stamp, last_stamp = stamps[0], stamps[-1]

        print(f"Starting timestamp for {name}: {first_stamp}")
        print(f"Ending timestamp for {name}: {last_stamp}")
        # Duration is the span between the first and last time stamp.
        print(f"Duration of {name}: {last_stamp - first_stamp} seconds")
