In [None]:
import numpy as np
import pandas as pd

import wfdb
import chorus_converter.utils as wfdb_conv_funcs

In [None]:
# Configuration: module-level settings on the converter utilities that a user may want to tune.

# Name of the column that holds the raw timing values.
wfdb_conv_funcs.TIMING_COL = 'time'

# Name given to the converted timestamp index column.
wfdb_conv_funcs.TIMING_INDEX_COL = 'Timestamp'

# Units expected in the timing column (must be a unit string that np.datetime64 accepts).
wfdb_conv_funcs.EXPECTED_TIMING_UNITS = 'ns'

# Longest allowed segment, in seconds: 8 hours * 60 minutes/hour * 60 seconds/minute.
wfdb_conv_funcs.MAX_SEGMENT_LENGTH_SECONDS = 8 * 60 * 60

# WFDB storage format; a one-element list so it can be repeated once per channel later on.
wfdb_conv_funcs.BINARY_FMT = ['16']

# Allowed difference between the measured and the expected sampling frequency
# (presumably in Hz, given the name -- TODO confirm against the utils module).
wfdb_conv_funcs.HZ_TOLERANCE = 5

In [None]:
# Directory containing the test Sickbay waveform files (fill in before running).
base_wf_dir = ''

# Sickbay channel-mapping file passed to get_duke_mapping_df below (fill in before running).
sickbay_mapping_file = ''

In [None]:
# Build the channel lookup table from the mapping file.
# For CHoRUS we would need a dictionary that maps each site's channels to a set of common names.
sickbay_fast_lookup_df = wfdb_conv_funcs.get_duke_mapping_df(
    base_wf_dir,
    sickbay_mapping_file,
)

In [None]:
# How do we get the start datetime from Sickbay? For now, assume a simple solution:
# it is temporarily encoded in the filename.
#
# Example file naming:
#   {SickbayID}_{YYYYMMDD}_{HHMMSS}.csv

# Name of the waveform file to convert (fill in before running).
input_filename = ''

In [None]:
# Load one example waveform file; the reader returns the file ID, date, time, and the signal frame.
(
    input_file_ID,
    input_date,
    input_time,
    signal_df,
) = wfdb_conv_funcs.read_duke_wf(base_wf_dir, input_filename)

In [None]:
# Collect the channels that are present in the loaded signal data.
channel_cols = wfdb_conv_funcs.get_channel_cols(
    signal_df,
)

In [None]:
# Look up per-channel metadata (signal names, sampling frequencies, units) via the mapping table.
(
    sig_names_dict,
    samp_freqs_dict,
    units_dict,
) = wfdb_conv_funcs.get_signal_meta_dicts(channel_cols, sickbay_fast_lookup_df)

In [None]:
# Every channel shares one binary format, so repeat the configured format once per channel.
n_channels = len(channel_cols)
fmt = wfdb_conv_funcs.BINARY_FMT * n_channels  # [wfdb.io.Record PARAM]

# WFDB 'units': the unit strings from the metadata lookup, in dict order.
units = [*units_dict.values()]  # [wfdb.io.Record PARAM]

# WFDB 'sig_name': the signal names from the metadata lookup, in dict order.
sig_names_list = [*sig_names_dict.values()]  # [wfdb.io.Record PARAM]

In [None]:
# Expected sampling frequency: the largest value listed in the mapping file for the available
# channels, truncated to an integer.
expected_fs = int(max(samp_freqs_dict.values()))

In [None]:
# Validate the inter-sample timing against the expected sampling frequency (within HZ_TOLERANCE);
# the value returned here is used as the sampling frequency from this point on.
fs = wfdb_conv_funcs.check_timing_coherancy(
    signal_df,
    expected_fs,
    wfdb_conv_funcs.HZ_TOLERANCE,
)

In [None]:
# Attach a DateTime index to the signal dataframe and keep the base date/time that is returned.
base_date, base_time = wfdb_conv_funcs.add_timing_index(
    signal_df,
    input_date,
    input_time,
    fs,
)

In [None]:
# Test fixture: blank out three row ranges so the missing-data handling below has gaps to find.
# .loc slices are INCLUSIVE of the end label (unlike .iloc), hence the "- 1" on each stop value.
# NOTE(review): .loc selects by index label, so this assumes signal_df still has an integer
# index at this point -- confirm that add_timing_index does not replace it.
signal_df.loc[500_000:600_000 - 1, channel_cols] = np.nan
signal_df.loc[10_000_000:11_000_000 - 1, channel_cols] = np.nan
signal_df.loc[15_000_000:16_000_000 - 1, channel_cols] = np.nan

In [None]:
# Scan the signal matrix for runs of missing data, recording where data blocks and NaN blocks start.
(
    data_block_start_indices,
    NaN_block_start_indices,
) = wfdb_conv_funcs.check_missing_data_blocks(signal_df, channel_cols)

In [None]:
# Derive segment start/stop indices so that no segment exceeds the maximum length we want to keep.
(
    segment_start_indices,
    segment_stop_indices,
) = wfdb_conv_funcs.check_oversized_chunks(
    data_block_start_indices,
    NaN_block_start_indices,
    fs,
)

In [None]:
# The multi-segment record is named after the Sickbay ID read from the input file.
master_record_name = input_file_ID  # [wfdb.io.Record PARAM]

In [None]:
# Gather the per-segment metadata (record names, start times, data, channels) for every segment.
(
    segment_record_names_list,
    data_segment_start_times_list,
    data_segment_list,
    segment_channels_list,
) = wfdb_conv_funcs.prepare_segment_data(
    signal_df=signal_df,
    channel_cols=channel_cols,
    master_record_name=master_record_name,
    segment_start_indices=segment_start_indices,
    segment_stop_indices=segment_stop_indices,
)

In [None]:
# Build one record per segment from the prepared segment data and the channel metadata.
segments_record_list = wfdb_conv_funcs.create_segment_records(
    segment_record_names_list=segment_record_names_list,
    data_segment_start_times_list=data_segment_start_times_list,
    data_segment_list=data_segment_list,
    segment_channels_list=segment_channels_list,
    units_dict=units_dict,
    sig_names_dict=sig_names_dict,
    fs=fs,
)

In [None]:
# Combine the segment records and their metadata with the record-level metadata
# into a single multi-segment record.
sickbay_multirecord = wfdb_conv_funcs.create_multirecord(
    segments_record_list=segments_record_list,
    master_record_name=master_record_name,
    segment_record_names_list=segment_record_names_list,
    data_segment_start_times_list=data_segment_start_times_list,
    channel_cols=channel_cols,
    sig_names_list=sig_names_list,
    units=units,
    fs=fs,
)

In [None]:
# TODO: Confirm whether the empty segments need a '~' entry in 'seg_name'.

In [None]:
# Display the attributes stored on the multi-segment record for a quick visual check.
vars(sickbay_multirecord)

In [None]:
# Write the record to disk: the WFDB header file(s) plus any associated dat files for this object.
# write_dir is left empty here; set it to the desired output directory before running.
sickbay_multirecord.wrsamp(
    write_dir='',
)