In [1]:
import os
import pickle
from pyologger.load_data.datareader import DataReader
from pyologger.load_data.metadata import Metadata
from pyologger.plot_data.plotter import *
from pyologger.calibrate_data.calibrate_acc_mag import *
#from pyologger.process_data.feature_generation_utils import get_heart_rate

# Change the current working directory to the root directory
# os.chdir("/Users/fbar/Documents/GitHub/pyologger")
os.chdir("/Users/jessiekb/Documents/GitHub/pyologger")

root_dir = os.getcwd()
data_dir = os.path.join(root_dir, "data")

# Verify the current working directory
print(f"Current working directory: {root_dir}")

Current working directory: /Users/jessiekb/Documents/GitHub/pyologger


In [2]:
# Initialize the info class
metadata = Metadata()
metadata.fetch_databases(verbose=False)
metadata.find_relations(verbose=False)

# Save databases
deployment_db = metadata.get_metadata("deployment_DB")
logger_db = metadata.get_metadata("logger_DB")
recording_db = metadata.get_metadata("recording_DB")
animal_db = metadata.get_metadata("animal_DB")
dataset_db = metadata.get_metadata("dataset_DB")

Loaded Notion secret token.
Loaded database ID for deployment_DB.
Loaded database ID for recording_DB.
Loaded database ID for logger_DB.
Loaded database ID for animal_DB.
Loaded database ID for dataset_DB.


In [3]:
# Define the path to your custom mapping file
channel_mapping_path = os.path.join(root_dir, 'channel_mapping.json')

datareader = DataReader(deployment_folder_path=data_dir)

deployment_folder = datareader.check_deployment_folder(deployment_db, data_dir)
edf_filename_template = os.path.join(datareader.files_info['deployment_folder_path'], 'outputs', 'edf_test_{sensor}.edf')

if deployment_folder:
    datareader.read_files(metadata, save_csv=False, save_parq=False, save_edf=False, 
                          custom_mapping_path=channel_mapping_path, save_netcdf=False,
                          edf_filename_template=edf_filename_template, edf_save_from='sensor_data')

Step 1: Displaying deployments to help you select one.


TypeError: 'NoneType' object is not subscriptable

In [4]:
deployment_folder = '/Users/jessiekb/Documents/GitHub/pyologger/data/2024-01-16_oror-002a'

In [5]:
# Load the data_reader object from the pickle file
pkl_path = os.path.join(deployment_folder, 'outputs', 'data.pkl')

with open(pkl_path, 'rb') as file:
    data_pkl = pickle.load(file)

for logger_id, info in data_pkl.logger_info.items():
    sampling_frequency = info.get('datetime_metadata', {}).get('fs', None)
    if sampling_frequency is not None:
        # Format the sampling frequency to 5 significant digits
        print(f"Sampling frequency for {logger_id}: {sampling_frequency} Hz")
    else:
        print(f"No sampling frequency available for {logger_id}")

Sampling frequency for CC-96: 400 Hz
Sampling frequency for UF-01: 100 Hz


In [6]:
# Load color mappings
color_mapping_path = os.path.join(root_dir, 'color_mappings.json')

# Streamlit sidebar for time range selection
imu_logger_to_use = 'CC-96'
ephys_logger_to_use = 'UF-01'

# Define the overlapping time range
imu_df = data_pkl.logger_data[imu_logger_to_use]
ephys_df = data_pkl.logger_data[ephys_logger_to_use]
OVERLAP_START_TIME = max(imu_df['datetime'].min(), ephys_df['datetime'].min()).to_pydatetime()
OVERLAP_END_TIME = min(imu_df['datetime'].max(), ephys_df['datetime'].max()).to_pydatetime()


In [7]:
import numpy as np
from scipy.signal import butter, filtfilt, bartlett
import wfdb

# Bandpass Filter Function
def bandpass_filter(signal, lowcut, highcut, fs, order=2):
    nyquist = 0.5 * fs
    low = lowcut / nyquist
    high = highcut / nyquist
    b, a = butter(order, [low, high], btype='band')
    return filtfilt(b, a, signal)

# Spike Removal Function
def remove_spikes(signal, threshold=400):
    median = np.median(signal)
    mad = np.median(np.abs(signal - median))
    return np.where(np.abs(signal - median) > threshold * mad, median, signal)

# Signal Smoothing Function
def smooth_signal(signal, smooth_sec, fs):
    window = int(smooth_sec * fs)
    return np.convolve(np.abs(signal), bartlett(window), mode='same')

# Sliding Window Normalization Function
def sliding_window_normalization(signal, window_size, noise=1e-10):
    half_window = window_size // 2
    normalized_signal = np.array([(signal[i] - np.mean(signal[max(0, i - half_window):min(len(signal), i + half_window)])) / 
                     (np.std(signal[max(0, i - half_window):min(len(signal), i + half_window)]) + noise) 
                     for i in range(len(signal))])
    return normalized_signal

# Peak Refinement with WFDB
def refine_peaks_with_wfdb(cleaned_signal, rpeaks, fs, search_radius=0.5, sample_rate=1000, peak_dir="compare"):
    return wfdb.processing.correct_peaks(cleaned_signal, rpeaks, smooth_window_size = int(0.5 * fs), search_radius=int(search_radius * sample_rate), peak_dir=peak_dir)

# Absolute Maxima Search Function
def absolute_maxima_search(refined_peaks, original_signal, QRS_width, sample_rate):
    QRS_samples = int(QRS_width * sample_rate)
    return [np.argmax(original_signal[max(0, peak - QRS_samples):peak + 1]) + max(0, peak - QRS_samples) for peak in refined_peaks]


ImportError: cannot import name 'bartlett' from 'scipy.signal' (/Users/jessiekb/opt/anaconda3/envs/finescale_env/lib/python3.12/site-packages/scipy/signal/__init__.py)

In [None]:
import numpy as np
import pandas as pd
from scipy.signal import butter, filtfilt, bartlett, find_peaks
import plotly.graph_objs as go

# Configuration Section
BROAD_LOW_CUTOFF = 1  # Hz for bandpass filter
BROAD_HIGH_CUTOFF = 35  # Hz for bandpass filter
NARROW_LOW_CUTOFF = 5  # Hz for bandpass filter
NARROW_HIGH_CUTOFF = 20  # Hz for bandpass filter
FILTER_ORDER = 2  # Order of the bandpass filter
SPIKE_THRESHOLD = 400  # Threshold for spike removal
SMOOTH_SEC_MULTIPLIER = 3  # Multiplier for smoothing window size
WINDOW_SIZE_MULTIPLIER = 5  # Multiplier for sliding window normalization
NORMALIZATION_NOISE = 1e-10  # Noise level for sliding window normalization
PEAK_HEIGHT = -0.4  # Minimum peak height for detection
PEAK_DISTANCE_SEC = 0.2  # Minimum distance between peaks in seconds
SEARCH_RADIUS_SEC = 0.5  # Search radius for peak refinement in seconds
MIN_PEAK_HEIGHT = 500  # Minimum acceptable peak height
MAX_PEAK_HEIGHT = 12000  # Maximum acceptable peak height
OVERLAP_START_TIME = '2024-01-16 09:30:00'  # Start time for plotting
OVERLAP_END_TIME = '2024-01-16 10:30:00'  # End time for plotting
ZOOM_START_TIME = '2024-01-16 10:00:00'  # Start time for zooming
ZOOM_END_TIME = '2024-01-16 10:02:30'  # End time for zooming
TARGET_SAMPLING_RATE = 25  # Target sampling rate for plotting

# Combined Peak Detection Function
def peak_detect(signal, sampling_rate, QRS_width=0.200, 
                broad_lowcut= BROAD_LOW_CUTOFF, broad_highcut = BROAD_HIGH_CUTOFF,
                narrow_lowcut=NARROW_LOW_CUTOFF, narrow_highcut=NARROW_HIGH_CUTOFF, 
                filter_order=FILTER_ORDER,
                spike_threshold=SPIKE_THRESHOLD, smooth_sec_multiplier=SMOOTH_SEC_MULTIPLIER,
                window_size_multiplier=WINDOW_SIZE_MULTIPLIER, normalization_noise=NORMALIZATION_NOISE,
                peak_height=PEAK_HEIGHT, peak_distance_sec=PEAK_DISTANCE_SEC, search_radius_sec=SEARCH_RADIUS_SEC,
                min_peak_height=MIN_PEAK_HEIGHT, max_peak_height=MAX_PEAK_HEIGHT,
                enable_bandpass=True, enable_spike_removal=True, enable_smoothing=True, 
                enable_normalization=True, enable_refinement=True):
    
    results = {}

    # Bandpass filter
    if enable_bandpass:
        broad_bandpassed_signal = bandpass_filter(signal, lowcut=broad_lowcut, highcut=broad_highcut, fs=sampling_rate, order=filter_order)
        results['broad_bandpassed_signal'] = broad_bandpassed_signal
        narrow_bandpassed_signal = bandpass_filter(signal, lowcut=narrow_lowcut, highcut=narrow_highcut, fs=sampling_rate, order=filter_order)
        results['narrow_bandpassed_signal'] = narrow_bandpassed_signal
    else:
        narrow_bandpassed_signal = signal

    # Spike removal
    if enable_spike_removal:
        spike_removed_signal = remove_spikes(narrow_bandpassed_signal, threshold=spike_threshold)
        results['spike_removed_signal'] = spike_removed_signal
    else:
        spike_removed_signal = narrow_bandpassed_signal

    # Smoothing
    if enable_smoothing:
        smoothed_signal = smooth_signal(spike_removed_signal, smooth_sec=QRS_width * smooth_sec_multiplier, fs=sampling_rate)
        results['smoothed_signal'] = smoothed_signal
    else:
        smoothed_signal = spike_removed_signal

    # Normalization
    if enable_normalization:
        normalized_signal = sliding_window_normalization(smoothed_signal, int(window_size_multiplier * sampling_rate), noise=normalization_noise)
        results['normalized_signal'] = normalized_signal
    else:
        normalized_signal = smoothed_signal

    # Peak Detection
    detected_peaks = find_peaks(normalized_signal, height=peak_height, distance=int(peak_distance_sec * sampling_rate))[0]
    results['detected_peaks'] = detected_peaks

    # Peak Refinement
    if enable_refinement:
        refined_peaks = refine_peaks_with_wfdb(spike_removed_signal, detected_peaks, fs = sampling_rate, search_radius=search_radius_sec, sample_rate=sampling_rate,
                                               peak_dir = "both")
        results['refined_peaks'] = refined_peaks
        refined_indices = refined_peaks
        #refined_indices = absolute_maxima_search(refined_peaks, spike_removed_signal, QRS_width * 2, sampling_rate)
    else:
        refined_indices = detected_peaks

    # Remove duplicate refined_indices while keeping corresponding heights aligned
    unique_refined_indices, index_positions = np.unique(refined_indices, return_index=True)
    results['unique_refined_indices'] = unique_refined_indices

    # Align heights with unique refined indices
    height_original = smoothed_signal[unique_refined_indices]
    height_normalized = normalized_signal[refined_peaks[index_positions]]
    results['height_original'] = height_original
    results['height_normalized'] = height_normalized

    # Filter out peaks that are too close to each other
    min_distance_samples = int(peak_distance_sec * sampling_rate)
    filtered_indices = [unique_refined_indices[0]] if len(unique_refined_indices) > 0 else []  # Always include the first peak if available
    for i in range(1, len(unique_refined_indices)):
        if unique_refined_indices[i] - filtered_indices[-1] >= min_distance_samples:
            filtered_indices.append(unique_refined_indices[i])
    
    # Update the DataFrame with the filtered indices
    peak_df = pd.DataFrame({
        'refined_index': filtered_indices,
        'height_original': smoothed_signal[filtered_indices],
        'height_normalized': normalized_signal[filtered_indices]
    })

    # Filter out peaks based on both minimum and maximum peak height
    filtered_peak_df = peak_df[(peak_df['height_original'] >= min_peak_height) & 
                               (peak_df['height_original'] <= max_peak_height)].reset_index(drop=True)
    results['filtered_peak_df'] = filtered_peak_df

    # Label peaks as accepted or rejected
    peak_df['key'] = np.where(peak_df['refined_index'].isin(filtered_peak_df['refined_index']), 
                              'heartbeat_auto_detect_accepted', 
                              'heartbeat_auto_detect_rejected')
    # Ensure no NaN values in 'key'
    peak_df['key'].fillna('heartbeat_auto_detect_unknown', inplace=True)
    results['peak_df'] = peak_df

    return results

# Example usage of peak_detect function
results = peak_detect(
    signal=data_pkl.sensor_data['ecg']['ecg'], 
    sampling_rate=data_pkl.sensor_info['ecg']['sampling_frequency']
)

peak_df = results['peak_df']

# Add intermediate results to the sensor data for visualization
data_pkl.sensor_data['ecg']['broad_bandpassed_signal'] = results.get('broad_bandpassed_signal', None)-2000
data_pkl.sensor_data['ecg']['narrow_bandpassed_signal'] = results.get('narrow_bandpassed_signal', None)
data_pkl.sensor_data['ecg']['spike_removed_signal'] = results.get('spike_removed_signal', None)
data_pkl.sensor_data['ecg']['smoothed_signal'] = results.get('smoothed_signal', None) / 10
data_pkl.sensor_data['ecg']['normalized_signal'] = results.get('normalized_signal', None)*250 + 4000

# Calculate RR intervals (in seconds) between successive peaks
if len(peak_df) > 1:
    rr_intervals = np.diff(peak_df['refined_index']) / data_pkl.sensor_info['ecg']['sampling_frequency']
    # Calculate heart rate (in bpm) from RR intervals
    heart_rate = 60 / rr_intervals
else:
    rr_intervals = np.array([])
    heart_rate = np.array([])

# Create an array for heart rate data, initialized with NaNs
hr_data = np.full(len(data_pkl.sensor_data['ecg']['ecg']), np.nan)

# Fill in the heart rate data between detected peaks
for i in range(len(heart_rate)):
    start_idx = peak_df['refined_index'].iloc[i]
    end_idx = peak_df['refined_index'].iloc[i + 1]
    hr_data[start_idx:end_idx] = heart_rate[i]

# Optionally, fill in the remaining segment with the last heart rate value
if len(peak_df) > 0 and len(heart_rate) > 0:
    hr_data[peak_df['refined_index'].iloc[-1]:] = heart_rate[-1]

# Assign hr_data to the sensor data
data_pkl.sensor_data['ecg']['hr_data'] = hr_data

# Convert refined indices to datetime for event annotations
matching_datetimes = data_pkl.sensor_data['ecg'].loc[peak_df['refined_index'], 'datetime'].values
utc_datetimes = pd.to_datetime(matching_datetimes).tz_localize('UTC')
local_timezone = data_pkl.sensor_info['ecg']['sensor_start_datetime'].tz
peak_df['datetime'] = utc_datetimes.tz_convert(local_timezone)

# Append detected peaks to the event data with heart rate values at each peak
hr_values = hr_data[peak_df['refined_index']]

hr_events = pd.DataFrame({
    'datetime': peak_df['datetime'],
    'key': peak_df['key'],
    'short_description': 'calculated heart rate from detected peaks',
    'type': 'point',
    'value': hr_values  # Use the interpolated heart rate values here
})

# Clear any existing events with keys that start with 'heartbeat_auto_detect'
data_pkl.event_data['key'] = data_pkl.event_data['key'].astype(str)  # Ensure 'key' is string type
data_pkl.event_data = data_pkl.event_data[~data_pkl.event_data['key'].str.startswith('heartbeat_auto_detect', na=False)]

# Concatenate with hr_events
data_pkl.event_data = pd.concat([data_pkl.event_data, hr_events], ignore_index=True)

# Visualization: Define Notes and Plot the Results
notes_to_plot = {
    'heartbeat_manual_ok': {'sensor': 'ecg', 'symbol': 'triangle-down', 'color': 'blue'},
    'heartbeat_auto_detect_accepted': {'sensor': 'ecg', 'symbol': 'triangle-up', 'color': 'green'},
    'heartbeat_auto_detect_rejected': {'sensor': 'ecg', 'symbol': 'triangle-up', 'color': 'red'}
}
events_to_plot = ['heartbeat_manual_ok', 'heartbeat_auto_detect_accepted', 'heartbeat_auto_detect_rejected']

fig = plot_tag_data_interactive4(
    data_pkl=data_pkl,
    sensors=['ecg'],
    channels={'ecg': ['broad_bandpassed_signal','narrow_bandpassed_signal', 'spike_removed_signal', 'smoothed_signal', 'normalized_signal']},
    time_range=(OVERLAP_START_TIME, OVERLAP_END_TIME),
    note_annotations=notes_to_plot,
    color_mapping_path=color_mapping_path,
    target_sampling_rate=TARGET_SAMPLING_RATE,
    zoom_start_time=ZOOM_START_TIME,
    zoom_end_time=ZOOM_END_TIME,
    zoom_range_selector_channel='ecg',
    plot_event_values=[],
)

# Add horizontal lines for min and max peak height
min_peak_height_line = MIN_PEAK_HEIGHT / 10
max_peak_height_line = MAX_PEAK_HEIGHT / 10
min_peak_height_line2 = 4000 + (250 * PEAK_HEIGHT)

fig.add_trace(go.Scatter(
    x=[OVERLAP_START_TIME, OVERLAP_END_TIME],
    y=[min_peak_height_line, min_peak_height_line],
    mode="lines",
    line=dict(color="gray", dash="dot"),
    name="Min Peak Height"
))

fig.add_trace(go.Scatter(
    x=[OVERLAP_START_TIME, OVERLAP_END_TIME],
    y=[min_peak_height_line2, min_peak_height_line2],
    mode="lines",
    line=dict(color="gray", dash="dot"),
    name="Min Peak Height Peak detect"
))

fig.add_trace(go.Scatter(
    x=[OVERLAP_START_TIME, OVERLAP_END_TIME],
    y=[max_peak_height_line, max_peak_height_line],
    mode="lines",
    line=dict(color="gray", dash="dot"),
    name="Max Peak Height"
))

# Display the figure
fig.show()


In [None]:
import plotly.offline as pyo

# Specify the file path to save the HTML file
html_file_path = os.path.join(deployment_folder,'hr_data.html')

# Save the fig plot as an HTML file
pyo.plot(fig, filename=html_file_path, auto_open=False)