In [6]:
!pip install -q pyhrv

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.2/3.2 MB[0m [31m17.6 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m142.2/142.2 kB[0m [31m12.8 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m230.8/230.8 kB[0m [31m19.4 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m55.7/55.7 kB[0m [31m4.8 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
  Building wheel for spectrum (setup.py) ... [?25l[?25hdone
  Building wheel for easydev (setup.py) ... [?25l[?25hdone


In [1]:
# Mount Google Drive at /content/drive; the data paths configured in this notebook
# all live underneath that mount point.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
# The input recordings and the generated outputs share one Drive folder, so both
# path variables are bound to the same string (trailing slash included, because
# file names are appended by plain string concatenation).
folder_input_path = output_output_path = '/content/drive/My Drive/Colab Notebooks/Ch4_HR_HRV_Generation/data-hrv-swell-kw/'
# Base file name (without extension) of the recording to process.
name = 'p1'

In [18]:
import pandas as pd

# Load the whitespace-separated RR-interval file <name>.txt. The file has no header
# row, so the two columns are named explicitly.
# `sep=r'\s+'` is the documented equivalent of `delim_whitespace=True`, which is
# deprecated since pandas 2.2 and emits a FutureWarning.
p1_data = pd.read_csv(
    folder_input_path + name + '.txt',
    sep=r'\s+',
    header=None,
    names=['Timestamp', 'RR Interval'],
)

# Display the first few rows to ensure correct loading
p1_data.head()


Unnamed: 0,Timestamp,RR Interval
0,1.265625,870.11719
1,1.515625,885.36996
2,1.765625,890.18974
3,2.015625,886.73851
4,2.265625,877.1782


In [5]:
import numpy as np

# Function to segment data into one-minute intervals
def segment_data(df, segment_length=60):
    """
    Group rows into consecutive fixed-length time segments.

    The segment number of a row is ``Timestamp / segment_length`` truncated to an
    integer, so with the default of 60 (and timestamps in seconds) each group
    covers one minute.

    The input frame is NOT modified: the 'Minute' column is added to a derived
    copy. This keeps the caller's data unchanged on re-runs and avoids a
    SettingWithCopyWarning when ``df`` is itself a filtered slice.

    :param df: DataFrame with a numeric 'Timestamp' column
    :param segment_length: Segment duration, in the same unit as 'Timestamp'
    :return: pandas GroupBy object keyed by the integer 'Minute' column
    """
    # Same arithmetic as before (true division, then integer truncation)
    minute_index = (df['Timestamp'] / segment_length).astype(int)
    # assign() returns a new DataFrame, leaving the caller's frame untouched
    return df.assign(Minute=minute_index).groupby('Minute')

# Function to calculate HR and RMSSD for each segment
def calculate_hr_rmssd(segments):
    """
    Compute mean heart rate and RMSSD for every segment.

    :param segments: Iterable of ``(minute, frame)`` pairs, e.g. the GroupBy
        returned by ``segment_data``; each frame needs an 'RR Interval' column
        holding RR intervals in milliseconds
    :return: List of ``(minute, hr, rmssd)`` tuples, one per segment
    """
    def _hr_and_rmssd(rr_ms):
        # 60,000 ms per minute divided by the mean RR interval gives beats per minute
        mean_hr = 60000 / np.mean(rr_ms)
        # RMSSD: root of the mean squared difference between successive intervals
        successive = np.diff(rr_ms)
        return mean_hr, np.sqrt(np.mean(successive ** 2))

    return [
        (minute, *_hr_and_rmssd(segment['RR Interval'].values))
        for minute, segment in segments
    ]

# Segment the data into 60-unit groups (the segment_data default segment_length)
segments = segment_data(p1_data)

# Calculate HR and RMSSD for each one-minute segment
hr_rmssd_results = calculate_hr_rmssd(segments)

# Display the first few (minute, hr, rmssd) tuples
hr_rmssd_results[:5]


[(0, 81.4128668793742, 9.368697454179117),
 (1, 62.442459119441395, 19.46203888487065),
 (2, 60.33541272472929, 11.019985617219046),
 (3, 61.86862417582038, 11.941023105256441),
 (4, 60.33409476550284, 13.909957566500099)]

In [None]:
# Debug aid: rebuild the groups and print every (minute, rows) pair in full.
# NOTE(review): this dumps each group's entire frame; consider limiting the output.
segments = segment_data(p1_data)
for minute, segment in segments:
  print(minute, segment)

In [None]:
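# Re-implement RMSSD explicitly from its formula, sqrt(sum(diff^2) / (N - 1)).
# Note: for N > 1 this gives the same value as the np.mean-based RMSSD in
# calculate_hr_rmssd, because the mean is taken over the N - 1 differences.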

# Function to calculate RMSSD correctly
def calculate_corrected_rmssd(rr_intervals):
    """
    Root mean square of successive differences (RMSSD) of an RR series.

    :param rr_intervals: Sequence of RR intervals
    :return: ``sqrt(sum(diff ** 2) / (N - 1))`` where N is the number of
        intervals, or NaN when fewer than two intervals are supplied
    """
    n_diffs = len(rr_intervals) - 1
    if n_diffs < 1:
        # No successive difference exists, so RMSSD is undefined
        return np.nan

    deltas = np.diff(rr_intervals)
    return np.sqrt(np.sum(deltas ** 2) / n_diffs)

# Re-calculate HR and RMSSD for each one-minute segment using calculate_corrected_rmssd.
# Relies on `segments` having been built by an earlier cell.
corrected_hr_rmssd_results = []

for minute, segment in segments:
    rr_intervals = segment['RR Interval'].values
    # HR = 60,000 ms per minute / mean RR interval; NaN when the mean is not positive
    hr = 60000 / np.mean(rr_intervals) if np.mean(rr_intervals) > 0 else np.nan
    rmssd = calculate_corrected_rmssd(rr_intervals)
    corrected_hr_rmssd_results.append((minute, hr, rmssd))

# Display the first few corrected (minute, hr, rmssd) tuples
corrected_hr_rmssd_results[:5]


In [32]:
def exclude_abnormal_rr_intervals(data, lower_threshold=500, upper_threshold=1200,
                                  segment_length=60, verbose=True):
    """
    Drop every segment that contains at least one out-of-range RR interval.

    A row is abnormal when its 'RR Interval' is strictly below ``lower_threshold``
    or strictly above ``upper_threshold``. Every row sharing a segment number with
    an abnormal row is removed, not just the abnormal row itself.

    The input frame is not modified. The result is an independent copy, so callers
    may add columns to it without triggering a SettingWithCopyWarning.

    :param data: DataFrame with 'Timestamp' and 'RR Interval' columns
    :param lower_threshold: Lower threshold for RR intervals (in milliseconds)
    :param upper_threshold: Upper threshold for RR intervals (in milliseconds)
    :param segment_length: Segment duration in 'Timestamp' units (default 60,
        i.e. one minute when timestamps are in seconds)
    :param verbose: When True (default), print the excluded segment numbers
    :return: New DataFrame holding only the rows of the retained segments, with an
        added integer 'Minute' column; the original index labels are preserved
    """
    # assign() builds a new frame, so the caller's DataFrame stays untouched
    data_copy = data.assign(
        Minute=(data['Timestamp'] // segment_length).astype(int)
    )

    # Segment numbers containing at least one out-of-range RR interval
    is_abnormal = (
        (data_copy['RR Interval'] < lower_threshold)
        | (data_copy['RR Interval'] > upper_threshold)
    )
    abnormal_intervals = data_copy.loc[is_abnormal, 'Minute'].unique()

    if verbose:
        print(abnormal_intervals)

    # .copy() detaches the result from data_copy so later column assignments
    # on it are safe
    keep = ~data_copy['Minute'].isin(abnormal_intervals)
    return data_copy.loc[keep].copy()

In [None]:
def exclude_abnormal_intervals(data):
    """
    Drop every one-minute segment containing an RR interval outside 500-1200.

    Fixed-threshold variant: a row is abnormal when its 'RR Interval' is below 500
    or above 1200, and all rows of a minute containing an abnormal row are removed.

    The caller's DataFrame is not modified; the 'Minute' column is added to a
    derived frame and the result is returned as an independent copy.

    :param data: DataFrame with 'Timestamp' and 'RR Interval' columns
    :return: New DataFrame with the rows of the retained minutes and an added
        integer 'Minute' column
    """
    # assign() returns a new frame instead of writing 'Minute' into the input
    labelled = data.assign(Minute=(data['Timestamp'] // 60).astype(int))

    # Minutes that contain at least one out-of-range RR interval
    out_of_range = (labelled['RR Interval'] < 500) | (labelled['RR Interval'] > 1200)
    minutes_to_exclude = labelled.loc[out_of_range, 'Minute'].unique()

    # Keep only rows from the remaining minutes, detached from the intermediate frame
    return labelled.loc[~labelled['Minute'].isin(minutes_to_exclude)].copy()

In [46]:
from pyhrv import time_domain as td

def calculate_hrv_agg(segments):
    """
    Build one row of pyhrv time-domain metrics per segment.

    For every ``(minute, frame)`` pair the 'RR Interval' values are passed as
    ``nni`` to ``td.hr_parameters`` and ``td.rmssd``, and the selected results are
    collected under display-friendly column names.

    :param segments: Iterable of ``(minute, frame)`` pairs, e.g. the GroupBy
        returned by ``segment_data``; each frame needs an 'RR Interval' column
        (presumably in milliseconds -- confirm against the pyhrv documentation)
    :return: List of dicts with keys 'Minute', 'HR Mean', 'HR Min', 'HR Max',
        'HR Std' and 'RMSSD', in that order
    """
    # Output column name -> key in the td.hr_parameters result
    hr_columns = (
        ('HR Mean', 'hr_mean'),
        ('HR Min', 'hr_min'),
        ('HR Max', 'hr_max'),
        ('HR Std', 'hr_std'),
    )

    rows = []
    for minute, segment in segments:
        nni = segment['RR Interval'].values
        hr_stats = td.hr_parameters(nni=nni)
        rmssd_stats = td.rmssd(nni=nni)

        row = {'Minute': minute}
        row.update((column, hr_stats[key]) for column, key in hr_columns)
        row['RMSSD'] = rmssd_stats['rmssd']
        rows.append(row)
    return rows

# Drop every minute that contains an out-of-range RR interval
# (by default the helper also prints the excluded minute numbers)
df = exclude_abnormal_rr_intervals(p1_data)
# print(df)

# Segment the remaining data into one-minute groups
segments = segment_data(df)

# Calculate the HR statistics and RMSSD for each one-minute segment,
# then collect the per-minute dicts into a DataFrame for display
hr_rmssd_results = calculate_hrv_agg(segments)
hr_rmssd_df = pd.DataFrame(hr_rmssd_results)
hr_rmssd_df

[ 59  60  61  62  63  65  93  94  96  97  98  99 100 108 113 120]


Unnamed: 0,Minute,HR Mean,HR Min,HR Max,HR Std,RMSSD
0,0,83.576291,63.658755,113.284780,13.838706,9.368697
1,1,63.526361,50.534576,82.710555,8.700709,19.462039
2,2,60.507775,52.969735,66.480229,3.231229,11.019986
3,3,62.222693,55.807573,77.705483,4.882485,11.941023
4,4,60.533492,53.454740,72.514359,3.535848,13.909958
...,...,...,...,...,...,...
129,145,59.140316,50.676510,68.930145,3.847367,11.741175
130,146,63.278928,55.966208,78.192564,5.297531,11.836447
131,147,63.782677,55.045165,73.963887,4.734211,9.753958
132,148,71.569342,62.287994,81.617559,3.716097,8.212164


In [47]:
# Write the per-minute HRV table to <output_output_path><name>_HRV2.csv,
# omitting the DataFrame index column.
hrv_csv_path = output_output_path + name + '_HRV2.csv'
# rr_csv_path = output_output_path + name + '_RR2.csv'
hr_rmssd_df.to_csv(hrv_csv_path, index=False)
# rr_df.to_csv(rr_csv_path, index=False)