# Process RR_peaks to metrics

**The following notebook** 
* Reads the RR peaks file
* Derives metrics from 5 min intervals
* Joins clinical indicator file with metrics on patient id 
* Saves the file

In [None]:
import pandas as pd
import h5py
import helper

# HDF5 input file; each of its top-level groups is loaded below as one patient's signal.
read_path = "our_processing/Nabian_2018_derrived_data.h5"
# Destination of the merged metrics/clinical table; the last cell also writes a
# CSV copy to the same path with ".pkl" replaced by ".csv".
pickle_path = 'our_processing/Nabian_2025_10_11.pkl'

Read the RR peaks file at `read_path` (`our_processing/Nabian_2018_derrived_data.h5`): one HDF5 group per patient.

In [2]:


def read_data(file_path, group_name, signal_name="r_intervals"):
    """Load one dataset from an HDF5 file into memory.

    Args:
        file_path: Path of the HDF5 file to open (read-only).
        group_name: Name of the group inside the file that holds the dataset.
        signal_name: Name of the dataset within that group.

    Returns:
        The complete contents of ``file[group_name][signal_name]``, read
        before the file is closed.

    Raises:
        KeyError: If the group or the dataset does not exist in the file.
    """
    with h5py.File(file_path, "r") as h5_file:
        # `[()]` materialises the whole dataset, so the value stays usable
        # after the file handle is released.
        return h5_file[group_name][signal_name][()]

def get_page_names(file_path):
    """List the names of the top-level entries of an HDF5 file.

    Args:
        file_path: Path of the HDF5 file to open (read-only).

    Returns:
        A list with the keys found at the root of the file.
    """
    with h5py.File(file_path, "r") as h5_file:
        # Copy the keys into a plain list so they outlive the open file.
        return [*h5_file.keys()]

# Names of the top-level HDF5 groups; they become the keys of `patient_arrays`.
page_names = get_page_names(read_path)

# Map every group name to the array stored under read_data's default signal name.
patient_arrays = dict(
    (name, read_data(read_path, name)) for name in page_names
)

# Trailing expression: show the loaded arrays as the cell output.
patient_arrays

{'19070921': array([764., 772., 780., ..., 708., 696., 704.]),
 '19072205': array([824., 772., 776., ..., 820., 800., 804.]),
 '19072214': array([1268.,  632.,  644., ..., 1332., 1576., 1812.]),
 '19072938': array([684., 700., 724., ..., 688., 676., 448.]),
 '19072939': array([732., 720., 668., ..., 656., 664., 556.]),
 '19072940': array([ 668.,  656.,  656., ..., 1016.,  472.,  912.]),
 '19080106': array([892., 888., 856., ..., 816., 816., 560.]),
 '19080715': array([1100.,  948.,  964., ..., 1228., 1044., 1440.]),
 '19081506': array([ 884.,  832.,  824., ..., 1016.,  520., 1608.]),
 '19082406': array([ 692.,  680.,  668., ..., 1780.,  792., 1276.]),
 '19090308': array([624., 624., 620., ..., 764., 768., 748.]),
 '19090320': array([708., 712., 684., ..., 636., 644., 620.]),
 '19101607': array([1508.,  956.,  972., ...,  604.,  848., 1388.]),
 '19101619': array([1356.,  952.,  944., ...,  740.,  752.,  748.]),
 '19102102': array([784., 804., 784., ..., 648., 648., 644.]),
 '19102103': 

Calculate the metrics for each patient.

In [3]:
# Compute the metrics for every patient and stack the results into one frame.
# The per-patient frames are collected in a list and concatenated once at the
# end: calling pd.concat inside the loop re-copies all accumulated rows on
# every iteration.
per_patient_metrics = []

# `patient_id` (not `id`) so the builtin id() is not shadowed in the notebook.
for patient_id, signal in patient_arrays.items():
    # presumably a DataFrame with one row per analysis window — confirm in helper
    individual_metrics = helper.patients_metrics(signal)
    # Tag every row with its patient so the later merge on 'id' can find it.
    individual_metrics['id'] = patient_id
    print(patient_id, "processed")
    per_patient_metrics.append(individual_metrics)

# pd.concat raises on an empty list; fall back to an empty frame, which is
# what the loop-based version produced when there were no patients.
metrics_df = (
    pd.concat(per_patient_metrics, ignore_index=True)
    if per_patient_metrics
    else pd.DataFrame()
)

19070921 processed
19072205 processed
19072214 processed


  return self.LF_power() / self.HF_power()


19072938 processed
19072939 processed
19072940 processed
19080106 processed
19080715 processed
19081506 processed
19082406 processed
19090308 processed
19090320 processed
19101607 processed
19101619 processed
19102102 processed
19102103 processed
19102524 processed
19102622 processed
19112609 processed
19120302 processed
19120323 processed
19120704 processed
19120723 processed
19121303 processed
19121735 processed
20010826 processed
20010827 processed
20011712 processed
20050628 processed
20052606 processed
20061729 processed
20092226 processed
20092535 processed
20101424 processed
20101822 processed
20102029 processed
20120116 processed
20120922 processed
20121033 processed
20121716 processed
20121718 processed
20122932 processed
20123017 processed


Read the clinical indicators

In [4]:
# Load the clinical indicators, expose the patient number under the name 'id'
# and store it as str, then list the available columns.
clin_indic = (
    helper.df_from_excel('actionable_data/Clinical indicators.xlsx')
    .rename(columns={'number': 'id'})
    .astype({'id': str})
)
display(clin_indic.columns)

Index(['id', 'gender', 'age', 'height', 'weight', 'admission FBG (mmol/L)',
       'Discharge FBG (mmol/L)', 'HbA1c (%)', 'SBP (mmHg)', 'DBP (mmHg)',
       'WBC (×109/L)', 'N% (%)', 'Hb (g/L)', 'PLT (×109/L)', 'CRP (mg/L)',
       'ALT (U/L)', ' AST (U/L)', 'AST/ALT', 'GGT (U/L)', 'BUN (mmol/L)',
       ' UA (mmol/L)', 'TG (mmol/L)', 'HDL-C (mmol/L)', 'LDL-C (mmol/L)',
       'UMA (mg)', 'UCr (g)', 'UACR (mg/g)', 'Diabetic Complications',
       'Diabetic nephropathy', 'Diabetic retinopathy and cataract',
       'Diabetic peripheral neuropathy',
       'Coronary artery disease and cardiac insufficiency',
       'Lower extremity atherosclerosis or stenosis', 'Carotid plaque'],
      dtype='object')

Inner merge the clinical indicators and processing results

In [5]:
# Join the per-window metrics with the clinical indicators on the patient id.
# astype() returns a new object instead of modifying the frame, so the cast
# has to be applied to the frame that is actually merged; a bare
# `metrics_df["id"].astype(str)` statement has no effect.
# 'id' is cast to str because clin_indic['id'] is stored as str.
merged_data = metrics_df.astype({"id": str}).merge(clin_indic, on='id', how='inner')

# Trailing expression: preview the first rows of the merged table.
merged_data.head()

Unnamed: 0,t_start,t_end,SDRR,RMSSD,pNN50 (%),Mean HR (bpm),ULF Power,ULF Peak Frequency,VLF Power,VLF Peak Frequency,...,UMA (mg),UCr (g),UACR (mg/g),Diabetic Complications,Diabetic nephropathy,Diabetic retinopathy and cataract,Diabetic peripheral neuropathy,Coronary artery disease and cardiac insufficiency,Lower extremity atherosclerosis or stenosis,Carotid plaque
0,0,300,60.931394,61.35441,4.422604,81.424124,0.0,0.0,3.708648,0.026677,...,411.0,,,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,300,600,58.906488,36.231774,3.902439,82.086174,0.0,0.0,1.837286,0.03004,...,411.0,,,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,600,900,91.847623,71.252995,12.096774,74.509595,0.0,0.0,6.999347,0.023358,...,411.0,,,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,900,1200,93.225784,90.241933,13.387978,73.345236,0.0,0.0,2.367134,0.020035,...,411.0,,,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,1200,1500,51.212332,33.27988,10.951009,69.574953,0.0,0.0,4.534024,0.036754,...,411.0,,,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [6]:
# Persist the merged table twice: as a pickle at `pickle_path`, and as a CSV
# (without the index column) at the same path with ".pkl" swapped for ".csv".
csv_path = pickle_path.replace(".pkl", '.csv')
merged_data.to_pickle(pickle_path)
merged_data.to_csv(csv_path, index=False)