In [2]:
pip install wfdb

Note: you may need to restart the kernel to use updated packages.


In [2]:
import wfdb
import pywt
import seaborn
from tqdm import tqdm
import numpy as np
import matplotlib.pyplot as plt
from wfdb import processing
import pandas as pd
from sklearn.model_selection import train_test_split

In [2]:
record_ecg = wfdb.rdrecord('data/infant9_ecg')
record_resp = wfdb.rdrecord('data/infant9_resp')

r_peaks = wfdb.io.rdann('data/infant9_ecg', 'qrsc')
resp_peaks = wfdb.io.rdann('data/infant9_resp', 'resp')

In [3]:
def compute_hr(sig_len, qrs_inds, fs):
   
    heart_rate = np.full(sig_len, np.nan, dtype="float32")

    if len(qrs_inds) < 2:
        return heart_rate

    for i in range(0, len(qrs_inds) - 2):
        a = qrs_inds[i]
        b = qrs_inds[i + 1]
        c = qrs_inds[i + 2]
        rr = (b - a) * (1.0 / fs) * 1000
        hr = 60000.0 / rr
        heart_rate[b + 1 : c + 1] = hr

    heart_rate[qrs_inds[-1] :] = heart_rate[qrs_inds[-1]]

    return heart_rate

In [5]:
heart_rate = processing.compute_hr(len(record_ecg.p_signal),r_peaks.sample,record_ecg.fs)
breathing_rate = processing.compute_hr(len(record_resp.p_signal),resp_peaks.sample,record_resp.fs)

In [7]:
# get the sampling interval 

ecg_samp_interval = 1/record_ecg.fs
resp_samp_interval = 1/record_resp.fs
print("ECG sampling frequency: ", record_ecg.fs, " Hz")
print("ECG sampling interval every ", ecg_samp_interval, " sec.")
print("RESP sampling frequency: ", record_resp.fs, " Hz")
print("RESP sampling interval every ", resp_samp_interval, " sec.")

ECG sampling frequency:  500  Hz
ECG sampling interval every  0.002  sec.
RESP sampling frequency:  50  Hz
RESP sampling interval every  0.02  sec.


In [8]:
time_ecg = np.arange(record_ecg.p_signal.shape[0])*ecg_samp_interval # ECG time axis
time_resp = np.arange(record_resp.p_signal.shape[0])*resp_samp_interval # RESP time axis

In [9]:
heart_data = {
    'Infant':'9',
    'Heart Rate': heart_rate,
    'Time (sec)': time_ecg,
        }
resp_data  = {
     'Infant':'9',
    'Respiratory Rate':breathing_rate,
       'Time (sec)':time_resp,}

heart_df = pd.DataFrame.from_dict(heart_data) 
resp_df = pd.DataFrame.from_dict(resp_data) 

In [10]:
print("Heart DF")
heart_df

Heart DF


Unnamed: 0,Infant,Heart Rate,Time (sec)
0,9,,0.000
1,9,,0.002
2,9,,0.004
3,9,,0.006
4,9,,0.008
...,...,...,...
126569083,9,150.0,253138.166
126569084,9,150.0,253138.168
126569085,9,150.0,253138.170
126569086,9,150.0,253138.172


In [11]:
print ("Resp DF")
resp_df

Resp DF


Unnamed: 0,Infant,Respiratory Rate,Time (sec)
0,9,,0.00
1,9,,0.02
2,9,,0.04
3,9,,0.06
4,9,,0.08
...,...,...,...
12656779,9,75.0,253135.58
12656780,9,75.0,253135.60
12656781,9,75.0,253135.62
12656782,9,75.0,253135.64


In [12]:
heart_df = heart_df.dropna()
heart_df = heart_df.dropna().reset_index(drop = True)

resp_df = resp_df.dropna()
resp_df = resp_df.dropna().reset_index(drop = True)

In [13]:
print("Heart DF")
heart_df

Heart DF


Unnamed: 0,Infant,Heart Rate,Time (sec)
0,9,142.857147,0.820
1,9,142.857147,0.822
2,9,142.857147,0.824
3,9,142.857147,0.826
4,9,142.857147,0.828
...,...,...,...
126568673,9,150.000000,253138.166
126568674,9,150.000000,253138.168
126568675,9,150.000000,253138.170
126568676,9,150.000000,253138.172


In [14]:
print ("Resp DF")
resp_df

Resp DF


Unnamed: 0,Infant,Respiratory Rate,Time (sec)
0,9,60.0,592.66
1,9,60.0,592.68
2,9,60.0,592.70
3,9,60.0,592.72
4,9,60.0,592.74
...,...,...,...
12627146,9,75.0,253135.58
12627147,9,75.0,253135.60
12627148,9,75.0,253135.62
12627149,9,75.0,253135.64


In [26]:
heart_df_subset = heart_df.head(1000).sort_values(by='Time (sec)')
resp_df_subset = resp_df.head(1000).sort_values(by='Time (sec)')
combined_data = pd.merge(heart_df_subset, resp_df_subset, how = 'inner', on = ['Time (sec)', 'Infant'])

In [27]:
combined_data.head()

Unnamed: 0,Infant,Heart Rate,Time (sec),Respiratory Rate


### The independent variable 'Heart Rate' is stored in the 'X' variable and the dependent variable 'Respiratory Rate' is stored in the 'y' variable. The LinearRegression() model is then created, fitted to the data, and the coefficients are printed. The model's predictions for the 'Heart Rate' values are stored in the 'y_pred' variable and plotted against the actual 'Respiratory Rate' values. And finally, it plots a scatter plot of the data, with the line of best fit of the linear model, which shows the relationship between the two variables.

In [3]:
for i in tqdm(range(1, 11), bar_format='{n_fmt}/{total_fmt} [{elapsed}<{remaining}]'):
   
    record_ecg= wfdb.rdrecord('data/infant{}_ecg'.format(i))
    record_resp=wfdb.rdrecord('data/infant{}_resp'.format(i))
    ecg_signal = record_ecg.p_signal
    
    r_peaks= wfdb.rdann('data/infant{}_ecg'.format(i),'qrsc')
    resp_peaks=wfdb.rdann('data/infant{}_resp'.format(i),'resp')

    heart_rate = processing.compute_hr(len(record_ecg.p_signal),r_peaks.sample,record_ecg.fs)
    breathing_rate = processing.compute_hr(len(record_resp.p_signal),resp_peaks.sample,record_resp.fs)
    
    # Print out sampling interval
    ecg_samp_interval = 1/record_ecg.fs
    resp_samp_interval = 1/record_resp.fs
    print(i)
    print("ECG sampling frequency: ", record_ecg.fs, " Hz")
    print("ECG sampling interval every ", ecg_samp_interval, " sec.")
    print("RESP sampling frequency: ", record_resp.fs, " Hz")
    print("RESP sampling interval every = ", resp_samp_interval, " sec.")
    print("\n")
    
    # Sorting the time axis  
    time_ecg = np.arange(record_ecg.p_signal.shape[0])*ecg_samp_interval 
    time_resp = np.arange(record_resp.p_signal.shape[0])*resp_samp_interval 
    

    ecg_data = {
    'Infant':i,
    'Heart Rate': heart_rate,
    'Time (sec)': time_ecg,
        }
    
    resp_data  = {
     'Infant':i,
    'Respiratory Rate':breathing_rate,
       'Time (sec)':time_resp,}
 
    ecg_df = pd.DataFrame.from_dict(ecg_data) 
    resp_df = pd.DataFrame.from_dict(resp_data) 
    
    # Remove unwanted rows 
    ecg_df = ecg_df.dropna().reset_index(drop=True)
    resp_df=resp_df.dropna().reset_index(drop=True)
    
    # Merge both data on Time (sec) & Infant column
    combined_data = pd.merge(ecg_df, resp_df, how='inner', on = ['Time (sec)','Infant'])
    
    # Save the data to respective csv files
    combined_data.to_csv('csv_data/infant_'+str(i)+'.csv', index=False)

0/10 [00:00<?]

1
ECG sampling frequency:  250  Hz
ECG sampling interval every  0.004  sec.
RESP sampling frequency:  500  Hz
RESP sampling interval every =  0.002  sec.




1/10 [07:13<1:05:01]

2
ECG sampling frequency:  500  Hz
ECG sampling interval every  0.002  sec.
RESP sampling frequency:  50  Hz
RESP sampling interval every =  0.02  sec.




2/10 [09:06<32:38]  

3
ECG sampling frequency:  500  Hz
ECG sampling interval every  0.002  sec.
RESP sampling frequency:  50  Hz
RESP sampling interval every =  0.02  sec.




3/10 [10:57<21:26]

4
ECG sampling frequency:  500  Hz
ECG sampling interval every  0.002  sec.
RESP sampling frequency:  50  Hz
RESP sampling interval every =  0.02  sec.




4/10 [12:59<15:56]

5
ECG sampling frequency:  250  Hz
ECG sampling interval every  0.004  sec.
RESP sampling frequency:  50  Hz
RESP sampling interval every =  0.02  sec.




5/10 [14:07<10:32]

6
ECG sampling frequency:  500  Hz
ECG sampling interval every  0.002  sec.
RESP sampling frequency:  50  Hz
RESP sampling interval every =  0.02  sec.




6/10 [16:16<08:28]

7
ECG sampling frequency:  500  Hz
ECG sampling interval every  0.002  sec.
RESP sampling frequency:  50  Hz
RESP sampling interval every =  0.02  sec.




7/10 [17:01<05:01]

8
ECG sampling frequency:  500  Hz
ECG sampling interval every  0.002  sec.
RESP sampling frequency:  50  Hz
RESP sampling interval every =  0.02  sec.




8/10 [17:55<02:50]

9
ECG sampling frequency:  500  Hz
ECG sampling interval every  0.002  sec.
RESP sampling frequency:  50  Hz
RESP sampling interval every =  0.02  sec.




9/10 [51:43<11:32]

10
ECG sampling frequency:  500  Hz
ECG sampling interval every  0.002  sec.
RESP sampling frequency:  50  Hz
RESP sampling interval every =  0.02  sec.




10/10 [53:35<00:00]


In [4]:
import random

#randomise data 

test_set = random.sample(range(1, 11), 3)
train_set = [i+1 for i in range(10) if i+1 not in test_set]

print("The train data will be on Infant "+str(train_set))
print("The test data will be on Infant "+str(test_set))

The train data will be on Infant [1, 2, 3, 5, 6, 9, 10]
The test data will be on Infant [8, 4, 7]


In [5]:
df = pd.read_csv(r"csv_data/infant_"+str(i)+".csv")
df[0:26]

Unnamed: 0,Infant,Heart Rate,Time (sec),Respiratory Rate
0,10,159.57446,57.56,157.89473
1,10,159.57446,57.58,157.89473
2,10,159.57446,57.6,157.89473
3,10,159.57446,57.62,157.89473
4,10,159.57446,57.64,157.89473
5,10,159.57446,57.66,157.89473
6,10,159.57446,57.68,157.89473
7,10,159.57446,57.7,157.89473
8,10,159.57446,57.72,157.89473
9,10,159.57446,57.74,157.89473


In [6]:
min_heart_rate = 20
max_heart_rate = 250
min_resp_rate = 20
max_resp_rate = 200

In [7]:
train_df = pd.DataFrame()
correlation_score = []


for i in tqdm(train_set):
    df = pd.read_csv(r"csv_data/infant_"+str(i)+".csv")
    df = df.drop_duplicates(subset=['Heart Rate', 'Respiratory Rate'], keep='first')
    
    # Remove outliers from the df 
    df = df[df["Heart Rate"] >min_heart_rate]
    df = df[df["Heart Rate"] <max_heart_rate]
    df = df[df["Respiratory Rate"] >min_resp_rate]
    df = df[df["Respiratory Rate"] <max_resp_rate]

    train_df = train_df.append(df,ignore_index=True)

  train_df = train_df.append(df,ignore_index=True)
  train_df = train_df.append(df,ignore_index=True)
  train_df = train_df.append(df,ignore_index=True)
  train_df = train_df.append(df,ignore_index=True)
  train_df = train_df.append(df,ignore_index=True)
  train_df = train_df.append(df,ignore_index=True)
  train_df = train_df.append(df,ignore_index=True)
100%|█████████████████████████████████████████████| 7/7 [00:23<00:00,  3.29s/it]


In [8]:
train_df.to_csv('csv_data/train_data.csv', index=False)

In [10]:
test_df = pd.DataFrame()

for j in tqdm(test_set):
    df = pd.read_csv(r"csv_data/infant_"+str(j)+".csv")
    df = df.drop_duplicates(subset=['Heart Rate', 'Respiratory Rate'], keep='first')

    # remove outliers
    df = df[df["Heart Rate"] >min_heart_rate]
    df = df[df["Heart Rate"] <max_heart_rate]
    df = df[df["Respiratory Rate"] >min_resp_rate]
    df = df[df["Respiratory Rate"] <max_resp_rate]
    
    test_df = test_df.append(df,ignore_index=True)

  test_df = test_df.append(df,ignore_index=True)
  test_df = test_df.append(df,ignore_index=True)
  test_df = test_df.append(df,ignore_index=True)
100%|█████████████████████████████████████████████| 3/3 [00:03<00:00,  1.19s/it]


In [11]:
test_df.to_csv('csv_data/test_data.csv', index=False)