In [1]:
import pickle
import pandas as pd
from sklearn.metrics import mean_squared_error,mean_absolute_error
from sklearn import preprocessing
import warnings
from tqdm import tqdm
import os
import wfdb
warnings.filterwarnings("ignore")

## Load Test Data

In [2]:
test_data = pd.read_csv(r"dataset/test_data.csv")
test_data = test_data.round(2)
test_data

Unnamed: 0,Infant,Heart Rate,Time (sec),Respiratory Rate
0,7,184.05,30.56,93.75
1,7,182.93,30.88,93.75
2,7,182.93,32.56,50.85
3,7,184.05,32.84,50.85
4,7,184.05,33.18,96.77
...,...,...,...,...
48741,4,145.63,167495.94,23.81
48742,4,135.14,167498.74,42.25
48743,4,138.89,167499.18,45.45
48744,4,140.85,167555.70,44.12


In [3]:
X = test_data[['Time (sec)','Respiratory Rate']] # respiratory rate data
y = test_data['Heart Rate']  # heart rate labels

In [4]:
sc = preprocessing.StandardScaler()
X = sc.fit_transform(X)
y =  sc.fit_transform(y.values.reshape(-1, 1))

## Load Models

In [5]:
file_rf = "models/random_forest.sav"
file_sv = "models/support_vector.sav"

In [6]:
# load the model from disk
rf_model = pickle.load(open(file_rf, 'rb'))
sv_model = pickle.load(open(file_sv, 'rb'))

## Evaluate on entire test dataset

In [7]:
rf_pred = rf_model.predict(X)
sv_pred = sv_model.predict(X)

In [8]:
print("Random Forest Results")
print("Heart rate MSE:", mean_squared_error(y, rf_pred))
print("Heart rate RMSE:", mean_squared_error(y, rf_pred,squared=False))
print("Heart rate MAE:", mean_absolute_error(y, rf_pred))


Random Forest Results
Heart rate MSE: 1.3327835126045517
Heart rate RMSE: 1.1544624344709324
Heart rate MAE: 0.8550942274203684


In [9]:
print("Support Vector Regression Results")
print("Heart rate MSE:", mean_squared_error(y, sv_pred))
print("Heart rate RMSE:", mean_squared_error(y, sv_pred,squared=False))
print("Heart rate MAE:", mean_absolute_error(y, sv_pred))

Support Vector Regression Results
Heart rate MSE: 0.9800645130512564
Heart rate RMSE: 0.9899820771363774
Heart rate MAE: 0.7410193059121665


## Split by Time

To compare the performance of our model with baseline, we will compute the error rate for different time windows. The error rate for 30 seconds, 1 minute and 3 minutes will be calculated.

In [13]:
infants = [7,1,4]
# split the test data by Infant
grouped = test_data.groupby(['Infant'])
df1 = grouped.get_group(infants[0])
df2 = grouped.get_group(infants[1])
df3 = grouped.get_group(infants[2])

## 30 sec 

The first 30 seconds of data from each infant is obtained to be used for testing.

In [16]:
time_window = 30 

In [17]:
# 30s of data from 1st test infant 
test_total= 0
test_samples= []
for i in range(len(df1)):
    if test_total >= time_window:
        break
    test_total+=df1.iloc[1]['Time (sec)'] - df1.iloc[0]['Time (sec)']
    test_samples.append(i)

df1_30 = df1[:test_samples[-1]]

In [18]:
# 30s of data from 2nd test infant

test_total= 0
test_samples = []
for i in range(len(df2)):
    if test_total >= time_window:
        break
    test_total+=df2.iloc[1]['Time (sec)'] - df2.iloc[0]['Time (sec)']
    test_samples.append(i)

df2_30 = df2[:test_samples[-1]]

In [19]:
# 30s of data from 3rd test infant

test_total= 0
test_samples = []
for i in range(len(df3)):
    if test_total >= time_window:
        break
    test_total+=df3.iloc[1]['Time (sec)'] - df3.iloc[0]['Time (sec)']
    test_samples.append(i)

df3_30 = df3[:test_samples[-1]]

In [20]:
# merge all the 30s data together
df_final_30 = pd.concat([df1_30, df2_30,df3_30])

X = df_final_30[['Time (sec)','Respiratory Rate']] # respiratory rate data
y = df_final_30['Heart Rate']  # heart rate labels

X = sc.fit_transform(X)
y =  sc.fit_transform(y.values.reshape(-1, 1))

rf_pred = rf_model.predict(X)
sv_pred = sv_model.predict(X)

In [21]:
print("Random Forest Results for 30 sec of data")
print("Heart rate MSE:", mean_squared_error(y, rf_pred))
print("Heart rate RMSE:", mean_squared_error(y, rf_pred,squared=False))
print("Heart rate MAE:", mean_absolute_error(y, rf_pred))

Random Forest Results for 30 sec of data
Heart rate MSE: 1.4221711072385552
Heart rate RMSE: 1.1925481571989265
Heart rate MAE: 1.007906921173458


In [22]:
print("Support Vector Regression Results for 30 sec of data")
print("Heart rate MSE:", mean_squared_error(y, sv_pred))
print("Heart rate RMSE:", mean_squared_error(y, sv_pred,squared=False))
print("Heart rate MAE:", mean_absolute_error(y, sv_pred))

Support Vector Regression Results for 30 sec of data
Heart rate MSE: 1.009569649831331
Heart rate RMSE: 1.0047734320887127
Heart rate MAE: 0.9243302461398442


## 1 min window

In [23]:
# 1 min = 60 seconds
time_window = 60

In [29]:
# 60s of data from 1st test infant 
test_total= 0
test_samples= []
for i in range(len(df1)):
    if test_total >= time_window:
        break
    test_total+=df1.iloc[1]['Time (sec)'] - df1.iloc[0]['Time (sec)']
    test_samples.append(i)

df1_60 = df1[:test_samples[-1]]

In [30]:
# 60s of data from 3rd test infant

test_total= 0
test_samples = []
for i in range(len(df2)):
    if test_total >= time_window:
        break
    test_total+=df2.iloc[1]['Time (sec)'] - df2.iloc[0]['Time (sec)']
    test_samples.append(i)

df2_60 = df2[:test_samples[-1]]

In [31]:
# 60s of data from 3rd test infant

test_total= 0
test_samples = []
for i in range(len(df3)):
    if test_total >= time_window:
        break
    test_total+=df3.iloc[1]['Time (sec)'] - df3.iloc[0]['Time (sec)']
    test_samples.append(i)

df3_60 = df3[:test_samples[-1]]

In [32]:
# merge all the 60s data together
df_final_60 = pd.concat([df1_60, df2_60,df3_60])

X = df_final_60[['Time (sec)','Respiratory Rate']] # respiratory rate data
y = df_final_60['Heart Rate']  # heart rate labels

X = sc.fit_transform(X)
y =  sc.fit_transform(y.values.reshape(-1, 1))

rf_pred = rf_model.predict(X)
sv_pred = sv_model.predict(X)

In [33]:
print("Random Forest Results for 1 min of data")
print("Heart rate MSE:", mean_squared_error(y, rf_pred))
print("Heart rate RMSE:", mean_squared_error(y, rf_pred,squared=False))
print("Heart rate MAE:", mean_absolute_error(y, rf_pred))

Random Forest Results for 1 min of data
Heart rate MSE: 1.3418216616264742
Heart rate RMSE: 1.1583702610247184
Heart rate MAE: 0.9951254417380324


In [34]:
print("Support Vector Regression Results for 1 min of data")
print("Heart rate MSE:", mean_squared_error(y, sv_pred))
print("Heart rate RMSE:", mean_squared_error(y, sv_pred,squared=False))
print("Heart rate MAE:", mean_absolute_error(y, sv_pred))

Support Vector Regression Results for 1 min of data
Heart rate MSE: 0.9895522236592936
Heart rate RMSE: 0.9947623955796145
Heart rate MAE: 0.8680876889930136


## 3 Minutes

In [35]:
# 3 minutes = 180 secs
time_window = 180 

In [39]:
# 60s of data from 1st test infant 
test_total= 0
test_samples= []
for i in range(len(df1)):
    if test_total >= time_window:
        break
    test_total+=df1.iloc[1]['Time (sec)'] - df1.iloc[0]['Time (sec)']
    test_samples.append(i)

df1_180 = df1[:test_samples[-1]]

In [40]:
# 60s of data from 3rd test infant

test_total= 0
test_samples = []
for i in range(len(df2)):
    if test_total >= time_window:
        break
    test_total+=df2.iloc[1]['Time (sec)'] - df2.iloc[0]['Time (sec)']
    test_samples.append(i)

df2_180 = df2[:test_samples[-1]]

In [41]:
# 60s of data from 3rd test infant

test_total= 0
test_samples = []
for i in range(len(df3)):
    if test_total >= time_window:
        break
    test_total+=df3.iloc[1]['Time (sec)'] - df3.iloc[0]['Time (sec)']
    test_samples.append(i)

df3_180 = df3[:test_samples[-1]]

In [42]:
# merge all the 60s data together
df_final_180 = pd.concat([df1_180, df2_180,df3_180])

X = df_final_180[['Time (sec)','Respiratory Rate']] # respiratory rate data
y = df_final_180['Heart Rate']  # heart rate labels

X = sc.fit_transform(X)
y =  sc.fit_transform(y.values.reshape(-1, 1))

rf_pred = rf_model.predict(X)
sv_pred = sv_model.predict(X)

In [43]:
print("Random Forest Results for 3 min of data")
print("Heart rate MSE:", mean_squared_error(y, rf_pred))
print("Heart rate RMSE:", mean_squared_error(y, rf_pred,squared=False))
print("Heart rate MAE:", mean_absolute_error(y, rf_pred))

Random Forest Results for 3 min of data
Heart rate MSE: 1.3592250796820944
Heart rate RMSE: 1.1658580872825364
Heart rate MAE: 0.9186499128408957


In [44]:
print("Support Vector Regression Results for 3min of data")
print("Heart rate MSE:", mean_squared_error(y, sv_pred))
print("Heart rate RMSE:", mean_squared_error(y, sv_pred,squared=False))
print("Heart rate MAE:", mean_absolute_error(y, sv_pred))

Support Vector Regression Results for 3min of data
Heart rate MSE: 0.9643099533756974
Heart rate RMSE: 0.9819928479249211
Heart rate MAE: 0.7920130793516673
