In [505]:
import numpy as np
import scipy.signal as signal
import matplotlib.pyplot as plt
from datetime import datetime
import pandas as pd
from scipy.signal import find_peaks
import os
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, mean_squared_error
from sklearn.metrics import r2_score
from sklearn.preprocessing import StandardScaler
from sklearn.tree import DecisionTreeRegressor
from sklearn.svm import SVR
from sklearn.ensemble import AdaBoostRegressor, RandomForestRegressor
from sklearn.metrics import accuracy_score

In [506]:
def normalize(data):
  nor_data = (data - np.min(data)) / (np.max(data) - np.min(data))
  return nor_data

In [507]:
def find10(ecg_p, ppg_peaks):
  ecg_head = []
  ecg_tail = []
  ppg_head = []
  ppg_tail = []
  i = 1
  j = 1
  while len(ecg_head) < 10:
    curr_pat = ppg_peaks[i] - ecg_p[j]
    if ppg_peaks[i] > ecg_p[j]  > ppg_peaks[i - 1]:
      if 65 < curr_pat < 125:
        ecg_head.append(ecg_p[j])
        ppg_head.append(ppg_peaks[i])
      i = i + 1
      j = j + 1
    elif ppg_peaks[i] == ecg_p[j] or ppg_peaks[i - 1] == ecg_p[j]:
      i = i + 1
      j = j + 1
    elif ecg_p[j] < ppg_peaks[i - 1]:
      j = j + 1
    else:
      i = i + 1


  i = -2
  j = -2

  while len(ecg_tail) < 10:
    curr_pat = ppg_peaks[i] - ecg_p[j]
    if ppg_peaks[i] > ecg_p[j] > ppg_peaks[i - 1]:
      if 65 < curr_pat < 125:
        ecg_tail.append(ecg_p[j])
        ppg_tail.append(ppg_peaks[i])
      i = i - 1
      j = j - 1
    elif ppg_peaks[i] == ecg_p[j] or ppg_peaks[i - 1] == ecg_p[j]:
      i = i - 1
      j = j - 1
    elif ppg_peaks[i] < ecg_p[j]:
      j = j - 1
    else:
      i = i - 1
  
  return ecg_head, ecg_tail, ppg_head, ppg_tail

In [508]:
def find10min(ecg_p, ppg_min):
  ecg_head = []
  ecg_tail = []
  ppg_head = []
  ppg_tail = []
  i = 1
  j = 1
  while len(ecg_head) < 10:
    curr_pat = ppg_min[i] - ecg_p[j]
    if ppg_min[i] > ecg_p[j]  > ppg_min[i - 1]:
      if 125 < curr_pat < 200:
        ecg_head.append(ecg_p[j])
        ppg_head.append(ppg_min[i])
      i = i + 1
      j = j + 1
    elif ppg_min[i] == ecg_p[j] or ppg_min[i - 1] == ecg_p[j]:
      i = i + 1
      j = j + 1
    elif ecg_p[j] < ppg_min[i - 1]:
      j = j + 1
    else:
      i = i + 1


  i = -2
  j = -2

  while len(ecg_tail) < 10:
    curr_pat = ppg_min[i] - ecg_p[j]
    if ppg_min[i] > ecg_p[j] > ppg_min[i - 1]:
      if 125 < curr_pat < 200:
        ecg_tail.append(ecg_p[j])
        ppg_tail.append(ppg_min[i])
      i = i - 1
      j = j - 1
    elif ppg_min[i] == ecg_p[j] or ppg_min[i - 1] == ecg_p[j]:
      i = i - 1
      j = j - 1
    elif ppg_min[i] < ecg_p[j]:
      j = j - 1
    else:
      i = i - 1
  
  return ecg_head, ecg_tail, ppg_head, ppg_tail

In [509]:
pat_record = []
pat_start = []
pat_end = []

filename = "/"
data = pd.read_csv(filename).values
record = data[:, 0]
bp_sys_start = data[:, 6]
bp_sys_end = data[:, 7]
bp_dia_start = data[:, 8]
bp_dia_end = data[:, 9]
hr1_start = data[:, 10]
hr1_end = data[:, 11]
hr2_start = data[:, 12]
hr2_end = data[:, 13]
spo2_start = data[:, 14]
spo2_end = data[:, 15]

In [510]:
folder_path = "/"
csv_files = os.listdir(folder_path)
csv_files.sort()

In [511]:
for filename in csv_files:
    if filename.endswith('.csv'):
        
        file_path = os.path.join(folder_path, filename)
        data = pd.read_csv(file_path).values
        time = data[:, 0]
        ecg = data[:, 1]
        ecg_peaks = data[:,2]
        ppg = data[:, 3]

        filename = filename [:-4]
        pat_record.append(filename)

        nor_ecg = normalize(ecg)
        nor_ppg = normalize(ppg)
        ecg_p = []
        ppg_peaks, _ = find_peaks(nor_ppg, distance = 150)

        for i in range(len(time)):
          if ecg_peaks[i] == 1:
            ecg_p.append(i)
        
        ecg_head, ecg_tail, ppg_head, ppg_tail = find10(ecg_p, ppg_peaks)

        pat_head = []
        pat_tail = []
        for i in range(len(ecg_head)):
          pat_head.append((ppg_head[i] - ecg_head[i]) * 2)
          pat_tail.append((ppg_tail[i] - ecg_tail[i]) * 2) 
        pat_head_avg = np.average(pat_head)
        pat_tail_avg = np.average(pat_tail)
        
        pat_start.append(pat_head_avg)
        pat_end.append(pat_tail_avg)

In [None]:
pat = []
hr1 = []
hr2 = []
spo2 = []
bp_sys = []
bp_dia = []
record_track = []
for i in range(len(pat_record)):
  for j in range(len(record)):
    if pat_record[i] == record[j] and record[j] not in record_track:
      record_track.append(record[j])
      
      pat.append(pat_start[i])
      bp_sys.append(bp_sys_start[j])
      bp_dia.append(bp_dia_start[j])
      hr1.append(hr1_start[j])
      hr2.append(hr2_start[j])
      spo2.append(spo2_start[j])

      pat.append(pat_end[i])
      bp_sys.append(bp_sys_end[j])
      bp_dia.append(bp_dia_end[j])
      hr1.append(hr1_end[j])
      hr2.append(hr2_end[j])
      spo2.append(spo2_end[j])


In [515]:
df_pat = np.array(pat)
sbp = np.array(bp_sys)
dbp = np.array(bp_dia)
heart_rate1 = np.array(hr1)
heart_rate2 = np.array(hr2)
df_spo2 = np.array(spo2)

df = pd.DataFrame({
    'HR1': heart_rate1,
    'HR2': heart_rate2,
    'SPO2': df_spo2,
    'PAT': pat,
    'SBP': sbp,
    'DBP': dbp
})

# Features and targets
X = df[['HR1', 'HR2', 'SPO2', 'PAT']]

y_sbp = df['SBP']
y_dbp = df['DBP']




X_train, X_test, y_train_sbp, y_test_sbp = train_test_split(X, y_sbp, test_size=0.2, random_state=42)
X_train, X_test, y_train_dbp, y_test_dbp = train_test_split(X, y_dbp, test_size=0.2, random_state=42)

# if X_train.ndim == 1:
#   X_train = X_train.reshape(-1, 1)

# if X_test.ndim == 1:
#   X_test = X_test.reshape(-1, 1)

# Standardizing the features (important for SVM)
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Function to evaluate and print results for a target
def evaluate_model(model, X_train, y_train, X_test, y_test, target_name):
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    mae = mean_absolute_error(y_test, y_pred)
    rmse = np.sqrt(mean_squared_error(y_test, y_pred))
    res = y_test - y_pred
    std = np.std(res)
    mse = mean_squared_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)
    
    print(f"{model.__class__.__name__} ({target_name}) - MAE: {mae}, STD: {std}, R^2: {r2}")

# Linear Regression
lin_reg = LinearRegression()
evaluate_model(lin_reg, X_train, y_train_sbp, X_test, y_test_sbp, "SBP")
evaluate_model(lin_reg, X_train, y_train_dbp, X_test, y_test_dbp, "DBP")

# Decision Tree
dec_tree = DecisionTreeRegressor()
evaluate_model(dec_tree, X_train, y_train_sbp, X_test, y_test_sbp, "SBP")
evaluate_model(dec_tree, X_train, y_train_dbp, X_test, y_test_dbp, "DBP")

# Support Vector Machine
svm = SVR()
evaluate_model(svm, X_train_scaled, y_train_sbp, X_test_scaled, y_test_sbp, "SBP")
evaluate_model(svm, X_train_scaled, y_train_dbp, X_test_scaled, y_test_dbp, "DBP")

# AdaBoost
ada_boost = AdaBoostRegressor()
evaluate_model(ada_boost, X_train, y_train_sbp, X_test, y_test_sbp, "SBP")
evaluate_model(ada_boost, X_train, y_train_dbp, X_test, y_test_dbp, "DBP")

# Random Forest
rand_forest = RandomForestRegressor()
evaluate_model(rand_forest, X_train, y_train_sbp, X_test, y_test_sbp, "SBP")
evaluate_model(rand_forest, X_train, y_train_dbp, X_test, y_test_dbp, "DBP")


LinearRegression (SBP) - MAE: 12.194958367983222, STD: 15.362465165513994, R^2: -0.07629090784649994
LinearRegression (DBP) - MAE: 7.723470649973286, STD: 9.284661899394683, R^2: 0.09838866932509993
DecisionTreeRegressor (SBP) - MAE: 9.814814814814815, STD: 13.549178828016856, R^2: 0.15269653720168463
DecisionTreeRegressor (DBP) - MAE: 5.555555555555555, STD: 7.412775832428641, R^2: 0.41857836108966573
SVR (SBP) - MAE: 11.27363598082059, STD: 15.164075821995146, R^2: -0.01792014518225149
SVR (DBP) - MAE: 7.647848491491745, STD: 8.901675966992904, R^2: 0.15907426148356618
AdaBoostRegressor (SBP) - MAE: 8.839045213539205, STD: 10.91772125871851, R^2: 0.41857699119327363
AdaBoostRegressor (DBP) - MAE: 5.475052375243784, STD: 6.886578882809079, R^2: 0.4581234885790396
RandomForestRegressor (SBP) - MAE: 8.358518518518519, STD: 11.284484048768629, R^2: 0.40302949461862414
RandomForestRegressor (DBP) - MAE: 5.316666666666667, STD: 6.586162448836889, R^2: 0.5186412857064078
