# Infant 3 processing
1. Read the data
1. Perform interquartile range (IQR) smoothing to clip outliers
1. Normalize the data to (-1, 1)
1. Calculate the heart rate (beats per minute) and the respiration rate (breaths per minute)
    - save the data into .npy files
1. Resample the data to the same cardinality
1. Correlation test
1. Linear Regression
    - export model
1. Polynomial Regression
    - export model
1. Support Vector Regression (SVR)
    - export model

In [2]:
# Import of libraries/modules
### -------------------------------------------
import wfdb
from wfdb import processing
### ------------------------------------------- 
import scipy
from scipy.signal import butter, lfilter, filtfilt
### -------------------------------------------
import matplotlib.pyplot as plt
### -------------------------------------------
import numpy as np
### -------------------------------------------
import pandas as pd
### -------------------------------------------
import sklearn
from sklearn.preprocessing import MinMaxScaler
from sklearn import preprocessing, svm
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVR
from sklearn.metrics import mean_absolute_error, mean_squared_error
### -------------------------------------------
import functools
### -------------------------------------------
import seaborn as sns
### -------------------------------------------
import joblib


In [3]:
# Custom helper functions
def time2Num(time, fs):
    """Convert a time value into a sample count.

    ``time`` is multiplied by the sampling frequency ``fs`` and the product
    is truncated toward zero by ``int``.
    """
    return int(time * fs)


def num2Time(num, fs):
    """Convert a sample count into a time value by dividing by ``fs``."""
    return num / fs


def secs2minutes(time):
    """Return ``time`` (seconds) expressed in minutes."""
    minutes = time / 60
    return minutes


def minutes2secs(time):
    """Return ``time`` (minutes) expressed in seconds."""
    seconds = time * 60
    return seconds


def secs2hours(time):
    """Return ``time`` (seconds) expressed in hours."""
    hours = time / 3600
    return hours


def hours2secs(time):
    """Return ``time`` (hours) expressed in seconds."""
    seconds = time * 3600
    return seconds


def iqr_remove_outlier(x, lower, upper):
    """Clamp a single value to the closed interval [lower, upper].

    Values below ``lower`` are replaced by ``lower``, values above ``upper``
    by ``upper``; anything else is returned unchanged.
    """
    if x < lower:
        return lower
    if x > upper:
        return upper
    return x


def correlationTest(signal_1, signal_2, plot=True):
    """Print covariance, Pearson and Spearman statistics for two signals.

    Parameters
    ----------
    signal_1, signal_2 : array-like
        The two series to compare.
    plot : bool
        When true, a scatter plot of ``signal_1`` against ``signal_2`` is
        drawn first.

    Returns
    -------
    None. All results are printed.
    """
    # Imported explicitly: the file only imports ``scipy`` and
    # ``scipy.signal``, which does not guarantee that ``scipy.stats`` is
    # loaded on every SciPy version.
    from scipy import stats

    # Inspect by scatter plot
    if plot:
        plt.scatter(signal_1, signal_2)

    # Covariance matrix of the two signals
    print(np.cov(signal_1, signal_2))

    # Pearson's correlation: 0 is no correlation, -1 or 1 is highly correlated
    pearson_corr, _ = stats.pearsonr(signal_1, signal_2)
    print('Pearsons correlation: %.3f' % pearson_corr)

    # Spearman's correlation: 0 is no correlation, -1 or 1 is highly correlated
    spearman_corr, _ = stats.spearmanr(signal_1, signal_2)
    print('Spearmans correlation: %.3f' % spearman_corr)


def peaks_hr(sig, peak_inds, fs, title, figsize=(20, 10), saveto=None):
    """Plot a signal with its peaks and the heart rate derived from them.

    Parameters
    ----------
    sig : array
        Signal to draw; its first dimension is used as the signal length.
    peak_inds : array of int
        Indices into ``sig`` marking the detected peaks.
    fs : number
        Sampling frequency passed to ``compute_hr``.
    title : str
        Figure title.
    figsize : tuple
        Figure size passed to ``plt.subplots``.
    saveto : str or None
        When given, the figure is saved to this path at 600 dpi before it is
        shown.
    """
    n_samples = sig.shape[0]
    # Calculate heart rate
    hrs = processing.hr.compute_hr(sig_len=n_samples, qrs_inds=peak_inds, fs=fs)

    _, ax_left = plt.subplots(figsize=figsize)
    ax_right = ax_left.twinx()

    ax_left.plot(sig, color='#3979f0', label='Signal')
    # Markers only. The former 'rx' format string duplicated the marker and
    # color keywords (which took precedence) and triggered Matplotlib's
    # redundant-definition warnings; linestyle='None' keeps the peaks
    # unconnected as before.
    ax_left.plot(peak_inds, sig[peak_inds], linestyle='None', marker='x',
                 color='#8b0000', label='Peak', markersize=12)
    ax_right.plot(np.arange(n_samples), hrs, label='Heart rate', color='m', linewidth=2)

    ax_left.set_title(title)

    # NOTE(review): the x values are sample indices, so this label is only
    # accurate if one sample corresponds to one millisecond - confirm.
    ax_left.set_xlabel('Time (ms)')
    ax_left.set_ylabel('ECG (mV)', color='#3979f0')
    ax_right.set_ylabel('Heart rate (bpm)', color='m')
    # Make the y-axis label, ticks and tick labels match the line color.
    ax_left.tick_params('y', colors='#3979f0')
    ax_right.tick_params('y', colors='m')
    if saveto is not None:
        plt.savefig(saveto, dpi=600)
    plt.show()


def butter_bandpass(lowcut, highcut, fs, order=5):
    """Design a digital Butterworth band-pass filter.

    The cut-off frequencies are normalised by the Nyquist frequency
    (``fs / 2``) before being handed to ``scipy.signal.butter``.

    Returns the ``(b, a)`` coefficient pair produced by ``butter``.
    """
    nyquist = 0.5 * fs
    band_edges = [lowcut / nyquist, highcut / nyquist]
    return butter(order, band_edges, btype='band', analog=False)


def butter_bandpass_filter(data, lowcut, highcut, fs, order=5):
    """Band-pass filter ``data`` with a Butterworth design.

    The coefficients come from ``butter_bandpass`` and are applied with
    ``lfilter`` (a single forward pass).
    """
    coeffs = butter_bandpass(lowcut, highcut, fs, order=order)
    return lfilter(*coeffs, data)


def butter_lowpass(cutoff, fs, order=5):
    """Design a digital Butterworth low-pass filter.

    ``cutoff`` is normalised by the Nyquist frequency (``fs / 2``).

    Returns the ``(b, a)`` coefficient pair produced by ``butter``.
    """
    nyquist = 0.5 * fs
    return butter(order, cutoff / nyquist, btype='low', analog=False)


def butter_lowpass_filter(data, cutoff, fs, order=5):
    """Low-pass filter ``data`` with a Butterworth design.

    The coefficients come from ``butter_lowpass`` and are applied with
    ``lfilter`` (a single forward pass).
    """
    coeffs = butter_lowpass(cutoff, fs, order=order)
    return lfilter(*coeffs, data)


def butter_highpass(cutoff, fs, order=5):
    """Design a digital Butterworth high-pass filter.

    ``cutoff`` is normalised by the Nyquist frequency (``fs / 2``).

    Returns the ``(b, a)`` coefficient pair produced by ``butter``.
    """
    nyquist = 0.5 * fs
    return butter(order, cutoff / nyquist, btype='high', analog=False)


def butter_highpass_filter(data, cutoff, fs, order=5):
    """High-pass filter ``data`` with a Butterworth design.

    The coefficients come from ``butter_highpass`` and are applied with
    ``filtfilt`` (forward and backward pass), unlike the band-pass and
    low-pass helpers in this file, which use ``lfilter``.
    """
    coeffs = butter_highpass(cutoff, fs, order=order)
    return filtfilt(*coeffs, data)


def peaks_rr(sig, peak_inds, fs, title, figsize=(20, 10), saveto=None):
    """Plot a signal with its peaks and the respiration rate derived from them.

    The rate is computed from the peak indices with wfdb's ``compute_hr``.

    Parameters
    ----------
    sig : array
        Signal to draw; its first dimension is used as the signal length.
    peak_inds : array of int
        Indices into ``sig`` marking the detected peaks.
    fs : number
        Sampling frequency passed to ``compute_hr``.
    title : str
        Figure title.
    figsize : tuple
        Figure size passed to ``plt.subplots``.
    saveto : str or None
        When given, the figure is saved to this path at 600 dpi before it is
        shown.
    """
    n_samples = sig.shape[0]
    # Calculate respiration rate from the peak-to-peak intervals
    rrs = processing.hr.compute_hr(sig_len=n_samples, qrs_inds=peak_inds, fs=fs)

    _, ax_left = plt.subplots(figsize=figsize)
    ax_right = ax_left.twinx()

    ax_left.plot(sig, color='#3979f0', label='Signal')
    # Markers only. The former 'rx' format string duplicated the marker and
    # color keywords (which took precedence) and triggered Matplotlib's
    # redundant-definition warnings; linestyle='None' keeps the peaks
    # unconnected as before.
    ax_left.plot(peak_inds, sig[peak_inds], linestyle='None', marker='x',
                 color='#8b0000', label='Peak', markersize=12)
    # Label spelling corrected from "Repiration".
    ax_right.plot(np.arange(n_samples), rrs, label='Respiration rate', color='m', linewidth=2)

    ax_left.set_title(title)

    # NOTE(review): the x values are sample indices, so this label is only
    # accurate if one sample corresponds to one millisecond - confirm.
    ax_left.set_xlabel('Time (ms)')
    ax_left.set_ylabel('RESP (NU)', color='#3979f0')
    ax_right.set_ylabel('Respiration rate (bpm)', color='m')
    # Make the y-axis label, ticks and tick labels match the line color.
    ax_left.tick_params('y', colors='#3979f0')
    ax_right.tick_params('y', colors='m')
    if saveto is not None:
        plt.savefig(saveto, dpi=600)
    plt.show()

### Read the data

In [4]:
# Directory holding the records and the number of the infant being processed.
data_dir = "../data"
infantNum = 3
# Both record paths share the same prefix and differ only in their suffix.
record_prefix = f"{data_dir}/infant{infantNum}"
ECG_dataset = f"{record_prefix}_ecg"
RESP_dataset = f"{record_prefix}_resp"

In [5]:
def read_Data(filename, startNum, endNum):
    """Read the samples ``startNum``..``endNum`` of a record with wfdb.

    Returns a 3-tuple: the ``wfdb.rdsamp`` result, followed by the start and
    end positions converted to seconds using the ``'fs'`` entry of the
    result's second element.
    """
    record = wfdb.rdsamp(filename, sampfrom=startNum, sampto=endNum)
    fs = record[1]['fs']
    return record, startNum / fs, endNum / fs

### Interquartile (IQR) Smoothing 

In [6]:
def cal_iqr(signal, hiPerc, loPerc):
    """Compute the spread between two percentiles of ``signal``.

    Prints both percentile values and their difference, then returns
    ``(difference, high_percentile_value, low_percentile_value)``.
    """
    upper_val, lower_val = np.percentile(signal, [hiPerc, loPerc])
    spread = upper_val - lower_val
    print(f"{hiPerc}th percentile: {upper_val}, {loPerc}th percentile: {lower_val}, IQR: {spread}")
    return spread, upper_val, lower_val

In [7]:
def iqr_smooth(signal, hiPerc, loPerc, cutoff_factor=0.5):
    """Clip outliers of ``signal`` to percentile-based bounds.

    The bounds are the ``loPerc``/``hiPerc`` percentile values widened by
    ``cutoff_factor`` times their difference. Values outside the bounds are
    replaced by the nearest bound.

    Parameters
    ----------
    signal : array-like
        Input samples; any shape is accepted and flattened.
    hiPerc, loPerc : number
        Upper and lower percentiles passed to ``cal_iqr``.
    cutoff_factor : float
        Multiple of the percentile spread added beyond each percentile.

    Returns
    -------
    numpy.ndarray
        One-dimensional float64 array with the clipped samples.
    """
    # Flatten once so the comparisons below run vectorised instead of
    # looping over every sample in Python.
    values = np.asarray(signal, dtype=np.float64).ravel()

    # calculate the outlier cutoff
    iqr, hiPerc_val, loPerc_val = cal_iqr(signal, hiPerc=hiPerc, loPerc=loPerc)
    cutoff = iqr * cutoff_factor
    lower, upper = loPerc_val - cutoff, hiPerc_val + cutoff

    # identify outliers
    below = values < lower
    above = values > upper
    print('Identified outliers: %d' % np.count_nonzero(below | above))

    # np.where (rather than np.clip) keeps the original comparison
    # semantics: a sample is only replaced when a comparison is true.
    output = np.where(below, lower, np.where(above, upper, values))
    print(f"Data Shape: {output.shape}")
    return output

### Data Normalization (-1,1)

In [8]:
def norm_signal(signal, max=1, min=-1):
    """Rescale ``signal`` to the range ``(min, max)`` with MinMaxScaler.

    The signal is reshaped to a single column first, so the result is a
    column array as returned by ``fit_transform``.
    """
    column = signal.reshape((-1, 1))
    return MinMaxScaler(feature_range=(min, max)).fit_transform(column)

### Heart Rate and Respiration Rate

In [9]:
def cal_heartrate(signal, fs):
    """Compute a heart-rate series from a signal.

    The signal is flattened to one dimension, peaks are located with wfdb's
    ``gqrs_detect`` and the rate is derived with ``compute_hr``.
    """
    n_samples = signal.shape[0]
    beat_inds = processing.qrs.gqrs_detect(sig=signal.reshape(n_samples), fs=fs)
    return processing.hr.compute_hr(sig_len=n_samples, qrs_inds=beat_inds, fs=fs)


def cal_resprate(signal, fs):
    """Compute a respiration-rate series from a signal.

    The signal is flattened to one dimension, local peaks are located with
    wfdb's ``find_local_peaks`` using a radius of ``fs`` samples, and the
    rate is derived from those peaks with ``compute_hr``.
    """
    n_samples = signal.shape[0]
    breath_inds = processing.peaks.find_local_peaks(sig=signal.reshape(n_samples), radius=fs)
    return processing.hr.compute_hr(sig_len=n_samples, qrs_inds=breath_inds, fs=fs)

### Resample Data & Fix NaN

In [10]:
def data_fixNan(signal):
    """Return ``signal`` as a 1-D array with every NaN replaced by 0.

    The replacement goes through a pandas DataFrame; the result is reshaped
    to ``signal.shape[0]`` elements.
    """
    frame = pd.DataFrame(signal)
    filled = frame.fillna(0)
    return filled.to_numpy().reshape(signal.shape[0])


def data_resample(signal, sig_len):
    """Resample ``signal`` to ``sig_len`` samples with ``scipy.signal.resample``."""
    return scipy.signal.resample(signal, num=sig_len)

### Linear Regression

In [11]:
def train_lr_model(X, y, test_size = 0.25, **kwargs):
    """Fit and evaluate a single-feature linear regression.

    ``X`` and ``y`` are reshaped to columns and split into train/test
    partitions. The training R^2, intercept and coefficients are printed,
    followed by MAE, MSE and RMSE on the test partition.

    Keyword arguments
    -----------------
    seed : optional
        Forwarded to ``train_test_split`` as ``random_state``.

    Returns the fitted ``LinearRegression``.
    """
    features = X.reshape((-1, 1))
    targets = y.reshape((-1, 1))

    # random_state=None is train_test_split's default, so a missing 'seed'
    # behaves exactly like not passing the argument at all.
    X_train, X_test, y_train, y_test = train_test_split(
        features, targets, test_size=test_size, random_state=kwargs.get('seed'))

    lr_model = LinearRegression()
    lr_model.fit(X_train, y_train)
    print(f"Coefficient of determination: {lr_model.score(X_train, y_train)}")
    print(f"Intercept: {lr_model.intercept_}")
    print(f"Coefficients: {lr_model.coef_}")

    predicted = lr_model.predict(X_test)
    mae = mean_absolute_error(y_test, predicted)
    mse = mean_squared_error(y_test, predicted)
    rmse = np.sqrt(mse)
    print(f'Mean absolute error: {mae:.2f}')
    print(f'Mean squared error: {mse:.2f}')
    print(f'Root mean squared error: {rmse:.2f}')
    return lr_model

In [12]:
def cascade_train_lr_model(model, X, y, test_size = 0.25, **kwargs):
    """Call ``fit`` on an existing linear model with new data and evaluate it.

    ``X`` and ``y`` are reshaped to columns and split into train/test
    partitions. The training R^2, intercept and coefficients are printed,
    followed by MAE, MSE and RMSE on the test partition.

    NOTE(review): scikit-learn's ``LinearRegression.fit`` presumably
    re-estimates the model from the data passed here rather than continuing
    from earlier fits - confirm this matches the intended "cascade".

    Keyword arguments
    -----------------
    seed : optional
        Forwarded to ``train_test_split`` as ``random_state``.

    Returns the same ``model`` object after fitting.
    """
    features = X.reshape((-1, 1))
    targets = y.reshape((-1, 1))

    # random_state=None is train_test_split's default, so a missing 'seed'
    # behaves exactly like not passing the argument at all.
    X_train, X_test, y_train, y_test = train_test_split(
        features, targets, test_size=test_size, random_state=kwargs.get('seed'))

    model.fit(X_train, y_train)
    print(f"Coefficient of determination: {model.score(X_train, y_train)}")
    print(f"Intercept: {model.intercept_}")
    print(f"Coefficients: {model.coef_}")

    predicted = model.predict(X_test)
    mae = mean_absolute_error(y_test, predicted)
    mse = mean_squared_error(y_test, predicted)
    rmse = np.sqrt(mse)
    print(f'Mean absolute error: {mae:.2f}')
    print(f'Mean squared error: {mse:.2f}')
    print(f'Root mean squared error: {rmse:.2f}')
    return model

### Polynomial Regression

In [13]:
def train_pr_model(X, y, degree=2, test_size = 0.25, **kwargs):
    """Fit and evaluate a single-feature polynomial regression.

    ``X`` and ``y`` are reshaped to columns and split into train/test
    partitions. ``X`` is expanded to polynomial features of the given
    ``degree`` (no bias column) and a ``LinearRegression`` is fitted on them.
    The training R^2, intercept and coefficients are printed, followed by
    MAE, MSE and RMSE on the test partition.

    Keyword arguments
    -----------------
    seed : optional
        Forwarded to ``train_test_split`` as ``random_state``.

    Returns the fitted ``LinearRegression``. It expects inputs that have
    already been expanded with the same polynomial features.
    """
    X = X.reshape((-1, 1))
    y = y.reshape((-1, 1))

    # random_state=None is train_test_split's default.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=kwargs.get('seed'))

    transformer = PolynomialFeatures(degree=degree, include_bias=False)
    X_train_poly = transformer.fit_transform(X_train)
    # Only transform the test partition: the transformer is fitted on the
    # training data and must not be re-fitted on held-out data.
    X_test_poly = transformer.transform(X_test)

    pr_model = LinearRegression().fit(X_train_poly, y_train)
    r_sq = pr_model.score(X_train_poly, y_train)
    print(f"Coefficient of determination: {r_sq}")
    print(f"Intercept: {pr_model.intercept_}")
    print(f"Coefficients: {pr_model.coef_}")

    y_test_predict = pr_model.predict(X_test_poly)
    mae = mean_absolute_error(y_test, y_test_predict)
    mse = mean_squared_error(y_test, y_test_predict)
    rmse = np.sqrt(mse)
    print(f'Mean absolute error: {mae:.2f}')
    print(f'Mean squared error: {mse:.2f}')
    print(f'Root mean squared error: {rmse:.2f}')
    return pr_model


def cascade_train_pr_model(model, X, y, degree=2, test_size = 0.25, **kwargs):
    """Call ``fit`` on an existing polynomial-regression model and evaluate it.

    ``X`` and ``y`` are reshaped to columns and split into train/test
    partitions. ``X`` is expanded to polynomial features of the given
    ``degree`` (no bias column) before ``model.fit`` is called. The training
    R^2, intercept and coefficients are printed, followed by MAE, MSE and
    RMSE on the test partition.

    Keyword arguments
    -----------------
    seed : optional
        Forwarded to ``train_test_split`` as ``random_state``.

    Returns the same ``model`` object after fitting.
    """
    X = X.reshape((-1, 1))
    y = y.reshape((-1, 1))

    # random_state=None is train_test_split's default.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=kwargs.get('seed'))

    transformer = PolynomialFeatures(degree=degree, include_bias=False)
    X_train_poly = transformer.fit_transform(X_train)
    # Only transform the test partition: the transformer is fitted on the
    # training data and must not be re-fitted on held-out data.
    X_test_poly = transformer.transform(X_test)

    model.fit(X_train_poly, y_train)
    r_sq = model.score(X_train_poly, y_train)
    print(f"Coefficient of determination: {r_sq}")
    print(f"Intercept: {model.intercept_}")
    print(f"Coefficients: {model.coef_}")

    y_test_predict = model.predict(X_test_poly)
    mae = mean_absolute_error(y_test, y_test_predict)
    mse = mean_squared_error(y_test, y_test_predict)
    rmse = np.sqrt(mse)
    print(f'Mean absolute error: {mae:.2f}')
    print(f'Mean squared error: {mse:.2f}')
    print(f'Root mean squared error: {rmse:.2f}')
    return model

### Support Vector Regression (SVR)

In [14]:
def train_svr_model(X, y, test_size = 0.25, scaler='MinMax', param_C=1, param_gamma=0.1, param_degree=2, param_epsilon=0.1, **kwargs):
    """Train and evaluate RBF, linear and polynomial SVR models on one feature.

    ``X`` and ``y`` are reshaped to columns, split into train/test
    partitions and scaled with ``MinMaxScaler`` (``scaler == 'MinMax'``) or
    ``StandardScaler`` (any other value). The scalers are fitted on the
    training partition only and then applied to the test partition, so the
    reported test errors are not influenced by test-set statistics.

    For each model the training R^2 and intercept are printed first; then
    MAE, MSE and RMSE on the test partition are printed, computed on
    predictions mapped back to the original target scale.

    Keyword arguments
    -----------------
    seed : optional
        Forwarded to ``train_test_split`` as ``random_state``.

    Returns
    -------
    tuple
        ``(svr_rbf, svr_lin, svr_poly)``; the models operate on scaled
        inputs and produce scaled outputs.
    """
    X = X.reshape((-1, 1))
    y = y.reshape((-1, 1))

    # random_state=None is train_test_split's default.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=kwargs.get('seed'))

    # feature scaling
    scaler_cls = MinMaxScaler if scaler == 'MinMax' else StandardScaler
    X_scaler, y_scaler = scaler_cls(), scaler_cls()
    X_train_l = X_scaler.fit_transform(X_train)
    y_train_p = y_scaler.fit_transform(y_train).ravel()
    # transform (not fit_transform): re-fitting on the test partition would
    # scale it differently from the training data and leak test statistics
    # into the inverse transform of the predictions.
    X_test_l = X_scaler.transform(X_test)

    # Create models
    models = (
        ("SVR Radial Basis Function (RBF)",
         SVR(kernel='rbf', C=param_C, gamma=param_gamma, epsilon=param_epsilon)),
        ("SVR Linear",
         SVR(kernel='linear', C=param_C)),
        ("SVR Polynomial",
         SVR(kernel='poly', C=param_C, degree=param_degree, epsilon=param_epsilon)),
    )

    # Train models
    for _, model in models:
        model.fit(X_train_l, y_train_p)

    # Training-set fit quality
    for label, model in models:
        print(label)
        print(f"Coefficient of determination: {model.score(X_train_l, y_train_p)}")
        print(f"Intercept: {model.intercept_}")

    # Test-set errors in the original target units
    for label, model in models:
        scaled_prediction = model.predict(X_test_l).reshape(-1, 1)
        y_test_predict = y_scaler.inverse_transform(scaled_prediction)
        print(label)
        mae = mean_absolute_error(y_test, y_test_predict)
        mse = mean_squared_error(y_test, y_test_predict)
        rmse = np.sqrt(mse)
        print(f'Mean absolute error: {mae:.2f}')
        print(f'Mean squared error: {mse:.2f}')
        print(f'Root mean squared error: {rmse:.2f}')

    svr_rbf, svr_lin, svr_poly = (model for _, model in models)
    return svr_rbf, svr_lin, svr_poly


def train_svr_rbf_model(X, y, test_size = 0.25, scaler='MinMax', param_C=1, param_gamma=0.1, param_epsilon=0.1, **kwargs):
    """Train and evaluate an RBF-kernel SVR on a single feature.

    ``X`` and ``y`` are reshaped to columns, split into train/test
    partitions and scaled with ``MinMaxScaler`` (``scaler == 'MinMax'``) or
    ``StandardScaler`` (any other value). The scalers are fitted on the
    training partition only and then applied to the test partition.

    The training R^2 and intercept are printed, followed by MAE, MSE and
    RMSE on the test partition, computed on predictions mapped back to the
    original target scale.

    Keyword arguments
    -----------------
    seed : optional
        Forwarded to ``train_test_split`` as ``random_state``.

    Returns the fitted ``SVR``, which operates on scaled inputs and outputs.
    """
    X = X.reshape((-1, 1))
    y = y.reshape((-1, 1))

    # random_state=None is train_test_split's default.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=kwargs.get('seed'))

    # feature scaling
    scaler_cls = MinMaxScaler if scaler == 'MinMax' else StandardScaler
    X_scaler, y_scaler = scaler_cls(), scaler_cls()
    X_train_l = X_scaler.fit_transform(X_train)
    y_train_p = y_scaler.fit_transform(y_train).ravel()
    # transform (not fit_transform): re-fitting on the test partition would
    # scale it differently from the training data and leak test statistics
    # into the inverse transform of the predictions.
    X_test_l = X_scaler.transform(X_test)

    # Create and train the model
    svr_rbf = SVR(kernel='rbf', C=param_C, gamma=param_gamma, epsilon=param_epsilon)
    svr_rbf.fit(X_train_l, y_train_p)

    r_sq_rbf = svr_rbf.score(X_train_l, y_train_p)
    print(f"Coefficient of determination: {r_sq_rbf}")
    print(f"Intercept: {svr_rbf.intercept_}")

    # Map the scaled predictions back to the original target units.
    scaled_prediction = svr_rbf.predict(X_test_l).reshape(-1, 1)
    y_test_predict_rbf = y_scaler.inverse_transform(scaled_prediction)

    mae = mean_absolute_error(y_test, y_test_predict_rbf)
    mse = mean_squared_error(y_test, y_test_predict_rbf)
    rmse = np.sqrt(mse)
    print(f'Mean absolute error: {mae:.2f}')
    print(f'Mean squared error: {mse:.2f}')
    print(f'Root mean squared error: {rmse:.2f}')

    return svr_rbf


def cascade_train_svr_rbf_model(model, X, y, test_size = 0.25, scaler='MinMax', **kwargs):
    """Call ``fit`` on an existing SVR model with new data and evaluate it.

    ``X`` and ``y`` are reshaped to columns, split into train/test
    partitions and scaled with ``MinMaxScaler`` (``scaler == 'MinMax'``) or
    ``StandardScaler`` (any other value). The scalers are fitted on the
    training partition only and then applied to the test partition.

    The training R^2 and intercept are printed, followed by MAE, MSE and
    RMSE on the test partition, computed on predictions mapped back to the
    original target scale.

    Keyword arguments
    -----------------
    seed : optional
        Forwarded to ``train_test_split`` as ``random_state``.

    Returns the same ``model`` object after fitting.
    """
    X = X.reshape((-1, 1))
    y = y.reshape((-1, 1))

    # random_state=None is train_test_split's default.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=kwargs.get('seed'))

    # feature scaling
    scaler_cls = MinMaxScaler if scaler == 'MinMax' else StandardScaler
    X_scaler, y_scaler = scaler_cls(), scaler_cls()
    X_train_l = X_scaler.fit_transform(X_train)
    y_train_p = y_scaler.fit_transform(y_train).ravel()
    # transform (not fit_transform): re-fitting on the test partition would
    # scale it differently from the training data and leak test statistics
    # into the inverse transform of the predictions.
    X_test_l = X_scaler.transform(X_test)

    # Train the supplied model
    model.fit(X_train_l, y_train_p)

    r_sq_rbf = model.score(X_train_l, y_train_p)
    print(f"Coefficient of determination: {r_sq_rbf}")
    print(f"Intercept: {model.intercept_}")

    # Map the scaled predictions back to the original target units.
    scaled_prediction = model.predict(X_test_l).reshape(-1, 1)
    y_test_predict_rbf = y_scaler.inverse_transform(scaled_prediction)

    mae = mean_absolute_error(y_test, y_test_predict_rbf)
    mse = mean_squared_error(y_test, y_test_predict_rbf)
    rmse = np.sqrt(mse)
    print(f'Mean absolute error: {mae:.2f}')
    print(f'Mean squared error: {mse:.2f}')
    print(f'Root mean squared error: {rmse:.2f}')

    return model


def train_svr_lin_model(X, y, test_size = 0.25, scaler='MinMax', param_C=1, param_gamma='auto', **kwargs):
    """Train and evaluate a linear-kernel SVR on a single feature.

    ``X`` and ``y`` are reshaped to columns, split into train/test
    partitions and scaled with ``MinMaxScaler`` (``scaler == 'MinMax'``) or
    ``StandardScaler`` (any other value). The scalers are fitted on the
    training partition only and then applied to the test partition.

    The training R^2 and intercept are printed, followed by MAE, MSE and
    RMSE on the test partition, computed on predictions mapped back to the
    original target scale.

    Keyword arguments
    -----------------
    seed : optional
        Forwarded to ``train_test_split`` as ``random_state``.

    Returns the fitted ``SVR``, which operates on scaled inputs and outputs.
    """
    X = X.reshape((-1, 1))
    y = y.reshape((-1, 1))

    # random_state=None is train_test_split's default.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=kwargs.get('seed'))

    # feature scaling
    scaler_cls = MinMaxScaler if scaler == 'MinMax' else StandardScaler
    X_scaler, y_scaler = scaler_cls(), scaler_cls()
    X_train_l = X_scaler.fit_transform(X_train)
    y_train_p = y_scaler.fit_transform(y_train).ravel()
    # transform (not fit_transform): re-fitting on the test partition would
    # scale it differently from the training data and leak test statistics
    # into the inverse transform of the predictions.
    X_test_l = X_scaler.transform(X_test)

    # Create and train the model
    svr_lin = SVR(kernel='linear', C=param_C, gamma=param_gamma)
    svr_lin.fit(X_train_l, y_train_p)

    r_sq_lin = svr_lin.score(X_train_l, y_train_p)
    print(f"Coefficient of determination: {r_sq_lin}")
    print(f"Intercept: {svr_lin.intercept_}")

    # Map the scaled predictions back to the original target units.
    scaled_prediction = svr_lin.predict(X_test_l).reshape(-1, 1)
    y_test_predict_lin = y_scaler.inverse_transform(scaled_prediction)

    mae = mean_absolute_error(y_test, y_test_predict_lin)
    mse = mean_squared_error(y_test, y_test_predict_lin)
    rmse = np.sqrt(mse)
    print(f'Mean absolute error: {mae:.2f}')
    print(f'Mean squared error: {mse:.2f}')
    print(f'Root mean squared error: {rmse:.2f}')

    return svr_lin


def cascade_train_svr_lin_model(model, X, y, test_size = 0.25, scaler='MinMax', **kwargs):
    """Call ``fit`` on an existing linear SVR model with new data and evaluate it.

    ``X`` and ``y`` are reshaped to columns, split into train/test
    partitions and scaled with ``MinMaxScaler`` (``scaler == 'MinMax'``) or
    ``StandardScaler`` (any other value). The scalers are fitted on the
    training partition only and then applied to the test partition.

    The training R^2 and intercept are printed, followed by MAE, MSE and
    RMSE on the test partition, computed on predictions mapped back to the
    original target scale.

    Keyword arguments
    -----------------
    seed : optional
        Forwarded to ``train_test_split`` as ``random_state``.

    Returns the same ``model`` object after fitting.
    """
    X = X.reshape((-1, 1))
    y = y.reshape((-1, 1))

    # random_state=None is train_test_split's default.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=kwargs.get('seed'))

    # feature scaling
    scaler_cls = MinMaxScaler if scaler == 'MinMax' else StandardScaler
    X_scaler, y_scaler = scaler_cls(), scaler_cls()
    X_train_l = X_scaler.fit_transform(X_train)
    y_train_p = y_scaler.fit_transform(y_train).ravel()
    # transform (not fit_transform): re-fitting on the test partition would
    # scale it differently from the training data and leak test statistics
    # into the inverse transform of the predictions.
    X_test_l = X_scaler.transform(X_test)

    # Train the supplied model
    model.fit(X_train_l, y_train_p)

    r_sq_lin = model.score(X_train_l, y_train_p)
    print(f"Coefficient of determination: {r_sq_lin}")
    print(f"Intercept: {model.intercept_}")

    # Map the scaled predictions back to the original target units.
    scaled_prediction = model.predict(X_test_l).reshape(-1, 1)
    y_test_predict_lin = y_scaler.inverse_transform(scaled_prediction)

    mae = mean_absolute_error(y_test, y_test_predict_lin)
    mse = mean_squared_error(y_test, y_test_predict_lin)
    rmse = np.sqrt(mse)
    print(f'Mean absolute error: {mae:.2f}')
    print(f'Mean squared error: {mse:.2f}')
    print(f'Root mean squared error: {rmse:.2f}')

    return model


def train_svr_poly_model(X, y, test_size = 0.25, scaler='MinMax', param_C=1, param_gamma='auto', param_degree=2, param_epsilon=0.1, **kwargs):
    """Train and evaluate a polynomial-kernel SVR on a single feature.

    ``X`` and ``y`` are reshaped to columns, split into train/test
    partitions and scaled with ``MinMaxScaler`` (``scaler == 'MinMax'``) or
    ``StandardScaler`` (any other value). The scalers are fitted on the
    training partition only and then applied to the test partition.

    The training R^2 and intercept are printed, followed by MAE, MSE and
    RMSE on the test partition, computed on predictions mapped back to the
    original target scale.

    Keyword arguments
    -----------------
    seed : optional
        Forwarded to ``train_test_split`` as ``random_state``.

    Returns the fitted ``SVR``, which operates on scaled inputs and outputs.
    """
    X = X.reshape((-1, 1))
    y = y.reshape((-1, 1))

    # random_state=None is train_test_split's default.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=kwargs.get('seed'))

    # feature scaling
    scaler_cls = MinMaxScaler if scaler == 'MinMax' else StandardScaler
    X_scaler, y_scaler = scaler_cls(), scaler_cls()
    X_train_l = X_scaler.fit_transform(X_train)
    y_train_p = y_scaler.fit_transform(y_train).ravel()
    # transform (not fit_transform): re-fitting on the test partition would
    # scale it differently from the training data and leak test statistics
    # into the inverse transform of the predictions.
    X_test_l = X_scaler.transform(X_test)

    # Create and train the model
    svr_poly = SVR(kernel='poly', C=param_C, gamma=param_gamma, epsilon=param_epsilon, degree=param_degree)
    svr_poly.fit(X_train_l, y_train_p)

    r_sq_poly = svr_poly.score(X_train_l, y_train_p)
    print(f"Coefficient of determination: {r_sq_poly}")
    print(f"Intercept: {svr_poly.intercept_}")

    # Map the scaled predictions back to the original target units. A single
    # code path serves both scaler types; the StandardScaler branch used to
    # store its result under a different name and raised NameError below.
    scaled_prediction = svr_poly.predict(X_test_l).reshape(-1, 1)
    y_test_predict_poly = y_scaler.inverse_transform(scaled_prediction)

    mae = mean_absolute_error(y_test, y_test_predict_poly)
    mse = mean_squared_error(y_test, y_test_predict_poly)
    rmse = np.sqrt(mse)
    print(f'Mean absolute error: {mae:.2f}')
    print(f'Mean squared error: {mse:.2f}')
    print(f'Root mean squared error: {rmse:.2f}')

    return svr_poly


def cascade_train_svr_poly_model(model, X, y, test_size = 0.25, scaler='MinMax', **kwargs):
    """Call ``fit`` on an existing polynomial SVR model with new data and evaluate it.

    ``X`` and ``y`` are reshaped to columns, split into train/test
    partitions and scaled with ``MinMaxScaler`` (``scaler == 'MinMax'``) or
    ``StandardScaler`` (any other value). The scalers are fitted on the
    training partition only and then applied to the test partition.

    The training R^2 and intercept are printed, followed by MAE, MSE and
    RMSE on the test partition, computed on predictions mapped back to the
    original target scale.

    Keyword arguments
    -----------------
    seed : optional
        Forwarded to ``train_test_split`` as ``random_state``.

    Returns the same ``model`` object after fitting.
    """
    X = X.reshape((-1, 1))
    y = y.reshape((-1, 1))

    # random_state=None is train_test_split's default.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=kwargs.get('seed'))

    # feature scaling
    scaler_cls = MinMaxScaler if scaler == 'MinMax' else StandardScaler
    X_scaler, y_scaler = scaler_cls(), scaler_cls()
    X_train_l = X_scaler.fit_transform(X_train)
    y_train_p = y_scaler.fit_transform(y_train).ravel()
    # transform (not fit_transform): re-fitting on the test partition would
    # scale it differently from the training data and leak test statistics
    # into the inverse transform of the predictions.
    X_test_l = X_scaler.transform(X_test)

    # Train the supplied model
    model.fit(X_train_l, y_train_p)

    r_sq_poly = model.score(X_train_l, y_train_p)
    print(f"Coefficient of determination: {r_sq_poly}")
    print(f"Intercept: {model.intercept_}")

    # Map the scaled predictions back to the original target units. A single
    # code path serves both scaler types; the StandardScaler branch used to
    # store its result under a different name and raised NameError below.
    scaled_prediction = model.predict(X_test_l).reshape(-1, 1)
    y_test_predict_poly = y_scaler.inverse_transform(scaled_prediction)

    mae = mean_absolute_error(y_test, y_test_predict_poly)
    mse = mean_squared_error(y_test, y_test_predict_poly)
    rmse = np.sqrt(mse)
    print(f'Mean absolute error: {mae:.2f}')
    print(f'Mean squared error: {mse:.2f}')
    print(f'Root mean squared error: {rmse:.2f}')

    return model

### Full Implementation

In [15]:
# Load both complete records; element [1] of each result is the metadata
# dictionary that is printed here.
# NOTE(review): the cells visible here only use the metadata of these two
# reads - confirm whether the full sample arrays are needed later.
signal_ecg_0 = wfdb.rdsamp(ECG_dataset)
signal_resp_0 = wfdb.rdsamp(RESP_dataset)
print(f'ECG DATA: {signal_ecg_0[1]}')
print(f'RESP DATA: {signal_resp_0[1]}')

ECG DATA: {'fs': 500, 'sig_len': 78684614, 'n_sig': 1, 'base_date': None, 'base_time': None, 'units': ['mV'], 'sig_name': ['II'], 'comments': []}
RESP DATA: {'fs': 50, 'sig_len': 7868296, 'n_sig': 1, 'base_date': None, 'base_time': None, 'units': ['NU'], 'sig_name': ['RESP'], 'comments': []}


In [16]:
# Total ECG recording length, reported in seconds, minutes and hours.
ecg_info = signal_ecg_0[1]
totalDuration_ECG = num2Time(num=ecg_info['sig_len'], fs=ecg_info['fs'])
duration_minutes = secs2minutes(totalDuration_ECG)
duration_hours = secs2hours(totalDuration_ECG)
print(f'{totalDuration_ECG} sec, {duration_minutes} minutes, {duration_hours} hours')

157369.228 sec, 2622.8204666666666 minutes, 43.71367444444444 hours


In [17]:
# Start and end time defined
start_time_secs = hours2secs(1)
end_time_secs = hours2secs(2)

ECG_startNum = time2Num(start_time_secs, signal_ecg_0[1]['fs'])
ECG_endNum = time2Num(end_time_secs, signal_ecg_0[1]['fs'])
print(f'start: {ECG_startNum}, end: {ECG_endNum}')
RESP_startNum = time2Num(start_time_secs, signal_resp_0[1]['fs'])
RESP_endNum = time2Num(end_time_secs, signal_resp_0[1]['fs'])
print(f'start: {RESP_startNum}, end: {RESP_endNum}')

start: 1800000, end: 3600000
start: 180000, end: 360000


In [18]:
# Read the selected window of each channel; read_Data also returns the
# window's start and end expressed in seconds.
ecg_window = read_Data(ECG_dataset, startNum=ECG_startNum, endNum=ECG_endNum)
signal_ECG, signal_ECG_startTime_secs, signal_ECG_endTime_secs = ecg_window
resp_window = read_Data(RESP_dataset, startNum=RESP_startNum, endNum=RESP_endNum)
signal_RESP, signal_RESP_startTime_secs, signal_RESP_endTime_secs = resp_window

In [19]:
# Both channels are clipped with the same settings: 10th/90th percentiles
# widened by 1.5 times their spread.
iqr_settings = dict(hiPerc=90, loPerc=10, cutoff_factor=1.5)

print(signal_ECG[0].shape)
signal_ECG_1 = iqr_smooth(signal=signal_ECG[0], **iqr_settings)
print(signal_RESP[0].shape)
signal_RESP_1 = iqr_smooth(signal=signal_RESP[0], **iqr_settings)

(1800000, 1)
90th percentile: 0.12798088070831984, 10th percentile: -0.17005678669461677, IQR: 0.2980376674029366
Identified outliers: 79092
Data Shape: (1800000,)
(180000, 1)
90th percentile: 23.419280576061812, 10th percentile: 21.09773410503232, IQR: 2.3215464710294924
Identified outliers: 934
Data Shape: (180000,)


In [20]:
# Rescale both clipped signals using norm_signal's default range.
signal_ECG_2, signal_RESP_2 = (
    norm_signal(signal=clipped) for clipped in (signal_ECG_1, signal_RESP_1)
)

In [21]:
# Derive the rate series of each channel at that channel's sampling frequency.
ecg_window_fs = signal_ECG[1]['fs']
resp_window_fs = signal_RESP[1]['fs']
hrs_0 = cal_heartrate(signal=signal_ECG_2, fs=ecg_window_fs)
rrs_0 = cal_resprate(signal=signal_RESP_2, fs=resp_window_fs)

In [22]:
# Bring both rate series to a common length: NaNs are replaced by 0 in both,
# and one of the two series is resampled to the other's length.
hr_len, rr_len = hrs_0.shape[0], rrs_0.shape[0]
if rr_len < hr_len:
    rrs_1 = data_fixNan(rrs_0)
    hrs_1 = data_resample(data_fixNan(hrs_0), rr_len)
else:
    hrs_1 = data_fixNan(hrs_0)
    rrs_1 = data_resample(data_fixNan(rrs_0), hr_len)

In [23]:
# Show the aligned heart-rate and respiration-rate series, one per line.
print(hrs_1, rrs_1, sep="\n")

[ 52.309414  -10.458236    5.7722535 ... 119.45426   110.43015
 125.91644  ]
[ 0.        0.        0.       ... 30.927835 30.927835 30.927835]


In [24]:
# Correlation between respiration rate and heart rate, without the scatter plot.
correlationTest(signal_1=rrs_1, signal_2=hrs_1, plot=False)

[[160.48146965 -21.74480548]
 [-21.74480548 160.40714127]]
Pearsons correlation: -0.136
Spearmans correlation: -0.206


In [26]:
# Train every model family on respiration rate (X) -> heart rate (y).
divider = "---------------------------------------------"

print(divider)
print("--- Linear Regression ---")
lr_model_1 = train_lr_model(X=rrs_1, y=hrs_1)

print(divider)
print("--- Polynomial Regression ---")
pr_model_1 = train_pr_model(X=rrs_1, y=hrs_1, degree=6)

print(divider)
print("--- SVR ---")
# train_svr_model fits the RBF, linear and polynomial kernels in one call;
# train_svr_rbf_model(X=rrs_1, y=hrs_1) would fit the RBF kernel alone.
svr_rbf_1, svr_lin_1, svr_poly_1 = train_svr_model(X=rrs_1, y=hrs_1)

---------------------------------------------
--- Linear Regression ---
Coefficient of determination: 0.018250025191659858
Intercept: [130.04575]
Coefficients: [[-0.13510673]]
Mean absolute error: 8.56
Mean squared error: 157.23
Root mean squared error: 12.54
---------------------------------------------
--- Polynomial Regression ---
Coefficient of determination: 0.026462602305085015
Intercept: [127.47017]
Coefficients: [[ 4.0392306e-10  9.4348724e-09  3.7569964e-07  5.9631006e-06
  -3.3849508e-07  4.0051282e-09]]
Mean absolute error: 8.56
Mean squared error: 158.76
Root mean squared error: 12.60
---------------------------------------------
--- SVR ---
SVR Radial Basis Function (RBF)
Coefficient of determination: -0.1043429559831861
Intercept: [0.1822884]
SVR Linear
Coefficient of determination: -0.17089279679719538
Intercept: [0.55173055]
SVR Polynomial
Coefficient of determination: -0.1643583433300433
Intercept: [0.55534475]
SVR Radial Basis Function (RBF)
Mean absolute error: 8.70
