In [2]:
#from kats.models.prophet import ProphetModel, ProphetParams
#from kats.consts import TimeSeriesData
#from kats.models.var import VARModel, VARParams
import pandas as pd

from sklearn.metrics import r2_score
from sklearn.metrics import mean_squared_error
import math
import numpy as np
from statsmodels.tsa.stattools import adfuller
import matplotlib.pyplot as plt
import os
import glob
from arch import arch_model
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from statsmodels.tsa.holtwinters import ExponentialSmoothing
import warnings

## Read in Data

Change the `path` string below to the local directory that contains all the data files:

In [3]:
# Directory that holds the `*_RRI.txt` / `*_BIS.txt` files read by `read_data`.
# Set the HRV_DATA_PATH environment variable to point at your own copy of the
# data; the hard-coded location below is only the fallback default.
path = os.environ.get(
    "HRV_DATA_PATH",
    r'/Users/edawg/Desktop/Duke/Time Series/Heart-Rate-Variability-Repo/01_data/1st topic',
)

In [9]:
def read_data(path):
    """Load every patient's RRI and BIS signal from ``path`` into two wide frames.

    Each ``<patient>_RRI.txt`` file found in ``path`` is paired with the
    ``<patient>_BIS.txt`` file that shares its prefix. Both files are parsed as
    header-less, comma-separated ``time,signal`` tables and only the ``signal``
    column is kept. The ``time`` column is discarded, so signals are aligned
    by row position; recordings shorter than the longest one are padded with NaN.

    Patients are numbered 1..N in sorted filename order, so the column names
    are stable across runs and machines.

    Parameters
    ----------
    path : str
        Directory containing the ``*_RRI.txt`` and ``*_BIS.txt`` files.

    Returns
    -------
    tuple of (pandas.DataFrame, pandas.DataFrame)
        ``rris`` with columns ``rri_1 .. rri_N`` and ``biss`` with columns
        ``bis_1 .. bis_N``. Both frames share the same positional index.
        Two empty frames are returned when no RRI file is found.

    Notes
    -----
    Errors raised by pandas while reading a file (for example a missing BIS
    companion file) are not caught here and propagate to the caller.
    """
    rri_suffix = "_RRI.txt"
    # glob returns files in arbitrary (filesystem) order; sort so that a given
    # patient always receives the same number.
    rri_files = sorted(glob.glob(os.path.join(path, "*" + rri_suffix)))

    # Collect every signal first and concatenate once, instead of growing a
    # DataFrame inside the loop.
    signals = []
    for patient_num, rri_file in enumerate(rri_files, start=1):
        # Strip the directory and the "_RRI.txt" suffix to get the patient id.
        patient = os.path.basename(rri_file)[:-len(rri_suffix)]
        bis_file = os.path.join(path, patient + "_BIS.txt")

        # read in the data
        rri = pd.read_table(rri_file, sep=",", names=["time", "signal"])
        bis = pd.read_table(bis_file, sep=",", names=["time", "signal"])

        # keep only the signal, named after the patient number
        signals.append(rri["signal"].rename("rri_" + str(patient_num)))
        signals.append(bis["signal"].rename("bis_" + str(patient_num)))

    if not signals:
        # pd.concat refuses an empty list; mirror the "no data" case explicitly.
        return pd.DataFrame(), pd.DataFrame()

    # One outer-join concat over all signals keeps a common index for both
    # returned frames (the union of every recording's row positions).
    all_patients_df = pd.concat(signals, axis=1)

    rris = all_patients_df[[col for col in all_patients_df if col.startswith("rri")]].copy()
    biss = all_patients_df[[col for col in all_patients_df if col.startswith("bis")]].copy()

    return rris, biss

In [38]:
def preprocess(patient, differencing = True, periods = 1, smoothing = True, smoothing_level = 0.1):
    """Optionally difference and then exponentially smooth one signal.

    Parameters
    ----------
    patient : pandas.Series
        The signal of a single patient.
    differencing : bool
        When true, replace the series by its ``periods``-lag difference and
        drop the resulting missing values.
    periods : int
        Lag passed to ``Series.diff``.
    smoothing : bool
        When true, fit an additive-trend, non-seasonal ``ExponentialSmoothing``
        model and return its in-sample fitted values.
    smoothing_level : float
        Smoothing level handed to the model's ``fit`` call.

    Returns
    -------
    The processed series; the input object itself when both steps are disabled.
    """
    series = patient

    if differencing:
        differenced = series.diff(periods=periods)
        series = differenced.dropna()

    # Nothing left to do when smoothing is switched off.
    if not smoothing:
        return series

    smoother = ExponentialSmoothing(series, trend="add", seasonal=None)
    fitted = smoother.fit(smoothing_level=smoothing_level)
    return fitted.fittedvalues

In [40]:
def create_datasets(rris, biss, differencing = True, periods = 1, smoothing = True, smoothing_level = 0.1):
    """Run ``preprocess`` on every column of the RRI and BIS frames.

    Parameters
    ----------
    rris, biss : pandas.DataFrame
        One column per patient signal.
    differencing, periods, smoothing, smoothing_level
        Forwarded unchanged to ``preprocess`` for every column.

    Returns
    -------
    tuple of (pandas.DataFrame, pandas.DataFrame)
        Frames rebuilt from the preprocessed columns, keyed by the original
        column names (RRI first, BIS second).
    """
    # The same options apply to every column of both frames.
    options = (differencing, periods, smoothing, smoothing_level)

    rri_columns = {name: preprocess(rris[name], *options) for name in rris.columns}
    bis_columns = {name: preprocess(biss[name], *options) for name in biss.columns}

    return pd.DataFrame(rri_columns), pd.DataFrame(bis_columns)

### Create datasets

In [41]:
# Suppress all warnings. NOTE(review): this installs a process-wide filter, so
# it stays in effect for later cells too and can hide warnings raised while
# fitting the smoothing models.
warnings.filterwarnings("ignore")

# Load the raw per-patient RRI/BIS signals from `path`, then preprocess them
# with the `create_datasets` defaults (differencing=True, periods=1,
# smoothing=True, smoothing_level=0.1).
rris_raw, biss_raw = read_data(path)
rris, biss = create_datasets(rris_raw, biss_raw)

### Export datasets

In [44]:
# Write the preprocessed frames as CSV files. The filenames are relative, so
# they land in the current working directory and overwrite any earlier export.
rris.to_csv("rris_preprocessed.csv")
biss.to_csv("biss_preprocessed.csv")