In [3]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import wfdb
from wfdb import processing

from gc import collect as collect_garbage
from psutil import virtual_memory
from os import scandir

In [4]:
# Select the interactive widget backend for matplotlib figures in this notebook
# (the `widget` backend is provided by the ipympl package).
%matplotlib widget

In [6]:
# Root directory of the raw waveform CSV exports. The trailing slash matters:
# file paths are built from it by plain string concatenation.
#folder = "../../Deidentified-Raw-Waveforms/"
folder = "C:/Users/aidan/Box/Deidentified-Raw-Waveforms/"

# ECG channel columns available in each recording, keyed by the recording tag
# ("1a" ... "7e"). The channel order within a tuple is the order in which the
# columns are listed for that file: 257, then 258, then 317.
_ecg_channels = {
    "1a": ("257",),
    "1b": ("257", "258"),
    "1c": ("257", "258"),
    "1d": ("257", "258", "317"),
    "1e": ("258",),

    "2a": ("257", "258"),
    "2b": ("258",),
    "2c": ("257",),
    "2d": ("257", "258"),
    "2e": ("257", "258"),

    "3a": ("258",),
    "3b": ("258",),
    "3c": ("258",),
    "3d": ("258",),
    "3e": ("257", "258", "317"),

    "4a": ("257", "258"),
    "4b": ("257", "258"),
    "4c": ("257",),
    "4d": ("257", "258"),
    "4e": ("257", "258"),

    "5a": ("258",),
    "5b": ("258",),
    "5c": ("258",),
    "5d": ("258", "317"),
    "5e": ("258",),

    "6a": ("257", "258"),
    "6b": ("258",),
    "6c": ("258",),
    "6d": ("258",),
    "6e": ("258",),

    "7a": ("257", "258"),
    "7b": ("258",),
    "7c": ("258",),
    "7d": ("257", "258", "317"),
    "7e": ("258",),
}

# Every recording is stored as raw_waves_data_<tag>.csv
_csv_name = "raw_waves_data_{}.csv".format

# File name -> columns to load from it ("time" first, then the ECG channels).
coldict = {
    _csv_name(tag): ["time", *channels]
    for tag, channels in _ecg_channels.items()
}

# File name -> short recording tag used when naming output files.
namedict = {_csv_name(tag): tag for tag in _ecg_channels}

In [7]:
# Run a garbage-collection pass first (collect_garbage is gc.collect) so the
# reading below is not inflated by objects that are already unreachable.
collect_garbage()
# Last expression of the cell, so its value is displayed: psutil's system-wide
# memory snapshot (total, available, percent, used, free).
virtual_memory()

svmem(total=12655771648, available=5653020672, percent=55.3, used=7002750976, free=5653020672)

In [10]:
# We need to replace some of the files with the shifted files produced in notebook 06.
# The replacements live in a sub-folder of `folder` under the same file names.
shifted_subfolder = "06-shifted-2-and-3/"
shifted_names = {
    "raw_waves_data_2a.csv",
    "raw_waves_data_2b.csv",
    "raw_waves_data_3a.csv",
    "raw_waves_data_3b.csv",
}

# scandir yields directory entries in arbitrary order, so sort the names to get
# a reproducible list, and require a real ".csv" suffix so that names merely
# containing ".csv" (e.g. backups) are not picked up.
csv_names = sorted(
    entry.name
    for entry in scandir(folder)
    if entry.is_file() and entry.name.endswith(".csv")
)

# Swap in the shifted copies by file name rather than by list position, so the
# substitution cannot land on the wrong recording if the listing changes.
all_files = [
    folder + (shifted_subfolder if name in shifted_names else "") + name
    for name in csv_names
]

In [None]:
# This is the code that detects QRS complexes in the ECG


def _combine_channels(df, cols):
    """Collapse the ECG columns of ``df`` into a single numeric signal.

    ``cols[0]`` is the time column and is skipped. The remaining columns are
    merged in the order given -- 257 (ECG1), then 258 (ECG2), then 317 (ECG3)
    -- so a later channel only supplies samples that every earlier channel is
    missing. Values still missing after that are forward-filled.

    Returns a numeric ``pd.Series`` sharing ``df``'s index.
    """
    signal = df[cols[1]]
    for col in cols[2:]:
        signal = signal.combine_first(df[col])
    return pd.to_numeric(signal.ffill())


def _remove_spikes(signal, bound=10, delta=125):
    """Erase spikes and troughs and refill them with the last good value.

    Samples ``<= -bound`` or ``>= bound`` are pinpointed as out of bounds.
    Each pinpointed sample, plus ``delta`` samples to its left and right, is
    blanked and then forward-filled.

    Returns a new ``pd.Series``; ``signal`` itself is not modified.
    """
    out_of_bounds = (signal <= -bound) | (signal >= bound)

    # 1.0 at each out-of-bounds sample, NaN elsewhere. Using a float marker
    # (instead of writing NaN into a boolean Series) avoids an object-dtype
    # upcast. Limited forward/backward fills then spread each marker over its
    # +/- delta neighbourhood.
    marker = out_of_bounds.astype("float64").where(out_of_bounds)
    near_spike = marker.ffill(limit=delta).bfill(limit=delta).notna()

    # The trailing bfill only has an effect when the recording *starts* inside
    # a blanked region: forward fill has nothing to copy from there, and NaNs
    # left in the signal would poison the detector's filters for that chunk.
    return signal.mask(near_spike).ffill().bfill()


def _detect_rpeaks(samples, fs, chunk=10000, tail=20000):
    """Locate R peaks in ``samples`` by running XQRS chunk by chunk.

    Parameters
    ----------
    samples : 1-D numpy array holding the cleaned ECG signal.
    fs : sampling frequency passed to XQRS.
    chunk : number of samples per detection chunk.
    tail : length of the final window that covers the end of the signal
        (should be at least ``chunk`` so the trailing partial chunk is covered).

    Returns
    -------
    list of int
        Positions of the detected peaks, relative to the start of ``samples``.
    """
    rpeaks = []
    N = len(samples)
    if N == 0:
        return rpeaks

    num_chunks = N // chunk + 1
    print("Signal broken into " + str(num_chunks) + " chunks")

    # Find R peaks in all but the last (partial) chunk; a short last chunk
    # tends to make the detector raise, so the tail window below handles it.
    for k in range(N // chunk):
        if k % 1000 == 0:
            # I've found this choice of progress marker works for this chunk
            # size and signal length. If those values change, then this
            # condition will need to be modified too
            print(f"{k / num_chunks:.2%} done")

        lo = k * chunk
        try:
            xqrs = processing.XQRS(sig=samples[lo:lo + chunk], fs=fs)
            xqrs.detect(verbose=False)
        except ValueError as err:
            # Report and move on: one bad chunk must not silently discard
            # every chunk that follows it.
            print("Skipping chunk " + str(k) + " (samples " + str(lo) + "-"
                  + str(lo + chunk) + "): " + str(err))
            continue

        # xqrs recognized the chunk as starting from 0, so we have to shift
        # the R peaks according to the left endpoint of the chunk
        rpeaks.extend(int(lo + peak) for peak in xqrs.qrs_inds)
    print("R peaks outside of the last chunk located")

    # Delineate an ending window that gets the end of the signal. Clamp at 0
    # so a short recording does not produce a negative start index.
    lo = max(0, N - tail)
    xqrs = processing.XQRS(sig=samples[lo:N], fs=fs)
    xqrs.detect(verbose=False)

    # The window overlaps chunks that were already processed, so keep only
    # peaks beyond the last one found so far. The detector's indices are
    # relative to the window and must be shifted by `lo` before both the
    # comparison and the append.
    last_found = max(rpeaks, default=-1)
    rpeaks.extend(
        int(lo + peak) for peak in xqrs.qrs_inds if lo + peak > last_found
    )
    print("Peaks in final chunk located")

    return rpeaks


for group in range(1, 8):
    # Select this group's recordings by file name only, so that an underscore
    # elsewhere in the directory path cannot cause false matches.
    files = [
        file for file in all_files
        if "_" + str(group) in file.split("/")[-1]
    ]

    for file in files:
        # Preliminaries
        key = file.split("/")[-1]
        cols = coldict[key]
        freq = 250
        print("Starting now with " + key)

        # Read in the data
        df = pd.read_csv(file, usecols=cols)
        print("Data loaded in")

        signal = _combine_channels(df, cols)
        print("Signals combined and filled in")

        signal = _remove_spikes(signal)
        print("Troughs and spikes removed")

        # XQRS expects a plain array; to_numpy() avoids pandas index
        # semantics inside the detector.
        rpeaks = _detect_rpeaks(signal.to_numpy(), fs=freq)

        # Grab the time stamps, write them to a file
        df.loc[rpeaks, "time"].to_csv(
            "01-outputs/00-rpeaks/00-rpeaks/rpeaks_" + namedict[key] + ".csv"
        )
        print("Output file written")

        # Delete all of the large variables to save space before the next file
        del df
        del signal
        del rpeaks
        collect_garbage()

        print(str(virtual_memory()[2]) + " memory usage")


