#  Heart rhythm classification from raw ECG signals

## Purpose
This file cleans the raw data and extracts several features manually and saves them in a new csv file.
Train and test data are treated separately.

## Notes
Interesting insights from Nature paper published on 21.9.2021
- https://www.nature.com/articles/s41598-021-97118-5?proof=t%3B#Tab6
- https://www.youtube.com/watch?v=3tfin4sSBFQ
- Focus on only two features PR and RT.
- MLP and SVM
#### Done
- Verified heartbeat feature extraction transformer by comparing mean beat of sample 0 (np.array_equal and plot) -> looks OK
    - extracted features hb_feat = extractor.fit_transform(...) return shape (num_samples, num_features)
    - mean = hb_feat[0][:180] if no downsampling gives mean beat
- Rpeaks in hb extractor and delineation extractor for sample 0, 1, 2, 3 are the same -> OK

https://ecgwaves.com/topic/ecg-normal-p-wave-qrs-complex-st-segment-t-wave-j-point/

In [1]:
import pandas as pd
import numpy as np
import neurokit2 as nk
import biosppy.signals.ecg as ecg
from sklearn.base import BaseEstimator, TransformerMixin
from imblearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.feature_selection import SelectKBest
from sklearn.decomposition import KernelPCA
from sklearn.model_selection import train_test_split
from sklearn.pipeline import FeatureUnion
from sklearn.svm import SVC
from xgboost import XGBClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.neural_network import MLPClassifier
import seaborn as sns
import matplotlib.pyplot as plt
sns.set('talk')
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
%matplotlib inline

## Params

In [2]:
sr = 300 #sampling rate
downsampling = False
trim_beginning = False

# sampling rate needs adjustment if downsampling is applied
sampling_divisor = 2 # default: reduce sampling frequency by factor 2

## Read data

In [3]:
path = '/home/rapwag01/eth/aml/task2/'

In [4]:
%%time
df_train = pd.read_csv(path+'X_train.csv')

CPU times: user 1min 34s, sys: 2.32 s, total: 1min 36s
Wall time: 1min 37s


In [5]:
%%time
df_test = pd.read_csv(path+'X_test.csv')

CPU times: user 42.1 s, sys: 402 ms, total: 42.5 s
Wall time: 42.9 s


In [6]:
# don't need targets for preprocessing
#df_target = pd.read_csv(path+'y_train.csv')

In [29]:
df_train.shape

NameError: name 'df_train' is not defined

In [28]:
%%time
# make it arrays as custom transformers only accept np arrays
X_train = df_train.drop('id', axis=1).values
#y_train = df_target.drop('id', axis=1).values.ravel()
X_test = df_test.drop('id', axis=1).values
# don't need to split data for initial preprocessing (no data leakage as operating on independent rows only)
# X_train, X_valid, y_train, y_valid = train_test_split(X, y, random_state=0, train_size=train_size)

NameError: name 'df_train' is not defined

In [27]:
X_train.shape

NameError: name 'X_train' is not defined

---

## Customised Transformers for Preprocessing Pipeline

In [4]:
class MyCleaning(BaseEstimator, TransformerMixin):
    
    def __init__(self, sampling_rate, detrend_method='locreg', filter_method='neurokit', \
                 trim_beginning=True, downsampling=True, skip_num_samples=540, sampling_divisor=2):
        
        self.sampling_rate = sampling_rate
        self.detrend_method = detrend_method
        self.filter_method = filter_method
        self.order = -1
        self.trim_beginning = trim_beginning
        self.downsampling = downsampling
        self.sampling_divisor = sampling_divisor
        self.skip_num_samples = skip_num_samples
        
        if self.detrend_method == 'constant':
            self.order = 0
        elif self.detrend_method == 'linear':
            self.order = 1
        elif self.detrend_method == 'quadratic':
            self.order = 2
        elif self.detrend_method == 'cubic':
            self.order = 3
        elif self.detrend_method == 'poly10':
            self.order = 10
        elif self.detrend_method == 'trav':
            self.detrend_method='tarvainen2002'      
        elif self.detrend_method == 'loess':
            self.detrend_method = 'loess'


    def fit(self, X, y = None):
        return self

    def transform(self, X, y = None):
        """np.apply_along_axis is slower than for loop.
        Keeping for loop.
        Input X should be numpy array, not pd series. Trying to be consistent with sklearn.
        """
        print('Running cleaning...')
        if self.trim_beginning:
            X = self._trim_beginning(X)
            
        if self.downsampling:
            print(f'You are downsampling data by a factor {self.sampling_divisor}. Be aware to adjust sampling frequency for subsequent transformers!')
            X = self._downsampling(X)

        clean_signals = []
        
        # TODO fix shape if one row only!
        if len(X.shape) < 2:
            raise ValueError('Make sure to reshape array to (1, -1) if you feed in one sample only.')

        for sample in np.arange(X.shape[0]):

            if sample % 500 == 0:
                print(f'cleaning sample {sample}')
            
            # drop nans
            ecg_sample = X[sample]
            ecg_nonans = ecg_sample[~np.isnan(ecg_sample)]
            clean = self._cleaning(ecg_nonans)
            
            # pad array with nans to match previous dimensions
            pad_width = X[sample].shape[0]-clean.shape[0]           
            cleaned_padded = np.pad(clean, pad_width=(0, pad_width), mode='constant', constant_values=np.nan)           
            clean_signals.append(cleaned_padded)
            
        return np.stack(clean_signals)
    
    def _trim_beginning(self, X, skip_num_samples=540):
        """Trim signal at begining and skip n first samples given by skip_num_samples.
        Applied before downsampling if downsampling=True.
        By default skips ~3 heartbeats, i.e. 3x180 samples, where 180 corresponds to the heartbeat extraction
        sample size."""
        X_trimmed = X[:,skip_num_samples:]
        assert X_trimmed.shape[0] == X.shape[0]
        assert X_trimmed.shape[1] == X.shape[1]-skip_num_samples
        
        return X_trimmed
    
    def _downsampling(self, X, sampling_divisor=2):
        """Selects every n-th (sampling_divisor) timestep.
        By default cuts sampling rate into two."""
        X_downsampled = X[:,::sampling_divisor]
        assert X_downsampled.shape[0] == X.shape[0]
        assert X_downsampled.shape[1] == X.shape[1]/sampling_divisor
        return X_downsampled
    
    def _cleaning(self, raw_ecg):
        """
        For detrending: https://neurokit2.readthedocs.io/en/latest/functions.html#neurokit2.signal.signal_detrend
        For filtering: https://neurokit2.readthedocs.io/en/latest/functions.html#neurokit2.ecg.ecg_clean
        """

        detrended = nk.signal_detrend(raw_ecg, order=self.order, method=self.detrend_method, \
                                      window=1.5*100, stepsize=0.02*100)        

        if self.filter_method == 'custom_butterworth':
            cleaned = nk.signal_filter(detrended, sampling_rate=self.sampling_rate, lowcut=2, highcut=9, method='butterworth')
        else:
             # ecg_clean only applies filtering, no detrending
            cleaned = nk.ecg_clean(detrended, sampling_rate=self.sampling_rate, method=self.filter_method)

        return cleaned

In [5]:
class MyHeartBeatExtractor(BaseEstimator, TransformerMixin):
    """Extracts heartbeats for each cleaned sample separately (num beats x length template).
    Each feature is an aggregation over different heartbeats extracted and has dim of the standard length of one beat (tempalte).
    Template length is the same for all samples but number of beats extracted changes.
    We need to average over number of heartbeats extracted.
    Extracted features correspond to aggregated heartbeat templates for each sample, e.g. if template length is 180, 
    mean beat is an 180-step averaged timeseries, max beat is the maximum amplitude at 180 different timesteps, etc.
    
    """
    def __init__(self, sampling_rate):
        self.sampling_rate = sampling_rate
    
    def fit(self, X, y=None):
        return self
    
    def transform(self, X, y=None):
        print('Running heartbeat feature extraction...')
        features = []

        for id_clean in np.arange(X.shape[0]):
            if id_clean % 500 == 0:
                print(f'extracting features from sample {id_clean}')
            sample_features = self._get_features_from_sample(X[id_clean])
            features.append(sample_features)
            print('shape sample features:', sample_features.shape)
        
        # should return X_new (num_samples, num_features)
        X_new = np.vstack(features)
        print('final shape of new heartbeat template features:', X_new.shape)
        return X_new

    def _get_rpeaks(self, ecg_cleaned):
        instant_peaks, rpeaks = nk.ecg_peaks(ecg_cleaned, sampling_rate=self.sampling_rate)
        return instant_peaks, rpeaks['ECG_R_Peaks']
    
    def _get_features_from_sample(self, ecg_cleaned):
        """Returns a list of aggregated heartbeat features for each sample"""

        ecg_nonans = ecg_cleaned[~np.isnan(ecg_cleaned)]
        _, rpeaks = self._get_rpeaks(ecg_nonans)
        # print('rpeaks', rpeaks)
        beats = ecg.extract_heartbeats(ecg_nonans, rpeaks, self.sampling_rate)['templates']

        if len(beats) >= 2:
            # aggregate over heartbeats
            mean_beat = np.nanmean(beats, axis=0) # average over beats with shape (num beats x standard length per beat) to get mean with dim (length per beat)
            median_beat = np.nanmedian(beats, axis=0) # shape (standard heartbeat length,)
            std_beat = np.nanstd(beats, axis=0)
            max_beat = np.nanmax(beats, axis=0)
            min_beat = np.nanmin(beats, axis=0)
        else:
            print('HB extractor, length of beat < 2, filling with 180 NaNs. Needs adjustment if sampling rate not 300.')
            # TODO resolve hacky workaround!
            mean_beat = np.repeat(np.nan, 180)
            median_beat = np.repeat(np.nan, 180)
            std_beat = np.repeat(np.nan, 180)
            max_beat = np.repeat(np.nan, 180)
            min_beat = np.repeat(np.nan, 180)

        sample_features = [mean_beat, median_beat, std_beat, max_beat, min_beat]
        sample_features = np.hstack(sample_features)

        return sample_features

In [6]:
class MyDelineationExtractor(BaseEstimator, TransformerMixin):
    """Use dwt, cwt method yields lots of NaN values for first sample. Lenght mismatch.
    Method peak not helpful as we need onset points"""
    def __init__(self, sampling_rate, delineation_method='dwt'):
        self.sampling_rate = sampling_rate
        self.delineation_method = delineation_method
           
    def fit(self, X, y=None):
        return self
    
    def transform(self, X, y=None):
        print('Running delineation feature extraction...')
        features = []
        
        for id_clean in np.arange(X.shape[0]):
            if id_clean % 500 == 0:
                print(f'\nextracting features from sample {id_clean}')
            
            wave_peaks, rpeaks = self._get_wavepeaks(X[id_clean])

            # get amplitude and timing features and accumulate (mean, median, std, max, min)
            amp_and_timing_features = self._get_sample_features(X[id_clean], wave_peaks, rpeaks)
 
            features.append(amp_and_timing_features)
            
        # should return X_new (num_samples, num_features)
        X_new = np.vstack(features)
        print('final shape of new delineation features:', X_new.shape)

        return X_new
    
    def _get_rpeaks(self, ecg_cleaned):
        instant_peaks, rpeaks = nk.ecg_peaks(ecg_cleaned, sampling_rate=self.sampling_rate)
        return instant_peaks, rpeaks['ECG_R_Peaks'] 

    def _get_heartrate(self, rpeaks, ecg_cleaned):
        rate = nk.ecg_rate(rpeaks, sampling_rate=self.sampling_rate, desired_length=len(ecg_cleaned))
        return rate
    
    def _get_wavepeaks(self, ecg_cleaned):
        ecg_nonans = ecg_cleaned[~np.isnan(ecg_cleaned)]
        _, rpeaks = self._get_rpeaks(ecg_nonans)
       
        # DEBUGGING
        # cannot extract heartbeats when len(rpeaks)=2
        #print('rpeaks length', len(rpeaks))
        
        if len(rpeaks) >= 10:

            _, wave_peaks = nk.ecg_delineate(ecg_nonans, rpeaks, sampling_rate=self.sampling_rate, method=self.delineation_method)
        else:
            print('rpeaks length < 10, cannot apply delineation. wave_peaks is set to empty dict')
            wave_peaks = {}

        return wave_peaks, rpeaks
    
    def _get_sample_features(self, ecg_cleaned, wave_peaks, rpeaks):
        """Get a total of 5x12 features for 1)mean, 2)median, 3)std, 4)max, 5)min."""

        if wave_peaks:
            wave_peaks_nonan = {k:[elem for elem in v if elem is not np.nan] for k,v in wave_peaks.items()}

            ## amplitude features we want to accumulate over
            # don't want all peaks (no amplitude stats for on-and offsets, just want the PQRST peaks)
            ppeaks = wave_peaks_nonan['ECG_P_Peaks']
            qpeaks = wave_peaks_nonan['ECG_Q_Peaks']
            speaks = wave_peaks_nonan['ECG_S_Peaks']
            tpeaks = wave_peaks_nonan['ECG_T_Peaks']

            ## timing features we want to accumulate over
            # rr-interval
            rr_interval = np.diff(rpeaks)/sr*1000 # rr interval in ms

            # pp-interval
            pp_interval = np.diff(ppeaks)/sr*1000 # pp interval in ms

            # qrs duration
            #print('R-Offset:', len(wave_peaks['ECG_R_Offsets']))
            #print('R-Onset:', len(wave_peaks['ECG_R_Onsets']))
            qrs_duration = (np.array(wave_peaks['ECG_R_Offsets'])-np.array(wave_peaks['ECG_R_Onsets']))/sr*1000 # in ms
            #qrs_duration = qrs_duration[~np.isnan(qrs_duration)]
            #assert (qrs_duration>0).all()

            # p-wave duration, normal p wave duration 0.12-0.22s
            #print('ECG_P_Offsets:', len(wave_peaks['ECG_P_Offsets']))
            #print('ECG_P_Onsets:', len(wave_peaks['ECG_P_Onsets']))
            p_duration = (np.array(wave_peaks['ECG_P_Offsets'])-np.array(wave_peaks['ECG_P_Onsets']))/sr*1000 # in ms
            #p_duration = p_duration[~np.isnan(p_duration)]
            #assert (p_duration>0).all()

            # pr segment (P onset up to R onset)
            #print('ECG_R_Onsets:', len(wave_peaks['ECG_R_Onsets']))
            #print('ECG_P_Onsets:', len(wave_peaks['ECG_P_Onsets']))            
            pr_duration = (np.array(wave_peaks['ECG_R_Onsets'])-np.array(wave_peaks['ECG_P_Onsets']))/sr*1000 # in ms
            #pr_duration = pr_duration[~np.isnan(pr_duration)]
            #assert (pr_duration>0).all()

            # r-time, time from R onset to R peak, R_onset seems to be the QRS onset: https://neurokit2.readthedocs.io/en/latest/functions.html#neurokit2.ecg_delineate%3E
            rwave_peaktime = rpeaks-wave_peaks['ECG_R_Onsets']
            #rwave_peaktime = rwave_peaktime[~np.isnan(rwave_peaktime)]
            #assert (rwave_peaktime>0).all()

            # heartrate = inverse of rr-interval
            heartrate = self._get_heartrate(rpeaks, ecg_cleaned)

            mean_feat = []
            median_feat = []
            std_feat = []
            max_feat = []
            min_feat = []

            # calculate stats for all features
            for idx, feat in enumerate([ecg_cleaned[ppeaks], ecg_cleaned[qpeaks], ecg_cleaned[speaks], ecg_cleaned[tpeaks],\
                         ecg_cleaned[rpeaks], rr_interval, pp_interval, qrs_duration, p_duration, \
                         pr_duration, rwave_peaktime, heartrate]):

                #print(f'idx feature {idx}, feature shape: {feat.shape}')
                #if np.isnan(feat).sum()>0:
                #    print(f'Nans present in feature number {idx}')

                if len(feat) >= 2:
                    mean_feat.append(np.nanmean(feat))
                    median_feat.append(np.nanmedian(feat))
                    std_feat.append(np.nanstd(feat))
                    max_feat.append(np.nanmax(feat))
                    min_feat.append(np.nanmin(feat))
                else:
                    print('Delineation extraction, len(feat) < 2, filling NaNs.')
                    mean_feat.append(np.nan) # mean of rr_interval, mean of pp_interval, ... -> need to hstack
                    median_feat.append(np.nan)
                    std_feat.append(np.nan)
                    max_feat.append(np.nan)
                    min_feat.append(np.nan)               
            #print('mean_feat', mean_feat)   
            all_sample_features = np.hstack([[mean_feat], [median_feat], [std_feat], [max_feat], [min_feat]])
            #print('all_sample_features', all_sample_features, all_sample_features.shape, type(all_sample_features))
        else:
            print('Could not extract wavelet, filling feature vector with Nans.')
            all_sample_features = np.repeat(np.nan, 5*12).reshape(1,-1)
        # column order is all feature means, then medians, stds, max, min
        return all_sample_features


In [7]:
class MyHRVExtractor(BaseEstimator, TransformerMixin):
    def __init__(self, sampling_rate):
        self.sampling_rate = sampling_rate
    
    def fit(self, X, y=None):
        return self
    
    def transform(self, X, y=None):
        print('Running HRV feature extraction...')
        hrvs = []
        for id_clean in np.arange(X.shape[0]):
            if id_clean % 500 == 0:
                print(f'extracting features from sample {id_clean}')
            
            sample_clean = X[id_clean]
            hrv = self._get_hrv(sample_clean)
            hrvs.append(hrv)
        
        #hrv_features = np.vstack(hrvs)
        # remove columns where at least one NaN present
        #X_new = hrv_features[:,~np.isnan(hrv_features).any(axis=0)]
        
        # keep Nans for now for consistent dimensions
        X_new = np.vstack(hrvs)
        
        # should return array of shape (num_samples, num_hrv_features)
        print('final shape of new HRV features:', X_new.shape)
        
        return X_new
    
    def _get_hrv(self, ecg_cleaned):
        
        ecg_nonans = ecg_cleaned[~np.isnan(ecg_cleaned)]
        _, rpeaks = self._get_rpeaks(ecg_nonans)
        
        # TODO add non-linear hrv, not done yet as window size needs adjustment
        if len(rpeaks) >= 10:
            hrv_time = nk.hrv_time(rpeaks, sampling_rate=self.sampling_rate)
            hrv_freq = nk.hrv_frequency(rpeaks, sampling_rate=self.sampling_rate, normalize=True)
            hrv_concat = pd.concat([hrv_time, hrv_freq], axis=1) # add features along axis 1 (horizontically)
            hrv = np.hstack([hrv_time, hrv_freq]) # add features along axis 1 (horizontically)
        else:
            # Hacky workaround, add 29 NaNs
            hrv = np.repeat(np.nan, 29).reshape(1,-1)
        return hrv

    def _get_rpeaks(self, ecg_cleaned):
        instant_peaks, rpeaks = nk.ecg_peaks(ecg_cleaned, sampling_rate=self.sampling_rate)
        return instant_peaks, rpeaks['ECG_R_Peaks']    

In [71]:
%%time
cleaner = MyCleaning(sampling_rate=sr, downsampling=downsampling, trim_beginning=trim_beginning)
X_train_cleaned = cleaner.fit_transform(X_train)

Running cleaning...
cleaning sample 0
cleaning sample 500
cleaning sample 1000
cleaning sample 1500
cleaning sample 2000
cleaning sample 2500
cleaning sample 3000
cleaning sample 3500
cleaning sample 4000
cleaning sample 4500
cleaning sample 5000
CPU times: user 18min 54s, sys: 890 ms, total: 18min 55s
Wall time: 18min 54s


In [72]:
%%time
X_test_cleaned = cleaner.transform(X_test)

Running cleaning...
cleaning sample 0
cleaning sample 500
cleaning sample 1000
cleaning sample 1500
cleaning sample 2000
cleaning sample 2500
cleaning sample 3000
CPU times: user 12min 25s, sys: 455 ms, total: 12min 26s
Wall time: 12min 25s


In [73]:
pd.DataFrame(X_train_cleaned).to_csv('../../aml-project/task2/data/cleaned_train.csv')
pd.DataFrame(X_test_cleaned).to_csv('../../aml-project/task2/data/cleaned_test.csv')

## Read cleaned data and extract features

In [8]:
X_train_cleaned = pd.read_csv('../../aml-project/task2/data/cleaned_train.csv', index_col=False).values
X_test_cleaned = pd.read_csv('../../aml-project/task2/data/cleaned_test.csv', index_col=False).values

In [9]:
X_train_cleaned.shape

(5117, 17843)

In [10]:
X_test_cleaned.shape

(3411, 17843)

In [11]:
%%time
heartbeat_extractor = MyHeartBeatExtractor(sampling_rate=sr)
X_train_hb = heartbeat_extractor.fit_transform(X_train_cleaned)

Running heartbeat feature extraction...
extracting features from sample 0
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (90

shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape samp

shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape samp

shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape samp

shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape samp

shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape samp

shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape samp

shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
extracting

shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape samp

shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape samp

shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape samp

shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape samp

shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape samp

shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape samp

shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape samp

shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape samp

shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
extracting features from sample 4500
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
sha

shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape samp

shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape samp

In [12]:
%%time
X_test_hb = heartbeat_extractor.transform(X_test_cleaned)

Running heartbeat feature extraction...
extracting features from sample 0
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (90

shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape samp

shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape samp

shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape samp

shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape samp

shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape samp

shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape samp

shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape samp

shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape samp

shape sample features: (900,)
extracting features from sample 2500
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
sha

shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape samp

shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape samp

shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape sample features: (900,)
shape samp

In [13]:
pd.DataFrame(X_train_hb).to_csv('../../aml-project/task2/data/features_heartbeat_train.csv', index=False)
pd.DataFrame(X_test_hb).to_csv('../../aml-project/task2/data/features_heartbeat_test.csv', index=False)

In [14]:
%%time
delineation_extractor = MyDelineationExtractor(sampling_rate=sr, delineation_method='dwt')
X_train_delin = delineation_extractor.fit_transform(X_train_cleaned)
# error at extracting features from sample 388 => 4388 -> fixed
# error at sample 3826
# should take 5x7min=35min

Running delineation feature extraction...

extracting features from sample 0


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Delineation extraction, len(feat) < 2, filling NaNs.


  mean_feat.append(np.nanmean(feat))
  r, k = function_base._ureduce(a, func=_nanmedian, axis=axis, out=out,
  var = nanvar(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  max_feat.append(np.nanmax(feat))
  min_feat.append(np.nanmin(feat))


rpeaks length < 10, cannot apply delineation. wave_peaks is set to empty dict
Could not extract wavelet, filling feature vector with Nans.
rpeaks length < 10, cannot apply delineation. wave_peaks is set to empty dict
Could not extract wavelet, filling feature vector with Nans.
rpeaks length < 10, cannot apply delineation. wave_peaks is set to empty dict
Could not extract wavelet, filling feature vector with Nans.

extracting features from sample 500
Delineation extraction, len(feat) < 2, filling NaNs.
rpeaks length < 10, cannot apply delineation. wave_peaks is set to empty dict
Could not extract wavelet, filling feature vector with Nans.

extracting features from sample 1000
Delineation extraction, len(feat) < 2, filling NaNs.
rpeaks length < 10, cannot apply delineation. wave_peaks is set to empty dict
Could not extract wavelet, filling feature vector with Nans.
rpeaks length < 10, cannot apply delineation. wave_peaks is set to empty dict
Could not extract wavelet, filling feature vec

In [15]:
X_train_delin.shape

(5117, 60)

In [16]:
pd.DataFrame(X_train_delin).to_csv('../../aml-project/task2/data/features_delineation_train.csv', index=False)

In [17]:
X_test_cleaned.shape

(3411, 17843)

In [18]:
%%time
X_test_delin = delineation_extractor.transform(X_test_cleaned)

Running delineation feature extraction...

extracting features from sample 0
rpeaks length < 10, cannot apply delineation. wave_peaks is set to empty dict
Could not extract wavelet, filling feature vector with Nans.
Delineation extraction, len(feat) < 2, filling NaNs.
Delineation extraction, len(feat) < 2, filling NaNs.

extracting features from sample 500
Delineation extraction, len(feat) < 2, filling NaNs.


  mean_feat.append(np.nanmean(feat))
  max_feat.append(np.nanmax(feat))
  min_feat.append(np.nanmin(feat))


Delineation extraction, len(feat) < 2, filling NaNs.
Delineation extraction, len(feat) < 2, filling NaNs.
Delineation extraction, len(feat) < 2, filling NaNs.
rpeaks length < 10, cannot apply delineation. wave_peaks is set to empty dict
Could not extract wavelet, filling feature vector with Nans.
rpeaks length < 10, cannot apply delineation. wave_peaks is set to empty dict
Could not extract wavelet, filling feature vector with Nans.

extracting features from sample 1000
rpeaks length < 10, cannot apply delineation. wave_peaks is set to empty dict
Could not extract wavelet, filling feature vector with Nans.
Delineation extraction, len(feat) < 2, filling NaNs.
Delineation extraction, len(feat) < 2, filling NaNs.

extracting features from sample 1500
Delineation extraction, len(feat) < 2, filling NaNs.
rpeaks length < 10, cannot apply delineation. wave_peaks is set to empty dict
Could not extract wavelet, filling feature vector with Nans.
Delineation extraction, len(feat) < 2, filling NaN

In [19]:
X_test_delin.shape

(3411, 60)

In [20]:
pd.DataFrame(X_test_delin).to_csv('../../aml-project/task2/data/features_delineation_test.csv', index=False)

In [21]:
%%time
hrv_extractor = MyHRVExtractor(sampling_rate=sr)
X_train_hrv = hrv_extractor.fit_transform(X_train_cleaned)

Running HRV feature extraction...
extracting features from sample 0
extracting features from sample 500
extracting features from sample 1000
extracting features from sample 1500
extracting features from sample 2000
extracting features from sample 2500
extracting features from sample 3000
extracting features from sample 3500
extracting features from sample 4000
extracting features from sample 4500
extracting features from sample 5000
final shape of new HRV features: (5117, 29)
CPU times: user 6min 23s, sys: 1.13 s, total: 6min 24s
Wall time: 6min 31s


In [22]:
%%time
X_test_hrv = hrv_extractor.transform(X_test_cleaned)

Running HRV feature extraction...
extracting features from sample 0
extracting features from sample 500
extracting features from sample 1000
extracting features from sample 1500
extracting features from sample 2000
extracting features from sample 2500
extracting features from sample 3000
final shape of new HRV features: (3411, 29)
CPU times: user 2min 40s, sys: 422 ms, total: 2min 40s
Wall time: 2min 39s


In [23]:
pd.DataFrame(X_train_hrv).to_csv('../../aml-project/task2/data/features_hrv_train.csv', index=False)
pd.DataFrame(X_test_hrv).to_csv('../../aml-project/task2/data/features_hrv_test.csv', index=False)

In [24]:
X_train_hrv.shape

(5117, 29)

In [25]:
X_test_hrv.shape

(3411, 29)

In [26]:
print(f'Extracted a total of {X_train_hb.shape[1]+X_train_delin.shape[1]+X_train_hrv.shape[1]} features.')
print(f'Extracted a total of {X_test_hb.shape[1]+X_test_delin.shape[1]+X_test_hrv.shape[1]} features.')

Extracted a total of 989 features.
Extracted a total of 989 features.


---

In [None]:
### VALIDATION - check if features extracted correctly
#check = tvals[0].reshape((1,-1))
#print(check.shape)
#tclean = cleaner.fit_transform(check)
#tclean_nonans = tclean[~np.isnan(tclean)]
#_, trpeaks = nk.ecg_peaks(tclean_nonans, sampling_rate=sr)
#trpeaks = trpeaks['ECG_R_Peaks']
#tbeats = ecg.extract_heartbeats(tclean_nonans, trpeaks, sr)['templates']
## check
#hb_feat = extractor.fit_transform(newt)
#mean0 = hb_feat[0][:180]
#mean_beat = np.mean(tbeats, axis=0)
#np.array_equal(mean0, mean_beat) # should be TRUE