# Notebook 4 - Ventricular Activity

Distinguish ventricular from non-ventricular activity in the ECG signals, and use it to detect ventricular tachycardia.

In [None]:
from multiprocessing import Pool, cpu_count
import os

import numpy as np
import pandas as pd
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.model_selection import train_test_split
import wfdb
from wfdb import processing

from vt.evaluate import calc_results
from vt.features import calc_spectral_ratios, visualize_features, has_tachycardia
from vt.records import get_alarms, data_dir
from vt.preprocessing import fill_missing

In [None]:
# Load the alarm table and the record name lists from vt.records.
# `alarms` is later indexed by record name to look up the 'result' column,
# and `record_names` is the list mapped over when per-record features are calculated.
alarms, record_names, record_names_true, record_names_false = get_alarms()

## Section 0 - Extract and calculate features from training signals

In [None]:
def calc_training_features():
    """
    Calculate frequency features from labelled intervals.

    For each record, spectral ratios are calculated from the labelled vtach
    section for both ecg signals (channels 0 and 1) and labelled True.

    In addition, a 15s interval (200s to 215s) from another arbitrary
    section of the record is processed the same way and labelled False.

    Returns
    -------
    features : pandas.DataFrame
        One row per (record, segment, channel) with columns 'lfp', 'mfp',
        'hfp' (the three values returned by calc_spectral_ratios) and
        'result' (True for the vtach segment, False for the other segment).
        Rows are ordered per record as: vtach ch0, vtach ch1, other ch0,
        other ch1.
    """
    fs = 250
    # Labelled vtach intervals as [start, stop] in seconds
    vtach_intervals = {
        'v328s': [293, 296.5],
        'v334s': [296.2, 299.5],
        'v348s': [294, 300],
        'v368s': [290, 293],
        'v369l': [296, 300],
        'v404s': [292, 300],
        'v448s': [294, 299],
        'v471l': [298, 300],
        'v522s': [291, 299],
    }
    # Arbitrary 15s comparison segment, labelled as non-vtach
    other_interval = (200, 215)

    rows = []

    for record_name, (vtach_start, vtach_stop) in vtach_intervals.items():
        record_path = os.path.join(data_dir, record_name)
        segments = (
            ((vtach_start, vtach_stop), True),
            (other_interval, False),
        )

        for (start_sec, stop_sec), is_vtach in segments:
            # Convert to sample indices AFTER scaling by fs so that fractional
            # second bounds (e.g. 296.5) are honoured rather than truncated.
            signal, _ = wfdb.rdsamp(record_path,
                                    sampfrom=int(round(start_sec * fs)),
                                    sampto=int(round(stop_sec * fs)),
                                    channels=[0, 1])

            # Calculate spectral features for both ecg signals
            for channel in (0, 1):
                ratios = calc_spectral_ratios(signal[:, channel], fs=fs)
                rows.append(list(ratios) + [is_vtach])

    features = pd.DataFrame(rows, columns=['lfp', 'mfp', 'hfp', 'result'])

    return features
    

In [None]:
# Build the labelled spectral-feature table used to train the ventricular activity classifier
training_features = calc_training_features()

In [None]:
# Preview the first rows of the training features
display(training_features.head())

In [None]:
# Pass the training features to the project's visualisation helper (vt.features.visualize_features)
visualize_features(training_features)

In [None]:
# Train the ventricular activity classifier.
# Inputs are every column except the last one ('result'); 'result' is the label.
# random_state is fixed so that the fitted model, and therefore every downstream
# 'ventricular' prediction, is reproducible after Restart Kernel -> Run All.
clf_vent = GradientBoostingClassifier(random_state=0)
clf_vent.fit(training_features.iloc[:, :-1], training_features['result'])

## Section 1 - Implement Ventricular Tachycardia Detector

Combine tachycardia and ventricular content

In [None]:
def calc_features(record_name):
    """
    Build the one-row feature frame for a single record.

    Seconds 290 to 300 of the record are read. Two flags are derived:

    - tachycardia: the value returned by has_tachycardia for the gqrs beat
      locations of channels 0 and 1.
    - ventricular: True if the ventricular activity classifier (clf_vent)
      flags channel 0 or channel 1 in any 4s window, sliding in 1s steps.

    Returns a pandas.DataFrame indexed by record_name with the columns
    'tachycardia', 'ventricular', 'ventricular_tachycardia' (both flags
    combined with `and`) and 'result' (the record's entry in `alarms`).
    """
    fs = 250
    first_sec, last_sec = 290, 300
    window_sec = 4
    # 1s steps of a 4s window across the 10s segment -> 7 windows
    n_windows = (last_sec - first_sec) - window_sec + 1

    # Load the segment (channel 2 is read but not used below) and fill in nans
    record_path = os.path.join(data_dir, record_name)
    signal, _ = wfdb.rdsamp(record_path,
                            sampfrom=first_sec * fs,
                            sampto=last_sec * fs,
                            channels=[0, 1, 2])
    signal = fill_missing(signal)

    # Beat locations per ecg channel, then the tachycardia decision
    beats_ch0 = processing.gqrs_detect(signal[:, 0], fs=fs)
    beats_ch1 = processing.gqrs_detect(signal[:, 1], fs=fs)
    tachycardia = has_tachycardia(beats_ch0, beats_ch1)

    def window_is_ventricular(offset_sec):
        # Classify the spectral ratios of both ecg channels for one window
        segment = signal[offset_sec * fs:(offset_sec + window_sec) * fs, :]
        ratios = [list(calc_spectral_ratios(segment[:, channel], fs=fs))
                  for channel in (0, 1)]
        return np.any(clf_vent.predict(ratios))

    # any() over a generator stops at the first ventricular window
    ventricular = any(window_is_ventricular(offset) for offset in range(n_windows))

    # Alarm result
    result = alarms.loc[record_name, 'result']
    row = [tachycardia, ventricular, tachycardia and ventricular, result]
    features = pd.DataFrame(
        [row],
        columns=['tachycardia', 'ventricular', 'ventricular_tachycardia', 'result'],
        index=[record_name],
    )
    return features
    

In [None]:
# Calculate features for all records using multiple cpus.
# Leave one core free but always keep at least one worker: cpu_count() - 1 is 0
# on a single-core machine, and Pool raises ValueError for processes < 1.
# The context manager shuts the worker processes down once the (blocking) map
# has returned, so they do not linger in the kernel.
with Pool(processes=max(1, cpu_count() - 1)) as pool:
    # Combine the per-record frames into a single data frame
    features = pd.concat(pool.map(calc_features, record_names))

print('Finished calculating features')

In [None]:
# Preview the first rows of the per-record features
display(features.head())

## Section 2 - Use ventricular tachycardia feature as input to alarm classifier

In [None]:
# Split data into training and testing sets
# Inputs are every column except the last; the last column ('result') is the target.
# random_state is fixed so the split is the same on every run.
x_train, x_test, y_train, y_test = train_test_split(features.iloc[:, :-1], features.iloc[:, -1],
                                                    train_size=0.75, test_size=0.25,
                                                    random_state=0)

# Note that y_train is not used here: no supervised alarm classifier is fitted on this split.
# The feature columns of x_test are used directly as the predicted alarm label below.

In [None]:
# Calculate performance metrics
# The 'ventricular_tachycardia' column of the test set is used directly as the predicted label
cm, p_correct, score = calc_results(y_true=y_test, y_pred=x_test['ventricular_tachycardia'].values)

In [None]:
# Display performance metrics
# NOTE(review): cm is presumably a confusion matrix (going by the name only) — confirm against vt.evaluate.calc_results
display(cm)
print('Final score: ', score)

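We see that combining the ventricular feature with the tachycardia feature can reduce false positives. However, it cannot reduce false negatives: the two features are combined with a logical AND, which can only turn a positive prediction into a negative one, never the reverse.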

Technically, the ventricular classifier was trained on signal segments labelled as ventricular tachycardia, so it should capture ventricular tachycardia on its own. We can test whether the 'ventricular' feature performs better by itself.

In [None]:
# Just using tachycardia
# Same test set and metric function as above, with the 'tachycardia' column as the predicted label
cm, p_correct, score = calc_results(y_true=y_test, y_pred=x_test['tachycardia'].values)

print('Results purely using tachycardia feature')
display(cm)
print('Final score: ', score)

In [None]:
# Just using ventricular
# Same test set and metric function as above, with the 'ventricular' column as the predicted label
cm, p_correct, score = calc_results(y_true=y_test, y_pred=x_test['ventricular'].values)

print('Results purely using ventricular feature')
display(cm)
print('Final score: ', score)