In [1]:
import os
import numpy as np
from scipy.stats import mode
import matplotlib.pyplot as plt
from lib import loadmat

# Root of the project data tree. The default is the original local path; set the
# RR_ROOT environment variable to run the notebook on another machine without
# editing this cell.
ROOT = os.environ.get('RR_ROOT') or 'C:/Users/amcca/switchdrive/PhD/RR/'
# Paths are built below by plain string concatenation (ROOT + 'data/...'),
# so the root must always end with a path separator.
if not ROOT.endswith(('/', '\\')):
    ROOT += '/'

# Outcome definition used to pick the patient group: 'clinical' or 'procedural'
OUTCOME_TYPE = 'clinical'
# Patient group to analyse: 'SR' / 'AR' (clinical) or 'LT' / 'NT' (procedural)
OUTCOME_GROUP = 'AR'

# Import data
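Load the RR-interval indices exported from MATLAB (`.mat` files) at baseline and at end of ablation, and arrange each set as a dictionary (one entry per index) and as a matrix (one row per patient, one column per index).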

In [2]:
# Parameters that identify which pre-computed index file to read
tau = 1
m = 2
coef = 0.10

# The file name encodes the parameters above; the same file name is read from
# the baseline folder and from the end-of-ablation folder.
filename = f'tau{tau}_dim{m}_coef{coef}.mat'
bl_mat, end_mat = (
    loadmat(f'{ROOT}data/rr_indices/{phase}/{filename}')
    for phase in ('baseline', 'end ablation')
)

# Each struct maps an index name to its per-patient values
data_dict_bl = bl_mat['rr_indices_struct']
data_dict_end = end_mat['rr_indices_struct']
feat_names = data_dict_bl.keys()

# Stack the per-index vectors and transpose so that every column holds one
# AF complexity index and every row corresponds to one patient.
# NOTE(review): columns follow each dict's own key order; this assumes both
# structs list the indices in the same order — confirm.
data_matrix_bl = np.stack(list(data_dict_bl.values())).T
data_matrix_end = np.stack(list(data_dict_end.values())).T

# Outcome labels, one entry per patient.
# NOTE(review): assumes the label order matches the row order of the data
# matrices loaded from the .mat files — confirm.
# for clinical outcomes, SR=1, AR=0
y_clin = np.array([0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])
# for procedural outcomes, LT=1, NT=0
y_proc = np.array([1, 0, 1, 1, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1])

# Row indices of the patients to keep, based on outcome type and group desired.
# Reset first so that an unsupported setting can never silently reuse the
# selection left in the kernel by a previous run of this cell.
keep_idx = None
if OUTCOME_TYPE == 'clinical':
    if OUTCOME_GROUP == 'SR':
        keep_idx = np.nonzero(y_clin == 1)[0]
    elif OUTCOME_GROUP == 'AR':
        keep_idx = np.nonzero(y_clin == 0)[0]
elif OUTCOME_TYPE == 'procedural':
    if OUTCOME_GROUP == 'LT':
        keep_idx = np.nonzero(y_proc == 1)[0]
    elif OUTCOME_GROUP == 'NT':
        keep_idx = np.nonzero(y_proc == 0)[0]

if keep_idx is None:
    raise ValueError(
        'Unsupported OUTCOME_TYPE/OUTCOME_GROUP combination: '
        + repr((OUTCOME_TYPE, OUTCOME_GROUP))
        + " (expected 'clinical' with 'SR'/'AR' or 'procedural' with 'LT'/'NT')"
    )

# Always slice from the full matrices so that re-running the cell is idempotent
Xbl = data_matrix_bl[keep_idx, :]
Xend = data_matrix_end[keep_idx, :]

In [3]:
# Show the index (feature) names; they are the keys of the baseline struct, in
# the same order as the columns of the baseline data matrix.
feat_names

dict_keys(['rec', 'det', 'ent', 'div', 'sampen', 'pnn20', 'pnn50', 'sdnn', 'rmssd'])

## Statistical significance: baseline to end ablation
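The purpose of this notebook is to find which RR interval indices (features) have significantly different values prior to ablation (baseline) and at the end of ablation, within the same patient group. To find which features are most discriminative, the tests are repeated on several cross-validation splits (random subsets of the patients) and the p-values are averaged across splits. Two tests are used: one-way ANOVA, which tests for a difference in means, and the rank-sum test, which is the non-parametric alternative for features that are not normally distributed (normality is checked with the Lilliefors test).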

In [4]:
from sklearn.model_selection import ShuffleSplit
from statsmodels.stats.diagnostic import lilliefors
from scipy.stats import ranksums
from sklearn.feature_selection import f_classif
  
# Repeated random 80/20 splits of the patients (fixed seed, so results are
# reproducible). Only the training part of every split is used below.
n_folds = 3
ss = ShuffleSplit(n_splits=n_folds, test_size=0.2, random_state=0)

# Result arrays: one row per split, one column per feature
fold_shape = (n_folds, len(feat_names))
plillie_bl = np.zeros(fold_shape)
plillie_end = np.zeros(fold_shape)
prank = np.zeros(fold_shape)
fnorm = np.zeros(fold_shape)
pnorm = np.zeros(fold_shape)

# NOTE(review): baseline and end-of-ablation rows are selected with the same
# patient indices, but ranksums and f_classif both treat the two samples as
# independent — confirm that an unpaired comparison is intended.
for i, (train, test) in enumerate(ss.split(Xbl)):
    for k in range(len(feat_names)):
        # Lilliefors normality test: a p-value below the chosen threshold
        # (e.g. 0.05) rejects the null hypothesis that the data are normally
        # distributed. Only the p-value (second return value) is kept.
        plillie_bl[i, k] = lilliefors(Xbl[train, k], dist='norm')[1]
        plillie_end[i, k] = lilliefors(Xend[train, k], dist='norm')[1]

        # rank-sum p-value, baseline vs end ablation
        prank[i, k] = ranksums(Xbl[train, k], Xend[train, k])[1]

    # ANOVA F-statistics and p-values for all features of this split at once:
    # baseline rows are labelled 0, end-of-ablation rows are labelled 1
    X = np.vstack([Xbl[train, :], Xend[train, :]])
    y = np.concatenate([np.zeros(len(train)), np.ones(len(train))])
    fnorm[i], pnorm[i] = f_classif(X, y)

# average every statistic / p-value across the splits (one value per feature)
mean_f = fnorm.mean(axis=0)
mean_plillie_bl = plillie_bl.mean(axis=0)
mean_plillie_end = plillie_end.mean(axis=0)
mean_pnorm = pnorm.mean(axis=0)
mean_prank = prank.mean(axis=0)

# per-feature mean and (population) standard deviation over all selected
# patients, for baseline and end ablation
mean_Xbl, mean_Xend = Xbl.mean(axis=0), Xend.mean(axis=0)
std_Xbl, std_Xend = Xbl.std(axis=0), Xend.std(axis=0)

## Feature statistical significance
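Inspect the mean/std of the feature values in each group (baseline and end ablation), along with the p-value. The tables below report the ANOVA p-value averaged across splits; the corresponding rank-sum p-value is available in `mean_prank`.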

In [5]:
from prettytable import PrettyTable

# Table title for the selected outcome type and patient group. An unsupported
# setting raises immediately instead of leaving `title_string` undefined (or
# stale from an earlier run of this cell).
titles_by_group = {
    ('clinical', 'SR'): 'Clinical outcomes: SR group',
    ('clinical', 'AR'): 'Clinical outcomes: AR group',
    ('procedural', 'LT'): 'Procedural outcomes: LT group',
    ('procedural', 'NT'): 'Procedural outcomes: NT group',
}
title_string = titles_by_group.get((OUTCOME_TYPE, OUTCOME_GROUP))
if title_string is None:
    raise ValueError(
        'Unsupported OUTCOME_TYPE/OUTCOME_GROUP combination: '
        + repr((OUTCOME_TYPE, OUTCOME_GROUP))
    )


def _feature_table(columns, names, baseline_values, end_values, p_values, title):
    """Build a PrettyTable with one row per feature.

    columns         -- the four column headers
    names           -- feature names, one per row
    baseline_values -- per-feature value at baseline
    end_values      -- per-feature value at end ablation
    p_values        -- per-feature p-value shown in the last column
    title           -- title printed above the table
    """
    table = PrettyTable(columns)
    for i, feat_name in enumerate(names):
        table.add_row([feat_name, baseline_values[i], end_values[i], p_values[i]])
    table.title = title
    table.float_format = '0.4'
    return table


# table with mean values
t = _feature_table(['Feature', 'baseline', 'end ablation', 'p-anova'],
                   feat_names, mean_Xbl, mean_Xend, mean_pnorm, title_string)
print(t)

# table with standard deviation values
t = _feature_table(['Feature', 'std baseline', 'std end ablation', 'p-anova'],
                   feat_names, std_Xbl, std_Xend, mean_pnorm, title_string)
print(t)

+---------------------------------------------+
|         Clinical outcomes: AR group         |
+---------+----------+--------------+---------+
| Feature | baseline | end ablation | p-anova |
+---------+----------+--------------+---------+
|   rec   |  0.0088  |    0.0104    |  0.3260 |
|   det   |  0.1659  |    0.2058    |  0.0244 |
|   ent   |  0.4599  |    0.5572    |  0.0190 |
|   div   |  0.3138  |    0.2529    |  0.0102 |
|  sampen |  2.6869  |    2.4689    |  0.0470 |
|  pnn20  |  0.4486  |    0.4067    |  0.0025 |
|  pnn50  |  0.3926  |    0.3222    |  0.0002 |
|   sdnn  | 186.6946 |   118.9003   |  0.0000 |
|  rmssd  | 267.4680 |   161.2253   |  0.0000 |
+---------+----------+--------------+---------+
+-----------------------------------------------------+
|             Clinical outcomes: AR group             |
+---------+--------------+------------------+---------+
| Feature | std baseline | std end ablation | p-anova |
+---------+--------------+------------------+---------+
