In [1]:
# The star import stays first so that the explicit imports below win on any name clash.
from preprocessing import *
import os
import statistics

import numpy as np
import pandas as pd  # imported explicitly: `pd` is used below and must not depend on the star import exporting it
import matplotlib.pyplot as plt
import biosppy.signals.ecg as ecg
from sklearn.model_selection import train_test_split
from tqdm import tqdm

In [2]:
# Root folder of the recordings (presumably one entry per subject -- confirm).
# The listing is sorted so that a given fold index refers to the same entry on every run.
parent_dir = './Data/In-lab/'
paths = sorted(os.listdir(parent_dir))


def unison_shuffled_copies(a, b):
    """Shuffle two equally long arrays with one shared random permutation.

    Both inputs are indexed with the same permutation array, so element ``i``
    of the first result still corresponds to element ``i`` of the second.
    The permutation is drawn from NumPy's global random state.

    Returns:
        Tuple ``(a_shuffled, b_shuffled)``.

    Raises:
        AssertionError: if the two inputs differ in length.
    """
    size = len(a)
    assert size == len(b)
    order = np.random.permutation(size)
    shuffled_a = a[order]
    shuffled_b = b[order]
    return shuffled_a, shuffled_b


def normalize_data(data, mode):
    """Normalize a collection of arrays in place, using statistics shared by all of them.

    Args:
        data: iterable of arrays that support in-place ``-=`` and ``/=``
            (floating-point NumPy arrays; integer arrays cannot be divided
            in place). The arrays may have different lengths.
        mode: ``'0-1'`` shifts every array by the global minimum and then
            divides by the global range, so the values of the whole
            collection span [0, 1]. ``'z'`` is reserved and not implemented.
            Any other value leaves ``data`` untouched.

    Returns:
        None. The arrays inside ``data`` are modified in place.

    Raises:
        NotImplementedError: if ``mode == 'z'``.
    """
    if mode == '0-1':
        # Nothing to scale; avoids min() failing on an empty collection.
        if len(data) == 0:
            return

        mini = min(min(subarr) for subarr in data)
        for subarr in data:
            subarr -= mini

        # After the shift the global maximum equals the global range.
        maxi = max(max(subarr) for subarr in data)
        if maxi == 0:
            # Constant input: everything is already 0. Dividing would turn
            # the whole collection into NaN (0 / 0).
            return
        for subarr in data:
            subarr /= maxi
    elif mode == 'z':
        raise NotImplementedError("not implemented yet")

def frequency_energy(data, sampling_freq, freq_start, freq_end):
    """Return the summed squared FFT magnitude of ``data`` inside a frequency band.

    Args:
        data: sequence of samples; its length sets the number of FFT bins.
        sampling_freq: sampling rate, in the same unit as the band edges.
        freq_start: lower band edge (inclusive).
        freq_end: upper band edge (inclusive).

    Returns:
        Sum of ``|X[k]|**2`` over every FFT bin whose signed frequency (as
        given by ``np.fft.fftfreq``) lies in ``[freq_start, freq_end]``.
        The sum is 0.0 when no bin falls inside the band.
    """
    sample_count = len(data)
    spectrum = np.fft.fft(data)
    bin_freqs = np.fft.fftfreq(sample_count, 1/sampling_freq)

    # Boolean mask over the bins instead of an explicit index list.
    in_band = (bin_freqs >= freq_start) & (bin_freqs <= freq_end)
    band_power = np.abs(spectrum[in_band]) ** 2
    return np.sum(band_power)

def get_features(data, sampling_rate=250):
    """Reduce one signal chunk to a fixed vector of 10 summary features.

    Feature layout of the returned array:
        0: mean
        1: minimum
        2: range (max - min)
        3: mode (``statistics.mode``)
        4: spectral energy between 0.1 and 0.2 (the "LF" band)
        5: 40th percentile
        6: 60th percentile
        7: 80th percentile
        8: standard deviation of successive RR-interval differences
        9: root mean square of successive RR-interval differences

    Args:
        data: 1-D signal chunk. It is passed to the Hamilton ECG segmenter, so
            it is presumably an ECG trace -- confirm against the loader.
        sampling_rate: sampling rate of ``data`` in Hz, used for the spectral
            feature and for converting R-peak positions to seconds. Defaults
            to 250, the value that was previously hard-coded.

    Returns:
        ``np.ndarray`` of shape ``(10,)`` and dtype float.

    Note:
        ``statistics.mode`` raises ``StatisticsError`` on Python < 3.8 when the
        chunk has no unique most common value.
    """
    feature_data = np.zeros((10), dtype=float)

    lowest = np.min(data)
    feature_data[0] = np.mean(data)
    feature_data[1] = lowest
    feature_data[2] = np.max(data) - lowest
    feature_data[3] = statistics.mode(data)
    feature_data[4] = frequency_energy(data, sampling_rate, 0.1, 0.2)
    feature_data[5] = np.percentile(data, 40)
    feature_data[6] = np.percentile(data, 60)
    feature_data[7] = np.percentile(data, 80)

    rpeaks = ecg.hamilton_segmenter(signal=data, sampling_rate=sampling_rate)['rpeaks']
    # R-peak positions are sample indices; dividing by the rate gives seconds.
    rr_intervals = np.diff(rpeaks) / sampling_rate

    rr_diff = np.diff(rr_intervals)
    # With fewer than three R-peaks there are no successive differences, and
    # np.std / np.mean of an empty array would yield NaN. Keep the
    # zero-initialised values so downstream estimators never receive NaN.
    if rr_diff.size > 0:
        feature_data[8] = np.std(rr_diff)
        feature_data[9] = np.sqrt(np.mean(rr_diff**2))

    return feature_data
    
def return_dataset(paths):
    """Load the given recordings and turn every chunk into a feature vector.

    For each entry of ``paths`` the chunks are read with
    ``get_data_activity_chunks(parent_dir + path, sampling=5)``, scaled in
    place with ``normalize_data(..., mode='0-1')`` (so scaling is done per
    entry, not across entries) and reduced with ``get_features``.

    Args:
        paths: names of entries located under the module-level ``parent_dir``.

    Returns:
        Tuple ``(features, ema, labels, activities)``: ``features`` is the
        array of all feature vectors in loading order, and the other three
        are the per-entry ``ema`` / ``labels`` / ``activities`` values
        concatenated along axis 0.
    """
    feature_rows = []
    ema_parts, label_parts, activity_parts = [], [], []

    for path in tqdm(paths):
        chunks, ema, labels, activities = get_data_activity_chunks(parent_dir+path, sampling=5)
        # In-place scaling has to happen before the features are extracted.
        normalize_data(chunks, mode='0-1')

        ema_parts.append(ema)
        label_parts.append(labels)
        activity_parts.append(activities)
        feature_rows.extend(get_features(chunk) for chunk in chunks)

    features = np.array(feature_rows)
    all_ema = np.concatenate(ema_parts, axis=0)
    all_labels = np.concatenate(label_parts, axis=0)
    all_activities = np.concatenate(activity_parts, axis=0)
    return features, all_ema, all_labels, all_activities




In [3]:
from sklearn import svm
from sklearn import tree
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score


In [4]:
# Leave-one-out evaluation over the entries of `paths`: each entry is held out
# once as the validation set while the four classifiers are trained on the rest.
# The number of folds follows the data instead of a hard-coded 16, so the loop
# neither indexes past the end of `paths` nor silently skips extra entries.
n_folds = len(paths)
df = pd.DataFrame(data=np.zeros((n_folds, 4)), columns=['SVM', 'Decision Tree', 'Gradient Boosting', 'Adaboost'])


for val_idx in range(n_folds):
    training_paths = paths[:val_idx] + paths[val_idx + 1:]
    validation_paths = paths[val_idx:val_idx + 1]

    # Only the features and labels are used below; the EMA and activity
    # arrays are returned by return_dataset but not needed for scoring.
    trainX, trainEMA, trainY, trainActivities = return_dataset(training_paths)
    valX, valEMA, valY, valActivities = return_dataset(validation_paths)

    # Fresh estimators every fold so nothing learned leaks between folds.
    svm_clf = svm.SVC()
    d_clf = tree.DecisionTreeClassifier()
    gb_clf = GradientBoostingClassifier(n_estimators=100, learning_rate=1.0, max_depth=1, random_state=42)
    ada_clf = AdaBoostClassifier(n_estimators=100, algorithm="SAMME", random_state=0)

    svm_clf.fit(trainX, trainY)
    d_clf.fit(trainX, trainY)
    gb_clf.fit(trainX, trainY)
    ada_clf.fit(trainX, trainY)

    svm_pred = svm_clf.predict(valX)
    d_pred = d_clf.predict(valX)
    gb_pred = gb_clf.predict(valX)
    ada_pred = ada_clf.predict(valX)

    # One row per held-out entry, one column per classifier (F1 score).
    df.at[val_idx, 'SVM'] = f1_score(valY, svm_pred)
    df.at[val_idx, 'Decision Tree'] = f1_score(valY, d_pred)
    df.at[val_idx, 'Gradient Boosting'] = f1_score(valY, gb_pred)
    df.at[val_idx, 'Adaboost'] = f1_score(valY, ada_pred)


df.head()

100%|██████████| 15/15 [00:10<00:00,  1.37it/s]
100%|██████████| 1/1 [00:00<00:00,  1.43it/s]
100%|██████████| 15/15 [00:10<00:00,  1.38it/s]
100%|██████████| 1/1 [00:00<00:00,  1.41it/s]
100%|██████████| 15/15 [00:10<00:00,  1.40it/s]
100%|██████████| 1/1 [00:00<00:00,  1.46it/s]
100%|██████████| 15/15 [00:10<00:00,  1.40it/s]
100%|██████████| 1/1 [00:00<00:00,  1.53it/s]
100%|██████████| 15/15 [00:10<00:00,  1.41it/s]
100%|██████████| 1/1 [00:00<00:00,  1.43it/s]
100%|██████████| 15/15 [00:10<00:00,  1.42it/s]
100%|██████████| 1/1 [00:00<00:00,  1.35it/s]
100%|██████████| 15/15 [00:11<00:00,  1.35it/s]
100%|██████████| 1/1 [00:00<00:00,  1.37it/s]
100%|██████████| 15/15 [00:11<00:00,  1.34it/s]
100%|██████████| 1/1 [00:00<00:00,  1.42it/s]
100%|██████████| 15/15 [00:10<00:00,  1.38it/s]
100%|██████████| 1/1 [00:00<00:00,  1.40it/s]
100%|██████████| 15/15 [00:10<00:00,  1.40it/s]
100%|██████████| 1/1 [00:00<00:00,  1.40it/s]
100%|██████████| 15/15 [00:10<00:00,  1.41it/s]
100%|███████

Unnamed: 0,SVM,Decision Tree,Gradient Boosting,Adaboost
0,0.461538,0.5,0.588235,0.428571
1,0.545455,0.428571,0.705882,0.461538
2,0.0,0.666667,0.2,0.181818
3,0.75,0.705882,0.666667,0.625
4,0.5,0.333333,0.666667,0.823529


In [7]:
# DataFrame.to_csv does not create missing parent directories, so make sure the
# output folder exists before writing (otherwise a fresh checkout fails here).
os.makedirs('./Results', exist_ok=True)
df.to_csv('./Results/loso_chunks.csv')