In [1]:
from preprocessing import *
import numpy as np
import matplotlib.pyplot as plt
import os
from tqdm import tqdm
from sklearn.model_selection import train_test_split
import statistics
import biosppy.signals.ecg as ecg

: 

In [None]:
parent_dir = './Data/In-lab/'
paths = sorted(list(os.listdir(parent_dir)))
paths = np.array(paths)
np.random.shuffle(paths)
training_paths = paths[:-4]
validation_paths = paths[-4:]


def unison_shuffled_copies(a, b):
    assert len(a) == len(b)
    p = np.random.permutation(len(a))
    return a[p], b[p]


def normalize_data(data, mode):
    """inplace operation"""
    def minimum(data):
        return min(min(data, key=lambda x: min(x)))
    def maximum(data):
        return max(max(data, key=lambda x: max(x)))

    if mode == '0-1':
        mini = minimum(data)
        for subarr in data:
            subarr -= mini
        maxi = maximum(data)
        for subarr in data:
            subarr /= maxi
    if mode == 'z':
        raise NotImplementedError("not implemented yet")

def frequency_energy(data, sampling_freq, freq_start, freq_end):
    N = len(data)
    X = np.fft.fft(data)
    freqs = np.fft.fftfreq(N, 1/sampling_freq)
    
    indices = np.where((freqs>=freq_start) & (freqs<=freq_end))[0]
    energy = np.sum(np.abs(X[indices])**2)
    
    return energy

def get_features(data):
    """
    The selected features were mean, min, range, mode, low frequency energy (LF), 40th percentile,
    60th percentile, 80th percentile, standard deviation of successive RR interval differences,
    root mean square of successive RR interval differences
    """
    feature_data = np.zeros((10), dtype=float)
    
    feature_data[0] = np.mean(data)
    feature_data[1] = np.min(data)
    feature_data[2] = np.max(data)-np.min(data)
    feature_data[3] = statistics.mode(data)
    feature_data[4] = frequency_energy(data, 250, 0.1, 0.2)
    feature_data[5] = np.percentile(data, 40)
    feature_data[6] = np.percentile(data, 60)
    feature_data[7] = np.percentile(data, 80)
    
    rpeaks = ecg.hamilton_segmenter(signal=data, sampling_rate=250)['rpeaks']
    rr_intervals = np.diff(rpeaks)/250
    
    rr_diff = np.diff(rr_intervals)
    feature_data[8] = np.std(rr_diff)
    feature_data[9] = np.sqrt(np.mean(rr_diff**2))
    
    return feature_data
    
def return_dataset(paths):
    data = []
    activities_list = []
    labels_list = []
    ema_list = []
    for path in tqdm(paths):
        dataX, ema, labels, activities = get_data_activity_chunks(parent_dir+path, sampling=5)
        normalize_data(dataX, mode='0-1')

        ema_list.append(ema)
        labels_list.append(labels)
        activities_list.append(activities)
        
        for act_data in dataX:
            feature_data = get_features(act_data)
            data.append(feature_data)
     
    return np.array(data), np.concatenate(ema_list, axis=0), np.concatenate(labels_list, axis=0), np.concatenate(activities_list, axis=0) 


trainX, trainEMA, trainY, trainActivities = return_dataset(training_paths)
valX, valEMA, valY, valActivities = return_dataset(validation_paths)

100%|██████████| 12/12 [00:12<00:00,  1.02s/it]
100%|██████████| 4/4 [00:04<00:00,  1.06s/it]


In [None]:
from sklearn import svm
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score

clf = svm.SVC()
clf.fit(trainX, trainY)

In [None]:
print("Train results")
trainY_pred = clf.predict(trainX)

precision = precision_score(trainY, trainY_pred)
recall = recall_score(trainY, trainY_pred)
f1 = f1_score(trainY, trainY_pred)

print(f"Precision: {precision}, Recall: {recall}, F1-score: {f1}")

print("Validation results")
valY_pred = clf.predict(valX)

precision = precision_score(valY, valY_pred)
recall = recall_score(valY, valY_pred)
f1 = f1_score(valY, valY_pred)

print(f"Precision: {precision}, Recall: {recall}, F1-score: {f1}")

Train results
Precision: 0.6, Recall: 1.0, F1-score: 0.7499999999999999
Validation results
Precision: 0.6, Recall: 1.0, F1-score: 0.7499999999999999


In [None]:
from sklearn import tree
clf = tree.DecisionTreeClassifier()
clf = clf.fit(trainX, trainY)

In [None]:
print("Train results")
trainY_pred = clf.predict(trainX)

precision = precision_score(trainY, trainY_pred)
recall = recall_score(trainY, trainY_pred)
f1 = f1_score(trainY, trainY_pred)

print(f"Precision: {precision}, Recall: {recall}, F1-score: {f1}")

print("Validation results")
valY_pred = clf.predict(valX)

precision = precision_score(valY, valY_pred)
recall = recall_score(valY, valY_pred)
f1 = f1_score(valY, valY_pred)

print(f"Precision: {precision}, Recall: {recall}, F1-score: {f1}")

Train results
Precision: 1.0, Recall: 1.0, F1-score: 1.0
Validation results
Precision: 0.6486486486486487, Recall: 0.6666666666666666, F1-score: 0.6575342465753425


In [None]:
from sklearn.ensemble import GradientBoostingClassifier
clf = GradientBoostingClassifier(n_estimators=100, learning_rate=1.0, max_depth=1, random_state=42)
clf.fit(trainX, trainY)

In [None]:
print("Train results")
trainY_pred = clf.predict(trainX)

precision = precision_score(trainY, trainY_pred)
recall = recall_score(trainY, trainY_pred)
f1 = f1_score(trainY, trainY_pred)

print(f"Precision: {precision}, Recall: {recall}, F1-score: {f1}")

print("Validation results")
valY_pred = clf.predict(valX)

precision = precision_score(valY, valY_pred)
recall = recall_score(valY, valY_pred)
f1 = f1_score(valY, valY_pred)

print(f"Precision: {precision}, Recall: {recall}, F1-score: {f1}")

Train results
Precision: 0.9905660377358491, Recall: 0.9722222222222222, F1-score: 0.9813084112149533
Validation results
Precision: 0.6086956521739131, Recall: 0.7777777777777778, F1-score: 0.6829268292682927


In [None]:
from sklearn.ensemble import AdaBoostClassifier
clf = AdaBoostClassifier(n_estimators=100, algorithm="SAMME", random_state=0)
clf.fit(trainX, trainY)

In [None]:
print("Train results")
trainY_pred = clf.predict(trainX)

precision = precision_score(trainY, trainY_pred)
recall = recall_score(trainY, trainY_pred)
f1 = f1_score(trainY, trainY_pred)

print(f"Precision: {precision}, Recall: {recall}, F1-score: {f1}")

print("Validation results")
valY_pred = clf.predict(valX)

precision = precision_score(valY, valY_pred)
recall = recall_score(valY, valY_pred)
f1 = f1_score(valY, valY_pred)

print(f"Precision: {precision}, Recall: {recall}, F1-score: {f1}")

Train results
Precision: 0.8348623853211009, Recall: 0.8425925925925926, F1-score: 0.8387096774193548
Validation results
Precision: 0.625, Recall: 0.6944444444444444, F1-score: 0.6578947368421053
