Loading Data

In [None]:
# Imports (the ECG datasets are read from .csv files with pandas)
import os

import sktime
from sktime.datasets import load_from_tsfile
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from scipy import stats
from sklearn.feature_selection import VarianceThreshold
from sklearn.svm import SVC
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.preprocessing import StandardScaler
from sklearn.feature_selection import SelectPercentile

# CSV loader shared by all datasets in this notebook
def load_data(path):
    """Read a header-less CSV file and split it into features and labels.

    Parameters
    ----------
    path : str or path-like
        Location of the CSV file; it is read with ``header=None``, so the
        first line is treated as data.

    Returns
    -------
    tuple
        ``(X, y)`` where ``X`` holds every column except the last one and
        ``y`` holds the last column, both as NumPy arrays.
    """
    frame = pd.read_csv(path, header=None)

    # the last column carries the label, everything before it is a feature
    features = frame.iloc[:, :-1].values
    labels = frame.iloc[:, -1].values

    return features, labels

Loading MITBIH DATA

In [None]:
# MITBIH ships as separate train and test CSV files, so no manual split is needed
train_x_mitbih, train_y_mitbih = load_data("./datasets/ecg/mitbih_train.csv")
test_x_mitbih, test_y_mitbih = load_data("./datasets/ecg/mitbih_test.csv")


Loading PTBDB DATA

In [None]:
# PTBDB is stored as one CSV file per class (normal / abnormal)
ptbdb_normal_x, ptbdb_normal_y = load_data("./datasets/ecg/ptbdb_normal.csv")
ptbdb_abnormal_x, ptbdb_abnormal_y = load_data("./datasets/ecg/ptbdb_abnormal.csv")

# stack both classes into a single feature matrix and label vector
x_ptbdb = np.concatenate([ptbdb_normal_x, ptbdb_abnormal_x])
y_ptbdb = np.concatenate([ptbdb_normal_y, ptbdb_abnormal_y])

# split the data into train and test using sklearn's train_test_split
# (33% of the rows go to the test split; the seed is fixed for reproducibility)
train_x_ptbdb, test_x_ptbdb, train_y_ptbdb, test_y_ptbdb = train_test_split(
    x_ptbdb,
    y_ptbdb,
    test_size=0.33,
    random_state=42,
)



PTBDB DATA VISUALIZATION

In [None]:
# PTBDB training split: signals plus a 'label' column
df_train_ptbdb = pd.DataFrame(train_x_ptbdb).assign(label = train_y_ptbdb)

# bar chart with the number of training samples per class
df_train_ptbdb['label'].value_counts().plot(kind = 'bar', title = "Train data ptbdb")


In [None]:
# PTBDB test split: signals plus a 'label' column
df_test_ptbdb = pd.DataFrame(test_x_ptbdb).assign(label = test_y_ptbdb)

# bar chart with the number of test samples per class
df_test_ptbdb['label'].value_counts().plot(kind = 'bar', title = "Test data ptbdb")


In [None]:
# Compare the class distribution of the PTBDB train and test splits in one bar chart.
# The counts are aligned on the label itself (value_counts() orders by frequency, so
# plotting the two series separately would pair bars by rank rather than by class),
# and grouped bars with a legend keep both splits visible.
label_counts_ptbdb = pd.DataFrame({
    'train': df_train_ptbdb['label'].value_counts(),
    'test': df_test_ptbdb['label'].value_counts(),
}).fillna(0).sort_index()  # a class missing from one split is shown as a zero-height bar

label_counts_ptbdb.plot.bar(title = "Train and Test data ptbdb", color = ['blue', 'red'])

In [None]:
# distinct PTBDB classes (taken from the training split) and per-class sample counts
ptbdb_labels = df_train_ptbdb['label'].unique()
counts_train = df_train_ptbdb['label'].value_counts()
counts_test = df_test_ptbdb['label'].value_counts()

separator = "----------------------------------"
print(separator)
print(f"TRAIN: {counts_train}")
print(separator)
print(f"TEST: {counts_test}")

In [None]:
# plot one example signal (the first training row) for every PTBDB class
for label in ptbdb_labels:
    first_row = df_train_ptbdb.loc[df_train_ptbdb['label'] == label].iloc[0]

    # remove the label entry so that only the signal values are drawn
    signal_frame = first_row.drop('label').to_frame()
    signal_frame.plot(title = str(label), figsize = (10, 5))
    

MITBIH DATA VISUALIZATION

In [None]:
# MITBIH training split: signals plus a 'label' column
train_df_mitbih = pd.DataFrame(train_x_mitbih).assign(label = train_y_mitbih)

# bar chart with the number of training samples per class
train_df_mitbih['label'].value_counts().plot(kind = 'bar', title = "Train data mitbih")

In [None]:
# MITBIH test split: signals plus a 'label' column
test_df_mitbih = pd.DataFrame(test_x_mitbih)
test_df_mitbih['label'] = test_y_mitbih

# bar chart with the number of test samples per class
# (the title previously read "Train data mitbih" although the test split is plotted)
test_df_mitbih['label'].value_counts().plot.bar(title = "Test data mitbih")

In [None]:
# Compare the class distribution of the MITBIH train and test splits in one bar chart.
# The counts are aligned on the label itself (value_counts() orders by frequency, so
# plotting the two series separately would pair bars by rank rather than by class),
# and grouped bars with a legend keep both splits visible.
label_counts_mitbih = pd.DataFrame({
    'train': train_df_mitbih['label'].value_counts(),
    'test': test_df_mitbih['label'].value_counts(),
}).fillna(0).sort_index()  # a class missing from one split is shown as a zero-height bar

label_counts_mitbih.plot.bar(title = "Train and Test data", color = ['blue', 'red'])


In [None]:
# distinct MITBIH classes (taken from the training split) and per-class sample counts
mitbih_labels = train_df_mitbih['label'].unique()
counts_train = train_df_mitbih['label'].value_counts()
counts_test = test_df_mitbih['label'].value_counts()

separator = "----------------------------------"
print(mitbih_labels)
print(separator)
print(f"TRAIN: {counts_train}")
print(separator)
print(f"TEST: {counts_test}")

In [None]:
# plot one example signal (the first training row) for every MITBIH class
for label in mitbih_labels:
    first_row = train_df_mitbih.loc[train_df_mitbih['label'] == label].iloc[0]

    # remove the label entry so that only the signal values are drawn
    signal_frame = first_row.drop('label').to_frame()
    signal_frame.plot(title = str(label), figsize = (10, 5))
    

MEDIA SI DEVIATIA STANDARD PER UNITATE DE TIMP

In [None]:
# Mean and standard deviation per time step for every PTBDB class (training split)
from matplotlib import pyplot as plt


for label in ptbdb_labels:
    # keep only the signals of this class, without the label column
    class_signals = df_train_ptbdb.loc[df_train_ptbdb['label'] == label].drop(columns = 'label')

    # draw both curves on the same axes
    fig, ax = plt.subplots(figsize = (10, 5))
    class_signals.mean().plot(ax = ax, title = str(label))
    class_signals.std().plot(ax = ax, title = str(label))

    # end plot
    ax.legend(['mean', 'std'])
    plt.show()

    

In [None]:
# Mean and standard deviation per time step for every MITBIH class (training split)
from matplotlib import pyplot as plt


for label in mitbih_labels:
    # keep only the signals of this class, without the label column
    class_signals = train_df_mitbih.loc[train_df_mitbih['label'] == label].drop(columns = 'label')

    # draw both curves on the same axes
    fig, ax = plt.subplots(figsize = (10, 5))
    class_signals.mean().plot(ax = ax, title = str(label))
    class_signals.std().plot(ax = ax, title = str(label))

    # end plot
    ax.legend(['mean', 'std'])
    plt.show()

    

3.2.2. Extragerea atributelor

In [None]:
# Default input features for MITBIH (train): the raw signal values, one column per time step
X_train_std_mitbih = pd.DataFrame(data=train_x_mitbih)

X_train_std_mitbih


In [None]:
# Default input features for MITBIH (test): the raw signal values, one column per time step
X_test_std_mitbih = pd.DataFrame(data=test_x_mitbih)

X_test_std_mitbih


In [None]:
# Default input features for PTBDB (train): the raw signal values, one column per time step
X_train_std_ptbdb = pd.DataFrame(data=train_x_ptbdb)

X_train_std_ptbdb

In [None]:
# Default input features for PTBDB (test): the raw signal values, one column per time step
X_test_std_ptbdb = pd.DataFrame(data=test_x_ptbdb)

X_test_std_ptbdb

In [None]:
# Statistical input features for the MITBIH training data.
# Every output row summarises the matching row (one signal) of X_train_std_mitbih,
# first in the time domain and then on the magnitude of its FFT ('fft_' prefix).

# Magnitude spectrum of every row; np.fft.fft transforms along the last axis
fft = pd.DataFrame(np.abs(np.fft.fft(X_train_std_mitbih)))

feature_columns = {}
for prefix, frame in (('', X_train_std_mitbih), ('fft_', fft)):
    row_mean = frame.mean(axis=1)
    row_min = frame.min(axis=1)
    row_max = frame.max(axis=1)
    row_abs = frame.abs()

    feature_columns[prefix + 'mean'] = row_mean
    feature_columns[prefix + 'std'] = frame.std(axis=1)
    # Mean absolute deviation around the row mean. DataFrame.mad() was removed in
    # pandas 2.0, so it is computed explicitly with the same formula.
    feature_columns[prefix + 'avg_abs_diff'] = frame.sub(row_mean, axis=0).abs().mean(axis=1)
    feature_columns[prefix + 'min'] = row_min
    feature_columns[prefix + 'max'] = row_max
    feature_columns[prefix + 'max-min'] = row_max - row_min
    feature_columns[prefix + 'median'] = frame.median(axis=1)
    feature_columns[prefix + 'interquartile_range'] = frame.quantile(0.75, axis=1) - frame.quantile(0.25, axis=1)
    feature_columns[prefix + 'skewness'] = frame.skew(axis=1)
    feature_columns[prefix + 'kurtosis'] = frame.kurtosis(axis=1)
    feature_columns[prefix + 'energy'] = (frame ** 2).sum(axis=1)
    feature_columns[prefix + 'avg_abs_val'] = row_abs.mean(axis=1)
    feature_columns[prefix + 'signal_magnitude_area'] = row_abs.sum(axis=1)

# NOTE(review): the median-absolute-deviation, values-above-mean and number-of-peaks
# features were commented out in the original cell and stay excluded here.
X_train_statistics_mitbih = pd.DataFrame(feature_columns)

X_train_statistics_mitbih

In [None]:
# Statistical input features for the PTBDB training data.
# Every output row summarises the matching row (one signal) of X_train_std_ptbdb,
# first in the time domain and then on the magnitude of its FFT ('fft_' prefix).

# Magnitude spectrum of every row; np.fft.fft transforms along the last axis
fft = pd.DataFrame(np.abs(np.fft.fft(X_train_std_ptbdb)))

feature_columns = {}
for prefix, frame in (('', X_train_std_ptbdb), ('fft_', fft)):
    row_mean = frame.mean(axis=1)
    row_min = frame.min(axis=1)
    row_max = frame.max(axis=1)
    row_abs = frame.abs()

    feature_columns[prefix + 'mean'] = row_mean
    feature_columns[prefix + 'std'] = frame.std(axis=1)
    # Mean absolute deviation around the row mean. DataFrame.mad() was removed in
    # pandas 2.0, so it is computed explicitly with the same formula.
    feature_columns[prefix + 'avg_abs_diff'] = frame.sub(row_mean, axis=0).abs().mean(axis=1)
    feature_columns[prefix + 'min'] = row_min
    feature_columns[prefix + 'max'] = row_max
    feature_columns[prefix + 'max-min'] = row_max - row_min
    feature_columns[prefix + 'median'] = frame.median(axis=1)
    feature_columns[prefix + 'interquartile_range'] = frame.quantile(0.75, axis=1) - frame.quantile(0.25, axis=1)
    feature_columns[prefix + 'skewness'] = frame.skew(axis=1)
    feature_columns[prefix + 'kurtosis'] = frame.kurtosis(axis=1)
    feature_columns[prefix + 'energy'] = (frame ** 2).sum(axis=1)
    feature_columns[prefix + 'avg_abs_val'] = row_abs.mean(axis=1)
    feature_columns[prefix + 'signal_magnitude_area'] = row_abs.sum(axis=1)

# NOTE(review): the median-absolute-deviation, values-above-mean and number-of-peaks
# features were commented out in the original cell and stay excluded here.
X_train_statistics_ptbdb = pd.DataFrame(feature_columns)

X_train_statistics_ptbdb

In [None]:
# Statistical input features for the MITBIH test data.
# Every output row summarises the matching row (one signal) of X_test_std_mitbih,
# first in the time domain and then on the magnitude of its FFT ('fft_' prefix).

# Magnitude spectrum of every row; np.fft.fft transforms along the last axis
fft = pd.DataFrame(np.abs(np.fft.fft(X_test_std_mitbih)))

feature_columns = {}
for prefix, frame in (('', X_test_std_mitbih), ('fft_', fft)):
    row_mean = frame.mean(axis=1)
    row_min = frame.min(axis=1)
    row_max = frame.max(axis=1)
    row_abs = frame.abs()

    feature_columns[prefix + 'mean'] = row_mean
    feature_columns[prefix + 'std'] = frame.std(axis=1)
    # Mean absolute deviation around the row mean. DataFrame.mad() was removed in
    # pandas 2.0, so it is computed explicitly with the same formula.
    feature_columns[prefix + 'avg_abs_diff'] = frame.sub(row_mean, axis=0).abs().mean(axis=1)
    feature_columns[prefix + 'min'] = row_min
    feature_columns[prefix + 'max'] = row_max
    feature_columns[prefix + 'max-min'] = row_max - row_min
    feature_columns[prefix + 'median'] = frame.median(axis=1)
    feature_columns[prefix + 'interquartile_range'] = frame.quantile(0.75, axis=1) - frame.quantile(0.25, axis=1)
    feature_columns[prefix + 'skewness'] = frame.skew(axis=1)
    feature_columns[prefix + 'kurtosis'] = frame.kurtosis(axis=1)
    feature_columns[prefix + 'energy'] = (frame ** 2).sum(axis=1)
    feature_columns[prefix + 'avg_abs_val'] = row_abs.mean(axis=1)
    feature_columns[prefix + 'signal_magnitude_area'] = row_abs.sum(axis=1)

# NOTE(review): the median-absolute-deviation, values-above-mean and number-of-peaks
# features were commented out in the original cell and stay excluded here.
X_test_statistics_mitbih = pd.DataFrame(feature_columns)

X_test_statistics_mitbih

In [None]:
# Statistical input features for the PTBDB test data.
# Every output row summarises the matching row (one signal) of X_test_std_ptbdb,
# first in the time domain and then on the magnitude of its FFT ('fft_' prefix).

# Magnitude spectrum of every row; np.fft.fft transforms along the last axis
fft = pd.DataFrame(np.abs(np.fft.fft(X_test_std_ptbdb)))

feature_columns = {}
for prefix, frame in (('', X_test_std_ptbdb), ('fft_', fft)):
    row_mean = frame.mean(axis=1)
    row_min = frame.min(axis=1)
    row_max = frame.max(axis=1)
    row_abs = frame.abs()

    feature_columns[prefix + 'mean'] = row_mean
    feature_columns[prefix + 'std'] = frame.std(axis=1)
    # Mean absolute deviation around the row mean. DataFrame.mad() was removed in
    # pandas 2.0, so it is computed explicitly with the same formula.
    feature_columns[prefix + 'avg_abs_diff'] = frame.sub(row_mean, axis=0).abs().mean(axis=1)
    feature_columns[prefix + 'min'] = row_min
    feature_columns[prefix + 'max'] = row_max
    feature_columns[prefix + 'max-min'] = row_max - row_min
    feature_columns[prefix + 'median'] = frame.median(axis=1)
    feature_columns[prefix + 'interquartile_range'] = frame.quantile(0.75, axis=1) - frame.quantile(0.25, axis=1)
    feature_columns[prefix + 'skewness'] = frame.skew(axis=1)
    feature_columns[prefix + 'kurtosis'] = frame.kurtosis(axis=1)
    feature_columns[prefix + 'energy'] = (frame ** 2).sum(axis=1)
    feature_columns[prefix + 'avg_abs_val'] = row_abs.mean(axis=1)
    feature_columns[prefix + 'signal_magnitude_area'] = row_abs.sum(axis=1)

# NOTE(review): the median-absolute-deviation, values-above-mean and number-of-peaks
# features were commented out in the original cell and stay excluded here.
X_test_statistics_ptbdb = pd.DataFrame(feature_columns)

X_test_statistics_ptbdb

Models

In [None]:
# Baseline for MITBIH: SVC with default hyperparameters trained on the raw signal values
svm_std_parameters_mitbih = SVC().fit(X_train_std_mitbih, train_y_mitbih)

# evaluate on the test split and print the per-class metrics
predictions = svm_std_parameters_mitbih.predict(X_test_std_mitbih)
print(classification_report(y_true=test_y_mitbih, y_pred=predictions))


           precision    recall  f1-score   support

         0.0       0.97      1.00      0.98     18118
         1.0       0.96      0.56      0.71       556
         2.0       0.97      0.86      0.91      1448
         3.0       0.75      0.48      0.59       162
         4.0       1.00      0.91      0.95      1608

    accuracy                           0.97     21892
   macro avg       0.93      0.76      0.83     21892
weighted avg       0.97      0.97      0.97     21892

In [None]:
# Baseline for PTBDB: SVC with default hyperparameters trained on the raw signal values
svm_std_parameters_ptbdb = SVC().fit(X_train_std_ptbdb, train_y_ptbdb)

# evaluate on the test split and print the per-class metrics
predictions = svm_std_parameters_ptbdb.predict(X_test_std_ptbdb)
print(classification_report(y_true=test_y_ptbdb, y_pred=predictions))


             precision    recall  f1-score   support

         0.0       0.85      0.78      0.81      1349
         1.0       0.92      0.95      0.93      3454

    accuracy                           0.90      4803
   macro avg       0.88      0.86      0.87      4803
weighted avg       0.90      0.90      0.90      4803

In [None]:
# Feature-selection settings used by the cells below
SELECT_PERCENTILE = 30        # passed to SelectPercentile(percentile=...)
VARIANCE_THRESHOLD = 0.01     # passed to VarianceThreshold(threshold=...)


In [None]:
# Variance Threshold PTBDB
# The selector is fitted on the training split ONLY and the resulting column mask is
# then applied to both splits. Refitting on the test split (as the cell did before)
# makes the mask depend on test data and no longer match the training features.
sel = VarianceThreshold(threshold=VARIANCE_THRESHOLD)

# fit_transform / transform return ndarrays and leave the input frames untouched
X_train_variance_ptbdb = sel.fit_transform(X_train_std_ptbdb)
X_test_variance_ptbdb = sel.transform(X_test_std_ptbdb)

print(X_test_variance_ptbdb.shape)

In [None]:
# Variance Threshold MITBIH
# The selector is fitted on the training split ONLY and the resulting column mask is
# then applied to both splits. Refitting on the test split (as the cell did before)
# makes the mask depend on test data and no longer match the training features.
sel = VarianceThreshold(threshold=VARIANCE_THRESHOLD)

# fit_transform / transform return ndarrays and leave the input frames untouched
X_train_variance_mitbih = sel.fit_transform(X_train_std_mitbih)
X_test_variance_mitbih = sel.transform(X_test_std_mitbih)

print(X_test_variance_mitbih.shape)

In [51]:
from sklearn.feature_selection import SelectPercentile

# Select Percentile PTBDB
# Keep the SELECT_PERCENTILE % best-scoring features. The selector is fitted on the
# training split and then applied to BOTH splits, so they always share exactly the
# same columns and end up with the same type (ndarray).

# name the columns x1, x2, ..., xn so the selected features can be reported by name
X_train_named_ptbdb = X_train_std_ptbdb.copy()
X_train_named_ptbdb.columns = [f'x{i}' for i in range(1, X_train_named_ptbdb.shape[1] + 1)]

X_test_named_ptbdb = X_test_std_ptbdb.copy()
X_test_named_ptbdb.columns = [f'x{i}' for i in range(1, X_test_named_ptbdb.shape[1] + 1)]

# the selector gets its own name instead of temporarily living in X_train_percentile_ptbdb
percentile_selector_ptbdb = SelectPercentile(percentile=SELECT_PERCENTILE).fit(X_train_named_ptbdb, train_y_ptbdb)

train_features_names_ptbdb = percentile_selector_ptbdb.get_feature_names_out()

# transform the frames that carry the same column names the selector was fitted with
X_train_percentile_ptbdb = percentile_selector_ptbdb.transform(X_train_named_ptbdb)
X_test_percentile_ptbdb = percentile_selector_ptbdb.transform(X_test_named_ptbdb)

print(X_train_percentile_ptbdb)


[[2.27478936e-01 0.00000000e+00 2.20349971e-02 ... 0.00000000e+00
  0.00000000e+00 0.00000000e+00]
 [2.52151459e-01 8.17555934e-02 1.71256453e-01 ... 0.00000000e+00
  0.00000000e+00 0.00000000e+00]
 [2.77120322e-01 7.34713972e-02 1.12426035e-01 ... 0.00000000e+00
  0.00000000e+00 0.00000000e+00]
 ...
 [8.79504502e-01 5.63063085e-01 3.47972959e-01 ... 0.00000000e+00
  0.00000000e+00 0.00000000e+00]
 [5.15370704e-02 4.52079577e-04 0.00000000e+00 ... 7.45931268e-02
  1.48282096e-01 8.13743249e-02]
 [5.35254419e-01 4.08338457e-01 2.84488052e-01 ... 0.00000000e+00
  0.00000000e+00 0.00000000e+00]]


  f = msb / msw


In [52]:
# Select Percentile MITBIH
# Keep the SELECT_PERCENTILE % best-scoring features. The selector is fitted on the
# training split and then applied to BOTH splits, so they always share exactly the
# same columns and end up with the same type (ndarray).

# name the columns x1, x2, ..., xn so the selected features can be reported by name
X_train_named_mitbih = X_train_std_mitbih.copy()
X_train_named_mitbih.columns = [f'x{i}' for i in range(1, X_train_named_mitbih.shape[1] + 1)]

X_test_named_mitbih = X_test_std_mitbih.copy()
X_test_named_mitbih.columns = [f'x{i}' for i in range(1, X_test_named_mitbih.shape[1] + 1)]

# the selector gets its own name instead of temporarily living in X_train_percentile_mitbih
percentile_selector_mitbih = SelectPercentile(percentile=SELECT_PERCENTILE).fit(X_train_named_mitbih, train_y_mitbih)

train_features_names_mitbih = percentile_selector_mitbih.get_feature_names_out()

# transform the frames that carry the same column names the selector was fitted with
X_train_percentile_mitbih = percentile_selector_mitbih.transform(X_train_named_mitbih)
X_test_percentile_mitbih = percentile_selector_mitbih.transform(X_test_named_mitbih)

print(X_train_percentile_mitbih)


[[0.97794116 0.92647058 0.68137252 ... 0.         0.         0.        ]
 [0.96011394 0.86324787 0.46153846 ... 0.02279202 0.03418804 0.01424501]
 [1.         0.65945947 0.18648648 ... 0.         0.         0.        ]
 ...
 [0.90612245 0.62448978 0.59591836 ... 0.5755102  0.55102038 0.4979592 ]
 [0.85822785 0.64556962 0.84556961 ... 0.16708861 0.12405063 0.12911393]
 [0.90150636 0.84588647 0.80069524 ... 0.26419467 0.17149478 0.14600232]]




In [None]:
# MITBIH: default-parameter SVC trained on the variance-threshold features
svm_std_parameters_mitbih_variance = SVC().fit(X_train_variance_mitbih, train_y_mitbih)

# evaluate on the test split and print the per-class metrics
predictions = svm_std_parameters_mitbih_variance.predict(X_test_variance_mitbih)
print(classification_report(y_true=test_y_mitbih, y_pred=predictions))

In [None]:
# PTBDB: default-parameter SVC trained on the variance-threshold features
svm_std_parameters_ptbdb_variance = SVC().fit(X_train_variance_ptbdb, train_y_ptbdb)

# evaluate on the test split and print the per-class metrics
predictions = svm_std_parameters_ptbdb_variance.predict(X_test_variance_ptbdb)
print(classification_report(y_true=test_y_ptbdb, y_pred=predictions))

In [53]:
# MITBIH: default-parameter SVC trained on the SelectPercentile features
svm_std_parameters_mitbih_percentile = SVC().fit(X_train_percentile_mitbih, train_y_mitbih)

# evaluate on the test split and print the per-class metrics
predictions = svm_std_parameters_mitbih_percentile.predict(X_test_percentile_mitbih)
print(classification_report(y_true=test_y_mitbih, y_pred=predictions))



              precision    recall  f1-score   support

         0.0       0.96      1.00      0.98     18118
         1.0       0.95      0.55      0.70       556
         2.0       0.95      0.80      0.87      1448
         3.0       0.92      0.28      0.43       162
         4.0       1.00      0.90      0.94      1608

    accuracy                           0.96     21892
   macro avg       0.95      0.71      0.78     21892
weighted avg       0.96      0.96      0.96     21892



In [54]:
# PTBDB: default-parameter SVC trained on the SelectPercentile features
svm_std_parameters_ptbdb_percentile = SVC().fit(X_train_percentile_ptbdb, train_y_ptbdb)

# evaluate on the test split and print the per-class metrics
predictions = svm_std_parameters_ptbdb_percentile.predict(X_test_percentile_ptbdb)
print(classification_report(y_true=test_y_ptbdb, y_pred=predictions))



              precision    recall  f1-score   support

         0.0       0.80      0.60      0.68      1349
         1.0       0.86      0.94      0.90      3454

    accuracy                           0.84      4803
   macro avg       0.83      0.77      0.79      4803
weighted avg       0.84      0.84      0.84      4803

