Loading Data

In [None]:
# read data from .arff files
import os

import sktime
from sktime.datasets import load_from_tsfile
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split

# load data from .csv using pd
def load_data(path):
    """Read a header-less CSV file and split it into features and labels.

    Parameters
    ----------
    path : str or path-like
        Location of the CSV file; anything accepted by ``pandas.read_csv``.

    Returns
    -------
    tuple of numpy.ndarray
        ``(X, y)`` where ``y`` holds the values of the last column and ``X``
        holds the values of every other column, in file order.
    """
    # header=None: the first line of the file is data, not column names.
    frame = pd.read_csv(path, header=None)

    # Last column -> labels, all preceding columns -> features.
    features, labels = frame.iloc[:, :-1], frame.iloc[:, -1]
    return features.values, labels.values

Loading MITBIH DATA

In [None]:
# MITBIH ships with a ready-made train/test split, one CSV file per split.
# Each file is parsed into a (features, labels) pair by load_data.
train_x_mitbih, train_y_mitbih = load_data("./datasets/ecg/mitbih_train.csv")
test_x_mitbih, test_y_mitbih = load_data("./datasets/ecg/mitbih_test.csv")


Loading PTBDB DATA

In [None]:
# PTBDB is stored as two files, one with normal and one with abnormal recordings.
ptbdb_normal_x, ptbdb_normal_y = load_data("./datasets/ecg/ptbdb_normal.csv")
ptbdb_abnormal_x, ptbdb_abnormal_y = load_data("./datasets/ecg/ptbdb_abnormal.csv")

# Stack the normal rows first and the abnormal rows after them; features and
# labels are concatenated in the same order so they stay aligned row by row.
x_ptbdb = np.concatenate([ptbdb_normal_x, ptbdb_abnormal_x], axis=0)
y_ptbdb = np.concatenate([ptbdb_normal_y, ptbdb_abnormal_y], axis=0)

# Hold out 33% of the rows for testing; the fixed random_state makes the
# split repeatable across runs.
# NOTE(review): no `stratify` argument is passed, so class proportions in the
# two splits are not guaranteed to match.
train_x_ptbdb, test_x_ptbdb, train_y_ptbdb, test_y_ptbdb = train_test_split(
    x_ptbdb,
    y_ptbdb,
    test_size=0.33,
    random_state=42,
)



PTBDB DATA VISUALIZATION

In [None]:
# PTBDB training split: class distribution.

# One row per signal; the class label is appended as the last column.
df_train_ptbdb = pd.DataFrame(train_x_ptbdb).assign(label=train_y_ptbdb)

# Bar chart with the number of training rows per label.
df_train_ptbdb['label'].value_counts().plot.bar(title="Train data ptbdb")


In [None]:
# PTBDB test split: class distribution.

# One row per signal; the class label is appended as the last column.
df_test_ptbdb = pd.DataFrame(test_x_ptbdb).assign(label=test_y_ptbdb)

# Bar chart with the number of test rows per label.
df_test_ptbdb['label'].value_counts().plot.bar(title="Test data ptbdb")


In [None]:
# Compare the class distribution of the PTBDB train and test splits in one chart.
# The two count series are joined on the label value, so every label gets one
# blue (train) and one red (test) bar side by side. Plotting the two series
# separately would stack the bars at the same x positions and label them with
# the order of the last series only, which value_counts() sorts by frequency.
pd.DataFrame({
    'train': df_train_ptbdb['label'].value_counts(),
    'test': df_test_ptbdb['label'].value_counts(),
}).fillna(0).sort_index().plot.bar(  # fillna(0): a label absent from one split counts as zero
    title = "Train and Test data ptbdb",
    color = ['blue', 'red'],
)

In [None]:
# Distinct labels present in the PTBDB training split, plus per-label row
# counts for both splits.
ptbdb_labels = df_train_ptbdb['label'].unique()
counts_train = df_train_ptbdb['label'].value_counts()
counts_test = df_test_ptbdb['label'].value_counts()

# Text summary: a separator line before each of the two count tables.
print(
    "----------------------------------",
    "TRAIN: " + str(counts_train),
    "----------------------------------",
    "TEST: " + str(counts_test),
    sep="\n",
)

In [None]:
# Plot one example signal for every PTBDB label.
for label in ptbdb_labels:
    # First training row carrying this label (a Series indexed by column name).
    current = df_train_ptbdb[df_train_ptbdb['label'] == label].iloc[0]

    # Turn the row into a one-column frame and remove the 'label' entry so
    # only the signal samples are drawn.
    current_df = pd.DataFrame(current).drop('label', axis='index')
    current_df.plot(title=str(label), figsize=(10, 5))
    

MITBIH DATA VISUALIZATION

In [None]:
# MITBIH training split: class distribution.

# One row per signal; the class label is appended as the last column.
train_df_mitbih = pd.DataFrame(train_x_mitbih).assign(label=train_y_mitbih)

# Bar chart with the number of training rows per label.
train_df_mitbih['label'].value_counts().plot.bar(title="Train data mitbih")

In [None]:
# MITBIH test split: class distribution.

# One row per signal; the class label is stored in an extra 'label' column.
test_df_mitbih = pd.DataFrame(test_x_mitbih)
test_df_mitbih['label'] = test_y_mitbih

# Bar chart with the number of test rows per label. The title names the test
# split, matching the data that is actually plotted.
test_df_mitbih['label'].value_counts().plot.bar(title = "Test data mitbih")

In [None]:
# Compare the class distribution of the MITBIH train and test splits in one chart.
# The two count series are joined on the label value, so every label gets one
# blue (train) and one red (test) bar side by side. Plotting the two series
# separately would stack the bars at the same x positions and label them with
# the order of the last series only, which value_counts() sorts by frequency.
pd.DataFrame({
    'train': train_df_mitbih['label'].value_counts(),
    'test': test_df_mitbih['label'].value_counts(),
}).fillna(0).sort_index().plot.bar(  # fillna(0): a label absent from one split counts as zero
    title = "Train and Test data",
    color = ['blue', 'red'],
)


In [None]:
# Distinct labels present in the MITBIH training split, plus per-label row
# counts for both splits.
mitbih_labels = train_df_mitbih['label'].unique()
counts_train = train_df_mitbih['label'].value_counts()
counts_test = test_df_mitbih['label'].value_counts()

# Text summary: the label values first, then a separator line before each of
# the two count tables.
print(
    mitbih_labels,
    "----------------------------------",
    "TRAIN: " + str(counts_train),
    "----------------------------------",
    "TEST: " + str(counts_test),
    sep="\n",
)

In [None]:
# Plot one example signal for every MITBIH label.
for label in mitbih_labels:
    # First training row carrying this label (a Series indexed by column name).
    current = train_df_mitbih[train_df_mitbih['label'] == label].iloc[0]

    # Turn the row into a one-column frame and remove the 'label' entry so
    # only the signal samples are drawn.
    current_df = pd.DataFrame(current).drop('label', axis='index')
    current_df.plot(title=str(label), figsize=(10, 5))
    

MEDIA SI DEVIATIA STANDARD PER UNITATE DE TIMP

In [None]:
# Mean and standard deviation per time step, for every PTBDB label.
from matplotlib import pyplot as plt


for label in ptbdb_labels:
    # All training rows that carry this label.
    current = df_train_ptbdb[df_train_ptbdb['label'] == label]

    # Keep only the signal columns, then reduce over rows: one mean and one
    # standard deviation value per column.
    current_df = pd.DataFrame(current).drop('label', axis='columns')
    current_df_mean = current_df.mean()
    current_df_std = current_df.std()

    # Both curves are drawn for the same label; the legend entries below are
    # listed in the order the curves are plotted.
    current_df_mean.plot(title=str(label), figsize=(10, 5))
    current_df_std.plot(title=str(label), figsize=(10, 5))

    plt.legend(['mean', 'std'])
    plt.show()

    

In [None]:
# Mean and standard deviation per time step, for every MITBIH label.
from matplotlib import pyplot as plt


for label in mitbih_labels:
    # All training rows that carry this label.
    current = train_df_mitbih[train_df_mitbih['label'] == label]

    # Keep only the signal columns, then reduce over rows: one mean and one
    # standard deviation value per column.
    current_df = pd.DataFrame(current).drop('label', axis='columns')
    current_df_mean = current_df.mean()
    current_df_std = current_df.std()

    # Both curves are drawn for the same label; the legend entries below are
    # listed in the order the curves are plotted.
    current_df_mean.plot(title=str(label), figsize=(10, 5))
    current_df_std.plot(title=str(label), figsize=(10, 5))

    plt.legend(['mean', 'std'])
    plt.show()

    

3.2.2. Extragerea atributelor

In [None]:
# Default input attributes for MITBIH: the training feature array wrapped in
# a DataFrame, one row per signal, with no further transformation.
X_train_std_mitbih = pd.DataFrame(data=train_x_mitbih)

# Display the resulting frame as the cell output.
X_train_std_mitbih


In [None]:
# Default input attributes for PTBDB: the training feature array wrapped in
# a DataFrame, one row per signal, with no further transformation.
X_train_std_ptbdb = pd.DataFrame(data=train_x_ptbdb)

# Display the resulting frame as the cell output.
X_train_std_ptbdb

In [67]:
# Build the statistical input attributes for the MITBIH training signals.
# Every row of X_train_std_mitbih is reduced to scalar descriptors, computed
# first on the raw samples and then on the FFT magnitudes of the same row.


def _mean_abs_deviation(frame):
    """Return the row-wise mean absolute deviation around the row mean.

    Stand-in for ``DataFrame.mad(axis=1)``, which is deprecated in pandas 1.5
    and removed in pandas 2.0. The computation is the same one ``mad``
    performed: subtract each row's mean, take absolute values, average per row.

    Parameters
    ----------
    frame : pandas.DataFrame
        Numeric frame with one signal per row.

    Returns
    -------
    pandas.Series
        One value per row, indexed like ``frame``.
    """
    return frame.sub(frame.mean(axis=1), axis=0).abs().mean(axis=1)


X_train_statistics_mitbih = pd.DataFrame()

# Mean features
X_train_statistics_mitbih['mean'] = X_train_std_mitbih.mean(axis=1)

# Standard deviation features
X_train_statistics_mitbih['std'] = X_train_std_mitbih.std(axis=1)

# Mean absolute deviation features (column name kept for compatibility)
X_train_statistics_mitbih['avg_abs_diff'] = _mean_abs_deviation(X_train_std_mitbih)

# Min features
X_train_statistics_mitbih['min'] = X_train_std_mitbih.min(axis=1)

# Max features
X_train_statistics_mitbih['max'] = X_train_std_mitbih.max(axis=1)

# Max-min features
X_train_statistics_mitbih['max-min'] = X_train_statistics_mitbih['max'] - X_train_statistics_mitbih['min']

# Median features
X_train_statistics_mitbih['median'] = X_train_std_mitbih.median(axis=1)

# Median absolute deviation features (disabled).
# NOTE(review): if re-enabled, subtract row-wise with .sub(median, axis=0);
# a plain `frame - series` aligns the series on the column labels instead.
# X_train_statistics_mitbih['median_abs_dev'] = (X_train_std_mitbih - X_train_statistics_mitbih['median']).abs().median(axis=1)

# Interquartile range features
X_train_statistics_mitbih['interquartile_range'] = X_train_std_mitbih.quantile(0.75, axis=1) - X_train_std_mitbih.quantile(0.25, axis=1)

# Values above mean features (disabled).
# NOTE(review): same alignment caveat; use .gt(mean, axis=0) if re-enabled.
# X_train_statistics_mitbih['values_above_mean'] = (X_train_std_mitbih > X_train_statistics_mitbih['mean']).sum(axis=1)

# Number of peaks features (disabled)
# X_train_statistics_mitbih['number_of_peaks'] = (X_train_std_mitbih.diff(axis=1) < 0).sum(axis=1)

# Skewness features
X_train_statistics_mitbih['skewness'] = X_train_std_mitbih.skew(axis=1)

# Kurtosis features
X_train_statistics_mitbih['kurtosis'] = X_train_std_mitbih.kurtosis(axis=1)

# Energy features: sum of squared samples per row
X_train_statistics_mitbih['energy'] = (X_train_std_mitbih ** 2).sum(axis=1)

# Average of absolute values features
X_train_statistics_mitbih['avg_abs_val'] = X_train_std_mitbih.abs().mean(axis=1)

# Signal magnitude area features: sum of absolute samples per row
X_train_statistics_mitbih['signal_magnitude_area'] = X_train_std_mitbih.abs().sum(axis=1)

# FFT magnitudes, computed independently for each row (last axis).
fft = np.abs(np.fft.fft(X_train_std_mitbih.to_numpy(), axis=1))

# Wrap in a frame that shares the input's row index, so the column
# assignments below line up with the time-domain features row by row.
fft = pd.DataFrame(fft, index=X_train_std_mitbih.index)

# FFT mean features
X_train_statistics_mitbih['fft_mean'] = fft.mean(axis=1)

# FFT standard deviation features
X_train_statistics_mitbih['fft_std'] = fft.std(axis=1)

# FFT mean absolute deviation features (column name kept for compatibility)
X_train_statistics_mitbih['fft_avg_abs_diff'] = _mean_abs_deviation(fft)

# FFT min features
X_train_statistics_mitbih['fft_min'] = fft.min(axis=1)

# FFT max features
X_train_statistics_mitbih['fft_max'] = fft.max(axis=1)

# FFT max-min features
X_train_statistics_mitbih['fft_max-min'] = X_train_statistics_mitbih['fft_max'] - X_train_statistics_mitbih['fft_min']

# FFT median features
X_train_statistics_mitbih['fft_median'] = fft.median(axis=1)

# FFT median absolute deviation features (disabled; same alignment caveat as above)
# X_train_statistics_mitbih['fft_median_abs_dev'] = (fft - X_train_statistics_mitbih['fft_median']).abs().median(axis=1)

# FFT interquartile range features
X_train_statistics_mitbih['fft_interquartile_range'] = fft.quantile(0.75, axis=1) - fft.quantile(0.25, axis=1)

# FFT values above mean features (disabled; same alignment caveat as above)
# X_train_statistics_mitbih['fft_values_above_mean'] = (fft > X_train_statistics_mitbih['fft_mean']).sum(axis=1)

# FFT number of peaks features (disabled)
# X_train_statistics_mitbih['fft_number_of_peaks'] = (fft.diff(axis=1) < 0).sum(axis=1)

# FFT skewness features
X_train_statistics_mitbih['fft_skewness'] = fft.skew(axis=1)

# FFT kurtosis features
X_train_statistics_mitbih['fft_kurtosis'] = fft.kurtosis(axis=1)

# FFT energy features: sum of squared magnitudes per row
X_train_statistics_mitbih['fft_energy'] = (fft ** 2).sum(axis=1)

# FFT average of absolute values features (magnitudes are already
# non-negative, so this equals 'fft_mean'; kept so the column set is unchanged)
X_train_statistics_mitbih['fft_avg_abs_val'] = fft.abs().mean(axis=1)

# FFT signal magnitude area features
X_train_statistics_mitbih['fft_signal_magnitude_area'] = fft.abs().sum(axis=1)

# Display the resulting feature frame as the cell output.
X_train_statistics_mitbih

  X_train_statistics_mitbih['avg_abs_diff'] = X_train_std_mitbih.mad(axis=1)
  X_train_statistics_mitbih['fft_avg_abs_diff'] = fft.mad(axis=1)


Unnamed: 0,mean,std,avg_abs_diff,min,max,max-min,median,interquartile_range,skewness,kurtosis,...,fft_min,fft_max,fft_max-min,fft_median,fft_interquartile_range,fft_skewness,fft_kurtosis,fft_energy,fft_avg_abs_val,fft_signal_magnitude_area
0,0.098419,0.176545,0.098821,0.0,1.0,1.0,0.061275,0.127451,3.574941,14.029701,...,0.124106,18.404412,18.280306,0.703730,1.423335,3.497342,19.568359,1422.818305,1.720818,321.793056
1,0.090010,0.160151,0.085626,0.0,1.0,1.0,0.054131,0.089744,3.866087,16.737549,...,0.241484,16.831909,16.590425,0.793195,1.635266,3.617907,21.567215,1175.412960,1.624733,303.825096
2,0.062104,0.139142,0.075097,0.0,1.0,1.0,0.005405,0.058108,4.359641,22.804009,...,0.074545,11.613513,11.538968,0.957636,1.511143,2.896435,14.159601,808.267542,1.516143,283.518829
3,0.084084,0.161252,0.096083,0.0,1.0,1.0,0.002762,0.110497,3.505675,14.097800,...,0.351315,15.723757,15.372442,0.701249,1.177544,3.377882,16.815646,1151.650240,1.570397,293.664221
4,0.116567,0.186651,0.112355,0.0,1.0,1.0,0.056338,0.145540,3.134928,10.550815,...,0.368329,21.798122,21.429792,0.714580,1.159663,4.272667,25.916296,1686.915647,1.693052,316.600798
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
87549,0.158364,0.169925,0.132647,0.0,1.0,1.0,0.154386,0.242105,1.406400,3.360931,...,0.069429,29.614035,29.544606,0.796862,0.646505,6.574707,56.322569,1881.304039,1.460119,273.042327
87550,0.325508,0.293708,0.267895,0.0,1.0,1.0,0.465000,0.507500,0.177418,-1.298840,...,0.029903,60.870000,60.840097,0.722313,0.912005,7.807596,70.993992,6705.611140,2.013323,376.491407
87551,0.316665,0.275855,0.255715,0.0,1.0,1.0,0.436735,0.546939,0.029823,-1.446845,...,0.112007,59.216326,59.104319,0.859230,0.798083,8.172597,76.553161,6153.345197,2.009994,375.868802
87552,0.101807,0.161769,0.088529,0.0,1.0,1.0,0.081013,0.129114,3.682313,15.426469,...,0.056396,19.037975,18.981579,0.751192,1.207668,4.114582,26.887293,1272.666696,1.576269,294.762258
