# Binary classification

In [43]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy.signal import find_peaks
from scipy.stats import moment
from sklearn.impute import KNNImputer
from catboost import CatBoostClassifier
from sklearn.model_selection import cross_val_score, KFold, train_test_split

In [2]:
sheet1 = pd.read_excel(os.path.join("NN_Class", "GSR_hrac1.xlsx"), sheet_name="Sheet1")
hra2 = pd.read_excel(os.path.join("NN_Class", "GSR_hrac1.xlsx"), sheet_name="Hra_2")
hra3 = pd.read_excel(os.path.join("NN_Class", "GSR_hrac1.xlsx"), sheet_name="Hra_3")
hra4 = pd.read_excel(os.path.join("NN_Class", "GSR_hrac1.xlsx"), sheet_name="Hra_4")

In [3]:
hra2_0 = hra2.loc[hra2['Stres'] == 0]
hra2_1 = hra2.loc[hra2['Stres'] == 1]
hra2_2 = hra2.loc[hra2['Stres'] == 2]

In [4]:
import pandas as pd
import numpy as np

# Load data from the MATLAB file
data = hra2[["Data", "Stres"]]

zeroGroups = []
oneGroups = []

# Iterate through the DataFrame
lastSymbol = None
for index, row in data.iterrows():
    if lastSymbol is None or lastSymbol != row['Stres']:
        lastSymbol = row['Stres']
        if lastSymbol == 0:
            zeroGroups.append([])
        elif lastSymbol == 1:
            oneGroups.append([])

    if row['Stres'] == 0:
        zeroGroups[-1].append(row['Data'])
    elif row['Stres'] in [1, 2]:
        oneGroups[-1].append(row['Data'])


In [5]:
# AUC
auc_0 = []
auc_1 = []

for i in range(len(zeroGroups)):
    auc_0.append(np.trapz(zeroGroups[i]))

for x in range(len(oneGroups)):
    auc_1.append(np.trapz(oneGroups[x]))

In [6]:
# mean
mean_0 = []
mean_1 = []

for i in range(len(zeroGroups)):
    mean_0.append(np.mean(zeroGroups[i]))

for x in range(len(oneGroups)):
    mean_1.append(np.mean(oneGroups[x]))

In [7]:
# std
std_0 = []
std_1 = []

for i in range(len(zeroGroups)):
    std_0.append(np.std(zeroGroups[i]))

for x in range(len(oneGroups)):
    std_1.append(np.std(oneGroups[x]))

In [8]:
# number of peaks
numb_peaks_0 = []
numb_peaks_1 = []

for i in range(len(zeroGroups)):
    peaks_0, _ = find_peaks(zeroGroups[i])
    numb_peaks_0.append(len(peaks_0))

for x in range(len(oneGroups)):
    peaks_1, _ = find_peaks(oneGroups[x])
    numb_peaks_1.append(len(peaks_1))

In [9]:
# average paek amplitude
def average_peak_amplitude(group):
    peaks, _ = find_peaks(group)
    if len(peaks) > 0:
        peak_amplitudes = np.array(group)[peaks]
        return np.mean(peak_amplitudes)
    else:
        return None

avg_peak_amp_0 = [average_peak_amplitude(i) for i in zeroGroups]
avg_peak_amp_1 = [average_peak_amplitude(x) for x in oneGroups]

In [10]:
# sum of peak amplitudes
def sum_peak_amplitude(group):
    peaks, _ = find_peaks(group)
    if len(peaks) > 0:
        peak_amplitudes = np.array(group)[peaks]
        return np.sum(peak_amplitudes)
    else:
        return None

sum_peak_amp_0 = [sum_peak_amplitude(i) for i in zeroGroups]
sum_peak_amp_1 = [sum_peak_amplitude(x) for x in oneGroups]

In [11]:
# entropie
def calculate_entropy(group):
    hist, _ = np.histogram(group)
    hist = hist[hist != 0]
    hist = hist / np.sum(hist)
    return -np.sum(hist * np.log(hist))
entropy_0 = [calculate_entropy(i) for i in zeroGroups]
entropy_1 = [calculate_entropy(x) for x in oneGroups]

In [15]:
def calculate_moment(group):    
    return moment(group, 2)
moment_0 = [calculate_moment(i) for i in zeroGroups]
moment_1 = [calculate_moment(x) for x in oneGroups]

  return moment(group, 2)


In [17]:
features_dict = {
    'AUC': auc_0 + auc_1,
    'Mean': mean_0 + mean_1,
    'Std': std_0 + std_1,
    'NumPeaks': numb_peaks_0 + numb_peaks_1,
    'AvgPeakAmp': avg_peak_amp_0 + avg_peak_amp_1,
    'SumPeakAmp': sum_peak_amp_0 + sum_peak_amp_1,
    'Entropy': entropy_0 + entropy_1,
    'SecondMoment': moment_0 + moment_1,
}
target = [0] * len(auc_0) + [1] * len(auc_1)
features_dict['Target'] = target

In [20]:
features_df = pd.DataFrame(features_dict)

In [22]:
features_df = features_df.sample(frac=1).reset_index(drop=True)

In [30]:
imputer = KNNImputer(n_neighbors=3)
data_imputed = imputer.fit_transform(features_df)
ecg_data_imputed = pd.DataFrame(data_imputed, columns=features_df.columns).astype({"Target": int})

In [35]:
ecg_data_imputed

Unnamed: 0,AUC,Mean,Std,NumPeaks,AvgPeakAmp,SumPeakAmp,Entropy,SecondMoment,Target
0,11709.0,1672.75,2.106537,1.0,1677.0,1677.0,1.213008,4.4375,1
1,10030.0,1671.857143,1.456863,0.0,1714.666667,1714.666667,0.796312,2.122449,1
2,937450.5,1726.446691,13.532399,40.0,1728.525,69141.0,2.083745,183.125835,0
3,100284.5,1729.118644,7.741775,5.0,1726.2,8631.0,1.397953,59.935076,1
4,131587.5,1708.935897,5.287479,6.0,1708.166667,10249.0,1.784432,27.957429,0
5,14238.0,1779.888889,1.36987,1.0,1781.0,1781.0,0.936888,1.876543,1
6,11900.0,1700.0,0.0,0.0,1714.666667,1714.666667,-0.0,0.0,1
7,12792.0,1827.5,2.783882,0.0,1748.666667,1748.666667,0.974315,7.75,1
8,920497.0,1733.507519,12.868403,32.0,1732.375,55436.0,2.09676,165.595808,0
9,12029.0,1718.5,2.783882,1.0,1717.0,1717.0,1.559581,7.75,1


In [36]:
X, y = features_df.loc[:, features_df.columns != "Target"].values, ecg_data_imputed["Target"].values

In [None]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [40]:
rf_model = CatBoostClassifier()
cv = KFold(n_splits=10, random_state=0, shuffle=True)
scores = cross_val_score(rf_model, X, y, scoring='f1', cv=cv, n_jobs=-1)

In [44]:
scores

array([0.66666667, 0.66666667, 1.        , 0.8       , 0.66666667,
       1.        , 1.        , 1.        , 1.        , 0.66666667])