In [1]:
import os
import glob
import numpy as np
from tqdm import tqdm
import itertools
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

# Audio
import librosa
import librosa.display

# Scikit learn
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import StratifiedShuffleSplit
from sklearn.metrics import classification_report, accuracy_score, confusion_matrix, roc_auc_score, matthews_corrcoef
from sklearn.preprocessing import LabelEncoder
from sklearn.utils import shuffle
from sklearn.utils import class_weight
from sklearn.model_selection import train_test_split
from sklearn.metrics import plot_confusion_matrix as sk_plot_confusion_matrix

# Keras
import keras
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.layers import Convolution2D, Conv2D, MaxPooling2D, GlobalAveragePooling2D


# TensorFlow
from tensorflow.keras.utils import to_categorical

# Ignore warnings
import warnings
warnings.filterwarnings('ignore', category=FutureWarning)

# Build the slice index: one row per (recording, offset) window.
# Every labelled recording of at least `slice_size` seconds is cut into
# windows of `slice_size` seconds whose starts are `slice_step` seconds apart,
# centred inside the recording.
slice_size = 3                 # window length in seconds
slice_step = slice_size - 1    # hop between window starts (consecutive windows overlap by 1 s)
unlabelled_prefixes = {"Aunlabelledtest", "Bunlabelledtest"}

records = []
for folder in ["Heartbeat_Sounds/set_a/**","Heartbeat_Sounds/set_b/**"]:
    # Without recursive=True, "**" behaves like "*": only direct children match.
    # Sorting makes the row order (and therefore the seeded shuffle below)
    # independent of the filesystem's listing order.
    for filename in sorted(glob.iglob(folder)):
        # Directories also match the pattern; only regular files can be decoded.
        if not os.path.isfile(filename):
            continue

        # The label is the part of the file name before the first underscore.
        label = os.path.basename(filename).split("_")[0]
        # Drop unlabelled recordings before paying for a duration lookup.
        if label in unlabelled_prefixes:
            continue

        duration = librosa.get_duration(filename=filename)
        # skip audio smaller than one window
        if duration < slice_size:
            continue

        # Number of windows that fit, and the margin that centres them.
        iterations = int((duration - slice_size) / slice_step) + 1
        covered = (iterations - 1) * slice_step + slice_size
        initial_offset = (duration - covered) / 2

        for i in range(iterations):
            offset = initial_offset + i * slice_step
            records.append({
                "filename": filename,
                "label": label,
                "offset": offset
            })

if not records:
    # Fail here with a clear message instead of an AttributeError further down.
    raise RuntimeError(
        "No labelled audio of at least %d s found under Heartbeat_Sounds/set_a or set_b" % slice_size
    )

dataset = pd.DataFrame(records, columns=["filename", "label", "offset"])
dataset = shuffle(dataset, random_state=42)
dataset.info()

# Function to extract features
def extract_features(audio_path, offset, duration=3.0, n_mfcc=40):
    """Compute MFCCs for one fixed-length slice of an audio file.

    Parameters
    ----------
    audio_path : str
        Path of the audio file to read.
    offset : float
        Start of the slice, in seconds from the beginning of the file.
    duration : float, optional
        Length of the slice in seconds (default 3, the window size used when
        the slice index is built).
    n_mfcc : int, optional
        Number of MFCC coefficients to keep (default 40).

    Returns
    -------
    numpy.ndarray
        The 2-D MFCC matrix returned by ``librosa.feature.mfcc``, computed
        from the dB-scaled mel spectrogram of the slice.
    """
    y, sr = librosa.load(audio_path, offset=offset, duration=duration)

    # A slice that decodes a few samples short (or long) would yield a different
    # number of frames and make the stacked feature array ragged, so force every
    # slice to exactly `duration` seconds of samples (zero-padded or trimmed).
    y = librosa.util.fix_length(y, size=int(round(sr * duration)))

    S = librosa.feature.melspectrogram(y=y, sr=sr, n_fft=2048, hop_length=512, n_mels=128)
    mfccs = librosa.feature.mfcc(S=librosa.power_to_db(S), n_mfcc=n_mfcc)
    return mfccs

# Turn every (file, offset) row of the slice index into an MFCC "image".
x_data = []
for audio_path, start in tqdm(zip(dataset.filename, dataset.offset), total=len(dataset)):
    mfccs = extract_features(audio_path, start)
    # Add a trailing channel axis so each sample is (rows, frames, 1) for Conv2D.
    mfccs = mfccs.reshape(*mfccs.shape[:2], 1)
    x_data.append(mfccs)

# Stack the per-slice arrays into a single array
x_data = np.asarray(x_data)


# Map the string labels to integer class ids
encoder = LabelEncoder()
y_data = encoder.fit_transform(dataset.label)

# "balanced" per-class weights, one entry per distinct class id
class_weights = class_weight.compute_class_weight(
    class_weight="balanced",
    classes=np.unique(y_data),
    y=y_data,
)

# One-hot targets for the softmax / categorical cross-entropy head
y_data_one_hot = to_categorical(y_data)

# A single stratified 80/20 shuffle split (StratifiedShuffleSplit, not a K-fold);
# the evaluation loop still refers to each split as a "fold".
# NOTE(review): the split is made over slice rows, and one recording contributes
# several overlapping slices, so slices of the same recording can end up on both
# sides of the split - confirm whether a group-aware split is wanted.
kfold = StratifiedShuffleSplit(test_size=0.2, n_splits=1, random_state=42)

# Per-fold result accumulators (train / test pairs)
train_accuracies, test_accuracies = [], []
train_auc_scores, test_auc_scores = [], []
train_mcc_scores, test_mcc_scores = [], []
train_conf_matrices, test_conf_matrices = [], []
train_class_reports, test_class_reports = [], []

# Test-set extras collected per fold
cm_test_normalized = []
report_test_list = []
cm_test_normalized_list = []

# Train and evaluate one 2D-CNN per split, write per-fold and aggregated metrics
# to a common text file, and save row-normalized confusion-matrix heatmaps.
results_path = 'results_heartSound/all_folds_2dcnn5_results.txt'
figure_dir = '2DCNN'

# open() and savefig() do not create missing directories.
os.makedirs(os.path.dirname(results_path), exist_ok=True)
os.makedirs(figure_dir, exist_ok=True)

n_classes = len(encoder.classes_)
# Fixed label set so every confusion matrix has the same (n_classes, n_classes)
# shape and lines up with encoder.classes_, even if a class is absent in a fold.
class_ids = np.arange(n_classes)
separator = "\n" + "="*40 + "\n"


def _row_normalize(cm):
    """Return `cm` with each row divided by its sum; empty rows stay all-zero."""
    cm = np.asarray(cm, dtype=float)
    row_sums = cm.sum(axis=1, keepdims=True)
    return np.divide(cm, row_sums, out=np.zeros_like(cm), where=row_sums != 0)


def _save_cm_heatmap(cm_normalized, title, path):
    """Draw a normalized confusion matrix as an annotated heatmap and save it to `path`."""
    fig, ax = plt.subplots()
    sns.heatmap(cm_normalized, annot=True, fmt='.5f', cmap='Blues',
                xticklabels=encoder.classes_, yticklabels=encoder.classes_, ax=ax)
    ax.set_title(title)
    ax.set_xlabel('Predicted')
    ax.set_ylabel('True')
    fig.savefig(path)
    plt.close(fig)  # always release the figure, otherwise figures pile up across folds


# True / predicted class ids pooled over all folds, for the overall reports.
all_train_true, all_train_pred = [], []
all_test_true, all_test_pred = [], []

# Open the common results file in 'w' mode to overwrite existing content
with open(results_path, 'w') as common_file:
    # Split the indices instead of the dataset
    for fold, (train_index, test_index) in enumerate(kfold.split(x_data, y_data), 1):
        x_train_fold, x_test_fold = x_data[train_index], x_data[test_index]
        y_train_fold, y_test_fold = y_data_one_hot[train_index], y_data_one_hot[test_index]

        # Four Conv2D -> MaxPooling -> Dropout stages, global average pooling,
        # then a softmax layer with one unit per class.
        model = Sequential()
        model.add(Conv2D(filters=32, kernel_size=2, input_shape=(x_train_fold.shape[1], x_train_fold.shape[2], x_train_fold.shape[3]), activation='relu'))
        model.add(MaxPooling2D(pool_size=2))
        model.add(Dropout(0.2))

        model.add(Conv2D(filters=64, kernel_size=2, activation='relu'))
        model.add(MaxPooling2D(pool_size=2))
        model.add(Dropout(0.2))

        model.add(Conv2D(filters=128, kernel_size=2, activation='relu'))
        model.add(MaxPooling2D(pool_size=2))
        model.add(Dropout(0.2))

        model.add(Conv2D(filters=256, kernel_size=2, activation='relu'))
        model.add(MaxPooling2D(pool_size=2))
        model.add(Dropout(0.5))
        model.add(GlobalAveragePooling2D())

        model.add(Dense(n_classes, activation='softmax'))

        # Compile the model
        model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

        your_epochs = 100  # You can choose an appropriate number of epochs
        your_batch_size = 64  # You can choose an appropriate batch size

        # Train the model
        # NOTE(review): `class_weights` is computed earlier in this cell but is not
        # passed to fit(); confirm whether class balancing was intended.
        model.fit(x_train_fold, y_train_fold, epochs=your_epochs, batch_size=your_batch_size, verbose=1)

        # Class probabilities for both partitions
        y_pred_train = model.predict(x_train_fold)
        y_pred_test = model.predict(x_test_fold)

        # Integer class ids (argmax over the one-hot / probability axis), computed once
        y_true_train_labels = np.argmax(y_train_fold, axis=1)
        y_pred_train_labels = np.argmax(y_pred_train, axis=1)
        y_true_test_labels = np.argmax(y_test_fold, axis=1)
        y_pred_test_labels = np.argmax(y_pred_test, axis=1)

        report_test = classification_report(y_true_test_labels, y_pred_test_labels, output_dict=True)

        # Calculate metrics for train set
        train_accuracy = accuracy_score(y_true_train_labels, y_pred_train_labels)
        train_auc = roc_auc_score(y_train_fold, y_pred_train, multi_class='ovr')
        train_mcc = matthews_corrcoef(y_true_train_labels, y_pred_train_labels)
        train_cm = confusion_matrix(y_true_train_labels, y_pred_train_labels, labels=class_ids)
        train_class_report = classification_report(y_true_train_labels, y_pred_train_labels,
                                                   labels=class_ids, target_names=encoder.classes_)

        # Calculate metrics for test set
        test_accuracy = accuracy_score(y_true_test_labels, y_pred_test_labels)
        test_auc = roc_auc_score(y_test_fold, y_pred_test, multi_class='ovr')
        test_mcc = matthews_corrcoef(y_true_test_labels, y_pred_test_labels)
        test_cm = confusion_matrix(y_true_test_labels, y_pred_test_labels, labels=class_ids)
        test_class_report = classification_report(y_true_test_labels, y_pred_test_labels,
                                                  labels=class_ids, target_names=encoder.classes_)

        # Record this fold exactly once; appending a value twice would distort
        # every mean / std computed from these lists.
        train_accuracies.append(train_accuracy)
        test_accuracies.append(test_accuracy)
        train_auc_scores.append(train_auc)
        test_auc_scores.append(test_auc)
        train_mcc_scores.append(train_mcc)
        test_mcc_scores.append(test_mcc)
        train_conf_matrices.append(train_cm)
        test_conf_matrices.append(test_cm)
        train_class_reports.append(train_class_report)
        test_class_reports.append(test_class_report)
        report_test_list.append(report_test)
        cm_test_normalized_list.append(_row_normalize(test_cm))

        all_train_true.append(y_true_train_labels)
        all_train_pred.append(y_pred_train_labels)
        all_test_true.append(y_true_test_labels)
        all_test_pred.append(y_pred_test_labels)

        # Running std over the folds seen so far
        train_accuracy_std = np.std(train_accuracies)
        test_accuracy_std = np.std(test_accuracies)

        # Save results to the common text file for both train and test sets
        common_file.write(f'Fold {fold} Results:\n')
        common_file.write(f'Train Accuracy: {train_accuracy} (std: {train_accuracy_std})\n')
        common_file.write(f'Train AUC Score: {train_auc}\n')
        common_file.write(f'Train MCC Score: {train_mcc}\n\n')
        common_file.write('Train Confusion Matrix:\n')
        common_file.write(str(train_cm))
        common_file.write('\n\nTrain Classification Report:\n')
        common_file.write(train_class_report)

        common_file.write(f'\n\nTest Accuracy: {test_accuracy} (std: {test_accuracy_std})\n')
        common_file.write(f'Test AUC Score: {test_auc}\n')
        common_file.write(f'Test MCC Score: {test_mcc}\n\n')
        common_file.write('Test Confusion Matrix:\n')
        common_file.write(str(test_cm))
        common_file.write('\n\nTest Classification Report:\n')
        common_file.write(test_class_report)

        # Separator between folds
        common_file.write(separator)

        # Per-fold heatmaps of the row-normalized confusion matrices
        _save_cm_heatmap(np.round(_row_normalize(train_cm), 5),
                         f'Train Confusion Matrix - Fold {fold}',
                         os.path.join(figure_dir, f'train_confusion_matrix_fold_{fold}.png'))
        _save_cm_heatmap(np.round(_row_normalize(test_cm), 5),
                         f'Test Confusion Matrix - Fold {fold}',
                         os.path.join(figure_dir, f'test_confusion_matrix_fold_{fold}.png'))

    # ---- Aggregate over all folds -------------------------------------------
    avg_train_accuracy = np.mean(train_accuracies)
    avg_train_auc = np.mean(train_auc_scores)
    avg_train_mcc = np.mean(train_mcc_scores)

    avg_test_accuracy = np.mean(test_accuracies)
    avg_test_auc = np.mean(test_auc_scores)
    avg_test_mcc = np.mean(test_mcc_scores)

    # Mean count matrices, rounded to the nearest integer for display
    avg_train_conf_matrix = np.rint(np.mean(train_conf_matrices, axis=0)).astype(int)
    avg_test_conf_matrix = np.rint(np.mean(test_conf_matrices, axis=0)).astype(int)

    # Mean of the per-fold row-normalized test matrices (kept as floats)
    avg_cm_test_normalized = np.mean(cm_test_normalized_list, axis=0)
    normalized_avg_test_cm = avg_cm_test_normalized

    # Overall reports are computed on the predictions pooled over every fold,
    # not on whichever fold happened to run last.
    avg_train_class_report = classification_report(
        np.concatenate(all_train_true),
        np.concatenate(all_train_pred),
        labels=class_ids,
        target_names=encoder.classes_,
        digits=5
    )
    avg_test_class_report = classification_report(
        np.concatenate(all_test_true),
        np.concatenate(all_test_pred),
        labels=class_ids,
        target_names=encoder.classes_,
        digits=5
    )

    # Heatmap of the averaged, row-normalized test confusion matrix
    _save_cm_heatmap(normalized_avg_test_cm,
                     f'Average Test Confusion Matrix',
                     os.path.join(figure_dir, 'average_test_confusion_matrix.png'))

    # Write the aggregated results once per partition
    common_file.write(separator)
    common_file.write(f'Average Train Accuracy: {avg_train_accuracy} (std: {np.std(train_accuracies)})\n')
    common_file.write(f'Average Train AUC Score: {avg_train_auc}\n')
    common_file.write(f'Average Train MCC Score: {avg_train_mcc}\n\n')
    common_file.write('Average Train Confusion Matrix:\n')
    common_file.write(str(avg_train_conf_matrix))
    common_file.write("\n\nAverage Train Classification Report:\n")
    common_file.write(str(avg_train_class_report))

    common_file.write(separator)
    common_file.write(f'Average Test Accuracy: {avg_test_accuracy} (std: {np.std(test_accuracies)})\n')
    common_file.write(f'Average Test AUC Score: {avg_test_auc}\n')
    common_file.write(f'Average Test MCC Score: {avg_test_mcc}\n\n')
    common_file.write('Average Test Confusion Matrix:\n')
    common_file.write(str(avg_test_conf_matrix))
    common_file.write('\n\nAverage Test Confusion Matrix (row-normalized):\n')
    common_file.write(str(np.round(avg_cm_test_normalized, 5)))
    common_file.write("\n\nAverage Test Classification Report:\n")
    common_file.write(str(avg_test_class_report))

    # Print average results for train set
    print(f'Average Train Accuracy: {avg_train_accuracy}')
    print(f'Average Train AUC Score: {avg_train_auc}')
    print(f'Average Train MCC Score: {avg_train_mcc} \n')
    print("\nAverage Train Classification Report:")
    print(avg_train_class_report)

    # Print average results for test set
    print(f'Average Test Accuracy: {avg_test_accuracy}')
    print(f'Average Test AUC Score: {avg_test_auc}')
    print(f'Average Test MCC Score: {avg_test_mcc} \n')
    print("\nAverage Test Classification Report:")
    print(avg_test_class_report)

2024-03-15 10:57:13.940638: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2024-03-15 10:57:13.940743: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.


KeyboardInterrupt: 

In [4]:
import os
import glob
import numpy as np
from tqdm import tqdm
import itertools
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

# Audio processing
import librosa
import librosa.display

# Scikit-learn
from sklearn.model_selection import StratifiedKFold, train_test_split, StratifiedShuffleSplit
from sklearn.metrics import classification_report, accuracy_score, confusion_matrix, matthews_corrcoef, roc_auc_score
from sklearn.preprocessing import LabelEncoder
from sklearn.utils import shuffle, class_weight

# Keras
from keras.models import Sequential
from keras.layers import Dense, Dropout, LSTM, Flatten, BatchNormalization, Reshape
from keras.layers.convolutional import Conv2D, MaxPooling2D
from tensorflow.keras.utils import to_categorical



# Ignore warnings
import warnings
warnings.filterwarnings('ignore', category=FutureWarning)

# Build the slice index: one row per (recording, offset) window.
# Every labelled recording of at least `slice_size` seconds is cut into
# windows of `slice_size` seconds whose starts are `slice_step` seconds apart,
# centred inside the recording.
slice_size = 3                 # window length in seconds
slice_step = slice_size - 1    # hop between window starts (consecutive windows overlap by 1 s)
unlabelled_prefixes = {"Aunlabelledtest", "Bunlabelledtest"}

records = []

for folder in ["Heartbeat_Sounds/set_a/**","Heartbeat_Sounds/set_b/**"]:
    # Without recursive=True, "**" behaves like "*": only direct children match.
    # Sorting makes the row order (and therefore the seeded shuffle below)
    # independent of the filesystem's listing order.
    for filename in sorted(glob.iglob(folder)):
        # Directories also match the pattern; only regular files can be decoded.
        if not os.path.isfile(filename):
            continue

        # The label is the part of the file name before the first underscore.
        label = os.path.basename(filename).split("_")[0]
        # Drop unlabelled recordings before paying for a duration lookup.
        if label in unlabelled_prefixes:
            continue

        duration = librosa.get_duration(filename=filename)
        # skip audio smaller than one window
        if duration < slice_size:
            continue

        # Number of windows that fit, and the margin that centres them.
        iterations = int((duration - slice_size) / slice_step) + 1
        covered = (iterations - 1) * slice_step + slice_size
        initial_offset = (duration - covered) / 2

        for i in range(iterations):
            offset = initial_offset + i * slice_step
            records.append({
                "filename": filename,
                "label": label,
                "offset": offset
            })

if not records:
    # Fail here with a clear message instead of an AttributeError further down.
    raise RuntimeError(
        "No labelled audio of at least %d s found under Heartbeat_Sounds/set_a or set_b" % slice_size
    )

dataset = pd.DataFrame(records, columns=["filename", "label", "offset"])
dataset = shuffle(dataset, random_state=42)
dataset.info()

# Function to extract features
def extract_features(audio_path, offset, duration=3.0, n_mfcc=40):
    """Compute MFCCs for one fixed-length slice of an audio file, with a channel axis.

    Parameters
    ----------
    audio_path : str
        Path of the audio file to read.
    offset : float
        Start of the slice, in seconds from the beginning of the file.
    duration : float, optional
        Length of the slice in seconds (default 3, the window size used when
        the slice index is built).
    n_mfcc : int, optional
        Number of MFCC coefficients to keep (default 40).

    Returns
    -------
    numpy.ndarray
        The MFCC matrix returned by ``librosa.feature.mfcc`` with one extra
        trailing axis of size 1 appended for CNN input.
    """
    y, sr = librosa.load(audio_path, offset=offset, duration=duration)

    # A slice that decodes a few samples short (or long) would yield a different
    # number of frames and make the stacked feature array ragged, so force every
    # slice to exactly `duration` seconds of samples (zero-padded or trimmed).
    y = librosa.util.fix_length(y, size=int(round(sr * duration)))

    mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=n_mfcc)
    # Reshape for CNN input
    mfccs = np.expand_dims(mfccs, axis=-1)
    return mfccs

# Turn every (file, offset) row of the slice index into an MFCC "image".
x_data = []
for audio_path, start in tqdm(zip(dataset.filename, dataset.offset), total=len(dataset)):
    mfccs = extract_features(audio_path, start)
    # extract_features in this cell already appends the channel axis, so this
    # reshape to (rows, frames, 1) leaves the shape as it is.
    n_rows, n_frames = mfccs.shape[0], mfccs.shape[1]
    mfccs = mfccs.reshape((n_rows, n_frames, 1))
    x_data.append(mfccs)

# Stack the per-slice arrays into a single array
x_data = np.asarray(x_data)


# Map the string labels to integer class ids
encoder = LabelEncoder()
y_data = encoder.fit_transform(dataset.label)

# "balanced" per-class weights, one entry per distinct class id
class_weights = class_weight.compute_class_weight(
    class_weight="balanced",
    classes=np.unique(y_data),
    y=y_data,
)

# One-hot targets for the softmax / categorical cross-entropy head
y_data_one_hot = to_categorical(y_data)

# A single stratified 80/20 shuffle split (StratifiedShuffleSplit, not a K-fold);
# the evaluation loop still refers to each split as a "fold".
# NOTE(review): the split is made over slice rows, and one recording contributes
# several overlapping slices, so slices of the same recording can end up on both
# sides of the split - confirm whether a group-aware split is wanted.
kfold = StratifiedShuffleSplit(test_size=0.2, n_splits=1, random_state=42)

# Per-fold result accumulators (train / test pairs)
train_accuracies, test_accuracies = [], []
train_auc_scores, test_auc_scores = [], []
train_mcc_scores, test_mcc_scores = [], []
train_conf_matrices, test_conf_matrices = [], []
train_class_reports, test_class_reports = [], []

# Test-set extras collected per fold
cm_test_normalized = []
report_test_list = []
cm_test_normalized_list = []

# Open the common results file in 'w' mode to overwrite existing content
with open('results_heartSound/all_folds_2dcnn5_results_kethop.txt', 'w') as common_file:
    # Split the indices instead of the dataset
    for fold, (train_index, test_index) in enumerate(kfold.split(x_data, y_data), 1):
        x_train_fold, x_test_fold = x_data[train_index], x_data[test_index]
        y_train_fold, y_test_fold = y_data_one_hot[train_index], y_data_one_hot[test_index]

        model = Sequential()

        # First block of conv.
        model.add(Conv2D(filters=32, kernel_size=2, input_shape=(x_train_fold.shape[1], x_train_fold.shape[2], x_train_fold.shape[3]), activation='relu'))
        model.add(MaxPooling2D(pool_size=2))
        model.add(Dropout(0.25))

        # Second block of conv.
        model.add(Conv2D(32, (3, 3), activation='relu'))
        model.add(MaxPooling2D((2, 2)))
        model.add(Dropout(0.25))

        # Third block of conv.
        model.add(Conv2D(64, (3, 3), activation='relu'))
        model.add(MaxPooling2D((2, 2)))
        model.add(Dropout(0.25))

        # LSTM layer
        model.add(Flatten())
        model.add(Dense(64, activation='relu'))
        model.add(Dropout(0.5))
        model.add(Reshape((-1, 64)))  # Reshape for LSTM
        model.add(LSTM(64, return_sequences=False))

        model.add(Dense(len(encoder.classes_), activation='softmax'))

        model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

        your_epochs = 100  # You can choose an appropriate number of epochs
        your_batch_size = 1  # You can choose an appropriate batch size

        # Train the model
        model.fit(x_train_fold, y_train_fold, epochs=your_epochs, batch_size=your_batch_size, verbose=1)

        # Evaluate on train and test sets
        y_pred_train = model.predict(x_train_fold)
        y_pred_test = model.predict(x_test_fold)
        
        # Generate classification report and confusion matrix for test set
        y_pred_test_labels = np.argmax(y_pred_test, axis=1)
        report_test = classification_report(np.argmax(y_test_fold, axis=1), y_pred_test_labels, output_dict=True)


        # Calculate metrics for train set
        train_accuracy = accuracy_score(np.argmax(y_train_fold, axis=1), np.argmax(y_pred_train, axis=1))
        train_auc = roc_auc_score(y_train_fold, y_pred_train, multi_class='ovr')
        train_mcc = matthews_corrcoef(np.argmax(y_train_fold, axis=1), np.argmax(y_pred_train, axis=1))
        train_cm = confusion_matrix(np.argmax(y_train_fold, axis=1), np.argmax(y_pred_train, axis=1))
        train_class_report = classification_report(np.argmax(y_train_fold, axis=1), np.argmax(y_pred_train, axis=1),
                                                   target_names=encoder.classes_)

        # Calculate metrics for test set
        test_accuracy = accuracy_score(np.argmax(y_test_fold, axis=1), np.argmax(y_pred_test, axis=1))
        test_auc = roc_auc_score(y_test_fold, y_pred_test, multi_class='ovr')
        test_mcc = matthews_corrcoef(np.argmax(y_test_fold, axis=1), np.argmax(y_pred_test, axis=1))
        test_cm = confusion_matrix(np.argmax(y_test_fold, axis=1), np.argmax(y_pred_test, axis=1))
        test_class_report = classification_report(np.argmax(y_test_fold, axis=1), np.argmax(y_pred_test, axis=1),
                                                  target_names=encoder.classes_)

        # Inside the loop where you append accuracies to lists
        train_accuracies.append(train_accuracy)
        test_accuracies.append(test_accuracy)

        # Calculate std for train and test accuracies
        train_accuracy_std = np.std(train_accuracies)
        test_accuracy_std = np.std(test_accuracies)


        # Save results to the common text file for both train and test sets
        common_file.write(f'Fold {fold} Results:\n')
        common_file.write(f'Train Accuracy: {train_accuracy} (std: {train_accuracy_std})\n')
        common_file.write(f'Train AUC Score: {train_auc}\n')
        common_file.write(f'Train MCC Score: {train_mcc}\n\n')
        common_file.write('Train Confusion Matrix:\n')
        common_file.write(str(train_cm))
        common_file.write('\n\nTrain Classification Report:\n')
        common_file.write(train_class_report)

        common_file.write(f'\n\nTest Accuracy: {test_accuracy} (std: {test_accuracy_std})\n')
        common_file.write(f'Test AUC Score: {test_auc}\n')
        common_file.write(f'Test MCC Score: {test_mcc}\n\n')
        common_file.write('Test Confusion Matrix:\n')
        common_file.write(str(test_cm))
        common_file.write('\n\nTest Classification Report:\n')
        common_file.write(test_class_report)
        
        # Print a separator between folds
        common_file.write("\n" + "="*40 + "\n")
        
        # Calculate average metrics for train set
        avg_train_accuracy = np.mean(train_accuracies)
        avg_train_auc = np.mean(train_auc_scores)
        avg_train_mcc = np.mean(train_mcc_scores)
        avg_train_conf_matrix = np.mean(train_conf_matrices, axis=0)  # Average confusion matrix

        # Convert continuous probabilities to class labels using argmax
        y_pred_train_class = np.argmax(y_pred_train, axis=1)

        # Calculate average metrics for test set
        avg_test_accuracy = np.mean(test_accuracies)
        avg_test_auc = np.mean(test_auc_scores)
        avg_test_mcc = np.mean(test_mcc_scores)
        avg_test_conf_matrix = np.mean(test_conf_matrices, axis=0)  # Average confusion matrix

        # Convert continuous probabilities to class labels using argmax
        y_pred_test_class = np.argmax(y_pred_test, axis=1)
        
        # Calculate average results for train set
        avg_train_class_report = classification_report(np.argmax(y_train_fold, axis=1), y_pred_train_class, target_names=encoder.classes_)

        # Calculate average results for test set
        avg_test_class_report = classification_report(np.argmax(y_test_fold, axis=1), y_pred_test_class, target_names=encoder.classes_)

        # Inside the loop where you plot confusion matrices
        plt.figure()
        train_cm = confusion_matrix(np.argmax(y_train_fold, axis=1), y_pred_train.argmax(axis=1))

        # Round each value in the confusion matrix to 5 decimal places
        rounded_train_cm = np.round(train_cm / np.sum(train_cm, axis=1)[:, np.newaxis], 5)

        sns.heatmap(rounded_train_cm, annot=True, fmt='.5f', cmap='Blues', xticklabels=encoder.classes_, yticklabels=encoder.classes_)
        plt.title(f'Train Confusion Matrix - Fold {fold}')
        plt.xlabel('Predicted')
        plt.ylabel('True')
        plt.savefig(f'KH/train_confusion_matrix_fold_{fold}.png')
        plt.close()

        # Inside the loop where you plot confusion matrices
        plt.figure()
        test_cm = confusion_matrix(np.argmax(y_test_fold, axis=1), y_pred_test.argmax(axis=1))

        # Round each value in the confusion matrix to 5 decimal places
        rounded_test_cm = np.round(test_cm / np.sum(test_cm, axis=1)[:, np.newaxis], 5)

        sns.heatmap(rounded_test_cm, annot=True, fmt='.5f', cmap='Blues', xticklabels=encoder.classes_, yticklabels=encoder.classes_)
        plt.title(f'Test Confusion Matrix - Fold {fold}')
        plt.xlabel('Predicted')
        plt.ylabel('True')
        plt.savefig(f'KH/test_confusion_matrix_fold_{fold}.png')
        plt.close()

        # Inside the loop where you append results to lists
        train_accuracies.append(train_accuracy)
        test_accuracies.append(test_accuracy)
        train_auc_scores.append(train_auc)
        test_auc_scores.append(test_auc)
        train_mcc_scores.append(train_mcc)
        test_mcc_scores.append(test_mcc)
        train_conf_matrices.append(train_cm)
        test_conf_matrices.append(test_cm)
        train_class_reports.append(train_class_report)
        test_class_reports.append(test_class_report)
        
        # Print a separator between folds
        common_file.write("\n" + "="*40 + "\n")
        
        # Append classification report and confusion matrix to lists
        report_test_list.append(report_test)
        cm_test_normalized_list.append(cm_test_normalized)

    # Calculate average metrics for train set
    avg_train_accuracy = np.mean(train_accuracies)
    avg_train_auc = np.mean(train_auc_scores)
    avg_train_mcc = np.mean(train_mcc_scores)

    # Reshape y_train_fold to (num_samples, num_classes)
    y_train_fold_reshaped = to_categorical(y_train_fold, num_classes=len(encoder.classes_))

    # Convert predictions to one-hot encoding
    y_pred_train_one_hot = to_categorical(np.argmax(y_pred_train, axis=1), num_classes=len(encoder.classes_))

    # Calculate average confusion matrix for train set
    avg_train_conf_matrix = np.mean(train_conf_matrices, axis=0).astype(int)


    # Calculate average classification report for train set
    avg_train_class_report = classification_report(
        np.argmax(y_train_fold, axis=1), 
        np.argmax(y_pred_train_one_hot, axis=1), 
        target_names=encoder.classes_, 
        digits=5
    )
    

    # Calculate average metrics for test set
    avg_test_accuracy = np.mean(test_accuracies)
    avg_test_auc = np.mean(test_auc_scores)
    avg_test_mcc = np.mean(test_mcc_scores)

    # Reshape y_test_fold to (num_samples, num_classes)
    y_test_fold_reshaped = to_categorical(y_test_fold, num_classes=len(encoder.classes_))

    # Convert predictions to one-hot encoding
    y_pred_test_one_hot = to_categorical(np.argmax(y_pred_test, axis=1), num_classes=len(encoder.classes_))

    # Calculate average confusion matrix for test set
    avg_test_conf_matrix = np.mean(test_conf_matrices, axis=0).astype(int)

    # Calculate average classification report for test set
    avg_test_class_report = classification_report(
        np.argmax(y_test_fold, axis=1), 
        np.argmax(y_pred_test_one_hot, axis=1), 
        target_names=encoder.classes_, 
        digits=5
    )

    # Mean of the per-fold normalized test confusion matrices (reported in the
    # results file).
    avg_cm_test_normalized = np.mean(cm_test_normalized_list, axis=0)

    # Pool the per-fold test confusion matrices so the plot reflects every fold
    # collected so far instead of only the most recent fold's predictions.
    avg_test_cm = np.sum(test_conf_matrices, axis=0)

    # Normalize each row (true class) by its total; a class with no samples
    # keeps an all-zero row instead of producing NaN from a division by zero.
    row_totals = avg_test_cm.sum(axis=1, keepdims=True)
    normalized_avg_test_cm = np.divide(
        avg_test_cm,
        row_totals,
        out=np.zeros(avg_test_cm.shape, dtype=float),
        where=row_totals != 0
    )

    # Draw on one explicit figure and close it afterwards, so no empty figure
    # is left open behind the saved plot.
    fig, ax = plt.subplots()
    sns.heatmap(
        normalized_avg_test_cm,
        annot=True,
        fmt='.5f',
        cmap='Blues',
        xticklabels=encoder.classes_,
        yticklabels=encoder.classes_,
        ax=ax
    )
    ax.set_title('Average Test Confusion Matrix')
    ax.set_xlabel('Predicted')
    ax.set_ylabel('True')
    fig.savefig('KH/average_test_confusion_matrix.png')
    plt.close(fig)
    
    # Write the cross-validation summary to the common results file: one
    # section for the train set and one for the test set (each written once).
    common_file.write("\n" + "="*40 + "\n")
    common_file.write(f'Average Train Accuracy: {avg_train_accuracy:.5f} (std: {np.std(train_accuracies):.5f})\n')
    common_file.write(f'Average Train AUC Score: {avg_train_auc:.5f}\n')
    common_file.write(f'Average Train MCC Score: {avg_train_mcc:.5f}\n\n')
    common_file.write('Average Train Confusion Matrix:\n')
    common_file.write(str(avg_train_conf_matrix.astype(int)))
    common_file.write("\n\nAverage Train Classification Report:\n")
    common_file.write(str(avg_train_class_report))

    common_file.write("\n" + "="*40 + "\n")
    common_file.write(f'Average Test Accuracy: {avg_test_accuracy:.5f} (std: {np.std(test_accuracies):.5f})\n')
    common_file.write(f'Average Test AUC Score: {avg_test_auc:.5f}\n')
    common_file.write(f'Average Test MCC Score: {avg_test_mcc:.5f}\n\n')
    common_file.write('Average Test Confusion Matrix:\n')
    common_file.write(str(avg_test_conf_matrix.astype(int)))
    # The normalized matrix holds fractions, so it is rounded to 5 decimals
    # rather than cast to int (an int cast would turn every fraction into 0).
    common_file.write('\n\nAverage Normalized Test Confusion Matrix:\n')
    common_file.write(str(np.round(avg_cm_test_normalized, 5)))
    common_file.write("\n\nAverage Test Classification Report:\n")
    common_file.write(str(avg_test_class_report))
        
    # Console summary for the train set: metrics first, then the report
    print(
        f'Average Train Accuracy: {avg_train_accuracy}',
        f'Average Train AUC Score: {avg_train_auc}',
        f'Average Train MCC Score: {avg_train_mcc} \n',
        "\nAverage Train Classification Report:",
        avg_train_class_report,
        sep='\n'
    )

    # Console summary for the test set, in the same layout
    print(
        f'Average Test Accuracy: {avg_test_accuracy}',
        f'Average Test AUC Score: {avg_test_auc}',
        f'Average Test MCC Score: {avg_test_mcc} \n',
        "\nAverage Test Classification Report:",
        avg_test_class_report,
        sep='\n'
    )

<class 'pandas.core.frame.DataFrame'>
Int64Index: 1441 entries, 168 to 1126
Data columns (total 3 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   filename  1441 non-null   object 
 1   label     1441 non-null   object 
 2   offset    1441 non-null   float64
dtypes: float64(1), object(2)
memory usage: 45.0+ KB


100%|███████████████████████████████████████████████████████████████████████████████| 1441/1441 [04:58<00:00,  4.82it/s]


Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, m

Average Train Accuracy: 0.5338541666666666
Average Train AUC Score: 0.49762979188361056
Average Train MCC Score: 0.0 


Average Train Classification Report:
              precision    recall  f1-score   support

    artifact    0.00000   0.00000   0.00000       128
    extrahls    0.00000   0.00000   0.00000        41
  extrastole    0.00000   0.00000   0.00000        70
      murmur    0.00000   0.00000   0.00000       298
      normal    0.53385   1.00000   0.69610       615

    accuracy                        0.53385      1152
   macro avg    0.10677   0.20000   0.13922      1152
weighted avg    0.28500   0.53385   0.37161      1152

Average Test Accuracy: 0.532871972318339
Average Test AUC Score: 0.4922549167518942
Average Test MCC Score: 0.0 


Average Test Classification Report:
              precision    recall  f1-score   support

    artifact    0.00000   0.00000   0.00000        32
    extrahls    0.00000   0.00000   0.00000        10
  extrastole    0.00000   0.00000   0.00

<Figure size 640x480 with 0 Axes>

In [None]:
import re

import pandas as pd


def _bibtex_key(author, year, fallback):
    """Build a citation key from the author and year.

    Every character that is not a word character, '.', ':' or '-' is removed,
    because spaces, commas and braces are not valid inside a BibTeX key.
    Returns `fallback` when nothing usable is left.
    """
    key = re.sub(r'[^\w.:-]+', '', f"{author}{year}")
    return key or fallback


# Read the source rows from the CSV file
# NOTE(review): the path has no file extension - confirm it matches the file on disk.
data = pd.read_csv('heartbeat_sounds')

# Iterate over each row and write one BibTeX @misc entry per row.
# The file is written as UTF-8 so non-ASCII titles/authors are stored the same
# way regardless of the platform's default encoding.
with open('heartbeat_sounds.bib', 'w', encoding='utf-8') as bibfile:
    for index, row in data.iterrows():
        title = row['Title']
        author = row['Author']
        year = row['Year']
        url = row['Url']
        cite_key = _bibtex_key(author, year, f"entry{index}")

        bibfile.write(f"@misc{{{cite_key},\n")
        bibfile.write(f"  title = {{{title}}},\n")
        bibfile.write(f"  author = {{{author}}},\n")
        bibfile.write(f"  year = {{{year}}},\n")
        bibfile.write(f"  url = {{{url}}}\n")
        bibfile.write("}\n\n")
