### This script classifies heart sound recordings as normal or abnormal using MFCC features and a pre-trained neural network model. It loads labeled `.wav` files organized into normal and abnormal folders and reduces each recording to a vector of time-averaged MFCC coefficients. The recordings in the `train` folder are split 80/20 into training and test sets, and a separate validation set is loaded from the `val` folder. The pre-trained model is then evaluated on the test set, and the script prints the resulting accuracy.


In [None]:
import librosa
import numpy as np
import os
import fnmatch
import librosa.display
import tensorflow as tf
from tensorflow.keras.models import load_model
from sklearn.model_selection import train_test_split

In [None]:
def load_file_data(folder, file_names, duration=10, sr=22050, n_mfcc=25):
    """Load audio files and reduce each one to a column vector of mean MFCCs.

    Every file is read with ``librosa.load`` (resampled to ``sr`` and limited
    to at most ``duration`` seconds), its MFCC matrix is averaged over the
    time axis, and the result is reshaped to ``(n_mfcc, 1)``.

    Args:
        folder: Directory that contains the audio files. A trailing path
            separator is optional.
        file_names: Iterable of file names located inside ``folder``.
        duration: Maximum number of seconds read from each file.
        sr: Sampling rate the audio is resampled to.
        n_mfcc: Number of MFCC coefficients kept per recording.

    Returns:
        A list with exactly one ``(n_mfcc, 1)`` array per entry of
        ``file_names``, in the same order. A file that cannot be loaded or
        processed contributes an all-zero vector (and an error message is
        printed), so the result stays aligned with label lists that callers
        build from ``file_names``.
    """
    data = []
    for file_name in file_names:
        sound_file = os.path.join(folder, file_name)
        print("load file ", sound_file)
        try:
            # Use a separate name for the returned rate so the ``sr``
            # argument is never rebound between iterations.
            signal, loaded_sr = librosa.load(sound_file, sr=sr, duration=duration)
            # NOTE(review): an earlier revision zero-padded short clips into
            # a variable that was never read, so features have always been
            # computed from the unpadded signal. That is kept as-is: the
            # time-averaged MFCC vector has a fixed size regardless of clip
            # length, and padding with silence would shift the averages away
            # from what the pre-trained model was presumably fitted on --
            # confirm against the training pipeline.
            mfccs = np.mean(
                librosa.feature.mfcc(y=signal, sr=loaded_sr, n_mfcc=n_mfcc).T,
                axis=0,
            )
        except Exception as e:
            # Best-effort loading: report the failure and emit a neutral
            # placeholder instead of reusing the previous file's features
            # (or crashing on an unbound name when the first file fails).
            print("Error encountered while parsing file: ", e)
            feature = np.zeros((n_mfcc, 1), dtype=np.float32)
        else:
            feature = np.asarray(mfccs).reshape([-1, 1])
        data.append(feature)
    return data


In [None]:
# Class names in index order: a name's position in this list is its
# integer label (0 = normal, 1 = abnormal).
CLASSES = ['normal', 'abnormal']
NB_CLASSES = len(CLASSES)

# Forward lookup: class name -> integer label.
label_to_int = dict(zip(CLASSES, range(NB_CLASSES)))
print(label_to_int)
print(" ")
# Reverse lookup: integer label -> class name.
int_to_label = dict(enumerate(CLASSES))
print(int_to_label)

In [None]:

# Training folder, "normal" class: list the .wav recordings, extract one
# feature vector per recording, and tag each recording with label 0.
train_normal_dir = 'heart_sound_dataset/train/normal/'
normal_files = fnmatch.filter(os.listdir(train_normal_dir), '*.wav')
normal_sounds = load_file_data(file_names=normal_files, folder=train_normal_dir)
normal_labels = [0] * len(normal_files)

# Training folder, "abnormal" class: same steps, label 1.
train_abnormal_dir = 'heart_sound_dataset/train/abnormal/'
abnormal_files = fnmatch.filter(os.listdir(train_abnormal_dir), '*.wav')
abnormal_sounds = load_file_data(file_names=abnormal_files, folder=train_abnormal_dir)
abnormal_labels = [1] * len(abnormal_files)

print("Loading Done")


In [None]:

# Validation folder, "normal" class: list the .wav recordings, extract one
# feature vector per recording, and tag each recording with label 0.
val_normal_dir = 'heart_sound_dataset/val/normal/'
normal_files_val = fnmatch.filter(os.listdir(val_normal_dir), '*.wav')
normal_sounds_val = load_file_data(file_names=normal_files_val, folder=val_normal_dir)
normal_labels_val = [0] * len(normal_files_val)

# Validation folder, "abnormal" class: same steps, label 1.
val_abnormal_dir = 'heart_sound_dataset/val/abnormal/'
abnormal_files_val = fnmatch.filter(os.listdir(val_abnormal_dir), '*.wav')
abnormal_sounds_val = load_file_data(file_names=abnormal_files_val, folder=val_abnormal_dir)
abnormal_labels_val = [1] * len(abnormal_files_val)

print("Loading Done")


In [None]:
# Stack the normal and abnormal samples from the training folder (features
# and integer labels) into one array each.
x_data = np.concatenate([normal_sounds, abnormal_sounds])
y_data = np.concatenate([normal_labels, abnormal_labels])

# Shuffle with a fixed seed and keep 80% for training; the rest is the test set.
x_train, x_test, y_train, y_test = train_test_split(
    x_data,
    y_data,
    train_size=0.8,
    shuffle=True,
    random_state=42,
)

# One-hot encode the integer labels, one column per class.
num_classes = len(CLASSES)
y_train = np.array(tf.keras.utils.to_categorical(y_train, num_classes))
y_test = np.array(tf.keras.utils.to_categorical(y_test, num_classes))

# The validation set comes from its own folder and is not part of the split.
x_val = np.concatenate([normal_sounds_val, abnormal_sounds_val])
y_val_int = np.concatenate([normal_labels_val, abnormal_labels_val])
y_val = np.array(tf.keras.utils.to_categorical(y_val_int, num_classes))

# Report the size of each subset and its share of all loaded samples.
n_train, n_test, n_val = len(x_train), len(x_test), len(x_val)
n_total = n_train + n_val + n_test

print("combined training data record: ", n_train)
print("combined test data record: ", n_test)
print("combined validation data record: ", n_val)
print('Total data: ', n_total)

print()

print(f'Training percentage : {round(n_train / n_total * 100)}%')
print(f'Test percentage : {round(n_test / n_total * 100)}%')
print(f'Validation percentage : {round(n_val / n_total * 100)}%')



In [None]:
# Restore the pre-trained classifier from its HDF5 file.
model = load_model('heart_sounds.h5')

# Score the model on the held-out test split.
# NOTE(review): index 1 of the result is reported as accuracy, which presumes
# the model was compiled with accuracy as its first metric -- confirm.
scores = model.evaluate(x_test, y_test)
accuracy_percent = round(scores[1] * 100)

print("Model evaluation accuracy: ", accuracy_percent, "%")