In [16]:
import os
import numpy as np
import librosa
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.utils.class_weight import compute_class_weight

In [17]:
data_folder = 'data_set'
subfolders = ['awake', 'belly_pain', 'burping', 'discomfort', 'hug', 'hungry', 'tired']

In [18]:
features = []
labels = []

In [19]:
def pre_emphasis_filter(signal, alpha=0.95):
    return np.append(signal[0], signal[1:] - alpha * signal[:-1])

In [20]:
def mfcc_extraction(file_path, n_mfcc=13):
    # Load the audio file
    signal, sr = librosa.load(file_path, sr=None)  # Corrected sr value

    # Apply a pre-emphasis filter
    filtered_signal = pre_emphasis_filter(signal)

    # Frame the signal into short frames
    frames = librosa.util.frame(filtered_signal, frame_length=int(sr*0.025), hop_length=int(sr*0.01))

    # Compute the periodogram estimate of the power spectrum
    power_spectra = np.abs(np.fft.rfft(frames, axis=0))**2

    # Apply Mel filterbank
    mel_filterbank = librosa.filters.mel(sr=sr, n_fft=int(sr*0.025), n_mels=n_mfcc)
    mel_spectra = np.dot(mel_filterbank, power_spectra)

    # Take the logarithm of filterbank energies
    log_mel_spectra = np.log(mel_spectra + 1e-10)

    # Apply DCT to get MFCCs
    mfccs = librosa.feature.mfcc(S=log_mel_spectra, n_mfcc=n_mfcc)
    
    return mfccs.T

In [21]:
# Iterating over the dataset folder and extracting features and labels
for subfolder in subfolders:
    for file_name in os.listdir(os.path.join(data_folder, subfolder)):
        file_path = os.path.join(data_folder, subfolder, file_name)
        if os.path.isfile(file_path):
            # Extracting MFCCs from the audio file
            mfccs = mfcc_extraction(file_path)
            
            # Averaging the MFCCs along time axis to get a fixed size feature vector
            avg_mfccs = np.mean(mfccs, axis=0)
            
            features.append(avg_mfccs.tolist())
            labels.append(subfolders.index(subfolder))

In [22]:
features = np.array(features)
labels = np.array(labels)

In [23]:
X_train, X_test, y_train, y_test = train_test_split(features, labels, test_size=0.2, random_state=42)

In [24]:
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [25]:
# Compute Class Weights
class_weights = compute_class_weight('balanced', classes=np.unique(y_train), y=y_train)
weights_dict = {i : class_weights[i] for i in range(len(class_weights))}


In [26]:
#Create an instance of MLPClassifier
mlp = MLPClassifier(
    alpha=0.01,
    max_iter=600,
    hidden_layer_sizes=(12,), 
    solver='lbfgs',
    random_state=1
)

In [27]:
# Fit the model to the training data
mlp.fit(X_train_scaled, y_train)

# Predict labels of the test set
y_pred = mlp.predict(X_test_scaled)


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


In [28]:
# Print classification report
print("Classification Report:")
print(classification_report(y_test, y_pred))

# Print confusion matrix
print("Confusion Matrix:")
print(confusion_matrix(y_test, y_pred))

# Print accuracy score
print("Accuracy Score:")
print(accuracy_score(y_test, y_pred))

Classification Report:
              precision    recall  f1-score   support

           0       0.33      0.35      0.34        26
           1       0.00      0.00      0.00         4
           2       0.17      0.25      0.20         4
           3       0.55      0.41      0.47        64
           4       0.38      0.37      0.38        35
           5       0.56      0.64      0.60       118
           6       0.29      0.30      0.30        30

    accuracy                           0.48       281
   macro avg       0.33      0.33      0.33       281
weighted avg       0.47      0.48      0.47       281

Confusion Matrix:
[[ 9  0  0  2  1 10  4]
 [ 0  0  0  0  0  4  0]
 [ 0  0  1  0  0  3  0]
 [ 3  0  1 26  4 24  6]
 [ 4  0  0  5 13  7  6]
 [ 8  0  4 13 11 76  6]
 [ 3  0  0  1  5 12  9]]
Accuracy Score:
0.47686832740213525


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [29]:
def predict_audio_file(file_path, model):
    try:
        mfccs = mfcc_extraction(file_path)
        avg_mfccs = np.mean(mfccs, axis=0).reshape(1, -1)
        prediction = model.predict(avg_mfccs)
        print(f"File: {file_path} | Predicted category: {subfolders[prediction[0]]}")
    except Exception as e:
        print(f"Can't load file {file_path}: {e}")

In [30]:
folder_path = "a_p"
for subfolder in subfolders:
    subfolder_path = os.path.join(folder_path, subfolder)
    
    if not os.path.exists(subfolder_path):
        print(f"Subfolder {subfolder_path} not found.")
        continue

    print(f"Processing subfolder: {subfolder}")
    
    for new_file in os.listdir(subfolder_path):
        file_path = os.path.join(subfolder_path, new_file)
        predict_audio_file(file_path, mlp)

Processing subfolder: awake
File: a_p\awake\awake_0.wav | Predicted category: tired
File: a_p\awake\awake_130.wav | Predicted category: tired
File: a_p\awake\awake_16.wav | Predicted category: tired
File: a_p\awake\awake_83.wav | Predicted category: tired
Processing subfolder: belly_pain
File: a_p\belly_pain\549a46d8-9c84-430e-ade8-97eae2bef787-1430130772174-1.7-m-48-bp.wav | Predicted category: tired
File: a_p\belly_pain\BellyPain02.wav | Predicted category: tired
Processing subfolder: burping
File: a_p\burping\7E4B9C14-F955-4BED-9B03-7F3096A6CBFF-1430540826-1.0-f-26-bu.wav | Predicted category: tired
File: a_p\burping\Burping-10.wav | Predicted category: tired
Processing subfolder: discomfort
File: a_p\discomfort\2294E2B2-8E36-4DA6-A898-B947CB9446AB-1436462707-1.1-m-26-dc.wav | Predicted category: tired
File: a_p\discomfort\Cold_Hot09.wav | Predicted category: tired
File: a_p\discomfort\diaper_6.wav | Predicted category: discomfort
File: a_p\discomfort\diaper_91.wav | Predicted categ