In [3]:
import os
import pandas as pd
import librosa
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
import tensorflow as tf
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, precision_score, recall_score
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
import pickle
from collections import Counter
from pydub import AudioSegment
from io import BytesIO
import wave
import math
import uuid

In [14]:
# Define raw audio dictionary
raw_audio = {}

# Loop through directioris and label audio files
directories = ['belly_pain', 'burping', 'discomfort', 'hungry', 'tired']
for directory in directories:
    path = '../data/donateacry_corpus/' + directory
    for filename in os.listdir(path):
        if filename.endswith(".wav"):
            raw_audio[os.path.join(path, filename)] = directory

In [15]:
# Define function to extract MFCC features and chop audio
def extract_mfcc(audio_file, max_length=100):
    audiofile, sr = librosa.load(audio_file)
    fingerprint = librosa.feature.mfcc(y=audiofile, sr=sr, n_mfcc=20)
    if fingerprint.shape[1] < max_length:
        pad_width = max_length - fingerprint.shape[1]
        fingerprint_padded = np.pad(fingerprint, pad_width=((0, 0), (0, pad_width)), mode='constant')
        return fingerprint_padded.T
    elif fingerprint.shape[1] > max_length:
        return fingerprint[:, :max_length].T
    else:
        return fingerprint.T


# Chop audio and extract MFCC features for each track
X = []
y = []
max_length = 100

for i, (audio_file, label) in enumerate(raw_audio.items()):
    mfcc_features = extract_mfcc(audio_file, max_length=max_length)
    X.append(mfcc_features.flatten())
    y.append(label)

 # Convert features and labels to DataFrame and save to CSV
df = pd.DataFrame(X)
df = df.fillna(0)
df['label'] = y
df.to_csv('frey_audio_dataset.csv', index=False)

In [16]:
# Convert lists to numpy arrays
X = np.array(X)
y = np.array(y)

# Flatten the features and labels
X_flat = X.reshape(X.shape[0], -1)
y_flat = y

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X_flat, y_flat, test_size=0.2, random_state=42)

In [17]:
# Train and evaluate models
models = [
    ('Random Forest', RandomForestClassifier(n_estimators=25, max_features=5)),
    ('Logistic Regression', LogisticRegression()),
    ('Decision Tree', DecisionTreeClassifier()),
    ('SVM', SVC()),
]
 

In [18]:
print("Model, Accuracy, Precision, Recall")
for model_name, model in models:
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred, average='weighted')
    recall = recall_score(y_test, y_pred, average='weighted')
    print(f"{model_name}: {accuracy}, {precision}, {recall}")

Model, Accuracy, Precision, Recall
Random Forest: 0.782608695652174, 0.6124763705103969, 0.782608695652174


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Logistic Regression: 0.6630434782608695, 0.6346739130434783, 0.6630434782608695
Decision Tree: 0.6521739130434783, 0.7154612597547382, 0.6521739130434783
SVM: 0.782608695652174, 0.6124763705103969, 0.782608695652174


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
