In [1]:
import os
import numpy as np
import librosa
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, MaxPooling1D, GlobalAveragePooling1D, Dense, Dropout, BatchNormalization

In [2]:
def extract_features(file_path, max_pad_len=862):
    y, sr = librosa.load(file_path, sr=None, duration=5.0)
    
    # MFCCs
    mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=40)
    
    # Padding to fixed length for CNN input
    if mfcc.shape[1] < max_pad_len:
        pad_width = max_pad_len - mfcc.shape[1]
        mfcc = np.pad(mfcc, pad_width=((0, 0), (0, pad_width)), mode='constant')
    else:
        mfcc = mfcc[:, :max_pad_len]
    
    return mfcc.T  # Shape: (time, features)

In [3]:
def load_dataset(root_path):
    X = []
    y = []
    label_map = {
        'belly_pain': 0,
        'burping': 1,
        'discomfort': 2,
        'hungry': 3,
        'tired': 4
    }

    for label_name, label_id in label_map.items():
        folder_path = os.path.join(root_path, label_name)
        for file in os.listdir(folder_path):
            if file.endswith('.wav'):
                file_path = os.path.join(folder_path, file)
                features = extract_features(file_path)
                X.append(features)
                y.append(label_id)

    return np.array(X), np.array(y)

In [4]:
X, y = load_dataset("/Users/gloria/Documents/uniproject/infant_cry/data/donateacry_corpus")

# Normalize
scaler = StandardScaler() 
X = X.reshape(-1, X.shape[-1])  # flatten time dimension temporarily
X = scaler.fit_transform(X)
X = X.reshape(-1, 862, 40)  # restore time dimension 862 numero di time step, 40 dimensione relativa coefficients MFCCs

y = to_categorical(y,5)

# Aggiunto dopo
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [5]:
model = Sequential([
    Conv1D(64, 3, activation='relu', input_shape=(862, 40)), 
    BatchNormalization(),
    MaxPooling1D(2),

    Conv1D(128, 3, activation='relu'),
    BatchNormalization(),
    MaxPooling1D(2),

    Conv1D(256, 3, activation='relu'),
    BatchNormalization(),
    GlobalAveragePooling1D(),

    Dropout(0.3),
    Dense(64, activation='relu'),
    Dropout(0.2),
    Dense(5, activation='softmax')
])

model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
model.summary()

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [6]:
history = model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=20, batch_size=32)

Epoch 1/20
[1m48/48[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 51ms/step - accuracy: 0.7186 - loss: 0.9844 - val_accuracy: 0.5576 - val_loss: 1.2202
Epoch 2/20
[1m48/48[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 41ms/step - accuracy: 0.9267 - loss: 0.3165 - val_accuracy: 0.6702 - val_loss: 0.8648
Epoch 3/20
[1m48/48[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 42ms/step - accuracy: 0.9673 - loss: 0.1364 - val_accuracy: 0.8901 - val_loss: 0.5452
Epoch 4/20
[1m48/48[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 41ms/step - accuracy: 0.9751 - loss: 0.0971 - val_accuracy: 0.8246 - val_loss: 0.6103
Epoch 5/20
[1m48/48[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 42ms/step - accuracy: 0.9751 - loss: 0.0883 - val_accuracy: 0.6832 - val_loss: 0.9576
Epoch 6/20
[1m48/48[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 41ms/step - accuracy: 0.9863 - loss: 0.0545 - val_accuracy: 0.6021 - val_loss: 1.0396
Epoch 7/20
[1m48/48[0m [32m━━━━

In [7]:
loss, acc = model.evaluate(X_test, y_test)
print(f"Test accuracy: {acc:.2f}")

[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - accuracy: 0.7120 - loss: 1.4418
Test accuracy: 0.71


In [8]:
import numpy as np

# Your class labels
class_labels = ['belly pain', 'burping', 'discomfort', 'hungry', 'tired']

def show_text_predictions(model, X_test, y_test, n=10):
    """
    Print a comparison of model predictions and true labels for n test examples.
    """
    indices = np.random.choice(len(X_test), n, replace=False)
    X_sample = X_test[indices]
    y_sample_true = np.argmax(y_test[indices], axis=1)

    y_pred_probs = model.predict(X_sample)
    y_sample_pred = np.argmax(y_pred_probs, axis=1)

    print(f"{'Index':<5} {'True Label':<15} {'Predicted Label':<15} {'Correct?'}")
    print("-" * 55)
    for i, idx in enumerate(indices):
        true_label = class_labels[y_sample_true[i]]
        pred_label = class_labels[y_sample_pred[i]]
        correct = "✅" if y_sample_true[i] == y_sample_pred[i] else "❌"
        print(f"{idx:<5} {true_label:<15} {pred_label:<15} {correct}")


In [9]:
show_text_predictions(model, X_test, y_test, n=3)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 85ms/step
Index True Label      Predicted Label Correct?
-------------------------------------------------------
17    burping         burping         ✅
294   tired           tired           ✅
363   tired           tired           ✅
