In [1]:
import os
import librosa
import numpy as np
import tensorflow as tf
import os
import noisereduce as nr
import librosa
import numpy as np
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.utils import to_categorical
from sklearn.model_selection import train_test_split
from tensorflow.keras.layers import Reshape
from tensorflow.keras.models import Sequential
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.utils import to_categorical
from sklearn.model_selection import train_test_split
from tensorflow.keras.callbacks import LearningRateScheduler
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, LSTM, Dense, Dropout, BatchNormalization


In [2]:
# !pip install noisereduce

In [3]:
# Function for audio augmentation
def augment_audio(y, sr):
    pitch_shifted = librosa.effects.pitch_shift(y, sr=sr, n_steps=2)
    time_stretched = librosa.effects.time_stretch(y, rate=0.8)
    return [pitch_shifted, time_stretched]


In [4]:
# Define the path to the data directory
data_dir = 'data//my//'  
n_mfcc = 13
desired_labels = ['hungry', 'sad', 'uncomfortable']

# Initialize lists for storing data and file paths
X, labels, file_paths = [], [], []

# Load and preprocess the audio files
for label in desired_labels:
    class_dir = os.path.join(data_dir, label)
    for filename in os.listdir(class_dir):
        file_path = os.path.join(class_dir, filename)
        y, sr = librosa.load(file_path, sr=16000)

        # Noise reduction
        y = nr.reduce_noise(y=y, sr=sr)

        # Silence removal
        y, _ = librosa.effects.trim(y)

        # Original and augmented audio processing
        for y_processed in [y] + augment_audio(y, sr):
            mfccs = librosa.feature.mfcc(y=y_processed, sr=sr, n_mfcc=n_mfcc)
            X.append(mfccs.T)
            labels.append(label)
            file_paths.append(file_path)  # Append the file path here



In [5]:
# Now, the lengths of X, labels, and file_paths should match
print("Length of X: ", len(X))
print("Length of labels: ", len(labels))
print("Length of file_paths: ", len(file_paths))

Length of X:  12015
Length of labels:  12015
Length of file_paths:  12015


In [6]:
# Find the maximum length of the MFCC arrays
max_length = max(mfcc.shape[0] for mfcc in X)

# Pad each MFCC array to have the same length
X_padded = np.array([np.pad(mfcc, ((0, max_length - mfcc.shape[0]), (0, 0)), mode='constant') for mfcc in X])

# Convert to NumPy array and encode labels
X = np.array(X_padded)
labels_encoded = LabelEncoder().fit_transform(labels)
labels_categorical = to_categorical(labels_encoded)



In [7]:
# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(
    X, labels_categorical, test_size=0.4, random_state=42)  

In [8]:
# Reshape the data to include a channel dimension for CNN input
X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], X_train.shape[2], 1)
X_test = X_test.reshape(X_test.shape[0], X_test.shape[1], X_test.shape[2], 1)

In [9]:
from tensorflow.keras.optimizers import Adam 

# Define the model
model = Sequential([
    Conv2D(32, (2, 2), activation='relu', input_shape=X_train.shape[1:]),
    MaxPooling2D((2, 2)),
    Dropout(0.25),

    Conv2D(64, (2, 2), activation='relu'),
    Dropout(0.25),
    
    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(y_train.shape[1], activation='softmax')
])


model.compile(optimizer=Adam(learning_rate=0.001),
              loss='categorical_crossentropy',
              metrics=['accuracy'])



  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [10]:
# Train the model 
history = model.fit(X_train, y_train,
                    epochs=30,
                    batch_size=32,
                    validation_split=0.3, 
                    verbose=1)

Epoch 1/30
[1m158/158[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 29ms/step - accuracy: 0.4407 - loss: 7.0511 - val_accuracy: 0.7013 - val_loss: 0.7522
Epoch 2/30
[1m158/158[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 27ms/step - accuracy: 0.5991 - loss: 0.7840 - val_accuracy: 0.7221 - val_loss: 0.6564
Epoch 3/30
[1m158/158[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 27ms/step - accuracy: 0.6634 - loss: 0.7065 - val_accuracy: 0.7540 - val_loss: 0.5844
Epoch 4/30
[1m158/158[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 27ms/step - accuracy: 0.6893 - loss: 0.6665 - val_accuracy: 0.7827 - val_loss: 0.5229
Epoch 5/30
[1m158/158[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 28ms/step - accuracy: 0.7314 - loss: 0.6040 - val_accuracy: 0.7915 - val_loss: 0.5195
Epoch 6/30
[1m158/158[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 27ms/step - accuracy: 0.7340 - loss: 0.5657 - val_accuracy: 0.7748 - val_loss: 0.5239
Epoch 7/30
[1m158/158

In [12]:
# Evaluate the model on the test set
test_loss, test_accuracy = model.evaluate(X_test, y_test, verbose=2)
print("test Accuracy: ", test_accuracy)

151/151 - 1s - 4ms/step - accuracy: 0.8612 - loss: 0.4382
test Accuracy:  0.8612151741981506


In [13]:
from sklearn.metrics import classification_report

# Print the classification report
y_pred = model.predict(X_test)
y_pred_classes = np.argmax(y_pred, axis=1)
y_test_classes = np.argmax(y_test, axis=1)

report = classification_report(y_test_classes, y_pred_classes, target_names=['hungry', 'sad', 'uncomfortable'])

print(report)

[1m151/151[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step
               precision    recall  f1-score   support

       hungry       0.87      0.80      0.84      1613
          sad       0.99      0.86      0.92      1615
uncomfortable       0.76      0.92      0.84      1578

     accuracy                           0.86      4806
    macro avg       0.87      0.86      0.86      4806
 weighted avg       0.87      0.86      0.86      4806

