In [4]:
import os

import librosa
import librosa.display
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.layers import LSTM, BatchNormalization, Dense, Dropout, Input
from tensorflow.keras.models import Sequential
from tensorflow.keras.utils import set_random_seed, to_categorical


In [5]:
# Read the label table that maps each numeric clip ID to its class name,
# then print the first few rows as a quick sanity check.
df = pd.read_csv('Urban Sound Dataset.csv')
preview_rows = df.head()
print(preview_rows)


   ID         Class
0   0         siren
1   1  street_music
2   2      drilling
3   3         siren
4   4      dog_bark


In [6]:
# Function to extract MFCC features from audio files
def extract_features(file_path, max_pad_len=174, n_mfcc=40):
    """Load an audio file and return a fixed-length MFCC matrix for RNN input.

    The clip is decoded with ``librosa.load`` (``kaiser_fast`` resampling) and
    converted to MFCCs. The time axis is then truncated or right-padded with
    zeros so every clip yields exactly ``max_pad_len`` frames.

    Args:
        file_path: Path of the audio file to read.
        max_pad_len: Number of time frames in the returned matrix.
        n_mfcc: Number of MFCC coefficients computed per frame.

    Returns:
        A NumPy array of shape ``(max_pad_len, n_mfcc)`` (time steps first),
        or ``None`` if the file could not be loaded or processed; in that case
        the error is printed so the caller can skip the clip.
    """
    try:
        audio, sample_rate = librosa.load(file_path, res_type='kaiser_fast')
        mfccs = librosa.feature.mfcc(y=audio, sr=sample_rate, n_mfcc=n_mfcc)

        # Clip over-long recordings first, then zero-pad short ones on the
        # right, so the time axis always ends up at max_pad_len frames.
        mfccs = mfccs[:, :max_pad_len]
        missing_frames = max_pad_len - mfccs.shape[1]
        if missing_frames > 0:
            mfccs = np.pad(mfccs, ((0, 0), (0, missing_frames)), mode='constant')

        return mfccs.T  # Transpose for RNN input (time_steps, features)
    except Exception as e:
        # Best-effort by design: report the failing file and let the caller
        # drop it instead of aborting the whole extraction run.
        print(f"Error processing file {file_path}: {e}")
        return None


In [7]:
# Extract features and labels from the dataset
features = []
labels = []

# Iterate over the two columns we need; clips for which extract_features
# returns None (it prints the error itself) are skipped with their label.
for clip_id, class_name in zip(df['ID'], df['Class']):
    file_path = os.path.join('Train', f"{clip_id}.wav")
    mfccs = extract_features(file_path)
    if mfccs is not None:
        features.append(mfccs)
        labels.append(class_name)

# Fail early with a clear message when no clip could be processed, instead of
# letting the encoding/splitting steps below fail on empty inputs.
if not features:
    raise ValueError(
        "No audio features were extracted; check that the 'Train' directory "
        "contains the .wav files listed in the dataset CSV."
    )

# Encode the class names as integers, then expand them to one-hot rows
le = LabelEncoder()
label_ids = le.fit_transform(labels)
labels_encoded = to_categorical(label_ids)

# Stack the per-clip (time_steps, n_mfcc) matrices into a single 3-D array
features = np.array(features)

# Split the dataset into training and testing sets (80/20, fixed seed)
X_train, X_test, y_train, y_test = train_test_split(
    features, labels_encoded, test_size=0.2, random_state=42
)




In [8]:
# Define the RNN-LSTM model architecture.

# Seed Python, NumPy and the Keras backend so weight initialisation, dropout
# and batch shuffling repeat across "Restart & Run All" runs (some GPU kernels
# may still be nondeterministic).
set_random_seed(42)

model = Sequential([
    # Declare the per-sample input shape (time_steps, n_features) with an
    # explicit Input layer; passing `input_shape=` to the first layer of a
    # Sequential model is discouraged by Keras and emits a UserWarning.
    Input(shape=(X_train.shape[1], X_train.shape[2])),

    # First LSTM layer: returns the full sequence so the next LSTM can read it
    LSTM(128, return_sequences=True),
    BatchNormalization(),
    Dropout(0.3),

    # Second LSTM layer: returns only its final output
    LSTM(64),
    BatchNormalization(),
    Dropout(0.3),

    # Fully connected layer
    Dense(256, activation='relu'),
    Dropout(0.4),

    # Output layer for classification: one softmax unit per one-hot label column
    Dense(labels_encoded.shape[1], activation='softmax'),
])


  super().__init__(**kwargs)


In [9]:
# Configure training: Adam optimizer, categorical cross-entropy loss to match
# the one-hot encoded labels, and accuracy as the reported metric.
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)


In [10]:
# Fit on the training split for 30 epochs in mini-batches of 32, holding out
# 20% of the training data for validation; the per-epoch metrics are kept in
# `history`.
history = model.fit(
    X_train,
    y_train,
    batch_size=32,
    epochs=30,
    validation_split=0.2,
)


Epoch 1/30
[1m109/109[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m27s[0m 196ms/step - accuracy: 0.1711 - loss: 2.3040 - val_accuracy: 0.4103 - val_loss: 1.7866
Epoch 2/30
[1m109/109[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 190ms/step - accuracy: 0.3856 - loss: 1.6382 - val_accuracy: 0.5402 - val_loss: 1.3604
Epoch 3/30
[1m109/109[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 184ms/step - accuracy: 0.4971 - loss: 1.3942 - val_accuracy: 0.5805 - val_loss: 1.1955
Epoch 4/30
[1m109/109[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 183ms/step - accuracy: 0.5693 - loss: 1.2416 - val_accuracy: 0.6276 - val_loss: 1.0442
Epoch 5/30
[1m109/109[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 184ms/step - accuracy: 0.6211 - loss: 1.0752 - val_accuracy: 0.6425 - val_loss: 1.0210
Epoch 6/30
[1m109/109[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 178ms/step - accuracy: 0.6619 - loss: 0.9834 - val_accuracy: 0.6517 - val_loss: 1.0580
Epoch 7/30

In [11]:
# Score the trained model on the held-out test split and report the loss and
# accuracy (the two values returned for the compiled loss and metric).
test_metrics = model.evaluate(X_test, y_test)
test_loss, test_accuracy = test_metrics
print(f"Test Loss: {test_loss:.4f}, Test Accuracy: {test_accuracy:.4f}")


[1m34/34[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 145ms/step - accuracy: 0.7731 - loss: 0.7860
Test Loss: 0.7583, Test Accuracy: 0.7856
