Some libraries

In [27]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint
from sklearn.metrics import accuracy_score, confusion_matrix

We are going to change the labels of the dataset to numbers. After this transformation, the labels in df["Label"] will be numerical (e.g., 0 for air_conditioner, 1 for car_horn, etc.), which is ideal for use in metrics like the confusion matrix.

In [28]:
# Load the 10 folds into a list of DataFrames
datasets = [pd.read_csv(f"datasets/sound_features_fold{i}.csv") for i in range(1, 11)]

# Define the mapping of labels to numbers
labels_dict = {
    "air_conditioner": 0,
    "car_horn": 1,
    "children_playing": 2,
    "dog_bark": 3,
    "drilling": 4,
    "engine_idling": 5,
    "gun_shot": 6,
    "jackhammer": 7,
    "siren": 8,
    "street_music": 9
}
# Replace string labels with numeric labels in each fold DataFrame
for df in datasets:
    df["Label"] = df["Label"].replace(labels_dict)

# Check the updated labels in the first fold
datasets[0].head()

  df["Label"] = df["Label"].replace(labels_dict)


Unnamed: 0,mfcc_1,mfcc_2,mfcc_3,mfcc_4,mfcc_5,mfcc_6,mfcc_7,mfcc_8,mfcc_9,mfcc_10,...,spectral_centroid,spectral_bandwidth,spectral_contrast,spectral_rolloff,spectral_flatness,rms,zero_crossing_rate,tempo,tempogram,Label
0,-374.4459,97.04725,9.259301,-17.928537,1.152152,-8.538063,-15.637719,-8.188658,-11.003868,1.985415,...,960.279319,1313.641864,22.421554,1763.516865,0.00324,0.079418,0.050405,[66.96428571],0.040491,3
1,-333.74377,133.21625,15.807837,-33.060143,-1.289476,-17.097427,-30.018026,-22.263353,-18.194607,4.316967,...,865.872849,1143.308567,23.85201,1420.634921,0.001789,0.087223,0.053943,[110.29411765],0.059969,3
2,-382.80362,68.959465,4.313661,-2.062933,3.721442,-5.11752,-8.518147,-1.823458,-4.410032,4.914453,...,1132.635828,1453.488132,21.033922,2213.789683,0.009837,0.052242,0.059059,[52.08333333],0.031151,3
3,-96.21553,87.579865,-52.939266,-5.229744,-16.976635,3.661066,-4.20417,10.106073,3.249916,10.754143,...,1801.149302,1495.233043,21.253521,3214.285714,0.039912,0.118702,0.158978,[170.45454545],0.115612,3
4,-217.80743,124.95071,21.462078,11.288554,-10.218013,0.942786,-1.060578,7.17595,0.728432,1.115492,...,1163.692856,1572.547755,20.424197,2396.515377,0.015071,0.070206,0.071091,[133.92857143],0.02505,6


Now lets check if there is some obejct values and replace them

In [29]:
for df in datasets:
    object_columns = df.select_dtypes(include=['object']).columns
    print(object_columns)

Index(['tempo'], dtype='object')
Index(['tempo'], dtype='object')
Index(['tempo'], dtype='object')
Index(['tempo'], dtype='object')
Index(['tempo'], dtype='object')
Index(['tempo'], dtype='object')
Index(['tempo'], dtype='object')
Index(['tempo'], dtype='object')
Index(['tempo'], dtype='object')
Index(['tempo'], dtype='object')


In [30]:
# Example: Check what type of obejct 'tempo' is
print(datasets[0]['tempo'].head())

0     [66.96428571]
1    [110.29411765]
2     [52.08333333]
3    [170.45454545]
4    [133.92857143]
Name: tempo, dtype: object


In [31]:
import ast  # For safely evaluating the string representation of lists

for df in datasets:
    # Convert the string representation of lists to actual lists
    df['tempo'] = df['tempo'].apply(lambda x: ast.literal_eval(x) if isinstance(x, str) else x)

    # Now, extract the first value from the list (if it’s a list)
    df['tempo'] = df['tempo'].apply(lambda x: x[0] if isinstance(x, list) else x)

    # Convert the 'tempo' column to numeric
    df['tempo'] = pd.to_numeric(df['tempo'], errors='coerce')

    # Check for NaN values
    print(df['tempo'].isna().sum())  # Count of NaN values

    # Handle NaN values (optional)
    df['tempo'].fillna(df['tempo'].mean(), inplace=True)  # Fill NaNs with the mean
    # Alternatively, you could drop rows with NaNs: df.dropna(subset=['tempo'], inplace=True)
datasets[0].head()

0
0
0
0
0
0
0
0
0
0


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['tempo'].fillna(df['tempo'].mean(), inplace=True)  # Fill NaNs with the mean


Unnamed: 0,mfcc_1,mfcc_2,mfcc_3,mfcc_4,mfcc_5,mfcc_6,mfcc_7,mfcc_8,mfcc_9,mfcc_10,...,spectral_centroid,spectral_bandwidth,spectral_contrast,spectral_rolloff,spectral_flatness,rms,zero_crossing_rate,tempo,tempogram,Label
0,-374.4459,97.04725,9.259301,-17.928537,1.152152,-8.538063,-15.637719,-8.188658,-11.003868,1.985415,...,960.279319,1313.641864,22.421554,1763.516865,0.00324,0.079418,0.050405,66.964286,0.040491,3
1,-333.74377,133.21625,15.807837,-33.060143,-1.289476,-17.097427,-30.018026,-22.263353,-18.194607,4.316967,...,865.872849,1143.308567,23.85201,1420.634921,0.001789,0.087223,0.053943,110.294118,0.059969,3
2,-382.80362,68.959465,4.313661,-2.062933,3.721442,-5.11752,-8.518147,-1.823458,-4.410032,4.914453,...,1132.635828,1453.488132,21.033922,2213.789683,0.009837,0.052242,0.059059,52.083333,0.031151,3
3,-96.21553,87.579865,-52.939266,-5.229744,-16.976635,3.661066,-4.20417,10.106073,3.249916,10.754143,...,1801.149302,1495.233043,21.253521,3214.285714,0.039912,0.118702,0.158978,170.454545,0.115612,3
4,-217.80743,124.95071,21.462078,11.288554,-10.218013,0.942786,-1.060578,7.17595,0.728432,1.115492,...,1163.692856,1572.547755,20.424197,2396.515377,0.015071,0.070206,0.071091,133.928571,0.02505,6


In [32]:
for df in datasets:
    object_columns = df.select_dtypes(include=['object']).columns
    print(object_columns)

Index([], dtype='object')
Index([], dtype='object')
Index([], dtype='object')
Index([], dtype='object')
Index([], dtype='object')
Index([], dtype='object')
Index([], dtype='object')
Index([], dtype='object')
Index([], dtype='object')
Index([], dtype='object')


In [None]:

# Initialize list to store accuracies and confusion matrices
accuracies = []
confusion_matrices = []

# Perform 10-fold cross-validation using the 10 separate dataset files
for test_fold in range(1, 11):  # Iterate through fold1 to fold10
    print(f"Training and testing on fold {test_fold}...")

    # Load the test fold
    test_data = pd.read_csv(fold_files[test_fold - 1])
    X_test = test_data.drop(columns=['label']).values  # Features
    y_test = test_data['label'].values  # Labels

    # Load the remaining folds for training
    train_data = pd.concat([pd.read_csv(fold_files[i]) for i in range(10) if i != test_fold - 1])
    X_train = train_data.drop(columns=['label']).values  # Features
    y_train = train_data['label'].values  # Labels

    # Standardize the features
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)

    # Further split the train data into training and validation sets (90% train, 10% validation)
    from sklearn.model_selection import train_test_split
    X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.1, stratify=y_train, random_state=42)

    # Define the MLP model
    def create_mlp_model(input_dim, num_classes):
        model = Sequential([
            Dense(128, activation='relu', input_dim=input_dim),
            Dropout(0.3),  # Regularization
            Dense(64, activation='relu'),
            Dropout(0.3),  # Regularization
            Dense(num_classes, activation='softmax')  # Output layer for multi-class classification
        ])
        return model

    # Model parameters
    input_dim = X_train.shape[1]  # Number of features
    learning_rate = 0.001
    batch_size = 64
    epochs = 50

    # Create and compile the model
    model = create_mlp_model(input_dim, num_classes)
    optimizer = Adam(learning_rate=learning_rate)
    model.compile(optimizer=optimizer, loss='sparse_categorical_crossentropy', metrics=['accuracy'])

    # Callbacks
    early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)
    lr_scheduler = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=3)
    checkpoint = ModelCheckpoint(f'mlp_fold_{test_fold}_best.h5', monitor='val_loss', save_best_only=True)

    # Train the model
    model.fit(
        X_train, y_train,
        validation_data=(X_val, y_val),
        epochs=epochs,
        batch_size=batch_size,
        callbacks=[early_stopping, lr_scheduler, checkpoint],
        verbose=0  # Suppress training output
    )

    # Evaluate on test set
    test_preds = np.argmax(model.predict(X_test), axis=1)
    accuracy = accuracy_score(y_test, test_preds)
    accuracies.append(accuracy)

    # Compute confusion matrix for this fold
    conf_matrix = confusion_matrix(y_test, test_preds)
    confusion_matrices.append(conf_matrix)

    print(f"Fold {test_fold} Accuracy: {accuracy:.4f}")

# Average results across all folds
mean_accuracy = np.mean(accuracies)
std_accuracy = np.std(accuracies)

print(f"Average Accuracy: {mean_accuracy:.4f} ± {std_accuracy:.4f}")

# Aggregate confusion matrices across all folds
final_confusion_matrix = np.sum(confusion_matrices, axis=0)
print("Final Confusion Matrix:")
print(final_confusion_matrix)
