importing libraries

In [None]:
import os
import librosa
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split, RandomizedSearchCV
from sklearn.metrics import mean_squared_error, r2_score
import joblib

The function extract_features is designed to extract a variety of audio features from an audio file using the librosa library. Here’s a breakdown of the features being extracted:

1. MFCCs (Mel-frequency cepstral coefficients):

The function computes the MFCCs of the audio signal, which are widely used in audio processing and speech recognition. The mean of the MFCCs is calculated and added to the feature list.

2. Chroma Features:

Chroma features represent the energy distribution across the 12 different pitch classes. The mean of these features is also calculated and added to the list.

3. Mel Spectrogram:

The Mel spectrogram is computed, which provides a representation of the short-term power spectrum of sound. The mean of the Mel spectrogram is included in the features.

4. Spectral Contrast:

This feature measures the difference in amplitude between peaks and valleys in the sound spectrum. The mean of the spectral contrast is added to the feature list.

5. Tonnetz (Tonal centroid features):

Tonnetz features capture the harmonic relations in music. The harmonic component of the audio signal is first extracted, and then the tonnetz features are computed and averaged.

6. Zero Crossing Rate (ZCR):

The ZCR measures how often the signal changes from positive to negative or back. The mean value is appended to the features.

7. Root Mean Square (RMS) Energy:

The RMS value of each frame provides a measure of the energy of the audio signal (it is an energy measure, not an error metric). The mean RMS value across frames is calculated and added to the feature list.

The function returns a list of these extracted features, which can be used for further analysis, such as training machine learning models for tasks like classification or regression in audio processing.

In [None]:
def extract_features(filepath, sr=None, n_mfcc=40):
    """Summarise one audio file as a flat list of time-averaged features.

    Every frame-level librosa feature is averaged over time (axis=1), so clips
    of any length yield a vector of the same size. Values are concatenated in
    this order: MFCCs, chroma, mel spectrogram, spectral contrast, tonnetz,
    zero-crossing rate, RMS energy. With the default arguments the vector has
    195 entries.

    Parameters
    ----------
    filepath : str or path-like
        Audio file handed to ``librosa.load``.
    sr : int or None, default None
        Sampling rate passed to ``librosa.load``. ``None`` keeps each file's
        native rate; pass a fixed rate to put every file on the same
        frequency scale.
    n_mfcc : int, default 40
        Number of MFCC coefficients to compute.

    Returns
    -------
    list
        One numeric value per feature dimension.

    Raises
    ------
    ValueError
        If the file decodes to zero samples.
    """
    y, sr = librosa.load(filepath, sr=sr)
    if y.size == 0:
        # Fail early and name the file, so a bad clip is easy to find when
        # this runs inside a loop over hundreds of files.
        raise ValueError(f"No audio samples could be read from {filepath!r}")

    # Tonnetz is computed on the harmonic component of the signal only.
    y_harmonic = librosa.effects.harmonic(y)

    # Multi-row features, listed in output order; each row becomes one value.
    frame_matrices = [
        librosa.feature.mfcc(y=y, sr=sr, n_mfcc=n_mfcc),
        librosa.feature.chroma_stft(y=y, sr=sr),
        librosa.feature.melspectrogram(y=y, sr=sr),
        librosa.feature.spectral_contrast(y=y, sr=sr),
        librosa.feature.tonnetz(y=y_harmonic, sr=sr),
    ]

    features = []
    for matrix in frame_matrices:
        features.extend(np.mean(matrix, axis=1))

    # Single-row features collapse to one scalar each.
    features.append(np.mean(librosa.feature.zero_crossing_rate(y)))
    features.append(np.mean(librosa.feature.rms(y=y)))

    return features

In [None]:
# LOAD & PROCESS TRAIN DATA
# train.csv has one row per clip: its 'filename' and its target 'label'.
train_df = pd.read_csv("dataset/train.csv")

# One feature vector per clip, extracted in the CSV's row order.
X = pd.DataFrame(
    [
        extract_features(f"dataset/audios_train/{clip_name}")
        for clip_name in train_df["filename"]
    ]
)

# Rebuild the labels from a plain list so they carry a fresh 0..n-1 index,
# the same index X gets.
y = pd.Series(train_df["label"].tolist())

print("Train features shape:", X.shape)

Train features shape: (444, 195)


The next cell splits the data into training and validation sets, initializes a RandomizedSearchCV for a RandomForestRegressor with the specified hyperparameter candidates, fits it on the training split, and prints the best hyperparameters found during the search.

In [None]:
#TRAIN-TEST SPLIT & HYPERPARAMETER TUNING
# Hold out 20% of the clips; the cross-validated search below never sees them.
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Candidate values the randomized search samples from.
param_dist = {
    'n_estimators': [100, 200, 300],
    'max_depth': [None, 10, 20, 30],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4],
    'bootstrap': [True, False]
}

rf = RandomForestRegressor(random_state=42)
# 20 sampled configurations, each scored with 5-fold CV on the training split.
random_search = RandomizedSearchCV(
    rf, param_distributions=param_dist, n_iter=20, cv=5,
    verbose=1, n_jobs=-1, random_state=42
)

random_search.fit(X_train, y_train)

print("Best Params:", random_search.best_params_)

# Evaluate the refitted best estimator on the held-out split so the tuned
# configuration gets an error estimate from data the search never touched.
val_pred = random_search.predict(X_val)
val_rmse = np.sqrt(mean_squared_error(y_val, val_pred))
print("Validation RMSE:", val_rmse)
print("Validation R^2:", r2_score(y_val, val_pred))

Fitting 5 folds for each of 20 candidates, totalling 100 fits
Best Params: {'n_estimators': 200, 'min_samples_split': 5, 'min_samples_leaf': 2, 'max_depth': 20, 'bootstrap': True}


The next cell finalizes the model training process: it builds a model with the best hyperparameters, trains it on the complete training dataset, and saves the trained model to disk for later use.

In [None]:
#FINAL TRAINING WITH BEST PARAMS
# Build the forest with the tuned settings and fit it on every labelled clip
# (X, y), not just the training split.
final_model = RandomForestRegressor(
    random_state=42,
    **random_search.best_params_,
).fit(X, y)

# Persist the fitted model; joblib.dump is left as the cell's last expression
# so the list of written files is displayed.
joblib.dump(final_model, "final_model.pkl")

['final_model.pkl']

The next cell loads the test audio filenames from a CSV file, extracts the audio features for each file using the extract_features function, and stores these features in a DataFrame for model prediction. The shape of the resulting DataFrame is printed to confirm the extraction process.

In [None]:
# LOAD TEST DATA & EXTRACT FEATURES
test_df = pd.read_csv("dataset/test.csv")
filenames = test_df["filename"].tolist()

# One feature vector per test clip, kept in the same order as `filenames`.
test_features = [
    extract_features(f"dataset/audios_test/{clip_name}")
    for clip_name in filenames
]

test_features_df = pd.DataFrame(test_features)
print("Test features shape:", test_features_df.shape)

Test features shape: (195, 195)


The final cell predicts labels for the test data using the trained final_model, prepares a submission DataFrame containing the filenames and their corresponding predicted labels, and saves this DataFrame to a CSV file named "submission.csv" for submission or further analysis.

In [None]:
#PREDICT ON TEST DATA
predictions = final_model.predict(test_features_df)

# Prepare submission: filenames first, then the matching predictions as 'label'.
submission = pd.DataFrame({"filename": filenames}).assign(label=predictions)

# Write without the index so the CSV holds only the two columns.
submission.to_csv("submission.csv", index=False)