In [64]:
import librosa
import joblib
from xgboost import XGBClassifier
import json
import numpy as np
import os
from keras.models import load_model
from sklearn.preprocessing import LabelEncoder

In [65]:
def extract_features(file_path, sr=22050, n_mfcc=13):
    """Summarise an audio file as a single 1-D feature vector.

    The file is decoded to mono, four librosa feature matrices (MFCC,
    chroma STFT, spectral contrast, tonnetz) are stacked row-wise, and each
    row is averaged over its columns.

    Parameters
    ----------
    file_path : str or path-like
        Audio file handed to ``librosa.load``.
    sr : int or None, optional
        Target sample rate for decoding. The rate reported back by
        ``librosa.load`` is the one passed to the feature extractors, so
        ``None`` (keep the file's native rate) is supported as well.
    n_mfcc : int, optional
        Number of MFCC coefficients to compute.

    Returns
    -------
    numpy.ndarray or None
        One mean value per stacked feature row, or ``None`` when loading or
        feature extraction raised (the error is printed, not re-raised).

    Notes
    -----
    The defaults reproduce the previously hard-coded settings. Presumably the
    saved models were trained on features produced with these values --
    TODO confirm before overriding them for inference.
    """
    try:
        # Use the sample rate librosa actually decoded at, so the feature
        # calls can never disagree with the loaded signal.
        audio, loaded_sr = librosa.load(file_path, sr=sr, mono=True)
        feature_blocks = [
            librosa.feature.mfcc(y=audio, sr=loaded_sr, n_mfcc=n_mfcc),
            librosa.feature.chroma_stft(y=audio, sr=loaded_sr),
            librosa.feature.spectral_contrast(y=audio, sr=loaded_sr),
            librosa.feature.tonnetz(y=audio, sr=loaded_sr),
        ]
        features = np.vstack(feature_blocks)
        # Collapse the column axis: one mean per feature row.
        mean_features = np.mean(features.T, axis=0)
        return mean_features
    except Exception as e:
        # Deliberate best-effort: report and signal failure with None so the
        # caller decides how to react.
        print(f"Error encountered while parsing file '{file_path}': {e}")
        return None

In [66]:
def _load_classifier_artifacts(saved_folder):
    """Load the arrays, models and label mapping stored in ``saved_folder``."""
    # SECURITY NOTE: joblib.load unpickles its input, which can execute
    # arbitrary code. Only point ``saved_folder`` at trusted artifacts.
    artifacts = {
        'y_test': np.load(os.path.join(saved_folder, 'y_test.npy')),
        'cnn_model': load_model(os.path.join(saved_folder, 'cnn_model.h5')),
        'baseline_cnn_model': load_model(os.path.join(saved_folder, 'baseline_cnn_model.h5')),
        'rf_model': joblib.load(os.path.join(saved_folder, 'rf_model.joblib')),
        'svm_model': joblib.load(os.path.join(saved_folder, 'svm_model.joblib')),
        'ensemble_input': np.load(os.path.join(saved_folder, 'ensemble_input.npy')),
    }
    with open(os.path.join(saved_folder, 'label_dict.json'), 'r') as json_file:
        artifacts['label_dict'] = json.load(json_file)
    return artifacts


def _ranked_percentages(percentages):
    """Return a copy of ``percentages`` ordered from highest to lowest value."""
    return dict(sorted(percentages.items(), key=lambda item: item[1], reverse=True))


def classify_music(file_path, saved_folder='new'):
    """Predict the genre of an audio file with the baseline CNN and the ensemble.

    Parameters
    ----------
    file_path : str or path-like
        Audio file to classify; passed to ``extract_features``.
    saved_folder : str, optional
        Directory holding ``y_test.npy``, ``ensemble_input.npy``,
        ``cnn_model.h5``, ``baseline_cnn_model.h5``, ``rf_model.joblib``,
        ``svm_model.joblib`` and ``label_dict.json``.

    Returns
    -------
    dict
        ``{"CNN": {...}, "OCNN": {...}}``. Each entry holds the predicted
        genre name and a genre -> percentage mapping sorted in descending
        order, using plain Python floats so the result is JSON-serialisable.

    Raises
    ------
    FileNotFoundError
        If an artifact is missing from ``saved_folder``.
    ValueError
        If no features could be extracted from ``file_path``.
    """
    artifacts = _load_classifier_artifacts(saved_folder)

    # label_dict maps genre name -> class index; invert it for decoding.
    genre_names = {idx: genre for genre, idx in artifacts['label_dict'].items()}

    # extract_features reports failures by returning None; fail loudly here
    # instead of crashing later with an opaque AttributeError on ``reshape``.
    sample_features = extract_features(file_path)
    if sample_features is None:
        raise ValueError(f"Could not extract audio features from '{file_path}'")
    sample_features_cnn = sample_features.reshape(1, sample_features.shape[0], 1)

    # --- Baseline CNN prediction ---
    cnn_prediction_prob = artifacts['baseline_cnn_model'].predict(sample_features_cnn)[0]
    predicted_percentages_cnn = {
        genre_names[idx]: float(genre_prob) * 100
        for idx, genre_prob in enumerate(cnn_prediction_prob)
    }
    predicted_genre_cnn = max(predicted_percentages_cnn, key=predicted_percentages_cnn.get)

    # --- Ensemble prediction ---
    # The second network's output feeds the SVM and random forest; their
    # predicted labels plus the baseline CNN's argmax form the meta-features.
    cnn_features_sample = artifacts['cnn_model'].predict(sample_features_cnn)
    svm_prediction_sample = artifacts['svm_model'].predict(cnn_features_sample)
    rf_prediction_sample = artifacts['rf_model'].predict(cnn_features_sample)
    ensemble_input_sample = np.column_stack(
        (svm_prediction_sample, rf_prediction_sample, np.argmax(cnn_prediction_prob))
    )

    # NOTE(review): the meta-classifier is refit on every call from the stored
    # ``ensemble_input`` / ``y_test`` arrays. Persisting a fitted model would
    # avoid the repeated training cost -- left unchanged to preserve results.
    xgb_model_ensemble = XGBClassifier(n_estimators=100, max_depth=3, learning_rate=0.1)
    xgb_model_ensemble.fit(artifacts['ensemble_input'], artifacts['y_test'])

    ensemble_prediction_sample = xgb_model_ensemble.predict(ensemble_input_sample)
    predicted_genre_ensemble = genre_names[int(ensemble_prediction_sample[0])]

    # Label each probability column through the classifier's own class list
    # rather than the JSON dict's iteration order, which is not guaranteed to
    # follow class-index order.
    predicted_probabilities_ensemble = xgb_model_ensemble.predict_proba(ensemble_input_sample)[0]
    predicted_percentages_ensemble = {
        genre_names[int(class_idx)]: float(percentage) * 100
        for class_idx, percentage in zip(xgb_model_ensemble.classes_, predicted_probabilities_ensemble)
    }

    # --- Assemble the JSON-ready result ---
    sorted_json_result_cnn = {
        "Predicted Genre (CNN)": predicted_genre_cnn,
        "Predicted Percentages (CNN)": _ranked_percentages(predicted_percentages_cnn),
    }
    sorted_json_result_ensemble = {
        "Predicted Genre (OCNN)": predicted_genre_ensemble,
        "Predicted Percentages (OCNN)": _ranked_percentages(predicted_percentages_ensemble),
    }
    return {"CNN": sorted_json_result_cnn, "OCNN": sorted_json_result_ensemble}


In [69]:
# Audio clip to classify; swap in another path (e.g. "magna.wav") to try a different file.
filename = "./Data/genres_original/metal/metal.00000.wav"

# Run both classifiers on the clip and pretty-print the merged result as JSON.
print(json.dumps(classify_music(filename), indent=4))



[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 187ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 349ms/step
{
    "CNN": {
        "Predicted Genre (CNN)": "country",
        "Predicted Percentages (CNN)": {
            "country": 76.5015959739685,
            "classical": 18.58040541410446,
            "metal": 2.5527222082018852,
            "hiphop": 1.960441842675209,
            "blues": 0.27204391080886126,
            "pop": 0.05756336613558233,
            "noise": 0.035301721072755754,
            "rock": 0.03296226495876908,
            "reggae": 0.005929822873440571,
            "jazz": 0.0008822489689919166,
            "disco": 0.0001459030386286031
        }
    },
    "OCNN": {
        "Predicted Genre (OCNN)": "metal",
        "Predicted Percentages (OCNN)": {
            "metal": 88.40416073799133,
            "rock": 3.862478584051132,
            "jazz": 3.8304243236780167,
            "hiphop": 0.9169843047857285,
            "b