In [28]:
import librosa
import numpy as np
import json
import os

In [29]:
def extract_features(file_path):
    """Summarise an audio file as one averaged feature vector.

    The file is loaded as mono audio through ``librosa.load`` (the returned
    sample rate is discarded). Four descriptor matrices are then computed
    from the signal -- 13 MFCCs, chroma, spectral contrast and tonnetz --
    stacked row-wise into a single matrix, and averaged along the second
    axis of that matrix so each stacked row collapses to one value.

    Parameters
    ----------
    file_path
        Location of the audio file; forwarded unchanged to ``librosa.load``.

    Returns
    -------
    numpy.ndarray or None
        1-D array holding one mean per stacked descriptor row, or ``None``
        when any step raises (the error is printed, not re-raised).
    """
    try:
        signal, _sample_rate = librosa.load(file_path, mono=True)
        descriptor_blocks = [
            librosa.feature.mfcc(y=signal, n_mfcc=13),
            librosa.feature.chroma_stft(y=signal),
            librosa.feature.spectral_contrast(y=signal),
            librosa.feature.tonnetz(y=signal),
        ]
        stacked = np.vstack(descriptor_blocks)
        # Transpose first, then reduce over axis 0: one mean per descriptor row.
        return np.mean(stacked.T, axis=0)
    except Exception as error:
        print(f"Error encountered while parsing file '{file_path}': {error}")
        return None


In [30]:
from keras.models import load_model
import joblib

def _rank_genre_percentages(probabilities, genre_names):
    """Map class probabilities to ``{genre: percent}``, sorted high to low."""
    percentages = {
        genre_names[idx]: float(prob) * 100
        for idx, prob in enumerate(probabilities)
    }
    return dict(sorted(percentages.items(), key=lambda item: item[1], reverse=True))


def classify_music(file_path, saved_folder='model_folder'):
    """Classify the genre of an audio file with the baseline CNN and the ensemble.

    Two predictions are produced:

    * **Baseline CNN** -- ``baseline_cnn_model.h5`` applied directly to the
      extracted features.
    * **OCNN** -- the output of ``cnn_model.h5`` is fed to the SVM and random
      forest models; their predictions plus the CNN's arg-max class form the
      input row of the XGBoost model, which gives the final label and
      probabilities.

    Parameters
    ----------
    file_path
        Audio file to classify; passed to ``extract_features``.
    saved_folder
        Directory containing ``cnn_model.h5``, ``baseline_cnn_model.h5``,
        ``rf_model.joblib``, ``svm_model.joblib``, ``xgb_model.joblib`` and
        ``label_dict.json``. Defaults to ``'model_folder'``.

    Returns
    -------
    dict
        ``{"Baseline CNN": {...}, "OCNN": {...}}``; each inner dict holds the
        predicted genre and a genre -> percentage mapping sorted in
        descending order.

    Raises
    ------
    ValueError
        If ``extract_features`` returns ``None`` for ``file_path``.
    """
    # NOTE: every call reloads all artifacts from disk. ``joblib.load``
    # unpickles its input, so ``saved_folder`` must only hold trusted files.
    cnn_model = load_model(os.path.join(saved_folder, 'cnn_model.h5'))
    baseline_cnn_model = load_model(os.path.join(saved_folder, 'baseline_cnn_model.h5'))
    rf_model = joblib.load(os.path.join(saved_folder, 'rf_model.joblib'))
    svm_model = joblib.load(os.path.join(saved_folder, 'svm_model.joblib'))
    xgb_model = joblib.load(os.path.join(saved_folder, 'xgb_model.joblib'))

    # label_dict maps genre name -> class index; invert it for index lookups.
    with open(os.path.join(saved_folder, 'label_dict.json'), 'r') as json_file:
        label_dict = json.load(json_file)
    genre_names = {idx: genre for genre, idx in label_dict.items()}

    # extract_features signals failure by returning None; fail loudly here
    # instead of crashing on ``None.reshape`` below.
    sample_features = extract_features(file_path)
    if sample_features is None:
        raise ValueError(f"Could not extract audio features from '{file_path}'")
    # Reshape the 1-D feature vector to (1, n_features, 1) for the CNNs.
    sample_features_cnn = sample_features.reshape(1, sample_features.shape[0], 1)

    # --- Baseline CNN ---
    baseline_cnn_prediction_prob = baseline_cnn_model.predict(sample_features_cnn)[0]
    sorted_baseline_cnn_predictions = _rank_genre_percentages(
        baseline_cnn_prediction_prob, genre_names
    )
    predicted_genre_baseline_cnn = max(
        sorted_baseline_cnn_predictions, key=sorted_baseline_cnn_predictions.get
    )
    sorted_json_result_baseline_cnn = {
        "Predicted Genre (Baseline CNN)": predicted_genre_baseline_cnn,
        "Predicted Percentages (Baseline CNN)": sorted_baseline_cnn_predictions
    }

    # --- Ensemble (OCNN) ---
    # Run the CNN once and reuse its output both as the SVM/RF input and as
    # the source of the arg-max class column.
    cnn_features_sample = cnn_model.predict(sample_features_cnn)
    cnn_prediction_prob = cnn_features_sample[0]
    svm_prediction_sample = svm_model.predict(cnn_features_sample)
    rf_prediction_sample = rf_model.predict(cnn_features_sample)
    ensemble_input_sample = np.column_stack(
        (svm_prediction_sample, rf_prediction_sample, np.argmax(cnn_prediction_prob))
    )
    ensemble_prediction_sample = xgb_model.predict(ensemble_input_sample)

    # Cast to a plain int so the lookup does not depend on the array dtype.
    predicted_genre_ensemble = genre_names[int(ensemble_prediction_sample[0])]
    predicted_probabilities_ensemble = xgb_model.predict_proba(ensemble_input_sample)[0]
    sorted_ensemble_predictions = _rank_genre_percentages(
        predicted_probabilities_ensemble, genre_names
    )
    sorted_json_result_ensemble = {
        "Predicted Genre (OCNN)": predicted_genre_ensemble,
        "Predicted Percentages (OCNN)": sorted_ensemble_predictions
    }

    return {
        "Baseline CNN": sorted_json_result_baseline_cnn,
        "OCNN": sorted_json_result_ensemble,
    }


In [31]:
# Audio clip to classify (alternative sample: "magna.wav").
filename = "one day reggae.wav"

# Run both classifiers and pretty-print the merged result as JSON.
print(
    json.dumps(
        classify_music(filename),
        indent=4,
    )
)



[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 224ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 244ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step
{
    "Baseline CNN": {
        "Predicted Genre (Baseline CNN)": "disco",
        "Predicted Percentages (Baseline CNN)": {
            "disco": 86.92527413368225,
            "metal": 5.351472645998001,
            "hiphop": 2.074967138469219,
            "jazz": 1.4219047501683235,
            "classical": 1.3325255364179611,
            "country": 1.1458273977041245,
            "pop": 1.0018767789006233,
            "rock": 0.697948457673192,
            "blues": 0.030386258731596172,
            "reggae": 0.01532921742182225,
            "noise": 0.0024840508558554575
        }
    },
    "OCNN": {
        "Predicted Genre (OCNN)": "reggae",
        "Predicted Percentages (OCNN)": {
            "reggae": 84.7791850566864,
            "pop": 4.427284747362137,
      