In [None]:
import glob

# import essentia
# import essentia.standard as es
import librosa
import librosa.display
import matplotlib.pyplot as plt
import mir_eval
import numpy as np
import pandas as pd

import IPython.display as ipd
# Notebook-wide default figure size (width, height) so waveform plots are wide.
plt.rcParams["figure.figsize"] = (15,5)

In [None]:
def plot_comparison(x, fs, expected_beats, librosa_beats, start=0, end=50):
    """Plot a waveform excerpt with annotated and estimated beats overlaid.

    The excerpt covers ``start``..``end`` seconds of ``x``. Beats that fall
    inside that window (bounds included) are drawn as dashed vertical lines:
    red for ``expected_beats`` and green for ``librosa_beats``.

    Parameters
    ----------
    x : array-like
        Audio samples; sliced by sample index.
    fs : int
        Sample rate used to convert seconds into sample indices.
    expected_beats : array-like of float
        Reference beat times, in the same time unit as ``start``/``end``.
    librosa_beats : array-like of float
        Estimated beat times, in the same time unit as ``start``/``end``.
    start, end : int or float, optional
        Window boundaries in seconds. Defaults to the first 50 seconds.
    """
    # Slice bounds must be integers; rounding keeps integer inputs unchanged
    # while also allowing fractional window boundaries such as start=2.5.
    first_sample = int(round(start * fs))
    last_sample = int(round(end * fs))

    fig, ax = plt.subplots(nrows=1, sharex=True)
    ax.plot(x[first_sample:last_sample], label='waveform')

    overlays = (
        (expected_beats, 'r', 'groundtruth'),
        (librosa_beats, 'g', 'librosa'),
    )
    for beats, colour, label in overlays:
        # Accept plain lists as well as arrays: boolean masking needs an ndarray.
        beats = np.asarray(beats, dtype=float)
        in_window = beats[(beats >= start) & (beats <= end)]
        # Shift to the excerpt's origin and convert to samples so the lines
        # share the x axis of the plotted waveform.
        ax.vlines(
            (in_window - start) * fs,
            0, 1, alpha=0.5, color=colour, linestyle='--', label=label
        )

    ax.set_xlabel('samples (relative to start)')
    ax.legend()

In [None]:
# CONSTANTS
# Sample rate passed as `sr` whenever audio is loaded, and reused for every
# frame/sample <-> time conversion in this notebook.
FS = 44_100

In [None]:
# Dataset available at https://www.eumus.edu.uy/candombe/datasets/ISMIR2015/dataset.html
# Each entry is a recording path without its ".wav" extension, so the same stem
# can be reused to build both the "<stem>.wav" and "<stem>.csv" file names.
# glob returns files in filesystem order; sorting makes runs reproducible.
file_path = sorted(wav_name[:-4] for wav_name in glob.glob('../datasets/candombe/*.wav'))

In [None]:
# The Essentia import at the top of the notebook is currently commented out, so
# `es` may not exist. Only build the Essentia beat trackers when it does;
# otherwise leave them as None so the notebook still runs top to bottom.
# NOTE: the Essentia calls in the evaluation loop must stay disabled while
# these are None.
if "es" in globals():
    beat_multi_feature = es.BeatTrackerMultiFeature()
    beat_degara = es.BeatTrackerDegara()
else:
    beat_multi_feature = None
    beat_degara = None

In [None]:
# Per-recording results, keyed by the recording path without its extension.
dataset_result = {}

for file in file_path:
    print(f"processing {file}")
    # Every "<file>.wav" recording is paired with a "<file>.csv" annotation file
    # whose two columns are read as "timestamp" and "beat".
    x, fs = librosa.load(f"{file}.wav", mono=True, sr=FS)
    x_df = pd.read_csv(f"{file}.csv", names=["timestamp", "beat"])
    # Reference beat times (presumably seconds, since they are compared with
    # librosa's time output below -- TODO confirm against the dataset docs).
    ground_truth = x_df['timestamp'].values

    # Calculate the beats using the librosa approach. Arguments are passed by
    # keyword: recent librosa releases (>= 0.10) reject positional `y` / `sr`.
    _, beat_frame = librosa.beat.beat_track(y=x, sr=FS)
    librosa_timestamps = librosa.frames_to_time(beat_frame, sr=FS)

    #essentia_multi_feature, _ = beat_multi_feature(x)
    #essentia_degara = beat_degara(x)

    dataset_result[file] = {
        "ground_truth_beats": ground_truth,
        "librosa_beats": librosa_timestamps,
        "f_score_librosa": mir_eval.beat.f_measure(ground_truth, librosa_timestamps),
        "p_score_librosa": mir_eval.beat.p_score(ground_truth, librosa_timestamps),
#         "multi_feature_beats": essentia_multi_feature,
#         "degara_beats": essentia_degara,
#         "f_score_multi_beat": mir_eval.beat.f_measure(ground_truth, essentia_multi_feature),
#         "p_score_multi_beat": mir_eval.beat.p_score(ground_truth, essentia_multi_feature),
#         "f_score_degara": mir_eval.beat.f_measure(ground_truth, essentia_degara),
#         "p_score_degara": mir_eval.beat.p_score(ground_truth, essentia_degara),
    }

In [None]:
# Order the recordings from worst to best librosa F-measure. The results are
# stored under "f_score_librosa"; there is no plain "f_score" key.
dataset_result = dict(
    sorted(dataset_result.items(), key=lambda item: item[1]["f_score_librosa"])
)

In [None]:
# Print each recording's librosa F-measure, in dataset_result's current order.
# The score is stored under "f_score_librosa"; there is no plain "f_score" key.
for name, result in dataset_result.items():
    print(f'{name} -> {result["f_score_librosa"]}')

In [None]:
# Reload one specific recording (mono, resampled to FS) to listen to it below.
x, fs = librosa.load("../datasets/candombe/csic.1995_ansina2_04.wav", mono=True, sr=FS)

In [None]:
# Results stored for that same recording (key = path without ".wav").
tmp = dataset_result["../datasets/candombe/csic.1995_ansina2_04"]

In [None]:
# Custom click for the librosa estimate: a 500 Hz sine lasting 0.1 s
# (FS*.1 samples) shaped by an exponential decay with a 0.01 s time constant.
wrong_click_sound = (
    np.sin(2*np.pi*np.arange(FS*.1)*500/(1.*FS))
    * np.exp(-np.arange(FS*.1)/(FS*.01))  # exponential decay
)

In [None]:
# Turn each beat sequence into a click track with as many samples as the
# recording, so the tracks can be added to `x` element-wise.
clicks_truth = mir_eval.sonify.clicks(
    tmp["ground_truth_beats"],
    FS,
    click=None,  # no custom click supplied for the reference beats
    length=len(x),
)
wrong_clicks = mir_eval.sonify.clicks(
    tmp["librosa_beats"],
    FS,
    click=wrong_click_sound,  # custom click so the estimate sounds different
    length=len(x),
)

In [None]:
# Recording mixed with both click tracks: ground truth and librosa estimate.
ipd.Audio(x+clicks_truth+wrong_clicks, rate=FS)

In [None]:
# Recording mixed with the ground-truth clicks only.
ipd.Audio(x+clicks_truth, rate=FS)

In [None]:
# Recording mixed with the librosa-estimated clicks only.
ipd.Audio(x+wrong_clicks, rate=FS)