In [None]:
from pathlib import Path
import time

import essentia
import essentia.standard as es
import librosa
import librosa.display
# IMPORTANT: the TCN beat processor is not available in the pip release of madmom,
# so the library has to be built from source.
import madmom
import matplotlib.pyplot as plt
import mir_eval
import numpy as np
import pandas as pd

In [None]:
# All locations are resolved relative to the directory the notebook is run from.
base_path = Path.cwd()

# Estimated beats are stored under results/baselines/<method>/.
experiments_path = base_path.joinpath("results", "baselines")
# output_path = base_path / "experiments_results" / "beat_trackers_baseline"
# The dataset lives two directory levels above the working directory.
dataset_path = base_path.parent.parent.joinpath("datasets", "candombe")

Important dataset information:
* Candombe:
    * sampling rate: 44100 Hz
    * precision: 16-bit
    * total audios: 36

In [None]:
# Sampling rate (Hz) used when loading every recording.
SR = 44_100
# Number of recordings; sizes the per-method timing arrays.
TOTAL_AUDIOS = 36

In [None]:
def librosa_beats(audio):
    """Estimate beat positions with librosa's beat tracker.

    Parameters
    ----------
    audio : np.ndarray
        Audio samples, passed to librosa as ``y`` together with ``sr=SR``.

    Returns
    -------
    np.ndarray
        Estimated beat positions, requested in seconds via ``units="time"``.
    """
    # Track the signal that was passed in (not the notebook-global ``x``) and
    # pass it by keyword, since recent librosa releases reject positional audio.
    _, beats = librosa.beat.beat_track(y=audio, sr=SR, units="time")
    return beats

# TODO: validate the sub-band aggregation choice before adding this to the baselines
def librosa_beats_with_onset_agg(audio, aggregate=np.median):
    """Estimate beats from an onset envelope aggregated over spectral sub-bands.

    Parameters
    ----------
    audio : np.ndarray
        Audio samples, passed to librosa as ``y`` together with ``sr=SR``.
    aggregate : callable, optional
        Reduction applied across the sub-band axis to obtain a single onset
        envelope. It must accept an ``axis`` keyword. Defaults to ``np.median``.

    Returns
    -------
    np.ndarray
        Estimated beat positions, requested in seconds via ``units="time"``.
    """
    onset_subbands = librosa.onset.onset_strength_multi(y=audio, sr=SR, channels=[0, 32, 64, 96, 128])
    # The multi-band result has one row per sub-band; collapse that axis so the
    # tracker receives a single onset envelope.
    onset_envelope = aggregate(onset_subbands, axis=-2)
    # units="time" keeps the output in seconds, consistent with the other baselines.
    _, beats = librosa.beat.beat_track(onset_envelope=onset_envelope, sr=SR, units="time")
    return beats

def essentia_beats(audio):
    """Estimate beat positions with Essentia's ``BeatTrackerMultiFeature``.

    Parameters
    ----------
    audio : np.ndarray
        Audio samples handed to the Essentia algorithm.
        NOTE(review): Essentia presumably expects float32 mono at 44.1 kHz; confirm.

    Returns
    -------
    Beat positions returned by the algorithm; the confidence value is discarded.
    """
    # Use the function argument, not the notebook-global ``x``.
    beats, _ = es.BeatTrackerMultiFeature()(audio)
    return beats

# Reference for this implementation: https://github.com/CPJKU/madmom/issues/403
def madmom_rnn_beats(audio):
    """Estimate beats with madmom: RNN beat processor followed by DBN decoding.

    ``audio`` is passed unchanged to ``RNNBeatProcessor``; its output is decoded
    by a ``DBNBeatTrackingProcessor`` configured with ``fps=100``.
    """
    activations = madmom.features.beats.RNNBeatProcessor()(audio)
    # NOTE(review): ``beats_per_bar`` looks like a downbeat-tracker option;
    # confirm it has any effect on this decoder.
    tracker = madmom.features.beats.DBNBeatTrackingProcessor(beats_per_bar=[4], fps=100)
    return tracker(activations)

def madmom_tcn_beats(audio):
    """Estimate beats with madmom: TCN beat processor followed by DBN decoding.

    ``audio`` is passed unchanged to ``TCNBeatProcessor``; its output is decoded
    by a ``DBNBeatTrackingProcessor`` configured with ``fps=100``.
    """
    activations = madmom.features.beats.TCNBeatProcessor()(audio)
    # NOTE(review): ``beats_per_bar`` looks like a downbeat-tracker option;
    # confirm it has any effect on this decoder.
    tracker = madmom.features.beats.DBNBeatTrackingProcessor(beats_per_bar=[4], fps=100)
    return tracker(activations)

In [None]:
# Registry of baseline beat trackers: each entry holds the callable to run and
# an array with one slot per recording for its processing time (seconds).
baselines = {
    name: {"function": tracker, "time": np.zeros(TOTAL_AUDIOS)}
    for name, tracker in (
        ("librosa", librosa_beats),
        ("madmom_rnn", madmom_rnn_beats),
        ("madmom_tcn", madmom_tcn_beats),
        ("essentia", essentia_beats),
    )
}

In [None]:
# Run every baseline on every recording, timing each call and caching the
# estimated beats on disk as results/baselines/<method>/<recording>.npz.

# rglob yields files in no guaranteed order; sorting keeps each recording's
# slot in the timing arrays stable between runs.
audiofiles = sorted(dataset_path.rglob("*.wav"))

for idx, audio in enumerate(audiofiles):
    x, _ = librosa.load(audio, mono=True, sr=SR)

    for key, val in baselines.items():
        # np.savez appends ".npz" when the name lacks it, so the cache check
        # has to look for the suffixed file name or it would never match.
        file_npz = experiments_path / key / f"{audio.stem}.npz"

        # Result already cached: skip. The timing slot keeps its initial 0.
        if file_npz.is_file():
            continue

        start = time.perf_counter()
        beats = val["function"](x)
        end = time.perf_counter()
        val["time"][idx] = end - start

        if not file_npz.parent.is_dir():
            print(f"Creating folder for {file_npz.parent}")
            file_npz.parent.mkdir(parents=True)

        np.savez(file_npz, estimated=beats)

In [None]:
# Print processing-time statistics for each baseline.
# Recordings whose result was already cached keep a time of 0 and are included
# in these statistics.
for key, value in baselines.items():
    # Read this method's own timings; ``val`` would be whatever entry the
    # processing loop visited last.
    times = value["time"]
    print(f"{key} time metrics")
    print(f"\tavg time {np.mean(times):0.4f} seconds")
    print(f"\tmedian time {np.median(times):0.4f} seconds")
    print(f"\tmax time {np.max(times):0.4f} seconds")
    

In [None]:
# TODO: document the default parameters of these algorithms
# TODO: add more baselines? (e.g. librosa with multi-channel onset strength?)
# TODO: evaluate the baselines against the ground-truth annotations
# TODO: parallelize the experiment runs

# baseline evaluation
* main metrics for each method separately
* metrics compared between each other (<- ?)

In [None]:
# We need to gather the results in a DataFrame (or a similar structure) so
# they are easier to analyze.

In [None]:
# We are not concerned with downbeat estimation, so only the beat timestamps
# from each annotation file are kept.
csvfiles = dataset_path.rglob("*.csv")
reference = {}

for file in csvfiles:
    # Read the annotation file currently being iterated (``csv`` was undefined).
    annotations_df = pd.read_csv(file, names=["timestamp", "beat"])

    # Keyed by file stem; the annotated timestamps are stored under "baseline".
    reference[file.stem] = {"baseline": annotations_df["timestamp"].values}

In [None]:
# Attach every method's cached estimates to the matching recording entry.
estimations = experiments_path.glob("*")
for folder in estimations:
    # Only per-method directories hold results; ignore stray files.
    if not folder.is_dir():
        continue

    print(f"Reading files from /{folder.name}")
    for file in folder.glob("*.npz"):
        # Close the archive once the array has been read instead of leaving
        # the file handle open.
        with np.load(file) as archive:
            reference[file.stem][folder.name] = archive["estimated"]

In [None]:
# One row per recording, one column per entry stored for it in `reference`.
df = pd.DataFrame(reference).T

In [None]:
# Score each method's estimates against the "baseline" column with
# mir_eval.beat.evaluate, storing the result in "<method>_metrics".
for method in ("madmom_rnn", "librosa", "madmom_tcn", "essentia"):
    df[f"{method}_metrics"] = df[["baseline", method]].apply(
        lambda row, method=method: mir_eval.beat.evaluate(row["baseline"], row[method]),
        axis=1,
    )

In [None]:
# Show the per-recording metric results for all four methods.
df.loc[:, ["madmom_rnn_metrics", "librosa_metrics", "madmom_tcn_metrics", "essentia_metrics"]]

In [None]:
# json_normalize cannot take a DataFrame (iterating one yields column names),
# so expand each method's column of metric results into its own table and
# join them side by side under (method column, metric) column labels.
metric_columns = ["madmom_rnn_metrics", "librosa_metrics", "madmom_tcn_metrics", "essentia_metrics"]
metrics = pd.concat(
    {column: pd.DataFrame(df[column].tolist(), index=df.index) for column in metric_columns},
    axis=1,
)

In [None]:
# Median of each metric across recordings for the madmom TCN baseline.
pd.json_normalize(df["madmom_tcn_metrics"].tolist()).median(axis=0)

In [None]:
# Median of each metric across recordings for the madmom RNN baseline.
pd.json_normalize(df["madmom_rnn_metrics"].tolist()).median(axis=0)

In [None]:
# Median of each metric across recordings for the librosa baseline.
pd.json_normalize(df["librosa_metrics"].tolist()).median(axis=0)

In [None]:
# Median of each metric across recordings for the Essentia baseline.
pd.json_normalize(df["essentia_metrics"].tolist()).median(axis=0)