In [80]:
import numpy as np
import librosa
import pickle
import pandas as pd
import random
from math import floor

In [88]:
def load_pickle(file_path):
    """Read a pickle file and return the object stored in it.

    Parameters
    ----------
    file_path : str or path-like
        Location of the pickle file; it is opened in binary read mode.

    Returns
    -------
    object
        Whatever object ``pickle.load`` reconstructs from the file.

    Notes
    -----
    Unpickling can execute arbitrary code, so only use this on files that
    come from a trusted source.
    """
    with open(file_path, mode='rb') as pickle_file:
        payload = pickle.load(pickle_file)
    return payload

def beat_tracking_dp(y, sr, hop_length=512, alpha=0.5):
    """Track beats with dynamic programming over a few tempo hypotheses.

    An initial tempo is taken from ``librosa.beat.beat_track``. Candidate
    tempi are that estimate plus the integers ``floor(tempo) - 1``,
    ``floor(tempo)`` and ``floor(tempo) + 1`` (duplicates and non-positive
    values are dropped). For every candidate, a cumulative score is built
    frame by frame: the onset strength of the frame plus the best
    predecessor score, where predecessors lie between half a beat period
    and two beat periods in the past and are penalised by
    ``alpha * log(gap / period) ** 2``. The candidate with the highest
    cumulative score at the last frame wins, and its predecessor chain is
    followed backwards from the last frame to recover the beats.

    Parameters
    ----------
    y : audio signal, forwarded unchanged to librosa.
    sr : sampling rate, forwarded to librosa and used to convert frames
        to seconds (``hop_length / sr`` seconds per frame).
    hop_length : int, default 512
        Hop length used for the onset strength envelope.
    alpha : float, default 0.5
        Weight of the squared-log tempo-deviation penalty.

    Returns
    -------
    best_bpm : float
        The winning tempo hypothesis.
    beat_frames : list of int
        Beat positions as onset-envelope frame indices, in increasing
        order. The last frame is always the final entry; frame 0 acts as
        the virtual start of every path and is never reported. Empty when
        the envelope has fewer than two frames.
    tempo_estimate : float
        The initial tempo reported by librosa, as a scalar.
    """
    raw_tempo, _ = librosa.beat.beat_track(y=y, sr=sr)
    # Depending on the librosa version the tempo comes back as a float or as
    # an ndarray; collapse it to a scalar so the hypothesis list below stays
    # homogeneous (a ragged list makes np.unique raise).
    tempo_estimate = float(np.atleast_1d(raw_tempo).ravel()[0])
    floor_tempo = floor(tempo_estimate)
    tempo_hypotheses = np.unique(
        np.asarray(
            [floor_tempo - 1, floor_tempo, tempo_estimate, floor_tempo + 1],
            dtype=float,
        )
    )
    # A tempo of zero or below has no beat period (60 / tempo would be
    # infinite or negative), so such hypotheses are discarded. At least
    # floor_tempo + 1 survives whenever the estimate is non-negative.
    tempo_hypotheses = tempo_hypotheses[tempo_hypotheses > 0]

    onset_env = np.asarray(
        librosa.onset.onset_strength(y=y, sr=sr, hop_length=hop_length)
    )
    n_frames = len(onset_env)
    if n_frames == 0 or len(tempo_hypotheses) == 0:
        # Nothing to score: report the raw estimate and no beats.
        return tempo_estimate, [], tempo_estimate

    time_step = hop_length / sr
    time_grid = np.arange(n_frames) * time_step
    C = np.zeros((n_frames, len(tempo_hypotheses)))  # cumulative scores
    P = np.zeros_like(C, dtype=int)  # best predecessor frame per cell

    # Each tempo column only depends on itself, so the beat period and the
    # window lags are computed once per hypothesis instead of once per frame.
    for k, tempo in enumerate(tempo_hypotheses):
        tau_p = 60 / tempo
        max_lag = int(2 * tau_p / time_step)  # oldest predecessor: 2 periods back
        min_lag = int(tau_p / (2 * time_step))  # newest predecessor: half a period back
        for i in range(1, n_frames):
            start = max(0, i - max_lag)
            end = max(0, i - min_lag)
            if start < end:
                elapsed = time_grid[i] - time_grid[start:end]
                transition_scores = (
                    C[start:end, k] - alpha * np.log(elapsed / tau_p) ** 2
                )
                offset = int(np.argmax(transition_scores))
                best_predecessor = start + offset
                max_score = transition_scores[offset]
            else:
                # No admissible predecessor yet: link to the virtual start.
                best_predecessor = 0
                max_score = 0
            C[i, k] = onset_env[i] + max_score
            P[i, k] = best_predecessor

    best_tempo_index = int(np.argmax(C[-1]))
    best_bpm = float(tempo_hypotheses[best_tempo_index])

    # Backtrace from the last frame. Predecessors are always strictly earlier
    # frames, so this terminates at frame 0, which is not reported as a beat.
    beat_frames = []
    current_frame = n_frames - 1
    while current_frame > 0:
        beat_frames.append(current_frame)
        current_frame = int(P[current_frame, best_tempo_index])
    beat_frames.reverse()  # chronological order

    return best_bpm, beat_frames, tempo_estimate

In [91]:
# Load the held-out test set; only its keys (song IDs) are used here.
X_test = load_pickle('../data/pkl/test_data.pkl')

# Seed the RNG so the same song is picked on every run.
random.seed(42)
random_song_id = int(random.choice(list(X_test.keys())))

# Decode the chosen track at its native sampling rate (sr=None).
audio_file = f'../data/audio_files/processed/{random_song_id}.mp3'
y, sr = librosa.load(audio_file, sr=None)

# Run the DP beat tracker: refined tempo, beat frames, librosa's raw tempo.
tempo, beats, original_bpm = beat_tracking_dp(y, sr=sr)

# Report the results, one labelled line each.
for label, value in (
    ("Song ID:", random_song_id),
    ("Best BPM estimate:", tempo),
    ("Original BPM estimate:", original_bpm),
    ("Beat frames:", beats),
):
    print(label, value)

Song ID: 71
Best BPM estimate: 128.0
Original BPM estimate: 127.8409090909091
Beat frames: [2, 13, 21, 27, 36, 44, 53, 61, 67, 74, 83, 94, 105, 116, 127, 133, 142, 150, 161, 172, 183, 194, 205, 211, 217, 223, 231, 239, 250, 261, 272, 283, 289, 295, 306, 312, 321, 328, 334, 340, 346, 354, 362, 368, 374, 385, 395, 404, 411, 417, 428, 434, 440, 448, 456, 462, 468, 478, 484, 490, 501, 507, 513, 523, 529, 535, 545, 551, 557, 568, 574, 580, 590, 596, 602, 612, 618, 624, 635, 641, 647, 657, 663, 669, 679, 685, 691, 701, 707, 713, 724, 730, 741, 752, 758, 764, 774, 780, 791, 797, 803, 813, 819, 825, 831, 841, 847, 858, 864, 870, 880, 886, 892, 903, 909, 919, 925, 931, 937, 947, 953, 959, 969, 975, 981, 992, 998, 1004, 1014, 1020, 1026, 1036, 1042, 1048, 1054, 1065, 1076, 1082, 1088, 1096, 1103, 1109, 1115, 1124, 1132, 1138, 1146, 1154, 1165, 1171, 1177, 1183, 1193, 1199, 1205, 1215, 1221, 1227, 1235, 1243, 1249, 1255, 1266, 1272, 1282, 1288, 1294, 1305, 1311, 1321, 1327, 1333, 1339, 1349, 1355