In [69]:
import numpy as np
import librosa
import pickle
import pandas as pd
import random
from math import floor

In [78]:
def load_pickle(file_path):
    """Deserialize and return the object stored in the pickle file at ``file_path``.

    The file is opened in binary mode and closed again before returning.
    Unpickling can execute arbitrary code, so only use this on trusted files.
    """
    with open(file_path, mode='rb') as handle:
        payload = pickle.load(handle)
    return payload

def beat_tracking_dp(y, sr, hop_length=512, alpha=0.5):
    """Track beats with a dynamic program over a few tempo hypotheses.

    librosa's global tempo estimate seeds a small set of candidate tempi
    (the estimate itself plus the integer BPM values around it). For each
    candidate, a cumulative score is built frame by frame: the onset strength
    at the frame plus the best predecessor score, penalised by the squared
    log-ratio between the inter-beat gap and the candidate beat period.

    Parameters
    ----------
    y : audio time series, passed straight to librosa.
    sr : sampling rate of ``y`` in Hz.
    hop_length : hop (in samples) used for the onset-strength envelope.
    alpha : weight of the tempo-deviation penalty.

    Returns
    -------
    best_bpm : float
        The tempo hypothesis whose cumulative score at the last frame is highest.
    beat_frames : list[int]
        Beat positions as onset-envelope frame indices, in chronological
        order. The backtrace is anchored at the last frame, so a non-empty
        result always ends with ``len(onset_env) - 1``.
    tempo_estimate : float
        librosa's original tempo estimate.

    If librosa reports no usable tempo (<= 0 or NaN) or the onset envelope is
    empty, ``(tempo_estimate, [], tempo_estimate)`` is returned.
    """
    raw_tempo, _ = librosa.beat.beat_track(y=y, sr=sr)
    # Newer librosa versions return the tempo as an ndarray rather than a
    # scalar; normalise to a plain float so floor() and np.unique() behave.
    tempo_estimate = float(np.atleast_1d(raw_tempo).ravel()[0])

    onset_env = librosa.onset.onset_strength(y=y, sr=sr, hop_length=hop_length)
    n_frames = len(onset_env)

    # Nothing to track: no frames at all, or no usable tempo (0, negative, NaN).
    if n_frames == 0 or not tempo_estimate > 0:
        return tempo_estimate, [], tempo_estimate

    floor_tempo = floor(tempo_estimate)
    tempo_hypotheses = np.unique(
        [floor_tempo - 1, floor_tempo, tempo_estimate, floor_tempo + 1]
    )
    # A very low estimate can produce hypotheses <= 0, which have no valid
    # beat period (60 / tempo); tempo_estimate itself always survives.
    tempo_hypotheses = tempo_hypotheses[tempo_hypotheses > 0]

    time_step = hop_length / sr
    time_grid = np.arange(n_frames) * time_step

    # Per-tempo constants, hoisted out of the frame loop: the beat period and
    # the predecessor search window [i - far_lag, i - near_lag), i.e. roughly
    # between two periods and half a period before the current frame.
    windows = []
    for tempo in tempo_hypotheses:
        tau_p = 60 / tempo
        far_lag = int(2 * tau_p / time_step)
        near_lag = int(tau_p / (2 * time_step))
        windows.append((tau_p, far_lag, near_lag))

    C = np.zeros((n_frames, len(tempo_hypotheses)))  # cumulative scores
    P = np.zeros_like(C, dtype=int)                  # best-predecessor frames

    for i in range(1, n_frames):
        t = time_grid[i]
        for k, (tau_p, far_lag, near_lag) in enumerate(windows):
            start = max(0, i - far_lag)
            end = max(0, i - near_lag)
            if start < end:
                gaps = t - time_grid[start:end]
                transition_scores = C[start:end, k] - alpha * np.log(gaps / tau_p) ** 2
                offset = int(np.argmax(transition_scores))
                best_predecessor = start + offset
                max_score = transition_scores[offset]
            else:
                # Too early in the signal for any predecessor: frame 0 marks
                # the start of a chain.
                best_predecessor = 0
                max_score = 0
            C[i, k] = onset_env[i] + max_score
            P[i, k] = best_predecessor

    best_tempo_index = int(np.argmax(C[-1]))
    best_bpm = float(tempo_hypotheses[best_tempo_index])

    # Backtrace from the last frame; frame 0 is the start-of-chain sentinel
    # and is never reported as a beat.
    beat_frames = []
    current_frame = n_frames - 1
    while current_frame > 0:
        beat_frames.append(current_frame)
        current_frame = int(P[current_frame, best_tempo_index])
    beat_frames.reverse()  # chronological order

    return best_bpm, beat_frames, tempo_estimate

In [79]:
# Load the pickled test split. Only its keys are used below.
# NOTE(review): presumably a mapping keyed by song ID (each key must be
# convertible to int) — confirm against whatever produced the pickle.
X_test = load_pickle('../data/pkl/test_data.pkl')

# Fixed seed so the same song is picked every time this cell is run.
random.seed(42)
random_song_id = int(random.choice(list(X_test.keys())))
audio_file = f'../data/audio_files/processed/{random_song_id}.mp3'
# sr=None asks librosa to keep the file's native sampling rate.
y, sr = librosa.load(audio_file, sr=None)
# Returns (best tempo hypothesis, beat frame indices, librosa's own estimate).
tempo, beats, original_bpm = beat_tracking_dp(y, sr)

# Plain-text summary; note that `beats` is printed in full.
print("Song ID:", random_song_id)
print("Best BPM estimate:", tempo)
print("Original BPM estimate:", original_bpm)
print("Beat frames:", beats)

Song ID: 71
Best BPM estimate: 126.0
Original BPM estimate: 125.0
Beat frames: [45, 76, 103, 136, 170, 206, 237, 271, 296, 326, 371, 416, 460, 505, 549, 594, 638, 683, 728, 772, 817, 861, 884, 918, 951, 985, 1012, 1040, 1085, 1129, 1174, 1208, 1242, 1275, 1308, 1342, 1375, 1409, 1442, 1465, 1490, 1530, 1553, 1576, 1609, 1642, 1665, 1709, 1732, 1755, 1788, 1821, 1844, 1888, 1933, 1956, 1999, 2022, 2066, 2089, 2133, 2156, 2179, 2223, 2246, 2290, 2313, 2356, 2379, 2402, 2446, 2469, 2492, 2536, 2580, 2603, 2647, 2670, 2714, 2758, 2781, 2825, 2848, 2892, 2915, 2959, 3004, 3027, 3071, 3115, 3138, 3161, 3205, 3228, 3272, 3295, 3329, 3361, 3384, 3428, 3451, 3495, 3518, 3562, 3607, 3651, 3674, 3718, 3741, 3785, 3808, 3831, 3874, 3897, 3920, 3964, 3987, 4031, 4075, 4098, 4142, 4165, 4188, 4232, 4255, 4289, 4321, 4344, 4378, 4410, 4433, 4456, 4490, 4522, 4545, 4579, 4611, 4634, 4678, 4701, 4724, 4767, 4790, 4813, 4857, 4880, 4903, 4946, 4969, 4992, 5035, 5058, 5081, 5124, 5147, 5170, 5214, 5237, 