In [35]:
import pickle
from pathlib import Path
import pandas as pd

# Location of the pickled dataset index (iterated later as one entry per song).
IN = Path("../data/dataset/dataset_index.pkl")

# NOTE: unpickling can execute arbitrary code, so IN must point at a trusted file.
with IN.open("rb") as index_file:
    dataset_index = pickle.load(index_file)

print(len(dataset_index), "songs loaded")

180 songs loaded


In [36]:
import librosa
import numpy as np

# Analysis settings shared by the feature-extraction cells below.
SR = 22050                             # default sample rate used when loading audio
HOP = 512                              # hop length in samples (≈ 23 ms at SR)
N_OCTAVES = 7                          # octaves spanned by the constant-Q transform
BINS_PER_OCTAVE = 12                   # CQT resolution within each octave
N_BINS = N_OCTAVES * BINS_PER_OCTAVE   # total number of CQT bins

In [37]:
def load_audio(path, sr=SR):
    """Read an audio file as a mono signal via ``librosa.load``.

    Parameters
    ----------
    path : path of the audio file to read.
    sr : sample rate handed to ``librosa.load`` (defaults to ``SR``).

    Returns
    -------
    tuple
        ``(signal, sample_rate)`` exactly as produced by ``librosa.load``.
    """
    signal, sample_rate = librosa.load(path, sr=sr, mono=True)
    return signal, sample_rate

In [38]:
def extract_chroma(y, sr):
    """Compute a frame-wise chromagram from a constant-Q transform.

    Parameters
    ----------
    y : audio signal passed to ``librosa.cqt``.
    sr : sample rate of ``y``.

    Returns
    -------
    The chromagram transposed so that frames are on the first axis
    (shape ``[T, 12]`` with librosa's default of 12 chroma bins).
    """
    cqt = librosa.cqt(
        y,
        sr=sr,
        hop_length=HOP,
        n_bins=N_BINS,
        bins_per_octave=BINS_PER_OCTAVE,
    )

    # Only magnitudes are needed; phase is discarded.
    mag = np.abs(cqt)

    # When a precomputed CQT is supplied, chroma_cqt must be told the resolution
    # it was computed with. Its own default for bins_per_octave is not guaranteed
    # to be 12 (recent librosa releases default to 36), which would fold the
    # 12-bins-per-octave CQT above into the wrong pitch classes.
    chroma = librosa.feature.chroma_cqt(
        C=mag,
        sr=sr,
        hop_length=HOP,
        bins_per_octave=BINS_PER_OCTAVE,
    )

    return chroma.T   # → shape = [T, 12]

In [39]:
def get_frame_times(y, sr):
    """Return the timestamp (in seconds) of every analysis frame of ``y``.

    The frame count is computed as ``1 + len(y) // HOP`` and the frame
    indices are converted to time with ``librosa.frames_to_time``.
    """
    frame_count = (len(y) // HOP) + 1
    frame_indices = np.arange(frame_count)
    return librosa.frames_to_time(frame_indices, sr=sr, hop_length=HOP)

In [40]:
def align_chords_to_frames(times, chord_df):
    """Assign a chord label to every frame time.

    A frame at time ``t`` receives the label of the first row of ``chord_df``
    (in row order) whose half-open interval ``[start, end)`` contains ``t``.
    Frames covered by no interval are labelled ``"N"``.

    Parameters
    ----------
    times : sequence of frame timestamps.
    chord_df : DataFrame with ``"start"``, ``"end"`` and ``"label"`` columns.
        An empty frame (even one without those columns) is accepted and
        yields ``"N"`` for every frame.

    Returns
    -------
    list
        One label per entry of ``times``.
    """
    frame_times = np.asarray(times, dtype=float)
    labels = np.full(frame_times.shape[0], "N", dtype=object)

    # No annotations at all: every frame stays "N". This also avoids a KeyError
    # on a DataFrame that was built from an empty list and has no columns.
    if len(chord_df) == 0:
        return labels.tolist()

    starts = chord_df["start"].to_numpy()
    ends = chord_df["end"].to_numpy()
    names = chord_df["label"].tolist()

    # One vectorised comparison per chord row instead of one pandas filter per
    # frame. Rows are applied last-to-first so that, where intervals overlap,
    # the earliest row is written last and therefore wins.
    for start, end, name in zip(starts[::-1], ends[::-1], names[::-1]):
        inside = (start <= frame_times) & (frame_times < end)
        labels[inside] = name

    return labels.tolist()

In [41]:
def align_downbeats_to_frames(times, beats_df, tol=0.03):
    """Flag the frames that lie within ``tol`` of a downbeat.

    Downbeats are the rows of ``beats_df`` whose ``"position"`` equals 1;
    their timestamps are read from the ``"time"`` column.

    Returns
    -------
    list of int
        ``1`` for each frame time within ``tol`` (inclusive) of at least one
        downbeat, ``0`` otherwise.
    """
    is_downbeat = beats_df["position"] == 1
    downbeat_times = beats_df[is_downbeat]["time"].values
    frame_times = np.asarray(times)

    # Pairwise |downbeat - frame| distances, laid out as (frames, downbeats).
    gaps = np.abs(downbeat_times[np.newaxis, :] - frame_times[:, np.newaxis])
    near_any = (gaps <= tol).any(axis=1)

    return near_any.astype(int).tolist()

In [42]:
# Build one record per song: chroma features plus frame-aligned chord and
# downbeat labels.
results = []

for song in dataset_index:
    y, sr = load_audio(song["audio_path"])
    X = extract_chroma(y, sr)
    times = get_frame_times(y, sr)

    # The frame times are derived from the signal length, independently of the
    # feature matrix. Truncate both to a common length so that features, times
    # and labels are guaranteed to line up row for row.
    n_frames = min(len(X), len(times))
    X = X[:n_frames]
    times = times[:n_frames]

    chords_df = pd.DataFrame(song["chords"])
    beats_df = pd.DataFrame(song["beat_position"])

    y_chord = align_chords_to_frames(times, chords_df)
    y_down = align_downbeats_to_frames(times, beats_df)

    results.append({
        "id": song["id"],
        "X": X,
        "times": times,
        "y_chord": y_chord,
        "y_downbeat": y_down
    })

In [43]:
# Write one .npz archive per song into the features directory.
out = Path("../data/features")
out.mkdir(exist_ok=True)

for record in results:
    target = out / f"{record['id']}.npz"
    arrays = {
        "X": record["X"],
        "times": record["times"],
        # NOTE: stored as an object array, so loading it back needs
        # np.load(..., allow_pickle=True).
        "y_chord": np.array(record["y_chord"], dtype=object),
        "y_downbeat": np.array(record["y_downbeat"]),
    }
    np.savez(target, **arrays)