In [16]:
import collections
import json
import random
from collections import Counter
from pathlib import Path

import numpy as np
import pandas as pd

# Reproducibility: seed both Python's and NumPy's global RNGs with one value.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)

# FEATURE_DIR is scanned for *.npz feature archives; OUTPUT_DIR receives the
# artifacts this notebook writes.
FEATURE_DIR = Path("../data/features")
OUTPUT_DIR = Path("../data/output")
# parents=True: without it, mkdir raises FileNotFoundError when ../data itself
# does not exist yet (e.g. on a fresh checkout).
OUTPUT_DIR.mkdir(parents=True, exist_ok=True)

In [17]:
# Load every *.npz archive in FEATURE_DIR into an in-memory list of per-song dicts.
files = sorted(FEATURE_DIR.glob("*.npz"))

dataset = []

for f in files:
    # NOTE(review): allow_pickle=True lets np.load unpickle object arrays, which
    # can execute arbitrary code -- only load archives from a trusted source.
    # The context manager closes the underlying file once the arrays are read,
    # instead of leaving one open handle per archive.
    with np.load(f, allow_pickle=True) as data:
        X = data["X"]                # (T,12)
        times = data["times"]        # (T,)
        y = data["y_chord"]          # (T,)
        y_db = data["y_downbeat"]    # (T,)

    # Features and both label tracks must have the same number of frames.
    assert X.shape[0] == len(y) == len(y_db), f"Length mismatch in {f.stem}"

    dataset.append({
        "id": f.stem,
        "X": X,
        "times": times,
        "chords": list(y),
        "downbeats": list(y_db)
    })

len(dataset)

180

In [18]:
def normalize_chroma(X):
    """Scale each row of ``X`` by its Euclidean (L2) norm.

    A small constant (1e-8) is added to every row norm before dividing, so an
    all-zero row stays all-zero instead of producing NaNs.
    """
    row_norms = np.linalg.norm(X, axis=1, keepdims=True)
    return X / (row_norms + 1e-8)

# Swap each song's feature matrix for its row-normalized counterpart.
for song in dataset:
    song.update(X=normalize_chroma(song["X"]))

In [19]:
# Flatten the chord labels of every song into one list, then take the sorted
# set of distinct labels as the vocabulary.
all_chords = [label for song in dataset for label in song["chords"]]
vocab = sorted(set(all_chords))

In [20]:
# Forward (label -> id) and reverse (id -> label) lookups; each id is the
# label's position in the sorted vocabulary.
chord_to_idx = dict(zip(vocab, range(len(vocab))))
idx_to_chord = dict(enumerate(vocab))

# Write the label -> id mapping to disk as indented JSON.
vocab_path = OUTPUT_DIR / "chord_vocab.json"
with vocab_path.open("w") as f:
    json.dump(chord_to_idx, f, indent=2)

In [21]:
# Translate every song's chord labels into integer ids using chord_to_idx and
# store them under "y_idx", one id per feature row.
for song in dataset:
    label_ids = [chord_to_idx[name] for name in song["chords"]]
    song["y_idx"] = np.array(label_ids)
    assert len(song["y_idx"]) == song["X"].shape[0]

In [22]:
# Todo: class weighting for dim and aug
# Count how many frames carry each integer label across all songs.
# Counter is already imported in the notebook's first cell, so it is not
# re-imported here.
Counter(np.concatenate([s["y_idx"] for s in dataset]))

Counter({np.int64(13): 21057,
         np.int64(19): 17916,
         np.int64(30): 17512,
         np.int64(17): 16751,
         np.int64(24): 16334,
         np.int64(34): 15933,
         np.int64(27): 15278,
         np.int64(6): 14390,
         np.int64(21): 13140,
         np.int64(37): 12880,
         np.int64(1): 12079,
         np.int64(8): 9792,
         np.int64(38): 5680,
         np.int64(31): 5606,
         np.int64(2): 5363,
         np.int64(35): 4972,
         np.int64(25): 4441,
         np.int64(14): 3817,
         np.int64(22): 3678,
         np.int64(9): 3132,
         np.int64(7): 2724,
         np.int64(20): 2592,
         np.int64(18): 2359,
         np.int64(28): 2064,
         np.int64(36): 1666,
         np.int64(0): 1140,
         np.int64(29): 768,
         np.int64(16): 658,
         np.int64(12): 474,
         np.int64(33): 440,
         np.int64(23): 435,
         np.int64(5): 348,
         np.int64(26): 220,
         np.int64(32): 219,
         np.int64(1

In [23]:
# ---- Check 1: X and y same length ----
for song in dataset:
    assert len(song["X"]) == len(song["y_idx"]), \
        f"Length mismatch in {song['id']}"

print("✓ All songs have matching X & y lengths.")


# ---- Check 2: No NaNs or infs ----
for song in dataset:
    assert np.isfinite(song["X"]).all(), f"Bad values in {song['id']}"

print("✓ All features are finite.")


# ---- Check 3: Label range correctness ----
max_label = max(chord_to_idx.values())
for song in dataset:
    labels = song["y_idx"]
    # Element-wise bounds test: unlike .min()/.max() it does not raise on a
    # zero-length label array, and the message names the offending song the
    # same way Checks 1 and 2 do.
    assert ((labels >= 0) & (labels <= max_label)).all(), \
        f"Label out of range in {song['id']}"

print("✓ All integer labels are valid.")


# ---- Check 4: Report dataset statistics ----
# Tally frames per integer label over the whole dataset.
label_counts = collections.Counter()

for song in dataset:
    # .tolist() converts NumPy integers to plain ints so the keys match idx_to_chord.
    label_counts.update(song["y_idx"].tolist())

print("Total frames:", sum(label_counts.values()))
print("Classes:", len(label_counts))

print("Top 10 most common labels:")
for idx, count in label_counts.most_common(10):
    print(idx_to_chord[idx], "→", count)

✓ All songs have matching X & y lengths.
✓ All features are finite.
✓ All integer labels are valid.
Total frames: 236280
Classes: 39
Top 10 most common labels:
C#:maj → 21057
D#:maj → 17916
F:maj → 17512
C:maj → 16751
E:maj → 16334
G#:maj → 15933
F#:maj → 15278
A:maj → 14390
D:maj → 13140
G:maj → 12880


In [24]:
# Save the per-song dataset under OUTPUT_DIR rather than a second hard-coded
# copy of the same path. `dataset` is a list of dicts, so NumPy has to store it
# as a pickled object array; reading it back therefore needs
# np.load(..., allow_pickle=True).
np.savez(OUTPUT_DIR / "per_song_dataset.npz", dataset=dataset)