In [None]:
import pandas as pd

In [None]:
# Load the MFCC feature file you just created
feat_df = pd.read_parquet("../data/processed/fma_small_mfcc.parquet")
# 1. Check the number of rows and columns
print("Rows, Columns:", feat_df.shape)
# 2. Inspect the column names – should be 'track_id' and 'feature'
print("Columns:", feat_df.columns.tolist())
# 3. Preview the first few rows
print(feat_df.head())
# 4. Confirm each 'feature' entry is a 26-length vector (13 MFCC means + 13 stds).
#    Null entries are skipped here: len(None) raises TypeError, which would abort
#    the cell before the null check below ever gets to report them.
lengths = feat_df["feature"].dropna().map(len)
print("Unique feature lengths:", lengths.unique())  # should be array([26])
# 5. Check for any missing values
print("Any null features:", feat_df["feature"].isnull().any())

In [None]:
import numpy as np
from pathlib import Path

In [None]:
# Read the two processed artifacts back in: the table that is queried below for
# each track's audio path, and the table of per-track MFCC feature vectors.
idx_df = pd.read_parquet(path="../data/processed/fma_small_index.parquet")
feat_df = pd.read_parquet(path="../data/processed/fma_small_mfcc.parquet")

def show_mfcc_summary(track_id: int, features: "pd.DataFrame | None" = None, n_mfcc: int = 13):
    """Print the per-coefficient MFCC means and standard deviations of one track.

    The stored feature vector is expected to be ``n_mfcc`` means followed by
    ``n_mfcc`` standard deviations (26 values with the default of 13).

    Parameters
    ----------
    track_id : int
        Value to look up in the ``track_id`` column.
    features : pandas.DataFrame, optional
        Table with ``track_id`` and ``feature`` columns. When omitted, the
        notebook-level ``feat_df`` is used.
    n_mfcc : int, default 13
        Number of MFCC coefficients; the vector is split at this position.

    Raises
    ------
    ValueError
        If the stored vector does not hold exactly ``2 * n_mfcc`` values, since
        splitting it anyway would print mislabeled numbers.

    Notes
    -----
    Prints a message and returns ``None`` when the track is not in the table.
    """
    if features is None:
        features = feat_df  # fall back to the table loaded at notebook level

    matches = features.loc[features["track_id"] == track_id]
    if matches.empty:
        print(f"Track {track_id} not found or failed decoding.")
        return

    vec = np.asarray(matches["feature"].iloc[0])
    # Validate before printing anything so a malformed row leaves no partial output.
    if vec.size != 2 * n_mfcc:
        raise ValueError(
            f"Track {track_id}: expected {2 * n_mfcc} feature values "
            f"({n_mfcc} means + {n_mfcc} stds), got {vec.size}."
        )
    means, stds = vec[:n_mfcc], vec[n_mfcc:]

    # Names for the first three coefficients; every later one is labeled "detail".
    low_order_labels = ("energy", "tilt", "curvature")

    print(f"Track {track_id}")
    print(f"MFCC means (0..{n_mfcc - 1}):")
    for i, x in enumerate(means):
        label = low_order_labels[i] if i < len(low_order_labels) else "detail"
        print(f"  mfcc[{i:2d}] ({label:9s}): {x: .3f}")
    print(f"MFCC stds (0..{n_mfcc - 1}):")
    for i, x in enumerate(stds):
        print(f"  std[{i:2d}]: {x: .3f}")

In [None]:
# Print the stored MFCC means and standard deviations for track 2
show_mfcc_summary(2)

In [None]:
%pip install matplotlib

In [None]:
import librosa, librosa.display
import matplotlib.pyplot as plt

In [None]:
def plot_track(track_id: int, sr=22050, n_mels: int = 128, n_mfcc: int = 13):
    """Plot the mel spectrogram and MFCCs of one track as two stacked panels.

    The audio path is looked up in the notebook-level ``idx_df`` (columns
    ``track_id`` and ``audio_path``), the file is decoded to mono, and the
    MFCCs are derived from the dB-scaled mel spectrogram.

    Parameters
    ----------
    track_id : int
        Value to look up in ``idx_df.track_id``.
    sr : number or None, default 22050
        Sample rate passed to ``librosa.load``; the rate it returns is used for
        the spectrogram and for both time axes.
    n_mels : int, default 128
        Number of mel bands in the spectrogram.
    n_mfcc : int, default 13
        Number of MFCC coefficients to compute and display.

    Notes
    -----
    Prints a message and returns ``None`` when the track is not in the index,
    instead of failing with an ``IndexError`` from ``.iloc[0]``.
    """
    paths = idx_df.loc[idx_df["track_id"] == track_id, "audio_path"]
    if paths.empty:
        print(f"Track {track_id} not found in index.")
        return

    y, sr = librosa.load(paths.iloc[0], sr=sr, mono=True)
    mel_power = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=n_mels)
    mel_db = librosa.power_to_db(mel_power, ref=np.max)
    mfcc = librosa.feature.mfcc(S=mel_db, n_mfcc=n_mfcc)

    fig, (ax_mel, ax_mfcc) = plt.subplots(2, 1, figsize=(10, 6))

    mel_img = librosa.display.specshow(
        mel_db, sr=sr, x_axis="time", y_axis="mel", ax=ax_mel
    )
    ax_mel.set_title(f"Mel spectrogram — track {track_id}")
    fig.colorbar(mel_img, ax=ax_mel)

    # sr must be passed here as well: specshow otherwise assumes 22050 Hz, which
    # mis-scales the time axis whenever a different sample rate is requested.
    mfcc_img = librosa.display.specshow(mfcc, sr=sr, x_axis="time", ax=ax_mfcc)
    ax_mfcc.set_title(f"MFCCs ({n_mfcc})")
    fig.colorbar(mfcc_img, ax=ax_mfcc)

    fig.tight_layout()
    plt.show()

In [None]:
# Render the mel spectrogram and MFCC panels for track 2
plot_track(2)