<a href="https://colab.research.google.com/github/felipednegredo/tcc-emocoes-musicais-codigo/blob/main/TCC_STFT.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# TCC ‚Äî Pipeline STFT e Fingerprint
- **1) Leitura e valida√ß√£o** (Parquet ‚Üí DataFrames + *schema checks* + `manifest`)
- **2) Pr√©-processamento de √°udio** (resample, normaliza√ß√£o de loudness simples e STFT)
- **3) Alinhamento emo√ß√£o √ó tempo** (blocos de 10s, padroniza√ß√£o e quadrantes)

> Observa√ß√£o: Ajuste os caminhos/nomes de colunas conforme seu DEAM (2013‚Äì2015) j√° convertido para Parquet.

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:

# --- imports ---
from __future__ import annotations
from pathlib import Path
from typing import Optional, List, Tuple, Dict, Any
import os, math
import numpy as np
import pandas as pd
import re
import matplotlib.pyplot as plt
import librosa as lb
import librosa.display as lbd
import plotly.graph_objects as go
from IPython.display import Audio, display, Markdown

# Depend√™ncias de √°udio (librosa √© opcional)
try:
    import librosa as lb
    _HAS_LIBROSA = True
except Exception:
    _HAS_LIBROSA = False

try:
    import soundfile as sf
except Exception:
    sf = None

SR_TARGET = 22050
N_FFT     = 2048
HOP       = 512
WINDOW    = "hann"
CENTER    = True

LABELS_4 = [
    "Feliz/Excitado",
    "Calmo/Relaxado",
    "Tenso / Raivoso",
    "Triste/Deprimido",
]

# Limites de threads (ajuste se necess√°rio)
os.environ["OMP_NUM_THREADS"] = "1"
os.environ["MKL_NUM_THREADS"] = "1"
os.environ["NUMBA_NUM_THREADS"] = "1"
os.environ["TOKENIZERS_PARALLELISM"] = "false"

# Librosa e cache
os.environ.setdefault("LIBROSA_CACHE_DIR", "/content/librosa_cache")
os.makedirs(os.environ["LIBROSA_CACHE_DIR"], exist_ok=True)

# Fun√ß√µes utilit√°rias
def free_mem(*vars_):
    for v in vars_:
        try: del v
        except: pass
    gc.collect()

def limit_df(df, n=20):
    try:
        from IPython.display import display
        display(df.head(n))
    except:
        print(df.head(n))

# ---- Caminhos (ajuste aqui) ----
BASE = Path("/content/drive/MyDrive/DataSet TCC/DEAM")
PARQ = BASE/"parquet"
AUDIO = BASE/"audio"

# Parquet esperados (ajuste nomes se necess√°rio)
P_DYN    = PARQ/"dynamic_labels.parquet"
P_STATIC = PARQ/"deam_static.parquet"
P_META   = PARQ/"deam_metadata.parquet"

# Manifest de sa√≠da
P_MANIFEST = PARQ/"manifest.parquet"

print("[paths]", "\n- DYN   =", P_DYN, "\n- STATIC=", P_STATIC, "\n- META  =", P_META, "\n- AUDIO =", AUDIO)


[paths] 
- DYN   = /content/drive/MyDrive/DataSet TCC/DEAM/parquet/dynamic_labels.parquet 
- STATIC= /content/drive/MyDrive/DataSet TCC/DEAM/parquet/deam_static.parquet 
- META  = /content/drive/MyDrive/DataSet TCC/DEAM/parquet/deam_metadata.parquet 
- AUDIO = /content/drive/MyDrive/DataSet TCC/DEAM/audio


# 1 - Leitura e Valida√ß√£o (schema + manifest)

### 1.1 - Utilit√°rios de valida√ß√£o

In [None]:
def debug_columns(df: pd.DataFrame, name: str = "DataFrame", sample: int = 5) -> None:
    """
    Mostra um resumo r√°pido e claro das colunas do DataFrame:
      - Nome das colunas
      - Tipos de dados (dtype)
      - Primeiras linhas
      - Colunas com valores nulos
    """
    print("="*80)
    print(f"[{name}]  shape={df.shape}")
    print("="*80)

    # lista de colunas e tipos
    cols = list(df.columns)
    print(f"üß© Total de colunas: {len(cols)}")
    print("üßæ Colunas (primeiras 30):")
    for c in cols[:30]:
        print(f"  - {c:30s} ({df[c].dtype})")
    if len(cols) > 30:
        print(f"  ... (+{len(cols)-30} colunas ocultas)")

    # amostra de dados
    print("\nüìä Amostra das primeiras linhas:")
    display(df.head(sample))

    # contagem de nulos
    nulls = df.isna().sum()
    nulls = nulls[nulls > 0]
    if not nulls.empty:
        print("\n‚ö†Ô∏è Colunas com valores nulos:")
        print(nulls)
    else:
        print("\n‚úÖ Nenhum valor nulo encontrado.")



def _ensure_song_id(df: pd.DataFrame) -> pd.DataFrame:
    """Garante coluna 'song_id' num√©rica (Int64). Tenta renomear se vier 'id' ou similar."""
    candidates = [c for c in df.columns if c.lower() in {"song_id","id","track_id"}]
    if candidates and candidates[0] != "song_id":
        df = df.rename(columns={candidates[0]: "song_id"})
    if "song_id" not in df.columns:
        raise KeyError("Coluna 'song_id' n√£o encontrada.")
    df["song_id"] = pd.to_numeric(df["song_id"], errors="coerce").astype("Int64")
    return df

def _assert_cols(df: pd.DataFrame, cols: list[str], name: str = "df"):
    missing = [c for c in cols if c not in df.columns]
    if missing:
        raise KeyError(f"{name}: colunas ausentes: {missing}")

def _find_wide_family(df: pd.DataFrame, prefix: str) -> list[str]:
    """
    Encontra colunas 'wide' para uma fam√≠lia (valence/arousal).
    Aceita varia√ß√µes:
      valence_15000ms, Valence-mean_15000ms, valence15000ms, valence_15s, VALENCE_15000
    Retorna lista ordenada por tempo crescente.
    """
    cols = []
    for c in df.columns:
        cl = c.lower()
        if prefix not in cl:
            continue
        # pega apenas sufixos com n√∫mero + (ms|s) opcional
        m = re.search(r'(\d+)\s*(ms|s)?$', cl)
        if m:
            cols.append(c)

    if not cols:
        return []

    def _time_key(cname: str):
        m = re.search(r'(\d+)\s*(ms|s)?$', cname.lower())
        if not m:
            return float('inf')
        val = int(m.group(1))
        unit = m.group(2) or "ms"  # se n√£o vier unidade, assume ms (caso legacy)
        # normaliza para MILISSEGUNDOS para ordenar
        return val if unit == "ms" else val*1000

    cols = sorted(cols, key=_time_key)
    return cols

def _ensure_song_id_any(df: pd.DataFrame, source_name: str = "df") -> pd.DataFrame:
    """
    Garante a coluna 'song_id' (Int64) no DataFrame, tentando:
      - colunas can√¥nicas: song_id, id, track_id, songid, song, track
      - extra√ß√£o a partir de 'filename' (pega d√≠gitos do nome do arquivo, ex.: '123.mp3' -> 123)
    Se nada encontrado, levanta erro com diagn√≥stico.
    """
    d = df.copy()
    cols_lower = {c.lower(): c for c in d.columns}

    # 1) candidatos diretos
    for key in ["song_id", "id", "track_id", "songid", "song", "track"]:
        if key in cols_lower:
            cand = cols_lower[key]
            if cand != "song_id":
                d = d.rename(columns={cand: "song_id"})
            d["song_id"] = pd.to_numeric(d["song_id"], errors="coerce").astype("Int64")
            return d

    # 2) partir do filename (se existir)
    fname_col = None
    for key in ["filename", "file", "path"]:
        if key in cols_lower:
            fname_col = cols_lower[key]
            break

    if fname_col is not None:
        # extrai primeiro grupo de d√≠gitos do nome (sem diret√≥rio)
        def _extract_id_from_filename(val):
            if pd.isna(val):
                return np.nan
            name = str(val)
            # pega s√≥ o nome base
            name = Path(name).name
            m = re.search(r"(\d+)", name)
            return int(m.group(1)) if m else np.nan

        d["song_id"] = d[fname_col].map(_extract_id_from_filename).astype("Int64")
        if d["song_id"].notna().any():
            return d

    # 3) desistiu: erro com diagn√≥stico
    raise KeyError(
        f"[{source_name}] n√£o encontrei 'song_id'. Colunas dispon√≠veis: {list(d.columns)[:30]}\n"
        "Tente renomear manualmente a coluna do ID para 'song_id' ou garantir que 'filename' contenha o n√∫mero da m√∫sica."
    )

def _melt_family(df: pd.DataFrame, family: str) -> pd.DataFrame:
    """
    Converte colunas wide da fam√≠lia para formato longo can√¥nico:
      -> (song_id, time [s], <family>)
    Nunca retorna None (sempre DataFrame, possivelmente vazio).
    """
    fam_cols = _find_wide_family(df, prefix=family.lower())
    if not fam_cols:
        return pd.DataFrame(columns=["song_id","time",family.lower()])

    long_df = df[["song_id"] + fam_cols].melt(
        id_vars="song_id",
        value_vars=fam_cols,
        var_name="__var",
        value_name=family.lower()
    )

    # extrai n√∫mero e unidade (ms|s)
    m = long_df["__var"].str.lower().str.extract(r'(?P<num>\d+)\s*(?P<unit>ms|s)?$')
    long_df["__num"]  = pd.to_numeric(m["num"], errors="coerce")
    long_df["__unit"] = m["unit"].fillna("ms")  # se faltou, assume ms

    # tempo em segundos
    long_df["time"] = np.where(long_df["__unit"].eq("s"), long_df["__num"], long_df["__num"]/1000.0)
    long_df = long_df.drop(columns=["__var","__num","__unit"])
    return long_df[["song_id","time",family.lower()]]

def _check_va_ranges(df: pd.DataFrame, v_col: str, a_col: str, name: str):
    if v_col in df.columns:
        vmin, vmax = float(df[v_col].min()), float(df[v_col].max())
        if not (0.0 <= vmax and vmin <= 10.0):
            print(f"[WARN] {name}: {v_col} fora do range t√≠pico [0..10] ‚Üí (min={vmin:.3f}, max={vmax:.3f})")
    if a_col in df.columns:
        vmin, vmax = float(df[a_col].min()), float(df[a_col].max())
        if not (0.0 <= vmax and vmin <= 10.0):
            print(f"[WARN] {name}: {a_col} fora do range t√≠pico [0..10] ‚Üí (min={vmin:.3f}, max={vmax:.3f})")

### 1.2 - Loaders can√¥nicos

In [None]:
def load_dyn(path: Path) -> pd.DataFrame:
    """
    Retorna df din√¢mico CAN√îNICO: ['song_id','time','valence','arousal'] (time em segundos).
    - LONGO: se achar 'time' + ('valence' e 'arousal' ou equivalentes).
    - WIDE:   derrete fam√≠lias 'valence_*' e 'arousal_*' (aceita ms ou s).
    Caso s√≥ exista uma fam√≠lia no wide, cria a outra como NaN e avisa.
    """
    df = pd.read_parquet(path)
    df = _ensure_song_id(df)

    # --- LONGO ---
    time_candidates = [c for c in df.columns if c.lower() in {"time","t","sec","second","timestamp","start_time","time_s"}]
    a_cols = [c for c in df.columns if "arous" in c.lower()]
    v_cols = [c for c in df.columns if "valen" in c.lower()]

    if time_candidates and (a_cols or v_cols):
        time_col = time_candidates[0]

        # preferir nomes "puros" se existirem
        def _pick(cols, pref=""):
            if not cols: return None
            if pref:
                for c in cols:
                    if c.lower() == pref:
                        return c
            return cols[0]

        a_col = _pick(a_cols, pref="arousal")
        v_col = _pick(v_cols, pref="valence")

        out = df[["song_id", time_col] + ([v_col] if v_col else []) + ([a_col] if a_col else [])].rename(
            columns={time_col: "time", (v_col or "valence"): "valence", (a_col or "arousal"): "arousal"}
        )
        # se alguma fam√≠lia n√£o existir, cria NaN
        if "valence" not in out.columns: out["valence"] = np.nan
        if "arousal" not in out.columns: out["arousal"] = np.nan

        out["time"]    = pd.to_numeric(out["time"], errors="coerce")
        out["valence"] = pd.to_numeric(out["valence"], errors="coerce")
        out["arousal"] = pd.to_numeric(out["arousal"], errors="coerce")
        _check_va_ranges(out, "valence", "arousal", "dynamic(long)")
        return out.sort_values(["song_id","time"]).reset_index(drop=True)

    # --- WIDE ---
    long_val = _melt_family(df, "valence")   # sempre DataFrame
    long_aro = _melt_family(df, "arousal")   # sempre DataFrame

    if not long_val.empty and not long_aro.empty:
        out = pd.merge(long_val, long_aro, on=["song_id","time"], how="outer")
    elif not long_val.empty and long_aro.empty:
        print("[WARN] dynamic(wide): encontrei apenas VALENCE_*. Gerando AROUSAL como NaN.")
        out = long_val.copy()
        out["arousal"] = np.nan
    elif long_val.empty and not long_aro.empty:
        print("[WARN] dynamic(wide): encontrei apenas AROUSAL_*. Gerando VALENCE como NaN.")
        out = long_aro.copy()
        out["valence"] = np.nan
    else:
        sample_like = [c for c in df.columns if "sample_" in c.lower() or c.lower().endswith(("ms","s"))]
        raise KeyError(
            "dynamic: n√£o encontrei 'time' nem fam√≠lias wide reconhec√≠veis.\n"
            f"- colunas com 'valen*': {v_cols[:10]}\n"
            f"- colunas com 'arous*': {a_cols[:10]}\n"
            f"- colunas com sufixo ms/s: {sample_like[:10]}\n"
            "Ajuste os nomes ou me envie as primeiras 30 colunas para eu adaptar a regex."
        )

    out["valence"] = pd.to_numeric(out.get("valence", np.nan), errors="coerce")
    out["arousal"] = pd.to_numeric(out.get("arousal", np.nan), errors="coerce")
    _check_va_ranges(out, "valence", "arousal", "dynamic(wide)")
    return out.sort_values(["song_id","time"]).reset_index(drop=True)

def load_static(path: Path) -> pd.DataFrame:
    df = pd.read_parquet(path)
    df = _ensure_song_id(df)
    # tenta localizar valence_mean/arousal_mean (ou similares)
    a_cols = [c for c in df.columns if "arous" in c.lower()]
    v_cols = [c for c in df.columns if "valen" in c.lower()]
    if not a_cols or not v_cols:
        print("[WARN] static: n√£o encontrei colunas de VA ‚Äî seguindo apenas com metadados.")
    else:
        _check_va_ranges(df, v_cols[0], a_cols[0], "static")
    return df

def load_meta(path: Path) -> pd.DataFrame:
    df = pd.read_parquet(path)
    # normaliza√ß√µes de nomes mais comuns
    ren = {}
    for c in df.columns:
        cl = c.lower()
        if cl in {"filename","file","path"}: ren[c] = "filename"
        elif cl in {"artist","autor"}:       ren[c] = "artist"
        elif cl in {"title","track","song"}: ren[c] = "title"
        elif cl == "album":                  ren[c] = "album"
        elif cl in {"genre","style"}:        ren[c] = "genre"
    if ren: df = df.rename(columns=ren)

    df = _ensure_song_id_any(df, source_name="meta")
    return df

### 1.3 - Manifest (can√¥nico)

In [None]:
def build_manifest(df_meta: pd.DataFrame, df_dyn: pd.DataFrame, df_static: pd.DataFrame) -> pd.DataFrame:
    # Garante que 'song_id' exista como COLUNA (n√£o √≠ndice)
    for name, d in (("meta", df_meta), ("dynamic", df_dyn), ("static", df_static)):
        if "song_id" not in d.columns:
            # caso raro: se veio como √≠ndice
            if getattr(d.index, "name", None) == "song_id":
                d.reset_index(inplace=True)
            else:
                raise KeyError(f"[{name}] sem coluna 'song_id' (nem no √≠ndice).")

    # Reduz o meta ao essencial (se dispon√≠vel)
    cols_meta = [c for c in ["song_id","filename","title","artist","album","genre","origem"] if c in df_meta.columns]
    if not cols_meta:
        cols_meta = ["song_id"]
    dfm = df_meta[cols_meta].drop_duplicates(subset=["song_id"]).copy()

    # has_dyn / has_static com song_id como COLUNA
    have_dyn    = df_dyn[["song_id"]].drop_duplicates().assign(has_dyn=True)
    have_static = df_static[["song_id"]].drop_duplicates().assign(has_static=True)

    man = (
        dfm
        .merge(have_dyn,    on="song_id", how="left")
        .merge(have_static, on="song_id", how="left")
    )
    man["has_dyn"]    = man["has_dyn"].fillna(False)
    man["has_static"] = man["has_static"].fillna(False)

    # dura√ß√£o por m√∫sica (se houver 'time' no din√¢mico)
    if "time" in df_dyn.columns:
        dur = df_dyn.groupby("song_id", as_index=False)["time"].max().rename(columns={"time": "duration_s"})
        man = man.merge(dur, on="song_id", how="left")

    # Tipagem opcional
    man["song_id"] = man["song_id"].astype("Int64")
    return man


### 1.4 - Execu√ß√£o Pipeline

In [None]:
df_dyn    = load_dyn(P_DYN)    # sua fun√ß√£o load_dyn (a robusta que criamos)
df_static = load_static(P_STATIC)
df_meta   = load_meta(P_META)

debug_columns(df_meta, "df_meta")        # opcional
debug_columns(df_static, "df_static")    # opcional
debug_columns(df_dyn, "df_dyn")          # opcional

manifest = build_manifest(df_meta, df_dyn, df_static)
print("linhas:", len(manifest), "| NaNs em song_id:", manifest["song_id"].isna().sum())
display(manifest.head(10))

[df_meta]  shape=(1773, 7)
üß© Total de colunas: 7
üßæ Colunas (primeiras 30):
  - song_id                        (Int64)
  - filename                       (string)
  - title                          (string)
  - artist                         (string)
  - album                          (string)
  - genre                          (string)
  - origem                         (string)

üìä Amostra das primeiras linhas:


Unnamed: 0,song_id,filename,title,artist,album,genre,origem
0,2,2.mp3,Tonight A Lonely Century,The New Mystikal Troubadours,,Blues,metadata_2013.csv
1,3,3.mp3,DD Groove,Kevin MacLeod,,Blues,metadata_2013.csv
2,4,4.mp3,Slow Burn,Kevin MacLeod,,Blues,metadata_2013.csv
3,5,5.mp3,Nothing Much,My Bubba & Mi,,Blues,metadata_2013.csv
4,7,7.mp3,Hustle,Kevin MacLeod,,Blues,metadata_2013.csv



‚ö†Ô∏è Colunas com valores nulos:
album    775
dtype: int64
[df_static]  shape=(1802, 5)
üß© Total de colunas: 5
üßæ Colunas (primeiras 30):
  - song_id                        (Int64)
  - arousal_mean                   (float32)
  - arousal_std                    (float32)
  - valence_mean                   (float32)
  - valence_std                    (float32)

üìä Amostra das primeiras linhas:


Unnamed: 0,song_id,arousal_mean,arousal_std,valence_mean,valence_std
0,2,3.0,0.63,3.1,0.94
1,3,3.3,1.62,3.5,1.75
2,4,5.5,1.63,5.7,1.42
3,5,5.3,1.85,4.4,2.01
4,7,6.4,1.69,5.8,1.47



‚úÖ Nenhum valor nulo encontrado.
[df_dyn]  shape=(2205648, 4)
üß© Total de colunas: 4
üßæ Colunas (primeiras 30):
  - song_id                        (Int64)
  - time                           (float64)
  - valence                        (float64)
  - arousal                        (float64)

üìä Amostra das primeiras linhas:


Unnamed: 0,song_id,time,valence,arousal
0,2,15.0,-0.073341,-0.109386
1,2,15.5,-0.074661,-0.114942
2,2,16.0,-0.074077,-0.116413
3,2,16.5,-0.078154,-0.118613
4,2,17.0,-0.081588,-0.126457



‚ö†Ô∏è Colunas com valores nulos:
valence    2075650
arousal    2075649
dtype: int64
linhas: 1773 | NaNs em song_id: 0


Unnamed: 0,song_id,filename,title,artist,album,genre,origem,has_dyn,has_static,duration_s
0,2,2.mp3,Tonight A Lonely Century,The New Mystikal Troubadours,,Blues,metadata_2013.csv,True,True,626.5
1,3,3.mp3,DD Groove,Kevin MacLeod,,Blues,metadata_2013.csv,True,True,626.5
2,4,4.mp3,Slow Burn,Kevin MacLeod,,Blues,metadata_2013.csv,True,True,626.5
3,5,5.mp3,Nothing Much,My Bubba & Mi,,Blues,metadata_2013.csv,True,True,626.5
4,7,7.mp3,Hustle,Kevin MacLeod,,Blues,metadata_2013.csv,True,True,626.5
5,8,8.mp3,Dirt Rhodes,Kevin MacLeod,,Blues,metadata_2013.csv,True,True,626.5
6,10,10.mp3,Stormy Blues,Arne Bang Huseby,,Blues,metadata_2013.csv,True,True,626.5
7,12,12.mp3,Rebel Blues,Sul Rebel,,Blues,metadata_2013.csv,True,True,626.5
8,13,13.mp3,Oh Kiss No,My Bubba & Mi,,Blues,metadata_2013.csv,True,True,626.5
9,17,17.mp3,Fjcb Reddit Gtr Fills Mix,Phoephus,,Blues,metadata_2013.csv,True,True,626.5


# 2 - Pr√©-processamento de √Åudio (resample, normaliza√ß√£o e STFT)

### 2.1 - Normaliza√ß√£o

In [None]:
def rms_dbfs(y: np.ndarray, eps: float = 1e-12) -> float:
    r = np.sqrt(np.mean(np.square(y)) + eps)
    return 20.0 * np.log10(max(r, eps))

def normalize_rms(y: np.ndarray, target_dbfs: float = -20.0, eps: float = 1e-12) -> np.ndarray:
    """Normaliza√ß√£o simples por RMS para aproximar loudness. Evita clipping com *soft-limit*."""
    cur = rms_dbfs(y, eps=eps)
    gain_db = float(target_dbfs - cur)
    gain = 10.0 ** (gain_db / 20.0)
    out = y * gain
    # soft-limit: evita clip duro
    max_abs = np.max(np.abs(out)) if out.size else 0.0
    if max_abs > 0.999:
        out = out / (max_abs + 1e-9) * 0.999
    return out

### 2.2 - Leitura de √°udio + resample

In [None]:
def load_audio(path: Path, sr_target: int = 22050, mono: bool = True) -> Tuple[np.ndarray, int]:
    if _HAS_LIBROSA:
        y, sr = lb.load(str(path), sr=sr_target, mono=mono)
        return y.astype(np.float32), int(sr)
    elif sf is not None:
        y, sr = sf.read(str(path), always_2d=False)
        y = y.astype(np.float32)
        if mono and y.ndim == 2:
            y = np.mean(y, axis=1)
        if sr_target and sr != sr_target and _HAS_LIBROSA:
            y = lb.resample(y, orig_sr=sr, target_sr=sr_target)
            sr = sr_target
        return y, int(sr)
    else:
        raise RuntimeError("Nenhuma biblioteca de √°udio dispon√≠vel (librosa/soundfile). Instale-as.")

### 2.3 - STFT

In [None]:
def compute_stft(y: np.ndarray, sr: int, n_fft: int = 2048, hop_length: int = 512,
                 window: str = "hann", center: bool = True, to_db: bool = True) -> Dict[str, Any]:
    if not _HAS_LIBROSA:
        raise RuntimeError("librosa √© necess√°ria para STFT.")
    S = lb.stft(y, n_fft=n_fft, hop_length=hop_length, window=window, center=center)
    mag = np.abs(S)
    if to_db:
        mag = lb.amplitude_to_db(mag, ref=np.max)
    times = lb.frames_to_time(np.arange(mag.shape[1]), sr=sr, hop_length=hop_length, n_fft=n_fft)
    freqs = lb.fft_frequencies(sr=sr, n_fft=n_fft)
    meta = dict(sr=sr, n_fft=n_fft, hop_length=hop_length, window=window, center=center, scale_db=to_db)
    return {"spec": mag, "times": times, "freqs": freqs, "meta": meta}

### 2.4 - Carregamento de arquivo

In [None]:
def preprocess_file(audio_path: Path, sr_target: int = 22050, target_dbfs: float = -20.0) -> Dict[str, Any]:
    y, sr = load_audio(audio_path, sr_target=sr_target)
    y = normalize_rms(y, target_dbfs=target_dbfs)
    st = compute_stft(y, sr=sr)
    return {"y": y, "sr": sr, **st}

# print("Fun√ß√µes de pr√©-processamento prontas. Use 'preprocess_file(AUDIO/"<arquivo>")'.")

# 3 - Alinhamento Emo√ß√£o √ó Tempo (blocos de 10s + padroniza√ß√£o + quadrantes)

### 3.1 - Padroniza√ß√£o por m√∫sica ou global

In [None]:
def standardize_va(df: pd.DataFrame, by: str = "per_song") -> pd.DataFrame:
    """Adiciona colunas 'valence_z' e 'arousal_z'."""
    d = df.copy()
    if by == "per_song":
        d["valence_z"] = d.groupby("song_id")["valence"].transform(lambda s: (s - s.mean()) / (s.std(ddof=0) + 1e-9))
        d["arousal_z"] = d.groupby("song_id")["arousal"].transform(lambda s: (s - s.mean()) / (s.std(ddof=0) + 1e-9))
    elif by == "global":
        d["valence_z"] = (d["valence"] - d["valence"].mean()) / (d["valence"].std(ddof=0) + 1e-9)
        d["arousal_z"] = (d["arousal"] - d["arousal"].mean()) / (d["arousal"].std(ddof=0) + 1e-9)
    else:
        raise ValueError("by deve ser 'per_song' ou 'global'")
    return d

### 3.2 - Discretiza√ß√£o em quadrantes

In [None]:
def quadrant_label(v: float, a: float, v_thr: float = 0.0, a_thr: float = 0.0) -> Tuple[str, str]:
    """Retorna (zona, r√≥tulo curto) dados V/A padronizados (z-score)."""
    if np.isnan(v) or np.isnan(a):
        return ("Indefinido", "NA")
    if v >= v_thr and a >= a_thr:
        return ("Alegre / Energ√©tico", "Feliz/Excitado")
    if v >= v_thr and a <  a_thr:
        return ("Calmo / Relaxado", "Calmo/Relaxado")
    if v <  v_thr and a >= a_thr:
        return ("Tenso / Raivoso", "Raiva/Tenso")
    return ("Triste / Melanc√≥lico", "Triste/Deprimido")

### 3.3 - Agrega√ß√£o por blocos de 10s alinhados ao t0 dos r√≥tulos

In [None]:
def make_blocks(df_dyn: pd.DataFrame, window_sec: float = 10.0,
                standardize: str = "per_song",
                v_thr: float = 0.0, a_thr: float = 0.0,
                agg: str = "mean") -> pd.DataFrame:
    """
    Espera df_dyn com colunas: ['song_id','time','valence','arousal'].
    - Alinha blocos a partir do 'time' m√≠nimo de cada m√∫sica.
    - Faz padroniza√ß√£o (z-score) per_song ou global.
    - Agrega por janela (mean/median).
    - Rotula quadrantes com base em limiares (por padr√£o 0,0 em z-score).
    """
    _assert_cols(df_dyn, ["song_id","time","valence","arousal"], "df_dyn")
    d = df_dyn.dropna(subset=["time","valence","arousal"]).copy()

    # padroniza√ß√£o
    d = standardize_va(d, by=standardize)

    # t0 por m√∫sica
    t0 = d.groupby("song_id")["time"].min().rename("t0").to_frame()
    d = d.merge(t0, on="song_id", how="left")
    d["t_adj"] = d["time"] - d["t0"]

    # √≠ndice de janela
    d["block_idx"] = (d["t_adj"] // window_sec).astype(int)
    grp = d.groupby(["song_id","block_idx"], as_index=False)

    if agg == "mean":
        agg_df = grp.agg({
            "t_adj": ["min","max"],
            "valence": "mean",
            "arousal": "mean",
            "valence_z": "mean",
            "arousal_z": "mean",
        })
    elif agg == "median":
        agg_df = grp.agg({
            "t_adj": ["min","max"],
            "valence": "median",
            "arousal": "median",
            "valence_z": "median",
            "arousal_z": "median",
        })
    else:
        raise ValueError("agg deve ser 'mean' ou 'median'")

    # achatando MultiIndex de colunas
    agg_df.columns = ["_".join([c for c in col if c]) if isinstance(col, tuple) else col for col in agg_df.columns]
    # renomear campos de tempo
    agg_df = agg_df.rename(columns={
        "t_adj_min": "t_start_s",
        "t_adj_max": "t_end_s",
        "valence_mean": "mean_valence",
        "arousal_mean": "mean_arousal",
        "valence_z_mean": "mean_valence_z",
        "arousal_z_mean": "mean_arousal_z",
        "valence_median": "median_valence",
        "arousal_median": "median_arousal",
        "valence_z_median": "median_valence_z",
        "arousal_z_median": "median_arousal_z",
    })

    # escolher colunas finais (preferindo mean_* se existirem)
    use_val = "mean_valence" if "mean_valence" in agg_df.columns else "median_valence"
    use_aro = "mean_arousal" if "mean_arousal" in agg_df.columns else "median_arousal"
    use_vz  = "mean_valence_z" if "mean_valence_z" in agg_df.columns else "median_valence_z"
    use_az  = "mean_arousal_z" if "mean_arousal_z" in agg_df.columns else "median_arousal_z"

    # r√≥tulos
    zones, labels = [], []
    for v, a in zip(agg_df[use_vz].to_numpy(), agg_df[use_az].to_numpy()):
        z, l = quadrant_label(v, a, v_thr=v_thr, a_thr=a_thr)
        zones.append(z); labels.append(l)

    out = pd.DataFrame({
        "song_id": agg_df["song_id"],
        "block_idx": agg_df["block_idx"],
        "t_start_s": agg_df["t_start_s"],
        "t_end_s": agg_df["t_end_s"],
        "valence": agg_df[use_val],
        "arousal": agg_df[use_aro],
        "valence_z": agg_df[use_vz],
        "arousal_z": agg_df[use_az],
        "emotion_zone": zones,
        "emotion_label": labels,
    })
    return out

print("Fun√ß√µes de alinhamento e rotulagem prontas: standardize_va, make_blocks.")


Fun√ß√µes de alinhamento e rotulagem prontas: standardize_va, make_blocks.


In [None]:
blocks = make_blocks(df_dyn, window_sec=10.0, standardize='per_song', v_thr=0.0, a_thr=0.0, agg='mean')
display(blocks.head(12))

Unnamed: 0,song_id,block_idx,t_start_s,t_end_s,valence,arousal,valence_z,arousal_z,emotion_zone,emotion_label
0,2,0,0.0,9.5,-0.08514,-0.137974,1.29059,0.999989,Alegre / Energ√©tico,Feliz/Excitado
1,2,1,10.0,19.5,-0.241535,-0.191833,-0.257625,0.095459,Tenso / Raivoso,Raiva/Tenso
2,2,2,20.0,29.5,-0.319857,-0.262745,-1.032966,-1.095448,Triste / Melanc√≥lico,Triste/Deprimido
3,3,0,0.0,9.5,-0.20846,-0.153975,1.261746,0.921563,Alegre / Energ√©tico,Feliz/Excitado
4,3,1,10.0,19.5,-0.287444,-0.178349,-0.474599,0.348712,Tenso / Raivoso,Raiva/Tenso
5,3,2,20.0,29.5,-0.301661,-0.247237,-0.787147,-1.270275,Triste / Melanc√≥lico,Triste/Deprimido
6,4,0,0.0,9.5,0.118796,0.17656,-0.788048,-1.189614,Triste / Melanc√≥lico,Triste/Deprimido
7,4,1,10.0,19.5,0.146558,0.249613,-0.187227,0.116979,Tenso / Raivoso,Raiva/Tenso
8,4,2,20.0,29.5,0.200275,0.303045,0.975275,1.072634,Alegre / Energ√©tico,Feliz/Excitado
9,5,0,0.0,9.5,0.160234,-0.24692,0.916394,-1.142204,Calmo / Relaxado,Calmo/Relaxado


# 4. Analise das Emo√ß√µes

### 4.1 Utils

In [None]:
def _drop_allnan_cols(df: pd.DataFrame) -> pd.DataFrame:
    """Remove colunas 100% NaN (comum em CSVs grandes)."""
    return df.loc[:, ~df.isna().all(axis=0)]

_time_re = re.compile(r'^(arousal|valence)_(\d+)ms$', re.IGNORECASE)

def _extract_time_ms(cols):
    """Retorna dict: {'arousal': {t_ms: colname, ...}, 'valence': {...}}"""
    maps = {'arousal': {}, 'valence': {}}
    for c in cols:
        m = _time_re.match(c)
        if m:
            kind = m.group(1).lower()
            t_ms = int(m.group(2))
            maps[kind][t_ms] = c
    return maps

def _standardize_series(x):
    """z-score seguro (evita div/0)."""
    m = np.nanmean(x)
    s = np.nanstd(x)
    if s == 0 or np.isnan(s):
        return np.zeros_like(x, dtype=float)
    return (x - m) / s

def _emotion_zone_label(v, a, thr=0.0):
    """Quadrantes (simples) com threshold = 0 (z-score) ou outro valor."""
    if pd.isna(v) or pd.isna(a):
        return ("NA","Indefinido")
    if v >= thr and a >= thr:
        return ("Q1","Feliz/Excitado")
    if v >= thr and a <  thr:
        return ("Q2","Calmo/Relaxado")
    if v <  thr and a >= thr:
        return ("Q3","Raiva/Tenso")
    return ("Q4","Triste/Deprimido")

In [None]:
def make_blocks_from_df_dyn(df_dyn: pd.DataFrame, song_id: int,
                            window_sec: float = 10.0, agg: str = 'mean',
                            standardize: str = 'per_song',
                            drop_all_nan_cols: bool = True) -> pd.DataFrame:
    row = df_dyn.loc[df_dyn["song_id"].astype("Int64") == song_id]
    if row.empty:
        raise ValueError(f"song_id {song_id} n√£o encontrado no df_dyn.")
    row = row.copy()
    if drop_all_nan_cols: row = _drop_allnan_cols(row)

    maps = _extract_time_ms(row.columns)
    t_common = sorted(set(maps['arousal']).intersection(maps['valence']))
    if not t_common:
        raise ValueError(f"Sem interse√ß√£o de tempos A/V para song_id={song_id}.")

    r0 = row.iloc[0]
    arousal = np.array([r0[maps['arousal'][t]] for t in t_common], dtype=float)
    valence = np.array([r0[maps['valence'][t]] for t in t_common], dtype=float)
    t_s     = np.array(t_common, dtype=int) / 1000.0

    df_long = pd.DataFrame({"song_id": song_id, "t_s": t_s,
                            "arousal": arousal, "valence": valence}).sort_values("t_s")
    if standardize == 'per_song':
        df_long["arousal_z"] = _standardize_series(df_long["arousal"].values)
        df_long["valence_z"] = _standardize_series(df_long["valence"].values)
    else:
        df_long["arousal_z"] = np.nan
        df_long["valence_z"] = np.nan

    if df_long.empty: raise ValueError("df_long vazio.")
    t_min, t_max = float(df_long["t_s"].min()), float(df_long["t_s"].max())
    edges = np.arange(t_min, t_max + window_sec, window_sec)
    if len(edges) < 2: edges = np.array([t_min, t_min + window_sec])

    agg_func = {'mean': np.nanmean, 'median': np.nanmedian, 'max': np.nanmax, 'min': np.nanmin}.get(agg, np.nanmean)

    blocks = []
    for i in range(len(edges)-1):
        a, b = edges[i], edges[i+1]
        sel = (df_long["t_s"] >= a) & (df_long["t_s"] < b)
        if not sel.any(): continue
        v   = df_long.loc[sel, "valence"].values
        ar  = df_long.loc[sel, "arousal"].values
        vz  = df_long.loc[sel, "valence_z"].values
        az  = df_long.loc[sel, "arousal_z"].values

        v_agg  = float(agg_func(v))  if v.size  else np.nan
        a_agg  = float(agg_func(ar)) if ar.size else np.nan
        vz_agg = float(agg_func(vz)) if np.isfinite(vz).any() else np.nan
        az_agg = float(agg_func(az)) if np.isfinite(az).any() else np.nan

        zone, label = _emotion_zone_label(vz_agg, az_agg, thr=0.0)

        blocks.append({
            "song_id":      song_id,
            "block_idx":    i,
            "t_start_s":    a,
            "t_end_s":      b,
            # >>>>>>> garante salvar valence e arousal (e z-scores) <<<<<<<
            "valence":      v_agg,
            "arousal":      a_agg,
            "valence_z":    vz_agg,
            "arousal_z":    az_agg,
            "emotion_zone":  zone,
            "emotion_label": label
        })
    bs = pd.DataFrame(blocks)
    if not bs.empty:
        for c, dt in {"song_id":"Int64","block_idx":"Int64"}.items():
            if c in bs.columns:
                with pd.option_context('mode.chained_assignment', None):
                    try: bs[c] = bs[c].astype(dt)
                    except: pass
        bs = bs[["song_id","block_idx","t_start_s","t_end_s",
                 "valence","arousal","valence_z","arousal_z",
                 "emotion_zone","emotion_label"]]
    return bs

In [None]:
def label_colors(labels: List[str]) -> Dict[str, str]:
    base = plt.rcParams['axes.prop_cycle'].by_key().get('color', ['C0','C1','C2','C3','C4','C5'])
    return {lab: base[i % len(base)] for i, lab in enumerate(labels)}

EMO_COLORS = label_colors(LABELS_4)
EMO_COLORS


{'Feliz/Excitado': '#1f77b4',
 'Calmo/Relaxado': '#ff7f0e',
 'Tenso / Raivoso': '#2ca02c',
 'Triste/Deprimido': '#d62728'}

In [None]:
def render_song_header(row_meta: pd.Series, emotion_general: str, emotion_specific: str):
    title  = str(row_meta.get("title", f"song_id={row_meta.get('song_id','?')}"))
    artist = str(row_meta.get("artist", "‚Äî"))
    genre  = str(row_meta.get("genre",  "‚Äî"))
    sid    = int(row_meta.get("song_id"))
    md = f"""
### üéµ {title} ‚Äî *{artist}*  `({genre})`
**song_id:** `{sid}`
**Emo√ß√£o geral:** **{emotion_general}**
**Emo√ß√£o espec√≠fica:** **{emotion_specific}**
"""
    display(Markdown(md))

# ==== EXPORTA√á√ÉO: CSV + FIGURAS + WAVs ========================================
def export_blocks_csv(blocks_song: pd.DataFrame, out_path: Path) -> Path:
    out_path.parent.mkdir(parents=True, exist_ok=True)
    blocks_song.to_csv(out_path, index=False)
    return out_path

def export_plotly(fig, out_html: Path, out_png: Path | None = None):
    out_html.parent.mkdir(parents=True, exist_ok=True)
    fig.write_html(str(out_html), include_plotlyjs="cdn", full_html=True)
    if out_png is not None:
        try:
            fig.write_image(str(out_png), scale=2)  # requer kaleido
        except Exception as e:
            print(f"[WARN] PNG n√£o salvo (kaleido ausente?): {e}")
    return out_html

def export_segments_wav(segments: list[dict], out_dir: Path, prefix: str="seg"):
    out_dir.mkdir(parents=True, exist_ok=True)
    saved = []
    for s in segments:
        t0, t1, lab, sid = s["t0"], s["t1"], s["label"], s.get("song_id", None)
        fname = f"{prefix}_id{sid or 'NA'}_{int(round(t0*1000))}-{int(round(t1*1000))}_{lab}.wav"
        p = out_dir / fname.replace(" ", "_").replace("/", "-")
        sf.write(str(p), s["y"], s["sr"], subtype="PCM_16")
        saved.append(p)
    return saved

In [None]:
def play_block_audio(segments: List[Dict], title_prefix: str = "Trecho"):
    """
    Exibe players de √°udio (um por bloco).
    """
    for seg in segments:
        title = f"{title_prefix} #{seg['idx']} ‚Äî {seg['label']} ‚Äî {seg['t0']:.1f}s‚Äì{seg['t1']:.1f}s"
        print(title)
        display(Audio(seg["y"], rate=seg["sr"]))

In [None]:
def _compute_stft_to_db(y, sr, n_fft=2048, hop=512, window="hann", center=True):
    """
    Usa o SEU compute_stft (qualquer formato de retorno) e devolve sempre:
        (S_db, times, freqs)
    Compat√≠vel com dicts, tuplas e arrays NumPy.
    """
    if "compute_stft" not in globals():
        raise RuntimeError("compute_stft n√£o est√° definido no ambiente.")

    # 1) Chama o seu compute_stft
    try:
        out = compute_stft(y, sr, n_fft=n_fft, hop_length=hop, window=window, center=center)
    except TypeError:
        out = compute_stft(y, sr, n_fft=n_fft, hop=hop, window=window, center=center)

    # 2) Extrai os campos
    S = times = freqs = None
    if isinstance(out, dict):
        # acessa as chaves de forma segura
        for key in ["spec", "S", "stft", "magnitude"]:
            if key in out and isinstance(out[key], np.ndarray):
                S = out[key]; break
        if "times" in out: freqs = out["freqs"] if "freqs" in out else None
        if "freqs" in out: times = out["times"] if "times" in out else None
        # corrige se invertido
        if times is None and "times" in out: times = out["times"]
        if freqs is None and "freqs" in out: freqs = out["freqs"]

    elif isinstance(out, (list, tuple)):
        if len(out) < 1:
            raise ValueError("compute_stft retornou lista vazia.")
        S = out[0]
        times = out[1] if len(out) > 1 else None
        freqs = out[2] if len(out) > 2 else None
    else:
        raise ValueError(f"Retorno de compute_stft n√£o reconhecido (tipo {type(out)}).")

    if S is None:
        raise ValueError("compute_stft n√£o retornou espectro em 'spec' ou 'S'.")

    S = np.asarray(S)

    # 3) Se n√£o veio times/freqs, calcula
    if times is None:
        times = lb.frames_to_time(np.arange(S.shape[1]), sr=sr, hop_length=hop, n_fft=n_fft)
    if freqs is None:
        freqs = lb.fft_frequencies(sr=sr, n_fft=n_fft)

    # 4) Converter para dB se necess√°rio
    S_min, S_max = float(np.nanmin(S)), float(np.nanmax(S))
    looks_like_db = (S_max <= 5.0) and (S_min < -10.0)
    if looks_like_db:
        S_db = S
    else:
        S_db = lb.amplitude_to_db(np.abs(S), ref=np.max)

    return S_db, np.asarray(times), np.asarray(freqs)

In [None]:
def _infer_song_id_from_filename(p: Path) -> int | None:
    """Extrai o primeiro n√∫mero do nome do arquivo (ex.: '2001.mp3' -> 2001)."""
    import re
    m = re.search(r"(\d+)", p.name)
    return int(m.group(1)) if m else None

def _emotion_from_static_row(row: pd.Series) -> tuple[str,str]:
    """
    Fallback se n√£o houver 'emotion_general' / 'emotion_specific':
    usa valence_mean/arousal_mean e mapeia para (zona, r√≥tulo).
    """
    def quadrant_label(v, a, v_thr=5.0, a_thr=5.0):
        if pd.isna(v) or pd.isna(a): return ("Indefinido", "NA")
        if v >= v_thr and a >= a_thr: return ("Alegre / Energ√©tico", "Feliz/Excitado")
        if v >= v_thr and a <  a_thr: return ("Calmo / Relaxado", "Calmo/Relaxado")
        if v <  v_thr and a >= a_thr: return ("Tenso / Raivoso", "Raiva/Tenso")
        return ("Triste / Melanc√≥lico", "Triste/Deprimido")
    v = row.get("valence_mean", np.nan)
    a = row.get("arousal_mean", np.nan)
    return quadrant_label(v, a)

def _clip_blocks_to_audio(blocks_song: pd.DataFrame, y: np.ndarray, sr: int) -> pd.DataFrame:
    """Corta blocos que escapem do √°udio e remove degenerados."""
    dur_s = len(y) / sr
    bs = blocks_song.copy()
    bs["t_start_s"] = bs["t_start_s"].clip(lower=0, upper=dur_s)
    bs["t_end_s"]   = bs["t_end_s"].clip(lower=0, upper=dur_s)
    bs = bs[bs["t_end_s"] > bs["t_start_s"]].reset_index(drop=True)
    return bs

#### 4.1.1 Figuras

In [None]:
# Vers√£o fig do espectrograma com blocos
import plotly.graph_objects as go

def plot_spectrogram_with_blocks_fig(
    y, sr, blocks_song, title="Espectrograma + Blocos (10s)",
    emo_colors=None, alpha_span=0.25,
    n_fft=2048, hop=512, window="hann", center=True,
    zmin=None, zmax=None, colorscale="Viridis"
):
    S_db, times, freqs = _compute_stft_to_db(y, sr, n_fft=n_fft, hop=hop, window=window, center=center)
    if zmin is None: zmin = float(np.nanpercentile(S_db, 1))
    if zmax is None: zmax = float(np.nanpercentile(S_db, 99))

    # paleta
    if emo_colors is None:
        base = ["#1f77b4","#ff7f0e","#2ca02c","#d62728",
                "#9467bd","#8c564b","#e377c2","#7f7f7f","#bcbd22","#17becf"]
        uniq = list(dict.fromkeys(blocks_song["emotion_label"].dropna().tolist()))
        emo_colors = {lab: base[i % len(base)] for i, lab in enumerate(uniq)}

    fig = go.Figure(data=go.Heatmap(
        x=times, y=freqs, z=S_db,
        zmin=zmin, zmax=zmax, colorscale=colorscale,
        colorbar=dict(title="Amplitude (dB)")
    ))

    df = blocks_song.copy()
    df = df[df["t_end_s"] > df["t_start_s"]].reset_index(drop=True)
    for _, r in df.iterrows():
        t0, t1 = float(r["t_start_s"]), float(r["t_end_s"])
        lab = r.get("emotion_label", "NA")
        color = emo_colors.get(lab, "#7f7f7f")
        fig.add_vrect(x0=t0, x1=t1, fillcolor=color, opacity=float(alpha_span), line_width=0, layer="above")
        fig.add_vline(x=t0, line=dict(width=1, dash="dash", color=color), layer="above")
        fig.add_vline(x=t1, line=dict(width=1, dash="dash", color=color), layer="above")

    for lab, color in emo_colors.items():
        fig.add_trace(go.Scatter(x=[None], y=[None], mode="markers",
                                 marker=dict(size=10, color=color),
                                 name=lab, hoverinfo="skip", showlegend=True))

    fig.update_layout(
        title=title, xaxis_title="Tempo (s)", yaxis_title="Frequ√™ncia (Hz)",
        template="plotly_white", width=1000, height=500,
        margin=dict(l=60, r=30, t=60, b=50),
        legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1)
    )
    fig.update_yaxes(type="linear")
    return fig

# Vers√£o fig da timeline
def plot_block_timeline_fig(blocks_song, title="Timeline de Blocos (10s)", emo_colors=None, height=1.2):
    df = blocks_song.copy().reset_index(drop=True)
    df["dur_s"] = (df["t_end_s"] - df["t_start_s"]).astype(float)
    df = df[df["dur_s"] > 0].copy()
    if df.empty:
        raise ValueError("Nenhum bloco v√°lido (dura√ß√£o > 0) para plotar.")

    if emo_colors is None:
        uniq = list(dict.fromkeys(df["emotion_label"].dropna().tolist()))
        base = ["#1f77b4","#ff7f0e","#2ca02c","#d62728",
                "#9467bd","#8c564b","#e377c2","#7f7f7f","#bcbd22","#17becf"]
        emo_colors = {lab: base[i % len(base)] for i, lab in enumerate(uniq)}

    y_lane = 0.5
    fig = go.Figure()
    for i, r in df.iterrows():
        t0, dur, lab = float(r["t_start_s"]), float(r["dur_s"]), r.get("emotion_label", "NA")
        color = emo_colors.get(lab, "#7f7f7f")
        custom = dict(t0=t0, t1=float(r["t_end_s"]), lab=lab, idx=int(i), song=r.get("song_id", None))
        hover = ("Bloco %{customdata.idx}<br>"
                 "Tempo: %{customdata.t0:.1f}s‚Äì%{customdata.t1:.1f}s<br>"
                 "Emo√ß√£o: %{customdata.lab}"
                 + ("<br>song_id: %{customdata.song}" if custom["song"] is not None else "")
                 + "<extra></extra>")
        fig.add_trace(go.Bar(x=[dur], y=[y_lane], base=[t0], orientation="h",
                             marker=dict(color=color, line=dict(width=0)),
                             hovertemplate=hover, customdata=[custom],
                             showlegend=False, name=str(lab)))
    for lab, color in emo_colors.items():
        fig.add_trace(go.Bar(x=[0], y=[-10], marker=dict(color=color), showlegend=True, name=lab, hoverinfo="skip"))

    t_min, t_max = float(df["t_start_s"].min()), float(df["t_end_s"].max())
    fig.update_layout(
        title=title, barmode="stack", bargap=0.0, template="plotly_white",
        height=max(160, int(140 + 40*height)), margin=dict(l=50, r=30, t=60, b=40),
        showlegend=True, legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1)
    )
    fig.update_xaxes(title_text="Tempo (s)", range=[max(0.0, t_min-0.5), t_max+0.5], zeroline=False)
    fig.update_yaxes(showticklabels=False, showgrid=False, range=[0, 1], fixedrange=True)
    return fig

# Vers√£o fig da densidade (com cores por emo√ß√£o)
def plot_block_feature_density_fig(blocks_song, values, title="Densidade de Features por Bloco (10s)"):
    assert len(values) == len(blocks_song)
    dfp = blocks_song.copy().reset_index(drop=True)
    dfp["densidade"] = values
    dfp["idx"] = np.arange(len(dfp))
    # cores por emo√ß√£o
    uniq = list(dict.fromkeys(dfp["emotion_label"].dropna().tolist()))
    base = ["#1f77b4","#ff7f0e","#2ca02c","#d62728","#9467bd","#8c564b","#e377c2","#7f7f7f","#bcbd22","#17becf"]
    color_map = {lab: base[i % len(base)] for i, lab in enumerate(uniq)}
    colors = dfp["emotion_label"].map(color_map).fillna("#7f7f7f")

    hover = ("Bloco %{x}<br>"
             "Densidade: %{y:.2f}<br>"
             "Tempo: %{customdata[0]:.1f}s‚Äì%{customdata[1]:.1f}s<br>"
             "Emo√ß√£o: %{customdata[2]}<extra></extra>")
    fig = go.Figure(go.Bar(
        x=dfp["idx"], y=dfp["densidade"], marker_color=colors,
        customdata=np.stack([dfp["t_start_s"], dfp["t_end_s"], dfp["emotion_label"]], axis=-1),
        hovertemplate=hover, name="Densidade"
    ))
    fig.update_layout(
        title=title, xaxis_title="√çndice do bloco (10s)", yaxis_title="Densidade / contagem (proxy)",
        template="plotly_white", bargap=0.2, width=900, height=400,
        legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1)
    )
    return fig


### 4.2 Espectogramas com labels

In [None]:
def plot_spectrogram_with_blocks(
    y, sr, blocks_song,
    title="Espectrograma + Blocos (10s)",
    emo_colors=None,
    alpha_span=0.25,
    n_fft=2048, hop=512, window="hann", center=True,
    zmin=None, zmax=None, colorscale="Viridis"
):
    """
    Vers√£o Plotly:
      - Espectrograma: Heatmap (times √ó freqs) com S_db
      - Blocos: add_vrect por 'emotion_label' com alpha configur√°vel
      - Linhas de borda: add_vline tracejado
      - Legenda de emo√ß√µes: tra√ßos "fantasmas" (Scatter) para exibir a paleta

    Requisitos em blocks_song: ['t_start_s','t_end_s','emotion_label'] (+ opcional 'song_id')
    Depende do adaptador: _compute_stft_to_db(...)
    """
    need = {"t_start_s","t_end_s","emotion_label"}
    missing = need - set(blocks_song.columns)
    if missing:
        raise KeyError(f"blocks_song precisa das colunas {sorted(need)}; faltando: {sorted(missing)}")
    if len(blocks_song) == 0:
        raise ValueError("blocks_song est√° vazio para esse song_id.")

    # paleta (ordem de apari√ß√£o para estabilidade)
    if emo_colors is None:
        base = ["#1f77b4","#ff7f0e","#2ca02c","#d62728",
                "#9467bd","#8c564b","#e377c2","#7f7f7f","#bcbd22","#17becf"]
        uniq = list(dict.fromkeys(blocks_song["emotion_label"].dropna().tolist()))
        emo_colors = {lab: base[i % len(base)] for i, lab in enumerate(uniq)}

    # espectrograma (usa seu compute_stft via adaptador)
    S_db, times, freqs = _compute_stft_to_db(y, sr, n_fft=n_fft, hop=hop, window=window, center=center)
    # z limits opcionais
    if zmin is None: zmin = float(np.nanpercentile(S_db, 1))
    if zmax is None: zmax = float(np.nanpercentile(S_db, 99))

    # Heatmap do espectrograma
    fig = go.Figure(data=go.Heatmap(
        x=times, y=freqs, z=S_db,
        zmin=zmin, zmax=zmax,
        colorscale=colorscale,
        colorbar=dict(title="Amplitude (dB)")
    ))

    # Sobreposi√ß√£o dos blocos (ret√¢ngulos transparentes e linhas de borda)
    df = blocks_song.copy()
    df["t_start_s"] = df["t_start_s"].astype(float)
    df["t_end_s"]   = df["t_end_s"].astype(float)
    df = df[df["t_end_s"] > df["t_start_s"]].reset_index(drop=True)

    for _, r in df.iterrows():
        t0, t1 = float(r["t_start_s"]), float(r["t_end_s"])
        lab = r.get("emotion_label", "NA")
        color = emo_colors.get(lab, "#7f7f7f")

        # faixa (vrect)
        fig.add_vrect(x0=t0, x1=t1,
                      fillcolor=color, opacity=float(alpha_span),
                      line_width=0, layer="above")

        # linhas tracejadas nas bordas
        fig.add_vline(x=t0, line=dict(width=1, dash="dash", color=color), layer="above")
        fig.add_vline(x=t1, line=dict(width=1, dash="dash", color=color), layer="above")

    # "tra√ßos de legenda" (sem dados, s√≥ para mostrar cores das emo√ß√µes)
    for lab, color in emo_colors.items():
        fig.add_trace(go.Scatter(
            x=[None], y=[None],
            mode="markers",
            marker=dict(size=10, color=color),
            name=lab,
            hoverinfo="skip",
            showlegend=True
        ))

    # layout
    dur_s = len(y) / sr if sr else times[-1]
    fig.update_layout(
        title=title,
        xaxis_title="Tempo (s)",
        yaxis_title="Frequ√™ncia (Hz)",
        template="plotly_white",
        width=1000, height=500,
        margin=dict(l=60, r=30, t=60, b=50),
        legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1)
    )

    # eixos
    fig.update_xaxes(range=[max(0.0, float(df["t_start_s"].min()) - 0.5), min(dur_s, float(df["t_end_s"].max()) + 0.5)])
    fig.update_yaxes(type="linear")  # mude para "log" se quiser visual em log-freq

    fig.show()

### 4.3 Timeline em Blocos

In [None]:
def slice_audio_blocks(
    y: np.ndarray, sr: int, blocks_song: pd.DataFrame,
    pad: float = 0.0, min_duration: float = 0.2
) -> List[Dict]:
    """
    Recorta o √°udio para cada bloco de blocks_song (colunas: t_start_s, t_end_s, emotion_label).
    - pad: segundos extras em torno do bloco (>=0)
    - min_duration: ignora blocos menores que esse tempo (s)
    Retorna lista de dicts: {'idx','t0','t1','label','y','sr','duration'}.
    """
    assert sr > 0 and y.ndim == 1, "Esperado √°udio mono e sr > 0."
    assert {"t_start_s","t_end_s"}.issubset(blocks_song.columns)

    T = len(y) / sr
    out = []
    for i, r in blocks_song.reset_index(drop=True).iterrows():
        t0 = float(r["t_start_s"]) - pad
        t1 = float(r["t_end_s"]) + pad
        t0 = max(0.0, t0)
        t1 = min(T,   t1)
        if t1 <= t0 or (t1 - t0) < min_duration:
            continue
        s0 = int(np.floor(t0 * sr))
        s1 = int(np.ceil(t1 * sr))
        s0 = max(0, s0); s1 = min(len(y), s1)
        seg = y[s0:s1].copy()
        out.append({
            "idx": int(i),
            "t0": float(t0),
            "t1": float(t1),
            "label": r.get("emotion_label", "NA"),
            "y": seg,
            "sr": int(sr),
            "duration": float(len(seg)/sr)
        })
    return out

In [None]:
def plot_block_timeline(blocks_song: pd.DataFrame,
                        title: str = "Timeline de Blocos (10s)",
                        emo_colors: Optional[Dict[str,str]] = None,
                        height: float = 1.2):
    """
    Timeline interativa dos blocos (um √∫nico trilho).
    Cada bloco vira uma barra horizontal colorida pelo 'emotion_label'.

    Requisitos em blocks_song: ['t_start_s','t_end_s','emotion_label'] (+ opcional 'song_id')
    """
    need = {"t_start_s","t_end_s","emotion_label"}
    missing = need - set(blocks_song.columns)
    if missing:
        raise KeyError(f"blocks_song precisa das colunas {sorted(need)}; faltando: {sorted(missing)}")

    df = blocks_song.copy().reset_index(drop=True)
    df["dur_s"] = (df["t_end_s"] - df["t_start_s"]).astype(float)
    df = df[df["dur_s"] > 0].copy()
    if df.empty:
        raise ValueError("Nenhum bloco v√°lido (dura√ß√£o > 0) para plotar.")

    # Paleta
    if emo_colors is None:
        # usa a ordem de apari√ß√£o para estabilidade
        uniq = list(dict.fromkeys(df["emotion_label"].dropna().tolist()))
        base = ["#1f77b4","#ff7f0e","#2ca02c","#d62728",
                "#9467bd","#8c564b","#e377c2","#7f7f7f","#bcbd22","#17becf"]
        emo_colors = {lab: base[i % len(base)] for i, lab in enumerate(uniq)}

    # montamos um √∫nico "trilho" (y fixo)
    y_lane = 0.5  # posi√ß√£o vertical
    fig = go.Figure()

    # Um trace por bloco (mant√©m hover rico e cores por emo√ß√£o)
    # Para desempenho com muitos blocos, d√° para agrupar por emo√ß√£o e usar base com arrays.
    for i, r in df.iterrows():
        t0 = float(r["t_start_s"])
        dur = float(r["dur_s"])
        lab = r.get("emotion_label", "NA")
        color = emo_colors.get(lab, "#7f7f7f")

        custom = {
            "t0": t0,
            "t1": float(r["t_end_s"]),
            "lab": lab,
            "idx": int(i),
            "song": r.get("song_id", None)
        }
        hover = (
            "Bloco %{customdata.idx}<br>"
            "Tempo: %{customdata.t0:.1f}s‚Äì%{customdata.t1:.1f}s<br>"
            "Emo√ß√£o: %{customdata.lab}"
            + ("<br>song_id: %{customdata.song}" if custom["song"] is not None else "")
            + "<extra></extra>"
        )

        fig.add_trace(go.Bar(
            x=[dur],                  # comprimento = dura√ß√£o
            y=[y_lane],               # um √∫nico trilho
            base=[t0],                # in√≠cio no tempo t0
            orientation="h",
            marker=dict(color=color, line=dict(width=0)),
            hovertemplate=hover,
            customdata=[custom],
            showlegend=False,         # legenda vamos criar separadamente
            name=str(lab)
        ))

    # Legenda manual (uma *faixa invis√≠vel* por emo√ß√£o)
    for lab, color in emo_colors.items():
        fig.add_trace(go.Bar(
            x=[0], y=[-10],           # fora do viewport
            marker=dict(color=color),
            showlegend=True,
            name=lab,
            hoverinfo="skip"
        ))

    # Layout
    t_min = float(df["t_start_s"].min())
    t_max = float(df["t_end_s"].max())
    fig.update_layout(
        title=title,
        barmode="stack",
        bargap=0.0,
        template="plotly_white",
        height= max(160, int(140 + 40*height)),   # usa seu 'height' como "espessura" visual
        margin=dict(l=50, r=30, t=60, b=40),
        showlegend=True,
        legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1)
    )

    # Eixo X em segundos
    fig.update_xaxes(
        title_text="Tempo (s)",
        range=[max(0.0, t_min-0.5), t_max+0.5],
        zeroline=False
    )
    # Eixo Y: esconder ticks/labels e deixar s√≥ a faixa
    fig.update_yaxes(
        showticklabels=False,
        showgrid=False,
        range=[0, 1],
        fixedrange=True,
        title_text=None
    )

    fig.show()

In [None]:
def plot_block_spectrogram_plotly(
    y_seg: np.ndarray, sr: int,
    title: str = "Espectrograma do Trecho",
    n_fft: int = 2048, hop: int = 512, window: str = "hann", center: bool = True,
    zmin: Optional[float] = None, zmax: Optional[float] = None,
    colorscale: str = "Viridis"
):
    """
    Plota o espectrograma (em dB) do trecho (y_seg) como Heatmap Plotly.
    """
    S_db, times, freqs = _compute_stft_to_db(y_seg, sr, n_fft=n_fft, hop=hop, window=window, center=center)

    if zmin is None: zmin = float(np.nanpercentile(S_db, 1))
    if zmax is None: zmax = float(np.nanpercentile(S_db, 99))

    fig = go.Figure(data=go.Heatmap(
        x=times, y=freqs, z=S_db,
        zmin=zmin, zmax=zmax, colorscale=colorscale,
        colorbar=dict(title="Amplitude (dB)")
    ))
    fig.update_layout(
        title=title,
        xaxis_title="Tempo (s)",
        yaxis_title="Frequ√™ncia (Hz)",
        template="plotly_white",
        width=900, height=420,
        margin=dict(l=60, r=30, t=60, b=50),
    )
    fig.update_yaxes(type="linear")  # mude para "log" se preferir escala log
    fig.show()

### 4.4 Densidade de features por bloco (proxy para fingerprints)

In [None]:
def plot_block_feature_density(blocks_song: pd.DataFrame, values: np.ndarray,
                               title: str = "Densidade de Features por Bloco (10s)"):
    """
    Vers√£o Plotly da densidade de features por bloco (proxy de fingerprint).
    - Cada barra representa um bloco de 10s.
    - Mostra tooltip com tempo inicial/final e r√≥tulo emocional.
    """
    assert len(values) == len(blocks_song), \
        "values deve ter o mesmo comprimento que blocks_song"

    df_plot = blocks_song.copy()
    df_plot["densidade"] = values
    df_plot["√≠ndice"] = np.arange(len(blocks_song))

    hover_text = (
        "Bloco %{x}<br>"
        "Densidade: %{y:.2f}<br>"
        "Tempo: %{customdata[0]:.1f}s‚Äì%{customdata[1]:.1f}s<br>"
        "Emo√ß√£o: %{customdata[2]}"
    )

    fig = go.Figure()
    fig.add_trace(go.Bar(
        x=df_plot["√≠ndice"],
        y=df_plot["densidade"],
        marker_color="steelblue",
        customdata=np.stack([
            df_plot["t_start_s"],
            df_plot["t_end_s"],
            df_plot["emotion_label"]
        ], axis=-1),
        hovertemplate=hover_text,
        name="Densidade"
    ))

    fig.update_layout(
        title=title,
        xaxis_title="√çndice do bloco (10s)",
        yaxis_title="Densidade / contagem (proxy)",
        template="plotly_white",
        bargap=0.2,
        width=900,
        height=400,
    )

    fig.show()

### 4.5 Sele√ß√£o de m√∫sica e proxy de feature

In [None]:
def show_blocks_gallery_plotly( y: np.ndarray, sr: int, blocks_song: pd.DataFrame, pad: float = 0.0, min_duration: float = 0.2, n_fft: int = 2048, hop: int = 512, window: str = "hann", center: bool = True ):
  """
  Recorta os blocos, toca cada trecho e plota o espectrograma separado (Loop).
  """
  segs = slice_audio_blocks(y, sr, blocks_song, pad=pad, min_duration=min_duration)
  if not segs:
    raise ValueError("Nenhum trecho v√°lido ap√≥s aplicar filtros/padding.")
  for seg in segs:
    title = f"Trecho #{seg['idx']} ‚Äî {seg['label']} ‚Äî {seg['t0']:.1f}s‚Äì{seg['t1']:.1f}s"
    print(title)
    display(Audio(seg["y"], rate=seg["sr"]))
    # Espectrograma do trecho
    plot_block_spectrogram_plotly( seg["y"], seg["sr"], title=f"Espectrograma ‚Äî {title}", n_fft=n_fft, hop=hop, window=window, center=center )

def select_blocks_for_song(blocks: pd.DataFrame, song_id: int) -> pd.DataFrame:
    cols_need = ["song_id","t_start_s","t_end_s","emotion_label"]
    missing = [c for c in cols_need if c not in blocks.columns]
    if missing:
        raise KeyError(f"Blocks precisa conter colunas: {cols_need}. Faltando: {missing}")
    bs = blocks.loc[blocks["song_id"] == song_id, cols_need].sort_values(["t_start_s"]).reset_index(drop=True)
    if bs.empty:
        raise ValueError(f"Nenhum bloco encontrado para song_id={song_id}")
    return bs

def simple_feature_proxy(y: np.ndarray, sr: int, blocks_song: pd.DataFrame,
                         n_fft: int = 2048, hop: int = 512, window: str = "hann", center: bool = True
                         ) -> np.ndarray:
    """
    Para cada bloco, calcula uma medida simples (energia m√©dia em dB do espectrograma).
    Usa o adaptador _compute_stft_to_db para sempre obter (S_db, times, freqs),
    independentemente do formato do seu compute_stft.
    """
    S_db, times, _ = _compute_stft_to_db(y, sr, n_fft=n_fft, hop=hop, window=window, center=center)

    # utilit√°rio consistente de tempo -> frame (usando os mesmos params)
    def t_to_frame(t: float) -> int:
        return int(np.clip(
            lb.time_to_frames(t, sr=sr, hop_length=hop, n_fft=n_fft),
            0, S_db.shape[1]-1
        ))

    vals = []
    for _, r in blocks_song.iterrows():
        f0 = t_to_frame(float(r['t_start_s'])); f1 = t_to_frame(float(r['t_end_s']))
        if f1 <= f0:
            f1 = min(f0+1, S_db.shape[1]-1)
        vals.append(float(np.nanmean(S_db[:, f0:f1])))

    return np.asarray(vals, dtype=float)


### 4.6 Execu√ß√£o geral

In [None]:
# ===================== IMPORTS & CONFIG =====================
from pathlib import Path
from typing import Dict, List, Tuple, Optional
import re, io, math, json, os, warnings, time, gc
import numpy as np
import pandas as pd
import librosa
from IPython.display import Audio, display
warnings.filterwarnings("ignore")

import plotly.graph_objects as go
from plotly.subplots import make_subplots

# ---- Execu√ß√£o/plots ----
DO_AUDIO   = False       # tocar √°udio no notebook
DO_PLOTS   = False       # mostrar gr√°ficos no notebook (al√©m de salvar)
DO_GALLERY = False       # reservado p/ futuras galerias

# =============== TOGGLES DE SA√çDA (claros e independentes) ===============
# 1) salvar um Parquet por m√∫sica (blocks_by_song)
SAVE_BLOCKS_PARQUET = False
BLOCKS_PARQUET_DIR  = Path("/content/drive/MyDrive/DataSet TCC/DEAM/parquet/blocks_by_song")
BLOCKS_PARQUET_DIR.mkdir(parents=True, exist_ok=True)

# 2) salvar √≠ndice global leve (parquet_index)
SAVE_INDEX_PARQUET = False
INDEX_PARQUET_PATH = Path("/content/drive/MyDrive/DataSet TCC/DEAM/parquet/blocks_index.parquet")

# 3) salvar gr√°ficos Plotly por bloco + overview (HTML)
SAVE_PLOTS = True
PLOTS_DIR  = Path("/content/drive/MyDrive/DataSet TCC/DEAM/plots_by_song")
PLOTS_DIR.mkdir(parents=True, exist_ok=True)
# ==========================================================================

# ---- Carregar DFs de Parquet (opcional) ----
LOAD_DFS = True
PARQUETS_DIR = [
    "/content/drive/MyDrive/DataSet TCC/DEAM/parquet/deam_metadata.parquet",
    "/content/drive/MyDrive/DataSet TCC/DEAM/parquet/deam_static.parquet",
    "/content/drive/MyDrive/DataSet TCC/DEAM/parquet/dynamic_labels.parquet",
]

# nomes esperados ap√≥s leitura (apenas "chaves-alvo" p/ _pick_df)
ALIAS = {
    "deam_metadata":       "df_meta",
    "deam_static":         "df_static",
    "deam_dynamic_labels": "df_dyn",
}

# ---- √Åudio base ----
AUDIO_DIR = Path("/content/drive/MyDrive/DataSet TCC/DEAM/audio")

# ---- Par√¢metros dos blocos ----
WINDOW_SEC  = 10.0
AGG         = "mean"          # "mean" ou "median"
STANDARDIZE = "per_song"      # "per_song" ou None

# ---- √Åudio / DSP ----
SR_TARGET = 44100             # resample no load

# ---- STFT (armazenada no Parquet) ----
N_FFT   = 2048
HOP     = 512
WINDOW  = "hann"
CENTER  = True
POWER   = 2.0      # 1.0 = magnitude, 2.0 = power
TO_DB   = True
STFT_DTYPE = "float16"        # "float32" ou "float16" (recomendado: float16 com dB)

# ---- Fingerprint / Peaks (constellation) ----
FP_PEAK_NF    = 4        # top-K picos por coluna (tempo)
FP_DB_MIN_REL = -35.0    # limiar relativo (dB) ao max da coluna para aceitar pico

# ===================== CORES E R√ìTULOS =====================
EMO_COLORS = {
    "Q1": "#2ca02c",  # Feliz/Excitado
    "Q2": "#ff7f0e",  # Tenso/Ansioso
    "Q3": "#1f77b4",  # Triste/Deprimido
    "Q4": "#9467bd",  # Calmo/Relaxado
    "Indefinido": "#7f7f7f",
}

# ===================== FUN√á√ïES UTILIT√ÅRIAS =====================
def ndarray_to_bytes(arr: np.ndarray) -> bytes:
    """Serializa ndarray em bytes (np.save para buffer)."""
    buf = io.BytesIO()
    np.save(buf, arr, allow_pickle=False)
    return buf.getvalue()

def bytes_to_ndarray(b: bytes) -> np.ndarray:
    """Reconstr√≥i ndarray serializado por ndarray_to_bytes."""
    if b is None or (isinstance(b, float) and math.isnan(b)):
        return np.array([])
    buf = io.BytesIO(b)
    buf.seek(0)
    return np.load(buf, allow_pickle=False)

def estimate_peaks_from_spectrogram(
    S_db: np.ndarray,
    freqs: np.ndarray,
    times: np.ndarray,
    topk_per_col: int = FP_PEAK_NF,
    db_min_rel: float = FP_DB_MIN_REL
) -> tuple[np.ndarray, np.ndarray]:
    """
    Para cada coluna do espectrograma (em dB), escolhe at√© K bins com maior energia
    acima de (max_col + db_min_rel). Retorna (peaks_t, peaks_f) como float32.
    """
    if S_db.size == 0:
        return np.array([]), np.array([])
    T = S_db.shape[1]
    pk_t, pk_f = [], []
    for t in range(T):
        col = S_db[:, t]
        m = float(np.max(col))
        mask = col >= (m + db_min_rel)
        idxs = np.where(mask)[0]
        if idxs.size:
            sel = idxs[np.argsort(col[idxs])[::-1][:topk_per_col]]
            pk_t.extend([times[t]] * len(sel))
            pk_f.extend(freqs[sel].tolist())
    return np.asarray(pk_t, dtype=np.float32), np.asarray(pk_f, dtype=np.float32)

def _normalize_stem(stem: str) -> str:
    s = re.sub(r"\s*\(\d+\)\s*$", "", str(stem).strip())
    s = s.replace(" ", "_").lower()
    return s

def _read_parquet_safe(path: Path) -> pd.DataFrame:
    try:
        df = pd.read_parquet(path)
        df.columns = [str(c).strip() for c in df.columns]
        return df
    except Exception as e:
        print(f"[WARN] Falha ao ler {path}: {e}")
        return pd.DataFrame()

def _pick_df(dataframes: dict, target_key: str) -> pd.DataFrame:
    target_key = target_key.strip().lower()
    # 1) pelo nome exato
    for k in list(dataframes.keys()):
        if k == target_key:
            return dataframes[k]
    # 2) por heur√≠stica de nome
    for k in list(dataframes.keys()):
        nk = _normalize_stem(k)
        if target_key.endswith("meta")   and ("meta"   in nk): return dataframes[k]
        if target_key.endswith("static") and ("static" in nk): return dataframes[k]
        if target_key.endswith("dyn")    and (("dyn" in nk) or ("label" in nk)): return dataframes[k]
    # 3) por colunas
    for k, df in dataframes.items():
        cols = {c.lower() for c in df.columns}
        if target_key.endswith("meta") and {"song_id","title"} <= cols:
            return df
        if target_key.endswith("static") and ("emotion_general" in cols or "emotion_specific" in cols):
            return df
        if target_key.endswith("dyn") and (any(c.lower().startswith("valence_") for c in df.columns) or "valence" in cols):
            return df
    raise KeyError(f"N√£o foi poss√≠vel localizar DF para '{target_key}'. Ajuste PARQUETS_DIR/ALIAS.")

def _ensure_nullable_int(s):
    return pd.to_numeric(s, errors="coerce").astype("Int64")

def _ensure_required_cols(df: pd.DataFrame, required: list[str]) -> pd.DataFrame:
    for c in required:
        if c not in df.columns:
            df[c] = pd.Series([np.nan]*len(df))
    return df[required]

def _coerce_dtypes(df: pd.DataFrame, prefer_dtypes: dict[str, str]) -> pd.DataFrame:
    for col, dt in prefer_dtypes.items():
        if col not in df.columns or dt is None:
            continue
        try:
            if dt == "Int64":
                df[col] = _ensure_nullable_int(df[col])
            else:
                df[col] = df[col].astype(dt)
        except Exception:
            pass
    return df

def _safe_title(row_meta: pd.DataFrame, sid: int) -> str:
    if row_meta.empty:
        return f"song_id={sid}"
    if "title" in row_meta.columns and pd.notna(row_meta["title"].iloc[0]):
        return str(row_meta["title"].iloc[0])
    return f"song_id={sid}"

# ===================== HARMONIZA√á√ÉO df_dyn: WIDE -> LONG =====================
def _df_dyn_to_long(df_dyn_in: pd.DataFrame) -> pd.DataFrame:
    cols_lower = {c.lower(): c for c in df_dyn_in.columns}
    if {"song_id","valence","arousal"} <= set(map(str.lower, df_dyn_in.columns)):
        time_col = None
        for cand in ["t_s","time_s","t_sec","time_sec","t","time","timestamp_s"]:
            if cand in cols_lower:
                time_col = cols_lower[cand]; break
        df = df_dyn_in.copy()
        if time_col is None:
            for cand in ["time_ms","t_ms","timestamp_ms"]:
                if cand in cols_lower:
                    time_col = cols_lower[cand]
                    df["t_s"] = pd.to_numeric(df[time_col], errors="coerce")/1000.0
                    return df[["song_id","t_s","valence","arousal"]]
            for cand in ["frame","idx","index"]:
                if cand in cols_lower:
                    step = 0.5
                    df = df.sort_values(cols_lower[cand]).reset_index(drop=True)
                    df["t_s"] = df.index * step
                    return df[["song_id","t_s","valence","arousal"]]
            raise ValueError("df_dyn long mas sem coluna de tempo. Adicione 't_s' ou 'time_ms'.")
        else:
            if time_col.lower().endswith("_ms"):
                df["t_s"] = pd.to_numeric(df[time_col], errors="coerce")/1000.0
            else:
                df["t_s"] = pd.to_numeric(df[time_col], errors="coerce")
            return df[["song_id","t_s","valence","arousal"]]

    v_cols = [c for c in df_dyn_in.columns if re.match(r"(?i)^valence_\d+ms$", c)]
    a_cols = [c for c in df_dyn_in.columns if re.match(r"(?i)^arousal_\d+ms$", c)]
    if not v_cols or not a_cols:
        raise ValueError("df_dyn n√£o possui colunas 'valence_XXXms'/'arousal_XXXms' nem 'valence/arousal' long.")
    def _ms_from(col): return int(re.findall(r"(\d+)ms$", col, flags=re.I)[0])
    v_map = {c: _ms_from(c) for c in v_cols}
    a_map = {c: _ms_from(c) for c in a_cols}
    common_ms = sorted(set(v_map.values()).intersection(a_map.values()))
    v_keep = [c for c in v_cols if v_map[c] in common_ms]
    a_keep = [c for c in a_cols if a_map[c] in common_ms]

    df = df_dyn_in.copy()
    df["song_id"] = pd.to_numeric(df["song_id"], errors="coerce").astype("Int64")

    v_long = df.melt(id_vars=["song_id"], value_vars=v_keep, var_name="var", value_name="valence")
    v_long["ms"] = v_long["var"].str.extract(r"(\d+)ms$", expand=False).astype("int64")
    v_long.drop(columns=["var"], inplace=True)

    a_long = df.melt(id_vars=["song_id"], value_vars=a_keep, var_name="var", value_name="arousal")
    a_long["ms"] = a_long["var"].str.extract(r"(\d+)ms$", expand=False).astype("int64")
    a_long.drop(columns=["var"], inplace=True)

    long_df = pd.merge(v_long, a_long, on=["song_id","ms"], how="inner")
    long_df["t_s"] = long_df["ms"] / 1000.0
    long_df = long_df[["song_id","t_s","valence","arousal"]].sort_values(["song_id","t_s"]).reset_index(drop=True)
    long_df["valence"] = pd.to_numeric(long_df["valence"], errors="coerce")
    long_df["arousal"] = pd.to_numeric(long_df["arousal"], errors="coerce")
    return long_df

def _standardize_per_song(df_blocks: pd.DataFrame) -> pd.DataFrame:
    def _z(g):
        g = g.copy()
        for c in ["valence","arousal"]:
            m, s = g[c].mean(skipna=True), g[c].std(skipna=True)
            g[f"{c}_z"] = (g[c] - m) / (s if s and s==s else 1.0)
        return g
    return df_blocks.groupby("song_id", group_keys=False).apply(_z)

def _zone_from_va(v, a, use_z=False):
    if pd.isna(v) or pd.isna(a):
        return ("Indefinido","Indefinido")
    if use_z: vv, aa = v, a
    else:     vv, aa = v-0.5, a-0.5
    if vv >= 0 and aa >= 0: return ("Q1", "Feliz/Excitado")
    if vv <  0 and aa >= 0: return ("Q2", "Tenso/Ansioso")
    if vv <  0 and aa <  0: return ("Q3", "Triste/Deprimido")
    return ("Q4", "Calmo/Relaxado")

def make_blocks_from_df_dyn(
    df_dyn: pd.DataFrame, song_id: int,
    window_sec: float = 10.0, agg: str = "mean",
    standardize: str | None = "per_song"
) -> pd.DataFrame:
    dyn = _df_dyn_to_long(df_dyn)
    dyn = dyn.loc[_ensure_nullable_int(dyn["song_id"]) == int(song_id)].copy()
    if dyn.empty:
        return pd.DataFrame(columns=[
            "song_id","block_idx","t_start_s","t_end_s",
            "valence","arousal","valence_z","arousal_z","emotion_zone","emotion_label"
        ])

    dyn["t_s"] = pd.to_numeric(dyn["t_s"], errors="coerce")
    t_min, t_max = float(dyn["t_s"].min()), float(dyn["t_s"].max())
    dyn["block_idx"] = ((dyn["t_s"] - t_min) // window_sec).astype("Int64")

    if agg not in {"mean","median"}: agg = "mean"
    aggfunc = {"valence": agg, "arousal": agg}
    blocks = dyn.groupby("block_idx", as_index=False).agg(aggfunc)
    blocks["t_start_s"] = blocks["block_idx"].astype(float) * window_sec + t_min
    blocks["t_end_s"]   = blocks["t_start_s"] + window_sec
    blocks["song_id"]   = int(song_id)

    blocks["valence_z"] = np.nan
    blocks["arousal_z"] = np.nan
    if standardize == "per_song":
        tmp = _standardize_per_song(blocks[["song_id","valence","arousal"]].copy())
        blocks["valence_z"] = tmp["valence_z"].values
        blocks["arousal_z"] = tmp["arousal_z"].values

    use_z = standardize == "per_song"
    zones, labels = [], []
    for _, r in blocks.iterrows():
        v = r["valence_z"] if use_z else r["valence"]
        a = r["arousal_z"] if use_z else r["arousal"]
        z, lab = _zone_from_va(v, a, use_z=use_z)
        zones.append(z); labels.append(lab)
    blocks["emotion_zone"]  = zones
    blocks["emotion_label"] = labels

    blocks = blocks[["song_id","block_idx","t_start_s","t_end_s",
                     "valence","arousal","valence_z","arousal_z",
                     "emotion_zone","emotion_label"]].sort_values("block_idx").reset_index(drop=True)

    blocks["song_id"]   = _ensure_nullable_int(blocks["song_id"])
    blocks["block_idx"] = _ensure_nullable_int(blocks["block_idx"])
    for c in ["t_start_s","t_end_s","valence","arousal","valence_z","arousal_z"]:
        blocks[c] = pd.to_numeric(blocks[c], errors="coerce").astype(float)
    blocks["emotion_zone"]  = blocks["emotion_zone"].astype("string")
    blocks["emotion_label"] = blocks["emotion_label"].astype("string")
    return blocks

# ===================== DSP/IO B√ÅSICAS =====================
def load_audio(path: Path, sr_target: int = 44100) -> Tuple[np.ndarray, int]:
    y, sr = librosa.load(str(path), sr=sr_target, mono=True)
    return y.astype(np.float32, copy=False), sr

def select_blocks_for_song(blocks: pd.DataFrame, sid: int) -> pd.DataFrame:
    return blocks.loc[_ensure_nullable_int(blocks["song_id"]) == int(sid)].copy().reset_index(drop=True)

def _clip_blocks_to_audio(bs: pd.DataFrame, y: np.ndarray, sr: int) -> pd.DataFrame:
    if bs.empty: return bs
    dur = len(y)/sr if sr else 0.0
    bs = bs.copy()
    bs["t_start_s"] = bs["t_start_s"].clip(lower=0, upper=max(0.0, dur))
    bs["t_end_s"]   = bs["t_end_s"].clip(upper=dur)
    bs = bs[bs["t_end_s"] > bs["t_start_s"]].reset_index(drop=True)
    return bs

def simple_feature_proxy(y: np.ndarray, sr: int, bs: pd.DataFrame) -> Optional[List[float]]:
    vals = []
    for _, r in bs.iterrows():
        i0 = int(max(0, r["t_start_s"])*sr)
        i1 = int(min(len(y), r["t_end_s"])*sr)
        seg = y[i0:i1]
        vals.append(float(np.sqrt(np.mean(seg**2)))) if len(seg) else vals.append(np.nan)
    return vals

def _emotion_from_static_row(rs: pd.Series) -> Tuple[str,str]:
    eg = rs.get("emotion_general", None)
    es = rs.get("emotion_specific", None)
    if pd.notna(eg) or pd.notna(es):
        return (str(eg) if pd.notna(eg) else "Indefinido", str(es) if pd.notna(es) else "NA")
    return ("Indefinido", "NA")

# ===================== STFT 1√ó POR M√öSICA =====================
def _cast_dtype_for_save(arr: np.ndarray) -> np.ndarray:
    return arr.astype(np.float16 if STFT_DTYPE=="float16" else np.float32, copy=False)

def compute_song_stft(y: np.ndarray, sr: int):
    S = np.abs(librosa.stft(y, n_fft=N_FFT, hop_length=HOP, window=WINDOW, center=CENTER))
    if POWER != 1.0:
        S = S ** float(POWER)
    if TO_DB:
        S = librosa.power_to_db(S, ref=np.max) if POWER >= 2.0 else librosa.amplitude_to_db(S, ref=np.max)
    S = _cast_dtype_for_save(S)
    freqs = librosa.fft_frequencies(sr=sr, n_fft=N_FFT).astype(np.float32)
    times = librosa.frames_to_time(np.arange(S.shape[1]), sr=sr, hop_length=HOP, n_fft=N_FFT).astype(np.float32)
    return S, freqs, times

# ===================== FATIAR STFT + MONTAR PAYLOADS =====================
def slice_stft_to_parquet_payloads(
    S, freqs, times, sr, bs: pd.DataFrame, sid: int,
    keep_sdb_in_memory: bool = False
) -> Tuple[List[dict], Optional[List[np.ndarray]], Optional[List[np.ndarray]]]:
    """
    Retorna:
      - payloads (para Parquet),
      - lista S_db_por_bloco (ou None),
      - lista times_por_bloco (ou None)
    """
    payloads: List[dict] = []
    sdb_list, times_list = ([] if keep_sdb_in_memory else None), ([] if keep_sdb_in_memory else None)
    F, T = S.shape
    for _, r in bs.iterrows():
        t0, t1 = float(r["t_start_s"]), float(r["t_end_s"])
        f0 = int(np.searchsorted(times, t0, side="left"))
        f1 = int(np.searchsorted(times, t1, side="right"))
        if f1 <= f0 or f0 >= T:
            payloads.append({
                "stft_bytes": None, "freqs_bytes": None, "times_bytes": None,
                "peaks_f_bytes": None, "peaks_t_bytes": None,
                "f0": np.nan, "f1": np.nan,
                "stft_shape_f": F, "stft_shape_t": np.nan,
                "fp_peak_nf": FP_PEAK_NF, "fp_amp_min_db": FP_DB_MIN_REL
            })
            if keep_sdb_in_memory:
                sdb_list.append(np.array([]))
                times_list.append(np.array([]))
            continue

        f0 = max(0, min(f0, T-1))
        f1 = max(f0+1, min(f1, T))
        S_slice   = S[:, f0:f1]
        times_blk = times[f0:f1]

        # dB para sele√ß√£o de picos e plot
        if TO_DB:
            S_db = S_slice.astype(np.float32, copy=False)
        else:
            S_db = librosa.power_to_db(S_slice, ref=np.max) if POWER >= 2.0 \
                   else librosa.amplitude_to_db(S_slice, ref=np.max)

        peaks_t, peaks_f = estimate_peaks_from_spectrogram(
            S_db=S_db, freqs=freqs, times=times_blk,
            topk_per_col=FP_PEAK_NF, db_min_rel=FP_DB_MIN_REL
        )

        payloads.append({
            "stft_bytes":    ndarray_to_bytes(S_slice),
            "freqs_bytes":   ndarray_to_bytes(freqs),
            "times_bytes":   ndarray_to_bytes(times_blk),
            "peaks_f_bytes": ndarray_to_bytes(peaks_f),
            "peaks_t_bytes": ndarray_to_bytes(peaks_t),
            "f0": float(f0), "f1": float(f1),
            "stft_shape_f": int(S_slice.shape[0]),
            "stft_shape_t": int(S_slice.shape[1]),
            "fp_peak_nf": FP_PEAK_NF,
            "fp_amp_min_db": FP_DB_MIN_REL
        })

        if keep_sdb_in_memory:
            sdb_list.append(S_db)
            times_list.append(times_blk)

    return payloads, sdb_list, times_list

# ===================== PLOTS (helpers) =====================
def _plot_block_spectrogram_with_peaks(
    S_db: np.ndarray,
    freqs: np.ndarray,
    times_blk: np.ndarray,
    peaks_t: np.ndarray,
    peaks_f: np.ndarray,
    title: str = "Espectrograma (dB) + picos"
) -> go.Figure:
    """Cria figura Plotly com Heatmap (S_db) + scatter dos picos."""
    fig = go.Figure()
    fig.add_trace(go.Heatmap(z=S_db, x=times_blk, y=freqs, coloraxis="coloraxis", zsmooth=False))
    if peaks_t.size and peaks_f.size:
        fig.add_trace(go.Scatter(x=peaks_t, y=peaks_f, mode="markers",
                                 marker=dict(size=6, symbol="circle-open"), name="picos"))
    fig.update_layout(
        title=title,
        xaxis_title="Tempo (s)",
        yaxis_title="Frequ√™ncia (Hz)",
        coloraxis=dict(colorscale="Inferno"),
        template="plotly_white",
        width=900, height=480
    )
    return fig

def _save_plot_html(fig: go.Figure, out_html: Path):
    out_html.parent.mkdir(parents=True, exist_ok=True)
    fig.write_html(str(out_html), include_plotlyjs="cdn", full_html=True)

def _emotion_color(zone: str) -> str:
    return EMO_COLORS.get(str(zone), EMO_COLORS["Indefinido"])

def _plot_song_overview(
    S_full_db: np.ndarray,  # espectrograma da m√∫sica inteira em dB
    freqs: np.ndarray,
    times_full: np.ndarray,
    df_song_blocks: pd.DataFrame,  # df_song (com t_start_s, t_end_s, emotion_zone)
    title: str
) -> go.Figure:
    """
    Figura 2 linhas:
      (1) Heatmap do espectrograma de toda a m√∫sica
      (2) Linha temporal com blocos coloridos por emotion_zone
    """
    fig = make_subplots(
        rows=2, cols=1, shared_xaxes=True,
        row_heights=[0.75, 0.25],
        vertical_spacing=0.08,
        subplot_titles=(None, "Blocos emocionais")
    )

    # ---- (1) espectrograma completo ----
    fig.add_trace(
        go.Heatmap(
            z=S_full_db, x=times_full, y=freqs,
            coloraxis="coloraxis", zsmooth=False
        ),
        row=1, col=1
    )

    # ---- (2) faixa de blocos emocionais ----
    legend_done = set()
    y0, y1 = 0, 1  # faixa vertical arbitr√°ria
    for _, r in df_song_blocks.iterrows():
        t0 = float(r["t_start_s"]); t1 = float(r["t_end_s"])
        zone = str(r.get("emotion_zone", "Indefinido"))
        color = _emotion_color(zone)

        fig.add_shape(
            type="rect", xref="x", yref="y2",
            x0=t0, x1=t1, y0=y0, y1=y1,
            line=dict(width=0), fillcolor=color, opacity=0.80
        )

        if zone not in legend_done:
            fig.add_trace(
                go.Scatter(
                    x=[None], y=[None],
                    mode="markers",
                    marker=dict(size=10, color=color),
                    name=f"{zone}",
                    showlegend=True
                ),
                row=2, col=1
            )
            legend_done.add(zone)

    total_dur = float(times_full[-1]) if len(times_full) else float(df_song_blocks["t_end_s"].max())
    fig.update_layout(
        title=title,
        template="plotly_white",
        coloraxis=dict(colorscale="Inferno"),
        width=1000, height=700,
        legend_title_text="Zona emocional",
        margin=dict(l=60, r=20, t=60, b=40)
    )
    fig.update_xaxes(title_text="Tempo (s)", row=2, col=1, range=[0, total_dur])
    fig.update_yaxes(title_text="Frequ√™ncia (Hz)", row=1, col=1)
    fig.update_yaxes(visible=False, row=2, col=1, range=[y0, y1])

    return fig

def _save_song_overview_html(fig: go.Figure, out_html: Path):
    out_html.parent.mkdir(parents=True, exist_ok=True)
    fig.write_html(str(out_html), include_plotlyjs="cdn", full_html=True)

# ===================== CARGA DE DATAFRAMES BASE =====================
if LOAD_DFS:
    dataframes = {}
    for pq in PARQUETS_DIR:
        p = Path(pq)
        df = _read_parquet_safe(p)
        if not df.empty:
            dataframes[_normalize_stem(p.stem)] = df
    df_meta   = _pick_df(dataframes, "df_meta")
    df_static = _pick_df(dataframes, "df_static")
    df_dyn    = _pick_df(dataframes, "df_dyn")

# ===================== LOOP PRINCIPAL ‚Äî UM PARQUET POR M√öSICA =====================
ids_dyn  = set(_ensure_nullable_int(df_dyn["song_id"]).dropna().unique().tolist())
ids_meta = set(_ensure_nullable_int(df_meta["song_id"]).dropna().unique().tolist())
all_ids  = sorted(ids_dyn.intersection(ids_meta))

index_rows = []   # para √≠ndice global opcional
skipped = 0

for sid in all_ids:
    try:
        # ---- t√≠tulo/arquivo ----
        row_meta = df_meta.loc[_ensure_nullable_int(df_meta["song_id"]) == sid].reset_index(drop=True)
        title    = _safe_title(row_meta, sid)
        audio_path = AUDIO_DIR / f"{sid}.mp3"
        print(f"[{sid}] {title} ‚Üí {audio_path}")

        # ---- emo√ß√£o est√°tica ----
        row_static = df_static.loc[_ensure_nullable_int(df_static["song_id"]) == sid]
        if row_static.empty:
            emotion_general, emotion_specific = ("Indefinido", "NA")
        else:
            rs = row_static.iloc[0]
            def _get_case(df, cname):
                for c in df.columns:
                    if c.lower() == cname: return c
                return cname
            if {"emotion_general","emotion_specific"} <= {c.lower() for c in row_static.columns}:
                egc = _get_case(row_static, "emotion_general")
                esc = _get_case(row_static, "emotion_specific")
                emotion_general, emotion_specific = rs[egc], rs[esc]
            else:
                emotion_general, emotion_specific = _emotion_from_static_row(rs)

        # ---- blocos VA ----
        bs = make_blocks_from_df_dyn(df_dyn=df_dyn, song_id=sid,
                                     window_sec=WINDOW_SEC, agg=AGG, standardize=STANDARDIZE)

        # ---- √°udio / clipe ----
        try:
            y, sr = load_audio(audio_path, sr_target=SR_TARGET)
        except Exception as e:
            print(f"[WARN] song_id={sid}: falha ao carregar √°udio ({e}). Continuando sem √°udio.")
            y, sr = np.array([], dtype=np.float32), SR_TARGET

        bs = select_blocks_for_song(bs, sid)
        bs = _clip_blocks_to_audio(bs, y, sr)
        if bs.empty:
            print(f"[WARN] song_id={sid}: sem blocos ap√≥s clipe ‚Üí pulando.")
            skipped += 1
            continue

        # ---- feature proxy (opcional) ----
        vals = simple_feature_proxy(y, sr, bs) if len(y) else [np.nan]*len(bs)

        # ---- STFT 1√ó + payloads por bloco (com peaks) ----
        stft_payloads = [{"stft_bytes": None, "freqs_bytes": None, "times_bytes": None,
                          "peaks_f_bytes": None, "peaks_t_bytes": None,
                          "f0": np.nan, "f1": np.nan, "stft_shape_f": np.nan, "stft_shape_t": np.nan,
                          "fp_peak_nf": FP_PEAK_NF, "fp_amp_min_db": FP_DB_MIN_REL}] * len(bs)
        sdb_mem, times_mem = None, None
        S_full_db_ref, freqs, times = None, None, None

        if len(y):
            S_full, freqs, times = compute_song_stft(y, sr)
            S_full_db_ref = S_full.astype(np.float32, copy=False)

            keep_for_plots = bool(SAVE_PLOTS or DO_PLOTS)
            stft_payloads, sdb_mem, times_mem = slice_stft_to_parquet_payloads(
                S_full, freqs, times, sr, bs, sid, keep_sdb_in_memory=keep_for_plots
            )
            # S_full em si n√£o √© mais necess√°rio depois disso (ficamos s√≥ com S_full_db_ref)
            del S_full
            gc.collect()

        # ---- montar DataFrame desta m√∫sica ----
        df_song = bs.copy()
        if vals is not None and len(vals) == len(df_song):
            try:
                df_song["feature_proxy"] = np.asarray(vals)
            except Exception as e:
                print(f"[WARN] song_id={sid}: n√£o anexei feature_proxy ({e}).")

        df_song["song_id"]          = sid
        df_song["title"]            = title
        df_song["emotion_general"]  = emotion_general
        df_song["emotion_specific"] = emotion_specific
        df_song["sr"]               = sr

        df_song = pd.concat([df_song.reset_index(drop=True),
                             pd.DataFrame(stft_payloads).reset_index(drop=True)], axis=1)

        df_song["stft_n_fft"]   = N_FFT
        df_song["stft_hop"]     = HOP
        df_song["stft_window"]  = str(WINDOW)
        df_song["stft_center"]  = bool(CENTER)
        df_song["stft_power"]   = float(POWER)
        df_song["stft_to_db"]   = bool(TO_DB)
        df_song["stft_dtype"]   = str(STFT_DTYPE)

        df_song["fp_peak_nf"]    = df_song.get("fp_peak_nf", FP_PEAK_NF)
        df_song["fp_amp_min_db"] = df_song.get("fp_amp_min_db", FP_DB_MIN_REL)

        prefer_dtypes = {
            "song_id":          "Int64",
            "block_idx":        "Int64" if "block_idx" in df_song.columns else None,
            "sr":               "Int64",
            "title":            "string",
            "emotion_general":  "string",
            "emotion_specific": "string",
        }
        df_song = _coerce_dtypes(df_song, prefer_dtypes)

        REQUIRED_COLS = [
            "song_id", "block_idx", "t_start_s", "t_end_s",
            "valence", "arousal", "valence_z", "arousal_z",
            "emotion_zone", "emotion_label", "feature_proxy",
            "title", "emotion_general", "emotion_specific", "sr",
            "stft_bytes", "freqs_bytes", "times_bytes",
            "peaks_f_bytes", "peaks_t_bytes",
            "f0", "f1", "stft_shape_f", "stft_shape_t",
            "stft_n_fft", "stft_hop", "stft_window", "stft_center", "stft_power", "stft_to_db", "stft_dtype",
            "fp_peak_nf", "fp_amp_min_db"
        ]
        df_song = _ensure_required_cols(df_song, REQUIRED_COLS)

        # ---------------- SALVAR PARQUET POR M√öSICA (se habilitado) ----------------
        out_path = None
        if SAVE_BLOCKS_PARQUET:
            out_path = BLOCKS_PARQUET_DIR / f"song_{int(sid)}.parquet"
            df_song.to_parquet(out_path, engine="pyarrow", compression="zstd", index=False)
            print(f"[OK] song_id={sid}: {len(df_song)} blocos ‚Üí {out_path}")

        # ---------------- ALIMENTAR √çNDICE GLOBAL (se habilitado) ------------------
        if SAVE_INDEX_PARQUET:
            index_rows.append({
                "song_id": int(sid),
                "title": str(title),
                "parquet_path": str(out_path) if out_path else "",
                "n_blocks": int(len(df_song)),
                "sr": int(sr),
                "stft_n_fft": int(N_FFT),
                "stft_hop": int(HOP),
                "window_sec": float(WINDOW_SEC),
            })

        # ---------------- SALVAR PLOTS POR BLOCO (se habilitado) -------------------
        if SAVE_PLOTS and sdb_mem is not None:
            norm_title = re.sub(r'[^a-zA-Z0-9_-]+', '_', str(title))[:80]
            song_dir = PLOTS_DIR / f"song_{int(sid)}_{norm_title}"
            song_dir.mkdir(parents=True, exist_ok=True)

            for i in range(len(df_song)):
                try:
                    S_db_blk  = sdb_mem[i] if i < len(sdb_mem) else np.array([])
                    times_blk = times_mem[i] if i < len(times_mem) else np.array([])
                    if S_db_blk.size == 0 or times_blk.size == 0:
                        continue

                    peaks_t = bytes_to_ndarray(df_song.iloc[i]["peaks_t_bytes"])
                    peaks_f = bytes_to_ndarray(df_song.iloc[i]["peaks_f_bytes"])
                    freqs_i = bytes_to_ndarray(df_song.iloc[i]["freqs_bytes"])
                    if freqs_i.size == 0:
                        freqs_i = librosa.fft_frequencies(sr=sr, n_fft=N_FFT).astype(np.float32)

                    fig_blk = _plot_block_spectrogram_with_peaks(
                        S_db=S_db_blk,
                        freqs=freqs_i,
                        times_blk=times_blk,
                        peaks_t=peaks_t,
                        peaks_f=peaks_f,
                        title=f"{title} ‚Äî bloco {int(df_song.iloc[i]['block_idx'])} "
                              f"({float(df_song.iloc[i]['t_start_s']):.1f}s‚Äì{float(df_song.iloc[i]['t_end_s']):.1f}s)"
                    )
                    out_html_blk = song_dir / f"block_{int(df_song.iloc[i]['block_idx']):04d}.html"
                    _save_plot_html(fig_blk, out_html_blk)

                    if DO_PLOTS:
                        display(fig_blk)

                except Exception as e:
                    print(f"[WARN] song_id={sid} bloco {df_song.iloc[i].get('block_idx', 'NA')}: "
                          f"falha ao salvar plot ({e})")

        # ---------------- OVERVIEW (ESPECTRO COMPLETO + BLOCOS EMOCIONAIS) ---------
        if SAVE_PLOTS and S_full_db_ref is not None and times is not None and len(times):
            norm_title = re.sub(r'[^a-zA-Z0-9_-]+', '_', str(title))[:80]
            song_dir = PLOTS_DIR / f"song_{int(sid)}_{norm_title}"
            song_dir.mkdir(parents=True, exist_ok=True)

            cols_needed = {"t_start_s", "t_end_s", "emotion_zone", "block_idx"}
            if not cols_needed.issubset(set(df_song.columns)):
                print(f"[WARN] song_id={sid}: df_song sem colunas para overview; pulando.")
            else:
                fig_over = _plot_song_overview(
                    S_full_db=S_full_db_ref,
                    freqs=freqs,
                    times_full=times,
                    df_song_blocks=df_song[["block_idx","t_start_s","t_end_s","emotion_zone"]],
                    title=f"{title} ‚Äî vis√£o geral (m√∫sica inteira)"
                )
                out_html_over = song_dir / "song_overview.html"
                _save_song_overview_html(fig_over, out_html_over)
                print(f"[OK] Overview salvo ‚Üí {out_html_over}")

                if DO_PLOTS:
                    display(fig_over)

            S_full_db_ref = None
            gc.collect()

        if DO_AUDIO and len(y):
            display(Audio(y, rate=sr))

    except Exception as e:
        import traceback
        print(f"[ERRO] song_id={sid}: {e}")
        traceback.print_exc()
        skipped += 1

# ===================== √çNDICE GLOBAL (OPCIONAL) =====================
if SAVE_INDEX_PARQUET and len(index_rows):
    df_index = pd.DataFrame(index_rows)
    df_index.to_parquet(INDEX_PARQUET_PATH, engine="pyarrow", compression="zstd", index=False)
    print(f"[OK] √çndice global: {len(df_index)} m√∫sicas ‚Üí {INDEX_PARQUET_PATH}")
print(f"[INFO] Puladas/erro: {skipped}")


[2] Tonight  A Lonely Century ‚Üí /content/drive/MyDrive/DataSet TCC/DEAM/audio/2.mp3
[OK] Overview salvo ‚Üí /content/drive/MyDrive/DataSet TCC/DEAM/plots_by_song/song_2_Tonight_A_Lonely_Century/song_overview.html
[3] DD Groove ‚Üí /content/drive/MyDrive/DataSet TCC/DEAM/audio/3.mp3
[OK] Overview salvo ‚Üí /content/drive/MyDrive/DataSet TCC/DEAM/plots_by_song/song_3_DD_Groove/song_overview.html
[4] Slow Burn ‚Üí /content/drive/MyDrive/DataSet TCC/DEAM/audio/4.mp3
[OK] Overview salvo ‚Üí /content/drive/MyDrive/DataSet TCC/DEAM/plots_by_song/song_4_Slow_Burn/song_overview.html
[5] Nothing Much ‚Üí /content/drive/MyDrive/DataSet TCC/DEAM/audio/5.mp3
[OK] Overview salvo ‚Üí /content/drive/MyDrive/DataSet TCC/DEAM/plots_by_song/song_5_Nothing_Much/song_overview.html
[7] Hustle ‚Üí /content/drive/MyDrive/DataSet TCC/DEAM/audio/7.mp3
[OK] Overview salvo ‚Üí /content/drive/MyDrive/DataSet TCC/DEAM/plots_by_song/song_7_Hustle/song_overview.html
[8] Dirt Rhodes ‚Üí /content/drive/MyDrive/DataSe

In [None]:
# ===================== PLOTS ESPEC√çFICOS PARA O TCC (SE√á√ÉO 4.5) =====================

# Se quiser s√≥ gerar HTML sem mostrar no Colab, deixe DO_PLOTS = False
SAVE_PLOTS = True
DO_PLOTS   = False

def _save_plot_html(fig: go.Figure, out_html: Path):
    """Salva um gr√°fico Plotly em HTML (modo TCC)."""
    out_html.parent.mkdir(parents=True, exist_ok=True)
    fig.write_html(str(out_html), include_plotlyjs="cdn", full_html=True)


def _emotion_color(zone: str) -> str:
    """Devolve a cor fixa da zona emocional, usando EMO_COLORS."""
    return EMO_COLORS.get(str(zone), EMO_COLORS["Indefinido"])


def _plot_block_spectrogram_clean_from_S(
    S_db: np.ndarray,
    freqs: np.ndarray,
    times_blk: np.ndarray,
    title: str,
) -> go.Figure:
    """
    Espectrograma STFT de um bloco (j√° em dB), SEM sobrepor picos.
    Usa os S_db / times j√° gerados pelo pipeline (sdb_mem, times_mem).
    """
    fig = go.Figure()
    fig.add_trace(
        go.Heatmap(
            z=S_db,
            x=times_blk,
            y=freqs,
            coloraxis="coloraxis",
            zsmooth=False,
        )
    )

    fig.update_layout(
        title=dict(text=title, x=0.5, xanchor="center"),
        xaxis_title="Tempo (s)",
        yaxis_title="Frequ√™ncia (Hz)",
        template="plotly_white",
        width=900,
        height=450,
        margin=dict(l=70, r=70, t=60, b=60),
        coloraxis=dict(
            colorscale="Inferno",
            colorbar=dict(title="Amplitude (dB)")
        ),
    )

    return fig


def _plot_song_overview(
    S_full_db: np.ndarray,  # espectrograma da m√∫sica inteira em dB
    freqs: np.ndarray,
    times_full: np.ndarray,
    df_song_blocks: pd.DataFrame,  # df_song (t_start_s, t_end_s, emotion_zone)
    title: str
) -> go.Figure:
    """
    Figura em 2 linhas, pensada para o TCC:
      (1) Espectrograma STFT da m√∫sica inteira (dB)
      (2) Linha temporal com blocos emocionais (10 s), coloridos por 'emotion_zone'
    """
    # Garante que s√≥ blocos v√°lidos entrem
    df_blocks = df_song_blocks.copy()
    df_blocks = df_blocks[df_blocks["t_end_s"] > df_blocks["t_start_s"]].reset_index(drop=True)
    if df_blocks.empty:
        raise ValueError("df_song_blocks est√° vazio ou sem blocos v√°lidos.")

    # Dura√ß√£o total para limitar eixo x
    if len(times_full):
        total_dur = float(times_full[-1])
    else:
        total_dur = float(df_blocks["t_end_s"].max())

    # -------------------- FIGURA COM SUBPLOTS --------------------
    fig = make_subplots(
        rows=2,
        cols=1,
        shared_xaxes=True,
        row_heights=[0.76, 0.24],
        vertical_spacing=0.07,
        subplot_titles=(None, "Blocos emocionais"),
    )

    # ---- (1) Espectrograma completo ----
    fig.add_trace(
        go.Heatmap(
            z=S_full_db,
            x=times_full,
            y=freqs,
            coloraxis="coloraxis",
            zsmooth=False,
        ),
        row=1, col=1,
    )

    # ---- (2) Faixa de blocos emocionais (ret√¢ngulos cheios) ----
    legend_done = set()
    y0, y1 = 0.0, 1.0  # faixa vertical arbitr√°ria para o painel de blocos

    for _, r in df_blocks.iterrows():
        t0 = float(r["t_start_s"])
        t1 = float(r["t_end_s"])
        zone = str(r.get("emotion_zone", "Indefinido"))
        color = _emotion_color(zone)

        # ret√¢ngulo preenchido
        fig.add_shape(
            type="rect",
            xref="x",
            yref="y2",
            x0=t0,
            x1=t1,
            y0=y0,
            y1=y1,
            line=dict(width=0),
            fillcolor=color,
            opacity=0.85,
        )

        # trace "fantasma" s√≥ para aparecer na legenda
        if zone not in legend_done:
            fig.add_trace(
                go.Scatter(
                    x=[None],
                    y=[None],
                    mode="markers",
                    marker=dict(size=10, color=color),
                    name=zone,
                    showlegend=True,
                    hoverinfo="skip",
                ),
                row=2, col=1,
            )
            legend_done.add(zone)

    # -------------------- LAYOUT "MAIS ACAD√äMICO" --------------------
    fig.update_layout(
        title=dict(
            text=title,
            x=0.5,
            xanchor="center",
            yanchor="top",
        ),
        template="plotly_white",
        width=1100,
        height=650,
        margin=dict(l=70, r=70, t=80, b=80),
        # coloraxis global para o Heatmap
        coloraxis=dict(
            colorscale="Inferno",
            colorbar=dict(
                title="Amplitude (dB)",
                titleside="right",
            ),
        ),
        legend=dict(
            title="Zona emocional",
            orientation="h",
            x=0.5,
            xanchor="center",
            y=-0.08,
            yanchor="top",
        ),
    )

    # Eixos
    fig.update_xaxes(
        title_text="Tempo (s)",
        row=2, col=1,
        range=[0.0, total_dur],
        showgrid=False,
    )
    fig.update_yaxes(
        title_text="Frequ√™ncia (Hz)",
        row=1, col=1,
        showgrid=False,
    )
    fig.update_yaxes(
        visible=False,
        row=2, col=1,
        range=[y0, y1],
        showgrid=False,
    )

    return fig


# ===================== GERAR PLOTS USANDO AS VARI√ÅVEIS DO PIPELINE =====================
# OBS: ideal √© colocar este trecho DENTRO do seu `for sid in ...:`.
# Aqui deixei protegido com `in locals()` para n√£o dar NameError
# se voc√™ rodar a c√©lula isolada.

if SAVE_PLOTS and 'df_song' in locals() and 'sdb_mem' in locals():
    norm_title = re.sub(r'[^a-zA-Z0-9_-]+', '_', str(title))[:80]
    song_dir = PLOTS_DIR / f"song_{int(sid)}_{norm_title}"
    song_dir.mkdir(parents=True, exist_ok=True)

    # -------- PLOTS POR BLOCO (SEM PICOS) --------
    for i in range(len(df_song)):
        try:
            S_db_blk  = sdb_mem[i]   if i < len(sdb_mem)   else np.array([])
            times_blk = times_mem[i] if i < len(times_mem) else np.array([])
            if S_db_blk.size == 0 or times_blk.size == 0:
                continue

            freqs_i = bytes_to_ndarray(df_song.iloc[i]["freqs_bytes"])
            if freqs_i.size == 0:
                freqs_i = librosa.fft_frequencies(sr=sr, n_fft=N_FFT).astype(np.float32)

            fig_blk = _plot_block_spectrogram_clean_from_S(
                S_db=S_db_blk,
                freqs=freqs_i,
                times_blk=times_blk + float(df_song.iloc[i]["t_start_s"]),  # eixo absoluto
                title=(
                    f"{title} ‚Äî bloco {int(df_song.iloc[i]['block_idx'])} "
                    f"({float(df_song.iloc[i]['t_start_s']):.1f}s‚Äì"
                    f"{float(df_song.iloc[i]['t_end_s']):.1f}s)"
                ),
            )

            out_html_blk = song_dir / f"block_{int(df_song.iloc[i]['block_idx']):04d}.html"
            _save_plot_html(fig_blk, out_html_blk)

            if DO_PLOTS:
                display(fig_blk)

        except Exception as e:
            print(
                f"[WARN] song_id={sid} bloco {df_song.iloc[i].get('block_idx', 'NA')}: "
                f"falha ao salvar plot ({e})"
            )

# -------- OVERVIEW (ESPECTRO COMPLETO + BLOCOS EMOCIONAIS) --------
if SAVE_PLOTS and 'S_full_db_ref' in locals() and 'times' in locals() \
   and S_full_db_ref is not None and times is not None and len(times):

    norm_title = re.sub(r'[^a-zA-Z0-9_-]+', '_', str(title))[:80]
    song_dir = PLOTS_DIR / f"song_{int(sid)}_{norm_title}"
    song_dir.mkdir(parents=True, exist_ok=True)

    cols_needed = {"t_start_s", "t_end_s", "emotion_zone", "block_idx"}
    if not cols_needed.issubset(set(df_song.columns)):
        print(f"[WARN] song_id={sid}: df_song sem colunas para overview; pulando.")
    else:
        try:
            fig_over = _plot_song_overview(
                S_full_db=S_full_db_ref,
                freqs=freqs,
                times_full=times,
                df_song_blocks=df_song,
                title=f"{title} ‚Äî vis√£o geral (m√∫sica inteira)",
            )
            out_html_over = song_dir / "overview.html"
            _save_plot_html(fig_over, out_html_over)

            if DO_PLOTS:
                display(fig_over)

        except Exception as e:
            print(f"[WARN] song_id={sid}: falha ao gerar overview ({e})")

    # libera mem√≥ria do espectrograma completo
    S_full_db_ref = None
    gc.collect()

# 5. Fingerprint

### 5.1 Fingerprint tipo Shazam (constellation + peak-pair hash)

In [None]:
from dataclasses import dataclass
from pathlib import Path
import numpy as np
import librosa
from collections import defaultdict, Counter

@dataclass
class ShazamConfig:
    sr: int = 22050
    n_fft: int = 2048
    hop: int = 512
    peak_neighborhood_freq: int = 15     # vizinhan√ßa p/ detectar m√°ximos (bins)
    peak_neighborhood_time: int = 10     # vizinhan√ßa (frames)
    amp_min_db: float = -60.0            # limiar de magnitude em dB
    fanout: int = 5                      # n¬∫ de alvos por √¢ncora
    target_t_min: int = 1                # janelinha alvo (frames) ap√≥s √¢ncora
    target_t_max: int = 50               # ~ (50 * hop / sr) s
    f_quant: int = 1                     # quantiza√ß√£o (bins)
    dt_quant: int = 1                    # quantiza√ß√£o (frames)

def _spectrogram(y, cfg: ShazamConfig):
    S = np.abs(librosa.stft(y, n_fft=cfg.n_fft, hop_length=cfg.hop, window="hann", center=True))
    S_db = librosa.amplitude_to_db(S, ref=np.max)
    return S_db

def _constellation(S_db, cfg: ShazamConfig):
    # m√°scara de picos locais (max pooling 2D simples)
    from scipy.ndimage import maximum_filter
    neighborhood = (cfg.peak_neighborhood_freq, cfg.peak_neighborhood_time)
    local_max = (S_db == maximum_filter(S_db, size=neighborhood))
    mask = local_max & (S_db >= cfg.amp_min_db)
    freqs, times = np.where(mask)
    # ordena por tempo para estabilidade
    order = np.argsort(times)
    return freqs[order], times[order]

def _hash_pairs(freqs, times, cfg: ShazamConfig):
    hashes = []
    N = len(times)
    for i in range(N):
        f1, t1 = freqs[i], times[i]
        # janela de busca de alvos [t1+min, t1+max]
        j_start = np.searchsorted(times, t1 + cfg.target_t_min, side='left')
        j_end   = np.searchsorted(times, t1 + cfg.target_t_max, side='right')
        # limita fan-out
        for j in range(j_start, min(j_end, j_start + cfg.fanout)):
            f2, t2 = freqs[j], times[j]
            dt = t2 - t1
            # quantiza√ß√£o leve para toler√¢ncia
            f1q = int(f1 // cfg.f_quant)
            f2q = int(f2 // cfg.f_quant)
            dtq = int(dt // cfg.dt_quant)
            # hash como tupla (f1,f2,dt) ‚Äî pode serializar p/ inteiro se quiser
            hashes.append(((f1q, f2q, dtq), t1))
    return hashes

class ShazamIndex:
    def __init__(self, cfg: ShazamConfig):
        self.cfg = cfg
        self.db = defaultdict(list)  # hash -> list[(song_id, t_anchor)]
        self.meta = {}               # song_id -> dict(meta)

    def add_track(self, song_id: int, path: Path, meta: dict=None):
        y, sr = librosa.load(path, sr=self.cfg.sr, mono=True)
        S_db = _spectrogram(y, self.cfg)
        freqs, times = _constellation(S_db, self.cfg)
        hashes = _hash_pairs(freqs, times, self.cfg)
        for h, t1 in hashes:
            self.db[h].append((song_id, int(t1)))
        self.meta[song_id] = meta or {}

    def query(self, y_query: np.ndarray):
        S_db = _spectrogram(y_query, self.cfg)
        freqs, times = _constellation(S_db, self.cfg)
        q_hashes = _hash_pairs(freqs, times, self.cfg)

        votes = Counter()   # (song_id, offset) -> contagem
        for h, t1q in q_hashes:
            for (sid, t1s) in self.db.get(h, ()):
                offset = t1s - t1q
                votes[(sid, offset)] += 1

        if not votes:
            return None

        # pega o (sid, offset) com mais votos
        (sid_best, off_best), score = votes.most_common(1)[0]
        return {"song_id": sid_best, "offset_frames": off_best, "votes": score}


### 5.2 - Fingerprint Haitsma & Kalker (sub-band energy + bits diferenciais)

In [None]:
from dataclasses import dataclass
import numpy as np
import librosa
from collections import defaultdict, Counter

@dataclass
class HKConfig:
    sr: int = 8000                # cl√°ssico roda bem com 8 kHz
    frame_len: float = 0.371      # ~371 ms (literatura usa ~372 ms)
    frame_hop: float = 0.031      # ~31 ms
    n_bands: int = 32             # 32 sub-bandas
    f_min: float = 300.0
    f_max: float = 3000.0         # voz/m√©dios onde h√° mais robustez
    eps: float = 1e-10

def _hk_filterbank(cfg: HKConfig, n_fft: int):
    # bandas log-spaced (simples)
    edges = np.geomspace(cfg.f_min, cfg.f_max, cfg.n_bands+1)
    return edges

def _frame_params(cfg: HKConfig):
    N = int(round(cfg.frame_len * cfg.sr))
    H = int(round(cfg.frame_hop * cfg.sr))
    n_fft = 1
    while n_fft < N:
        n_fft *= 2
    return N, H, n_fft

def _band_energies(S_mag, sr, n_fft, edges):
    # S_mag: |STFT| (freq x time)
    freqs = np.linspace(0, sr/2, S_mag.shape[0])
    E = np.zeros((len(edges)-1, S_mag.shape[1]), dtype=np.float32)
    for b in range(len(edges)-1):
        f0, f1 = edges[b], edges[b+1]
        idx = np.where((freqs >= f0) & (freqs < f1))[0]
        if len(idx) == 0: continue
        Eb = (S_mag[idx, :]**2).sum(axis=0)  # energia
        E[b, :] = Eb
    # log-escala (estabilidade)
    return np.log(E + 1e-10)

def hk_subfingerprints(y, cfg: HKConfig):
    N, H, n_fft = _frame_params(cfg)
    S = librosa.stft(y, n_fft=n_fft, hop_length=H, win_length=N, window="hann", center=False)
    S_mag = np.abs(S)
    edges = _hk_filterbank(cfg, n_fft)
    Elog = _band_energies(S_mag, cfg.sr, n_fft, edges)  # shape: (bands, frames)
    B = Elog.shape[0]

    # Gera 32 bits comparando energias (ex: entre bandas adjacentes no TEMPO)
    # bit_k(t) = 1 se E_k(t) - E_k(t-1) >= E_{k+1}(t) - E_{k+1}(t-1), sen√£o 0
    # (variante simplificada e comum em reimplementa√ß√µes)
    F = Elog.shape[1]
    bits = np.zeros((F-1,), dtype=np.uint32)
    for t in range(1, F):
        word = 0
        for k in range(B-1):
            dk  = Elog[k, t]   - Elog[k, t-1]
            dkp = Elog[k+1, t] - Elog[k+1, t-1]
            bit = 1 if (dk >= dkp) else 0
            word |= (bit << (k if k < 32 else 31))  # limita a 32 bits
        bits[t-1] = word
    return bits  # vetor de sub-fingerprints por quadro (uint32)

class HKIndex:
    def __init__(self, cfg: HKConfig):
        self.cfg = cfg
        self.db = defaultdict(list)  # word -> [(song_id, frame_idx)]

    def add_track(self, song_id: int, path: Path):
        y, sr = librosa.load(path, sr=self.cfg.sr, mono=True)
        sfp = hk_subfingerprints(y, self.cfg)
        for t, w in enumerate(sfp):
            self.db[int(w)].append((song_id, t))

    def query(self, y_query: np.ndarray, topk_offsets: int = 1):
        sfp_q = hk_subfingerprints(y_query, self.cfg)
        votes = Counter()
        for tq, w in enumerate(sfp_q):
            for (sid, ts) in self.db.get(int(w), ()):
                votes[(sid, ts - tq)] += 1
        if not votes:
            return None
        best = votes.most_common(topk_offsets)
        # retorna o melhor (sid, offset, votos)
        (sid, off), score = best[0]
        return {"song_id": sid, "offset_frames": off, "votes": score}


### 5.3 Execu√ß√£o do Fingerprint

#### 5.3.1 Helpers

In [None]:
def _s2smp(t, sr):
    return int(round(t * sr))

def _maybe_resample(y, sr_from, sr_to):
    return (y if sr_from == sr_to else librosa.resample(y, orig_sr=sr_from, target_sr=sr_to)), sr_to

def add_block_to_index(fp_index, FP_METHOD, y_block, sr_block,
                       song_id: int, block_id: int, t0_block_s: float, meta: dict=None):
    """
    Indexa UM bloco (10 s) no √≠ndice de fingerprint, com metadados.
    O t0 (em segundos absoluto dentro da m√∫sica) √© guardado apenas como refer√™ncia.
    """
    if FP_METHOD == "shazam":
        # ShazamIndex espera √°udio no sr do cfg
        y_in, _ = _maybe_resample(y_block, sr_block, fp_index.cfg.sr)
        # gera hashes e insere como se fosse uma "faixa", mas voc√™ pode guardar block_id no meta
        # -- pequena adapta√ß√£o: exp√µe um m√©todo interno equivalente ao add_track para arrays:
        S_db = np.abs(librosa.stft(y_in, n_fft=fp_index.cfg.n_fft, hop_length=fp_index.cfg.hop, window="hann", center=True))
        S_db = librosa.amplitude_to_db(S_db, ref=np.max)
        # constellation:
        from scipy.ndimage import maximum_filter
        neighborhood = (fp_index.cfg.peak_neighborhood_freq, fp_index.cfg.peak_neighborhood_time)
        local_max = (S_db == maximum_filter(S_db, size=neighborhood))
        mask = local_max & (S_db >= fp_index.cfg.amp_min_db)
        freqs, times = np.where(mask)
        order = np.argsort(times); freqs = freqs[order]; times = times[order]

        # pares (√¢ncora, alvo)
        hashes = []
        for i in range(len(times)):
            f1, t1 = freqs[i], times[i]
            j_start = np.searchsorted(times, t1 + fp_index.cfg.target_t_min, side='left')
            j_end   = np.searchsorted(times, t1 + fp_index.cfg.target_t_max, side='right')
            for j in range(j_start, min(j_end, j_start + fp_index.cfg.fanout)):
                f2, t2 = freqs[j], times[j]
                dt = t2 - t1
                f1q = int(f1 // fp_index.cfg.f_quant)
                f2q = int(f2 // fp_index.cfg.f_quant)
                dtq = int(dt // fp_index.cfg.dt_quant)
                hashes.append(((f1q, f2q, dtq), int(t1)))

        # insere no √≠ndice com (song_id, block_id)
        for h, t1 in hashes:
            fp_index.db[h].append((int(song_id), int(block_id), int(t1)))

        # guarda meta
        if not hasattr(fp_index, "meta"):
            fp_index.meta = {}
        fp_index.meta[(int(song_id), int(block_id))] = {"t0": float(t0_block_s), **(meta or {})}

    elif FP_METHOD == "hk":
        # HKIndex gera sub-fingerprints por quadro; adaptamos para armazenar (song_id, block_id, frame_idx)
        y_in, _ = _maybe_resample(y_block, sr_block, fp_index.cfg.sr)
        sfp = hk_subfingerprints(y_in, fp_index.cfg)  # se importou como m√≥dulo, ajuste: from fp_hk import hk_subfingerprints
        for t, w in enumerate(sfp):
            fp_index.db[int(w)].append((int(song_id), int(block_id), int(t)))

        if not hasattr(fp_index, "meta"):
            fp_index.meta = {}
        fp_index.meta[(int(song_id), int(block_id))] = {"t0": float(t0_block_s), **(meta or {})}
    else:
        raise ValueError("FP_METHOD inv√°lido.")


def query_clip(fp_index, FP_METHOD, y_clip, sr_clip, topk_offsets=1):
    """
    Consulta um clipe curto (ex.: 5 s). Retorna (song_id, block_id, offset, votes).
    """
    from collections import Counter
    votes = Counter()

    if FP_METHOD == "shazam":
        yq, _ = _maybe_resample(y_clip, sr_clip, fp_index.cfg.sr)
        # repetir a gera√ß√£o de hashes da query (mesmo que no add_block)
        S_db = np.abs(librosa.stft(yq, n_fft=fp_index.cfg.n_fft, hop_length=fp_index.cfg.hop, window="hann", center=True))
        S_db = librosa.amplitude_to_db(S_db, ref=np.max)
        from scipy.ndimage import maximum_filter
        neighborhood = (fp_index.cfg.peak_neighborhood_freq, fp_index.cfg.peak_neighborhood_time)
        local_max = (S_db == maximum_filter(S_db, size=neighborhood))
        mask = local_max & (S_db >= fp_index.cfg.amp_min_db)
        freqs, times = np.where(mask)
        order = np.argsort(times); freqs = freqs[order]; times = times[order]

        # q-hashes
        q_hashes = []
        for i in range(len(times)):
            f1, t1 = freqs[i], times[i]
            j_start = np.searchsorted(times, t1 + fp_index.cfg.target_t_min, side='left')
            j_end   = np.searchsorted(times, t1 + fp_index.cfg.target_t_max, side='right')
            for j in range(j_start, min(j_end, j_start + fp_index.cfg.fanout)):
                f2, t2 = freqs[j], times[j]
                dt = t2 - t1
                f1q = int(f1 // fp_index.cfg.f_quant)
                f2q = int(f2 // fp_index.cfg.f_quant)
                dtq = int(dt // fp_index.cfg.dt_quant)
                q_hashes.append(((f1q, f2q, dtq), int(t1)))

        # vota√ß√£o por offset
        for h, t1q in q_hashes:
            for (sid, bid, t1s) in fp_index.db.get(h, ()):
                votes[(sid, bid, t1s - t1q)] += 1

    elif FP_METHOD == "hk":
        yq, _ = _maybe_resample(y_clip, sr_clip, fp_index.cfg.sr)
        # gerar sub-fingerprints (necess√°rio importar a fun√ß√£o se n√£o estiver em classe)
        sfp_q = hk_subfingerprints(yq, fp_index.cfg)
        for tq, w in enumerate(sfp_q):
            for (sid, bid, ts) in fp_index.db.get(int(w), ()):
                votes[(sid, bid, ts - tq)] += 1

    if not votes:
        return None

    (sid, bid, off), score = votes.most_common(1)[0]
    return {"song_id": int(sid), "block_id": int(bid), "offset_frames": int(off), "votes": int(score)}

In [None]:
# ======== FINGERPRINT CONFIG ========
FP_METHOD = "shazam"  # "shazam" ou "hk"

if FP_METHOD == "shazam":
    fp_cfg = ShazamConfig(sr=sr, n_fft=2048, hop=512,
                          peak_neighborhood_freq=15, peak_neighborhood_time=10,
                          amp_min_db=-60.0, fanout=5, target_t_min=1, target_t_max=50)
    fp_index = ShazamIndex(fp_cfg)
elif FP_METHOD == "hk":
    # Para HK cl√°ssico, usa-se sr mais baixo; se sr != cfg.sr, vamos reamostrar no helper
    fp_cfg = HKConfig(sr=8000, frame_len=0.371, frame_hop=0.031, n_bands=32, f_min=300.0, f_max=3000.0)
    fp_index = HKIndex(fp_cfg)
else:
    raise ValueError("FP_METHOD inv√°lido. Use 'shazam' ou 'hk'.")


### Indexa√ß√£o dos blocos

In [None]:
# ======== INDEXA√á√ÉO DOS BLOCOS USANDO O DATAFRAME 'blocks' ========
# pr√©-requisitos: y, sr, song_id, title, fp_index, FP_METHOD, e os helpers _s2smp(), add_block_to_index()

blocks

# def _clip_to_audio_bounds(t0, t1, sr, y_len):
#     s0 = max(0, _s2smp(float(t0), sr))
#     s1 = min(y_len, _s2smp(float(t1), sr))
#     return s0, s1

# # 1) selecionar blocos s√≥ desta m√∫sica
# blocks_song = blocks.loc[blocks["song_id"].astype("Int64") == int(song_id)].copy()
# if blocks_song.empty:
#     raise ValueError(f"N√£o h√° blocos em 'blocks' para song_id={song_id}.")

# # 2) ordenar e indexar
# blocks_song = blocks_song.sort_values("t_start_s").reset_index(drop=True)

# n_added = 0
# for _, row in blocks_song.iterrows():
#     t0 = float(row["t_start_s"])
#     t1 = float(row["t_end_s"])
#     bid = int(row["block_idx"])

#     s0, s1 = _clip_to_audio_bounds(t0, t1, sr, len(y))
#     if s1 - s0 < _s2smp(0.2, sr):   # descarta blocos muito curtos
#         continue

#     seg = y[s0:s1]
#     meta = {
#         "title":        title,
#         "emotion_zone":  row.get("emotion_zone", None),
#         "emotion_label": row.get("emotion_label", None),
#         "valence":       float(row.get("valence", np.nan)),
#         "arousal":       float(row.get("arousal", np.nan)),
#         "valence_z":     float(row.get("valence_z", np.nan)),
#         "arousal_z":     float(row.get("arousal_z", np.nan)),
#         "t_start_s":     t0,
#         "t_end_s":       t1,
#     }

#     add_block_to_index(
#         fp_index, FP_METHOD,
#         y_block=seg, sr_block=sr,
#         song_id=int(song_id), block_id=bid, t0_block_s=t0,
#         meta=meta
#     )
#     n_added += 1

# print(f"Indexados {n_added} blocos de song_id={song_id} ({title}).")


Unnamed: 0,song_id,block_idx,t_start_s,t_end_s,valence,arousal,valence_z,arousal_z,emotion_zone,emotion_label
0,2029,0,15.0,25.0,0.001083,-0.014083,1.441287,-1.063899,Q2,Calmo/Relaxado
1,2029,1,25.0,35.0,-0.007333,-0.017667,1.30575,-1.103187,Q2,Calmo/Relaxado
2,2029,2,35.0,45.0,-0.025333,-0.005083,1.015891,-0.965224,Q2,Calmo/Relaxado
3,2029,3,45.0,55.0,-0.03025,-0.021333,0.936716,-1.143388,Q2,Calmo/Relaxado
4,2029,4,55.0,65.0,-0.063167,0.001083,0.406648,-0.897613,Q2,Calmo/Relaxado
5,2029,5,65.0,75.0,-0.123667,0.031667,-0.567604,-0.562299,Q4,Triste/Deprimido
6,2029,6,75.0,85.0,-0.181417,0.02675,-1.497571,-0.616205,Q4,Triste/Deprimido
7,2029,7,85.0,95.0,-0.176833,0.032417,-1.423764,-0.554076,Q4,Triste/Deprimido
8,2029,8,95.0,105.0,-0.15675,0.110833,-1.100355,0.30568,Q3,Raiva/Tenso
9,2029,9,105.0,115.0,-0.1475,0.138583,-0.9514,0.60993,Q3,Raiva/Tenso


In [None]:
# ======== CONSULTA R√ÅPIDA ========
blk_row = blocks_song.iloc[0]  # pegue outro √≠ndice se quiser testar outro bloco
t0, t1 = float(blk_row.t_start_s), float(blk_row.t_end_s)
mid = (t0 + t1) / 2.0
win = 5.0
q0, q1 = max(t0, mid - win/2), min(t1, mid + win/2)

yq = y[_s2smp(q0, sr):_s2smp(q1, sr)]
res = query_clip(fp_index, FP_METHOD, yq, sr)
print("Resultado:", res)

if res is not None:
    sid = res["song_id"]; bid = res["block_id"]
    meta = fp_index.meta.get((sid, bid), {})
    print(f"match ‚Üí song_id={sid}, block_idx={bid}, votes={res['votes']}")
    print(f"emotions ‚Üí zone={meta.get('emotion_zone')} | label={meta.get('emotion_label')}")


In [None]:
fp_ok = []
for _, r in blocks_song.iterrows():
    t0, t1 = float(r.t_start_s), float(r.t_end_s)
    if (t1 - t0) < 5.0:
        fp_ok.append(False); continue
    mid = (t0 + t1)/2.0
    yq = y[_s2smp(mid-2.5, sr):_s2smp(mid+2.5, sr)]
    res = query_clip(fp_index, FP_METHOD, yq, sr)
    ok = (res is not None and res["song_id"] == int(song_id) and res["block_id"] == int(r.block_idx))
    fp_ok.append(ok)

blocks_song = blocks_song.assign(fp_ok=fp_ok)
print(blocks_song[["block_idx","t_start_s","t_end_s","emotion_label","fp_ok"]].head())

In [None]:
# Supondo: blocks (df com colunas do seu schema), index constru√≠do (Shazam/HK),
# fun√ß√£o make_snippets(block_row) -> lista de np.ndarray yq, sr
test_song_ids = blocks_song["song_id"].unique()

results = []
for song_id in test_song_ids:
    y, sr = load_audio(song_path(song_id), SR_TARGET)
    B = blocks[blocks.song_id == song_id]
    for _, b in B.iterrows():
        yqs = make_snippets(b, y, sr)  # 1‚Äì3 snippets
        for yq in yqs:
            gt_zone  = b["emotion_zone"]
            gt_label = b["emotion_label"]
            gt_V, gt_A = b["valence"], b["arousal"]

            res = query_clip(fp_index, FP_METHOD, yq, sr)  # retorna song_id, block_id
            if res is None:
                pred_zone = None; pred_label = None; pred_V = np.nan; pred_A = np.nan
            else:
                meta = fp_index.meta[(res["song_id"], res["block_id"])]
                pred_zone  = meta.get("emotion_zone")
                pred_label = meta.get("emotion_label")
                pred_V     = meta.get("valence")
                pred_A     = meta.get("arousal")

            results.append(dict(
                song_id=song_id, block_idx=int(b["block_idx"]),
                hit_exact = int(res is not None and res["song_id"]==song_id and res["block_id"]==int(b["block_idx"])),
                hit_pm1   = int(res is not None and res["song_id"]==song_id and abs(res["block_id"]-int(b["block_idx"]))<=1),
                gt_zone=gt_zone, pred_zone=pred_zone,
                gt_label=gt_label, pred_label=pred_label,
                gt_V=gt_V, pred_V=pred_V, gt_A=gt_A, pred_A=pred_A
            ))

df_res = pd.DataFrame(results)
# ‚Üí compute Hit@k, F1 macro, Œ∫, RMSE/MAE, etc., com IC95% via bootstrap por m√∫sica


In [None]:
# ============================================================
# Shazam-like fingerprint (constellation + peak pairs) ROBUSTO p/ seus .npz
# Caminho dos blocos: /content/drive/MyDrive/DataSet TCC/DEAM/stft_blocks/song_2001/*.npz
# ============================================================
from dataclasses import dataclass
from pathlib import Path
from collections import defaultdict, Counter
from typing import Dict, List, Tuple, Optional, Any

import numpy as np
import librosa
from scipy.ndimage import maximum_filter
import zipfile

# Plotly
import plotly.graph_objects as go


# ------------------------- Config ---------------------------

@dataclass
class ShazamConfig:
    # STFT (quando partir de √°udio)
    sr: int = 22050
    n_fft: int = 2048
    hop: int = 512
    window: str = "hann"
    center: bool = True

    # detec√ß√£o de picos
    peak_neighborhood_freq: int = 15
    peak_neighborhood_time: int = 10
    amp_min_db: float = -60.0

    # pares alvo
    fanout: int = 5
    target_t_min: int = 1
    target_t_max: int = 50

    # quantiza√ß√£o / hashing
    f_quant: int = 1
    dt_quant: int = 1
    bits_f: int = 12
    bits_dt: int = 14

    # consulta
    top_k: int = 5


# ---------------------- Utils de NPZ/Numpy ------------------

def _coerce_array(a):
    """np.asarray + desempacote 0-D object."""
    a = np.asarray(a)
    if a.ndim == 0 and a.dtype == object:
        a = np.asarray(a.item())
    return a

def _is_stringlike_scalar(a: np.ndarray) -> bool:
    """Retorna True se for escalar string/bytes/Path-like."""
    if a.ndim != 0:
        return False
    if a.dtype.kind in "SU":  # unicode/bytes numpy
        return True
    if a.dtype == object:
        v = a.item()
        return isinstance(v, (str, bytes, Path))
    return False

def _as_path(a: np.ndarray) -> Optional[Path]:
    if not _is_stringlike_scalar(a):
        return None
    v = a.item() if a.dtype == object else a.tolist()
    p = Path(v.decode() if isinstance(v, (bytes, bytearray)) else v)
    return p

def _load_array_maybe_path(entry) -> np.ndarray:
    """
    Se 'entry' for um escalar string/bytes com caminho .npy/.npz, abre e retorna o array adequado.
    Caso contr√°rio, retorna o pr√≥prio array (desempacotado).
    """
    a = _coerce_array(entry)
    # caso 0-D string apontando para arquivo
    p = _as_path(a)
    if p is not None and p.suffix.lower() in (".npy", ".npz"):
        if p.suffix.lower() == ".npy":
            arr = np.load(str(p), allow_pickle=True)
            return _coerce_array(arr)
        else:
            # .npz aninhado: tentamos chaves padr√£o
            with np.load(str(p), allow_pickle=True) as z:
                k = _pick_key(z, ["power", "S", "stft", "power_db", "S_db", "stft_db"])
                if k is None:
                    # pega o primeiro array do npz
                    k = z.files[0]
                return _coerce_array(z[k])
    return a

def _pick_key(z, candidates):
    """Procura por v√°rias chaves (com/sem '.npy')."""
    keys = set(z.files)
    for name in candidates:
        if name in keys:
            return name
        if not name.endswith(".npy") and (name + ".npy") in keys:
            return name + ".npy"
        if name.endswith(".npy") and (name[:-4]) in keys:
            return name[:-4]
    return None

def _ensure_2d(S, name="S"):
    S = _load_array_maybe_path(S)
    if S.ndim != 2:
        raise ValueError(f"Esperado {name} 2-D (F x T), obtive shape {S.shape}")
    return S

def _ensure_1d(v, name="vec"):
    v = _load_array_maybe_path(v)
    v = np.squeeze(v)
    if v.ndim != 1:
        raise ValueError(f"Esperado {name} 1-D, obtive shape {v.shape}")
    return v

def debug_npz_keys(path: Path):
    with np.load(str(path), allow_pickle=True) as z:
        print("chaves:", z.files)
        for k in z.files:
            a = np.asarray(z[k])
            shape, dtype = a.shape, a.dtype
            note = ""
            if _is_stringlike_scalar(a):
                note = " (stringlike scalar -> path)"
            elif a.ndim == 0 and a.dtype == object:
                try:
                    a2 = np.asarray(a.item())
                    shape, dtype = a2.shape, a2.dtype
                    note = f" (0-D object -> {shape})"
                except Exception:
                    note = " (0-D object n√£o expandiu)"
            print(f" - {k}: shape={shape}, dtype={dtype}{note}")


# ---------------------- Espectrogramas ----------------------

def _spectrogram_from_audio(y: np.ndarray, cfg: ShazamConfig) -> np.ndarray:
    S = np.abs(librosa.stft(y, n_fft=cfg.n_fft, hop_length=cfg.hop,
                            window=cfg.window, center=cfg.center))
    return librosa.amplitude_to_db(S, ref=np.max)

def _spectrogram_from_npz(npz: dict) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
    """
    L√™ S/power (ou *_db) e freqs/times. Resolve:
      - chaves com '.npy'
      - 0-D object
      - 0-D string apontando para .npy/.npz
    Retorna (S_db, freqs_hz, times_s).
    """
    # Espectrograma
    k_Sdb = _pick_key(npz, ["power_db", "S_db", "stft_db"])
    if k_Sdb is not None:
        S_db = _ensure_2d(npz[k_Sdb], name=k_Sdb)
    else:
        k_power = _pick_key(npz, ["power"])
        k_S     = _pick_key(npz, ["S", "stft"])
        if k_power is None and k_S is None:
            raise ValueError("NPZ n√£o cont√©m 'power' nem 'S'/'stft' (nem vers√µes *_db).")

        if k_power is not None:
            P = _ensure_2d(npz[k_power], name=k_power)
            P = np.maximum(P, 1e-12)
            S_db = librosa.power_to_db(P, ref=np.max)
        else:
            A = _ensure_2d(npz[k_S], name=k_S)
            if np.iscomplexobj(A):
                A = np.abs(A)
            A = np.maximum(A, 1e-12)
            S_db = librosa.amplitude_to_db(A, ref=np.max)

    # Freqs/Times
    k_f = _pick_key(npz, ["freqs", "frequencies"])
    k_t = _pick_key(npz, ["times", "t"])
    freqs = _ensure_1d(npz[k_f], "freqs") if k_f is not None else None
    times = _ensure_1d(npz[k_t], "times") if k_t is not None else None

    if freqs is None or times is None:
        sr   = int(np.squeeze(_load_array_maybe_path(npz.get("sr", 22050))))
        hop  = int(np.squeeze(_load_array_maybe_path(npz.get("hop", 512))))
        F, T = S_db.shape
        if freqs is None:
            freqs = np.linspace(0, sr/2, F)
        if times is None:
            times = np.arange(T) * (hop / sr)

    return S_db, freqs, times


# --------------------- Constela√ß√£o/Peaks --------------------

def _constellation(S_db: np.ndarray, cfg: ShazamConfig) -> Tuple[np.ndarray, np.ndarray]:
    if S_db.ndim != 2:
        raise ValueError(f"S_db precisa ser 2-D, obtive {S_db.shape}")
    neighborhood = (cfg.peak_neighborhood_freq, cfg.peak_neighborhood_time)
    local_max = (S_db == maximum_filter(S_db, size=neighborhood, mode="nearest"))
    mask = local_max & (S_db >= cfg.amp_min_db)
    freqs, times = np.where(mask)
    order = np.argsort(times, kind="mergesort")
    return freqs[order], times[order]


# ----------------------- Hash 64b ---------------------------

def _pack_hash_64(f1q: int, f2q: int, dtq: int, cfg: ShazamConfig) -> np.uint64:
    max_f  = (1 << cfg.bits_f) - 1
    max_dt = (1 << cfg.bits_dt) - 1
    f1q = int(np.clip(f1q, 0, max_f))
    f2q = int(np.clip(f2q, 0, max_f))
    dtq = int(np.clip(dtq, 0, max_dt))
    return np.uint64((f1q << (cfg.bits_f + cfg.bits_dt)) | (f2q << cfg.bits_dt) | dtq)

def _hash_pairs(freqs: np.ndarray, times: np.ndarray, cfg: ShazamConfig) -> List[Tuple[np.uint64, int]]:
    out: List[Tuple[np.uint64, int]] = []
    N = len(times)
    for i in range(N):
        f1, t1 = int(freqs[i]), int(times[i])
        j_start = np.searchsorted(times, t1 + cfg.target_t_min, side='left')
        j_end   = np.searchsorted(times, t1 + cfg.target_t_max, side='right')
        for j in range(j_start, min(j_end, j_start + cfg.fanout)):
            f2, t2 = int(freqs[j]), int(times[j])
            dt = t2 - t1
            f1q = f1 // cfg.f_quant
            f2q = f2 // cfg.f_quant
            dtq = dt // cfg.dt_quant
            out.append((_pack_hash_64(f1q, f2q, dtq, cfg), t1))
    return out


# ----------------------- √çndice/Busca -----------------------

class ShazamIndex:
    def __init__(self, cfg: ShazamConfig):
        self.cfg = cfg
        self.db: Dict[np.uint64, List[Tuple[int,int]]] = defaultdict(list)
        self.meta: Dict[int, Dict[str, Any]] = {}
        self._max_time_by_song: Dict[int, int] = {}

    def add_track_from_audio(self, song_id: int, y: np.ndarray, meta: dict=None):
        S_db = _spectrogram_from_audio(y, self.cfg)
        f_idx, t_idx = _constellation(S_db, self.cfg)
        for h, t1 in _hash_pairs(f_idx, t_idx, self.cfg):
            self.db[h].append((song_id, int(t1)))
        self._max_time_by_song[song_id] = int(t_idx.max()) if len(t_idx) else 0
        self.meta[song_id] = meta or {}

    def add_track_from_file(self, song_id: int, path: Path, meta: dict=None):
        y, _ = librosa.load(str(path), sr=self.cfg.sr, mono=True)
        self.add_track_from_audio(song_id, y, meta=meta or {"path": str(path)})

    def add_track_from_npz(self, song_id: int, npz_path: Path, meta: dict=None):
        with np.load(str(npz_path), allow_pickle=True) as z:
            S_db, freqs, times = _spectrogram_from_npz(z)
        f_idx, t_idx = _constellation(S_db, self.cfg)
        for h, t1 in _hash_pairs(f_idx, t_idx, self.cfg):
            self.db[h].append((song_id, int(t1)))
        self._max_time_by_song[song_id] = int(t_idx.max()) if len(t_idx) else 0
        m = {"npz_path": str(npz_path)}
        if meta: m.update(meta)
        self.meta[song_id] = m

    def _query_common(self, f_idx: np.ndarray, t_idx: np.ndarray, top_k: Optional[int]=None) -> List[Dict[str, Any]]:
        q_hashes = _hash_pairs(f_idx, t_idx, self.cfg)
        votes = Counter()
        for h, t1q in q_hashes:
            for (sid, t1s) in self.db.get(h, ()):
                votes[(sid, int(t1s) - int(t1q))] += 1
        if not votes:
            return []
        by_song: Dict[int, Tuple[int,int]] = {}
        for (sid, off), v in votes.items():
            if sid not in by_song or v > by_song[sid][1]:
                by_song[sid] = (off, v)
        items = []
        for sid, (off, v) in by_song.items():
            conf = v / max(1, len(q_hashes))
            items.append({
                "song_id": sid,
                "offset_frames": off,
                "votes": int(v),
                "confidence": float(conf),
                "meta": self.meta.get(sid, {}),
            })
        items.sort(key=lambda d: (-d["votes"], -d["confidence"]))
        return items[: (top_k or self.cfg.top_k)]

    def query_from_npz(self, npz_path: Path, top_k: Optional[int]=None) -> List[Dict[str, Any]]:
        with np.load(str(npz_path), allow_pickle=True) as z:
            S_db, _, _ = _spectrogram_from_npz(z)
        f_idx, t_idx = _constellation(S_db, self.cfg)
        return self._query_common(f_idx, t_idx, top_k=top_k)


# ------------------------ Visual ----------------------------

def plot_spectrogram_constellation_plotly(S_db: np.ndarray,
                                          freqs_hz: Optional[np.ndarray],
                                          times_s: Optional[np.ndarray],
                                          peaks_f: np.ndarray,
                                          peaks_t: np.ndarray,
                                          title: str = "Espectrograma + Constela√ß√£o"):
    F, T = S_db.shape
    x = times_s if times_s is not None else np.arange(T)
    y = freqs_hz if freqs_hz is not None else np.arange(F)
    fig = go.Figure(data=go.Heatmap(
        z=S_db, x=x, y=y, colorscale="magma",
        colorbar=dict(title="dB"),
        hovertemplate="t=%{x:.3f}<br>f=%{y:.1f}<br>z=%{z:.1f} dB<extra></extra>"
    ))
    fig.add_trace(go.Scatter(
        x=(times_s[peaks_t] if times_s is not None else peaks_t),
        y=(freqs_hz[peaks_f] if freqs_hz is not None else peaks_f),
        mode="markers",
        marker=dict(size=5, color="cyan"),
        name="peaks",
        hovertemplate="peak<br>t=%{x:.3f}<br>f=%{y:.1f}<extra></extra>"
    ))
    fig.update_layout(
        title=title,
        xaxis_title="Tempo (s)" if times_s is not None else "Frame",
        yaxis_title="Frequ√™ncia (Hz)" if freqs_hz is not None else "Bin",
        template="plotly_white",
        margin=dict(l=50, r=30, t=60, b=50)
    )
    fig.show()


# -------------------------- Main ----------------------------

if __name__ == "__main__":
    SONG_PATH = Path("/content/drive/MyDrive/DataSet TCC/DEAM/stft_blocks/song_2001")
    npz_list = sorted(SONG_PATH.glob("*.npz"))
    print(f"Arquivos encontrados: {len(npz_list)}")

    cfg = ShazamConfig()
    idx = ShazamIndex(cfg)

    # Indexar todos os blocos com o MESMO song_id (estilo Shazam por m√∫sica)
    for npz_path in npz_list:
        try:
            idx.add_track_from_npz(song_id=2001, npz_path=npz_path, meta={"from": "deam_npz"})
        except Exception as e:
            print(f"[WARN] Falhou em {npz_path.name}: {e}")
            # Diagn√≥stico r√°pido (descomente se precisar)
            # debug_npz_keys(npz_path)

    print(f"Hashes distintos no √≠ndice: {len(idx.db)}")

    # Consulta usando o primeiro bloco (se existir)
    if npz_list:
        qpath = npz_list[0]
        try:
            results = idx.query_from_npz(qpath, top_k=5)
            print("Resultados da consulta:")
            for r in results:
                print(r)

            # Plot do espectrograma + constela√ß√£o do query
            with np.load(str(qpath), allow_pickle=True) as z:
                S_db, freqs, times = _spectrogram_from_npz(z)
            f_idx, t_idx = _constellation(S_db, cfg)
            plot_spectrogram_constellation_plotly(
                S_db, freqs, times, f_idx, t_idx,
                title=f"Query: {qpath.name} ‚Äî Espectrograma + Constela√ß√£o"
            )
        except Exception as e:
            print(f"[ERR] Query falhou: {e}")
            # debug_npz_keys(qpath)


Arquivos encontrados: 29
[WARN] Falhou em blk_0000.npz: Esperado power 2-D (F x T), obtive shape ()
[WARN] Falhou em blk_0001.npz: Esperado power 2-D (F x T), obtive shape ()
[WARN] Falhou em blk_0002.npz: Esperado power 2-D (F x T), obtive shape ()
[WARN] Falhou em blk_0003.npz: Esperado power 2-D (F x T), obtive shape ()
[WARN] Falhou em blk_0004.npz: Esperado power 2-D (F x T), obtive shape ()
[WARN] Falhou em blk_0005.npz: Esperado power 2-D (F x T), obtive shape ()
[WARN] Falhou em blk_0006.npz: Esperado power 2-D (F x T), obtive shape ()
[WARN] Falhou em blk_0007.npz: Esperado power 2-D (F x T), obtive shape ()
[WARN] Falhou em blk_0008.npz: Esperado power 2-D (F x T), obtive shape ()
[WARN] Falhou em blk_0009.npz: Esperado power 2-D (F x T), obtive shape ()
[WARN] Falhou em blk_0010.npz: Esperado power 2-D (F x T), obtive shape ()
[WARN] Falhou em blk_0011.npz: Esperado power 2-D (F x T), obtive shape ()
[WARN] Falhou em blk_0012.npz: Esperado power 2-D (F x T), obtive shape ()


In [None]:
with np.load("/content/drive/MyDrive/DataSet TCC/DEAM/stft_blocks/song_2001/blk_0000.npz", allow_pickle=True) as z:
    S_db, freqs, times = _spectrogram_from_npz(z)
f_idx, t_idx = _constellation(S_db, ShazamConfig())
plot_spectrogram_constellation_plotly(S_db, freqs, times, f_idx, t_idx,
                                      title="Bloco ‚Äî Espectrograma + Constela√ß√£o (Shazam)")

ValueError: Input spectrogram S_db must be 2-dimensional, but got shape ()