In [3]:
import os
import glob
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import load_model
import librosa

# ─── parameters already defined earlier ─────────────────────────────────────
# NOTE(review): SR, DURATION, N_FFT, HOP_LENGTH, WIN_LENGTH, N_MELS and
# TARGET_FRAMES are used further down but are not defined in this cell;
# presumably they come from an earlier notebook cell — verify before running.
MODEL_PATH   = "cnn_network.h5"
AUDIO_FOLDER = "audio_test"                 # <-- your folder with the 5 files
CLASS_NAMES  = ["noisy", "music"]           # same order as used in training
# ────────────────────────────────────────────────────────────────────────────

# 1) Load the model
model = load_model(MODEL_PATH)
print("✅ Modello caricato")

# ──────────────────── PREPROCESSING FUNCTIONS ───────────────────────────────
def load_and_normalize(path, sr=SR, duration=DURATION):
    """Load an audio file as a fixed-length, peak-normalized waveform.

    Args:
        path: Path of the audio file, passed straight to ``librosa.load``.
        sr: Sample rate handed to ``librosa.load``.
        duration: Clip length in seconds handed to ``librosa.load``; together
            with ``sr`` it fixes the output length at ``int(sr * duration)``.

    Returns:
        The waveform padded at the end (``np.pad`` default mode) or truncated
        to exactly ``int(sr * duration)`` samples, divided by its peak
        absolute amplitude. An all-zero signal is divided by 1.0 instead.
    """
    samples, _ = librosa.load(path, sr=sr, duration=duration)
    target_len = int(sr * duration)

    # Pad only when the clip is shorter than requested, then clamp the length.
    missing = target_len - len(samples)
    if missing > 0:
        samples = np.pad(samples, (0, missing))
    samples = samples[:target_len]

    # Guard against division by zero on a completely silent clip.
    peak = np.max(np.abs(samples))
    if not peak:
        peak = 1.0
    return samples / peak

def compute_log_mel_spectrogram(y):
    """Compute a log-mel spectrogram with exactly TARGET_FRAMES columns.

    Args:
        y: Audio signal passed to ``librosa.feature.melspectrogram``.

    Returns:
        The power mel spectrogram converted with ``librosa.power_to_db``
        (``ref=np.max``), then truncated along axis 1 to TARGET_FRAMES, or
        padded at the end of axis 1 with the spectrogram's own minimum value.
    """
    mel_power = librosa.feature.melspectrogram(
        y=y,
        sr=SR,
        n_fft=N_FFT,
        hop_length=HOP_LENGTH,
        win_length=WIN_LENGTH,
        n_mels=N_MELS,
        power=2.0,
    )
    mel_db = librosa.power_to_db(mel_power, ref=np.max)

    n_frames = mel_db.shape[1]
    if n_frames >= TARGET_FRAMES:
        # Long enough already: keep only the first TARGET_FRAMES columns.
        return mel_db[:, :TARGET_FRAMES]

    # Too short: extend the time axis using the quietest value present.
    floor_db = mel_db.min()
    return np.pad(
        mel_db,
        ((0, 0), (0, TARGET_FRAMES - n_frames)),
        mode='constant',
        constant_values=floor_db,
    )

# 2) Collect every audio file in AUDIO_FOLDER (wav / mp3 / flac, with the
#    extension matched case-insensitively) into one sorted list.
file_list = sorted(
    found
    for pattern in ("*.[wW][aA][vV]", "*.[mM][pP]3", "*.[fF][lL][aA][cC]")
    for found in glob.glob(os.path.join(AUDIO_FOLDER, pattern))
)

# Abort early when there is nothing to classify.
if len(file_list) == 0:
    raise RuntimeError(f"Nessun file audio trovato in {AUDIO_FOLDER}")

# 3) Helper: predict a single file
def predict_file(path):
    """Classify one audio file with the loaded model.

    Args:
        path: Path of the audio file to classify.

    Returns:
        A ``(label, proba)`` tuple: ``label`` is the CLASS_NAMES entry at the
        argmax of the model output, ``proba`` is the first row returned by
        ``model.predict``.
    """
    waveform = load_and_normalize(path)
    features = compute_log_mel_spectrogram(waveform)

    # Standardize per file; fall back to 1.0 when the std is zero.
    mean = features.mean()
    std = features.std()
    if not std:
        std = 1.0
    standardized = (features - mean) / std

    # Add a leading batch axis and a trailing channel axis.
    batch = standardized[np.newaxis, ..., np.newaxis]

    scores = model.predict(batch, verbose=0)[0]
    best = int(np.argmax(scores))
    return CLASS_NAMES[best], scores

# 4) Loop over every collected file and print the predicted label followed by
#    the per-class probabilities. The class labels in the report come from
#    CLASS_NAMES rather than being hard-coded, so the printed names always
#    line up with the model outputs they describe.
print("\nRisultati:")
for path in file_list:
    label, proba = predict_file(path)
    fname = os.path.basename(path)
    # One "<class> <probability>" entry per class, in CLASS_NAMES order.
    scores = " | ".join(
        f"{name} {p:.3f}" for name, p in zip(CLASS_NAMES, proba)
    )
    print(f"• {fname:<25} →  {label.upper():5s}  ({scores})")



✅ Modello caricato

Risultati:
• music_pure.wav            →  MUSIC  (noisy 0.000 | music 1.000)
• noise_pure.wav            →  NOISY  (noisy 1.000 | music 0.000)
• voice_base_music.wav      →  MUSIC  (noisy 0.000 | music 1.000)
• voice_base_noise.wav      →  NOISY  (noisy 0.945 | music 0.055)
• voice_base_pure.wav       →  MUSIC  (noisy 0.000 | music 1.000)
