In [1]:
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "-1"  # Desactiva GPU
os.environ["TF_XLA_FLAGS"] = "--tf_xla_enable_xla_devices=false"  # Desactiva compilación JIT
import tensorflow as tf
from tensorflow import keras
import librosa
import numpy as np
import pyaudio
import wave

In [2]:
def grabar_audio(nombre_archivo="grabacion.wav", duracion=30, sample_rate=22050):
    formato = pyaudio.paInt16
    canales = 1
    chunk = 1024
    audio = pyaudio.PyAudio()
    stream = audio.open(format=formato, channels=canales,
                        rate=sample_rate, input=True,
                        frames_per_buffer=chunk)

    print("🎙️ Grabando durante", duracion, "segundos...")
    frames = []

    for _ in range(0, int(sample_rate / chunk * duracion)):
        data = stream.read(chunk)
        frames.append(data)

    print("✅ Grabación terminada.")

    stream.stop_stream()
    stream.close()
    audio.terminate()

    with wave.open(nombre_archivo, 'wb') as wf:
        wf.setnchannels(canales)
        wf.setsampwidth(audio.get_sample_size(formato))
        wf.setframerate(sample_rate)
        wf.writeframes(b''.join(frames))


In [3]:
def audio_a_mfcc(archivo_audio, max_pad_len=862):  # ajusta este valor a tu dataset
    y, sr = librosa.load(archivo_audio, sr=22050)
    mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=40)

    if mfcc.shape[1] < max_pad_len:
        pad_width = max_pad_len - mfcc.shape[1]
        mfcc = np.pad(mfcc, pad_width=((0, 0), (0, pad_width)), mode='constant')
    else:
        mfcc = mfcc[:, :max_pad_len]
        
    return mfcc


In [22]:
def predecir_genero_desde_audio(audio_path, modelo, etiquetas=None):
    # Paso 1: Cargar audio y extraer MFCC
    audio_series, sample_rate = librosa.load(audio_path, duration=30.0, res_type="soxr_hq")
    mfcc = librosa.feature.mfcc(y=audio_series, sr=sample_rate, n_mfcc=40).T

    # Paso 2: Padding o recorte
    desired_length = 1300
    if mfcc.shape[0] < desired_length:
        pad_width = desired_length - mfcc.shape[0]
        mfcc = np.pad(mfcc, ((0, pad_width), (0, 0)), mode='constant')
    elif mfcc.shape[0] > desired_length:
        mfcc = mfcc[:desired_length, :]

    # Paso 3: Expandir dimensión para batch
    entrada = mfcc[np.newaxis, ...]  # (1, 1300, 40)

    # Paso 4: Predecir
    prediccion = modelo.predict(entrada)
    indice = np.argmax(prediccion)
    confianza = prediccion[0][indice]
    genero = etiquetas[indice] if etiquetas else indice
    return genero, confianza


In [None]:
# Cargar el modelo
modelo = keras.models.load_model("genre_classifier_model.keras")

# Opcional: etiquetas si las tienes
etiquetas = ["blues", "classical", "country", "disco", "hiphop", "jazz", "metal", "pop", "reggae", "rock"]  # por ejemplo

# Grabar
grabar_audio("grabacion.wav", duracion=30)

🎙️ Grabando durante 30 segundos...
✅ Grabación terminada.
Forma de entrada: [[-388.8929    -355.73294   -350.6865    ... -329.34967   -327.11124
  -343.36945  ]
 [  64.098526    77.31606     79.23186   ...   50.90915     42.084923
    46.05301  ]
 [ -52.434376   -55.724068   -57.874985  ...  -38.218025   -26.128994
   -23.494293 ]
 ...
 [  -9.213314    -9.239636   -10.564787  ...   -6.000933    -7.1496296
   -10.122371 ]
 [ -10.0705805   -7.6585      -9.575674  ...   -4.182612    -3.8900504
    -5.983966 ]
 [   5.077876     1.2550437    1.5002912 ...   10.255703    13.931439
    14.495608 ]]


ValueError: Exception encountered when calling Sequential.call().

[1mInvalid input shape for input Tensor("data:0", shape=(32, 862), dtype=float32). Expected shape (None, 1300, 40), but input has incompatible shape (32, 862)[0m

Arguments received by Sequential.call():
  • inputs=tf.Tensor(shape=(32, 862), dtype=float32)
  • training=False
  • mask=None
  • kwargs=<class 'inspect._empty'>

In [16]:
# Procesar
mfcc = audio_a_mfcc("grabacion.wav")

In [23]:
# Predecir
genero, confianza = predecir_genero_desde_audio("grabacion.wav", modelo, etiquetas)
print(f"🎵 Género detectado: {genero} (confianza: {confianza:.2f})")


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3s/step


IndexError: list index out of range