<a href="https://colab.research.google.com/github/WafleraVertical/Clasificaci-n-Musical-por-Genero/blob/main/CRNN_(Choi_et_al_%2C_2017).ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import os
import numpy as np
import librosa
import librosa.display
import tensorflow as tf
from tensorflow.keras import layers, models
from sklearn.model_selection import train_test_split
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint


In [2]:
def extract_spectrogram(file_path, n_mels=128, fmax=8000):
    y, sr = librosa.load(file_path, sr=None)
    S = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=n_mels, fmax=fmax)
    log_S = librosa.power_to_db(S, ref=np.max)
    return log_S

In [3]:
def load_gtzan_data(gtzan_path, genres, n_mels=128):
    X = []
    y = []
    for genre in genres:
        genre_dir = os.path.join(gtzan_path, genre)
        for file_name in os.listdir(genre_dir):
            file_path = os.path.join(genre_dir, file_name)
            spectrogram = extract_spectrogram(file_path, n_mels=n_mels)
            if spectrogram.shape[1] > 128:
                spectrogram = spectrogram[:, :128]
            elif spectrogram.shape[1] < 128:
                spectrogram = np.pad(spectrogram, ((0, 0), (0, 128 - spectrogram.shape[1])), mode='constant')

            X.append(spectrogram)
            y.append(genres.index(genre))
    return np.array(X), np.array(y)

In [4]:
GTZAN_PATH = '/content/drive/MyDrive/Tesis/GZTAN/genres_original'
genres = ['blues', 'classical', 'country', 'disco', 'hiphop', 'jazz', 'metal', 'pop', 'reggae', 'rock']

In [5]:
X, y = load_gtzan_data(GTZAN_PATH, genres)

In [6]:
X = X[..., np.newaxis]
X = X / np.max(X)

In [7]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [8]:
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)
model_checkpoint = ModelCheckpoint('best_model.keras', save_best_only=True, monitor='val_loss')


In [9]:
model = models.Sequential([
    layers.Conv2D(64, (3, 3), activation='relu', input_shape=(128, 128, 1)),
    layers.MaxPooling2D((2, 2)),
    layers.Reshape((64, -1)),  # Para entrada a LSTM
    layers.LSTM(64),
    layers.Dense(10, activation='softmax')
])

model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [10]:
model.fit(X_train, y_train, epochs=30, validation_data=(X_test, y_test),
          callbacks=[early_stopping, model_checkpoint])



Epoch 1/30
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m32s[0m 1s/step - accuracy: 0.1225 - loss: 2.3881 - val_accuracy: 0.0650 - val_loss: 2.3841
Epoch 2/30
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 1s/step - accuracy: 0.0953 - loss: 2.3452 - val_accuracy: 0.0850 - val_loss: 2.3407
Epoch 3/30
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m45s[0m 1s/step - accuracy: 0.1070 - loss: 2.3186 - val_accuracy: 0.0650 - val_loss: 2.3220
Epoch 4/30
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m31s[0m 1s/step - accuracy: 0.0847 - loss: 2.3328 - val_accuracy: 0.1400 - val_loss: 2.3102
Epoch 5/30
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m37s[0m 1s/step - accuracy: 0.1023 - loss: 2.3170 - val_accuracy: 0.1350 - val_loss: 2.3168
Epoch 6/30
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 1s/step - accuracy: 0.1217 - loss: 2.3107 - val_accuracy: 0.1050 - val_loss: 2.3977
Epoch 7/30
[1m25/25[0m [32m━━━━━━━━━━

<keras.src.callbacks.history.History at 0x7b41c1c51a50>

In [11]:
test_loss, test_acc = model.evaluate(X_test, y_test)
print(f"Precisión en test: {test_acc:.2f}")

[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 227ms/step - accuracy: 0.1370 - loss: 2.3184
Precisión en test: 0.14
