<a href="https://colab.research.google.com/github/WafleraVertical/Clasificaci-n-Musical-por-Genero/blob/main/End_to_End_CNN_(Dieleman_%26_Schrauwen%2C_2014).ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import os
import numpy as np
import librosa
import librosa.display
import tensorflow as tf
from tensorflow.keras import layers, models
from sklearn.model_selection import train_test_split
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint


In [2]:
def extract_spectrogram(file_path, n_mels=128, fmax=8000):
    y, sr = librosa.load(file_path, sr=None)
    S = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=n_mels, fmax=fmax)
    log_S = librosa.power_to_db(S, ref=np.max)
    return log_S

In [3]:
def load_gtzan_data(gtzan_path, genres, n_mels=128):
    X = []
    y = []
    for genre in genres:
        genre_dir = os.path.join(gtzan_path, genre)
        for file_name in os.listdir(genre_dir):
            file_path = os.path.join(genre_dir, file_name)
            spectrogram = extract_spectrogram(file_path, n_mels=n_mels)
            if spectrogram.shape[1] > 128:
                spectrogram = spectrogram[:, :128]
            elif spectrogram.shape[1] < 128:
                spectrogram = np.pad(spectrogram, ((0, 0), (0, 128 - spectrogram.shape[1])), mode='constant')

            X.append(spectrogram)
            y.append(genres.index(genre))
    return np.array(X), np.array(y)

In [4]:
GTZAN_PATH = '/content/drive/MyDrive/Tesis/GZTAN/genres_original'
genres = ['blues', 'classical', 'country', 'disco', 'hiphop', 'jazz', 'metal', 'pop', 'reggae', 'rock']

In [6]:
X, y = load_gtzan_data(GTZAN_PATH, genres)

In [7]:
X = X[..., np.newaxis]
X = X / np.max(X)

In [8]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [18]:
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)
model_checkpoint = ModelCheckpoint('best_model.keras', save_best_only=True, monitor='val_loss')


In [9]:
model = models.Sequential([
    layers.Conv2D(32, (5, 5), activation='relu', input_shape=(128, 128, 1)),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (5, 5), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Flatten(),
    layers.Dense(256, activation='relu'),
    layers.Dense(10, activation='softmax')
])

model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [19]:
model.fit(X_train, y_train, epochs=30, validation_data=(X_test, y_test),
          callbacks=[early_stopping, model_checkpoint])



Epoch 1/30
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m49s[0m 2s/step - accuracy: 0.1059 - loss: 13759936.0000 - val_accuracy: 0.1150 - val_loss: 2.3032
Epoch 2/30
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m72s[0m 2s/step - accuracy: 0.0962 - loss: 2.3021 - val_accuracy: 0.0650 - val_loss: 2.3041
Epoch 3/30
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m42s[0m 2s/step - accuracy: 0.1207 - loss: 2.3016 - val_accuracy: 0.0650 - val_loss: 2.3048
Epoch 4/30
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m45s[0m 2s/step - accuracy: 0.1047 - loss: 2.3027 - val_accuracy: 0.0650 - val_loss: 2.3055
Epoch 5/30
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m77s[0m 1s/step - accuracy: 0.1098 - loss: 2.3021 - val_accuracy: 0.0650 - val_loss: 2.3066
Epoch 6/30
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 1s/step - accuracy: 0.1000 - loss: 2.3026 - val_accuracy: 0.0650 - val_loss: 2.3071
[1m7/7[0m [32m━━━━━━━━━━━━━━━━

In [20]:
test_loss, test_acc = model.evaluate(X_test, y_test)
print(f"Precisión en test: {test_acc:.2f}")

[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 268ms/step - accuracy: 0.1100 - loss: 2.3032
Precisión en test: 0.12
