In [1]:
import os
import numpy as np
import librosa
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout, BatchNormalization
from tensorflow.keras.utils import to_categorical


In [3]:
# --- CONFIG ---
DATASET_PATH = r"C:\Users\Gayatri\Downloads\archiveB3\Data\genres_original"
MAX_PAD_LEN = 130
N_MFCC = 40

In [6]:
# --- FEATURE EXTRACTION ---
def extract_features(file_path, max_pad_len=MAX_PAD_LEN):
    """Extracts MFCC features from an audio file."""
    audio, sr = librosa.load(file_path, duration=30)
    mfcc = librosa.feature.mfcc(y=audio, sr=sr, n_mfcc=N_MFCC)
    # Pad or trim to fixed length
    mfcc = np.pad(mfcc, ((0, 0), (0, max(0, max_pad_len - mfcc.shape[1]))), mode='constant')[:, :max_pad_len]
    return mfcc



In [8]:
# --- LOAD DATA ---
genres = sorted(os.listdir(DATASET_PATH))
X, y = [], []

for genre in genres:
    genre_path = os.path.join(DATASET_PATH, genre)
    if not os.path.isdir(genre_path):
        continue
    print(f"Processing genre: {genre}")
    for file in os.listdir(genre_path):
        file_path = os.path.join(genre_path, file)
        try:
            features = extract_features(file_path)
            X.append(features)
            y.append(genre)
        except Exception as e:
            print(f"Error with file {file_path}: {e}")

X = np.array(X)
X = np.transpose(X, (0, 2, 1))  # Shape: (samples, timesteps, features)


Processing genre: blues
Processing genre: classical
Processing genre: country
Processing genre: disco
Processing genre: hiphop
Processing genre: jazz


  audio, sr = librosa.load(file_path, duration=30)


Error with file C:\Users\Gayatri\Downloads\archiveB3\Data\genres_original\jazz\jazz.00054.wav: 
Processing genre: metal
Processing genre: pop
Processing genre: reggae
Processing genre: rock


In [10]:
# --- ENCODE LABELS ---
le = LabelEncoder()
y = to_categorical(le.fit_transform(y))

# --- SPLIT DATA ---
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [12]:
# --- BUILD MODEL ---
model = Sequential([
    LSTM(64, return_sequences=True, input_shape=(X.shape[1], X.shape[2])),
    BatchNormalization(),
    Dropout(0.3),

    LSTM(32, return_sequences=False),
    BatchNormalization(),
    Dropout(0.3),

    Dense(32, activation='relu'),
    Dropout(0.3),

    Dense(y.shape[1], activation='softmax')
])

model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])


  super().__init__(**kwargs)


In [None]:
# --- TRAIN ---
history = model.fit(
    X_train, y_train,
    epochs=50,
    batch_size=32,
    validation_split=0.2,
    verbose=1
)



Epoch 1/50
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 78ms/step - accuracy: 0.1064 - loss: 2.7058 - val_accuracy: 0.2188 - val_loss: 2.2742
Epoch 2/50
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 66ms/step - accuracy: 0.1878 - loss: 2.3888 - val_accuracy: 0.2125 - val_loss: 2.2521
Epoch 3/50
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 66ms/step - accuracy: 0.2300 - loss: 2.2216 - val_accuracy: 0.2500 - val_loss: 2.2366
Epoch 4/50
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 68ms/step - accuracy: 0.2379 - loss: 2.1165 - val_accuracy: 0.2438 - val_loss: 2.2130
Epoch 5/50
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 70ms/step - accuracy: 0.3083 - loss: 1.9962 - val_accuracy: 0.2375 - val_loss: 2.1813
Epoch 6/50
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 69ms/step - accuracy: 0.3036 - loss: 2.0089 - val_accuracy: 0.2625 - val_loss: 2.1463
Epoch 7/50
[1m20/20[0m [32m━━━━