In [None]:
import os
import numpy as np
import librosa
import pandas as pd

def extract_features(file_path = "/musicData/genres_original", n_mfcc=40):
    """Summarise one audio file as a fixed-length vector of time-averaged MFCCs.

    Parameters
    ----------
    file_path : str
        Path handed to ``librosa.load``.
        NOTE(review): the default names the dataset directory rather than an
        audio file, so calling this without an argument presumably cannot
        succeed. It is kept only so the signature stays backward-compatible;
        always pass the path of a single audio file.
    n_mfcc : int, default 40
        Number of MFCC coefficients requested from ``librosa.feature.mfcc``.
        The returned vector has one entry per coefficient.

    Returns
    -------
    numpy.ndarray
        1-D array holding the mean of every coefficient across all frames.

    Raises
    ------
    Exception
        Whatever ``librosa.load`` / ``librosa.feature.mfcc`` raise for
        unreadable or invalid audio is propagated unchanged.
    """
    # 'kaiser_fast' is the resampling mode requested from librosa.load.
    audio, sample_rate = librosa.load(file_path, res_type='kaiser_fast')
    mfccs = librosa.feature.mfcc(y=audio, sr=sample_rate, n_mfcc=n_mfcc)
    # Transpose so frames run along axis 0, then average them away: clips of
    # different lengths all collapse to the same vector size.
    mfccs_scaled = np.mean(mfccs.T, axis=0)
    return mfccs_scaled

# Collect one MFCC feature vector and one genre label per audio file.
features = []
labels = []

# Every sub-directory of the dataset root is treated as one genre, and the
# directory name becomes the label.
dataset_path = 'musicData/genres_original'

# os.listdir returns entries in arbitrary order. Sorting both listings keeps
# the row order of the saved arrays stable across machines, so the seeded
# train/test split built from them later is reproducible too.
for genre in sorted(os.listdir(dataset_path)):
    genre_path = os.path.join(dataset_path, genre)
    if not os.path.isdir(genre_path):
        # Stray files next to the genre folders carry no label; skip them.
        continue
    for file in sorted(os.listdir(genre_path)):
        file_path = os.path.join(genre_path, file)
        try:
            mfccs = extract_features(file_path)
        except Exception as e:
            # Best effort: report the file that could not be processed and
            # carry on with the rest of the dataset.
            print(f"Error processing {file_path}: {e}")
            continue
        features.append(mfccs)
        labels.append(genre)

# Convert lists to NumPy arrays (one row per successfully processed file)
features = np.array(features)
labels = np.array(labels)

# Save features and labels as .npz file for later use
np.savez('music_features_labels.npz', features=features, labels=labels)

# Print shapes of features and labels arrays
print(f"Features shape: {features.shape}")
print(f"Labels shape: {labels.shape}")

In [None]:
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout

# Load features and labels written by the feature-extraction step
data = np.load('music_features_labels.npz')
features = data['features']
labels = data['labels']

# Fail early with a readable message if extraction produced nothing usable,
# instead of an obscure shape error from the split or the first Dense layer.
if features.ndim != 2 or len(features) != len(labels):
    raise ValueError(
        "Expected a 2-D feature matrix with one row per label, got "
        f"features {features.shape} and labels {labels.shape}"
    )

# Seed NumPy and TensorFlow so weight initialisation, dropout and shuffling
# are repeatable between runs (as far as the backend allows).
np.random.seed(42)
tf.random.set_seed(42)

# Encode the string genre labels as consecutive integers
le = LabelEncoder()
labels_encoded = le.fit_transform(labels)

# Split dataset: 80% train, 20% test, fixed seed
X_train, X_test, y_train, y_test = train_test_split(features, labels_encoded, test_size=0.2, random_state=42)

# Derive the layer sizes from the data rather than hard-coding them, so the
# network keeps working if the feature length or the set of genres changes.
n_features = X_train.shape[1]
n_classes = len(le.classes_)

# Build model: three ReLU layers with dropout, softmax over the genres
model = Sequential([
    Dense(256, input_shape=(n_features,), activation='relu'),
    Dropout(0.3),
    Dense(128, activation='relu'),
    Dropout(0.3),
    Dense(64, activation='relu'),
    Dropout(0.3),
    Dense(n_classes, activation='softmax')
])

# Integer class ids (not one-hot) -> sparse categorical cross-entropy
model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# Train model
# NOTE(review): the test split doubles as validation data here, so the
# per-epoch val_* metrics and the final test accuracy come from the same
# samples. Do not tune hyper-parameters against them.
history = model.fit(X_train, y_train, epochs=70, batch_size=20, validation_data=(X_test, y_test))

# Evaluate model
test_loss, test_acc = model.evaluate(X_test, y_test, verbose=2)
print(f"Test accuracy: {test_acc}")