In [2]:
import os
import librosa
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix
from tqdm import tqdm


In [3]:
def extract_features(file_path):
    try:
        y, sr = librosa.load(file_path, duration=30)
        mfccs = np.mean(librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13).T, axis=0)
        chroma = np.mean(librosa.feature.chroma_stft(y=y, sr=sr).T, axis=0)
        mel = np.mean(librosa.feature.melspectrogram(y=y, sr=sr).T, axis=0)
        return np.hstack([mfccs, chroma, mel])
    except Exception as e:
        print("Error:", file_path, e)
        return None


In [7]:
# Path to your audio data
DATA_DIR = '/kaggle/input/gtzan-dataset-music-genre-classification/Data/genres_original'

features = []
labels = []

for genre in os.listdir(DATA_DIR):
    genre_path = os.path.join(DATA_DIR, genre)
    if not os.path.isdir(genre_path):
        continue
    for file_name in tqdm(os.listdir(genre_path), desc=f'Processing {genre}'):
        file_path = os.path.join(genre_path, file_name)
        data = extract_features(file_path)
        if data is not None:
            features.append(data)
            labels.append(genre)


Processing disco: 100%|██████████| 100/100 [00:38<00:00,  2.63it/s]
Processing metal: 100%|██████████| 100/100 [00:21<00:00,  4.75it/s]
Processing reggae: 100%|██████████| 100/100 [00:21<00:00,  4.74it/s]
Processing blues: 100%|██████████| 100/100 [00:21<00:00,  4.70it/s]
Processing rock: 100%|██████████| 100/100 [00:21<00:00,  4.71it/s]
Processing classical: 100%|██████████| 100/100 [00:21<00:00,  4.70it/s]
  y, sr = librosa.load(file_path, duration=30)
	Deprecated as of librosa version 0.10.0.
	It will be removed in librosa version 1.0.
  y, sr_native = __audioread_load(path, offset, duration, dtype)
Processing jazz:  27%|██▋       | 27/100 [00:05<00:16,  4.55it/s]

Error: /kaggle/input/gtzan-dataset-music-genre-classification/Data/genres_original/jazz/jazz.00054.wav 


Processing jazz: 100%|██████████| 100/100 [00:21<00:00,  4.66it/s]
Processing hiphop: 100%|██████████| 100/100 [00:21<00:00,  4.74it/s]
Processing country: 100%|██████████| 100/100 [00:22<00:00,  4.47it/s]
Processing pop: 100%|██████████| 100/100 [00:21<00:00,  4.73it/s]


In [8]:

columns = [f'mfcc{i}' for i in range(13)] + [f'chroma{i}' for i in range(12)] + [f'mel{i}' for i in range(128)]

df = pd.DataFrame(features, columns=columns)
df['label'] = labels

df.to_csv('extracted_features_30sec.csv', index=False)
df.head()


Unnamed: 0,mfcc0,mfcc1,mfcc2,mfcc3,mfcc4,mfcc5,mfcc6,mfcc7,mfcc8,mfcc9,...,mel119,mel120,mel121,mel122,mel123,mel124,mel125,mel126,mel127,label
0,-66.56881,58.310619,-8.179027,15.493415,5.575189,7.697163,4.071543,-3.58353,-1.321885,1.608826,...,0.070176,0.05436,0.0651,0.060349,0.050193,0.046597,0.043076,0.03895926,0.03910745,disco
1,-114.932045,113.463867,-29.804312,42.612103,-10.32601,26.973412,-14.493735,21.240532,-17.650537,17.097872,...,0.008605,0.004307,0.002449,0.001,0.000251,4.3e-05,6e-06,5.529185e-07,1.358968e-07,disco
2,-154.623306,97.932602,-10.35732,39.123672,-11.213964,24.111309,-11.846502,24.191231,-13.460304,20.553188,...,0.008696,0.003707,0.002829,0.001122,0.000161,3.3e-05,3e-06,1.68894e-07,3.369086e-08,disco
3,-40.729237,63.947403,4.90489,16.340809,12.100067,9.339081,4.540618,5.189633,-0.589437,-0.069852,...,0.22954,0.267159,0.262168,0.232001,0.236154,0.210102,0.186524,0.1718432,0.1728092,disco
4,-121.095222,116.942497,-22.78697,45.148933,-4.239051,30.256325,-12.245274,21.729876,-16.540764,16.233282,...,0.007461,0.004067,0.002181,0.000854,0.000191,3.2e-05,5e-06,5.331093e-07,2.105899e-07,disco


In [9]:
X=df.drop('label', axis=1)
y=LabelEncoder().fit_transform(df['label'])

In [15]:
from sklearn.model_selection import cross_val_score, StratifiedKFold
clf=RandomForestClassifier()
cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

scores = cross_val_score(clf, X, y, cv=cv, scoring='accuracy')
print("Cross-validation accuracy scores:", scores)
print("Average accuracy:", scores.mean())


Cross-validation accuracy scores: [0.62       0.665      0.605      0.615      0.70854271]
Average accuracy: 0.6427085427135678


In [16]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

y_pred = model.predict(X_test)

In [17]:
print("Classification Report:\n")
print(classification_report(y_test, y_pred))

print("\nConfusion Matrix:\n")
print(confusion_matrix(y_test, y_pred))


Classification Report:

              precision    recall  f1-score   support

           0       0.82      0.82      0.82        22
           1       0.92      0.86      0.89        28
           2       0.62      0.45      0.53        22
           3       0.50      0.33      0.40        21
           4       0.42      0.53      0.47        19
           5       0.55      0.71      0.62        17
           6       0.57      1.00      0.73        12
           7       0.65      0.75      0.70        20
           8       0.68      0.62      0.65        24
           9       0.40      0.27      0.32        15

    accuracy                           0.64       200
   macro avg       0.61      0.63      0.61       200
weighted avg       0.64      0.64      0.63       200


Confusion Matrix:

[[18  0  1  0  0  1  1  0  1  0]
 [ 0 24  2  0  0  1  0  0  0  1]
 [ 1  0 10  0  1  5  0  0  4  1]
 [ 0  0  0  7  5  0  3  3  0  3]
 [ 1  0  0  2 10  0  1  4  1  0]
 [ 0  2  1  2  0 12  0  0  0  0]