In [11]:
import pandas as pd
from extract_features import process_dataset

# Run feature extraction interactively
df = process_dataset("features.csv")

# Preview data
df.head()   
# Save features to CSV
df.to_csv("features.csv", index=False)
print("Features saved to features.csv")



Processing genre: blues
Processing genre: classical
Processing genre: country
Processing genre: disco
Processing genre: hiphop
Processing genre: jazz
Processing genre: metal
Processing genre: pop
Processing genre: reggae
Processing genre: rock
Features saved to c:\music_genre_project\features.csv
Features saved to features.csv


In [12]:
import sys, os
sys.path.append(os.path.abspath("../src"))

from extract_features import process_dataset

df = process_dataset("../features.csv")
df.head()


Processing genre: blues
Processing genre: classical
Processing genre: country
Processing genre: disco
Processing genre: hiphop
Processing genre: jazz
Processing genre: metal
Processing genre: pop
Processing genre: reggae
Processing genre: rock


PermissionError: [Errno 13] Permission denied: 'c:\\features.csv'

In [9]:
import sys
import os
sys.path.append(os.path.abspath("../src"))

from extract_features import process_dataset


In [15]:
# feature_extraction.ipynb

import os
import pandas as pd
import numpy as np
import librosa
import librosa.display
from tqdm import tqdm

# ✅ Path setup
DATASET_PATH = "../data/gtzan_dataset"   # adjust if dataset is elsewhere
OUTPUT_CSV = "../data/features.csv"

# Function to extract features from an audio file
def extract_features(file_path):
    try:
        y, sr = librosa.load(file_path, duration=30)  # load 30s
        # Spectral features
        spectral_centroid = np.mean(librosa.feature.spectral_centroid(y=y, sr=sr))
        spectral_bandwidth = np.mean(librosa.feature.spectral_bandwidth(y=y, sr=sr))
        spectral_rolloff = np.mean(librosa.feature.spectral_rolloff(y=y, sr=sr))
        zero_crossing_rate = np.mean(librosa.feature.zero_crossing_rate(y))

        # MFCCs
        mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=20)
        mfccs_mean = np.mean(mfccs, axis=1)

        features = [spectral_centroid, spectral_bandwidth, spectral_rolloff, zero_crossing_rate]
        features.extend(mfccs_mean)
        return features
    except Exception as e:
        print(f"Error processing {file_path}: {e}")
        return None

# Process dataset
genres = os.listdir(DATASET_PATH)
all_features = []
labels = []

for genre in tqdm(genres, desc="Extracting features"):
    genre_path = os.path.join(DATASET_PATH, genre)
    if not os.path.isdir(genre_path):
        continue
    for file in os.listdir(genre_path):
        file_path = os.path.join(genre_path, file)
        features = extract_features(file_path)
        if features:
            all_features.append(features)
            labels.append(genre)

# Create DataFrame
columns = ["spectral_centroid", "spectral_bandwidth", "spectral_rolloff", "zero_crossing_rate"]
columns.extend([f"mfcc_{i+1}" for i in range(20)])
df = pd.DataFrame(all_features, columns=columns)
df["label"] = labels

# Save to CSV
os.makedirs(os.path.dirname(OUTPUT_CSV), exist_ok=True)
df.to_csv(OUTPUT_CSV, index=False)

print(f"✅ Features extracted and saved to {OUTPUT_CSV}")
df.head()


Extracting features: 100%|██████████| 10/10 [03:23<00:00, 20.37s/it]

✅ Features extracted and saved to ../data/features.csv





Unnamed: 0,spectral_centroid,spectral_bandwidth,spectral_rolloff,zero_crossing_rate,mfcc_1,mfcc_2,mfcc_3,mfcc_4,mfcc_5,mfcc_6,...,mfcc_12,mfcc_13,mfcc_14,mfcc_15,mfcc_16,mfcc_17,mfcc_18,mfcc_19,mfcc_20,label
0,1784.416546,2002.657106,3806.41865,0.083066,-113.619385,121.553017,-19.151058,42.345768,-6.371168,18.613033,...,8.806787,-3.665802,5.745939,-5.161711,0.750298,-1.688358,-0.409329,-2.298869,1.219947,blues
1,1529.871314,2038.612143,3548.986873,0.056044,-207.581512,123.997147,8.939115,35.870758,2.919971,21.518787,...,5.375942,-2.237833,4.217125,-6.011047,0.932668,-0.717028,0.292333,-0.285717,0.532144,blues
2,1552.637786,1747.382028,3041.089944,0.076301,-90.776344,140.448608,-29.100559,31.68734,-13.975971,25.741682,...,5.785763,-8.899733,-1.087024,-9.216775,2.453648,-7.721793,-1.816497,-3.434354,-2.231391,blues
3,1070.110059,1596.244204,2185.061787,0.033309,-199.462006,150.094727,5.649167,26.870144,1.755441,14.23688,...,6.085353,-2.476188,-1.076225,-2.876056,0.776909,-3.320055,0.637263,-0.617507,-3.40694,blues
4,1835.507008,1748.367477,3581.003346,0.1015,-160.291855,126.19577,-35.602501,22.151169,-32.48764,10.862022,...,-2.809849,-6.935621,-7.559294,-9.172152,-4.510408,-5.453736,-0.924468,-4.408076,-11.701833,blues
