In [None]:
# Import necessary libraries
import os
import librosa
import pandas as pd
import numpy as np
from google.colab import drive

# Mount Google Drive at /content/drive so that files stored under
# /content/drive/MyDrive can be read through ordinary file paths.
drive.mount('/content/drive')

# Define a function to extract features from an audio file.
def extract_features(dataset_path, duration=30):
    """Summarise one audio file as a flat feature vector.

    Despite its name, ``dataset_path`` is the path of a single audio file;
    it is handed straight to ``librosa.load``.

    Args:
        dataset_path: Path of the audio file to analyse.
        duration: Forwarded to ``librosa.load`` as ``duration`` to limit how
            much of the file is read (default 30).

    Returns:
        The ``np.hstack`` of, in this order: averaged MFCCs (13 computed,
        the first one dropped), averaged chroma, averaged spectral
        contrast, averaged tempogram, the overall mean RMS energy, and the
        averaged spectral bandwidth.
    """
    def _average(feature_matrix):
        # Reduce a feature matrix to one mean per row; the rows are
        # presumably feature bins and the columns time frames (librosa's
        # usual layout - confirm against the librosa docs).
        return np.mean(feature_matrix.T, axis=0)

    # Decode the file as a mono signal, limited to `duration`.
    signal, sr = librosa.load(dataset_path, mono=True, duration=duration)

    # 13 MFCCs are computed; row 0 is discarded before averaging.
    mfcc = librosa.feature.mfcc(y=signal, sr=sr, n_mfcc=13)

    # The list order defines the column order of the resulting vector.
    parts = [
        _average(mfcc[1:]),
        _average(librosa.feature.chroma_stft(y=signal, sr=sr)),
        _average(librosa.feature.spectral_contrast(y=signal, sr=sr)),
        _average(librosa.feature.tempogram(y=signal, sr=sr)),
        # RMS energy is averaged over the whole array, giving one scalar.
        np.mean(librosa.feature.rms(y=signal)),
        _average(librosa.feature.spectral_bandwidth(y=signal, sr=sr)),
    ]

    return np.hstack(parts)

# Initialize lists to hold the feature vectors, file names, and genre labels.
# The three lists are index-aligned: entry i of each describes the same track.
audio_data = []
track_names = []
labels = []

# Define the dataset path and genres.
dataset_path = '/content/drive/MyDrive/GTZAN/genres_original'
genres = ['blues', 'classical', 'country', 'disco', 'hiphop', 'jazz', 'metal', 'pop', 'reggae', 'rock']

# Paths that could not be turned into a feature vector; reported after the loop.
skipped_tracks = []

# Loop over all files in the GTZAN dataset and extract features.
for music_genre in genres:
    genre_folder = os.path.join(dataset_path, music_genre)
    # os.listdir returns entries in arbitrary order; sorting keeps the CSV
    # row order reproducible from one run to the next.
    for track in sorted(os.listdir(genre_folder)):
        track_name = os.path.join(genre_folder, track)
        # Only regular files can be decoded; skip sub-directories such as
        # notebook checkpoint folders.
        if not os.path.isfile(track_name):
            continue
        try:
            features = extract_features(track_name)
        except Exception as error:
            # Top-level batch boundary: one unreadable file should not throw
            # away the features already extracted. Report it and move on.
            print('Skipping', track_name, '-', error)
            skipped_tracks.append(track_name)
            continue
        # Append to all three lists only after extraction succeeded, so they
        # can never get out of step with each other.
        audio_data.append(features)
        track_names.append(track)
        labels.append(music_genre)

if skipped_tracks:
    print(len(skipped_tracks), 'file(s) could not be processed and were left out of the dataset.')

# Convert the list into an array
song_data = np.array(audio_data)

# Create a pandas dataframe: one row per track, one column per feature,
# followed by the file name and the genre label.
df = pd.DataFrame(data=audio_data)
df['song_name'] = track_names
df['genre'] = labels

# Save the dataframe to csv format
df.to_csv('gtzan.csv', index=False)


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
