In [1]:
import kagglehub

# Download latest version
path = kagglehub.dataset_download("imsparsh/fma-free-music-archive-small-medium")

print("Path to dataset files:", path)

Path to dataset files: /kaggle/input/fma-free-music-archive-small-medium


In [2]:
!pip install librosa matplotlib numpy



In [3]:
import os
import librosa
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D, Input

# Paths (Update these paths based on your dataset location)
DATASET_PATH = "/kaggle/input/fma-free-music-archive-small-medium/fma_small/fma_small"
TRACKS_CSV_PATH = "/kaggle/input/fma-free-music-archive-small-medium/fma_metadata/tracks.csv"
GENRES_CSV_PATH = "/kaggle/input/fma-free-music-archive-small-medium/fma_metadata/genres.csv"
MEL_SPECTROGRAM_DIR = "/kaggle/working/mel_spectrograms"

# Create the directory for Mel spectrograms if it doesn't exist
os.makedirs(MEL_SPECTROGRAM_DIR, exist_ok=True)

# Load metadata
tracks = pd.read_csv(TRACKS_CSV_PATH, index_col=0, low_memory=False)
genres_df = pd.read_csv(GENRES_CSV_PATH)

# Process track information and genres
track_info = tracks[["track.7", "track.8"]]
track_info.columns = track_info.iloc[0].rename("track_id")
track_info = track_info.iloc[2:]

# Assign parent genres to tracks without a specified `genre_top`
track_info_wo_genre = track_info.loc[pd.isnull(track_info).any(axis=1)]
genre_names = []

for i in track_info_wo_genre.genres:
    j = eval(i) + [0]
    if j[0] != 0:
        top_level = genres_df[genres_df.genre_id == j[0]].top_level.values[0]
        parent_genre = genres_df[genres_df.genre_id == top_level]["title"].values[0]
    else:
        parent_genre = np.nan
    genre_names.append(parent_genre)

# Update track_info with parent genres
track_info_ = track_info.copy()
track_info_.loc[track_info_wo_genre.index, "genre_top"] = genre_names
track_info_.index = pd.to_numeric(track_info_.index)
genres = track_info_[["genre_top"]]

# Filter metadata to include only tracks in FMA Medium
fma_medium_tracks = []

for root, _, files in os.walk(DATASET_PATH):
    for file in files:
        if file.endswith(".mp3"):
            track_id = int(file.split(".")[0])  # Extract track ID from filename
            fma_medium_tracks.append(track_id)

filtered_genres = genres[genres.index.isin(fma_medium_tracks)]

# Map genres to numeric labels for classification
genre_to_label = {genre: idx for idx, genre in enumerate(filtered_genres.genre_top.unique())}
label_to_genre = {idx: genre for genre, idx in genre_to_label.items()}


In [4]:
genre_to_label

{'Hip-Hop': 0,
 'Pop': 1,
 'Folk': 2,
 'Experimental': 3,
 'Rock': 4,
 'International': 5,
 'Electronic': 6,
 'Instrumental': 7}

In [5]:
# Function to generate and save Mel spectrograms
def generate_and_save_mel_spectrogram(audio_path, save_path):
    try:
        # Load audio file
        y, sr = librosa.load(audio_path, sr=None)

        # Generate Mel spectrogram
        mel_spec = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=128, fmax=8000)
        mel_spec_db = librosa.power_to_db(mel_spec, ref=np.max)

        # Normalize spectrogram values between 0 and 1
        mel_spec_db_normalized = (mel_spec_db - mel_spec_db.min()) / (mel_spec_db.max() - mel_spec_db.min())

        # Save normalized Mel spectrogram as a .npy file
        np.save(save_path, mel_spec_db_normalized)
    except Exception as e:
        print(f"Error processing {audio_path}: {e}")

# Generate and save Mel spectrograms classified by genres
counter = 0
for track_id in filtered_genres.index:
    genre = filtered_genres.loc[track_id, "genre_top"]
    if pd.isna(genre):
        continue  # Skip tracks with no genre information

    # Construct the path to the MP3 file
    track_folder = f"{int(track_id):06d}"[:3]  # First three digits as folder name
    audio_path = os.path.join(DATASET_PATH, track_folder, f"{int(track_id):06d}.mp3")

    if not os.path.exists(audio_path):
        print(f"File not found: {audio_path}")
        continue

    # Create a subdirectory for the genre
    genre_dir = os.path.join(MEL_SPECTROGRAM_DIR, genre)
    os.makedirs(genre_dir, exist_ok=True)

    # Path to save the Mel spectrogram
    save_path = os.path.join(genre_dir, f"{int(track_id):06d}.npy")

    # Generate and save Mel spectrogram
    if not os.path.exists(save_path):
        generate_and_save_mel_spectrogram(audio_path, save_path)
        counter += 1
        if counter % 100 == 0:
            print(f"Processed {counter} files.")

print("Mel spectrograms have been saved and classified by genres.")

Processed 100 files.
Processed 200 files.
Processed 300 files.
Processed 400 files.


[src/libmpg123/layer3.c:INT123_do_layer3():1841] error: dequantization failed!


Processed 500 files.
Processed 600 files.
Processed 700 files.
Processed 800 files.
Processed 900 files.


[src/libmpg123/layer3.c:INT123_do_layer3():1801] error: dequantization failed!


Processed 1000 files.
Processed 1100 files.


[src/libmpg123/layer3.c:INT123_do_layer3():1801] error: dequantization failed!


Processed 1200 files.
Processed 1300 files.
Processed 1400 files.
Processed 1500 files.
Processed 1600 files.
Processed 1700 files.
Processed 1800 files.
Processed 1900 files.
Processed 2000 files.
Processed 2100 files.
Processed 2200 files.


[src/libmpg123/layer3.c:INT123_do_layer3():1771] error: part2_3_length (3360) too large for available bit count (3240)
[src/libmpg123/layer3.c:INT123_do_layer3():1771] error: part2_3_length (3328) too large for available bit count (3240)


Processed 2300 files.
Processed 2400 files.
Processed 2500 files.
Processed 2600 files.
Processed 2700 files.
Processed 2800 files.
Processed 2900 files.
Processed 3000 files.
Processed 3100 files.
Processed 3200 files.
Processed 3300 files.
Processed 3400 files.
Processed 3500 files.
Processed 3600 files.
Processed 3700 files.
Processed 3800 files.
Processed 3900 files.
Processed 4000 files.
Processed 4100 files.
Processed 4200 files.
Processed 4300 files.
Processed 4400 files.


Note: Illegal Audio-MPEG-Header 0x00000000 at offset 33361.
Note: Trying to resync...
Note: Skipped 1024 bytes in input.
[src/libmpg123/parse.c:wetwork():1365] error: Giving up resync after 1024 bytes - your stream is not nice... (maybe increasing resync limit could help).
  y, sr = librosa.load(audio_path, sr=None)
	Deprecated as of librosa version 0.10.0.
	It will be removed in librosa version 1.0.
  y, sr_native = __audioread_load(path, offset, duration, dtype)
Note: Illegal Audio-MPEG-Header 0x00000000 at offset 22401.
Note: Trying to resync...
Note: Skipped 1024 bytes in input.
[src/libmpg123/parse.c:wetwork():1365] error: Giving up resync after 1024 bytes - your stream is not nice... (maybe increasing resync limit could help).
  y, sr = librosa.load(audio_path, sr=None)
	Deprecated as of librosa version 0.10.0.
	It will be removed in librosa version 1.0.
  y, sr_native = __audioread_load(path, offset, duration, dtype)
[src/libmpg123/layer3.c:INT123_do_layer3():1801] error: dequan

Error processing /kaggle/input/fma-free-music-archive-small-medium/fma_small/fma_small/099/099134.mp3: 
Processed 4500 files.
Processed 4600 files.
Processed 4700 files.


  mel_spec_db_normalized = (mel_spec_db - mel_spec_db.min()) / (mel_spec_db.max() - mel_spec_db.min())


Processed 4800 files.
Processed 4900 files.


  y, sr = librosa.load(audio_path, sr=None)
	Deprecated as of librosa version 0.10.0.
	It will be removed in librosa version 1.0.
  y, sr_native = __audioread_load(path, offset, duration, dtype)


Error processing /kaggle/input/fma-free-music-archive-small-medium/fma_small/fma_small/108/108925.mp3: 
Processed 5000 files.
Processed 5100 files.
Processed 5200 files.
Processed 5300 files.
Processed 5400 files.
Processed 5500 files.
Processed 5600 files.
Processed 5700 files.
Processed 5800 files.
Processed 5900 files.
Processed 6000 files.
Processed 6100 files.
Processed 6200 files.
Processed 6300 files.
Processed 6400 files.
Processed 6500 files.
Processed 6600 files.
Processed 6700 files.
Processed 6800 files.
Processed 6900 files.


  y, sr = librosa.load(audio_path, sr=None)
	Deprecated as of librosa version 0.10.0.
	It will be removed in librosa version 1.0.
  y, sr_native = __audioread_load(path, offset, duration, dtype)


Error processing /kaggle/input/fma-free-music-archive-small-medium/fma_small/fma_small/133/133297.mp3: 
Processed 7000 files.
Processed 7100 files.
Processed 7200 files.
Processed 7300 files.
Processed 7400 files.
Processed 7500 files.
Processed 7600 files.
Processed 7700 files.
Processed 7800 files.
Processed 7900 files.
Processed 8000 files.
Mel spectrograms have been saved and classified by genres.


In [6]:
!zip -r mel_spectograms.zip /kaggle/working/mel_spectrograms

  pid, fd = os.forkpty()


  adding: kaggle/working/mel_spectrograms/ (stored 0%)
  adding: kaggle/working/mel_spectrograms/Experimental/ (stored 0%)
  adding: kaggle/working/mel_spectrograms/Experimental/126608.npy (deflated 14%)
  adding: kaggle/working/mel_spectrograms/Experimental/074383.npy (deflated 14%)
  adding: kaggle/working/mel_spectrograms/Experimental/057271.npy (deflated 49%)
  adding: kaggle/working/mel_spectrograms/Experimental/095189.npy (deflated 13%)
  adding: kaggle/working/mel_spectrograms/Experimental/001066.npy (deflated 15%)
  adding: kaggle/working/mel_spectrograms/Experimental/026600.npy (deflated 77%)
  adding: kaggle/working/mel_spectrograms/Experimental/065234.npy (deflated 15%)
  adding: kaggle/working/mel_spectrograms/Experimental/044854.npy (deflated 16%)
  adding: kaggle/working/mel_spectrograms/Experimental/135089.npy (deflated 34%)
  adding: kaggle/working/mel_spectrograms/Experimental/053729.npy (deflated 19%)
  adding: kaggle/working/mel_spectrograms/Experimental/

In [7]:
from IPython.display import FileLink
FileLink(r'/kaggle/working/mel_spectograms.zip')