In [10]:
import os
import random
import librosa
import librosa.display
import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm

In [1]:
# Set parameters
INPUT_DIR = "../dataset/raw data"  # Folder containing genre subdirectories with 30-second audio files
OUTPUT_DIR = "../dataset/melspectrograms"  # Folder where generated images will be saved

# Fraction of segments sent to each split; the values are expected to sum to 1.
SPLIT_RATIOS = {"train": 0.8, "validation": 0.1, "test": 0.1}
assert abs(sum(SPLIT_RATIOS.values()) - 1.0) < 1e-9, "SPLIT_RATIOS must sum to 1"

# Seed the global RNG so the random split assignment is repeatable from a fresh kernel.
RANDOM_SEED = 42
random.seed(RANDOM_SEED)

# Audio processing parameters
SAMPLE_RATE = 22050       # Hz; passed to librosa.load as the target sample rate
SEGMENT_DURATION = 3      # seconds per segment
NUM_SEGMENTS = 10         # segments cut from each file (3 s * 10 = 30 s)
N_MELS = 128              # number of Mel bands in each spectrogram
N_FFT = 2048              # FFT window length, in samples
HOP_LENGTH = 512          # hop between successive frames, in samples

In [2]:

def create_output_dirs(input_dir=None, output_dir=None, splits=None):
    """Create one output folder per (split, genre) pair.

    Every sub-directory of ``input_dir`` is treated as a genre; plain files
    found there are ignored. Folders that already exist are left untouched,
    so the function can be called repeatedly.

    Args:
        input_dir: Folder holding one sub-directory per genre.
            Defaults to the module-level ``INPUT_DIR``.
        output_dir: Root folder under which ``<split>/<genre>`` is created.
            Defaults to the module-level ``OUTPUT_DIR``.
        splits: Iterable of split names. Defaults to the keys of the
            module-level ``SPLIT_RATIOS``.

    Raises:
        OSError: If ``input_dir`` cannot be listed or a folder cannot be created.
    """
    if input_dir is None:
        input_dir = INPUT_DIR
    if output_dir is None:
        output_dir = OUTPUT_DIR
    if splits is None:
        splits = SPLIT_RATIOS.keys()

    # Scan the input folder once rather than once per split; sorting keeps the
    # creation order stable regardless of the filesystem's listing order.
    genres = sorted(
        name for name in os.listdir(input_dir)
        if os.path.isdir(os.path.join(input_dir, name))
    )

    for split in splits:
        for genre in genres:
            os.makedirs(os.path.join(output_dir, split, genre), exist_ok=True)



In [3]:
def assign_split(ratios=None, rng=None):
    """Randomly pick a split name with probability proportional to its ratio.

    Args:
        ratios: Mapping of split name -> non-negative weight. Defaults to the
            module-level ``SPLIT_RATIOS``. The weights are normalised by their
            total, so they do not have to sum to exactly 1.
        rng: Any object exposing ``random()`` that returns a float in
            ``[0, 1)``. Defaults to the standard ``random`` module, so seeding
            it with ``random.seed`` makes the assignment repeatable.

    Returns:
        The name of the chosen split.

    Raises:
        ValueError: If ``ratios`` is empty or its weights do not add up to a
            positive number.
    """
    if ratios is None:
        ratios = SPLIT_RATIOS
    if rng is None:
        rng = random

    # Build the cumulative thresholds with plain left-to-right addition so the
    # total is exactly the last threshold.
    names = []
    thresholds = []
    running = 0.0
    for name, weight in ratios.items():
        running += weight
        names.append(name)
        thresholds.append(running)

    if not names or running <= 0:
        raise ValueError("ratios must contain at least one positive weight")

    # Scaling the draw by the total treats the ratios as weights. When the
    # total is 1.0 the draw is unchanged, which matches comparing the raw
    # random number against the cumulative ratios.
    draw = rng.random() * running
    for name, upper in zip(names, thresholds):
        if draw < upper:
            return name

    # Only reachable through floating-point rounding at the very top of the
    # range, where the last split is the correct answer.
    return names[-1]



In [4]:
def save_mel_spectrogram(mel_db, sr, output_filepath):
    """Render a Mel spectrogram to an image file with axes hidden and no padding.

    Args:
        mel_db: Spectrogram array to draw (callers in this notebook pass the
            dB-scaled output of ``librosa.power_to_db``).
        sr: Sample rate handed to ``librosa.display.specshow``.
        output_filepath: Destination path; matplotlib infers the image format
            from the extension (callers in this notebook use ``.jpg``).

    Raises:
        Whatever ``specshow`` or ``savefig`` raises; the figure is closed
        before the exception propagates.
    """
    fig = plt.figure(figsize=(3, 3))
    try:
        # specshow draws on the current axes, which belong to the figure just created.
        librosa.display.specshow(mel_db, sr=sr, hop_length=HOP_LENGTH,
                                 x_axis='time', y_axis='mel', cmap='viridis')
        plt.axis('off')
        fig.tight_layout(pad=0)
        fig.savefig(output_filepath, bbox_inches='tight', pad_inches=0)
    finally:
        # Close this specific figure even when rendering or saving fails;
        # otherwise each failed segment leaves an open figure behind.
        plt.close(fig)



In [None]:
def process_audio_file(filepath, genre):
    """Cut one audio file into fixed-length segments and save a Mel spectrogram for each.

    The file is loaded at ``SAMPLE_RATE`` and must contain at least
    ``NUM_SEGMENTS`` windows of ``SEGMENT_DURATION`` seconds; shorter files are
    reported with a warning and skipped entirely. Each segment is written to
    ``OUTPUT_DIR/<split>/<genre>/<basename>_segment<k>.jpg`` with ``k``
    starting at 1.

    Any exception raised along the way is caught and printed, so one bad file
    does not stop a batch run.

    NOTE(review): the split is drawn independently for every segment, so
    segments of the same track can end up in different splits.

    Args:
        filepath: Path of the audio file to process.
        genre: Genre folder name used in the output path.
    """
    try:
        # Decode the whole track at the configured sample rate
        signal, sr = librosa.load(filepath, sr=SAMPLE_RATE)
        window_len = int(SAMPLE_RATE * SEGMENT_DURATION)

        # Bail out when the track cannot supply every full-length window
        if len(signal) < window_len * NUM_SEGMENTS:
            print(f"Warning: {filepath} is shorter than expected.")
            return

        # File name without directory or extension; identical for every segment
        stem, _extension = os.path.splitext(os.path.basename(filepath))

        for index in range(NUM_SEGMENTS):
            offset = index * window_len
            window = signal[offset:offset + window_len]

            # Power Mel spectrogram, then dB relative to the segment's peak
            power = librosa.feature.melspectrogram(
                y=window,
                sr=sr,
                n_fft=N_FFT,
                hop_length=HOP_LENGTH,
                n_mels=N_MELS,
            )
            mel_db = librosa.power_to_db(power, ref=np.max)

            # Pick the destination split and build the image path
            target_split = assign_split()
            image_name = f"{stem}_segment{index + 1}.jpg"
            image_path = os.path.join(OUTPUT_DIR, target_split, genre, image_name)

            save_mel_spectrogram(mel_db, sr, image_path)
    except Exception as e:
        print(f"Error processing {filepath}: {e}")



In [11]:
create_output_dirs()

# Iterate over each genre directory; sorted so the processing order (and thus
# the sequence of random split draws) does not depend on the filesystem.
for genre in sorted(os.listdir(INPUT_DIR)):
    genre_dir = os.path.join(INPUT_DIR, genre)
    if not os.path.isdir(genre_dir):
        continue  # skip non-directory files
    # Announce the genre only after confirming it is a directory.
    print(f'Genre: {genre}')

    # Keep only audio files so the progress bar total matches the work actually done.
    audio_files = sorted(
        filename for filename in os.listdir(genre_dir)
        if filename.lower().endswith((".wav", ".mp3", ".au"))  # add more extensions if needed
    )

    # Process each audio file in the genre directory
    for filename in tqdm(audio_files, desc=f"Processing {genre}", leave=False):
        process_audio_file(os.path.join(genre_dir, filename), genre)


Genre: blues


Processing blues:   0%|          | 0/100 [00:00<?, ?it/s]

Saved: ../dataset/melspectrograms\train\blues\blues.00000_segment1.jpg
Saved: ../dataset/melspectrograms\train\blues\blues.00000_segment2.jpg
Saved: ../dataset/melspectrograms\train\blues\blues.00000_segment3.jpg
Saved: ../dataset/melspectrograms\train\blues\blues.00000_segment4.jpg
Saved: ../dataset/melspectrograms\train\blues\blues.00000_segment5.jpg
Saved: ../dataset/melspectrograms\train\blues\blues.00000_segment6.jpg
Saved: ../dataset/melspectrograms\train\blues\blues.00000_segment7.jpg
Saved: ../dataset/melspectrograms\train\blues\blues.00000_segment8.jpg


Processing blues:   1%|          | 1/100 [00:04<07:40,  4.66s/it]

Saved: ../dataset/melspectrograms\train\blues\blues.00000_segment9.jpg
Saved: ../dataset/melspectrograms\validation\blues\blues.00000_segment10.jpg
Saved: ../dataset/melspectrograms\validation\blues\blues.00001_segment1.jpg
Saved: ../dataset/melspectrograms\train\blues\blues.00001_segment2.jpg
Saved: ../dataset/melspectrograms\train\blues\blues.00001_segment3.jpg
Saved: ../dataset/melspectrograms\train\blues\blues.00001_segment4.jpg
Saved: ../dataset/melspectrograms\train\blues\blues.00001_segment5.jpg
Saved: ../dataset/melspectrograms\test\blues\blues.00001_segment6.jpg
Saved: ../dataset/melspectrograms\train\blues\blues.00001_segment7.jpg
Saved: ../dataset/melspectrograms\train\blues\blues.00001_segment8.jpg


Processing blues:   2%|▏         | 2/100 [00:05<03:58,  2.44s/it]

Saved: ../dataset/melspectrograms\train\blues\blues.00001_segment9.jpg
Saved: ../dataset/melspectrograms\train\blues\blues.00001_segment10.jpg
Saved: ../dataset/melspectrograms\train\blues\blues.00002_segment1.jpg
Saved: ../dataset/melspectrograms\train\blues\blues.00002_segment2.jpg
Saved: ../dataset/melspectrograms\train\blues\blues.00002_segment3.jpg
Saved: ../dataset/melspectrograms\train\blues\blues.00002_segment4.jpg
Saved: ../dataset/melspectrograms\train\blues\blues.00002_segment5.jpg
Saved: ../dataset/melspectrograms\test\blues\blues.00002_segment6.jpg


Processing blues:   3%|▎         | 3/100 [00:06<02:35,  1.60s/it]

Saved: ../dataset/melspectrograms\train\blues\blues.00002_segment7.jpg
Saved: ../dataset/melspectrograms\train\blues\blues.00002_segment8.jpg
Saved: ../dataset/melspectrograms\train\blues\blues.00002_segment9.jpg
Saved: ../dataset/melspectrograms\train\blues\blues.00002_segment10.jpg
Saved: ../dataset/melspectrograms\train\blues\blues.00003_segment1.jpg
Saved: ../dataset/melspectrograms\train\blues\blues.00003_segment2.jpg
Saved: ../dataset/melspectrograms\train\blues\blues.00003_segment3.jpg
Saved: ../dataset/melspectrograms\train\blues\blues.00003_segment4.jpg
Saved: ../dataset/melspectrograms\train\blues\blues.00003_segment5.jpg
Saved: ../dataset/melspectrograms\train\blues\blues.00003_segment6.jpg
Saved: ../dataset/melspectrograms\train\blues\blues.00003_segment7.jpg
Saved: ../dataset/melspectrograms\train\blues\blues.00003_segment8.jpg


Processing blues:   4%|▍         | 4/100 [00:06<02:00,  1.25s/it]

Saved: ../dataset/melspectrograms\validation\blues\blues.00003_segment9.jpg
Saved: ../dataset/melspectrograms\validation\blues\blues.00003_segment10.jpg
Saved: ../dataset/melspectrograms\train\blues\blues.00004_segment1.jpg
Saved: ../dataset/melspectrograms\train\blues\blues.00004_segment2.jpg
Saved: ../dataset/melspectrograms\train\blues\blues.00004_segment3.jpg
Saved: ../dataset/melspectrograms\train\blues\blues.00004_segment4.jpg
Saved: ../dataset/melspectrograms\train\blues\blues.00004_segment5.jpg
Saved: ../dataset/melspectrograms\test\blues\blues.00004_segment6.jpg


Processing blues:   5%|▌         | 5/100 [00:07<01:38,  1.03s/it]

Saved: ../dataset/melspectrograms\test\blues\blues.00004_segment7.jpg
Saved: ../dataset/melspectrograms\train\blues\blues.00004_segment8.jpg
Saved: ../dataset/melspectrograms\train\blues\blues.00004_segment9.jpg
Saved: ../dataset/melspectrograms\train\blues\blues.00004_segment10.jpg
Saved: ../dataset/melspectrograms\train\blues\blues.00005_segment1.jpg
Saved: ../dataset/melspectrograms\train\blues\blues.00005_segment2.jpg
Saved: ../dataset/melspectrograms\train\blues\blues.00005_segment3.jpg
Saved: ../dataset/melspectrograms\train\blues\blues.00005_segment4.jpg
Saved: ../dataset/melspectrograms\validation\blues\blues.00005_segment5.jpg
Saved: ../dataset/melspectrograms\train\blues\blues.00005_segment6.jpg
Saved: ../dataset/melspectrograms\train\blues\blues.00005_segment7.jpg
Saved: ../dataset/melspectrograms\train\blues\blues.00005_segment8.jpg


Processing blues:   6%|▌         | 6/100 [00:08<01:28,  1.06it/s]

Saved: ../dataset/melspectrograms\train\blues\blues.00005_segment9.jpg
Saved: ../dataset/melspectrograms\train\blues\blues.00005_segment10.jpg
Saved: ../dataset/melspectrograms\train\blues\blues.00006_segment1.jpg
Saved: ../dataset/melspectrograms\train\blues\blues.00006_segment2.jpg
Saved: ../dataset/melspectrograms\train\blues\blues.00006_segment3.jpg
Saved: ../dataset/melspectrograms\train\blues\blues.00006_segment4.jpg
Saved: ../dataset/melspectrograms\test\blues\blues.00006_segment5.jpg
Saved: ../dataset/melspectrograms\train\blues\blues.00006_segment6.jpg


Processing blues:   7%|▋         | 7/100 [00:08<01:17,  1.20it/s]

Saved: ../dataset/melspectrograms\train\blues\blues.00006_segment7.jpg
Saved: ../dataset/melspectrograms\train\blues\blues.00006_segment8.jpg
Saved: ../dataset/melspectrograms\train\blues\blues.00006_segment9.jpg
Saved: ../dataset/melspectrograms\train\blues\blues.00006_segment10.jpg
Saved: ../dataset/melspectrograms\train\blues\blues.00007_segment1.jpg
Saved: ../dataset/melspectrograms\train\blues\blues.00007_segment2.jpg
Saved: ../dataset/melspectrograms\test\blues\blues.00007_segment3.jpg
Saved: ../dataset/melspectrograms\validation\blues\blues.00007_segment4.jpg
Saved: ../dataset/melspectrograms\train\blues\blues.00007_segment5.jpg
Saved: ../dataset/melspectrograms\test\blues\blues.00007_segment6.jpg
Saved: ../dataset/melspectrograms\train\blues\blues.00007_segment7.jpg
Saved: ../dataset/melspectrograms\train\blues\blues.00007_segment8.jpg


Processing blues:   8%|▊         | 8/100 [00:09<01:10,  1.31it/s]

Saved: ../dataset/melspectrograms\train\blues\blues.00007_segment9.jpg
Saved: ../dataset/melspectrograms\train\blues\blues.00007_segment10.jpg
Saved: ../dataset/melspectrograms\train\blues\blues.00008_segment1.jpg
Saved: ../dataset/melspectrograms\test\blues\blues.00008_segment2.jpg
Saved: ../dataset/melspectrograms\validation\blues\blues.00008_segment3.jpg
Saved: ../dataset/melspectrograms\train\blues\blues.00008_segment4.jpg
Saved: ../dataset/melspectrograms\test\blues\blues.00008_segment5.jpg
Saved: ../dataset/melspectrograms\train\blues\blues.00008_segment6.jpg
Saved: ../dataset/melspectrograms\train\blues\blues.00008_segment7.jpg
Saved: ../dataset/melspectrograms\train\blues\blues.00008_segment8.jpg
Saved: ../dataset/melspectrograms\train\blues\blues.00008_segment9.jpg


Processing blues:   9%|▉         | 9/100 [00:10<01:08,  1.33it/s]

Saved: ../dataset/melspectrograms\train\blues\blues.00008_segment10.jpg
Saved: ../dataset/melspectrograms\train\blues\blues.00009_segment1.jpg
Saved: ../dataset/melspectrograms\train\blues\blues.00009_segment2.jpg
Saved: ../dataset/melspectrograms\train\blues\blues.00009_segment3.jpg
Saved: ../dataset/melspectrograms\train\blues\blues.00009_segment4.jpg
Saved: ../dataset/melspectrograms\train\blues\blues.00009_segment5.jpg
Saved: ../dataset/melspectrograms\train\blues\blues.00009_segment6.jpg
Saved: ../dataset/melspectrograms\train\blues\blues.00009_segment7.jpg


Processing blues:  10%|█         | 10/100 [00:10<01:05,  1.38it/s]

Saved: ../dataset/melspectrograms\train\blues\blues.00009_segment8.jpg
Saved: ../dataset/melspectrograms\train\blues\blues.00009_segment9.jpg
Saved: ../dataset/melspectrograms\train\blues\blues.00009_segment10.jpg
Saved: ../dataset/melspectrograms\train\blues\blues.00010_segment1.jpg
Saved: ../dataset/melspectrograms\train\blues\blues.00010_segment2.jpg
Saved: ../dataset/melspectrograms\train\blues\blues.00010_segment3.jpg
Saved: ../dataset/melspectrograms\test\blues\blues.00010_segment4.jpg
Saved: ../dataset/melspectrograms\train\blues\blues.00010_segment5.jpg
Saved: ../dataset/melspectrograms\train\blues\blues.00010_segment6.jpg
Saved: ../dataset/melspectrograms\validation\blues\blues.00010_segment7.jpg


Processing blues:  11%|█         | 11/100 [00:11<01:07,  1.33it/s]

Saved: ../dataset/melspectrograms\test\blues\blues.00010_segment8.jpg
Saved: ../dataset/melspectrograms\train\blues\blues.00010_segment9.jpg
Saved: ../dataset/melspectrograms\train\blues\blues.00010_segment10.jpg


                                                                  

Saved: ../dataset/melspectrograms\validation\blues\blues.00011_segment1.jpg
Saved: ../dataset/melspectrograms\train\blues\blues.00011_segment2.jpg




KeyboardInterrupt: 