In [36]:
import os
import random
import librosa
import librosa.display
import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm

In [37]:
# ---------------------------------------------------------------------------
# Dataset locations
# ---------------------------------------------------------------------------
# Root folder with one sub-directory per genre, each holding 30-second audio files.
INPUT_DIR = "../dataset/raw data"
# Root folder that receives the generated spectrogram images.
OUTPUT_DIR = "../dataset/melspectrograms"

# Share of each genre's segments assigned to every split (insertion order is
# the order in which the split folders are created).
SPLIT_RATIOS = dict(train=0.8, validation=0.1, test=0.1)

# ---------------------------------------------------------------------------
# Audio / spectrogram settings
# ---------------------------------------------------------------------------
SAMPLE_RATE = 22050        # target sampling rate handed to librosa.load
SEGMENT_DURATION = 3       # length of one segment, in seconds
NUM_SEGMENTS = 10          # number of segments cut from each file
N_FFT = 2048               # FFT window size for the Mel spectrogram
HOP_LENGTH = 512           # hop between successive frames, in samples
N_MELS = 128               # number of Mel bands

In [38]:

def create_output_dirs(input_dir=None, output_dir=None, splits=None):
    """Create one output folder per (split, genre) pair.

    Every sub-directory of the input folder is treated as a genre; plain files
    in the input folder are ignored. Folders that already exist are left as-is.

    Args:
        input_dir: Folder whose sub-directories name the genres.
            Defaults to the module-level INPUT_DIR.
        output_dir: Root under which ``<split>/<genre>`` folders are created.
            Defaults to the module-level OUTPUT_DIR.
        splits: Iterable of split names. Defaults to the keys of SPLIT_RATIOS.

    Returns:
        None
    """
    # Resolve defaults at call time so edits to the config cell are picked up.
    input_dir = INPUT_DIR if input_dir is None else input_dir
    output_dir = OUTPUT_DIR if output_dir is None else output_dir
    splits = list(SPLIT_RATIOS.keys()) if splits is None else list(splits)

    # Scan the input folder once rather than once per split.
    genres = [
        name for name in os.listdir(input_dir)
        if os.path.isdir(os.path.join(input_dir, name))
    ]

    for split in splits:
        for genre in genres:
            os.makedirs(os.path.join(output_dir, split, genre), exist_ok=True)



In [39]:
def save_mel_spectrogram(mel_db, sr, output_filepath):
    """Render a Mel spectrogram to an image file without axes or margins.

    Args:
        mel_db: Spectrogram array forwarded to ``librosa.display.specshow``
            (the caller in this notebook passes the output of
            ``librosa.power_to_db``).
        sr: Sampling rate forwarded to ``specshow``.
        output_filepath: Destination path handed to ``savefig``; the caller
            in this notebook uses a ``.jpg`` name.

    Returns:
        None
    """
    # Use an explicit figure/axes pair instead of pyplot's implicit "current
    # figure" so the exact figure created here is the one drawn on and closed.
    fig, ax = plt.subplots(figsize=(3, 3))
    try:
        librosa.display.specshow(mel_db, sr=sr, hop_length=HOP_LENGTH,
                                 x_axis='time', y_axis='mel', cmap='viridis',
                                 ax=ax)
        ax.axis('off')
        fig.tight_layout(pad=0)
        fig.savefig(output_filepath, bbox_inches='tight', pad_inches=0)
    finally:
        # Close even when drawing or saving fails; this function is called
        # once per segment, so leaked figures would pile up in memory.
        plt.close(fig)



In [40]:
def process_audio_file(filepath):
    """Load an audio file and compute a dB-scaled Mel spectrogram per segment.

    The audio is loaded at SAMPLE_RATE and cut into up to NUM_SEGMENTS
    consecutive windows of SEGMENT_DURATION seconds each. A trailing window
    that is only partly covered by audio is zero-padded to the full window
    length so every spectrogram spans the same duration. Windows that start
    past the end of the audio are not produced; a warning is printed instead.

    Args:
        filepath: Path to the audio file.

    Returns:
        list[dict]: One dict per segment with the keys ``"base_filename"``
        (file name without extension), ``"segment_index"`` (1-based),
        ``"mel_db"`` (output of ``librosa.power_to_db`` with ``ref=np.max``)
        and ``"sr"``. If loading or processing raises, the error is printed
        and whatever segments were finished before the failure are returned.
    """
    segments = []
    try:
        y, sr = librosa.load(filepath, sr=SAMPLE_RATE)
        total_samples = len(y)
        samples_per_segment = int(SAMPLE_RATE * SEGMENT_DURATION)
        base_filename = os.path.splitext(os.path.basename(filepath))[0]

        for i in range(NUM_SEGMENTS):
            start = i * samples_per_segment
            if start >= total_samples:
                # No audio left: slicing would give an empty array, which the
                # Mel computation cannot process. Stop cleanly and say why.
                print(f"Warning: {filepath} is shorter than expected; "
                      f"extracted {len(segments)} of {NUM_SEGMENTS} segments.")
                break

            segment = y[start:start + samples_per_segment]
            missing = samples_per_segment - len(segment)
            if missing > 0:
                # Partial final window: pad with silence so the spectrogram
                # has the same number of frames as the full-length ones.
                segment = np.pad(segment, (0, missing))

            # Compute the Mel spectrogram and convert power to decibels,
            # relative to the loudest bin of this segment.
            mel_spec = librosa.feature.melspectrogram(y=segment, sr=sr, n_fft=N_FFT,
                                                      hop_length=HOP_LENGTH, n_mels=N_MELS)
            mel_db = librosa.power_to_db(mel_spec, ref=np.max)

            segments.append({
                "base_filename": base_filename,
                "segment_index": i + 1,
                "mel_db": mel_db,
                "sr": sr
            })
    except Exception as e:
        # Best effort per file: report the problem and keep the batch going.
        print(f"Error processing {filepath}: {e}")
    return segments


In [41]:
def collect_all_segments():
    """Extract segments from every audio file under INPUT_DIR, grouped by genre.

    Each sub-directory of INPUT_DIR is treated as a genre. Inside it, files
    whose names end in .wav, .mp3 or .au (case-insensitive) are passed to
    ``process_audio_file``. Genres and files are visited in sorted order so
    the resulting segment order does not depend on the file system's listing
    order.

    Returns:
        dict[str, list]: Genre name mapped to the concatenated list of
        segments returned by ``process_audio_file`` for that genre's files.
    """
    print("Collecting Segments")
    audio_extensions = (".wav", ".mp3", ".au")  # add extensions if needed
    segments_by_genre = {}

    for genre in sorted(os.listdir(INPUT_DIR)):
        genre_path = os.path.join(INPUT_DIR, genre)
        if not os.path.isdir(genre_path):
            continue  # skip non-directory files

        # Filter first so the progress bar counts audio files only.
        audio_files = [
            name for name in sorted(os.listdir(genre_path))
            if name.lower().endswith(audio_extensions)
        ]

        genre_segments = []
        for filename in tqdm(audio_files, desc=f"Processing {genre}", leave=False):
            genre_segments.extend(process_audio_file(os.path.join(genre_path, filename)))
        segments_by_genre[genre] = genre_segments
        print(f'Genre {genre} Complete')

    return segments_by_genre

In [42]:
def split_segments(segments, ratios=None, seed=None):
    """Shuffle segments and split them into train, validation and test subsets.

    The input list is not modified; a shuffled copy is split. The train and
    validation sizes are ``int(total * ratio)`` (rounded down) and the test
    subset receives everything that is left.

    Args:
        segments: Sequence of items to split.
        ratios: Mapping with at least the keys ``"train"`` and
            ``"validation"``. Defaults to the module-level SPLIT_RATIOS.
        seed: Optional seed. When given, a private ``random.Random(seed)`` is
            used so the same input always yields the same split; when omitted
            the shared ``random`` module state is used.

    Returns:
        dict: ``{"train": [...], "validation": [...], "test": [...]}``.
    """
    ratios = SPLIT_RATIOS if ratios is None else ratios

    # Work on a copy so the caller's list keeps its original order.
    shuffled = list(segments)
    rng = random if seed is None else random.Random(seed)
    rng.shuffle(shuffled)

    total = len(shuffled)
    train_end = int(total * ratios["train"])
    validation_end = train_end + int(total * ratios["validation"])

    return {
        "train": shuffled[:train_end],
        "validation": shuffled[train_end:validation_end],
        "test": shuffled[validation_end:]
    }

In [None]:
# Seed the shared RNG so the shuffle inside split_segments produces the same
# train/validation/test assignment on every run.
SPLIT_SEED = 42
random.seed(SPLIT_SEED)

create_output_dirs()
# Collect all segments organized by genre
segments_by_genre = collect_all_segments()

# NOTE(review): split_segments shuffles individual segments, so segments cut
# from the same track can land in different splits. If that leakage matters
# for evaluation, split by track instead - confirm the intended protocol.

# For each genre, split the segments as per the defined ratios and save images
for genre, segments in segments_by_genre.items():
    if not segments:
        continue
    split_dict = split_segments(segments)
    for split, seg_list in split_dict.items():
        split_dir = os.path.join(OUTPUT_DIR, split, genre)
        for seg in seg_list:
            output_filename = f"{seg['base_filename']}_segment{seg['segment_index']}.jpg"
            save_mel_spectrogram(seg["mel_db"], seg["sr"],
                                 os.path.join(split_dir, output_filename))
    print(f'Genre {genre} Saved')
    # Outer double quotes: reusing the same quote character inside an
    # f-string expression is a SyntaxError before Python 3.12.
    print(f"Number of Test Segment: {len(split_dict['test'])}")
    print(f"Number of Train Segment: {len(split_dict['train'])}")
    print(f"Number of Validate Segment: {len(split_dict['validation'])}")




Collecting Segments


Processing blues:   0%|          | 0/100 [00:00<?, ?it/s]

                                                                  

Genre blues Complete


                                                                       

Genre classical Complete


                                                                     

Genre country Complete


                                                                   

Genre disco Complete


                                                                    

Genre hiphop Complete


                                                                

Genre jazz Complete


                                                                   

Genre metal Complete


                                                                

Genre pop Complete


                                                                   

Genre reggae Complete


                                                                  

Genre rock Complete
Genre blues Saved
Number of Test Segment: 100
Number of Train Segment: 800


KeyError: 'validate'