In [1]:
import os
import random
import librosa
import librosa.display
import numpy as np
import matplotlib.pyplot as plt

In [2]:
# Set parameters
INPUT_DIR = "../dataset/raw data"  # Folder containing genre subdirectories with 30-second audio files
OUTPUT_DIR = "../dataset/melspectrograms"  # Folder where generated images will be saved

# Define the split ratios (e.g., 70% train, 15% validation, 15% test)
SPLIT_RATIOS = {"train": 0.7, "validation": 0.15, "test": 0.15}
# The split assignment walks the cumulative ratios, so they must cover [0, 1).
assert abs(sum(SPLIT_RATIOS.values()) - 1.0) < 1e-9, "SPLIT_RATIOS must sum to 1"

# Seed the RNG that drives the random split assignment so that re-running the
# notebook reproduces the same train/validation/test partition.
RANDOM_SEED = 42
random.seed(RANDOM_SEED)

# Audio processing parameters
SAMPLE_RATE = 22050       # Hz; audio is resampled to this rate when loaded
SEGMENT_DURATION = 3      # seconds
NUM_SEGMENTS = 10         # since 30s / 3s = 10 segments per audio file

# Mel spectrogram parameters
N_MELS = 128              # number of Mel bands
N_FFT = 2048              # FFT window size, in samples
HOP_LENGTH = 512          # hop between successive frames, in samples

In [3]:

def create_output_dirs():
    """Create OUTPUT_DIR/<split>/<genre> for every split and every genre.

    A genre is any immediate subdirectory of INPUT_DIR; plain files in
    INPUT_DIR are ignored. Splits are the keys of SPLIT_RATIOS. Directories
    that already exist are left untouched, so the call is safe to repeat.
    """
    # Scan INPUT_DIR once: the set of genres does not depend on the split,
    # so there is no need to re-list the directory for each one.
    genres = [
        entry for entry in os.listdir(INPUT_DIR)
        if os.path.isdir(os.path.join(INPUT_DIR, entry))
    ]
    for split in SPLIT_RATIOS:
        for genre in genres:
            os.makedirs(os.path.join(OUTPUT_DIR, split, genre), exist_ok=True)



In [4]:
def assign_split():
    """Pick a split name at random, weighted by SPLIT_RATIOS.

    A single uniform draw in [0, 1) is compared against the running total of
    the ratios, in the dictionary's iteration order; the first split whose
    running total exceeds the draw is returned. If no running total exceeds
    the draw, "train" is returned.
    """
    draw = random.random()
    chosen = "train"  # used when the draw is not below any running total
    upper_bound = 0.0
    for name in SPLIT_RATIOS:
        upper_bound += SPLIT_RATIOS[name]
        if draw < upper_bound:
            chosen = name
            break
    return chosen



In [5]:
def save_mel_spectrogram(mel_db, sr, output_filepath):
    """Render a Mel spectrogram to an image file without axes or padding.

    Args:
        mel_db: Spectrogram array handed to librosa.display.specshow
            (the caller in this notebook passes a dB-scaled Mel spectrogram).
        sr: Sampling rate of the audio the spectrogram was computed from.
        output_filepath: Destination path; matplotlib picks the image format
            from the file extension (the caller here uses ".jpg").

    The figure is always closed, even when drawing or saving raises, so a
    failure on one segment does not leave an open figure behind.
    """
    fig = plt.figure(figsize=(3, 3))
    try:
        # Draw on an explicit axes rather than pyplot's implicit current one.
        ax = fig.add_subplot(1, 1, 1)
        # Display the spectrogram; you can adjust the colormap if desired
        librosa.display.specshow(mel_db, sr=sr, hop_length=HOP_LENGTH,
                                 x_axis='time', y_axis='mel', cmap='viridis',
                                 ax=ax)
        ax.axis('off')
        fig.tight_layout(pad=0)
        fig.savefig(output_filepath, bbox_inches='tight', pad_inches=0)
    finally:
        # Close this specific figure; callers catch exceptions and continue,
        # so skipping this on error would accumulate figures in memory.
        plt.close(fig)



In [6]:
def process_audio_file(filepath, genre):
    """Load an audio file, cut it into segments, and save one Mel spectrogram per segment.

    The file is loaded at SAMPLE_RATE and divided into NUM_SEGMENTS
    consecutive segments of SEGMENT_DURATION seconds. Each segment is turned
    into a dB-scaled Mel spectrogram and written to
    OUTPUT_DIR/<split>/<genre>/<basename>_segment<k>.jpg (k starts at 1).

    The split is drawn once per file, not once per segment, so that all
    segments of one recording end up in the same split. Drawing per segment
    would scatter pieces of the same recording across train, validation and
    test, leaking near-duplicate material into the evaluation sets.

    Args:
        filepath: Path of the audio file to process.
        genre: Name of the genre subdirectory the images are saved under.

    Files with fewer than NUM_SEGMENTS full segments are skipped with a
    warning. Any exception is reported with print() and not re-raised, so
    one bad file does not abort a batch run.
    """
    try:
        # Load the full audio file
        y, sr = librosa.load(filepath, sr=SAMPLE_RATE)
        samples_per_segment = int(SAMPLE_RATE * SEGMENT_DURATION)

        # Ensure the audio file has the expected length
        if len(y) < samples_per_segment * NUM_SEGMENTS:
            print(f"Warning: {filepath} is shorter than expected.")
            return

        # One split per recording (see docstring), and the per-file parts of
        # the output path are computed once instead of on every iteration.
        split = assign_split()
        base_filename = os.path.splitext(os.path.basename(filepath))[0]
        output_dir = os.path.join(OUTPUT_DIR, split, genre)
        os.makedirs(output_dir, exist_ok=True)

        # Process each segment
        for i in range(NUM_SEGMENTS):
            start = i * samples_per_segment
            segment = y[start:start + samples_per_segment]

            # Compute the Mel spectrogram
            mel_spec = librosa.feature.melspectrogram(y=segment, sr=sr, n_fft=N_FFT,
                                                      hop_length=HOP_LENGTH, n_mels=N_MELS)
            # Convert to decibels, relative to this segment's own peak
            mel_db = librosa.power_to_db(mel_spec, ref=np.max)

            output_path = os.path.join(output_dir, f"{base_filename}_segment{i+1}.jpg")

            # Save the spectrogram as a jpg file
            save_mel_spectrogram(mel_db, sr, output_path)
            print(f"Saved: {output_path}")
    except Exception as e:
        print(f"Error processing {filepath}: {e}")



In [None]:

create_output_dirs()

# File extensions treated as audio input; add more if needed
AUDIO_EXTENSIONS = (".wav", ".mp3", ".au")

# Iterate over each genre directory.
# os.listdir() returns entries in arbitrary order, and the random split
# assignment depends on the order in which files are processed, so both
# listings are sorted to make runs repeatable for a given RNG state.
for genre in sorted(os.listdir(INPUT_DIR)):
    genre_dir = os.path.join(INPUT_DIR, genre)
    if not os.path.isdir(genre_dir):
        continue  # skip non-directory files

    # Process each audio file in the genre directory
    for filename in sorted(os.listdir(genre_dir)):
        if filename.lower().endswith(AUDIO_EXTENSIONS):
            filepath = os.path.join(genre_dir, filename)
            process_audio_file(filepath, genre)
