In [None]:
import numpy as np
import random
import os
import wave
import subprocess
import shutil

# Create a MIDI file
from midiutil import MIDIFile

SOUNDFONT_PATH = "/Users/dj.yoon/.fluidsynth/soundfont.sf2"
DATA_PATH = "dataset"


def generate_dataset(random_seed, n_datas, n_max_instruments, sampling_rate):
    """
    Generates a dataset of mixed audio WAV files and their corresponding separated instrument WAV files.

    Parameters:
    - random_seed (int): Seed for random number generation.
    - n_datas (int): Number of data samples to generate.
    - n_max_instruments (int): Maximum number of instruments in a mix.
    - sampling_rate (int): Sampling rate for audio files.

    The function creates a 'mixed' directory containing the mixed audio files and an 'instruments' directory
    containing subdirectories for each data sample with the individual instrument audio files.
    """
    # Set random seed
    random.seed(random_seed)
    np.random.seed(random_seed)

    # Path to the SoundFont
    sf2_path = SOUNDFONT_PATH

    # Ensure the SoundFont file exists
    if not os.path.isfile(sf2_path):
        raise FileNotFoundError(f"SoundFont file not found at {sf2_path}")

    # Create output directories
    os.makedirs("temp_midis", exist_ok=True)

    # List of available instruments (program numbers)
    instruments_list = list(range(0, 128))  # General MIDI instruments

    for i in range(n_datas):
        n_instruments = random.randint(1, n_max_instruments)
        selected_instruments = random.sample(instruments_list, n_instruments)

        print(
            f"Generating data sample {i+1}/{n_datas} with {n_instruments} instruments."
        )

        # For each instrument, generate a MIDI file with random notes
        duration = 4  # seconds
        tempo = 120  # BPM
        instrument_audios = []

        for idx, instrument in enumerate(selected_instruments):
            midi = MIDIFile(1)
            track = 0
            time = 0  # In beats
            midi.addTrackName(track, time, f"Instrument_{instrument}")
            midi.addTempo(track, time, tempo)

            # Set the instrument
            channel = 0  # Use channel 0 for all instruments
            midi.addProgramChange(track, channel, time, instrument)

            # Generate random notes
            num_notes = 8
            notes = np.random.randint(60, 72, size=num_notes)  # C4 to B4
            note_duration = duration / num_notes / (60 / tempo)  # duration in beats

            for note in notes:
                midi.addNote(track, channel, note, time, note_duration, 100)
                time += note_duration

            # Save the MIDI file
            midi_path = f"temp_midis/data_{i}_instrument_{idx}.mid"
            with open(midi_path, "wb") as midi_file:
                midi.writeFile(midi_file)

            # Use fluidsynth to render MIDI to WAV
            instrument_dir = os.path.join(DATA_PATH, "instruments", f"data_{i}")
            os.makedirs(instrument_dir, exist_ok=True)
            instrument_wav_path = os.path.join(instrument_dir, f"instrument_{idx}.wav")

            cmd = [
                "fluidsynth",
                "-ni",  # No interactive mode
                "-F",
                instrument_wav_path,
                "-r",
                str(sampling_rate),
                sf2_path,
                midi_path,
            ]
            subprocess.run(cmd, check=True)

            # Load the WAV file data
            with wave.open(instrument_wav_path, "rb") as wav_file:
                n_channels = wav_file.getnchannels()
                sample_width = wav_file.getsampwidth()
                n_frames = wav_file.getnframes()
                frames = wav_file.readframes(n_frames)
                audio_data = np.frombuffer(frames, dtype=np.int16)
                instrument_audios.append(audio_data)

        # Mix the audio tracks
        # Ensure all audio tracks are the same length
        min_length = min([len(a) for a in instrument_audios])
        instrument_audios = [a[:min_length] for a in instrument_audios]

        mixed_audio = np.sum(instrument_audios, axis=0)
        # Normalize mixed_audio
        max_val = np.max(np.abs(mixed_audio))
        if max_val > 0:
            mixed_audio = mixed_audio / max_val * 32767 * 0.9  # avoid clipping
        mixed_audio = np.int16(mixed_audio)

        # Save mixed audio
        mixed_wav_path = os.path.join(DATA_PATH, "mixed", f"data_{i}.wav")
        os.makedirs(os.path.dirname(mixed_wav_path), exist_ok=True)
        with wave.open(mixed_wav_path, "w") as wav_file:
            wav_file.setnchannels(n_channels)
            wav_file.setsampwidth(sample_width)
            wav_file.setframerate(sampling_rate)
            wav_file.writeframes(mixed_audio.tobytes())

    # Clean up temporary MIDI files
    shutil.rmtree("temp_midis")

    print("Dataset generation completed.")


# Example usage:
generate_dataset(random_seed=42, n_datas=1, n_max_instruments=3, sampling_rate=44100)

In [1]:
import numpy as np
import random
import os
import wave
import subprocess
import shutil
from pydub import AudioSegment

# Create a MIDI file
from midiutil import MIDIFile

SOUNDFONT_PATH = "/Users/dj.yoon/.fluidsynth/soundfont.sf2"
DATA_PATH = "dataset"


def mix_wav_files(wav_files, output_file):
    mix = None
    for wav_file in wav_files:
        audio = AudioSegment.from_wav(wav_file)
        if mix is None:
            mix = audio
        else:
            mix = mix.overlay(audio)

    mix.export(output_file, format="wav")


def generate_dataset(random_seed, n_datas, n_max_instruments, sampling_rate):
    """
    Generates a dataset of mixed audio WAV files and their corresponding separated instrument WAV files.

    Parameters:
    - random_seed (int): Seed for random number generation.
    - n_datas (int): Number of data samples to generate.
    - n_max_instruments (int): Maximum number of instruments in a mix.
    - sampling_rate (int): Sampling rate for audio files.

    The function creates a 'mixed' directory containing the mixed audio files and an 'instruments' directory
    containing subdirectories for each data sample with the individual instrument audio files.
    """
    # Set random seed
    random.seed(random_seed)
    np.random.seed(random_seed)

    # Path to the SoundFont
    sf2_path = SOUNDFONT_PATH

    # Ensure the SoundFont file exists
    if not os.path.isfile(sf2_path):
        raise FileNotFoundError(f"SoundFont file not found at {sf2_path}")

    # Create output directories
    os.makedirs("temp_midis", exist_ok=True)

    # List of available instruments (program numbers)
    instruments_list = list(range(0, 128))  # General MIDI instruments

    for i in range(n_datas):
        n_instruments = random.randint(1, n_max_instruments)
        selected_instruments = random.sample(instruments_list, n_instruments)

        print(
            f"Generating data sample {i+1}/{n_datas} with {n_instruments} instruments."
        )

        # For each instrument, generate a MIDI file with random notes
        duration = 4  # seconds
        tempo = 120  # BPM
        duration_in_beats = duration * tempo / 60  # Total duration in beats
        num_notes = 8
        note_duration = duration_in_beats / num_notes  # duration in beats
        instrument_audios = []

        inst_wav_files = []
        for idx, instrument in enumerate(selected_instruments):
            midi = MIDIFile(1)
            track = 0
            midi.addTrackName(track, 0, f"Instrument_{instrument}")
            midi.addTempo(track, 0, tempo)

            # Set the instrument
            channel = 0  # Use channel 0 for all instruments
            midi.addProgramChange(track, channel, 0, instrument)

            # Generate random notes
            notes = np.random.randint(60, 72, size=num_notes)  # C4 to B4
            note_times = np.arange(0, duration_in_beats, note_duration)

            for note, time in zip(notes, note_times):
                midi.addNote(track, channel, note, time, note_duration, 100)

            # Add All Sound Off controller message at the exact end time
            midi.addControllerEvent(track, channel, duration_in_beats, 120, 0)

            # Save the MIDI file
            midi_path = f"temp_midis/data_{i}_instrument_{idx}.mid"
            with open(midi_path, "wb") as midi_file:
                midi.writeFile(midi_file)

            # Use fluidsynth to render MIDI to WAV
            instrument_dir = os.path.join(DATA_PATH, "instruments", f"data_{i}")
            os.makedirs(instrument_dir, exist_ok=True)
            instrument_wav_path = os.path.join(instrument_dir, f"instrument_{idx}.wav")

            cmd = [
                "fluidsynth",
                "-ni",
                "-R",
                "0",  # Disable reverb
                "-C",
                "0",  # Disable chorus
                "-o",
                "synth.release-time=0.1",  # Short release time
                "-F",
                instrument_wav_path,
                "-r",
                str(sampling_rate),
                sf2_path,
                midi_path,
            ]
            subprocess.run(cmd, check=True)

            # Load the WAV file data
            with wave.open(instrument_wav_path, "rb") as wav_file:
                n_channels = wav_file.getnchannels()
                sample_width = wav_file.getsampwidth()
                frames = wav_file.readframes(wav_file.getnframes())
                audio_data = np.frombuffer(frames, dtype=np.int16)

                # If stereo, convert to mono
                if n_channels > 1:
                    audio_data = audio_data.reshape(-1, n_channels)
                    audio_data = audio_data.mean(axis=1).astype(np.int16)
                    n_channels = 1  # Now mono

                # Ensure the audio data has the exact expected length
                expected_length = int(duration * sampling_rate)
                current_length = len(audio_data)
                if current_length < expected_length:
                    # Pad with zeros
                    audio_data = np.pad(
                        audio_data,
                        (0, expected_length - current_length),
                        mode="constant",
                    )
                elif current_length > expected_length:
                    # Truncate to the expected length
                    audio_data = audio_data[:expected_length]

                instrument_audios.append(audio_data)
            inst_wav_files.append(instrument_wav_path)

        # # Mix the audio tracks
        # # All audio tracks now have the same length
        # instrument_audios = np.array(instrument_audios)

        # mixed_audio = np.sum(instrument_audios, axis=0)
        # # Normalize mixed_audio
        # max_val = np.max(np.abs(mixed_audio))
        # if max_val > 0:
        #     mixed_audio = mixed_audio / max_val * 32767 * 0.9  # avoid clipping
        # mixed_audio = np.int16(mixed_audio)

        # # Save mixed audio
        mixed_wav_path = os.path.join(DATA_PATH, "mixed", f"data_{i}.wav")
        os.makedirs(os.path.dirname(mixed_wav_path), exist_ok=True)
        # with wave.open(mixed_wav_path, "w") as wav_file:
        #     wav_file.setnchannels(1)  # Mono
        #     wav_file.setsampwidth(2)  # 2 bytes for np.int16
        #     wav_file.setframerate(sampling_rate)
        #     wav_file.writeframes(mixed_audio.tobytes())

        mix_wav_files(
            inst_wav_files,
            mixed_wav_path,
        )

    # Clean up temporary MIDI files
    shutil.rmtree("temp_midis")

    print("Dataset generation completed.")


# Example usage:
generate_dataset(random_seed=42, n_datas=1, n_max_instruments=3, sampling_rate=44100)

Setting parameter 'synth.release-time' not found


Generating data sample 1/1 with 3 instruments.


CalledProcessError: Command '['fluidsynth', '-ni', '-R', '0', '-C', '0', '-o', 'synth.release-time=0.1', '-F', 'dataset/instruments/data_0/instrument_0.wav', '-r', '44100', '/Users/dj.yoon/.fluidsynth/soundfont.sf2', 'temp_midis/data_0_instrument_0.mid']' returned non-zero exit status 255.