# Audio Stems Mixing
This notebook takes audio stems and combines them into a mixure

In [1]:
# Imports
import os
from tqdm import tqdm
import torchaudio

In [2]:
# Define paths to load & save directories
db_folder = 'A_Label_Noise'
path = f'/home/crlandsc/external/I/MDX-23/{db_folder}/'
save_dir = f'/home/crlandsc/external/I/MDX-23/{db_folder}_norm/'

### Functions

In [3]:
def check_dir(save_dir):
    """Check if a directory exists. If not, create one."""
    if os.path.exists(save_dir) is False:
        os.makedirs(save_dir)

In [4]:
def audio_to_wav(path, in_format='mp3'):
    """Convert audio files to wav using Torchaudio. Save to wav folder in path directory.
    args:
        - path (str): path to folder of audio files
        - in_format (str): the format of the audio to be converted (default='mp3')
    """
    # convert mp3 or other audio format -> wav
    print("Converting audio format to .wav")

    # Read in songs
    songs = os.listdir(path)

    for song in tqdm(songs):
        name = song.rsplit('.', 1)[0]
        mp3_path = os.path.join(path, song)
        wav_path = os.path.join(path, 'wav', name+'.wav')

        if os.path.exists(wav_path) is False:
            # os.system('ffmpeg -i {} -vn -acodec pcm_s16le -ac l -ar 44100 -f .wav {}') # run command line
            audio, sr = torchaudio.load(mp3_path)
            torchaudio.save(wav_path, audio, sr)

In [5]:
def load_audio(path, track):
    vocals, sr_v = torchaudio.load(os.path.join(path, track, 'vocals.wav')) # assign vocals track path
    bass, sr_b = torchaudio.load(os.path.join(path, track, 'bass.wav')) # assign bass track path
    drums, sr_d = torchaudio.load(os.path.join(path, track, 'drums.wav')) # assign drums track path
    other, sr_o = torchaudio.load(os.path.join(path, track, 'other.wav')) # assign other track path
    sr = sr_v # assign common sr
    
    # Confirm that all sample rates and lengths match
    if sr_v == sr_b == sr_d == sr_o and vocals.shape[1] == bass.shape[1] == drums.shape[1] == other.shape[1]:
        return True, vocals, bass, drums, other, sr
    else:
        return False, vocals, bass, drums, other, sr 

In [6]:
def normalize_audio(vocals, bass, drums, other, mixture):
    """Normalizes all tracks to the "mixture" track.
    """
    # Define normalization constant
    norm = mixture.max()
    
    # Normalize all stems
    vocals = vocals / norm
    bass = bass / norm
    drums = drums / norm
    other = other / norm
    mixture = mixture / norm

    return vocals, bass, drums, other, mixture

### Mix Audio

In [7]:
# Load folder names of tracks (containing bass, drums, voals, and other)
track_folder = sorted(os.listdir(path))

In [8]:
# Mix tracks

# Create error and normalization lists
mixed_tracks = []
error_tracks = []

# Combine stems into mixed track
print('Mixing audio...')
for track in tqdm(track_folder):
    
    # Load audio files
    compatible, vocals, bass, drums, other, sr = load_audio(path, track)
    
    # Confirm that all sample rates and lengths match
    # If not compatible, don't mix
    if compatible:
        
        # Combine all stems into "mixture" audio track
        mixture = vocals + bass + drums + other
        
        # Normalize all tracks
        # Avoid clipping on "mixture.wav"
        # Normalize stems to same amplitude as mixture for better accuracy when separating
        vocals, bass, drums, other, mixture = normalize_audio(vocals, bass, drums, other, mixture)
        
        # Check if directory exists. If not, create it.
        save_track_dir = os.path.join(save_dir, track)
        check_dir(save_track_dir)
        
        # Save mixed & normalized audio
        torchaudio.save(os.path.join(save_track_dir, 'vocals.wav'), vocals, sr)
        torchaudio.save(os.path.join(save_track_dir, 'bass.wav'), bass, sr)
        torchaudio.save(os.path.join(save_track_dir, 'drums.wav'), drums, sr)
        torchaudio.save(os.path.join(save_track_dir, 'other.wav'), other, sr)
        torchaudio.save(os.path.join(save_track_dir, 'mixture.wav'), mixture, sr)
        
        # append normalized & mixed track names
        mixed_tracks.append(track)
        
    else:
        error_tracks.append(track)

print("Finished mixing!")
print("================")
print(f"Number of tracks normalized and mixed: {len(mixed_tracks)}")
print(f"Number of tracks incompatible for mixing: {len(error_tracks)}")

Mixing audio...


100%|██████████████████████████████████████████████████████████████████████████████▌| 203/204 [1:08:07<00:20, 20.13s/it]


RuntimeError: Failed to open the input "/home/crlandsc/external/I/MDX-23/A_Label_Noise/metadata/vocals.wav" (No such file or directory).