# MUSDB18: Crop Audio Files
| Name         | Surname    | ID        |
|--------------|------------|-----------|
| ABOUELAZM    | Youssef    | 10960436  |
| BINGLING     | Wu         | 11105141  |
| GARCIA       | Adrian     | 10975956  |
| OUALI        | Ernest     | 10984484  |

This notebook cuts each MUSDB18 track (the mix and the selected stems) into fixed-length excerpts of the desired duration (in seconds) and saves them as WAV files in the specified output folder.

In [1]:
# System and core libraries
import os
import warnings
warnings.filterwarnings('ignore')

# Numerical and scientific computing
import numpy as np
import random

# Audio processing
import librosa

from tqdm import tqdm
import soundfile as sf

# Visualization
import matplotlib.pyplot as plt
import seaborn as sns

# Audio display
from IPython.display import Audio, display

# Music dataset handling
import musdb  # Dataset loader for MUSDB18

# Plot appearance: apply the seaborn-flavoured matplotlib style sheet first,
# then override the colour cycle with the "husl" palette (order matters).
plt.style.use("seaborn-v0_8")
sns.set_palette(palette="husl")

In [2]:
# Root folder of the MUSDB18 dataset on disk
MUSDB_PATH = "musdb18"

# Every track of the dataset, regardless of subset
mus = musdb.DB(root=MUSDB_PATH)

# The "train" subset as a whole, followed by its train / valid partitions
mus_train_valid = musdb.DB(subsets="train", root=MUSDB_PATH)
mus_train = musdb.DB(subsets="train", split="train", root=MUSDB_PATH)
mus_valid = musdb.DB(subsets="train", split="valid", root=MUSDB_PATH)

# The held-out "test" subset
mus_test = musdb.DB(subsets="test", root=MUSDB_PATH)

In [3]:
# Report how many tracks each loaded dataset view contains
for label, dataset in (
    ("Whole dataset", mus),
    ("Training set", mus_train),
    ("Validation set", mus_valid),
    ("Test set", mus_test),
):
    print(f"{label} loaded with {len(dataset)} tracks.")

# Inspect the first track: its name and Python type, then the type of its audio data
first_track = mus[0]
print(f"First track: {first_track.name}, Type: {type(first_track)}")
print(f"Type of elements in the first track: {type(first_track.audio)}")



Whole dataset loaded with 150 tracks.
Training set loaded with 86 tracks.
Validation set loaded with 14 tracks.
Test set loaded with 50 tracks.
First track: A Classic Education - NightOwl, Type: <class 'musdb.audio_classes.MultiTrack'>
Type of elements in the first track: <class 'numpy.ndarray'>


In [4]:
# Base folder for all cropped excerpts, with one sub-folder per dataset split
OUTPUT_PATH = "cropped_files_all"
TRAIN_PATH, VALID_PATH, TEST_PATH = (
    os.path.join(OUTPUT_PATH, split_name) for split_name in ("train", "valid", "test")
)

In [5]:
# Function to extract fixed-length excerpts from MUSDB tracks
def _plan_excerpt_windows(total_samples, sample_rate, duration, offset, num_excerpts):
    """Plan evenly spaced excerpt windows for a single track.

    Returns a list of (start_time, end_time, start_sample, end_sample) tuples,
    or an empty list when not even one excerpt of `duration` seconds fits
    after `offset`.
    """
    available_duration = total_samples / sample_rate - offset

    # Largest number of back-to-back excerpts that fit in the available audio
    max_possible_excerpts = int((available_duration - duration) // duration + 1)
    count = min(num_excerpts, max_possible_excerpts)
    if count <= 0:
        return []

    # Spread start positions evenly between `offset` and the last position that still fits
    interval = (available_duration - duration) / (count - 1) if count > 1 else 0

    # Derive the end from start + a fixed sample count so that every excerpt has
    # exactly the same length (truncating start and end independently can differ
    # by one sample because of float rounding).
    excerpt_samples = int(round(duration * sample_rate))
    last_valid_start = max(0, total_samples - excerpt_samples)

    windows = []
    for excerpt_idx in range(count):
        start_time = offset + (excerpt_idx * interval)
        # Clamp so the final window can never run past the end of the track
        start_sample = min(int(start_time * sample_rate), last_valid_start)
        windows.append((start_time, start_time + duration, start_sample, start_sample + excerpt_samples))
    return windows


def _write_excerpts(audio, windows, out_dir, filenames, sample_rate, mono):
    """Slice `audio` at each planned window and write one WAV file per excerpt into `out_dir`."""
    for (_, _, start_sample, end_sample), filename in zip(windows, filenames):
        clip = audio[start_sample:end_sample]
        if mono and clip.ndim > 1:
            # librosa expects (channels, samples), hence the transpose
            clip = librosa.to_mono(clip.T)
        sf.write(os.path.join(out_dir, f"{filename}.wav"), clip, sample_rate)


def extract_excerpts(mus_dataset, output_path, duration=5.0, offset=0.0, mono=True, num_excerpts=10, targets=None):
    """
    Extract fixed-length excerpts from MUSDB18 dataset and save them to disk.
    Processes 'mix' and specified target stems.

    Args:
        mus_dataset (musdb.DB): The MUSDB18 dataset object (train, test, or valid)
        output_path (str): Base output directory; a "mix" sub-folder and one
                           sub-folder per target are created inside it
        duration (float): Duration in seconds for each excerpt (default: 5.0)
        offset (float): Starting offset in seconds (default: 0.0)
        mono (bool): Whether to convert audio to mono (default: True)
        num_excerpts (int): Number of excerpts to extract per track (default: 10)
                            If more than 1, they will be taken at equal intervals, up to the max that can fit
        targets (list): A list of strings specifying the names of the target stems
                        to process in addition to the mix (e.g., ["vocals", "bass"]).
                        Defaults to ["vocals"] when omitted (None); a single string
                        is treated as a one-element list.
                        If empty, only the mix is processed.

    Returns:
        list: One dict per extracted excerpt with the keys 'track_name',
              'excerpt_idx' (1-based), 'start_time', 'duration', 'filename'
              (without extension) and 'sample_rate'

    Raises:
        ValueError: If `duration` is not positive or `offset` is negative.
    """
    if duration <= 0:
        raise ValueError(f"duration must be positive, got {duration}")
    if offset < 0:
        # A negative offset would silently slice from the end of the track
        raise ValueError(f"offset must be non-negative, got {offset}")

    # Avoid a shared mutable default and guard against a bare string being
    # iterated character by character.
    if targets is None:
        targets = ["vocals"]
    elif isinstance(targets, str):
        targets = [targets]
    else:
        targets = list(targets)

    # Create output directories for mix and specified targets
    output_mix = os.path.join(output_path, "mix")
    os.makedirs(output_mix, exist_ok=True)

    output_target_dirs = {}
    for target_name in targets:
        target_dir = os.path.join(output_path, target_name)
        os.makedirs(target_dir, exist_ok=True)
        output_target_dirs[target_name] = target_dir

    excerpt_info = []

    print(f"Extracting excerpts of {duration}s from {len(mus_dataset)} tracks (processing mix and {targets})...")

    for track in tqdm(mus_dataset, desc="Processing tracks", unit="track"):
        track_name = track.name
        sample_rate = track.rate

        # musdb exposes `.audio` as a property that loads the stem from disk on
        # access, so read each stem exactly once per track instead of once per excerpt.
        mix_audio = track.audio

        windows = _plan_excerpt_windows(mix_audio.shape[0], sample_rate, duration, offset, num_excerpts)
        if not windows:
            tqdm.write(f"Track '{track_name}' is too short for even one {duration}s excerpt (offset: {offset}s).")
            continue

        filenames = [
            f"{track_name}_{start_time:.0f}_{end_time:.0f}"
            for start_time, end_time, _, _ in windows
        ]

        # Save mix excerpts, then release the full-length array before loading stems
        _write_excerpts(mix_audio, windows, output_mix, filenames, sample_rate, mono)
        del mix_audio

        # Save each target, holding only one full-length stem in memory at a time
        for target_name in targets:
            if target_name not in track.sources:
                tqdm.write(f"Warning: Target '{target_name}' not found for track '{track_name}'. Skipping {target_name}.")
                continue
            target_audio = track.sources[target_name].audio
            _write_excerpts(target_audio, windows, output_target_dirs[target_name], filenames, sample_rate, mono)
            del target_audio

        # Record info
        for excerpt_number, ((start_time, _, _, _), filename) in enumerate(zip(windows, filenames), start=1):
            excerpt_info.append({
                'track_name': track_name,
                'excerpt_idx': excerpt_number,
                'start_time': start_time,
                'duration': duration,
                'filename': filename,
                'sample_rate': sample_rate
            })

    print(f"Extraction complete. Saved {len(excerpt_info)} mix and target excerpts to {output_path}")
    return excerpt_info


In [7]:
# Stems saved alongside the mix for every split
STEM_TARGETS = ['drums', 'bass', 'other', 'vocals']

# Training clips
extract_1 = extract_info = extract_excerpts(mus_train, output_path=TRAIN_PATH, targets=STEM_TARGETS)
# Validation clips
extract_2 = extract_info = extract_excerpts(mus_valid, output_path=VALID_PATH, targets=STEM_TARGETS)
# Test clips
extract_3 = extract_info = extract_excerpts(mus_test, output_path=TEST_PATH, targets=STEM_TARGETS)

Extracting excerpts of 5.0s from 86 tracks (processing mix and ['drums', 'bass', 'other', 'vocals'])...


Processing tracks: 100%|██████████| 86/86 [1:08:22<00:00, 47.70s/track]


Extraction complete. Saved 806 mix and target excerpts to cropped_files_all\train
Extracting excerpts of 5.0s from 14 tracks (processing mix and ['drums', 'bass', 'other', 'vocals'])...


Processing tracks: 100%|██████████| 14/14 [14:59<00:00, 64.28s/track]


Extraction complete. Saved 140 mix and target excerpts to cropped_files_all\valid
Extracting excerpts of 5.0s from 50 tracks (processing mix and ['drums', 'bass', 'other', 'vocals'])...


Processing tracks: 100%|██████████| 50/50 [49:44<00:00, 59.70s/track]

Extraction complete. Saved 500 mix and target excerpts to cropped_files_all\test



