In [87]:
# the sample code in pytorch example combines dataloader and datasets in the same part.
# whereas in asteroid x-umx code, they are separated, 
# it is good to separated when there are more than one dataset are trained on.

from pathlib import Path
import torch.utils.data
import random
import torch
import tqdm
import soundfile as sf
import os
import pandas as pd
# import musedb

class MS_21Dataset(torch.utils.data.Dataset):
    """MS_21 music separation dataset

    The dataset consists of 150 full lengths music tracks (~10h duration) of
    different genres along with their raw multitracks:
    

    This dataset asssumes music raw multi-tracks in (sub)folders where each folder
    has a various number of sources. 
    A linear mix is performed on the fly by summing up the sources according to 
    the grouping information in the .csv file.
    In order to be compatible to MUSDB_18 dataset, one can utilize the grouping information
    to generate the traditional four stems:
        'drums', 'vocals', 'bass', 'other'
    

    Folder Structure:
        >>> #train/1/lead_vocals.wav ------------|
        >>> #train/1/backing_vocals.wav ---------|
        >>> #train/1/drums.wav ---------------+--> input (mix),
        >>> #train/1/bass.wav -------------------|
        >>> #train/1/accordin.wav ---------------|
        >>> #train/1/bell.wav -------------------/

        >>> #train/1/lead_vocals.wav ------------> output[target]

    Args:
        root (str): Root path of dataset
        sources (:obj:`list` of :obj:`str`, optional): List of source names
            that composes the mixture.
            Defaults to MUSDB18 4 stem scenario: `vocals`, `drums`, `bass`, `other`.
        targets (list or None, optional): List of source names to be used as
            targets. If None, a dict with the 4 stems is returned.
             If e.g [`vocals`, `drums`], a tensor with stacked `vocals` and
             `drums` is returned instead of a dict. Defaults to None.
        suffix (str, optional): Filename suffix, defaults to `.wav`.
        split (str, optional): Dataset subfolder, defaults to `train`.
        subset (:obj:`list` of :obj:`str`, optional): Selects a specific of
            list of tracks to be loaded, defaults to `None` (loads all tracks).
        segment (float, optional): Duration of segments in seconds,
            defaults to ``None`` which loads the full-length audio tracks.
        samples_per_track (int, optional):
            Number of samples yielded from each track, can be used to increase
            dataset size, defaults to `1`.
        random_segments (boolean, optional): Enables random offset for track segments.
        random_track_mix boolean: enables mixing of random sources from
            different tracks to assemble mix.
        source_augmentations (:obj:`list` of :obj:`callable`): list of augmentation
            function names, defaults to no-op augmentations (input = output)
        sample_rate (int, optional): Samplerate of files in dataset.

    Attributes:
        root (str): Root path of dataset
        sources (:obj:`list` of :obj:`str`, optional): List of source names.
            Defaults to MUSDB18 4 stem scenario: `vocals`, `drums`, `bass`, `other`.
        suffix (str, optional): Filename suffix, defaults to `.wav`.
        split (str, optional): Dataset subfolder, defaults to `train`.
        subset (:obj:`list` of :obj:`str`, optional): Selects a specific of
            list of tracks to be loaded, defaults to `None` (loads all tracks).
        segment (float, optional): Duration of segments in seconds,
            defaults to ``None`` which loads the full-length audio tracks.
        samples_per_track (int, optional):
            Number of samples yielded from each track, can be used to increase
            dataset size, defaults to `1`.
        random_segments (boolean, optional): Enables random offset for track segments.
        random_track_mix boolean: enables mixing of random sources from
            different tracks to assemble mix.
        source_augmentations (:obj:`list` of :obj:`callable`): list of augmentation
            function names, defaults to no-op augmentations (input = output)
        sample_rate (int, optional): Samplerate of files in dataset.
        tracks (:obj:`list` of :obj:`Dict`): List of track metadata

    References
        "The 2018 Signal Separation Evaluation Campaign" Stoter et al. 2018.
    """

    dataset_name = "MS_21"

    def __init__(
        self,
        root,
        csv_file_path,
        grouping_info = {'percussion':['Drum_Kick','Drum_Snare','Drum_HiHat','Drum_Cymbals','Drum_Overheads','Drum_Tom','Drum_Room','Percussion'
],'vocals':['Lead_Vocal','Backing_Vocal'],'bass':'Bass','other':['Acoustic_Guitar','Electric_Guitar','Piano','Electric_Piano','Brass','String','WoodWind','Other'
]}, # default traditional four stems
        sources=["vocals", "bass", "drums", "other"],
        targets=None,
        suffix=".wav",
        split="",
        subset=None,
        segment=None,
        samples_per_track=1,
        random_segments=False,
        random_track_mix=False,
        source_augmentations=lambda audio: audio,
        sample_rate=44100,
    ):

        self.root = Path(root).expanduser()
        self.csv_info = pd.read_csv(csv_file_path)
        
        self.grouping_info = grouping_info
        self.split = split
        self.sample_rate = sample_rate
        self.segment = segment
        self.random_track_mix = random_track_mix
        self.random_segments = random_segments
        self.source_augmentations = source_augmentations
        self.sources = sources
        self.targets = targets
        self.suffix = suffix
        self.subset = subset
        self.samples_per_track = samples_per_track
        self.tracks = list(self.get_tracks())
        #print(self.tracks)
        if not self.tracks:
            raise RuntimeError("No tracks found.")
        self.__getitem__(index = 1)

    def __getitem__(self, index):
        # create a dict for storing stem grouping rule
        
        
        
        # assemble the mixture of target and interferers
        audio_sources = {}

        # get track_id
        track_id = index // self.samples_per_track
        #print(track_id)
        
        
        if self.random_segments:
            start = random.uniform(0, self.tracks[track_id]["min_duration"] - self.segment)
        else:
            start = 0

        # create sources based on multitracks
        for source in self.sources:
            # optionally select a random track for each source
            if self.random_track_mix:
                # load a different track
                track_id = random.choice(range(len(self.tracks)))
                if self.random_segments:
                    start = random.uniform(0, self.tracks[track_id]["min_duration"] - self.segment)

            # loads the full track duration
            start_sample = int(start * self.sample_rate)
            # check if dur is none
            if self.segment:
                # stop in soundfile is calc in samples, not seconds
                stop_sample = start_sample + int(self.segment * self.sample_rate)
            else:
                # set to None for reading complete file
                stop_sample = None

            # load actual audio
#             audio, _ = sf.read(
#                 Path(self.tracks[track_id]["path"] / source).with_suffix(self.suffix),
#                 always_2d=True,
#                 start=start_sample,
#                 stop=stop_sample,
#             )
            # load multitracks and be ready to do linear mix
            for i in self.grouping_info:
                print(i) # get source names
                stem_tracks = []
                for j in self.grouping_info[i]:
                    # get all multitrack filename within one source
                    for m in j:
                        print(m)
                        stem_tracks.append(self.csv_info.iloc[track_id][m])
                    
                # apply linear mix within one source (stem) later can intergrate with data augmentation
                # first load one multitrack
                for k in stem_tracks:
                    audio,_ = sf.read(
                        Path(self.tracks[track_id]['path'] / k),
                    always_2d=True,
                    start=start_sample,
                    stop=stop_sample,
                    )
                    # convert to torch tensor
                    audio = torch.tensor(audio.T, dtype=torch.float)

                    # apply multitrack-wise augmentations
                    # audio = self.multitrack_augmentation(audio)
                    source_multitrack[k] = audio
                   
                # apply linear mix over all multitracks within one source index=0
                source_mix = torch.stack(list(source_multitrack.values())).sum(0)
                audio_sources[i] = source_mix
                # apply source-wise augmentations
                # source_mix = self.source_augmentations(source_mix)
            
            audio_mix = torch.stack(list(audio_sources.values())).sum(0)
            if self.targets:
                audio_sources = torch.stack(
                    [wav for src, wav in audio_sources.items() if src in self.targets], dim=0
            )
        # audio_mix a mixture over the sources, audio_sources is a concatenation of all sources
        return audio_mix, audio_sources

    def __len__(self):
        return len(self.tracks) * self.samples_per_track

    def get_tracks(self):
        """Loads input and output tracks"""
        """load tracks that contain all the required sources tracks"""
        p = Path(self.root, self.split) # train and test folder
        # p = Path(self.root)
        
        for track_path in tqdm.tqdm(p.iterdir()):
            #print(track_path)
            if track_path.is_dir():
                if self.subset and track_path.stem not in self.subset:
                    # skip this track
                    continue

                
                # source_paths = [track_path / (s + self.suffix) for s in self.sources] # 固定命名
                
                multitrack_paths = []
                for s in os.listdir(track_path):
                    if s.split('.')[-1]=='wav' and s.split('.')[0]!='':
                        multitrack_paths.append(track_path / s )
                #print(len(multitrack_paths))
                # 改成先读取所有wav文件，返回所有path
                # 然后通过csv文件进行linear mix生成sources,直接读成tensor
                if not all(sp.exists() for sp in multitrack_paths):
                    print("Exclude track due to non-existing source", track_path)
                    continue

                # get metadata
                infos = list(map(sf.info, multitrack_paths))
                if not all(i.samplerate == self.sample_rate for i in infos):
                    print("Exclude track due to different sample rate ", track_path)
                    continue

                if self.segment is not None:
                    # get minimum duration of track
                    min_duration = min(i.duration for i in infos)
                    if min_duration > self.segment:
                        yield ({"path": track_path, "min_duration": min_duration})
                else:
                    yield ({"path": track_path, "min_duration": None})

    def get_infos(self):
        """Get dataset infos (for publishing models).

        Returns:
            dict, dataset infos with keys `dataset`, `task` and `licences`.
        """
        infos = dict()
        infos["dataset"] = self.dataset_name
        infos["task"] = "enhancement"
        infos["licenses"] = [musdb_license]
        return infos


musdb_license = dict()


In [88]:
train_dataset = MS_21Dataset( 
    csv_file_path = 'D:\smc_master_thesis_2021\MTG_2021_MASTER_THESIS\mixing_secret_dataset_modified.csv',
    targets = 'vocals',
    root = 'E:/unzip_multitrack'
    # root = 'E:/musdb18_hq'
)

18it [00:00, 88.03it/s]

Exclude track due to different sample rate  E:\unzip_multitrack\4_Out_Of_10
Exclude track due to different sample rate  E:\unzip_multitrack\All_American_Mutt


69it [00:00, 93.29it/s]

Exclude track due to different sample rate  E:\unzip_multitrack\Believer
Exclude track due to different sample rate  E:\unzip_multitrack\Bright_Angel
Exclude track due to different sample rate  E:\unzip_multitrack\Buildin'_It_Up


93it [00:00, 106.91it/s]

Exclude track due to different sample rate  E:\unzip_multitrack\By_My_Side
Exclude track due to different sample rate  E:\unzip_multitrack\Carolina_In_The_Pines
Exclude track due to different sample rate  E:\unzip_multitrack\Carousel_Ride


104it [00:01, 91.19it/s]

Exclude track due to different sample rate  E:\unzip_multitrack\Cog_In_The_Machine
Exclude track due to different sample rate  E:\unzip_multitrack\Contact


125it [00:01, 93.26it/s]

Exclude track due to different sample rate  E:\unzip_multitrack\Daddy_D
Exclude track due to different sample rate  E:\unzip_multitrack\Die_Young
Exclude track due to different sample rate  E:\unzip_multitrack\Don't_Put_Me_On_Hold


159it [00:01, 102.04it/s]

Exclude track due to different sample rate  E:\unzip_multitrack\Encanto_Tropical


216it [00:02, 103.55it/s]

Exclude track due to different sample rate  E:\unzip_multitrack\Happy_Pills
Exclude track due to different sample rate  E:\unzip_multitrack\Heart_Of_My_Home_Town
Exclude track due to different sample rate  E:\unzip_multitrack\Hope_And_The_Sea


238it [00:02, 95.52it/s] 

Exclude track due to different sample rate  E:\unzip_multitrack\Ja_Make_Ya_Dance
Exclude track due to different sample rate  E:\unzip_multitrack\Joy_Ride
Exclude track due to different sample rate  E:\unzip_multitrack\Kane_Guru


257it [00:02, 75.86it/s]

Exclude track due to different sample rate  E:\unzip_multitrack\Last_Night's_Gig
Exclude track due to different sample rate  E:\unzip_multitrack\Like_You_Do
Exclude track due to different sample rate  E:\unzip_multitrack\Little_Lighter
Exclude track due to different sample rate  E:\unzip_multitrack\Living_In_The_City


276it [00:02, 80.70it/s]

Exclude track due to different sample rate  E:\unzip_multitrack\Lookin'_Tough_Feelin'_Good
Exclude track due to different sample rate  E:\unzip_multitrack\Make_You_Dance
Exclude track due to different sample rate  E:\unzip_multitrack\Me_And_My_Crew


296it [00:03, 81.83it/s]

Exclude track due to different sample rate  E:\unzip_multitrack\Must_Be_Voodoo
Exclude track due to different sample rate  E:\unzip_multitrack\Nalim
Exclude track due to different sample rate  E:\unzip_multitrack\Naturally
Exclude track due to different sample rate  E:\unzip_multitrack\Not_Alone


325it [00:03, 86.64it/s]

Exclude track due to different sample rate  E:\unzip_multitrack\One_Time_Weekend
Exclude track due to different sample rate  E:\unzip_multitrack\Passenger_Side
Exclude track due to different sample rate  E:\unzip_multitrack\Piano_Solo_1


334it [00:03, 84.78it/s]

Exclude track due to different sample rate  E:\unzip_multitrack\Place_2_Be
Exclude track due to different sample rate  E:\unzip_multitrack\Pure_Luxury


354it [00:03, 77.03it/s]

Exclude track due to different sample rate  E:\unzip_multitrack\Rescue_Me


387it [00:04, 69.11it/s]

Exclude track due to different sample rate  E:\unzip_multitrack\Santa_Fe
Exclude track due to different sample rate  E:\unzip_multitrack\Scar
Exclude track due to different sample rate  E:\unzip_multitrack\Seat_Back


406it [00:04, 77.25it/s]

Exclude track due to different sample rate  E:\unzip_multitrack\So_Hi._So_Lo
Exclude track due to different sample rate  E:\unzip_multitrack\Spirit_Cold
Exclude track due to different sample rate  E:\unzip_multitrack\String_Quartet


437it [00:05, 90.28it/s]

Exclude track due to different sample rate  E:\unzip_multitrack\Surrendering


448it [00:05, 94.53it/s]

Exclude track due to different sample rate  E:\unzip_multitrack\The_Hardest_Part
Exclude track due to different sample rate  E:\unzip_multitrack\The_Last_Stand
Exclude track due to different sample rate  E:\unzip_multitrack\The_Things_We_Do_For_Love


468it [00:05, 79.98it/s]

Exclude track due to different sample rate  E:\unzip_multitrack\Tip_Toe_Through_The_Crypto


477it [00:05, 77.81it/s]

Exclude track due to different sample rate  E:\unzip_multitrack\Two_Bare_Hands


506it [00:06, 83.28it/s]


Exclude track due to different sample rate  E:\unzip_multitrack\You_Don't_Know
percussion
D


KeyError: 'D'

In [76]:
train_dataset.csv_info.iloc[0]['Drum_Kick']

"['01_KickMic1.wav', '02_KickMic2.wav', '03_KickMic3.wav']"

# Original musdb18_dataset.py

In [None]:
from pathlib import Path
import torch.utils.data
import random
import torch
import tqdm
import soundfile as sf
# import musedb

class MUSDB18Dataset(torch.utils.data.Dataset):
    """MUSDB18 music separation dataset

    The dataset consists of 150 full lengths music tracks (~10h duration) of
    different genres along with their isolated stems:
        `drums`, `bass`, `vocals` and `others`.

    Out-of-the-box, asteroid does only support MUSDB18-HQ which comes as
    uncompressed WAV files. To use the MUSDB18, please convert it to WAV first:

    - MUSDB18 HQ: https://zenodo.org/record/3338373
    - MUSDB18     https://zenodo.org/record/1117372

    .. note::
        The datasets are hosted on Zenodo and require that users
        request access, since the tracks can only be used for academic purposes.
        We manually check this requests.

    This dataset asssumes music tracks in (sub)folders where each folder
    has a fixed number of sources (defaults to 4). For each track, a list
    of `sources` and a common `suffix` can be specified.
    A linear mix is performed on the fly by summing up the sources

    Due to the fact that all tracks comprise the exact same set
    of sources, random track mixing can be used can be used,
    where sources from different tracks are mixed together.

    Folder Structure:
        >>> #train/1/vocals.wav ---------|
        >>> #train/1/drums.wav ----------+--> input (mix), output[target]
        >>> #train/1/bass.wav -----------|
        >>> #train/1/other.wav ---------/

    Args:
        root (str): Root path of dataset
        sources (:obj:`list` of :obj:`str`, optional): List of source names
            that composes the mixture.
            Defaults to MUSDB18 4 stem scenario: `vocals`, `drums`, `bass`, `other`.
        targets (list or None, optional): List of source names to be used as
            targets. If None, a dict with the 4 stems is returned.
             If e.g [`vocals`, `drums`], a tensor with stacked `vocals` and
             `drums` is returned instead of a dict. Defaults to None.
        suffix (str, optional): Filename suffix, defaults to `.wav`.
        split (str, optional): Dataset subfolder, defaults to `train`.
        subset (:obj:`list` of :obj:`str`, optional): Selects a specific of
            list of tracks to be loaded, defaults to `None` (loads all tracks).
        segment (float, optional): Duration of segments in seconds,
            defaults to ``None`` which loads the full-length audio tracks.
        samples_per_track (int, optional):
            Number of samples yielded from each track, can be used to increase
            dataset size, defaults to `1`.
        random_segments (boolean, optional): Enables random offset for track segments.
        random_track_mix boolean: enables mixing of random sources from
            different tracks to assemble mix.
        source_augmentations (:obj:`list` of :obj:`callable`): list of augmentation
            function names, defaults to no-op augmentations (input = output)
        sample_rate (int, optional): Samplerate of files in dataset.

    Attributes:
        root (str): Root path of dataset
        sources (:obj:`list` of :obj:`str`, optional): List of source names.
            Defaults to MUSDB18 4 stem scenario: `vocals`, `drums`, `bass`, `other`.
        suffix (str, optional): Filename suffix, defaults to `.wav`.
        split (str, optional): Dataset subfolder, defaults to `train`.
        subset (:obj:`list` of :obj:`str`, optional): Selects a specific of
            list of tracks to be loaded, defaults to `None` (loads all tracks).
        segment (float, optional): Duration of segments in seconds,
            defaults to ``None`` which loads the full-length audio tracks.
        samples_per_track (int, optional):
            Number of samples yielded from each track, can be used to increase
            dataset size, defaults to `1`.
        random_segments (boolean, optional): Enables random offset for track segments.
        random_track_mix boolean: enables mixing of random sources from
            different tracks to assemble mix.
        source_augmentations (:obj:`list` of :obj:`callable`): list of augmentation
            function names, defaults to no-op augmentations (input = output)
        sample_rate (int, optional): Samplerate of files in dataset.
        tracks (:obj:`list` of :obj:`Dict`): List of track metadata

    References
        "The 2018 Signal Separation Evaluation Campaign" Stoter et al. 2018.
    """

    dataset_name = "MUSDB18"

    def __init__(
        self,
        root,
        sources=["vocals", "bass", "drums", "other"],
        targets=None,
        suffix=".wav",
        split="train",
        subset=None,
        segment=None,
        samples_per_track=1,
        random_segments=False,
        random_track_mix=False,
        source_augmentations=lambda audio: audio,
        sample_rate=44100,
    ):

        self.root = Path(root).expanduser()
        self.split = split
        self.sample_rate = sample_rate
        self.segment = segment
        self.random_track_mix = random_track_mix
        self.random_segments = random_segments
        self.source_augmentations = source_augmentations
        self.sources = sources
        self.targets = targets
        self.suffix = suffix
        self.subset = subset
        self.samples_per_track = samples_per_track
        self.tracks = list(self.get_tracks())
        if not self.tracks:
            raise RuntimeError("No tracks found.")

    def __getitem__(self, index):
        # assemble the mixture of target and interferers
        audio_sources = {}

        # get track_id
        track_id = index // self.samples_per_track
        if self.random_segments:
            start = random.uniform(0, self.tracks[track_id]["min_duration"] - self.segment)
        else:
            start = 0

        # load sources
        for source in self.sources:
            # optionally select a random track for each source
            if self.random_track_mix:
                # load a different track
                track_id = random.choice(range(len(self.tracks)))
                if self.random_segments:
                    start = random.uniform(0, self.tracks[track_id]["min_duration"] - self.segment)

            # loads the full track duration
            start_sample = int(start * self.sample_rate)
            # check if dur is none
            if self.segment:
                # stop in soundfile is calc in samples, not seconds
                stop_sample = start_sample + int(self.segment * self.sample_rate)
            else:
                # set to None for reading complete file
                stop_sample = None

            # load actual audio
            audio, _ = sf.read(
                Path(self.tracks[track_id]["path"] / source).with_suffix(self.suffix),
                always_2d=True,
                start=start_sample,
                stop=stop_sample,
            )
            # convert to torch tensor
            audio = torch.tensor(audio.T, dtype=torch.float)
            # apply source-wise augmentations
            audio = self.source_augmentations(audio)
            audio_sources[source] = audio

        # apply linear mix over source index=0
        audio_mix = torch.stack(list(audio_sources.values())).sum(0)
        if self.targets:
            audio_sources = torch.stack(
                [wav for src, wav in audio_sources.items() if src in self.targets], dim=0
            )
        return audio_mix, audio_sources

    def __len__(self):
        return len(self.tracks) * self.samples_per_track

    def get_tracks(self):
        """Loads input and output tracks"""
        p = Path(self.root, self.split)
        
        for track_path in tqdm.tqdm(p.iterdir()):
            if track_path.is_dir():
                if self.subset and track_path.stem not in self.subset:
                    # skip this track
                    continue

                source_paths = [track_path / (s + self.suffix) for s in self.sources]
                if not all(sp.exists() for sp in source_paths):
                    print("Exclude track due to non-existing source", track_path)
                    continue

                # get metadata
                infos = list(map(sf.info, source_paths))
                if not all(i.samplerate == self.sample_rate for i in infos):
                    print("Exclude track due to different sample rate ", track_path)
                    continue

                if self.segment is not None:
                    # get minimum duration of track
                    min_duration = min(i.duration for i in infos)
                    if min_duration > self.segment:
                        yield ({"path": track_path, "min_duration": min_duration})
                else:
                    yield ({"path": track_path, "min_duration": None})

    def get_infos(self):
        """Get dataset infos (for publishing models).

        Returns:
            dict, dataset infos with keys `dataset`, `task` and `licences`.
        """
        infos = dict()
        infos["dataset"] = self.dataset_name
        infos["task"] = "enhancement"
        infos["licenses"] = [musdb_license]
        return infos


musdb_license = dict()
