# RealTrack data preparation

This notebook prepares the RealTrack (RT) training data. `INPUT_DIR` is expected to point to the `RealTracks` directory from Band-in-a-Box UltraPAK 2018, so that each subdirectory of `INPUT_DIR` contains the WMA files for a single RealTrack.

The code creates a `wav_16kHz` directory containing the 8-second training segments, a `metadata.json` file with information about each segment, and `pairs_train`, `pairs_val` and `pairs_test` files listing pairs of audio file paths. Note that `pairs_test` is not used in the paper.

Copyright 2020 InterDigital R&D and Télécom Paris.  
Author: Ondřej Cífka

In [1]:
import argparse
import collections
import concurrent.futures as cf
import glob
import hashlib
import json
import os
import random
import regex as re
import sys

import IPython.display as ipd
import itertools
import librosa
import matplotlib.pyplot as plt
from natsort import natsorted, ns
import numpy as np
import pandas as pd
import pydub
import pysndfx
from pydub import silence
from sklearn.model_selection import train_test_split
import soundfile as sf
from tqdm.auto import tqdm

In [1]:
INPUT_DIR = 'RealTracks'  # directory whose subdirectories each hold the WMA files of one RealTrack
OUTPUT_DIR = 'wav_16kHz'  # directory that receives the processed WAV segments

# Load data augmentation parameters from metadata_ref.json instead of sampling them randomly.
# Set to True to reproduce the dataset from the paper. Set to False if you want to use your own data.
USE_REF_METADATA = True

MAX_FINAL_CHUNK_LEN = 8000  # length of a final training segment in ms (8 s)
MAX_TRANSPOSE = 4  # maximum number of transposition steps
# Largest resampling factor, i.e. the frequency ratio of MAX_TRANSPOSE steps in 12-tone equal
# temperament. Derived from MAX_TRANSPOSE so that the two settings cannot drift apart.
MAX_RESAMPLE = 2 ** (MAX_TRANSPOSE / 12)
# Length in ms of the chunks cut from the source audio: the final segment length scaled by
# MAX_RESAMPLE and rounded to the nearest whole second.
MAX_CHUNK_LEN = int(MAX_FINAL_CHUNK_LEN * MAX_RESAMPLE / 1000 + 0.5) * 1000  # 10 s
# Upper bound on the number of source chunks, equivalent to 20 minutes of source audio.
MAX_CHUNKS = 20 * 60 * 1000 // MAX_CHUNK_LEN  # 20 min
SR = 16000  # sample rate in Hz used for processing and for the written WAV files

In [3]:
def random_fx(rng):
    """Assemble a randomly parameterized pysndfx effect chain.

    The number of effects (0 to 3) is drawn with probabilities 0.35, 0.4, 0.2 and 0.05.
    Each effect is then picked from overdrive, phaser, gain followed by reverb, and tremolo,
    and its parameters are drawn from `rng` at the moment it is added to the chain.

    Args:
        rng: Random generator providing `choice`, `uniform` and `power`
            (e.g. the result of `np.random.default_rng`).

    Returns:
        The `pysndfx.AudioEffectsChain` with the selected effects added.
    """
    def add_overdrive(fx):
        return fx.overdrive(gain=rng.uniform(10, 40))

    def add_phaser(fx):
        # Keyword arguments are evaluated left to right, which fixes the order of the draws.
        return fx.phaser(gain_in=rng.uniform(0.6, 0.9),
                         gain_out=rng.uniform(0.66, 0.85),
                         delay=rng.power(0.4) * 3 + 1,
                         decay=rng.uniform(0.2, 0.45),
                         speed=rng.uniform(0.5, 2),
                         triangular=rng.choice([True, False]))

    def add_reverb(fx):
        # Lower the gain by 3 before adding the reverb.
        return fx.gain(-3).reverb()

    def add_tremolo(fx):
        return fx.tremolo(freq=rng.power(0.5) * 14 + 1,
                          depth=rng.uniform(20, 60))

    candidates = [add_overdrive, add_phaser, add_reverb, add_tremolo]

    fx_chain = pysndfx.AudioEffectsChain()
    num_effects = rng.choice([0, 1, 2, 3], p=[0.35, 0.4, 0.2, 0.05])
    for _ in range(num_effects):
        add_effect = rng.choice(candidates)
        add_effect(fx_chain)
    return fx_chain

In [4]:
def pydub_to_numpy(audio, sr=SR):
    """Convert a pydub audio segment to a mono float32 numpy array.

    Args:
        audio: Object offering pydub's `set_channels`, `set_frame_rate` and
            `get_array_of_samples` methods (e.g. a `pydub.AudioSegment`).
        sr: Frame rate the audio is converted to before the samples are extracted.

    Returns:
        A float32 array of the samples, divided by the maximum value of the source integer
        type. An empty input yields an empty float32 array.
    """
    mono = audio.set_channels(1).set_frame_rate(sr)
    raw = np.array(mono.get_array_of_samples())
    scaled = raw.astype(np.float32)
    # Skip the scaling for empty audio (nothing to scale, and the integer type is not inspected).
    if len(raw) > 0:
        scaled /= np.iinfo(raw.dtype).max
    return scaled

In [6]:
def process_dir(args):
    """Cut the WMA files of one RealTrack directory into augmented WAV segments.

    Every `*.wma` file directly inside the directory is split into consecutive chunks of
    `MAX_CHUNK_LEN` ms. Chunks are grouped into pairs; both chunks of a pair are resampled with
    the same rate and passed through the same effect chain, then clipped to
    `MAX_FINAL_CHUNK_LEN` ms and written to `OUTPUT_DIR/<directory name>/<chunk name>.wav`.

    If `USE_REF_METADATA` is true, the chunk pairs, resampling rates and effects are taken from
    the entries of `metadata_ref.json` whose `style` equals the directory name. Otherwise they
    are drawn from a random generator seeded with a hash of the directory name, and at most
    `MAX_CHUNKS` chunks are used.

    Args:
        args: Path of the RealTrack directory (a single argument, as passed by `executor.map`).

    Returns:
        A `(dir_name, metadata)` tuple. `metadata` is a list with one dict per chunk pair,
        holding the keys `style`, `resample_rate`, `effects` and `segments`; each segment
        records its `src_path`, its output `path` relative to `OUTPUT_DIR`, and its chunk
        `index` within the source file.

    Raises:
        KeyError: If `USE_REF_METADATA` is true and a chunk listed in `metadata_ref.json`
            cannot be produced from the files in the directory.
    """
    dir_path = args
    dir_name = os.path.basename(dir_path)
    out_dir = os.path.join(OUTPUT_DIR, dir_name)
    # exist_ok lets the notebook be re-run (or resumed after an interrupted run) without
    # having to delete the output directory first; files with the same name are overwritten.
    os.makedirs(out_dir, exist_ok=True)

    if USE_REF_METADATA:
        with open('metadata_ref.json') as f:
            metadata_ref_raw = json.load(f)
        # Reference entries of this directory, keyed by the pair of chunk names
        # (output file names without extension), in the order they appear in the file.
        metadata_ref = collections.OrderedDict()
        for item in metadata_ref_raw:
            if item['style'] == dir_name:
                ref_key = tuple(os.path.splitext(os.path.basename(segment['path']))[0]
                                for segment in item['segments'])
                assert ref_key not in metadata_ref
                metadata_ref[ref_key] = item
    else:
        # Seed derived from the directory name, so the result does not depend on
        # the order in which directories are processed.
        seed = int.from_bytes(hashlib.sha512(dir_name.encode()).digest(), 'big')
        rng = np.random.default_rng(seed=seed)

    chunks = []
    for path in glob.glob(os.path.join(dir_path, '*.wma')):
        filename = os.path.basename(path)
        name, _ = os.path.splitext(filename)
        audio = pydub.AudioSegment.from_file(path)
        for t in range(0, len(audio), MAX_CHUNK_LEN):
            # `end` is always start + MAX_CHUNK_LEN, so for the last chunk of a file it may
            # lie past the end of the audio; the chunk name uses this nominal end.
            end = t + MAX_CHUNK_LEN
            chunks.append(argparse.Namespace(
                audio=audio, start=t, end=end,
                name=f'{name}.{t // 1000}-{end // 1000}',
                src_path=os.path.join(dir_name, filename)))
    # glob returns files in arbitrary order; sort to get a deterministic starting point.
    chunks.sort(key=lambda chk: (chk.src_path, chk.name))

    if USE_REF_METADATA:
        chunks_by_name = {chk.name: chk for chk in chunks}
        missing = sorted({name for key in metadata_ref for name in key} - chunks_by_name.keys())
        if missing:
            raise KeyError(f'{dir_name}: chunks listed in metadata_ref.json '
                           f'were not found in the input: {missing}')
        # Flatten the reference pairs so that consecutive chunks form the reference pairs again.
        chunks = [chunks_by_name[name] for key in metadata_ref for name in key]
    else:
        rng.shuffle(chunks)
        chunks = chunks[:MAX_CHUNKS]

    metadata = []
    # Pair up consecutive chunks; a trailing unpaired chunk is dropped by zip.
    for chunk_pair in zip(chunks[::2], chunks[1::2]):
        meta = {
            'style': dir_name
        }

        if USE_REF_METADATA:
            ref_key = tuple(chk.name for chk in chunk_pair)
            meta['resample_rate'] = metadata_ref[ref_key]['resample_rate']
            effects = pysndfx.AudioEffectsChain()
            effects.command = metadata_ref[ref_key]['effects']
        else:
            # Pick a resampling rate so as to transpose by a whole number of steps
            meta['resample_rate'] = 1 / (2 ** (rng.choice(np.arange(-MAX_TRANSPOSE, MAX_TRANSPOSE + 1)) / 12))

            # Pick random effects
            effects = random_fx(rng)
        meta['effects'] = effects.command

        meta['segments'] = []
        for chunk in chunk_pair:
            audio = pydub_to_numpy(chunk.audio[chunk.start:chunk.end])

            # Apply effects. The result is later written at SR, so resampling to a different
            # rate changes the speed and pitch of the audio. The sample rates are passed by
            # keyword because newer librosa versions no longer accept them positionally.
            # NOTE(review): the default resampling algorithm may differ between librosa
            # versions -- confirm if bit-exact reproduction matters.
            audio = librosa.resample(audio, orig_sr=SR, target_sr=SR * meta['resample_rate'])
            audio = effects(audio, sample_in=SR)

            # Clip to maximum length
            audio = audio[:MAX_FINAL_CHUNK_LEN * SR // 1000]

            out_path = os.path.join(out_dir, f'{chunk.name}.wav')
            sf.write(out_path, audio, SR, subtype='PCM_24')

            meta['segments'].append({
                'src_path': chunk.src_path,
                'path': os.path.relpath(out_path, OUTPUT_DIR),
                'index': chunk.start // MAX_CHUNK_LEN
            })
        metadata.append(meta)

    return dir_name, metadata

# Every directory that directly contains at least one WMA file is treated as one RealTrack.
wma_paths = glob.glob(os.path.join(INPUT_DIR, '*', '*.wma'))
dir_paths = sorted({os.path.dirname(wma_path) for wma_path in wma_paths})

# Process the directories in 16 worker processes; results arrive in the order of dir_paths.
metadata = []
with cf.ProcessPoolExecutor(16) as executor:
    results = executor.map(process_dir, dir_paths)
    for style, metadata_chunk in tqdm(results, total=len(dir_paths)):
        metadata += metadata_chunk
        print(style, 'OK')

HBox(children=(FloatProgress(value=0.0, max=1526.0), HTML(value='')))

Accordion, Rhythm EuropeanFastWaltzCory Sw 180 OK
Accordion, Rhythm GypsyJazzCompCory Sw 220 OK
Accordion, Rhythm GypsyWaltz Sw 200 OK
Accordion, Rhythm ItalianTarantellaCory Sw 130 OK
Accordion, Rhythm Klezmer44Cory Ev 130 OK
Accordion, Rhythm KlezmerGypsyCory Sw 190 OK
Accordion, Rhythm KlezmerHora34Cory Ev 120 OK
Accordion, Rhythm KlezmerPolkaABCory Ev 140 OK
Accordion, Rhythm KlezmerPopWaltz Ev 110 OK
Accordion, Rhythm OldWorld2Beat Ev16 140 OK
Accordion, Rhythm OldWorld2BeatHeldPlus Ev16 140 OK
Accordion, Rhythm OldWorldWaltz Ev 180 OK
Accordion, Rhythm OldWorldWaltzHeldPlus Ev 180 OK
Accordion, Rhythm PasoMusette Ev 120 OK
Accordion, Rhythm PolkaTradCory Ev 140 OK
Accordion, Rhythm Pop8ths Ev 085 OK
Accordion, Rhythm Pop8ths Ev 110 OK
Accordion, Rhythm PopCalypso Ev 100 OK
Accordion, Rhythm PopRockCory Ev 085 OK
Accordion, Rhythm PopRockCory Ev 120 OK
Accordion, Rhythm SpanishTango Ev 120 OK
Accordion, Rhythm SwedishSnoaCory Ev 110 OK
Accordion, Rhythm TangoMusette Ev 100 OK
Acco

Bass, Electric, Pop Syncopated Ev 136 OK
Bass, Electric, Pop16ths Ev 065 OK
Bass, Electric, Pop16ths Ev 085 OK
Bass, Electric, Pop16ths Ev 110 OK
Bass, Electric, PopBelieve Ev16 065 OK
Bass, Electric, PopCalypso Ev 100 OK
Bass, Electric, PopCrossover Sw16 075 OK
Bass, Electric, PopHalfNotesPush Ev 085 OK
Bass, Electric, PopHalfNotesPush Ev 120 OK
Bass, Electric, PopHalfNotesSync Ev 085 OK
Bass, Electric, PopHalfNotesSync Ev 120 OK
Bass, Electric, PopModernGrooveA-B Ev16 075 OK
Bass, Electric, PopPowerA-B Ev 120 OK
Bass, Electric, PopRockWaltz Ev 140 OK
Bass, Electric, PopShiningA-B Ev 120 OK
Bass, Electric, PopSw16RootFive Sw16 075 OK
Bass, Electric, PopSw16Sync Sw16 075 OK
Bass, Electric, PopUplift Ev16 120 OK
Bass, Electric, PopWaltz Ev 120 OK
Bass, Electric, PopWaltz Ev 180 OK
Bass, Electric, PraiseBreakA-B Ev 140 OK
Bass, Electric, PuertoRicoAguinaldo Ev16 120 OK
Bass, Electric, PunkFuzzPickedA-B Ev 165 OK
Bass, Electric, Reggae Ev16 100 OK
Bass, Electric, ReggaeSlow Sw 075 OK
Bass

Guitar, Acoustic, Fingerpicking Folky16thsBrent Ev16 085 OK
Guitar, Acoustic, Fingerpicking FolkySwing Sw 110 OK
Guitar, Acoustic, Fingerpicking Hank Sw 065 OK
Guitar, Acoustic, Fingerpicking Hank Sw 085 OK
Guitar, Acoustic, Fingerpicking Hank Sw 120 OK
Guitar, Acoustic, Fingerpicking Hank Sw 165 OK
Guitar, Acoustic, Fingerpicking Hank Sw 190 OK
Guitar, Acoustic, Fingerpicking HipFolk Sw16 85 OK
Guitar, Acoustic, Fingerpicking NashvilleSwingHighJoe Sw 090 OK
Guitar, Acoustic, Fingerpicking NashvilleSwingJoe Sw 090 OK
Guitar, Acoustic, Fingerpicking Pop8thsSteady Ev 065 OK
Guitar, Acoustic, Fingerpicking PopWaltz Ev 120 OK
Guitar, Acoustic, Fingerpicking Songwriter16ths Ev16 075 OK
Guitar, Acoustic, Fingerpicking Songwriter16thsHigh Ev16 075 OK
Guitar, Acoustic, Fingerpicking SongwriterAlternating8thsQuinn Ev 065 OK
Guitar, Acoustic, Fingerpicking SongwriterAlternating8thsQuinn Ev 110 OK
Guitar, Acoustic, Fingerpicking SongwriterFolkAltBassQuinn Ev16 085 OK
Guitar, Acoustic, Fingerpicki

Guitar, Electric, Background Dreamy Ev 065 OK
Guitar, Electric, Background DreamyPopBalladFillsBrent Ev 085 OK
Guitar, Electric, Background FunkSingleNoteWah Ev16 100 OK
Guitar, Electric, Background, PopDreamyBrent Ev 085 OK
Guitar, Electric, BackgroundSoloist SurfRock16thsBrent Ev 165 OK
Guitar, Electric, Chet Comp Ev 130 OK
Guitar, Electric, Chet Pickin' Ev 100 OK
Guitar, Electric, Rhythm AltCountryMoody Ev 075 OK
Guitar, Electric, Rhythm AltCountryRootsy Ev 075 OK
Guitar, Electric, Rhythm AmericanaGrittySlow128Brent Sw 040 OK
Guitar, Electric, Rhythm AmericanaSlow16thsBrent Ev16 060 OK
Guitar, Electric, Rhythm AmericanaSparseSlow16thsColin Ev16 060 OK
Guitar, Electric, Rhythm AmericanaTremoloSlow128Colin Sw 040 OK
Guitar, Electric, Rhythm Blues BB Ev 085 OK
Guitar, Electric, Rhythm Blues Chuck Sw 102 OK
Guitar, Electric, Rhythm Blues Monday Sw 065 OK
Guitar, Electric, Rhythm Blues Roadhouse Ev 120 OK
Guitar, Electric, Rhythm Blues Sw 120 OK
Guitar, Electric, Rhythm BluesRockCrispA-B

Guitar, Electric, Rhythm PopWaltzClassic Ev 085 OK
Guitar, Electric, Rhythm PopWaltzGrittyA-B Ev 120 OK
Guitar, Electric, Rhythm PopWaltzGrittyA-B Ev 180 OK
Guitar, Electric, Rhythm Punk Ev 165 OK
Guitar, Electric, Rhythm PunkHeldPlus Ev 165 OK
Guitar, Electric, Rhythm PunkMuted Ev 165 OK
Guitar, Electric, Rhythm Reggae Ev16 100 OK
Guitar, Electric, Rhythm ReggaeSlow Sw 075 OK
Guitar, Electric, Rhythm RnBSoulSw16Brent Sw16 085 OK
Guitar, Electric, Rhythm Rock Southern Ev 120 OK
Guitar, Electric, Rhythm RockBritDriving Ev 085 OK
Guitar, Electric, Rhythm RockBritSoulful Ev 085 OK
Guitar, Electric, Rhythm RockHeavy16thsCraig Ev16 090 OK
Guitar, Electric, Rhythm RockHeavy16thsMurrayA-B Ev16 090 OK
Guitar, Electric, Rhythm RockHeavy8thsCraig Ev 120 OK
Guitar, Electric, Rhythm RockHeavy8thsMurray Ev 120 OK
Guitar, Electric, Rhythm RockHeavyShuffleCraig Sw 120 OK
Guitar, Electric, Rhythm RockHeavyShuffleMurrayA-B Sw 120 OK
Guitar, Electric, Rhythm RockabillyBoomChickaBrent Ev 100 OK
Guitar, E

Guitar, Electric, Strumming DirtyOpen Ev 120 OK
Guitar, Electric, Strumming DirtyOpen Ev 165 OK
Guitar, High-Strung Acoustic, Fingerpicking Pop16ths Ev16 085 OK
Guitar, High-Strung Acoustic, Rhythm Folkie16ths Sw16 085 OK
Guitar, High-Strung Acoustic, Rhythm NorthernRockBallad Ev 065 OK
Guitar, High-Strung Acoustic, Rhythm Pop16ths Ev16 065 OK
Guitar, High-Strung Acoustic, Rhythm Pop16ths Ev16 110 OK
Guitar, High-Strung Acoustic, Rhythm PopCalypso Ev 100 OK
Guitar, Nylon, Background CountryBrent Ev 085 OK
Guitar, Nylon, Background CountryBrent Ev 120 OK
Guitar, Nylon, Background SlowSwingBrent Sw 065 OK
Guitar, Nylon, Background SwingBrent Sw 110 OK
Guitar, Nylon, Background WaltzBrent Sw 085 OK
Guitar, Nylon, Background WaltzBrent Sw 110 OK
Guitar, Nylon, Chet Fishin' Sw 120 OK
Guitar, Nylon, Chet Train Ev 130 OK
Guitar, Nylon, Fingerpicking CountryBrent Ev 085 OK
Guitar, Nylon, Fingerpicking CountryBrent Ev 120 OK
Guitar, Nylon, Fingerpicking PeruFestejo68Melodic Sw 120 OK
Guitar, Ny

Pedal Steel, Background HippieTrainEddy Ev 110 OK
Pedal Steel, Background ModernBalladAtmosphere Ev16 065 OK
Pedal Steel, Background ModernCountryDreamy16ths Ev16 075 OK
Pedal Steel, Background ModernCrossover Sw16 090 OK
Pedal Steel, Background ModernPopCountry Ev 110 OK
Pedal Steel, Background ModernWaltz Sw 140 OK
Pedal Steel, Background Opry Sw 130 OK
Pedal Steel, Background Shuffle Sw 136 OK
Pedal Steel, Background Train Ev 130 OK
Pedal Steel, Background TrainBeatEddy12-key Ev16 090 OK
Pedal Steel, Background Waylon Ev 176 OK
Pedal Steel, Background WestCoastBallad Ev 065 OK
Pedal Steel, Rhythm CountryDrivin8ths Ev 150 OK
Pedal Steel, Rhythm CountryWaltz Sw 085 OK
Pedal Steel, Rhythm CountryWaltz Sw 110 OK
Pedal Steel, Rhythm CountryWaltz Sw 140 OK
Pedal Steel, Rhythm HawaiianBallad Sw 085 OK
Pedal Steel, Rhythm HawaiianSwing Sw 110 OK
Pedal Steel, Rhythm HawaiianWaltz Sw 085 OK
Pedal Steel, Rhythm LushChordsSlow Ev 075 OK
Pedal Steel, Rhythm Pop8ths Ev 110 OK
Pedal Steel, Rhythm 

Piano, Acoustic, Rhythm RockabillyRockNRollKevin Ev 165 OK
Piano, Acoustic, Rhythm RockabillyRockNRollKevin Ev 165 (Soft LH) OK
Piano, Acoustic, Rhythm RockabillySlowSwingKevin Sw 085 OK
Piano, Acoustic, Rhythm RockabillySwingKevin Sw 165 OK
Piano, Acoustic, Rhythm Rumba Ev 110 OK
Piano, Acoustic, Rhythm Samba Ev 190 OK
Piano, Acoustic, Rhythm SmoothJazzBallad Ev16 065 OK
Piano, Acoustic, Rhythm SmoothJazzCool Sw16 075 OK
Piano, Acoustic, Rhythm SmoothJazzCool Sw16 100 OK
Piano, Acoustic, Rhythm SmoothJazzPoppy Ev16 090 OK
Piano, Acoustic, Rhythm SmoothSoulFast16thsMike Ev16 110 OK
Piano, Acoustic, Rhythm SmoothSoulSlowMike Ev 085 OK
Piano, Acoustic, Rhythm SonMontuno Ev 165 OK
Piano, Acoustic, Rhythm SonMontunoComp Ev 165 OK
Piano, Acoustic, Rhythm Soul60sA-B Ev 110 OK
Piano, Acoustic, Rhythm Soul70sA-B Ev16 100 OK
Piano, Acoustic, Rhythm SoulJazzMike Ev 130 OK
Piano, Acoustic, Rhythm SoulfulPop16thsMike Ev16 065 OK
Piano, Acoustic, Rhythm SouthernRockJohn Sw 120 OK
Piano, Acoustic, R

Sax, Soprano, Soloist Jazz Swing Sw 110 OK
Sax, Soprano, Soloist JazzSwing Sw 140 OK
Sax, Soprano, Soloist JazzWaltzJack Sw 110 OK
Sax, Soprano, Soloist Samba Ev 190 OK
Sax, Soprano, Soloist SmoothJazzBallad Ev16 065 OK
Sax, Soprano, Soloist SmoothJazzCoolJack Sw16 075 OK
Sax, Tenor, 2-Beat Rock Ev 110 OK
Sax, Tenor, Background CroonerBalladHigh Sw 060 (1TrackHornSection) OK
Sax, Tenor, Background CroonerBalladLow Sw 060 (1TrackHornSection) OK
Sax, Tenor, Background CroonerBigBandHigh Sw 110 (1TrackHornSection) OK
Sax, Tenor, Background CroonerBigBandLow Sw 110 (1TrackHornSection) OK
Sax, Tenor, Background CroonerBossaHigh Ev 110 (1TrackHornSection) OK
Sax, Tenor, Background CroonerBossaLow Ev 110 (1TrackHornSection) OK
Sax, Tenor, Background Funk Ev 110 (1TrackHornSection) OK
Sax, Tenor, Background JazzBalladJack Sw 085 OK
Sax, Tenor, Background JazzSwingJack Sw 140 OK
Sax, Tenor, Background, R&B Ev16 110 OK
Sax, Tenor, HipHop Rock Sw 100 OK
Sax, Tenor, Jazz Sw 140 OK
Sax, Tenor, Moto

In [7]:
def _segment_paths(meta):
    """Return the output paths of all segments of one metadata entry as a tuple."""
    return tuple(segment['path'] for segment in meta['segments'])


# Natural-sort the entries by their segment paths, using natsort's path-aware algorithm.
metadata = natsorted(metadata, key=_segment_paths, alg=ns.PATH)

In [8]:
# Save the sorted metadata of all segment pairs as a single JSON document.
with open('metadata.json', 'w') as metadata_file:
    json.dump(metadata, fp=metadata_file)

In [9]:
def _output_paths(meta):
    """Return the two segment paths of a metadata entry, each prefixed with OUTPUT_DIR."""
    # Unpacking enforces that every entry has exactly two segments.
    first, second = meta['segments']
    return (os.path.join(OUTPUT_DIR, first['path']),
            os.path.join(OUTPUT_DIR, second['path']))


out_path_pairs = [_output_paths(meta) for meta in metadata]

# Shuffle in place with a fixed seed so that the order is reproducible.
np.random.seed(42)
np.random.shuffle(out_path_pairs)

In [10]:
# Re-seed the global numpy RNG before splitting. The statements below all draw from it
# (train_test_split is called without random_state -- presumably falling back to the global
# RNG; confirm against the installed scikit-learn), so their order must not be changed.
np.random.seed(42)
# Hold out 1% of all pairs as the test set.
train_pairs, test_pairs = train_test_split(out_path_pairs, test_size=0.01)
# Hold out 500 of the remaining pairs as the validation set.
train_pairs, val_pairs = train_test_split(train_pairs, test_size=500)
# Shuffle the training pairs once more, in place.
np.random.shuffle(train_pairs)

def write_tuples(tuples, path, shuffle_items=False):
    """Write each tuple as one tab-separated line of a text file.

    Args:
        tuples: Iterable of tuples whose items are written in their `str` form.
        path: Path of the output file; an existing file is overwritten.
        shuffle_items: If true, the items of every tuple are written in a random order
            drawn from the global numpy random state.
    """
    with open(path, 'w') as out_file:
        for items in tuples:
            if shuffle_items:
                # Sampling all items without replacement yields a random permutation.
                items = np.random.choice(items, size=len(items), replace=False)
            out_file.write('\t'.join(str(item) for item in items) + '\n')

# Only the training pairs get the order of their two items randomized.
# The training file is written first, as in the original call order, because it is the
# only call that draws from the global numpy RNG.
for split_pairs, split_path, shuffle_split in [
        (train_pairs, 'pairs_train', True),
        (val_pairs, 'pairs_val', False),
        (test_pairs, 'pairs_test', False)]:
    write_tuples(split_pairs, split_path, shuffle_items=shuffle_split)

In [11]:
# Sanity check: count the lines (one pair per line) in each of the written pairs_* files.
!wc -l pairs_*

    912 pairs_test
  89724 pairs_train
    500 pairs_val
  91136 total


In [12]:
# Number of RealTrack directories found under INPUT_DIR.
len(dir_paths)  # 1526

1526