In [54]:
import json
import os
import wave

import numpy as np
import pandas as pd

In [55]:
# Path of the JSON metadata file describing the audio examples
meta_fpath = 'landing/nsynth-test/examples.json'

# Read through a context manager so the file handle is closed deterministically
# (the bare `json.load(open(...))` form leaves it open until garbage collection).
with open(meta_fpath, encoding='utf-8') as meta_file:
    meta = json.load(meta_file)

# The JSON is keyed by note id; transpose so that each note becomes one row
meta_df = pd.DataFrame(meta).T

# Save data catalog (the index is not written; the displayed rows show the
# same note id is also available in the 'note_str' column)
meta_df.to_csv('trusted/data_catalog.csv', index=False)

display(meta_df.head())

Unnamed: 0,qualities,pitch,note,instrument_source_str,velocity,instrument_str,instrument,sample_rate,qualities_str,instrument_source,note_str,instrument_family,instrument_family_str
bass_synthetic_068-049-025,"[0, 1, 0, 0, 0, 0, 0, 0, 0, 0]",49,217499,synthetic,25,bass_synthetic_068,656,16000,[dark],2,bass_synthetic_068-049-025,0,bass
keyboard_electronic_001-021-127,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0]",21,299359,electronic,127,keyboard_electronic_001,40,16000,[],1,keyboard_electronic_001-021-127,4,keyboard
guitar_acoustic_010-066-100,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0]",66,72288,acoustic,100,guitar_acoustic_010,219,16000,[],0,guitar_acoustic_010-066-100,3,guitar
reed_acoustic_037-068-127,"[0, 0, 0, 0, 0, 0, 0, 0, 1, 0]",68,22259,acoustic,127,reed_acoustic_037,387,16000,[reverb],0,reed_acoustic_037-068-127,7,reed
flute_acoustic_002-077-100,"[0, 0, 0, 0, 0, 0, 0, 0, 1, 0]",77,72001,acoustic,100,flute_acoustic_002,86,16000,[reverb],0,flute_acoustic_002-077-100,2,flute


In [56]:
# Distinct instrument family names, in order of first appearance
instrument_families = pd.unique(meta_df['instrument_family_str'])
instrument_families

array(['bass', 'keyboard', 'guitar', 'reed', 'flute', 'string', 'vocal',
       'brass', 'mallet', 'organ'], dtype=object)

In [57]:
# Restrict the catalogue to a hand-picked subset of instrument families
instrument_families = ['guitar', 'keyboard', 'string', 'flute']

meta_df = meta_df.loc[meta_df['instrument_family_str'].isin(instrument_families)]

In [58]:
# Keep only pitches between 48 and 84 inclusive (C3 to C6)
meta_df = meta_df[meta_df['pitch'].between(48, 84)]

In [59]:
def sample_rows(meta_df, n):
    """
    Pick n rows from the meta dataframe, each from a different instrument family.

    One random row is drawn per 'instrument_family' group, and n of those
    per-family rows are then drawn at random without replacement, so n must
    not exceed the number of families present in meta_df.
    """
    def pick_one(family_rows):
        # A single random representative of this family
        return family_rows.sample(1)

    one_per_family = meta_df.groupby('instrument_family').apply(pick_one)
    return one_per_family.sample(n)

# Example usage: draw n rows with sample_rows and show them.
# Note that `n` and `sampled_rows` are notebook-level names; `n` is assigned
# again further down before the combined samples are generated.
n = 2
sampled_rows = sample_rows(meta_df, n)
display(sampled_rows)


Unnamed: 0_level_0,Unnamed: 1_level_0,qualities,pitch,note,instrument_source_str,velocity,instrument_str,instrument,sample_rate,qualities_str,instrument_source,note_str,instrument_family,instrument_family_str
instrument_family,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
3,guitar_electronic_028-084-127,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0]",84,22082,electronic,127,guitar_electronic_028,510,16000,[],1,guitar_electronic_028-084-127,3,guitar
2,flute_acoustic_002-073-127,"[0, 0, 0, 0, 0, 0, 0, 0, 1, 0]",73,9412,acoustic,127,flute_acoustic_002,86,16000,[reverb],0,flute_acoustic_002-073-127,2,flute


In [60]:
from pydub import AudioSegment

def combine_wav_files(*input_paths):
    """
    Mix several WAV files together so that they play at the same time.

    The first path is loaded as the base segment and every further path is
    overlaid on top of it. The mixed segment is returned to the caller;
    nothing is written to disk here.

    NOTE(review): pydub's overlay appears to keep the duration of the segment
    it is called on, so audio beyond the first file's length would be cut -
    confirm that all inputs share the same length.
    """
    # Start from the first file ...
    mixed = AudioSegment.from_wav(input_paths[0])

    # ... and layer each remaining file on top of the running mix
    for extra_path in input_paths[1:]:
        mixed = mixed.overlay(AudioSegment.from_wav(extra_path))

    return mixed

In [61]:
def generate_combined_sample(meta_df, n):
    """
    Build one mixed audio sample out of n randomly chosen notes.

    Rows are chosen with sample_rows, the matching WAV files are looked up
    in the landing audio directory by their 'note_str', and the files are
    mixed with combine_wav_files.

    Returns a tuple (list of the chosen note_str values, mixed audio).
    """
    audio_dir = 'landing/nsynth-test/audio'

    picked = sample_rows(meta_df, n)
    note_names = picked['note_str'].tolist()

    # One WAV file per chosen note, named after its note_str
    wav_paths = [f'{audio_dir}/{name}.wav' for name in note_names]
    return note_names, combine_wav_files(*wav_paths)

N = 5000 # Number of combined samples
n = 2 # Number of instruments per sample

# Create the export directory up front so that writing the WAV files does not
# fail when the notebook is run on a fresh checkout.
output_dir = 'trusted/combined_wav'
os.makedirs(output_dir, exist_ok=True)

instrument_columns = [f'instrument_{i}' for i in range(1, n+1)]

# Collect the catalogue rows in a plain list and build the DataFrame once at
# the end; enlarging a DataFrame with `.loc[i] = row` on every iteration
# reallocates it each time. `sample_id` replaces `id` to avoid shadowing the builtin.
records = []
for sample_id in range(N):
    note_names, combined_wav = generate_combined_sample(meta_df, n)
    combined_wav.export(f'{output_dir}/{sample_id}.wav', format='wav')
    records.append([sample_id] + note_names)

combined_df = pd.DataFrame(records, columns=['id'] + instrument_columns)

# Save the combined_wav data catalogue
combined_df.to_csv('trusted/combined_wav.csv', index=False)