In [12]:
import os
import sys
import json
import pandas as pd

import random
random.seed(1234)

In [13]:
def get_two_unique_integers(start, end):
    """Draw two distinct random integers from the inclusive range [start, end].

    Values come from the module-level ``random`` generator, so the result is
    reproducible after ``random.seed(...)``.

    Args:
        start: Lower bound of the range (inclusive).
        end: Upper bound of the range (inclusive).

    Returns:
        A ``(first, second)`` tuple of integers with ``first != second``.

    Raises:
        ValueError: If the range contains fewer than two integers
            (``end <= start``).
    """
    # With only one candidate value the retry loop below could never
    # terminate, so reject such ranges up front.
    if end <= start:
        raise ValueError(
            f"need at least two distinct integers in [{start}, {end}]"
        )

    first = random.randint(start, end)
    second = random.randint(start, end)

    # Re-draw the second value until it differs from the first.
    while first == second:
        second = random.randint(start, end)

    return first, second

In [14]:
# Load the Clotho metadata file and keep only the records stored under its
# top-level 'data' key. The encoding is passed explicitly because JSON text
# is UTF-8, whereas open() otherwise uses the locale-dependent default.
with open('datafiles/clotho.json', encoding='utf-8') as f:
    clotho_data = json.load(f)['data']

In [15]:
# Peek at the first Clotho record to check its fields.
clotho_data[:1]

[{'wav': '/fs/nexus-scratch/vla/Clotho/development/Distorted AM Radio noise.wav',
  'caption': 'The loud buzz of static constantly changes pitch and volume.'}]

In [16]:
# Number of Clotho records loaded.
len(clotho_data)

5929

In [17]:
# Load the FSD50K metadata file and keep only the records stored under its
# top-level 'data' key. The encoding is passed explicitly because JSON text
# is UTF-8, whereas open() otherwise uses the locale-dependent default.
with open('datafiles/FSD50K.json', encoding='utf-8') as f:
    fsd_data = json.load(f)['data']

In [18]:
# Peek at the first FSD50K record to check its fields.
fsd_data[:1]

[{'wav': '/fs/nexus-scratch/vla/FSD50K/FSD50K.dev_audio/10000.wav',
  'caption': 'The act of breathing creates audible respiratory sounds.'}]

In [19]:
# Number of FSD50K records loaded.
len(fsd_data)

51197

In [20]:
# Pool both datasets into a single list (Clotho records first, then FSD50K)
# and show the combined record count.
all_data = [*clotho_data, *fsd_data]
len(all_data)

57126

In [21]:
# How many mixture entries to generate.
num_samples = 50_000

# Index pairs drawn so far, and the entries built from them
# (two separate, initially empty lists).
all_pairs, gather = [], []

In [22]:
# Checking `pair not in all_pairs` against the list scans every stored pair,
# which makes this loop quadratic in `num_samples`. Mirror the list in a set
# so each duplicate check is a hash lookup; the list itself is still appended
# to, so its contents are the same as before.
seen_pairs = set(all_pairs)

for i in range(num_samples):

    # Ensure unique mixtures: keep drawing until an unseen ordered
    # (source, noise) index pair comes up.
    while True:
        pair = get_two_unique_integers(0, len(all_data) - 1)
        if pair not in seen_pairs:
            seen_pairs.add(pair)
            all_pairs.append(pair)
            break

    source_idx, noise_idx = pair
    source_item = all_data[source_idx]
    noise_item = all_data[noise_idx]

    entry = {
        # Paths are stored without their '.wav' extension.
        'source': source_item['wav'].removesuffix('.wav'),
        'noise': noise_item['wav'].removesuffix('.wav'),
        # signal to noise ratio, sample from [-15,15] (currently disabled)
        # 'snr': random.randint(-15, 15),
        # The caption describes the source clip only.
        'caption': source_item['caption'],
    }

    gather.append(entry)


In [23]:
# Preview only the first few entries; echoing all of `gather` would render
# every generated record into the notebook output.
gather[:5]

[{'source': '/fs/nexus-scratch/vla/FSD50K/FSD50K.eval_audio/256487',
  'noise': '/fs/nexus-scratch/vla/FSD50K/FSD50K.dev_audio/346711',
  'caption': 'A trumpet sounds with bright, bold notes.'},
 {'source': '/fs/nexus-scratch/vla/FSD50K/FSD50K.dev_audio/117945',
  'noise': '/fs/nexus-scratch/vla/Clotho/development/Glass Knock',
  'caption': 'A printer operates with mechanical sounds.'},
 {'source': '/fs/nexus-scratch/vla/FSD50K/FSD50K.dev_audio/10003',
  'noise': '/fs/nexus-scratch/vla/FSD50K/FSD50K.eval_audio/347609',
  'caption': 'Breathing sounds are audible.'},
 {'source': '/fs/nexus-scratch/vla/FSD50K/FSD50K.dev_audio/414560',
  'noise': '/fs/nexus-scratch/vla/Clotho/development/20080629.campeones.01',
  'caption': 'A single bell rings.'},
 {'source': '/fs/nexus-scratch/vla/FSD50K/FSD50K.dev_audio/76672',
  'noise': '/fs/nexus-scratch/vla/FSD50K/FSD50K.dev_audio/88975',
  'caption': "A man's singing voice resonates."},
 {'source': '/fs/nexus-scratch/vla/Clotho/evaluation/Drawer_ha

In [24]:
# Turn the list of mixture entries into a table (one row per entry) and
# display it.
df_training_subset = pd.DataFrame(data=gather)
df_training_subset

Unnamed: 0,source,noise,caption
0,/fs/nexus-scratch/vla/FSD50K/FSD50K.eval_audio...,/fs/nexus-scratch/vla/FSD50K/FSD50K.dev_audio/...,"A trumpet sounds with bright, bold notes."
1,/fs/nexus-scratch/vla/FSD50K/FSD50K.dev_audio/...,/fs/nexus-scratch/vla/Clotho/development/Glass...,A printer operates with mechanical sounds.
2,/fs/nexus-scratch/vla/FSD50K/FSD50K.dev_audio/...,/fs/nexus-scratch/vla/FSD50K/FSD50K.eval_audio...,Breathing sounds are audible.
3,/fs/nexus-scratch/vla/FSD50K/FSD50K.dev_audio/...,/fs/nexus-scratch/vla/Clotho/development/20080...,A single bell rings.
4,/fs/nexus-scratch/vla/FSD50K/FSD50K.dev_audio/...,/fs/nexus-scratch/vla/FSD50K/FSD50K.dev_audio/...,A man's singing voice resonates.
...,...,...,...
49995,/fs/nexus-scratch/vla/FSD50K/FSD50K.dev_audio/...,/fs/nexus-scratch/vla/FSD50K/FSD50K.dev_audio/...,Someone is whispering with a human voice.
49996,/fs/nexus-scratch/vla/FSD50K/FSD50K.dev_audio/...,/fs/nexus-scratch/vla/FSD50K/FSD50K.eval_audio...,An acoustic guitar is strummed.
49997,/fs/nexus-scratch/vla/FSD50K/FSD50K.dev_audio/...,/fs/nexus-scratch/vla/FSD50K/FSD50K.eval_audio...,A snare drum is struck.
49998,/fs/nexus-scratch/vla/FSD50K/FSD50K.dev_audio/...,/fs/nexus-scratch/vla/FSD50K/FSD50K.dev_audio/...,Liquid splashes and splatters.


In [25]:
# Write the mixture table to CSV without the row index. `index=False` is the
# documented boolean form, rather than relying on `None` being treated as
# false.
df_training_subset.to_csv('lass_training_subset.csv', index=False)