# Construct a Processed Subset of NSynth

## Setup

In [1]:
# Load IPython's autoreload extension and select mode 2, so imported modules
# are reloaded before each cell runs and edits to local code take effect
# without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
# ENABLE IF USING COLAB
USE_COLAB = False

if USE_COLAB:
    import os
    import shutil
    if os.path.exists('/content/spectroconv/'):
        shutil.rmtree('/content/spectroconv/') #deletes a directory and all its contents.
    !git clone https://github.com/jakeval/spectroconv.git
    !pip install hub
    !pip install hub[audio]
    !pip install wandb
    import sys
    sys.path.insert(0,'/content/spectroconv')

In [3]:
from data_utils import preprocessing
from data_utils import nsynth_adapter as na
from data_utils.dataset_constructor import WBDatasetConstructor

from matplotlib import pyplot as plt
import numpy as np

## Set Up the Configs

In [24]:
# Settings handed to WBDatasetConstructor
# (presumably the Weights & Biases project and entity to log under — confirm).
wb_config = dict(
    project='spectroconv-debug',
    entity='jakeval-colab',
)

def get_config(split):
    """Assemble the dataset-construction settings for one NSynth split.

    Args:
        split: Name of the split. It is stored under ``artifact.split`` and
            interpolated into the source and target Hub URLs. This notebook
            calls the function with 'test', 'val' and 'train'.

    Returns:
        A dict with the sections 'artifact', 'hub_urls', 'preprocessor' and
        'subset'. An 'augmentation' section is added only when ``split`` is
        'train'.
    """
    artifact = {
        # NOTE(review): presumably combined with 'split' downstream to form a
        # "name-split" artifact name — confirm against WBDatasetConstructor.
        'name': 'nsynth-med-shift',
        'split': split,
    }
    hub_urls = {
        'source': f'hub://activeloop/nsynth-{split}',
        'target': f'hub://jakeval/nsynth-med-shift-{split}',
    }
    preprocessor = {'window_size': 1024, 'n_mels': 128, 'scaling': 1000}

    # Only these instrument families are kept in the subset.
    families = [
        na.InstrumentFamily.REED.value,
        na.InstrumentFamily.BRASS.value,
        na.InstrumentFamily.ORGAN.value,
    ]
    subset = {
        'selected_families': families,
        'instruments_per_family': None,
        'min_pitch': None,
        'max_pitch': None,
    }

    config = {
        'artifact': artifact,
        'hub_urls': hub_urls,
        'preprocessor': preprocessor,
        'subset': subset,
    }

    # Augmentation settings are attached to the training split only.
    if split == 'train':
        config['augmentation'] = {'shift_up': 2, 'shift_down': 1.2}

    return config

## Run it

In [None]:
# Build every split with a single constructor, in the order test -> val -> train.
wdc = WBDatasetConstructor(wb_config)
for split in ('test', 'val', 'train'):
    wdc.make_dataset(get_config(split))

{'project': 'spectroconv-debug', 'entity': 'jakeval-colab'}


Opening dataset in read-only mode as you don't have write permissions.
hub://activeloop/nsynth-test loaded successfully.
This dataset can be visualized at https://app.activeloop.ai/activeloop/nsynth-test.
Your Hub dataset has been successfully created!
The dataset is private so make sure you are logged in!
This dataset can be visualized at https://app.activeloop.ai/jakeval/nsynth-med-shift-test-metadata.
Your Hub dataset has been successfully created!
The dataset is private so make sure you are logged in!
This dataset can be visualized at https://app.activeloop.ai/jakeval/nsynth-med-shift-test.
Load 488 audio clips...
Take the spectrogram...
Write to the database...
Load 488 audio clips...
Take the spectrogram...
Write to the database...
Load 15 audio clips...
Take the spectrogram...
Write to the database...
Finished writing data in 1.7728420615196228 minutes
start loading 3 samples
finished loading!



VBox(children=(Label(value='0.462 MB of 0.462 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

Opening dataset in read-only mode as you don't have write permissions.
hub://activeloop/nsynth-val loaded successfully.
This dataset can be visualized at https://app.activeloop.ai/activeloop/nsynth-val.
Your Hub dataset has been successfully created!
The dataset is private so make sure you are logged in!
This dataset can be visualized at https://app.activeloop.ai/jakeval/nsynth-med-shift-val-metadata.
Your Hub dataset has been successfully created!
The dataset is private so make sure you are logged in!
This dataset can be visualized at https://app.activeloop.ai/jakeval/nsynth-med-shift-val.
Load 488 audio clips...
Take the spectrogram...
Write to the database...
Load 488 audio clips...
Take the spectrogram...
Write to the database...
Load 488 audio clips...
Take the spectrogram...
Write to the database...
Load 488 audio clips...
Take the spectrogram...
Write to the database...
Load 488 audio clips...
Take the spectrogram...
Write to the database...
Load 488 audio clips...
Take the spec

VBox(children=(Label(value='0.430 MB of 0.430 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

Opening dataset in read-only mode as you don't have write permissions.
hub://activeloop/nsynth-train loaded successfully.
This dataset can be visualized at https://app.activeloop.ai/activeloop/nsynth-train.
Your Hub dataset has been successfully created!
The dataset is private so make sure you are logged in!
This dataset can be visualized at https://app.activeloop.ai/jakeval/nsynth-med-shift-train-metadata.
Your Hub dataset has been successfully created!
The dataset is private so make sure you are logged in!
This dataset can be visualized at https://app.activeloop.ai/jakeval/nsynth-med-shift-train.
Load 488 audio clips...
start shift
end shift
Take the spectrogram...
Write to the database...
Load 488 audio clips...




start shift
end shift
Take the spectrogram...
Write to the database...
Load 488 audio clips...
start shift
end shift
Take the spectrogram...
Write to the database...
Load 488 audio clips...
start shift
end shift
Take the spectrogram...
Write to the database...
Load 488 audio clips...
start shift




end shift
Take the spectrogram...
Write to the database...
Load 488 audio clips...

