# HParams only stores a handful of settings; it can be removed as a dependency and the variables defined directly.

Set up dependencies:

In [13]:
from joblib import Parallel, delayed
import pandas as pd

In [None]:
class HParams(object):
    """Plain attribute container for the audio-processing settings.

    Placeholder for the ``HParams`` helper that was removed from TF 2.0
    alpha.  All defaults are installed first; keyword arguments are then
    written straight into the instance dictionary, so they may override a
    default or introduce a brand-new attribute (names are not validated).
    """

    def __init__(self, **kwargs):
        self.set_defaults()
        # Caller-supplied values win over the defaults set just above.
        vars(self).update(kwargs)

    def set_defaults(self):
        """Assign every known setting its default value."""
        # The table is rebuilt on each call so that the dict-valued settings
        # are fresh objects and never shared between instances.
        defaults = {
            "win_length_ms": 5,
            "hop_length_ms": 1,
            "n_fft": 1024,
            "ref_level_db": 20,
            "min_level_db": -60,
            "preemphasis": 0.97,
            "num_mel_bins": 64,
            "mel_lower_edge_hertz": 200,
            "mel_upper_edge_hertz": 15000,
            "power": 1.5,  # for spectral inversion
            "griffin_lim_iters": 50,
            "butter_lowcut": 500,
            "butter_highcut": 15000,
            "reduce_noise": False,
            "noise_reduce_kwargs": {},
            "mask_spec": False,
            "mask_spec_kwargs": {"spec_thresh": 0.9, "offset": 1e-10},
            "nex": -1,
            "n_jobs": -1,
            "verbosity": 1,
        }
        for name, value in defaults.items():
            setattr(self, name, value)

    def save(self):
        """Not implemented in this placeholder."""
        raise NotImplementedError

    def load(self):
        """Not implemented in this placeholder."""
        raise NotImplementedError

In [None]:
# Hyperparameters used to process this dataset.  Any setting not listed here
# keeps the default assigned in HParams.set_defaults().
hparams = HParams(
    # mel-scale settings
    num_mel_bins=64,
    mel_lower_edge_hertz=500,
    mel_upper_edge_hertz=15000,
    # band-pass filter edges
    butter_lowcut=500,
    butter_highcut=15000,
    # dB levels
    ref_level_db=20,
    min_level_db=-30,
    mask_spec=True,
    # analysis window and hop, in milliseconds
    win_length_ms=10,
    hop_length_ms=2,
    # run-time options
    nex=-1,
    n_jobs=-1,
    verbosity=1,
)

# The four helper functions `int16_to_float32`, `read_wav`, `load_wav` and `prepare_wav` can also be removed/merged.

In [None]:
def read_wav(wav_loc, method="librosa", **kwargs):
    """Read an audio file using either librosa or scipy.

    Args:
        wav_loc: Location of the audio file, handed to the chosen reader.
        method: ``"librosa"`` (default) or ``"scipy"``.
        **kwargs: Extra keyword arguments forwarded to ``librosa.core.load``.
            ``sr`` defaults to ``None`` when the caller does not supply it.
            These are ignored when ``method="scipy"``.

    Returns:
        A ``(rate, data)`` tuple as produced by the chosen reader.

    Raises:
        ValueError: If ``method`` is neither ``"librosa"`` nor ``"scipy"``.
    """
    if method == "librosa":
        # sr=None makes librosa keep the file's native sampling rate instead
        # of resampling to its own default.
        kwargs.setdefault("sr", None)
        data, rate = librosa.core.load(wav_loc, **kwargs)
    elif method == "scipy":
        rate, data = wavfile.read(wav_loc)
    else:
        # Without this branch an unknown method fell through to the return
        # statement and failed with an unhelpful UnboundLocalError.
        raise ValueError(
            "unknown method {!r}; expected 'librosa' or 'scipy'".format(method)
        )
    return rate, data

In [None]:
def load_wav(wav_loc, catch_errors=True, method="librosa", **kwargs):
    """Load a wav through ``read_wav``, optionally swallowing failures.

    When ``catch_errors`` is true, any exception raised while reading is
    printed and ``(None, None)`` is returned in place of the audio; when it
    is false the exception propagates to the caller.

    Returns:
        ``(rate, data)`` from ``read_wav``, or ``(None, None)`` after a
        caught error.
    """
    if not catch_errors:
        # Strict mode: let any read error surface unchanged.
        rate, data = read_wav(wav_loc, method=method, **kwargs)
        return rate, data

    try:
        rate, data = read_wav(wav_loc, method=method, **kwargs)
    except Exception as err:
        # Best-effort mode: report the problem and signal failure with Nones.
        print(err)
        return None, None
    return rate, data

In [None]:
# def int16_to_float32(data):
#     """ Converts from int16 wav to float32 wav
#     """
#     if np.max(np.abs(data)) > 32768:
#         raise ValueError("Data has values above 32768")
#     return (data / 32768.0).astype("float32")

In [None]:
def prepare_wav(wav_loc, hparams=None):
    """Load the wav at ``wav_loc`` and hand it back as ``(rate, data)``.

    ``hparams`` is accepted but currently unused: the steps that read it
    (band-pass filtering and noise reduction) are disabled below.
    """
    # Sampling rate and samples; both are None when load_wav caught an error.
    sample_rate, samples = load_wav(wav_loc)

    # Disabled preprocessing, kept for reference -- butter_bandpass and
    # noise reduction are not necessary for our dataset:
    #
    # if np.issubdtype(type(samples[0]), np.integer):
    #     samples = int16_to_float32(samples)
    # # bandpass filter
    # if hparams is not None:
    #     samples = butter_bandpass_filter(
    #         samples, hparams.butter_lowcut, hparams.butter_highcut, sample_rate, order=5
    #     )
    #
    #     # reduce noise
    #     if hparams.reduce_noise:
    #         samples = nr.reduce_noise(
    #             audio_clip=samples, noise_clip=samples, **hparams.noise_reduce_kwargs
    #         )

    return sample_rate, samples

# This is the core function to read audio.

In [None]:
def get_row_audio(test_input, wav_loc, hparams):
    """Load one recording and attach each row's audio snippet to the dataframe.

    TODO: for large sparse WAV files, the audio should be loaded only for the syllable

    Args:
        test_input: DataFrame with ``start_time`` and ``end_time`` columns.
            Both are multiplied by the sampling rate to obtain sample
            indices (so presumably expressed in seconds -- confirm).  The
            frame is modified in place: ``audio`` and ``rate`` columns are
            added or overwritten.
        wav_loc: Location of the recording the rows refer to.
        hparams: Forwarded unchanged to ``prepare_wav``.

    Returns:
        The same ``test_input`` object, now carrying the ``audio`` and
        ``rate`` columns.

    Raises:
        ValueError: If the recording could not be loaded.
    """

    # load audio
    rate, data = prepare_wav(wav_loc, hparams)
    if data is None:
        # prepare_wav hands back (None, None) when the loader swallowed an
        # error; fail here with the file name instead of an AttributeError
        # on ``None.astype`` one line further down.
        raise ValueError("could not load audio from {!r}".format(wav_loc))
    data = data.astype('float32')

    # get audio for each word (row) in dataframe
    test_input["audio"] = [
        data[int(st * rate) : int(et * rate)]
        for st, et in zip(test_input.start_time.values, test_input.end_time.values)
    ]

    # the scalar rate is broadcast to every row
    test_input["rate"] = rate

    return test_input

# Direct use:
# test_input = pd.read_csv("testinput.csv")  # create a dummy dataframe
# get_row_audio(test_input, "path/to/audio.wav", hparams)

# This currently loops over separate dataframes ("test_inputs"), but it should just get the audio from one dataframe, `test_input`.

Note: the loop below is designed to iterate over multiple data frames, one per unique `key`.

In [None]:
# Extract the audio snippets in parallel: one job per unique `key`, each job
# receiving only the rows that belong to that recording.
# The job count, verbosity and settings object all come from `hparams`; the
# bare `n_jobs` / `verbosity` / `dataset` names used before are not defined
# anywhere in this notebook.
# NOTE(review): `test_input` must already be a DataFrame with `key`,
# `start_time` and `end_time` columns, and `tqdm` must be imported -- neither
# is set up in the visible cells.
with Parallel(n_jobs=hparams.n_jobs, verbose=hparams.verbosity) as parallel:
    test_inputs = parallel(
        delayed(get_row_audio)(
            # explicit copy: get_row_audio adds columns to the frame it is
            # given, which should not happen on a selection of `test_input`
            test_input[test_input.key == key].copy(),
            # Edit path to audio folder here
            #'/Volumes/WRKGRP/FDL-CLS-MDingemanse-ConverseWG/Elpaco dataset'+ key +'.wav',
            '../../testing/Elpaco dataset' + key + '.wav',  # <<<<<< Change the path
            #'/testing/Elpaco dataset'+ key +'.wav',
            hparams,
        )
        for key in tqdm(test_input.key.unique())
    )
# Stitch the per-recording frames back into a single table.
test_output = pd.concat(test_inputs)


# After extracting the audio snippets, we normalize the audio column using librosa. This step still needs to be integrated into the extraction above.

In [None]:
# Replace every snippet with its normalized version, using librosa's default
# normalization settings.
test_output["audio"] = [
    librosa.util.normalize(snippet) for snippet in test_output["audio"].values
]

# optional: write out file as "test_output.csv"

In [None]:
# Write the combined table to "test_output.csv" in the current working
# directory, leaving out the index column.
# NOTE(review): the `audio` column holds arrays, which a CSV can only store
# as their text representation -- confirm that a CSV round-trip of that
# column is really what is wanted here.
test_output.to_csv("test_output.csv", index=False)