In [None]:
import gc
import glob
import os
import shutil
import joblib
import h5py

import numpy as np
import pandas as pd
import librosa as lb
import matplotlib.pyplot as plt
import multiprocessing as mm

from pathlib import Path
from tqdm import tqdm

In [None]:
# Use every core but one for the parallel conversion, never fewer than one worker:
# cpu_count() - 1 is 0 on a single-core machine and joblib rejects n_jobs=0.
NUM_THREADS = max(1, mm.cpu_count() - 1)
files = glob.glob('test/*.flac')
# Name of the intermediate .npy directory; also used as the stem of the HDF5 file
new_dir = 'mel_32_128_2'

# Per-split output directories for the intermediate .npy spectrograms
OUT_TRAIN = f'{new_dir}/train'
OUT_TEST = f'{new_dir}/test'

In [None]:
class params:
    """Audio loading and mel-spectrogram settings shared by the preprocessing cells."""

    # Sampling rate used when loading audio and computing the spectrogram
    sr = 32_000
    # Number of mel bands
    n_mels = 128
    # Lower / upper frequency bounds handed to the mel-spectrogram
    fmin, fmax = 0, 14_000
    # Power setting consumed by compute_melspec
    mel_power = 2

In [None]:
def compute_melspec(y, params):
    """
    Computes a mel-spectrogram and puts it at decibel scale
    Arguments:
        y {np array} - signal
        params {AudioParams} - Parameters to use for the spectrogram. Expected to have the attributes sr, n_mels, fmin, fmax, mel_power
    Returns:
        np array - Mel-spectrogram in dB, cast to float32
    """
    # Everything is passed by keyword: recent librosa releases reject the
    # signal (and most other arguments) when given positionally.
    melspec = lb.feature.melspectrogram(
        y=y,
        sr=params.sr,
        n_mels=params.n_mels,
        fmin=params.fmin,
        fmax=params.fmax,
        # mel_power is the exponent of the magnitude spectrogram. It used to be
        # handed positionally to power_to_db, where it was taken as the dB
        # reference value `ref` instead, which only shifted the output by a constant.
        power=params.mel_power,
    )

    melspec = lb.power_to_db(melspec).astype(np.float32)
    return melspec

In [None]:
def load_and_save(record, out_dir):
    """
    Load an audio file, convert it to a mel-spectrogram and store it as a .npy file
    Arguments:
        record {String} - full path of the input audio file
        out_dir {String} - directory to save the .npy file in (created if missing)
    """
    # sr must be a keyword: recent librosa releases reject it positionally
    y, _ = lb.load(record, sr=params.sr)
    melspec = compute_melspec(y, params)

    # The output directory is not created anywhere else; exist_ok keeps this
    # safe when several parallel workers reach this line at the same time.
    os.makedirs(out_dir, exist_ok=True)

    # Swap only the final extension, independent of the path separator in use
    output_name = f'{Path(record).stem}.npy'

    np.save(os.path.join(out_dir, output_name), melspec)

In [None]:
# Sorted because glob returns entries in filesystem-dependent order; a stable
# order makes the conversion run (and its progress) reproducible.
train_files = sorted(glob.glob('train/*.flac'))
test_files = sorted(glob.glob('test/*.flac'))

In [None]:
# Convert the train recordings first, then the test recordings, spreading the
# files of each split across NUM_THREADS joblib workers.
for _paths, _out in ((train_files, OUT_TRAIN), (test_files, OUT_TEST)):
    _jobs = (
        joblib.delayed(load_and_save)(_path, _out)
        for _path in tqdm(_paths, total=len(_paths))
    )
    _ = joblib.Parallel(n_jobs=NUM_THREADS)(_jobs)

In [None]:
def normalize(X, eps=1e-6, mean=None, std=None):
    """
    Standardizes the image and rescales it to the 0-255 range
    Arguments:
        X {numpy array [H x W]} - 2D array to convert
        eps {float} -- To avoid dividing by 0 (default: {1e-6})
        mean {None, float or np array} - Mean for normalization; X.mean() when None (default: {None})
        std {None, float or np array} - Std for normalization; X.std() when None (default: {None})
    Returns:
        numpy array [H x W] - uint8 array; all zeros when X is (almost) constant
    """
    # Explicit None checks: `mean or X.mean()` silently discarded a legitimate
    # 0.0 and raised "truth value of an array is ambiguous" for array inputs.
    if mean is None:
        mean = X.mean()
    if std is None:
        std = X.std()
    X = (X - mean) / (std + eps)

    _min, _max = X.min(), X.max()

    if (_max - _min) > eps:
        # Min-max rescale to [0, 255]; the cast truncates towards zero.
        # (Clipping to the array's own min/max would be a no-op, so none is done.)
        V = 255 * (X - _min) / (_max - _min)
        V = V.astype(np.uint8)
    else:
        # No usable dynamic range: avoid dividing by ~0
        V = np.zeros_like(X, dtype=np.uint8)

    return V

In [None]:
# Gather the intermediate spectrogram files in a stable order (glob order is
# filesystem-dependent) so the dataset rows are reproducible between runs.
train_files = sorted(glob.glob(f'{new_dir}/train/*'))
test_files = sorted(glob.glob(f'{new_dir}/test/*'))

# Fail before touching the output: mode='w' truncates an existing HDF5 file, so
# re-running this cell once the .npy files are gone must not wipe a good result.
if not train_files:
    raise FileNotFoundError(f'No spectrogram files found in {new_dir}/train')

# Normalization statistics: the average of the per-file means and the average
# of the per-file stds (not the pooled std of all values).
mean = []
std = []
for i in tqdm(train_files + test_files):
    file = np.load(i)
    mean.append(file.mean())
    std.append(file.std())

mean = np.array(mean).mean()
std = np.array(std).mean()

# Both datasets are sized from the first train file, so every spectrogram is
# expected to have that same shape.
base = np.load(train_files[0])
shape = (len(train_files), *base.shape)

# Recording id = file name up to its first dot, stored as ASCII bytes
train_labels = [os.path.basename(p).split('.')[0].encode("ascii", "ignore") for p in train_files]
test_labels = [os.path.basename(p).split('.')[0].encode("ascii", "ignore") for p in test_files]

# Fixed-width byte strings: at least the historical 10 bytes, widened when a
# longer id is present so that no label is silently truncated.
label_width = max([10] + [len(label) for label in train_labels + test_labels])

with h5py.File(f'{new_dir}.hdf5', mode='w') as f:
    train_ds = f.create_dataset('train_files', shape, np.uint8)
    test_ds = f.create_dataset('test_files', (len(test_files), *base.shape), np.uint8)

    f.create_dataset('train_labels', (len(train_files),), f'S{label_width}')
    f.create_dataset('test_labels', (len(test_files),), f'S{label_width}')

    # Save the names of all the recording_ids, in the same order as the rows below
    f['train_labels'][...] = train_labels
    f['test_labels'][...] = test_labels

    for i, v in tqdm(enumerate(train_files), total=len(train_files)):
        train_ds[i, ...] = normalize(np.load(v), mean=mean, std=std)

    for i, v in tqdm(enumerate(test_files), total=len(test_files)):
        test_ds[i, ...] = normalize(np.load(v), mean=mean, std=std)

In [None]:
# Delete all the intermediate .npy files. Guarded so that re-running the cell
# after the directory is already gone does not raise.
if os.path.isdir(new_dir):
    shutil.rmtree(new_dir)