In [2]:
# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# (Re)load the autoreload extension and reload every imported module before each
# cell runs, so edits to the local helper modules are picked up without a kernel restart.
%reload_ext autoreload
%autoreload 2

In [3]:
import IPython.display as ipd
from pathlib import Path
from multiprocessing import Pool
import torch
import librosa
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from data_loading_utils import *
from preprocessing_utils import *
from helpers import *
from metrics import *

In [15]:
# Dataset locations: everything lives under the AudioSet root directory.
PATH = Path('data/audioset')
TRAIN_PATH = PATH.joinpath('train_segments_mono')
VALID_PATH = PATH.joinpath('eval_segments_mono')

# Audio / spectrogram settings. These values are also baked into the names of the
# output directories created further down, so changing one starts a fresh cache.
# NOTE(review): librosa's default rate is 22050 Hz; confirm that 22500 is intended.
sample_rate = 22500
n_features = 128  # number of mel bands (passed to librosa as n_mels)
n_fft = 1024
hop_length = 256

In [5]:
# Training-split metadata: one row per clip (YouTube id, labels, wav file name).
# NOTE(review): the 'Unnamed: 0*' columns look like indexes written back by earlier
# to_csv calls; they are kept here because other code may still rely on them.
train = pd.read_csv(PATH.joinpath('train.csv'))
train.head(5)

Unnamed: 0.1,Unnamed: 0,YTID,positive_labels,fname
0,0,--ZhevVpy1s,/m/012xff,--ZhevVpy1s_50.000.wav
1,1,--aE2O5G5WE,"/m/03fwl,/m/04rlf,/m/09x0r",--aE2O5G5WE_0.000.wav
2,2,--aO5cdqSAg,"/t/dd00003,/t/dd00005",--aO5cdqSAg_30.000.wav
3,3,--aaILOrkII,"/m/032s66,/m/073cg4",--aaILOrkII_200.000.wav
4,4,--cB2ZVjpnA,/m/01y3hg,--cB2ZVjpnA_30.000.wav


In [6]:
# Validation-split metadata, same layout as the training CSV.
valid = pd.read_csv(PATH.joinpath('valid.csv'))
valid.head(5)

Unnamed: 0.1,Unnamed: 0,YTID,positive_labels,fname
0,0,--4gqARaEJE,"/m/068hy,/m/07q6cd_,/m/0bt9lr,/m/0jbk",--4gqARaEJE_0.000.wav
1,1,--BfvyPmVMo,/m/03l9g,--BfvyPmVMo_20.000.wav
2,2,--U7joUcTCo,/m/01b_21,--U7joUcTCo_0.000.wav
3,3,--i-y1v8Hy8,"/m/04rlf,/m/09x0r,/t/dd00004,/t/dd00005",--i-y1v8Hy8_0.000.wav
4,4,-0BIyqJj9ZU,"/m/07rgt08,/m/07sq110,/t/dd00001",-0BIyqJj9ZU_30.000.wav


In [7]:
# Keep only the wav file names; these are the work items for the conversion step.
train_fname = train['fname']
valid_fname = valid['fname']

In [8]:
# Number of clips in each split, as (train, valid).
tuple(map(len, (train_fname, valid_fname)))

(18725, 17492)

In [9]:
# Output directory for the training log-mel spectrograms; the name encodes the
# extraction settings so different configurations never share a directory.
TRAIN_PATH_LMS = PATH.joinpath(f'train_{sample_rate}_{n_fft}_{hop_length}_{n_features}')
TRAIN_PATH_LMS.mkdir(exist_ok=True)


In [10]:
# Output directory for the validation log-mel spectrograms; the name encodes the
# extraction settings so different configurations never share a directory.
VALID_PATH_LMS = PATH.joinpath(f'valid_{sample_rate}_{n_fft}_{hop_length}_{n_features}')
VALID_PATH_LMS.mkdir(exist_ok=True)

In [18]:
# Show where the converted features are written.
(TRAIN_PATH_LMS, VALID_PATH_LMS)

(PosixPath('data/audioset/train_22500_1024_256_128'),
 PosixPath('data/audioset/valid_22500_1024_256_128'))

In [16]:
# Choose which split to convert: raw wav directory -> spectrogram directory.
# convert_feature captures these as default arguments when its `def` executes,
# so after pointing them at another split the definition must be re-run too.
SRC_PATH, DST_PATH = VALID_PATH, VALID_PATH_LMS

def convert_feature(filename, src_path=SRC_PATH, dst_path=DST_PATH, sample_rate=sample_rate):
    """Convert one audio clip to a log-mel spectrogram and save it as ``<filename>.npy``.

    Args:
        filename: Name of the audio file inside ``src_path``; also used (with a
            ``.npy`` suffix appended) as the output file name.
        src_path: Directory containing the source audio. The default is the value
            ``SRC_PATH`` had when this ``def`` was executed.
        dst_path: Directory the ``.npy`` file is written to (``str`` or ``Path``).
            The default is the value ``DST_PATH`` had when this ``def`` was executed.
        sample_rate: Rate handed to ``read_file`` and to librosa as ``sr``.

    Returns:
        None. The result is written to disk only.

    The spectrogram shape is controlled by the notebook-level ``n_features``
    (number of mel bands), ``n_fft`` and ``hop_length``.
    """
    # read_file comes from the project's helper modules; presumably it returns
    # (samples, sample_rate) — only the samples are needed here.
    data, _ = read_file(filename, path=src_path, sample_rate=sample_rate)
    if data.shape[0] > 0:
        # `y=` must be a keyword: newer librosa releases reject positional audio.
        mel_spec = librosa.feature.melspectrogram(
            y=data,
            sr=sample_rate,
            n_mels=n_features,
            n_fft=n_fft,
            hop_length=hop_length,
        )
    else:
        # Empty clip: emit a single all-zero frame with the same number of mel
        # bands as a real spectrogram (this branch previously used the undefined
        # name `n_mels` and raised NameError).
        mel_spec = np.zeros((n_features, 1))
    # Convert power to decibels.
    power_spec = librosa.power_to_db(mel_spec)
    dst_filename = Path(dst_path)/f'{filename}.npy'
    np.save(dst_filename, power_spec)

In [17]:
processes = 8
fnames = valid_fname
total_tfms = len(fnames)

# Fan the clips out over a worker pool. Results arrive in completion order, which
# is fine because convert_feature returns nothing and only writes files; the loop
# exists purely to advance the progress bar once per finished clip.
# NOTE(review): `tqdm` is not imported explicitly in this notebook — presumably it
# is provided by one of the star imports; confirm.
with Pool(processes) as pool, tqdm(total=total_tfms) as pbar:
    for _ in pool.imap_unordered(convert_feature, fnames):
        pbar.update(1)

A Jupyter Widget




### Calculating dataset statistics (mean and standard deviation)

In [None]:
# Load every saved training spectrogram into memory.
# Only '*.npy' entries are picked up so that stray directory entries (checkpoint
# folders, OS metadata files, ...) are not handed to np.load; sorting makes the
# load order reproducible across runs and file systems.
trn_files = sorted(TRAIN_PATH_LMS.glob('*.npy'))
trn_features = [np.load(f) for f in trn_files]
len(trn_features)

In [None]:
def get_stats(objects):
    """Return the pooled mean and standard deviation over a collection of arrays.

    The result equals the mean / population std of all elements of all arrays
    concatenated together, but is computed from per-array summaries so the data
    never has to be concatenated.

    Args:
        objects: Iterable of numpy arrays (any shapes). Zero-size arrays are
            ignored because they hold no samples.

    Returns:
        Tuple ``(weighted_mean, weighted_std)``.

    Raises:
        ValueError: If ``objects`` contains no non-empty array.
    """
    # A zero-size array has NaN mean/std, which would turn both results into NaN
    # even though its weight is zero — drop such arrays up front.
    summaries = [(o.size, o.mean(), o.std()) for o in objects if o.size > 0]
    if not summaries:
        raise ValueError('get_stats requires at least one non-empty array')

    sizes, means, stds = (np.array(column) for column in zip(*summaries))
    total = sizes.sum()
    weighted_mean = (means * sizes).sum() / total
    # Combined variance = size-weighted within-array variance + size-weighted
    # spread of the array means around the pooled mean.
    # https://stats.stackexchange.com/questions/55999/is-it-possible-to-find-the-combined-standard-deviation
    within = (sizes * stds**2.0).sum()
    between = (sizes * (means - weighted_mean)**2.0).sum()
    weighted_std = np.sqrt((within + between) / total)
    return weighted_mean, weighted_std

# Pooled statistics over every training spectrogram; displayed as (mean, std).
(mean, std) = get_stats(trn_features)
(mean, std)