In [None]:
# default_exp utils

# spectrogram compression statistics

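> Benchmarks the compute time, write time and on-disk size of spectrograms and mel spectrograms stored in HDF5 files under different compression settings, with a plain `.npy` export as baseline.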

In [None]:
#hide
from nbdev.showdoc import *

In [None]:
#hide
%matplotlib inline

## Spectrograms

In [None]:
import librosa
import torch
from pathlib import Path
import numpy as np

import librosa.display
import os
import h5py
from fastprogress import progress_bar as pb, master_bar
import time

import pandas as pd

In [None]:
# spectrogram parameters shared by every STFT / mel computation in this notebook
n_fft: int = 2048       # forwarded to librosa as `n_fft`
hop_length: int = 512   # forwarded to librosa as `hop_length`
n_mels: int = 128       # forwarded to librosa's melspectrogram as `n_mels`

In [None]:
# folder holding the single example recording loaded in the "compute spectrogram" cell;
# note that `datapath` is re-bound to 'sample_data' further down
datapath = Path('temp')

### compute spectrogram

In [None]:
# ignore librosa pysoundfile load warning
import warnings
warnings.filterwarnings(
    action='ignore',
    category=UserWarning,
    module=r'librosa'
)

In [None]:
# load one example recording; sr=None keeps the file's native sampling rate
audio_file = datapath/'00204008d.flac'
wf,sr = librosa.load(audio_file, sr=None)
# spectrogram: squared magnitude of the STFT, converted to decibels
stft = librosa.stft(wf, n_fft=n_fft, hop_length=hop_length)
spgm_pwr = np.abs(stft)**2
spgm_log = librosa.power_to_db(spgm_pwr)
# mel spectrogram; the signal is passed as `y=` because librosa >= 0.10 made the
# arguments of `melspectrogram` keyword-only (a positional signal raises TypeError)
spgm_mel_pwr = librosa.feature.melspectrogram(y=wf, sr=sr, n_fft=n_fft, hop_length=hop_length, n_mels=n_mels)
spgm_mel_log = librosa.power_to_db(spgm_mel_pwr)

In [None]:
def compute_spectrogram(wf, n_fft, hop_length):
    """Return the power spectrogram of the waveform `wf` on a decibel scale.

    The STFT of `wf` is taken with `librosa.stft` (using `n_fft` and
    `hop_length`), its magnitude is squared and the result is passed through
    `librosa.power_to_db`.
    """
    stft = librosa.stft(wf, n_fft=n_fft, hop_length=hop_length)
    power = np.abs(stft)**2
    return librosa.power_to_db(power)
    
def compute_mel_spectrogram(wf, sr, n_fft, hop_length, n_mels):
    """Return the mel power spectrogram of the waveform `wf` on a decibel scale.

    Parameters
    ----------
    wf : waveform as returned by `librosa.load`
    sr : sampling rate of `wf`
    n_fft, hop_length, n_mels : forwarded unchanged to
        `librosa.feature.melspectrogram`

    Returns
    -------
    The output of `librosa.power_to_db` applied to the mel spectrogram.
    """
    # `y=` must be a keyword: since librosa 0.10 every argument of
    # `melspectrogram` is keyword-only and a positional signal raises TypeError
    mel_power = librosa.feature.melspectrogram(
        y=wf, sr=sr, n_fft=n_fft, hop_length=hop_length, n_mels=n_mels
    )
    return librosa.power_to_db(mel_power)
    

In [None]:
# all paths are relative to the notebook's working directory
statspath = Path('statistics')                   # target folder of the benchmark CSV
datapath = Path('sample_data')
datapath_train = datapath/'train'                # scanned for .flac recordings
datapath_spgm = datapath/'spectrograms'
datapath_spgm_mel = datapath/'melspectrograms'
# parents=True: do not fail when `sample_data` itself does not exist yet
datapath_spgm.mkdir(parents=True, exist_ok=True)
datapath_spgm_mel.mkdir(parents=True, exist_ok=True)

In [None]:
def spectrogram_time_stats(outpath, loadpath, n_fft, hop_length, n_mels, compression=None, compression_opts=None, print_stats=False, n_files=None, master_bar=None):
    """Benchmark computing spectrograms and storing them in HDF5 with one compression setting.

    Every `.flac` file among the first `n_files` entries of `loadpath` is loaded
    with `librosa.load`, turned into a spectrogram and a mel spectrogram, and
    stored as one dataset per recording (named after the file stem) in
    `outpath/spectrograms/data.hdf5` and `outpath/melspectrograms/data.hdf5`.
    `data.hdf5` files left over from a previous run are deleted first.

    Parameters
    ----------
    outpath : pathlib.Path
        Parent of the `spectrograms` / `melspectrograms` output folders
        (created if missing).
    loadpath : pathlib.Path
        Folder that is scanned for `.flac` files.
    n_fft, hop_length, n_mels
        Forwarded to `compute_spectrogram` / `compute_mel_spectrogram`.
    compression, compression_opts
        Forwarded to `create_dataset` of the h5py file.
    print_stats : bool
        Additionally print a summary of the run.
    n_files : int or None
        Number of directory entries that are considered (None = all). The slice
        is taken before the `.flac` filter, so fewer than `n_files` recordings
        may end up being processed.
    master_bar
        Optional parent for the progress bar. The name shadows the imported
        `master_bar` inside this function only.

    Returns
    -------
    dict
        Compression settings, number of processed recordings ('audios'),
        average compute / write time in seconds, size of both HDF5 files in MB
        and their bytes-per-pixel ratios. When no recording was processed the
        averages and ratios are NaN and the sizes are 0.
    """
    # setup: output folders must exist and every run starts from fresh HDF5 files
    hdf_spgm = outpath/'spectrograms'/'data.hdf5'
    hdf_spgm_mel = outpath/'melspectrograms'/'data.hdf5'
    for hdf_file in (hdf_spgm, hdf_spgm_mel):
        hdf_file.parent.mkdir(parents=True, exist_ok=True)
        if hdf_file.exists(): hdf_file.unlink()
    writes = 0
    # pixel totals are accumulated per file so that the bytes-per-pixel ratio
    # stays correct when the recordings differ in length
    px_spgm = px_spgm_mel = 0
    times = {'compute':[],'write':[]}
    compression_paras = {'compression':compression,'compression_opts':compression_opts}

    # load-compute-write
    for audio_file in pb(list(loadpath.iterdir())[:n_files], parent=master_bar):
        # check
        if audio_file.suffix != '.flac': continue
        # load + compute (the 'compute' timing includes loading the audio file)
        t0 = time.perf_counter()
        wf,sr = librosa.load(audio_file)
        spgm = compute_spectrogram(wf=wf, n_fft=n_fft, hop_length=hop_length)
        spgm_mel = compute_mel_spectrogram(wf=wf, sr=sr, n_fft=n_fft, hop_length=hop_length, n_mels=n_mels)
        times['compute'].append(time.perf_counter() - t0)
        # write (opening/closing the HDF5 files is part of the measured time)
        t0 = time.perf_counter()
        with h5py.File(hdf_spgm, 'a') as hdf:
            hdf.create_dataset(f'{audio_file.stem}', data=spgm, **compression_paras)
        with h5py.File(hdf_spgm_mel, 'a') as hdf:
            hdf.create_dataset(f'{audio_file.stem}', data=spgm_mel, **compression_paras)
        times['write'].append(time.perf_counter() - t0)
        writes += 1
        px_spgm += spgm.size
        px_spgm_mel += spgm_mel.size

    # statistics (guarded so that a run without any .flac file does not crash)
    avg_times = {k: (np.mean(v) if v else np.nan) for k,v in times.items()}
    size_spgm = hdf_spgm.stat().st_size if hdf_spgm.exists() else 0
    size_spgm_mel = hdf_spgm_mel.stat().st_size if hdf_spgm_mel.exists() else 0
    ratio_spgm = size_spgm / px_spgm if px_spgm else np.nan
    ratio_spgm_mel = size_spgm_mel / px_spgm_mel if px_spgm_mel else np.nan

    if print_stats:
        print(f"compression: {compression}; options: {compression_opts}")
        print('writes:', writes)
        for k,v in avg_times.items(): print(k, np.round(v, 3), 's')
        for hdf_file,size,ratio in ((hdf_spgm,size_spgm,ratio_spgm), (hdf_spgm_mel,size_spgm_mel,ratio_spgm_mel)):
            print(f"size {hdf_file.parent.stem} : {round(size/1e6, 3)} MB")
            print(f"byte/pixel ratio: {round(ratio, 3)}")
    stats = {
        'compression type':compression,
        'compression option': compression_opts,
        'audios':writes,
        'avg compute time [s]':avg_times['compute'],
        'avg write time [s]': avg_times['write'],
        'size spectrogram [MB]': size_spgm/1e6,
        'size mel spectgm [MB]': size_spgm_mel/1e6,
        'B/px ratio spctgm': ratio_spgm,
        'B/px ratio mel spctgm': ratio_spgm_mel
    }
    return stats

In [None]:
# keyword arguments shared by every call of `spectrogram_time_stats`
file_paras = dict(outpath=datapath, loadpath=datapath_train, n_files=10)
spectrogram_paras = dict(n_fft=n_fft, hop_length=hop_length, n_mels=n_mels)
# one benchmark run per (compression, compression_opts) pair
compression_schemes = [
    {'compression': kind, 'compression_opts': opts}
    for kind, opts in ((None, None), ('lzf', None), ('gzip', 9), ('gzip', 4), ('gzip', 0))
]

In [None]:
# single run with the first (uncompressed) scheme; this defines `stats`, so the
# cell no longer depends on a variable left over from an earlier kernel session
stats = spectrogram_time_stats(**file_paras, **spectrogram_paras, **compression_schemes[0])
stats

{'audios': 8,
 'avg compute time [s]': 2.9452082812786102,
 'avg write time [s]': 0.02203020453453064,
 'compression type': None,
 'compression option': None,
 'size spectrogram [B]': 84771584,
 'size mel spectgm [B]': 10600448,
 'B/px ratio spctgm': 4.0007732386921395,
 'B/px ratio mel spctgm': 4.006191950464396}

In [None]:
%%time
df = pd.DataFrame()
mb = master_bar(compression_schemes)
for compression_scheme in mb:
    stats = spectrogram_time_stats(**file_paras, **spectrogram_paras, **compression_scheme, master_bar=mb)
    stats = {k:[v] for k,v in stats.items()}
    df = pd.concat([df, pd.DataFrame.from_dict(stats)], ignore_index=True)

CPU times: user 1min 57s, sys: 3.69 s, total: 2min 1s
Wall time: 2min 6s


In [None]:
df

Unnamed: 0,compression type,compression option,audios,avg compute time [s],avg write time [s],size spectrogram [MB],size mel spectgm [MB],B/px ratio spctgm,B/px ratio mel spctgm
0,,,8,2.938427,0.023523,84.771584,10.600448,4.000773,4.006192
1,lzf,,8,2.917015,0.10941,81.974545,10.59233,3.868768,4.003124
2,gzip,9.0,8,2.92667,0.520495,71.355023,9.113995,3.367582,3.444422
3,gzip,4.0,8,2.927158,0.463423,71.38669,9.114067,3.369077,3.444449
4,gzip,0.0,8,2.927926,0.049601,86.416928,10.617696,4.078425,4.01271


In [None]:
# `to_csv` does not create missing folders, so make sure the target exists first
statspath.mkdir(parents=True, exist_ok=True)
df.to_csv(statspath/'sample_hdf5_compressions.csv', index=False)

In [None]:
%%time
for audio_file in datapath_train.iterdir():
    # check
    if audio_file.suffix != '.flac': continue
    # load
    wf,sr = librosa.load(audio_file)
    # compute
    spgm = compute_spectrogram(wf=wf, n_fft=n_fft, hop_length=hop_length)
    spgm_mel = compute_mel_spectrogram(wf=wf, sr=sr, n_fft=n_fft, hop_length=hop_length, n_mels=n_mels)
    # write
    np.save(datapath_spgm/f'{audio_file.stem}', spgm)
    np.save(datapath_spgm_mel/f'{audio_file.stem}', spgm_mel)



CPU times: user 1min 25s, sys: 3.62 s, total: 1min 28s
Wall time: 1min 36s


## pure pytorch

In [None]:
# import torchaudio

In [None]:
# # pytorch STFT
# wf,sr = torchaudio.load(audio_file)
# stft = torch.stft(wf, 
#                   n_fft          = n_fft, 
#                   hop_length     = hop_length, 
#                   window         = torch.hann_window(n_fft), 
#                   return_complex = True, 
#                   center         = True)

# spgm_pwr = torch.abs(stft)**2
# spgm_log = librosa.power_to_db(spgm_pwr.numpy())

## notebook export

In [None]:
# run nbdev's notebook-to-script export
from nbdev.export import notebook2script
notebook2script()

Converted 00_utilities.ipynb.
Converted index.ipynb.
