In [7]:
import essentia
import essentia.standard as es
import glob
import os
import librosa
import soundfile as sf
from essentia.standard import *
import numpy as np
import ntpath
import pandas as pd
import logging
from multiprocessing import Pool
import traceback
# Configure the root logger: records at DEBUG level and above are written to
# the file 'audio_feature_extraction.log'.
logging.basicConfig(filename='audio_feature_extraction.log',level=logging.DEBUG)

import argparse

def get_audio_features(f):
    """Collect the non-array, non-metadata descriptors of one audio file.

    Runs ``es.MusicExtractor`` configured with mean/stdev statistics for the
    low-level, rhythm and tonal groups, then keeps every descriptor whose name
    does not contain "metadata" and whose value is not a numpy array.

    Args:
        f: Audio file path handed straight to the extractor.

    Returns:
        dict mapping descriptor name to its (non-ndarray) value.
    """
    summary_stats = ['mean', 'stdev']
    extractor = es.MusicExtractor(
        lowlevelStats=summary_stats,
        rhythmStats=summary_stats,
        tonalStats=summary_stats,
    )
    # The extractor returns two objects; only the first one is read here.
    pool, _unused_frames = extractor(f)

    scalar_descriptors = {}
    for descriptor in pool.descriptorNames():
        if "metadata" in descriptor:
            continue
        value = pool[descriptor]
        if isinstance(value, np.ndarray):
            continue
        scalar_descriptors[descriptor] = value
    return scalar_descriptors

def task(file, ID):
    """Extract the features of one file without ever raising.

    Intended as the per-item worker for a process pool: a failure on one file
    is logged and reported through the return value instead of aborting the
    whole batch.

    Args:
        file: Path of the audio file passed to ``get_audio_features``.
        ID: Identifier handed back together with the features on success.

    Returns:
        ``(features, ID)`` on success, ``(None, None)`` when extraction raised.
    """
    try:
        feature = get_audio_features(file)
        logging.info("processed file {}".format(file))
        return (feature, ID)
    except Exception:
        # logging.exception records at ERROR level and appends the current
        # traceback; the message names the file so the failure can be traced
        # back to its input when many files are processed concurrently.
        logging.exception("failed to process file {}".format(file))
        return (None, None)

    
def get_all_audio_features_parallel(list_dir, out_csv, processes=7):
    """Extract features for every ``*.wav`` file in the given directories and save them as CSV.

    Each file is processed by ``task`` in a worker pool. Files whose extraction
    failed (``task`` returned ``(None, None)``) are left out of the result.

    Args:
        list_dir: Directory path, or iterable of directory paths, searched
            (non-recursively) for ``*.wav`` files. A single string/path is
            accepted and treated as a one-element list.
        out_csv: Destination passed to ``DataFrame.to_csv``.
        processes: Number of worker processes in the pool (default 7).

    Returns:
        None. The CSV has an ``ID`` column (file base name without the
        ``.wav``/``.mp3`` text) followed by one column per feature. If no file
        was found or none could be processed, a CSV holding only the ``ID``
        header is written and a warning is logged.
    """
    # A bare string would otherwise be iterated character by character and
    # every character globbed as if it were a directory.
    if isinstance(list_dir, (str, os.PathLike)):
        list_dir = [list_dir]

    files = []
    for directory in list_dir:
        files.extend(glob.glob(os.path.join(os.fspath(directory), "*.wav")))
    # glob returns files in arbitrary order; sort so the CSV rows are reproducible.
    files.sort()

    IDs = [ntpath.basename(f).replace(".mp3", "").replace(".wav", "") for f in files]

    results = []
    if files:  # do not start worker processes when there is nothing to do
        with Pool(processes=processes) as pool:
            results = pool.starmap(task, list(zip(files, IDs)))

    # Drop the (None, None) placeholders that task returns for failed files.
    succeeded = [(feature, ID) for feature, ID in results if feature is not None]

    if not succeeded:
        logging.warning(
            "no audio features extracted from {}; writing empty {}".format(list(list_dir), out_csv)
        )
        pd.DataFrame(columns=["ID"]).to_csv(out_csv)
        return

    # Building the frame from the list of dicts leaves a missing descriptor as
    # NaN instead of raising KeyError when records do not share the same keys.
    df = pd.DataFrame([feature for feature, _ in succeeded])
    df.insert(0, "ID", [ID for _, ID in succeeded])

    df.to_csv(out_csv)
    
#get_all_audio_features_parallel(list_dir=in_wav_dirs, out_csv=csv_path)
# get_all_audio_features_parallel(["/data/zalo/hit-song-prediction/train-wav-samples", "/data/zalo/hit-song-prediction/test-wav-samples"])

In [8]:

# Single directory (one path string, not a list of directories) containing the wav files.
# NOTE(review): absolute, machine-specific path — adjust before running elsewhere.
in_wav_dirs = "/media/ben/datadrive/Zalo/voice-verification/Train-Test-Data/public-test/"
# Output CSV name; not used by the call below.
out_csv = "features.csv"
# Run the extractor on one file; as the last expression of the cell, its
# returned dict is displayed as the cell output.
get_audio_features(os.path.join(in_wav_dirs, "0auqADJgo6MHz2ET5vv4.wav"))

{'lowlevel.average_loudness': 0.9797549247741699,
 'lowlevel.barkbands_crest.mean': 15.558675765991211,
 'lowlevel.barkbands_crest.stdev': 5.068912029266357,
 'lowlevel.barkbands_flatness_db.mean': 0.4248653054237366,
 'lowlevel.barkbands_flatness_db.stdev': 0.07999197393655777,
 'lowlevel.barkbands_kurtosis.mean': 10.325821876525879,
 'lowlevel.barkbands_kurtosis.stdev': 16.852739334106445,
 'lowlevel.barkbands_skewness.mean': 1.0736998319625854,
 'lowlevel.barkbands_skewness.stdev': 1.4764575958251953,
 'lowlevel.barkbands_spread.mean': 4.976924419403076,
 'lowlevel.barkbands_spread.stdev': 4.354218006134033,
 'lowlevel.dissonance.mean': 0.32215598225593567,
 'lowlevel.dissonance.stdev': 0.07955385744571686,
 'lowlevel.dynamic_complexity': 2.355710744857788,
 'lowlevel.erbbands_crest.mean': 16.084442138671875,
 'lowlevel.erbbands_crest.stdev': 5.316064357757568,
 'lowlevel.erbbands_flatness_db.mean': 0.37523046135902405,
 'lowlevel.erbbands_flatness_db.stdev': 0.05358386039733887,
 '