In [1]:
import os
import librosa
import numpy as np
import pandas as pd
from bs4 import BeautifulSoup
from sklearn.preprocessing import scale

In [2]:
def read_audio_labels(filename):
    """Read labelled time regions from an XML label file.

    Every ``<label>`` element found in the document becomes one
    ``(start, stop, value)`` tuple, where ``start`` and ``stop`` are the
    element's ``t`` and ``t1`` attributes converted to ``float`` and
    ``value`` is derived from its ``title`` attribute:

    * ``'L'``, ``'R'`` or ``'C'`` -> ``1``
    * ``'H'``                     -> ``2``
    * any other title             -> ``0``

    NOTE(review): the ``.aup`` extension used by the callers suggests an
    Audacity project file -- confirm against the data source.

    Parameters
    ----------
    filename : str
        Path of the XML file to parse.

    Returns
    -------
    list of tuple(float, float, int)
        Labels in document order.

    Raises
    ------
    OSError
        If the file cannot be opened.
    KeyError
        If a ``<label>`` element lacks ``t``, ``t1`` or ``title``.
    """
    title_to_value = {'L': 1, 'R': 1, 'C': 1, 'H': 2}

    # Binary mode lets the XML parser honour the document's own encoding
    # instead of the platform default; the ``with`` block guarantees the
    # handle is closed even if parsing fails.
    with open(filename, 'rb') as handle:
        aup = BeautifulSoup(handle, features="xml")

    return [
        (
            float(label['t']),
            float(label['t1']),
            title_to_value.get(label['title'], 0),
        )
        for label in aup.find_all('label')
    ]

In [3]:
def get_sample_rate(audio_file):
    """Return the sampling rate reported by ``librosa.load`` for a file.

    The file is loaded with ``sr=None`` and ``mono=True``; the decoded
    samples are discarded and only the rate is returned.

    Parameters
    ----------
    audio_file : str
        Path of the audio file to inspect.

    Returns
    -------
    The second element of the ``(samples, rate)`` pair from ``librosa.load``.
    """
    _samples, rate = librosa.load(audio_file, mono=True, sr=None)
    return rate

In [4]:
def extract_features_from_segment(data, segment, sr):
    """Compute summary features for one audio segment and append them to ``data``.

    One value is appended to each of these lists in ``data`` (the keys must
    already exist):

    * ``zero_crossing_rate_sum``
    * ``spectral_flatness_mean`` / ``spectral_flatness_std``
    * ``spectral_centroid_mean`` / ``spectral_centroid_std``
    * ``spectral_rolloff_mean`` / ``spectral_rolloff_std``
    * ``spectral_bandwidth_mean`` / ``spectral_bandwidth_std``
    * ``chroma_stft_mean`` / ``chroma_stft_std``
    * ``mfcc_<i>_mean`` for every row ``i`` returned by ``librosa.feature.mfcc``

    Parameters
    ----------
    data : dict of str -> list
        Accumulator that is mutated in place.
    segment : array-like
        Audio samples of the segment.
    sr : int
        Sampling rate of ``segment``.

    Returns
    -------
    None
    """
    # The signal is always passed as ``y=``: it works on every librosa
    # release, whereas a positional signal is rejected by releases whose
    # feature functions take keyword-only arguments.
    zcr = librosa.feature.zero_crossing_rate(y=segment)
    mfcc = librosa.feature.mfcc(y=segment, sr=sr)
    sflat = librosa.feature.spectral_flatness(y=segment)
    scent = librosa.feature.spectral_centroid(y=segment, sr=sr)
    sroll = librosa.feature.spectral_rolloff(y=segment, sr=sr)
    sband = librosa.feature.spectral_bandwidth(y=segment, sr=sr)
    cstft = librosa.feature.chroma_stft(y=segment, sr=sr)

    data['zero_crossing_rate_sum'].append(np.sum(zcr))

    spectral_stats = (
        ('spectral_flatness', sflat),
        ('spectral_centroid', scent),
        ('spectral_rolloff', sroll),
        ('spectral_bandwidth', sband),
        ('chroma_stft', cstft),
    )
    for prefix, values in spectral_stats:
        data['{}_mean'.format(prefix)].append(np.mean(values))
        data['{}_std'.format(prefix)].append(np.std(values))

    # Average each coefficient across its row. The previous code first
    # standardised every row to zero mean (``scale(mfcc, axis=1)``) and then
    # averaged that same row, so every ``mfcc_<i>_mean`` feature was ~0
    # (pure floating-point noise) and carried no information.
    mfcc_means = np.mean(mfcc, axis=1)

    for i, value in enumerate(mfcc_means):
        data['mfcc_{}_mean'.format(i)].append(value)

In [5]:
def extract_features_with_label(data, shift, sr, audio, labels):
    """Extract features from sliding one-second windows inside labelled regions.

    For every ``(start, stop, label)`` region lasting at least one second, a
    window of ``sr`` samples is moved through the region in steps of
    ``shift`` samples. Each window is passed to
    ``extract_features_from_segment`` and the region's label is recorded
    under ``data['label']``.

    Parameters
    ----------
    data : dict of str -> list
        Feature accumulator; mutated in place. ``data['label']`` is reset to
        an empty list on entry.
    shift : int
        Hop between consecutive windows, in samples. Must be positive.
    sr : int
        Sampling rate of ``audio``; also the window length in samples.
    audio : sequence
        Audio samples.
    labels : iterable of (start, stop, label)
        Region boundaries in seconds plus the label value.

    Returns
    -------
    pandas.DataFrame
        Built from ``data`` once all regions have been processed.

    Raises
    ------
    ValueError
        If ``shift`` is not positive (the window would never advance).
    """
    if shift <= 0:
        raise ValueError('shift must be a positive number of samples')

    data['label'] = []
    n_samples = len(audio)

    for start, stop, label in labels:
        if stop - start < 1:
            continue  # ignore if less than 1 sec

        st, sp = int(start * sr), int((start + 1) * sr)
        # Never let a window run past the end of the recording: a label
        # whose stop time exceeds the audio length would otherwise yield
        # truncated or empty segments.
        last = min(int(stop * sr), n_samples)

        while sp <= last:
            extract_features_from_segment(data, audio[st:sp], sr)
            data['label'].append(label)

            st += shift
            sp += shift

    return pd.DataFrame(data)

In [6]:
def extract_features_without_label(data, shift, sr, audio):
    """Extract features from sliding windows covering the whole recording.

    A window of ``sr`` samples starts at sample 0 and advances by ``shift``
    samples for as long as it fits entirely inside ``audio``. Each window is
    handed to ``extract_features_from_segment``, which appends to ``data``.

    Parameters
    ----------
    data : dict of str -> list
        Feature accumulator; mutated in place.
    shift : int
        Hop between consecutive windows, in samples.
    sr : int
        Window length in samples (the sampling rate).
    audio : sequence
        Audio samples.

    Returns
    -------
    pandas.DataFrame
        Built from ``data`` after the last window.
    """
    total = len(audio)
    offset = 0

    # The window end is always ``offset + sr``, so one cursor is enough.
    while not (offset + sr > total):
        window = audio[offset:offset + sr]
        extract_features_from_segment(data, window, sr)
        offset = offset + shift

    return pd.DataFrame(data)

In [7]:
def _ensure_extension(path, extension):
    """Return ``path`` unchanged if it already ends with ``extension`` (case-insensitive), else with it appended."""
    if path.lower().endswith(extension):
        return path
    return "{}{}".format(path, extension)


def extract_features(audio_file, audio_label_file=None, no_sr=True):
    """Load an audio file and return its windowed features as a DataFrame.

    The audio is loaded with ``librosa.load`` (``sr=None``, ``mono=True``)
    and cut into windows that advance by ``int(sr / 5)`` samples. When a
    label file is given, only labelled regions are processed and the
    result has an extra ``label`` column; otherwise the whole recording is
    processed.

    Parameters
    ----------
    audio_file : str
        Path of the audio file. ``.m4a`` is appended if the path does not
        already end with it.
    audio_label_file : str, optional
        Path of the label file. ``.aup`` is appended if the path does not
        already end with it. A falsy value means "no labels".
    no_sr : bool, optional
        Unused; kept so existing callers keep working.

    Returns
    -------
    pandas.DataFrame
        One row per window, with the 11 summary-feature columns plus
        ``mfcc_0_mean`` ... ``mfcc_19_mean`` (and ``label`` when labelled).

    Raises
    ------
    FileNotFoundError
        If the audio file or the requested label file does not exist.
        (``FileNotFoundError`` is an ``Exception`` subclass, so callers that
        catch ``Exception`` are unaffected.)
    """
    # Validate the paths first so a bad argument fails before any audio is
    # decoded. The extension is tested with ``endswith`` rather than a
    # substring search, which misfired whenever ".m4a"/".aup" appeared
    # elsewhere in the path (e.g. in a directory name).
    audio_file = _ensure_extension(audio_file, '.m4a')
    if not os.path.isfile(audio_file):
        raise FileNotFoundError('Audio file does not exist')

    if audio_label_file:
        audio_label_file = _ensure_extension(audio_label_file, '.aup')
        if not os.path.isfile(audio_label_file):
            raise FileNotFoundError('Label file does not exist')

    summary_columns = (
        'zero_crossing_rate_sum',
        'spectral_flatness_mean',
        'spectral_flatness_std',
        'spectral_centroid_mean',
        'spectral_centroid_std',
        'spectral_rolloff_mean',
        'spectral_rolloff_std',
        'spectral_bandwidth_mean',
        'spectral_bandwidth_std',
        'chroma_stft_mean',
        'chroma_stft_std',
    )
    data = {column: [] for column in summary_columns}
    # One column for each of the 20 MFCC rows that
    # extract_features_from_segment is expected to fill in.
    for i in range(20):
        data['mfcc_{}_mean'.format(i)] = []

    audio, sr = librosa.load(audio_file, sr=None, mono=True)
    shift = int(sr / 5)  # hop of one fifth of a second

    if audio_label_file:
        audio_labels = read_audio_labels(audio_label_file)
        return extract_features_with_label(data, shift, sr, audio, audio_labels)

    return extract_features_without_label(data, shift, sr, audio)

In [8]:
# Batch run: extract features for every ``.m4a`` file in the working
# directory and write them to ``<stem>.csv`` next to the audio. When a
# ``<stem>.aup`` file is present it is used as the label source.
all_files = os.listdir(".")
# ``endswith`` instead of a substring test, so files such as ``x.m4a.csv``
# are not mistaken for audio.
audio_files = [name for name in all_files if name.endswith('.m4a')]

for audio_file in audio_files:
    print("Extracting features of '{}'".format(audio_file))

    # ``splitext`` removes only the final extension; ``split('.')[0]`` cut
    # the name at its first dot, so ``a.b.m4a`` and ``a.c.m4a`` both mapped
    # to ``a.csv`` / ``a.aup``.
    filename = os.path.splitext(audio_file)[0]
    feature_file = '{}.csv'.format(filename)

    label_file = '{}.aup'.format(filename)
    if label_file not in all_files:
        label_file = None
        print("No label available for '{}'".format(audio_file))

    df = extract_features(audio_file, label_file)
    df.to_csv(feature_file, index=False)

Extracting features of 'NcAZdATqkxA.m4a'
No label available for 'NcAZdATqkxA.m4a'
Extracting features of 'WIVVb87JPog.m4a'
No label available for 'WIVVb87JPog.m4a'
Extracting features of 'jaOEY8iRo6c.m4a'
No label available for 'jaOEY8iRo6c.m4a'
Extracting features of 'h7VtMor4lL8.m4a'
No label available for 'h7VtMor4lL8.m4a'
Extracting features of 'EEFInk9wlzs.m4a'
No label available for 'EEFInk9wlzs.m4a'
Extracting features of '-OUplnLYcxY.m4a'
No label available for '-OUplnLYcxY.m4a'
Extracting features of 'ao2LuQwi4is.m4a'
Extracting features of 'Q2LDobhGHm4.m4a'
No label available for 'Q2LDobhGHm4.m4a'
Extracting features of 'UpX_kurvAXM.m4a'
No label available for 'UpX_kurvAXM.m4a'
Extracting features of 'e8mvLIXoIug.m4a'
No label available for 'e8mvLIXoIug.m4a'
Extracting features of 'IAzZnHwmBfc.m4a'
No label available for 'IAzZnHwmBfc.m4a'
Extracting features of '2aaM63uawjo.m4a'
Extracting features of 'XLK7E1aEOuM.m4a'
No label available for 'XLK7E1aEOuM.m4a'
Extracting featu