In [6]:
% pylab inline
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import librosa
import librosa.display
from scipy.io.wavfile import read as sciread
from scipy import stats

Populating the interactive namespace from numpy and matplotlib


In [7]:
def calcFFT(audio, fs, max_bins=44100):
    """Return the FFT of ``audio`` together with its frequency axis.

    Parameters
    ----------
    audio : array_like
        Samples handed to ``np.fft.fft``.
    fs : number
        Sampling rate used to convert bin indices into frequencies.
    max_bins : int or None, optional
        How many leading FFT bins to keep. The default (44100) reproduces
        the previously hard-coded truncation; ``None`` keeps every bin.
        Note that the cut is expressed in *bins*, not in hertz: the
        frequency reached by bin ``k`` is ``k * fs / len(audio)``.

    Returns
    -------
    tuple
        ``(f, transformed)`` where ``f`` holds the bin frequencies and
        ``transformed`` the complex FFT coefficients, both truncated to at
        most ``max_bins`` entries.
    """
    transformed = np.fft.fft(audio)
    n_bins = len(transformed)
    # Bin k corresponds to frequency k * fs / N.
    f = np.arange(n_bins) * fs / n_bins
    if max_bins is not None:
        f = f[:max_bins]
        transformed = transformed[:max_bins]
    return (f, transformed)

In [8]:
def plotFFT(audio, fs):
    """Plot the magnitude spectrum of ``audio``.

    Parameters
    ----------
    audio : array_like
        Samples to transform (forwarded to ``calcFFT``).
    fs : number
        Sampling rate, forwarded to ``calcFFT`` to build the frequency axis.

    Returns
    -------
    None
        The figure is shown with ``plt.show()``.
    """
    f, transformed = calcFFT(audio, fs)
    plt.plot(f, np.abs(transformed))
    plt.xlabel('Frequency (Hz)')
    # np.abs gives the linear magnitude; no logarithmic (dB) conversion is
    # applied, so the axis must not be labelled as decibels.
    plt.ylabel('Magnitude')
    plt.show()

In [9]:
def get_tempo(signal, sr):
    """Estimate the tempo of ``signal`` from its onset-strength envelope.

    Parameters
    ----------
    signal : array_like
        Audio samples.
    sr : number
        Sampling rate of ``signal``.

    Returns
    -------
    The value returned by ``librosa.beat.tempo`` (presumably a length-1
    array of beats per minute -- confirm against the installed librosa).
    """
    # Pass the audio by keyword: recent librosa releases no longer accept it
    # positionally, while older releases accept ``y=`` as well.
    onset_env = librosa.onset.onset_strength(y=signal, sr=sr)
    tempo = librosa.beat.tempo(onset_envelope=onset_env, sr=sr)
    return tempo

In [16]:
def get_harmonic(signal, sr):
    """Summarise the spectrum and tempo of the harmonic component of ``signal``.

    Parameters
    ----------
    signal : array_like
        Audio samples.
    sr : number
        Sampling rate of ``signal``; used for both the FFT frequency axis
        and the tempo estimate.

    Returns
    -------
    dict
        Keys ``hmedian``, ``hmean``, ``h1Q``, ``h2Q``, ``hIQR``, ``hmin``,
        ``hmax``, ``hstd`` (statistics of the FFT magnitude) and ``htempo``.
        ``h1Q`` is the 25th percentile and ``h2Q`` the 75th percentile.
    """
    signal = librosa.effects.harmonic(signal)
    f, transformed = calcFFT(signal, sr)
    magnitude = np.array(np.abs(transformed))
    data = {}
    data['hmedian'] = np.median(magnitude)
    data['hmean'] = np.mean(magnitude)
    data['h1Q'] = np.percentile(magnitude, 25)
    data['h2Q'] = np.percentile(magnitude, 75)
    data['hIQR'] = stats.iqr(magnitude)
    data['hmin'] = np.min(magnitude)
    data['hmax'] = np.max(magnitude)
    data['hstd'] = np.std(magnitude)
    # Use the caller's sampling rate rather than a hard-coded 44100 so the
    # tempo estimate stays correct for audio at other rates.
    data['htempo'] = get_tempo(signal, sr)
    return data

In [20]:
def get_percussion(signal, sr):
    """Summarise the spectrum and tempo of the percussive component of ``signal``.

    Parameters
    ----------
    signal : array_like
        Audio samples.
    sr : number
        Sampling rate of ``signal``; used for both the FFT frequency axis
        and the tempo estimate.

    Returns
    -------
    dict
        Keys ``pmedian``, ``pmean``, ``p1Q``, ``p2Q``, ``pIQR``, ``pmin``,
        ``pmax``, ``pstd`` (statistics of the FFT magnitude) and ``ptempo``.
        ``p1Q`` is the 25th percentile and ``p2Q`` the 75th percentile.
    """
    signal = librosa.effects.percussive(signal)
    f, transformed = calcFFT(signal, sr)
    magnitude = np.array(np.abs(transformed))
    data = {}
    data['pmedian'] = np.median(magnitude)
    data['pmean'] = np.mean(magnitude)
    data['p1Q'] = np.percentile(magnitude, 25)
    data['p2Q'] = np.percentile(magnitude, 75)
    data['pIQR'] = stats.iqr(magnitude)
    data['pmin'] = np.min(magnitude)
    data['pmax'] = np.max(magnitude)
    data['pstd'] = np.std(magnitude)
    # Use the caller's sampling rate rather than a hard-coded 44100 so the
    # tempo estimate stays correct for audio at other rates.
    data['ptempo'] = get_tempo(signal, sr)
    return data

In [18]:
def get_info(fpath, label, sr=44100):
    """Load one audio file and build its feature record.

    Parameters
    ----------
    fpath : str
        Path of the audio file to load.
    label : object
        Class label stored unchanged in the record.
    sr : number, optional
        Sampling rate requested from librosa when loading and used for every
        downstream computation. Defaults to 44100, the value that was
        previously hard-coded.

    Returns
    -------
    dict
        ``fpath`` and ``label``, FFT-magnitude statistics of the full signal
        (``median``, ``mean``, ``1Q``, ``2Q``, ``IQR``, ``min``, ``max``,
        ``std``), ``tempo``, followed by the entries of ``get_percussion``
        and ``get_harmonic``. ``1Q`` is the 25th percentile and ``2Q`` the
        75th percentile.
    """
    # The sampling rate is passed by keyword: recent librosa releases reject
    # it as a positional argument.
    signal = librosa.load(fpath, sr=sr)[0]
    f, transformed = calcFFT(signal, sr)
    magnitude = np.array(np.abs(transformed))
    harmonic_data = get_harmonic(signal, sr)
    percussion_data = get_percussion(signal, sr)
    data = {}
    data['fpath'] = fpath
    data['label'] = label
    data['median'] = np.median(magnitude)
    data['mean'] = np.mean(magnitude)
    data['1Q'] = np.percentile(magnitude, 25)
    data['2Q'] = np.percentile(magnitude, 75)
    data['IQR'] = stats.iqr(magnitude)
    data['min'] = np.min(magnitude)
    data['max'] = np.max(magnitude)
    data['std'] = np.std(magnitude)
    data['tempo'] = get_tempo(signal, sr)
    # Merge order fixes the column order: whole-signal, percussive, harmonic.
    data = {**data, **percussion_data, **harmonic_data}
    return data

In [13]:
# Load the training manifest (file name, label and audio path per clip)
# and preview its first ten rows.
train = pd.read_csv(filepath_or_buffer='train.csv')
train.head(n=10)

Unnamed: 0.1,Unnamed: 0,fname,label,manually_verified,fpath
0,0,00044347.wav,Hi-hat,0,audio_train/audio_train/00044347.wav
1,1,001ca53d.wav,Saxophone,1,audio_train/audio_train/001ca53d.wav
2,2,002d256b.wav,Trumpet,0,audio_train/audio_train/002d256b.wav
3,3,0033e230.wav,Glockenspiel,1,audio_train/audio_train/0033e230.wav
4,4,00353774.wav,Cello,1,audio_train/audio_train/00353774.wav
5,5,003b91e8.wav,Cello,0,audio_train/audio_train/003b91e8.wav
6,6,003da8e5.wav,Knock,1,audio_train/audio_train/003da8e5.wav
7,7,0048fd00.wav,Gunshot_or_gunfire,1,audio_train/audio_train/0048fd00.wav
8,8,004ad66f.wav,Clarinet,0,audio_train/audio_train/004ad66f.wav
9,9,0063ab88.wav,Computer_keyboard,0,audio_train/audio_train/0063ab88.wav


In [14]:
# Load the features extracted so far. A CSV saved together with its index
# comes back with an extra 'Unnamed: N' column on every save/load cycle;
# drop those so they do not pile up in the feature table.
working = pd.read_csv('working.csv')
working = working.loc[:, ~working.columns.str.startswith('Unnamed')]

In [21]:
# Resume feature extraction at the first row of `train` that is not yet in
# `working` (assumes `train` has the default 0..n-1 index and that `working`
# holds rows 0..k-1 in order).
new_frames = []
try:
    for i in range(len(working), len(train)):
        info = get_info(train.loc[i, 'fpath'], train.loc[i, 'label'])
        new_frames.append(pd.DataFrame(info, index=[i]))
finally:
    # Concatenate once instead of on every iteration (avoids quadratic
    # copying). Doing it in `finally` keeps the rows finished so far even
    # when the long-running loop is interrupted from the keyboard.
    if new_frames:
        working = pd.concat([working, *new_frames])

KeyboardInterrupt: 

In [34]:
# Persist the progress. The index only mirrors the row position, so it is
# not written: saving it makes the next read_csv add an 'Unnamed: 0' column.
working.to_csv(path_or_buf='working.csv', index=False)