In [1]:
% pylab inline
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import librosa
import librosa.display
from scipy.io.wavfile import read as sciread
from scipy import stats

Populating the interactive namespace from numpy and matplotlib


In [2]:
def calcFFT(audio, fs, n_bins=44100):
    """Compute the FFT of a signal together with its frequency axis.

    Parameters
    ----------
    audio : array-like
        1-D sequence of samples.
    fs : int or float
        Sampling rate in Hz, used to scale bin indices to frequencies.
    n_bins : int or None, optional
        Number of leading FFT bins to keep (default 44100, the value that
        used to be hard-coded). Note that this is a *bin count*, not a
        frequency in Hz: the highest frequency kept depends on the signal
        length. Pass ``None`` to keep every bin.

    Returns
    -------
    (f, transformed) : tuple of numpy.ndarray
        ``f`` holds the frequency of each kept bin in Hz and ``transformed``
        the corresponding complex FFT coefficients. Both have at most
        ``n_bins`` elements (fewer when the signal is shorter).
    """
    transformed = np.fft.fft(audio)
    n_total = len(transformed)
    # Bin k of an n_total-point FFT corresponds to k * fs / n_total Hz.
    f = np.arange(0, n_total) * fs / n_total
    # Slicing with None keeps the full array, so n_bins=None disables truncation.
    return (f[:n_bins], transformed[:n_bins])

In [3]:
def plotFFT(audio, fs):
    """Plot the magnitude spectrum of a signal.

    Parameters
    ----------
    audio : array-like
        1-D sequence of samples.
    fs : int or float
        Sampling rate in Hz, forwarded to ``calcFFT``.

    The figure is shown immediately via ``plt.show()``; nothing is returned.
    """
    f, transformed = calcFFT(audio, fs)
    plt.plot(f, np.abs(transformed))
    plt.xlabel('Frequency (Hz)')
    # np.abs yields linear magnitude; no log conversion is applied, so the
    # axis must not be labelled in dB.
    plt.ylabel('Magnitude')
    plt.show()

In [15]:
def get_info(fpath, label, sr=44100):
    """Summarise the magnitude spectrum of one audio file as a feature dict.

    Parameters
    ----------
    fpath : str
        Path of the audio file to load.
    label : str
        Class label stored alongside the features.
    sr : int, optional
        Sampling rate the file is resampled to on load (default 44100,
        the value that used to be hard-coded).

    Returns
    -------
    dict
        Keys ``fpath``, ``label``, ``median``, ``mean``, ``1Q``, ``2Q``,
        ``IQR``, ``min``, ``max``, ``std`` and ``skew``. The statistics are
        taken over the magnitudes of the bins returned by ``calcFFT``.
    """
    # `sr` must be passed by keyword: recent librosa releases reject it
    # as a positional argument.
    signal = librosa.load(fpath, sr=sr)[0]
    _, spectrum = calcFFT(signal, sr)
    magnitude = np.abs(spectrum)

    data = {}
    data['fpath'] = fpath
    data['label'] = label
    data['median'] = np.median(magnitude)
    data['mean'] = np.mean(magnitude)
    data['1Q'] = np.percentile(magnitude, 25)
    # NOTE: despite its name, '2Q' holds the 75th percentile (upper quartile).
    # The key is kept unchanged so existing column names stay valid.
    data['2Q'] = np.percentile(magnitude, 75)
    data['IQR'] = stats.iqr(magnitude)
    data['min'] = np.min(magnitude)
    data['max'] = np.max(magnitude)
    data['std'] = np.std(magnitude)
    data['skew'] = stats.skew(magnitude, bias=True)
    return data

In [7]:
# Load the training metadata and collect the distinct class labels.
train = pd.read_csv('train.csv')
labels = train['label'].unique().tolist()
# Only the last expression of a cell is rendered, so the preview goes last.
train.head(10)

In [8]:
# Build the relative path of every clip listed in `train`.
# Iterating the column directly avoids a per-row `.loc` lookup and does not
# depend on the frame having a 0..n-1 integer index.
files = [f'audio_train/audio_train/{fname}' for fname in train['fname']]

In [9]:
# Attach the constructed file paths as a new 'fpath' column (mutates `train` in place).
train['fpath'] = files

In [12]:
# Preview a handful of rows instead of rendering the whole frame.
train.head(10)

Unnamed: 0,fname,label,manually_verified,fpath
0,00044347.wav,Hi-hat,0,audio_train/audio_train/00044347.wav
1,001ca53d.wav,Saxophone,1,audio_train/audio_train/001ca53d.wav
2,002d256b.wav,Trumpet,0,audio_train/audio_train/002d256b.wav
3,0033e230.wav,Glockenspiel,1,audio_train/audio_train/0033e230.wav
4,00353774.wav,Cello,1,audio_train/audio_train/00353774.wav
5,003b91e8.wav,Cello,0,audio_train/audio_train/003b91e8.wav
6,003da8e5.wav,Knock,1,audio_train/audio_train/003da8e5.wav
7,0048fd00.wav,Gunshot_or_gunfire,1,audio_train/audio_train/0048fd00.wav
8,004ad66f.wav,Clarinet,0,audio_train/audio_train/004ad66f.wav
9,0063ab88.wav,Computer_keyboard,0,audio_train/audio_train/0063ab88.wav


In [16]:
# Number of leading rows of `train` to extract features for.
N_FILES = 1000

# head() stops at the end of the frame, so a metadata file with fewer than
# N_FILES rows no longer raises KeyError, and iterating the columns avoids
# relying on a 0..n-1 integer index.
subset = train.head(N_FILES)
infos = [
    get_info(fpath, label)
    for fpath, label in zip(subset['fpath'], subset['label'])
]

In [17]:
# Collect the per-file feature dicts into a single DataFrame.
working = pd.DataFrame(infos)

In [18]:
# Preview a handful of rows instead of rendering the whole frame.
working.head(10)

Unnamed: 0,1Q,2Q,IQR,fpath,label,max,mean,median,min,skew,std
0,6.230491,27.272478,21.041986,audio_train/audio_train/00044347.wav,Hi-hat,577.259387,24.051130,13.204853,0.099928,4.257246,34.076773
1,5.674157,35.867089,30.192933,audio_train/audio_train/001ca53d.wav,Saxophone,3170.740614,49.153424,13.990031,0.006609,8.895209,141.165615
2,0.012762,0.066496,0.053734,audio_train/audio_train/002d256b.wav,Trumpet,7.276662,0.089362,0.029888,0.000128,13.358796,0.264248
3,0.420803,0.977241,0.556438,audio_train/audio_train/0033e230.wav,Glockenspiel,788.281803,1.144892,0.674073,0.000313,63.396895,7.775617
4,0.646084,3.692972,3.046888,audio_train/audio_train/00353774.wav,Cello,5414.037780,7.551387,1.383008,0.006342,40.976874,61.102422
5,0.785515,3.588235,2.802721,audio_train/audio_train/003b91e8.wav,Cello,10196.807714,7.652749,1.648056,0.003708,66.091146,103.880939
6,0.059958,0.320783,0.260824,audio_train/audio_train/003da8e5.wav,Knock,167.530903,0.950249,0.135189,0.000493,15.812324,5.040230
7,0.227796,12.818314,12.590518,audio_train/audio_train/0048fd00.wav,Gunshot_or_gunfire,1335.808399,13.389611,2.054116,0.000718,10.160240,33.314290
8,0.227506,0.858560,0.631054,audio_train/audio_train/004ad66f.wav,Clarinet,8488.464951,5.025694,0.420932,0.000829,59.144963,83.405840
9,4.397280,11.597468,7.200188,audio_train/audio_train/0063ab88.wav,Computer_keyboard,96.070145,9.130246,7.416977,0.002750,2.413294,7.320734


In [None]:
# Load one training clip at 44.1 kHz, keeping only the first element of the
# returned tuple (used as the waveform in the cells below).
y = librosa.load('audio_train/audio_train/003b91e8.wav', sr=44100)[0]

In [None]:
# get_info expects a file path and a class label and loads the audio itself;
# passing the already-loaded waveform and the sample rate does not match its
# signature. This clip is labelled 'Cello' in train.csv.
data = get_info('audio_train/audio_train/003b91e8.wav', 'Cello')

In [None]:
# Show the magnitude spectrum of the loaded clip.
plotFFT(audio=y, fs=44100)

In [None]:
# NOTE(review): `data2` is not assigned anywhere in the visible notebook, so
# this cell raises NameError on a fresh kernel — confirm where it should come from.
data2

In [None]:
# Display the feature dict computed for the single clip.
data

In [None]:
# Put the two feature dicts side by side as rows of one frame.
# NOTE(review): `data2` is not assigned anywhere in the visible notebook — confirm its source.
pd.DataFrame([data, data2])

In [None]:
# Full-length FFT of `y` and the matching frequency axis in Hz
# (bin k sits at k * fs / N).
fs = 44100
Y = np.fft.fft(y)
f = np.arange(len(Y)) * fs / len(Y)

In [None]:
# Plot the frequency axis itself against bin index.
plt.plot(f)

In [None]:
# Disabled alternatives: the first 22050 bins of Y, Z and X on the shared axis `f`.
# (Z and X are only assigned in cells further down the notebook.)
#plt.plot(f[:22050], np.abs(Y[:22050]))
#plt.plot(f[:22050], np.abs(Z[:22050]))
#plt.plot(f[:22050], np.abs(X[:22050]))
# Active plot: magnitude of the first 44100 FFT bins of Y.
plt.plot(f[:44100], np.abs(Y[:44100]))

In [None]:
# Spectrum of a second clip, for comparison with `Y`.
z = librosa.load('audio_train/audio_train/0097160c.wav', sr=44100)[0]
fs = 44100
# Transform the clip loaded in this cell (`z`); the earlier code transformed
# `y` again, so Z was just a copy of Y.
Z = np.fft.fft(z)
# The frequency axis must be sized from Z itself so it matches Z bin-for-bin.
f2 = np.arange(0, len(Z))*fs/len(Z)

In [None]:
# Spectrum of a third clip, for comparison with `Y`.
x = librosa.load('audio_train/audio_train/00c934d7.wav', sr=44100)[0]
fs = 44100
# Transform the clip loaded in this cell (`x`); the earlier code transformed
# `y` again, so X was just a copy of Y.
X = np.fft.fft(x)
# The frequency axis must be sized from X itself so it matches X bin-for-bin.
# NOTE: this reuses the name `f2`, overwriting any axis previously stored there.
f2 = np.arange(0, len(X))*fs/len(X)

In [None]:
# Zoom in on the low end of the spectrum: keep only the bins below `fmax` Hz.
fmax = 500
# Convert the cut-off frequency into a bin index (bin k sits at k * fs / len(Y)).
nmax = int(fmax * len(Y) / fs)
plt.plot(
    f[:nmax],
    np.abs(Y[:nmax]),
)

In [None]:
# Plot the raw time-domain samples of the clip.
plt.plot(y)

In [None]:
%%timeit

# scipy.io.wavfile.read returns (sample_rate, data), so the samples are at
# index 1; index 0 would bind the sample rate instead of the audio.
z = sciread('audio_train/audio_train/0048fd00.wav')[1]