In [1]:
import os
import numpy as np
import librosa
from IPython.display import display, Audio
import timeit
import random
from sklearn.externals import joblib
from numpy import ma
from aubio import source, pitch

# Make /sharedfolder the working directory; the relative paths used in later
# cells (training clips, pickled models) are resolved against it.
os.chdir('/sharedfolder')

In [2]:
def get_mfccs(wav_pathname):
    """Return per-frame MFCC vectors for an audio file, minus the 0th coefficient.

    The file is read with ``librosa.load`` using its default settings, and
    13 MFCCs are computed with a hop of 512 samples. The 0th coefficient is
    then removed from every frame, leaving 12 values per frame.

    Parameters
    ----------
    wav_pathname : str
        Path of the audio file to analyse.

    Returns
    -------
    list of numpy.ndarray
        One 1-D array of 12 coefficients per analysis frame, in time order.
    """
    sample_array, sample_rate = librosa.load(wav_pathname)
    # Pass the signal and sample rate by keyword: librosa >= 0.10 rejects
    # positional arguments here, and keywords also work on older releases.
    mfcc_frames = librosa.feature.mfcc(
        y=sample_array, sr=sample_rate, hop_length=512, n_mfcc=13
    ).T
    # After the transpose each row is one frame; column 0 is the 0th coefficient.
    return list(mfcc_frames[:, 1:])

In [3]:
def get_mfccs_and_deltas(wav_pathname):
    """Return per-frame MFCC, delta and delta-delta features for an audio file.

    Thirteen MFCCs are computed with a hop of 512 samples, together with
    their first- and second-order deltas. Each frame's feature vector is the
    concatenation of MFCCs 1-12 (the 0th coefficient is dropped), all 13
    delta values and all 13 delta-delta values: 38 numbers per frame.

    Parameters
    ----------
    wav_pathname : str
        Path of the audio file to analyse.

    Returns
    -------
    list of list of float
        One 38-element feature list per analysis frame, in time order.
    """
    sample_array, sample_rate = librosa.load(wav_pathname)
    # Pass the signal and sample rate by keyword: librosa >= 0.10 rejects
    # positional arguments here, and keywords also work on older releases.
    mfcc = librosa.feature.mfcc(
        y=sample_array, sr=sample_rate, hop_length=512, n_mfcc=13
    )
    delta = librosa.feature.delta(mfcc)
    delta2 = librosa.feature.delta(mfcc, order=2)
    # Rows are coefficients and columns are frames, so stack along the
    # coefficient axis (skipping MFCC row 0) and transpose once to obtain
    # one row per frame.
    all_features = np.vstack([mfcc[1:], delta, delta2]).T
    # Plain nested lists keep `features += get_mfccs_and_deltas(...)` in the
    # calling cells working as list concatenation.
    return all_features.tolist()

In [4]:

def get_vowel_segments(media_path, tolerance=0.6, max_pitch_hz=1000):
    """Flag, hop by hop, where the pitch tracker found a usable pitch.

    The file is opened with aubio's ``source`` (44100 Hz requested, hops of
    512 samples) and a "yin" pitch tracker with a 2048-sample window is run
    on every hop. A hop is flagged True unless the tracker's confidence is
    below ``tolerance`` or the estimated pitch is above ``max_pitch_hz``.

    Parameters
    ----------
    media_path : str
        Path of the audio file to analyse.
    tolerance : float, optional
        Value passed to the tracker's ``set_tolerance`` and also used as the
        minimum confidence for a hop to be kept. Defaults to 0.6.
    max_pitch_hz : float, optional
        Pitch estimates above this value (in Hz) are rejected. Defaults to 1000.

    Returns
    -------
    list of numpy.bool_
        One flag per hop read from the source, in time order.
    """
    downsample = 1
    samplerate = 44100 // downsample

    win_s = 2048 // downsample  # fft size
    hop_s = 512 // downsample   # hop size

    s = source(media_path, samplerate, hop_s)
    # Continue with the rate reported by the opened source.
    samplerate = s.samplerate

    pitch_o = pitch("yin", win_s, hop_s, samplerate)
    pitch_o.set_unit("Hz")
    pitch_o.set_tolerance(tolerance)

    pitches = []
    confidences = []
    while True:
        samples, read = s()
        pitches.append(pitch_o(samples)[0])
        confidences.append(pitch_o.get_confidence())
        # A short read means the end of the file has been reached.
        if read < hop_s:
            break

    pitches = np.array(pitches)
    confidences = np.array(confidences)

    # Plain boolean arithmetic instead of a masked array: when nothing is
    # masked, a masked array's `.mask` collapses to the scalar `nomask`, and
    # converting that to a list raises TypeError.
    rejected = (confidences < tolerance) | (pitches > max_pitch_hz)
    return list(~rejected)



In [5]:
tic = timeit.default_timer()

# Gather MFCC + delta feature frames from every Mike Pesca vowel clip.
pesca_clip_dir = '3_training_classes/Mike_Pesca/_vowel_clips'
pesca_mfccs = []

for filename in os.listdir(pesca_clip_dir):
    # Test the extension itself: a substring check would also accept names
    # such as 'clip.wav.bak'.
    if filename.endswith('.wav'):
        pesca_mfccs += get_mfccs_and_deltas(os.path.join(pesca_clip_dir, filename))

# Elapsed wall-clock time in seconds.
print(timeit.default_timer() - tic)

191.404698849


In [6]:
tic = timeit.default_timer()

# Gather MFCC + delta feature frames from every background-male vowel clip.
bg_male_clip_dir = '3_training_classes/Background_male/_vowel_clips'
bg_male_mfccs = []

for filename in os.listdir(bg_male_clip_dir):
    # Test the extension itself: a substring check would also accept names
    # such as 'clip.wav.bak'.
    if filename.endswith('.wav'):
        bg_male_mfccs += get_mfccs_and_deltas(os.path.join(bg_male_clip_dir, filename))

# Elapsed wall-clock time in seconds.
print(timeit.default_timer() - tic)

62.7976949215


In [7]:
tic = timeit.default_timer()

# Gather MFCC + delta feature frames from every background-female vowel clip.
bg_female_clip_dir = '3_training_classes/Background_female/_vowel_clips'
bg_female_mfccs = []

for filename in os.listdir(bg_female_clip_dir):
    # Test the extension itself: a substring check would also accept names
    # such as 'clip.wav.bak'.
    if filename.endswith('.wav'):
        bg_female_mfccs += get_mfccs_and_deltas(os.path.join(bg_female_clip_dir, filename))

# Elapsed wall-clock time in seconds.
print(timeit.default_timer() - tic)

23.6135640144


In [8]:
# Feature frames of all three classes, in the order Pesca, background male,
# background female.
X = list(pesca_mfccs)
X.extend(bg_male_mfccs)
X.extend(bg_female_mfccs)

# One integer label per frame: 0 = Pesca, 1 = background male,
# 2 = background female.
y = [0] * len(pesca_mfccs)
y += [1] * len(bg_male_mfccs)
y += [2] * len(bg_female_mfccs)

# Sanity check: the two printed lengths must match.
print(len(X))
print(len(y))

26758
26758


In [94]:
## Logistic regression (binary: 0 = Pesca, 1 = either background class)

from sklearn.linear_model import LogisticRegression

tic = timeit.default_timer()

# Both background classes share label 1 in this cell.
n_background_frames = len(bg_male_mfccs) + len(bg_female_mfccs)
X = pesca_mfccs + bg_male_mfccs + bg_female_mfccs
y = [0] * len(pesca_mfccs) + [1] * n_background_frames

logreg = LogisticRegression(C=100)
logreg.fit(X, y)

## Saving trained model, then reading it straight back from disk
logreg_model_path = 'pesca_vowels_logreg100.pkl'
joblib.dump(logreg, logreg_model_path)
logreg = joblib.load(logreg_model_path)

# Elapsed wall-clock time in seconds (fit + save + load).
print(timeit.default_timer() - tic)

2.2925491333


In [95]:
# Score the logistic-regression model on the X, y currently in the kernel.
# NOTE(review): the preceding cell fits the model on this same X, y, so this
# is a training-set score rather than a held-out estimate -- confirm.
logreg.score(X,y)

0.65199192764780622

In [88]:
## Linear SVC (three classes: 0 = Pesca, 1 = background male, 2 = background female)

from sklearn.svm import LinearSVC


tic = timeit.default_timer()

X = pesca_mfccs + bg_male_mfccs + bg_female_mfccs
y = [0] * len(pesca_mfccs)
y += [1] * len(bg_male_mfccs)
y += [2] * len(bg_female_mfccs)

linear_svc = LinearSVC(C=100)
linear_svc.fit(X, y)

## Saving trained model, then reading it straight back from disk
linear_svc_model_path = 'pesca_vowels_linear_svc.pkl'
joblib.dump(linear_svc, linear_svc_model_path)
linear_svc = joblib.load(linear_svc_model_path)

# Elapsed wall-clock time in seconds (fit + save + load).
print(timeit.default_timer() - tic)

46.2222027779


In [89]:
# Score the linear SVC on the X, y currently in the kernel.
# NOTE(review): the preceding cell fits the model on this same X, y, so this
# is a training-set score rather than a held-out estimate -- confirm.
linear_svc.score(X,y)

0.39225652141415651

In [167]:
tic = timeit.default_timer()

# Pick one clip at random from the folder of unseen recordings.
filename = random.choice(os.listdir('3_training_classes/unseen/'))
test_pathname = '3_training_classes/unseen/' + filename
test_mfccs = get_mfccs_and_deltas(test_pathname)

print(test_pathname)

# NOTE: `decision_tree` is fitted in the "Decision tree" cell further down the
# notebook; that cell has to be run before this one.
results = decision_tree.predict(test_mfccs)  ## Predicting new observation

print(results)

# NOTE(review): every second pitch-tracker hop is kept, presumably to line the
# 44100 Hz pitch track up with the feature frames (same 512-sample hop at
# half that rate) -- confirm.
vowel_bools = get_vowel_segments(test_pathname)[::2]

# zip() stops at the shorter sequence, so a small frame-count difference
# between the two analyses cannot raise IndexError.
vowel_results = [label for label, is_vowel in zip(results, vowel_bools) if is_vowel]

display(Audio(test_pathname))

print("All: "+str(np.mean(results)))
# With no vowel frames np.mean([]) would emit a RuntimeWarning; report nan directly.
vowel_mean = np.mean(vowel_results) if vowel_results else float('nan')
print("Vowels only: "+str(vowel_mean))

#print("Time elapsed: "+str(timeit.default_timer() - tic))

3_training_classes/unseen/SM8940738255.mp3.0338_clip.wav
[0 1 1 0 0 0 1 1 1 1 0 0 0 1 0 0 0 0 0 1 0 0 1 1 1 1 1 0 0 0 1 0 0 1 1 1 0
 1 0 0 0 0 1 0 0 1 1 1 1 1 1 0 0 0 0 0 1 0 0 0 0 1 0 0 0 0 0 1 1 0 1 1 0 0
 0 0 0 1 0 0 0 0 0 0 1 1 1 1 1 1 1 1 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 1
 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0]


All: 0.453846153846
Vowels only: 0.473684210526


In [164]:
## Decision tree
## (the estimator fitted here is a RandomForestClassifier; the variable and
## file keep the name `decision_tree` because other cells refer to it)

from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier

tic=timeit.default_timer()

X = pesca_mfccs + bg_male_mfccs + bg_female_mfccs
# Binary task: 0 = Pesca, 1 = either background class.
y = [0]*len(pesca_mfccs) + [1]*len(bg_male_mfccs) + [1]*len(bg_female_mfccs)

# A fixed seed makes the fitted forest -- and so the pickled model and every
# prediction made with it -- repeatable across kernel restarts.
decision_tree = RandomForestClassifier(random_state=0).fit(X, y)

## Saving trained model, then reading it straight back from disk
joblib.dump(decision_tree,'pesca_vowels_decision_tree.pkl')
decision_tree=joblib.load('pesca_vowels_decision_tree.pkl')

# Elapsed wall-clock time in seconds (fit + save + load).
print(timeit.default_timer() - tic)

3.31674909592


In [165]:
# Score the random-forest model on the X, y currently in the kernel.
# NOTE(review): the preceding cell fits the model on this same X, y, so this
# is a training-set score rather than a held-out estimate -- confirm.
decision_tree.score(X, y)

0.99159129979819116

In [119]:
# KNN (binary: 0 = Pesca, 1 = either background class)

tic = timeit.default_timer()

from sklearn.neighbors import KNeighborsClassifier

# Both background classes share label 1 in this cell.
background_mfccs = bg_male_mfccs + bg_female_mfccs
X = pesca_mfccs + background_mfccs
y = [0] * len(pesca_mfccs) + [1] * len(background_mfccs)

neigh = KNeighborsClassifier(n_neighbors=3).fit(X, y)

# Elapsed wall-clock time in seconds (import + fit).
print(timeit.default_timer() - tic)

0.381817102432


In [120]:
# Score the KNN model on the X, y currently in the kernel.
# NOTE(review): the preceding cell fits the model on this same X, y, so this
# is a training-set score rather than a held-out estimate -- confirm.
neigh.score(X, y)

0.95070633081695199

In [21]:
# Switch into the smacpy directory so that `import smacpy` resolves from there.
# NOTE(review): the output recorded for this cell is
# "NameError: _SNDFILE_FILE_FORMAT", i.e. the import did not succeed.
os.chdir('/sharedfolder/smacpy')
import smacpy


NameError: _SNDFILE_FILE_FORMAT

In [23]:
# Switch the working directory back to /sharedfolder/ so the relative paths
# used by the other cells resolve again.
os.chdir('/sharedfolder/')

In [None]:
### SVM (three classes: 0 = Pesca, 1 = background male, 2 = background female)
tic = timeit.default_timer()

from sklearn import svm

X = pesca_mfccs + bg_male_mfccs + bg_female_mfccs
y = [0] * len(pesca_mfccs)
y += [1] * len(bg_male_mfccs)
y += [2] * len(bg_female_mfccs)

# SVC with scikit-learn's default settings.
clf = svm.SVC().fit(X, y)

# Elapsed wall-clock time in seconds (import + fit).
print(timeit.default_timer() - tic)

In [None]:
## Saving trained model, then reading it straight back from disk

svm_model_path = 'pesca_svm_vowels.pkl'
joblib.dump(clf, svm_model_path)
clf = joblib.load(svm_model_path)

In [65]:
tic = timeit.default_timer()

import random
# Pick one clip at random from the folder of unseen recordings.
filename = random.choice(os.listdir('3_training_classes/unseen'))
test_pathname = '3_training_classes/unseen/' + filename
test_mfccs = get_mfccs_and_deltas(test_pathname)

print(test_pathname)
results = clf.predict(test_mfccs)
print(results)
# Share of frames NOT labelled 0 (Pesca). For a 0/1 model this equals the
# plain mean of the labels; for a 0/1/2 model the plain mean would count
# class 2 twice and would no longer be a proportion.
print(np.mean(np.asarray(results) != 0))

display(Audio(test_pathname))

# Elapsed wall-clock time in seconds.
print(timeit.default_timer() - tic)

3_training_classes/unseen/SM8940738255.mp3.0492_clip.wav
[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 1 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
0.0232558139535


0.928913831711


In [74]:
## GMM

from sklearn import mixture

tic = timeit.default_timer()

X = list(pesca_mfccs)
X.extend(bg_male_mfccs)
X.extend(bg_female_mfccs)
y = [0] * len(pesca_mfccs)
y += [1] * len(bg_male_mfccs)
y += [2] * len(bg_female_mfccs)

# NOTE(review): GaussianMixture is an unsupervised estimator. scikit-learn
# documents the `y` argument of fit() as ignored, and the default model has a
# single component, so predict() can only return component 0 -- which matches
# the all-zero predictions recorded for the evaluation cell. Confirm whether
# one mixture per class was intended.
gmm = mixture.GaussianMixture().fit(X, y)

## Saving trained model, then reading it straight back from disk
gmm_model_path = 'pesca_gmm.pkl'
joblib.dump(gmm, gmm_model_path)
gmm = joblib.load(gmm_model_path)

# Elapsed wall-clock time in seconds (fit + save + load).
print(timeit.default_timer() - tic)

0.5949010849


In [78]:
tic = timeit.default_timer()

import random
# Pick one clip at random from the folder of unseen recordings.
unseen_dir = '3_training_classes/unseen'
filename = random.choice(os.listdir(unseen_dir))
test_pathname = unseen_dir + '/' + filename
test_mfccs = get_mfccs_and_deltas(test_pathname)

print(test_pathname)
# NOTE(review): for a mixture model predict() yields mixture-component
# indices, which presumably do not correspond to the 0/1/2 class labels used
# elsewhere in the notebook -- confirm before interpreting the mean.
results = gmm.predict(test_mfccs)
print(results)
print(np.mean(results))

display(Audio(test_pathname))

# Elapsed wall-clock time in seconds.
print(timeit.default_timer() - tic)

3_training_classes/unseen/SM4892127271.mp3.0383_clip.wav
[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
0.0


0.406955003738


ValueError: numpy.dtype does not appear to be the correct type object

In [79]:
# Show the kernel's current working directory.
!pwd


/sharedfolder


In [81]:
from scikits.audiolab import Sndfile

NameError: _SNDFILE_FILE_FORMAT

In [83]:
!apt install libsndfile

Reading package lists... Done
Building dependency tree       
Reading state information... Done
E: Unable to locate package libsndfile


In [None]:
# Classes and the label each one gets in `y` (three-class cells):
#   pesca  -> 0
#   male   -> 1
#   female -> 2

In [92]:
!wget install http://www.mega-nerd.com/libsndfile/files/libsndfile-1.0.28.tar.gz

This is libsndfile, 1.0.28

libsndfile is a library of C routines for reading and writing
files containing sampled audio data.

The src/ directory contains the source code for library itself.

The doc/ directory contains the libsndfile documentation.

The examples/ directory contains examples of how to write code using
libsndfile.

The tests/ directory contains programs which link against libsndfile
and test its functionality.

The src/GSM610 directory contains code written by Jutta Degener and Carsten
Bormann. Their original code can be found at :
    http://kbs.cs.tu-berlin.de/~jutta/toast.html

The src/G72x directory contains code written and released by Sun Microsystems
under a suitably free license.

The src/ALAC directory contains code written and released by Apple Inc and
released under the Apache license.


LINUX
-----
Whereever possible, you should use the packages supplied by your Linux
distribution.

If you really do need to compile from source

In [96]:
# Build libsndfile from source: configure, compile, then install.
# NOTE(review): the recorded output shows the build running inside
# /sharedfolder/libsndfile-1.0.28 and installing into /usr/local/lib, so the
# kernel's working directory must be the unpacked source tree -- confirm.
!./configure
!make
!make install

checking build system type... x86_64-pc-linux-gnu
checking host system type... x86_64-pc-linux-gnu
checking target system type... x86_64-pc-linux-gnu
checking for a BSD-compatible install... /usr/bin/install -c
checking whether build environment is sane... yes
checking for a thread-safe mkdir -p... /bin/mkdir -p
checking for gawk... no
checking for mawk... mawk
checking whether make sets $(MAKE)... yes
checking whether make supports nested variables... yes
checking whether make supports nested variables... (cached) yes
checking for style of include used by make... GNU
checking for gcc... gcc
checking whether the C compiler works... yes
checking for C compiler default output file name... a.out
checking for suffix of executables... 
checking whether we are cross compiling... no
checking for suffix of object files... o
checking whether we are using the GNU C compiler... yes
checking whether gcc accepts -g... yes
checking for gcc option to accept ISO C89... none needed
checking whether gcc

checking if gcc -std=gnu99 accepts -std=gnu99... yes
checking for version of gcc -std=gnu99... 4.8
checking if gcc -std=gnu99 accepts -Wextra... yes
checking if gcc -std=gnu99 accepts -Wdeclaration-after-statement... yes
checking if gcc -std=gnu99 accepts -Wpointer-arith... yes
checking if g++ accepts -Wextra... yes
checking if g++ accepts -Wpointer-arith... yes
checking whether to add -D_FORTIFY_SOURCE=2 to CPPFLAGS... no
checking that generated files are newer than configure... done
configure: creating ./config.status
config.status: creating src/Makefile
config.status: creating man/Makefile
config.status: creating examples/Makefile
config.status: creating tests/Makefile
config.status: creating regtest/Makefile
config.status: creating M4/Makefile
config.status: creating doc/Makefile
config.status: creating Win32/Makefile
config.status: creating Octave/Makefile
config.status: creating programs/Makefile
config.status: creating Makefile
config.status: creating src/version-metadata.rc
con

make[1]: Entering directory `/sharedfolder/libsndfile-1.0.28/Win32'
make[2]: Entering directory `/sharedfolder/libsndfile-1.0.28/Win32'
make[2]: Nothing to be done for `install-exec-am'.
make[2]: Nothing to be done for `install-data-am'.
make[2]: Leaving directory `/sharedfolder/libsndfile-1.0.28/Win32'
make[1]: Leaving directory `/sharedfolder/libsndfile-1.0.28/Win32'
Making install in src
make[1]: Entering directory `/sharedfolder/libsndfile-1.0.28/src'
make[2]: Entering directory `/sharedfolder/libsndfile-1.0.28/src'
 /bin/mkdir -p '/usr/local/lib'
 /bin/bash ../libtool   --mode=install /usr/bin/install -c   libsndfile.la '/usr/local/lib'
libtool: install: /usr/bin/install -c .libs/libsndfile.so.1.0.28 /usr/local/lib/libsndfile.so.1.0.28
libtool: install: (cd /usr/local/lib && { ln -s -f libsndfile.so.1.0.28 libsndfile.so.1 || { rm -f libsndfile.so.1 && ln -s libsndfile.so.1.0.28 libsndfile.so.1; }; })
libtool: install: (cd /usr/local/lib && { ln -s -f libsndfile.so.1.0.28 libsndfil

In [None]:
# Print the mean label the KNN model predicts for `chime_mfccs_test`.
# NOTE(review): `chime_mfccs_test` is not defined in any cell visible in this
# part of the notebook -- confirm where it comes from before running.
print(np.mean(neigh.predict(chime_mfccs_test)))