In [2]:
import os
import numpy as np
import librosa
from IPython.display import display, Audio
import timeit
import random
from sklearn.externals import joblib
from numpy import ma
from aubio import source, pitch

# Every dataset and model path used below is relative, so it resolves against
# this working directory.
# NOTE(review): hard-coded absolute path -- adjust it for the machine running the notebook.
os.chdir('/home/sharedfolder')

In [3]:
def get_mfccs(wav_pathname):
    """Return per-frame MFCC vectors for an audio file, without the 0th coefficient.

    Parameters
    ----------
    wav_pathname : str
        Path handed straight to ``librosa.load`` (no sample rate is given, so
        librosa's default applies).

    Returns
    -------
    list of numpy.ndarray
        One array per analysis frame (hop of 512 samples), each holding
        coefficients 1..12 of a 13-coefficient MFCC.
    """
    sample_array, sample_rate = librosa.load(wav_pathname)
    # Pass the signal and rate by keyword: recent librosa releases reject them
    # positionally, while older releases accept the keyword spelling as well.
    mfcc_frames = librosa.feature.mfcc(
        y=sample_array, sr=sample_rate, hop_length=512, n_mfcc=13
    ).T  # transpose so that iterating yields one row per frame
    # Drop coefficient 0 from every frame.
    return [frame_values[1:] for frame_values in mfcc_frames]

In [4]:
def get_mfccs_and_deltas(wav_pathname):
    """Return one feature row per frame: MFCCs plus first and second deltas.

    Parameters
    ----------
    wav_pathname : str
        Path handed straight to ``librosa.load`` (no sample rate is given, so
        librosa's default applies).

    Returns
    -------
    list of list
        One 38-value row per analysis frame (hop of 512 samples):
        MFCC coefficients 1..12, then 13 delta values, then 13
        second-order delta values.
    """
    sample_array, sample_rate = librosa.load(wav_pathname)
    # Pass the signal and rate by keyword: recent librosa releases reject them
    # positionally, while older releases accept the keyword spelling as well.
    mfcc = librosa.feature.mfcc(y=sample_array, sr=sample_rate, hop_length=512, n_mfcc=13)
    delta = librosa.feature.delta(mfcc)
    delta2 = librosa.feature.delta(mfcc, order=2)

    # NOTE(review): coefficient 0 is dropped from the static MFCCs only; its
    # delta and delta-delta are kept. Left untouched so the feature width stays
    # compatible with any model already trained on these rows.
    # Transposing lets zip() walk the three tables frame by frame.
    return [
        list(mfcc_frame[1:]) + list(delta_frame) + list(delta2_frame)
        for mfcc_frame, delta_frame, delta2_frame in zip(mfcc.T, delta.T, delta2.T)
    ]

In [None]:
tic = timeit.default_timer()

# Pool the per-frame feature rows of every Mike Pesca vowel clip into one list.
pesca_clip_dir = '3_training_classes/Mike_Pesca/_vowel_clips'
pesca_mfccs = []

# sorted() makes the row order reproducible (os.listdir order is arbitrary);
# endswith() skips files that merely contain '.wav' somewhere in their name.
for filename in sorted(os.listdir(pesca_clip_dir)):
    if filename.endswith('.wav'):
        pesca_mfccs += get_mfccs_and_deltas(os.path.join(pesca_clip_dir, filename))

# Elapsed wall-clock seconds for the feature extraction above.
print(timeit.default_timer() - tic)

In [None]:
tic = timeit.default_timer()

# Pool the per-frame feature rows of every background-male vowel clip into one list.
bg_male_clip_dir = '3_training_classes/Background_male/_vowel_clips'
bg_male_mfccs = []

# sorted() makes the row order reproducible (os.listdir order is arbitrary);
# endswith() skips files that merely contain '.wav' somewhere in their name.
for filename in sorted(os.listdir(bg_male_clip_dir)):
    if filename.endswith('.wav'):
        bg_male_mfccs += get_mfccs_and_deltas(os.path.join(bg_male_clip_dir, filename))

# Elapsed wall-clock seconds for the feature extraction above.
print(timeit.default_timer() - tic)

In [None]:
tic = timeit.default_timer()

# Pool the per-frame feature rows of every background-female vowel clip into one list.
bg_female_clip_dir = '3_training_classes/Background_female/_vowel_clips'
bg_female_mfccs = []

# sorted() makes the row order reproducible (os.listdir order is arbitrary);
# endswith() skips files that merely contain '.wav' somewhere in their name.
for filename in sorted(os.listdir(bg_female_clip_dir)):
    if filename.endswith('.wav'):
        bg_female_mfccs += get_mfccs_and_deltas(os.path.join(bg_female_clip_dir, filename))

# Elapsed wall-clock seconds for the feature extraction above.
print(timeit.default_timer() - tic)

In [None]:
# Stack the three feature lists into one training matrix, in the order
# Pesca, background male, background female.
X = list(pesca_mfccs)
X.extend(bg_male_mfccs)
X.extend(bg_female_mfccs)

# Binary targets in the same order: 0 for Pesca frames, 1 for either background set.
y = [0] * len(pesca_mfccs) + [1] * (len(bg_male_mfccs) + len(bg_female_mfccs))

# Sanity check: the two printed lengths should match.
print(len(X))
print(len(y))

In [None]:
# K-nearest-neighbours classifier (currently disabled).
# API details:
# http://scikit-learn.org/stable/modules/generated/sklearn.neighbors.KNeighborsClassifier.html

tic=timeit.default_timer()

# The commented lines below build and fit the classifier. While they stay
# commented out, this cell never creates `neigh`, and the timer printed at the
# end measures nothing but its own overhead.
#from sklearn.neighbors import KNeighborsClassifier

#neigh = KNeighborsClassifier(n_neighbors=3)
#neigh.fit(X, y)

print(timeit.default_timer() - tic)

In [None]:
## Saving trained model

#joblib.dump(neigh,'pesca_vowels_knn.pkl')
#neigh=joblib.load('pesca_vowels_knn.pkl')

In [None]:
## Splitting up recent unseen files

#!for f in *.mp3; do ffmpeg -i $f -f segment -segment_time 3 $f.%04d_clip.wav ; done

In [None]:
#tic=timeit.default_timer()

#filename = random.choice(os.listdir('3_training_classes/unseen/_vowel_clips'))
#test_pathname = '3_training_classes/unseen/_vowel_clips/'+filename
#test_mfccs=get_mfccs_and_deltas(test_pathname)

#print(test_pathname)

#results = neigh.predict(test_mfccs)  ## Predicting new observation

#print(results)
#print(np.mean(results))


#get_vowel_segments(test_pathname)

#display(Audio(test_pathname))

#print("Time elapsed: "+str(timeit.default_timer() - tic))

In [1]:

def get_vowel_segments(media_path):
    """Flag, hop by hop, where a confident pitch estimate was found.

    The file is read through ``aubio.source`` in hops of 512 samples and each
    hop is passed to a YIN pitch detector. A hop is kept when the detector's
    confidence is not below ``tolerance`` (0.6) and the estimated pitch is not
    above 1000 Hz.

    Parameters
    ----------
    media_path : str
        Path of the media file handed to ``aubio.source``.

    Returns
    -------
    list of numpy.bool_
        One entry per hop read from the file: True where the hop was kept,
        False where it was rejected.
    """
    downsample = 1
    samplerate = 44100 // downsample  # requested rate; replaced below by the rate the source reports

    win_s = 2048 // downsample  # fft size
    hop_s = 512 // downsample   # hop size

    s = source(media_path, samplerate, hop_s)
    samplerate = s.samplerate

    tolerance = 0.6

    pitch_o = pitch("yin", win_s, hop_s, samplerate)
    pitch_o.set_unit("Hz")
    pitch_o.set_tolerance(tolerance)

    pitches = []
    confidences = []

    while True:
        samples, read = s()
        pitches.append(pitch_o(samples)[0])
        confidences.append(pitch_o.get_confidence())
        # A short read means the end of the file was reached.
        if read < hop_s:
            break

    pitches = np.array(pitches)
    confidences = np.array(confidences)

    # Build the rejection mask with plain boolean arrays. Going through
    # numpy.ma collapses the mask to a scalar when nothing is rejected, which
    # made the final list() call fail on fully-pitched clips.
    rejected = (confidences < tolerance) | (pitches > 1000)
    return list(np.logical_not(rejected))



In [None]:
### SVM -- fit a support-vector classifier on the pooled frame features.
tic = timeit.default_timer()

from sklearn import svm

# Training matrix in the order Pesca, background male, background female.
X = list(pesca_mfccs)
X.extend(bg_male_mfccs)
X.extend(bg_female_mfccs)

# Binary targets: 0 for Pesca frames, 1 for either background set.
y = [0] * len(pesca_mfccs) + [1] * (len(bg_male_mfccs) + len(bg_female_mfccs))

clf = svm.SVC()
clf.fit(X, y)

# Elapsed wall-clock seconds for the import, data assembly and fit.
print(timeit.default_timer() - tic)

In [None]:
## Persist the fitted SVM, then read it straight back so that `clf` is the
## reloaded copy.
## NOTE: joblib files are pickle-based -- only load files you created yourself.

joblib.dump(clf, 'pesca_svm_vowels.pkl')
clf = joblib.load('pesca_svm_vowels.pkl')

In [None]:
tic = timeit.default_timer()

# Pick one unseen clip at random. Only regular files are candidates, so a
# sub-directory of the folder can never be chosen and handed to the loader.
unseen_dir = '3_training_classes/unseen'
unseen_files = [name for name in sorted(os.listdir(unseen_dir))
                if os.path.isfile(os.path.join(unseen_dir, name))]
filename = random.choice(unseen_files)
test_pathname = unseen_dir + '/' + filename
test_mfccs = get_mfccs_and_deltas(test_pathname)

print(test_pathname)
# One predicted label per frame of the clip.
results = clf.predict(test_mfccs)
print(results)
# Mean of the per-frame labels; when the classifier was trained with labels
# 0 and 1 this is the fraction of frames assigned label 1.
print(np.mean(results))

display(Audio(test_pathname))

# Elapsed wall-clock seconds for feature extraction and prediction.
print(timeit.default_timer() - tic)

In [None]:
## GMM

from sklearn import mixture

tic = timeit.default_timer()

X = pesca_mfccs + bg_male_mfccs + bg_female_mfccs
# Class ids: 0 = Pesca, 1 = background male, 2 = background female.
y = [0]*len(pesca_mfccs) + [1]*len(bg_male_mfccs) + [2]*len(bg_female_mfccs)

# GaussianMixture is unsupervised: fit() ignores y, and the default model has
# a single component, so predict() could only ever return 0. Use one component
# per class and start each component at that class's mean feature vector, so
# that component k begins on class k.
# NOTE(review): EM is still free to move the components during fitting, so the
# component/class correspondence is a starting point, not a guarantee.
class_means = np.array([
    np.mean(class_frames, axis=0)
    for class_frames in (pesca_mfccs, bg_male_mfccs, bg_female_mfccs)
])
gmm = mixture.GaussianMixture(
    n_components=len(class_means),
    means_init=class_means,
    random_state=0,  # fixed seed so repeated runs give the same model
)
gmm.fit(X)

## Saving trained model, then reloading it so `gmm` is the persisted copy.
## NOTE: joblib files are pickle-based -- only load files you created yourself.
joblib.dump(gmm,'pesca_gmm.pkl')
gmm=joblib.load('pesca_gmm.pkl')

# Elapsed wall-clock seconds for fitting and the save/load round trip.
print(timeit.default_timer() - tic)

In [None]:
tic = timeit.default_timer()

# Pick one unseen clip at random. Only regular files are candidates, so a
# sub-directory of the folder can never be chosen and handed to the loader.
unseen_dir = '3_training_classes/unseen'
unseen_files = [name for name in sorted(os.listdir(unseen_dir))
                if os.path.isfile(os.path.join(unseen_dir, name))]
filename = random.choice(unseen_files)
test_pathname = unseen_dir + '/' + filename
test_mfccs = get_mfccs_and_deltas(test_pathname)

print(test_pathname)
# One mixture-component index per frame. These indices equal class labels
# only if the components were aligned with the classes when the model was fit.
results = gmm.predict(test_mfccs)
print(results)
print(np.mean(results))

display(Audio(test_pathname))

# Elapsed wall-clock seconds for feature extraction and prediction.
print(timeit.default_timer() - tic)

In [None]:
# Import smacpy from its own folder by temporarily making that folder the
# working directory. The original directory is restored in `finally`, so a
# failed import no longer leaves the kernel stranded inside 'smacpy' (which
# would break every relative path used afterwards).
smacpy_previous_dir = os.getcwd()
os.chdir('smacpy')
try:
    import smacpy
finally:
    os.chdir(smacpy_previous_dir)

In [None]:
# Debug aid: print the kernel's current working directory.
!pwd


In [None]:
from scikits.audiolab import Sndfile

In [None]:
# Install the libsndfile headers and library. The Debian/Ubuntu package is
# named libsndfile1-dev (there is no package called "libsndfile"), and -y
# answers the confirmation prompt, which a notebook cell cannot do interactively.
!apt-get install -y libsndfile1-dev

In [None]:
#1 pesca
#2 male
#3 female

In [None]:
# Download the libsndfile 1.0.28 source tarball into the working directory.
# (wget has no "install" sub-command; the stray word was treated as a second URL.)
!wget http://www.mega-nerd.com/libsndfile/files/libsndfile-1.0.28.tar.gz

In [None]:
# Unpack the downloaded tarball, then build and install from inside the
# extracted folder. Each `!` line runs in its own subshell, so the `cd` has to
# sit on the same line as the build commands; `&&` stops at the first failure.
!tar -xzf libsndfile-1.0.28.tar.gz
!cd libsndfile-1.0.28 && ./configure && make && make install

In [None]:
# Mean predicted label of the KNN model over `chime_mfccs_test`.
# NOTE(review): no live code visible in this notebook defines `neigh` (its
# construction is commented out) or `chime_mfccs_test`; on a fresh kernel this
# line raises NameError unless they are created elsewhere -- confirm.
print(np.mean(neigh.predict(chime_mfccs_test)))