In [None]:
import attk
import os
import csv
import numpy as np
import librosa
import timeit
import random
import subprocess
import unicodecsv
import urllib2
from sklearn.externals import joblib
from numpy import ma
from aubio import source, pitch
from moviepy.audio.io import AudioFileClip
from IPython.display import display, Audio

## Notebook-wide settings

# Label written into every output CSV row and used to build the saved-model filenames.
speaker_1_label = 'Barack Obama'

# Base directory for this classifier; later cells chdir back here and
# resolve the 'test_clips/' folder relative to it.
classifier_dir = '/sharedfolder/Obama_Batch_4K/sida_classifier/'
os.chdir(classifier_dir)

In [None]:
## Load saved features

def load_features(dir_path):
    """Read every CSV file in ``dir_path`` into one flat list of feature rows.

    Args:
        dir_path: Directory whose regular files are all parsed as CSV.

    Returns:
        A list of lists of floats, one inner list per non-blank CSV row.
        Files are visited in sorted filename order so the row order is the
        same on every run and machine.

    Raises:
        ValueError: If a CSV cell cannot be converted with ``float()``.
    """
    features = []
    # os.listdir() returns entries in arbitrary order; sorting keeps the row
    # order (and any positional split made from it later) reproducible.
    for filename in sorted(os.listdir(dir_path)):
        file_path = os.path.join(dir_path, filename)
        if not os.path.isfile(file_path):
            # Sub-directories cannot be opened as CSV files; skip them.
            continue
        with open(file_path) as fi:
            for row in csv.reader(fi):
                if not row:
                    # csv.reader yields [] for a blank line; an empty row
                    # would make the feature matrix ragged.
                    continue
                features.append([float(item) for item in row])
    return features

def _load_and_report(dir_path):
    """Load one feature directory and print how many rows it contained."""
    loaded = load_features(dir_path)
    print(len(loaded))
    return loaded


# Target-speaker rows come from two directories; the second is appended to
# the first and the running total is printed after each step.
speaker_1_features = _load_and_report('/sharedfolder/Obama_Batch_4K/sida_classifier/Obama_400_speeches_cleaned/_vowel_mfccs_and_deltas')
speaker_1_features.extend(load_features('/sharedfolder/Obama_Batch_4K/sida_classifier/Obama_weekly_addresses_cleaned/_vowel_mfccs_and_deltas'))
print(len(speaker_1_features))

# Non-target (UBM) rows, one list per corpus and gender.
aapb_ubm_male_features = _load_and_report('/sharedfolder/Obama_Batch_4K/sida_classifier/AAPB_male_vowel_mfccs_and_deltas_100-5K_Hz')
aapb_ubm_female_features = _load_and_report('/sharedfolder/Obama_Batch_4K/sida_classifier/AAPB_female_vowel_mfccs_and_deltas_100-5K_Hz')
the_world_ubm_male_features = _load_and_report('/sharedfolder/Obama_Batch_4K/sida_classifier/The_World_Male_vowel_mfccs_and_deltas')
the_world_ubm_female_features = _load_and_report('/sharedfolder/Obama_Batch_4K/sida_classifier/The_World_Female_vowel_mfccs_and_deltas')

In [None]:
#def load_features(dir_path):
#    features = []
#    for filename in [item for item in os.listdir(dir_path) if 'Weekly' in item]:
#        with open(os.path.join(dir_path, filename)) as fi:
#            csv_reader = csv.reader(fi)
#            for row in csv_reader:
#                features.append([float(item) for item in row])
#    return features

#speaker_1_features = load_features('/sharedfolder/sida_classifier/_classes_Obama_training_clips_all/Barack_Obama/_vowel_mfccs_and_deltas')
#print(len(speaker_1_features))

In [None]:
## Show the MFCC-and-delta values of one randomly chosen target-speaker frame

sample_frame = random.choice(speaker_1_features)
print(sample_frame)

In [None]:
## Combining feature sets

# speaker_1_features is used as loaded (the former self-assignment was a no-op).
# All non-target corpora are pooled into one list; the training cells label
# these rows 0 and the speaker rows 1.
ubm_features = (
    aapb_ubm_male_features
    + aapb_ubm_female_features
    + the_world_ubm_male_features
    + the_world_ubm_female_features
)

print(len(speaker_1_features))
print(len(ubm_features))

In [None]:
## Training a multi-layer perceptron model with 9/10 of training data and evaluating performance on remaining 1/10

os.chdir(classifier_dir)

# The split below is positional (the trailing tenth of each list is held out).
# Shuffling is left disabled, as before; enabling it reorders the lists in place.
#random.shuffle(speaker_1_features)
#random.shuffle(ubm_features)

from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import StandardScaler

# Negative slice index marking the start of the held-out tail of each class.
# `//` keeps the index an integer on Python 3 (with `/` it is a float and
# slicing raises TypeError) and floors exactly like Python 2's `/` did on
# ints, so the split itself is unchanged.
speaker_1_split = -len(speaker_1_features) // 10
ubm_split = -len(ubm_features) // 10

speaker_1_train = speaker_1_features[:speaker_1_split]
speaker_1_test = speaker_1_features[speaker_1_split:]
ubm_train = ubm_features[:ubm_split]
ubm_test = ubm_features[ubm_split:]

# Label 1 = target speaker, label 0 = everything else.
X_train = np.array(speaker_1_train + ubm_train)
y_train = np.array([1]*len(speaker_1_train) + [0]*len(ubm_train))

X_test = np.array(speaker_1_test + ubm_test)
y_test = np.array([1]*len(speaker_1_test) + [0]*len(ubm_test))

# Fit the scaler on the training portion only, then apply it to both portions.
scaler = StandardScaler().fit(X_train)

X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

classifier = MLPClassifier(random_state = 10).fit(X_train_scaled, y_train)

#classifier = MLPClassifier(max_iter = 2000, random_state = 10, \
#                          hidden_layer_sizes = (100, 100), solver = 'adam', \
#                          activation = 'relu').fit(X_train_scaled, y_train)

# Mean accuracy on the held-out tenth.
print(classifier.score(X_test_scaled,y_test))

In [None]:
## Training and saving an MLP model with all training data

from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import StandardScaler

# Label 1 = target speaker, label 0 = everything else.
X = np.array(speaker_1_features + ubm_features)
y = np.array([1]*len(speaker_1_features) + [0]*len(ubm_features))

scaler = StandardScaler().fit(X)
X_scaled = scaler.transform(X)

# Use the same seed as the held-out evaluation cell so the saved model is
# reproducible instead of depending on a fresh random initialisation.
classifier = MLPClassifier(random_state = 10).fit(X_scaled, y)

# Model and scaler share one filename stem so they are always saved and loaded as a pair.
model_basename = speaker_1_label.replace(' ', '_') + '_vowels_mlpc_4096_scaled_20170928'
trained_model_filename = model_basename + '.pkl'
scaler_filename = model_basename + '.scaler'

print(trained_model_filename)

## Saving trained model (paths are relative to the current working directory)

joblib.dump(classifier, trained_model_filename)
joblib.dump(scaler, scaler_filename)

# Reload from disk so the objects used by later cells are exactly what was saved.
classifier = joblib.load(trained_model_filename)
scaler = joblib.load(scaler_filename)

In [None]:
##############################################
#### Start here to load pre-trained model ####
##############################################

#os.chdir(classifier_dir)

#trained_model_filename = speaker_1_label.replace(' ', '_') + '_vowels_mlpc_4096_scaled_20170928.pkl'
#scaler_filename = speaker_1_label.replace(' ', '_') + '_vowels_mlpc_4096_scaled_20170928.scaler'

#os.chdir('/sharedfolder/sida_classifier/')
#classifier = joblib.load(trained_model_filename)
#scaler = joblib.load(scaler_filename)

In [None]:
## Download unseen audio and split into 3-second WAV clips for testing

os.chdir(classifier_dir)

# Create the clip directory only when it is missing, so genuine mkdir
# failures (e.g. permissions) are no longer hidden by a bare except.
if not os.path.isdir('test_clips/'):
    os.mkdir('test_clips/')

os.chdir(os.path.join(classifier_dir, 'test_clips/'))

mp3_url = "http://traffic.libsyn.com/wtfpod/WTF_-_EPISODE_613_PRESIDENT_BARACK_OBAMA.mp3"

mp3_filename = os.path.basename(mp3_url)

wav_filename = os.path.splitext(mp3_filename)[0] + '.wav'
segment_pattern = os.path.splitext(wav_filename)[0] + '_3_sec_%04d.wav'

# check_call raises CalledProcessError on a non-zero exit status, so a failed
# download or conversion stops the cell instead of feeding missing files to
# the next step.
subprocess.check_call(['wget', '-N', mp3_url])

# '-y' tells ffmpeg to overwrite existing outputs without asking, which keeps
# the cell re-runnable after a previous run has already produced the files.
subprocess.check_call(['ffmpeg', '-y', '-i', mp3_filename, wav_filename])

subprocess.check_call(['ffmpeg', '-y', '-i', wav_filename, '-f', 'segment', '-segment_time', '3', segment_pattern])


In [None]:
## Classifying short clips
#### Repeat this cell several times to help choose a classifier threshold value.

os.chdir(os.path.join(classifier_dir, 'test_clips/'))

# Pick one of the 3-second segments at random.
wav_pathname = os.path.abspath(random.choice([item for item in os.listdir('./') if '3_sec' in item]))

test_features = scaler.transform(attk.get_mfccs_and_deltas(wav_pathname))

print(wav_pathname)

results = classifier.predict(test_features)  ## Predicting new observation

print(results)

vowel_bools = attk.get_vowel_segments(wav_pathname)

# zip() stops at the shorter sequence, which covers the case of fewer vowel
# flags than predictions without a bare `except: pass` that would also hide
# unrelated errors. The explicit `== True` is kept because the element type
# returned by attk.get_vowel_segments is not visible from this notebook.
vowel_results = [result for result, is_vowel in zip(results, vowel_bools) if is_vowel == True]

display(Audio(wav_pathname))

print("All samples: "+str(np.mean(results)))
if vowel_results:
    print("Vowels only: "+str(np.mean(vowel_results)))
else:
    # np.mean([]) would print nan together with a RuntimeWarning.
    print("Vowels only: no vowel frames detected")

In [None]:
## Function that classifies vowel segments only and returns 
## average output for the full clip

def classify_clip(clip_pathname):
    """Score one clip as the mean classifier prediction over its vowel frames.

    Uses the notebook-level ``scaler`` and ``classifier`` objects and the
    ``attk`` helper module.

    Args:
        clip_pathname: Path of the audio clip; passed unchanged to
            ``attk.get_mfccs_and_deltas`` and ``attk.get_vowel_segments``.

    Returns:
        The mean of the predictions for frames flagged as vowels, or ``0.0``
        when no frame is flagged (``np.mean`` of an empty list is ``nan``,
        which would otherwise leak into later thresholding and smoothing).
    """
    frame_features = scaler.transform(attk.get_mfccs_and_deltas(clip_pathname))
    frame_predictions = classifier.predict(frame_features)  ## Predicting new observation
    vowel_flags = attk.get_vowel_segments(clip_pathname)

    # zip() pairs each prediction with its vowel flag and stops at the shorter
    # sequence, so fewer flags than predictions no longer raises IndexError.
    # `== True` is kept because the flag element type is defined in attk, not here.
    vowel_predictions = [
        prediction
        for prediction, is_vowel in zip(frame_predictions, vowel_flags)
        if is_vowel == True
    ]

    if not vowel_predictions:
        # Same neutral score the calling loops record for a failed clip.
        return 0.0

    return np.mean(vowel_predictions) ## Vowels only

In [None]:
%%capture
## Classifying a long audio file

resolution_secs = 3.0

os.chdir('/sharedfolder/')

tic = timeit.default_timer()

# Inputs this cell was previously pointed at (these assignments were
# immediately overwritten, so only the last one ever took effect):
#   "/sharedfolder/sida_classifier/test_clips/WIWpodcast38.wav"
#   "/sharedfolder/sida_classifier/test_clips/Barack_obama_documentary.2016_HD-ASM0GXVZmv4.wav"
#   "/sharedfolder/sida_classifier/Obama_test_audio/20090219_tmm_obamablog.mp3"
#   "/sharedfolder/sida_classifier/test_clips/Documentaries-20150724-ThePresidentObamaInterview.wav"
media_path = os.path.join(classifier_dir, "test_clips/WTF_-_EPISODE_613_PRESIDENT_BARACK_OBAMA.wav")

snd = AudioFileClip.AudioFileClip(media_path)

classifications = []

# One score per full `resolution_secs` window; int() drops a trailing partial window.
segment_count = int(attk.duration(media_path)/resolution_secs)

for i in range(segment_count):
    segment_start = i * resolution_secs
    try:
        snd.subclip(segment_start, segment_start + resolution_secs).write_audiofile('/tmp/temp_clip.wav')
        classifications.append(classify_clip('/tmp/temp_clip.wav'))
    except Exception as err:
        # Catch Exception rather than everything so a kernel interrupt still
        # stops the loop. A 0.0 placeholder keeps list index i aligned with
        # the segment start time used when the CSVs are written.
        classifications.append(0.0)
        print("Error: " + str(i) + " (" + repr(err) + ")")

In [None]:
# Seconds since `tic` was set at the start of the classification cell.
elapsed_secs = timeit.default_timer() - tic
print("Time elapsed: " + str(elapsed_secs))

In [None]:
## Writing classification output to CSV

os.chdir(classifier_dir)

classifier_threshold = 0.0      # Classifier values below this threshold will be discarded

# Output path: media path minus its last four characters, plus a suffix naming the resolution.
csv_path = media_path[:-4]+'_mlpc4096_scaled_labels_'+str(resolution_secs)+'s.csv'

duration = resolution_secs

with open(csv_path,'w') as fo:
    # Row layout: start,duration,value,label. The list index of each value
    # gives its segment start time.
    for counter, value in enumerate(classifications):
        if value >= classifier_threshold:
            start = counter * resolution_secs
            fields = [str(start), str(duration), str(value), speaker_1_label]
            fo.write(','.join(fields) + '\n')

In [None]:
## Writing filtered classification output to CSV

os.chdir(classifier_dir)

classifier_threshold = 0.30      # Classifier values below this threshold will be discarded

# Output path: media path minus its last four characters, plus a suffix naming the resolution.
csv_path = media_path[:-4]+'_mlpc4096_scaled_labels_filtered_'+str(resolution_secs)+'s.csv'

duration = resolution_secs

with open(csv_path,'w') as fo:
    # Same row layout as the unfiltered file (start,duration,value,label),
    # but the values first pass through attk.smooth().
    for counter, value in enumerate(attk.smooth(classifications)):
        if value >= classifier_threshold:
            start = counter * resolution_secs
            fields = [str(start), str(duration), str(value), speaker_1_label]
            fo.write(','.join(fields) + '\n')

In [None]:
%%capture
## Classifying every media file in a directory and writing label CSVs next to each file

resolution_secs = 3.0

os.chdir('/sharedfolder/AAPB_Subset_4000/')

tic = timeit.default_timer()

# Absolute paths are taken now, so later directory changes cannot affect them.
media_paths = [os.path.abspath(item) for item in os.listdir('./') if item[-4:] in ('.mp3','.wav','.mp4')]
random.shuffle(media_paths)

# One entry per CSV written for each media file:
#   (filename suffix, classifier_threshold, whether to pass values through attk.smooth)
# Classifier values below the threshold are discarded.
# NOTE(review): the "10_frame_window" and "5_frame_window" outputs both call
# attk.smooth(classifications) with the same arguments and threshold, so
# unless smooth() is non-deterministic they contain the same rows. Pass the
# intended window size once attk.smooth's signature is confirmed.
label_outputs = [
    ('_mlpc4096_scaled_labels_', 0.0, False),
    ('_mlpc4096_scaled_labels_filtered_10_frame_window_', 0.25, True),
    ('_mlpc4096_scaled_labels_filtered_5_frame_window_', 0.25, True),
]

for media_path in media_paths:
    try:
        print(media_path)

        snd = AudioFileClip.AudioFileClip(media_path)

        classifications = []

        for i in range(int(attk.duration(media_path)/resolution_secs)):
            segment_start = i * resolution_secs
            try:
                snd.subclip(segment_start, segment_start + resolution_secs).write_audiofile('/tmp/temp_clip.wav')
                classifications.append(classify_clip('/tmp/temp_clip.wav'))
            except Exception as err:
                # 0.0 placeholder keeps index i aligned with the segment start time.
                classifications.append(0.0)
                print("Error: " + str(i) + " (" + repr(err) + ")")

        duration = resolution_secs

        for suffix, classifier_threshold, use_smoothing in label_outputs:
            csv_path = media_path[:-4] + suffix + str(resolution_secs) + 's.csv'
            values = attk.smooth(classifications) if use_smoothing else classifications

            with open(csv_path,'w') as fo:
                for counter, value in enumerate(values):
                    if value >= classifier_threshold:
                        start = counter * resolution_secs
                        fo.write(str(start) + ',' + str(duration) +','+ str(value) + ',' + speaker_1_label + '\n')

    except Exception as err:
        # Keep going with the remaining files, but say which one failed and
        # why instead of dropping it silently; Exception (not a bare except)
        # lets a kernel interrupt stop the batch.
        print("Skipping " + media_path + ": " + repr(err))

In [None]:
%%capture
## Classifying every audio file in the Obama test directory at 1-second resolution

resolution_secs = 1.0

os.chdir('/sharedfolder/sida_classifier/Obama_test_audio')

tic = timeit.default_timer()

# Absolute paths are taken now, so the chdir inside the loop cannot affect them.
media_paths = [os.path.abspath(item) for item in os.listdir('./') if item[-4:] in ('.mp3','.wav')]
random.shuffle(media_paths)

# One entry per CSV written for each media file:
#   (filename suffix, classifier_threshold, whether to pass values through attk.smooth)
# Classifier values below the threshold are discarded.
# NOTE(review): the two filtered suffixes have no trailing underscore, so the
# resolution is appended directly ("..._window1.0s.csv"); kept as-is in case
# existing files or downstream tools depend on these names.
# NOTE(review): the "10_frame_window" and "5_frame_window" outputs both call
# attk.smooth(classifications) with the same arguments and threshold, so
# unless smooth() is non-deterministic they contain the same rows. Pass the
# intended window size once attk.smooth's signature is confirmed.
label_outputs = [
    ('_mlpc4096_scaled_labels_', 0.0, False),
    ('_mlpc4096_scaled_labels_filtered_10_frame_window', 0.40, True),
    ('_mlpc4096_scaled_labels_filtered_5_frame_window', 0.40, True),
]

for media_path in media_paths:
    try:
        # print() call form: the old `print media_path` statement is a
        # SyntaxError on Python 3 and differed from every other print in the file.
        print(media_path)

        snd = AudioFileClip.AudioFileClip(media_path)

        classifications = []

        for i in range(int(attk.duration(media_path)/resolution_secs)):
            segment_start = i * resolution_secs
            try:
                snd.subclip(segment_start, segment_start + resolution_secs).write_audiofile('/tmp/temp_clip.wav')
                classifications.append(classify_clip('/tmp/temp_clip.wav'))
            except Exception as err:
                # 0.0 placeholder keeps index i aligned with the segment start time.
                classifications.append(0.0)
                print("Error: " + str(i) + " (" + repr(err) + ")")

        # Same working-directory change as before, done once per file instead
        # of before each CSV. csv_path is built from an absolute media_path,
        # so it does not depend on the working directory.
        os.chdir('/sharedfolder/sida_classifier')

        duration = resolution_secs

        for suffix, classifier_threshold, use_smoothing in label_outputs:
            csv_path = media_path[:-4] + suffix + str(resolution_secs) + 's.csv'
            values = attk.smooth(classifications) if use_smoothing else classifications

            with open(csv_path,'w') as fo:
                for counter, value in enumerate(values):
                    if value >= classifier_threshold:
                        start = counter * resolution_secs
                        fo.write(str(start) + ',' + str(duration) +','+ str(value) + ',' + speaker_1_label + '\n')

    except Exception as err:
        # Keep going with the remaining files, but say which one failed and
        # why instead of dropping it silently; Exception (not a bare except)
        # lets a kernel interrupt stop the batch.
        print("Skipping " + media_path + ": " + repr(err))