In [None]:
%%capture
# Install the latest version of attk (if necessary)
#!pip install -U git+https://github.com/hipstas/audio-tagging-toolkit.git

In [3]:
import attk
import os
import numpy as np
import librosa
from IPython.display import display, Audio
import timeit
import random
from sklearn.externals import joblib
from numpy import ma
from aubio import source, pitch
from moviepy.audio.io import AudioFileClip
import subprocess
import unicodecsv
import urllib2
import csv

## Creating the working directory for training audio (if needed) and moving into it.
## Done in plain Python rather than a `!mkdir` shell escape, so the cell does not
## depend on a POSIX shell being available.
training_root = '/sharedfolder/_training_audio/'

if not os.path.isdir(training_root):
    os.makedirs(training_root)

os.chdir(training_root)

## Uncomment lines below to download audio files for training

#!wget http://www.stephenmclaughlin.net/HILT/audio_corpora/NPR_Fresh_Air_diarized.zip
#!unzip NPR_Fresh_Air_diarized.zip

In [None]:
## Downloading random 1-second labels
csv_url = "https://raw.githubusercontent.com/hipstas/aapb-labels/master/Terry_Gross/Terry_Gross_labels.csv"

## Loading CSV as list of lists (one inner list per CSV row).
## The HTTP response is closed explicitly once every row has been read.
csv_response = urllib2.urlopen(csv_url)
try:
    train_table = list(unicodecsv.reader(csv_response))
finally:
    csv_response.close()

## Previewing the first ten rows
train_table[:10]+['...']

In [None]:
## Removing header row (if present).
## The emptiness check avoids an IndexError when no rows were downloaded.

if train_table and 'Media file basename' in train_table[0]:
    train_table = train_table[1:]

## Shuffling the label rows in place
random.shuffle(train_table)

In [None]:
%%capture

## Excerpting WAV clips corresponding to labels

training_audio_pathname = "NPR_Fresh_Air_diarized"
out_dir = '_classes_' + training_audio_pathname

## NOTE: this cell runs under %%capture, so the row/exception messages printed
## below are captured rather than displayed.
for row in train_table:
    try:
        basename, start, duration, class_name, labeled_by = row  ## Assigning values in row to variables
        filename = str(basename + '.mp3')
        start = float(start)
        end = start + float(duration)

        ## One output folder per class; spaces in the class name become underscores
        wav_out_pathname = str(os.path.join(out_dir, class_name.replace(' ', '_')))
        if not os.path.isdir(wav_out_pathname):
            os.makedirs(wav_out_pathname)

        attk.subclip(os.path.join(training_audio_pathname, filename), start, end, wav_out_pathname) ## <- attk
    except Exception as e:
        ## A bad row (wrong column count, non-numeric time, missing audio, ...) is
        ## reported and skipped so the remaining rows are still processed.
        print(row)
        print(e)


In [None]:
## Defining functions we'll use below


def extract_pairs(media_path, vowel_time_ranges, out_dir='_vowel_clips', min_duration=0.1):
    """Write one audio excerpt per sufficiently long time range of a media file.

    Parameters:
        media_path: path of the media file to excerpt from.
        vowel_time_ranges: iterable of ``(start, end)`` pairs; each value is
            converted with ``float`` before use.
        out_dir: directory the excerpts are written into (must already exist).
        min_duration: ranges shorter than this (``end - start``) are ignored.

    Each kept range is written to
    ``<out_dir>/<media basename>__<start>_<end>.wav`` with start and end
    rounded to four decimals. The media file is only opened when at least one
    range qualifies. Returns ``None``.
    """
    spans = []
    for start, end in vowel_time_ranges:
        start = float(start)
        end = float(end)
        if end - start >= min_duration:  ## Ignore clips shorter than min_duration
            spans.append((start, end))

    if not spans:
        return

    ## File name without directory or extension, used as the output prefix
    basename = os.path.splitext(os.path.basename(media_path))[0]
    snd = AudioFileClip.AudioFileClip(media_path)

    for start, end in spans:
        out_filename = basename+'__'+str(round(start, 4))+'_'+str(round(end, 4))+'.wav'
        snd.subclip(start, end).write_audiofile(os.path.join(out_dir, out_filename))


def batch_extract_vowels(media_dir):
    """Extract vowel clips from every media file found directly in ``media_dir``.

    The working directory is switched to ``media_dir`` for the duration of the
    call and restored afterwards, even if an error escapes. A ``_vowel_clips``
    folder is created there when missing. Every ``.mp3``/``.wav``/``.mp4`` file
    (visited in reverse ``os.listdir`` order) is passed to ``attk`` to obtain
    vowel ranges, which are scaled to seconds and handed to ``extract_pairs``.
    A file that raises is reported together with its error and skipped.
    """
    starting_location = os.getcwd()
    ## Numerically equal to 2048 / 44100; presumably the length in seconds of one
    ## 2048-sample analysis bin at 44.1 kHz -- TODO confirm against attk.
    bin_2048_to_sec_constant = 0.046439909297052155

    os.chdir(media_dir)
    try:
        if not os.path.isdir('_vowel_clips'):
            os.mkdir('_vowel_clips')

        filenames = [item for item in os.listdir('./') if item[-4:].lower() in ('.mp3', '.wav', '.mp4')]

        for filename in filenames[::-1]:
            try:
                vowel_bools = attk.get_vowel_segments(filename)
                vowel_bin_ranges = attk.labels_to_ranges(vowel_bools, label=True)
                vowel_time_ranges = [(s*bin_2048_to_sec_constant, e*bin_2048_to_sec_constant) for s, e in vowel_bin_ranges]
                extract_pairs(filename, vowel_time_ranges)
            except Exception as e:
                ## `except Exception` keeps the batch going after a bad file while
                ## still letting KeyboardInterrupt stop a long run.
                print("***** ERROR: " + filename + " (" + str(e) + ")")
    finally:
        os.chdir(starting_location)


In [None]:
%%capture

## Extracting vowel clips for the two class folders.
## The excerpting cell builds its '_classes_NPR_Fresh_Air_diarized' output path
## relative to /sharedfolder/_training_audio/, so that is where the class folders
## are looked up here.
os.chdir('/sharedfolder/_training_audio/_classes_NPR_Fresh_Air_diarized')

batch_extract_vowels('Terry_Gross')
batch_extract_vowels('Background_Speaker')

In [None]:
%%capture

## Pulling vowel segments out of the labeled audio, one class folder at a time

os.chdir('/sharedfolder/GitHub/sida/___training_audio/_classes_NPR_Fresh_Air_diarized')

## Alternative: process every class folder found here instead of the two named below
#for class_dir_name in [item for item in os.listdir('./') if os.path.isdir(item)]:
#    batch_extract_vowels(class_dir_name)

for class_dir_name in ('Terry_Gross', 'Background_Speaker'):
    batch_extract_vowels(class_dir_name)

In [None]:
## Extracting Speaker 1 features
tic=timeit.default_timer()

## NOTE(review): this root differs from the /sharedfolder/_training_audio/ tree used
## earlier in the notebook -- confirm which location actually holds the vowel clips.
classes_root = '/sharedfolder/GitHub/sida/___training_audio/_classes_NPR_Fresh_Air_diarized/'

os.chdir(classes_root)

dir_names = [item for item in os.listdir('./') if os.path.isdir(item)]

for dir_name in dir_names:

    clips_dir = os.path.join(classes_root, dir_name, '_vowel_clips')
    if not os.path.isdir(clips_dir):
        continue  ## No vowel clips have been extracted for this class folder

    os.chdir(clips_dir)

    if not os.path.isdir('../_vowel_mfccs_and_deltas'):
        os.mkdir('../_vowel_mfccs_and_deltas')

    ## One feature CSV per WAV clip; clips that already have a CSV are skipped
    for filename in [item for item in os.listdir('./') if item.lower()[-4:] == '.wav']:
        csv_out_path = '../_vowel_mfccs_and_deltas/' + filename[:-4] + '.mfcc.csv'
        if os.path.isfile(csv_out_path):
            continue
        try:
            mfccs = attk.get_mfccs_and_deltas(filename)
            with open(csv_out_path, 'w') as fo:
                csv_writer = csv.writer(fo)
                csv_writer.writerows(mfccs)
        except Exception as e:
            ## Report the failing clip and carry on with the rest
            print("ERROR on " + filename + ": " + str(e))

print(timeit.default_timer() - tic)

##


In [None]:
## Extracting Background Speaker Features

ubm_classes_root = '/sharedfolder/GitHub/sida/___training_audio/_classes_ubm_clips_final_July_2017/'

os.chdir(ubm_classes_root)

dir_names = [item for item in os.listdir('./') if os.path.isdir(item)]

for dir_name in dir_names:

    clips_dir = os.path.join(ubm_classes_root, dir_name, '_vowel_clips')
    if not os.path.isdir(clips_dir):
        continue  ## No vowel clips have been extracted for this class folder

    os.chdir(clips_dir)

    if not os.path.isdir('../_vowel_mfccs_and_deltas'):
        os.mkdir('../_vowel_mfccs_and_deltas')

    for filename in [item for item in os.listdir('./') if item.lower()[-4:] == '.wav']:
        ## Errors are handled per clip so one bad file no longer silently
        ## abandons the rest of its directory.
        try:
            mfccs = attk.get_mfccs_and_deltas(filename)
            with open('../_vowel_mfccs_and_deltas/' + filename[:-4] + '.mfcc.csv', 'w') as fo:
                csv_writer = csv.writer(fo)
                csv_writer.writerows(mfccs)
        except Exception as e:
            print("ERROR on " + filename + ": " + str(e))


In [4]:
## Unzipping prepared MFCC feature data
## NOTE: the four archives must already be in /sharedfolder/. They are only
## downloaded if the wget lines below are uncommented and run first; without
## them each unzip reports "cannot find or open ...".

os.chdir('/sharedfolder/')

#!wget https://raw.githubusercontent.com/hipstas/shaping-humanities-data/master/feature_sets/Terry_Gross_vowel_mfccs_and_deltas.zip
#!wget https://raw.githubusercontent.com/hipstas/shaping-humanities-data/master/feature_sets/Fresh_Air_ubm_vowel_mfccs_and_deltas.zip
#!wget https://raw.githubusercontent.com/hipstas/shaping-humanities-data/master/feature_sets/AAPB_female_vowel_mfccs_and_deltas.zip
#!wget https://raw.githubusercontent.com/hipstas/shaping-humanities-data/master/feature_sets/AAPB_male_vowel_mfccs_and_deltas.zip

!unzip Terry_Gross_vowel_mfccs_and_deltas.zip
!unzip Fresh_Air_ubm_vowel_mfccs_and_deltas.zip
!unzip AAPB_female_vowel_mfccs_and_deltas.zip
!unzip AAPB_male_vowel_mfccs_and_deltas.zip

unzip:  cannot find or open Terry_Gross_vowel_mfccs_and_deltas.zip, Terry_Gross_vowel_mfccs_and_deltas.zip.zip or Terry_Gross_vowel_mfccs_and_deltas.zip.ZIP.
unzip:  cannot find or open Fresh_Air_ubm_vowel_mfccs_and_deltas.zip, Fresh_Air_ubm_vowel_mfccs_and_deltas.zip.zip or Fresh_Air_ubm_vowel_mfccs_and_deltas.zip.ZIP.
unzip:  cannot find or open AAPB_female_vowel_mfccs_and_deltas.zip, AAPB_female_vowel_mfccs_and_deltas.zip.zip or AAPB_female_vowel_mfccs_and_deltas.zip.ZIP.
unzip:  cannot find or open AAPB_male_vowel_mfccs_and_deltas.zip, AAPB_male_vowel_mfccs_and_deltas.zip.zip or AAPB_male_vowel_mfccs_and_deltas.zip.ZIP.


In [None]:
## Loading saved features


def load_feature_rows(feature_dir):
    """Read every file in ``feature_dir`` as CSV and return all rows as lists of floats.

    Files are visited in ``os.listdir`` order and their rows are concatenated
    into a single list; every cell is converted with ``float``. The working
    directory is left untouched.
    """
    rows = []
    for filename in os.listdir(feature_dir):
        with open(os.path.join(feature_dir, filename)) as fi:
            for row in csv.reader(fi):
                rows.append([float(item) for item in row])
    return rows


gross_features = load_feature_rows('/sharedfolder/Terry_Gross_vowel_mfccs_and_deltas')
print(len(gross_features))

## Read from the Fresh Air UBM folder rather than the Terry Gross folder again.
## The folder name is assumed to match its zip archive, as the other three do -- TODO confirm.
fresh_air_ubm_features = load_feature_rows('/sharedfolder/Fresh_Air_ubm_vowel_mfccs_and_deltas')
print(len(fresh_air_ubm_features))

m_ubm_features = load_feature_rows('/sharedfolder/AAPB_male_vowel_mfccs_and_deltas')
print(len(m_ubm_features))

f_ubm_features = load_feature_rows('/sharedfolder/AAPB_female_vowel_mfccs_and_deltas')
print(len(f_ubm_features))


In [None]:
## Printing MFCCs and deltas for a single, randomly chosen frame
## of the Terry Gross feature set loaded above

print(random.choice(gross_features))

In [None]:
## Combining feature sets

## Speaker 1 is Terry Gross; the background (UBM) set pools the Fresh Air,
## AAPB male and AAPB female feature lists.
speaker_1_mfccs = gross_features
ubm_mfccs = fresh_air_ubm_features + m_ubm_features + f_ubm_features

#speaker_1_mfccs = terry_gross_mfccs[::-1]
#bg_mfccs = ubm_mfccs[::-1]

#random.shuffle(speaker_1_mfccs)
#random.shuffle(ubm_mfccs)

print(len(speaker_1_mfccs))
print(len(ubm_mfccs))

In [None]:
## Training MLP classifier (an Extra Trees alternative is left commented out below)

os.chdir('/sharedfolder/')

from sklearn.neural_network import MLPClassifier
from sklearn.ensemble import ExtraTreesClassifier


def holdout_split(rows, denominator=10):
    """Split ``rows`` into ``(train, test)``, holding out the trailing rows for testing.

    The test part is the last ``ceil(len(rows) / denominator)`` rows and the
    training part is everything before it. Only integer arithmetic is used, so
    the split is the same on Python 2 and Python 3.
    """
    n_test = -(-len(rows) // denominator)  ## Ceiling division
    split_at = len(rows) - n_test
    return rows[:split_at], rows[split_at:]


speaker_1_train, speaker_1_test = holdout_split(speaker_1_mfccs)
ubm_train, ubm_test = holdout_split(ubm_mfccs)

## Label 1 = speaker 1, label 0 = background
X = speaker_1_train + ubm_train
y = [1]*len(speaker_1_train) + [0]*len(ubm_train)

X_test = speaker_1_test + ubm_test
y_test = [1]*len(speaker_1_test) + [0]*len(ubm_test)

#classifier = ExtraTreesClassifier().fit(X, y)
classifier = MLPClassifier().fit(X, y)

## Saving trained model, then reloading it from disk
joblib.dump(classifier,'gross_vowels_mlpc_2048.pkl')
classifier = joblib.load('gross_vowels_mlpc_2048.pkl')

print(classifier.score(X_test,y_test))

In [None]:
## Re-reporting the classifier's score on the held-out test rows
print(classifier.score(X_test,y_test))

In [None]:
## Loading pre-trained model

#from sklearn.ensemble import RandomForestClassifier

#random_forest=joblib.load('pesca_vowels_random_forest_2048.pkl')

In [None]:
#### Wget and unzip the Fresh Air clips

In [None]:
## Classifying short clips

os.chdir('/sharedfolder/Fresh_Air_2017-07-31_3-sec_clips/')

tic = timeit.default_timer()

## Choosing one clip at random; the bare file name is resolved against the
## working directory set just above
clip_name = random.choice(os.listdir('/sharedfolder/Fresh_Air_2017-07-31_3-sec_clips/'))
wav_pathname = os.path.abspath(clip_name)

test_mfccs = attk.get_mfccs_and_deltas(wav_pathname)

print(wav_pathname)

## Predicting new observation
results = classifier.predict(test_mfccs)

print(results)

## Keeping only the predictions whose position is flagged as a vowel
vowel_bools = attk.get_vowel_segments(wav_pathname)
vowel_results = [results[i] for i in range(len(results)) if vowel_bools[i]==True]

display(Audio(wav_pathname))

mean_all = np.mean(results)
print("All: "+str(mean_all))

mean_vowels = np.mean(vowel_results)
print("Vowels only: "+str(mean_vowels))

#print("Time elapsed: "+str(timeit.default_timer() - tic))

In [None]:
def classify_clip(clip_pathname):
    """Return the mean predicted label over the vowel frames of one audio clip.

    MFCC/delta features for ``clip_pathname`` are classified with the global
    ``classifier``; only predictions at positions that ``attk`` flags as vowels
    are averaged. With the 0/1 labels used for training in this notebook, the
    result is the share of vowel frames predicted as class 1.

    Returns 0.0 when the clip has no vowel frames, so callers never receive
    NaN from averaging an empty list.
    """
    mfccs = attk.get_mfccs_and_deltas(clip_pathname)
    results = classifier.predict(mfccs)  ## Predicting new observation
    vowel_bools = attk.get_vowel_segments(clip_pathname)

    vowel_results = [results[i] for i in range(len(results)) if vowel_bools[i]]

    if not vowel_results:
        return 0.0

    return np.mean(vowel_results) ## Vowels only


## Trying the function on the clip chosen in the previous cell
## (the call sits below the definition so the notebook runs top to bottom)
print(classify_clip(wav_pathname))



In [None]:


## Classifying a long audio file

tic=timeit.default_timer()

media_path = "/sharedfolder/fa_eval/FA_Author_Tom_Perrotta.mp3"

snd = AudioFileClip.AudioFileClip(media_path)

## Length in seconds of each consecutive window that gets classified
window_secs = 3.0

classifications=[]

## Window i covers [3*i, 3*i + 3) seconds, matching the 3.0-second scaling
## applied to these indices when the labels are written to CSV.
for i in range(int(attk.duration(media_path)/window_secs)):
    window_start = i * window_secs
    try:
        snd.subclip(window_start, window_start + window_secs).write_audiofile('/tmp/temp_clip.wav')
        classifications.append(classify_clip('/tmp/temp_clip.wav'))
    except Exception:
        ## A window that cannot be excerpted or classified counts as background (0.0)
        classifications.append(0.0)
        print("missed one")



In [None]:
## Writing classification output to CSV

## Window indices assigned to each class by thresholding the smoothed scores
class_0_secs=[]
class_1_secs=[]

for i, classification in enumerate(attk.smooth(np.array(classifications))):
    if classification < 0.1:
        class_0_secs.append(i)
    elif classification >= 0.1:
        class_1_secs.append(i)

csv_path=os.path.splitext(media_path)[0]+'_mlpc2048_labels.csv'

## One CSV line per range: start, end, label. The indices are scaled by 3.0;
## background ranges are written first, then Terry Gross ranges.
with open(csv_path,'w') as fo:
    for window_indices, label in ((class_0_secs, 'Background'), (class_1_secs, 'Terry Gross')):
        for pair in attk.labels_to_ranges(window_indices):
            fo.write(str(3.0 * float(pair[0]))+','+str(3.0 * float(pair[1]))+','+label+'\n')


print(timeit.default_timer() - tic)

In [None]:
## Showing the current working directory
!pwd
