In [None]:
import attk
import os
import csv
import numpy as np
import librosa
import timeit
import random
import subprocess
import unicodecsv
import urllib2
from sklearn.externals import joblib
from numpy import ma
from aubio import source, pitch
from moviepy.audio.io import AudioFileClip
from IPython.display import display, Audio

# Run everything relative to the project directory.
os.chdir('/sharedfolder/applause_classifier/')

# Class names; elsewhere in the notebook a class id is used as an index into this list.
speaker_list = ['Applause', 'Non-Applause']

speaker_0_label = speaker_list[0]
speaker_1_label = speaker_list[1]

In [None]:
## Shell out to `du` to report the disk usage of the applause class directory.
!du /sharedfolder/applause_classifier/_classes_Applause/applause/

In [None]:
## Load saved features

def load_features(dir_path):
    """Load every CSV file in `dir_path` into one list of float feature vectors.

    Each non-empty CSV row becomes one list of floats. Files are read in sorted
    filename order so that repeated runs return rows in the same order
    (`os.listdir` itself gives no ordering guarantee).

    Args:
        dir_path: directory whose entries are read as CSV files.

    Returns:
        A list of rows, each row a list of floats.

    Entries that cannot be opened or parsed are reported (path, then error) and
    skipped; rows read from a file before the failure are kept.
    """
    features = []
    for filename in sorted(os.listdir(dir_path)):
        file_path = os.path.join(dir_path, filename)
        try:
            with open(file_path) as fi:
                for row in csv.reader(fi):
                    # Blank lines parse as [] and would make the feature matrix ragged.
                    if not row:
                        continue
                    features.append([float(item) for item in row])
        except Exception as e:
            # Best effort: report the offending entry and carry on with the rest.
            print(file_path)
            print(e)
    return features

applause_features_dir = '/sharedfolder/applause_classifier/_classes_Applause/applause/_mfccs_and_deltas'
non_applause_features_dir = '/sharedfolder/applause_classifier/_classes_Applause/non_applause/_mfccs_and_deltas'

# Class 0 rows, followed by the row count.
speaker_0_features = load_features(applause_features_dir)
print(len(speaker_0_features))

# Class 1 rows, followed by the row count.
speaker_1_features = load_features(non_applause_features_dir)
print(len(speaker_1_features))


# Size of the smaller class; only needed if the down-sampling below is re-enabled.
class_sizes = [len(speaker_0_features), len(speaker_1_features)]
min_length = np.min(class_sizes)
#speaker_0_features = random.sample(speaker_0_features, min_length)
#speaker_1_features = random.sample(speaker_1_features, min_length)


#aapb_ubm_male_features = load_features('/sharedfolder/applause_classifier/AAPB_male_vowel_mfccs_and_deltas_100-5K_Hz')
#print(len(aapb_ubm_male_features))

#aapb_ubm_female_features = load_features('/sharedfolder/applause_classifier/AAPB_female_vowel_mfccs_and_deltas_100-5K_Hz')
#print(len(aapb_ubm_female_features))

In [None]:
## Show the feature vector of one randomly chosen frame from class 1

sample_frame = random.choice(speaker_1_features)
print(sample_frame)

In [None]:
## Combining feature sets

#speaker_1_features = speaker_1_features
#male_ubm_features = program_ubm_male_features +  aapb_ubm_male_features 
#female_ubm_features = program_ubm_female_features + aapb_ubm_female_features

#print(len(speaker_1_features))
#print(len(male_ubm_features))
#print(len(female_ubm_features))


In [None]:
## Training a multi-layer perceptron model with 9/10 of training data and evaluating performance on remaining 1/10

os.chdir('/sharedfolder/applause_classifier/')

from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import StandardScaler


def _split_train_test(features, n_parts=10):
    """Return (train, test), holding out the LAST ceil(len/n_parts) rows for testing.

    Floor division keeps the slice bound an int on both Python 2 and Python 3;
    the held-out size matches the original `-len(features)/10` bound under
    Python 2 integer division.
    """
    n_test = -(-len(features) // n_parts)
    return features[:-n_test], features[-n_test:]


# NOTE: the split is positional (no shuffling), so the held-out rows are the
# last ones loaded. Uncomment to shuffle before splitting:
#random.shuffle(speaker_0_features)
#random.shuffle(speaker_1_features)

speaker_0_train, speaker_0_test = _split_train_test(speaker_0_features)
speaker_1_train, speaker_1_test = _split_train_test(speaker_1_features)

# Class ids are indices into speaker_list (0 = Applause, 1 = Non-Applause),
# matching the labels used when training on all data. The previous 1/0
# assignment here was inverted relative to speaker_list.
X = speaker_0_train + speaker_1_train
y = [0]*len(speaker_0_train) + [1]*len(speaker_1_train)

X_train = np.array(X)
y_train = np.array(y)

X_test = np.array(speaker_0_test + speaker_1_test)
y_test = np.array([0]*len(speaker_0_test) + [1]*len(speaker_1_test))

# Fit the scaler on the training rows only, then apply it to both splits.
scaler = StandardScaler()
scaler.fit(X_train)

X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

classifier = MLPClassifier(max_iter = 2000, random_state = 9, \
                          hidden_layer_sizes = (100, 100), solver = 'adam', \
                          activation = 'relu').fit(X_train_scaled, y_train)

# Mean accuracy on the held-out tenth.
print(classifier.score(X_test_scaled,y_test))

In [None]:
## Training and saving an MLP model with all training data
from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import StandardScaler

# The model files are written with relative names; pin the working directory
# so they land where the "load pre-trained model" cell looks for them, no
# matter which cell changed directory last.
os.chdir('/sharedfolder/applause_classifier/')

# Class ids are indices into speaker_list: 0 = Applause, 1 = Non-Applause.
X = speaker_0_features + speaker_1_features
y = [0]*len(speaker_0_features) + [1]*len(speaker_1_features)

scaler = StandardScaler()
scaler.fit(X)

X_scaled = scaler.transform(X)

#classifier = MLPClassifier().fit(X_scaled, y)

classifier = MLPClassifier(max_iter = 2000, random_state = 9, \
                          hidden_layer_sizes = (100, 100), solver = 'adam', \
                          activation = 'relu').fit(X_scaled, y)

trained_model_filename = 'Applause' + '_mlpc_4096_100-16K_scaled_.pkl'
print(trained_model_filename)

## Saving trained model
## The scaler is stored next to it; new audio must be scaled with it before prediction.
joblib.dump(classifier, trained_model_filename)
joblib.dump(scaler, trained_model_filename.replace('.pkl', '.scaler'))

# Reload from disk so the in-memory classifier is the one that was saved.
classifier = joblib.load(trained_model_filename)

In [None]:
#from sklearn.mixture import GaussianMixture

#gmm_classifier = GaussianMixture(n_components=3, covariance_type='diag', max_iter=3000).fit(X_scaled, y)

In [None]:
##############################################
#### Start here to load pre-trained model ####
##############################################

#trained_model_filename = 'mbmbam' + '_vowels_mlpc_4096_100-5K_scaled.pkl'

os.chdir('/sharedfolder/applause_classifier')

# NOTE: joblib files are pickle-based; only load model files you created yourself.
classifier = joblib.load('Applause_mlpc_4096_100-16K_scaled_.pkl')

# The classification cells call scaler.transform(), so the scaler saved next
# to the model has to be restored too when starting from this cell.
scaler_filename = 'Applause_mlpc_4096_100-16K_scaled_.scaler'
if os.path.exists(scaler_filename):
    scaler = joblib.load(scaler_filename)
else:
    print("Scaler file not found: " + scaler_filename + " (run the training cell to create it)")

In [None]:
import matplotlib.pyplot as plt
import pandas as pd

from sklearn.decomposition import PCA as sklearnPCA
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
from sklearn.datasets.samples_generator import make_blobs

from pandas.tools.plotting import parallel_coordinates

In [None]:
# LDA can produce at most (n_classes - 1) discriminant axes, so a two-class
# problem yields a single axis. Cap the request at 2 for plotting.
n_lda_components = min(2, len(set(y)) - 1)
lda = LDA(n_components=n_lda_components)
lda_transformed = pd.DataFrame(lda.fit_transform(X_scaled, y))
lda_transformed.tail()

In [None]:
# Attach the class ids as a 'y' column so the plotting cells can select rows per class.
lda_transformed['y'] = y

In [None]:
# 30 mfccs, 4096

# With two classes LDA yields a single discriminant (column 0 only). In that
# case draw each class on its own horizontal row instead of indexing a
# non-existent second column.
has_second_axis = 1 in lda_transformed.columns

for class_id, class_label, color in [(0, 'Applause', 'red'), (1, 'Non_Applause', 'blue')]:
    class_rows = lda_transformed[lda_transformed['y'] == class_id]
    if has_second_axis:
        vertical = class_rows[1]
    else:
        vertical = [class_id] * len(class_rows)
    plt.scatter(class_rows[0], vertical, label=class_label, c=color, alpha=0.2)

plt.legend(loc=3)
plt.show()

In [None]:
# 30 mfccs, 2048
# NOTE(review): the legend names three speakers (class ids 0-2); this looks like a
# leftover from a three-class experiment -- confirm whether the cell is still needed.

for class_id, speaker_name, color in ((0, 'Justin', 'red'), (1, 'Griffin', 'blue'), (2, 'Travis', 'green')):
    class_rows = lda_transformed[lda_transformed['y']==class_id]
    plt.scatter(class_rows[0], class_rows[1], label=speaker_name, c=color, alpha=0.1)

plt.legend(loc=3)
plt.show()

In [None]:
# 13 mfccs, 2048
# NOTE(review): the legend names three speakers (class ids 0-2); this looks like a
# leftover from a three-class experiment -- confirm whether the cell is still needed.

for class_id, speaker_name, color in ((0, 'Justin', 'red'), (1, 'Griffin', 'blue'), (2, 'Travis', 'green')):
    class_rows = lda_transformed[lda_transformed['y']==class_id]
    plt.scatter(class_rows[0], class_rows[1], label=speaker_name, c=color)

plt.legend(loc=3)
plt.show()

In [None]:

# Old
# NOTE(review): the legend names three speakers (class ids 0-2); this looks like a
# leftover from a three-class experiment -- confirm whether the cell is still needed.

for class_id, speaker_name, color in ((0, 'Justin', 'red'), (1, 'Griffin', 'blue'), (2, 'Travis', 'green')):
    class_rows = lda_transformed[lda_transformed['y']==class_id]
    plt.scatter(class_rows[0], class_rows[1], label=speaker_name, c=color)

plt.legend(loc=3)
plt.show()

In [None]:
#%%capture
## Split unseen audio into 3-second WAV clips for testing
## (the download / mp3 -> wav conversion steps are commented out below)

os.chdir('/sharedfolder/applause_classifier/')

# Create the clip directory only when it is missing. A real failure (e.g. a
# permissions problem) now raises here instead of being silently swallowed.
if not os.path.isdir('test_clips/'):
    os.mkdir('test_clips/')

os.chdir('/sharedfolder/applause_classifier/test_clips/')

wav_filename = "LM_03_John-Prine_1976-06-13_CAS_A_SRM.wav"


#subprocess.call(['wget', '-N', mp3_url])

#subprocess.call(['ffmpeg', '-i', mp3_filename, wav_filename])

# ffmpeg's segment muxer writes consecutive 3-second files numbered by %04d.
segment_pattern = wav_filename[:-4] + '_3_sec_%04d.wav'
ffmpeg_status = subprocess.call(['ffmpeg', '-i', wav_filename, '-f', 'segment', '-segment_time', '3', segment_pattern])

# subprocess.call does not raise on failure, so check the exit status explicitly.
if ffmpeg_status != 0:
    print("ffmpeg exited with status " + str(ffmpeg_status))


In [None]:
def most_common_class(class_ids):
    """Return the most frequent class id and the share of items that carry it.

    Args:
        class_ids: non-empty sequence of integer class ids (a list or a numpy
            array; numpy integer scalars are accepted).

    Returns:
        (mode_id, mode_id_percentage): `mode_id` is a plain int, and
        `mode_id_percentage` is the fraction (0-1] of items equal to it.
        Ties are resolved in favour of the smallest class id, which is the
        same tie-break `scipy.stats.mode` applies.

    Raises:
        ValueError: if `class_ids` is empty.
    """
    # Local import: keeps this helper independent of which cells ran before it.
    from collections import Counter

    ids = [int(item) for item in class_ids]
    if not ids:
        raise ValueError("most_common_class() needs at least one class id")

    counts = Counter(ids)
    top_count = max(counts.values())
    mode_id = min(class_id for class_id, count in counts.items() if count == top_count)
    mode_id_percentage = float(top_count) / len(ids)
    return (mode_id, mode_id_percentage)

In [None]:
## Classifying short clips
#### Repeat this cell several times to help choose a classifier threshold value.

# `import scipy` alone does not guarantee that the stats submodule is loaded.
import scipy.stats

os.chdir('/sharedfolder/applause_classifier/test_clips/')

# Pick one of the 3-second segments at random.
wav_pathname = os.path.abspath(random.choice([item for item in os.listdir('./') if '3_sec' in item]))

test_features = np.array(attk.get_mfccs_and_deltas(wav_pathname, n_mfcc=30, n_fft=8192))
test_features = scaler.transform(test_features)

print(wav_pathname)

results = classifier.predict(test_features)  ## Predicted class id per feature row
results_proba = classifier.predict_proba(test_features)  ## Class probabilities per feature row

print(results)
print([round(max(item), 4) for item in list(results_proba)])

vowel_bools = attk.get_vowel_segments(wav_pathname, n_fft=8192)

## Vowel filtering is currently disabled (vowel_bools is computed but not applied),
## so every prediction is kept.
vowel_results = list(results)

display(Audio(wav_pathname))

## Use the shared helper for the mode instead of indexing scipy.stats.mode output,
## whose return shape differs between SciPy versions.
print("MODE: " + str(most_common_class(list(results))[0]))

mode_id, mode_id_percentage = most_common_class(vowel_results)
print("MODE vowels only: " + str(mode_id)) ## Vowels only
#print("All samples: "+str(np.mean(results)))
#print("Vowels only: "+str(np.mean(vowel_results)))

top_label = speaker_list[mode_id]

print('')
print("Speaker: " + str(top_label))
print("Confidence: " + str(mode_id_percentage))

print('')

print(str(mode_id) +','+ str(mode_id_percentage) + ',' + str(top_label) + '\n')

In [None]:
## Classifying short clips
#### Re-classifies the clip chosen by the previous cell: wav_pathname and test_features
#### are reused as-is (the lines that would pick a new clip are commented out).

# `import scipy` alone does not guarantee that the stats submodule is loaded.
import scipy.stats

os.chdir('/sharedfolder/applause_classifier/test_clips/')

#wav_pathname = os.path.abspath(random.choice([item for item in os.listdir('./') if '3_sec' in item]))

#test_features = np.array(attk.get_mfccs_and_deltas(wav_pathname))
#test_features = scaler.transform(test_features)

print(wav_pathname)

results = classifier.predict(test_features)  ## Predicted class id per feature row
results_proba = classifier.predict_proba(test_features)  ## Class probabilities per feature row

print(results)
print([round(max(item), 4) for item in list(results_proba)])

vowel_bools = attk.get_vowel_segments(wav_pathname, n_fft=4096)

## Vowel filtering is currently disabled (vowel_bools is computed but not applied),
## so every prediction is kept.
vowel_results = list(results)

display(Audio(wav_pathname))

## Use the shared helper for the mode instead of indexing scipy.stats.mode output,
## whose return shape differs between SciPy versions.
print("MODE: " + str(most_common_class(list(results))[0]))

mode_id, mode_id_percentage = most_common_class(vowel_results)
print("MODE vowels only: " + str(mode_id)) ## Vowels only
#print("All samples: "+str(np.mean(results)))
#print("Vowels only: "+str(np.mean(vowel_results)))

top_label = speaker_list[mode_id]

print('')
print("Speaker: " + str(top_label))
print("Confidence: " + str(mode_id_percentage))

print('')

print(str(mode_id) +','+ str(mode_id_percentage) + ',' + str(top_label) + '\n')

In [None]:
## Function that classifies every feature row of a clip and returns the
## most common class together with the share of rows assigned to it

def classify_clip(clip_pathname, n_mfcc=30, n_fft=8192):
    """Classify one audio clip with the notebook-level `scaler` and `classifier`.

    Args:
        clip_pathname: path of the audio file to classify.
        n_mfcc: passed through to `attk.get_mfccs_and_deltas` (default 30).
        n_fft: passed through to `attk.get_mfccs_and_deltas` (default 8192).
            NOTE(review): these should match the settings used to extract the
            training features -- confirm against the feature-extraction code.

    Returns:
        (mode_id, mode_id_percentage) as returned by `most_common_class`: the
        most frequent predicted class id and the fraction of predictions equal
        to it.
    """
    # Presumably one feature row per analysis frame -- TODO confirm in attk.
    mfccs = np.array(attk.get_mfccs_and_deltas(clip_pathname, n_mfcc=n_mfcc, n_fft=n_fft))
    # Features must be scaled with the scaler that was fitted on the training data.
    mfccs = scaler.transform(mfccs)
    predicted_ids = list(classifier.predict(mfccs))
    return most_common_class(predicted_ids)


In [None]:
## Clean-up: change into the test clip directory and delete every file whose
## name matches *_3_sec_* there (the pattern used when the audio was segmented).
## The glob is expanded by the shell relative to the directory set by os.chdir.
import scipy
os.chdir('/sharedfolder/applause_classifier/test_clips/')
!rm *_3_sec_*

In [None]:
import scipy

In [None]:
%%capture
## Classifying a long audio file

resolution_secs = 1
classifier_threshold = 0.30

os.chdir('/sharedfolder/applause_classifier/test_clips/')
#os.chdir('/sharedfolder/')

errors = []

import datetime

import timeit
tic=timeit.default_timer()


media_path = "/sharedfolder/applause_classifier/test_clips/LM_03_John-Prine_1976-06-13_CAS_A_SRM.wav"

#os.chdir('/sharedfolder/The_World_batch/The_World_WGBH_episodes/')

#media_path = random.choice([item for item in os.listdir('./') if '.wav' in item])

time_str = str(datetime.datetime.now()).replace(':', '').split('.')[0].replace(' ', '_')

csv_path = media_path[:-4]+'_applause_mlpc4096_labels_100-500Hz_scaled_'+str(resolution_secs)+'s_' + time_str +'.csv'

snd = AudioFileClip.AudioFileClip(media_path)

classifications = []
with open(csv_path,'w') as fo:
    fo.write('')

for i in range(int(attk.duration(media_path)/resolution_secs)):
    try:
        snd.subclip(i * resolution_secs , (i * resolution_secs) + resolution_secs).write_audiofile('/tmp/temp_clip.wav')
        mode_id, mode_id_percentage = classify_clip('/tmp/temp_clip.wav')
        os.remove('/tmp/temp_clip.wav')
        
        top_label = speaker_list[mode_id]
        if mode_id_percentage > classifier_threshold:
            with open(csv_path,'a') as fo:
                duration = resolution_secs
                start = i * resolution_secs
                fo.write(str(start) + ',' + str(duration) +','+ str(mode_id) +','+ str(mode_id_percentage) + ',' + str(top_label) + '\n')
    except Exception as e:
        errors.append(e)
        print("Error: " + str(i))
        print(e)

In [None]:
%%capture
## BATCH classifying long audio files

resolution_secs = 1
classifier_threshold = 0.30


errors = []

import datetime

import timeit
tic=timeit.default_timer()


media_dir = "/sharedfolder/applause_classifier/LM_Live_Recordings/"

os.chdir(media_dir)
media_paths = [item for item in os.listdir('./') if (item[-4:].lower() in ('.mp3','.wav','.mp4')) & (item[0]!='.')]
random.shuffle(media_paths)

for media_path in media_paths:

    time_str = str(datetime.datetime.now()).replace(':', '').split('.')[0].replace(' ', '_')

    csv_path = media_path[:-4]+'_applause_mlpc4096_labels_100-16kHz_scaled_'+str(resolution_secs)+'s_' + time_str +'.csv'

    snd = AudioFileClip.AudioFileClip(media_path)

    classifications = []
    with open(csv_path,'w') as fo:
        fo.write('')

    for i in range(int(attk.duration(media_path)/resolution_secs)):
        try:
            snd.subclip(i * resolution_secs , (i * resolution_secs) + resolution_secs).write_audiofile('/tmp/temp_clip.wav')
            mode_id, mode_id_percentage = classify_clip('/tmp/temp_clip.wav')
            os.remove('/tmp/temp_clip.wav')

            top_label = speaker_list[mode_id]
            if mode_id_percentage > classifier_threshold:
                if mode_id==0:
                    with open(csv_path,'a') as fo:
                        duration = resolution_secs
                        start = i * resolution_secs
                        fo.write(str(start) + ',' + str(duration) +','+ str(mode_id) +','+ str(mode_id_percentage) + ',' + str(top_label) + '\n')
        except Exception as e:
            errors.append(e)
            print("Error: " + str(i))
            print(e)

In [None]:
# Seconds since `tic` was set by the classification cell, then ring the terminal bell.
elapsed_secs = timeit.default_timer() - tic
print("Time elapsed: "+str(elapsed_secs))
os.system("printf '\a'")

In [None]:
# Number of failed windows, then the distinct first arguments of the collected exceptions.
print(len(errors))
# Exceptions are not indexable on Python 3, and item[0] raises for an exception
# created without arguments; read .args explicitly and fall back to the repr.
print(list(set([item.args[0] if item.args else repr(item) for item in errors])))