In [None]:
import attk
import os
import csv
import numpy as np
import librosa
import scipy
import timeit
import random
import subprocess
import urllib2
from sklearn.externals import joblib
from numpy import ma
from aubio import source, pitch
from moviepy.audio.io import AudioFileClip
from IPython.display import display, Audio

# Relative paths used by the cells below resolve against the project directory.
os.chdir('/sharedfolder/music_classifier/')

# Human-readable class names; a class ID is used as an index into this list.
class_list = ['Music', 'Non-Music']

class_0_label = class_list[0]
class_1_label = class_list[1]

In [None]:
## Load saved features

def load_features(dir_path):
    """Load feature vectors from every CSV file in a directory.

    Each non-empty CSV row is converted to a list of floats and appended to
    the result. Files are visited in sorted filename order so that repeated
    runs see the rows in the same order; ``os.listdir`` on its own gives no
    ordering guarantee, and the train/test split in this notebook is
    positional.

    Parameters:
        dir_path: directory containing the feature CSV files.

    Returns:
        A list with one list of floats per CSV row, across all readable
        files.

    Loading is best-effort: when a file cannot be opened or a row cannot be
    parsed, the file's path and the error are printed and loading continues
    with the next file. Rows read from that file before the failure are kept.
    """
    features = []
    for filename in sorted(os.listdir(dir_path)):
        file_path = os.path.join(dir_path, filename)
        # Sub-directories cannot be read as CSV files; skip them quietly
        # instead of reporting a spurious error.
        if os.path.isdir(file_path):
            continue
        try:
            with open(file_path) as fi:
                for row in csv.reader(fi):
                    # csv.reader yields [] for blank lines; an empty vector
                    # would make the feature matrix ragged.
                    if not row:
                        continue
                    features.append([float(item) for item in row])
        except Exception as e:
            print(file_path)
            print(e)
    return features

# Read the saved feature rows for each class and report how many were loaded.
class_0_features_dir = '/sharedfolder/music_classifier/_classes_Music/Music/_mfccs_and_deltas'
class_1_features_dir = '/sharedfolder/music_classifier/_classes_Music/Non-Music/_mfccs_and_deltas'

class_0_features = load_features(class_0_features_dir)
print(len(class_0_features))

class_1_features = load_features(class_1_features_dir)
print(len(class_1_features))


# Size of the smaller class, used by the optional downsampling below.
min_length = np.array([len(class_0_features), len(class_1_features)]).min()

## Uncomment the lines below to downsample if necessary.

#class_0_features = random.sample(class_0_features, min_length)
#class_1_features = random.sample(class_1_features, min_length)

In [None]:
## Showing the MFCCs and deltas of one randomly chosen class-0 frame

sample_frame = random.choice(class_0_features)
print(sample_frame)

In [None]:
## Training a multi-layer perceptron model with 9/10 of training data and
## evaluating performance on the remaining 1/10

from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import StandardScaler

os.chdir('/sharedfolder/music_classifier/')

# Number of rows held out per class: one tenth, rounded up. Written with
# explicit floor division so the slice indices stay integers on Python 3 as
# well as Python 2 (a float slice index raises TypeError).
n_test_0 = (len(class_0_features) + 9) // 10
n_test_1 = (len(class_1_features) + 9) // 10

# Non-negative split points; the last n_test rows of each class are held out.
split_0 = len(class_0_features) - n_test_0
split_1 = len(class_1_features) - n_test_1

# Labels are indices into class_list (class_0 -> 0, class_1 -> 1), the same
# convention used when training the saved model, so class_list[prediction]
# gives the right name whichever cell produced `classifier`.
X = class_0_features[:split_0] + class_1_features[:split_1]
y = [0]*split_0 + [1]*split_1

X_train = np.array(X)
y_train = np.array(y)

X_test = class_0_features[split_0:] + class_1_features[split_1:]
y_test = [0]*n_test_0 + [1]*n_test_1

X_test = np.array(X_test)
y_test = np.array(y_test)

# Fit the scaler on the training rows only, then apply it to both sets.
scaler = StandardScaler()
scaler.fit(X_train)

X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

classifier = MLPClassifier(max_iter = 2000, random_state = 9,
                           hidden_layer_sizes = (100, 100), solver = 'adam',
                           activation = 'relu').fit(X_train_scaled, y_train)

# Mean accuracy on the held-out rows
print(classifier.score(X_test_scaled, y_test))

In [None]:
## Training and saving an MLP model with all training data

from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import StandardScaler

# The model files are written with relative names, so pin the working
# directory here: other cells change it to test_clips/, and the cell that
# loads the pre-trained model reads from the project directory.
os.chdir('/sharedfolder/music_classifier/')

# Labels are indices into class_list: class_0 -> 0, class_1 -> 1
X = class_0_features + class_1_features
y = [0]*len(class_0_features) + [1]*len(class_1_features)

scaler = StandardScaler()
scaler.fit(X)

X_scaled = scaler.transform(X)

classifier = MLPClassifier(max_iter = 2000, random_state = 9,
                           hidden_layer_sizes = (100, 100), solver = 'adam',
                           activation = 'relu').fit(X_scaled, y)

trained_model_filename = 'Music' + '_mlpc_4096_100-16K_scaled_.pkl'
scaler_filename = trained_model_filename.replace('.pkl', '.scaler')

print(trained_model_filename)
print(scaler_filename)

## Saving trained model
joblib.dump(classifier, trained_model_filename)
joblib.dump(scaler, scaler_filename)

# Reload both files straight away so a broken save is noticed now rather
# than in a later session.
classifier = joblib.load(trained_model_filename)
scaler = joblib.load(scaler_filename)

In [None]:
##############################################
#### Start here to load pre-trained model ####
##############################################

# The relative filenames below resolve against this directory.
os.chdir('/sharedfolder/music_classifier')

trained_model_filename = 'Music' + '_mlpc_4096_100-16K_scaled_.pkl'
# Same name as the model file, with the '.pkl' extension swapped for '.scaler'
scaler_filename = os.path.splitext(trained_model_filename)[0] + '.scaler'

# NOTE(review): joblib files are pickle-based; only load files you trust.
classifier, scaler = joblib.load(trained_model_filename), joblib.load(scaler_filename)

In [None]:
%%capture
## Download unseen audio and split into 3-second WAV clips for testing

test_clips_dir = '/sharedfolder/music_classifier/test_clips/'

# Create the clip directory only when it is missing, so that real failures
# (permissions, missing parent directory) surface instead of being swallowed.
if not os.path.isdir(test_clips_dir):
    os.mkdir(test_clips_dir)

os.chdir(test_clips_dir)

mp3_url = 'https://americanroutes.s3.amazonaws.com/shows/1825_02.mp3'

mp3_filename = mp3_url.split('/')[-1]

wav_filename = os.path.splitext(mp3_filename)[0] + '.wav'

# check_call raises CalledProcessError on a non-zero exit status, so a failed
# download or conversion stops the cell even though %%capture hides output.
# wget -N only re-downloads when the remote file is newer than the local copy.
subprocess.check_call(['wget', '-N', mp3_url])

# -y lets ffmpeg overwrite existing output, which keeps this cell re-runnable.
subprocess.check_call(['ffmpeg', '-y', '-i', mp3_filename, wav_filename])

subprocess.check_call(['ffmpeg', '-y', '-i', wav_filename, '-f', 'segment', '-segment_time', '3',  wav_filename[:-4] + '_3_sec_%04d.wav'])


In [None]:
def most_common_class(class_ids):
    """Return the most frequent class ID and the fraction of entries equal to it.

    Parameters:
        class_ids: non-empty sequence or array of integer class IDs.

    Returns:
        A tuple ``(mode_id, mode_id_percentage)`` where ``mode_id`` is a plain
        ``int`` and ``mode_id_percentage`` is a float in (0, 1]. When several
        IDs are tied for most frequent, the smallest ID is returned.

    Raises:
        ValueError: if ``class_ids`` is empty.
    """
    ids = np.asarray(class_ids)
    if ids.size == 0:
        raise ValueError('class_ids must contain at least one class ID')
    # np.unique returns the distinct IDs in ascending order with their counts;
    # argmax picks the first maximum, i.e. the smallest ID among ties.
    values, counts = np.unique(ids, return_counts=True)
    top = np.argmax(counts)
    mode_id = int(values[top])
    mode_id_percentage = float(counts[top]) / ids.size
    return (mode_id, mode_id_percentage)

In [None]:
## Classifying short clips
#### Repeat this cell several times to help choose a classifier threshold value.

os.chdir('/sharedfolder/music_classifier/test_clips/')

# Pick one of the 3-second clips at random
clip_names = [item for item in os.listdir('./') if '3_sec' in item]
if not clip_names:
    raise RuntimeError("No '3_sec' clips found in test_clips/; run the download/split cell first.")

wav_pathname = os.path.abspath(random.choice(clip_names))

test_features = np.array(attk.get_mfccs_and_deltas(wav_pathname, n_mfcc=30, n_fft=8192))
test_features = scaler.transform(test_features)

print(wav_pathname)

results = classifier.predict(test_features)  ## Predicted class for each feature row
results_proba = classifier.predict_proba(test_features)  ## Class probabilities for each feature row

print(results)
# Highest class probability for each feature row
print([round(max(item), 4) for item in list(results_proba)])

display(Audio(wav_pathname))

# The mode is computed once by most_common_class and reused for every line
# printed below.
mode_id, mode_id_percentage = most_common_class(results)
top_label = class_list[mode_id]

print("MODE: " + str(mode_id))

print('')
print("Class: " + str(top_label))
print("Confidence: " + str(mode_id_percentage))

print('')

print(str(mode_id) +','+ str(mode_id_percentage) + ',' + str(top_label) + '\n')

In [None]:
## Function that classifies a full audio segment
## and returns the mode class for the full clip

def classify_clip(wav_pathname):
    """Classify one audio file and summarise the predictions.

    Features are extracted with ``attk.get_mfccs_and_deltas`` (n_mfcc=30,
    n_fft=8192), scaled with the notebook-level ``scaler`` and passed to the
    notebook-level ``classifier``.

    Parameters:
        wav_pathname: path of the audio file to classify.

    Returns:
        The result of ``most_common_class`` applied to the predictions: a
        ``(mode_id, mode_id_percentage)`` tuple.
    """
    raw_features = attk.get_mfccs_and_deltas(wav_pathname, n_mfcc=30, n_fft=8192)
    scaled_features = scaler.transform(np.array(raw_features))
    # One predicted class ID per feature row
    predictions = classifier.predict(scaled_features)
    return most_common_class(predictions)

In [None]:
## Deleting 3-second test clips

import glob

os.chdir('/sharedfolder/music_classifier/test_clips/')

# Pure-Python replacement for the shell command `rm *_3_sec_*`: it does
# nothing when no clips are present and does not depend on a Unix shell.
for clip_path in glob.glob('*_3_sec_*'):
    if os.path.isfile(clip_path):
        os.remove(clip_path)

In [None]:
%%capture
## Classifying a long audio file

import datetime
import timeit

resolution_secs = 2
classifier_threshold = 0.50

# Scratch file that holds the segment currently being classified
temp_clip_path = '/tmp/temp_clip.wav'

os.chdir('/sharedfolder/music_classifier/test_clips/')

# Exceptions raised while processing individual segments; printed output is
# hidden by %%capture, so inspect this list after the cell has run.
errors = []

# Starting a timer so the next cell can report how long this one took
tic = timeit.default_timer()

# Choosing an MP3 from the 'test_clips' directory at random. The extension is
# matched at the end of the name because the last four characters are
# stripped below to build the CSV filename.
media_path = random.choice([item for item in os.listdir('./') if item.lower().endswith('.mp3')])

# Creating a current timestamp in order to give the output CSV a unique filename
time_str = str(datetime.datetime.now()).replace(':', '').split('.')[0].replace(' ', '_')

# Creating output CSV filename. The band label is '100-16kHz', the same label
# used by the batch cell and by the '100-16K' model filename.
csv_path = media_path[:-4]+'_music_mlpc4096_labels_100-16kHz_scaled_'+str(resolution_secs)+'s_resolution_' + time_str +'.csv'

# Loading audio from the media path using MoviePy package
snd = AudioFileClip.AudioFileClip(media_path)

classifications = []

# Creating an empty CSV file, to which we'll append classifier output one line at a time
# (This step isn't strictly necessary; it's a precaution in case there is already a file with the same filename.)
with open(csv_path,'w') as fo:
    fo.write('')

# Only whole segments are classified; a trailing partial segment is skipped.
for i in range(int(attk.duration(media_path)/resolution_secs)):
    start = i * resolution_secs
    duration = resolution_secs
    try:
        snd.subclip(start, start + resolution_secs).write_audiofile(temp_clip_path)
        mode_id, mode_id_percentage = classify_clip(temp_clip_path)

        top_label = class_list[mode_id]
        # Rows are written as: start, duration, class ID, confidence, class label
        if mode_id_percentage > classifier_threshold:
            with open(csv_path,'a') as fo:
                fo.write(str(start) + ',' + str(duration) +','+ str(mode_id) +','+ str(mode_id_percentage) + ',' + str(top_label) + '\n')
    except Exception as e:
        errors.append(e)
        print("Error: " + str(i))
        print(e)
    finally:
        # Remove the scratch file even when classification failed
        if os.path.exists(temp_clip_path):
            os.remove(temp_clip_path)

In [None]:
## Run this cell immediately after the one above to display the time elapsed.

# `tic` was recorded at the start of the classification cell.
elapsed_secs = timeit.default_timer() - tic
print("Time elapsed: "+str(elapsed_secs))

In [None]:
%%capture
## Batch classifying long audio files

import datetime
import timeit

resolution_secs = 2
classifier_threshold = 0.50

# Scratch file that holds the segment currently being classified
temp_clip_path = '/tmp/temp_clip.wav'

# Exceptions raised while processing individual segments; printed output is
# hidden by %%capture, so inspect this list after the cell has run.
errors = []

# Starting a timer so the next cell can report how long this one took
tic=timeit.default_timer()

media_dir = "/sharedfolder/music_classifier/test_clips/"

os.chdir(media_dir)

# Creating a list of all MP3s, WAVs, and MP4s, skipping hidden files
media_extensions = ('.mp3', '.wav', '.mp4')
media_paths = [item for item in os.listdir('./')
               if item[-4:].lower() in media_extensions and not item.startswith('.')]

random.shuffle(media_paths)

for media_path in media_paths:

    # Timestamp that makes each output CSV filename unique
    time_str = str(datetime.datetime.now()).replace(':', '').split('.')[0].replace(' ', '_')

    csv_path = media_path[:-4]+'_music_mlpc4096_labels_100-16kHz_scaled_'+str(resolution_secs)+'s_resolution_' + time_str +'.csv'

    snd = AudioFileClip.AudioFileClip(media_path)

    classifications = []

    # Start from an empty CSV; rows are appended one segment at a time
    with open(csv_path,'w') as fo:
        fo.write('')

    # Only whole segments are classified; a trailing partial segment is skipped.
    for i in range(int(attk.duration(media_path)/resolution_secs)):
        start = i * resolution_secs
        duration = resolution_secs
        try:
            snd.subclip(start, start + resolution_secs).write_audiofile(temp_clip_path)
            mode_id, mode_id_percentage = classify_clip(temp_clip_path)

            top_label = class_list[mode_id]
            # Rows are written as: start, duration, class ID, confidence, class label
            if mode_id_percentage > classifier_threshold:
                with open(csv_path,'a') as fo:
                    fo.write(str(start) + ',' + str(duration) +','+ str(mode_id) +','+ str(mode_id_percentage) + ',' + str(top_label) + '\n')
        except Exception as e:
            errors.append(e)
            # Name the file as well as the segment so a failure can be traced
            print("Error: " + media_path + " segment " + str(i))
            print(e)
        finally:
            # Remove the scratch file even when classification failed
            if os.path.exists(temp_clip_path):
                os.remove(temp_clip_path)

In [None]:
## Run this cell immediately after the one above to display the time elapsed.

# `tic` was recorded at the start of the classification cell.
elapsed_secs = timeit.default_timer() - tic
print("Time elapsed: "+str(elapsed_secs))
# Send a BEL character through the shell's printf as an audible "done" signal
os.system("printf '\a'")

In [None]:
# Number of exceptions collected by the most recent classification cell
print(len(errors))
# Distinct error messages. Exceptions are compared by their string form
# because indexing an exception (item[0]) is not supported on Python 3 and
# fails for exceptions created without arguments.
print(sorted(set([str(item) for item in errors])))