In [54]:
import numpy as np
from matplotlib import pyplot as plt
import os
import glob
import madmom
import myutils
import collections
import subprocess
from os.path import basename
import librosa

from essentia import *
from essentia.standard import *

In [55]:
# datasetPath = "/Users/carthach/tmp/drum_timbre/dataset/"
datasetPath = "/Users/carthach/tmp/drum_timbre/combined"

#Add Trailing Slash
datasetPath = os.path.join(datasetPath, '', '')

print datasetPath

#Get the timbreClasses in datasetPath
timbreClasses = [d for d in os.listdir(datasetPath) if os.path.isdir(os.path.join(datasetPath, d))]

instrumentClasses = []

# for timbreClass in timbreClasses:
#     instrumentPath = datasetPath + timbreClass
#     sectionInstrumentClasses = [d for d in os.listdir(instrumentPath) if os.path.isdir(os.path.join(instrumentPath, d))]
#     sectionInstrumentClasses = ["%s/%s" % (timbreClass, sectionInstrumentClass) for sectionInstrumentClass in sectionInstrumentClasses]
    
#     instrumentClasses += sectionInstrumentClasses
    
# print instrumentClasses    

print timbreClasses


/Users/carthach/tmp/drum_timbre/combined/
['cymbals', 'perc', 'hats', 'toms', 'claps', 'snares', 'kicks']


In [56]:
def poolToVector(pool):
    featureVector = []
    
    for feature in pool.descriptorNames():
        if isinstance(pool[feature], (list, tuple, np.ndarray)):
            for featureScalar in pool[feature]:
                featureVector.append(featureScalar)
        else:
            featureVector.append(pool[feature])
                                        
    return featureVector

def savePool(filename, pool):
    YamlOutput(filename = filename, writeVersion=False)(pool)
    
def loadPool(filename):
    yamlInput = YamlInput(filename=filename)
    
    pool = yamlInput()
    
    return pool

def extractFeatures(filename):
    audio, sampleRate = librosa.load(filename, sr=44100)
#     audio, sampleRate = loadAudio(filename)

    # create the pool and the necessary algorithms
    pool = Pool()
    w = Windowing()
    spec = Spectrum()
    centroid = Centroid(range=22050)
    flatness = Flatness()
    flatnessDB = FlatnessDB()
    mfcc = MFCC()
    bfcc = BFCC(
        type = 'power',
        weighting = 'linear',
        lowFrequencyBound = 0,
        highFrequencyBound = 22050,
#         highFrequencyBound = 8000,        
        numberBands = 26,
        numberCoefficients = 13,
        normalize = 'unit_max',
        dctType = 3,
        liftering = 22
    )
    loudness = Loudness()
    logAttackTime = LogAttackTime()
        
    # compute the centroid for all frames in our audio and add it to the pool
    for frame in FrameGenerator(audio, frameSize = 1024, hopSize = 512):        
        s = spec(w(frame))
        
        c = centroid(s)
        pool.add('centroid', c)
        
#         f = flatness(s)
        f = flatnessDB(s)        
        pool.add('flatness', f)        

        m_bands, m_coeffs = mfcc(s)
        pool.add('mfcc', m_coeffs)
        
        b_bands, b_coeffs = bfcc(s)
        pool.add('bfcc', b_coeffs)
        
    # aggregate the results
    aggrPool = PoolAggregator(defaultStats = [ 'mean', 'var'])(pool)
    
    l, aStart, aStop = logAttackTime(audio)
    aggrPool.add('logAttackTime', l)
    aggrPool.add('loudness', loudness(audio))
    
    return aggrPool

In [59]:
def analyseSounds(datasetPath, soundClasses, reAnalyse=False, featureSet=[]):
    features = []
    labels = []
    
    for soundClass in soundClasses:
        soundPath = datasetPath + soundClass

        soundFiles = myutils.getListOfFilesRecursive(soundPath, "*.wav")
            
        print "Processing: %s" % soundClass
    
        for soundFile in soundFiles:
            yamlFile = soundFile + ".yaml"
            
            #Analyse or retrieve yaml features
            if not os.path.isfile(yamlFile) or reAnalyse:            
                featurePool = extractFeatures(soundFile)
                savePool(yamlFile, featurePool)
            else:        
                featurePool = loadPool(yamlFile)
                
            for descriptor in featurePool.descriptorNames():
                if descriptor not in featureSet:
                    featurePool.remove(descriptor)
                                                        
            featureVector = poolToVector(featurePool)
                    
#             print featureVector
#             print("\n*********************")
                    
#             timbreFeatureDict[timbreClass][timbreClassFile] = features

            features.append(featureVector)
            labels.append(soundClasses.index(soundClass))  
        
#             print "\t%s" % soundFile
    
    return features, labels

temporalFeatures = ["loudness", "logAttackTime"]
spectralFeatures = ["centroid.mean", "centroid.var", "flatness.mean", "flatness.var"]
mfccFeatures = ["mfcc.mean", "mfcc.var"]
bfccFeatures = ["bfcc.mean", "bfcc.var"]
temporalSpectralFeatures = temporalFeatures + spectralFeatures
mfccTemporalSpectralFeatures = mfccFeatures + temporalSpectralFeatures
bfccTemporalSpectralFeatures = bfccFeatures + temporalSpectralFeatures

featureSets = {
    "Temporal" : temporalFeatures,
    "Spectral" : spectralFeatures,
    "Temporal+Spectral" : temporalSpectralFeatures,
    "MFCC" : mfccFeatures,
    "BFCC" : bfccFeatures,
    "MFCC+Temporal+Spectral" : mfccTemporalSpectralFeatures,
    "BFCC+Temporal+Spectral" : bfccTemporalSpectralFeatures
}

# features, labels = analyseSounds(datasetPath, instrumentClasses, reAnalyse=True, featureSet=featureSet)
    

In [60]:
from sklearn.neural_network import MLPClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import ShuffleSplit
import csv

#Select this to true if you want to analyse on the first iteration
reAnalyse = True

with open('results.csv', 'wb') as csvfile:
    resultswriter = csv.writer(csvfile, delimiter=',',
                            quotechar='|', quoting=csv.QUOTE_MINIMAL)
    resultswriter.writerow(["", "kNN", "SVM", "ANN"])    
    
    for featureSetName, featureSet in featureSets.iteritems():
        print("Evaluating: %s" % featureSetName)
        
        print datasetPath
        
        features, labels = analyseSounds(datasetPath, timbreClasses, reAnalyse=reAnalyse, featureSet=featureSet)
#         features, labels = analyseSounds(datasetPath, instrumentClasses, reAnalyse=reAnalyse, featureSet=featureSet)        

        if reAnalyse:
            reAnalyse = False
        
    #     print features



        classifierNames = ["kNN", "SVM", "ANN"]

        classifiers = [KNeighborsClassifier(1),
                       SVC(gamma=0.1, C=100.),
                       MLPClassifier(max_iter=500)
                      ]
        
        results = []

        # iterate over classifiers        
        for classifierName, clf in zip(classifierNames, classifiers):
            if classifierName == "ANN":
                scaler = StandardScaler()
            else:
                scaler = MinMaxScaler()
                
            features = scaler.fit_transform(features)

            n_samples = len(features)
            n_features = len(features[0])
            n_units = n_features
            
            print("Classifying with: %s" % classifierName)
            scores = cross_val_score(clf, features, labels, cv=10)
            result = "%0.2f (+/- %0.2f)" % (scores.mean(), scores.std() * 2)
            print(result)
            
            results.append(result)
            
        resultswriter.writerow([featureSetName, results[0], results[1], results[2]])
        
print("DONE!")

Evaluating: Spectral
/Users/carthach/tmp/drum_timbre/combined/
Processing: cymbals
Processing: perc
Processing: hats
Processing: toms
Processing: claps
Processing: snares
Processing: kicks
Classifying with: kNN
0.62 (+/- 0.11)
Classifying with: SVM
0.57 (+/- 0.09)
Classifying with: ANN
0.61 (+/- 0.09)
Evaluating: MFCC
/Users/carthach/tmp/drum_timbre/combined/
Processing: cymbals
Processing: perc
Processing: hats
Processing: toms
Processing: claps
Processing: snares
Processing: kicks
Classifying with: kNN
0.84 (+/- 0.09)
Classifying with: SVM
0.83 (+/- 0.09)
Classifying with: ANN
0.85 (+/- 0.10)
Evaluating: Temporal
/Users/carthach/tmp/drum_timbre/combined/
Processing: cymbals
Processing: perc
Processing: hats
Processing: toms
Processing: claps
Processing: snares
Processing: kicks
Classifying with: kNN
0.39 (+/- 0.12)
Classifying with: SVM
0.41 (+/- 0.09)
Classifying with: ANN
0.47 (+/- 0.10)
Evaluating: BFCC+Temporal+Spectral
/Users/carthach/tmp/drum_timbre/combined/
Processing: cymbal

In [7]:
#Visualise Network
#From https://gist.github.com/craffel/2d727968c3aaebd10359
# from draw_neural_net_ import draw_neural_net

fig66 = plt.figure(figsize=(14, 14))
ax = fig66.gca()
ax.axis('off')

draw_neural_net(ax, .1, .9, .05, .95, [n_units, n_units, n_units, n_units, 1],
clf.coefs_, 
clf.intercepts_,
clf.n_iter_,
clf.loss_,
np, plt)
plt.savefig("neural_network.pdf", bbox_inches='tight', dpi=1200)
plt.show()
#=========================================

AttributeError: 'MLPClassifier' object has no attribute 'coefs_'