In [2]:
from shared_notebook_utils import *
from scipy.stats import gaussian_kde
from sklearn import svm, cross_validation, tree
from sklearn.externals import joblib
from sklearn.externals.six import StringIO
# Use seaborn's "whitegrid" style for the plots produced in this notebook.
seaborn.set(style="whitegrid")
# Render matplotlib figures inline, below the cell that creates them.
%matplotlib inline
# Load the datasets analysed below. `load_datasets` is not imported by name, so it
# presumably comes from the `shared_notebook_utils` star import.
# NOTE(review): the exact meaning of dirnames=None / clean=True is defined there — confirm.
datasets = load_datasets(dirnames=None, clean=True)
# Identifier of the method under evaluation; it is passed to `metric_close_bpm`
# by the filter conditions defined in the next cell.
METHOD = 'Percival14_bpm'

# Classifier for good and bad BPM estimates

In [3]:
def condition_good_estimate(key, item, data):
    """Filter condition that selects sounds scored 1 by `metric_close_bpm`.

    The metric is evaluated for the single sound `key` using the estimates of
    METHOD with tolerance=0. `item` is accepted so the signature matches what
    `filter_data` passes to its condition, but it is not used here.

    Returns True only when the metric yields at least one score and the first
    score equals 1; returns False otherwise (including when no score is
    returned for the sound).
    """
    scores = metric_close_bpm(data, METHOD, tolerance=0, sound_ids=[key])
    if len(scores) == 0:
        # No score available for this sound: it is not a "good" estimate.
        return False
    return bool(scores[0] == 1)
    
def condition_wrong_estimate(key, item, data):
    """Filter condition that selects sounds scored 0 by `metric_close_bpm`.

    The metric is evaluated for the single sound `key` using the estimates of
    METHOD with tolerance=0. `item` is accepted so the signature matches what
    `filter_data` passes to its condition, but it is not used here.

    Returns True only when the metric yields at least one score and the first
    score equals 0; returns False otherwise (including when no score is
    returned for the sound).
    """
    scores = metric_close_bpm(data, METHOD, tolerance=0, sound_ids=[key])
    if len(scores) == 0:
        # No score available for this sound: it is not counted as "wrong" either.
        return False
    return bool(scores[0] == 0)

# Common prefix of the paths under which the features are stored.
base = 'analysis.FS_onset_rate_count'

# (feature name, feature path) pairs, in the order in which the values are
# placed in each feature vector.
features = list()
features.append(('onset_rate', '%s.rhythm.onset_rate' % base))
features.append(('onset_count', '%s.rhythm.onset_count' % base))
    
def return_feature_vector(data_item):
    """Build the feature vector of `data_item` from the paths listed in `features`.

    Each value is looked up with `vfkp(data_item, fpath)`, in the order given by
    the module-level `features` list.

    Returns:
        A list with one value per entry of `features`, or None when any value
        is missing (None), NaN or infinite. Callers skip None vectors so that
        such items never reach the classifiers.
    """
    import math  # local import: the function only needs it for the nan/inf checks

    vector = list()
    for fname, fpath in features:
        value = vfkp(data_item, fpath)
        if value is None:
            # NOTE(review): assumes a missing feature may surface as None — confirm against vfkp.
            return None
        try:
            number = float(value)
        except (TypeError, ValueError, OverflowError):
            # Not a plain scalar: keep the value untouched, as before, and leave
            # its validation to whoever consumes the vector.
            number = None
        if number is not None and (math.isnan(number) or math.isinf(number)):
            return None
        vector.append(value)
    return vector

# For every dataset: label each sound as a good or bad estimate of METHOD,
# cross-validate a linear SVM and decision trees of several depths on the
# onset features, and export each fitted tree as .dot and .pkl files.
for count, dataset in enumerate(datasets):

    print(title('Training classifier for %s' % dataset.name))

    # Separate good and bad estimates
    correctly_estimated = dataset.filter_data(condition=condition_good_estimate)
    wrongly_estimated = dataset.filter_data(condition=condition_wrong_estimate)

    # Prepare data to feed classifier
    X = list()  # List of feature vectors
    y = list()  # List of good and bad estimates labels
    labelled_subsets = [
        (correctly_estimated, 'good estimate'),
        (wrongly_estimated, 'bad estimate'),
    ]
    for subset, label in labelled_subsets:
        for item in subset.data.values():
            feature_vector = return_feature_vector(item)
            if feature_vector is not None:  # Skip items for which no feature vector is returned
                X.append(feature_vector)
                y.append(label)

    # Evaluate a linear SVM with 10-fold cross validation
    print("Training and evaluating linear SVM classifier...")
    svm_clf = svm.SVC(kernel='linear')
    svm_scores = cross_validation.cross_val_score(svm_clf, X, y, scoring='accuracy', cv=10)
    print('  Accuracy: %.2f' % np.mean(svm_scores))

    # Train decision tree with different depths
    for depth in [1, 2, 3, 4, 5, 10]:
        print("Training and evaluating decision tree classifier (depth=%i)..." % depth)
        # Fixed random_state so that re-running the cell yields the same trees
        tree_clf = tree.DecisionTreeClassifier(max_depth=depth, random_state=0)
        tree_scores = cross_validation.cross_val_score(tree_clf, X, y, scoring='accuracy', cv=10)
        print('  Accuracy: %.2f' % np.mean(tree_scores))

        # Fit the tree on all the data for export (the tree is the exported model
        # because it gets better accuracy and is easier to interpret)
        fitted_tree_clf = tree_clf.fit(X, y)

        # Export classifier output in dot format (for further inspection)
        dot_path = os.path.join(
            settings.TEMPO_ESTIMATION_OUT_PATH,
            'tree_clf_%s_depth_%i.dot' % (dataset.short_name, depth))
        with open(dot_path, 'w') as f:
            # Do not rebind `f` to the return value: `f` is the open file
            # handle managed by the `with` block.
            tree.export_graphviz(
                fitted_tree_clf,
                feature_names=[fname for fname, fpath in features],
                out_file=f)

        # Export classifier as pickle so we can load it later
        pkl_path = os.path.join(
            settings.TEMPO_ESTIMATION_OUT_PATH,
            'tree_clf_%s_depth_%i.pkl' % (dataset.short_name, depth))
        joblib.dump(fitted_tree_clf, pkl_path)



Training classifier for Freesound Loops 4k
------------------------------------------

Training and evaluating linear SVM classifier...
  Accuracy: 0.57
Training and evaluating decision tree classifier (depth=1)...
  Accuracy: 0.59
Training and evaluating decision tree classifier (depth=2)...
  Accuracy: 0.61
Training and evaluating decision tree classifier (depth=3)...
  Accuracy: 0.62
Training and evaluating decision tree classifier (depth=4)...
  Accuracy: 0.64
Training and evaluating decision tree classifier (depth=5)...
  Accuracy: 0.65
Training and evaluating decision tree classifier (depth=10)...
  Accuracy: 0.67

Training classifier for Apple Loops
-----------------------------------

Training and evaluating linear SVM classifier...
  Accuracy: 0.65
Training and evaluating decision tree classifier (depth=1)...
  Accuracy: 0.66
Training and evaluating decision tree classifier (depth=2)...
  Accuracy: 0.66
Training and evaluating decision tree classifier (depth=3)...
  Accuracy:

In [7]:
# Transform .dot data into pdfs using GraphViz (requires the `dot` command line tool, see example in http://scikit-learn.org/stable/modules/tree.html#classification)
import subprocess

from ac_utils.graph import simplify_dot_tree

for filename in os.listdir(settings.TEMPO_ESTIMATION_OUT_PATH):
    # Only process original .dot exports; names containing '.simp' are skipped
    # (presumably the simplified copies written by simplify_dot_tree — confirm).
    if filename.endswith('.dot') and '.simp' not in filename:
        in_filename = os.path.join(settings.TEMPO_ESTIMATION_OUT_PATH, filename)
        # The value returned by simplify_dot_tree is the file handed to `dot`
        in_filename = simplify_dot_tree(in_filename, ['wrong estimate', 'good estimate'])
        # Swap only the extension (str.replace would rewrite every '.dot' in the name)
        out_filename = os.path.join(
            settings.TEMPO_ESTIMATION_OUT_PATH,
            os.path.splitext(filename)[0] + '.pdf')
        # Pass the arguments as a list so that paths with spaces or shell
        # metacharacters are handled safely (no shell is involved).
        try:
            exit_status = subprocess.call(['dot', '-Tpdf', in_filename, '-o', out_filename])
        except OSError as error:
            # Typically raised when the `dot` executable is not installed; keep
            # the conversion best-effort, as it was with os.system.
            print('Could not run dot for %s: %s' % (filename, error))
        else:
            if exit_status != 0:
                print('dot exited with status %i for %s' % (exit_status, filename))
