In [1178]:
import os
import sys
import string
from music21 import *
from itertools import chain, imap
import csv
import json
import random

In [1179]:
def fetch_genres(basedir):
    genres = []
    nested = os.listdir(basedir)
    for i in nested:
        try:
            if(os.path.isdir(i) and not i.startswith(".")):
                genres.append(i)
        except:
            print("An error occured trying to load features for genre " + i)
    return genres

In [1180]:
def chunks(l, n):
    for i in xrange(0, len(l), n):
        yield l[i:i+n]

In [1181]:
def flatmap(l):
    return [item for sublist in l for item in sublist]

In [1182]:
def failed_features(basedir):
    not_included = set()
    fs = features.jSymbolic.extractorsById
    genres = fetch_genres(basedir)
    for genre in genres:
        try: 
            filename = basedir + "/" + genre + "/features"
            files = os.listdir(filename)
            for aFile in files:
                if aFile.endswith(".csv"):
                    arr = fileToArray(filename + "/" + aFile)
                    arr1 = set(map(lambda x: tuple(x[0:2]), arr))
                    # If features failed to extract 
                    for k in fs:
                       if k is not "I":
                            for i in range(len(fs[k])):
                                if (k,str(i)) not in arr1 and fs[k][i] is not None:
                                    not_included.add((k,i))
        except:
            print("An error occured trying to load features for genre " + genre)
    return not_included

In [1183]:
def build_vectors(exclude_features, basedir):
    genres = fetch_genres(basedir)
    final_vecs = []
    for genre in genres:
        try:
            filename = basedir + "/" + genre + "/features"
            files = os.listdir(filename)
            for aFile in files:
                vec = []
                if aFile.endswith(".csv"):
                    arr = fileToArray(filename + "/" + aFile)
                    for i in arr:
                        if (i[0], int(i[1])) not in exclude_features:
                            vec.append(map(lambda x: float(x), i[3:]))
                if len(vec) > 0:
                    final_vec = flatmap(vec)
                    final_vec.append(genre)
                    final_vecs.append(final_vec)
        except:
            print("Error occured trying to load features for " + genre)
    return final_vecs

In [1184]:
def fileToArray(filename):
    array = []
    with open(filename, 'rb') as f:
        reader = csv.reader(f)
        try:
            array = list(reader)
        except: 
            print("Error reading: " + filename)
    f.close()
    return array

In [1185]:
def perform_k_folds(k, vectors):
    chunk_size = len(vectors) / k
    chunked = chunks(vectors, chunk_size)
    
    sample_list = range(0, chunked)

    

In [1186]:
basedir = "."

# Features that failed to extract will not be written to files. Put all the failed
# features into a set so that they can be excluded from the final vectors right away.
not_included = failed_features(basedir)



# Vectors is a list of lists. The inner list contains the features and the resulting
# labels in the last position of each vector. 
vecs = build_vectors(not_included, basedir)

print(str(len(vecs)))


An error occured trying to load features for genre classical
Error occured trying to load features for classical
149


In [1187]:
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.datasets import load_iris
from sklearn.feature_selection import SelectFromModel
from sklearn import cross_validation
import numpy as np
import matplotlib.pyplot as plt
from sklearn.cross_validation import KFold

print(str(len(vecs[0])))

random.shuffle(vecs)

X, y = map(lambda x: x[0:len(x)-1], vecs), map(lambda x: x[len(x)-1], vecs)

print(str(len(X[0])))

clf = ExtraTreesClassifier()
clf = clf.fit(X, y)
importances = clf.feature_importances_ 
model = SelectFromModel(clf, prefit=True)
X_new = model.transform(X)
X_new.shape

print(str(len(X_new[0])))

std = np.std([tree.feature_importances_ for tree in clf.estimators_], axis=0)
indices = np.argsort(importances)[::-1]

print("Feature ranking:")

for f in range(X_new.shape[1]):
    print("%d. feature %d (%f)" % (f + 1, indices[f], importances[indices[f]]))

# plt.figure()
# plt.title("Feature importances")
# plt.bar(range(X_new.shape[1]), importances[indices], color="r", yerr=std[indices], align="center")
# plt.xticks(range(X_new.shape[1]), indices)
# plt.xlim([-1, X_new.shape[1]])
# plt.show()


    
from sklearn.ensemble import RandomForestClassifier

clf = RandomForestClassifier(verbose=1, n_estimators=100, random_state=0)
clf = clf.fit(X_new, y)

cur_idx = 38
print(y[cur_idx])
print(clf.predict(X_new[cur_idx]))
print(clf.score(X_new, y))

n_samples = X_new.shape[0]
print("n_samples=" + str(n_samples))

print(str(cross_validation.cross_val_score(clf, X_new, y, cv=5)))


333
332
136
Feature ranking:
1. feature 153 (0.016345)
2. feature 213 (0.015885)
3. feature 146 (0.015254)
4. feature 223 (0.013520)
5. feature 330 (0.012712)
6. feature 221 (0.012007)
7. feature 329 (0.011929)
8. feature 289 (0.010957)
9. feature 249 (0.010910)
10. feature 197 (0.010870)
11. feature 243 (0.010646)
12. feature 292 (0.010560)
13. feature 232 (0.010428)
14. feature 299 (0.010406)
15. feature 317 (0.010251)
16. feature 295 (0.010242)
17. feature 134 (0.010221)
18. feature 7 (0.009884)
19. feature 212 (0.009705)
20. feature 215 (0.009664)
21. feature 131 (0.009654)
22. feature 233 (0.009636)
23. feature 208 (0.009542)
24. feature 152 (0.009542)
25. feature 128 (0.009473)
26. feature 219 (0.009196)
27. feature 145 (0.009192)
28. feature 158 (0.008886)
29. feature 229 (0.008877)
30. feature 218 (0.008703)
31. feature 5 (0.008587)
32. feature 135 (0.008524)
33. feature 298 (0.008465)
34. feature 305 (0.008317)
35. feature 294 (0.007999)
36. feature 237 (0.007980)
37. feature 

[Parallel(n_jobs=1)]: Done  49 tasks       | elapsed:    0.0s
[Parallel(n_jobs=1)]: Done 100 out of 100 | elapsed:    0.1s finished
[Parallel(n_jobs=1)]: Done  49 tasks       | elapsed:    0.0s
[Parallel(n_jobs=1)]: Done 100 out of 100 | elapsed:    0.0s finished
[Parallel(n_jobs=1)]: Done  49 tasks       | elapsed:    0.0s
[Parallel(n_jobs=1)]: Done 100 out of 100 | elapsed:    0.0s finished


rock
['rock']
1.0
n_samples=149


[Parallel(n_jobs=1)]: Done  49 tasks       | elapsed:    0.0s
[Parallel(n_jobs=1)]: Done 100 out of 100 | elapsed:    0.1s finished
[Parallel(n_jobs=1)]: Done  49 tasks       | elapsed:    0.0s
[Parallel(n_jobs=1)]: Done 100 out of 100 | elapsed:    0.0s finished
[Parallel(n_jobs=1)]: Done  49 tasks       | elapsed:    0.0s
[Parallel(n_jobs=1)]: Done 100 out of 100 | elapsed:    0.1s finished
[Parallel(n_jobs=1)]: Done  49 tasks       | elapsed:    0.0s
[Parallel(n_jobs=1)]: Done 100 out of 100 | elapsed:    0.0s finished
[Parallel(n_jobs=1)]: Done  49 tasks       | elapsed:    0.0s
[Parallel(n_jobs=1)]: Done 100 out of 100 | elapsed:    0.1s finished
[Parallel(n_jobs=1)]: Done  49 tasks       | elapsed:    0.0s
[Parallel(n_jobs=1)]: Done 100 out of 100 | elapsed:    0.0s finished
[Parallel(n_jobs=1)]: Done  49 tasks       | elapsed:    0.0s
[Parallel(n_jobs=1)]: Done 100 out of 100 | elapsed:    0.1s finished
[Parallel(n_jobs=1)]: Done  49 tasks       | elapsed:    0.0s
[Parallel(n_jo

[ 0.53333333  0.36666667  0.56666667  0.43333333  0.31034483]


[Parallel(n_jobs=1)]: Done  49 tasks       | elapsed:    0.0s
[Parallel(n_jobs=1)]: Done 100 out of 100 | elapsed:    0.1s finished
[Parallel(n_jobs=1)]: Done  49 tasks       | elapsed:    0.0s
[Parallel(n_jobs=1)]: Done 100 out of 100 | elapsed:    0.0s finished
