In [1]:
from music21 import *
from matplotlib import pyplot as plt
from sklearn import svm, naive_bayes, neighbors
from sklearn.feature_extraction.text import CountVectorizer
import itertools
import numpy as np
import data_loader as loader

Loading unprocessed data

In [2]:
def fit(x_train,y_train, x_test, y_test,classifiers):
    for clf in classifiers:
        clf.fit(x_train,y_train)
        print(clf)
        print(f"acc: {clf.score(x_test,y_test)}","\n")

In [3]:
def find_k(x_train,y_train, x_test, y_test):
    best_acc = 0
    k = 0
    for i in range(1,30):
        k_nn = neighbors.KNeighborsClassifier(n_neighbors=i)
        k_nn.fit(x_train, y_train)
        acc = k_nn.score(x_test,y_test)
        if(acc>best_acc):
            best_acc = acc
            k = i
    print(f"best k = {k}, best acc = {best_acc}")
    return k

In [4]:
def fit_ngrams(x_train,y_train,x_test,y_test,n):
    print("n =",n)
    
    ngram_vectorizer = CountVectorizer(token_pattern="\d+", ngram_range=(1, n))
    ngrams_train = ngram_vectorizer.fit_transform(x_train)
    ngrams_test = ngram_vectorizer.transform(x_test)

    print("train shape =", ngrams_train.shape)
    print("test shape =", ngrams_test.shape)
    
    k = find_k(ngrams_train,y_train,ngrams_test,y_test)
    fit(ngrams_train,y_train,ngrams_test,y_test,
         [neighbors.KNeighborsClassifier(n_neighbors=k), naive_bayes.MultinomialNB()])
    

In [5]:
def combine(x1,x2,x1_classes):
    result =[]
    for a,b in zip (x1,x2):
        result.append(x1_classes * b + a)
    return result

In [6]:
def unique_dict(in_list):
    dic = dict.fromkeys(in_list)
    for i,k in enumerate(dic.keys()):
        dic[k]=i
    return dic

In [7]:
def values_to_labels(in_list,dic):
    out_list = [dic[x] for x in in_list]
    return out_list        

In [8]:
def stringify_items(in_list):
    return [str(x) for x in in_list]

In [None]:
# Define known chords
tone, major, minor, suspended, augmented, diminished, major_sixth, minor_sixth, 
dominant_seventh, major_seventh, minor_seventh, half_diminished_seventh, 
diminished_seventh, major_ninth, dominant_ninth, dominant_minor_ninth, 
minor_ninth = [0], [0,4,7],[0,3,7],[0,5,7],[0,4,8],[0,3,6],[0,4,7,9],[0,3,7,9],
[0,4,7,10],[0,4,7,11],[0,3,7,10],[0,3,6,10],[0,3,6,9],[0,2,4,7,11],
[0,2,4,7,10],[0,1,4,7,10],[0,2,3,7,10]

chord_types_list = [tone,major, minor, suspended, augmented, 
                    diminished, major_sixth, minor_sixth, 
                    dominant_seventh, major_seventh, 
                    minor_seventh, half_diminished_seventh, 
                    diminished_seventh, major_ninth, 
                    dominant_ninth, dominant_minor_ninth, minor_ninth]

In [24]:
%%capture
composerNames = ["debussy","tchaikovsky","mozart","victoria","beethoven"]
#count = min(loader.get_data_counts(composerNames))
x_train_ch, x_test_ch,y_train, y_test = loader.load("chords_t",composerNames)
x_train_d, x_test_d,y_train, y_test = loader.load("durations",composerNames)

In [23]:
list(filter(lambda x: x[0] == 12,x_train_ch[1]))

[[12],
 [12],
 [12],
 [12],
 [12],
 [12],
 [12],
 [12],
 [12],
 [12],
 [12],
 [12],
 [12],
 [12],
 [12, 0],
 [12],
 [12],
 [12],
 [12],
 [12],
 [12],
 [12],
 [12],
 [12],
 [12],
 [12],
 [12],
 [12],
 [12],
 [12],
 [12],
 [12],
 [12],
 [12, 0, 1],
 [12],
 [12],
 [12, 0, 2],
 [12],
 [12],
 [12],
 [12],
 [12],
 [12],
 [12, 0, 4],
 [12],
 [12],
 [12],
 [12],
 [12],
 [12],
 [12],
 [12],
 [12],
 [12],
 [12],
 [12, 0, 4],
 [12],
 [12],
 [12],
 [12],
 [12],
 [12],
 [12],
 [12],
 [12],
 [12],
 [12, 0, 5],
 [12, 0, 5],
 [12, 0, 5],
 [12],
 [12],
 [12, 0, 2, 5],
 [12],
 [12],
 [12],
 [12],
 [12],
 [12],
 [12],
 [12, 0],
 [12, 0],
 [12],
 [12],
 [12],
 [12, 0, 3],
 [12],
 [12],
 [12],
 [12],
 [12],
 [12, 0, 2, 5],
 [12],
 [12],
 [12],
 [12],
 [12, 0, 2],
 [12],
 [12],
 [12, 0],
 [12],
 [12],
 [12, 0, 5],
 [12, 0, 5],
 [12],
 [12],
 [12, 0, 5],
 [12, 0, 5],
 [12, 0, 5],
 [12],
 [12],
 [12, 0, 2],
 [12, 0, 3, 7],
 [12],
 [12],
 [12, 0, 2, 5],
 [12],
 [12],
 [12],
 [12],
 [12, 0, 5],
 [12, 0],
 [12],

In [25]:
x_train_ch = [[chord[0] for chord in example ] for example in x_train_ch]
x_test_ch = [[chord[0] for chord in example]for example in x_test_ch]

In [21]:
max(x_train_ch[0])

[12, 0, 5]

In [26]:
pitches_seq = list(itertools.chain.from_iterable(x_train_ch+x_test_ch))
pitches_dict = unique_dict(pitches_seq)

x_train_ch = [values_to_labels(example,pitches_dict) for example in x_train_ch]
x_test_ch = [values_to_labels(example,pitches_dict) for example in x_test_ch]

durations_seq = list(itertools.chain.from_iterable(x_train_d+x_test_d))
durations_dict = unique_dict(durations_seq)

x_train_d = [values_to_labels(example,durations_dict) for example in x_train_d]
x_test_d = [values_to_labels(example,durations_dict) for example in x_test_d]

In [27]:
x_train = list(map(str, combine(x_train_ch,x_train_d,len(pitches_dict))))
x_test = list(map(str, combine(x_test_ch,x_test_d,len(pitches_dict))))

In [28]:
print(loader.get_data_counts(y_train,composerNames),loader.get_data_counts(y_test,composerNames))

{'debussy': 119, 'tchaikovsky': 165, 'mozart': 423, 'victoria': 188, 'beethoven': 380} {'debussy': 40, 'tchaikovsky': 54, 'mozart': 124, 'victoria': 61, 'beethoven': 61}


In [29]:
for n in range(1,4):
    fit_ngrams(x_train,y_train,x_test,y_test,n)

n = 1
train shape = (1275, 51)
test shape = (340, 51)
best k = 27, best acc = 0.6176470588235294
KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
           metric_params=None, n_jobs=1, n_neighbors=27, p=2,
           weights='uniform')
acc: 0.6176470588235294 

MultinomialNB(alpha=1.0, class_prior=None, fit_prior=True)
acc: 0.5264705882352941 

n = 2
train shape = (1275, 1551)
test shape = (340, 1551)
best k = 10, best acc = 0.6058823529411764
KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
           metric_params=None, n_jobs=1, n_neighbors=10, p=2,
           weights='uniform')
acc: 0.6058823529411764 

MultinomialNB(alpha=1.0, class_prior=None, fit_prior=True)
acc: 0.5558823529411765 

n = 3
train shape = (1275, 18802)
test shape = (340, 18802)
best k = 1, best acc = 0.6147058823529412
KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
           metric_params=None, n_jobs=1, n_neighbors=1, p=2,
           wei

In [None]:
classes = list(set(y_test))
test_data = list(zip(x_test,y_test))
for label in classes:
    label_data = [tupple[0] for tupple in test_data if tupple[1] == label][:10]
    plt.plot(label_data)
    plt.xlabel("Examples")
    plt.show()