In [1]:
from music21 import *
from matplotlib import pyplot as plt
from sklearn import svm, naive_bayes, neighbors
from sklearn.feature_extraction.text import CountVectorizer
import numpy as np
import data_loader as loader

Loading unprocessed data

In [2]:
def fit(x_train, y_train, x_test, y_test, classifiers):
    """Train every classifier in turn and report how it does on the test data.

    For each estimator in ``classifiers`` (anything exposing ``fit(X, y)`` and
    ``score(X, y)``): fit it on the training pair, print the estimator itself,
    then print its score on the test pair followed by a blank line.

    The estimators are fitted in place; nothing is returned.
    """
    for model in classifiers:
        model.fit(x_train, y_train)
        print(model)
        accuracy = model.score(x_test, y_test)
        # The extra "\n" argument leaves an empty line between estimators.
        print(f"acc: {accuracy}", "\n")

In [3]:
def find_k(x_train,y_train, x_test, y_test, max_k=59):
    """Pick the neighbour count that scores best on the given evaluation data.

    One ``KNeighborsClassifier`` is fitted on ``(x_train, y_train)`` for every
    candidate ``k`` from 1 up to ``max_k`` and scored on ``(x_test, y_test)``.
    The best ``k`` and its accuracy are printed and ``k`` is returned. On a
    tie the smallest ``k`` wins.

    Parameters
    ----------
    x_train, y_train : training features and labels.
    x_test, y_test : features and labels the candidates are scored on.
    max_k : int, default 59
        Largest ``k`` to try. It is capped at ``len(y_train)`` because a
        k-NN model cannot be queried for more neighbours than it was fitted on.

    Returns
    -------
    int
        The best ``k``; always a valid ``n_neighbors`` value (>= 1).

    Raises
    ------
    ValueError
        If there is no candidate to try (empty training labels or ``max_k < 1``).

    NOTE(review): ``k`` is selected by scoring on ``x_test``/``y_test``. When the
    caller then reports accuracy on that same data (as ``fit_ngrams`` does), the
    reported figure is optimistic; a separate validation split would avoid that.
    """
    upper = min(max_k, len(y_train))
    if upper < 1:
        raise ValueError("find_k needs at least one training example and max_k >= 1")

    # Start below any possible accuracy so that the first candidate is always
    # accepted; otherwise an all-zero run would return the invalid value k = 0.
    best_k = 1
    best_acc = -1.0
    for candidate in range(1, upper + 1):
        model = neighbors.KNeighborsClassifier(n_neighbors=candidate)
        model.fit(x_train, y_train)
        acc = model.score(x_test, y_test)
        # Strict comparison keeps the smallest k among equally good candidates.
        if acc > best_acc:
            best_k, best_acc = candidate, acc
    print(f"best k = {best_k}, best acc = {best_acc}")
    return best_k

In [26]:
def fit_ngrams(x_train,y_train,x_test,y_test,n):
    """Vectorise the documents as n-gram counts and evaluate k-NN and naive Bayes.

    ``x_train`` and ``x_test`` are sequences of text documents. Every run of
    digits is one token, and all token n-grams of length 1 up to ``n`` are
    counted. The vocabulary is learned from the training documents only and
    reused for the test documents.

    Prints ``n``, the shapes of both count matrices next to the label counts,
    the best ``k`` found by ``find_k``, and the accuracy of a k-NN classifier
    using that ``k`` and of a multinomial naive Bayes classifier.
    Returns nothing.
    """
    print("n =",n)

    # Raw string: "\d" in a plain literal is an invalid escape sequence
    # (SyntaxWarning on Python 3.12+). The pattern text itself is unchanged.
    ngram_vectorizer = CountVectorizer(token_pattern=r"\d+", ngram_range=(1, n))
    ngrams_train = ngram_vectorizer.fit_transform(x_train)
    # transform (not fit_transform) keeps test columns aligned with training columns.
    ngrams_test = ngram_vectorizer.transform(x_test)

    print("train shape =", ngrams_train.shape,len(y_train))
    print("test shape =", ngrams_test.shape,len(y_test))

    k = find_k(ngrams_train,y_train,ngrams_test,y_test)
    fit(ngrams_train,y_train,ngrams_test,y_test,
         [neighbors.KNeighborsClassifier(n_neighbors=k), naive_bayes.MultinomialNB()])
    

In [40]:
%%capture
# The %%capture magic above must remain the first line of the cell; it
# suppresses everything this cell would otherwise print or display.
# Composers whose pieces are loaded; also passed to get_data_counts in the next cell.
composerNames = ["debussy","tchaikovsky","mozart","victoria","beethoven"]
# Disabled; note the call below uses a different argument list than the
# get_data_counts(labels, composerNames) call in the next cell.
#count = min(loader.get_data_counts(composerNames))
# presumably returns a train/test split of "chords" features and composer
# labels -- confirm against data_loader.load
x_train, x_test,y_train, y_test = loader.load("chords",composerNames)


In [41]:
# Per-composer example counts: training labels first, then test labels.
print(*(loader.get_data_counts(labels, composerNames) for labels in (y_train, y_test)))

{'debussy': 119, 'tchaikovsky': 165, 'mozart': 423, 'victoria': 188, 'beethoven': 380} {'debussy': 40, 'tchaikovsky': 54, 'mozart': 124, 'victoria': 61, 'beethoven': 61}


In [42]:
# Turn each example into one text document: take element 0 of every chord and
# render the resulting list with str(), e.g. "[2, 5, 10, ...]". The same
# conversion is applied to the training split and then to the test split.
pitches_train, pitches_test = (
    [str([chord[0] for chord in example]) for example in split]
    for split in (x_train, x_test)
)

In [50]:
# Sanity check: show the first training document, i.e. the str() rendering of
# the list built from element 0 of each chord in the first example.
print(pitches_train[0])

[2, 5, 10, 2, 5, 7, 9, 4, 12, 9, 4, 12, 10, 2, 7, 10, 2, 4, 5, 12, 9, 5, 12, 9, 7, 10, 2, 2, 2, 7, 10, 7, 10, 5, 10, 2, 2, 2, 7, 10, 2, 10, 4, 10, 2, 2, 2, 7, 10, 7, 10, 2, 10, 5, 10, 5, 10, 2, 10, 12, 7, 10, 10, 2, 4, 7, 10, 2, 4, 10, 2, 9, 12, 7, 10, 5, 12, 5, 5, 7, 9, 2, 5, 12, 12, 9, 2, 9, 5, 12, 12, 7, 9, 5, 5, 2, 9, 4, 5, 9, 2, 9, 2, 5, 2, 12, 5, 12, 5, 5, 7, 9, 2, 5, 12, 12, 9, 2, 9, 5, 12, 12, 7, 9, 5, 5, 2, 9, 4, 5, 9, 2, 9, 2, 5, 2, 12, 7, 10, 2, 12, 2, 5, 7, 2, 10, 5, 7, 4, 5, 5, 2, 2, 9, 2, 9, 5, 9, 12, 9, 5, 5, 2, 4, 10, 2, 5, 4, 5, 7, 10, 5, 2, 9, 10, 7, 7, 9, 5, 5, 11, 5, 2, 9, 2, 2, 7, 11, 11, 2, 5, 11, 9, 5, 11, 5, 2, 9, 7, 2, 2, 5, 5, 11, 2, 9, 11, 5, 9, 7, 2, 9, 2, 7, 11, 7, 5, 9, 11, 5, 9, 7, 2, 9, 2, 7, 7, 11, 9, 5, 10, 2, 5, 10, 2, 5, 7, 9, 4, 12, 9, 4, 12, 7, 10, 2, 7, 10, 2, 4, 5, 12, 9, 5, 12, 9, 4, 8, 2, 4, 11, 4, 2, 5, 9, 2, 5, 9, 12, 5, 12, 2, 4, 8, 2, 4, 11, 4, 2, 4, 5, 10, 2, 2, 5, 9, 2, 9, 10, 12, 4, 10, 12, 7, 12, 10, 12, 2, 7, 10, 10, 2, 5, 10, 5, 7, 9,

In [27]:
# Compare maximum n-gram lengths 1, 2 and 3 on the pitch documents.
for n in (1, 2, 3):
    fit_ngrams(pitches_train, y_train, pitches_test, y_test, n)

n = 1
train shape = (1275, 30) 1275
test shape = (340, 30) 340
best k = 23, best acc = 0.5970588235294118
KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
           metric_params=None, n_jobs=1, n_neighbors=23, p=2,
           weights='uniform')
acc: 0.5970588235294118 

MultinomialNB(alpha=1.0, class_prior=None, fit_prior=True)
acc: 0.5323529411764706 

n = 2
train shape = (1275, 238) 1275
test shape = (340, 238) 340
best k = 18, best acc = 0.6088235294117647
KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
           metric_params=None, n_jobs=1, n_neighbors=18, p=2,
           weights='uniform')
acc: 0.6088235294117647 

MultinomialNB(alpha=1.0, class_prior=None, fit_prior=True)
acc: 0.5411764705882353 

n = 3
train shape = (1275, 1385) 1275
test shape = (340, 1385) 340
best k = 16, best acc = 0.6235294117647059
KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
           metric_params=None, n_jobs=1, n_neighbor

In [None]:
# Count token n-grams of length 1 to 3 over the pitch documents (the
# "bigram" names are kept, although trigrams are included too).
# Raw string avoids the invalid "\d" escape in a plain literal.
bigram_vectorizer = CountVectorizer(token_pattern=r"\d+", ngram_range=(1,3))
bigrams_train = bigram_vectorizer.fit_transform(pitches_train)
# Only transform the test documents: calling fit_transform here would relearn
# the vocabulary from the test set, so its columns would no longer line up
# with the features the classifiers were trained on.
bigrams_test = bigram_vectorizer.transform(pitches_test)

print(bigrams_train.shape)
k = find_k(bigrams_train,y_train,bigrams_test,y_test)
fit(bigrams_train,y_train,bigrams_test,y_test,
     [neighbors.KNeighborsClassifier(n_neighbors=k), naive_bayes.MultinomialNB()])

In [None]:
# One figure per class, each showing at most the first ten test examples
# carrying that label.
# sorted(): iteration order of a set is not guaranteed, so sorting keeps the
# figure order the same on every run.
classes = sorted(set(y_test))
test_data = list(zip(x_test,y_test))
for label in classes:
    label_data = [example for example, target in test_data if target == label][:10]
    # NOTE(review): whole examples are handed to plt.plot as-is; confirm their
    # shape is something matplotlib can draw (equal length, numeric).
    plt.plot(label_data)
    # Without a title the figures cannot be told apart.
    plt.title(f"class: {label}")
    plt.xlabel("Examples")
    plt.show()