In [1]:
from music21 import *
from matplotlib import pyplot as plt
from sklearn import svm, naive_bayes, neighbors
from sklearn.feature_extraction.text import CountVectorizer
import numpy as np
import data_loader as loader

Loading unprocessed data

In [2]:
%%capture
# Composers whose pieces make up the classification problem; the same list is
# passed to the loader here and to loader.get_data_counts further down.
composerNames = ["debussy","tchaikovsky","mozart","victoria","beethoven"]
# Disabled leftover: computes the smallest per-composer count but is never used.
# NOTE(review): it calls get_data_counts with one argument, while the live call
# in the next cell passes two - confirm the signature before re-enabling.
#count = min(loader.get_data_counts(composerNames))
# loader.load is project code (data_loader); its four return values are unpacked
# as train/test features and train/test labels.
# NOTE(review): "chords_t" presumably selects a chord-based representation -
# confirm against data_loader.
x_train, x_test,y_train, y_test = loader.load("chords_t",composerNames)


In [3]:
# Show the per-composer example counts for the training labels, then for the
# test labels, to make the class imbalance visible.
print(loader.get_data_counts(y_train,composerNames),loader.get_data_counts(y_test,composerNames))

{'debussy': 119, 'tchaikovsky': 165, 'mozart': 423, 'victoria': 188, 'beethoven': 380} {'debussy': 40, 'tchaikovsky': 54, 'mozart': 124, 'victoria': 61, 'beethoven': 61}


In [4]:
# Turn every example into one text "document": keep only element 0 of each
# chord and take the string form of the resulting list, so that a text
# vectorizer can tokenise it later.
# NOTE(review): element 0 is presumably the chord's pitch information - confirm
# against data_loader.
pitches_train = [
    str([ch[0] for ch in piece])
    for piece in x_train
]
pitches_test = [
    str([ch[0] for ch in piece])
    for piece in x_test
]

In [5]:
def fit(x_train, y_train, x_test, y_test, classifiers):
    """Train each classifier and print its accuracy on the held-out data.

    Args:
        x_train: Training features, passed unchanged to ``estimator.fit``.
        y_train: Training labels.
        x_test: Evaluation features, passed unchanged to ``estimator.score``.
        y_test: Evaluation labels.
        classifiers: Iterable of estimators exposing ``fit`` and ``score``.
            Each one is fitted in place.

    Returns:
        None. For every estimator its repr is printed, followed by a line
        ``acc: <score>`` and a blank separator line.
    """
    for model in classifiers:
        model.fit(x_train, y_train)
        print(model)
        accuracy = model.score(x_test, y_test)
        # The extra "\n" argument yields the blank line between reports.
        print(f"acc: {accuracy}", "\n")

In [10]:
def find_k(x_train, y_train, x_test, y_test, max_k=59):
    """Pick the neighbour count that gives the best k-NN accuracy.

    A ``KNeighborsClassifier`` is trained for every k from 1 up to
    ``min(max_k, number of training samples)`` and scored on
    ``(x_test, y_test)``. The smallest k reaching the highest score wins.

    Note: k is chosen using the very data passed as ``x_test``/``y_test``. If
    that is the final test set, accuracies reported afterwards for the chosen
    k are optimistically biased; pass a separate validation split to avoid it.

    Args:
        x_train: Training features (array-like or sparse matrix).
        y_train: Training labels.
        x_test: Features used to score each candidate k.
        y_test: Labels used to score each candidate k.
        max_k: Largest neighbour count to try. The default of 59 reproduces
            the previously hard-coded search range 1..59.

    Returns:
        int: The best k, always >= 1.

    Raises:
        ValueError: If there is no candidate k to try (empty training set or
            ``max_k < 1``).
    """
    # shape[0] also works for SciPy sparse matrices, which do not support len().
    n_samples = x_train.shape[0] if hasattr(x_train, "shape") else len(x_train)
    # k-NN cannot query more neighbours than there are fitted samples.
    upper = min(max_k, n_samples)
    if upper < 1:
        raise ValueError("find_k needs at least one training sample and max_k >= 1")

    # Start below any possible accuracy so the first candidate is always
    # accepted; previously an all-zero run returned the invalid k = 0.
    best_acc = -1.0
    k = 1
    for i in range(1, upper + 1):
        k_nn = neighbors.KNeighborsClassifier(n_neighbors=i)
        k_nn.fit(x_train, y_train)
        acc = k_nn.score(x_test, y_test)
        if acc > best_acc:  # strict: ties keep the smaller k
            best_acc = acc
            k = i
    print(f"best k = {k}, best acc = {best_acc}")
    return k

In [None]:
def fit_ngrams(pitches_train,y_train,pitches_test,y_test,classifiers,n):
    """Vectorise documents as 1..n-gram counts, then evaluate k-NN and Naive Bayes.

    Args:
        pitches_train: Iterable of training documents (strings); tokens are
            the runs of digits in each string.
        y_train: Training labels.
        pitches_test: Iterable of evaluation documents (strings).
        y_test: Evaluation labels.
        classifiers: Unused. Kept so existing calls keep working; the models
            evaluated are always a ``KNeighborsClassifier`` (k chosen by
            ``find_k``) and a ``MultinomialNB``.
        n: Longest n-gram to count; all lengths from 1 to ``n`` are included.

    Returns:
        None. Progress and accuracies are printed.
    """
    print("n =",n)

    # Raw string: "\d" in a normal string literal is an invalid escape sequence.
    ngram_vectorizer = CountVectorizer(token_pattern=r"\d+", ngram_range=(1, n))
    ngrams_train = ngram_vectorizer.fit_transform(pitches_train)
    # Reuse the vocabulary learned on the training documents. Calling
    # fit_transform here would rebuild it from the test documents, so test
    # columns would no longer correspond to the training columns.
    ngrams_test = ngram_vectorizer.transform(pitches_test)

    print("training shape =", ngrams_train.shape)

    k = find_k(ngrams_train,y_train,ngrams_test,y_test)
    fit(ngrams_train,y_train,ngrams_test,y_test,
         [neighbors.KNeighborsClassifier(n_neighbors=k), naive_bayes.MultinomialNB()])
    

In [11]:
# Sweep the longest n-gram length from 1 to 3 and, for each setting, report
# the accuracy of k-NN (k chosen by find_k) and multinomial Naive Bayes.
for n in range(1,4):
    print("n =", n)

    # Raw string: "\d" in a normal string literal is an invalid escape sequence.
    ngram_vectorizer = CountVectorizer(token_pattern=r"\d+", ngram_range=(1, n))
    ngrams_train = ngram_vectorizer.fit_transform(pitches_train)
    # Apply the training vocabulary to the test documents; re-fitting on them
    # would produce columns that do not correspond to the training columns.
    ngrams_test = ngram_vectorizer.transform(pitches_test)

    print("training shape =", ngrams_train.shape)

    k = find_k(ngrams_train,y_train,ngrams_test,y_test)
    # Build the models only after k is known; constructing them at the top of
    # the loop read k before its first assignment (NameError on a fresh kernel).
    classifiers = [neighbors.KNeighborsClassifier(n_neighbors=k),
                   naive_bayes.MultinomialNB()]
    fit(ngrams_train,y_train,ngrams_test,y_test,classifiers)

n= 1
training shape (1275, 12)
best k = 6, best acc = 0.65
KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
           metric_params=None, n_jobs=1, n_neighbors=6, p=2,
           weights='uniform')
acc: 0.65 

MultinomialNB(alpha=1.0, class_prior=None, fit_prior=True)
acc: 0.6 

n= 2
training shape (1275, 156)
best k = 2, best acc = 0.7176470588235294
KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
           metric_params=None, n_jobs=1, n_neighbors=2, p=2,
           weights='uniform')
acc: 0.7176470588235294 

MultinomialNB(alpha=1.0, class_prior=None, fit_prior=True)
acc: 0.6588235294117647 

n= 3
training shape (1275, 1884)
best k = 2, best acc = 0.7176470588235294
KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
           metric_params=None, n_jobs=1, n_neighbors=2, p=2,
           weights='uniform')
acc: 0.7176470588235294 

MultinomialNB(alpha=1.0, class_prior=None, fit_prior=True)
acc: 0.714705882352941

In [26]:
# Single run with 1- to 3-gram counts. The bigram_* names are kept as they
# were even though ngram_range=(1,3) also includes unigrams and trigrams.
# Raw string: "\d" in a normal string literal is an invalid escape sequence.
bigram_vectorizer = CountVectorizer(token_pattern=r"\d+", ngram_range=(1,3))
bigrams_train = bigram_vectorizer.fit_transform(pitches_train)
# Apply the training vocabulary to the test documents; re-fitting on them
# would produce columns that do not correspond to the training columns.
bigrams_test = bigram_vectorizer.transform(pitches_test)

print(bigrams_train.shape)
k = find_k(bigrams_train,y_train,bigrams_test,y_test)
fit(bigrams_train,y_train,bigrams_test,y_test,
     [neighbors.KNeighborsClassifier(n_neighbors=k), naive_bayes.MultinomialNB()])

(1275, 1884)
best k = 6, best acc = 0.65
KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
           metric_params=None, n_jobs=1, n_neighbors=6, p=2,
           weights='uniform')
acc: 0.6705882352941176 

MultinomialNB(alpha=1.0, class_prior=None, fit_prior=True)
acc: 0.7147058823529412 



In [None]:
# Draw one figure per class from the first ten test examples carrying that label.
# Sorted so the figures appear in the same order on every run (set iteration
# order is arbitrary).
classes = sorted(set(y_test))
test_data = list(zip(x_test,y_test))
for label in classes:
    label_data = [example for example, example_label in test_data
                  if example_label == label][:10]
    # NOTE(review): plt.plot receives the raw list of examples; this assumes
    # they form a rectangular numeric array - confirm against data_loader.
    plt.plot(label_data)
    # Title each figure with its class so the plots can be told apart.
    plt.title(str(label))
    plt.xlabel("Examples")
    plt.show()