In [2]:
import sys
sys.path.append('../..')

from ml.dataset.corpus import *
from ml.dataset.mappers_preprocess import *
from ml.dataset.mappers_simplify import *
from ml.dataset.mappers_prepare import *
from ml.dataset.processor import *
from ml.dataset.pipeline import *
from ml.dataset.base_predictor import *
from IPython.core.display import HTML, display

import matplotlib.pyplot as plt
import pandas as pd
import pychord
import music21
# Register '/usr/bin/musescore' under music21's 'musescoreDirectPNGPath' setting.
# NOTE(review): absolute, machine-specific path — adjust for the local install.
music21.environment.set('musescoreDirectPNGPath', '/usr/bin/musescore')
# Dataset root, relative to this notebook; every file path below is built from it.
DATASET_HOME='../../datasets/'

In [3]:
# Load the two arrays of the 'simple' dataset; they are later attached to the
# predictor as pred.X / pred.y.
# NOTE(review): `np` is never imported explicitly in this notebook; presumably it
# arrives through one of the star imports above — confirm.
# NOTE(review): ForestPredictor.encode() looks for `str` cells in X, which suggests
# an object-dtype array; recent NumPy versions need allow_pickle=True to load
# those — confirm against the installed version.
X = np.load(DATASET_HOME+'simple/X.npy')
y = np.load(DATASET_HOME+'simple/y.npy')

In [8]:
from sklearn.preprocessing import LabelEncoder

class ForestPredictor(BasePredictor):
    """Predictor that wraps a scikit-learn style classifier.

    Expected usage, as shown by the methods below: assign ``X`` and ``y``,
    call :meth:`encode`, then :meth:`fit`, then :meth:`predict` or
    :meth:`predict_song`.

    Attributes set by this class:
        clf: the classifier passed to :meth:`fit`.
        enc: ``LabelEncoder`` fitted in :meth:`encode` on the string cells of
            ``X`` together with all of ``y``.
        X_cat_mask / X_num_mask: boolean column masks over the *original*
            column order of ``X`` (string columns / everything else).

    ``self.ccm`` is used by :meth:`predict_song` but is not created here;
    presumably it is provided by ``BasePredictor`` — confirm.
    """

    def fit(self, clf, X_train, y_train):
        """Store ``clf`` on the instance and fit it on the given training data."""
        self.clf = clf
        self.clf.fit(X_train, y_train)

    def predict(self, X):
        """Return ``self.clf.predict(X)`` cast to ``np.int64``."""
        return self.clf.predict(X).astype(np.int64)

    def encode(self):
        """Replace ``self.X`` and ``self.y`` with int64-encoded arrays.

        Columns of ``X`` whose first-row value is a string are treated as
        categorical and label-encoded; ``y`` is label-encoded with the same
        encoder. The resulting feature layout is
        ``[categorical columns..., numeric columns...]``; ``predict_song``
        builds its rows in the same layout.

        Prints the input shapes and the encoder classes.
        """
        print('Shape: ', self.X.shape, self.y.shape)
        # Column type is decided from the first row only.
        self.X_cat_mask = np.array(
            [isinstance(val, str) for val in self.X[0]], dtype=bool)
        self.X_num_mask = np.logical_not(self.X_cat_mask)

        X_cat = self.X[:, self.X_cat_mask]
        X_num = self.X[:, self.X_num_mask]

        # One shared encoder so feature labels and target labels use the same codes.
        self.enc = LabelEncoder()
        self.enc.fit(np.hstack([X_cat.flatten(), self.y.flatten()]))
        print('Classes:', self.enc.classes_)

        # Encode each matrix as a whole. This no longer assumes that y has
        # exactly as many columns as X has categorical columns, and it does
        # not write into the caller's y array in place.
        X_cat_codes = self.enc.transform(X_cat.ravel()).reshape(X_cat.shape)
        y_codes = self.enc.transform(self.y.ravel()).reshape(self.y.shape)

        self.X = np.hstack([X_cat_codes, X_num]).astype(np.int64)
        self.y = y_codes.astype(np.int64)

    def predict_song(self, song_c, preview=1, measure_length=8):
        """Predict the second sequence of a song measure by measure.

        Args:
            song_c: song object accepted by ``self.ccm.process``; its
                ``tracks[0]`` is deep-copied into the result.
            preview: number of preceding measures used as features.
            measure_length: number of steps per measure.

        Returns:
            The object produced by ``self.ccm.inverse_process`` for the
            predicted sequence, with ``tracks[0]`` replaced by a deep copy of
            ``song_c.tracks[0]``.

        Note:
            ``2 * preview * measure_length`` must equal the number of columns
            ``encode`` saw, otherwise the boolean masks will not fit the row.
        """
        song = self.ccm.process(song_c)[0]
        # Per the original comments: song[0] is the melody, song[1] the chords.
        melody, chords = song[0], song[1]

        # The first `preview` measures have no history, so they stay empty.
        predicted = [''] * (measure_length * preview)
        for measure in range(preview, len(melody) // measure_length):
            start = (measure - preview) * measure_length
            stop = measure * measure_length

            # melody history + chords history, in the original column order.
            # dtype=object stops NumPy from coercing a mixed int/str row to
            # an all-string array.
            row = np.array(melody[start:stop] + chords[start:stop], dtype=object)

            x_cat = self.enc.transform(row[self.X_cat_mask])
            x_num = row[self.X_num_mask]
            # Same layout and dtype as the training matrix built by encode():
            # categorical codes first, numeric columns second, all int64.
            x = np.hstack([x_cat, x_num]).astype(np.int64)

            y_pred = self.clf.predict([x]).astype(np.int64)[0]
            predicted += list(self.enc.inverse_transform(y_pred))

        predicted_song = self.ccm.inverse_process([[], predicted])
        # Keep the input song's first track unchanged in the result.
        predicted_song.tracks[0] = deepcopy(song_c.tracks[0])
        return predicted_song
    
# Build the predictor, attach the raw arrays, then encode them.
pred = ForestPredictor()
pred.X = X
pred.y = y
# load_dicts is not defined in this notebook (presumably inherited from
# BasePredictor); it is given the dataset's pickle file.
pred.load_dicts(DATASET_HOME+'simple/simple_dataset_dicts.pickle')
# After this call pred.X / pred.y hold int64-encoded arrays.
pred.encode()

Shape:  (44092, 16) (44092, 8)
Classes: ['' '-' 'A' 'B-' 'C' 'D' 'E' 'E-' 'F' 'G']


In [None]:
from sklearn.model_selection import train_test_split

# Split the encoded data into train and test parts (default split proportions).
# random_state pins the shuffle so a rerun reproduces the same split.
X_train, X_test, y_train, y_test = train_test_split(pred.X, pred.y, random_state=42)

In [None]:
from sklearn.ensemble import RandomForestClassifier, BaggingClassifier
from sklearn.metrics import accuracy_score
from sklearn.metrics import make_scorer

# Fit a 20-tree random forest on the training split.
# random_state pins the forest's randomness so a rerun reproduces the same model.
pred.fit(RandomForestClassifier(n_estimators=20, random_state=42), X_train, y_train)

In [None]:
# Predict on the held-out split; ForestPredictor.predict casts the result to int64.
y_pred = pred.predict(X_test)

In [None]:
# accuracy_over_axis is not defined in this notebook; presumably it comes from
# one of the ml.dataset star imports — confirm.
print(accuracy_over_axis(y_test, y_pred))

In [None]:
# dump_clf is not defined in this notebook (presumably inherited from
# BasePredictor and used to pickle the fitted classifier — confirm).
pred.dump_clf(DATASET_HOME+'simple/rf.pickle')

In [None]:
# Load songs from the pickled corpus file.
# NOTE(review): max_count=100 presumably caps how many songs are read — confirm.
corpus = SongCorpus()
corpus.load_from_file(DATASET_HOME+'simple/simple_dataset_SimplifyChordsMapper.pickle',max_count=100)

In [None]:
# Pick one song as the demo input; the notebook gives no reason for index 30.
song = corpus.songs[30]

In [None]:
# Bare expression: shows the song's repr as the cell output.
song

In [None]:
# Display the original song through its music21 representation (default show() format).
song.get_music21_repr().show()

In [None]:
# Same song, this time asking show() for the 'midi' format.
song.get_music21_repr().show('midi')

In [None]:
# Run ForestPredictor.predict_song with its defaults (preview=1, measure_length=8).
predicted_song = pred.predict_song(song)

In [None]:
# Display the predicted song through its music21 representation (default show() format).
predicted_song.get_music21_repr().show()

In [None]:
# Same predicted song, this time asking show() for the 'midi' format.
predicted_song.get_music21_repr().show('midi')