In [9]:
from keras import backend as K
from keras_preprocessing import sequence
from keras.layers import *
from keras.models import Model, Sequential
import numpy as np
import time
import os
import joblib
from sklearn.metrics import accuracy_score
from sklearn.tree import DecisionTreeClassifier
from sklearn.datasets import make_classification
from sklearn.preprocessing import OneHotEncoder
from fastai.imports import *
from fastai.structured import *
from IPython.display import display

Read in the training and test data. This cell defines:
* x: training set features
* y: training set labels
* test_x: test set features
* test_y: test set labels

In [2]:
# Padding geometry and fill values used when flattening songs into per-bar rows.
BARS_PER_SONG = 185  # every song contributes exactly this many rows
STEPS_PER_BAR = 32   # every row has exactly this many feature columns
PAD_STEP = 12        # feature value for a missing step or a step vector without a 1
PAD_LABEL = 24       # label value for a missing bar or a label vector without a 1


def encode_songs(songs, bars_per_song=BARS_PER_SONG, steps_per_bar=STEPS_PER_BAR,
                 pad_step=PAD_STEP, pad_label=PAD_LABEL):
    """Flatten a collection of songs into integer-coded per-bar rows.

    Parameters
    ----------
    songs : sequence
        ``songs[i][0]`` is the sequence of bars of song ``i`` (each bar is a
        sequence of step vectors) and ``songs[i][1]`` is the sequence of
        per-bar label vectors. Vectors must support ``in`` and ``.index``
        (presumably one-hot lists -- confirm against the dataset builder).
    bars_per_song, steps_per_bar : int
        Fixed output geometry; shorter songs/bars are padded, longer ones
        are truncated.
    pad_step, pad_label : int
        Values written where a step / label is missing or contains no 1.

    Returns
    -------
    (features, labels) : tuple of float64 ndarrays
        ``features`` has shape ``(len(songs) * bars_per_song, steps_per_bar)``
        and ``labels`` has shape ``(len(songs) * bars_per_song,)``. Each entry
        is the position of the first 1 in the corresponding vector, or the
        pad value.
    """
    n_rows = len(songs) * bars_per_song
    # Pre-fill with the pad values so only real data needs to be written below.
    # dtype is pinned to float64 to match what np.zeros produced before.
    features = np.full((n_rows, steps_per_bar), pad_step, dtype=np.float64)
    labels = np.full(n_rows, pad_label, dtype=np.float64)

    row = 0
    for song in songs:
        bars, bar_labels = song[0], song[1]
        for j in range(bars_per_song):
            if j < len(bar_labels) and 1 in bar_labels[j]:
                labels[row] = bar_labels[j].index(1)
            if j < len(bars):
                # Slicing truncates bars that are longer than steps_per_bar.
                for k, step in enumerate(bars[j][:steps_per_bar]):
                    if 1 in step:
                        features[row, k] = step.index(1)
            row += 1
    return features, labels


# NOTE(review): allow_pickle=True unpickles arbitrary objects; only load
# dataset files that come from a trusted source.
train = np.load('dataset/train.npy', allow_pickle=True)
# Row count now follows the file instead of a hard-coded 1801 songs.
x, y = encode_songs(train)

# Kept under its own name so `train` is no longer silently rebound to test data.
test = np.load('dataset/test.npy', allow_pickle=True)
test_x, test_y = encode_songs(test)

## Random Forest Classifier

Model definition and training

In [3]:
# Fixed seed so repeated runs build the same forest and report the same
# accuracy (the estimator draws random bootstrap samples when fitting).
RFC_SEED = 42

m = RandomForestClassifier(
    n_estimators=60,
    min_samples_leaf=50,
    max_features=None,
    n_jobs=-1,
    random_state=RFC_SEED,
)
# Last expression: the fitted estimator's repr is shown as the cell output.
m.fit(x, y)

RandomForestClassifier(max_features=None, min_samples_leaf=50, n_estimators=60,
                       n_jobs=-1)

Save model

In [6]:
# Persist the fitted forest; joblib.dump returns the list of files written,
# which is displayed as the cell output.
joblib.dump(value=m, filename="models/RFC.pkl")

['models/RFC.pkl']

To load a previously saved model instead of retraining:

In [None]:
# Restore the forest saved by the cell above.
# NOTE(review): joblib files are pickle-based; only load files you trust.
m = joblib.load(filename="models/RFC.pkl")

Predict the test labels and report accuracy

In [7]:
y_pred = m.predict(test_x)
# accuracy_score takes (y_true, y_pred). Accuracy itself is symmetric, so the
# number is unchanged, but the correct order keeps this call consistent with
# the LSTM evaluation and safe if the metric is ever swapped for an
# asymmetric one.
print(accuracy_score(test_y, y_pred))

0.8785465465465465


## LSTM

One-hot encode the training labels `y`

In [10]:
# Two-stage encoding of the training labels: LabelEncoder maps the distinct
# values in `y` to consecutive integers, then OneHotEncoder expands that
# single integer column into one indicator column per class.
# Both fitted encoders are reused later for the test labels and for decoding
# predictions.
label_encoder = LabelEncoder()
onehot_encoder = OneHotEncoder(sparse=False)

# OneHotEncoder expects a 2-D (n_samples, 1) column, hence the reshape.
integer_encoded = label_encoder.fit_transform(y).reshape(-1, 1)
y_onehot = onehot_encoder.fit_transform(integer_encoded)

Model definition and training

In [11]:
# Embedding -> LSTM -> small dense head -> 25-way softmax.
# input_length=32 matches the 32 feature columns of each row in `x`.
# NOTE(review): the 25 softmax units must equal the number of columns in
# y_onehot, i.e. the number of distinct labels seen in training -- confirm.
model = Sequential([
    Embedding(input_dim=25 + 1, output_dim=10, input_length=32),
    LSTM(10),
    Dense(10, activation="relu"),
    Dense(25, activation="softmax"),
])

In [None]:
# Categorical cross-entropy pairs with the softmax output and one-hot targets.
model.compile(loss="categorical_crossentropy", optimizer="adam")
# Train for 5 passes over the training rows; the returned History object is
# the cell's displayed value.
model.fit(x=x, y=y_onehot, epochs=5)

Save Model

In [None]:
# Persist the trained network via joblib (pickle-based).
# NOTE(review): this relies on the installed Keras version supporting
# pickling of models; Keras' own model.save()/load_model() is the usual
# persistence route -- confirm this round-trips in the target environment.
joblib.dump(value=model, filename="models/LSTM.pkl")

To load a previously saved model instead of retraining:

In [12]:
# Restore the network written by the save cell.
# NOTE(review): joblib files are pickle-based; only load files you trust.
model = joblib.load(filename="models/LSTM.pkl")

  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


One-hot encode the test labels `test_y`

In [13]:
# Encode the test labels with the encoders already fitted on the training
# labels (transform only -- no refitting), so class indices line up.
# The reshape produces the (n_samples, 1) column OneHotEncoder expects.
test_integer_encoded = label_encoder.transform(test_y).reshape(-1, 1)
test_y_onehot = onehot_encoder.transform(test_integer_encoded)

Predict the test set and report accuracy

In [15]:
# One row of class scores per test row.
pred_y_onehot = model.predict(test_x)

# Pick the highest-scoring class for every row at once, then map the class
# indices back to the original label values with a single inverse_transform
# call (previously this was done row by row in a Python loop, followed by a
# second loop to unwrap the one-element arrays).
pred_y2 = label_encoder.inverse_transform(np.argmax(pred_y_onehot, axis=1))

# Last expression: the accuracy is shown as the cell output.
accuracy_score(test_y, pred_y2)

0.8704624624624625