In [1]:
%load_ext autoreload
%autoreload 2

import numpy as np
import pickle
from skimage import color, exposure, transform

NUM_CLASSES = 89  # 88 notes plus one class for silence
NUM_MFCC = 30  # size of the last feature axis (X_train is (samples, NUM_CEPS, NUM_MFCC))
NUM_CEPS = 13  # size of the middle feature axis
SEED = 0  # fixed so the shuffle below is identical across kernel restarts


def _load_pickle(path):
    """Unpickle the object stored at `path` and close the file right away.

    NOTE: unpickling can execute arbitrary code; only point this at files
    produced by this project's own preprocessing step.
    """
    with open(path, 'rb') as fh:
        return pickle.load(fh)


X_train = _load_pickle('data/tdnn/mfcc_feat_train.pkl')
Y_train = _load_pickle('data/tdnn/target_train.pkl')

num_samples = X_train.shape[0]
if Y_train.shape[0] != num_samples:
    raise ValueError('feature/target row mismatch: %d vs %d'
                     % (num_samples, Y_train.shape[0]))

# One permutation is applied to both arrays so every feature row stays paired
# with its target. A private RandomState keeps the global RNG untouched.
random_indexes = np.random.RandomState(SEED).permutation(num_samples)

X_train = X_train[random_indexes, :]
Y_train = Y_train[random_indexes]

X_test = _load_pickle('data/tdnn/mfcc_feat_test.pkl')
Y_test = _load_pickle('data/tdnn/target_test.pkl')

In [2]:
# Sanity check: training features are laid out as (samples, NUM_CEPS, NUM_MFCC).
print(X_train.shape)

(1047703, 13, 30)


In [3]:
# Sanity check: training targets have one column per class (NUM_CLASSES wide).
print(Y_train.shape)

(1047703, 89)


In [10]:
from keras.models import Sequential
from keras.layers.core import Dense, Dropout, Activation, Flatten
from keras.layers.convolutional import Conv1D
from keras.layers.pooling import MaxPooling1D
from keras.optimizers import SGD
from keras import backend as K
K.set_image_data_format('channels_first')


def cnn_model():
    """Assemble the (uncompiled) 1-D convolutional note classifier.

    Layout: two Conv1D -> Conv1D -> MaxPooling1D -> Dropout stages with 64
    filters each (kernel width 2, then 3), a single 128-filter Conv1D of
    width 4, then a 512-unit dense layer with dropout and a NUM_CLASSES-way
    softmax. Each input sample has shape (NUM_CEPS, NUM_MFCC).

    Returns:
        A keras ``Sequential`` model; compiling it is left to the caller.
    """
    layer_stack = [
        # Stage 1: width-2 kernels.
        Conv1D(64, 2, padding='same', activation='relu',
               input_shape=(NUM_CEPS, NUM_MFCC)),
        Conv1D(64, 2, activation='relu'),
        MaxPooling1D(pool_size=2),
        Dropout(0.2),

        # Stage 2: width-3 kernels.
        Conv1D(64, 3, padding='same', activation='relu'),
        Conv1D(64, 3, activation='relu'),
        MaxPooling1D(pool_size=2),
        Dropout(0.2),

        # Final convolution. An extra Conv1D(128, 2) / pool / dropout stage
        # was tried after this layer and is currently disabled.
        Conv1D(128, 4, padding='same', activation='relu'),

        # Classifier head.
        Flatten(),
        Dense(512, activation='relu'),
        Dropout(0.5),
        Dense(NUM_CLASSES, activation='softmax'),
    ]

    network = Sequential()
    for layer in layer_stack:
        network.add(layer)
    return network

In [11]:
from keras.optimizers import SGD

model = cnn_model()

# Optimizer: SGD with Nesterov momentum and decay=1e-6.
# `lr` stays a module-level name because lr_schedule reads it as the base rate.
lr = 0.001
sgd = SGD(lr=lr, momentum=0.9, decay=1e-6, nesterov=True)

model.compile(optimizer=sgd,
              loss='categorical_crossentropy',
              metrics=['accuracy'])

In [12]:
from keras.callbacks import LearningRateScheduler, ModelCheckpoint


def lr_schedule(epoch):
    """Return the learning rate to use for `epoch`.

    Step decay: the module-level base rate `lr` is multiplied by 0.1 once for
    every 10 whole epochs elapsed (0-9 -> lr, 10-19 -> lr * 0.1, and so on).
    """
    steps_taken = int(epoch / 10)
    decay_factor = 0.1 ** steps_taken
    return lr * decay_factor

batch_size = 32
epochs = 10

# NOTE(review): with epochs = 10, lr_schedule returns the base `lr` for every
# epoch index 0-9, so the scheduler never takes its first step (assuming the
# callback passes 0-based epoch indices — confirm). Raise `epochs` if the
# step decay is meant to kick in.
fit_callbacks = [
    LearningRateScheduler(lr_schedule),
    # Only the best checkpoint is kept on disk at models/model.h5.
    ModelCheckpoint('models/model.h5', save_best_only=True),
]

# 20% of the already-shuffled training arrays is held out for validation.
model.fit(X_train, Y_train,
          epochs=epochs,
          batch_size=batch_size,
          validation_split=0.2,
          callbacks=fit_callbacks)

Train on 838162 samples, validate on 209541 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x128b1eed0>

In [13]:
from keras.models import load_model

# Optional manual save of the in-memory model, left disabled:
#model.save("models/tdnn_fulldata.h5")

# Replace the in-memory model with the checkpoint that
# ModelCheckpoint(save_best_only=True) wrote to models/model.h5 during training.
model = load_model('models/model.h5')

In [14]:
# Per-sample class scores from the final softmax layer, for both splits.
Y_train_pred = model.predict(X_train)
Y_test_pred = model.predict(X_test)

# Reference labels: index of the largest entry in each target row.
train_act_notes, test_act_notes = (
    np.argmax(targets, axis=1) for targets in (Y_train, Y_test)
)

# Predicted label: simply the most probable class for each test sample.
test_pred_notes = np.argmax(Y_test_pred, axis=1)


In [15]:
# Eyeball predicted (first array) vs. actual (second array) class indices
# for test samples 2500-2699.
print(test_pred_notes[2500:2700])
print(test_act_notes[2500:2700])

[39 39 39 39 39 39 39 39 39 39 39 39 39 39 39 39 39 39 39 39 39 39 39 39 39
 39 39 39 39 39 39 39 39 39 39 39 39 39 39 39 39 39 39 39 39 39 88 88 88 88
 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88
 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88
 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88
 88 88 88 34 34 34 34 34 34 34 34 34 34 34 34 34 34 34 34 34 34 34 34 34 34
 34 34 34 88 88 88 88 88 88 88 88 35 35 35 35 35 35 35 35 35 35 35 35 35 35
 35 35 35 35 35 35 35 35 35 35 35 35 35 35 35 35 35 35 35 35 35 35 35 35 35]
[39 39 39 39 39 39 39 39 39 39 39 39 39 39 39 39 39 39 39 39 39 39 39 39 39
 39 39 39 39 39 39 39 39 39 39 39 39 39 39 39 39 39 39 39 39 39 39 39 88 88
 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88
 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88
 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88
 88 88 88 3

In [16]:
from write_midi_mono import write_midi_mono 

In [17]:
# Hand the predicted test note sequence to the project-local helper.
# write_midi_mono is defined outside this notebook; presumably it writes a
# monophonic MIDI file — confirm its output location in that module.
write_midi_mono(test_pred_notes)

In [18]:
import pickle

# Persist the network outputs and reference labels for the HMM stage.
# NOTE(review): X_train / Y_train were shuffled when loaded, so the training
# arrays saved here are in shuffled order, not the original sample order —
# confirm the HMM stage does not rely on temporal ordering of the train set.
hmm_outputs = [
    ("data/hmm/train_probs.pkl", Y_train_pred),
    ("data/hmm/test_probs.pkl", Y_test_pred),
    ("data/hmm/train_notes.pkl", train_act_notes),
    ("data/hmm/test_notes.pkl", test_act_notes),
]

for out_path, payload in hmm_outputs:
    # `with` guarantees each file is flushed and closed before moving on.
    with open(out_path, 'wb') as fh:
        pickle.dump(payload, fh)