Deep artificial neural network for expressive timing predictions in musical pieces
---------------

This notebook loads the data generated from the note level processing notebook and uses them to train and test a long sequence-based artificial neural network for predicting the onset timing deviation of notes from the MusicNet dataset pieces.


#### Load and preprocess training data

In [15]:
import numpy as np
import pandas as pd
import pickle

np.random.seed(1728)

#  read dataset
runLocal = True
if runLocal:
    pathRoot = 'data/'
else:
    pathRoot = '/content/drive/My Drive/colab_data/'
    
df_path = open(pathRoot + 'per_note_train.csv', 'r')
seq_path = open(pathRoot + 'note_sequences.data', 'rb')

sequences = pickle.load(seq_path)

seq_path.close()

#### Define the neural network

In [32]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers, Input, Sequential
from tensorflow.keras.layers import LSTM, Bidirectional, Dropout, BatchNormalization, Dense, TimeDistributed
from tensorflow.keras.utils import Sequence


class DataGenerator(Sequence):
    def __init__(self, data, sequence_length, batch_size=25, sequence_stride=1, shuffle=True, fit=True):
        self.data = data
        self.sequence_length = sequence_length
        self.batch_size = batch_size
        self.sequence_stride = sequence_stride
        self.shuffle = shuffle
        self.fit = fit
        self.indexes = []
        for si, (x, _, _) in enumerate(data):
            tx = x.shape[0]
            xind = 0
            while tx > sequence_length:
                self.indexes.append((si, xind))
                xind += sequence_stride
                tx -= sequence_stride
            self.indexes.append((si,xind))
        
    def __len__(self):
        return len(self.indexes)
    
    def __getitem__(self, index):
        (seq, stride) = self.indexes[index]
        (X, Y, _) = self.data[seq]
        if stride+self.sequence_length <= X.shape[0]:
            if self.fit:
                X = X.iloc[stride:stride+self.sequence_length, :].to_numpy(dtype='float64').reshape((1, self.sequence_length, X.shape[1]))
                Y = Y.iloc[stride:stride+self.sequence_length, :].to_numpy(dtype='float64').reshape((1, self.sequence_length, Y.shape[1]))
                return X, Y
            else:
                return X.iloc[stride:stride+self.sequence_length, :].to_numpy(dtype='float64').reshape((1, self.sequence_length, X.shape[1]))
        else:
            # pad
            X = X.iloc[stride:X.shape[0], :].to_numpy(dtype='float64')
            padX = np.full((self.sequence_length - X.shape[0], X.shape[1]), -1e4)
            if self.fit:
                Y = Y.iloc[stride:Y.shape[0], :].to_numpy(dtype='float64')
                padY = np.full((self.sequence_length - Y.shape[0], Y.shape[1]), -1e4)
                return np.concatenate((X, padX), axis=0).reshape((1, self.sequence_length, X.shape[1])), np.concatenate((Y, padY), axis=0).reshape((1, self.sequence_length, Y.shape[1]))
            else:
                return np.concatenate((X, padX), axis=0).reshape((1, self.sequence_length, X.shape[1]))
    
    def on_epoch_end(self):
        """Updates indexes after each epoch
        """
        if self.shuffle:
            np.random.shuffle(self.indexes)

            
seq_length = 20        
generator = DataGenerator(sequences, seq_length)

model = Sequential([
    Input(shape=(seq_length, sequences[0][0].shape[1])),
    Bidirectional(LSTM(16, return_sequences = True)),
    Dropout(0.8),
    BatchNormalization(),
    TimeDistributed(Dense(3)),
    BatchNormalization()])

opt = keras.optimizers.Adam(learning_rate=1e-4, clipnorm=0.001)
model.compile(loss="mean_absolute_error", optimizer='adam')

model.summary()

Model: "sequential_5"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
bidirectional_5 (Bidirection (None, 20, 32)            15488     
_________________________________________________________________
dropout_5 (Dropout)          (None, 20, 32)            0         
_________________________________________________________________
batch_normalization_10 (Batc (None, 20, 32)            128       
_________________________________________________________________
time_distributed_5 (TimeDist (None, 20, 3)             99        
_________________________________________________________________
batch_normalization_11 (Batc (None, 20, 3)             12        
Total params: 15,727
Trainable params: 15,657
Non-trainable params: 70
_________________________________________________________________


#### Train the model

In [34]:
model.fit(generator, epochs=10)

Epoch 1/300
  9841/334002 [..............................] - ETA: 1:39:50 - loss: 0.7348


KeyboardInterrupt



#### Results

In [None]:
#  Save model
model.save_weights('timing2020-07-31.h5')

In [None]:
# Load model
model.load_weights('timing2020-07-31.h5')

#  Compute note-level error
xval_s = 10
Yhat = model.predict(X[0:xval_s,:,:])
DynPieces = []
DynIdeal = []
DynHat = []
RMSE_ideal = 0
RMSE_pred = 0
RMS_pieces = 0
for choice in range(0,xval_s):
    d_piece = np.load('data/levels/'+ str(df.pieceId[pd_idx[choice][0]]) + '_global_lvls.npy')
    DynPieces.append(d_piece)
    valid_idx = pd_idx[choice][pd_idx[choice] > 0]
    ideal = np.zeros((len(d_piece),2))
    hat = np.zeros((len(d_piece),2))
    cur_idx = 0
    for phr in range(len(valid_idx)):
        duration = df.durationSecs[valid_idx[phr]]
        t0 = df.startTime[valid_idx[phr]]
        n = int(duration * 10)
        tref = np.linspace(0, 1, n)
        x2_ref = df.dynamicsX2[valid_idx[phr]]
        x1_ref = df.dynamicsX1[valid_idx[phr]]
        x0_ref = df.dynamicsX0[valid_idx[phr]]
        x2_hat = Yhat[choice, phr, 0] * moments[choice,0,1] + moments[choice,0,0]
        x1_hat = Yhat[choice, phr, 1] * moments[choice,1,1] + moments[choice,1,0]
        x0_hat = Yhat[choice, phr, 2] * moments[choice,2,1] + moments[choice,2,0]
        ideal[cur_idx:cur_idx + n, 0] = tref*duration + t0
        ideal[cur_idx:cur_idx + n, 1] = x2_ref*tref**2 + x1_ref*tref + x0_ref
        hat[cur_idx:cur_idx + n, 0] = tref*duration + t0
        hat[cur_idx:cur_idx + n, 1] = x2_hat*tref**2 + x1_hat*tref + x0_hat
        cur_idx += n
    DynIdeal.append(ideal)
    DynHat.append(hat)
    RMSE_ideal += np.sqrt(np.mean((ideal[:,1] - d_piece)**2)) / xval_s
    RMSE_pred += np.sqrt(np.mean((hat[:,1] - d_piece)**2)) / xval_s
    RMS_pieces += np.sqrt(np.mean(d_piece**2)) / xval_s
    
print('Ideal approximation note-level RMS Error: ' + str(RMSE_ideal))
print('Prediction note-level RMS Error: ' + str(RMSE_pred))
print('Mean performance RMS loudness: ' + str(RMS_pieces))

In [None]:
import matplotlib.pyplot as plt

choice = 3
plt.figure(figsize=(21, 5))
#plt.plot(np.asarray(list(range(0, len(DynPiece))), dtype='float64') / 10.0, DynPiece)
#plt.plot(DynIdeal[:,0], DynIdeal[:,1])
#plt.plot(DynHat[:,0], DynHat[:,1])
plt.plot(DynPieces[choice])
plt.plot(DynIdeal[choice][:,1])
plt.plot(DynHat[choice][:,1])
plt.show()