***
### __Darius Petermann__ <br>
Email: dariusarthur.petermann01@estudiant.upf.edu <br>
Project Git Page: https://github.com/darius522/lstm_rl_music_generator.git

How this notebook works:

This notebook is divided into three main sections:
* 1. [Data Handling Methodology](#part1): First we go through the generic set-up steps such as data handling, formatting, pre-processing:
    1. [Dataset Download](#part11)
    2. [Storing our Data in a Convenient Format](#part12)
    3. [Defining the Generator Function](#part13)
    4. [Computing Time-Domain Snippets](#part14)
* 2. [Baseline Computation](#part2): We then dive into the first part of our experiment, which is the computation of our baseline using MFC's:
    1. [Feature Extraction](#part21)
    2. [Data Plotting](#part22)
    3. [Data Formatting](#part23)
    4. [Model Training / Testing](#part24)
    5. [Results](#part25)
* 3. [Deep Salience Approach](#part3): In the second part of our experiment, we use the deep salience framework (cited later), in order to improve upon our initial baseline:
    1. [Importing Required Python File - Defining Function Parameters](#part31)
    2. [Predicting Salience](#part32)
    3. [Results](#part33)

In [1]:
import random
import numpy as np

from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.layers import Dense, LSTM, Dropout, Activation
from keras.utils.np_utils import to_categorical
from tensorflow.keras.callbacks import ModelCheckpoint

['/home/miguel/Desktop/darius/lstm_rl_music_generator/sys/lib/python3.6/site-packages/numpy']


Using TensorFlow backend.


### Data Handling

In [2]:
from music21 import *
import glob
from tqdm import tqdm

# Name -> pitch class (0-11) lookup for the common spellings music21 reports.
# Kept for reference; the loop below uses music21's own pitch-class value instead,
# because this table has no entry for spellings such as 'E#', 'B#', 'C-', 'F-' or
# double accidentals and a plain dict lookup would raise KeyError on them.
note2cat = {'C':0,'C#':1,'D-':1,'D':2,'D#':3,'E-':3,'E':4,'F':5,'F#':6,
             'G-':6,'G':7,'G#':8,'A-':8,'A':9,'A#':10,'B-':10,'B':11}

# Flat list of pitch classes (ints in 0-11) for every single note in the corpus.
all_notes = []
# glob returns paths in arbitrary order; sorting keeps the dataset order reproducible.
for file in tqdm(sorted(glob.glob("./midifiles/*.mid"))):
    midi = converter.parse(file)
    parts = instrument.partitionByInstrument(midi)
    if parts is None:
        # NOTE(review): some music21 versions return None when the score cannot be
        # split by instrument; fall back to treating the whole score as one part.
        parts = [midi]
    for part in parts:
        for n in part.flat.notes:
            # Chords are skipped: only single notes are used as training tokens.
            if isinstance(n, chord.Chord):
                continue
            all_notes.append(n.pitch.pitchClass)

print(len(all_notes))

music21: Certain music21 functions might need the optional package matplotlib;
                  if you run into errors, install it by following the instructions at
                  http://mit.edu/music21/doc/installing/installAdditional.html
100%|██████████| 104/104 [01:16<00:00,  1.35it/s]

108655





In [3]:
# Number of consecutive notes fed to the network before it predicts the next one
SEQ_LEN = 50
# Width of the one-hot target: one column per pitch class, matching the
# 12-unit softmax layer the model ends with.
NUM_PITCH_CLASSES = 12

if len(all_notes) <= SEQ_LEN:
    # Without this guard the reshape below fails with an obscure IndexError.
    raise ValueError(
        'Need more than %d notes to build training windows, got %d'
        % (SEQ_LEN, len(all_notes))
    )

X = []
y = []

# Slide a window of SEQ_LEN notes over the corpus, one step at a time:
# the window is the input, the note right after it is the target.
for i in range(0, len(all_notes) - SEQ_LEN, 1):
    seq = all_notes[i:i + SEQ_LEN] # Take the input sequence
    out = all_notes[i + SEQ_LEN]   # Take the output note
    X.append(seq)
    y.append(out)

X = np.asarray(X)
# Add a trailing feature axis: (samples, SEQ_LEN) -> (samples, SEQ_LEN, 1)
X = np.reshape(X, (np.shape(X)[0],np.shape(X)[1],1))
# Fix the number of classes explicitly; otherwise to_categorical infers it from
# max(y) + 1 and yields fewer than 12 columns if the highest pitch class is absent.
y_onehot = np.asarray(to_categorical(y, num_classes=NUM_PITCH_CLASSES))
# Printing Shapes
print('Initial Data Shape: '+str(np.shape(all_notes)))
print('Training Data Shape: '+str(np.shape(X)))
print('Ground Truth Data Shape: '+str(np.shape(y_onehot)))

Initial Data Shape: (108655,)
Training Data Shape: (108605, 50, 1)
Ground Truth Data Shape: (108605, 12)


### Define an LSTM Network 

In [4]:
def createModel(network_input):
    """Build and compile the stacked-LSTM next-note classifier.

    Args:
        network_input: array whose axes 1 and 2 give the (timesteps, features)
            shape of a single input sample; only its ``shape`` is read.

    Returns:
        A compiled Keras ``Sequential`` model ending in a 12-way softmax,
        using categorical cross-entropy loss and the RMSprop optimizer.
    """
    timesteps = network_input.shape[1]
    features = network_input.shape[2]

    # Three LSTM layers (the first two emit full sequences so they can be
    # stacked), followed by a dense head; dropout of 0.3 between stages.
    layer_stack = [
        LSTM(256, input_shape=(timesteps, features), return_sequences=True),
        Dropout(0.3),
        LSTM(512, return_sequences=True),
        Dropout(0.3),
        LSTM(256),
        Dense(256),
        Dropout(0.3),
        Dense(12),
        Activation('softmax'),
    ]

    model = Sequential()
    for layer in layer_stack:
        model.add(layer)

    model.compile(loss='categorical_crossentropy', optimizer='rmsprop')
    return model

In [5]:
# Path to previously saved weights; leave empty to train from scratch.
path_to_hdf5 = ''

# The architecture is identical whether we train or restore weights.
model = createModel(X)

if path_to_hdf5 != '':
    # Restore weights from an earlier run instead of training.
    model.load_weights(path_to_hdf5)
else:
    # Save a checkpoint whenever the training loss reaches a new minimum;
    # the epoch number and loss are embedded in the file name.
    filepath = "lstm_weight-{epoch:02d}-{loss:.4f}.hdf5"
    checkpoint = ModelCheckpoint(filepath,
                                 monitor='loss',
                                 verbose=0,
                                 save_best_only=True,
                                 mode='min')
    callbacks_list = [checkpoint]
    model.fit(X, y_onehot, epochs=60, batch_size=128, callbacks=callbacks_list)

Train on 108605 samples
Epoch 1/60
   768/108605 [..............................] - ETA: 2:04:40 - loss: 2.9840

KeyboardInterrupt: 

In [None]:
# 1. Seed the generation with a randomly chosen window from the training set.
#    np.random.randint excludes its upper bound, so pass the sample count itself
#    (subtracting 1 would make the last window unreachable).
test_sequence = X[np.random.randint(0, np.shape(X)[0])]
pred_sequence = []

for i in range(100):
    # 2. Shape the current window as a batch of one: (1, timesteps, 1).
    test_sequence = np.reshape(test_sequence, (1, -1, 1))
    # 3. Predict the next note: the output is one probability per class,
    #    so argmax gives the most likely class. Stored as a plain Python int.
    prob = model.predict(test_sequence, verbose=0)
    pred_sequence.append(int(np.argmax(prob)))
    # 4. Slide the window: drop its oldest note and append the prediction.
    #    The result is a flat array that is reshaped again on the next pass.
    test_sequence = np.append(np.ravel(test_sequence)[1:], pred_sequence[-1])

In [None]:
from mingus.midi import fluidsynth
import mingus.core.notes as notes
from mingus.containers import NoteContainer
from mingus.containers import Note
import time

# SoundFont file used by the synthesizer
SF2 = './1.sf2'

# Initialise FluidSynth with the ALSA driver, then pause for a second before
# playing (presumably to let the audio output start - TODO confirm).
fluidsynth.init(SF2, 'alsa')
time.sleep(1)

# Play each predicted pitch in octave 4, holding it for a quarter of a second.
for pitch in pred_sequence:
    note_name = notes.int_to_note(pitch)
    note = Note(note_name, 4)
    fluidsynth.play_Note(note)
    time.sleep(0.25)
    fluidsynth.stop_Note(note)