In [1]:
# Put these at the top of every notebook, to get automatic reloading and inline plotting
%reload_ext autoreload
%autoreload 2
%matplotlib inline

In [55]:
import numpy as np

from util.midi_manipulation import noteStateMatrixToMidi, midiToNoteStateMatrix, lowerBound, upperBound
from util.util import print_progress
from util.create_dataset import create_dataset, get_batch, make_one_hot_notes

import glob
import midi

### Create dataset

In [3]:
MAX_CONCURRENT_NOTES = 5

In [4]:
def reverse_onehot(encoded_song):
    reversed_encoded = np.zeros((encoded_song.shape[0], MAX_CONCURRENT_NOTES))
    for idx, timestamp in enumerate(encoded_song):
        note_idxs = np.where(timestamp == 1)[0]
        if len(note_idxs) > MAX_CONCURRENT_NOTES:
            note_idxs = note_idxs[:MAX_CONCURRENT_NOTES]
        reversed_encoded[idx, :len(note_idxs)] = note_idxs
    return reversed_encoded

In [106]:
def create_nonhot_dataset(min_length, multi=True):
    songs = glob.glob('data/*.mid*')
    
    encoded_songs = []
    discarded = 0
    for song in songs:
        encoded_song = midiToNoteStateMatrix(song)
        if len(encoded_song) >= min_length:
            if multi:
                encoded_song = reverse_onehot(encoded_song)
            else:
                encoded_song = make_one_hot_notes(encoded_song)
                encoded_song = np.argmax(encoded_song, axis=1)
            encoded_songs.append(encoded_song)
        else:
            discarded += 1
    print("{} songs processed".format(len(songs)))
    print("{} songs discarded".format(discarded))
    return encoded_songs

In [107]:
min_song_length  = 128
encoded_songs    = create_nonhot_dataset(min_song_length, multi=False)

88 songs processed
15 songs discarded


In [108]:
## Neural Network Parameters
# input_size       = encoded_songs[0].shape[1]   # The number of possible MIDI Notes
input_size       = upperBound - lowerBound   # The number of possible MIDI Notes
scale_size = input_size # 78
output_size      = input_size                  # Same as input size
hidden_size      = 256                         # Number of neurons in hidden layer

learning_rate    = 0.001 # Learning rate of the model
training_steps   = 5000  # Number of batches during training
batch_size       = 128    # Number of songs per batch
timesteps        = 64    # Length of song snippet -- this is what is fed into the model

assert timesteps < min_song_length

n_hidden = hidden_size

n_factors = 10

### Data Exploration

In [59]:
# max_simult = [np.max(np.sum(song, 1)) for song in encoded_songs]
# print(max(max_simult))

In [60]:
encoded_songs[2].shape

(161,)

encoded_songs

In [61]:
len(encoded_songs)

73

In [46]:
songs = glob.glob('data/*.mid*')

In [47]:
len(songs)

88

In [48]:
songs[0]

'data/Feel So Close - Verse.midi'

In [49]:
pattern = midi.read_midifile(songs[0])

In [50]:
encoded_song = midiToNoteStateMatrix(songs[0])

In [None]:
a = get_batch(encoded_songs, batch_size, timesteps, input_size, output_size)

In [None]:
a[0].shape

In [62]:
encoded_song[2]

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

### LSTM Model

In [109]:
import torch
import tqdm

In [110]:
def repackage_var(h):
    """Wraps h in new Variables, to detach them from their history."""
    return torch.autograd.Variable(h.data).cuda() if type(h) == torch.autograd.Variable else tuple(repackage_var(v) for v in h)

In [111]:
class StatefulLSTM(torch.nn.Module):
    def __init__(self, scale_size, n_factors, bs, nl):
        super().__init__()
        self.scale_size = scale_size
        self.nl = nl
        self.embedding = torch.nn.Embedding(scale_size, n_factors)
        self.rnn = torch.nn.LSTM(n_factors, n_hidden, nl, dropout=0.5)
        self.l_out = torch.nn.Linear(n_hidden, scale_size)
        self.init_hidden(bs)
        
    def forward(self, notes):
        bs = notes[0].shape[0]
        if self.h[0].size(1) != bs: 
            print('batch size is not same size as original:', bs)
            self.init_hidden(bs)
        emb = self.embedding(notes)
        outp,h = self.rnn(emb, self.h)
        self.h = repackage_var(h)
#         return torch.nn.functional.log_softmax(self.l_out(outp), dim=-1).view(-1, self.scale_size)
        return torch.nn.functional.log_softmax(self.l_out(outp[:, -1, :]), dim=-1)
#         return torch.nn.functional.softmax(self.l_out(outp[:, -1, :]), dim=-1)
    
    def init_hidden(self, bs):
        h1 = torch.autograd.Variable(torch.zeros(self.nl, bs, n_hidden))
        h2 = torch.autograd.Variable(torch.zeros(self.nl, bs, n_hidden))
        if self._cuda():
            self.h = (h1.cuda(), h2.cuda())
        else:
            self.h = (h1, h2)
    def _cuda(self):
        return next(self.parameters()).is_cuda

In [112]:
cuda_enabled = torch.cuda.is_available()

In [113]:

def get_batch(encoded_songs, batch_size, timesteps, input_size, output_size):
    rand_song_indices = np.random.randint(len(encoded_songs), size=batch_size)
#     batch_x = np.zeros((batch_size, timesteps, input_size))
    batch_x = np.zeros((batch_size, timesteps))
    batch_y = np.zeros((batch_size))
    for i in range(batch_size):
        song_ind = rand_song_indices[i]
        start_ind = np.random.randint(encoded_songs[song_ind].shape[0]-timesteps-1)
        batch_x[i] = encoded_songs[song_ind][start_ind:start_ind+timesteps]
        batch_y[i] = encoded_songs[song_ind][start_ind+timesteps]
    return batch_x, batch_y

In [114]:
def get_batch_cuda(*args):
    batch_x, batch_y = get_batch(*args)
#     y_reverse_hot = batch_y.argmax(axis=1)
    y_v = torch.autograd.Variable(torch.from_numpy(batch_y).long())
    x_v = torch.autograd.Variable(torch.from_numpy(batch_x).long())
    if cuda_enabled:
        return x_v.cuda(), y_v.cuda()
    return x_v, y_v

In [115]:
m = StatefulLSTM(scale_size, n_factors, batch_size, 2).cuda()

In [116]:
optimizer_fn = torch.optim.Adam(m.parameters(), lr=1e-4)

In [117]:
# loss_fn = torch.nn.CrossEntropyLoss()
loss_fn = torch.nn.NLLLoss()

### Playing around with batch size

In [92]:
batch_x, batch_y = get_batch(encoded_songs, batch_size, timesteps, input_size, output_size)

In [93]:
batch_y

array([41., 43., 52., 45., 43., 41., 43., 43., 43., 43., 43., 43., 48.,
       36., 43., 40., 41., 48., 45., 40., 43., 40., 40., 41., 52., 44.,
       43., 43., 40., 41., 41., 43., 43., 41., 43., 60., 41., 43., 43.,
       43., 40., 44., 43., 45., 43., 41., 45., 41., 53., 40., 41., 43.,
       43., 43., 43., 43., 43., 40., 43., 43., 43., 43., 43., 41., 43.,
       43., 43., 43., 50., 43., 43., 44., 43., 41., 50., 41., 40., 43.,
       40., 43., 50., 41., 41., 41., 43., 40., 57., 57., 41., 41., 50.,
       43., 43., 52., 36., 43., 41., 43., 43., 43., 43., 43., 43., 43.,
       43., 45., 43., 43., 43., 43., 41., 40., 40., 40., 43., 41., 40.,
       48., 43., 43., 48., 43., 43., 52., 43., 43., 43., 43.])

In [94]:
batch_x, batch_y = get_batch_cuda(encoded_songs, batch_size, timesteps, input_size, output_size)

In [95]:
print(batch_x.shape)
print(batch_y.shape)

torch.Size([128, 64])
torch.Size([128])


In [73]:
# vbatch_y = torch.autograd.Variable(torch.from_numpy(y_reverse_hot).long())
forward = m(batch_x)

batch size is not same size as original: 64


In [None]:
forward.size()

In [None]:
print(forward[:, -1].size())

In [None]:
def nll_loss_seq(inp, targ):
    sl,bs,nh = inp.size()
    targ = targ.transpose(0,1).contiguous().view(-1)
    return torch.nn.functional.nll_loss(inp.view(-1,nh), targ)

In [None]:
loss = loss_fn(forward, batch_y)
loss.backward()
optimizer_fn.step()

In [None]:
loss.data[0]

### Training

In [34]:
??get_batch

In [118]:
display_step = 100
for step in range(training_steps):
# for step in tqdm(range(training_steps)):
    batch_x, batch_y = get_batch_cuda(encoded_songs, batch_size, timesteps, input_size, output_size)
#     y_reverse_hot = batch_y.argmax(axis=1)
#     vbatch_y = torch.autograd.Variable(torch.from_numpy(y_reverse_hot).long())
    m.zero_grad()
    forward = m(batch_x)
    loss = loss_fn(forward, batch_y)
    loss.backward()
    optimizer_fn.step()
    if (step % display_step == 0):
        print(f'Step: {step} Loss: {loss.data[0]}')
    

batch size is not same size as original: 64
Step: 0 Loss: 4.367828845977783
Step: 100 Loss: 2.2450788021087646
Step: 200 Loss: 2.084689140319824
Step: 300 Loss: 1.9295320510864258
Step: 400 Loss: 1.7988028526306152
Step: 500 Loss: 1.6068980693817139
Step: 600 Loss: 1.4209301471710205
Step: 700 Loss: 1.3277291059494019
Step: 800 Loss: 1.0905829668045044
Step: 900 Loss: 0.9884465336799622
Step: 1000 Loss: 0.9860960245132446
Step: 1100 Loss: 1.1194413900375366
Step: 1200 Loss: 0.7504417896270752
Step: 1300 Loss: 0.9953158497810364
Step: 1400 Loss: 0.9345996379852295
Step: 1500 Loss: 0.996393620967865
Step: 1600 Loss: 1.102683663368225
Step: 1700 Loss: 0.9701059460639954
Step: 1800 Loss: 0.876598060131073
Step: 1900 Loss: 0.9252856969833374
Step: 2000 Loss: 1.0340992212295532
Step: 2100 Loss: 0.7221037149429321
Step: 2200 Loss: 0.947524905204773
Step: 2300 Loss: 0.9167802333831787
Step: 2400 Loss: 0.8938955068588257
Step: 2500 Loss: 0.9652553796768188
Step: 2600 Loss: 0.7662796974182129
St

In [None]:
print(m)

In [None]:
print(m(batch_x))

### Music generation

In [119]:
GEN_SEED_RANDOMLY = False # Use a random snippet as a seed for generating the new song.
if GEN_SEED_RANDOMLY:
    ind = np.random.randint(NUM_SONGS)
else:
    ind = 41 # "How Deep is Your Love" by Calvin Harris as a starting seed
    
gen_song = encoded_songs[ind][:timesteps].tolist() # TODO explore different (non-random) seed options
    

In [99]:
# def evaluate(seed):
# encoded_songs[ind][:timesteps].tolist()
len(gen_song)

64

# We need to have an unknown state (0)

In [122]:

def noteStateMatrixToMidi(statematrix, name="example", span=scale_size):
#     statematrix = np.array(statematrix)
    pattern = midi.Pattern()
    track = midi.Track()
    pattern.append(track)

    span = upperBound-lowerBound
    tickscale = 55

    lastcmdtime = 0
    prevstate = 0
    for time, state in enumerate(statematrix):  
        offNotes = []
        onNotes = []
        if prevstate != state:
            offNotes.append(prevstate)
            onNotes.append(state)
#         elif state > 0:
        prevstate = state
        for note in offNotes:
            track.append(midi.NoteOffEvent(tick=(time-lastcmdtime)*tickscale, pitch=note+lowerBound))
            lastcmdtime = time
        for note in onNotes:
            track.append(midi.NoteOnEvent(tick=(time-lastcmdtime)*tickscale, velocity=40, pitch=note+lowerBound))
            lastcmdtime = time

        prevstate = state

    eot = midi.EndOfTrackEvent(tick=1)
    track.append(eot)

    midi.write_midifile("{}.mid".format(name), pattern)



In [123]:
# generate music!
m.init_hidden(batch_size)
for i in range(500):
    seed = np.array([gen_song[-timesteps:]])
    # Use our RNN for prediction using our seed! 
    '''TODO: Write an expression to use the RNN to get the probability for the next note played based on the seed.
    Remember that we are now using the RNN for prediction, not training.'''
    # old way
    seed_v = torch.autograd.Variable(torch.from_numpy(seed).long()).cuda()

#     seed_v = torch.autograd.Variable(torch.from_numpy(np.argmax(seed, axis=1)).long()).cuda()
    predict_probs = m(seed_v)
    
    percentage_prob = (np.e ** predict_probs.data.cpu().numpy())
    # Define output vector for our generated song by sampling from our predicted probability distribution
    played_notes = np.zeros(output_size)
    '''TODO: Sample from the predicted distribution to determine which note gets played next.
    You can use a function from the numpy.random library to do this.
    Hint 1: range(x) produces a list of all the numbers from 0 to x
    Hint 2: make sure what you pass in has the "shape" you expect.'''
    sampled_note = np.random.choice(range(output_size), p=percentage_prob[0]) # TODO
#     print('Sampled_note:', sampled_note)
#     played_notes[sampled_note] = 1
#     gen_song.append(played_notes)
    gen_song.append(sampled_note)

noteStateMatrixToMidi(gen_song, name="generated/gen_song_0")
noteStateMatrixToMidi(encoded_songs[ind], name="generated/base_song_0")
print("saved generated song! seed ind: {}".format(ind))

batch size is not same size as original: 64
saved generated song! seed ind: 41


In [124]:
from IPython.lib.display import FileLink

In [125]:
FileLink("generated/gen_song_0.mid")

In [None]:
FileLink("generated/base_song_0.mid")

In [None]:
seed = np.array([gen_song[-timesteps:]])
#     predict_probs = sess.run(prediction, feed_dict={input_vec: seed}) # TODO
seed_v = torch.autograd.Variable(torch.from_numpy(seed).long()).cuda()

# seed_v = torch.autograd.Variable(torch.from_numpy(np.argmax(seed, axis=1)).long()).cuda()
print(seed_v.size())
predict_probs = m(seed_v)

print(predict_probs.size())

In [None]:
predict_probs

In [None]:
seed_v[0]

In [None]:
predict_probs.size()

In [None]:
output_size

In [None]:
np.sum(predict_probs.data.cpu().numpy())

In [None]:
np.random.choice()

In [None]:
percentage_prob = (np.e ** predict_probs.data.cpu().numpy())[0]

In [None]:
sampled_note = np.random.choice(range(output_size), p=percentage_prob) # TODO

In [None]:
sampled_note