# Attentive Music

I plan to use a Transformer architecture to generate musical MIDI sequences.

In [36]:
from music21 import *
import os
import numpy as np
from tqdm import tqdm_notebook as tqdm
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable
import torch.optim as optim
from torchsample.modules import ModuleTrainer
import pickle
from sklearn.model_selection import train_test_split

## Data

I've found a [dataset](https://github.com/jukedeck/nottingham-dataset) of MIDI files.

In [3]:
PATH = "../nottingham-dataset/MIDI"
# Keep only regular files from the dataset directory (sub-directories are skipped).
files = list(filter(lambda name: os.path.isfile(PATH + '/' + name), os.listdir(PATH)))
files[:10]

['waltzes7.mid',
 'reelsa-c79.mid',
 'reelsr-t57.mid',
 'jigs211.mid',
 'morris29.mid',
 'reelsu-z8.mid',
 'jigs156.mid',
 'ashover5.mid',
 'reelsa-c32.mid',
 'morris10.mid']

The MIDI parsing code below is adapted from [this tutorial](https://www.hackerearth.com/blog/machine-learning/jazz-music-using-deep-learning/).

In [3]:
def get_notes(file_list, PATH):
    """Parse MIDI files into one flat list of note/chord tokens and cache it.

    Each single note becomes its pitch string (``str(element.pitch)``); each
    chord becomes the dot-joined integers of its normal order. Tokens from all
    files are concatenated in ``file_list`` order.

    Args:
        file_list: file names (relative to ``PATH``) of the MIDI files to parse.
        PATH: directory containing the files.

    Returns:
        The list of string tokens. As a side effect the list is pickled to
        ``data/notes`` so later runs can skip the parsing.
    """
    notes = []
    for file in tqdm(file_list):
        # converting .mid file to stream object
        midi = converter.parse(PATH + '/' + file)
        # Reset for every file: otherwise a failed partition would either raise
        # NameError (first file) or silently reuse the previous file's parts.
        parts = None
        try:
            # Given a single stream, partition into a part for each unique instrument
            parts = instrument.partitionByInstrument(midi)
        except Exception:
            # Best effort: fall back to the flat note stream below.
            parts = None
        if parts: # if parts has instrument parts
            notes_to_parse = parts.parts[0].recurse()
        else:
            notes_to_parse = midi.flat.notes
        for element in notes_to_parse:
            if isinstance(element, note.Note):
                # if element is a note, extract pitch
                notes.append(str(element.pitch))
            elif isinstance(element, chord.Chord):
                # if element is a chord, append the normal form of the
                # chord (a list of integers) to the list of notes.
                notes.append('.'.join(str(n) for n in element.normalOrder))

    # Make sure the cache directory exists so the (slow) parse above is not
    # thrown away by a FileNotFoundError at the very end.
    os.makedirs('data', exist_ok=True)
    with open('data/notes', 'wb') as filepath:
        pickle.dump(notes, filepath)
    return notes

In [4]:
# Reuse the cached token list when it exists and is non-empty; otherwise
# (fresh checkout, or an empty cache file) rebuild it from the MIDI files so
# that `notes` is always defined after this cell runs.
# The cache is a pickle written locally by get_notes -- never point this at a
# pickle from an untrusted source.
NOTES_CACHE = 'data/notes'
if os.path.isfile(NOTES_CACHE) and os.path.getsize(NOTES_CACHE) > 0:
    with open(NOTES_CACHE, 'rb') as f:
        notes = pickle.load(f)
else:
    notes = get_notes(files, PATH)

In [6]:
# Vocabulary: every distinct token, sorted so the integer ids are deterministic.
pitchnames = sorted(set(notes))
# Map each token to its index in the sorted vocabulary.
note_to_int = {token: idx for idx, token in enumerate(pitchnames)}

In [7]:
# Encode the whole token sequence as vocabulary indices, then preview it.
int_notes = [note_to_int[token] for token in notes]
int_notes[:10]

[88, 111, 34, 108, 103, 88, 34, 110, 88, 94]

In [8]:
# Length, in notes, of each training window cut from the encoded sequence.
# Despite the name this is not the mini-batch size: the training cell further
# down sets `batch_size` separately.
bs = 8

In [9]:
# Cut the encoded notes into consecutive, non-overlapping windows of `bs` notes.
# Each target window is its input window shifted one note ahead, so the last
# window needs one extra note after it. Using (len - 1) // bs instead of
# len // bs drops the final window only when the sequence length is an exact
# multiple of `bs`, which is the case where its target would be one note short.
n_windows = max(len(int_notes) - 1, 0) // bs
xs = [np.array(int_notes[i*bs:(i+1)*bs]) for i in range(n_windows)]
ys = [np.array(int_notes[i*bs+1:(i+1)*bs+1]) for i in range(n_windows)]

In [10]:
# Preview the first ten input windows.
xs[0:10]

[array([ 88, 111,  34, 108, 103,  88,  34, 110]),
 array([ 88,  94,  67, 118,  94,  88,  34, 110]),
 array([ 88, 111,  34, 108, 103,  88,  34, 110]),
 array([ 88,  94,  44, 108,  97,  83, 103,  34]),
 array([ 88, 111,  34, 108, 103,  88,  34, 110]),
 array([ 88,  94,  67, 118,  94,  88,  34, 110]),
 array([ 88, 111,  34, 108, 103,  88,  34, 110]),
 array([ 88,  94,  44, 108,  97,  83, 103,  34]),
 array([ 88, 111,  34, 108, 103,  88,  34, 110]),
 array([ 88,  94,  67, 118,  94,  88,  34, 110])]

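`ys` holds the targets: each window in `ys` is the corresponding window in `xs` shifted one note ahead, so `ys[i][j]` is the note that follows `xs[i][j]` in the original sequence.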

In [11]:
# Preview the first ten target windows.
ys[0:10]

[array([111,  34, 108, 103,  88,  34, 110,  88]),
 array([ 94,  67, 118,  94,  88,  34, 110,  88]),
 array([111,  34, 108, 103,  88,  34, 110,  88]),
 array([ 94,  44, 108,  97,  83, 103,  34,  88]),
 array([111,  34, 108, 103,  88,  34, 110,  88]),
 array([ 94,  67, 118,  94,  88,  34, 110,  88]),
 array([111,  34, 108, 103,  88,  34, 110,  88]),
 array([ 94,  44, 108,  97,  83, 103,  34,  88]),
 array([111,  34, 108, 103,  88,  34, 110,  88]),
 array([ 94,  67, 118,  94,  88,  34, 110,  88])]

In [43]:
# Hold out 25% of the windows for validation. The split shuffles, so fix the
# seed to get the same train/validation partition on every Restart & Run All.
x_tr, x_val, y_tr, y_val = train_test_split(xs, ys, test_size=0.25, random_state=42)

In [None]:
# NOTE(review): `BucketIterator` is not imported by the import cell at the top of
# this notebook (it looks like torchtext's iterator -- confirm), and `splits` is
# called with an empty tuple. This cell was never executed and appears to be an
# unfinished placeholder. TODO: import it and pass real datasets, or remove.
train_iter, val_iter = BucketIterator.splits(())

## LSTM

Let's first try an LSTM as a simple example.

In [12]:
def tensor(from_int):
    """Copy an integer sequence/array into a new int64 (long) torch tensor."""
    as_array = np.array(from_int)
    return torch.from_numpy(as_array).to(torch.int64)

def each_tensor(items):
    """Convert every element of `items` with `tensor`, returning a list."""
    return list(map(tensor, items))

In [27]:
class LSTMTagger(nn.Module):
    """Stateful stacked-LSTM next-token model.

    Tokens are embedded, passed through a multi-layer LSTM and projected to
    log-probabilities over the vocabulary. The LSTM state is stored on the
    module (`self.h`) and carried over from one call to the next, detached
    from the autograd graph so back-propagation stops at batch boundaries.

    Args:
        vocab_size: number of distinct tokens (embedding rows and output classes).
        n_hidden: LSTM hidden size.
        n_fac: embedding dimension.
        bs: batch size used to allocate the initial hidden state.
        nl: number of stacked LSTM layers.
    """

    def __init__(self, vocab_size, n_hidden, n_fac, bs, nl):
        super().__init__()
        self.n_hidden = n_hidden
        self.vocab_size,self.nl = vocab_size,nl
        self.e = nn.Embedding(vocab_size, n_fac)
        self.rnn = nn.LSTM(n_fac, n_hidden, nl, dropout=0.5)
        self.l_out = nn.Linear(n_hidden, vocab_size)
        self.init_hidden(bs)

    def forward(self, cs):
        """Return log-probabilities for every position of `cs`.

        Args:
            cs: integer tensor laid out (seq_len, batch), the LSTM's default
                (non batch-first) layout.

        Returns:
            Tensor of shape (seq_len * batch, vocab_size) holding log-softmax
            scores.
        """
        bs = cs[0].size(0)
        weight = self.l_out.weight
        state = self.h[0]
        # Re-allocate the state when the batch size changes, or when the module
        # has been moved/cast (e.g. model.cuda()) since the state was created.
        if state.size(1) != bs or state.device != weight.device or state.dtype != weight.dtype:
            self.init_hidden(bs)
        outp,h = self.rnn(self.e(cs), self.h)
        # Keep the values but cut the graph, so the next batch does not
        # back-propagate into this one.
        self.h = tuple(s.detach() for s in h)
        return F.log_softmax(self.l_out(outp), dim=-1).view(-1, self.vocab_size)

    def init_hidden(self, bs):
        """Reset the (h, c) state to zeros for a batch of size `bs`."""
        weight = self.l_out.weight
        # Allocate on the same device/dtype as the parameters.
        self.h = (torch.zeros(self.nl, bs, self.n_hidden, dtype=weight.dtype, device=weight.device),
                  torch.zeros(self.nl, bs, self.n_hidden, dtype=weight.dtype, device=weight.device))

In [28]:
batch_size = 32
use_cuda = False
# LSTMTagger.forward already returns log-probabilities (log_softmax), so the
# matching criterion is NLLLoss; CrossEntropyLoss would apply log_softmax a
# second time on top of them.
criterion = nn.NLLLoss()
# NOTE(review): 8 stacked layers of only 8 hidden units each is an unusual
# shape -- confirm n_hidden / nl are not swapped or placeholders.
model = LSTMTagger(n_fac=50,n_hidden=8,vocab_size=len(note_to_int),bs=batch_size,nl=8)
if use_cuda:
    model.cuda()
    criterion.cuda()
trainer = ModuleTrainer(model)
trainer.set_optimizer(optim.Adam, lr=1e-3)
trainer.set_loss(criterion)

# Bug in torchsample?
trainer._has_multiple_loss_fns = False

model

LSTMTagger(
  (e): Embedding(120, 50)
  (rnn): LSTM(50, 8, num_layers=8, dropout=0.5)
  (l_out): Linear(in_features=8, out_features=120, bias=True)
)

In [29]:
# NOTE(review): each_tensor returns a Python list with one tensor per window.
# The TypeError recorded below ("forward() takes 2 positional arguments but
# 30728 were given") suggests the trainer hands every list element to
# forward() as a separate input -- TODO confirm against torchsample, then pass
# a single stacked tensor for inputs and for targets instead.
trainer.fit(each_tensor(xs), each_tensor(ys), num_epoch=4, batch_size=batch_size, shuffle=False)

Epoch 1/4: 100%|██████████| 1/1 [00:00<00:00,  2.76 batches/s]


TypeError: forward() takes 2 positional arguments but 30728 were given