# LSTM Music RNN

### IMPORTS

In [1]:
%run ./helper_functions.ipynb

In [18]:
# --- Run configuration -------------------------------------------------
epochs = 100                      # upper bound of the epoch counter in the training loop
lr = 0.001                        # learning rate handed to the optimizer
data_size = 150                   # passed to loadData together with batch_size
batch_size = 256*2
embedding_dim = 32                # width of the token embedding in the model
data_folder = "./mozart_sonatas/"
save_folder = 'V4'                # checkpoints are written below this folder
model_name = "lstm_model"

# NOTE(review): `e` is not defined anywhere in this cell; it is only bound by
# the training loop's `for e in ...`. On a fresh kernel this line raises
# NameError, so it presumably relies on state left over from an earlier run.
filepath = f"./{save_folder}/{model_name}_epoch{e}.mid"

# exist_ok avoids the check-then-create race and is a no-op when the folder
# is already there.
os.makedirs(save_folder, exist_ok=True)

In [None]:
class RNN(nn.Module):
    r'''
    Recurrent Neural Network Class with LSTM Layers

    The output of the last LSTM time step feeds three heads. The offset and
    length heads predict directly from it; the note head is additionally
    conditioned on the offset and length predictions.

    Architecture:
    
                     INPUT
                       |
          ____LSTM Layers Output_____
         /             |             \
     4 layers      3 layers      4 layers
     w/ Relu       w/ Relu       w/ Relu
        |              |             |
      Offset   --->  Concat  <---  Length
     Softmax           |           Softmax
        |          3 layers          |
        |          w/ Relu           |
        |              |             |
        |            Note            |
        |           Softmax          |
        |              |             |   
         \__________Concat__________/
                       |
                    OUTPUT

    (Raw docstring: the diagram contains backslashes that must not be
    interpreted as escape sequences or line continuations.)
    '''
    def __init__(self,sizes,embedding_dim):
        """
        Build the embedding, the LSTM stack and the three prediction heads.

        Args:
            sizes: indexable of three vocabulary sizes, in the order
                (notes, offsets, lengths).
            embedding_dim: width of the shared embedding fed to the LSTM.
        """
        super().__init__()
        self.hidden_size = 256
        self.num_layers = 3
        # Kept in a local so that self.dropout is only ever the nn.Dropout
        # module (the original briefly stored the float under the same name).
        dropout_p = .1
        self.notes_vocab_size = sizes[0]
        self.offset_vocab_size = sizes[1]
        self.len_vocab_size = sizes[2]
        # Plain Python int: layer sizes should not be numpy scalars.
        total_vocab_size = int(sizes[0] + sizes[1] + sizes[2])

        # One embedding table covers the three vocabularies together.
        self.embedding = nn.Embedding(total_vocab_size, embedding_dim)
        self.lstm = nn.LSTM(input_size = embedding_dim,
                            hidden_size = self.hidden_size, num_layers = self.num_layers,
                            batch_first = True, dropout = dropout_p)
        self.batch1 = nn.BatchNorm1d(self.hidden_size)
        self.dropout = nn.Dropout(dropout_p)

        # Note head: 1a-3a run on the LSTM output, 4a-6a run on the
        # concatenation with the length and offset predictions.
        self.linear1a = nn.Linear(self.hidden_size, self.notes_vocab_size)
        self.linear2a = nn.Linear(self.notes_vocab_size, self.notes_vocab_size*2)
        self.linear3a = nn.Linear(self.notes_vocab_size*2, self.notes_vocab_size)
        self.linear4a = nn.Linear(total_vocab_size, 2*total_vocab_size)
        self.linear5a = nn.Linear(2*total_vocab_size, self.notes_vocab_size)
        self.linear6a = nn.Linear(self.notes_vocab_size, self.notes_vocab_size)

        # Offset head.
        self.linear1b = nn.Linear(self.hidden_size, self.offset_vocab_size)
        self.linear2b = nn.Linear(self.offset_vocab_size, self.offset_vocab_size*2)
        self.linear3b = nn.Linear(self.offset_vocab_size*2, self.offset_vocab_size)
        self.linear4b = nn.Linear(self.offset_vocab_size, self.offset_vocab_size)

        # Length head.
        self.linear1c = nn.Linear(self.hidden_size, self.len_vocab_size)
        self.linear2c = nn.Linear(self.len_vocab_size, self.len_vocab_size*2)
        self.linear3c = nn.Linear(self.len_vocab_size*2, self.len_vocab_size)
        self.linear4c = nn.Linear(self.len_vocab_size, self.len_vocab_size)

        self.relu = nn.ReLU()
        self.Lrelu = nn.LeakyReLU()  # not used by forward(); kept for compatibility
        self.softmax = nn.LogSoftmax(dim = 1)


    def forward(self, x, hidden):
        """
        Run one forward pass.

        Args:
            x: integer index tensor of shape (batch, seq); the LSTM is
                batch_first and only its last time step is used.
            hidden: (h, c) tuple for the LSTM, e.g. from initHidden().

        Returns:
            A tuple (pred, hidden). pred has shape
            (batch, notes + offsets + lengths) and holds the note, offset and
            length log-softmax outputs concatenated in that order; hidden is
            the LSTM state after the sequence.
        """
        embeds = self.embedding(x)

        lstm_out, hidden = self.lstm(embeds, hidden)
        last_step = lstm_out[:,-1]

        # NOTE: batch norm + dropout are deliberately re-applied per head, as
        # in the original: each head gets its own dropout mask, and in
        # training mode the batch-norm running statistics are updated once
        # per call.
        out = self.dropout(self.batch1(last_step))
        out = self.relu(self.linear1a(out))
        out = self.relu(self.linear2a(out))
        half_done = self.relu(self.linear3a(out))

        out = self.dropout(self.batch1(last_step))
        out = self.relu(self.linear1b(out))
        out = self.relu(self.linear2b(out))
        out = self.relu(self.linear3b(out))
        offset_pred = self.softmax(self.linear4b(out))

        out = self.dropout(self.batch1(last_step))
        out = self.relu(self.linear1c(out))
        out = self.relu(self.linear2c(out))
        out = self.relu(self.linear3c(out))
        len_pred = self.softmax(self.linear4c(out))

        # The note head sees the length and offset predictions next to its
        # own intermediate features.
        out = self.relu(self.linear4a(torch.cat((len_pred, offset_pred, half_done) ,1)))
        out = self.relu(self.linear5a(out))
        note_pred = self.softmax(self.linear6a(out))

        return torch.cat((note_pred, offset_pred, len_pred), 1), hidden


    def initHidden(self,batch_size):
        """
        Return a zeroed (h, c) LSTM state for `batch_size` sequences.

        The tensors take dtype and device from the model's own parameters, so
        they always match the model regardless of any global device setting.
        """
        weight = next(self.parameters())
        state_shape = (self.num_layers, batch_size, self.hidden_size)
        h0 = weight.new_zeros(state_shape)
        h1 = weight.new_zeros(state_shape)
        return (h0, h1)




In [25]:
# Build the raw note / offset / length lists from the pieces under
# data_folder, then pass all three through clean_lists.
# (Both helpers are presumably defined by helper_functions.ipynb - confirm.)
raw_notes, raw_offsets, raw_length = music_to_lists(data_folder)
notes, offsets, length = clean_lists(
    raw_notes,
    raw_offsets,
    raw_length,
)

100%|███████████████████████████████████████████| 20/20 [00:38<00:00,  1.94s/it]


In [26]:
# One lookup table per feature.
note_dict = make_unique_map(notes)
len_dict = make_unique_map(length)
offset_dict = make_unique_map(offsets)

# Translate the raw values into ids; lengths and offsets are looked up by
# their value rounded to 4 decimals.
num_len = [len_dict[round(value,4)] for value in length]
num_notes = [note_dict[value] for value in notes]
num_offset = [offset_dict[round(value,4)] for value in offsets]

# Create the train/test loaders and splits for the model.
TestLoader, TrainLoader, X_train, X_test, y_train, y_test = loadData(data_size, batch_size, [num_notes, num_offset, num_len])

# Shift the offset (column 1) and length (column 2) labels so that the three
# features occupy consecutive, non-overlapping id ranges:
#   notes | offsets | lengths
# NOTE(review): the loaders were built before this shift; the evaluation code
# compares against shifted ids, so this presumably relies on the loaders
# sharing memory with y_train / y_test - confirm in loadData.
offset_shift = len(note_dict)
len_shift = len(offset_dict) + len(note_dict)
for labels in (y_train, y_test):
    for row_idx in range(len(labels)):
        labels[row_idx][1] += offset_shift
        labels[row_idx][2] += len_shift

In [29]:
# Vocabulary sizes in the order the model expects: (notes, offsets, lengths).
sizes = tuple(len(set(ids)) for ids in (num_notes, num_offset, num_len))

model = RNN(sizes, embedding_dim=embedding_dim).to(device)
optimizer = torch.optim.RMSprop(model.parameters(), lr=lr)
# NOTE(review): RNN.forward returns LogSoftmax outputs, while
# BCEWithLogitsLoss applies a sigmoid to its input as if it were raw logits.
# Left unchanged here; confirm this pairing is intended.
objective = nn.BCEWithLogitsLoss()

# Per-epoch history: training loss and test accuracy for each feature.
losses = []
nacc = []
oacc = []
lacc = []

In [None]:
# Train, evaluate and checkpoint once per epoch.
# NOTE(review): the loop resumes at epoch 43 but nothing visible here loads
# a checkpoint; it presumably relies on `model` / `optimizer` still holding
# the state of an earlier run of this cell - confirm before a fresh start.
for e in range(43,  epochs):
    # ---- training ----
    model.train()
    loss_val = 0
    # tqdm wraps the loader directly so the progress bar knows the total.
    for x, y in tqdm(TrainLoader, position=0, leave=True):
        x = x.to(device)
        y = y.to(device)

        # Size the state from the actual batch: the last batch of an epoch
        # can be smaller than batch_size.
        hidden = model.initHidden(x.shape[0])

        optimizer.zero_grad() #zero the gradient
        outputs, _ = model(x, hidden) #forward pass

        # Multi-hot target: a 1 at every label id listed in the row of y.
        # zeros_like puts it on the same device (and dtype) as the outputs.
        y_full = torch.zeros_like(outputs)
        y_full.scatter_(1, y.long(), 1.0)

        loss = objective(outputs, y_full) #get loss
        loss_val += loss.item()
        loss.backward() #backward pass
        nn.utils.clip_grad_norm_(model.parameters(),5)
        optimizer.step() #update model

    # NOTE(review): the summed batch losses are divided by the number of
    # batches AND by batch_size; kept so the values stay comparable with the
    # ones logged for earlier epochs.
    losses.append(loss_val/(len(TrainLoader)*batch_size))

    # ---- evaluation ----
    model.eval()
    nacc.append(0)
    lacc.append(0)
    oacc.append(0)
    with torch.no_grad():  # no gradients needed for accuracy
        for x, y in TestLoader:
            hidden = model.initHidden(x.shape[0])

            x = x.to(device)
            y = y.cpu().numpy()

            out, _ = model(x, hidden)
            out = out.cpu().numpy()

            # Arg-max inside each feature's slice of the output, shifted back
            # into the same id range the labels use.
            y_note = np.argmax(out[:,:sizes[0]], axis=1)
            y_off = np.argmax(out[:,sizes[0]:sizes[1]+sizes[0]], axis=1) + sizes[0]
            y_len = np.argmax(out[:,sizes[0] + sizes[1]:], axis=1) + sizes[1] + sizes[0]

            nacc[-1] += np.mean(y_note == y[:,0])
            oacc[-1] += np.mean(y_off == y[:,1])
            lacc[-1] += np.mean(y_len == y[:,2])

    # Average of the per-batch accuracies.
    nacc[-1] /= len(TestLoader)
    lacc[-1] /= len(TestLoader)
    oacc[-1] /= len(TestLoader)

    # ---- checkpoint ----
    torch.save({
        'epoch': e,
        'model_state_dict': model.state_dict(),
        'optimizer_state_dict': optimizer.state_dict(),
        'loss': loss,},
    f"./{save_folder}/{model_name}_epoch{e}.pt")

    print(f"Epoch {e}, \nloss: {losses[-1]}, \ntest note accuracy: {nacc[-1]},\ntest offset accuracy: {oacc[-1]},\ntest length accuracy: {lacc[-1]}\n")

170it [10:36,  3.74s/it]


Epoch 43, 
loss: 6.642864025921239e-06, 
test note accuracy: 0.3874782986111111,
test offset accuracy: 0.8918185763888888,
test length accuracy: 0.7743055555555556



119it [07:32,  3.57s/it]