In [4]:
import matplotlib
import numpy as np
import matplotlib.pyplot as plt

import torch
import torch.nn as nn
import torch.optim as optim
import pickle

In [31]:
class MusicGenRNN(nn.Module):
  """LSTM model that consumes one (note, time, velocity) event per call.

  The note and velocity are integer indices in [0, 128). Each is turned into a
  one-hot vector and passed through its own bias-free linear embedding. The two
  embeddings are concatenated with the scalar time value and fed to an LSTM,
  whose output is projected by a linear decoder to ``2 * 128 + 1 = 257`` values.
  """

  def __init__(self, hidden_size=512, num_layers=1, bias=True):
    """Build the embeddings, the LSTM and the decoder.

    Args:
      hidden_size: number of features in the LSTM hidden state.
      num_layers: number of stacked LSTM layers.
      bias: whether the LSTM uses bias weights.
    """
    super(MusicGenRNN, self).__init__()
    # One event is (note, time, velocity). After embedding, the LSTM input is
    # 128 (note) + 1 (time) + 128 (velocity) = 257 features wide.
    self.one_hot_size = 128
    self.note_emb_size = 128
    self.velocity_emb_size = 128
    self.input_size = self.note_emb_size + self.velocity_emb_size + 1
    self.hidden_size = hidden_size
    self.output_size = self.one_hot_size * 2 + 1

    # Identity matrix whose rows are used as one-hot vectors. It is registered
    # as a buffer so that .to()/.cuda()/.double() move and cast it together
    # with the weights; persistent=False keeps it out of state_dict so the set
    # of checkpoint keys does not change.
    self.register_buffer("ident", torch.eye(self.one_hot_size), persistent=False)
    self.note_embedding = nn.Linear(self.one_hot_size, self.note_emb_size, bias=False)
    self.velocity_embedding = nn.Linear(self.one_hot_size, self.velocity_emb_size, bias=False)

    # Recurrent core; dropout is disabled.
    self.rnn = nn.LSTM(self.input_size, hidden_size, num_layers, bias, batch_first=True, dropout=0)
    # Fully-connected layer mapping the LSTM output to output_size values.
    self.decoder = nn.Linear(hidden_size, self.output_size)

  def forward(self, input, hidden_in=None):
    """Process a single event and return the decoder output and LSTM state.

    Args:
      input: a length-3 sequence or tensor ``(note, time, velocity)``. ``note``
        and ``velocity`` are converted with ``int()`` and used as row indices
        into the 128x128 identity matrix; ``time`` is converted with ``float()``.
      hidden_in: optional ``(h_0, c_0)`` LSTM state, e.g. the state returned by
        a previous call. ``None`` lets the LSTM start from zeros.

    Returns:
      ``(output, hidden_out)``: ``output`` has shape ``(1, 257)`` and holds the
      raw decoder values (presumably scores for the next event - TODO confirm
      the layout against the training loss); ``hidden_out`` is the
      ``(h_n, c_n)`` pair returned by ``nn.LSTM``.

    Raises:
      IndexError: if the note or velocity index is outside the identity matrix.
    """
    inp_note, inp_time, inp_velocity = input
    note_index = int(inp_note)
    velocity_index = int(inp_velocity)

    # One-hot rows -> learned embeddings.
    embedded_note = self.note_embedding(self.ident[note_index])
    embedded_velocity = self.velocity_embedding(self.ident[velocity_index])

    # Create the time feature with the same dtype/device as the rest of the
    # model; a bare torch.tensor(...) would always be a default-dtype CPU
    # tensor and break once the model has been moved or cast.
    time_feature = torch.tensor(
      [float(inp_time)], dtype=self.ident.dtype, device=self.ident.device
    )

    # A 2-D (1, 257) input is treated by nn.LSTM as an unbatched sequence of
    # length one.
    inp = torch.cat(
      (
        embedded_note.reshape(1, -1),
        time_feature.reshape(1, -1),
        embedded_velocity.reshape(1, -1),
      ),
      dim=1,
    )
    output, hidden_out = self.rnn(inp, hidden_in)  # next output and hidden state
    output = self.decoder(output)
    return output, hidden_out

In [20]:
# Load the pickled dataset from the working directory.
# NOTE(review): pickle.load can execute arbitrary code while deserialising;
# only open data.pickle if it comes from a trusted source.
with open('data.pickle', mode='rb') as pickle_file:
    dataset = pickle.load(pickle_file)

# Show what was loaded.
print(dataset)

[<torch.utils.data.dataset.Subset object at 0x0000028C23AEEE30>, <torch.utils.data.dataset.Subset object at 0x0000028C18BAFD90>, <torch.utils.data.dataset.Subset object at 0x0000028C18BAFE80>]


In [13]:
# Unpack the loaded object into its three parts; this raises ValueError unless
# `dataset` holds exactly three items. The (train, validation, test) order is
# assumed from the names - TODO confirm against the code that wrote data.pickle.
train, validation, test = dataset

In [42]:
# Smoke test: push one event from the training split through a fresh
# two-layer model and inspect the shape of the resulting LSTM cell state.
model = MusicGenRNN(num_layers=2)

print(train[0][0][31])  # the raw event before conversion to a tensor

t = torch.tensor(train[0][0][31])
out, hidden = model(t)

# `hidden` is the (h_n, c_n) pair returned by the LSTM.
(h_n, c_n) = hidden
print(c_n.size())


[0, 0.3170733041666666, 0]
torch.Size([2, 512])
