# Music Generation using an LSTM

#### Final Project for Deep Learning (CS 7643)

By Daeil Cha, Daniel Dias, Chitwan Kaudan

### Global Variables

In [1]:
# data_path = "../../lmd_matched"
data_path = "../clean-data"
saved_models_path = "../saved-models"

num_epochs = 10 # 1000
batch_size = 4
num_time_steps = 256
num_total_songs = 400

### Environment

In [2]:
import os

import torch

import numpy as np
import matplotlib.pyplot as plt

from IPython.core.debugger import set_trace
from getdata import getBatch

%matplotlib inline

%load_ext autoreload
%autoreload 2

learning_rate = 1e-6

##### Pytorch GPU/CPU

In [3]:
# dtype = torch.FloatTensor
# device = torch.device("cpu")

dtype = torch.cuda.FloatTensor
device = torch.device("cuda:0")

print(device)

cuda:0


### Data

#### Load In Data

In [4]:
start = 0
all_data = []

while start < num_total_songs:
    batch = np.array(getBatch(start, batch_size, num_time_steps, data_path), dtype='double')
    all_data.append(batch)
    # Shape should be (batch_size x num_time_steps x note_range x pitch/articulation)
    start += batch_size

all_data = np.concatenate(all_data, axis=0)

In [5]:
all_data.shape

(400, 256, 78, 2)

#### Apply Input Kernel

In [6]:
from model.input_function import InputKernel
inputkernel = InputKernel.apply

note_state_batch = torch.from_numpy(np.swapaxes(all_data,1,2)).float() 
#input kernel expects input shape = batch_size x num_notes x num_timesteps x 2
midi_high = 101
midi_low = 24
time_init=0

with torch.no_grad():
    note_state_batch = inputkernel(note_state_batch,midi_low,midi_high,time_init)

note_state_batch.shape
#input kernel's output shape = batch_size x num_notes x num_timesteps x 80

torch.Size([400, 78, 256, 80])

#### Partition Data

In [20]:
x_train = None
y_train = None

x_val = None
y_val = None

x_test = None
y_test = None

all_expected = np.empty(all_data.shape)
all_expected[:, 0:all_expected.shape[1]-1] = all_data[:, 1:all_data.shape[1]]
all_expected[:, all_expected.shape[1]-1] = 0

all_data = np.reshape(all_data, (num_total_songs, num_time_steps, -1))
all_expected = np.reshape(all_expected, (num_total_songs, num_time_steps, -1))

print("all data:", all_data.shape)
print("all expected:", all_expected.shape)

note_state_batch.requires_grad_()

orig_dataset = torch.utils.data.TensorDataset(note_state_batch.type(torch.FloatTensor), torch.from_numpy(all_expected).type(torch.FloatTensor))
x_train, x_test = torch.utils.data.random_split(orig_dataset, [num_total_songs - 50, 50])

x_train_loader = torch.utils.data.DataLoader(x_train, batch_size=batch_size, shuffle=True)
x_test_loader = torch.utils.data.DataLoader(x_test, batch_size=batch_size)

for data in x_train_loader:
    print(data[0].shape, data[1].shape)

all data: (400, 256, 156)
all expected: (400, 256, 156)
torch.Size([4, 78, 256, 80]) torch.Size([4, 256, 156])
torch.Size([4, 78, 256, 80]) torch.Size([4, 256, 156])
torch.Size([4, 78, 256, 80]) torch.Size([4, 256, 156])
torch.Size([4, 78, 256, 80]) torch.Size([4, 256, 156])
torch.Size([4, 78, 256, 80]) torch.Size([4, 256, 156])
torch.Size([4, 78, 256, 80]) torch.Size([4, 256, 156])
torch.Size([4, 78, 256, 80]) torch.Size([4, 256, 156])
torch.Size([4, 78, 256, 80]) torch.Size([4, 256, 156])
torch.Size([4, 78, 256, 80]) torch.Size([4, 256, 156])
torch.Size([4, 78, 256, 80]) torch.Size([4, 256, 156])
torch.Size([4, 78, 256, 80]) torch.Size([4, 256, 156])
torch.Size([4, 78, 256, 80]) torch.Size([4, 256, 156])
torch.Size([4, 78, 256, 80]) torch.Size([4, 256, 156])
torch.Size([4, 78, 256, 80]) torch.Size([4, 256, 156])
torch.Size([4, 78, 256, 80]) torch.Size([4, 256, 156])
torch.Size([4, 78, 256, 80]) torch.Size([4, 256, 156])
torch.Size([4, 78, 256, 80]) torch.Size([4, 256, 156])
torch.Siz

## Functions

#### Train Steps

In [30]:
def train_step(x, y, model, loss_criterion, optimizer):
    y_pred = model(x)

    # Compute and print loss
    # loss = loss_criterion(torch.max(y_pred, dim=1).indices, y)
    loss = loss_criterion(y_pred, y)
    ret_val = loss.item()

    # Zero gradients, perform a backward pass, and update the weights.
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    
    return ret_val

def test_step(x, y, model, loss_criterion):
    y_pred = model(x)
    loss = loss_criterion(y_pred, y)
    
    return loss.item()

#### Save/Load Model

In [9]:
def save_model(model, model_name):
    torch.save(model.state_dict(), os.path.join(saved_models_path, model_name))

def load_model_parameters(model, model_name):
    model.load_state_dict(torch.load(os.path.join(saved_models_path, model_name)))

def load_new_model(model_name, model_constructor, *args):
    model = model_constructor(args)
    load_model_parameters(model, model_name)
    return model

### Model

In [35]:
from model.main_model import MusicGeneration

model = MusicGeneration(time_sequence_len=num_time_steps, batch_size=batch_size, time_hidden_size=36, data_type=dtype, device=device)

load_model_parameters(model, "biaxial_trained.pt")

loss_criterion = torch.nn.MSELoss(reduction='sum') # = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

#### Move To Correct Device

In [36]:
model.to(device)

MusicGeneration(
  (lstm_time0): LSTM(80, 36, batch_first=True)
  (lstm_note0): LSTM(36, 2, batch_first=True)
  (dropout): Dropout(p=0.2, inplace=False)
)

## Training

In [26]:
best_loss = 30000

for epoch in range(num_epochs):
    print("Epoch:", epoch)
    for i, data in enumerate(x_train_loader, 0):
        x, y = data[0].to(device), data[1].to(device)

        loss = train_step(x, y, model, loss_criterion, optimizer)

        print(" -", i, ":", loss)
        if i % 10 == 0 and loss < best_loss:
            best_loss = loss
            save_model(model, "biaxial_{0}.pt".format(i))
            print(" --- saving model to biaxial_{0}.pt ---".format(i))
            
save_model(model, "biaxial_trained.pt")

Epoch: 0
4
 - 0 : 30296.60546875
4
 - 1 : 32940.8515625
4
 - 2 : 29523.12109375
4
 - 3 : 31033.8046875
4
 - 4 : 29786.384765625
4
 - 5 : 33620.5546875
4
 - 6 : 30806.8984375
4
 - 7 : 30085.505859375
4
 - 8 : 31307.57421875
4
 - 9 : 32343.49609375
4
 - 10 : 31877.92578125
4
 - 11 : 29693.28515625
4
 - 12 : 31181.126953125
4
 - 13 : 28727.9921875
4
 - 14 : 31700.71875
4
 - 15 : 30914.80859375
4
 - 16 : 33069.96875
4
 - 17 : 28758.576171875
4
 - 18 : 29172.818359375
4
 - 19 : 30139.408203125
4
 - 20 : 31852.53515625
4
 - 21 : 30372.076171875
4
 - 22 : 29736.17578125
4
 - 23 : 30086.3828125
4
 - 24 : 30591.119140625
4
 - 25 : 30509.44921875
4
 - 26 : 32254.55859375
4
 - 27 : 31452.216796875
4
 - 28 : 36101.3515625
4
 - 29 : 31217.00390625
4
 - 30 : 32462.0859375
4
 - 31 : 31067.65625
4
 - 32 : 32206.64453125
4
 - 33 : 28838.234375
4
 - 34 : 28892.810546875
4
 - 35 : 31286.3125
4
 - 36 : 32148.328125
4
 - 37 : 30792.953125
4
 - 38 : 29234.71484375
4
 - 39 : 32847.75
4
 - 40 : 33197.8125
4
 

4
 - 63 : 28930.97265625
4
 - 64 : 30492.875
4
 - 65 : 29658.79296875
4
 - 66 : 32693.95703125
4
 - 67 : 28463.953125
4
 - 68 : 30781.390625
4
 - 69 : 31113.5703125
4
 - 70 : 29545.2578125
4
 - 71 : 29662.595703125
4
 - 72 : 31700.046875
4
 - 73 : 29276.96484375
4
 - 74 : 31513.6171875
4
 - 75 : 32937.02734375
4
 - 76 : 34521.7265625
4
 - 77 : 26952.423828125
4
 - 78 : 29838.576171875
4
 - 79 : 31371.6171875
4
 - 80 : 29229.87109375
4
 - 81 : 27679.40625
4
 - 82 : 29490.20703125
4
 - 83 : 29391.189453125
4
 - 84 : 30431.806640625
4
 - 85 : 29788.47265625
4
 - 86 : 29885.64453125
2
batch_size = 2 < 4
 - 87 : 16207.3369140625
Epoch: 4
4
 - 0 : 30347.3515625
4
 - 1 : 28551.3203125
4
 - 2 : 28218.34375
4
 - 3 : 30696.01171875
4
 - 4 : 30183.333984375
4
 - 5 : 28898.1015625
4
 - 6 : 28987.15625
4
 - 7 : 31279.88671875
4
 - 8 : 31894.8125
4
 - 9 : 29871.10546875
4
 - 10 : 30152.501953125
4
 - 11 : 32169.32421875
4
 - 12 : 25207.43359375
4
 - 13 : 32717.9921875
4
 - 14 : 26622.9140625
4
 - 15

4
 - 41 : 29033.876953125
4
 - 42 : 28533.896484375
4
 - 43 : 28050.7890625
4
 - 44 : 27148.181640625
4
 - 45 : 29011.40234375
4
 - 46 : 31234.658203125
4
 - 47 : 29463.912109375
4
 - 48 : 31190.12109375
4
 - 49 : 29675.83203125
4
 - 50 : 30409.751953125
4
 - 51 : 30958.72265625
4
 - 52 : 31923.1640625
4
 - 53 : 28770.580078125
4
 - 54 : 25756.671875
4
 - 55 : 29688.0234375
4
 - 56 : 27484.869140625
4
 - 57 : 31128.41015625
4
 - 58 : 26445.68359375
4
 - 59 : 28556.06640625
4
 - 60 : 30518.291015625
4
 - 61 : 28113.23046875
4
 - 62 : 28064.662109375
4
 - 63 : 31507.72265625
4
 - 64 : 29973.70703125
4
 - 65 : 28165.416015625
4
 - 66 : 30583.822265625
4
 - 67 : 29085.80859375
4
 - 68 : 31494.6640625
4
 - 69 : 29557.291015625
4
 - 70 : 28953.52734375
4
 - 71 : 28564.337890625
4
 - 72 : 28386.71484375
4
 - 73 : 30973.0859375
4
 - 74 : 28943.296875
4
 - 75 : 31322.3203125
4
 - 76 : 28143.080078125
4
 - 77 : 32798.5625
4
 - 78 : 29029.64453125
4
 - 79 : 29830.609375
4
 - 80 : 30286.53515625
4

#### Save Model

In [27]:
save_model(model, "biaxial_epochs10.pt")

## Test

In [37]:
model.eval()

with torch.no_grad():
    for data in x_test_loader:
        x, y = data[0].to(device), data[1].to(device)

        loss = test_step(x, y, model, loss_criterion)
        print(loss)

29426.2421875
28197.728515625
28028.541015625
30195.796875
27500.77734375
28377.0546875
30155.49609375
26090.654296875
26728.234375
29298.826171875
28926.55078125
25726.6328125
15634.611328125


##### IPDB Test Code

In [None]:
# set_trace()

def test_code():
    set_trace()

    train_iter = iter(x_train_loader)
    data = next(train_iter)
    x, y = data[0].to(device), data[1].to(device)    

    loss = train_step(x, y, model, loss_criterion, optimizer)
    
    print(loss)
    
    data = next(train_iter)
    x, y = data[0].to(device), data[1].to(device)

    loss = train_step(x, y, model, loss_criterion, optimizer)
    
    print(loss)
test_code()