# Music Generation using an LSTM

#### Final Project for Deep Learning (CS 7643)

By Daeil Cha, Daniel Dias, Chitwan Kaudan

### Global Variables

In [1]:
# data_path = "../../lmd_matched"
data_path = "../clean-data"
saved_models_path = "../saved-models"

num_epochs = 10 # 1000
batch_size = 8
num_time_steps = 128
num_total_songs = 400

### Environment

In [2]:
import os

import torch

import numpy as np
import matplotlib.pyplot as plt

# from IPython.core.debugger import set_trace
from getdata import getBatch

%matplotlib inline

%load_ext autoreload
%autoreload 2

learning_rate = 1e-6

##### Pytorch GPU/CPU

In [3]:
# dtype = torch.FloatTensor
# device = torch.device("cpu")

dtype = torch.cuda.FloatTensor
device = torch.device("cuda:0")

print(device)

cuda:0


### Data

#### Load In Data

In [4]:
start = 0
all_data = []

while start < num_total_songs:
    batch = np.array(getBatch(start, batch_size, num_time_steps, data_path), dtype='double')
    all_data.append(batch)
    # Shape should be (batch_size x num_time_steps x note_range x pitch/articulation)
    start += batch_size

all_data = np.concatenate(all_data, axis=0)

In [5]:
all_data.shape

(400, 128, 78, 2)

#### Apply Input Kernel

In [6]:
from model.input_function import InputKernel
inputkernel = InputKernel.apply

note_state_batch = torch.from_numpy(np.swapaxes(all_data,1,2)).float() 
#input kernel expects input shape = batch_size x num_notes x num_timesteps x 2
midi_high = 101
midi_low = 24
time_init=0

with torch.no_grad():
    note_state_batch = inputkernel(note_state_batch,midi_low,midi_high,time_init)

note_state_batch.shape
#input kernel's output shape = batch_size x num_notes x num_timesteps x 80

torch.Size([400, 78, 128, 80])

#### Partition Data

In [7]:
x_train = None
y_train = None

x_val = None
y_val = None

x_test = None
y_test = None

all_expected = np.empty(all_data.shape)
all_expected[:, 0:all_expected.shape[1]-1] = all_data[:, 1:all_data.shape[1]]
all_expected[:, all_expected.shape[1]-1] = 0

all_data = np.reshape(all_data, (num_total_songs, num_time_steps, -1))
all_expected = np.reshape(all_expected, (num_total_songs, num_time_steps, -1))

print("all data:", all_data.shape)
print("all expected:", all_expected.shape)

note_state_batch.requires_grad_()

orig_dataset = torch.utils.data.TensorDataset(note_state_batch.type(torch.FloatTensor), torch.from_numpy(all_expected).type(torch.FloatTensor))
x_train, x_test = torch.utils.data.random_split(orig_dataset, [num_total_songs - 50, 50])

x_train_loader = torch.utils.data.DataLoader(x_train, batch_size=batch_size, shuffle=True)
x_test_loader = torch.utils.data.DataLoader(x_test, batch_size=batch_size)

# for data in x_train_loader:
#     print(data[0].shape, data[1].shape)

all data: (400, 128, 156)
all expected: (400, 128, 156)


## Functions

#### Train Steps

In [8]:
def train_step(x, y, model, loss_criterion, optimizer):
    y_pred = model(x)

    # Compute and print loss
    # loss = loss_criterion(torch.max(y_pred, dim=1).indices, y)
    loss = loss_criterion(y_pred, y)
    ret_val = loss.item()

    # Zero gradients, perform a backward pass, and update the weights.
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    
    return ret_val

def test_step(x, y, model, loss_criterion):
    y_pred = model(x)
    loss = loss_criterion(y_pred, y)
    
    return loss.item()

#### Save/Load Model

In [9]:
def save_model(model, model_name):
    torch.save(model.state_dict(), os.path.join(saved_models_path, model_name))

def load_model_parameters(model, model_name):
    model.load_state_dict(torch.load(os.path.join(saved_models_path, model_name)))

def load_new_model(model_name, model_constructor, *args):
    model = model_constructor(args)
    load_model_parameters(model, model_name)
    return model

### Model

In [10]:
from model.main_model import MusicGenerationV2

model = MusicGenerationV2(time_sequence_len=num_time_steps, batch_size=batch_size, time_hidden_size=36, data_type=dtype, device=device)

load_model_parameters(model, "biaxial_trained.pt")

loss_criterion = torch.nn.MSELoss(reduction='sum') # = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

#### Move To Correct Device

In [11]:
model.to(device)

MusicGenerationV2(
  (lstm_time0): ModuleList(
    (0): LSTM(80, 36, batch_first=True)
    (1): LSTM(80, 36, batch_first=True)
    (2): LSTM(80, 36, batch_first=True)
    (3): LSTM(80, 36, batch_first=True)
    (4): LSTM(80, 36, batch_first=True)
    (5): LSTM(80, 36, batch_first=True)
    (6): LSTM(80, 36, batch_first=True)
    (7): LSTM(80, 36, batch_first=True)
    (8): LSTM(80, 36, batch_first=True)
    (9): LSTM(80, 36, batch_first=True)
    (10): LSTM(80, 36, batch_first=True)
    (11): LSTM(80, 36, batch_first=True)
    (12): LSTM(80, 36, batch_first=True)
    (13): LSTM(80, 36, batch_first=True)
    (14): LSTM(80, 36, batch_first=True)
    (15): LSTM(80, 36, batch_first=True)
    (16): LSTM(80, 36, batch_first=True)
    (17): LSTM(80, 36, batch_first=True)
    (18): LSTM(80, 36, batch_first=True)
    (19): LSTM(80, 36, batch_first=True)
    (20): LSTM(80, 36, batch_first=True)
    (21): LSTM(80, 36, batch_first=True)
    (22): LSTM(80, 36, batch_first=True)
    (23): LSTM(80, 3

## Training

In [17]:
model.train()
best_loss = 30000

# num_train_steps = 0
losses = []

for epoch in range(num_epochs):
    print("Epoch:", epoch)
    losses.append([])

    for i, data in enumerate(x_train_loader, 0):
        x, y = data[0].to(device), data[1].to(device)

        loss = train_step(x, y, model, loss_criterion, optimizer)
        losses[epoch].append(loss)

        print(" -", num_train_steps, ":", loss)
        if num_train_steps % 10 == 0 and loss < best_loss:
            best_loss = loss
            save_model(model, "biaxial_{0}.pt".format(num_train_steps))
            print(" --- saving model to biaxial_{0}.pt ---".format(num_train_steps))
        
        num_train_steps += 1
    losses[epoch] = np.asarray(losses[epoch])
            
save_model(model, "biaxial_trained.pt")

Epoch: 0
 - 440 : 16070.888671875
 --- saving model to biaxial_440.pt ---
 - 441 : 15140.724609375
 - 442 : 14763.494140625
 - 443 : 15935.17578125
 - 444 : 14846.9150390625
 - 445 : 16343.25
 - 446 : 15737.119140625
 - 447 : 16946.623046875
 - 448 : 15418.9873046875
 - 449 : 15382.314453125
 - 450 : 18391.083984375
 - 451 : 15160.7939453125
 - 452 : 13694.53515625
 - 453 : 15783.974609375
 - 454 : 16505.181640625
 - 455 : 14993.923828125
 - 456 : 15638.017578125
 - 457 : 14972.087890625
 - 458 : 16685.34765625
 - 459 : 15875.21484375
 - 460 : 16582.951171875
 - 461 : 16996.46875
 - 462 : 14915.5537109375
 - 463 : 16376.2734375
 - 464 : 19302.96875
 - 465 : 15234.9189453125
 - 466 : 14397.5595703125
 - 467 : 16689.197265625
 - 468 : 18130.07421875
 - 469 : 17242.421875
 - 470 : 17544.97265625
 - 471 : 16837.28515625
 - 472 : 15778.6259765625
 - 473 : 16975.931640625
 - 474 : 16205.9453125
 - 475 : 16643.896484375
 - 476 : 17150.14453125
 - 477 : 17185.294921875
 - 478 : 16674.583984375

 - 764 : 16196.279296875
 - 765 : 16377.634765625
 - 766 : 15572.0703125
 - 767 : 16580.439453125
 - 768 : 15807.7529296875
 - 769 : 14741.787109375
 - 770 : 17865.5234375
 - 771 : 16867.08984375
 - 772 : 17999.84765625
 - 773 : 16488.08984375
 - 774 : 14913.998046875
 - 775 : 15485.26953125
 - 776 : 16326.689453125
 - 777 : 16964.328125
 - 778 : 15296.5322265625
 - 779 : 15727.779296875
 - 780 : 15637.296875
 - 781 : 17760.251953125
 - 782 : 17247.1875
 - 783 : 16832.326171875
 - 784 : 14751.974609375
 - 785 : 17255.2421875
 - 786 : 15664.984375
 - 787 : 15933.0986328125
 - 788 : 14636.2744140625
 - 789 : 16551.82421875
 - 790 : 16325.73828125
 - 791 : 12085.1455078125
Epoch: 8
 - 792 : 15543.9619140625
 - 793 : 17061.05859375
 - 794 : 15741.96875
 - 795 : 14579.19921875
 - 796 : 18780.974609375
 - 797 : 16560.892578125
 - 798 : 14844.484375
 - 799 : 16929.109375
 - 800 : 16556.00390625
 - 801 : 16046.263671875
 - 802 : 15461.892578125
 - 803 : 16804.48828125
 - 804 : 15967.0576171875

#### Save Model

In [13]:
save_model(model, "biaxial_epochs30.pt")

## Test

In [14]:
model.eval()

with torch.no_grad():
    for data in x_test_loader:
        x, y = data[0].to(device), data[1].to(device)

        loss = test_step(x, y, model, loss_criterion)
        print(loss)

14941.841796875
16315.681640625
16645.833984375
16636.525390625
16352.1640625
15169.2607421875
3877.65380859375


##### IPDB Test Code

In [15]:
# set_trace()

def test_code():
    set_trace()

    train_iter = iter(x_train_loader)
    data = next(train_iter)
    x, y = data[0].to(device), data[1].to(device)    

    loss = train_step(x, y, model, loss_criterion, optimizer)
    
    print(loss)
    
    data = next(train_iter)
    x, y = data[0].to(device), data[1].to(device)

    loss = train_step(x, y, model, loss_criterion, optimizer)
    
    print(loss)
# test_code()

In [19]:
print(len(loss))
for loss in losses:
    print(loss)

44
[16070.88867188 15140.72460938 14763.49414062 15935.17578125
 14846.91503906 16343.25       15737.11914062 16946.62304688
 15418.98730469 15382.31445312 18391.08398438 15160.79394531
 13694.53515625 15783.97460938 16505.18164062 14993.92382812
 15638.01757812 14972.08789062 16685.34765625 15875.21484375
 16582.95117188 16996.46875    14915.55371094 16376.2734375
 19302.96875    15234.91894531 14397.55957031 16689.19726562
 18130.07421875 17242.421875   17544.97265625 16837.28515625
 15778.62597656 16975.93164062 16205.9453125  16643.89648438
 17150.14453125 17185.29492188 16674.58398438 16219.31640625
 15910.76367188 15856.06640625 15231.92773438 12096.79199219]
[16956.60546875 14337.75390625 16156.72558594 16669.8828125
 16348.03515625 17003.35742188 16164.91796875 15261.55566406
 17029.35546875 15746.04980469 16393.95117188 16031.703125
 16695.91210938 15276.70214844 15183.37597656 15685.45507812
 16339.13671875 14697.08007812 16739.48242188 15746.90722656
 17022.94921875 15671.32