In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import sys
print(sys.version)

3.7.9 (default, Oct 18 2020, 22:55:02) 
[Clang 10.0.1 (clang-1001.0.46.4)]


In [168]:
import glob

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from tqdm.notebook import trange, tqdm

import utils

In [4]:
torch.manual_seed(1);

In [31]:
DATA_ROOT = "data/nesmdb_token"
TRAIN_DIR = f"{DATA_ROOT}/train"
VALID_DIR = f"{DATA_ROOT}/valid"
TEST_DIR = f"{DATA_ROOT}/test"

VOCAB_PATH = "data/vocab.txt"

In [182]:
train_files = glob.glob(f"{TRAIN_DIR}/*.txt")
valid_files = glob.glob(f"{VALID_DIR}/*.txt")
test_files = glob.glob(f"{TEST_DIR}/*.txt")

vocab = utils.load_data(VOCAB_PATH)
token_to_index = {token: i for i, token in enumerate(vocab)}

VOCAB_SIZE = len(vocab)
print(f"Vocab size: {VOCAB_SIZE}")

Vocab size: 632


In [128]:
def to_tensor(tokens):
    """Convert a list of tokens to a tensor of indices."""
    return torch.LongTensor([token_to_index[token] for token in tokens])

In [183]:
class MusicGenerator(nn.Module):
    def __init__(self, input_size, hidden_size, output_size, rnn_type="gru", num_layers=1):
        assert rnn_type in ["gru", "lstm"]

        super().__init__()

        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.rnn_type = rnn_type
        self.num_layers = num_layers

        self.embeddings = nn.Embedding(input_size, hidden_size)
        self.rnn = (
            nn.LSTM(hidden_size, hidden_size, num_layers) if rnn_type == "lstm"
            else nn.GRU(hidden_size, hidden_size, num_layers)
        )
        self.fc = nn.Linear(self.hidden_size, self.output_size)

    def forward(self, x):
        # Note: Instead of using a one-hot encoding for each token and then
        # multiplying it by a weight matrix, we directly store an embedding
        # for every token in x. This is equivalent but speeds up computation.
        x = self.embeddings(x)

        # Add batch dimension.
        x = x.unsqueeze(0)

        # Forward pass through RNN and FC layer.
        x, _ = self.rnn(x)
        x = self.fc(x)

        # Remove batch dimension.
        x = x.squeeze(0)

        return x

In [184]:
HIDDEN_SIZE = 64
NUM_EPOCHS = 300
LEARNING_RATE = 0.1
LOG_EVERY_N = 50

In [185]:
model = MusicGenerator(
    input_size=VOCAB_SIZE,
    hidden_size=HIDDEN_SIZE,
    output_size=VOCAB_SIZE,
    rnn_type="lstm",
)
loss_function = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=LEARNING_RATE)

In [186]:
train_data = [
    to_tensor(utils.load_data(file))
    for file in train_files[:10]
]

In [187]:
tr = trange(1, NUM_EPOCHS + 1)
for epoch in tr:
    total_loss = 0.0
    for seq in train_data:
        # Reset gradients.
        model.zero_grad()
        
        # Prepare inputs and targets.
        x = seq[:-1]
        y = seq[1:]
        
        # Forward prop.
        y_hat = model(x)
        
        # Compute loss.
        loss = loss_function(y_hat, y)
        total_loss += loss.item()
        
        # Back prop.
        loss.backward()
        
        # Update parameters.
        optimizer.step()

    avg_loss = total_loss / len(seq)
    if epoch % LOG_EVERY_N == 0:
        tr.write(f"Epoch: {epoch} \t Avg loss: {avg_loss}")

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=300.0), HTML(value='')))

Epoch: 50 	 Avg loss: 1.8947256247202555
Epoch: 100 	 Avg loss: 1.6567293326059978
Epoch: 150 	 Avg loss: 1.5174024820327758
Epoch: 200 	 Avg loss: 1.4069296638170878
Epoch: 250 	 Avg loss: 1.3186911582946776
Epoch: 300 	 Avg loss: 1.2488004485766093



In [163]:
def validate(model, dataset):
    with torch.no_grad(): 
        total_loss = 0.0
        for seq in tqdm(dataset):
            x = seq[:-1]
            y = seq[1:]
            y_hat = model(x)
            loss = loss_function(y_hat, y)
            total_loss += loss.item()
        avg_loss = total_loss / len(seq)
        return avg_loss

In [164]:
validate(model, [t0, t1])

0.06680433337505047