In [91]:
# Character level lyrics generation using RNNs (LSTM)
import sys, os, random, string
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import matplotlib.pyplot as plt
import string
from tqdm.autonotebook import tqdm

import CharlyricsDataset
from RNN import RNN
import glob

# ignore warnings
import warnings
warnings.filterwarnings("ignore")

# interactive mode
plt.ion()

from pathlib import Path
from config import config
import utils

%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [92]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [93]:
train_dataset = CharlyricsDataset.CharLyricsDataset(config.DATA.LYRICS, config.TRAIN.MAX_LEN)

In [94]:
len(train_dataset)

1836262

In [95]:
train_loader = torch.utils.data.DataLoader(
    train_dataset,
    batch_size=config.TRAIN.BATCH_SIZE,
    shuffle=False,
    drop_last=True,
    num_workers=1
)

In [96]:
len(train_loader)

612087

In [97]:
model = RNN(utils.get_total_characters(), config.TRAIN.HIDDEN_SIZE, config.TRAIN.LSTM_N_LAYERS, utils.get_total_characters()).to(device)

optimizer = torch.optim.Adam(model.parameters(), lr=config.TRAIN.LEARNING_RATE)
loss_fn = nn.CrossEntropyLoss()

In [87]:
for epoch in range(config.TRAIN.EPOCHS):
    model.train()
    # tq = tqdm(train_loader, total=len(train_loader), desc=f"Training: Epoch {epoch+1}/{config.TRAIN.EPOCHS}")
    total_loss = 0

    for _, batch in enumerate(train_loader):
        model.zero_grad()
        input_seq, output_seq = batch

        input_seq = input_seq.to(device)
        output_seq = output_seq.to(device)
        loss = 0

        # vectorize this
        for c in range(config.TRAIN.MAX_LEN):
            output = model(input_seq[:, c])
            loss += loss_fn(output, output_seq[:, c])
            total_loss += loss.item()
            
        # tq.set_postfix(loss=loss.item())
        # batch-gradient-descent
        loss.backward()
        optimizer.step()

    print(f"Epoch {epoch+1}: Total Loss {total_loss/(config.TRAIN.MAX_LEN * len(train_loader))}")

Epoch 1: Total Loss 148.56669910971934
Epoch 2: Total Loss 122.61308879733086
Epoch 3: Total Loss 114.45420882454285
Epoch 4: Total Loss 111.89200695129541
Epoch 5: Total Loss 109.1643524980545
Epoch 6: Total Loss 109.25127456839267
Epoch 7: Total Loss 108.76884311951123
Epoch 8: Total Loss 108.44116926284937
Epoch 9: Total Loss 108.30524064678413
Epoch 10: Total Loss 108.12335863553561


In [88]:
def generate(prime="B", total_len=300, temp=0.85):
    generated_text = prime
    last_char = prime
    
    for c in range(total_len):
        input_char = torch.LongTensor(utils.char_to_label(last_char)).to(device)
        out = model(input_char)
        top_char = np.argmax(out.detach().cpu())
        predicted = string.printable[top_char]
        generated_text += predicted
        last_char = predicted
    return generated_text

In [90]:
generate("b")

'be in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in in i'