In [5]:
# Character level lyrics generation using RNNs (LSTM)
import sys, os, random, string
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import matplotlib.pyplot as plt
import string
from tqdm.autonotebook import tqdm

import CharlyricsDataset
from RNN import RNN
import glob

# ignore warnings
import warnings
warnings.filterwarnings("ignore")

# interactive mode
plt.ion()

from pathlib import Path
from config import config
import utils

%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [6]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [7]:
train_dataset = CharlyricsDataset.CharLyricsDataset(config.DATA.LYRICS, config.TRAIN.MAX_LEN)

In [8]:
len(train_dataset)

459065

In [9]:
train_loader = torch.utils.data.DataLoader(
    train_dataset,
    batch_size=config.TRAIN.BATCH_SIZE,
    shuffle=False,
    drop_last=True,
    num_workers=1
)

In [10]:
len(train_loader)

585

In [11]:
model = RNN(utils.get_total_characters(), config.TRAIN.HIDDEN_SIZE, config.TRAIN.LSTM_N_LAYERS, utils.get_total_characters()).to(device)

optimizer = torch.optim.Adam(model.parameters(), lr=config.TRAIN.LEARNING_RATE)
loss_fn = nn.CrossEntropyLoss()

In [20]:
for epoch in range(config.TRAIN.EPOCHS):
    model.train()
    tq = tqdm(train_loader, total=len(train_loader), desc=f"Training: Epoch {epoch+1}/{config.TRAIN.EPOCHS}")
    total_loss = 0

    for _, batch in enumerate(tq):
        model.zero_grad()
        input_seq, output_seq = batch

        input_seq = input_seq.to(device)
        output_seq = output_seq.to(device)
        loss = 0

        # vectorize this
        for c in range(config.TRAIN.MAX_LEN):
            output = model(input_seq[:, c])
            loss += loss_fn(output, output_seq[:, c])
            total_loss += loss.item()
            
        # tq.set_postfix(loss=loss.item())
        # batch-gradient-descent
        loss.backward()
        optimizer.step()

    print(f"Epoch {epoch+1}: Total Loss {total_loss/(config.TRAIN.MAX_LEN * len(train_loader))}")

HBox(children=(FloatProgress(value=0.0, description='Training: Epoch 1/10', max=840.0, style=ProgressStyle(des…


Epoch 1: Total Loss 374.75824603131036


HBox(children=(FloatProgress(value=0.0, description='Training: Epoch 2/10', max=840.0, style=ProgressStyle(des…


Epoch 2: Total Loss 369.94710677867323


HBox(children=(FloatProgress(value=0.0, description='Training: Epoch 3/10', max=840.0, style=ProgressStyle(des…


Epoch 3: Total Loss 369.54836229180154


HBox(children=(FloatProgress(value=0.0, description='Training: Epoch 4/10', max=840.0, style=ProgressStyle(des…


Epoch 4: Total Loss 369.39713707940354


HBox(children=(FloatProgress(value=0.0, description='Training: Epoch 5/10', max=840.0, style=ProgressStyle(des…


Epoch 5: Total Loss 369.32800469533225


HBox(children=(FloatProgress(value=0.0, description='Training: Epoch 6/10', max=840.0, style=ProgressStyle(des…


Epoch 6: Total Loss 369.38085261336346


HBox(children=(FloatProgress(value=0.0, description='Training: Epoch 7/10', max=840.0, style=ProgressStyle(des…


Epoch 7: Total Loss 369.665073798173


HBox(children=(FloatProgress(value=0.0, description='Training: Epoch 8/10', max=840.0, style=ProgressStyle(des…


Epoch 8: Total Loss 369.4202830215182


HBox(children=(FloatProgress(value=0.0, description='Training: Epoch 9/10', max=840.0, style=ProgressStyle(des…


Epoch 9: Total Loss 369.19014087901843


HBox(children=(FloatProgress(value=0.0, description='Training: Epoch 10/10', max=840.0, style=ProgressStyle(de…


Epoch 10: Total Loss 369.0909885830316


In [59]:
def generate(prime="B", total_len=100, temp=0.5):
    generated_text = prime

    for p in range(len(prime) - 1):
        input_char = torch.LongTensor(utils.char_to_label(prime[p])).to(device)
        _ = model(input_char)

    last_char = prime[-1]
    
    for c in range(total_len):
        input_char = torch.LongTensor(utils.char_to_label(last_char)).to(device)
        out = model(input_char)
        out = out.view(-1).detach().cpu().div(temp).exp()
        top_char = torch.multinomial(out, 1)[0]
        predicted = string.printable[top_char]
        generated_text += predicted
        last_char = predicted
    return generated_text

In [62]:
generate("man", 1000, 0.6)

"mankeng we the the me wery m inethe yot k ing sth istow yous ca thareyof pre whouseshe l be stho ther t the itheese t the, e fa, ara f wave meas us heth s the sther thend g t mo indeal, feat ton wanfoung re the, i thared n e oun a be wing y whe anomethe than routhe s thouere t i the ang whre ong t aianond w, lllat se ig g me wher s whe lling n m s the youghe, and se w, g an he the en ing as whe theelathe t ckend ind ar tge tahe l, y ideeri o, thang fe warond gare myo the he the fonar s int s in wayo it gon the te ou bote ha g y when wath s wan e bean mevend and ary the p be thed whane ber, he canu andi in hat windine w ang, ithi bor, myors o ry t, bin, ayofe, sthe are comide t therethon ta wayo s y be t thit yo t the tharuthe, t casin is yo istho st t bend t toushe ou mar hin youcho g le h min'me w sthe, t tin wane cor ur amangh, ongham a the thet lleno bel ithanengherenghithe ff m lo iner we t s the y ind m the i be ounginknou he 2 bal d llalo owh ve beve ingen ase gow, w win a ur nt