In [1]:
import pandas as pd 
import numpy as np 
# https://github.com/petrosDemetrakopoulos/RNN-Beatles-lyrics-generator
# https://github.com/starry91/Lyric-Generator#2-lyric-generator-based-on-word-level-rnn

In [3]:
data = pd.read_csv("sample-dataset.csv")
data.head()

Unnamed: 0,artist,genre,title,lyrics
0,Migos,rap,Stir Fry,"Woo, woo, woo, woo\nWoo, woo, woo, woo\n\nDanc..."
1,Snoop Dogg,rap,Drop It Like It‚Äôs Hot,"Snoop\nSnoop\n\nWhen the pimp's in the crib, m..."
2,Drake,rap,Headlines,I might be too strung out on compliments\nOver...
3,Lil Uzi Vert,rap,XO TOUR Llif3,"Are you alright?\nI'm alright, I'm quite alrig..."
4,Lil Uzi Vert,rap,The Way Life Goes,"That's true (That's true), that's right (That ..."


In [4]:
data['t-lyric'] = data['title'] + " @@@ " + data['lyrics']
data['t-lyric'] = data['t-lyric'].str.lower()
data.head()

Unnamed: 0,artist,genre,title,lyrics,t-lyric
0,Migos,rap,Stir Fry,"Woo, woo, woo, woo\nWoo, woo, woo, woo\n\nDanc...","stir fry @@@ woo, woo, woo, woo\nwoo, woo, woo..."
1,Snoop Dogg,rap,Drop It Like It‚Äôs Hot,"Snoop\nSnoop\n\nWhen the pimp's in the crib, m...",drop it like it‚äôs hot @@@ snoop\nsnoop\n\nwh...
2,Drake,rap,Headlines,I might be too strung out on compliments\nOver...,headlines @@@ i might be too strung out on com...
3,Lil Uzi Vert,rap,XO TOUR Llif3,"Are you alright?\nI'm alright, I'm quite alrig...",xo tour llif3 @@@ are you alright?\ni'm alrigh...
4,Lil Uzi Vert,rap,The Way Life Goes,"That's true (That's true), that's right (That ...",the way life goes @@@ that's true (that's true...


In [5]:
stopChars = [',','(',')','.','-','[',']','"']
# preprocessing the corpus by converting all letters to lowercase, 
# replacing blank lines with blank string and removing special characters
def preprocessText(text):
    text = text.replace('\n', ' ').replace('\t','')
    processedText = text.lower()
    for char in stopChars:
        processedText = processedText.replace(char,'')
    return processedText
data['t-lyric'] = data['t-lyric'].apply(preprocessText)

In [6]:
# tokenization 
def corpusToList(corpus):
    corpusList = [w for w in corpus.split(' ')] 
    corpusList = [i for i in corpusList if i] #removing empty strings from list
    return corpusList
data['t-lyric'] = data['t-lyric'].apply(corpusToList)

In [7]:
data.head()

Unnamed: 0,artist,genre,title,lyrics,t-lyric
0,Migos,rap,Stir Fry,"Woo, woo, woo, woo\nWoo, woo, woo, woo\n\nDanc...","[stir, fry, @@@, woo, woo, woo, woo, woo, woo,..."
1,Snoop Dogg,rap,Drop It Like It‚Äôs Hot,"Snoop\nSnoop\n\nWhen the pimp's in the crib, m...","[drop, it, like, it‚äôs, hot, @@@, snoop, snoo..."
2,Drake,rap,Headlines,I might be too strung out on compliments\nOver...,"[headlines, @@@, i, might, be, too, strung, ou..."
3,Lil Uzi Vert,rap,XO TOUR Llif3,"Are you alright?\nI'm alright, I'm quite alrig...","[xo, tour, llif3, @@@, are, you, alright?, i'm..."
4,Lil Uzi Vert,rap,The Way Life Goes,"That's true (That's true), that's right (That ...","[the, way, life, goes, @@@, that's, true, that..."


In [8]:
# trim each word for leading or trailing spaces / tabs.
map(str.strip, data['t-lyric']) # trim words

<map at 0x1231f95d0>

In [9]:
data.head()

Unnamed: 0,artist,genre,title,lyrics,t-lyric
0,Migos,rap,Stir Fry,"Woo, woo, woo, woo\nWoo, woo, woo, woo\n\nDanc...","[stir, fry, @@@, woo, woo, woo, woo, woo, woo,..."
1,Snoop Dogg,rap,Drop It Like It‚Äôs Hot,"Snoop\nSnoop\n\nWhen the pimp's in the crib, m...","[drop, it, like, it‚äôs, hot, @@@, snoop, snoo..."
2,Drake,rap,Headlines,I might be too strung out on compliments\nOver...,"[headlines, @@@, i, might, be, too, strung, ou..."
3,Lil Uzi Vert,rap,XO TOUR Llif3,"Are you alright?\nI'm alright, I'm quite alrig...","[xo, tour, llif3, @@@, are, you, alright?, i'm..."
4,Lil Uzi Vert,rap,The Way Life Goes,"That's true (That's true), that's right (That ...","[the, way, life, goes, @@@, that's, true, that..."


In [12]:
corpus_words = [x for sublist in data['t-lyric'] for x in sublist]
vocab = sorted(set(corpus_words))
print('vocab length:', len(corpus_words))
print('Unique words in corpus: {}'.format(len(vocab)))

vocab length: 3166
Unique words in corpus: 676


In [13]:
# creating numeric map; representing words with numberes 
# map specific number to each specific word of our corpus, and vice versa 
word2idx = {u: i for i, u in enumerate(vocab)}
idx2words = np.array(vocab)
word_as_int = np.array([word2idx[c] for c in corpus_words])

## Prediction

In [14]:
import torch
import torch.nn as nn
from torch import nn
from torch import optim
from torchtext import data
from torchtext.datasets import UDPOS

- User inputs a song title, and how many words they want the song to be. 
- Network does, for example, 100 predictions, and in the training phrase we know what word we need to generate. 
- (genre, song title); have a marker that it's the end of the title 

In [17]:
from torch.autograd import Variable

import argparse
import time
import reader

In [18]:
class RNN(nn.Module):
  # Simple RNN Language model 
    def __init__(self, embedding_size, num_steps, batch_size, hidden_size, vocab_size):
        super(RNN, self).__init__()
        # parameters 
        self.embedding_size = embedding_size
        self.num_steps = num_steps
        self.batch_size = batch_size
        self.vocab_size = vocab_size
        
        # passing through network 
        self._embedding = nn.Embedding(vocab_size, embedding_size)
        self._rnn = nn.RNN(input_size=embedding_size, hidden_size=hidden_size)
        self._linear = nn.Linear(in_features=hidden_size, out_features=vocab_size)
        self.init_weights()

    def forward(self, inputs, hidden):
        embedding = self._embedding(inputs)
        rnn_output, _ = self._rnn(embedding, hidden)
        return self._linear(rnn_output)

# Set-up + Training

In [None]:
embedding_size = 
num_steps = 
batch_size = 
hidden_size = 
vocab_size = 

In [None]:
model = RNN(