In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F

import numpy as np
from collections import Counter
import os
from argparse import Namespace
import re
import string
from rhyme_finder import RhymeFinder
import random

flags = Namespace(
    seq_size=32,
    batch_size=16,
    num_batches=1000,
    embedding_size=128,
    lstm_size=128,
    gradients_norm=5,
    initial_words=['sleeh', 'hcni'],
    predict_top_k=5,
    checkpoint_path='checkpoint',
)

In [2]:
import pandas as pd

df = pd.read_csv('DataScraper/hiphop_lyrics.csv')
df = df.append(pd.read_csv('DataScraper/hiphop_lyrics2.csv'))

df = df.dropna()

def clean_lyrics(l):
    l = re.sub(r'[\(\[].*?[\)\]]', '', l)
    l = os.linesep.join([s for s in l.splitlines() if s])
    l = l.replace('\r', '').replace('?', '').replace("!", '').replace(',', '').replace('.', '')
    l += '\n'
    l = ''.join([i for i in l if i in string.printable])
    #l = l.replace('\n', '$')
    return l.lower()

df['lyrics'] = df['lyrics'].apply(clean_lyrics)

df

Unnamed: 0,artist,title,url,lyrics
0,The Weeknd,6 Inch Heel,https://genius.com/The-weeknd-6-inch-heel-lyrics,six inch heel she walked in the club like nobo...
1,The Weeknd,Acquainted,https://genius.com/The-weeknd-acquainted-lyrics,baby you're no good\ncause they warned me 'bou...
2,The Weeknd,Adaptation,https://genius.com/The-weeknd-adaptation-lyrics,when the sun comes up you're searching for a l...
3,The Weeknd,After Hours,https://genius.com/The-weeknd-after-hours-lyrics,thought i almost died in my dream again \nfigh...
4,The Weeknd,Airports,https://genius.com/The-weeknd-airports-lyrics,i think i'm fuckin' gone rollin' on this floor...
...,...,...,...,...
6462,YG,Yo Nigga Ain’t Me,https://genius.com/Yg-yo-nigga-aint-me-lyrics,hook: charlie hood and yg\nsee shawty be rocki...
6463,YG,Yo Pussy,https://genius.com/Yg-yo-pussy-lyrics,raw smooth with a banger now\ndon't trip \ni b...
6464,YG,You Betta Kno,https://genius.com/Yg-you-betta-kno-lyrics,ay you don't even know it\ni'm on this bitch\n...
6465,YG,You Broke,https://genius.com/Yg-you-broke-lyrics,bitch you broke shut up\ndont talk to me get y...


In [13]:
corpus[-1000:]

"e for work she \n money the for work \n dollar every worth she's and \n finish the to start the from \n money the for worked she money the for worked she \n witness her was i and everybody murdered she goddamn \n business nobody's like club the in walked she heel inch six \n enemy an like stage fucking the killed she then \n no oh ecstasy it's like veins her through rushing \n recipe her that's tastes it way the love she \n hennessy that with ace that up mixing she \n professional she up it give gotta don't she \n decimals them and commas uno de \n mexico of out pesos know you \n goes she everywhere money money stacking she's \n money the for work she \n money the for work she \n money the for work she \n money the for work she \n minute every worth she's but \n dollar every worth dollar every worth she's and \n finish the to start the from \n money the for worked she money the for worked she \n witness her was i and everybody murdered she goddamn \n business nobody's like club the in

In [3]:
rf = RhymeFinder(df['lyrics'])

corpus = ''.join(list(df['lyrics']))

def revert(data):
    lines = data.split('\n')
    lines = [' '.join(x.split(' ')[::-1]) for x in lines]
    lines = lines[::-1]
    lines = ' \n '.join(lines)
    return lines

corpus = revert(corpus)

rf.find_lines_ending_with_word('inch heels')

def get_data_from_file(corpus, batch_size, seq_size):
    text = corpus.split(' ')

    word_counts = Counter(text)
    sorted_vocab = sorted(word_counts, key=word_counts.get, reverse=True)
    int_to_vocab = {k: w for k, w in enumerate(sorted_vocab)}
    vocab_to_int = {w: k for k, w in int_to_vocab.items()}
    n_vocab = len(int_to_vocab)

    print('Vocabulary size', n_vocab)

    int_text = [vocab_to_int[w] for w in text]
    num_batches = int(len(int_text) / (seq_size * batch_size))
    in_text = int_text[:num_batches * batch_size * seq_size]
    out_text = np.zeros_like(in_text)
    out_text[:-1] = in_text[1:]
    out_text[-1] = in_text[0]
    in_text = np.reshape(in_text, (batch_size, -1))
    out_text = np.reshape(out_text, (batch_size, -1))
    return int_to_vocab, vocab_to_int, n_vocab, in_text, out_text

In [4]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
int_to_vocab, vocab_to_int, n_vocab, in_text, out_text = get_data_from_file(
    corpus, flags.batch_size, flags.seq_size)

Vocabulary size 74389


In [5]:
class RNNModule(nn.Module):
    def __init__(self, n_vocab, seq_size, embedding_size, lstm_size):
        super(RNNModule, self).__init__()
        self.seq_size = seq_size
        self.lstm_size = lstm_size
        self.embedding = nn.Embedding(n_vocab, embedding_size)
        self.lstm = nn.LSTM(embedding_size,
                            lstm_size,
                            batch_first=True)
        self.dense = nn.Linear(lstm_size, n_vocab)
    
    def forward(self, x, prev_state):
        embed = self.embedding(x).float()
        output, state = self.lstm(embed, prev_state)
        logits = self.dense(output)

        return logits, state
    
    def zero_state(self, batch_size):
        return (torch.zeros(1, batch_size, self.lstm_size).float(),
                torch.zeros(1, batch_size, self.lstm_size).float())

In [6]:
model = RNNModule(n_vocab, flags.seq_size,
                flags.embedding_size, flags.lstm_size)
model.to(device)

RNNModule(
  (embedding): Embedding(74389, 128)
  (lstm): LSTM(128, 128, batch_first=True)
  (dense): Linear(in_features=128, out_features=74389, bias=True)
)

In [54]:
model.load_state_dict(torch.load('checkpoint_pt/model-26000.pth'))

<All keys matched successfully>

In [55]:
def predict(device, net, words, n_vocab, vocab_to_int, int_to_vocab, top_k=5):
    net.eval()

    state_h, state_c = net.zero_state(1)
    state_h = state_h.to(device)
    state_c = state_c.to(device)
    for w in words:
        ix = torch.tensor([[vocab_to_int[w]]]).to(device)
        output, (state_h, state_c) = net(ix, (state_h, state_c))
    
    _, top_ix = torch.topk(output[0], k=top_k)
    choices = top_ix.tolist()[0]
    return [int_to_vocab[choice] for choice in choices]

In [56]:
def generate_line(words, context, min_length=5, max_length=8):
    new_word = ''
    words = words.split(' ')[::-1]
    words_with_context = context.split(' ')[::-1] + words
    while True:
        new_words = predict(device, model, words_with_context, n_vocab, vocab_to_int, int_to_vocab, top_k=5)
        new_word = ''
        if len(words) < min_length and '\n' in new_words:
            new_words.remove('\n')
        if len(words) > max_length:
            break
        new_word = random.choice(new_words)
        if new_word == '\n':
            break
        words_with_context.append(new_word)
        words.append(new_word)
    return ' '.join(words[::-1]) + ' \n '

In [57]:
def generate_verse(words, n_lines=8):
    lines = []
    context = ''
    for i in range(n_lines):
        line = generate_line(words, context)
        potential_rhymes = rf.find_lines_ending_with_word(words)
        if len(potential_rhymes) > 0:
            res = {k: v for k, v in sorted(potential_rhymes.items(), key=lambda item: item[1])}
            prob = 1
            keys = list(res.keys())
            for i in range(len(keys)):
                words = keys[i]
                prob /= 2
                r = random.random()
                if r < prob:
                    break
        lines.append(line)
        context = '\n ' + lines[-1]
    return ''.join(lines[::-1]).replace('\n ', '\n')
        

In [58]:
print(generate_verse('all now', n_lines=16))

bout to fuck that bitch yes 
she got y'all know this gangster chest no lisp 
tried to pull up for the 40000 dollar dress 
find a stack it up to your so fresh 
can't even 'cause prom decanter broad 
if i go to me and your so fresh 
lays flat where ya ashy to pullup decanter broad 
niggas know you wear droppin' lenox mall now 
your ass is that you feel like decanter broad 
on par and your so fresh 
got a whole white decanter broad 
no silencer on that blue state your so fresh 
scrimmage throw a yellow fify decanter broad 
and cedar 'till at lenox mall now 
cake bitch and then i act like decanter broad 
but we got your spendin money all now 

