# Lyric Generator

In [1]:
import types
from itertools import islice
import random

## n-grams

In [2]:
def build_ngrams(itr, n=2):
    """Return a lazy sequence of n-grams from the source iterable.

    Parameters
    ----------
    itr : iterable
        Source items. Any iterable is accepted, including one-shot iterators
        and generators; ``len()`` and indexing are not required.
    n : int
        Number of consecutive items in each n-gram. Must be at least 1.

    Returns
    -------
    generator of tuple
        Every window of ``n`` consecutive items, in source order. Nothing is
        produced when the source holds fewer than ``n`` items.

    Raises
    ------
    ValueError
        If ``n`` is smaller than 1.
    """
    # Validate eagerly so a bad ``n`` fails at the call site, not on first iteration.
    if n < 1:
        raise ValueError("n must be at least 1")

    def _windows():
        window = ()
        for item in itr:
            # Slide the window: append the new item and keep only the last n.
            window = (window + (item,))[-n:]
            if len(window) == n:
                yield window

    return _windows()
        

In [3]:
# Bigrams over 0..9: the result must be iterable but must not be a list.
a = build_ngrams(range(10), n=2)
assert hasattr(a, '__iter__')
assert not isinstance(a, list)
al = list(a)
assert al == [(start, start + 1) for start in range(9)]

# 5-grams over 0..9: six windows, each of five consecutive integers.
b = build_ngrams(range(10), n=5)
assert hasattr(b, '__iter__')
assert not isinstance(b, list)
bl = list(b)
assert bl == [tuple(range(start, start + 5)) for start in range(6)]

# The same windowing applied to a list of words.
words = 'one two three four five six seven'.split(' ')
expected_word_grams = [
    ('one', 'two', 'three', 'four', 'five'),
    ('two', 'three', 'four', 'five', 'six'),
    ('three', 'four', 'five', 'six', 'seven'),
]
assert list(build_ngrams(words, n=5)) == expected_word_grams

## Markov chain

In [4]:
def build_chain(ngrams, chain=None):
    """Build a Markov chain out of an iterable of n-grams.

    Parameters
    ----------
    ngrams : iterable of tuples
        The n-grams to add. For each n-gram, all elements but the last form
        the source node of the Markov chain and the last element is the
        target node.
    chain : dict or None
        An existing Markov chain to add the n-grams to, or None to start a
        new chain. A supplied dict is updated in place.

    Returns
    -------
    dict
        Maps each source node (a tuple) to the list of targets observed after
        it, in the order they were encountered (duplicates are kept). When
        ``ngrams`` is empty the chain is returned unchanged.
    """
    if chain is None:
        chain = {}

    for gram in ngrams:
        source, target = tuple(gram[:-1]), gram[-1]
        # setdefault creates the target list the first time a source is seen.
        chain.setdefault(source, []).append(target)

    return chain
    

In [5]:
# Reseed so the two random sequences below are reproducible.
random.seed(0)
seq1 = [random.randint(0,10) for i in range(200)]
chain = build_chain(build_ngrams(seq1, n=3))
seq2 = [random.randint(0,10) for i in range(200)]
# Passing the existing chain adds the second sequence's n-grams to the same dict.
chain = build_chain(build_ngrams(seq2, n=3), chain=chain)
# The expected values below are tied to seed 0 and to the order of the random calls above.
assert chain[(0,0)]==[7, 10, 0, 3, 4]
assert chain[(4,2)]==[1, 3, 8, 3, 7, 1, 10, 2, 8]
assert len(chain.keys())==111

In [6]:
import random

def generate_sequence(chain, m):
    """Generate a new sequence of length m from a Markov chain.

    Parameters
    ----------
    chain : dict
        A dict where the keys are the source nodes of the Markov chain steps
        (tuples) and the values are lists of possible targets.
    m : int
        The length of the sequence to generate.

    Returns
    -------
    list
        The generated items. The walk starts from a randomly chosen source
        node and repeatedly appends a randomly chosen target of the most
        recent state. The result has exactly ``m`` items, except when
        ``m <= 0`` (empty list) or when the walk cannot continue at all.

    Raises
    ------
    IndexError
        If ``chain`` is empty and ``m`` is positive (there is nothing to
        choose a starting node from).
    """
    if m <= 0:
        return []

    sources = list(chain.keys())
    start = random.choice(sources)
    order = len(start)
    result = list(start)

    while len(result) < m:
        # The current state is the last `order` items generated so far.
        state = tuple(result[len(result) - order:])
        targets = chain.get(state)
        if targets:
            result.append(random.choice(targets))
            continue

        # Dead end: this state was never seen as a source (for example the
        # final items of the training data). Restart from a random source
        # node instead of failing with a KeyError.
        restart = random.choice(sources)
        if not restart:
            # A zero-length source adds nothing, so the walk cannot progress.
            break
        result.extend(restart)

    # Trim in case the starting node (or a restart) overshot the requested length.
    return result[:m]

In [7]:
# Reseed so the generated sequences below are reproducible.
random.seed(0)
# NOTE(review): seq3 is not used below; both chains are built from seq1, which an earlier cell defines.
seq3 = [random.randint(0,10) for i in range(200)]
chain2 = build_chain(build_ngrams(seq1, n=3))
# The expected values depend on the random state left by the calls above.
assert list(generate_sequence(chain2, 10))==[8, 0, 1, 8, 10, 6, 8, 4, 8, 9]
chain3 = build_chain(build_ngrams(seq1, n=5))
assert list(generate_sequence(chain3, 10))==[4, 1, 8, 5, 8, 3, 9, 8, 9, 4]

## Scrape the web to find lyrics

In [8]:
import requests
from bs4 import BeautifulSoup

First get the page that has an index of all the lyrics and create a list of the URLs of those pages:

In [9]:
def get_lyric_urls(url, timeout=30):
    """Collect the lyric-page links found on an index page.

    Parameters
    ----------
    url : str
        Address of the page to download and scan for links.
    timeout : float
        Seconds to wait for the server before giving up, so a stalled
        connection cannot hang the notebook indefinitely.

    Returns
    -------
    list of str
        The ``href`` of every ``<a>`` tag whose value contains ``"-lyrics"``
        and does not contain ``"news"``, stripped of surrounding whitespace,
        in document order.

    Raises
    ------
    requests.HTTPError
        If the server answers with an error status.
    """
    response = requests.get(url, timeout=timeout)
    # Fail loudly on 4xx/5xx instead of silently scraping an error page.
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')

    urls = []
    for link in soup.find_all('a'):
        href = link.get('href')
        # Skip anchors without an href and links to news pages.
        if href is None or "news" in href:
            continue
        if "-lyrics" in href:
            urls.append(href)

    print(urls)
    return [href.strip() for href in urls]


In [10]:
# input() already returns a str, so the answer is passed on directly.
lyric_urls = get_lyric_urls(input('Enter MetroLyrics URL here:'))

Enter MetroLyrics URL here:http://www.metrolyrics.com/drake-albums-list.html
['http://www.metrolyrics.com/drake-lyrics.html', 'http://www.metrolyrics.com/hotline-lyrics-drake.html', 'http://www.metrolyrics.com/take-care-lyrics-drake.html', 'http://www.metrolyrics.com/jumpman-lyrics-drake.html', 'http://www.metrolyrics.com/controlla-lyrics-drake.html', 'http://www.metrolyrics.com/drake-lyrics.html', 'http://www.metrolyrics.com/drake-lyrics.html', 'http://www.metrolyrics.com/deceiving-lyrics-drake.html', 'http://www.metrolyrics.com/shut-it-down-lyrics-drake.html', 'http://www.metrolyrics.com/tuscan-leather-lyrics-drake.html', 'http://www.metrolyrics.com/killer-lyrics-drake.html', 'http://www.metrolyrics.com/paris-morton-music-2-lyrics-drake.html', 'http://www.metrolyrics.com/my-new-shit-lyrics-drake.html', 'http://www.metrolyrics.com/man-of-the-year-lyrics-drake.html', 'http://www.metrolyrics.com/lust-for-life-lyrics-drake.html', 'http://www.metrolyrics.com/god-snippet-lyrics-drake.html'

In [11]:
def get_lyric(url, timeout=30):
    """Download one lyric page and return its verses as a single string.

    Parameters
    ----------
    url : str
        Address of the lyric page.
    timeout : float
        Seconds to wait for the server before giving up.

    Returns
    -------
    str
        The text of every ``<p class="verse">`` element on the page, joined
        with newlines. Empty when the page has no such element.

    Raises
    ------
    requests.HTTPError
        If the server answers with an error status.
    """
    r = requests.get(url, timeout=timeout)
    # Fail loudly on 4xx/5xx instead of silently scraping an error page.
    r.raise_for_status()
    soup = BeautifulSoup(r.text, 'html.parser')
    verses = [p.getText() for p in soup.find_all("p", class_="verse")]
    print(verses)
    return '\n'.join(verses)

In [12]:
import time

def get_all_lyrics(lyric_urls, delay=1.0):
    """Lazily fetch the lyrics for each URL, pausing before every request.

    Parameters
    ----------
    lyric_urls : iterable of str
        Addresses of the lyric pages to fetch.
    delay : float
        Seconds to sleep before each request, to avoid sending requests
        back to back. Defaults to one second.

    Yields
    ------
    str
        The result of ``get_lyric(url)`` for each URL, in order.
    """
    for url in lyric_urls:
        time.sleep(delay)
        yield get_lyric(url)

In [13]:
# get_all_lyrics is a generator function, so no page is requested until `lyrics` is iterated.
lyrics = get_all_lyrics(lyric_urls)


Now save all the lyrics to a text file:

In [14]:
# Write each song followed by a newline, with Windows line endings normalised to '\n'.
with open('all_artist_lyrics.txt', 'w') as lyrics_file:
    for song_text in lyrics:
        normalized = song_text.replace('\r\n', '\n')
        lyrics_file.write(normalized + '\n')

[]
['You used to call me on my, you used to, you used to\nYou used to call me on my cell phone\nLate night when you need my love\nCall me on my cell phone\nLate night when you need my love\nI know when that hotline bling\nThat can only mean one thing\nI know when that hotline bling\nThat can only mean one thing', "Ever since I left the city you\nGot a reputation for yourself now\nEverybody knows and I feel left out\nGirl you got me down, you got me stressed out\nCause ever since I left the city, you\nStarted wearing less and goin' out more\nGlasses of champagne out on the dance floor\nHangin' with some girls I've never seen before", 'You used to call me on my cell phone\nLate night when you need my love\nCall me on my cell phone\nLate night when you need my love\nI know when that hotline bling\nThat can only mean one thing\nI know when that hotline bling\nThat can only mean one thing', "Ever since I left the city, you, you, you\nYou and me we just don't get along\nYou make me feel like

In [15]:
# Placeholder check; this assertion always passes.
assert True

## Generate new lyrics with the Markov chain

Here is the fun part!

In [16]:
import textwrap

Here are some simple functions for tokenizing the lyrics:

In [17]:
import quicktoken

In [18]:
# Characters handed to quicktoken.tokenize_line as punctuation.
# The backslash is written as '\\' because '\:' is not a valid escape sequence
# and triggers a warning on recent Python versions; the string value is unchanged.
PUNCTUATION = '`~!@#$%^&*()_-+={[}]|\\:;"<,>.?/}\t\n'

def generateLyrics():
    """Print a newly generated lyric built from the saved lyrics file.

    Reads ``all_artist_lyrics.txt``, tokenizes every line, builds a Markov
    chain from the resulting word pairs, generates 200 words and prints
    them wrapped to 45 characters per line.
    """
    source_lines = quicktoken.files_to_lines(["all_artist_lyrics.txt"])

    # NOTE(review): the file is opened for writing but nothing is ever written to it.
    with open('tokenizedLyrics.txt', 'w') as f:
        words = []
        for raw_line in source_lines:
            line_tokens = quicktoken.tokenize_line(
                str(raw_line), stop_words = None, punctuation = PUNCTUATION
            )
            # Flatten as we go: every token of every line lands in one list.
            words.extend(list(line_tokens))

    word_pairs = build_ngrams(words, n=2)
    markov = build_chain(word_pairs, chain=None)
    generated = generate_sequence(markov, 200)
    text = " ".join(generated)

    for wrapped in textwrap.wrap(text, 45):
        print(wrapped)

In [None]:
def userInput():
    """Keep asking whether to generate lyrics until the user declines.

    Entering ``1`` calls ``generateLyrics()`` and asks again; any other
    integer prints ``done`` and returns. Input that is not an integer is
    reported and the question is asked again instead of raising
    ``ValueError``.
    """
    # A loop rather than recursion, so a long session cannot exhaust the call stack.
    while True:
        answer = input("\nGenerate lyrics? 1 = Yes, 0 = No")
        try:
            more = int(answer)
        except ValueError:
            print("Please enter 1 or 0.")
            continue

        if more != 1:
            print("done")
            return

        print("\n\n\n")
        generateLyrics()
        print("\n\n\n")


In [None]:
# Start the interactive prompt.
userInput()


Generate lyrics? 1 = Yes, 0 = No1




mistakes i'd lie to worry you deserve it man
what you've done done came out she workin'
with it here tonight me all these bitches
only teasin i haven't even name ooh na na na
na na what's my brand new drugs got a dike
bitch young but when i got a dog you owe me
say up reppin' oh yeah what to the hazy think
about the mothafuckin' choice it's beneficial
opps and i taught her wait until u ah i
looked forced it's gross what a problem if
you're fine as she fucking gutter bitch right
now took out tell me with something right now
the wri wri wri wri wri wri wri wri wri wri
wri wri yeah this i'm in your imagination
everything i went from the reason that my
jewelry that's a record i'm 'bout to be
buyin' shit set in my clique is a problem and
work he in and that's word to check a really
relieved some help on houston leverage when
i'm gone is what you've still good girl from
perfect time now we ever forget it you need
new shit sink and when i got a purchase
