In [1]:
def train_markov_chain(lyrics):
    """
    Train a bigram Markov chain on a collection of song lyrics.

    Each song is split on whitespace into words, with every newline
    replaced by the line-break token "<N>". The song is then framed as
    None, "<START>", word_1, ..., word_n, "<END>", and for every three
    consecutive tokens (a, b, c) the word c is recorded as a follower
    of the bigram (a, b).

    Args:
      - lyrics: a list of strings, where each string represents
                the lyrics of one song by an artist.

    Returns:
      A dict that maps a tuple of 2 words ("bigram") to a list of
      words that follow that bigram, representing the Markov
      chain trained on the lyrics. Followers are stored with
      repetition, one entry per occurrence. The key (None, "<START>")
      is always present, even when no song contributed any word.
    """
    chain = {(None, "<START>"): []}
    for lyric in lyrics:
        # split() with no argument drops the empty strings that
        # consecutive spaces would otherwise produce as "words".
        words = lyric.replace("\n", " <N> ").split()
        if not words:
            # A blank song has no transitions to learn.
            continue

        # Framing the song with the boundary tokens lets one loop record
        # every transition, including the first word, the last word and
        # the final "<END>" marker.
        tokens = [None, "<START>"] + words + ["<END>"]
        for two_ago, one_ago, word in zip(tokens, tokens[1:], tokens[2:]):
            # setdefault keeps followers learned from earlier songs
            # instead of resetting the list for a bigram seen before.
            chain.setdefault((two_ago, one_ago), []).append(word)
    return chain

In [2]:
# Load the pickled lyrics object that you created in Lab A.
import pickle

# NOTE: unpickling can execute arbitrary code, so only load pickle files
# that you created yourself. The `with` block closes the file handle as
# soon as loading finishes.
with open("lyrics.pkl", "rb") as lyrics_file:
    lyrics = pickle.load(lyrics_file)

# Call the function you wrote above.
chain = train_markov_chain(lyrics)

# What words tend to start a song (i.e., what words follow the <START> tag?)
print(chain[(None, "<START>")])

['<N>', 'Yeah,', "I've", 'Work', '<N>', 'Yeah,', "I've", "It's", 'Everybody', '<N>', 'I', 'God', 'Living', 'Okay', '<N>', 'Take', 'Ass,', 'I', 'Yeah,', 'My', 'Jesus,', 'Yeah,', 'So...here', 'If', 'Yeah,', 'Tell', 'All', 'Yeah', 'I', 'Take', "It's", 'Yeah', 'Hey', 'Let', 'I,', 'I', 'Living', 'You,', 'Get', 'Me', 'Now', 'Uh', 'I', 'Dead', 'Tryna', 'Everything', 'Outside', 'I', 'Hey,', 'Bobby', 'I', 'I', 'Hey-oh,', 'Yeah,', "I've", '<N>', 'So...here', 'Uh,', 'We', 'Aye', 'Aye,', 'When', 'Now', 'Okay', 'Life', 'Jesus,', 'Buck,', 'Bitch,', 'Hey', 'Yeah;', 'Okay', 'I', 'Aye,', 'Everything', '<N>']


In [3]:
import random

def generate_new_lyrics(chain):
    """
    Generate a random song by walking a bigram Markov chain.

    The walk starts from the (None, "<START>") entry, then repeatedly
    picks a random follower of the last two words until "<END>" is drawn.

    Args:
    - chain: a dict representing the Markov chain,
    such as one generated by train_markov_chain(). It maps a
    2-tuple of words to the list of words that may follow it and
    must contain the key (None, "<START>").

    Returns:
    A string representing the randomly generated song. Every "<N>"
    token becomes a line break; lines have no leading or trailing
    spaces.

    Raises:
    KeyError if the walk reaches a bigram that is not a key of chain.
    IndexError if a follower list that is sampled from is empty.
    """
    # The first two words are conditioned on the start-of-song markers.
    first_word = random.choice(chain[(None, "<START>")])
    words = [first_word, random.choice(chain[("<START>", first_word)])]

    # Extend the song from its last two words until the end marker appears.
    while words[-1] != "<END>":
        words.append(random.choice(chain[(words[-2], words[-1])]))

    # Group the words into lines, starting a new line at every "<N>" token.
    # Joining per line avoids the stray spaces that appear around line
    # breaks when the whole song is joined first and split afterwards.
    lines = [[]]
    for word in words[:-1]:  # the final entry is always "<END>"
        if word == "<N>":
            lines.append([])
        else:
            lines[-1].append(word)
    return "\n".join(" ".join(line) for line in lines)

In [4]:
# Sample one new song from the trained chain and print it. The words are
# drawn with random.choice, so the text generally differs from run to run.
print(generate_new_lyrics(chain))

Hey mothafucka, I'm real as shit 
 You gon' fuck around and said 
 "Fuck it, I'ma steal this land"Everybody wanna tell me my wife cheated on me?! 
 Atom. None of that wrist 
 Flickin' that, feelin', flickin' that
