# Word/Sentence Generation using Markov's model

In [17]:
#We only need to import numpy for its random function, but we can also import pretty print to better visualize our markov chain
import numpy as np
import pprint

#File where the text is. Make sure it's in the same directory as the code
training_file = "chainsmokers.txt"

#how many words we want our song to be. The average song is usually between 250-300 words (not including unique words)
number_of_words = 280


## Setting up helper functions

Load the file using the with...open function and use the "r" for read only

In [18]:
def load_file():
    with open(training_file, "r") as file:
        song = file.read()
    
    return song

Now it's time to code the engine of our program: the Markov Chain. This will be a dictionary with a single word as the key, and a list of all the subsequent words as its value. 

In [19]:
def build_chain(song):
    #split text file into a list of single words/tokens
    tokens = song.split()
    index = 1
    chain = {}
    #iterate through the list and add each word as a key or value until we reach the last word, the ':' after index means starting at index and incrementing to values after
    for word in tokens[index:]:
        #Because we consider word pairs, we start at the second word in the list and decrement down to index - 1 as the key
        key = tokens[index - 1]
        if key in chain:
            chain[key].append(word)
        else:
            chain[key] = [word]
        index += 1

    return chain

#not necessary, but pretty printing our dictionary gives us a better view of how this chain building works
pprint.pprint(chain)

{'"Hey,': ['whatcha', 'whatcha', 'whatcha'],
 '"How': ['Could'],
 '"I': ["don't",
        'never',
        'never',
        "don't",
        'never',
        'never',
        "don't",
        'never',
        'never'],
 '"Wow': ['If'],
 "'Cause": ['I', 'I'],
 '(If': ['we', 'we', 'we', 'we'],
 '(we': ["ain't", "ain't", "ain't"],
 'Am': ['I', 'I'],
 'And': ['four',
         'I',
         'four',
         'I',
         'almost',
         'all',
         'almost',
         'all',
         'almost',
         'all',
         'I',
         'I',
         'you',
         'you',
         'I',
         'you',
         'you',
         'I',
         'you',
         'you'],
 'At': ['least'],
 'Bite': ['that', 'that', 'that'],
 'Blink-182': ['song'],
 'Boulder': ['We', 'We', 'We'],
 'Bring': ['it', 'it'],
 'Broke': ['kids', 'kids'],
 'But': ['I', 'we', "it's", "it's", "it's"],
 'Can': ['I', 'I', 'I', 'I', 'I', 'I'],
 'Caught,': ['we'],
 'Come': ['on,', 'on,'],
 'Could': ['I'],
 "Don't": ['let', 'worr

## Generate our song!

This function returns the Markov dictionary which we'll use to generate our new banger.

In [20]:
def baby_pull_me_closer(chain):
    #We randomly choose our first word that already exists, so it is able to generate the rest after.
    #Because numpy's random.choice function can only read in a list, we need to convert our keys from our dictionary into a list format
    song = ""
    first = np.random.choice(list(chain.keys()))
    song += first

    while len(song.split()) < number_of_words:
        #Although a true Markov model would use weights and probability ditribution for selecting the subsequent words, 
        #using numpy's random choice gives us more variety in a smaller data set as to avoid mimicking the lyrics too closely
        next_word = np.random.choice(list(chain[first]))
        first = next_word
        
        #formatting for a song
        line_break = np.random.randint(low=1, high=100)
        if line_break < 20:
            song += "\n" + next_word
        elif line_break < 10:
            song += "\n\n" + next_word
        else:
            song += " " + next_word
            
    
    print(song)

In [21]:
song = load_file()
chain = build_chain(song)
baby_pull_me_closer(chain)

No, I want something
just why
I call
you want, but it's hard when
you're young Yeah, it's over Say
what I'm calling
you
tell me closer in Paris (If
we go down together
We'll
get away from
this, yeah We
were staying in the mattress that tattoo on the bar downtown
When we are
better Two kids running through the high, it Getting
drunk on the mattress
that
it was
clever If
we snuck
out, like this
Doo-doo-doo, doo-doo-doo Doo-doo-doo, doo-doo-doo Oh, I call you
want, but it's hard when you're looking for the corner Of
the city Drunk
on the
bar Don't let it all around Bring
it was clever If
we are Let's
show them we could
take this out"
Out
on the day that I want something just like to meet
up in Paris (If we go down) We were staying in the bar
Don't let
me closer in Paris Let's show them we
are Let's show
them we are Show
them we go down I
never
see them we are Let's show them again I called you
wanna go? How
much you
cannot live without
you, yeah We ain't ever getting
older
So I drink too
