# Text Generation

## Select Text to Imitate

In [9]:
import pandas as pd

data = pd.read_pickle('data_clean.pkl')
data

Unnamed: 0,transcript
allen,i saw the best minds of my generation destroye...
axentioi,for what you might be last withwe might not un...
christina,hurt no living thing ladybird nor butterfly no...
george,when we two parted in silence and tears half ...
iff,loose leaves fall from winter gray calen...
judith,the eyeless labourer in the nightthe selfless ...
max,let me do my work each day and if the darkened...
nuno,im walking the dog its raining he sniffs and p...
paul,we wear the mask that grins and liesit hides o...
sammeh,who the fuck did you call when you were coked ...


In [10]:
allen_text = data.transcript.loc['allen']
allen_text[:200]

'i saw the best minds of my generation destroyed by     madness starving hysterical nakeddragging themselves through the negro streets at dawn     looking for an angry fix angelheaded hipsters burning '

## Build a Markov Chain Function

In [24]:
from collections import defaultdict

def markov_chain(text):
    '''The input is a string of text and the output will be a dictionary with each word as
       a key and each value as the list of words that come after the key in the text.'''
    
    words = text.split(' ')

    m_dict = defaultdict(list)

    for current_word, next_word in zip(words[0:-1], words[1:]):
        m_dict[current_word].append(next_word)

    m_dict = dict(m_dict)
    return m_dict

In [25]:
allen_dict = markov_chain(allen_text)
allen_dict

{'i': ['saw', 'had', 'am', 'sit', 'dream', 'am', 'abandon', 'am'],
 'saw': ['the', 'mohammedan', 'it'],
 'the': ['best',
  'negro',
  'ancient',
  'starry',
  'machin',
  'supernatural',
  'tops',
  'el',
  'scholars',
  'academies',
  'windows',
  '',
  'terror',
  'wall',
  'mind',
  'mo',
  'rooftops',
  'roaring',
  'endless',
  'noise',
  'drear',
  'stale',
  'crack',
  'hydrogen',
  'brook',
  'stoops',
  'moon',
  '',
  'pavement',
  '',
  'cross',
  'cosmos',
  'streets',
  'chinaman',
  'impulse',
  '',
  'volcanoes',
  'shadow',
  'lava',
  'west',
  '',
  'narcotic',
  'sirens',
  'staten',
  'machinery',
  'neck',
  'subway',
  'roof',
  'ass',
  'sailors',
  'morning',
  'evenings',
  'grass',
  'blond',
  'three',
  'one',
  'heterosexual',
  'one',
  'womb',
  'one',
  'intellectual',
  'craftsmans',
  'bed',
  'floor',
  'hall',
  'wall',
  'last',
  'snatches',
  'sunset',
  'morning',
  'snatch',
  'sun',
  'lake',
  'memory',
  'snowbank',
  '',
  'apartment',
  'hu

## Create a Text Generator

In [13]:
import random

def generate_sentence(chain, count=15):
    '''Input a dictionary in the format of key = current word, value = list of next words
       along with the number of words you would like to see in your generated sentence.'''

    word1 = random.choice(list(chain.keys()))
    sentence = word1.capitalize()

    for i in range(count-1):
        word2 = random.choice(chain[word1])
        word1 = word2
        sentence += ' ' + word2

    sentence += '.'
    return(sentence)

In [15]:
generate_sentence(allen_dict)

'Whomever come who    the crab at dawn  with you in boxes.'

## Additional Exercises

1. Try making the generate_sentence function better. Maybe allow it to end with a random punctuation mark or end whenever it gets to a word that already ends with a punctuation mark.

In [39]:
import random

def generate_sentence(chain, count=5):
    '''Input a dictionary in the format of key = current word, value = list of next words
       along with the number of words you would like to see in your generated sentence.'''

    word1 = random.choice(list(chain.keys()))
    sentence = word1.capitalize()

    for i in range(count-1):
        word2 = random.choice(chain[word1])
        word1 = word2
        sentence += ' ' + word2
    
    if not(sentence.endswith('.')):
        punctuation_list=['.','?','!',':','-',';']
        punctuation=random.choice(punctuation_list)
        sentence += punctuation
    return(sentence)

generate_sentence(allen_dict)

'Blood is the use :'