# Set up the environment

In [2]:
import csv
import random

In [3]:
# Corpus word list; the parsing cell reads it with the columns
# word, lemma, POS, count.
path = "../lists/ANC-spoken-count.txt"

# Filter settings applied while the corpus is parsed.
frequencyLimit = 10  # a row passes only when its count is strictly greater
hardStarts = ['b', 'f', 'm', 'p', 'v', 'w', 'sp']  # patterns tested at the start of a word
hardMids = ['b', 'p', 'mm', 'sp']  # patterns tested anywhere inside a word

# Accumulators filled by the parsing cell.
nouns, verbs, adjs = [], [], []
parts = {}  # POS tag -> list of corpus rows carrying that tag

# Parse in the data and build the dictionary

In [4]:
# POS tags that make up the three coarse word classes.
nounTags = ['NN', 'NNP', 'NNS', 'NNPS']
verbTags = ['VBG', 'VB', 'VBN', 'VBD', 'VBP', 'VBZ']
adjTags = ['JJ']
# Tags whose rows are always kept in `parts`, whether or not they pass the
# filters, so that every template has connecting words available.
glueTags = ['PRP$', 'IN']

# NOTE: this cell appends to the module-level nouns/verbs/adjs/parts
# containers, so re-running it without first re-running the setup cell
# adds every row a second time.
with open(path) as csvFile:
    reader = csv.DictReader(csvFile, fieldnames=['word', 'lemma', 'POS', 'count'])
    for row in reader:
        word = row['word']
        pos = row['POS']
        # Store the count as an int so it can be compared numerically.
        row['count'] = int(row['count'])

        # Frequency filter: strictly more occurrences than the limit.
        frequent = row['count'] > frequencyLimit

        # Hard start: the word begins with any of the configured patterns.
        # str.startswith accepts a tuple of prefixes.
        hardStart = word.startswith(tuple(hardStarts))

        # Hard middle: any configured pattern occurs anywhere in the word.
        # Only used by the alternative filter noted below.
        hardMid = any(mid in word for mid in hardMids)

        # Alternative filter: frequent and (hardStart or hardMid)
        selected = frequent and hardStart

        if selected:
            # Coarse word classes used by the simple sentence cell.
            if pos in nounTags:
                nouns.append(row)
            elif pos in verbTags:
                verbs.append(row)
            elif pos in adjTags:
                adjs.append(row)

        # File the row under its exact POS tag exactly once. Previously a
        # selected row tagged PRP$ or IN was appended twice (once as a
        # selected row, once as a glue row), which doubled its chance of
        # being picked by random.choice.
        if selected or pos in glueTags:
            parts.setdefault(pos, []).append(row)
            
            

In [5]:
def chooseWord(general):
    """Return the 'word' field of one randomly chosen row from `general`.

    `general` is a non-empty sequence of dict-like rows; random.choice
    raises IndexError when it is empty.
    """
    row = random.choice(general)
    return row['word']
def choosePart(pos):
    """Return a random word filed under POS tag `pos` in the global `parts`.

    Raises KeyError when no row with that tag was collected.
    """
    candidates = parts[pos]
    return random.choice(candidates)['word']
def chooseParts(posList):
    """Return one random word per tag in `posList`, in the same order."""
    words = []
    for pos in posList:
        words.append(choosePart(pos))
    return words

## Print some stats

In [6]:
# Report how many rows survived the filters in each coarse word class.
for label, rows in [("Nouns", nouns), ("Verbs", verbs), ("Adj", adjs)]:
    print("%s: %d" % (label, len(rows)))
print("Total: %d" % (len(nouns) + len(verbs) + len(adjs)))

Nouns: 1668
Verbs: 635
Adj: 292
Total: 2595


## Try a simple sentence structure

In [7]:
# Adjective - Noun - Verb - Noun, with the leading adjective title-cased
simpleSentence = [
    chooseWord(adjs).title(),
    chooseWord(nouns),
    chooseWord(verbs),
    chooseWord(nouns),
]
print(" ".join(simpleSentence))

Mutual blues believe planner


In [8]:
# Example:  My lunchbox is filled with melons
# Template: My <NN> is <VBN> <IN> <NNP>
print("My %s is %s %s %s" % tuple(chooseParts(['NN', 'VBN', 'IN', 'NNP'])))

My background is begun through field


In [9]:
# Example:  I want a puffy power ranger
# Template: I want a <RB> <JJ> <NN>
# NOTE(review): the example reads as adjective + noun + noun, while the
# template draws an adverb first; confirm which one is intended.
print("I want a %s %s %s" % tuple(chooseParts(['RB', 'JJ', 'NN'])))

I want a before various membership


In [10]:
# Example:  The mailman is a bad baseball player
# Template: The <NN> is a <JJ> <NN>
print("The %s is a %s %s" % tuple(chooseParts(['NN', 'JJ', 'NN'])))

The muscle is a foolish basin


In [11]:
# Example:  A very smelly vacuum
# Template: A <RB> <JJ> <NN>
print("A %s %s %s" % tuple(chooseParts(['RB', 'JJ', 'NN'])))

A professionally pick-up border


In [12]:
# Example:  The horse bucked your mom
# Template: The <NN> <VBN> <PRP$> <NN>
print("The %s %s %s %s" % tuple(chooseParts(['NN', 'VBN', 'PRP$', 'NN'])))

The folder painted my play


In [13]:
# Example:  Puffins have white faces
# Template: <NNS> <VBP> <JJ> <NNS>
puffinWords = (choosePart('NNS'), choosePart('VBP'), choosePart('JJ'), choosePart('NNS'))
print("%s %s %s %s" % puffinWords)

bodies fit philosophical mistakes


In [14]:
# Example:  Butter my bread with bacon
# Template: <VB> <PRP$> <NN> with <NN>
butterWords = (choosePart('VB'), choosePart('PRP$'), choosePart('NN'), choosePart('NN'))
print("%s %s %s with %s" % butterWords)

plan my pair with borderline


# Build a better framework

In [18]:
# Phrase grammars: each entry pairs a format string with the POS tags that
# fill its %s slots, in order. The comment above an entry is the sentence
# the template was modelled on.
grammars = [
    # Butter my bread with bacon
    ("%s %s %s with %s", ['VB', 'PRP$', 'NN', 'NN']),
    # Puffins have white faces
    ("%s %s %s %s", ['NNS', 'VBP', 'JJ', 'NNS']),
    # The horse bucked your mom
    ("The %s %s %s %s", ['NN', 'VBN', 'PRP$', 'NN']),
    # The mailman is a bad baseball player
    # NOTE(review): this entry has an extra RB slot that the example
    # sentence does not have; confirm it is intentional.
    ("The %s is a %s %s %s", ['NN', 'RB', 'JJ', 'NN']),
    # I want a puffy power ranger
    ("I want a %s %s %s", ['RB', 'JJ', 'NN']),
    # My lunchbox is filled with melons
    ("My %s is %s %s %s", ['NN', 'VBN', 'IN', 'NNP']),
]

def genPhrase():
    """Build one random phrase from a randomly chosen entry of `grammars`.

    Each %s slot of the chosen template is filled with a random word of the
    matching POS tag, and the first character of the result is upper-cased.
    """
    template, posTags = random.choice(grammars)
    sentence = template % tuple(chooseParts(posTags))
    # Upper-case only the first character and leave the rest untouched.
    return sentence[0].upper() + sentence[1:]

In [19]:
# Generate a single phrase; as the cell's last expression its value is
# displayed as the cell output.
genPhrase()

'Mean our broccoli with wealth'

# Generate a set of phrases

In [22]:
# Number of phrases to print, one per line.
num = 100
for phraseNumber in range(num):
    print(genPhrase())

Floods meet fat markets
I want a specifically baked bulb
I want a fully precious fire
The bowl is a prior pathetic plenty
Speak her wheelchair with message
The flour blocked your bind
I want a foremost basic freezing
The bind is a but wild frustration
I want a politically bloody bin
I want a particularly play-off bank
I want a properly fascinating winter
Beds mean periodic moves
I want a whatsoever polish brick
My bath is spread beneath bombay
The windy fixed their brew
The freezer is a maybe precise bluebook
My m is paralyzed with pete
I want a before fancy wavelength
I want a perhaps punched-in-stomach painting
The philosopher is a forever peaceful push
The merchandise is a forever vital mkay
Media plan first-year peas
Form their funniest with minimum
The percentage is a finally paranoid bacon
The program believed its battery
Forests perceive frivolous bags
The force perceived his product
Work your permission with flow
I want a possibly minor pronunciation
I want a badly spayed feed
