In [271]:
# Load the dictionary
import csv
path = "../src/main/resources/EnglishCmu/cmudict_SPHINX_40.txt"

class Entry:
    def __init__(self, word, phonemes):
        self.word = word
        self.phonemes = phonemes
        
    def __str__(self): 
        return self.word # + ": " + " ".join(self.phonemes)
    
    def __repr__(self):
        return "Entry(" + self.word + ", [" + ", ".join(self.phonemes) +"])"

entries = []
with open(path) as f:
    reader = csv.reader(f, delimiter="\t", quoting=csv.QUOTE_NONE)
    for row in reader:
        entry = Entry(row[0], tuple(row[1].split(" ")))
        entries.append(entry)
        


In [272]:
entriesByWord = {entry.word: entry for entry in entries}

In [273]:
import itertools
sortedEntries = sorted(entries, key = lambda entry: entry.phonemes)
groups = itertools.groupby(sortedEntries, key = lambda entry: entry.phonemes)
entriesByPhonemes = {phonemes: tuple(group) for phonemes, group in groups}

In [274]:
entriesByWord["SON"]

Entry(SON, [S, AH, N])

In [276]:
entriesByPhonemes[("S", "AH", "N")]

(Entry(SON, [S, AH, N]), Entry(SUN, [S, AH, N]))

In [137]:
from textblob import TextBlob
principlesPath = "../src/main/resources/corpora/only_numbered_principles.txt"
with open(principlesPath) as f:
    text = f.read()
    principlesTextBlob = TextBlob(text)

In [160]:
quotesPath = "../src/main/resources/corpora/author-quote.txt"
with open(quotesPath) as f:
    all_lines_no_author = map(lambda s: s.split("\t")[1], f.readlines())
    all_text = "\n".join(all_lines_no_author)
    quotesTextBlob = TextBlob(all_text)

In [515]:
def word_list_to_entries(wordlist):
    return [entriesByWord[word] for word in map(lambda w: w.upper(), wordlist) if word in entriesByWord]

def switch_beginning_sounds(entries):
    if(len(entries) < 2):
        return []
    
    first = entries[0]
    last = entries[-1]
    first_new_phonemes = tuple([last.phonemes[0]] + list(first.phonemes[1:]))
    last_new_phonemes = tuple([first.phonemes[0]] + list(last.phonemes[1:]))
    
    if(not first_new_phonemes in entriesByPhonemes):
        return []
        
    if(not last_new_phonemes in entriesByPhonemes):
        return []
    
    if(last.phonemes[0] == first.phonemes[0]):
        return []
    
    #first_new_entry = entriesByPhonemes[first_new_phonemes][0]
    #last_new_entry = entriesByPhonemes[last_new_phonemes][0]
    new_first_lasts = product(entriesByPhonemes[first_new_phonemes], entriesByPhonemes[last_new_phonemes])
    
    result = [(first.word, last.word, first_last[0].word, first_last[1].word) for first_last in new_first_lasts]
    return result

import re
from itertools import product
from itertools import filterfalse

def new_sentence_from_spoonerism(sentence, switch_tuple):
    old_phrase = "{} {}".format(switch_tuple[0], switch_tuple[1])
    new_phrase = "{} {}".format(switch_tuple[2], switch_tuple[3])
    regex = re.compile(re.escape(old_phrase), re.IGNORECASE)
    new_sentence = regex.sub(new_phrase, " ".join(sentence.words))
    # new_sentence = " ".join(sentence.words).upper().replace(old_phrase, new_phrase)
    return (switch_tuple, new_sentence, sentence)
   
def new_sentences_with_spoonerisms(sentence):
    ngrams = map(word_list_to_entries, sentence.ngrams(2))
    ngrams_switched = chain(*map(switch_beginning_sounds, ngrams))
    spoonerisms = filter(found_spoonerism, ngrams_switched)
    new_sentences = list(map(lambda spoonerism: new_sentence_from_spoonerism(sentence, spoonerism), spoonerisms))
    
    return new_sentences

In [516]:
def new_sentences_using_homophones(sentence):
    distinct_new_sentences = set()
    entries_for_sentence_words = [entriesByWord[word.upper()] for word in sentence.words if word.upper() in entriesByWord]
    for entry in entries_for_sentence_words:
        entry_homophones = filter(lambda e: e != entry, entriesByPhonemes[entry.phonemes])
        
        def make_sentences(homophone):
            new_sentence = [homophone if entry == current_entry else current_entry for current_entry in entries_for_sentence_words] 
            return ((homophone, entry), new_sentence)
        
        new_sentence_entries = map(make_sentences, entry_homophones)
        new_sentences = [(substitutions, [entry.word for entry in new_sentence]) for (substitutions, new_sentence) in new_sentence_entries]
        for (substitutions, new_sentence) in new_sentences:
            new_sentence_string = " ".join([entry for entry in new_sentence])
            distinct_new_sentences.add((substitutions, new_sentence_string, sentence))
            
    return distinct_new_sentences

In [517]:
strategies = (new_sentences_with_spoonerisms, new_sentences_using_homophones)
def combined_strategy(sentence):
    new_sentences_from_strategies = chain(*map(lambda strategy: strategy(sentence), strategies))
    return new_sentences_from_strategies

#new_sentences = chain(*map(combined_strategy, quotesTextBlob.sentences))
#print_results(islice(new_sentences, 5))

In [518]:
def print_results(new_sentences):
    for (pun_words, new_sentence, old_sentence) in new_sentences:
        print("{}:".format(old_sentence))
        print("  {}".format(pun_words))
        print("  {}".format(new_sentence))

In [524]:
def matcher_for_word(word):
    def match(punned_sentence):
        (pun_words, old_sentence, new_sentence) = punned_sentence
        return word.upper() in map(lambda pun_word: pun_word, pun_words)
    return match

new_sentences = chain(*map(combined_strategy, principlesTextBlob.sentences))
matching_new_sentences = filter(matcher_for_word("too"), new_sentences)
print_results(matching_new_sentences)

Constantly Get in Synch
Constantly get in synch about what is true and what to do about it.:
  ('TO', 'DO', 'DEUX', 'TOO')
  Constantly Get in Synch Constantly get in synch about what is true and what DEUX TOO about it
Constantly Get in Synch
Constantly get in synch about what is true and what to do about it.:
  ('TO', 'DO', 'DEW', 'TOO')
  Constantly Get in Synch Constantly get in synch about what is true and what DEW TOO about it
Constantly Get in Synch
Constantly get in synch about what is true and what to do about it.:
  ('TO', 'DO', 'DO', 'TOO')
  Constantly Get in Synch Constantly get in synch about what is true and what DO TOO about it
Constantly Get in Synch
Constantly get in synch about what is true and what to do about it.:
  ('TO', 'DO', 'DOO', 'TOO')
  Constantly Get in Synch Constantly get in synch about what is true and what DOO TOO about it
Constantly Get in Synch
Constantly get in synch about what is true and what to do about it.:
  ('TO', 'DO', 'DOUWE', 'TOO')
  Consta

  Force yourself and the people who work for TOO YOO do difficult things
Force yourself and the people who work for you to do difficult things.:
  ('YOU', 'TO', 'TOO', 'YOU')
  Force yourself and the people who work for TOO YOU do difficult things
Force yourself and the people who work for you to do difficult things.:
  ('YOU', 'TO', 'TOO', 'YU')
  Force yourself and the people who work for TOO YU do difficult things
Force yourself and the people who work for you to do difficult things.:
  ('YOU', 'TO', 'TOO', 'YUE')
  Force yourself and the people who work for TOO YUE do difficult things
Force yourself and the people who work for you to do difficult things.:
  ('TO', 'DO', 'DEUX', 'TOO')
  Force yourself and the people who work for you DEUX TOO difficult things
Force yourself and the people who work for you to do difficult things.:
  ('TO', 'DO', 'DEW', 'TOO')
  Force yourself and the people who work for you DEW TOO difficult things
Force yourself and the people who work for you to do

  ('TOO', 'LATE', 'LUE', 'TAIT')
  Having people work so late that they might quit getting out reports LUE TAIT etc might be problems that are caused by a lack of capacity
Having people work so late that they might quit, getting out reports too late, etc., might be problems
that are caused by a lack of capacity.:
  ('TOO', 'LATE', 'LUE', 'TAITE')
  Having people work so late that they might quit getting out reports LUE TAITE etc might be problems that are caused by a lack of capacity
Having people work so late that they might quit, getting out reports too late, etc., might be problems
that are caused by a lack of capacity.:
  ('TOO', 'LATE', 'LUE', 'TAITT')
  Having people work so late that they might quit getting out reports LUE TAITT etc might be problems that are caused by a lack of capacity
Having people work so late that they might quit, getting out reports too late, etc., might be problems
that are caused by a lack of capacity.:
  ('TOO', 'LATE', 'LUE', 'TATE')
  Having people wo