In [2]:
# Load the dictionary
import csv
path = "../src/main/resources/EnglishCmu/cmudict_SPHINX_40.txt"

class Entry:
    """One pronunciation-dictionary record: a spelling plus its phoneme sequence."""

    def __init__(self, word, phonemes):
        self.word, self.phonemes = word, phonemes

    def __str__(self):
        # Only the spelling is shown here; repr() includes the phonemes as well.
        return self.word

    def __repr__(self):
        pieces = ["Entry(", self.word, ", [", ", ".join(self.phonemes), "])"]
        return "".join(pieces)

# Parse the tab-separated pronunciation dictionary into Entry objects:
# column 0 is the word, column 1 is the space-separated phoneme string.
entries = []
# newline="" is what the csv module asks for so that it handles line endings itself.
with open(path, newline="") as f:
    reader = csv.reader(f, delimiter="\t", quoting=csv.QUOTE_NONE)
    for row in reader:
        # Blank lines come back as [] and lines without a tab have a single field;
        # neither carries a pronunciation, so skip them instead of raising IndexError.
        if len(row) < 2:
            continue
        entry = Entry(row[0], tuple(row[1].split(" ")))
        entries.append(entry)
        


In [3]:
# Index every entry by its spelling; if a spelling repeats, the later entry wins.
entriesByWord = dict((item.word, item) for item in entries)

In [4]:
# Display the word -> Entry mapping.
# NOTE(review): this renders the whole dictionary; consider showing only a few items.
entriesByWord

{'!EXCLAMATION-POINT': Entry(!EXCLAMATION-POINT, [EH, K, S, K, L, AH, M, EY, SH, AH, N, P, OY, N, T]),
 '"CLOSE-QUOTE': Entry("CLOSE-QUOTE, [K, L, OW, Z, K, W, OW, T]),
 '"DOUBLE-QUOTE': Entry("DOUBLE-QUOTE, [D, AH, B, AH, L, K, W, OW, T]),
 '"END-OF-QUOTE': Entry("END-OF-QUOTE, [EH, N, D, AH, V, K, W, OW, T]),
 '"END-QUOTE': Entry("END-QUOTE, [EH, N, D, K, W, OW, T]),
 '"IN-QUOTES': Entry("IN-QUOTES, [IH, N, K, W, OW, T, S]),
 '"QUOTE': Entry("QUOTE, [K, W, OW, T]),
 '"UNQUOTE': Entry("UNQUOTE, [AH, N, K, W, OW, T]),
 '#SHARP-SIGN': Entry(#SHARP-SIGN, [SH, AA, R, P, S, AY, N]),
 '%PERCENT': Entry(%PERCENT, [P, ER, S, EH, N, T]),
 '&AMPERSAND': Entry(&AMPERSAND, [AE, M, P, ER, S, AE, N, D]),
 "'APOSTROPHE": Entry('APOSTROPHE, [AH, P, AA, S, T, R, AH, F, IY]),
 "'BOUT": Entry('BOUT, [B, AW, T]),
 "'CAUSE": Entry('CAUSE, [K, AH, Z]),
 "'COURSE": Entry('COURSE, [K, AO, R, S]),
 "'CUSE": Entry('CUSE, [K, Y, UW, Z]),
 "'EM": Entry('EM, [AH, M]),
 "'END-INNER-QUOTE": Entry('END-INNER-QUOTE, 

In [5]:
import itertools
# groupby only merges adjacent items, so the entries are ordered by pronunciation first.
sortedEntries = list(entries)
sortedEntries.sort(key=lambda item: item.phonemes)
groups = itertools.groupby(sortedEntries, key=lambda item: item.phonemes)
# Map each phoneme tuple to the tuple of entries pronounced that way (homophones).
entriesByPhonemes = dict((key, tuple(members)) for key, members in groups)

In [6]:
# Look up a single entry by its upper-case spelling.
entriesByWord["SON"]

Entry(SON, [S, AH, N])

In [7]:
# Look up every entry that shares one pronunciation, i.e. a group of homophones.
entriesByPhonemes[("S", "AH", "N")]

(Entry(SON, [S, AH, N]), Entry(SUN, [S, AH, N]))

In [137]:
from textblob import TextBlob
# Read the principles corpus in one go and wrap it in a TextBlob.
principlesPath = "../src/main/resources/corpora/only_numbered_principles.txt"
with open(principlesPath) as f:
    text = f.read()
principlesTextBlob = TextBlob(text)

In [160]:
quotesPath = "../src/main/resources/corpora/author-quote.txt"
with open(quotesPath) as f:
    # Each line is tab-separated; keep only the second field (presumably the quote
    # text, going by the file and variable names). The result is a lazy iterator
    # that the join below consumes exactly once.
    all_lines_no_author = (line.split("\t")[1] for line in f.readlines())
    all_text = "\n".join(all_lines_no_author)
quotesTextBlob = TextBlob(all_text)

In [222]:
# Rebuild the first five quote sentences from their word tokens (this drops the
# punctuation), end each with a period again, and keep the first one as the sample.
ss = list(map(lambda quote_sentence: " ".join(quote_sentence.words), quotesTextBlob.sentences[:5]))
sss = TextBlob(".\n".join(ss))
sentence = sss.sentences[0]

def word_list_to_entries(wordlist):
    """Return the dictionary entries for the words in `wordlist`.

    Words are upper-cased before lookup in `entriesByWord`; words that are not
    in the dictionary are left out, so the result may be shorter than the input.
    """
    found = []
    for raw_word in wordlist:
        key = raw_word.upper()
        if key in entriesByWord:
            found.append(entriesByWord[key])
    return found

def switch_beginning_sounds(entries):
    """Try to build a spoonerism by swapping the leading phonemes of two entries.

    The first phoneme of the first entry and the first phoneme of the last entry
    are exchanged, and both resulting pronunciations are looked up in
    `entriesByPhonemes`.

    Args:
        entries: sequence of objects with `.word` and `.phonemes`.

    Returns:
        `(first_word, last_word, new_first_word, new_last_word)` when both swapped
        pronunciations exist in the dictionary, otherwise the empty tuple `()`.
    """
    if len(entries) < 2:
        return ()

    first, last = entries[0], entries[-1]

    # An empty pronunciation has no leading sound to swap.
    if not first.phonemes or not last.phonemes:
        return ()

    # Swapping identical leading sounds changes nothing, so it is not a spoonerism
    # (it would otherwise be reported, possibly with homophones substituted).
    if first.phonemes[0] == last.phonemes[0]:
        return ()

    first_new_phonemes = (last.phonemes[0],) + tuple(first.phonemes[1:])
    last_new_phonemes = (first.phonemes[0],) + tuple(last.phonemes[1:])
    if first_new_phonemes not in entriesByPhonemes or last_new_phonemes not in entriesByPhonemes:
        return ()

    # Several words may share a pronunciation; the first one listed is used.
    first_new_entry = entriesByPhonemes[first_new_phonemes][0]
    last_new_entry = entriesByPhonemes[last_new_phonemes][0]

    return (first.word, last.word, first_new_entry.word, last_new_entry.word)

def found_spoonerism(switch_tuple):
    """Tell whether `switch_tuple` is a successful result of switch_beginning_sounds.

    A successful result has four elements; a failed attempt is the empty tuple.
    """
    expected_fields = 4
    return expected_fields == len(switch_tuple)

def new_sentence_from_spoonerism(sentence, switch_tuple):
    """Rewrite `sentence` with the spoonerism described by `switch_tuple` applied.

    Args:
        sentence: object whose `.words` is an iterable of word strings.
        switch_tuple: `(old_first, old_last, new_first, new_last)` word strings.

    Returns:
        `(switch_tuple, new_sentence)` where `new_sentence` is the upper-cased,
        space-joined sentence with every whole-word occurrence of
        "old_first old_last" replaced by "new_first new_last".
    """
    import re

    old_phrase = "{} {}".format(switch_tuple[0], switch_tuple[1])
    new_phrase = "{} {}".format(switch_tuple[2], switch_tuple[3])
    upper_sentence = " ".join(sentence.words).upper()

    # Only match the phrase when it is delimited by whitespace or the string ends,
    # so "TO BE" is not replaced inside "INTO BEING". Whitespace lookarounds are
    # used instead of \b because dictionary words may start with punctuation.
    pattern = r"(?<!\S)" + re.escape(old_phrase) + r"(?!\S)"
    # A callable replacement keeps backslashes in new_phrase literal.
    new_sentence = re.sub(pattern, lambda _match: new_phrase, upper_sentence)
    return (switch_tuple, new_sentence)
   
# Try every bigram of the sample sentence as a spoonerism candidate and keep the
# ones that worked. The results are materialized as lists rather than one-shot
# map/filter iterators, so they can be displayed or reused more than once.
spoonerisms = [
    switch
    for switch in (switch_beginning_sounds(word_list_to_entries(bigram)) for bigram in sentence.ngrams(2))
    if found_spoonerism(switch)
]
new_sentences = [new_sentence_from_spoonerism(sentence, spoonerism) for spoonerism in spoonerisms]

new_sentences




[(('TO', 'BE', 'BEU', 'T'),
  'IF YOU LIVE BEU T A HUNDRED I WANT TO LIVE BEU T A HUNDRED MINUS ONE DAY SO I NEVER HAVE TO LIVE WITHOUT YOU'),
 (('TO', 'BE', 'BEU', 'T'),
  'IF YOU LIVE BEU T A HUNDRED I WANT TO LIVE BEU T A HUNDRED MINUS ONE DAY SO I NEVER HAVE TO LIVE WITHOUT YOU'),
 (('ONE', 'DAY', 'DONE', 'WAY'),
  'IF YOU LIVE TO BE A HUNDRED I WANT TO LIVE TO BE A HUNDRED MINUS DONE WAY SO I NEVER HAVE TO LIVE WITHOUT YOU'),
 (('DAY', 'SO', "C'EST(2)", 'DAU'),
  "IF YOU LIVE TO BE A HUNDRED I WANT TO LIVE TO BE A HUNDRED MINUS ONE C'EST(2) DAU I NEVER HAVE TO LIVE WITHOUT YOU"),
 (('NEVER', 'HAVE', 'HAVER', 'NAV'),
  'IF YOU LIVE TO BE A HUNDRED I WANT TO LIVE TO BE A HUNDRED MINUS ONE DAY SO I HAVER NAV TO LIVE WITHOUT YOU')]

In [213]:
# Scratch check of str.replace: the replacement may be longer than the matched text.
"abcdefg".replace("de", "xxxx")

'abcxxxxfg'

In [194]:
# Scratch check: adding an int to a list slice raises TypeError (see the cell output).
3 + [1,2,3,4][1:]

TypeError: unsupported operand type(s) for +: 'int' and 'list'

In [187]:
# Scratch check: looking up a key that is not in the dictionary raises KeyError.
entriesByWord['abc']

KeyError: 'abc'

[Sentence("If you live to be a hundred, I want to live to be a hundred minus one day so I never have to live without you."),
 Sentence("Promise me you'll always remember: You're braver than you believe, and stronger than you seem, and smarter than you think."),
 Sentence("Did you ever stop to think, and forget to start again?"),
 Sentence("Organizing is what you do before you do something, so that when you do it, it is not all mixed up."),
 Sentence("Weeds are flowers too, once you get to know them.")]

In [None]:
# Scratch cell; its empty execution count shows it was never run.
1 + 1

In [151]:
# Combine the sentences of both corpora into one sequence for the homophone experiments.
sentences = principlesTextBlob.sentences + quotesTextBlob.sentences

In [143]:
def get_sentences_using_homophones(sentence):
    """Return every respelling of `sentence` obtained by swapping words for homophones.

    Each word is upper-cased and looked up in `entriesByWord`; words that are not
    in the dictionary are skipped, as in
    get_sentences_using_homophones_substitute_only_one, instead of raising
    KeyError. Every combination of homophones (the original words included) is
    produced.

    Args:
        sentence: object whose `.words` is an iterable of word strings.

    Returns:
        A set of distinct upper-case, space-joined sentence strings.
    """
    known_entries = [
        entriesByWord[word.upper()]
        for word in sentence.words
        if word.upper() in entriesByWord
    ]
    homophone_groups = [entriesByPhonemes[entry.phonemes] for entry in known_entries]
    # The product grows multiplicatively with the group sizes, so combinations are
    # streamed straight into the set rather than stored in intermediate lists.
    return {
        " ".join(entry.word for entry in combination)
        for combination in itertools.product(*homophone_groups)
    }

# Print every homophone respelling of the second corpus sentence, one per line.
new_sentences = list(get_sentences_using_homophones(sentences[1]))
for new_sentence in new_sentences:
    print(new_sentence)

REALIZE THAT U. HALVE NOTHING TOO FEAR FRUM TRUTH
REALIZE THAT U. HALVE NOTHING TU FIER FRUM TRUTH
REALIZE THAT YU HAVE NOTHING TU FIER FROM TRUTH
REALIZE THAT EWE HALVE NOTHING TOO FEAR FROM TRUTH
REALIZE THAT YUE HALVE NOTHING TUE FIER FROM TRUTH
REALIZE THAT HUGH(2) HAVE NOTHING THUY FEAR FRUM TRUTH
REALIZE THAT HUGH(2) HAVE NOTHING TUE FEAR FROM TRUTH
REALIZE THAT YOU HALVE NOTHING TEW(2) FIER FROM TRUTH
REALIZE THAT YOU HALVE NOTHING TU FIER FRUM TRUTH
REALIZE THAT YU HALVE NOTHING TOO FIER FRUM TRUTH
REALIZE THAT HUGH(2) HALVE NOTHING TWO FEAR FRUM TRUTH
REALIZE THAT HUGH(2) HAVE NOTHING THUY FIER FRUM TRUTH
REALIZE THAT EWE HALVE NOTHING TUE FIER FROM TRUTH
REALIZE THAT U HALVE NOTHING TOO FEAR FRUM TRUTH
REALIZE THAT YU HALVE NOTHING TWO FIER FRUM TRUTH
REALIZE THAT YUE HAVE NOTHING THUY FIER FRUM TRUTH
REALIZE THAT UWE HAVE NOTHING TEW(2) FIER FRUM TRUTH
REALIZE THAT YU HAVE NOTHING TUE FIER FROM TRUTH
REALIZE THAT EWE HALVE NOTHING TUE FIER FRUM TRUTH
REALIZE THAT UWE HAVE NO

In [144]:
def get_sentences_using_homophones_substitute_only_one(sentence):
    """Respell `sentence` by swapping one dictionary word at a time for a homophone.

    Words are upper-cased and looked up in `entriesByWord`; words that are not in
    the dictionary are dropped. For each remaining entry and each of its other
    homophones, every occurrence of that entry is replaced by the homophone.

    Args:
        sentence: object whose `.words` is an iterable of word strings.

    Returns:
        A list of distinct `((homophone, original), new_sentence_string)` pairs,
        in no particular order.
    """
    known_entries = []
    for token in sentence.words:
        key = token.upper()
        if key in entriesByWord:
            known_entries.append(entriesByWord[key])

    results = set()
    for target in known_entries:
        for homophone in entriesByPhonemes[target.phonemes]:
            # The word itself is part of its own homophone group; only swap in others.
            if homophone != target:
                respelled = [
                    homophone.word if target == current else current.word
                    for current in known_entries
                ]
                results.add(((homophone, target), " ".join(respelled)))

    return list(results)

In [153]:
from itertools import chain, islice
# chain.from_iterable pulls one sentence's substitutions at a time; chain(*map(...))
# unpacks the map first and so processes every sentence before yielding anything.
new_sentences = chain.from_iterable(map(get_sentences_using_homophones_substitute_only_one, sentences))

def has_word(word):
    """Build a predicate that checks whether `word` takes part in a substitution.

    The returned function takes a `(substitutions, sentence)` pair, where
    `substitutions` is an iterable of objects with a `.word` attribute, and
    returns True when the upper-cased `word` equals one of those words.
    """
    def substitutions_has_word(substitutionsAndSentence):
        swapped_words = [item.word for item in substitutionsAndSentence[0]]
        return word.upper() in swapped_words
    return substitutions_has_word

# Keep only the substitutions that involve the word "fight" and print each one
# together with the respelled sentence.
new_sentences_with_word = filter(has_word("fight"), new_sentences)

print("RESULTS\n\n")
for (substitutions, sentence) in new_sentences_with_word:
    print(str(substitutions) + ": " + str(sentence))

RESULTS


(Entry(FEIT, [F, AY, T]), Entry(FIGHT, [F, AY, T])): TALK ABOUT IS IT TRUE AND DOES IT MAKE SENSE FEIT FOR RIGHT
(Entry(FITE, [F, AY, T]), Entry(FIGHT, [F, AY, T])): TALK ABOUT IS IT TRUE AND DOES IT MAKE SENSE FITE FOR RIGHT
(Entry(FEIT, [F, AY, T]), Entry(FIGHT, [F, AY, T])): DON T PICK YOUR BATTLES FEIT THEM ALL
(Entry(FITE, [F, AY, T]), Entry(FIGHT, [F, AY, T])): DON T PICK YOUR BATTLES FITE THEM ALL
(Entry(FITE, [F, AY, T]), Entry(FIGHT, [F, AY, T])): WHEN THERE IS PAIN THE ANIMAL INSTINCT IS FITE OR FLIGHT TO EITHER STRIKE BACK OR RUN AWAY INSTEAD
(Entry(FEIT, [F, AY, T]), Entry(FIGHT, [F, AY, T])): WHEN THERE IS PAIN THE ANIMAL INSTINCT IS FEIT OR FLIGHT TO EITHER STRIKE BACK OR RUN AWAY INSTEAD
(Entry(FITE, [F, AY, T]), Entry(FIGHT, [F, AY, T])): FITE FOR RIGHT
(Entry(FEIT, [F, AY, T]), Entry(FIGHT, [F, AY, T])): FEIT FOR RIGHT
(Entry(FITE, [F, AY, T]), Entry(FIGHT, [F, AY, T])): THERE IS GIANT UNTAPPED POTENTIAL IN DISAGREEMENT ESPECIALLY IF THE DISAGREEMENT IS BETWE

(Entry(FEIT, [F, AY, T]), Entry(FIGHT, [F, AY, T])): GET UP STAND UP DO GIVE UP THE FEIT
(Entry(FITE, [F, AY, T]), Entry(FIGHT, [F, AY, T])): GET UP STAND UP DO GIVE UP THE FITE
(Entry(FITE, [F, AY, T]), Entry(FIGHT, [F, AY, T])): BUD ABBOTT YOU NEVER HEARD OF A COMEDY TEAM THAT DID FITE DID YOU
(Entry(FEIT, [F, AY, T]), Entry(FIGHT, [F, AY, T])): BUD ABBOTT YOU NEVER HEARD OF A COMEDY TEAM THAT DID FEIT DID YOU
(Entry(FITE, [F, AY, T]), Entry(FIGHT, [F, AY, T])): YOU HAVE TO FITE FOR YOUR DREAM BUT YOU ALSO HAVE TO FEEL FORTUNATE FOR WHAT YOU HAVE
(Entry(FEIT, [F, AY, T]), Entry(FIGHT, [F, AY, T])): YOU HAVE TO FEIT FOR YOUR DREAM BUT YOU ALSO HAVE TO FEEL FORTUNATE FOR WHAT YOU HAVE
(Entry(FITE, [F, AY, T]), Entry(FIGHT, [F, AY, T])): FROM WHERE THE SUN NOW STANDS I WILL FITE NO MORE FOREVER
(Entry(FEIT, [F, AY, T]), Entry(FIGHT, [F, AY, T])): FROM WHERE THE SUN NOW STANDS I WILL FEIT NO MORE FOREVER
(Entry(FEIT, [F, AY, T]), Entry(FIGHT, [F, AY, T])): CHIEF JOSEPH FROM WHERE THE SUN

(Entry(FITE, [F, AY, T]), Entry(FIGHT, [F, AY, T])): HERB CAEN THE ONLY WAY TO FITE A THING LIKE IS TO STAY AU COURANT IF IT KILLS YOU
(Entry(FEIT, [F, AY, T]), Entry(FIGHT, [F, AY, T])): HERB CAEN THE ONLY WAY TO FEIT A THING LIKE IS TO STAY AU COURANT IF IT KILLS YOU
(Entry(FEIT, [F, AY, T]), Entry(FIGHT, [F, AY, T])): IGNATIUS OF ANTIOCH FROM SYRIA EVEN TO ROME I FEIT WITH WILD BEASTS BY LAND AND SEA BY NIGHT AND BY DAY BEING BOUND AMIDST TEN LEOPARDS EVEN A COMPANY OF SOLDIERS WHO ONLY GROW WORSE WHEN THEY ARE KINDLY TREATED
(Entry(FITE, [F, AY, T]), Entry(FIGHT, [F, AY, T])): IGNATIUS OF ANTIOCH FROM SYRIA EVEN TO ROME I FITE WITH WILD BEASTS BY LAND AND SEA BY NIGHT AND BY DAY BEING BOUND AMIDST TEN LEOPARDS EVEN A COMPANY OF SOLDIERS WHO ONLY GROW WORSE WHEN THEY ARE KINDLY TREATED
(Entry(FITE, [F, AY, T]), Entry(FIGHT, [F, AY, T])): IRELAND BALDWIN WHEN I WAS YOUNG I FITE EVERYONE WHO INSISTED I BE AN ACTRESS
(Entry(FEIT, [F, AY, T]), Entry(FIGHT, [F, AY, T])): IRELAND BALDWIN 

(Entry(FITE, [F, AY, T]), Entry(FIGHT, [F, AY, T])): MIKE TYSON WHEN I FITE SOMEONE I WANT TO BREAK HIS WILL
(Entry(FEIT, [F, AY, T]), Entry(FIGHT, [F, AY, T])): MIKE TYSON WHEN I FEIT SOMEONE I WANT TO BREAK HIS WILL
(Entry(FITE, [F, AY, T]), Entry(FIGHT, [F, AY, T])): MIKE TYSON I DO TRY TO INTIMIDATE ANYBODY BEFORE A FITE
(Entry(FEIT, [F, AY, T]), Entry(FIGHT, [F, AY, T])): MIKE TYSON I DO TRY TO INTIMIDATE ANYBODY BEFORE A FEIT
(Entry(FEIT, [F, AY, T]), Entry(FIGHT, [F, AY, T])): MIKE TYSON I INTEND TO FEIT AND I WANT TO WIN
(Entry(FITE, [F, AY, T]), Entry(FIGHT, [F, AY, T])): MIKE TYSON I INTEND TO FITE AND I WANT TO WIN
(Entry(FITE, [F, AY, T]), Entry(FIGHT, [F, AY, T])): MUHAMMAD ALI THE FITE IS WON OR LOST FAR AWAY FROM WITNESSES BEHIND THE LINES IN THE GYM AND OUT THERE ON THE ROAD LONG BEFORE I DANCE UNDER THOSE LIGHTS
(Entry(FEIT, [F, AY, T]), Entry(FIGHT, [F, AY, T])): MUHAMMAD ALI THE FEIT IS WON OR LOST FAR AWAY FROM WITNESSES BEHIND THE LINES IN THE GYM AND OUT THERE ON 

(Entry(FITE, [F, AY, T]), Entry(FIGHT, [F, AY, T])): SERGIO AGUERO I LIKE TO THINK THAT PEOPLE WHO REALLY KNOW ME UNDERSTAND I AM THE SAME PERSON AND THAT IS SOMETHING I WILL ALWAYS FITE TO MAINTAIN
(Entry(FEIT, [F, AY, T]), Entry(FIGHT, [F, AY, T])): SERGIO AGUERO I LIKE TO THINK THAT PEOPLE WHO REALLY KNOW ME UNDERSTAND I AM THE SAME PERSON AND THAT IS SOMETHING I WILL ALWAYS FEIT TO MAINTAIN
(Entry(FEIT, [F, AY, T]), Entry(FIGHT, [F, AY, T])): I SAW THE FEIT SCENES HE HAD
(Entry(FITE, [F, AY, T]), Entry(FIGHT, [F, AY, T])): I SAW THE FITE SCENES HE HAD
(Entry(FEIT, [F, AY, T]), Entry(FIGHT, [F, AY, T])): GABRIEL WE WILL FEIT AGAINST ANY POWER PLAN
(Entry(FITE, [F, AY, T]), Entry(FIGHT, [F, AY, T])): GABRIEL WE WILL FITE AGAINST ANY POWER PLAN
(Entry(FITE, [F, AY, T]), Entry(FIGHT, [F, AY, T])): STEPHEN BALDWIN I 'M NOT GOING TO FITE IN THE PHYSICAL WITH PHYSICAL WEAPONS BECAUSE IT 'S NOT A PHYSICAL FITE
(Entry(FEIT, [F, AY, T]), Entry(FIGHT, [F, AY, T])): STEPHEN BALDWIN I 'M NOT GO

In [126]:
from textblob import Word
# Inspect the lemmas of the first synset TextBlob returns for "thinking".
s = Word("thinking").synsets
s[0].lemmas()

[Lemma('thinking.n.01.thinking'),
 Lemma('thinking.n.01.thought'),
 Lemma('thinking.n.01.thought_process'),
 Lemma('thinking.n.01.cerebration'),
 Lemma('thinking.n.01.intellection'),
 Lemma('thinking.n.01.mentation')]

In [111]:
# List the synsets TextBlob reports for the word "phone".
Word("phone").get_synsets()

[Synset('telephone.n.01'),
 Synset('phone.n.02'),
 Synset('earphone.n.01'),
 Synset('call.v.03')]

In [51]:
# Scratch check of list membership with `in`.
3 in list([1,2,3])

True

In [None]:
# Check how TextBlob tokenizes a sentence that contains curly quotation marks.
blah = TextBlob("Talk about “Is it true?” and “Does it make sense?”")
print(blah.words)