In [27]:
# Load the dictionary
import csv
path = "../src/main/resources/EnglishCmu/cmudict_SPHINX_40.txt"

class Entry:
    def __init__(self, word, phonemes):
        self.word = word
        self.phonemes = phonemes
        
    def __str__(self): 
        return self.word # + ": " + " ".join(self.phonemes)
    
    def __repr__(self):
        return "Entry(" + self.word + ", [" + ", ".join(self.phonemes) +"])"

entries = []
with open(path) as f:
    reader = csv.reader(f, delimiter="\t", quoting=csv.QUOTE_NONE)
    for row in reader:
        entry = Entry(row[0], tuple(row[1].split(" ")))
        entries.append(entry)
        


In [28]:
entriesByWord = {entry.word: entry for entry in entries}

In [29]:
import itertools
sortedEntries = sorted(entries, key = lambda entry: entry.phonemes)
groups = itertools.groupby(sortedEntries, key = lambda entry: entry.phonemes)
entriesByPhonemes = {phonemes: tuple(group) for phonemes, group in groups}

In [30]:
entriesByWord["SON"]

Entry(SON, [S, AH, N])

In [31]:
entriesByPhonemes[("S", "AH", "N")]

(Entry(SON, [S, AH, N]), Entry(SUN, [S, AH, N]))

In [32]:
from textblob import TextBlob
principlesPath = "../src/main/resources/corpora/only_numbered_principles.txt"
with open(principlesPath) as f:
    text = f.read()
    textBlob = TextBlob(text)

In [33]:
noun_phrases = textBlob.noun_phrases
sentences = textBlob.sentences
some_noun_phrases = noun_phrases[:20]
some_sentences = sentences[:-40:-1]

In [34]:
dir(noun_phrases[3])
noun_phrases[3]

'critical opinion'

In [35]:
entries = [entriesByWord[word.upper()] for word in sentences[0].words]
entries

[Entry(TRUST, [T, R, AH, S, T]),
 Entry(IN, [IH, N]),
 Entry(TRUTH, [T, R, UW, TH])]

In [41]:
def get_sentences_using_homophones(sentence):
    entries_for_sentence_words = [entriesByWord[word.upper()] for word in sentence.words]
    homophones_for_sentence_words = [entriesByPhonemes[entry.phonemes] for entry in entries_for_sentence_words]
    products = itertools.product(*homophones_for_sentence_words)
    new_sentences_words = [[entry.word for entry in product] for product in products]
    new_sentences = [" ".join(new_sentence_words) for new_sentence_words in new_sentences_words]
    new_sentences_unique = set(new_sentences)
    return new_sentences_unique

new_sentences = [sentence for sentence in get_sentences_using_homophones(sentences[1])]
for new_sentence in new_sentences:
    print(new_sentence)

REALIZE THAT U HAVE NOTHING THUY FEAR FROM TRUTH
REALIZE THAT U. HALVE NOTHING THUY FIER FRUM TRUTH
REALIZE THAT YOO HALVE NOTHING TU FEAR FROM TRUTH
REALIZE THAT EWE HAVE NOTHING THUY FEAR FRUM TRUTH
REALIZE THAT YEW HAVE NOTHING TEW(2) FIER FROM TRUTH
REALIZE THAT U HAVE NOTHING TOO FEAR FROM TRUTH
REALIZE THAT YEW HALVE NOTHING TWO FEAR FROM TRUTH
REALIZE THAT U HAVE NOTHING TU FIER FROM TRUTH
REALIZE THAT EWE HAVE NOTHING THUY FEAR FROM TRUTH
REALIZE THAT YU HALVE NOTHING TO FIER FROM TRUTH
REALIZE THAT HUGH(2) HAVE NOTHING TU FEAR FROM TRUTH
REALIZE THAT YEW HAVE NOTHING TO FEAR FROM TRUTH
REALIZE THAT YUE HALVE NOTHING TEW(2) FIER FRUM TRUTH
REALIZE THAT HUGH(2) HAVE NOTHING TEW(2) FEAR FROM TRUTH
REALIZE THAT YOO HALVE NOTHING TEW(2) FIER FROM TRUTH
REALIZE THAT YU HAVE NOTHING THUY FEAR FROM TRUTH
REALIZE THAT YOO HAVE NOTHING TU FEAR FROM TRUTH
REALIZE THAT YEW HAVE NOTHING TUE FIER FROM TRUTH
REALIZE THAT YUE HALVE NOTHING TU FEAR FRUM TRUTH
REALIZE THAT YOU HAVE NOTHING THUY

In [86]:
def get_sentences_using_homophones_substitute_only_one(sentence):
    distinct_new_sentences = set()
    entries_for_sentence_words = [entriesByWord[word.upper()] for word in sentence.words if word.upper() in entriesByWord]
    for entry in entries_for_sentence_words:
        entry_homophones = entriesByPhonemes[entry.phonemes]
        new_sentence_entries = [[homophone if entry == current_entry else current_entry for current_entry in entries_for_sentence_words] 
                                for homophone in entry_homophones]
        new_sentences = [[entry.word for entry in product] for product in new_sentence_entries]
        for new_sentence in new_sentences:
            new_sentence_string = " ".join([entry for entry in new_sentence])
            distinct_new_sentences.add(new_sentence_string)
            
    return list(distinct_new_sentences)

def print_new_sentences(sentence):    
    print(sentence)
    new_sentences = [sentence for sentence in get_sentences_using_homophones_substitute_only_one(sentence)]
    for new_sentence in new_sentences:
        print("\t" + new_sentence)

for sentence in filter_sentences_with_only_recognized_words(sentences[0:100]):
    print_new_sentences(sentence)

Trust in Truth.
	TRUST IN TRUTH
	TRUST INN TRUTH
	TRUST IN. TRUTH
Realize that you have nothing to fear from truth.
	REALIZE THAT HUGH(2) HAVE NOTHING TO FEAR FROM TRUTH
	REALIZE THAT YU HAVE NOTHING TO FEAR FROM TRUTH
	REALIZE THAT YOU HAVE NOTHING TUE FEAR FROM TRUTH
	REALIZE THAT YUE HAVE NOTHING TO FEAR FROM TRUTH
	REALIZE THAT YOU HAVE NOTHING THUY FEAR FROM TRUTH
	REALIZE THAT UWE HAVE NOTHING TO FEAR FROM TRUTH
	REALIZE THAT YOU HAVE NOTHING TO FEAR FROM TRUTH
	REALIZE THAT U. HAVE NOTHING TO FEAR FROM TRUTH
	REALIZE THAT YEW HAVE NOTHING TO FEAR FROM TRUTH
	REALIZE THAT YOO HAVE NOTHING TO FEAR FROM TRUTH
	REALIZE THAT YOU HALVE NOTHING TO FEAR FROM TRUTH
	REALIZE THAT EWE HAVE NOTHING TO FEAR FROM TRUTH
	REALIZE THAT YOU HAVE NOTHING TOO FEAR FROM TRUTH
	REALIZE THAT YOU HAVE NOTHING TWO FEAR FROM TRUTH
	REALIZE THAT YOU HAVE NOTHING TO FIER FROM TRUTH
	REALIZE THAT U HAVE NOTHING TO FEAR FROM TRUTH
	REALIZE THAT YOU HAVE NOTHING TU FEAR FROM TRUTH
	REALIZE THAT YOU HAVE NOTHI

In [81]:
sentences_recognized = list(filter(filter_sentences_with_only_recognized_words, sentences))

In [100]:
def has_word(word):
    def sentence_has_word(sentence):
        return sentence.words.count(Word(word)) > 0
    return sentence_has_word

sentences_with_word = list(filter(has_word("weakness"), sentences))
print("Found {} items".format(len(sentences_with_word)))
for sentence in sentences_with_word:
    print_new_sentences(sentence)

Found 5 items
While the
truth itself may be scary—you have a weakness, you have a deadly disease, etc.—knowing the truth will allow
you to deal with your situation better.
	WHILE THE TRUTH ITSELF MAE BE HAVE A WEAKNESS YOU HAVE A DEADLY DISEASE THE TRUTH WILL ALLOW YOU TO DEAL WITH YOUR SITUATION BETTER
	WILE THE TRUTH ITSELF MAY BE HAVE A WEAKNESS YOU HAVE A DEADLY DISEASE THE TRUTH WILL ALLOW YOU TO DEAL WITH YOUR SITUATION BETTER
	WHILE THE TRUTH ITSELF MAY BE HAVE A WEAKNESS YOU HAVE A DEADLY DISEASE THE TRUTH WILL ALLOW YOU TU DEAL WITH YOUR SITUATION BETTER
	WHILE THE TRUTH ITSELF MAY BE HAVE A WEAKNESS YOU HAVE A DEADLY DISEASE THE TRUTH WILL ALLOW YOU TWO DEAL WITH YOUR SITUATION BETTER
	WHILE THE TRUTH ITSELF MAY BE HAVE A WEAKNESS UWE HAVE A DEADLY DISEASE THE TRUTH WILL ALLOW UWE TO DEAL WITH YOUR SITUATION BETTER
	WHILE THE TRUTH ITSELF MAY BE HAVE A WEAKNESS YOU HAVE A DEADLY DISEASE THE TRUTH WILL ALLOW YOU TEW(2) DEAL WITH YOUR SITUATION BETTER
	WHILE THE TRUTH ITSELF MA

In [85]:
blah = TextBlob("Talk about “Is it true?” and “Does it make sense?”")
print(blah.words)

['Talk', 'about', '“', 'Is', 'it', 'true', '”', 'and', '“', 'Does', 'it', 'make', 'sense', '”']
