In [1]:
# Load the dictionary
import csv
path = "../src/main/resources/EnglishCmu/cmudict_SPHINX_40.txt"

class Entry:
    def __init__(self, word, phonemes):
        self.word = word
        self.phonemes = phonemes
        
    def __str__(self): 
        return self.word # + ": " + " ".join(self.phonemes)
    
    def __repr__(self):
        return "Entry(" + self.word + ", [" + ", ".join(self.phonemes) +"])"

entries = []
with open(path) as f:
    reader = csv.reader(f, delimiter="\t", quoting=csv.QUOTE_NONE)
    for row in reader:
        entry = Entry(row[0], tuple(row[1].split(" ")))
        entries.append(entry)
        


In [2]:
entriesByWord = {entry.word: entry for entry in entries}

In [3]:
import itertools
sortedEntries = sorted(entries, key = lambda entry: entry.phonemes)
groups = itertools.groupby(sortedEntries, key = lambda entry: entry.phonemes)
entriesByPhonemes = {phonemes: tuple(group) for phonemes, group in groups}

In [4]:
entriesByWord["SON"]

Entry(SON, [S, AH, N])

In [5]:
entriesByPhonemes[("S", "AH", "N")]

(Entry(SON, [S, AH, N]), Entry(SUN, [S, AH, N]))

In [10]:
principlesPath = "../src/main/resources/corpora/only_numbered_principles.txt"
with open(principlesPath) as f:
    text = f.read()
    textBlob = TextBlob(text)

In [21]:
noun_phrases = textBlob.noun_phrases
sentences = textBlob.sentences
some_noun_phrases = noun_phrases[:20]
some_sentences = sentences[:-40:-1]

In [59]:
dir(noun_phrases[3])
noun_phrases[3]

'critical opinion'

In [33]:
entries = [entriesByWord[word.upper()] for word in sentences[0].words]
entries

[Entry(TRUST, [T, R, AH, S, T]),
 Entry(IN, [IH, N]),
 Entry(TRUTH, [T, R, UW, TH])]

In [104]:
def get_sentences_using_homophones(sentence):
    entries_for_sentence_words = [entriesByWord[word.upper()] for word in sentence.words]
    homophones_for_sentence_words = [entriesByPhonemes[entry.phonemes] for entry in entries_for_sentence_words]
    products = itertools.product(*homophones_for_sentence_words)
    new_sentences_words = [[entry.word for entry in product] for product in products]
    new_sentences = [" ".join(new_sentence_words) for new_sentence_words in new_sentences_words]
    return new_sentences

new_sentences = [sentence for sentence in get_sentences_using_homophones(sentences[1])]
for new_sentence in new_sentences:
    print(new_sentence)

REALIZE THAT EWE HALVE NOTHING TEW(2) FEAR FROM TRUTH
REALIZE THAT EWE HALVE NOTHING TEW(2) FEAR FRUM TRUTH
REALIZE THAT EWE HALVE NOTHING TEW(2) FIER FROM TRUTH
REALIZE THAT EWE HALVE NOTHING TEW(2) FIER FRUM TRUTH
REALIZE THAT EWE HALVE NOTHING THUY FEAR FROM TRUTH
REALIZE THAT EWE HALVE NOTHING THUY FEAR FRUM TRUTH
REALIZE THAT EWE HALVE NOTHING THUY FIER FROM TRUTH
REALIZE THAT EWE HALVE NOTHING THUY FIER FRUM TRUTH
REALIZE THAT EWE HALVE NOTHING TO FEAR FROM TRUTH
REALIZE THAT EWE HALVE NOTHING TO FEAR FRUM TRUTH
REALIZE THAT EWE HALVE NOTHING TO FIER FROM TRUTH
REALIZE THAT EWE HALVE NOTHING TO FIER FRUM TRUTH
REALIZE THAT EWE HALVE NOTHING TOO FEAR FROM TRUTH
REALIZE THAT EWE HALVE NOTHING TOO FEAR FRUM TRUTH
REALIZE THAT EWE HALVE NOTHING TOO FIER FROM TRUTH
REALIZE THAT EWE HALVE NOTHING TOO FIER FRUM TRUTH
REALIZE THAT EWE HALVE NOTHING TU FEAR FROM TRUTH
REALIZE THAT EWE HALVE NOTHING TU FEAR FRUM TRUTH
REALIZE THAT EWE HALVE NOTHING TU FIER FROM TRUTH
REALIZE THAT EWE HALVE

In [99]:
entries_for_sentence_words = [entriesByWord[word.upper()] for word in sentences[1].words]
homophones_for_sentence_words = [entriesByPhonemes[entry.phonemes] for entry in entries_for_sentence_words]
# print(len(homophones_for_sentence_words))
products = itertools.product(*homophones_for_sentence_words)
len([product for product in products])

560

In [127]:
def get_sentences_using_homophones_substitute_only_one(sentence):
    distinct_new_sentences = set()
    entries_for_sentence_words = [entriesByWord[word.upper()] for word in sentence.words]
    for entry in entries_for_sentence_words:
        entry_homophones = entriesByPhonemes[entry.phonemes]
        new_sentence_entries = [[homophone if entry == current_entry else current_entry for current_entry in entries_for_sentence_words] 
                                for homophone in entry_homophones]
        new_sentences = [[entry.word for entry in product] for product in new_sentence_entries]
        for new_sentence in new_sentences:
            new_sentence_string = " ".join([entry for entry in new_sentence])
            distinct_new_sentences.add(new_sentence_string)
            
    return list(distinct_new_sentences)

def print_new_sentences(sentence):    
    new_sentences = [sentence for sentence in get_sentences_using_homophones_substitute_only_one(sentence)]
    for new_sentence in new_sentences:
        print(new_sentence)
      
for sentence in sentences[0:10]:
    print_new_sentences(sentence)

TRUST INN TRUTH
TRUST IN TRUTH
TRUST IN. TRUTH
REALIZE THAT YOU HAVE NOTHING THUY FEAR FROM TRUTH
REALIZE THAT U HAVE NOTHING TO FEAR FROM TRUTH
REALIZE THAT YOU HAVE NOTHING TO FEAR FROM TRUTH
REALIZE THAT EWE HAVE NOTHING TO FEAR FROM TRUTH
REALIZE THAT YOU HAVE NOTHING TWO FEAR FROM TRUTH
REALIZE THAT YOO HAVE NOTHING TO FEAR FROM TRUTH
REALIZE THAT YOU HAVE NOTHING TO FIER FROM TRUTH
REALIZE THAT HUGH(2) HAVE NOTHING TO FEAR FROM TRUTH
REALIZE THAT U. HAVE NOTHING TO FEAR FROM TRUTH
REALIZE THAT YOU HAVE NOTHING TUE FEAR FROM TRUTH
REALIZE THAT YOU HAVE NOTHING TOO FEAR FROM TRUTH
REALIZE THAT UWE HAVE NOTHING TO FEAR FROM TRUTH
REALIZE THAT YU HAVE NOTHING TO FEAR FROM TRUTH
REALIZE THAT YUE HAVE NOTHING TO FEAR FROM TRUTH
REALIZE THAT YOU HAVE NOTHING TU FEAR FROM TRUTH
REALIZE THAT YOU HAVE NOTHING TO FEAR FRUM TRUTH
REALIZE THAT YOU HAVE NOTHING TEW(2) FEAR FROM TRUTH
REALIZE THAT YOU HALVE NOTHING TO FEAR FROM TRUTH
REALIZE THAT YEW HAVE NOTHING TO FEAR FROM TRUTH
CREATE AN EN

KeyError: 'WOULDN'

In [112]:
s = set()
s.add(1)
s.add(2)
s.add(1)
print(s)

{1, 2}


In [113]:
s.add(1,2)

TypeError: add() takes exactly one argument (2 given)