In [48]:
# Load the dictionary
import csv
path = "../src/main/resources/EnglishCmu/cmudict_SPHINX_40.txt"

class Entry:
    """One dictionary record: a word together with its sequence of phonemes."""

    def __init__(self, word, phonemes):
        """Store the spelling and its phonemes.

        word: the entry's spelling (a string).
        phonemes: an iterable of phoneme strings.
        """
        self.word, self.phonemes = word, phonemes

    def __str__(self):
        """Return only the word; the phonemes are not included."""
        return self.word

    def __repr__(self):
        """Return a debug form such as ``Entry(SON, [S, AH, N])``."""
        phoneme_list = ", ".join(self.phonemes)
        return "".join(("Entry(", self.word, ", [", phoneme_list, "])"))

# Parse the tab-separated dictionary file into Entry objects: column 0 is the
# word, column 1 is its space-separated phoneme string.
entries = []
# newline="" is what the csv module asks for, so that the reader does its own
# line-ending handling.
with open(path, newline="") as f:
    reader = csv.reader(f, delimiter="\t", quoting=csv.QUOTE_NONE)
    for row in reader:
        if not row:
            # csv.reader yields [] for a blank line; indexing it would raise IndexError.
            continue
        entry = Entry(row[0], tuple(row[1].split(" ")))
        entries.append(entry)
        


In [49]:
# Index the entries by spelling; if a word occurs more than once, the last entry wins.
entriesByWord = dict((entry.word, entry) for entry in entries)

In [59]:
import itertools
def _phonemes_of(entry):
    """Key function shared by the sort and the grouping: the entry's phonemes."""
    return entry.phonemes

# groupby only merges adjacent items, so the entries are sorted by the same key first.
sortedEntries = sorted(entries, key=_phonemes_of)
groups = itertools.groupby(sortedEntries, key=_phonemes_of)
# Each group is materialised into a tuple before groupby advances to the next key.
entriesByPhonemes = dict((phonemes, tuple(group)) for phonemes, group in groups)

In [60]:
# Look up the entry stored under the word "SON".
entriesByWord["SON"]

Entry(SON, [S, AH, N])

In [61]:
# Look up every entry whose phoneme tuple is exactly ("S", "AH", "N").
entriesByPhonemes[("S", "AH", "N")]

(Entry(SON, [S, AH, N]), Entry(SUN, [S, AH, N]))

In [3]:
from textblob import TextBlob

In [4]:
# Two-sentence sample text wrapped in a TextBlob.
blob = TextBlob(
    "ITP is a two-year graduate program located in the Tisch School of the Arts. "
    "Perhaps the best way to describe us is as a Center for the Recently Possible."
)

In [7]:
# Print every noun phrase of every sentence, one per line, in sentence order.
for noun_phrase in (
    phrase
    for sentence in blob.sentences
    for phrase in sentence.noun_phrases
):
    print(noun_phrase)

itp
two-year graduate program
tisch
recently


In [11]:
from textblob import Word
# For each synset of "rich": print its definition, then its usage examples.
rich = Word("rich")
synsets = rich.synsets
for synset in synsets:
    print(synset.definition())
    for example in synset.examples():
        # Examples are indented with a tab so they sit under their definition.
        print("\t", example, sep="")

people who have possessions and wealth (considered as a group)
	only the very rich benefit from this legislation
possessing material wealth
	her father is extremely rich
	many fond hopes are pinned on rich uncles
having an abundant supply of desirable qualities or substances (especially natural resources)
	blessed with a land rich in minerals
	rich in ideas
	rich with cultural interest
of great worth or quality
	a rich collection of antiques
marked by great fruitfulness
	fertile farmland
	a fat land
	a productive vineyard
	rich soil
strong; intense
	deep purple
	a rich red
very productive
	rich seams of coal
high in mineral content; having a high proportion of fuel to air
	a rich vein of copper", "a rich gas mixture
suggestive of or characterized by great expense
	a rich display
containing plenty of fat, or eggs, or sugar
	rich desserts
	they kept gorging on rich foods
marked by richness and fullness of flavor
	a rich ruby port
	full-bodied wines
	a robust claret
	the robust flavor of 

In [12]:
principlesPath = "../src/main/resources/corpora/principles.txt"
# Read the whole file into memory; the handle is closed before the blob is built.
# NOTE(review): open() falls back to the platform default encoding here — confirm it matches the file.
with open(principlesPath) as principles_file:
    text = principles_file.read()
textBlob = TextBlob(text)

In [13]:
# Display the (token, tag) pairs for the whole loaded text.
textBlob.tags

[('PRINCIPLES', 'NNS'),
 ('BY', 'NNP'),
 ('RAY', 'NNP'),
 ('DALIO', 'NNP'),
 ('What', 'WP'),
 ('follows', 'VBZ'),
 ('are', 'VBP'),
 ('three', 'CD'),
 ('distinct', 'JJ'),
 ('parts', 'NNS'),
 ('that', 'WDT'),
 ('can', 'MD'),
 ('be', 'VB'),
 ('read', 'VBN'),
 ('either', 'DT'),
 ('independently', 'RB'),
 ('or', 'CC'),
 ('as', 'IN'),
 ('a', 'DT'),
 ('connected', 'JJ'),
 ('whole', 'NN'),
 ('Part', 'NN'),
 ('1', 'CD'),
 ('is', 'VBZ'),
 ('about', 'IN'),
 ('the', 'DT'),
 ('purpose', 'NN'),
 ('and', 'CC'),
 ('importance', 'NN'),
 ('of', 'IN'),
 ('having', 'VBG'),
 ('principles', 'NNS'),
 ('in', 'IN'),
 ('general', 'JJ'),
 ('having', 'VBG'),
 ('nothing', 'NN'),
 ('to', 'TO'),
 ('do', 'VB'),
 ('with', 'IN'),
 ('mine', 'NN'),
 ('Part', 'NN'),
 ('2', 'CD'),
 ('explains', 'NNS'),
 ('my', 'PRP$'),
 ('most', 'JJS'),
 ('fundamental', 'JJ'),
 ('life', 'NN'),
 ('principles', 'NNS'),
 ('that', 'WDT'),
 ('apply', 'VBP'),
 ('to', 'TO'),
 ('everything', 'NN'),
 ('I', 'PRP'),
 ('do', 'VBP'),
 ('Part', 'NN'),
 

In [15]:
# Extract the noun phrases of the whole text and print all of them.
noun_phrases = textBlob.noun_phrases
print(noun_phrases)

['principles by ray dalio', 'distinct parts', 'part', 'part', 'fundamental life principles', 'part', 'management principles', 'bridgewater', 'management principles', 'fundamental life principles', 'part', 'part', '’ s', 'part', 'management principles', 'bridgewater', '’ d', 'part', 'investment principles', 'parts', 'part', 'summary', 'principles', 'whole picture', '’ s', 'normal size book', 'thoughtful way', '’ t', 'copyright', 'ray dalio', 'contents introduction', 'part', 'importance', 'principles��������������������������������������', 'part', 'fundamental life', 'principles���������������������', 'part', 'management principles�������������������������������������', 'introduction', 'principles', 'ray dalio principles', 'similar circumstances', 'narrow answers', 'specific questions', 'successful players master', 'principles', 'different', 'different aspects', 'principles ”', 'principles ”', '“ management principles ”', '“ investment principles ”', '“ life principles ”', 'different peo

In [16]:
# Keep only the first 20 noun phrases as a small sample.
some_noun_phrases = noun_phrases[:20]

In [17]:
# Display the sample of noun phrases.
some_noun_phrases

WordList(['principles by ray dalio', 'distinct parts', 'part', 'part', 'fundamental life principles', 'part', 'management principles', 'bridgewater', 'management principles', 'fundamental life principles', 'part', 'part', '’ s', 'part', 'management principles', 'bridgewater', '’ d', 'part', 'investment principles', 'parts'])

In [22]:
sentences = textBlob.sentences
# [:-40:-1] walks backwards from the end: up to the last 39 sentences, final sentence first.
some_sentences = sentences[:-40:-1]
some_sentences

[Sentence("So when you are really in synch with others about what you’re wrestling with, that is a great step forward, because this feedback is probably true."),
 Sentence("And it’s often difficult for us to see
 and accept our own weaknesses."),
 Sentence("72Everyone is wrestling with some things, but most people don’t talk about them—some people don’t like to probe you about your weaknesses because they think it’s unkind or awkward."),
 Sentence("Not everyone is going to be happy about every decision you make, especially
 the decisions that say they can’t do something."),
 Sentence("But I want great people.72
 ... 210) Don’t try to please everyone."),
 Sentence("I also believe that to allow opt-outs would legitimatize two sets of rules and put our radically honest way of
 being in jeopardy."),
 Sentence("For reasons articulated throughout these principles, I believe we can’t compromise on
 this because that process of exploration is healthy for Bridgewater, healthy for them, and key 

In [27]:
# Copy the sampled sentences into a new list, unchanged and in the same order.
principle_sentences = list(some_sentences)
principle_sentences

[Sentence("So when you are really in synch with others about what you’re wrestling with, that is a great step forward, because this feedback is probably true."),
 Sentence("And it’s often difficult for us to see
 and accept our own weaknesses."),
 Sentence("72Everyone is wrestling with some things, but most people don’t talk about them—some people don’t like to probe you about your weaknesses because they think it’s unkind or awkward."),
 Sentence("Not everyone is going to be happy about every decision you make, especially
 the decisions that say they can’t do something."),
 Sentence("But I want great people.72
 ... 210) Don’t try to please everyone."),
 Sentence("I also believe that to allow opt-outs would legitimatize two sets of rules and put our radically honest way of
 being in jeopardy."),
 Sentence("For reasons articulated throughout these principles, I believe we can’t compromise on
 this because that process of exploration is healthy for Bridgewater, healthy for them, and key 