In [29]:
import csv
import re
from collections import defaultdict

In [2]:
!open .

In [261]:
# Map each dictionary word to its list of phonemes.
word_phone_dict = {}
with open('./cmudict/cmudict-0.7b.txt', encoding='latin1') as f:
    for line in f:
        # Lines starting with ';;;' are comments in the dictionary file.
        if line.startswith(';;;'):
            continue
        splits = line.split()
        if not splits:
            # Blank line: indexing splits[0] would raise IndexError.
            continue
        # First token is the word, the remaining tokens are its phonemes.
        word = splits[0]
        word_phone_dict[word] = splits[1:]

def load_word_phone_dict(path='./cmudict/cmudict-0.7b.txt'):
    '''Load a pronouncing dictionary file into a dict.

    Each non-comment line is split on whitespace: the first token is the word
    and the remaining tokens are its phonemes. Lines starting with ';;;' and
    blank lines are skipped. If a word appears more than once, the last
    occurrence wins.

    path: location of the dictionary file (read as latin1). Defaults to the
        path this notebook has always used.

    Returns a dict mapping word -> list of phoneme strings.
    Raises OSError (e.g. FileNotFoundError) if the file cannot be opened.'''
    word_phone_dict = {}
    with open(path, encoding='latin1') as f:
        for line in f:
            if line.startswith(';;;'):
                continue
            splits = line.split()
            if not splits:
                # Blank line: indexing splits[0] would raise IndexError.
                continue
            word_phone_dict[splits[0]] = splits[1:]
    return word_phone_dict

In [201]:
# phone -> type (e.g. 'vowel'); vowels are also registered with stress digits.
phone_type_dict = {}
# type -> list of the base (unstressed) phones of that type.
type_phone_dict = defaultdict(list)
with open('./cmudict/cmudict-0.7b.phones.txt', encoding='latin1') as f:
    for line in f:
        splits = line.split()
        if len(splits) < 2:
            # Blank or incomplete line: there is no (phone, type) pair to read.
            continue
        phone, phone_type = splits[0], splits[1]
        phone_type_dict[phone] = phone_type
        if phone_type == 'vowel':
            # quick lookup for stressed syllables: AY0, AY1, AY2
            for stress in range(3):
                phone_type_dict[phone + str(stress)] = 'vowel'
        type_phone_dict[phone_type].append(phone)

In [None]:
def flatten(x):
    '''Lazily yield every item of every iterable contained in x, in order.

    Only one level of nesting is removed.'''
    for group in x:
        for item in group:
            yield item

In [287]:
def is_vowel(phone):
    '''Return True when phone_type_dict lists the given phone as a vowel.

    Raises KeyError if the phone is not a key of phone_type_dict.'''
    phone_type = phone_type_dict[phone]
    return phone_type == 'vowel'

def stress_vowel(vowel):
    '''Yield the vowel with the suffix '1' appended, then with the suffix '2'.

    Example: list(stress_vowel('hi')) == ['hi1', 'hi2']'''
    for level in (1, 2):
        yield vowel + str(level)

['hi1', 'hi2']

In [203]:
def extract_syllables(phones):
    '''Extract syllable groupings from a list of phones.

    A new syllable starts at every vowel after the first one. Consonants that
    precede the first vowel are grouped with it, and consonants that follow a
    vowel stay in that vowel's syllable.

    phones: iterable of phone strings; each one is classified with is_vowel.

    Returns a list of lists of phones. An empty input gives an empty list
    (no syllables) rather than a list holding one empty syllable.'''
    syllables = []
    syl = []
    seen_vowel = False
    for phone in phones:
        if is_vowel(phone):
            # Once a vowel has been seen, syl always holds at least that
            # vowel, so there is no need to also test that syl is non-empty.
            if seen_vowel:
                syllables.append(syl)
                syl = []
            seen_vowel = True
        syl.append(phone)
    if syl:
        syllables.append(syl)
    return syllables
    
        

In [214]:
class RhymeTrie(object):
    '''Root of a trie of RhymeTrieNode objects keyed by phones in reverse order.

    Phone lists are reversed before they are stored or looked up, so words
    that share an ending share a path starting at the root.'''

    def __init__(self):
        # last phone of a word -> top-level RhymeTrieNode
        self.children = {}

    def insert(self, phones, word):
        '''Given a word and its phones, insert them into the trie. Associate the word with the end node.

        Returns the RhymeTrieNode the word was added to.
        Raises IndexError if phones is empty.'''
        if not phones:
            raise IndexError('cannot insert a word with no phones')
        # reverse phones to search down trie
        reversed_phones = phones[::-1]
        first, rest = reversed_phones[0], reversed_phones[1:]
        child_node = self.children.get(first)
        if child_node is None:
            child_node = RhymeTrieNode(first, self)
            self.children[first] = child_node
        # RhymeTrieNode.insert returns the node itself when rest is empty, so
        # a single-phone word needs no separate branch.
        final_node = child_node.insert(rest)
        final_node.words.add(word)
        return final_node

    def contains(self, phones):
        '''Given a list of phones, finds the end node in the trie associated with those phones.

        Delegates to the contains method of the top-level node for the last
        phone and returns whatever it returns. Returns False if phones is
        empty or no top-level node exists for the last phone.'''
        if not phones:
            return False
        reversed_phones = phones[::-1]
        child_node = self.children.get(reversed_phones[0])
        if child_node:
            return child_node.contains(reversed_phones[1:])
        return False

    def search(self, phones):
        '''Given a list of phones, find a node in the trie associated with those phones.

        Returns a RhymeTrieNode, or None if phones is empty or there is no
        node associated with the given phones.'''
        if not phones:
            return None
        # reverse phones to search down trie
        reversed_phones = phones[::-1]
        child_node = self.children.get(reversed_phones[0])
        if child_node:
            return child_node.search(reversed_phones[1:])
        return None

    def count_nodes(self):
        '''Counts the number of nodes in the trie (the root itself is not counted)'''
        return sum(child.count_nodes() for child in self.children.values())

    def count_words(self):
        '''Counts the number of words in the trie'''
        return sum(child.count_words() for child in self.children.values())

In [232]:
class RhymeTrieNode(object):
    '''One node of a rhyme trie: a phone, its parent, its children and the
    set of words that end at this node.'''

    def __init__(self, phone, parent, word=False):
        '''phone: the phone this node stands for.
        parent: the RhymeTrieNode (or trie root object) this node hangs off.
        word: accepted for backward compatibility; it is not stored or used.'''
        self.children = {}
        self.parent = parent
        self.phone = phone
        self.words = set()

    def insert(self, phones):
        '''Insert a list of phones into this node and its children. Returns the final node of the insert.

        Missing nodes are created along the way. An empty list returns this node.'''
        node = self
        for phone in phones:
            child_node = node.children.get(phone)
            if child_node is None:
                child_node = RhymeTrieNode(phone, node)
                node.children[phone] = child_node
            node = child_node
        return node

    def contains(self, phones):
        '''Given a list of phones, finds the end node in the trie associated with those phones.

        Returns the RhymeTrieNode reached by following phones from this node
        if at least one word ends there. Returns False if the path does not
        exist or no word ends at the node it leads to.'''
        node = self.search(phones)
        if node is not None and node.words:
            return node
        return False

    def search(self, phones):
        '''Given a list of phones, find a node in the trie associated with those phones.
        Returns a RhymeTrieNode or None if there is no node associated with the given phones'''
        node = self
        for phone in phones:
            node = node.children.get(phone)
            if node is None:
                return None
        return node

    def assemble(self):
        '''Aggregate all phones up the trie from this node, inclusive. Returns a generator

        Phones are yielded starting with this node's phone and moving towards
        the root; the walk stops at the first parent that is not a RhymeTrieNode.'''
        node = self
        while isinstance(node, RhymeTrieNode):
            yield node.phone
            node = node.parent

    def count_nodes(self):
        '''Counts this node plus all nodes below it'''
        return 1 + sum(child.count_nodes() for child in self.children.values())

    def count_words(self):
        '''Counts the words stored at this node and at all nodes below it'''
        return len(self.words) + sum(child.count_words() for child in self.children.values())

    def get_sub_words(self):
        '''Yield the words stored at this node, then the words of every node below it.'''
        yield from self.words
        for child in self.children.values():
            yield from child.get_sub_words()

In [233]:
# Build the module-level trie from every entry of word_phone_dict.
# RhymeTrie.insert reverses each phone list, so words that share an ending
# share a path from the root.
rt = RhymeTrie()
for word, phones in word_phone_dict.items():
    rt.insert(phones, word)

In [213]:
# Sanity check: looking up a word's own phones must lead to a node that
# stores that word.
# NOTE(review): search returns None for a missing path, in which case the
# assert line raises AttributeError rather than AssertionError.
for word, phones in word_phone_dict.items():
    retrieved = rt.search(phones)
    assert word in retrieved.words
# 'KLEVEN' must be among the words stored at or below the node for LEVEN's phones.
assert 'KLEVEN' in set(rt.search(word_phone_dict['LEVEN']).get_sub_words())

In [277]:
class RhymeDict(object):
    '''Rhyme lookup built from the pronouncing dictionary and a RhymeTrie.'''

    def __init__(self):
        '''Load the word -> phones dictionary and index every entry in a trie.'''
        # word -> list of phones, as returned by load_word_phone_dict
        self.dict = load_word_phone_dict()
        trie = RhymeTrie()
        for word, phones in self.dict.items():
            trie.insert(phones, word)
        self.rhyme_trie = trie

    def perfect_rhyme(self, word):
        '''Return the set of words whose phones end with this word's phones
        minus its first phone. The word itself is excluded.

        word: the word to rhyme; it is upper-cased before the lookup.

        Returns a set of words. The set is empty when the word has a single
        phone or when the trie has no node for the remaining phones.
        Raises KeyError if the word is not in the dictionary.'''
        word = word.upper()
        phones = self.dict[word]
        # chop off first consonant
        # NOTE(review): this drops the first phone whatever its type.
        suffix = phones[1:]
        if not suffix:
            # Nothing left to match after dropping the only phone.
            return set()
        # Use this instance's own trie rather than a module-level one.
        node = self.rhyme_trie.search(suffix)
        if node is None:
            return set()
        results = set(node.get_sub_words())
        # discard does not raise if the word is missing from the results.
        results.discard(word)
        return results

    # TODO: the original cell ended with an unfinished `def` here. No method
    # body was visible, so nothing was added in its place.
        

In [286]:
# Build a RhymeDict and spot-check its rhyme lookups.
rd = RhymeDict()
assert 'COG' in rd.perfect_rhyme('dog')
# NOTE(review): RhymeDict defines no consonant_family_rhyme method in this
# notebook, so this line raises the AttributeError recorded in the output.
assert 'COB' in rd.consonant_family_rhyme('dog')

AttributeError: 'RhymeDict' object has no attribute 'consonant_family_rhyme'

In [283]:
# Show the phone lists stored for two sample entries.
word_phone_dict['DR'], word_phone_dict['PROLOGUE']

(['D', 'R', 'AY1', 'V'], ['P', 'R', 'OW1', 'L', 'AA0', 'G'])

In [175]:
# Display the whole word -> phones mapping.
# NOTE(review): the recorded output is an empty dict and this cell's execution
# count is lower than the load cell's, so it presumably ran before the load.
word_phone_dict

{}