## Phonemes and Carnegie Mellon Pronouncing Dictionary

(See https://github.com/cmusphinx/cmudict/tree/4c6a365cea2c34340ffc218d5af7a38920fa7e37)

From https://www.nltk.org/_modules/nltk/corpus/reader/cmudict.html:

The Carnegie Mellon Pronouncing Dictionary [cmudict.0.6]
Copyright 1998 Carnegie Mellon University

File Format: Each line consists of an uppercased word, a counter
(for alternative pronunciations), and a transcription.  Vowels are
marked for stress (1=primary, 2=secondary, 0=no stress).  E.g.:
NATURAL 1 N AE1 CH ER0 AH0 L

The dictionary contains 127069 entries.  Of these, 119400 words are assigned
a unique pronunciation, 6830 words have two pronunciations, and 839 words have
three or more pronunciations.  Many of these are fast-speech variants.

Phonemes: There are 39 phonemes, as shown below:

    Phoneme Example Translation    Phoneme Example Translation
    ------- ------- -----------    ------- ------- -----------
    AA      odd     AA D           AE      at      AE T
    AH      hut     HH AH T        AO      ought   AO T
    AW      cow     K AW           AY      hide    HH AY D
    B       be      B IY           CH      cheese  CH IY Z
    D       dee     D IY           DH      thee    DH IY
    EH      Ed      EH D           ER      hurt    HH ER T
    EY      ate     EY T           F       fee     F IY
    G       green   G R IY N       HH      he      HH IY
    IH      it      IH T           IY      eat     IY T
    JH      gee     JH IY          K       key     K IY
    L       lee     L IY           M       me      M IY
    N       knee    N IY           NG      ping    P IH NG
    OW      oat     OW T           OY      toy     T OY
    P       pee     P IY           R       read    R IY D
    S       sea     S IY           SH      she     SH IY
    T       tea     T IY           TH      theta   TH EY T AH
    UH      hood    HH UH D        UW      two     T UW
    V       vee     V IY           W       we      W IY
    Y       yield   Y IY L D       Z       zee     Z IY
    ZH      seizure S IY ZH ER
    
From https://www.pythonstudio.us/language-processing/a-pronouncing-dictionary.html:

For each word, this lexicon provides a list of phonetic codes—distinct labels for each contrastive sound—known as phones. Observe that fire has two pronunciations (in U.S. English): the one-syllable F AY1 R, and the two-syllable F AY1 ER0. The symbols in the CMU Pronouncing Dictionary are from the Arpabet, described in more detail at http://en.wikipedia.org/wiki/Arpabet.

In [1]:
import nltk

test_dictionaries = True

# CMU Pronunciation dictionary:
entries = nltk.corpus.cmudict.entries()
if test_dictionaries:
    cmu_words = []
    for cmu_word, cmu_pronunciation in entries:
        cmu_words.append(cmu_word)
    cmu_wordset = set(cmu_words)

phoneme_list = ['AA','AH','AW','B','D','EH','EY','G','IH','JH','L','N','OW','P','S','T','UH','V','Y','ZH',
                'AE','AO','AY','CH','DH','ER','F','HH','IY','K','M','NG','OY','R','SH','TH','UW','W','Z']
phoneme_vowel_list = ['AA','AH','AW','EH','EY','IH','OW','UH','AE','AO','AY','ER','IY','OY','UW']
phoneme_consonant_list = ['B','D','G','JH','L','N','P','S','T','V','Y','ZH','CH',
                          'DH','F','HH','K','M','NG','R','SH','TH','W','Z']

# Pyenchant spelling dictionary:
import enchant
enchant_dict = enchant.Dict("en_US")

# Most Common English Words (https://github.com/dolph/dictionary)
enable1 = [line.rstrip() for line in open('enable1.txt')]
 
# english-words-py (https://pypi.org/project/english-words/)
if test_dictionaries:
    from english_words import english_words_set
    
# NLTK words corpus:
if test_dictionaries:
    #nltk.download('words')
    from nltk.corpus import words
    nltk_wordset = set(words.words())

# Wiktionary Word Frequency_lists (https://en.wiktionary.org/wiki/Wiktionary:Frequency_lists#English)



if test_dictionaries:
    test_words = ['can', "can't", 'geese', 'shelves', 'Thai', 'thai', 'ee', 'e.']
    for test_word in test_words:
        print(test_word)
        print('        CMU pronunciation:  {0}'.format(test_word in cmu_wordset))
        print('        Pyenchant spelling: {0}'.format(enchant_dict.check(test_word)))
        print('        english-words-py:   {0}'.format(test_word in english_words_set))
        print('        NLTK words corpus:  {0}'.format(test_word in nltk_wordset))
        print('        enable frequency:   {0}'.format(test_word in enable1))
        print('')


can
        CMU pronunciation:  True
        Pyenchant spelling: True
        english-words-py:   True
        NLTK words corpus:  True
        enable frequency:   True

can't
        CMU pronunciation:  True
        Pyenchant spelling: True
        english-words-py:   True
        NLTK words corpus:  False
        enable frequency:   False

geese
        CMU pronunciation:  True
        Pyenchant spelling: True
        english-words-py:   True
        NLTK words corpus:  False
        enable frequency:   True

shelves
        CMU pronunciation:  True
        Pyenchant spelling: True
        english-words-py:   False
        NLTK words corpus:  False
        enable frequency:   True

Thai
        CMU pronunciation:  False
        Pyenchant spelling: True
        english-words-py:   True
        NLTK words corpus:  True
        enable frequency:   False

thai
        CMU pronunciation:  True
        Pyenchant spelling: False
        english-words-py:   False
        NLTK words corpus:  

## Code to convert words to phonemes and phonemes to words

In [2]:
from g2p_en import G2p
word_to_phonemes = G2p()


def get_unique_numbers(numbers):
    unique = []
    for number in numbers:
        if number not in unique:
            unique.append(number)
    return unique


def phonemes_to_candidate_words(phonemes, start=0): 
    '''
    Generate a list of words from a list of phonemes,
    by concatenating sequences of the phonemes 
    and searching in CMU's Pronunciation Dictionary.
    '''
    words_from_phonemes = []
    words_from_consonants = []
    
    # For each subsequence of phonemes
    for stop in range(start + 1, len(phonemes) + 1):
        
        # Remove stresses from the subsequence of phonemes
        phoneme_subset = phonemes[start:stop]
        phoneme_subset = [re.sub(r'\d+', '', p) for p in phoneme_subset]
        
        # For each word in the CMU dictionary
        for word, pronunciation in entries:
            pron_no_stress = [re.sub(r'\d+', '', p) for p in pronunciation]
            
            # Find matching phonemes (homonym)
            if len(pron_no_stress) == len(phoneme_subset):
                match = 0
                for index, p in enumerate(pron_no_stress):
                    if p == phoneme_subset[index]:
                        match += 1
                if match == len(pron_no_stress):
                    words_from_phonemes.append([word, start, stop - 1]) #start + index])

            # Find matching consonants
            if len(pron_no_stress) == len(phoneme_subset):
            #if len(pron_no_stress) >= len(phoneme_subset):
                
                pron_consonant_subset = [x for x in pron_no_stress if x in phoneme_consonant_list]
                if pron_consonant_subset != []:
                    phon_consonant_subset = [x for x in phoneme_subset if x in phoneme_consonant_list]
                    if phon_consonant_subset != []:
                        if len(pron_consonant_subset) == len(phon_consonant_subset):
                            match = 0
                            for index, p in enumerate(pron_consonant_subset):
                                if p == phon_consonant_subset[index]:
                                    match += 1
                            if match == len(phon_consonant_subset):
                                #print(pron_no_stress, phoneme_subset)
                                words_from_consonants.append([word, start, stop - 1])
                    
    unique_stops = get_unique_numbers([i2 for x,i1,i2 in words_from_phonemes])
    unique_stops_consonants = get_unique_numbers([i2 for x,i1,i2 in words_from_consonants])

    return words_from_phonemes, unique_stops, words_from_consonants, unique_stops_consonants

## Code to count syllables

In [3]:
# https://datascience.stackexchange.com/questions/23376/how-to-get-the-number-of-syllables-in-a-word

import re

VOWEL_RUNS = re.compile("[aeiouy]+", flags=re.I)
EXCEPTIONS = re.compile(
    # fixes trailing e issues:
    # smite, scared
    "[^aeiou]e[sd]?$|"
    # fixes adverbs:
    # nicely
    + "[^e]ely$",
    flags=re.I
)
ADDITIONAL = re.compile(
    # fixes incorrect subtractions from exceptions:
    # smile, scarred, raises, fated
    "[^aeioulr][lr]e[sd]?$|[csgz]es$|[td]ed$|"
    # fixes miscellaneous issues:
    # flying, piano, video, prism, fire, evaluate
    + ".y[aeiou]|ia(?!n$)|eo|ism$|[^aeiou]ire$|[^gq]ua",
    flags=re.I
)

def count_syllables(word):
    vowel_runs = len(VOWEL_RUNS.findall(word))
    exceptions = len(EXCEPTIONS.findall(word))
    additional = len(ADDITIONAL.findall(word))
    return max(1, vowel_runs - exceptions + additional)

## Code to extract phonemes, stresses, and number of syllables per line of text

In [4]:
def words_to_sounds(line):

    if line.strip() != "":
        words = line.split()
        phonemes = []
        stresses = []
        syllables = 0
        for word in words:
            
            word = word.lower()

            # Extract phonemes per word (choose the first version of the phoneme)
            #     :: multiple pronunciations: pronouncing.phones_for_word(word) 
            phonemes_and_stresses_for_word = word_to_phonemes(word)
                      
            phonemes_for_word = [re.sub(r'\d+', '', x) for x in phonemes_and_stresses_for_word]
            stresses_blanks_for_word = [re.sub(r"(?:[A-Z])",'', x) for x in phonemes_and_stresses_for_word]
            stresses_for_word = []
            for i,p in enumerate(phonemes_for_word):
                if p in phoneme_list:
                    if stresses_blanks_for_word[i] == '':
                        stresses_for_word.append(0)
                    elif stresses_blanks_for_word[i] == '0':
                        stresses_for_word.append(0)
                    elif stresses_blanks_for_word[i] == '1':
                        stresses_for_word.append(1)
                    elif stresses_blanks_for_word[i] == '2':
                        stresses_for_word.append(2)                        
            phonemes_for_word = [x for x in phonemes_for_word if x in phoneme_list]                  
            phonemes += phonemes_for_word  
            stresses += stresses_for_word
            syllables += count_syllables(word)

        consonants = [x for x in phonemes if x in phoneme_consonant_list] 

    return phonemes, consonants, stresses, syllables

## Find all words that sound like each segment of each phoneme list

In [5]:
def phoneme_subsets_to_words(phonemes):

    phoneme_words = []
    consonant_words = []
    start = 0
    unique_stops = [-1]
    while start < len(phonemes):
        if len(unique_stops) == 0:
            unique_stops = [start + 1]
        for stop in unique_stops:
            start = stop + 1
            if start < len(phonemes):
                words_from_phonemes, unique_stops, words_from_consonants, unique_stops_consonants = phonemes_to_candidate_words(phonemes, start)
                phoneme_words += words_from_phonemes
                consonant_words += words_from_consonants

    return phoneme_words, consonant_words

## Filter words by another English dictionary

In [6]:
filter_strings = ['.', ',']

def filter_dictionary_words(words, verbose=False):

    filtered_words = []
    removed_words = []
    for word in words: 
        if enchant_dict.check(word[0]) and all([x not in word for x in filter_strings]):
            filtered_words.append(word)
        else:
            removed_words.append(word)
    
    if verbose and removed_words != []:
        print('Removed words:  {0}'.format(', '.join([x[0] for x in removed_words])), end='\n\n')

    return filtered_words, removed_words

## Organize words by their phoneme start and stop indices

In [7]:
def copy_list(list_to_copy, ncopies):
    list_copies = []
    for i in range(ncopies):
        list_copies.extend(list_to_copy)
    return list_copies


def flatten_list(nested_list):
    '''
    Flatten so that there are no tuples or lists within the list.
    
    >>> nested_list = [(['tye', 'a'], 'ja')]
    >>> flatten_list(nested_list)
    ... ['tye', 'a', 'ja']
    '''
    result=[]
    if nested_list != []:
        for element in nested_list:
            if isinstance(element, list) or isinstance(element, tuple):
                result.extend(flatten_list(element))
            else:
                result.append(element)
    return result

            
def flatten_sublists(nested_list):
    '''
    Flatten so that there are no subsublists within the sublists.
    
    >>> nested_list = [[('pty', 'a'), ('pty', 'uh'), ('pty', 'uhh')], [('tae', 'a'), ('tae', 'uh'), ('tae', 'uhh')]]
    >>> flatten_sublists(nested_list)
    [('pty', 'a'),
     ('pty', 'uh'),
     ('pty', 'uhh'),
     ('tae', 'a'),
     ('tae', 'uh'),
     ('tae', 'uhh')]
    '''
    result=[]
    if nested_list != []:
        for element in nested_list:
            if isinstance(element, list) or isinstance(element, tuple):
                if element != []:
                    if isinstance(element[0], list) or isinstance(element[0], tuple):
                        result.extend(flatten_sublists(element))
                    else:
                        result.append(element)
    return result

            
def find_words_with_start_index(word_start_stop_list, start_index):
    # store words that start at start_index
    start_words = []
    starts = []
    stops = []
    for word, start, stop in word_start_stop_list:
        if start == start_index and start != []:
            start_words.append(word)
            starts.append(start)
            stops.append(stop)
            
    return start_words, starts, stops


def organize_words_by_start(words_list):

    if not isinstance(words_list[0], list) and not isinstance(words_list[0], tuple):
        words_list = [words_list]
        
    # Get unique starts and stops, and max start and stop
    words2 = []
    starts2 = []
    stops2 = []
    for word, start, stop in words_list:
        words2.append(word)
        starts2.append(start)
        stops2.append(stop)
    unique_starts = get_unique_numbers(starts2)
    unique_stops = get_unique_numbers(stops2)
    max_start = max(get_unique_numbers(starts2))
    max_stop = max(get_unique_numbers(stops2))

    # Words organized by start index
    words_by_start = []
    stops = []
    for start_index in range(max_start + 1):
        start_words, istarts, istops = find_words_with_start_index(words_list, start_index)
        words_by_start.append(start_words)
        stops.append(istops)        
    #stops = stops[0:-1] 

    return words_by_start, stops, unique_starts, unique_stops, max_start, max_stop

## Construct word sequences with matching phoneme stop and start indices

In [16]:
def concatenate_lists(list_of_lists1, list_of_lists2):
    result = []
    for item1, item2 in zip(list_of_lists1, list_of_lists2):
        if isinstance(item1, str) and isinstance(item2, list):
            for element in item2:
                result.append((item1, element))
        elif isinstance(item1, tuple) and isinstance(item2, list):
            result.append((list(item1) + list(item2)))
    return result


def concatenate_words(new_words, new_stops, words, stops, unique_starts):
    '''
    Concatenate words where the stop index of one matches the start index of the next.
    '''
    # Initialize / format words
    if new_words == []:
        words1 = words[0]
        stops1 = stops[0]
    else:
        words1 = flatten_sublists(new_words)
        stops1 = flatten_list(new_stops)

    # For each word that starts at  a given index
    for iword1, word1 in enumerate(words1):

        # Find words that start after that word stops
        word1_stop = stops1[iword1]
        word2_start = word1_stop + 1
        #print(word1, word1_stop, word2_start, unique_starts)
        if word2_start in unique_starts:
            words2 = words[word2_start]
            stops2 = stops[word2_start]

            # Concatenate the first word with each of the second set of words
            if len(words2) > 0:
                word1_copies = copy_list([word1], len(words2))
                words2_list = [[x] for x in words2]
                new_words.append(concatenate_lists(word1_copies, words2_list))
                new_stops.append(stops2)
            
    return new_words, new_stops


def words_stop_to_start(words, stops, unique_starts, max_stop):

    updated_words = []
    updated_stops = []
    all_lines = []
    run = True
    while(run):

        updated_words, updated_stops = concatenate_words(updated_words, updated_stops, 
                                                         words, stops, unique_starts)

        # Store list of words if reached max_stop
        all_words = flatten_sublists(updated_words)
        all_stops = flatten_list(updated_stops)

        # Stop when number of words and stops don't match
        if len(all_words) != len(all_stops):
            break

        for istop, stop in enumerate(all_stops):
            if stop == max_stop:
                all_lines.append(' '.join(all_words[istop]))

    candidate_lines = []
    for line in all_lines:
        if line not in candidate_lines:
            candidate_lines.append(line)
                
    return candidate_lines

## Run all code on input text

In [17]:
# Load text
f = open("demo.txt", "r")
lines = f.readlines()


verbose = True
verbose2 = False

for line in lines:

    if verbose:
        print('')
        print('===============================================================================')
        print('Input line:  "{0}"'.format(line.strip()))
        print('===============================================================================')
    
    phonemes, consonants, stresses, syllables = words_to_sounds(line)
    
    if verbose:
        print('Syllables:   {0}'.format(syllables))
        print('Stresses:    {0}'.format(stresses))
        print('Phonemes:    {0}'.format(', '.join(phonemes)))
        print('Consonants:  {0}'.format(', '.join(consonants)), end='\n\n')

    phoneme_words, consonant_words = phoneme_subsets_to_words(phonemes)

    if verbose2:
        print('Phoneme words:  {0}'.format(', '.join([x[0] for x in flatten_sublists(phoneme_words)])), end='\n\n')
        print('Consonant words:  {0}'.format(', '.join([x[0] for x in flatten_sublists(consonant_words)])), end='\n\n')

    filtered_words_from_phonemes = filter_dictionary_words(phoneme_words, verbose=verbose2)
    filtered_words_from_consonants = filter_dictionary_words(consonant_words, verbose==verbose2)
    
    if verbose2:
        if filtered_words_from_phonemes != []:
            print('Remaining words from phonemes:  {0}'.format(', '.join([x[0] for x in flatten_sublists(filtered_words_from_phonemes)])), end='\n\n')
        if filtered_words_from_consonants != []:
            print('Remaining words from consonants:  {0}'.format(', '.join([x[0] for x in flatten_sublists(filtered_words_from_consonants)])), end='\n\n')

    words_by_start1, stops1, unique_starts1, unique_stops1, max_start1, max_stop1 = organize_words_by_start(flatten_sublists(filtered_words_from_phonemes))
    words_by_start2, stops2, unique_starts2, unique_stops2, max_start2, max_stop2 = organize_words_by_start(flatten_sublists(filtered_words_from_consonants))
    
    if verbose2:
        print('Phoneme-generated words organized by start index:')
        print('{0}'.format(words_by_start1), end='\n\n')
        print('Consonant-generated words organized by start index:')
        print('{0}'.format(words_by_start2), end='\n\n')

    candidate_lines1 = words_stop_to_start(words_by_start1, stops1, unique_starts1, max_stop1)

    if verbose:
        print('Candidate lines from phonemes:', end='\n\n')
        if candidate_lines1 == []:
            print('    None', end='\n')
        else:
            for candidate_line1 in flatten_list(candidate_lines1):
               print('    {0}'.format(candidate_line1), end='\n')
            print('')

    candidate_lines2 = words_stop_to_start(words_by_start2, stops2, unique_starts2, max_stop2)
    
    if verbose:
        print('Candidate lines from consonants:', end='\n\n')
        if candidate_lines2 == []:
            print('    None', end='\n')
        else:
            for candidate_line2 in flatten_list(candidate_lines2):
               print('    {0}'.format(candidate_line2), end='\n')


Input line:  "butler bitter butter"
Syllables:   6
Stresses:    [0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0]
Phonemes:    B, AH, T, L, ER, B, IH, T, ER, B, AH, T, ER
Consonants:  B, T, L, B, T, B, T

Candidate lines from phonemes:

    butler bitter butter
    butler bitar butter
    buttler bitter butter
    buttler bitar butter

Candidate lines from consonants:

    baht albeit abate
    baht albeit abbot
    baht albeit abet
    baht albeit about
    baht albeit abut
    baht albeit barrette
    baht albeit batter
    baht albeit batty
    baht albeit beater
    baht albeit beta
    baht albeit betta
    baht albeit better
    baht albeit bettor
    baht albeit biter
    baht albeit bitter
    baht albeit bitty
    baht albeit booty
    baht albeit butter
    baht albeit buyout
    baht albeit obit
    baht albeit abbate
    baht albeit abbett
    baht albeit abbitt
    baht albeit abbott
    baht albeit abert
    baht albeit abott
    baht albeit aubert
    baht albeit baity
    baht a

    bet liberty butter
    bet liberty buyout
    bet liberty obit
    bet liberty abbate
    bet liberty abbett
    bet liberty abbitt
    bet liberty abbott
    bet liberty abert
    bet liberty abott
    bet liberty aubert
    bet liberty baity
    bet liberty barritt
    bet liberty batey
    bet liberty batie
    bet liberty battey
    bet liberty baty
    bet liberty beatie
    bet liberty beattie
    bet liberty beatty
    bet liberty beaty
    bet liberty beiter
    bet liberty berta
    bet liberty bertie
    bet liberty berty
    bet liberty bette
    bet liberty betti
    bet liberty betty
    bet liberty bhatti
    bet liberty bhutto
    bet liberty bitar
    bet liberty bootie
    bet liberty botha
    bet liberty botta
    bet liberty boughter
    bet liberty boyett
    bet liberty boyette
    bet liberty boyter
    bet liberty bt
    bet liberty buetow
    bet liberty burritt
    bet liberty ebbert
    bet liberty ebert
    bet liberty obert
    bet albert batterer
    b

    bought albeit batty
    bought albeit beater
    bought albeit beta
    bought albeit betta
    bought albeit better
    bought albeit bettor
    bought albeit biter
    bought albeit bitter
    bought albeit bitty
    bought albeit booty
    bought albeit butter
    bought albeit buyout
    bought albeit obit
    bought albeit abbate
    bought albeit abbett
    bought albeit abbitt
    bought albeit abbott
    bought albeit abert
    bought albeit abott
    bought albeit aubert
    bought albeit baity
    bought albeit barritt
    bought albeit batey
    bought albeit batie
    bought albeit battey
    bought albeit baty
    bought albeit beatie
    bought albeit beattie
    bought albeit beatty
    bought albeit beaty
    bought albeit beiter
    bought albeit berta
    bought albeit bertie
    bought albeit berty
    bought albeit bette
    bought albeit betti
    bought albeit betty
    bought albeit bhatti
    bought albeit bhutto
    bought albeit bitar
    bought albeit boo

    battle barritt abet
    battle barritt about
    battle barritt abut
    battle barritt barrette
    battle barritt batter
    battle barritt batty
    battle barritt beater
    battle barritt beta
    battle barritt betta
    battle barritt better
    battle barritt bettor
    battle barritt biter
    battle barritt bitter
    battle barritt bitty
    battle barritt booty
    battle barritt butter
    battle barritt buyout
    battle barritt obit
    battle barritt abbate
    battle barritt abbett
    battle barritt abbitt
    battle barritt abbott
    battle barritt abert
    battle barritt abott
    battle barritt aubert
    battle barritt baity
    battle barritt barritt
    battle barritt batey
    battle barritt batie
    battle barritt battey
    battle barritt baty
    battle barritt beatie
    battle barritt beattie
    battle barritt beatty
    battle barritt beaty
    battle barritt beiter
    battle barritt berta
    battle barritt bertie
    battle barritt berty
    ba

    beetle bhutto beater
    beetle bhutto beta
    beetle bhutto betta
    beetle bhutto better
    beetle bhutto bettor
    beetle bhutto biter
    beetle bhutto bitter
    beetle bhutto bitty
    beetle bhutto booty
    beetle bhutto butter
    beetle bhutto buyout
    beetle bhutto obit
    beetle bhutto abbate
    beetle bhutto abbett
    beetle bhutto abbitt
    beetle bhutto abbott
    beetle bhutto abert
    beetle bhutto abott
    beetle bhutto aubert
    beetle bhutto baity
    beetle bhutto barritt
    beetle bhutto batey
    beetle bhutto batie
    beetle bhutto battey
    beetle bhutto baty
    beetle bhutto beatie
    beetle bhutto beattie
    beetle bhutto beatty
    beetle bhutto beaty
    beetle bhutto beiter
    beetle bhutto berta
    beetle bhutto bertie
    beetle bhutto berty
    beetle bhutto bette
    beetle bhutto betti
    beetle bhutto betty
    beetle bhutto bhatti
    beetle bhutto bhutto
    beetle bhutto bitar
    beetle bhutto bootie
    beetle bhutto bo

    betel boyter boyter
    betel boyter bt
    betel boyter buetow
    betel boyter burritt
    betel boyter ebbert
    betel boyter ebert
    betel boyter obert
    betel bt abate
    betel bt abbot
    betel bt abet
    betel bt about
    betel bt abut
    betel bt barrette
    betel bt batter
    betel bt batty
    betel bt beater
    betel bt beta
    betel bt betta
    betel bt better
    betel bt bettor
    betel bt biter
    betel bt bitter
    betel bt bitty
    betel bt booty
    betel bt butter
    betel bt buyout
    betel bt obit
    betel bt abbate
    betel bt abbett
    betel bt abbitt
    betel bt abbott
    betel bt abert
    betel bt abott
    betel bt aubert
    betel bt baity
    betel bt barritt
    betel bt batey
    betel bt batie
    betel bt battey
    betel bt baty
    betel bt beatie
    betel bt beattie
    betel bt beatty
    betel bt beaty
    betel bt beiter
    betel bt berta
    betel bt bertie
    betel bt berty
    betel bt bette
    betel bt betti
 

    bottler bought abt
    bottler bought beata
    bottler bought beato
    bottler bought beretta
    bottler bought bowater
    bottler bought bta
    bottler bout batterer
    bottler bout battery
    bottler bout burrito
    bottler bout buttery
    bottler bout abeyta
    bottler bout abt
    bottler bout beata
    bottler bout beato
    bottler bout beretta
    bottler bout bowater
    bottler bout bta
    bottler but batterer
    bottler but battery
    bottler but burrito
    bottler but buttery
    bottler but abeyta
    bottler but abt
    bottler but beata
    bottler but beato
    bottler but beretta
    bottler but bowater
    bottler but bta
    bottler butt batterer
    bottler butt battery
    bottler butt burrito
    bottler butt buttery
    bottler butt abeyta
    bottler butt abt
    bottler butt beata
    bottler butt beato
    bottler butt beretta
    bottler butt bowater
    bottler butt bta
    bottler byte batterer
    bottler byte battery
    bottler byte burr

    butler batty buetow
    butler batty burritt
    butler batty ebbert
    butler batty ebert
    butler batty obert
    butler beater abate
    butler beater abbot
    butler beater abet
    butler beater about
    butler beater abut
    butler beater barrette
    butler beater batter
    butler beater batty
    butler beater beater
    butler beater beta
    butler beater betta
    butler beater better
    butler beater bettor
    butler beater biter
    butler beater bitter
    butler beater bitty
    butler beater booty
    butler beater butter
    butler beater buyout
    butler beater obit
    butler beater abbate
    butler beater abbett
    butler beater abbitt
    butler beater abbott
    butler beater abert
    butler beater abott
    butler beater aubert
    butler beater baity
    butler beater barritt
    butler beater batey
    butler beater batie
    butler beater battey
    butler beater baty
    butler beater beatie
    butler beater beattie
    butler beater beatty


    beit lobato abate
    beit lobato abbot
    beit lobato abet
    beit lobato about
    beit lobato abut
    beit lobato barrette
    beit lobato batter
    beit lobato batty
    beit lobato beater
    beit lobato beta
    beit lobato betta
    beit lobato better
    beit lobato bettor
    beit lobato biter
    beit lobato bitter
    beit lobato bitty
    beit lobato booty
    beit lobato butter
    beit lobato buyout
    beit lobato obit
    beit lobato abbate
    beit lobato abbett
    beit lobato abbitt
    beit lobato abbott
    beit lobato abert
    beit lobato abott
    beit lobato aubert
    beit lobato baity
    beit lobato barritt
    beit lobato batey
    beit lobato batie
    beit lobato battey
    beit lobato baty
    beit lobato beatie
    beit lobato beattie
    beit lobato beatty
    beit lobato beaty
    beit lobato beiter
    beit lobato berta
    beit lobato bertie
    beit lobato berty
    beit lobato bette
    beit lobato betti
    beit lobato betty
    beit loba

    boyt labate beata
    boyt labate beato
    boyt labate beretta
    boyt labate bowater
    boyt labate bta
    boyt labatt batterer
    boyt labatt battery
    boyt labatt burrito
    boyt labatt buttery
    boyt labatt abeyta
    boyt labatt abt
    boyt labatt beata
    boyt labatt beato
    boyt labatt beretta
    boyt labatt bowater
    boyt labatt bta
    boyt lebert batterer
    boyt lebert battery
    boyt lebert burrito
    boyt lebert buttery
    boyt lebert abeyta
    boyt lebert abt
    boyt lebert beata
    boyt lebert beato
    boyt lebert beretta
    boyt lebert bowater
    boyt lebert bta
    boyt leibert batterer
    boyt leibert battery
    boyt leibert burrito
    boyt leibert buttery
    boyt leibert abeyta
    boyt leibert abt
    boyt leibert beata
    boyt leibert beato
    boyt leibert beretta
    boyt leibert bowater
    boyt leibert bta
    boyt libert batterer
    boyt libert battery
    boyt libert burrito
    boyt libert buttery
    boyt libert abeyta
 

    batley bette abott
    batley bette aubert
    batley bette baity
    batley bette barritt
    batley bette batey
    batley bette batie
    batley bette battey
    batley bette baty
    batley bette beatie
    batley bette beattie
    batley bette beatty
    batley bette beaty
    batley bette beiter
    batley bette berta
    batley bette bertie
    batley bette berty
    batley bette bette
    batley bette betti
    batley bette betty
    batley bette bhatti
    batley bette bhutto
    batley bette bitar
    batley bette bootie
    batley bette botha
    batley bette botta
    batley bette boughter
    batley bette boyett
    batley bette boyette
    batley bette boyter
    batley bette bt
    batley bette buetow
    batley bette burritt
    batley bette ebbert
    batley bette ebert
    batley bette obert
    batley betti abate
    batley betti abbot
    batley betti abet
    batley betti about
    batley betti abut
    batley betti barrette
    batley betti batter
    batley b

    battelle boughter betta
    battelle boughter better
    battelle boughter bettor
    battelle boughter biter
    battelle boughter bitter
    battelle boughter bitty
    battelle boughter booty
    battelle boughter butter
    battelle boughter buyout
    battelle boughter obit
    battelle boughter abbate
    battelle boughter abbett
    battelle boughter abbitt
    battelle boughter abbott
    battelle boughter abert
    battelle boughter abott
    battelle boughter aubert
    battelle boughter baity
    battelle boughter barritt
    battelle boughter batey
    battelle boughter batie
    battelle boughter battey
    battelle boughter baty
    battelle boughter beatie
    battelle boughter beattie
    battelle boughter beatty
    battelle boughter beaty
    battelle boughter beiter
    battelle boughter berta
    battelle boughter bertie
    battelle boughter berty
    battelle boughter bette
    battelle boughter betti
    battelle boughter betty
    battelle boughter bhatti
  

    beatle ebert burritt
    beatle ebert ebbert
    beatle ebert ebert
    beatle ebert obert
    beatle obert abate
    beatle obert abbot
    beatle obert abet
    beatle obert about
    beatle obert abut
    beatle obert barrette
    beatle obert batter
    beatle obert batty
    beatle obert beater
    beatle obert beta
    beatle obert betta
    beatle obert better
    beatle obert bettor
    beatle obert biter
    beatle obert bitter
    beatle obert bitty
    beatle obert booty
    beatle obert butter
    beatle obert buyout
    beatle obert obit
    beatle obert abbate
    beatle obert abbett
    beatle obert abbitt
    beatle obert abbott
    beatle obert abert
    beatle obert abott
    beatle obert aubert
    beatle obert baity
    beatle obert barritt
    beatle obert batey
    beatle obert batie
    beatle obert battey
    beatle obert baty
    beatle obert beatie
    beatle obert beattie
    beatle obert beatty
    beatle obert beaty
    beatle obert beiter
    beatle ob

    beitel bettor baty
    beitel bettor beatie
    beitel bettor beattie
    beitel bettor beatty
    beitel bettor beaty
    beitel bettor beiter
    beitel bettor berta
    beitel bettor bertie
    beitel bettor berty
    beitel bettor bette
    beitel bettor betti
    beitel bettor betty
    beitel bettor bhatti
    beitel bettor bhutto
    beitel bettor bitar
    beitel bettor bootie
    beitel bettor botha
    beitel bettor botta
    beitel bettor boughter
    beitel bettor boyett
    beitel bettor boyette
    beitel bettor boyter
    beitel bettor bt
    beitel bettor buetow
    beitel bettor burritt
    beitel bettor ebbert
    beitel bettor ebert
    beitel bettor obert
    beitel biter abate
    beitel biter abbot
    beitel biter abet
    beitel biter about
    beitel biter abut
    beitel biter barrette
    beitel biter batter
    beitel biter batty
    beitel biter beater
    beitel biter beta
    beitel biter betta
    beitel biter better
    beitel biter bettor
    beite

    beitler aubert boyter
    beitler aubert bt
    beitler aubert buetow
    beitler aubert burritt
    beitler aubert ebbert
    beitler aubert ebert
    beitler aubert obert
    beitler baity abate
    beitler baity abbot
    beitler baity abet
    beitler baity about
    beitler baity abut
    beitler baity barrette
    beitler baity batter
    beitler baity batty
    beitler baity beater
    beitler baity beta
    beitler baity betta
    beitler baity better
    beitler baity bettor
    beitler baity biter
    beitler baity bitter
    beitler baity bitty
    beitler baity booty
    beitler baity butter
    beitler baity buyout
    beitler baity obit
    beitler baity abbate
    beitler baity abbett
    beitler baity abbitt
    beitler baity abbott
    beitler baity abert
    beitler baity abott
    beitler baity aubert
    beitler baity baity
    beitler baity barritt
    beitler baity batey
    beitler baity batie
    beitler baity battey
    beitler baity baty
    beitler baity 

    bertil beatie ebbert
    bertil beatie ebert
    bertil beatie obert
    bertil beattie abate
    bertil beattie abbot
    bertil beattie abet
    bertil beattie about
    bertil beattie abut
    bertil beattie barrette
    bertil beattie batter
    bertil beattie batty
    bertil beattie beater
    bertil beattie beta
    bertil beattie betta
    bertil beattie better
    bertil beattie bettor
    bertil beattie biter
    bertil beattie bitter
    bertil beattie bitty
    bertil beattie booty
    bertil beattie butter
    bertil beattie buyout
    bertil beattie obit
    bertil beattie abbate
    bertil beattie abbett
    bertil beattie abbitt
    bertil beattie abbott
    bertil beattie abert
    bertil beattie abott
    bertil beattie aubert
    bertil beattie baity
    bertil beattie barritt
    bertil beattie batey
    bertil beattie batie
    bertil beattie battey
    bertil beattie baty
    bertil beattie beatie
    bertil beattie beattie
    bertil beattie beatty
    bertil

    betley battey betty
    betley battey bhatti
    betley battey bhutto
    betley battey bitar
    betley battey bootie
    betley battey botha
    betley battey botta
    betley battey boughter
    betley battey boyett
    betley battey boyette
    betley battey boyter
    betley battey bt
    betley battey buetow
    betley battey burritt
    betley battey ebbert
    betley battey ebert
    betley battey obert
    betley baty abate
    betley baty abbot
    betley baty abet
    betley baty about
    betley baty abut
    betley baty barrette
    betley baty batter
    betley baty batty
    betley baty beater
    betley baty beta
    betley baty betta
    betley baty better
    betley baty bettor
    betley baty biter
    betley baty bitter
    betley baty bitty
    betley baty booty
    betley baty butter
    betley baty buyout
    betley baty obit
    betley baty abbate
    betley baty abbett
    betley baty abbitt
    betley baty abbott
    betley baty abert
    betley baty abott

    beutler beaty batey
    beutler beaty batie
    beutler beaty battey
    beutler beaty baty
    beutler beaty beatie
    beutler beaty beattie
    beutler beaty beatty
    beutler beaty beaty
    beutler beaty beiter
    beutler beaty berta
    beutler beaty bertie
    beutler beaty berty
    beutler beaty bette
    beutler beaty betti
    beutler beaty betty
    beutler beaty bhatti
    beutler beaty bhutto
    beutler beaty bitar
    beutler beaty bootie
    beutler beaty botha
    beutler beaty botta
    beutler beaty boughter
    beutler beaty boyett
    beutler beaty boyette
    beutler beaty boyter
    beutler beaty bt
    beutler beaty buetow
    beutler beaty burritt
    beutler beaty ebbert
    beutler beaty ebert
    beutler beaty obert
    beutler beiter abate
    beutler beiter abbot
    beutler beiter abet
    beutler beiter about
    beutler beiter abut
    beutler beiter barrette
    beutler beiter batter
    beutler beiter batty
    beutler beiter beater
    beutler

    birtle bhutto bitty
    birtle bhutto booty
    birtle bhutto butter
    birtle bhutto buyout
    birtle bhutto obit
    birtle bhutto abbate
    birtle bhutto abbett
    birtle bhutto abbitt
    birtle bhutto abbott
    birtle bhutto abert
    birtle bhutto abott
    birtle bhutto aubert
    birtle bhutto baity
    birtle bhutto barritt
    birtle bhutto batey
    birtle bhutto batie
    birtle bhutto battey
    birtle bhutto baty
    birtle bhutto beatie
    birtle bhutto beattie
    birtle bhutto beatty
    birtle bhutto beaty
    birtle bhutto beiter
    birtle bhutto berta
    birtle bhutto bertie
    birtle bhutto berty
    birtle bhutto bette
    birtle bhutto betti
    birtle bhutto betty
    birtle bhutto bhatti
    birtle bhutto bhutto
    birtle bhutto bitar
    birtle bhutto bootie
    birtle bhutto botha
    birtle bhutto botta
    birtle bhutto boughter
    birtle bhutto boyett
    birtle bhutto boyette
    birtle bhutto boyter
    birtle bhutto bt
    birtle bhutto b

    birtley bootie bettor
    birtley bootie biter
    birtley bootie bitter
    birtley bootie bitty
    birtley bootie booty
    birtley bootie butter
    birtley bootie buyout
    birtley bootie obit
    birtley bootie abbate
    birtley bootie abbett
    birtley bootie abbitt
    birtley bootie abbott
    birtley bootie abert
    birtley bootie abott
    birtley bootie aubert
    birtley bootie baity
    birtley bootie barritt
    birtley bootie batey
    birtley bootie batie
    birtley bootie battey
    birtley bootie baty
    birtley bootie beatie
    birtley bootie beattie
    birtley bootie beatty
    birtley bootie beaty
    birtley bootie beiter
    birtley bootie berta
    birtley bootie bertie
    birtley bootie berty
    birtley bootie bette
    birtley bootie betti
    birtley bootie betty
    birtley bootie bhatti
    birtley bootie bhutto
    birtley bootie bitar
    birtley bootie bootie
    birtley bootie botha
    birtley bootie botta
    birtley bootie boughter
   

    bitler beatty beatie
    bitler beatty beattie
    bitler beatty beatty
    bitler beatty beaty
    bitler beatty beiter
    bitler beatty berta
    bitler beatty bertie
    bitler beatty berty
    bitler beatty bette
    bitler beatty betti
    bitler beatty betty
    bitler beatty bhatti
    bitler beatty bhutto
    bitler beatty bitar
    bitler beatty bootie
    bitler beatty botha
    bitler beatty botta
    bitler beatty boughter
    bitler beatty boyett
    bitler beatty boyette
    bitler beatty boyter
    bitler beatty bt
    bitler beatty buetow
    bitler beatty burritt
    bitler beatty ebbert
    bitler beatty ebert
    bitler beatty obert
    bitler beaty abate
    bitler beaty abbot
    bitler beaty abet
    bitler beaty about
    bitler beaty abut
    bitler beaty barrette
    bitler beaty batter
    bitler beaty batty
    bitler beaty beater
    bitler beaty beta
    bitler beaty betta
    bitler beaty better
    bitler beaty bettor
    bitler beaty biter
    bitle

    bittel betti burritt
    bittel betti ebbert
    bittel betti ebert
    bittel betti obert
    bittel betty abate
    bittel betty abbot
    bittel betty abet
    bittel betty about
    bittel betty abut
    bittel betty barrette
    bittel betty batter
    bittel betty batty
    bittel betty beater
    bittel betty beta
    bittel betty betta
    bittel betty better
    bittel betty bettor
    bittel betty biter
    bittel betty bitter
    bittel betty bitty
    bittel betty booty
    bittel betty butter
    bittel betty buyout
    bittel betty obit
    bittel betty abbate
    bittel betty abbett
    bittel betty abbitt
    bittel betty abbott
    bittel betty abert
    bittel betty abott
    bittel betty aubert
    bittel betty baity
    bittel betty barritt
    bittel betty batey
    bittel betty batie
    bittel betty battey
    bittel betty baty
    bittel betty beatie
    bittel betty beattie
    bittel betty beatty
    bittel betty beaty
    bittel betty beiter
    bittel be

    bittle bitar abate
    bittle bitar abbot
    bittle bitar abet
    bittle bitar about
    bittle bitar abut
    bittle bitar barrette
    bittle bitar batter
    bittle bitar batty
    bittle bitar beater
    bittle bitar beta
    bittle bitar betta
    bittle bitar better
    bittle bitar bettor
    bittle bitar biter
    bittle bitar bitter
    bittle bitar bitty
    bittle bitar booty
    bittle bitar butter
    bittle bitar buyout
    bittle bitar obit
    bittle bitar abbate
    bittle bitar abbett
    bittle bitar abbitt
    bittle bitar abbott
    bittle bitar abert
    bittle bitar abott
    bittle bitar aubert
    bittle bitar baity
    bittle bitar barritt
    bittle bitar batey
    bittle bitar batie
    bittle bitar battey
    bittle bitar baty
    bittle bitar beatie
    bittle bitar beattie
    bittle bitar beatty
    bittle bitar beaty
    bittle bitar beiter
    bittle bitar berta
    bittle bitar bertie
    bittle bitar berty
    bittle bitar bette
    bittle bita

    bottel buetow betti
    bottel buetow betty
    bottel buetow bhatti
    bottel buetow bhutto
    bottel buetow bitar
    bottel buetow bootie
    bottel buetow botha
    bottel buetow botta
    bottel buetow boughter
    bottel buetow boyett
    bottel buetow boyette
    bottel buetow boyter
    bottel buetow bt
    bottel buetow buetow
    bottel buetow burritt
    bottel buetow ebbert
    bottel buetow ebert
    bottel buetow obert
    bottel burritt abate
    bottel burritt abbot
    bottel burritt abet
    bottel burritt about
    bottel burritt abut
    bottel burritt barrette
    bottel burritt batter
    bottel burritt batty
    bottel burritt beater
    bottel burritt beta
    bottel burritt betta
    bottel burritt better
    bottel burritt bettor
    bottel burritt biter
    bottel burritt bitter
    bottel burritt bitty
    bottel burritt booty
    bottel burritt butter
    bottel burritt buyout
    bottel burritt obit
    bottel burritt abbate
    bottel burritt abbett

    boutelle abet abott
    boutelle abet aubert
    boutelle abet baity
    boutelle abet barritt
    boutelle abet batey
    boutelle abet batie
    boutelle abet battey
    boutelle abet baty
    boutelle abet beatie
    boutelle abet beattie
    boutelle abet beatty
    boutelle abet beaty
    boutelle abet beiter
    boutelle abet berta
    boutelle abet bertie
    boutelle abet berty
    boutelle abet bette
    boutelle abet betti
    boutelle abet betty
    boutelle abet bhatti
    boutelle abet bhutto
    boutelle abet bitar
    boutelle abet bootie
    boutelle abet botha
    boutelle abet botta
    boutelle abet boughter
    boutelle abet boyett
    boutelle abet boyette
    boutelle abet boyter
    boutelle abet bt
    boutelle abet buetow
    boutelle abet burritt
    boutelle abet ebbert
    boutelle abet ebert
    boutelle abet obert
    boutelle about abate
    boutelle about abbot
    boutelle about abet
    boutelle about about
    boutelle about abut
    boutelle abou

    buttler beta bhatti
    buttler beta bhutto
    buttler beta bitar
    buttler beta bootie
    buttler beta botha
    buttler beta botta
    buttler beta boughter
    buttler beta boyett
    buttler beta boyette
    buttler beta boyter
    buttler beta bt
    buttler beta buetow
    buttler beta burritt
    buttler beta ebbert
    buttler beta ebert
    buttler beta obert
    buttler betta abate
    buttler betta abbot
    buttler betta abet
    buttler betta about
    buttler betta abut
    buttler betta barrette
    buttler betta batter
    buttler betta batty
    buttler betta beater
    buttler betta beta
    buttler betta betta
    buttler betta better
    buttler betta bettor
    buttler betta biter
    buttler betta bitter
    buttler betta bitty
    buttler betta booty
    buttler betta butter
    buttler betta buyout
    buttler betta obit
    buttler betta abbate
    buttler betta abbett
    buttler betta abbitt
    buttler betta abbott
    buttler betta abert
    buttler

In [13]:
def words_stop_to_start(words, stops, unique_starts, max_stop):

    updated_words = []
    updated_stops = []
    all_lines = []
    run = True
    while(run):

        updated_words, updated_stops = concatenate_words(updated_words, updated_stops, 
                                                         words, stops, unique_starts)

        # Store list of words if reached max_stop
        all_words = flatten_sublists(updated_words)
        all_stops = flatten_list(updated_stops)

        # Stop when number of words and stops don't match
        if len(all_words) != len(all_stops):
            break
        else:
            count += 1

        for istop, stop in enumerate(all_stops):
            if stop == max_stop:
                all_lines.append(' '.join(all_words[istop]))

    candidate_lines = []
    for line in all_lines:
        if line not in candidate_lines:
            candidate_lines.append(line)
                
    return candidate_lines

candidate_lines1 = words_stop_to_start(words_by_start1, stops1, unique_starts1, max_stop1, nphonemes)

if verbose:
    print('Candidate lines from phonemes:', end='\n\n')
    if candidate_lines1 == []:
        print('    None', end='\n')
    else:
        for candidate_line1 in flatten_list(candidate_lines1):
           print('    {0}'.format(candidate_line1), end='\n')
        print('')


[('but', 'luhr'), ('butt', 'luhr'), ('butler', 'bit'), ('butler', 'bitter'), ('butler', 'bui'), ('butler', 'bitar'), ('buttler', 'bit'), ('buttler', 'bitter'), ('buttler', 'bui'), ('buttler', 'bitar')] [4, 4, 7, 8, 6, 8, 7, 8, 6, 8] 10 10
[('but', 'luhr'), ('butt', 'luhr'), ('butler', 'bit'), ('butler', 'bitter'), ('butler', 'bui'), ('butler', 'bitar'), ('buttler', 'bit'), ('buttler', 'bitter'), ('buttler', 'bui'), ('buttler', 'bitar'), ['but', 'luhr', 'bit'], ['but', 'luhr', 'bitter'], ['but', 'luhr', 'bui'], ['but', 'luhr', 'bitar'], ['butt', 'luhr', 'bit'], ['butt', 'luhr', 'bitter'], ['butt', 'luhr', 'bui'], ['butt', 'luhr', 'bitar'], ['butler', 'bit', 'are'], ['butler', 'bit', 'er'], ['butler', 'bit', 'err'], ['butler', 'bit', 'or'], ['butler', 'bit', 'herb'], ['butler', 'bit', 'eure'], ['butler', 'bit', 'ur'], ['butler', 'bit', 'erb'], ['butler', 'bit', 'erbe'], ['butler', 'bitter', 'but'], ['butler', 'bitter', 'butt'], ['butler', 'bitter', 'butter'], ['butler', 'bui', 'ter'], ['