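The input is a TextGrid produced by the Montreal Forced Aligner (MFA), so the tiers have fixed names: the code below reads the `utterances`, `words` and `phones` tiers.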

In [14]:
from praatio import textgrid

def get_phone_sequences(tgfile):
    """Read a TextGrid and nest its phones inside words inside utterances.

    The "utterances", "words" and "phones" tiers are read, and intervals
    whose label is empty after stripping whitespace are dropped. Every
    remaining interval becomes a dict with "start", "end" and "text" keys.
    Intervals are attached to a parent purely by comparing end times: a
    word goes to the first utterance whose end is at or after the word's
    end, and a phone goes to the first word whose end is at or after the
    phone's end. Each tier is walked once, in order.

    Args:
        tgfile: Path of the TextGrid file to open.

    Returns:
        The list of utterance dicts. Each has an extra "words" list, and
        every word in it has an extra "phones" list.
    """
    # The second positional argument is praatio's includeEmptyIntervals flag.
    tg = textgrid.openTextgrid(tgfile, False)

    def get_tier_list(tiername):
        """Return the non-empty intervals of one tier as plain dicts."""
        intervals = []
        for entry in tg.getTier(tiername).entries:
            label = entry.label.strip()
            if label != "":
                intervals.append(
                    {"start": entry.start, "end": entry.end, "text": label}
                )
        return intervals

    utterances = get_tier_list("utterances")
    words = get_tier_list("words")
    phones = get_tier_list("phones")

    # Cursors into the word and phone lists; they only ever move forward.
    word_pos = 0
    phone_pos = 0
    for utterance in utterances:
        utterance["words"] = []

        while word_pos < len(words) and words[word_pos]["end"] <= utterance["end"]:
            word = words[word_pos]
            word["phones"] = []

            while phone_pos < len(phones) and phones[phone_pos]["end"] <= word["end"]:
                word["phones"].append(phones[phone_pos])
                phone_pos += 1

            utterance["words"].append(word)
            word_pos += 1

    return utterances

In [42]:
# Build the utterance -> words -> phones structure for one aligned recording.
# NOTE(review): absolute, machine-specific path; change it before running elsewhere.
ps = get_phone_sequences("/Users/joregan/Desktop/hsi_5_0718_209_001_main.TextGrid")

In [17]:
def get_dictionary(phone_sequence):
    """Collect the distinct pronunciations found in an aligned phone sequence.

    Args:
        phone_sequence: Iterable of utterance dicts, each with a "words" list
            whose items carry "text" and a "phones" list of dicts with "text".

    Returns:
        A sorted list of unique dictionary lines, each formatted as
        "<word>\\t<phone> <phone> ...". Sorting keeps the result identical
        from run to run; iterating a bare set of strings does not.
    """
    lines = {
        f"{word['text']}\t{' '.join(p['text'] for p in word['phones'])}"
        for utterance in phone_sequence
        for word in utterance["words"]
    }
    return sorted(lines)

In [20]:
# Phone-label-to-IPA table, one pair per line: the label, a single space,
# then the IPA string. Vowel labels end in a stress digit; in the IPA column
# the rows for digit 1 start with "ˈ", the rows for digit 2 start with "ˌ",
# and the rows for digit 0 carry no stress mark.
# NOTE: AH0 is listed twice (ə, then ɐ); the loop that builds
# cmudict_to_espeak keeps only the first value seen for a key, so ə wins.
MAPPING = """
AA0 ɑː
AA1 ˈɑː
AA2 ˌɑː
AE0 æ
AE1 ˈæ
AE2 ˌæ
AH0 ə
AH0 ɐ
AH1 ˈʌ
AH2 ˌʌ
AO0 ɔː
AO1 ˈɔː
AO2 ˌɔː
AW0 aʊ
AW1 ˈaʊ
AW2 ˌaʊ
AY0 aɪ
AY1 ˈaɪ
AY2 ˌaɪ
B b
CH tʃ
D d
DH ð
EH0 ɛ
EH1 ˈɛ
EH2 ˌɛ
ER0 ɚ
ER1 ˈɜː
ER2 ˌɜː
EY0 eɪ
EY1 ˈeɪ
EY2 ˌeɪ
F f
G ɡ
HH h
IH0 ɪ
IH1 ˈɪ
IH2 ˌɪ
IY0 i
IY1 ˈiː
IY2 ˌiː
JH dʒ
K k
L l
M m
N n
NG ŋ
OW0 oʊ
OW1 ˈoʊ
OW2 ˌoʊ
OY0 ɔɪ
OY1 ˈɔɪ
OY2 ˌɔɪ
P p
R ɹ
S s
SH ʃ
T t
TH θ
UH0 ʊ
UH1 ˈʊ
UH2 ˌʊ
UW0 uː
UW1 ˈuː
UW2 ˌuː
V v
W w
Y j
Z z
ZH ʒ
"""

In [21]:
# Parse MAPPING into a lookup table from phone label to IPA string.
cmudict_to_espeak = {}
for line in MAPPING.split("\n"):
    if line == "":
        continue
    line = line.strip()
    parts = line.split(" ")

    if len(parts) == 2:
        k, v = parts
        # The first value listed for a label wins; later duplicates are ignored.
        cmudict_to_espeak.setdefault(k, v)
    else:
        # Echo any line that is not exactly "<label> <ipa>" so it can be checked.
        print(line)

In [38]:
def espeakify(phlist, sep=""):
    """Convert a word's phone labels to one IPA string via cmudict_to_espeak.

    Args:
        phlist: List of phone labels. An item may hold several labels
            separated by single spaces; empty-string items are skipped.
        sep: String placed between the converted symbols (default: none).

    Returns:
        The converted symbols joined by ``sep``, or "" when ``phlist`` is
        exactly ["spn"] or ["sil"].

    Raises:
        KeyError: If a label is not a key of cmudict_to_espeak.
    """
    if phlist in (["spn"], ["sil"]):
        return ""

    symbols = []
    for item in phlist:
        if item == "":
            continue
        # Splitting a label that has no space yields just that label, so this
        # one path covers both single and space-separated items.
        symbols.extend(cmudict_to_espeak[label] for label in item.split(" "))
    return sep.join(symbols)

In [40]:
import re

def get_utterances_like_espeak(phone_sequence):
    """Pair each utterance's text with a space-separated IPA transcription.

    Utterances whose text both starts with "[" and ends with "]" are skipped.
    For the rest, every word's phone labels are converted with ``espeakify``
    and the words are joined with single spaces; runs of spaces left by words
    that convert to "" are collapsed to one space.

    Args:
        phone_sequence: Iterable of utterance dicts with "text" and "words";
            each word has a "phones" list of dicts with "text".

    Returns:
        A dict mapping utterance text to its transcription. Utterances that
        share the same text share one key, and the last one seen is kept.
    """
    transcriptions = {}
    for utt in phone_sequence:
        orthography = utt["text"]
        if orthography.startswith("[") and orthography.endswith("]"):
            continue
        converted = [
            espeakify([ph["text"] for ph in wd["phones"]])
            for wd in utt["words"]
        ]
        transcriptions[orthography] = re.sub("  +", " ", " ".join(converted))
    return transcriptions

In [45]:
# Map each utterance's text to its IPA transcription.
utts = get_utterances_like_espeak(ps)

In [46]:
# Print each utterance's text, then its transcription, then a blank line.
for utt, transcription in utts.items():
    print(utt)
    print(transcription)
    print()

Yeah, but we we can't walk around in the entire room.
jˈæ bˈʌt wˈiː wˈiː kˈɑːnt wˈɔːk əɹˈaʊnd ɪn ɪntˈaɪɚ ɹˈuːm

Yeah.
jˈæ

Ah, there you are. And you...
ˈɑː ðˈɛɹ jˈuː ˈɑːɹ ən jˈuː

Yes.
jˈɛs

So it is so nice that you came here and I just want to... I just have to brag of it. Ah, you see this couch and and the...
sˈoʊ ˈɪt ˈɪ sˈoʊ nˈaɪs ðˈæt jˈuː kˈeɪm hˈiːɹ ˈænd ˈaɪ dʒˈʌst wˈɑːn tˈuː ˈaɪ dʒˈʌst hˈæv tə bɹˈæɡ əv ˈɪt ˈæ jˈuː sˈiː ðˈɪs kˈaʊtʃ ˈɑːn ˈɑːnd ə

What do you call this one in Swedish eh or in English, fåtölj?
wˈʌt jˈuː kˈɔːl ðˈɪs wˈʌn ɪn swˈiːdɪʃ ˈɛ ˈɔːɹ ɪn ˈɪŋlɪʃ fˈoʊtˈʌlj

Chair yes this- armchair, yes, this armchair eh is really really amazing because it's a Danish designer who's uh really famous. He's called... his name is eh Jensen.
tʃˈɛɹ jˈɛs ðɪs ˈɑːɹmtʃˌɛɹ jˈɛs ðˈɪs ˈɑːɹmtʃˌɛɹ ˈɛ ˈɪs ɹˈiːli ɹˈiːli əmˈeɪsɪŋ bɪkˈɑːz ɪts ə dˈeɪnɪʃ dɪsˈaɪnɚ hˈuːs ˈɛ ɹˈiːli fˈeɪməs hˈiːs kˈɔːld hɪz nˈeɪm ˈɪz ˈɛ jˈɛnsən

And he's also doing uh like... other stuff, like like the vases there. He d