# Example of using ReAline

In [18]:
from clu.phontools.lang.en import EnglishUtils
from clu.phontools.struct import *
from clu.phontools.pronouncing import ConverterUtils
from clu.phontools.alignment.realine import *
from clu.phontools.alignment.lbe import *
import random

# Target and Transcript as strings

In [19]:
# The target phrase and the transcript that will be aligned against it,
# both kept as plain whitespace-separated strings at this stage.
target, transcript = "cat in the hat", "canyon hat"

# 1) Get the ARPAbet phrases from the CMU dictionary

In [20]:
# Look up the target one word at a time; the result holds one Phrase per
# combination of the words' alternative pronunciations.
target_phrases = EnglishUtils.all_possible_phrases_for(
    target.split()
)
# Bare last expression so the notebook renders the full list.
target_phrases


[Phrase(words=(Word(word='cat', phonological_form=PhonologicalWord(phones=('K', 'AE1', 'T'), stress_pattern=[Stress.NON_VOWEL, Stress.PRIMARY, Stress.NON_VOWEL])), Word(word='in', phonological_form=PhonologicalWord(phones=('IH0', 'N'), stress_pattern=[Stress.NO_STRESS, Stress.NON_VOWEL])), Word(word='the', phonological_form=PhonologicalWord(phones=('DH', 'AH0'), stress_pattern=[Stress.NON_VOWEL, Stress.NO_STRESS])), Word(word='hat', phonological_form=PhonologicalWord(phones=('HH', 'AE1', 'T'), stress_pattern=[Stress.NON_VOWEL, Stress.PRIMARY, Stress.NON_VOWEL])))),
 Phrase(words=(Word(word='cat', phonological_form=PhonologicalWord(phones=('K', 'AE1', 'T'), stress_pattern=[Stress.NON_VOWEL, Stress.PRIMARY, Stress.NON_VOWEL])), Word(word='in', phonological_form=PhonologicalWord(phones=('IH0', 'N'), stress_pattern=[Stress.NO_STRESS, Stress.NON_VOWEL])), Word(word='the', phonological_form=PhonologicalWord(phones=('DH', 'AH1'), stress_pattern=[Stress.NON_VOWEL, Stress.PRIMARY])), Word(word=

# 2) Get the ARPAbet phones

In [21]:
def arpabet_phones(phrase):
    """Return the phone sequence of every candidate pronunciation of `phrase`.

    `phrase` is handed unchanged to `EnglishUtils.all_possible_phrases_for`
    (the calls in this notebook pass a list of word strings). For each
    phrase that comes back, its `.phones` is copied into a fresh list.

    Returns:
        A list with one list of phones per candidate phrase.
    """
    pronunciations = EnglishUtils.all_possible_phrases_for(phrase)
    return [list(candidate.phones) for candidate in pronunciations]

# NOTE: `target` and `transcript` are rebound from strings to lists of phone
# lists here, so running this cell a second time without first re-running
# the cell that defines the strings fails (`.split()` is called on a list).
target = arpabet_phones(target.split())
transcript = arpabet_phones(transcript.split())

# Show both sets of candidates, separated by a blank line.
print(target, transcript, sep="\n\n")

[['K', 'AE1', 'T', 'IH0', 'N', 'DH', 'AH0', 'HH', 'AE1', 'T'], ['K', 'AE1', 'T', 'IH0', 'N', 'DH', 'AH1', 'HH', 'AE1', 'T'], ['K', 'AE1', 'T', 'IH0', 'N', 'DH', 'IY0', 'HH', 'AE1', 'T'], ['K', 'AE1', 'T', 'IH1', 'N', 'DH', 'AH0', 'HH', 'AE1', 'T'], ['K', 'AE1', 'T', 'IH1', 'N', 'DH', 'AH1', 'HH', 'AE1', 'T'], ['K', 'AE1', 'T', 'IH1', 'N', 'DH', 'IY0', 'HH', 'AE1', 'T']]

[['K', 'AE1', 'N', 'Y', 'AH0', 'N', 'HH', 'AE1', 'T']]


# 3) Select a random pronunciation for the target and the transcript

In [22]:
def random_item(phones, rng=None):
    """Pick one element of `phones` at random.

    Args:
        phones: A non-empty sequence to choose from (in this notebook, a
            list of candidate pronunciations, each a list of phones).
        rng: Optional `random.Random` instance to draw from. Passing a
            seeded instance makes the selection reproducible without
            touching global random state. When omitted, the module-level
            `random` generator is used, exactly as before.

    Returns:
        The selected element of `phones`.

    Raises:
        IndexError: If `phones` is empty (raised by `choice`).
    """
    chooser = random if rng is None else rng
    return chooser.choice(phones)

# NOTE: `target` and `transcript` are rebound again, from a list of candidate
# pronunciations to the single pronunciation that was drawn. Re-running this
# cell on its own would therefore draw a single phone out of that
# pronunciation instead.
target = random_item(target)
transcript = random_item(transcript)

# Show the two chosen pronunciations, separated by a blank line.
print(target, transcript, sep="\n\n")

['K', 'AE1', 'T', 'IH1', 'N', 'DH', 'AH0', 'HH', 'AE1', 'T']

['K', 'AE1', 'N', 'Y', 'AH0', 'N', 'HH', 'AE1', 'T']


# 4) Get the IPA for both target and transcript

In [25]:
# Convert each ARPAbet phone to its IPA symbol, preserving order.
target_phones = list(map(ConverterUtils.arpabet_to_ipa, target))
transcript_phones = list(map(ConverterUtils.arpabet_to_ipa, transcript))

# Show both IPA sequences, separated by a blank line.
print(target_phones, transcript_phones, sep="\n\n")

['k', 'æ', 't', 'i', 'n', 'ð', 'ʌ', 'h', 'æ', 't']

['k', 'æ', 'n', 'j', 'ʌ', 'n', 'h', 'æ', 't']


# 5) Align the two phone sequences with ReAline

In [26]:
# Build the aligner with its default settings.
realine = ReAline()

# Align the IPA target against the IPA transcript. In the output shown for
# this cell the result is a list of (target, transcript) pairs in which '-'
# fills the side that has no matching phone.
res = realine.align(
    target_phones,
    transcript_phones,
)
res

[('k', 'k'),
 ('æ', 'æ'),
 ('t', '-'),
 ('i', '-'),
 ('n', 'n'),
 ('ð', '-'),
 ('-', 'j'),
 ('ʌ', 'ʌ'),
 ('-', 'n'),
 ('h', 'h'),
 ('æ', 'æ'),
 ('t', 't')]