In [2]:
import icu

In [3]:
def transliterator_from_rules(name, rules):
    """Build an ICU transliterator from a rule string and register it.

    Args:
        name: ID to give the transliterator. After registration the same ID
            can be referenced from other rule sets (``:: name;``).
        rules: ICU transliterator rule source text.

    Returns:
        A transliterator instance looked up by ``name`` after registration.
    """
    transliterator_cls = icu.Transliterator
    compiled = transliterator_cls.createFromRules(name, rules)
    # Register first, then fetch by ID, so the returned object is the
    # registry's view of the transform rather than the locally compiled one.
    transliterator_cls.registerInstance(compiled)
    return transliterator_cls.createInstance(name)

In [13]:
# ICU transliterator rules for Irish-aware lowercasing.
# Steps, in order:
#   1. decompose to NFD, so an accented capital vowel starts with its base letter;
#   2. after a word-initial lowercase 'n' or 't' that is directly followed by an
#      uppercase A/E/I/O/U, insert a hyphen (e.g. nAthair -> n-Athair);
#   3. lowercase everything;
#   4. recompose to NFC.
# $wb ("word boundary") is any character that is neither a letter nor a mark.
irishlc_rules = """
:: NFD;
$uvowel=[AEIOU];
$wb=[^[:L:][:M:]];
$wb { ([nt]) } $uvowel → $1 '-';
:: lower;
:: NFC;
"""

In [14]:
# Compile the lowercasing rules and register them under the ID 'irishlc';
# the stress and g2p rule sets below pull this transform in with `:: irishlc;`.
irishlc = transliterator_from_rules('irishlc', irishlc_rules)

In [8]:
# ICU transliterator rules that insert the primary-stress mark 'ˈ'.
# The input is first run through the registered 'irishlc' transform.
# Four whole words (anois, arís, isteach, amach) have hard-coded rewrites;
# for anything else, 'ˈ' is placed before the first vowel of the word
# (a vowel preceded only by optional consonants back to a word boundary).
# $wb is any character that is neither a letter nor a mark.
# NOTE(review): $bvowel and $svowel are defined but no rule here uses them.
# NOTE(review): the 'arís' rewrite also inserts an 'i' (-> 'airˈís');
# presumably a deliberate respelling — confirm.
ulster_stress_rules = """
$wb=[^[:L:][:M:]];
$cons = [bcdfghjklmnpqrstvwxyz];
$vowel = [aeiouáéíóú];
$bvowel = [aouáóú{ae}];
$svowel = [eiéí];
:: irishlc;
$wb { anois } $wb → an'ˈ'ois;
$wb { arís } $wb → air'ˈ'ís;
$wb { isteach } $wb → ist'ˈ'each;
$wb { amach } $wb → am'ˈ'ach;
$wb $cons* { ($vowel) → 'ˈ'$1;
"""

In [9]:
# Compile the stress rules and register them under the ID 'ulster_stress'
# so that other rule sets can reference them with `:: ulster_stress;`.
stress = transliterator_from_rules('ulster_stress', ulster_stress_rules)

In [12]:
# Spot check of one of the hard-coded exception words: the stress mark should
# land before the second syllable rather than before the first vowel.
stress.transliterate("amach")

'amˈach'

In [134]:
ulster_g2p_rules = """
$wb=[^[:L:][:M:]];
$cons = [bcdfghjklmnpqrstvwxyz];
# when we transliterate past the first consonant,
# we need to use the transliteration as context
$scons = [{bʲ}{dʲ}{fʲ}{mʲ}ʃ{tʲ}{vʲ}];
$bcons = [{bˠ}{dˠ}{fˠ}{mˠ}{sˠ}{tˠ}w];
$vowel = [aeiouáéíóú];
$bvowel = [aouáóú{ae}];
$svowel = [eiéí];
$ps = 'ˈ';
:: irishlc;
:: ulster_stress;



$wb { mb } $cons* ˈ? $bvowel → mˠ;
$wb { mb } $cons* ˈ? $svowel → mʲ;
$bvowel $bcons* { bh → w;
bh } $cons* ˈ? $bvowel → w;
$svowel $scons* { bh → vʲ;
bh } $cons* ˈ? $svowel → vʲ;
$bvowel $bcons* { b → bˠ;
b } $cons* ˈ? $bvowel → bˠ;
$svowel $scons* { b → bʲ;
b } $cons* ˈ? $svowel → bʲ;

$wb { gc } $cons* ˈ? $bvowel → gˠ;
$wb { gc } $cons* ˈ? $svowel → ɟ;
$bvowel $bcons* { ch → x;
ch } $cons* ˈ? $bvowel → x;
$svowel $scons* { ch → ç;
ch } $cons* ˈ? $svowel → ç;
$bvowel $bcons* { c → kˠ;
c } $cons* ˈ? $bvowel → kˠ;
$svowel $scons* { c → c;
c } $cons* ˈ? $svowel → c;

$wb { nd } $cons* ˈ? $bvowel → ɴˠ;
$wb { nd } $cons* ˈ? $svowel → ɴʲ;
$bvowel $bcons* { dh → ɣ;
dh } $cons* ˈ? $bvowel → ɣ;
$svowel $scons* { dh → j;
dh } $cons* ˈ? $svowel → j;
$bvowel $bcons* { d → dˠ;
d } $cons* ˈ? $bvowel → dˠ;
$svowel $scons* { d → dʲ;
d } $cons* ˈ? $svowel → dʲ;

$wb { bhf } $cons* ˈ? $bvowel → w;
$wb { bhf } $cons* ˈ? $svowel → vʲ;
fh → ;
$bvowel $bcons* { f → fˠ;
f } $cons* ˈ? $bvowel → fˠ;
$svowel $scons* { f → fʲ;
f } $cons* ˈ? $svowel → fʲ;

$bvowel $bcons* { mh → w;
mh } $cons* ˈ? $bvowel → w;
$svowel $scons* { mh → vʲ;
mh } $cons* ˈ? $svowel → vʲ;
$bvowel $bcons* { m → mˠ;
m } $cons* ˈ? $bvowel → mˠ;
$svowel $scons* { m → mʲ;
m } $cons* ˈ? $svowel → mʲ;

$wb { l } ˈ? $bvowel → ʟˠ;
$wb { l } ˈ? $svowel → ʟʲ;
$wb { r → ɾˠ;
$bvowel $cons* { s → sˠ;
s } $cons* ˈ? $bvowel → sˠ;
$svowel $cons* { s → ʃ;
s } $cons* ˈ? $svowel → ʃ;
$bvowel $bcons* { t → tˠ;
t } $cons* ˈ? $bvowel → tˠ;
$svowel $scons* { t → tʲ;
t } $cons* ˈ? $svowel → tʲ;

# 'oi' can represent either:
# * 'o' before a slender consonant
# * 'i' after a broad consonant
$wb g { $ps oi } tse $wb → $ps i ;
$wb an { $ps oi } s $wb → $ps i ;

:: null;

$ps eái → $ps aː;
$ps eá → $ps aː;
$ps ái → $ps aː;
$ps á → $ps aː;
$ps aei → $ps eː;
$ps ae → $ps eː;
$ps éi → $ps eː;
$ps é → $ps eː;
eái → a;
eá → a;
ái → a;
á → a;
aei → e;
ae → e;
éi → e;
é → e;

$ps eai → $ps a ;
$ps ea → $ps a ;
$ps ai → $ps a ;
$ps a → $ps a ;
$ps ei → $ps e ;
$ps ue → $ps e ;
$ps e → $ps e ;
eai → ə ;
ea → ə ;
ai → ə ;
a → ə ;
ei → ə ;
ue → ə ;
e → ə ;

$ps oi → $ps o ;
$ps ui → $ps i ;
$ps iu → $ps u ;
$ps u → $ps u ;
$ps io → $ps i ;
$ps i → $ps i ;
oi → ə ;
ui → ə ;
iu → ə ;
io → ə ;
u → ə ;
i → ə ;

$ps → $ps;
"""

In [135]:
# Compile the grapheme-to-phoneme rules and register them under 'ulster_g2p'.
# The rules reference `:: irishlc;` and `:: ulster_stress;`, so both of those
# transliterators must already be registered when this cell runs.
ulster_g2p = transliterator_from_rules('ulster_g2p', ulster_g2p_rules)

In [138]:
# Spot check of the full pipeline on a short word: lenited 'ch' before a broad
# vowel, a stressed 'a', and a final 't' after a broad vowel.
ulster_g2p.transliterate("chat")

'xˈatˠ'