Skip to content

Commit

Permalink
first commit (Ligurian Support)
Browse files Browse the repository at this point in the history
  • Loading branch information
fleanend committed Jan 10, 2023
1 parent de8182a commit 4891696
Show file tree
Hide file tree
Showing 4 changed files with 225 additions and 0 deletions.
58 changes: 58 additions & 0 deletions epitran/data/map/lij-Latn.csv
@@ -0,0 +1,58 @@
Orth,Phon
a,a
á,a
à,a
â,aː
ä,aː
æ,ɛː
æn,ɛn
æñ,ɛŋ
b,b
c,k
ç,s
d,d
e,e
é,e
ê,eː
ë,eː
è,ɛ
éu,ew
eu,ø
f,f
g,ɡ
gl,d͡ʒ
gn,ɲ
h,
i,i
í,i
ì,i
î,iː
ï,iː
j,j
l,l
m,m
n,n
ñ,ŋ
ò,ɔ
ö,ɔː
o,u
ó,u
ô,uː
ou,ɔw
p,p
q,k
qu,kw
r,r
s,s
sch,sk
t,t
u,y
ú,y
ù,y
û,yː
ü,yː
v,v
z,z
<dZ>,d͡ʒ
<StS>,ʃt͡ʃ
<tS>,t͡ʃ
54 changes: 54 additions & 0 deletions epitran/data/post/lij-Latn.txt
@@ -0,0 +1,54 @@
% Post-processing rules for Ligurian (lij-Latn).
%
% Each rule has the form   a -> b / X _ Y   and rewrites a as b when it is
% preceded by X and followed by Y; '#' stands for the word edge. The rules
% are order-sensitive: some of them (e.g. the <gu> exceptions) rewrite the
% output of the rule just above them, so do not reorder them.

% Symbol classes referenced in the rule contexts below.
::voicedcons:: = b|d|d͡ʒ|ɡ|l|m|n|ŋ|r|v
::vowels:: = a|ɛ|e|i|ɔ|ø|u|y
::consonant:: = b|ʃ|d|d͡ʒ|f|ɡ|j|k|l|m|n|ŋ|ɲ|p|r|s|t|t͡ʃ|v|w|z|ʒ

% Treatment of <ao> endings
% The mapping table turns <a> into a and <o> into u, so word-final <ao>
% arrives here as "au"; it is rewritten as the diphthong ɔw.
au -> ɔw / _ #

% Treatment of <sc>
% s followed by the affricate t͡ʃ merges into ʃ. The <StS> placeholder maps
% to ʃt͡ʃ (with ʃ, not s), so it is not touched by this rule.
st͡ʃ -> ʃ / _

% Treatment of <n>
% n becomes velar ŋ word-finally and before any consonant; m is also
% velarised before b and p.
n -> ŋ / _ #
n -> ŋ / _ (::consonant::)
[nm] -> ŋ / _ [bp]

% Treatment of <s>
% s is voiced before a voiced consonant and between vowels. Long a and long i
% are left out of the left context of the second rule and handled by the
% third one, which voices s after aː / iː unless what follows is exactly a
% word-final e.
% NOTE(review): the third rule has no positive right context, so it also
% fires before consonants and at the end of a word - confirm this is intended.
s -> z / _ (::voicedcons::)
s -> z / (a|ɛː?|eː?|i|ɔ|øː?|uː?|yː?) _ (::vowels::)
s -> z / [ai]ː _ (?!e#)

% Treatment of <gu>
% ɡy before a vowel becomes ɡw, except when preceded by "fi". The second
% rule then restores ɡy after "se", undoing the first rule for that context.
ɡy -> ɡw / (?<!fi) _ (::vowels::)
seɡw -> seɡy / _ (::vowels::)
% Word-final yŋna is rewritten as ina.
% NOTE(review): presumably this targets the article <unna>; it is unrelated
% to <gu> despite sitting under this heading - confirm.
yŋna -> ina / _ #

% Plural endings
% -iuŋ / -(i)uiŋ endings: the vowel before the nasal cluster becomes a glide
% and a preceding z is emitted as s.
% NOTE(review): because z is optional in the first rule, an s is inserted in
% front of every "iuŋ", including ones that follow another consonant (e.g.
% l or n) - confirm this is intended.
z?iuŋ -> sjuŋ / _
zi?uiŋ -> swiŋ / _
i?uiŋ -> wiŋ / _

% Treatment of double consonants
% Runs of the same consonant are reduced to a single one.
b+ -> b / _
ʃ+ -> ʃ / _
d+ -> d / _
% The affricate is grouped so that + repeats the whole d͡ʒ sequence; without
% the parentheses + would apply only to its last character and a doubled
% affricate would never be reduced.
ɡ*(d͡ʒ)+ -> d͡ʒ / _
f+ -> f / _
ɡ+ -> ɡ / _
j+ -> j / _
k+ -> k / _
l+ -> l / _
m+ -> m / _
n+ -> n / _
% NOTE(review): the next rule also reduces ŋn (the result of <nn> after the
% <n> rules above) to plain n - confirm this is the intended outcome.
ŋ*n+ -> n / _
n*ŋ+ -> ŋ / _
ɲ+ -> ɲ / _
p+ -> p / _
r+ -> r / _
s+ -> s / _
t+ -> t / _
% Grouped for the same reason as d͡ʒ above.
k*(t͡ʃ)+ -> t͡ʃ / _
v+ -> v / _
w+ -> w / _
z+ -> z / _
ʒ+ -> ʒ / _
11 changes: 11 additions & 0 deletions epitran/data/pre/lij-Latn.txt
@@ -0,0 +1,11 @@
% Pre-processing rules for Ligurian (lij-Latn).
%
% These rules rewrite orthographic <c>/<g> sequences into the placeholder
% tokens <tS>, <dZ> and <StS>, which the mapping table then turns into
% t͡ʃ, d͡ʒ and ʃt͡ʃ. Within each group the longer pattern comes first so that
% the <i> before a, e, o, u is absorbed into the placeholder; do not reorder.
% NOTE(review): the vowel class [aeou] does not contain æ, so <ci>/<gi>
% before æ keep their i (tests.ipynb records t͡ʃiɛːu for "ciæo") - confirm
% that this is intended.

% <scc> always yields the ʃt͡ʃ cluster.
scc -> <StS> / _
% <cc(i)> and <c(i)>: affricate before a front vowel, or before a/e/o/u when
% followed by <i>. The [^s] context requires a preceding character that is
% not s.
cci -> <tS> / [^s] _ [aeou]
cc -> <tS> / [^s] _ [ie]
ci -> <tS> / _ [aeou]
c -> <tS> / _ [ie]
% <gg(i)>, <g(i)> and <gl(i)>: same scheme, all yielding the voiced affricate.
ggi -> <dZ> / _ [aeou]
gg -> <dZ> / _ [ie]
gi -> <dZ> / _ [aeou]
gli -> <dZ> / _ [aeou]
gl -> <dZ> / _ [ie]
g -> <dZ> / _ [ie]
102 changes: 102 additions & 0 deletions tests.ipynb
@@ -0,0 +1,102 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'faːse zeneize fiɡyraːse fiɡya ɛːɡwa leŋɡwa ɡwɛːra iŋfiltraziuŋ iŋfiltraziuiŋ paʃuŋ paʃuiŋ ʃt͡ʃetu ʃt͡ʃøpu kaŋ kaŋpu teŋpu sykɔw datɔw datai'"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import epitran\n",
"epi = epitran.Epitran('lij-Latn')\n",
"\" \".join(epi.transliterate(w) for w in \"fâse zeneise figurâse figua ægua lengua guæra infiltraçion infiltraçioin pascion pascioin sccetto scceuppo can campo tempo succao dattao dattai\".split())"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'yŋna paŋna'"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"sentence = \"pròpio unna panna\"\n",
"\" \".join(epi.transliterate(w) for w in sentence.split())"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'meʃt͡ʃiaː ʃabeku zɡød͡ʒa t͡ʃiɛːu aŋɡɛːzu'"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"sentence = \"mescciâ sciabecco sgheuggia ciæo angæzo\"\n",
"\" \".join(epi.transliterate(w) for w in sentence.split())"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3.7.1 ('base')",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.1"
},
"orig_nbformat": 4,
"vscode": {
"interpreter": {
"hash": "8717662bf88969997ef180c9a6313e076396e402a442e83df9655bbeeedc9756"
}
}
},
"nbformat": 4,
"nbformat_minor": 2
}

0 comments on commit 4891696

Please sign in to comment.