Skip to content

Commit

Permalink
rebasing master
Browse files Browse the repository at this point in the history
  • Loading branch information
MambetniyazovAmir committed Jun 15, 2020
1 parent 3100802 commit 0c3379e
Show file tree
Hide file tree
Showing 4 changed files with 319 additions and 10 deletions.
8 changes: 5 additions & 3 deletions apertium-uzb-kaa.uzb-kaa.dix
Original file line number Diff line number Diff line change
Expand Up @@ -5455,7 +5455,7 @@
<e> <p><l>kuchli<s n="adj"/></l> <r>kúshli<s n="adj"/></r></p></e>
<e> <p><l>kuchsiz<s n="adj"/></l> <r>kúshsiz<s n="adj"/></r></p></e>
<e> <p><l>kuchsizlik<s n="n"/></l> <r>kúshsizlik<s n="n"/></r></p></e>
<e> <p><l>kuchuk<s n="n"/></l> <r>kushık<s n="n"/></r></p></e>
<e> <p><l>kuchuk<s n="n"/></l> <r>kúshik<s n="n"/></r></p></e>
<e> <p><l>kudurat<s n="n"/></l> <r>qayǵı<s n="n"/></r></p></e>
<e> <p><l>kufron<s n="n"/></l> <r>kufron<s n="n"/></r></p></e>
<e> <p><l>kuf-suf<s n="ideo"/></l> <r>kuf-suf<s n="ideo"/></r></p></e>
Expand Down Expand Up @@ -19492,7 +19492,7 @@
<e> <p><l>tashqari<s n="n"/></l> <r>sırt<s n="n"/></r></p></e>
<e> <p><l>Oygul<s n="np"/><s n="ant"/><s n="f"/></l> <r>Aygúl<s n="np"/><s n="ant"/><s n="f"/></r></p></e>
<!-- Baxtiyor/Baxtıyar: gender tag must agree on both sides (was m -> f, apparently copied from the Oygul entry). -->
<e> <p><l>Baxtiyor<s n="np"/><s n="ant"/><s n="m"/></l> <r>Baxtıyar<s n="np"/><s n="ant"/><s n="m"/></r></p></e>
<e> <p><l>ular<s n="p3"/><s n="pl"/></l> <r>olar<s n="p3"/><s n="pl"/></r></p></e>
<e> <p><l>ular<s n="prn"/><s n="p3"/><s n="pl"/></l> <r>olar<s n="p3"/><s n="pl"/></r></p></e>
<e> <p><l>ulkan<s n="adj"/></l> <r>úlken<s n="adj"/></r></p></e>
<e> <p><l>qachon<s n="adv"/><s n="itg"/></l> <r>qashan<s n="adv"/><s n="itg"/></r></p></e>
<!-- Target lemma spelled with final dotless ı, matching the Karakalpak reference text (quwanıshlı). -->
<e> <p><l>xursand<s n="adj"/></l> <r>quwanıshlı<s n="adj"/></r></p></e>
Expand All @@ -19502,7 +19502,7 @@
<e> <p><l>ber<s n="v"/><s n="tv"/></l> <r>ber<s n="v"/><s n="tv"/></r></p></e>
<e> <p><l>yoki<s n="cnjcoo"/></l> <r>yamasa<s n="cnjcoo"/></r></p></e>
<e> <p><l>yosh<s n="n"/></l> <r>jas<s n="n"/></r></p></e>
<e> <p><l>sen<s n="p2"/><s n="sg"/></l> <r>sen<s n="p2"/><s n="sg"/></r></p></e>
<e> <p><l>sen<s n="prn"/><s n="p2"/><s n="sg"/></l> <r>sen<s n="p2"/><s n="sg"/></r></p></e>
<e> <p><l>yer<s n="n"/></l> <r>jer<s n="n"/></r></p></e>
<e> <p><l>kir<s n="v"/><s n="iv"/></l> <r>kir<s n="v"/><s n="iv"/></r></p></e>
<e> <p><l>bolakay<s n="n"/></l> <r>bala<s n="n"/></r></p></e>
Expand All @@ -19517,6 +19517,8 @@
<e> <p><l>yeyish<s n="v"/><s n="iv"/><s n="coop"/></l><r>jew<s n="v"/><s n="iv"/><s n="coop"/></r></p></e>
<e> <p><l>bekinish<s n="v"/><s n="iv"/><s n="coop"/></l><r>jasırınıw<s n="v"/><s n="iv"/><s n="coop"/></r></p></e>
<e> <p><l>bekin<s n="v"/><s n="iv"/></l><r>jasırın<s n="v"/><s n="iv"/></r></p></e>
<e> <p><l>koʼr<s n="v"/><s n="tv"/><s n="coop"/></l><r>kór<s n="v"/><s n="tv"/><s n="coop"/></r></p></e>
<e> <p><l>narsa<s n="n"/></l> <r>nárse<s n="n"/></r></p></e>

</section>
</dictionary>
12 changes: 6 additions & 6 deletions tests/wer.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,15 +3,15 @@
# apertium-quality


cat ../texts/story.uzb.txt | apertium -d . uzb-kaa > ../texts/story.uzb-kaa.txt
cat texts/story.uzb.txt | apertium -d . uzb-kaa > texts/story.uzb-kaa.txt

cat ../texts/story.kaa.txt | apertium -d . kaa-uzb > ../texts/story.kaa-uzb.txt
cat texts/story.kaa.txt | apertium -d . kaa-uzb > texts/story.kaa-uzb.txt

echo 'WER uzb-kaa:'
perl ../apertium-eval-translator/apertium-eval-translator-line.pl -test ../texts/story.uzb-kaa.txt -ref ../texts/story.kaa.txt > ../texts/uzb-kaa-wer.txt
grep '(WER)' ../texts/uzb-kaa-wer.txt
perl ../../apertium-eval-translator/apertium-eval-translator-line.pl -test texts/story.uzb-kaa.txt -ref texts/story.kaa.txt > texts/uzb-kaa-wer.txt
grep '(WER)' texts/uzb-kaa-wer.txt

echo 'WER kaa-uzb:'
perl ../apertium-eval-translator/apertium-eval-translator-line.pl -test ../texts/story.kaa-uzb.txt -ref ../texts/story.uzb.txt > ../texts/kaa-uzb-wer.txt
grep '(WER)' ../texts/kaa-uzb-wer.txt
perl ../../apertium-eval-translator/apertium-eval-translator-line.pl -test texts/story.kaa-uzb.txt -ref texts/story.uzb.txt > texts/kaa-uzb-wer.txt
grep '(WER)' texts/kaa-uzb-wer.txt

2 changes: 1 addition & 1 deletion texts/story.kaa.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
1 Baxtıyar QAY JERDE?
2 Baxtıyar benen Aygúl baǵda. Búgin hawa-rayı jaqsı, júdá jıllı. Biraq keshe júdá suwıq boldı. Olar dalada oynay almadı. Baxtıyar benen Aygúl oynaǵandı jaqsı kóredi, olar barqulla úlken úydiń aldındaǵı baǵda birge oynaydı.
3 Baxtıyar altı jasar kishkene bala. Qız onıń qarındası, ol bes jasta. Baxtıyartıń kishkene bir kúshigi bar, házir ol da baǵda. Kúshik balalar menen oynaǵandı jaqsı kóredi. Kúshigi házir júdá quwanıshlı.
3 Baxtıyar altı jasar kishkene bala. Qız onıń qarındası, ol bes jasta. Baxtıyardıń kishkene bir kúshigi bar, házir ol da baǵda. Kúshik balalar menen oynaǵandı jaqsı kóredi. Kúshigi házir júdá quwanıshlı.
4 Al Aygúldiń kúshigi barma? Yaq, Aygúldiń kúshigi joq, onıń pıshıǵı bar. Biraq pıshıǵı úyde, uyıqlap atır.
5 Olardıń anası pıshıǵı menen birge úyde, ol aynadan Baxtıyar benen Aygúldiń oynaǵanına qarap tur. Baxtıyar eski úlken bir terekke qaray qattı júgirip baratır, ol Aygúlden jasırınıp atır. Ne ushın ekenligin bilesizbe? Aygúl qolı menen kózlerin jawıp otır. Ol hesh nárseni kórip turǵanı joq, ol sanap atır. Aygúl ne ushın bunday qılıp atır? Hám Baxtıyar terektiń qasında ne qılıp atır?
6 Bul oyın. Aygúl sanap bolǵannan keyin átirapına qaradı. Ol "Baxtıyar qay jerge ketti? Onı kórdińizbe?" - dep izlep atır.
Expand Down
307 changes: 307 additions & 0 deletions uzb-kaa.rlx
Original file line number Diff line number Diff line change
@@ -0,0 +1,307 @@
# Wordforms that close a disambiguation window: full stop, exclamation mark, question mark.
DELIMITERS      = "<.>" "<!>" "<?>" ;
# Comma is declared as a soft delimiter (see the CG-3 manual for when soft delimiters apply).
SOFT-DELIMITERS = "<,>" ;

# Sentence edges: the window-boundary tag (>>> or <<<) or any reading tagged `sent`.
LIST BOS  = (>>>) sent ;
LIST EOS  = (<<<) sent ;

# Opening / closing brackets.
LIST Lpar = lpar ;
LIST Rpar = rpar ;

# ---- Part-of-speech tags -------------------------------------------------
LIST N      = n ;
LIST V      = v ;
LIST Prop   = np ;
LIST Pron   = prn ;
LIST Num    = num ;
LIST A      = adj ;
LIST Det    = det ;
LIST Adv    = adv ;
LIST CC     = cnjcoo ;
LIST CS     = cnjsub ;
LIST Interj = ij ;
LIST Post   = post ;
LIST Cop    = cop ;

# ---- Verb transitivity and tense -----------------------------------------
LIST IV     = iv ;
LIST TV     = tv ;
LIST Past   = past ;

# ---- Possessive suffix tags ----------------------------------------------
# NOTE(review): Poss does not list px3sp although Poss3 and Px3Sp do -- confirm
# whether that omission is intended.
LIST Poss   = px1sg px2sg px3sg px1pl px2pl px3pl ;
LIST Poss3  = px3sg px3sp px3pl ;
# Poss2 and Px2Sg are two names for the same single tag; both are kept because
# rules may refer to either.
LIST Poss2  = px2sg ;
LIST Px2Sg  = px2sg ;
LIST Px3Sp  = px3sp ;

# Person + number combinations.
# Each pair is written as a composite tag in parentheses so that a reading must
# carry BOTH tags to match.  Without the parentheses (`LIST 3PS = p3 sg ;`) the
# list holds two independent tags and matches any reading that has p3 OR sg,
# which makes a rule such as `REMOVE 3PS ...` far broader than intended.
LIST 1PS = (p1 sg) ;
LIST 2PS = (p2 sg) ;
LIST 3PS = (p3 sg) ;
LIST 1Pl = (p1 pl) ;
LIST 2Pl = (p2 pl) ;
LIST 3Pl = (p3 pl) ;

# Any person tag, regardless of number (deliberately an OR-list).
LIST Person = p1 p2 p3 ;

# ---- Case tags -----------------------------------------------------------
# NOTE(review): Abe is bound to the tag `abe`; several postposition rules test
# Abe where an ablative would be expected -- confirm the intended tag.
LIST Nom   = nom ;
LIST Gen   = gen ;
LIST Abe   = abe ;
LIST Acc   = acc ;
LIST Dat   = dat ;
LIST Loc   = loc ;

# ---- Derived / syntactic-use tags ----------------------------------------
LIST Subst = subst ;
LIST Attr  = attr ;
LIST Advl  = advl ;

# ---- Proper-noun subtypes (composite tags: both tags must be present) ----
LIST Ant   = (np ant) ;
LIST Cog   = (np cog) ;

# ---- Voice ---------------------------------------------------------------
LIST Recip = rec ;
LIST Caus  = caus ;

# ---- Verb forms ----------------------------------------------------------
# Tense/mood tags treated as finite.
LIST FiniteVerb = pres aor past ifi ifi_evid fut fut_plan imp opt pih ;

# Non-finite (gerund / verbal-adjective) tags.
# TODO (from the original author): the numbered tags are placeholders to be
# replaced with the real ones.
LIST Ger    = ger_past ger_abst ger_inf ger4 ger5 gna2 gna3 gna4 gpr_rsub ;

LIST Vaux   = vaux ;
LIST rsub   = gpr_rsub ;   # also a member of Ger
LIST Gerinf = ger_inf ;    # also a member of Ger
LIST Imper  = imp ;        # also a member of FiniteVerb

# ---- Miscellaneous -------------------------------------------------------
LIST Mistake = mistake ;
LIST Colon   = ":" ;

# Verb readings that are not gerund-like (see LIST Ger).
SET FINITE       = V - Ger ;

# Things that may stand in front of a noun.  (CC was considered and left out.)
SET PRE-N        = Det OR Num OR Attr OR A OR Gen OR ("-") ;

# Readings that can head a nominal phrase.
SET NOMINAL-HEAD = N OR Ger OR Subst ;

# Word-like readings versus punctuation marks.
# The trailing `#"` comments only re-balance quotes for editor highlighting.
SET WORD         = N OR V OR A OR Post OR Pron OR Det OR Adv OR CC OR CS OR Interj OR Num OR ("\?") ;
SET MARK         = (",") OR ("\\") OR ("\;") OR ("–") ; #"
SET WORDMARK     = WORD OR MARK ;
SET PHRASEMARK   = ("\\") OR ("\;") ; #"



# Drop readings tagged `mistake` wherever another reading is available.
# (Original author's open question: why does this tag exist at all?)
REMOVE Mistake ;


# Remove an imperative reading unless every reading of the cohort is imperative
# (0C = careful test on the target cohort).  Marked by the author as an ugly
# stop-gap to be fixed.

REMOVE Imper IF (NOT 0C Imper) ;

# A gerund cannot be the last word of a sentence: remove gerund readings when
# the next cohort is a sentence end or an opening bracket.

REMOVE Ger IF (1 EOS OR Lpar);


# ---- Noun + attr selections ----
# Prefer the adjective: remove `attr` when the same cohort also has an adj reading.
REMOVE Attr IF (0 A);

# Remove `attr` when nothing noun-like follows: the next cohort is neither a
# pre-nominal element nor a nominal head (checked on the main reading and on
# sub-reading 1 via 1/1).
REMOVE Attr IF (NOT 1 PRE-N) (NOT 1 NOMINAL-HEAD) (NOT 1/1 NOMINAL-HEAD) ;

# Remove the noun+nominative reading when the cohort has an attr or nominative
# reading and the next cohort is unambiguously nominative.
REMOVE N + Nom IF (0 Attr OR Nom) (1C Nom) ;

# Select `attr` when the cohort also has a nominative reading and the next
# cohort is unambiguously px3sp + nominative.
SELECT Attr (0 Nom) (1C Px3Sp + Nom) ;

#


# Pronoun/noun ambiguity: prefer the pronoun reading.
SELECT Pron IF (0 N) ;

# Disabled earlier attempt, kept for reference:
#REMOVE Cop IF (NOT 1C EOS);

# ---- Copula sub-readings (SUB:1 targets sub-reading 1 of a reading) ----

# Sentence-final position: keep the copula sub-reading.
SELECT SUB:1 Cop IF (1 EOS) ;

# Remove the copula sub-reading unless the next cohort is a sentence end, a
# punctuation mark or "da".
REMOVE SUB:1 Cop IF
(NOT 1 EOS OR MARK OR ("da"))
;
# Remove the copula sub-reading right after a sentence start or punctuation
# mark (headings, enumerations) when the sentence does not end immediately.
REMOVE SUB:1 Cop IF
(-1 BOS OR MARK) ## Headings or enumerations
(NOT 1 EOS)
;

# Keep the copula before a parenthesised insertion: next cohort is an opening
# bracket, a closing bracket follows before the sentence end, and the previous
# cohort is not a colon.
SELECT SUB:1 Cop IF
(1 (lpar))
(2* (rpar) BARRIER EOS)
(NOT -1 Colon)
;



# Keep the copula before a punctuation mark when another copula sub-reading
# follows later in the same sentence (coordinated clauses).  The trailing
# comments are Kazakh example sentences; approximate English glosses:
#   "Дұрыс, оның мысығы бар."               -- "Right, he/she has a cat."
#   "74 ... барлығы 53 ел [0]қатысты."      -- "... 53 countries took part in total."
SELECT SUB:1 Cop IF
(1 MARK)
(2*/1 Cop BARRIER EOS)
(NOT 0 Interj) ## Дұрыс, оның мысығы бар.
(NOT 0 FiniteVerb) ## 74 ... барлығы 53 ел [0]қатысты.
(NOT 2 N)
;
## Жоқ, Айгүлдің күшігі [0]жоқ, оның мысығы [0]бар.
## (approx. "No, Aygul has no puppy, she has a cat.")

# Sentence-final copula when the cohort has no verb or auxiliary reading.
# NOTE(review): the unconditional `SELECT SUB:1 Cop IF (1 EOS)` above already
# covers this context -- confirm whether both rules are needed.
SELECT SUB:1 Cop IF
(1 EOS)
(NOT 0 V OR Vaux)
;




# Third person singular can be unmarked, so readings matching 3PS are removed
# unless the next cohort ends the sentence.
# NOTE(review): check that LIST 3PS is declared as a composite tag (p3 sg); a
# plain two-tag list would match p3 OR sg and make this rule much broader.

REMOVE 3PS IF (NOT 1 EOS) ;

# After an unambiguous accusative the verb must be transitive: drop the
# intransitive reading and select the transitive one.

REMOVE IV IF (-1C Acc) ;

SELECT TV IF (-1C Acc) ;

# If all readings are determiner or pronoun and the next cohort can be an
# adverb, select the pronoun reading.

SELECT Pron IF (0C Det OR Pron) (1 Adv) ;

# Select the proper noun for a noun/proper-noun ambiguity when the form matches
# the capitalisation regex and the cohort is not sentence-initial.
# NOTE(review): the regex tag has no <...> wrapper, so it is presumably matched
# against the base form rather than the wordform -- confirm.

SELECT Prop IF (0 N)(0 Prop) (0 ("[:upper:]+[:lower:]*"r))(NOT -1 BOS) ;

# Sentence-initial word that is both a common noun and a first name (np ant),
# followed by a surname (np cog): treat it as the proper noun.

SELECT Prop IF (0 N) (0 Ant)(1 Cog) (-1 BOS);

# A form that is both a lexical noun and a derived gerund: prefer the noun.

SELECT N IF (0 N) (0 Ger) ;



# Genitive + possessive construction (Turkish example: "Atatürk'ün cumhuriyeti").
# After a genitive select the 3rd-person possessive; before an unambiguous
# 3rd-person possessive select the genitive.

SELECT Poss3 IF (-1 Gen) ;

SELECT Gen IF (1C Poss3) ;


#### POSTPOSITIONS ######
# Wordform-anchored rules: each selects the postposition reading of one
# specific wordform when the preceding cohort carries the form that the
# postposition governs.
#
# Changes relative to the previous revision:
#  * Rules that tested only `Abe` (tag `abe`) now test `Abe OR (abl)`.  These
#    postpositions (oldin, keyin, beri, buyon, ...) govern the ablative, so a
#    test on `abe` alone does not match an ablative-tagged noun.  Matching on
#    `Abe` is kept for backward compatibility.
#    NOTE(review): confirm the ablative tag emitted by the analyser is `abl`.
#  * The second, identical "<oldin>" rule was removed.
# NOTE(review): "<ko'ra>" uses an ASCII apostrophe while the bilingual
# dictionary writes koʼr with U+02BC -- confirm which form reaches this grammar.

"<bilan>" SELECT Post IF (-1 Ger + Poss) ;

"<sari>" SELECT Post IF (-1 Ger) ;

"<qadar>" SELECT Post IF (-1 Dat) ;

"<haqida>" SELECT Post IF (-1 Nom) ;

"<oldin>" SELECT Post IF (-1 Abe OR (abl)) ;

"<tomon>" SELECT Post IF (-1 Dat) ;

"<ko'ra>" SELECT Post IF (-1 Dat) ;

"<qarshi>" SELECT Post IF (-1 Dat) ;

"<qaramay>" SELECT Post IF (-1 Dat) ;

"<oid>" SELECT Post IF (-1 Dat) ;

"<dovur>" SELECT Post IF (-1 Dat) ;

"<zid>" SELECT Post IF (-1 Dat) ;

"<qarab>" SELECT Post IF (-1 Dat) ;

"<mansub>" SELECT Post IF (-1 Dat) ;

"<boshqa>" SELECT Post IF (-1 Abe OR (abl)) ;

"<beri>" SELECT Post IF (-1 Abe OR (abl)) ;

"<buyon>" SELECT Post IF (-1 Abe OR (abl)) ;

"<sababli>" SELECT Post IF (-1 Abe OR (abl)) ;

"<etibaran>" SELECT Post IF (-1 Abe OR (abl)) ;

"<avval>" SELECT Post IF (-1 Abe OR (abl)) ;

"<keyin>" SELECT Post IF (-1 Abe OR (abl)) ;

"<orqasindan>" SELECT Post IF (-1 Gen) ;

# Not a postposition rule: after a numeral, "mayda" is read as a noun.
"<mayda>" SELECT N IF (-1 Num) ;

"<qaraganda>" SELECT Post IF (-1 Dat) ;

# "deb" after punctuation and before a verb is the subordinating conjunction.
# Uzbek example (approx. "You have heard that it was said to the ancients:
# 'Do not commit adultery'"):
#“Qadimgilarga: “Zino qilma”, – deb aytilganini eshitgansizlar.
"<deb>" SELECT CS IF (-1 MARK) (1 V) ;

# ---- ATTRIBUTIVE ADJECTIVES ----

# Right before an unambiguous finite verb, select the adverbial reading.

SELECT Advl IF (1C FINITE) ;

# Noun/gerund ambiguity: prefer the noun (Uzbek example: "kurash").
# NOTE(review): the original comment here spoke about sentence-final
# adjectives, which describes the REMOVE Advl rule below, not this one.

SELECT N IF (0 Ger);

# A sentence-final adjective is an adjective, not an adverbial.
REMOVE Advl IF (0 A)(1 EOS) ;

# Sentence-final adjective with no copula reading on the following cohort:
# discard the substantivised reading.

REMOVE Subst IF (0 A) (1 EOS) (NOT 1 Cop) ;

# Remove the adjective reading unless the next cohort has a `subst` reading.
# NOTE(review): the (0 A) test repeats the target and adds no restriction;
# the rule looks very broad -- confirm the intent.
REMOVE A IF (0 A) (NOT 1 Subst) ;

# Before an unambiguous numeral followed by an unambiguous noun, it is an adjective.

SELECT A IF (1C Num) (2C N) ;

# Select the adjective between two nouns.
SELECT A IF (-1 N) (1 N) ;

# Select the adjective before a copula (e.g. idi, iken).
SELECT A IF (1 Cop) ;

# ---- 2nd person singular possessive ----
# Remove px2sg when the cohort has no genitive reading and the next cohort can
# be a 3rd-person possessive.

REMOVE Poss2 IF (NOT 0 Gen)(1 Poss3) ;

# Remove the ger_inf reading when the cohort has a locative reading
# (Uzbek example: "ketmoqda").
REMOVE Gerinf IF (0 Loc) ;

# ---- VERBS ----

# Select the past reading when it is the last word of the sentence.

SELECT Past IF (1 EOS) ;

# Avoid verb + verb: remove the verb reading when the next cohort can be a
# finite verb that ends the sentence.

REMOVE V IF (1 FINITE) (2 EOS) ;

# Select the adjective between two nouns when the cohort has a nominative
# reading.  Uzbek example (approx. "the majority of the population"):
SELECT A IF (-1 N) (0 Nom) (1 N) ;
# Aholining koʻpchilik qismi

# Remove `attr` before an unambiguous copula.  Uzbek example (approx. "At the
# same time the Kemalist revolution also had a number of negative features."):
REMOVE Attr IF (1C Cop);
## Shu bilan birga kamolchilik inqilobi kator salbiy xususiyatlarga xam ega edi.

# An interjection reading is removed unless the word is sentence-initial or
# sentence-final (Uzbek examples: "yoq", "bar").
REMOVE Interj IF (NOT -1 BOS) (NOT 1 EOS) ;

# Between two nouns, select the gpr_rsub reading (Uzbek example below).
SELECT rsub IF (-1 N) (1 N) ;
#yor olgan tasvir

# Adjective/gerund ambiguity: select the gerund (Uzbek example: "o'tgan").
SELECT Ger IF (0 A) ;

# Before a sentence end or a phrase mark, select the sub-reading whose
# sub-reading 1 is a finite (non-gerund) verb.
# The flag is spelled SUB:1, matching every other sub-reading rule in this file
# (it was previously written `Sub:1`).
SELECT SUB:1 FINITE IF
#(0/1 Ger) # FIXME: why does this only work when commented
(1 EOS OR PHRASEMARK) ;

# After a postposition, a noun/proper-noun ambiguity resolves to the proper
# noun (e.g. asal/Asal).
SELECT Prop IF (0 N) (-1 Post) ;

0 comments on commit 0c3379e

Please sign in to comment.