Permalink
Fetching contributors…
Cannot retrieve contributors at this time
857 lines (750 sloc) 32.6 KB
! morph-phon.xfst
!
! Released under the terms of the MIT License
! Copyright (c) 2011-2015 Çağrı Çöltekin <cagri@coltekin.net>
!
! Permission is hereby granted, free of charge, to any person obtaining a
! copy of this software and associated documentation files (the "Software"),
! to deal in the Software without restriction, including without limitation
! the rights to use, copy, modify, merge, publish, distribute, sublicense,
! and/or sell copies of the Software, and to permit persons to whom the
! Software is furnished to do so, subject to the following conditions:
!
! The above copyright notice and this permission notice shall be included in
! all copies or substantial portions of the Software.
!
! THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
! IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
! FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
! AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
! LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
! FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
! DEALINGS IN THE SOFTWARE.
!
#include "options.h"
!
! This file only contains a set of useful definitions. This is sourced
! from the other scripts that generate useful automata.
!
!
! Define some useful character classes
!
! Vowels
! The four front/back x rounded/unrounded quadrants. Each quadrant also
! contains its silent counterpart symbol.
define Vfr [ ö | ü | Ö | Ü | %^sFRV ];
define Vfu [ e | i | E | İ | î | Î | %^sFUV ];
define Vbr [ o | u | O | U | û | Û | %^sBRV ];
define Vbu [ a | ı | A | I | â | Â | %^sBUV ];
! Front and back vowels are the unions of the corresponding quadrants.
define Vf [ Vfr | Vfu ];
define Vb [ Vbr | Vbu ];
! High and low vowels.
define Vh [ i | ü | ı | u | İ | Ü | I | U | î | Î | û | Û
| %^si | %^sü | %^sı | %^su ];
define Vl [ e | ö | a | o | E | Ö | A | O | â | Â
| %^se | %^sö | %^sa | %^so ];
! Every fully specified vowel.
define Vowel [ Vf | Vb ];
! Buffer vowels.
define Vbuf [ %^%(I%) | %^%(A%) ];
! Palatalized vowel symbols: a plain series (A, I, O, U) and a
! circumflexed series (Â, Î, Ô, Û).
! The first member of the circumflexed series was written as ^p, which is
! the underspecified consonant listed in ConsXX and not a vowel; it must be
! the multi-character symbol ^pÂ.
define Vpal %^pA | %^pI | %^pO | %^pU | %^pÂ | %^pÎ | %^pÔ | %^pÛ;
! Underspecified vowels that are resolved by vowel harmony.
define Vxx [ %^A | %^I ];
! Silent vowels (never realized on the surface).
define Vsilent [ %^sFRV | %^sFUV | %^sBRV | %^sBUV ];
define Vall [ Vowel | Vxx | Vpal | Vsilent ]; ! except buffer vowels
! Vowel letters that can appear on the surface: every member of Vowel
! other than the four silent vowel symbols.
define Vsurface [ Vowel - Vsilent ];
! Consonants
! Voiced consonants, lower and upper case, plus the silent voiced marker.
define ConsV [ b | d | c | g | v | z | j | f | ğ | l | m | n | r | w | y
| B | D | C | G | V | Z | J | F | Ğ | L | M | N | R | W | Y
| %^sVC ];
! Unvoiced consonants, lower and upper case, plus the silent unvoiced marker.
define ConsUV [ p | t | ç | k | f | s | ş | h
| P | T | Ç | K | F | S | Ş | H
| %^sUC ];
define Cons [ ConsV | ConsUV ];
! Buffer consonants.
define ConsBuf [ %^%(y%) | %^%(s%) | %^%(ş%) | %^%(n%) ];
! Underspecified consonants and the passive placeholder.
define ConsXX [ %^c | %^p | %^t | %^k | %^g | %^K | %^D | %^C | %@LN ];
define ConsSilent [ %^sVC | %^sUC ];
define ConsAll [ Cons | ConsXX | ConsSilent ]; ! except buffers
! Consonant letters that can appear on the surface: every member of Cons
! other than the two silent consonant symbols.
define ConsSurface [ Cons - ConsSilent ];
define SilentLetter [ Vsilent | ConsSilent ];
define SurfaceLetter [ ConsSurface | Vsurface ];
! Intermediate symbols with no surface forms
! Morpheme, root and digit boundaries.
define Boundary [ %@MB | %@RB | %@dB ];
! All intermediate symbols: the boundaries plus assorted markers.
define IntSym [ Boundary
| %@M | %@MREDUP | %@NCOMP | %@opt3 | %@APOS
| %^NS | %^NNS ];
! Decimal digits (zero must be escaped).
define Digit [ %0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 ];
!
! Delete the stress-related intermediate symbols
!
! Remove the two stress-related intermediate symbols and the stress mark.
define delStressMarks [ %^NS | %^NNS | ˈ ] -> 0;
!
! Abbreviation handling
!
! The first rule adds the default silent front/unrounded vowel
! to abbreviations which do not have any vowel, or any silent
! letters added in the lexicon.
!
! The second rule inserts an optional dot `.' if there isn't one
! already.
!
! Abbr is a composition of three steps, applied in this order:
!  1. insert the silent vowel ^sFUV before @ABBR when everything from the
!     start of the string up to @ABBR is one or more surface consonants;
!  2. optionally insert a dot before @ABBR when the preceding symbol is
!     not already a dot;
!  3. delete the @ABBR marker.
define Abbr [..] -> %^sFUV || .#. ConsSurface+ _ %@ABBR
.o. [..] (->) %. || \%. _ %@ABBR
.o. %@ABBR -> 0;
!
! Delete apostrophe if it is followed by null morphemes
! Replace it with acceptable symbols
!
! NOTE: for generation it is better to limit these to only one.
!
! AposSym takes its value from a preprocessor macro that is not defined
! in this file (presumably it comes from the included options header).
define AposSym APOSTROPHE_SYMS ;
! replaceApos, in order:
!  1. delete @APOS when only boundary symbols follow it up to the end of
!     the string;
!  2. rewrite every remaining @APOS as AposSym.
define replaceApos %@APOS -> 0 || _ Boundary+ .#.
.o. %@APOS -> AposSym;
!
! The same as above, but also allow forms with no apostrophe
! This is currently not used, in favor of options in 'options.h'.
!
! replaceAposOpt, in order:
!  1. delete @APOS when only boundary symbols follow it up to the end of
!     the string;
!  2. optionally delete any other @APOS;
!  3. rewrite every @APOS that is still present as AposSym.
define replaceAposOpt %@APOS -> 0 || _ Boundary+ .#.
.o. %@APOS (->) 0
.o. %@APOS -> AposSym;
!
! Replace @dB `digit boundary' with space or nothing
!
! we allow insertion of space anywhere, or lack of it between the
! `digits'.
!
! NumberSpace, in order:
!  1. delete a @dB that stands immediately before @RB;
!  2. optionally turn any other @dB into a space;
!  3. delete every @dB that is left.
define NumberSpace %@dB -> 0 || _ %@RB
.o. %@dB (->) " "
.o. %@dB -> 0;
!
! Numeric (silent) phonemes
!
!
! We add `silent' phonemes to the end of the numeric forms so that
! they inflect correctly with regard to vowel harmony, buffer deletion
! and voicing assimilation.
!
! NumericInsSilentP inserts silent phonemes after the last digit of a
! number, immediately before @RB (Arabic digits) or @RomNum (Roman
! numerals). The steps are composed in order, so a longer or more specific
! pattern must come before a shorter one that would also match; once a
! step has inserted something, the digit is no longer adjacent to the
! marker and later steps do not fire at that position.
!
! Each Roman numeral gets the same silent phonemes as the Arabic numeral
! with the same value. Changes relative to the previous version:
!  - IV (4) now receives ^sFRV ^sUC like the digit 4 and the single
!    character numeral for four; it used to receive ^sFUV ^sVC.
!  - a numeral ending in a single X (10) had no rule at all; it now
!    receives ^sBRV ^sVC like the digit sequence 1 0.
!  - the rule for the single character numeral six was listed twice;
!    the redundant copy is removed.
!
! First part: Arabic digits. Runs of zeros may be interrupted by the
! thousand separator placeholder.
define NumericInsSilentP [..] -> %^sBRV %^sVC || %0^{12,14} ./. %@tSep _ %@RB
.o. [..] -> %^sBUV %^sVC || %0^{9,11} ./. %@tSep _ %@RB
.o. [..] -> %^sBRV %^sVC || %0^{6,8} ./. %@tSep _ %@RB
.o. [..] -> %^sFUV %^sVC || %0^{3,5} ./. %@tSep _ %@RB
.o. [..] -> %^sFRV %^sVC || Digit %0^2 _ %@RB
.o. [..] -> %^sBUV %^sVC || \Digit %0^2 _ %@RB
.o. [..] -> %^sBRV %^sVC || [1|3] %0 _ %@RB
.o. [..] -> %^sFUV || 2 %0 _ %@RB
.o. [..] -> %^sBUV %^sUC || 4 %0 _ %@RB
.o. [..] -> %^sFUV || 5 %0 _ %@RB
.o. [..] -> %^sBUV %^sUC || 6 %0 _ %@RB
.o. [..] -> %^sFUV %^sUC || 7 %0 _ %@RB
.o. [..] -> %^sFUV %^sVC || 8 %0 _ %@RB
.o. [..] -> %^sBUV %^sVC || 9 %0 _ %@RB
.o. [..] -> %^sBUV %^sVC || %0 _ %@RB
.o. [..] -> %^sFUV %^sVC || 1 _ %@RB
.o. [..] -> %^sFUV || [2|½] _ %@RB
.o. [..] -> %^sFRV %^sUC || [3|4|¼|¾] _ %@RB
.o. [..] -> %^sFUV %^sUC || 5 _ %@RB
.o. [..] -> %^sBUV || 6 _ %@RB
.o. [..] -> %^sFUV || 7 _ %@RB
.o. [..] -> %^sFUV %^sVC || 8 _ %@RB
.o. [..] -> %^sBRV %^sVC || 9 _ %@RB
! Second part: Roman numerals spelled with several letters.
.o. [..] -> %^sFRV %^sUC || [Ⅰ|I|ⅰ|i][v|ⅴ|V|Ⅴ] _ %@RomNum ! 4
.o. [..] -> %^sFUV %^sUC || [v|ⅴ|V|Ⅴ] _ %@RomNum ! 5
.o. [..] -> %^sBUV || [v|ⅴ|V|Ⅴ][Ⅰ|I|ⅰ|i] _ %@RomNum ! 6
.o. [..] -> %^sFUV || [v|ⅴ|V|Ⅴ][Ⅰ|I|ⅰ|i]^{2} _ %@RomNum ! 7
.o. [..] -> %^sFUV %^sVC || [v|ⅴ|V|Ⅴ][Ⅰ|I|ⅰ|i]^{3} _ %@RomNum ! 8
.o. [..] -> %^sFRV %^sUC || [Ⅰ|I|ⅰ|i]^{3,4} _ %@RomNum ! 3,4
.o. [..] -> %^sFUV || [Ⅰ|I|ⅰ|i]^{2} _ %@RomNum ! 2
.o. [..] -> %^sFUV %^sVC || [Ⅰ|I|ⅰ|i] _ %@RomNum ! 1
.o. [..] -> %^sBRV %^sVC || [Ⅰ|I|ⅰ|i][X|Ⅹ|x|ⅹ] _ %@RomNum ! 9
.o. [..] -> %^sBUV %^sUC || [X|Ⅹ|x|ⅹ][L|Ⅼ|l|ⅼ] _ %@RomNum ! 40
.o. [..] -> %^sFUV || [L|Ⅼ|l|ⅼ] _ %@RomNum ! 50
.o. [..] -> %^sBUV %^sUC || [L|Ⅼ|l|ⅼ][X|Ⅹ|x|ⅹ] _ %@RomNum ! 60
.o. [..] -> %^sFUV %^sUC || [L|Ⅼ|l|ⅼ][X|Ⅹ|x|ⅹ]^{2} _ %@RomNum ! 70
.o. [..] -> %^sFUV %^sVC || [L|Ⅼ|l|ⅼ][X|Ⅹ|x|ⅹ]^{3} _ %@RomNum ! 80
.o. [..] -> %^sFUV || [X|Ⅹ|x|ⅹ]^{2} _ %@RomNum ! 20
.o. [..] -> %^sBRV %^sVC || [X|Ⅹ|x|ⅹ]^{3} _ %@RomNum ! 30
! Must stay after the 9, 20, 30, 60, 70 and 80 rules: those have already
! inserted their phonemes, so only a final single X is still adjacent to
! the marker here.
.o. [..] -> %^sBRV %^sVC || [X|Ⅹ|x|ⅹ] _ %@RomNum ! 10
.o. [..] -> %^sBUV %^sVC || [X|Ⅹ|x|ⅹ][C|Ⅽ|c|ⅽ] _ %@RomNum ! 90
.o. [..] -> %^sFRV %^sVC || [C|Ⅽ|c|ⅽ] _ %@RomNum ! 100
.o. [..] -> %^sFRV %^sVC || [D|Ⅾ|d|ⅾ] _ %@RomNum ! 500
.o. [..] -> %^sFRV %^sVC || [C|Ⅽ|c|ⅽ][M|Ⅿ|m|ⅿ] _ %@RomNum ! 900
.o. [..] -> %^sFUV %^sVC || [M|Ⅿ|m|ⅿ] _ %@RomNum ! 1000
! Third part: precomposed single character Roman numerals.
.o. [..] -> %^sFUV || [Ⅱ|ⅱ|Ⅶ|ⅶ|Ⅻ|ⅻ] _ %@RomNum ! 2,7,12
.o. [..] -> %^sFRV %^sUC || [Ⅲ|ⅲ|Ⅳ|ⅳ] _ %@RomNum ! 3,4
.o. [..] -> %^sBUV || [Ⅵ|ⅵ] _ %@RomNum ! 6
.o. [..] -> %^sFUV %^sVC || [Ⅷ|ⅷ] _ %@RomNum ! 8
.o. [..] -> %^sBRV %^sVC || [Ⅸ|ⅸ] _ %@RomNum ! 9
.o. [..] -> %^sFUV %^sVC || [Ⅺ|ⅺ] _ %@RomNum ! 11
! Finally remove the Roman numeral marker.
.o. %@RomNum -> 0 ;
!
! Numeric separator
!
! The behaviour of the following is defined in options.h
!
! Rewrite the thousand separator placeholder @tSep and then the decimal
! separator placeholder @dSep. Both replacement values are preprocessor
! macros that are not defined in this file.
define NumericSeparator %@tSep -> THOUSAND_SEPARATOR
.o. %@dSep -> DECIMAL_SEPARATOR ;
!
! This part deals with the intermediate symbol @NCOMP inserted
! in surface forms. In particular, if @NCOMP is followed by plural
! suffix lAr, it is moved after the plural suffix, if it is followed
! by the composite suffix lArI, it is deleted. The other cases where
! (s)I is deleted handled separately below.
!
! TODO: there is some double work done in parts of this rule
! CompNoun, four composed steps:
!  1. delete the sequence ^(s) ^I when it is followed by @NCOMP, a
!     boundary and l (optional intermediate symbol) ^A r;
!  2. between "@NCOMP Boundary" and a following @MB, rewrite l ^A r as
!     l ^A r @MB ^(s) ^I;
!  3. after an optional @NCOMP, a boundary and an optional apostrophe
!     symbol, and before @MB, rewrite ^(s) ^I @MB l ^A r as
!     l ^A r @MB ^(s) ^I;
!  4. delete every @NCOMP.
define CompNoun [ %^%(s%) %^I -> 0 || _ %@NCOMP Boundary l (IntSym) %^A r ]
.o. [ l %^A r -> l %^A r %@MB %^%(s%) %^I
|| %@NCOMP Boundary _ %@MB ]
.o. [ %^%(s%) %^I %@MB l %^A r -> l %^A r %@MB %^%(s%) %^I
|| (%@NCOMP) Boundary (AposSym) _ %@MB ]
.o. [ %@NCOMP -> 0 ];
!
! (s)I deletion
!
!
! The morpheme (s)I is deleted in some contexts. What is deleted is
! not clear, but we model such that (s)I gets deleted
! - before another (s)I
! - before lArI
! - before lI, lIk, sIz, CI
! - after -(y)AsI
! - and optionally after aynı and before a case suffix
!
! TODO/CHECK: It seems not to be deleted before -lIk in some cases.
! `ev hayvanılık??'
!
! DeleteSI, two composed steps.
!
! Step 1 deletes the sequence ^(s) ^I obligatorily in any of these
! contexts:
!  - before boundaries followed by another ^(s) ^I and a boundary;
!  - before boundaries followed by l ^A r ^I and a boundary;
!  - before boundaries followed by l ^I, l ^I k, s ^I z or ^C ^I;
!  - after ^(y) ^A s ^I and boundaries, when a boundary follows.
!    The right context here used to be written with a percent sign in
!    front of the class name, which escapes it into a literal symbol that
!    never occurs; it now refers to the Boundary class, so this context
!    can actually match;
!  - after ^(s) ^I @RB and an apostrophe symbol, when @MB follows.
!
! Step 2 deletes ^(s) ^I optionally after the root aynı (anchored at the
! start of the string) and @RB, when @MB and one of the listed suffix
! forms follow.
define DeleteSI %^%(s%) %^I -> 0 || _ Boundary+ %^%(s%) %^I Boundary,
_ Boundary+ l %^A r %^I Boundary,
_ Boundary+ [ l %^I |
l %^I k |
s %^I z |
%^C %^I ],
%^%(y%) %^A s %^I Boundary+ _ Boundary,
%^%(s%) %^I %@RB AposSym _ %@MB
.o. %^%(s%) %^I (->) 0 ||
.#. {aynı} %@RB _ %@MB [ %^%(y%) %^A
|%^%(y%) %^I
|%^%(n%) %^I n
|%^D %^A
|%^D %^A n
|%^%(y%) l %^A ];
!
! lAr deletion
!
!
! If there are multiple morphemes in a sequence with surface
! form lAr or lArI following each other, only one survives on the surface.
!
! DeleteLAR, three composed steps, each deleting the sequence l ^A r:
!  1. when it follows a boundary and is followed by boundaries and
!     another l ^A r;
!  2. when it follows "Boundary l ^A r (optional ^I) Boundary+";
!  3. when it follows "Boundary l ^A r @MB ^(s) (optional ^I) @MB+".
define DeleteLAR l %^A r -> 0 || Boundary _ Boundary+ l %^A r
.o. l %^A r -> 0 || Boundary l %^A r (%^I) Boundary+ _
.o. l %^A r -> 0 || Boundary l %^A r %@MB %^%(s%) (%^I) %@MB+ _ ;
!
! `n' deletion in derivation -InC
! this happens after the stems ending with `n'
!
! After a stem-final n followed by one or more boundaries, and before
! @MB, the sequence ^(I) n ^c is reduced to ^c alone.
define DeleteN [ %^%(I%) n %^c -> %^c
|| n Boundary+ _ %@MB ];
!
! `n' insertion after (s) / larI (y) -> (n)
!
! After (s)I and larI -DA -DAn become -nDA -nDAn, (y)A becomes nA
! Last line is about -(s)I deleted after root 'aynı"
! InsertN lists five replacements, each of which puts an n in front of a
! suffix form (and, for the ^(y)-initial forms, drops the buffer):
!   ^D ^A    becomes  n ^D ^A
!   ^D ^A n  becomes  n ^D ^A n
!   ^(y) ^A  becomes  n ^A
!   ^(y) ^I  becomes  n ^I
!   ^C ^A    becomes  n ^C ^A
! Two contexts are given after the context separator:
!   - after a boundary followed by either [^(s) or s] ^I, or by a boundary
!     and one of l ^A r ^I / k i / k ü; then one or more boundary or
!     apostrophe symbols; the target must be followed by @MB;
!   - directly after "aynı @RB @MB" anchored at the start of the string.
! NOTE(review): the contexts appear to be shared by all five replacements;
! confirm against the comma semantics of the compiler in use.
define InsertN %^D %^A -> n %^D %^A,
%^D %^A n -> n %^D %^A n,
%^%(y%) %^A -> n %^A,
%^%(y%) %^I -> n %^I ,
%^C %^A -> n %^C %^A
|| Boundary [ [%^%(s%)|s] %^I
| Boundary [ l %^A r %^I | k i | k ü]
] [Boundary|AposSym]+ _ %@MB,
.#. {aynı} %@RB %@MB _;
!
! DelS
!
! Delete (s) after a few marked nouns. The well known example is cami
! -> cami-i (p3s or compn). Yet, this deletion in current language
! seems to be optional ((as of 2009-10-21 google finds
! more `camisi' than `camii'). Better course of action seems to be
! disregarding this alternation during generation.
!
! DelS, two composed steps:
!  1. optionally delete the buffer ^(s) when it is preceded by the marker
!     @DELS@ and any number of intermediate symbols;
!  2. delete the @DELS@ marker.
define DelS %^%(s%) (->) 0 || %@DELS%@ IntSym* _
.o. %@DELS%@ -> 0;
!
! InsertS
!
! for now this handles only a single exception with the root 'güvey'.
!
! Optionally insert an s before ^(s) ^I when the string starts with güvey
! (optionally prefixed by iç) followed by any number of intermediate
! symbols.
define InsertS [ [..] (->) s
|| .#. ({iç}) {güvey} IntSym* _ %^%(s%) %^I ];
!
! Pronoun Exceptions
!
!
! The stuff below in the order composed:
! . before dative marker `ben' and `sen' become `ban-a' and `san-a'
! . o/bu/şu before plural and case markers an `n' is inserted
! . o/bu/şu before 1st/2nd person possessive suffixes a `y' is inserted
! . after listed pronouns, before (y)lA, surface form of the genitive
! suffix is inserted (this is about form, it is not the genitive marker)
! this is normally the rule, a large number of speaker do not follow it.
! again, it may be a good idea to restrict generation on this one.
! . genitive marker after ben/biz becomes -Im instead of -(n)In.
! . locative pronouns ora/bura/şura/nere optionally lose their
! last vowel if they precede -DA and -DAn, and less frequently -(y)A.
! this is another case where generations should take only one of
! the options (not deleting the vowel is the correct written form, in
! colloquial use deletion is more common for -DA and -DAn)
! A similar contraction also occurs for "directional pronouns"
! içeri and dışarı before -dA and -dAN (e.g., içerden, dışarda).
!
! PronounExceptions is one long composition; the steps run in the order
! written and later steps see the output of earlier ones. The @PRN marker
! is removed near the end, so every step that mentions it must come
! before that point.
!
! Step 1: e becomes a between a string-initial b or s and
! "n @PRN @RB ^(y) ^A @MB".
define PronounExceptions e -> a || .#. [b|s] _ n %@PRN %@RB %^%(y%) %^A %@MB
! Step 2: insert n after one of the listed pronouns (whole string up to
! @RB), an optional apostrophe symbol and any boundaries, before one of
! the listed suffix forms followed by @MB.
.o. [..] -> n || .#. [ {o} |
{bu} |
{şu} |
{kendi} |
{kendisi} |
{biri} |
{birisi} |
{birileri} |
{üzeri} |
{kendileri}
] %@RB (AposSym) Boundary*
_
[ %^%(y%) %^I |
%^%(y%) %^A |
%^D %^A n |
%^C %^A |
%^D %^A
] %@MB
! Step 3: insert n after o/bu/şu before l ^A (r), ^(y) l ^A or s ^I z.
.o. [..] -> n || .#. [ {o} |
{bu} |
{şu} ] %@RB (AposSym) Boundary*
_
[ l %^A (r) |
%^%(y%) l %^A |
s %^I z
] %@MB
! Step 4: optionally insert n after kendi/kendisi/kendileri before
! ^(y) l ^A.
.o. [..] (->) n || .#. [ {kendi} |
{kendisi} |
{kendileri}
] %@RB (AposSym) Boundary*
_
%^%(y%) l %^A %@MB
! Step 5: insert y directly after o/bu/şu (before @RB) when one of the
! listed ^(I)-initial suffix forms follows.
.o. [..] -> y || .#. [ {o} |
{bu} |
{şu}
]
_ %@RB (AposSym) Boundary*
[ %^%(I%) m |
%^%(I%) n |
%^%(I%) m %^I z |
%^%(I%) n %^I z
] %@MB
! Step 6: optionally insert ^(n) ^I n before ^(y) l ^A after the listed
! pronouns; ben/sen/biz must carry @PRN. An n inserted by step 3 may
! stand in between.
.o. [..] (->) %^%(n%) %^I n || .#. [ [{ben} |
{sen} |
{biz} ] %@PRN |
{o} |
{siz} |
{bu} |
{şu} |
{kim}
] %@RB (AposSym) Boundary* (n) _
%^%(y%) l %^A %@MB
! Step 7: after ben/biz carrying @PRN, the n that follows ^(n) ^I
! becomes m.
.o. n -> m
|| .#. [{ben}|{biz}] %@PRN Boundary+ %^%(n%) %^I _
! Step 8: the @PRN marker is no longer needed.
.o. %@PRN -> 0
! Step 9: optionally drop the final a or e of a string that starts with
! bur/şur/or/ner when @RB and ^D ^A (n) or ^(y) ^A follow.
.o. [a|e] (->) 0 || .#. [{bur}|{şur}|{or}|{ner}] _
%@RB [%^D %^A (n) | %^%(y%) %^A] %@MB
! Step 10: where step 9 removed the vowel, the buffer ^(y) is realized
! as y.
.o. %^%(y%) -> y || .#. [{bur}|{şur}|{or}|{ner}] %@RB _ %^A %@MB
! Step 11: optionally drop the final i or ı after string-initial
! içer/dışar before ^D ^A (n).
.o. [i|ı] (->) 0 || .#. [{içer}|{dışar}] _
Boundary+ [%^D %^A (n) ] %@MB ;
!
! InsertY
!
!
! The symbol `y' is inserted after some words, particularly
! ones that end with `su' and before the suffixes that start with
! (n), (y) or (I) (genitive and possessive suffixes except p3p).
! For some words, this insertion seems to be optional for some
! speakers/registers, e.g., `ne-y-imiz' is the common/correct form but
! `ne-miz' is also used relatively frequently.
! Roots after which y is always inserted.
define insertYwords [ {su} | {özsu} | {karasu} ];
! Roots after which the insertion is optional.
define insertYoptional [ {ne} | {akarsu} | {Akarsu} ];
! Insert y after "root @RB" (root anchored at the start of the string)
! when one of the buffers ^(I), ^(n) or ^(s) follows: obligatorily for the
! first word list, optionally for the second.
define InsertY
[ [..] -> y
|| .#. insertYwords %@RB _ [ %^%(I%) | %^%(n%) | %^%(s%) ] ]
.o.
[ [..] (->) y
|| .#. insertYoptional %@RB _ [ %^%(I%) | %^%(n%) | %^%(s%) ] ] ;
!
! Delete buffer y after i-
!
! Drop the buffer ^(y) when the whole string before it is "i @RB".
define iDeleteY [ %^%(y%) -> 0 || .#. i %@RB _ ];
!
! `N' before passive morpheme
!
!
! after a limited set of verbal roots, an optional `n' is inserted
! before the passive morpheme
! Verbal roots that allow the extra n.
define wordsPassiveN [ {de} | {ye} | {yaşa} | {kapa} | {boya}
| {söyle} | {iste} ];
! Optionally insert n after "root @RB" (root anchored at the start of the
! string) when the passive sequence ^(I) @LN @MB follows.
define NbeforePassive [ [..] (->) n
|| .#. wordsPassiveN %@RB _ %^%(I%) %@LN %@MB ];
!
! Replace intermediate symbol @LN on passive according to preceding context
!
!
! The passive morpheme is -(I)n if the preceding letter is `l' or a
! vowel, -(I)l otherwise. We also add a few more exceptional cases here.
! Namely,
! - in the passive form of the verb stems ending with `koy'
! `y' becomes `n' optionally (generally preferred).
! - the passive form of `kapa' is `kapatıl'. Note that with the
! above exception, this makes it ambiguous between 'kapatıl'
! and `kapanıl'.
! TODO: not completely convinced that `kapanır' is not another
! option
! - the passive form of `anla' is `anlaşıl'
!
! TODO: LN2 repetition is ugly, it could be done nicer.
!
! PassiveLN, composed in this order:
!  1. optionally turn y into n after "k o" when "@RB ^(I) @LN @MB"
!     follows;
!  2. optionally delete "^(I) @LN" after "k o n @RB" and before @MB;
!  3. insert t after the root kapa (anchored at the start of the string)
!     and @RB, before "^(I) @LN @MB";
!  4. insert ş after the root anla in the same position;
!  5. @LN becomes n when it follows a vowel or l, any intermediate
!     symbols and ^(I), and precedes @MB;
!  6. every remaining @LN becomes l;
!  7-8. the same two rules again for the symbol @LN2.
define PassiveLN y (->) n || k o _ %@RB %^%(I%) %@LN %@MB
.o. %^%(I%) %@LN (->) 0 || k o n %@RB _ %@MB
.o. [..] -> t || .#. {kapa} %@RB _ %^%(I%) %@LN %@MB
.o. [..] -> ş || .#. {anla} %@RB _ %^%(I%) %@LN %@MB
.o. %@LN -> n || [Vall | l] IntSym* %^%(I%) _ %@MB
.o. %@LN -> l
.o. %@LN2 -> n || [Vall | l] IntSym* %^%(I%) _ %@MB
.o. %@LN2 -> l
;
!
! Replace intermediate symbol @CAUS with the correct form of causative suffix
!
!
! Except in lexical exceptions handled in morphotactics, the causative suffix
! is -t after a `l', `r' or vowel, otherwise it is -DIr.
! Two parallel replacements for the @CAUS placeholder, both requiring a
! following @MB:
!  - @CAUS becomes t when the left context is a vowel, l or r followed by
!    any number of intermediate symbols;
!  - @CAUS becomes ^D ^I r when the left context is a symbol other than a
!    vowel, l or r, followed by one or more boundaries.
define Causative %@CAUS -> t // [Vall | l | r] IntSym* _ %@MB
,, %@CAUS -> %^D %^I r // \[Vall | l | r] Boundary+ _ %@MB;
!
! Optative 3s form change
!
!
! After third person agreement suffixes optative suffix disappears. We mark
! this with symbol @opt3 in morphotactics, so that it is easy to spot it
! here.
!
! Optative3, two composed steps:
!  1. delete ^(y) ^A when it follows a boundary and is followed by
!     "@MB @opt3";
!  2. delete the @opt3 marker.
define Optative3 %^%(y%) %^A -> 0 || Boundary _ %@MB %@opt3
.o. %@opt3 -> 0;
!
! Insert space before question particle
!
!
! Even though it is normally written separately, correct analysis of the
! question particle -mI requires the knowledge of the previous word
! (generally verb). The following allows correct analysis in such
! cases given that the input is tokenized accordingly (keeping the
! preceding verbal form and the question particle within the same
! token).
!
! Furthermore, people tend not to obey the rules, and do not leave
! space between the question particle and the preceding word. As a
! result, making this insertion optional allows us to analyze less formal
! writing like blog posts or internet forums.
! SpaceBeforeMI rewrites the @miSPC placeholder. The preprocessor selects
! one of two variants: when the no-space option equals 1 the placeholder
! becomes either nothing or a space, otherwise it always becomes a space.
#if (MI_NOSPACE == 1)
define SpaceBeforeMI %@miSPC -> [0|" "];
#else
define SpaceBeforeMI %@miSPC -> " ";
#endif
!
! Negative Aorist
!
!
! Aorist suffix becomes -z after negative marker -mA, and before first
! person suffixes it gets dropped completely.
!
! NegAorist, three composed steps. All contexts start with the negative
! marker "Boundary m (optional ^NNS) ^A @MB".
!  1. ^(I) r becomes z directly after the negative marker and before @MB;
!  2. that z is deleted when "@MB ^(y) ^I [m or z] @MB" follows;
!  3. after the two adjacent @MB left behind by step 2, ^(y) ^I is
!     deleted when "m @MB" follows.
define NegAorist %^%(I%) r -> z || Boundary m (%^NNS) %^A %@MB
_ %@MB
.o. z -> 0 || Boundary m (%^NNS) %^A %@MB
_ %@MB %^%(y%) %^I [m | z] %@MB
.o. %^%(y%) %^I -> 0 || Boundary m (%^NNS) %^A %@MB %@MB
_ m %@MB;
!
! Insertion of a redundant negative marker -mA in 'duyMAmazlıktan',
! 'görMEmezlikten', etc. This happens only with a few verb roots,
! but we allow for all here. The suffix sequence is also rather
! specific, and does not occur (much) in other contexts.
!
! This is non-standard.
!
! Optionally insert a second "@MB m ^A" right after the negative marker
! "Boundary m (optional ^NNS) ^A", provided the following material is:
! @MB z, one or more @MB, an optional l ^I ^k, any number of @MB, and
! finally ^D ^A n.
define DoubleNegative [..] (->) %@MB m %^A || Boundary m (%^NNS) %^A _
%@MB z [%@MB]+
( l %^I %^k ) [%@MB]*
%^D %^A n ;
!
! Buffer deletion
!
! Consonants other than the buffer consonants.
define NonBufCons [ Cons | ConsXX ];
! A buffer consonant is dropped when it follows a non-buffer consonant
! (intermediate and apostrophe symbols may stand in between); every buffer
! consonant that survives is then realized as the plain letter.
define DelBufCons
[ ConsBuf -> 0 || NonBufCons [ IntSym | AposSym ]* _ ]
.o.
[ %^%(y%) -> y ,, %^%(s%) -> s ,, %^%(ş%) -> ş ,, %^%(n%) -> n ];
! A buffer vowel is dropped when it follows a vowel (same transparent
! symbols allowed in between); every buffer vowel that survives is then
! turned into the corresponding underspecified vowel.
define DelBufVow
[ Vbuf -> 0 || [ Vowel | Vxx ] [ IntSym | AposSym ]* _ ]
.o.
[ %^%(I%) -> %^I ,, %^%(A%) -> %^A ];
! Consonant buffers are handled first, vowel buffers second.
define DeleteBuffer [ DelBufCons .o. DelBufVow ];
!
! de/ye exception
!
!
! final `e' becomes `i' in `de' and `ye' before a suffix starting
! with `y'.
! Second line adds an exception to the exception optionally allowing 'deyiş'.
!
! DeYeException, two composed steps:
!  1. e becomes i after a string-initial d or y, when @RB, any boundaries
!     and a y follow;
!  2. that i optionally reverts to e after a string-initial d when the
!     following material is @RB, any boundaries and y ^I ş.
define DeYeException e -> i || .#. [d|y] _ %@RB Boundary* y
.o. i (->) e || .#. d _ %@RB Boundary* y %^I ş ;
!
! -yor exception
!
!
! `a' `e' and underspecified low-unrounded vowel `A' becomes
! corresponding high vowel before the suffix -yor
!
! We change all to ^I here and let the vowel harmony do the rest.
! a, e and the underspecified ^A all become the underspecified high vowel
! ^I when a boundary, an optional ^I and "y (intermediate symbols) o r @MB"
! follow.
define YorException [ [ a | e | %^A ] -> %^I
|| _ Boundary (%^I) y IntSym* o r %@MB ];
!
! Question particle mI
!
!
! We analyze question particle mI as a free morpheme. It normally follows
! vowel harmony according to the preceding word. But we allow all
! combinations here. This option overgenerates.
!
!define CliticMI %^I -> [ı|i|u|ü] || .#. m _ %@RB;
!
! vowel epenthesis: burun -> burnu
!
!
! Delete the vowel immediately before @DEL@ in lexically marked words
! if they are suffixed with a morpheme starting with a vowel.
!
! VowelEpenthesis, two composed steps:
!  1. delete a vowel that stands directly before the marker @DEL@ when
!     the marker is followed by one consonant, any number of intermediate
!     symbols and a vowel;
!  2. delete the @DEL@ marker whether or not step 1 applied.
define VowelEpenthesis [ Vowel -> 0 || _ %@DEL%@ ConsAll IntSym* Vall ]
.o. %@DEL%@ -> 0;
!
! Vowel Harmony
!
! We also delete `palatalized' vowels at the end since they are
! only used for vowel harmony.
!
! Replace each palatalized vowel symbol with its plain surface vowel.
! The source symbol for â was written as ^p, which is the underspecified
! consonant and would have been rewritten as a vowel; the circumflexed
! series needs ^pÂ here.
define DelPV %^pA -> a , %^pI -> ı, %^pO -> o, %^pU -> u ,
%^pÂ -> â , %^pÎ -> î, %^pÔ -> ô, %^pÛ -> û;
! VowelHarmony, three composed steps:
!  1. resolve the underspecified vowels ^I and ^A according to the
!     nearest vowel to the left (anything that is not a vowel may stand
!     in between). ^I has four outcomes depending on the class of the
!     conditioning vowel, ^A has two;
!  2. any ^A or ^I that found no conditioning vowel gets the default
!     value e or i;
!  3. apply DelPV to the palatalized vowel symbols, which are only needed
!     as conditioning context in step 1.
define VowelHarmony [ %^I -> ı // Vbu \Vall* _ ,,
%^I -> u // Vbr \Vall* _ ,,
%^I -> ü // [Vfr | %^pO | %^pU] \Vall* _ ,,
%^I -> i // [Vfu | %^pA | %^pI] \Vall* _ ,,
%^A -> a // Vb \Vall* _ ,,
%^A -> e // [Vf | Vpal] \Vall* _ ]
.o. [ %^A -> e , %^I -> i ] ! defaults
.o. DelPV;
!
! Final stop devoicing
!
!
! NOTE: reduplication should be done after this one.
!
! FSDevoicing, four composed steps:
!  1. before a vowel (intermediate symbols may stand in between) the
!     underspecified stops take their voiced or softened value:
!     ^c to c, ^g to ğ, ^K to y, ^p to b, ^t to d. When the preprocessor
!     option for it equals 1, ^K may alternatively become ğ;
!  2. ^k before a vowel becomes ğ when the preceding symbol is not n;
!  3. ^k before a vowel becomes g when the preceding symbol is n;
!  4. every underspecified stop that is still present takes its
!     unvoiced value.
define FSDevoicing [%^c -> c,
%^g -> ğ,
#if (ALLOW_mAG == 1)
%^K (->) ğ,
#endif
%^K -> y,
%^p -> b,
%^t -> d || _ IntSym* [ Vowel | Vxx ] ]
.o. %^k -> ğ || \n _ IntSym* [ Vowel | Vxx ]
.o. %^k -> g || n _ IntSym* [ Vowel | Vxx ]
.o. [%^c -> ç,
%^g -> g,
%^K -> k,
%^p -> p,
%^t -> t,
%^k -> k];
!
! Voicing assimilation
!
! VoicingAssimilation, two composed steps:
!  1. after an unvoiced consonant (intermediate and apostrophe symbols may
!     stand in between) ^D, ^C and ^G become t, ç and k. Two preprocessor
!     options decide independently whether the ^D and the ^C replacement
!     is obligatory or optional;
!  2. every ^D, ^C and ^G that is still present becomes d, c and g.
#if (RELAXED_D_ASSIMILATION == 1)
define VoicingAssimilation [ %^D (->) t ,
#else
define VoicingAssimilation [ %^D -> t ,
#endif
#if (RELAXED_C_ASSIMILATION == 1)
%^C (->) ç,
#else
%^C -> ç,
#endif
%^G -> k
|| ConsUV [ IntSym | AposSym]* _ ]
.o. [ %^D -> d,
%^C -> c,
%^G -> g ];
!
! Reduplication of some root final consonants (gemination)
! hak -> hakkı
! RedupGen prepares the gemination, in three composed steps:
!  1. the marker @DUP@ becomes @DUPL when a consonant, any number of
!     intermediate symbols and a vowel follow;
!  2. @DUPR is inserted after the single symbol that follows @DUPL;
!  3. a second group "@DUPL consonant @DUPR", with an arbitrary consonant,
!     is inserted after the first one.
define RedupGen %@DUP%@ -> %@DUPL || _ Cons IntSym* Vall
.o. [..] -> %@DUPR || %@DUPL ? _
.o. [..] -> %@DUPL Cons %@DUPR || %@DUPL ? %@DUPR _ ;
! Redup filters RedupGen with the built-in _eq() and then removes the
! three markers.
! NOTE(review): _eq() presumably keeps only the strings in which all
! spans delimited by @DUPL and @DUPR are identical, which would force the
! inserted consonant to copy the original one; confirm against the
! compiler documentation.
define Redup _eq(RedupGen, %@DUPL, %@DUPR)
.o. %@DUP%@ | %@DUPL | %@DUPR -> 0;
!
! Delete silent phonemes
!
! Remove every silent vowel and silent consonant symbol.
define DeleteSilentP SilentLetter -> 0;
!
! ki -> kü
!
! the suffix ki may be realized as kü if the preceding vowel is `ü'
! this seems to be mostly optional with varying degree of preference
! over one or the other based on the root form. For example, it is
! more likely to hear 'dünkü' but 'gülünki'.
! Optionally realize the i of the suffix as ü when the nearest vowel to
! the left is ü and the i stands between "Boundary k" and @MB.
define KiKu [ i (->) ü
|| ü \Vall* Boundary k _ %@MB ];
!
! m-reduplication
!
! This, in fact, is not reduplication in morphology, but reduplication
! of words with a phonetic alternation. In a nutshell: replicated words
! follow the original word, but the first letter/phoneme is replaced
! with 'm' if the original word starts with a consonant, or an 'm' is
! prepended to the original word if it starts with a vowel.
!
! For example, 'gözü mözü (şişmiş)', 'arabası marabası varmış'
!
! Mredup only applies to strings that contain the marker @MREDUP, in two
! composed steps:
!  1. when the string starts with a vowel, an m is inserted in front of
!     it;
!  2. a string-initial consonant is replaced by m (the m inserted by
!     step 1 is itself a consonant and is mapped to itself).
define Mredup [..] -> m || .#. _ [Vall] ?* %@MREDUP
.o. [Cons] -> m || .#. _ ?+ %@MREDUP;
! Remove the marker once the alternation has been applied.
define DeleteMredup %@MREDUP -> 0;
!
! Filter for cleaning up the intermediate symbols in the analysis level
! we also deal with adding -sI to compound noun roots, since we use a
! special notation in lexicon and leave the -(s)I out in lexical entries.
!
!
#if (PERCENT_AS_PREFIX == 0)
!
! Move the prefix percent tag after the number.
! This feels more readable with right-arrow rules;
! the analysis filter uses the inverse of it.
!
! First a copy of the tag is inserted after the closing bracket of the
! number tag, then the string-initial copy is removed.
!
define percfilter
[ [..] -> {<perc>}
|| .#. {<perc>} [ Digit | %. | %, ]+ {<Num} \%>* %> _ ]
.o.
[ {<perc>} -> 0 || .#. _ ] ;
#endif
! Tag notation converters, selected by the preprocessor.
!
! With the plus separator option set to 1, plussep is defined:
!  1. an opening angle bracket becomes a plus sign when a tag body and a
!     closing angle bracket follow;
!  2. only when subcategories are not marked: a colon inside such a tag
!     also becomes a plus sign;
!  3. the closing angle bracket of each converted tag is deleted.
!
! Otherwise, when subcategories are not marked, subcatfeature is defined:
! a colon inside an angle bracket tag is replaced by a closing and an
! opening bracket, splitting the tag in two.
#if (SEPARATOR_PLUS == 1)
define plussep %< -> %+ // _ \%>+ %>
#if (MARK_SUBCATEGORIES == 0)
.o. %: -> %+ // %+ \%>+ _ \%>+ %>
#endif
.o. %> -> 0 // %+ \[%+|%>]+ _ ;
#elif (MARK_SUBCATEGORIES == 0)
define subcatfeature %: -> %>%< // %< [\[%>|%:]]+ _ ?+ %>;
#endif
! AFilter is the analysis-side filter. It composes:
!  - the inverses of FSDevoicing, VowelHarmony and DeleteBuffer;
!  - the inverse of whichever tag notation converter the preprocessor
!    options selected;
!  - left-arrow rules that map @NCOMP to ^(s) ^I and remove @DEL@, @DELS@,
!    @DUP@, the stress mark, @dB and the silent letters;
!  - the inverse of percfilter when that option is active;
!  - a final restriction on the derivational tag sequence <N><si> after a
!     vowel.
define AFilter FSDevoicing.i
.o. VowelHarmony.i
.o. DeleteBuffer.i
#if (SEPARATOR_PLUS == 1)
.o. plussep.i
#elif (MARK_SUBCATEGORIES == 0)
.o. subcatfeature.i
#endif
.o. %^%(s%) %^I <- %@NCOMP
.o. 0 <- %@DEL%@
.o. 0 <- %@DELS%@
.o. 0 <- %@DUP%@
.o. 0 <- ˈ
.o. 0 <- %@dB
.o. 0 <- (ConsSilent|Vsilent)
#if (PERCENT_AS_PREFIX == 0)
.o. percfilter.i
#endif
! Disallow derivational suffix -sI with stems ending with a vowel
.o. ~[?*] <- Vall {<N><si>}
;
! A few analysis symbols may result from different surface forms.
! In the morphological description, we keep them separate so that
! they can be distinguished when needed. However, we do not want them
! as different symbols in normal analysis strings, so we collapse them
! here.
!
! collapseAsymbols strips variant suffixes from analysis tags. Each step
! deletes a colon-introduced variant (or, for the infinitive, the trailing
! m A (K)) that stands between the named tag prefix and the closing angle
! bracket. Longer variants are listed before shorter ones that are their
! prefix (s I n l A r before s I n). The infinitive step is only compiled
! in when the corresponding preprocessor option equals 1.
define collapseAsymbols %: %0 -> 0 || %< p 3 p _ %>
.o. %: s I n l A r -> 0 || %< 3 p _ %>
.o. %: s I n -> 0 || %< 3 p _ %>
.o. %: %0 -> 0 || %< 3 p _ %>
.o. %: %0 -> 0 || %< 3 s _ %>
.o. %: s A n A -> 0 || %< 2 s _ %>
.o. %: y I n -> 0 || %< 2 s _ %>
.o. %: y I n -> 0 || %< 2 p _ %>
.o. %: y I n I z -> 0 || %< 2 p _ %>
.o. %: s A n I z -> 0 || %< 2 p _ %>
#if (COLLAPSE_MA_MAK == 1)
.o. m A (K) -> 0 || %< v n %: i n f _ %>
#endif
.o. %: z -> 0 || %< c v %: d e n _ %>
.o. %: c I k -> 0 || %< d i m _ %>
.o. %: c A k -> 0 || %< d i m _ %>
.o. %: I c A k -> 0 || %< d i m _ %>
.o. %: A c A k -> 0 || %< d i m _ %>
.o. %: c A g I z -> 0 || %< d i m _ %>
;
! Remove the morpheme and root boundary symbols.
define DeleteBoundary [ %@MB | %@RB ] -> 0 ;
!
! Delete Circumflex
!
! The vowels with circumflex î, û, â are written without circumflex
! most of the time. We optionally delete the accent from the surface.
! Optionally strip the circumflex: lower case letters first, then the
! upper case ones (the replacements are parallel, so order is irrelevant).
define DeleteCircumflex â (->) a, î (->) i, û (->) u,
Â (->) A, Î (->) İ, Û (->) U;
!
! Add Circumflex
!
! Current spelling rules state that circumflex is used only when a
! word is ambiguous without circumflex, e.g., 'hala' and 'hâlâ'.
! However most older text include circumflexed vowels that would not
! be recognized by a lexicon according to the new rules.
!
! This simply allows free replacement of non-circumflexed versions of
! the vowels with circumflexed versions in the surface. This should
! have little impact on analysis since people rarely insert these
! accents by mistake, but this would certainly cause overblown
! generation with lots of incorrect words.
!
! AddCircumflex is the inverse of DeleteCircumflex: each plain vowel may
! optionally be replaced by its circumflexed counterpart.
define AddCircumflex DeleteCircumflex.i;
!
! Allow capitalization, normally this should happen only at the
! beginning of a sentence.
!
!
! Optionally replace a lower case letter with its upper case counterpart
! when it is the first symbol of the string. Dotted and dotless i are
! kept apart: ı pairs with I and i pairs with İ.
define Capitalize a (->) A, b (->) B, c (->) C, ç (->) Ç, d (->) D,
e (->) E, f (->) F, g (->) G, ğ (->) Ğ, h (->) H, ı (->) I, i (->) İ,
j (->) J, k (->) K, l (->) L, m (->) M, n (->) N, o (->) O, ö (->) Ö,
p (->) P, r (->) R, s (->) S, ş (->) Ş, t (->) T, u (->) U, ü (->) Ü,
v (->) V, y (->) Y, z (->) Z, î (->) Î, û (->) Û, â (->) Â || .#. _ ;
!
! Accept All-capitals text. For this to work properly, it should be
! composed with Capitalize above.
!
! Second bit makes sure that once we start capitalizing the word, we
! go all the way to the end. No half capitalization.
!
! The set of upper case letters.
define CAP [A|B|C|Ç|D|E|F|G|Ğ|H|I|İ|J|K|L|M|N|O|Ö|P|R|S|Ş|T|U|Ü|V|Y|Z|Î|Û|Â];
! AllCaps, two composed steps:
!  1. a lower case letter may optionally be capitalized when the letter
!     before it is upper case (apostrophe symbols may surround that
!     letter);
!  2. a lower case letter must be capitalized when the two letters before
!     it are both upper case (an apostrophe symbol may stand between
!     them). This is what rules out half-capitalized words.
define AllCaps a (->) A, b (->) B, c (->) C, ç (->) Ç, d (->) D,
e (->) E, f (->) F, g (->) G, ğ (->) Ğ, h (->) H, ı (->) I, i (->) İ,
j (->) J, k (->) K, l (->) L, m (->) M, n (->) N, o (->) O, ö (->) Ö,
p (->) P, r (->) R, s (->) S, ş (->) Ş, t (->) T, u (->) U, ü (->) Ü,
v (->) V, y (->) Y, z (->) Z, î (->) Î, û (->) Û, â (->) Â
// (AposSym) CAP (AposSym) _
.o. a -> A, b -> B, c -> C, ç -> Ç, d -> D,
e -> E, f -> F, g -> G, ğ -> Ğ, h -> H, ı -> I, i -> İ,
j -> J, k -> K, l -> L, m -> M, n -> N, o -> O, ö -> Ö,
p -> P, r -> R, s -> S, ş -> Ş, t -> T, u -> U, ü -> Ü,
v -> V, y -> Y, z -> Z, î -> Î, û -> Û, â -> Â
// CAP (AposSym) CAP _;
!
! Accept text written in all or partial ASCII. This is common in
! online communication, especially when people use non TR keyboards.
!
! Optionally replace each non-ASCII Turkish letter with the ASCII letter
! people type instead, for lower and upper case alike.
! The upper case Ç used to be mapped to itself, which made that pair a
! no-op; it now maps to the ASCII C like every other pair.
define AllASCII ç (->) c, Ç (->) C,
ğ (->) g, Ğ (->) G,
ı (->) i, İ (->) I,
ö (->) o, Ö (->) O,
ş (->) s, Ş (->) S,
ü (->) u, Ü (->) U,
î (->) i, Î (->) I,
û (->) u, Û (->) U,
â (->) a, Â (->) A;