-
Notifications
You must be signed in to change notification settings - Fork 0
/
disambiguator.cg3
193 lines (139 loc) · 3.39 KB
/
disambiguator.cg3
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
# ============================ #
#!! # Plains Cree disambiguator
# ============================ #
# ========== #
# Delimiters #
# ========== #
# A cohort whose word form is one of these four closes the current window;
# the rules below are applied window by window.
DELIMITERS = "<.>" "<!>" "<?>" "<¶>";
# ============= #
# Tags and sets #
# ============= #
# ======== #
SETS
# ======== #

# Boundary sets. Each one lists two alternative boundary tags
# (vislcg and CG-2 together).
LIST BOS     = (>>>) (<s>) ;
LIST EOS     = (<<<) (</s>) ;

# One-member sets: every set below is named after the single tag it holds,
# so rules can refer to the tag through the set name.
LIST N       = N ;
LIST V       = V ;
LIST Ipc     = Ipc ;
LIST Interj  = Interj ;
LIST Adv     = Adv ;
LIST CC      = CC ;
LIST CS      = CS ;
LIST AN      = AN ;
LIST IN      = IN ;
LIST IA      = IA ;
LIST II      = II ;
LIST TA      = TA ;
LIST TI      = TI ;
LIST Cnj     = Cnj ;
LIST ConNeg  = ConNeg ;
LIST Cond    = Cond ;
LIST Def     = Def ;
LIST Del     = Del ;
LIST Dem     = Dem ;
LIST Dim     = Dim ;
LIST Dim/Der = Dim/Der ;
LIST Dist    = Dist ;
LIST Fut     = Fut ;
LIST Imm     = Imm ;
LIST Imp     = Imp ;
LIST Incl    = Incl ;
LIST Ind     = Ind ;
LIST Inf     = Inf ;
LIST Int     = Int ;
LIST Interr  = Interr ;
LIST Loc     = Loc ;
LIST Med     = Med ;
LIST Neg     = Neg ;
LIST Num     = Num ;
LIST Obv     = Obv ;
LIST Pos     = Pos ;
LIST Prf     = Prf ;
LIST Pron    = Pron ;
LIST Prox    = Prox ;
LIST Prs     = Prs ;
LIST Prt     = Prt ;
LIST Qst     = Qst ;
LIST Sbj     = Sbj ;
LIST SgO     = SgO ;
# Fixed: this set was spelled "P2lO" (name and tag alike), a transposition of
# "2PlO". The sibling object tags in this file are 1PlO, 3PlO, 4PlO and 2SgO,
# and no rule in this file refers to the old spelling.
LIST 2PlO = 2PlO ;
# One-member sets, continued: each set is named after the single tag it holds.
LIST Pl      = Pl ;
LIST PlO     = PlO ;
LIST 12Pl    = 12Pl ;
LIST 1Pl     = 1Pl ;
LIST 1PlO    = 1PlO ;
LIST Sg      = Sg ;
LIST 1Sg     = 1Sg ;
LIST 1SgO    = 1SgO ;
LIST 2Pl     = 2Pl ;
LIST 2Sg     = 2Sg ;
LIST 2SgO    = 2SgO ;
LIST 3Pl     = 3Pl ;
LIST 3PlO    = 3PlO ;
LIST 3Sg     = 3Sg ;
LIST 3SgO    = 3SgO ;
LIST 4Pl     = 4Pl ;
LIST 4PlO    = 4PlO ;
LIST 4Sg     = 4Sg ;
LIST 4SgO    = 4SgO ;
LIST 5Sg     = 5Sg ;

# Px* tags.
LIST Px12Pl  = Px12Pl ;
LIST Px1Pl   = Px1Pl ;
LIST Px1Sg   = Px1Sg ;
LIST Px2Pl   = Px2Pl ;
LIST Px2Sg   = Px2Sg ;
LIST Px3Pl   = Px3Pl ;
LIST Px3Sg   = Px3Sg ;
LIST Px4Pl   = Px4Pl ;
LIST Px4Sg   = Px4Sg ;

# NOTE(review): a set named Dim/Der is also declared in this file; only
# Der/Dim is used by a rule (DerYes). Confirm which spelling the analyser emits.
LIST Der/Dim = Der/Dim ;
# ============ #
# Derived sets #
# ============ #

# Any reading carrying one of these four tags.
LIST WORD     = N V Ipc Interj ;

# V readings that also carry one of the listed person/number tags
# (first row Sg, second row Pl).
LIST VFIN     = (V 1Sg) (V 2Sg) (V 3Sg) (V 4Sg) (V 5Sg)
                (V 1Pl) (V 2Pl) (V 3Pl) (V 4Pl) (V 5Pl) ;

# WORD with V taken out.
SET  NOT-VERB = WORD - V ;

LIST NUMBER   = Sg Pl ;
LIST GENDER   = AN IN ;
# Gender/number pairs; used as $$NUMBERGENDER by the DemNotIpc rules.
# Fixed: the last pair read (IN PL). Tags are case-sensitive and the plural
# tag used everywhere else in this file is "Pl", so the inanimate plural pair
# could never match.
SET NUMBERGENDER = (AN Sg) OR (AN Pl) OR (IN Sg) OR (IN Pl) ;
# --- Noun sets ---
# Four base forms; not referenced by any rule in this file.
LIST HUMAN       = "awâsis" "okimâw" "iyiniw" "iskwêw" ;

# --- Boundary sets ---        (none defined)
# --- Complementary set ---    (none defined)
# --- Case sets ---            (none defined)
# --- Some case, but not... --- (none defined)

# --- Punctuation ---
LIST COMMA       = (",") ;
# Comma, backslash or semicolon.
SET  MARK        = COMMA OR ("\\") OR ("\;") ; #"
LIST PUNCT-LEFT  = (PUNCT LEFT) ;
LIST PUNCT-RIGHT = (PUNCT RIGHT) ;
# Comma or left-hand punctuation.
SET  PRE-APP     = COMMA OR PUNCT-LEFT ;
# Here come the rules #
# BEFORE-SECTIONS is left empty; the rules that follow sit under SECTION.
BEFORE-SECTIONS
SECTION
# We remove the particle if the word can be something more interesting:
# drop the Ipc reading when the same cohort (position 0) also has an N or a
# V reading.
REMOVE:NotIpc Ipc (0 N OR V) ;
# We like finite verbs:
# no context condition -- wherever a cohort has a VFIN reading, only the VFIN
# readings are kept.
SELECT:Vfin VFIN ;
# We go for derivation, more to look at when we are searching a lemma in Korp:
# keep only the Der/Dim reading, but only when every reading of the cohort is
# an N reading (0C = careful test on the cohort itself).
SELECT:DerYes Der/Dim (0C N) ;
# We say postnominal words are determiners, not particles, if they agree:
# drop the Ipc reading when the cohort also has a Pron + Dem reading and the
# preceding cohort (-1) has an N reading. $$NUMBERGENDER is a unified set:
# both tests must match the same member of NUMBERGENDER.
REMOVE:DemNotIpc Ipc (-1 N + $$NUMBERGENDER)(0 Pron + Dem + $$NUMBERGENDER) ;
# Same rule with the noun in the following cohort (1) instead, i.e. the word
# stands before the noun.
REMOVE:DemNotIpc Ipc ( 1 N + $$NUMBERGENDER)(0 Pron + Dem + $$NUMBERGENDER) ;
## Mîcisowinâhtik ôma.
# Demonstratives follow nouns.
# Drop the Dem reading when the cohort also has an Ipc reading and neither the
# preceding (-1) nor the following (1) cohort has an N reading.
REMOVE:IpcNotDem Dem (NOT -1 N)(NOT 1 N)(0 Ipc) ;
# Interrogatives in questions
# Drop the N reading when the cohort also has an Interr reading, no cohort to
# the left (*-1 scan) has an Interr reading, and a "?" is found somewhere to
# the right (*1 scan).
REMOVE:InterrNotN N (0 Interr)(NOT *-1 Interr)(*1 ("?"));
# NP-internal agreement
# On a cohort with an N or Pron reading, drop the Sg (resp. Pl) reading when
# every reading of the following cohort is Dem + Pl (resp. Dem + Sg); 1C is a
# careful test on the next cohort.
REMOVE:SgAgr Sg (0 N OR Pron)(1C Dem + Pl) ;
REMOVE:PlAgr Pl (0 N OR Pron)(1C Dem + Sg) ;
# N Px number agreement
# Drop the Px3Pl reading when the preceding cohort (-1) has an N + Sg reading,
# and the Px3Sg reading when it has an N + Pl reading.
REMOVE:PxSg Px3Pl (-1 N + Sg);
REMOVE:PxPl Px3Sg (-1 N + Pl);
# Singular vs. plural object
# When a cohort is ambiguous between a 3SgO and a 3PlO reading, an Obv reading
# found by the *0 scan decides which one goes:
#   - a Sg + Obv reading drops the 3PlO reading;
#   - a Pl + Obv reading drops the 3SgO reading.
# NOTE(review): *0 is presumably CG-3's nearest-neighbour scan in both
# directions -- confirm against the manual.
REMOVE:SgIfObjSg 3PlO (0 3SgO)(*0 Sg + Obv);
# Fixed: the competing-reading test used to be (0 3SgO), which is the target
# itself and therefore always true; mirroring SgIfObjSg, the rule must require
# a competing 3PlO reading. The rule name is kept unchanged so existing trace
# output still matches.
REMOVE:PlIfObjSg 3SgO (0 3PlO)(*0 Pl + Obv);
# Mood rules -- do not understand moods yet
# Both rules are unnamed.
# Drop the Fut reading when the same cohort also has an Ind reading.
REMOVE Fut (0 Ind);
# Drop the Imp reading when the same cohort also has an Ind reading and the
# *0 scan finds an N + Sg reading.
# NOTE(review): *0 is presumably CG-3's nearest-neighbour scan in both
# directions -- confirm against the manual.
REMOVE Imp (0 Ind)(*0 N + Sg);
# No rules are listed after AFTER-SECTIONS.
AFTER-SECTIONS