-
Notifications
You must be signed in to change notification settings - Fork 0
/
functions.cg3
1845 lines (1326 loc) · 90.3 KB
/
functions.cg3
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
#Distributed under the terms of the GNU General Public License version 2
# or any later version.
# ==================================================================== #
# N O R T H E R N S Á M I D I S A M B I G U A T O R
# ==================================================================== #
# ====================================================================
# Sámi language technology project 2003-2006, University of Tromsø #
# ========== #
# Delimiters #
# ========== #
DELIMITERS = "<.>" "<!>" "<?>" "<...>" "<¶>";
#PARENTHESES = "<$(>" "<$)>" "<$[>" "<$]>" "<${>" "<$}>" "<$«>" "<$»>" ;
# ============= #
# Tags and sets #
# ============= #
SETS
# I define end of clause and beginning of clause in a way so that the file
# may be read both by the CG-2 and the vislcg formalisms.
LIST BOS = (>>>) (<s>);
LIST EOS = (<<<) (</s>);
# CG3 doesn't work without >>> and <<< !
# Tags declared as single-membered LISTs
# ======================================
# Parts of speech
# ---------------
LIST N = N ;
LIST A = A ;
LIST Adv = Adv ;
LIST V = V ;
LIST Pron = Pron ;
LIST CS = CS ;
LIST CC = CC ;
SET CC-CS = CC OR CS ;
LIST Po = Po ;
LIST Pr = Pr ;
LIST Pcle = Pcle ;
LIST Num = Num ;
LIST Interj = Interj ;
LIST ABBR = ABBR ;
LIST ACR = ACR ;
LIST CLB = CLB ;
LIST LEFT = LEFT ;
LIST RIGHT = RIGHT ;
LIST WEB = WEB ;
LIST QMARK = """ ; # ADD " FOR SYNTAX COLOURING.
LIST PPUNCT = PUNCT ;
SET PUNCT = PPUNCT - QMARK ;
LIST COMMA = "," ;
LIST ¶ = ¶;
#LIST V* = V* ;
#!! * Sets for POS sub-categories
# ------------------
LIST Pers = Pers ;
LIST Dem = Dem ;
LIST Interr = Interr ;
LIST Indef = Indef ;
LIST Recipr = Recipr ;
LIST Refl = Refl ;
LIST Rel = Rel ;
#LIST Adp = Adp ;
LIST Coll = Coll ;
LIST NomAg = NomAg ;
LIST G3 = G3 ;
LIST Prop = Prop ;
#!! * Sets for Semantic tags
# -------------
LIST Sem/Ani = Sem/Ani ;
LIST Sem/Date = Sem/Date ;
LIST Sem/Fem = Sem/Fem ;
LIST Sem/Group = Sem/Group ;
LIST Sem/Hum = Sem/Hum ;
LIST Sem/Mal = Sem/Mal ;
LIST Sem/Measr = Sem/Measr ;
LIST Sem/Money = Sem/Money ;
LIST Sem/Obj = Sem/Obj ;
LIST Sem/Org = Sem/Org ;
LIST Sem/Plc = Sem/Plc ;
LIST Sem/Sur = Sem/Sur ;
LIST Sem/Time = Sem/Time ;
LIST Sem/Year = Sem/Year ;
SET FIRSTNAME = (Prop Sem/Fem) OR (Prop Sem/Mal) ;
LIST TIME-N-SET = (N Sem/Time) ;
LIST NOT-TIME = "dulvi" "vuorru" ;
SET TIME-N = TIME-N-SET - NOT-TIME ;
# HUMAN: tags taken to indicate a human referent - the human/name semantic
# classes, the Pers tag, and the agent-noun tags.
# Fix: the agent-noun tags in this file are NomAg and Der/NomAg (both declared
# as LISTs and combined in LIST Actor).  The previous members "NumAg" and
# "Der/NumAg" correspond to no tag declared in this file, so agent nouns were
# not covered by this set.
LIST HUMAN = Sem/Hum Sem/Mal Sem/Fem Sem/Sur Pers NomAg Der/NomAg ;
#!! * Sets for Morphosyntactic properties
# --------------------------
LIST Nom = Nom ;
LIST Acc = Acc ;
LIST Gen = Gen ;
LIST Ill = Ill ;
LIST Loc = Loc ;
LIST Com = Com ;
LIST Ess = Ess ;
LIST Ine = Ine ;
LIST Ela = Ela ;
LIST LOC = Loc Ine Ela ;
LIST Sg = Sg ;
LIST Du = Du ;
LIST Pl = Pl ;
LIST RCmpnd = RCmpnd ;
LIST Cmpnd = Cmpnd ;
LIST SgNomCmp = SgNomCmp ;
LIST SgGenCmp = SgGenCmp ;
LIST ShCmp = ShCmp ;
LIST PxSg1 = PxSg1 ;
LIST PxSg2 = PxSg2 ;
LIST PxSg3 = PxSg3 ;
LIST PxDu1 = PxDu1 ;
LIST PxDu2 = PxDu2 ;
LIST PxDu3 = PxDu3 ;
LIST PxPl1 = PxPl1 ;
LIST PxPl2 = PxPl2 ;
LIST PxPl3 = PxPl3 ;
LIST Comp = Comp ;
LIST Superl = Superl ;
LIST Attr = Attr ;
LIST Ord = Ord ;
LIST Qst = Qst ;
# The ("ge" Pcle) etc. are used in Apertium
LIST Foc/ge = Foc/ge ("ge" Pcle) ;
LIST Foc/gen = Foc/gen ("gen" Pcle) ;
LIST Foc/ges = Foc/ges ("ges" Pcle) ;
LIST Foc/gis = Foc/gis ("gis" Pcle) ;
LIST Foc/naj = Foc/naj ("naj" Pcle) ;
LIST Foc/ba = Foc/ba ("ba" Pcle) ;
LIST Foc/be = Foc/be ("be" Pcle) ;
LIST Foc/hal = Foc/hal ("hal" Pcle) ;
LIST Foc/han = Foc/han ("han" Pcle) ;
LIST Foc/bat = Foc/bat ("bat" Pcle) ;
LIST Foc/son = Foc/son ("son" Pcle) ;
LIST IV = IV ;
LIST TV = TV ;
LIST Der/Pass = Der/PassL Der/PassS ;
LIST Prt = Prt;
LIST Prs = Prs ;
LIST Ind = Ind ;
LIST Pot = Pot ;
LIST Cond = Cond ;
LIST Imprt = Imprt ;
LIST ImprtII = ImprtII ;
LIST Sg1 = Sg1 ;
LIST Sg2 = Sg2 ;
LIST Sg3 = Sg3 ;
LIST Du1 = Du1 ;
LIST Du2 = Du2 ;
LIST Du3 = Du3 ;
LIST Pl1 = Pl1 ;
LIST Pl2 = Pl2 ;
LIST Pl3 = Pl3 ;
LIST Inf = Inf ;
LIST ConNeg = ConNeg ;
LIST Neg = Neg ;
LIST PrfPrc = PrfPrc ;
LIST VGen = VGen ;
LIST PrsPrc = PrsPrc ;
LIST Ger = Ger ;
LIST Sup = Sup ;
LIST Actio = Actio ;
LIST Der/PassL = Der/PassL ;
LIST Der/NomAg = Der/NomAg ;
LIST Actor = NomAg Der/NomAg ;
LIST VAbess = VAbess ;
# Derivation
# ----------
LIST Der/adda = Der/adda ;
LIST Der/ahtti = Der/ahtti ;
LIST Der/alla = Der/alla ;
LIST Der/asti = Der/asti ;
LIST Der/easti = Der/easti ;
LIST Der/d = Der/d ;
LIST Der/NomAct = Der/NomAct ;
LIST Der/eamoš = Der/eamoš ;
LIST Der/amoš = Der/amoš ;
LIST Der/eapmi = Der/eapmi ;
LIST Der/geahtes = Der/geahtes ;
LIST Der/gielat = Der/gielat ;
# NOTE(review): "!better:" looks like a stray annotation that was turned into a
# LIST declaration rather than a real derivation tag; nothing in the visible
# part of this file refers to it - confirm against the analyser's tag set.
LIST !better: = !better: ;
LIST Der/NuA = Der/NuA ;
LIST Der/h = Der/h ;
LIST Der/heapmi = Der/heapmi ;
LIST Der/hudda = Der/hudda ;
LIST Der/huhtti = Der/huhtti ;
LIST Der/huvva = Der/huvva ;
LIST Der/halla = Der/halla ;
LIST Der/j = Der/j ;
LIST Der/l = Der/l ;
LIST Der/laš = Der/laš ;
LIST Der/las = Der/las ;
LIST Der/hat = Der/hat ;
LIST Der/meahttun = Der/meahttun ;
LIST Der/muš = Der/muš ;
LIST Der/n = Der/n ;
LIST Der/st = Der/st ;
LIST Der/stuvva = Der/stuvva ;
LIST Der/upmi = Der/upmi ;
LIST Der/supmi = Der/supmi ;
LIST Der/vuohta = Der/vuohta ;
LIST Der/goahti = Der/goahti ;
LIST Der/lágan = Der/lágan ;
LIST Der/lágán = Der/lágán ;
LIST Der/lágaš = Der/lágaš ;
LIST Der/jagáš = Der/jagáš ;
LIST Der/jahkásaš = Der/jahkásaš ;
LIST Der/diibmosaš = Der/diibmosaš ;
LIST Der/Dimin = Der/Dimin ;
LIST Der/viđá = Der/viđá ;
LIST Der/viđi = Der/viđi ;
LIST Der/veara = Der/veara ;
LIST Der/duohke = Der/duohke ;
LIST Der/duohkai = Der/duohkai ;
LIST Der/vuolle = Der/vuolle ;
LIST Der/vuollai = Der/vuollai ;
LIST Der/vuolde = Der/vuolde ;
# Syntactic tags
# --------------
LIST @+FAUXV = @+FAUXV ;
LIST @+FMAINV = @+FMAINV ;
LIST @-FAUXV = @-FAUXV ;
LIST @-FMAINV = @-FMAINV ;
LIST @-FSUBJ> = @-FSUBJ> ;
LIST @-F<OBJ = @-F<OBJ ;
LIST @-FOBJ> = @-FOBJ> ;
LIST @SPRED<OBJ = @SPRED<OBJ ;
LIST @-FADVL = @-FADVL ;
SET FOBJ = @-F<OBJ OR @-FOBJ> ;
SET FMAINV = @-FMAINV OR @+FMAINV OR (V @<OBJ) OR (V @N<) OR (V @<SUBJ) OR (V @<ADVL) ;
SET FAUXV = @-FAUXV OR @+FAUXV ;
LIST @ADVL = @ADVL @ADVL> @ADVL< @<ADVL @>ADVL @-F<ADVL ;
LIST @>ADVL = @>ADVL ;
LIST @ADVL< = @ADVL< ;
LIST @<ADVL = @<ADVL ;
LIST @ADVL> = @ADVL> ;
LIST @ADVL>CS = @ADVL>CS ;
LIST <hab> = <hab> ;
LIST <ext> = <ext> ;
LIST HAB-V-TAGS = Pl3 Sg3 ConNeg PrfPrc Inf ;
LIST @>N = @>N ;
LIST @N< = @N< ;
LIST @>A = @>A ;
LIST @P< = @P< ;
LIST @>P = @>P ;
LIST @HNOUN = @HNOUN ;
LIST @INTERJ = @INTERJ ;
LIST @>Num = @>Num;
LIST @Pron< = @Pron< ;
LIST @>Pron = @>Pron ;
LIST @Num< = @Num< ;
LIST @OBJ = @OBJ ;
LIST @<OBJ = @<OBJ ;
LIST @OBJ> = @OBJ> ;
LIST @OPRED = @OPRED ;
LIST @<OPRED = @<OPRED ;
LIST @OPRED> = @OPRED> ;
LIST @PCLE = @PCLE ;
LIST @COMP-CS< = @COMP-CS< ;
LIST @SPRED = @SPRED ;
LIST @<SPRED = @<SPRED ;
LIST @SPRED> = @SPRED> ;
LIST @SUBJ = @SUBJ ;
LIST @<SUBJ = @<SUBJ ;
LIST @SUBJ> = @SUBJ> ;
SET SUBJ = @<SUBJ OR @SUBJ> OR @SUBJ ;
SET SPRED = @<SPRED OR @SPRED> OR @SPRED ;
SET OPRED = @<OPRED OR @OPRED> OR @OPRED ;
LIST @PPRED = @PPRED ;
LIST @APP = @APP ;
LIST @APP-N< = @APP-N< ;
LIST @APP-Pron< = @APP-Pron< ;
LIST @APP>Pron = @APP>Pron ;
LIST @APP-Num< = @APP-Num< ;
LIST @APP-ADVL< = @APP-ADVL< ;
LIST @VOC = @VOC ;
LIST @CVP = @CVP ;
LIST @CNP = @CNP ;
SET OBJ = (@<OBJ) OR (@OBJ>) OR (@OBJ) OR (@-F<OBJ) OR (@-FOBJ>) ;
LIST <OBJ = @-F<OBJ @<OBJ ;
LIST OBJ> = @OBJ> @-FOBJ> ;
SET <OBJ-OTHERS = <OBJ OR (Gen) OR (Nom) OR (Ess) OR (Loc) OR (Adv) OR (Ine) OR (Ela) ;
SET OBJ>-OTHERS = OBJ> OR (Gen) OR (Nom) OR (Ess) OR (Loc) OR (Adv) OR (Ine) OR (Ela) ;
SET SYN-V = V + SUBJ OR OBJ + V OR @ADVL + V OR (V @N<) OR (V @A<) OR V + SPRED OR (V @COMP-CS<) ;
LIST @X = @X ;
LIST OKTA = "akta" "okta";
LIST go = "go" ;
# Initials
# --------
LIST INITIAL = "a" "b" "c" "d" "e" "f" "g" "h" "i" "j" "k" "l" "m"
"n" "o" "p" "q" "r" "s" "t" "u" "v" "w" "x" "y" "z"
"á" "æ" "ø" "å" "ö" "ä" ;
LIST CAP-INITIAL = "A" "B" "C" "D" "E" "F" "G" "H" "I" "J" "K" "L" "M"
"N" "O" "P" "Q" "R" "S" "T" "U" "V" "W" "X" "Y" "Z"
"Á" "Æ" "Ø" "Å" "Ö" "Ä" ;
# Tag sets
# ========
SETS
# ========
# Word or not
# -----------
LIST WORD = N A Adv V Pron CS CC Po Pr Interj Pcle Num ABBR ACR \? ;
# any word
SET REALWORD = WORD - Num - Ord ;
SET REALWORD-NOTABBR = WORD - Num - Ord - ABBR ;
SET WORD-NOTDE = WORD - ("de") ;
# Verb sets
# ---------
# Verbs and their complements
# - - - - - - - - - - - - - -
SET NOT-VERB = WORD - V ;
# Finiteness and mood
# - - - - - - - - - -
# V-IND-FIN: readings carrying a tense tag (Prs or Prt).
SET V-IND-FIN = Prs OR Prt ;
# Problem: "In boahtán" is an invisible indicative
# V-MOOD: any of the mood tags, plus the (Neg Sup) combination.
SET V-MOOD = Ind OR Pot OR Imprt OR ImprtII OR Cond OR (Neg Sup) ;
# GC: the lemma "gč", treated as finite in VFIN below.
LIST GC = ("gč") ;
# VFIN: built from GC, V-MOOD and ConNeg (connegative forms are subtracted).
SET VFIN = GC OR V-MOOD - ConNeg ;
# VFIN-POS: V-MOOD without ConNeg and without Neg readings.
SET VFIN-POS = V-MOOD - ConNeg - Neg ;
# VFIN-NOT-IMP: VFIN with Imprt readings removed (ImprtII is not subtracted).
SET VFIN-NOT-IMP = VFIN - Imprt ;
# VFIN-NOT-NEG: VFIN with Neg readings removed.
SET VFIN-NOT-NEG = VFIN - Neg ;
# This might be too strict; besides, "iige" can also be written "ii ge".
SET NOT-PRFPRC = WORD - PrfPrc ;
# Person
# - - - -
LIST V-SG1 = (V Ind Prs Sg1) (V Ind Prt Sg1) (V Cond Prs Sg1)
(V Cond Prt Sg1) (V Pot Prs Sg1) (V Neg Ind Sg1);
LIST V-SG2 = (V Ind Prs Sg2) (V Ind Prt Sg2) (V Cond Prs Sg2)
(V Cond Prt Sg2) (V Pot Prs Sg2) (V Neg Ind Sg2);
LIST V-SG3 = (V Ind Prs Sg3) (V Ind Prt Sg3) (V Cond Prs Sg3)
(V Cond Prt Sg3) (V Pot Prs Sg3) (V Neg Ind Sg3);
LIST V-DU1 = (V Ind Prs Du1) (V Ind Prt Du1) (V Cond Prs Du1)
(V Cond Prt Du1) (V Pot Prs Du1) (V Neg Ind Du1);
LIST V-DU2 = (V Ind Prs Du2) (V Ind Prt Du2) (V Cond Prs Du2)
(V Cond Prt Du2) (V Pot Prs Du2) (V Neg Ind Du2);
LIST V-DU3 = (V Ind Prs Du3) (V Ind Prt Du3) (V Cond Prs Du3)
(V Cond Prt Du3) (V Pot Prs Du3) (V Neg Ind Du3);
LIST V-PL1 = (V Ind Prs Pl1) (V Ind Prt Pl1) (V Cond Prs Pl1)
(V Cond Prt Pl1) (V Pot Prs Pl1) (V Neg Ind Pl1);
LIST V-PL2 = (V Ind Prs Pl2) (V Ind Prt Pl2) (V Cond Prs Pl2)
(V Cond Prt Pl2) (V Pot Prs Pl2) (V Neg Ind Pl2);
LIST V-PL3 = (V Ind Prs Pl3) (V Ind Prt Pl3) (V Cond Prs Pl3)
(V Cond Prt Pl3) (V Pot Prs Pl3) (V Neg Ind Pl3);
# Note that imperative verbs are not included in these sets!
# Some subsets of the VFIN sets
# - - - - - - - - - - - - - - -
SET V-SG = V-SG1 OR V-SG2 OR V-SG3 ;
SET V-DU = V-DU1 OR V-DU2 OR V-DU3 ;
SET V-PL = V-PL1 OR V-PL2 OR V-PL3 ;
SET V-DU-PL = V-DU1 OR V-DU2 OR V-DU3 OR V-PL1 OR V-PL2 OR V-PL3 ;
SET V-NOT-SG1 = VFIN-NOT-IMP - V-SG1 ;
SET V-NOT-SG2 = VFIN-NOT-IMP - V-SG2 ;
SET V-NOT-SG3 = VFIN-NOT-IMP - V-SG3 ;
SET V-NOT-DU1 = VFIN-NOT-IMP - V-DU1 ;
SET V-NOT-DU2 = VFIN-NOT-IMP - V-DU2 ;
SET V-NOT-DU3 = VFIN-NOT-IMP - V-DU3 ;
SET V-NOT-PL1 = VFIN-NOT-IMP - V-PL1 ;
SET V-NOT-PL2 = VFIN-NOT-IMP - V-PL2 ;
SET V-NOT-PL3 = VFIN-NOT-IMP - V-PL3 ;
SET V-1-2 = V-SG1 OR V-SG2 OR V-DU1 OR V-DU2 OR V-PL1 OR V-PL2 ;
SET V-3 = V-SG3 OR V-DU3 OR V-PL3 ;
# Sets consisting of LEAT
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# LEAT: the three lemma forms listed here: "leat", "liehket", "lea".
LIST LEAT = "leat" "liehket" "lea" ;
# LEAT-FIN-NOT-IMP: LEAT with Imprt readings removed.
# NOTE(review): despite the name, nothing here restricts the set to finite forms.
SET LEAT-FIN-NOT-IMP = LEAT - Imprt;
# VPRFPRC: any verb reading tagged PrfPrc.
LIST VPRFPRC = (V PrfPrc) ;
# LEATPRFPRC: PrfPrc readings of the three LEAT lemma forms.
LIST LEATPRFPRC = ("leat" PrfPrc) ("liehket" PrfPrc) ("lea" PrfPrc) ;
# PRC-NOT-LEAT: PrfPrc readings of verbs other than the LEAT lemma forms.
SET PRC-NOT-LEAT = VPRFPRC - LEATPRFPRC ;
# In order to distinguish between real leat-constructions and participles of other verbs
# HABEO-SG3 / HABEO-PL3: third person singular / plural readings of the LEAT
# lemma forms, of "šaddat"/"sjaddat", and of any verb reading tagged Neg.
LIST HABEO-SG3 = ("leat" Sg3) (V Neg Sg3) ("šaddat" Sg3) ("sjaddat" Sg3) ("liehket" Sg3) ("lea" Sg3) ;
LIST HABEO-PL3 = ("leat" Pl3) (V Neg Pl3) ("šaddat" Pl3) ("sjaddat" Pl3) ("liehket" Pl3) ("lea" Pl3) ;
# HABEO-3: union of the singular and plural sets above.
SET HABEO-3 = HABEO-SG3 OR HABEO-PL3 ;
# Pronoun sets
# ------------
LIST MUN = (Pron Pers Sg1 Nom);
LIST DON = (Pron Pers Sg2 Nom);
LIST SON = ("son" Pron Pers Sg3 Nom);
LIST MOAI = (Pron Pers Du1 Nom);
LIST DOAI = (Pron Pers Du2 Nom);
LIST SOAI = (Pron Pers Du3 Nom);
LIST MII-PERS = (Pron Pers Pl1 Nom);
LIST DII = (Pron Pers Pl2 Nom);
LIST SII = ("son" Pron Pers Pl3 Nom);
SET PPRON-NOM-NOT-DAT = MUN OR DON OR SON OR MOAI OR DOAI OR SOAI OR MII-PERS OR DII OR SII ;
SET PPRON-NOT-DAT = (Pron Pers) - ("dat") ;
SET PPRON-DU-PL = MOAI OR DOAI OR SOAI OR MII-PERS OR DII OR SII ;
SET PPRON-PL = MII-PERS OR DII OR SII ;
SET PRON-DU = MOAI OR DOAI OR SOAI ;
SET PPRON-NOT-SII = MUN OR DON OR SON OR MOAI OR DOAI OR SOAI OR MII-PERS OR DII ;
LIST PPRON-GEN = (Sg1 Gen) (Sg2 Gen) (Sg3 Gen) (Du1 Gen) (Du2 Gen) (Du3 Gen)
(Pl1 Gen) (Pl2 Gen) (Pl3 Gen);
SET PPRON-NOT-GEN = (Pron Pers) - PPRON-GEN ;
LIST DEM-SG = (Pron Dem Sg Nom);
LIST DEM-PL = (Pron Dem Pl Nom);
SET NOT-DEM = WORD - Dem ;
LIST SGPRON = (Pron Sg1) (Pron Sg2) (Pron Sg3) (Pron Sg) (Pron PxSg1) (Pron PxSg2) (Pron PxSg3);
LIST DUPRON = (Pron Du1) (Pron Du2) (Pron Du3) (Pron PxDu1) (Pron PxDu2) (Pron PxDu3);
LIST PLPRON = (Pron Pl1) (Pron Pl2) (Pron Pl3) (Pron Pl) (Pron PxPl1) (Pron PxPl2) (Pron PxPl3);
LIST DUPRON-NOTPX = (Pron Du1) (Pron Du2) (Pron Du3) ;
SET PRON-NOT-SG = DUPRON OR PLPRON ;
LIST DAT-PRON = ("dat") ("dát") ("diet") ("duot") ("dot") ;
LIST QUANT-PRON = "ollu" "olu" "unnán" "váháš" "veaháš" "veháš" ;
# This set is for choosing between Adv and Pron Indef.
# Adjectival sets and their complements
# -------------------------------------
SET NOT-A = WORD - A ;
SET NOT-A-COMMA = WORD - A - COMMA ;
SET NOT-Attr = WORD - Attr ;
SET NOT-A-PCLE = WORD - A - Pcle ;
SET NOT-A-ADV = WORD - A - Adv OR ("maid") ;
LIST NOMINAL-ADJ = "guoktilaš" "lámis" "oasálaš" ("suddu" Der/laš) "viissis";
# and many others
# Adverbial sets and their complements
# ------------------------------------
SET LEX-ADV = Adv - (A*) ;
SET NOT-ADV-DE = WORD - Adv ;
SET NOT-ADV = NOT-ADV-DE OR ("de" Adv) OR CLB ;
SET NOT-ADV-N = NOT-ADV - N;
SET NOT-ADV-PCLE = NOT-ADV - Pcle ;
SET NOT-ADV-INDEF = NOT-ADV - Indef ;
SET NOT-ADV-PCLE-ILL = WORD - Adv - Pcle - Ill ;
SET NOT-ADV-PCLE-Refl = WORD - Adv - Pcle - Refl ;
SET NOT-ADV-PCLE-INDEF = WORD - Adv - Pcle - Indef ;
SET NOT-ADV-PCLE-NEG = WORD - Adv - Pcle - Neg ;
SET NOT-ADVL-PCLE-NEG = WORD - @ADVL - @P< - Pcle - Neg ;
LIST MO-MANge = "goas" "gokko" "gos" "gosa" "govt" "makkár" "man" "manne" "mo" "mot" "mov" "movt" ("nugo" @CVP) (V Qst);
SET MO = MO-MANge - ("man" Foc/ge) ;
# Introduce finite clauses.
LIST PLACE-ADV = "davá#bealde" "mátta#bealde" "nuortta#bealde" "oarje#bealde" "olggo#beale" ;
# There will usually be a Gen in front.
LIST TIME-ADVL = "dalle" "diibmá" "dolin" "dovle" "duvle" "eske" "gieskat" "ikte" "ovdal" ("ovdditbeaivi" Gen) ("vássánáigi" Loc) ("boaresáigi" Gen) "dál" "ihttin" "odne" "otne" ("boahtteáigi" Loc) ("boahtteáigi" Gen) ;
LIST TIME-ADV = (Adv Sem/Time) ;
LIST DOPPE = "badjin" "bajil" "dakko" "dá" "dákko" "dáppe" "diekko" "dieppe" "do" "dokko" "doppe" "duo" "duokko" "duoppe" "olgun" ;
# Adverbs with a locative form that do not get Loc as a tag.
LIST DOHKO = "bajás" "deike" "diehke" "diehko" "dohko" "duohko" "lulás" "olggos" "ruoktot" "sisa" "vuovdimassii" ;
# Adverbs with an illative form that do not get Ill as a tag.
# Coordinators
# ------------
SET Foc = Foc/ge OR Foc/gen OR Foc/ges OR Foc/gis OR Foc/naj OR Foc/ba OR Foc/be OR Foc/hal OR Foc/han OR Foc/bat OR Foc/son ;
LIST NEGFOC = (Neg Foc/ge) ;
LIST XGO = "dego" "dugo" "nugo" "seammaládjego" "seammaláhkaigo" ;
# Compounds
LIST SEAMMAX = "seamma#ládje" "seamma#láhkai" "seamma#láhkái" ;
# Those combine with go
LIST MADE = "mađe" "mađi" ;
LIST DADE = "dađe" "dađi" ;
SET CRD = @CNP OR COMMA OR NEGFOC OR XGO OR ("/") OR ("-") OR DADE ;
# AFTER LCRD vs. GCRD disambiguation
LIST ADV-AFTER-NUM = "geardde" ;
LIST DUSSE = "áibbas" "dušše" "erenoamážit" "goitge" "man" "measta" "meastui" "oalle" "oba" "oktiibuot" "oppa" "sullii" ;
LIST GRADE-ADV = "áibbas" "beare" ("eanet" Adv) "erenoamán" "erenoamáš" "hirbmat" "hui" "ila" "issoras" ("man" Adv) ("mealgat" Adv) "measta" "menddo" "muđui" "muhtun_muddui" "nu" "oalle" "oba" ("oppa" Adv) "sakka" "seammá" ("veaháš" Adv) ("uhccán" Adv) ("unnán" Adv) "vehá" "veháš" "veahá" "veaháš" "viehka" ;
LIST BUOT = "buot" "gait" "gaitin" "gaitdivnnat" "visot" ;
LIST EASKKA = "easkka" ("easka" Adv) ;
LIST ADV-NOT-ARG = "aiddo" "ain" "aivve" "albma" "aŋkke" "álggos" "bái#fáhkka" "beanta" "beare" ;
LIST ADV-NOT-VERB = "dušše" "viimmat" ;
LIST ADV-NOT-NOUN = "easka" "várra" ;
LIST ADV-NOT-NAMES = "Ain" "Anne" "Diego" "Dieppe" "Enge" "Galle" "Haga"
"Joba" "Johan" "Liikka" "Mai" "Mannes" "Mo" "Mot" "Naba"
"Nan" "Oktan" "Sierra" "Sokka" "Villa" ;
SET ADV-NOT-OTHER-POS = ADV-NOT-VERB OR ADV-NOT-NOUN OR ADV-NOT-NAMES ;
LIST HAB-CASE = Loc (<smj> Ine) (<sma> Gen) ;
LIST HAB-ACTOR-ALL = Sem/Hum Sem/Mal Sem/Sur Sem/Fem Sem/Ani Pers ("gii") Indef Coll ;
SET HAB-ACTOR = HAB-ACTOR-ALL - ("cihca") ;
SET HAB-ACTOR-NOT-HUMAN = Sem/Org ;
# Sets of elements with common syntactic behaviour
# ================================================
SETS
#!! * Sets for verbs
# -----
LIST NOT-REAL-V = (Actio Nom) (Actio Gen) (Actio Loc) (Actio Com) PrsPrc ;
SET REAL-V = V - NOT-REAL-V ;
#!! - V is all readings with a V tag in them, REAL-V should
#!! be the ones without an N tag following the V.
#!! The REAL-V set thus awaits a fix to the preprocess V ... N bug.
# The set REAL is smaller than COPULAS, made for verbs with PrfPrc complements: Seammás REAL-COPULAS son dovdan iežas...
LIST REAL-COPULAS = "dáidit" "leat" "soaitit" "veadjit" "liehket" "lea";
#!! * The set COPULAS is for predicative constructions
LIST COPULAS = "dáidit" "gártat" "leat" "soaitit" "šaddat" "orrut" "veadjit" "liehket" "sjaddat" "lea" "sjïdtedh" ;
# "bissut" ?
# 'Dáidit' can appear without 'leat'.
SET NOT-COP-V = V - COPULAS ;
SET MAIN-V = V - FAUXV ;
# All active verbs with a TV tag, including AUX-OR-MAIN.
LIST V-TRANS = (V TV) (IV Der/ahtti) (IV Der/h) ;
SET TRANS-V = V-TRANS - Der/Pass + REAL-V ;
#!! * NP sets defined according to their morphosyntactic features
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
LIST N-SG-NOM = (N Sg Nom);
SET HEAD-N = N - RCmpnd ;
SET HEAD-N-NOM = (N Nom) - RCmpnd ;
SET SUBJECTHEAD = N OR A OR Pron - Refl ; # These, can be subject heads
SET NP = N OR A ; # anything that can take except numerals
SET NP-HEAD = Pron OR HEAD-N - ("buorre") ;
SET NP-HEAD-SG = SGPRON OR (N Sg) OR (A Sg) - RCmpnd - Dem - ("buorre");
SET NP-HEAD-PL = PLPRON OR (N Pl) OR (A Pl) - RCmpnd - Dem - ("buorre");
SET NP-HEAD-SG-NOM = SGPRON + Nom OR (N Sg Nom) OR (A Sg Nom) - ("buorre") - RCmpnd ;
SET NP-HEAD-PL-NOM = PLPRON + Nom OR (N Pl Nom) OR (A Pl Nom) - RCmpnd - ("buorre");
SET NP-HEAD-NOM = NP-HEAD-SG-NOM OR NP-HEAD-PL-NOM ;
SET NP-HEAD-ACC = (Pron Acc) OR (N Acc) OR (A Acc) - RCmpnd - (Dem Attr) - ("buorre");
SET NP-HEAD-GEN = (Pron Gen) OR (N Gen) OR (A Gen) - Der/NomAct - RCmpnd - (Dem Attr) - ("buorre") ;
#!! * The PRE-NP-HEAD family of sets
#!! These sets model noun phrases (NPs). The idea is to first define whatever can
#!! occur in front of the head of the NP, and thereafter negate that with the
#!! expression **WORD - premodifiers**.
# PRE-NP-HEAD: the readings that may occur in front of the head of an NP.
# Fix: the action-form tag in this file is "Actio" (see LIST Actio and the
# (Actio Nom)/(Actio Gen) members of NOT-REAL-V).  The previous member
# "(Action Gen)" named a tag that is declared nowhere in the visible file, so
# genitive Actio readings were not recognised as premodifiers.
SET PRE-NP-HEAD =
    (Prop Attr) OR (Prop @>N) OR (A Attr) OR (ABBR Attr) OR ("buorre")
    OR (Pron Pers Gen) OR (N Gen) OR (A Gen) OR ("buot")
    OR Num OR RCmpnd OR CC OR (Pron Dem) OR (Pron Refl Gen) OR (Indef Attr)
    OR (PrfPrc @>N) OR PrsPrc OR (A Ord) OR (Num @>N) OR (A @>N)
    OR @>N OR @>A OR @>Pron OR @Num< OR (CC @CNP) OR (@>CC)
    OR (Actio Gen) OR (@Pron<) ;
# The strict version of items that can only be premodifiers, not parts of the predicate
#LIST PRE-NP-HEAD = @>N @>A @>Pron @Num< @CNP ;
SET PRE-NP-V = PrfPrc OR PrsPrc OR Der/NomAg OR Actio OR Der/NomAct OR (V A) OR (Ind Prs Sg3) OR (Ind Prs Sg1) OR (Imprt Du2) ;
# to be used together with PRE-NP-HEAD before @>N is disambiguated
SET NP-MEMBER = PRE-NP-HEAD OR N ;
# PRE-A-N: pronoun, numeral and ordinal readings listed as alternatives below.
SET PRE-A-N = (Pron Pers Gen) OR (Pron Pers Acc) OR (Pron Indef) OR Num OR (A Ord) OR (Pron Dem) OR (Pron Refl Gen) OR (Pron Refl Acc) ; # Acc is included because disambiguation is still missing this early in the file
SET NOT-PRE-A-N = WORD - PRE-A-N ;
LIST PUNCT-LEFT = (PUNCT LEFT) ;
LIST PUNCT-RIGHT = (PUNCT RIGHT) ;
SET PRE-APP = COMMA OR PUNCT-LEFT OR PRE-NP-HEAD ;
# This set is not only for what can
# stand in front of appositions but also
# for postmodifiers.
#!! The set **NOT-NPMOD** is used to find barriers between NPs.
#!! Typical usage: ... (*1 N BARRIER NOT-NPMOD) ...
#!! meaning: Scan to the first noun, ignoring anything that can be
#!! part of the noun phrase of that noun (i.e., "scan to the next NP head")
SET NOT-NPMOD = WORD - PRE-NP-HEAD OR ABBR ;
# This is the previous NPNH (npnh) set.
# NOT-NPMOD = "NOT-PRE-NP-HEAD"
SET NOT-NPMOD-ACC = NOT-NPMOD - Acc OR ABBR ;
SET NOT-NPMOD-ACC-ADV = NOT-NPMOD - Acc - Adv OR ABBR ;
# To be used in mapping rules, before the disambiguation of Acc and Gen.
SET NOT-NPMODADV = WORD - PRE-NP-HEAD - Adv ;
# NOT-NPMODADV = "NOT-PRE-NP-HEAD-OR-ADV"
SET NOT-NPMODADV-INDEF = WORD - PRE-NP-HEAD - Adv - Indef ;
## NOT-NPMODADVI = " ...-OR-INDEF"
SET NOT-NPMODADVII = WORD - PRE-NP-HEAD - Adv - Indef - Ill ;
# Illative indir.obj.
SET NOT-NPMODADVIIP = WORD - PRE-NP-HEAD - Adv - Indef - Ill - Pcle ;
# <== is this our NOT-NPMOD set?
SET NOT-NPMODCC = WORD - PRE-NP-HEAD - COMMA - @CNP ;
SET NAPP = WORD - PRE-APP ;
#!! * Miscellaneous sets
# ------------------
LIST Px = PxSg1 PxSg2 PxSg3 PxDu1 PxDu2 PxDu3 PxPl1 PxPl2 PxPl3 ;
LIST GASKAL = "gaskal" "gaskkal" "gaskii" "gaskka" "gaskkas" ;
# p-positions that like coordination
LIST TIME-PP = "badjel" "čađa" "earret" "gaskkal" "guovddáš" "maŋŋel" "maŋŋil" "maŋŋá" "miehtá" "ovdal" ;
SET NUM = Num - OKTA ;
# this set does not contain ordinals, I am not sure if that is necessary.
SET NOT-NUM = WORD - Num ;
LIST MANGA = "máŋga" "galle" ;
# Not referred to by any rule.
SET CARDINALS = Num - Ord - MANGA ;
SET NOT-CC = WORD - CC ;
SET NOT-PCLE = WORD - Pcle ;
LIST COMPAR = ("dávji" A Comp) "eanet" "earalágan" "eará" "earret" "seammás" "seammalágan" "seamma_láhkái" ;
# These combine with "go" Pcle, but COMPAR stands for 'comparison' rather than
# 'comparative'.
LIST CONTRA = "muhto" ;
# In lean muitalan, muhto dál muitalan.
LIST PROSEANTA = "proseanta" "%" ;
SET REAL-CLB = CLB - COMMA ;
SET NOT-INITIAL-CC = WORD - INITIAL - CC ;
#!! * Border sets and their complements
# ---------------------------------
# CP: interrogative pronouns, relative pronouns, and the MO word set.
SET CP = (Pron Interr) OR (Pron Rel) OR MO ;
# BOUNDARYSYMBOLS: semicolon (escaped), colon, hyphen and en dash as word forms.
LIST BOUNDARYSYMBOLS = "\;" ":" "-" "–" ;
# S-BOUNDARY: CP, the boundary symbols, "muhto", adverbial "de", (Neg Sup),
# the @CVP tag, and "vel" as a question-marked adverb.
SET S-BOUNDARY = CP OR BOUNDARYSYMBOLS OR ("muhto") OR ("de" Adv) OR (Neg Sup) OR @CVP OR ("vel" Adv Qst) ;
# Does not include CS, because of "go" in questions, before it is disambiguated.
# includes CP
# this one includes @CVP, the conjunction which actually connects two sentences (each with a finite verb) to each other,
# and not @CNP, which coordinates internal NP-/AdvP-/AP ... coordination
# To be used only AFTER the disambiguation of @CVP and @CNP taking place in the chapter right before "Disambiguating pronouns"
# BOC: an S-BOUNDARY or the sentence-start set BOS.
SET BOC = S-BOUNDARY OR BOS ;
# BOC-PUNCT: BOC without the hyphen and en-dash word forms.
SET BOC-PUNCT = BOC - ("-") - ("–") ;
# EOC: an S-BOUNDARY or the sentence-end set EOS.
SET EOC = S-BOUNDARY OR EOS ;
# NP-BOUNDARY: sentence edges, REAL-CLB, finite and listed non-finite verb
# forms, PPRON-NOT-GEN, reciprocals, adpositions, particles, the lemma "jed",
# interjections, CS, CP and @CVP.
SET NP-BOUNDARY = BOS OR EOS OR REAL-CLB OR VFIN OR Inf OR (Actio Ess) OR ConNeg OR VGen OR Sup OR PPRON-NOT-GEN OR Recipr OR Po OR Pr OR Pcle OR ("jed") OR Interj OR CS OR CP OR @CVP ;
# APP-BOUNDARY: similar to NP-BOUNDARY but without BOS, EOS, PPRON-NOT-GEN,
# "jed" and @CVP; it adds PrfPrc and ends with a subtraction of @>N.
SET APP-BOUNDARY = REAL-CLB OR VFIN OR Inf OR (Actio Ess) OR ConNeg OR VGen OR Sup OR Recipr OR Po OR Pr OR Pcle OR Interj OR CS OR CP OR PrfPrc - @>N ;
# A special barrier used with mapping of appositions.
# This set contains FMAINV with @, which means that it functions for all kind of mainverbs after the verb-mapping rules
SET SV-BOUNDARY = S-BOUNDARY OR Inf - FAUXV OR Sup OR FMAINV ;
# VFIN-NOT-AUX ;
# should be MAIN-V linked to VFIN-aux to the left. (cg-3)
# This set is meant to be used in rules that disambiguate on the basis of verbs or verb sets. It contains @-tags.
SET SVF-BOUNDARY = S-BOUNDARY OR VFIN ;
# This set is meant to be used in rules that disambiguate on the basis of verbs or verb sets.
# Here we search for either an S-BOUNDARY or a finite verb, either aux or main.
LIST ADVLCASE = Ill Loc Com Ess Ine Ela ;
LIST CASE = Nom Acc Gen Ill Loc Com Ess Ine Ela ;
#!! * Syntactic sets
# --------------
# ALLSYNTAG: the syntactic (@-) tags as one flat list.
# Fix: the non-finite object tags used in this file are directional, @-F<OBJ
# and @-FOBJ> (see LIST @-F<OBJ, LIST @-FOBJ>, SET FOBJ and SET OBJ), but only
# the undirected (@-FOBJ) was listed here, so readings carrying the directional
# tags were not covered.  Both are added; (@-FOBJ) is kept so that nothing that
# matched before stops matching.
# NOTE(review): @-FADVL, @ADVL>CS, @>Num and @>Pron are also declared as LISTs
# in this file but are not members here - confirm whether that is intentional.
LIST ALLSYNTAG =
    (@+FAUXV) (@+FMAINV) (@-FAUXV) (@-FMAINV)
    (@-FSUBJ>) (@-FOBJ) (@-F<OBJ) (@-FOBJ>) (@-F<ADVL) (@-FADVL>)
    (@>A) (@>ADVL) (@ADVL<) (@<ADVL) (@ADVL>) (@ADVL) (@>N)
    (@APP) (@APP-N<) (@APP-Pron<) (@APP>Pron) (@APP-Num<) (@APP-ADVL<)
    (@VOC) (@CNP) (@CVP) (@P<) (@>P) (@HNOUN) (@INTERJ) (@Pron<)
    (@OBJ) (@OBJ>) (@<OBJ) (@OPRED) (@<OPRED) (@OPRED>) (@SPRED<OBJ)
    (@PCLE) (@COMP-CS<) (@N<) (@SPRED) (@<SPRED) (@SPRED>) (@PPRED)
    (@Num<) (@SUBJ) (@<SUBJ) (@SUBJ>) (@X) ;
# NON-APP: every member of ALLSYNTAG except @APP.
SET NON-APP = ALLSYNTAG - (@APP) ;
#!! These were the set types.
SECTION #
#!! ## HABITIVE MAPPING
# Habitives - HAB
#!! * **hab1**
# hab1: map <hab> @ADVL> onto the target when
#   - it is not the case that the target is HAB-ACTOR-NOT-HUMAN with a HUMAN
#     further right (scan stops at (N Nom)), and
#   - the next cohort is FAUXV + HAB-V-TAGS, followed somewhere to the right by
#     LEAT (scan stops at NP-HEAD or MAIN-V).
MAP:hab1 (<hab> @ADVL>)
    TARGET HAB-ACTOR + HAB-CASE - Attr
    IF (NEGATE 0 HAB-ACTOR-NOT-HUMAN LINK *1 HUMAN BARRIER (N Nom))
       (1 FAUXV + HAB-V-TAGS LINK *1 LEAT BARRIER NP-HEAD OR MAIN-V) ;
#$ Mus eai leat girjjit mielde.
#% Dorskis sáhttet leat vuoivvas ja meađđemat.
# hab_numo: map <hab> @ADVL> when the next cohort is COPULAS + HAB-V-TAGS,
# immediately followed by a comma, then (anywhere to the right) another comma
# that is immediately followed by (N Nom).  The HAB-ACTOR-NOT-HUMAN ... HUMAN
# configuration vetoes the mapping.
MAP:hab_numo (<hab> @ADVL>)
    TARGET HAB-ACTOR + HAB-CASE - Attr
    IF (NEGATE 0 HAB-ACTOR-NOT-HUMAN LINK *1 HUMAN BARRIER (N Nom))
       (1 COPULAS + HAB-V-TAGS LINK 1 COMMA LINK *1 COMMA LINK 1 (N Nom)) ;
#%Dus lea, nu mo buot eará bargiin, vuoigatvuohta oahppasoahpamuša lassin maiddái čálalaš bargosoahpamuššii oahppafitnodagain.
#!! * **hab2**
# hab2: as hab1, but with an Adv directly after the auxiliary: the next cohort
# is FAUXV + HAB-V-TAGS, the one after it is Adv, and LEAT follows to the right
# (scan stops at NOT-ADV-PCLE).
MAP:hab2 (<hab> @ADVL>)
    TARGET HAB-ACTOR + HAB-CASE - Attr
    IF (NEGATE 0 HAB-ACTOR-NOT-HUMAN LINK *1 HUMAN BARRIER (N Nom))
       (1 FAUXV + HAB-V-TAGS LINK 1 Adv LINK *1 LEAT BARRIER NOT-ADV-PCLE) ;
#!! * **hab3** (<hab> @ADVL>) for hab-actor and hab-case; if leat to the right, and Nom to the right of leat. Lots of restrictions.
# hab3: map <hab> @ADVL> when LEAT + HAB-V-TAGS is found to the right (scan
# stops at NOT-ADV-PCLE) and a Nom follows it (scan stops at S-BOUNDARY).
# The six NEGATE contexts, one per line below, each veto the mapping.
MAP:hab3 (<hab> @ADVL>)
    TARGET HAB-ACTOR + HAB-CASE - Attr
    IF (NEGATE 0 Sem/Group OR Pl - Rel LINK *-1 Indef - BUOT + Nom OR HUMAN + Nom BARRIER NOT-NPMOD-ACC)
       (NEGATE 0 HAB-ACTOR-NOT-HUMAN LINK *1 HUMAN BARRIER (N Nom))
       (NEGATE 0 (Sem/Ani Sem/Group) LINK *1 Sem/Ani BARRIER (N Nom))
       (NEGATE 0 (Sem/Hum Sem/Group) LINK *1 Sem/Hum BARRIER (N Nom))
       (NEGATE 0 N OR Pers OR Indef LINK *-1 HAB-ACTOR + HAB-CASE BARRIER NOT-NPMOD-ACC OR CC)
       (NEGATE -1 Num LINK -1 Num)
       (*1 LEAT + HAB-V-TAGS BARRIER NOT-ADV-PCLE LINK *1 Nom BARRIER S-BOUNDARY) ;
#$ Máhtes lea beana.
#% Juos gorreválggain Sámedikkis lea dárbbašlaš deavdit gáibádusa, ahte okta stivrralahtuin ja su sadjásašlahttu galgaba leat boazodoalu ovddasteaddjit, de galgá boazodoalu ovddasteddjiid loktet badjeliidda dan listtas, mii lea ožžon uhcimus jienaid daid listtaid gaskkas, mat galget ovddastuvvot ja main leat dakkár evttohasat.
#% Ealus leat eanaš álddut.
# habInf: map <hab> @ADVL> when the next cohort is LEAT + Sg3 and the cohort
# after that is an infinitive.  The target expression also admits
# ("dat" Dem) readings.
MAP:habInf (<hab> @ADVL>)
    TARGET HAB-ACTOR + HAB-CASE OR ("dat" Dem) + HAB-CASE - Attr
    IF (1 LEAT + Sg3 LINK 1 Inf) ;
#$ Mis lea cahkkehit dola.
#!! * **habNomLeft**
# habNomLeft: map <hab> @ADVL> when
#   - the cohort to the left is Nom, or it is (Sg Gen) preceded by
#     (Sg Num Nom); in both alternatives no (Pers Gen) may be found further
#     left (scan stops at NOT-NPMOD),
#   - the two cohorts to the left are not both Num, and
#   - LEAT + HAB-V-TAGS follows to the right (scan stops at NOT-ADV-PCLE).
MAP:habNomLeft (<hab> @ADVL>)
    TARGET HAB-ACTOR + HAB-CASE - Attr
    IF ((-1 Nom LINK NEGATE *-1 (Pers Gen) BARRIER NOT-NPMOD)
        OR (-1 (Sg Gen) LINK -1 (Sg Num Nom) LINK NEGATE *-1 (Pers Gen) BARRIER NOT-NPMOD))
       (NEGATE -1 Num LINK -1 Num)
       (*1 LEAT + HAB-V-TAGS BARRIER NOT-ADV-PCLE) ;
#$ Mii dus lea?
# habAdvl: map <hab> @ADVL> when another HAB-CASE is found to the right and
# LEAT + HAB-V-TAGS follows it, either directly in the scan (first
# alternative) or after an intervening FAUXV (second alternative).
# The three NEGATE contexts veto the mapping.
MAP:habAdvl (<hab> @ADVL>)
    TARGET HAB-ACTOR + HAB-CASE - Attr
    IF (NEGATE 0 HAB-ACTOR-NOT-HUMAN LINK *1 HUMAN BARRIER (N Nom))
       (NEGATE 0 (Sem/Ani Sem/Group) LINK *1 Sem/Ani BARRIER (N Nom))
       (NEGATE 0 (Sem/Hum Sem/Group) LINK *1 Sem/Hum BARRIER (N Nom))
       ((*1 HAB-CASE BARRIER NOT-NPMOD-ACC LINK *1 LEAT + HAB-V-TAGS BARRIER NOT-ADV-PCLE)
        OR (*1 HAB-CASE BARRIER NOT-NPMODADV LINK *1 FAUXV LINK *1 LEAT + HAB-V-TAGS BARRIER NOT-ADV-PCLE)) ;
#$ Ii han ovttasge du sogas leat dat namma.
#!! * **hab4**
# hab4: map <hab> @ADVL> when the next cohort is CC, a HAB-ACTOR follows it
# (scan stops at NOT-NPMOD), and LEAT + HAB-V-TAGS follows that (scan stops at
# NOT-ADV-PCLE).  The HAB-ACTOR-NOT-HUMAN ... HUMAN configuration vetoes it.
MAP:hab4 (<hab> @ADVL>)
    TARGET HAB-ACTOR + HAB-CASE - Attr
    IF (NEGATE 0 HAB-ACTOR-NOT-HUMAN LINK *1 HUMAN BARRIER (N Nom))
       (1 CC LINK *1 HAB-ACTOR BARRIER NOT-NPMOD LINK *1 LEAT + HAB-V-TAGS BARRIER NOT-ADV-PCLE) ;
#!! * **hab6**
# hab6: map <hab> @<ADVL when, scanning left (stopping at NOT-NPMOD), either
#   - "go" is found, directly preceded by LEAT + HAB-V-TAGS, which is directly
#     preceded by BOC, or
#   - LEAT + Qst is found, directly preceded by BOC.
MAP:hab6 (<hab> @<ADVL)
    TARGET HAB-ACTOR + HAB-CASE - Attr
    IF ((*-1 go BARRIER NOT-NPMOD LINK -1 LEAT + HAB-V-TAGS LINK -1 BOC)
        OR (*-1 LEAT + Qst BARRIER NOT-NPMOD LINK -1 BOC)) ;
#$ Lea go dis ruhta?
#$ Leago sámeálbmogis vuoigatvuođat vai eai?
#!! * **hab7**
# hab7: map <hab> @<ADVL when "go" is found to the left (scan stops at
# NOT-NPMOD), directly preceded by LEAT + HAB-V-TAGS, which in turn is
# directly preceded by a MAIN-V.
MAP:hab7 (<hab> @<ADVL)
    TARGET HAB-ACTOR + HAB-CASE - Attr
    IF (*-1 go BARRIER NOT-NPMOD LINK -1 LEAT + HAB-V-TAGS LINK -1 MAIN-V) ;
#$ Dalle mun ferten iskat lea go dus feber.
#!! * **hab8** This is not HAB
#MAP:hab8 (<hab> @ADVL>) TARGET HAB-ACTOR + Ill IF (*-1 BOS BARRIER NOT-NPMODADV)(*1 COPULAS BARRIER NOT-ADV-PCLE LINK *1 A OR N BARRIER NOT-ADV-PCLE);
#$ Ellii šattai hoahppu.
#!! * **hab5** This is not HAB
#MAP:hab5 (<hab> @ADVL>) TARGET HAB-ACTOR + HAB-CASE (NEGATE 0 HAB-ACTOR-NOT-HUMAN LINK *1 HUMAN BARRIER (N Nom))(*1 HAB-V BARRIER NOT-ADV-PCLE LINK *1 Nom BARRIER VFIN);
#$ Mánás gollot gieđat.
# hab9: map <hab> @ADVL> on an ordinal in HAB-CASE that directly follows a
# FIRSTNAME, when LEAT + HAB-V-TAGS is found to the right before
# NOT-ADV-PCLE.
# Blocked when the target matches HAB-ACTOR-NOT-HUMAN and a HUMAN cohort is
# found to the right before an (N Nom).
MAP:hab9 (<hab> @ADVL>)
    TARGET HAB-CASE
    IF (-1 FIRSTNAME)
       (0 Ord)
       (NEGATE 0 HAB-ACTOR-NOT-HUMAN LINK *1 HUMAN BARRIER (N Nom))
       (*1 LEAT + HAB-V-TAGS BARRIER NOT-ADV-PCLE)
;
#$ Heinrich njealjádis lea rikkis eamit.
# hab10: map <hab> @<ADVL when the verb is to the left of the target.
# Conditions, in order:
#   1. not (target matches HAB-ACTOR-NOT-HUMAN OR Sem/Group, and a
#      HUMAN + Nom is found to the right before S-BOUNDARY);
#   2. not (target matches HAB-ACTOR-NOT-HUMAN, and a Num is found to the
#      right before S-BOUNDARY, immediately followed by HUMAN + Gen);
#   3. LEAT + HAB-V-TAGS is found to the left (barrier: Nom, S-BOUNDARY or
#      MAIN-V), immediately preceded by ADVLCASE, Adv or Po;
#   4. a Nom is found to the right before NOT-NPMOD-ACC, or the next cohort
#      is CC followed by a HAB-CASE and then a Nom (same barrier each time).
MAP:hab10 (<hab> @<ADVL)
    TARGET HAB-ACTOR + HAB-CASE
    IF (NEGATE 0 HAB-ACTOR-NOT-HUMAN OR Sem/Group LINK *1 HUMAN + Nom BARRIER S-BOUNDARY)
       (NEGATE 0 HAB-ACTOR-NOT-HUMAN LINK *1 Num BARRIER S-BOUNDARY LINK 1 HUMAN + Gen)
       (*-1 LEAT + HAB-V-TAGS BARRIER Nom OR S-BOUNDARY OR MAIN-V LINK -1 ADVLCASE OR Adv OR Po)
       ((*1 Nom BARRIER NOT-NPMOD-ACC)
        OR (1 CC LINK *1 HAB-CASE BARRIER NOT-NPMOD-ACC LINK *1 Nom BARRIER NOT-NPMOD-ACC))
;
#$ Dál leat sus 137 gáicca.
#$ De lea sihke divššohasas ja su lagamus oapmahaččas vuoigatvuohta oažžut dieđuid.
#$ Doppe leamaš sámiin stuora deaddu.
#!! * **habDain** (<hab> @ADVL>) for (Pron Dem Pl Loc) if leat to the right is immediately followed by Nom
# habDain: map <hab> @ADVL> on (Pron Dem Pl Loc) when LEAT + HAB-V-TAGS is
# found to the right (barrier: NOT-ADV-PCLE - FAUXV) and the cohort directly
# after that verb is Nom.
MAP:habDain (<hab> @ADVL>)
    TARGET (Pron Dem Pl Loc)
    IF (*1 LEAT + HAB-V-TAGS BARRIER NOT-ADV-PCLE - FAUXV LINK 1 Nom)
;
# habDain2: map <hab> @ADVL> on (Pron Dem Pl Loc) when the previous cohort is
# Nom preceded by (Pron Interr), and LEAT + HAB-V-TAGS is found to the right
# (barrier: NOT-ADV-PCLE - FAUXV) with an Ess cohort somewhere after it.
MAP:habDain2 (<hab> @ADVL>)
    TARGET (Pron Dem Pl Loc)
    IF (-1 Nom LINK -1 (Pron Interr))
       (*1 LEAT + HAB-V-TAGS BARRIER NOT-ADV-PCLE - FAUXV LINK *1 Ess)
;
#$ Hui dávjá lea nu ahte dain leat olbmot, geat eai doarvái bures dovdda sámi kultuvrra eaige sin jurddašanvuogi.
#$ Makkár vuoigatvuođat dain leat Norgga riikavuložin?
#% Dat bulle ja goldne go dain ii leat ruohtas.
#Comment: hard to determine whether 'dain' is HAB or ADVL.
# before relative clause
# habRel: map <hab> @ADVL> on a target that is followed by a relative clause.
# A Rel must be found to the right (barrier: WORD), then an FMAINV before
# S-BOUNDARY, then LEAT + HAB-V-TAGS before S-BOUNDARY or MAIN-V.
# Blocked when the target matches HAB-ACTOR-NOT-HUMAN and a HUMAN cohort is
# found to the right before an (N Nom).
MAP:habRel (<hab> @ADVL>)
    TARGET HAB-ACTOR + HAB-CASE
    IF (NEGATE 0 HAB-ACTOR-NOT-HUMAN LINK *1 HUMAN BARRIER (N Nom))
       (*1 Rel BARRIER WORD LINK *1 FMAINV BARRIER S-BOUNDARY LINK *1 LEAT + HAB-V-TAGS BARRIER S-BOUNDARY OR MAIN-V)
;
#$ Divššohasas, gii dárbbaša guhkit áiggi ja oktiiheivehuvvon dearvvašvuođabálvalusaid, lea vuoigatvuohta oažžut ráhkaduvvot oktagaslaš plána.
# habEllipse: map <hab> @<ADVL on a target in a clause with no verb of its own.
# Right context: the next cohort is Adv, directly followed by (N Nom).
# Left context: the previous cohort is COMMA, preceded by (N Nom); scanning
# left from there (barrier: NOT-NPMOD) LEAT + HAB-V-TAGS is found, and the
# cohort directly before that verb is HAB-ACTOR + HAB-CASE.
MAP:habEllipse (<hab> @<ADVL)
    TARGET HAB-ACTOR + HAB-CASE
    IF (1 Adv LINK 1 (N Nom))
       (-1 COMMA LINK -1 (N Nom) LINK *-1 LEAT + HAB-V-TAGS BARRIER NOT-NPMOD LINK -1 HAB-ACTOR + HAB-CASE)
;
#$ Buot gánddain lea dreassa, nieiddain fas gákti.
#!! * **habGen** (<hab> @<ADVL) hab for Gen; if Gen is located at the end of the sentence and Nom is sentence initial
# habGen: map <hab> @<ADVL on a genitive HAB-ACTOR whose next cohort is EOC.
# LEAT + HAB-V-TAGS must be found to the left (barrier: NOT-NPMODADV), and
# the cohort directly before that verb must be (N Nom) or (Dem Nom).
# NOTE(review): the rule tests only the cohort immediately left of the verb;
# it does not itself check that this nominative is sentence-initial.
MAP:habGen (<hab> @<ADVL)
    TARGET HAB-ACTOR + Gen
    IF (*-1 LEAT + HAB-V-TAGS BARRIER NOT-NPMODADV LINK -1 (N Nom) OR (Dem Nom))
       (1 EOC)
;
#$ Dát lea áhči.
#$ Riššat dat gal leat musge, jus eai leačča njuoskan.
#$ Dán čieža oassálasti searvvis ii lean oktage boazodoalli, muhto kurssa oahpaheaddjis, Kjell Smestadas, lea doaivva ahte boazodolliide maid galgá gávdnot sierra heivehuvvon fálaldat.
#$ Dás ii gávdno eará ágga go ahte njunuš olbmuin gielddas lea vuosteháhku sámegillii, go jo ákkat dán mearrádussii maid váilot, čujuha son das mii dáhpáhuvvá gieldda siste politihkalaččat.
#$ Mu mielas goit galggaše sáhttit ovttasbargat, sis dat han leat ollu seamma beroštumit, galggaše goit sin áigumušaid ektui, luonddu gáhttet.
#$ Ja son ii dahkan maidege dan jagi, muhto bođii fas nuppi jagi, ja de ledje sus guoimmit mielde, ja sii dahke darfegoađi vuohččan ja vuojehedje sámiid eret dasnai.
#$ Sámediggi berošta erenoamážit das ahte ođđa boazodoalloláhka galgá sihkkarastit ja ovddidit boazodoalu vai mis maiddái boahtteáiggis sáhttá leat ceavzilis boazodoallu.
#$ Dás lei buohkain vejolašvuohta vuoitit jus beare ledje doarvái áirruid oastán ja dan gal maiddái ollusat dahkege.
#$ Juohke darfegoađášlágan goahti muohtaduoddaris ferte dohkket skuvlan; dás galget nuorat, čohkut dahje veallut duorggaid alde buolli árrandola ovddas vuostáiváldit oahpahusa olbmáin, geain alddiset lei unnán máhttu.
#$ Buot gánddain lea dreassa, nieiddain fas gákti.
#$ Doppe leamaš sámiin stuora deaddu.
#$ Giellakantuvrras ii leat formála váldi álggahit gielladoaimmaid etáhtain, muhto veahkehit.
#$ Duođalaš nana jáhku geažil lea divššohasas vuoigatvuohta biehttalit vuostáváldimis vara dahje varabuktagiid dahje biehttalit botkemis nealgudeami mii lea jođus.
#$ Sin mearridanorgánain berre mearridanváldi leat nu viiddis go vejolaš ja nu čadni go vejolaš.
#$ Sámedikkiin lea ráđđeaddi váldi buot sámi beroštumiide guoskevaš áššiin.
#$ Sápmelaččat oasálaste maid dasa, muhto lassin ledje sis vel eará resursat.
#% Olbmot jáhkke ahte stáhtainternáhtain lei buoret dássi, muhto dat guhkin eret duhtadedje dáláš gáibádusaid.
#% Dan ulbmil das lea nannet oktavuođa ja mearridit vejolaš ovttasbargosurggiid mat leat ealáhusa oasálaččaide ávkin.
#% Earát su luohkás ledje juo vissa njeallje siiddu su ovdalis matematihkka-girjjis.
#% Mánáid-skuvllas gal lei mannan bures, muhto go álggii nuoraid-skuvlii, de ii gillen šat oba bargat ge rehkenastimiin.
#% Dávjá muhtun sin fulkkiin lei várjaleaddji, doarjjan sidjiide.
#% Das leat buorit lanjat čáppa guovllus.
#% Das leat mielde olbmot geain lea sihke suoma ja sámi gullevašvuohta.
#% Dat bulle ja goldne go dain ii lean ruohtas.
#% Joavkkus leat guokte Riikaoasselávdegotti ovddasteaddji ja 2 Sámedikkis.
#% Jus lea sáhka 12 jagi deavdán mánás, gii ii leat ovtta oaivilis mearrádusain, gozihanlávdegoddi galgá gieđahallat mearrádusa.
#% Muhtin biirres leat gávcci 12 áirasis leamaš nissonolbmot, ja eará biirres leat buohkat geat leat válljejuvvon leamaš dievddut.
#% Politihkas lei 1866 rájes sáhka ahte galggai go stivrregoahtit gii oažžu eatnama láigohit stáhtas.
#% Searvegottiin lei dál ráfi miehtá Judea ja Galilea ja Samaria.
#% Soabadanráđis galget leat golbma miellahtu ja seamma ollu várrelahtut.
#% Sámedikkis lea leamaš čoahkkin 14.03.01.
# pcle: every Pcle target is mapped to @PCLE, with no context condition.
MAP:pcle (@PCLE)
    TARGET Pcle
;
# interj: every Interj target is mapped to @INTERJ, with no context condition.
MAP:interj (@INTERJ)
    TARGET Interj
;
# >P: a Gen directly followed by a Po is mapped to @>P.
# NOTE(review): a later rule in this file reuses the rule name ">P", so trace
# output cannot tell the two apart by name alone.
MAP:>P (@>P)
    TARGET Gen
    IF (1 Po)
;
# >Pcoor: a Gen is mapped to @>P when the next cohort is CC and a cohort
# already carrying @>P is found to the right of it before NOT-NPMOD.
MAP:>Pcoor (@>P)
    TARGET Gen
    IF (1 CC LINK *1 @>P BARRIER NOT-NPMOD)
;
# >P (Pr ... Gen variant): a Gen that is not TIME-N is mapped to @>P when the
# next cohort is Pr directly followed by another Gen, and the cohort to the
# left is not Pr.
# NOTE(review): shares the rule name ">P" with an earlier rule in this file.
MAP:>P (@>P)
    TARGET Gen - TIME-N
    IF (NEGATE -1 Pr)
       (1 Pr LINK 1 Gen)
;
#$ Sápmelaččat guđet orrot Norgga bealde Deanu, geavahit seammá gávtti go sápmelaččat Suoma bealde Deanu.
# p<: a Gen is mapped to @P< when a Pr is found to the left before NOT-NPMOD
# and the next cohort is not Gen.
MAP:p< (@P<)
    TARGET Gen
    IF (*-1 Pr BARRIER NOT-NPMOD)
       (NEGATE 1 Gen)
;
# p<coor: a Gen is mapped to @P< when the previous cohort is CRD or COMMA and
# the cohort before that already carries @P<.
MAP:p<coor (@P<)
    TARGET Gen
    IF (-1 CRD OR COMMA LINK -1 @P<)
;
#$ Maŋŋegiđa ja árrageasi guohtu boazu lulábeali rámaid, čohkaid ja jekkiid.
# JAHKI: the two base forms "jahke" and "jahki"; used as a negative
# condition in num< below.
LIST JAHKI =
    "jahke"
    "jahki"
;

# num<: (N Sg Gen) is mapped to @Num< when a NUM in (Sg Nom) or (Sg Acc) is
# found to the left before NOT-A, and the cohort directly before that
# numeral does not match JAHKI.
MAP:num< (@Num<)
    TARGET (N Sg Gen)
    IF (*-1 NUM + (Sg Nom) OR NUM + (Sg Acc) BARRIER NOT-A LINK NOT -1 JAHKI)
;
# n<titel ("jr"/"sr"): the base forms "jr" and "sr" are mapped to @N< when
# the previous cohort is Prop.
# NOTE(review): a later rule in this file reuses the rule name "n<titel".
MAP:n<titel (@N<)
    TARGET ("jr") OR ("sr")
    IF (-1 Prop)
;
#$ Loahpas jearai Johttisámelisttu Anders Somby jr. sudnos njuolga háliida go čearru gulahallat, juo dahje ii.
# n<titel (initials): an INITIAL is mapped to @N< when the previous cohort is
# N, or when it is CC preceded by another INITIAL that itself follows an N.
# Blocked when the next cohort is Prop, Num or INITIAL.
# NOTE(review): shares the rule name "n<titel" with an earlier rule in this file.
MAP:n<titel (@N<)
    TARGET INITIAL
    IF ((-1 N)
        OR (-1 CC LINK -1 INITIAL LINK -1 N))
       (NEGATE 1 Prop OR Num OR INITIAL)
;