-
Notifications
You must be signed in to change notification settings - Fork 65
/
Parser.i6t
4178 lines (3549 loc) · 146 KB
/
Parser.i6t
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
Parser Template.
The parser for turning the text of the typed command into a proposed
action by the player.
@h Identification.
=
! Identifies this kit as being present in the build. Presumably tested by
! other code with #Ifdef COMMANDPARSERKIT -- TODO confirm against the other kits.
Constant COMMANDPARSERKIT = 1;
@h Parser Error Numbers.
The traditional ways in which the I6 library's parser, which we adopt here
more or less intact, can give up on a player's command.
=
! Parser error numbers: the values taken by etype, best_etype and
! nextbest_etype. Only the codes whose use is visible in this section are
! annotated individually below.
Constant STUCK_PE = 1; ! The default, least informative error: sentence not understood
Constant UPTO_PE = 2;
Constant NUMBER_PE = 3;
Constant ANIMA_PE = 4;
Constant CANTSEE_PE = 5;
Constant TOOLIT_PE = 6;
Constant NOTHELD_PE = 7;
Constant MULTI_PE = 8;
Constant MMULTI_PE = 9;
Constant VAGUE_PE = 10;
Constant EXCEPT_PE = 11;
Constant VERB_PE = 12; ! First word not accepted as a verb (or a meta verb given to someone else)
Constant SCENERY_PE = 13;
Constant ITGONE_PE = 14;
Constant JUNKAFTER_PE = 15;
Constant TOOFEW_PE = 16;
Constant NOTHING_PE = 17;
Constant ASKSCOPE_PE = 18;
Constant NOTINCONTEXT_PE = 19;
Constant BLANKLINE_PE = 20; ! Not formally a parser error, but used by I7 as if it were one
Constant ANIMAAGAIN_PE = 21; ! AGAIN used in a command addressed to someone
Constant COMMABEGIN_PE = 22; ! The command begins with a comma
Constant MISSINGPERSON_PE = 23; ! The words before a comma matched nobody
Constant ANIMALISTEN_PE = 24; ! The object addressed is neither animate nor talkable
Constant TOTALK_PE = 25; ! Unmatched words between the person's name and the comma
@h Grammar Line Variables.
This is the I6 library parser in mostly untouched form: reformatted for template
file use, and with paragraph divisions, but otherwise hardly changed at all.
It is a complex algorithm but one which is known to produce good results for
the most part, and it is well understood from (at time of writing) fifteen
years of use. A few I7 additions have been made, but none disrupting the
basic method. For instance, I7's system for resolving ambiguities is
implemented by providing a |ChooseObjects| routine, just as a user of the
I6 library would do.
The I6 parser uses a huge number of global variables, which is not to modern
programming tastes: in the early days of Inform, the parser was essentially
written in assembly-language only lightly structured by C-like syntaxes,
and the Z-machine's 240 globals were more or less registers. The I6 library
made no distinction between those which were "private" to the parser and those
which were allowed to be accessed by the user's code at large. The I7 template does
impose that boundary, though not very strongly: the variables defined
in "Output.i6t" are for general access, while the ones below should only
be read or written by the parser.
=
! Parser-private state used while matching a whole grammar line. The three
! line_* arrays are filled in by UnpackGrammarLine, one entry per token.
Global etype; ! parser error number if command not recognised
Global best_etype; ! Preferred error number so far
Global nextbest_etype; ! Preferred one, if ASKSCOPE_PE disallowed
Global parser_inflection; ! A property (usually "name") to find object names in
Global indirect_parser_inflection; ! Set this to have parser_inflection called
Array pattern --> 32; ! For the current pattern match
Global pcount; ! and a marker within it
Array pattern2 --> 32; ! And another, which stores the best match
Global pcount2; ! so far
Array line_ttype-->32; ! For storing an analysed grammar line: the token types,
Array line_tdata-->32; ! the token data,
Array line_token-->32; ! and the addresses of the tokens themselves
Global nsns; ! Number of special_numbers entered so far
Global params_wanted; ! Number of parameters needed (which may change in parsing)
Global inferfrom; ! The point from which the rest of the command must be inferred
Global inferword; ! And the preposition inferred
Global dont_infer; ! Another dull flag
Global cobj_flag = 0; ! Reset to 0 at the start of each parse and before each new
! command is read. NOTE(review): its other uses are not
! visible in this section -- confirm before relying on it
Global oops_from; ! The "first mistake" word number
Global saved_oops; ! Used in working this out
Array oops_workspace -> 64; ! Used temporarily by "oops" routine: Keyboard saves the
! first 64 bytes of the input buffer here
Global held_back_mode; ! Flag: is there some input from last time
Global hb_wn; ! left over? (And a save value for wn.)
! (Used for full stops and "then".)
Global usual_grammar_after; ! Point from which usual grammar is parsed (it may vary from
! the above if user's routines match multi-word verbs)
@h Grammar Token Variables.
More globals, but dealing at the level of individual tokens now.
=
! Parser-private state used while matching a single token of a grammar line.
Constant PATTERN_NULL = $ffff; ! Entry for a token producing no text
Global found_ttype; ! Used to break up tokens into type
Global found_tdata; ! and data (by AnalyseToken)
Global token_filter; ! For noun filtering by user routines
Global length_of_noun; ! Set by NounDomain to no of words in noun
Global lookahead; ! The token after the one now being matched
Global multi_mode; ! Multiple mode
Global multi_wanted; ! Number of things needed in multitude
Global multi_had; ! Number of things actually found
Global multi_context; ! What token the multi-obj was accepted for
Global indef_type; ! Bit-map holding types of specification
Global indef_wanted; ! Number of items wanted (INDEF_ALL_WANTED for all)
Constant INDEF_ALL_WANTED = 32767; ! Value of indef_wanted meaning "all of them"
Global indef_guess_p; ! Plural-guessing flag
Global indef_owner; ! Object which must hold these items
Global indef_cases; ! Possible gender and numbers of them
Global indef_possambig; ! Has a possibly dangerous assumption
! been made about meaning of a descriptor?
Global indef_nspec_at; ! Word at which a number like "two" was parsed
! (for backtracking)
Global allow_plurals; ! Whether plurals presently allowed or not
Global take_all_rule; ! Slightly different rules apply to "take all" than other uses
! of multiple objects, to make adjudication produce more
! pragmatically useful results
! (Not a flag: possible values 0, 1, 2)
Global dict_flags_of_noun; ! Of the noun currently being parsed
! (a bitmap in #dict_par1 format)
Global pronoun__word; ! Saved value
Global pronoun__obj; ! Saved value
Constant comma_word = 'comma,'; ! An "untypeable word" used to substitute
! for commas in parse buffers
@h Match List Variables.
The most difficult tokens to match are those which refer to objects, since
there is such a variety of names which can be given to any individual object,
and we don't of course know which object or objects are meant. We store the
possibilities (up to |MATCH_LIST_WORDS|, anyway) in a data structure called the match list.
=
! The match list: three parallel arrays, each MATCH_LIST_WORDS entries long
! (that constant is defined outside this section), plus the counters which
! describe how much of them is currently in use.
Array match_list --> MATCH_LIST_WORDS; ! An array of matched objects so far
Array match_classes --> MATCH_LIST_WORDS; ! An array of equivalence classes for them
Array match_scores --> MATCH_LIST_WORDS; ! An array of match scores for them
Global number_matched; ! How many items in it? (0 means none)
Global number_of_classes; ! How many equivalence classes?
Global match_length; ! How many words long are these matches?
Global match_from; ! At what word of the input do they begin?
@h Words.
The player's command is broken down into a numbered sequence of words, which
break at spaces or certain punctuation (see the DM4). The numbering runs
upwards from 1 to |WordCount()|. The following utility routines provide
access to words in the current command; because buffers have different
definitions in Z and Glulx, these routines must vary also.
The actual text of each word is stored as a sequence of ZSCII values in
a |->| (byte) array, with address |WordAddress(x)| and length |WordLength(x)|.
We picture the command as a stream of words to be read one at a time, with
the global variable |wn| being the "current word" marker. |NextWord|, which
takes no arguments, returns:
(a) 0 if the word at |wn| is unrecognised by the dictionary or |wn| is out
of range,
(b) |comma_word| if the word was a comma,
(c) |THEN1__WD| if it was a full stop (because of the Infocom tradition that
a full stop is an abbreviation for the word "then": e.g., TAKE BOX. EAST was read
as two commands in succession),
(d) or the dictionary address if the word was recognised.
The current word marker |wn| is always advanced.
|NextWordStopped| does the same, but returns $-1$ when |wn| is out of range
(e.g., by having advanced past the last word in the command).
=
! Word-level access to the tokenised command held in "parse". The parse table
! is laid out differently on the two virtual machines, so each routine picks
! the appropriate field at compile time.

! WordCount: the number of words in the current command.
[ WordCount;
    #Ifdef TARGET_ZCODE;
    return parse->1;
    #Ifnot;
    return parse-->0;
    #Endif;
];

! WordAddress: the address of the text of word number wordnum, found by
! adding the offset recorded in the parse table to the start of "buffer".
[ WordAddress wordnum;
    #Ifdef TARGET_ZCODE;
    return buffer + parse->(wordnum*4+1);
    #Ifnot;
    return buffer + parse-->(wordnum*3);
    #Endif;
];

! WordLength: the length recorded in the parse table for word number wordnum.
[ WordLength wordnum;
    #Ifdef TARGET_ZCODE;
    return parse->(wordnum*4);
    #Ifnot;
    return parse-->(wordnum*3 - 1);
    #Endif;
];
! WordFrom: the parse-table entry for word number w of the parse table p,
! with a comma replaced by comma_word and a full stop by THEN1__WD.
! Returns 0 if w is outside the range 1 to the word count of p.
! Unlike NextWord, this neither reads nor alters wn.
[ WordFrom w p i j wc;
    #Ifdef TARGET_ZCODE;
    wc = p->1;
    i = w*2-1;
    #Ifnot;
    wc = p-->0;
    i = w*3-2;
    #Endif;
    if ((w >= 1) && (w <= wc)) {
        j = p-->i;
        if (j == ',//') return comma_word;
        if (j == './/') return THEN1__WD;
        return j;
    }
    return 0;
];
! NextWord: fetch the parse-table entry for the word at wn and advance wn by
! one, whether or not the word was in range. A comma comes back as comma_word
! and a full stop as THEN1__WD; an out-of-range wn gives 0.
[ NextWord i j wc;
    ! Work out the word count and the table index for the word at wn
    ! before wn is moved on
    #Ifdef TARGET_ZCODE;
    wc = parse->1;
    i = wn*2-1;
    #Ifnot;
    wc = parse-->0;
    i = wn*3-2;
    #Endif;
    wn++;
    ! wn has already been advanced, so the valid range is now 2 to wc+1
    if ((wn >= 2) && (wn <= wc+1)) {
        j = parse-->i;
        if (j == ',//') return comma_word;
        if (j == './/') return THEN1__WD;
        return j;
    }
    return 0;
];
! NextWordStopped: as NextWord, except that -1 is returned when wn lies
! outside the command. In either case wn is advanced by one.
[ NextWordStopped wc;
    #Ifdef TARGET_ZCODE;
    wc = parse->1;
    #Ifnot;
    wc = parse-->0;
    #Endif;
    if ((wn >= 1) && (wn <= wc)) return NextWord();
    wn++;
    return -1;
];
@h Snippets.
Although the idea is arguably implicit in I6, the formal concept of
"snippet" is new in I7. A snippet is a value which represents a word
range in the command most recently typed by the player. These words number
consecutively upwards from 1, as noted above. The correspondence between
$(w_1, w_2)$, the word range, and $V$, the number used to represent it as
an I6 value, is:
$$ V = 100w_1 + (w_2-w_1+1) $$
so that the remainder mod 100 is the number of words in the range. We
require that $1\leq w_1\leq w_2\leq N$, where $N$ is the number of words in
the current player's command. The entire command is therefore represented by:
$$ C = 100 + N $$
=
! PrintSnippet: print the text of the command from the first character of
! the snippet's first word to the last character of its last word.
! The empty snippet (first word 1, no words) prints nothing and returns false;
! any other invalid word range is reported as a run-time problem.
[ PrintSnippet snip from to i w1 w2;
    w1 = snip/100;
    w2 = w1 + (snip%100) - 1;
    if ((w1 == 1) && (w2 == 0)) rfalse;
    if ((w2 < w1) || (w1 < 1) || (w2 > WordCount()))
        return RunTimeProblem(RTP_SAYINVALIDSNIPPET, w1, w2);
    from = WordAddress(w1);
    to = WordAddress(w2) + WordLength(w2) - 1;
    ! Walk the buffer byte by byte between the two addresses
    i = from;
    while (i <= to) {
        print (char) i->0;
        i++;
    }
];
! SpliceSnippet: overwrite the words of the snippet snip in the command
! buffer with the text t, keeping whatever words followed the snippet, and
! then re-tokenise so that the parse table and players_command match the
! new text. The empty snippet (first word 1, no words) is silently ignored;
! any other range with w2<w1 or w1<1 is reported as a run-time problem.
! NOTE(review): unlike PrintSnippet, w2 is not checked against WordCount().
[ SpliceSnippet snip t i w1 w2 nextw at endsnippet newlen;
w1 = snip/100; w2 = w1 + (snip%100) - 1;
if ((w2<w1) || (w1<1)) {
if ((w1 == 1) && (w2 == 0)) return;
return RunTimeProblem(RTP_SPLICEINVALIDSNIPPET, w1, w2);
}
! Preserve say__p and say__pc across the printing below; they are pulled
! back, in the reverse order, at the end of the routine
@push say__p; @push say__pc;
nextw = w2 + 1;
! at is the offset within buffer of the first word to be replaced
at = WordAddress(w1) - buffer;
! If any words follow the snippet, endsnippet is the snippet made of them;
! otherwise it is left at 0
if (nextw <= WordCount()) endsnippet = 100*nextw + (WordCount() - nextw + 1);
! Print t, then a space and the trailing words (if any), into buffer2.
! newlen is presumably the number of characters printed -- confirm against
! VM_PrintToBuffer
buffer2-->0 = 120;
newlen = VM_PrintToBuffer(buffer2, 120, SpliceSnippet__TextPrinter, t, endsnippet);
! Copy the printed text over the command from offset at onwards, never
! writing at or beyond offset 120
for (i=0: (i<newlen) && (at+i<120): i++) buffer->(at+i) = buffer2->(WORDSIZE+i);
! Record the new extent in the buffer's length field...
#Ifdef TARGET_ZCODE; buffer->1 = at+i; #ifnot; buffer-->0 = at+i; #endif;
! ...and blank out everything after it, up to offset 120
for (:at+i<120:i++) buffer->(at+i) = ' ';
VM_Tokenise(buffer, parse);
players_command = 100 + WordCount();
@pull say__pc; @pull say__p;
];
! SpliceSnippet__TextPrinter: the printing routine which SpliceSnippet hands
! to VM_PrintToBuffer. Says the text t and then, if endsnippet is non-zero,
! a space followed by that snippet.
[ SpliceSnippet__TextPrinter t endsnippet;
    TEXT_TY_Say(t);
    if (endsnippet == 0) rtrue;
    print " ";
    PrintSnippet(endsnippet);
];
! SnippetIncludes: try the routine test at each word position of the snippet
! in turn, calling it as test(position, 0). At the first position where the
! result is not GPR_FAIL, return the snippet running from that position up to
! (but not including) the word at which the routine left wn.
! Returns false if no position succeeds, if test is not a routine, or if the
! snippet is empty; other invalid word ranges produce a run-time problem.
[ SnippetIncludes test snippet w1 w2 wlen i j;
    w1 = snippet/100;
    w2 = w1 + (snippet%100) - 1;
    if ((w2 < w1) || (w1 < 1)) {
        if ((w1 == 1) && (w2 == 0)) rfalse;
        return RunTimeProblem(RTP_INCLUDEINVALIDSNIPPET, w1, w2);
    }
    if (metaclass(test) ~= Routine) rfalse;
    wlen = snippet%100;
    i = w1;
    ! j counts down the starting positions still to be tried
    for (j=wlen: j>0: j--) {
        if (((test)(i, 0)) ~= GPR_FAIL) return i*100+wn-i;
        i++;
    }
    rfalse;
];
! SnippetMatches: does the routine topic_gpr accept the snippet? It is called
! with the snippet's first word number and its length in words, and any
! result other than GPR_FAIL counts as a match.
! wn is reset to 1 first in every case. A zero topic_gpr simply fails; a
! non-zero value which is not a routine raises a run-time problem and fails.
[ SnippetMatches snippet topic_gpr rv;
    wn = 1;
    if (topic_gpr == 0) rfalse;
    if (metaclass(topic_gpr) ~= Routine) {
        RunTimeProblem(RTP_BADTOPIC);
        rfalse;
    }
    rv = (topic_gpr)(snippet/100, snippet%100);
    if (rv == GPR_FAIL) rfalse;
    rtrue;
];
@h Unpacking Grammar Lines.
Grammar lines are sequences of tokens in an array built into the story file,
but in a format which differs depending on the virtual machine in use, so
the following code unpacks the data into more convenient if larger arrays
which are VM-independent.
=
! UnpackGrammarLine: unpack the grammar line stored at line_address into the
! line_token, line_ttype and line_tdata arrays, and set action_to_be,
! action_reversed and params_wanted for it.
! Returns the address of the byte after the line's ENDIT_TOKEN terminator,
! which the caller passes back in to unpack the following line.
[ UnpackGrammarLine line_address i size;
! Start with every entry marked as end-of-line, so that the unpacked token
! sequence is followed by ENDIT_TOKEN entries
for (i=0 : i<32 : i++) {
line_token-->i = ENDIT_TOKEN;
line_ttype-->i = ELEMENTARY_TT;
line_tdata-->i = ENDIT_TOKEN;
}
#Ifdef TARGET_ZCODE;
! The line opens with a two-byte value, high byte first: bit $400 is the
! "reversed" flag and the low ten bits are the action number
action_to_be = 256*(line_address->0) + line_address->1;
action_reversed = ((action_to_be & $400) ~= 0);
action_to_be = action_to_be & $3ff;
! Tokens are 3 bytes each; stepping back one byte here means that the first
! "line_address + size" below lands on offset 2, just after the header
line_address--;
size = 3;
#Ifnot; ! GLULX
! The action number is the 16-bit value at the start of the line, and the
! low bit of the following byte is the "reversed" flag
@aloads line_address 0 action_to_be;
action_reversed = (((line_address->2) & 1) ~= 0);
! Tokens are 5 bytes each; stepping back two bytes here means that the first
! "line_address + size" below lands on offset 3, just after the header
line_address = line_address - 2;
size = 5;
#Endif;
params_wanted = 0;
! Step through the tokens until the terminating ENDIT_TOKEN byte.
! NOTE(review): i is not checked against the 32-entry size of the arrays;
! presumably the compiler never emits a longer line -- confirm
for (i=0 : : i++) {
line_address = line_address + size;
if (line_address->0 == ENDIT_TOKEN) break;
line_token-->i = line_address;
AnalyseToken(line_address);
! Every token other than a preposition calls for a parameter
if (found_ttype ~= PREPOSITION_TT) params_wanted++;
line_ttype-->i = found_ttype;
line_tdata-->i = found_tdata;
}
return line_address + 1;
];
! AnalyseToken: split the grammar token at address token into its type and
! data, leaving them in found_ttype and found_tdata. The type is the bottom
! four bits of the first byte, and the data is the word beginning at the
! following byte. If token is ENDIT_TOKEN itself rather than an address, the
! result is ELEMENTARY_TT with data ENDIT_TOKEN.
[ AnalyseToken token;
    if (token ~= ENDIT_TOKEN) {
        found_ttype = (token->0) & $$1111;
        found_tdata = (token+1)-->0;
        return;
    }
    found_ttype = ELEMENTARY_TT;
    found_tdata = ENDIT_TOKEN;
];
@h Keyboard Primitive.
This is the primitive routine to read from the keyboard: it usually delegates
this to a routine specific to the virtual machine being used, but sometimes
uses a hacked version to allow TEST commands to work. (When a TEST is running,
the text in the walk-through provided is fed into the buffer as if it had
been typed at the keyboard.)
=
! KeyboardPrimitive: read a line of input into a_buffer and a_table.
! In a DEBUG build the request goes to TestKeyboardPrimitive; otherwise it
! goes straight to the virtual machine's own VM_ReadKeyboard.
[ KeyboardPrimitive a_buffer a_table;
    #Ifdef DEBUG;
    return TestKeyboardPrimitive(a_buffer, a_table);
    #Ifnot;
    return VM_ReadKeyboard(a_buffer, a_table);
    #Endif;
];
@h Reading the Command.
The |Keyboard| routine actually receives the player's words, putting the
words in |a_buffer| and their dictionary addresses in |a_table|. It is
assumed that the table is the same one on each (standard) call. Much
of the code handles the OOPS and UNDO commands, which are not actions and
do not pass through the rest of the parser. The undo state is saved --
it is essentially an internal saved game, in the VM interpreter's memory
rather than in an external file -- and note that this is therefore also
where execution picks up if an UNDO has been typed. Since UNDO recreates
the former machine state perfectly, it might seem impossible to tell that
an UNDO had occurred, but in fact the VM passes information back in the
form of a return code from the relevant instruction, and this allows us
to detect an undo. (We deal with it by printing the current location and
asking for another command.)
|Keyboard| can also be used by miscellaneous routines in the game to ask
yes/no questions and the like, without invoking the rest of the parser.
The return value is the number of words typed.
=
! Keyboard: read a command into a_buffer, tokenised into a_table, and keep
! asking until there is something to hand back to the caller.
! A blank line is answered with the BLANKLINE_PE parser error. OOPS is dealt
! with here by patching the previous command, and a one-word UNDO by calling
! Perform_Undo. For any other command the undo state is saved before returning.
! Returns the number of words in the (possibly OOPS-corrected) command.
[ Keyboard a_buffer a_table nw i w w2 x1 x2;
! Presumably the values shown by DrawStatusLine -- confirm
sline1 = score; sline2 = turns;
while (true) {
! Save the start of the buffer, in case "oops" needs to restore it
for (i=0 : i<64 : i++) oops_workspace->i = a_buffer->i;
! In case of an array entry corruption that shouldn't happen, but would be
! disastrous if it did:
#Ifdef TARGET_ZCODE;
a_buffer->0 = INPUT_BUFFER_LEN;
a_table->0 = 15; ! Allow to split input into this many words
#Endif; ! TARGET_
! Print the prompt, and read in the words and dictionary addresses
PrintPrompt();
DrawStatusLine();
KeyboardPrimitive(a_buffer, a_table);
! Set nw to the number of words
#Ifdef TARGET_ZCODE; nw = a_table->1; #Ifnot; nw = a_table-->0; #Endif;
! If the line was blank, get a fresh line: the error is issued through the
! "printing a parser error" activity, with etype set to BLANKLINE_PE only
! for the duration and players_command set to the empty snippet
if (nw == 0) {
@push etype; etype = BLANKLINE_PE;
players_command = 100;
BeginActivity(PRINTING_A_PARSER_ERROR_ACT);
if (ForActivity(PRINTING_A_PARSER_ERROR_ACT) == false) {
PARSER_ERROR_INTERNAL_RM('X', noun); new_line;
}
EndActivity(PRINTING_A_PARSER_ERROR_ACT);
@pull etype;
continue;
}
! Look at the opening word to see whether this is an OOPS or UNDO command.
! Conveniently, a_table-->1 is the first word on both the Z-machine and Glulx
w = a_table-->1;
if (w == OOPS1__WD or OOPS2__WD or OOPS3__WD) {
! OOPS needs a recorded mistake to correct (oops_from), and must be
! followed by exactly one word; otherwise complain and ask again
if (oops_from == 0) { PARSER_COMMAND_INTERNAL_RM('A'); new_line; continue; }
if (nw == 1) { PARSER_COMMAND_INTERNAL_RM('B'); new_line; continue; }
if (nw > 2) { PARSER_COMMAND_INTERNAL_RM('C'); new_line; continue; }
! So now we know: there was a previous mistake, and the player has
! attempted to correct a single word of it.
! Keep a copy of the "oops ..." line in buffer2, since a_buffer is about
! to be overwritten
for (i=0 : i<INPUT_BUFFER_LEN : i++) buffer2->i = a_buffer->i;
#Ifdef TARGET_ZCODE;
x1 = a_table->9; ! Start of word following "oops"
x2 = a_table->8; ! Length of word following "oops"
#Ifnot; ! TARGET_GLULX
x1 = a_table-->6; ! Start of word following "oops"
x2 = a_table-->5; ! Length of word following "oops"
#Endif; ! TARGET_
! Repair the buffer to the text that was in it before the "oops"
! was typed:
for (i=0 : i<64 : i++) a_buffer->i = oops_workspace->i;
VM_Tokenise(a_buffer,a_table);
! Work out the position in the buffer of the word to be corrected:
#Ifdef TARGET_ZCODE;
w = a_table->(4*oops_from + 1); ! Start of word to go
w2 = a_table->(4*oops_from); ! Length of word to go
#Ifnot; ! TARGET_GLULX
w = a_table-->(3*oops_from); ! Start of word to go
w2 = a_table-->(3*oops_from - 1); ! Length of word to go
#Endif; ! TARGET_
! Write spaces over the word to be corrected:
for (i=0 : i<w2 : i++) a_buffer->(i+w) = ' ';
if (w2 < x2) {
! If the replacement is longer than the original, move up...
! (working down from the top of the buffer, so that each byte is read
! before it is overwritten)
for (i=INPUT_BUFFER_LEN-1 : i>=w+x2 : i--)
a_buffer->i = a_buffer->(i-x2+w2);
! ...increasing buffer size accordingly.
#Ifdef TARGET_ZCODE;
a_buffer->1 = (a_buffer->1) + (x2-w2);
#Ifnot; ! TARGET_GLULX
a_buffer-->0 = (a_buffer-->0) + (x2-w2);
#Endif; ! TARGET_
}
! Write the correction in:
for (i=0 : i<x2 : i++) a_buffer->(i+w) = buffer2->(i+x1);
VM_Tokenise(a_buffer, a_table);
#Ifdef TARGET_ZCODE; nw = a_table->1; #Ifnot; nw = a_table-->0; #Endif;
return nw;
}
! Undo handling: only a command consisting of the single word UNDO counts
if ((w == UNDO1__WD or UNDO2__WD or UNDO3__WD) && (nw==1)) {
Perform_Undo();
continue;
}
! Save the undo state. As the section introduction explains, execution also
! resumes from this point after a later UNDO, and the return code in i is
! how the two cases are told apart
i = VM_Save_Undo();
if (KIT_CONFIGURATION_BITMAP & PREVENT_UNDO_TCBIT) undo_flag = 0;
else undo_flag = 2;
if (i == -1) undo_flag = 0;
if (i == 0) undo_flag = 1;
if (i == 2) {
! An UNDO has just brought us back here: report it (the reporting is done
! by DealWithUndo) and then ask for another command
DealWithUndo();
continue;
}
return nw;
}
];
! DealWithUndo: called by Keyboard when VM_Save_Undo returns 2, which is
! to say when an UNDO has just been carried out. Restores the window
! colours, prints the output of SL_Location in subheader style followed by
! a line break, and then issues response 'E' of IMMEDIATELY_UNDO_RM.
[ DealWithUndo;
    VM_RestoreWindowColours();

    VM_Style(SUBHEADER_VMSTY);
    SL_Location();
    print "^";
    VM_Style(NORMAL_VMSTY);

    IMMEDIATELY_UNDO_RM('E');
    new_line;
];
@h Parser Proper.
The main parser routine is something of a leviathan, and it has traditionally
been divided into 11 lettered parts:
(A) Get the input, do OOPS and AGAIN
(B) Is it a direction, and so an implicit GO? If so go to (K)
(C) Is anyone being addressed?
(D) Get the command verb: try all the syntax lines for that verb
(E) Break down a syntax line into analysed tokens
(F) Look ahead for advance warning for |multiexcept|/|multiinside|
(G) Parse each token in turn (calling |ParseToken| to do most of the work)
(H) Cheaply parse otherwise unrecognised conversation and return
(I) Print best possible error message
(J) Retry the whole lot
(K) Last thing: check for THEN and further instruction(s), return.
This lettering has been preserved here, with the code under each letter
now being the body of "Parser Letter A", "Parser Letter B" and so on.
Note that there are three different places where a return can happen.
The routine returns only when a sensible request has been made; for a
fairly thorough description of its output, which is written into the
|parser_results| array and also into several globals, see "OrderOfPlay.i6t".
=
[ Parser__parse
syntax line num_lines line_address i j k token l m inferred_go;
cobj_flag = 0;
parser_results-->ACTION_PRES = 0;
parser_results-->NO_INPS_PRES = 0;
parser_results-->INP1_PRES = 0;
parser_results-->INP2_PRES = 0;
meta = false;
@h Parser Letter A.
Get the input, do OOPS and AGAIN.
=
if (held_back_mode) {
held_back_mode = false; wn = hb_wn;
if (verb_wordnum > 0) i = WordAddress(verb_wordnum); else i = WordAddress(1);
j = WordAddress(wn);
if (i<=j) for (: i<j : i++) i->0 = ' ';
i = NextWord();
if (i == AGAIN1__WD or AGAIN2__WD or AGAIN3__WD) {
! Delete the words "then again" from the again buffer,
! in which we have just realised that it must occur:
! prevents an infinite loop on "i. again"
i = WordAddress(wn-2)-buffer;
if (wn > num_words) j = INPUT_BUFFER_LEN-1;
else j = WordAddress(wn)-buffer;
for (: i<j : i++) buffer3->i = ' ';
}
VM_Tokenise(buffer, parse);
jump ReParse;
}
.ReType;
cobj_flag = 0;
actors_location = ScopeCeiling(player);
BeginActivity(READING_A_COMMAND_ACT); if (ForActivity(READING_A_COMMAND_ACT)==false) {
Keyboard(buffer,parse);
num_words = WordCount(); players_command = 100 + num_words;
} if (EndActivity(READING_A_COMMAND_ACT)) jump ReType;
.ReParse;
parser_inflection = name;
! Initially assume the command is aimed at the player, and the verb
! is the first word
num_words = WordCount(); players_command = 100 + num_words;
wn = 1; inferred_go = false;
LanguageToInformese();
! Re-tokenise:
VM_Tokenise(buffer,parse);
num_words = WordCount(); players_command = 100 + num_words;
k=0;
#Ifdef DEBUG;
if (parser_trace >= 2) {
print "[ ";
for (i=0 : i<num_words : i++) {
#Ifdef TARGET_ZCODE;
j = parse-->(i*2 + 1);
#Ifnot; ! TARGET_GLULX
j = parse-->(i*3 + 1);
#Endif; ! TARGET_
k = WordAddress(i+1);
l = WordLength(i+1);
print "~"; for (m=0 : m<l : m++) print (char) k->m; print "~ ";
if (j == 0) print "?";
else {
#Ifdef TARGET_ZCODE;
if (UnsignedCompare(j, HDR_DICTIONARY-->0) >= 0 &&
UnsignedCompare(j, HDR_HIGHMEMORY-->0) < 0)
print (address) j;
else print j;
#Ifnot; ! TARGET_GLULX
if (j->0 == $60) print (address) j;
else print j;
#Endif; ! TARGET_
}
if (i ~= num_words-1) print " / ";
}
print " ]^";
}
#Endif; ! DEBUG
verb_wordnum = 1;
actor = player;
actors_location = ScopeCeiling(player);
usual_grammar_after = 0;
.AlmostReParse;
scope_token = 0;
action_to_be = NULL;
! Begin from what we currently think is the verb word
.BeginCommand;
wn = verb_wordnum;
verb_word = NextWordStopped();
! If there's no input here, we must have something like "person,".
if (verb_word == -1) {
best_etype = STUCK_PE; jump GiveError;
}
if (verb_word == comma_word) {
best_etype = COMMABEGIN_PE; jump GiveError;
}
! Now try for "again" or "g", which are special cases: don't allow "again" if nothing
! has previously been typed; simply copy the previous text across
if (verb_word == AGAIN2__WD or AGAIN3__WD) verb_word = AGAIN1__WD;
if (verb_word == AGAIN1__WD) {
if (actor ~= player) {
best_etype = ANIMAAGAIN_PE;
jump GiveError;
}
#Ifdef TARGET_ZCODE;
if (buffer3->1 == 0) {
PARSER_COMMAND_INTERNAL_RM('D'); new_line;
jump ReType;
}
#Ifnot; ! TARGET_GLULX
if (buffer3-->0 == 0) {
PARSER_COMMAND_INTERNAL_RM('D'); new_line;
jump ReType;
}
#Endif; ! TARGET_
for (i=0 : i<INPUT_BUFFER_LEN : i++) buffer->i = buffer3->i;
VM_Tokenise(buffer,parse);
num_words = WordCount(); players_command = 100 + num_words;
jump ReParse;
}
! Save the present input in case of an "again" next time
if (verb_word ~= AGAIN1__WD)
for (i=0 : i<INPUT_BUFFER_LEN : i++) buffer3->i = buffer->i;
if (usual_grammar_after == 0) {
j = verb_wordnum;
i = RunRoutines(actor, grammar);
#Ifdef DEBUG;
if (parser_trace >= 2 && actor.grammar ~= 0 or NULL)
print " [Grammar property returned ", i, "]^";
#Endif; ! DEBUG
if ((i ~= 0 or 1) && (VM_InvalidDictionaryAddress(i))) {
usual_grammar_after = verb_wordnum; i=-i;
}
if (i == 1) {
parser_results-->ACTION_PRES = action;
parser_results-->NO_INPS_PRES = 0;
parser_results-->INP1_PRES = noun;
parser_results-->INP2_PRES = second;
if (noun) parser_results-->NO_INPS_PRES = 1;
if (second) parser_results-->NO_INPS_PRES = 2;
rtrue;
}
if (i ~= 0) { verb_word = i; wn--; verb_wordnum--; }
else { wn = verb_wordnum; verb_word = NextWord(); }
}
else usual_grammar_after = 0;
@h Parser Letter B.
Is the command a direction name, and so an implicit GO? If so, go to (K).
=
if (verb_word == 0) {
i = wn; verb_word = LanguageIsVerb(buffer, parse, verb_wordnum);
wn = i;
}
! If the first word is not listed as a verb, it must be a direction
! or the name of someone to talk to
if (verb_word == 0 || ((verb_word->#dict_par1) & 1) == 0) {
! So is the first word an object contained in the special object "compass"
! (i.e., a direction)? This needs use of NounDomain, a routine which
! does the object matching, returning the object number, or 0 if none found,
! or REPARSE_CODE if it has restructured the parse table so the whole parse
! must be begun again...
wn = verb_wordnum; indef_mode = false; token_filter = 0; parameters = 0;
@push actor; @push action; @push action_to_be;
actor = player; meta = false; action = ##Go; action_to_be = ##Go;
l = NounDomain(Compass, 0, 0);
@pull action_to_be; @pull action; @pull actor;
if (l == REPARSE_CODE) jump ReParse;
! If it is a direction, send back the results:
! action=GoSub, no of arguments=1, argument 1=the direction.
if ((l~=0) && (l ofclass K3_direction)) {
parser_results-->ACTION_PRES = ##Go;
parser_results-->NO_INPS_PRES = 1;
parser_results-->INP1_PRES = l;
inferred_go = true;
jump LookForMore;
}
} ! end of first-word-not-a-verb
@h Parser Letter C.
Is anyone being addressed?
=
! Only check for a comma (a "someone, do something" command) if we are
! not already in the middle of one. (This simplification stops us from
! worrying about "robot, wizard, you are an idiot", telling the robot to
! tell the wizard that she is an idiot.)
if (actor == player) {
for (j=2 : j<=num_words : j++) {
i=NextWord();
if (i == comma_word) jump Conversation;
}
}
jump NotConversation;
! NextWord nudges the word number wn on by one each time, so we've now
! advanced past a comma. (A comma is a word all on its own in the table.)
.Conversation;
j = wn - 1;
! Use NounDomain (in the context of "animate creature") to see if the
! words make sense as the name of someone held or nearby
wn = 1; lookahead = HELD_TOKEN;
scope_reason = TALKING_REASON;
l = NounDomain(player,actors_location,6);
scope_reason = PARSING_REASON;
if (l == REPARSE_CODE) jump ReParse;
if (l == 0) {
if (verb_word && ((verb_word->#dict_par1) & 1)) jump NotConversation;
best_etype = MISSINGPERSON_PE; jump GiveError;
}
.Conversation2;
! The object addressed must at least be "talkable" if not actually "animate"
! (the distinction allows, for instance, a microphone to be spoken to,
! without the parser thinking that the microphone is human).
if (l hasnt animate && l hasnt talkable) {
best_etype = ANIMALISTEN_PE; noun = l; jump GiveError;
}
! Check that there aren't any mystery words between the end of the person's
! name and the comma (eg, throw out "dwarf sdfgsdgs, go north").
if (wn ~= j) {
if (verb_word && ((verb_word->#dict_par1) & 1)) jump NotConversation;
best_etype = TOTALK_PE; jump GiveError;
}
! The player has now successfully named someone. Adjust "him", "her", "it":
PronounNotice(l);
! Set the global variable "actor", adjust the number of the first word,
! and begin parsing again from there.
verb_wordnum = j + 1;
! Stop things like "me, again":
if (l == player) {
wn = verb_wordnum;
if (NextWordStopped() == AGAIN1__WD or AGAIN2__WD or AGAIN3__WD) {
best_etype = ANIMAAGAIN_PE;
jump GiveError;
}
}
actor = l;
actors_location = ScopeCeiling(l);
#Ifdef DEBUG;
if (parser_trace >= 1)
print "[Actor is ", (the) actor, " in ", (name) actors_location, "]^";
#Endif; ! DEBUG
jump BeginCommand;
@h Parser Letter D.
Get the verb: try all the syntax lines for that verb.
=
.NotConversation;
if (verb_word == 0 || ((verb_word->#dict_par1) & 1) == 0) {
verb_word = UnknownVerb(verb_word);
if (verb_word ~= 0) jump VerbAccepted;
best_etype = VERB_PE;
jump GiveError;
}
.VerbAccepted;
! We now definitely have a verb, not a direction, whether we got here by the
! "take ..." or "person, take ..." method. Get the meta flag for this verb:
meta = ((verb_word->#dict_par1) & 2)/2;
! You can't order other people to "full score" for you, and so on...
if (meta == 1 && actor ~= player) {
best_etype = VERB_PE;
meta = 0;
jump GiveError;
}
! Now let i be the corresponding verb number...
i = DictionaryWordToVerbNum(verb_word);
! ...then look up the i-th entry in the verb table, whose address is at word
! 7 in the Z-machine (in the header), so as to get the address of the syntax
! table for the given verb...
#Ifdef TARGET_ZCODE;
syntax = (HDR_STATICMEMORY-->0)-->i;
#Ifnot; ! TARGET_GLULX
syntax = (#grammar_table)-->(i+1);
#Endif; ! TARGET_
! ...and then see how many lines (ie, different patterns corresponding to the
! same verb) are stored in the parse table...
num_lines = (syntax->0) - 1;
! ...and now go through them all, one by one.
! To prevent pronoun_word 0 being misunderstood,
pronoun_word = NULL; pronoun_obj = NULL;
#Ifdef DEBUG;
if (parser_trace >= 1)
print "[Parsing for the verb '", (address) verb_word, "' (", num_lines+1, " lines)]^";
#Endif; ! DEBUG
best_etype = STUCK_PE; nextbest_etype = STUCK_PE;
multiflag = false;
! "best_etype" is the current failure-to-match error - it is by default
! the least informative one, "don't understand that sentence".
! "nextbest_etype" remembers the best alternative to having to ask a
! scope token for an error message (i.e., the best not counting ASKSCOPE_PE).
! multiflag is used here to prevent inappropriate MULTI_PE errors
! in addition to its unrelated duties passing information to action routines
@h Parser Letter E.
Break down a syntax line into analysed tokens.
=
line_address = syntax + 1;
for (line=0 : line<=num_lines : line++) {
! Unpack the syntax line from Inform format into three arrays; ensure that
! the sequence of tokens ends in an ENDIT_TOKEN.
line_address = UnpackGrammarLine(line_address);
#Ifdef DEBUG;
if (parser_trace >= 1) {
if (parser_trace >= 2) new_line;
print "[line ", line; DebugGrammarLine();
print "]^";
}
#Endif; ! DEBUG
! We aren't in "not holding" or inferring modes, and haven't entered
! any parameters on the line yet, or any special numbers; the multiple
! object is still empty.
inferfrom = 0;
parameters = 0;
nsns = 0; special_word = 0;
multiple_object-->0 = 0;
multi_context = 0;
etype = STUCK_PE;
! Put the word marker back to just after the verb
wn = verb_wordnum+1;
@h Parser Letter F.
Look ahead for advance warning for |multiexcept|/|multiinside|.
There are two special cases where parsing a token now has to be affected by
the result of parsing another token later, and these two cases (multiexcept
and multiinside tokens) are helped by a quick look ahead, to work out the
future token now. We can only carry this out in the simple (but by far the
most common) case:
|multiexcept <one or more prepositions> noun|
and similarly for |multiinside|.
=
advance_warning = -1; indef_mode = false;
for (i=0,m=false,pcount=0 : line_token-->pcount ~= ENDIT_TOKEN : pcount++) {
scope_token = 0;
if (line_ttype-->pcount ~= PREPOSITION_TT) i++;
if (line_ttype-->pcount == ELEMENTARY_TT) {
if (line_tdata-->pcount == MULTI_TOKEN) m = true;
if (line_tdata-->pcount == MULTIEXCEPT_TOKEN or MULTIINSIDE_TOKEN && i == 1) {
! First non-preposition is "multiexcept" or
! "multiinside", so look ahead.
#Ifdef DEBUG;
if (parser_trace >= 2) print " [Trying look-ahead]^";
#Endif; ! DEBUG
! We need this to be followed by 1 or more prepositions.
pcount++;
if (line_ttype-->pcount == PREPOSITION_TT) {
! skip ahead to a preposition word in the input
do {
l = NextWord();
} until ((wn > num_words) ||
(l && (l->#dict_par1) & 8 ~= 0));
if (wn > num_words) {
#Ifdef DEBUG;
if (parser_trace >= 2)
print " [Look-ahead aborted: prepositions missing]^";
#Endif;
jump EmptyLine;
}
do {
if (PrepositionChain(l, pcount) ~= -1) {
! advance past the chain
if ((line_token-->pcount)->0 & $20 ~= 0) {
pcount++;
while ((line_token-->pcount ~= ENDIT_TOKEN) &&
((line_token-->pcount)->0 & $10 ~= 0))
pcount++;
} else {
pcount++;
}
} else {
! try to find another preposition word
do {
l = NextWord();
} until ((wn >= num_words) ||
(l && (l->#dict_par1) & 8 ~= 0));
if (l && (l->#dict_par1) & 8) continue;
! lookahead failed