-
Notifications
You must be signed in to change notification settings - Fork 0
/
gencomp.c
336 lines (304 loc) · 11.4 KB
/
gencomp.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
// I've just finished adding semantic phrases. I haven't
// tidied the sources up yet, following some major surgery.
#include "parser.h"
#include GRAMMAR
// This program generates the C code that ultimately uses the AST.
// The generated code is just the body of the main switch statement.
// It is included by algol-compile.c which itself is included by uparse.c
// The code is generated based on the grammar, which starts as a ".g"
// file which is itself converted into a C header file by the 'takeon'
// utility.
// This code was based on regen.c
#include <stdio.h>
#include <string.h>
#include <errno.h>
#include <stdlib.h>
#include <ctype.h>
#include <wchar.h>
#include <locale.h>
void escape(FILE *out, const wchar_t *s, wchar_t *indent) {
while (*s != '\0') {
wint_t c = *s++;
if (c == '\\') {
fputc('\\', out);
fputc('\\', out); // BUGFIX?
} else if (c == '\"') {
fputc('\\', out);
fputc('\"', out);
} else if (c == '\n') {
fputc('\\', out);
fputc('n', out);
if (indent && (*s != '\0')) {
fprintf(out, "%ls", indent);
}
} else {
fprintf(out, "%lc", c);
}
}
}
#define UNDERLINE 818
static int visited[sizeof(gram)/sizeof(gram[0])];
#define visited(x) BOUNDS_CHECK(visited,x,sizeof(gram)/sizeof(gram[0]))
#define phrase_start(i) BOUNDS_CHECK(sequential_phrase_no_to_grammar_index,i,NUM_SIMPLE_PHRASES)
#define bip_map(i) BOUNDS_CHECK(bip_map,i,sizeof(bip_map)/sizeof(bip_map[0]))
#define gram(x) BOUNDS_CHECK(gram,x,sizeof(gram)/sizeof(gram[0]))
// phrasenum & TERM_NAME are so useful they may be better in a library.
// Look up name of a phrase entry.
int phrasenum_inner(int PhraseStart, char *file, int line) {
int i = 0;
for (;;) {
if ( (i >= NUM_SIMPLE_PHRASES)
|| (sequential_phrase_no_to_grammar_index[i] == PhraseStart)) break;
i++;
}
if (i == NUM_SIMPLE_PHRASES) {
fprintf(stderr, "%s:%d: Cannot find a phrase starting at index=%d\n",
file, line, PhraseStart);
exit(1);
};
return i+PHRASE_BASE;
}
int phrasenum(int PhraseStart) {
return phrasenum_inner(PhraseStart, __FILE__, __LINE__);
}
#define phrasenum(PhraseStart) phrasenum_inner(PhraseStart, __FILE__, __LINE__)
// Look up name of a BIP entry.
int BIPnum(int BIPStart) {
int i = 0, bipsize = sizeof(bip_map)/sizeof(bip_map[0]);
for (;;) { if ((i >= bipsize) || (bip_map(i) == BIPStart)) break; i++; }
if (i == bipsize) { fprintf(stderr, "Cannot find a phrase starting at index=%d\n", BIPStart); exit(1); };
return i;
}
#define MAX(a,b) ({int A=a, B=b; (A>B ? A : B);})
// Look up description of a terminal.
wchar_t *TERM_NAME(int P) {
#define TMPSIZE 512
static wchar_t tmp[TMPSIZE+1];
int type = PhraseType(P);
if (type == PhraseType(PHRASE_TYPE)) {
swprintf(tmp, TMPSIZE, L"<%s%s%ls>", P&NEGATED_PHRASE ? "!":"",
P&GUARD_PHRASE ? "?":"",
phrasename[phrasenum(P&INDEX_MASK)]);
} else if (type == PhraseType(SEMANTIC_TYPE)) {
swprintf(tmp, TMPSIZE, L"<%s%s%ls>", P&NEGATED_PHRASE ? "!":"",
P&GUARD_PHRASE ? "?":"",
semantic_phrasename[P&INDEX_MASK]);
} else if (type == PhraseType(KEYWORD_TYPE)) {
swprintf(tmp, TMPSIZE, L"\"%ls\"", keyword[P&INDEX_MASK]);
} else if (type == PhraseType(CHAR_TYPE)) {
if ((P&255) == '\\') {
swprintf(tmp, TMPSIZE, L"'\%lc'", P&255);
} else {
swprintf(tmp, TMPSIZE, L"'%lc'", P&255);
}
} else if (type == PhraseType(REGEXP_TYPE)) {
swprintf(tmp, TMPSIZE, L"«%ls»", regexps[P&INDEX_MASK]+1);
} else if (type == PhraseType(BIP_TYPE)) {
swprintf(tmp, TMPSIZE, L"<%ls>", phrasename[P & INDEX_MASK]);
} else {
swprintf(tmp, TMPSIZE, L"{undecoded terminal %d, Type=%d}", P & INDEX_MASK, type);
}
return tmp;
}
void output_alt(int phrase_no, int P, int altno) {
int alts = gram(P) & INDEX_MASK;
int alt;
P++;
printf(" //\\\\ ");
for (alt = 0; alt < alts; alt++) {
int phrases = gram(P) & INDEX_MASK;
int phrase;
P++;
for (phrase = 0; phrase < phrases; phrase++) {
if (alt == altno) {
printf("%ls", TERM_NAME(gram(P))); if (phrase+1 != phrases) printf(" "); // TO DO: Add , or ;
}
P++;
}
}
fflush(stdout);
}
void evaluate_subphrase(int P, int astidx) {
//int alts = gram(P & INDEX_MASK) & INDEX_MASK;
int type = PhraseType(P)<<GRAMMAR_TYPE_SHIFT;
if (type == PHRASE_TYPE) {
if (P&NEGATED_PHRASE) {
printf("/* ignore negative guard */");
} else {
if (P&GUARD_PHRASE) {
printf("/* ignore guard */");
} else {
if (phrasenum(P&INDEX_MASK) < PHRASE_BASE) {
printf("t[%d] = compile(P(%d), depth+1 /* B_%ls */)", astidx, astidx, phrasename_c[phrasenum(P&INDEX_MASK)]);
} else {
printf("t[%d] = compile(P(%d), depth+1 /* P_%ls */)", astidx, astidx, phrasename_c[phrasenum(P&INDEX_MASK)]);
}
}
}
} else if (type == KEYWORD_TYPE) {
printf("t[%d] = wlit(P(%d))", astidx, astidx);
} else if (type == CHAR_TYPE) {
if ((P&255) == '\\') {
printf("t[%d] = wlit(P(%d) /*, L'\\%lc' */)", astidx, astidx , P&255);
} else {
printf("t[%d] = wlit(P(%d) /*, L'%lc' */)", astidx, astidx, P&255);
}
} else if (type == REGEXP_TYPE) {
printf("t[%d] = wlit(P(%d))", astidx, astidx);
} else if (type == BIP_TYPE) {
printf("t[%d] = wlit(P(%d) /*, L\"%ls\" */)", astidx, astidx, phrasename[BIPnum(P & INDEX_MASK)]);
} else if (type == SEMANTIC_TYPE) {
printf("/* semantic procedure %ls */", semantic_phrasename[P & INDEX_MASK]);
} else {
printf("/* undecoded terminal %d */", P & INDEX_MASK);
}
}
void output_code(int phrase_no, int P) {
int OP=P;
int alts = gram(P) & INDEX_MASK;
int alt;
printf(" case P_%ls:\n", phrasename_c[phrase_no]); // NO! Include '-' not '_'
P++;
for (alt = 0; alt < alts; alt++) {
int phrases = gram(P) & INDEX_MASK;
int phrase;
if (alts > 1) {
if (alt == 0) {
printf(" if (alt == 0) { ");
} else if (alt+1 == alts) {
printf(" } else { ");
} else {
printf(" } else if (alt == %d) { ", alt);
}
} else {
printf(" { ");
}
if (phrases == 0) {
printf(" //\\\\ ;\n");
} else {
output_alt(phrase_no, OP, alt);
if (alt+1==alts) printf(";"); else printf(",");
}
printf("\n");
P++;
for (phrase = 0; phrase < phrases; phrase++) {
// loop over phrases:
if (alts == 1) printf(" "); else printf(" ");
//printf("T[++phrases] = "/*, phrase+1*/);
evaluate_subphrase(gram(P), phrase+1);
printf(";\n");
P++;
}
if (alts==1) printf(" }\n");
//printf(" phrases = %d;\n", phrases);
// if (alt+1 == alts) printf(";"); else printf(",\n//\\\\ ");
}
if (alts > 1) printf(" }\n");
printf(" return t[0];\n\n" /*, phrasename[phrase_no]*/);
fflush(stdout);
}
void determine_visited(int P, int visited[]) {
P = P & INDEX_MASK;
int alts = gram(P) & INDEX_MASK;
int alt;
P++; // skip count of alts
for (alt = 0; alt < alts; alt++) {
int phrases = gram(P) & INDEX_MASK;
int phrase;
P++; // skip count of phrases
for (phrase = 0; phrase < phrases; phrase++) {
int subphrase_type = (gram(P) & (GRAMMAR_TYPE_MASK<<GRAMMAR_TYPE_SHIFT));
if (subphrase_type == PHRASE_TYPE) {
// Only recurse for proper phrases, not terminals
int subphrase_start = gram(P) & INDEX_MASK;
int subphrase_phrase_idx = phrasenum(gram(P) & INDEX_MASK);
if (!visited(subphrase_phrase_idx)) { // recursion stopper
visited(subphrase_phrase_idx) = 1;
determine_visited(subphrase_start, visited);
}
}
P++; // skip each phrase
}
}
}
void output_grammar(int visited[]) {
int P = 0;
int i;
// First output BIPs,
for (i = 0; i < PHRASE_BASE; i++) {
printf("//\\\\ B<%ls> = %d;\n//\\\\#\n", phrasename[i], bip_map[i]);
}
// then proper phrases
for (i = 0; i < NUM_SIMPLE_PHRASES; i++) {
P = phrase_start(i);
printf("//\\\\ P<%ls> =\n", phrasename[i+PHRASE_BASE]);
output_code(i+PHRASE_BASE, P);
if (!visited(i+PHRASE_BASE)) {
// Exception for root phrase P<SS> (Source Statement) which is usually not referenced within the grammar
// Actually the 'unused phrase' test is not very onerous given that we're doing a tree walk anyway.
//if (wcscmp(phrasename[i+PHRASE_BASE], L"SS") != 0) fprintf(stderr, "? Warning: P<%ls> UNUSED\n", phrasename[i+PHRASE_BASE]);
}
}
printf("//\\\\ E\n");
}
int main(int argc, char **argv) {
/* If your native locale doesn't use UTF-8 encoding
* you need to replace the empty string with a
* locale like "en_US.utf8"
*/
/* char *locale = */ (void) setlocale(LC_ALL, "");
int P;
int phrase_count = sizeof(gram)/sizeof(gram[0]);
for (P = 0; P < phrase_count; P++) visited(P) = 0;
printf("#error This is not directly used in anything at the moment.\n");
printf("// It is here for incorporating as the basis of a compile() function to do something useful with an AST.\n");
printf("// For example this generated the code that was manually converted into algol60-indent.h\n\n");
determine_visited(G_SS|PHRASE_TYPE,visited);
fprintf(stdout, "#include <stdio.h>\n");
fprintf(stdout, "\n");
fprintf(stdout, "#define AST_idx_mask 0xfFFFFFF\n");
fprintf(stdout, "#define AST_type_shift 28\n");
fprintf(stdout, "#define AST_type_mask 15\n");
fprintf(stdout, "#define AST_BIP (1 << AST_type_shift)\n");
fprintf(stdout, "#define AST_PHRASE (2 << AST_type_shift)\n");
fprintf(stdout, "#define AST_LITERAL (3 << AST_type_shift)\n");
fprintf(stdout, "\n");
fprintf(stdout, "int AST(int idx) {\n");
fprintf(stdout, "}\n");
fprintf(stdout, "\n");
fprintf(stdout, "int wlit(int P) {\n");
fprintf(stdout, "}\n");
fprintf(stdout, "\n");
fprintf(stdout, "int P(int P_) {\n");
fprintf(stdout, "}\n");
// fprintf(stdout, "#include \"%s.h\"\n", APPNAME);
fprintf(stdout, "int compile(int Ph, int depth) {\n");
fprintf(stdout, "\n");
fprintf(stdout, " // AST format not properly designed yet. These are placeholders:\n");
fprintf(stdout, "#define P(x) (x) // TEMP\n");
fprintf(stdout, "#define A(x) (x) // TEMP\n");
fprintf(stdout, "#define wlit(x) (x)\n");
fprintf(stdout, "\n");
fprintf(stdout, " int AST_index = Ph&AST_idx_mask;\n");
fprintf(stdout, " int AST_type = Ph & (AST_type_mask << AST_type_shift);\n");
fprintf(stdout, " int op = A(AST_index+1);\n");
fprintf(stdout, " int alt = A(AST_index+2);\n");
fprintf(stdout, " int count = A(AST_index+3);\n");
fprintf(stdout, " int t[LARGEST_ALT];\n");
fprintf(stdout, " int r = 0;\n");
fprintf(stdout, "\n");
fprintf(stdout, " fprintf(stderr, \"Ph=%%d AST_type = %%d AST_index = %%d op=%%d alt=%%d count=%%d\\n\", Ph, AST_type>>AST_type_shift, AST_index, op, alt, count);\n");
fprintf(stdout, "\n");
fprintf(stdout, " switch (op) {\n");
fprintf(stdout, " // It is here for incorporating as the basis of a compile() function to do something useful with an AST.\n");
fprintf(stdout, " // For example this generated the code that was manually converted into algol60-indent.h\n");
fprintf(stdout, "\n");
output_grammar(visited);
fprintf(stdout, " default:\n");
fprintf(stdout, " return r;\n");
fprintf(stdout, " }\n");
fprintf(stdout, "}\n");
fflush(stdout);
exit(EXIT_SUCCESS);
return EXIT_FAILURE;
}