forked from dun/conman
/
lex.h
177 lines (143 loc) · 5.85 KB
/
lex.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
/*****************************************************************************
* Written by Chris Dunlap <cdunlap@llnl.gov>.
* Copyright (C) 2007-2016 Lawrence Livermore National Security, LLC.
* Copyright (C) 2001-2007 The Regents of the University of California.
* UCRL-CODE-2002-009.
*
* This file is part of ConMan: The Console Manager.
* For details, see <https://dun.github.io/conman/>.
*
* ConMan is free software: you can redistribute it and/or modify it under
* the terms of the GNU General Public License as published by the Free
* Software Foundation, either version 3 of the License, or (at your option)
* any later version.
*
* ConMan is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* for more details.
*
* You should have received a copy of the GNU General Public License along
* with ConMan. If not, see <http://www.gnu.org/licenses/>.
*****************************************************************************/
#ifndef _LEX_H
#define _LEX_H
/*****************************************************************************\
* Laws of the Lexer:
*----------------------------------------------------------------------------
* - Whitespace is ignored.
* - Comments are ignored (from the pound char to the newline).
* - Lines may be terminated by either carriage-returns (CR),
* linefeeds (LF), or carriage-return/linefeed (CR/LF) pairs.
* - A newline may be escaped by immediately preceding it with a backslash.
* - Integers may begin with either a plus or minus, and contain only digits.
* - Strings may be single-quoted or double-quoted.
* - Strings cannot contain CRs or LFs.
* - Unquoted strings are sequences of letters, digits, and underscores;
* they may not begin with a digit (just like a C identifier).
* - Tokens are unquoted case-insensitive strings.
\*****************************************************************************/
/***********\
** Notes **
\***********/
/* When a memory allocation request fails, the lexer returns out_of_memory().
* By default, this is a macro definition that returns NULL; this macro may
* be redefined to invoke another routine instead. Furthermore, if WITH_OOMF
* is defined, this macro will not be defined and the lexer will expect an
* external Out-Of-Memory Function to be defined.
*/
/***************\
** Constants **
\***************/
#define LEX_MAX_STR 1024 /* max length of lexer string */
/*
* NOTE(review): this header does not say whether the limit counts the
* terminating NUL, nor how longer strings are handled -- confirm
* against the lexer implementation.
*/
/*
* Token values shared by every lexer, independent of the (toks) array.
* LEX_EOL starts at 256, above the range of the single-character tokens
* that lex_next() reports by their ASCII code.
*/
enum common_tokens {
    /* lex error token */
    LEX_ERR = -1,
    /* end-of-file/buffer token */
    LEX_EOF = 0,
    /* end-of-line token */
    LEX_EOL = 256,
    /* integer token: ([+-]?[0-9]+) */
    LEX_INT = LEX_EOL + 1,
    /* string token */
    LEX_STR = LEX_INT + 1,
    /* first value used for tokens from the (toks) array (cf. LEX_UNTOK macro) */
    LEX_TOK_OFFSET = LEX_STR + 1
};
/****************\
** Data Types **
\****************/
typedef struct lexer_state *Lex;
/*
* Lex opaque data type: a pointer to a struct lexer_state, whose members
* are not declared in this header.
* A Lex is returned by lex_create() and is the first argument of
* lex_destroy(), lex_next(), lex_prev(), lex_line(), and lex_text().
*/
/************\
** Macros **
\************/
#define LEX_UNTOK(tok) \
    ( ((tok) >= LEX_TOK_OFFSET) ? ((tok) - LEX_TOK_OFFSET) : (tok) )
/*
* LEX_TOK_OFFSET is the first enumeration value available to the
* array of strings supplied to lex_create (toks).
* LEX_UNTOK(tok) reverses that adjustment: a value at or above
* LEX_TOK_OFFSET is converted to the corresponding offset within the
* (toks) array, while any smaller value is returned unchanged.
* Note: (tok) is evaluated twice, so avoid arguments with side effects.
*/
/**********************\
** Lexing Functions **
\**********************/
Lex lex_create(void *buf, char *toks[]);
/*
* Creates and returns a new lexer, or out_of_memory() on failure
* (by default a macro that returns NULL; see the Notes section).
* (buf) is the NUL-terminated text to be lexed;
* this buffer WILL NOT be modified by the lexer.
* (toks) is a NULL-terminated array of strings defining the set of tokens
* that will be recognized by the lexer; these strings must be listed
* in a case-insensitive ascending order (ie, according to strcasecmp).
* Note: Abandoning a lexer without calling lex_destroy() will result
* in a memory leak.
*/
void lex_destroy(Lex l);
/*
* Destroys lexer (l), freeing memory used for the lexer itself.
* NOTE(review): "the lexer itself" suggests the (buf) and (toks) given to
* lex_create() are not freed here -- confirm against the implementation.
*/
int lex_next(Lex l);
/*
* Returns the next token in the buffer given to lex_create()
* according to the Laws of the Lexer.
* Single-character tokens (eg, punctuation) are specified by
* their ASCII code. Common tokens are specified by the
* common_tokens enumeration (eg, LEX_EOF at the end of the buffer,
* LEX_ERR for a lex error). Tokens specified by the (toks)
* array of strings begin at LEX_TOK_OFFSET. (cf. LEX_UNTOK macro).
*/
int lex_prev(Lex l);
/*
* Returns the last token returned by lex_next() for lexer (l),
* using the same token values as lex_next().
* NOTE(review): the value before the first lex_next() call is not
* documented here -- confirm against the implementation.
*/
int lex_line(Lex l);
/*
* Returns the line number of the last token returned by lex_next()
* for lexer (l).
* NOTE(review): the number of the first line (0 or 1) is not stated
* here -- confirm against the implementation.
*/
const char * lex_text(Lex l);
/*
* Returns the string corresponding to the last token returned by lex_next()
* for lexer (l). The result is const-qualified and must not be
* modified through this pointer.
* NOTE(review): the string presumably remains valid only until the next
* lex_next() or lex_destroy() call -- confirm against the implementation.
*/
/*************************\
** Auxiliary Functions **
\*************************/
char * lex_encode(char *str);
/*
* Encodes the string (str) so that it may safely be used by the lexer.
* This is needed if the string may contain quote characters.
* The string is modified in place, so (str) must point to writable
* storage (it cannot be a constant such as a string literal).
* Returns the encoded string; lex_decode() reverses the encoding.
*/
char * lex_decode(char *str);
/*
* Decodes the string (str) that has been encoded with lex_encode().
* The string is modified in place, so (str) must point to writable
* storage (it cannot be a constant such as a string literal).
* Returns the decoded string.
*/
/********************\
** Test Functions **
\********************/
void lex_parse_test(char *buf, char *toks[]);
/*
* Example code that tokenizes the buffer (buf) based upon the
* NULL-terminated array of strings (toks) that defines the
* set of recognized tokens.
* (buf) and (toks) have the same meaning as the arguments of lex_create().
* NOTE(review): how the resulting tokens are reported (eg, printed) is not
* stated here -- confirm against the implementation.
*/
#endif /* !_LEX_H */