-
-
Notifications
You must be signed in to change notification settings - Fork 421
/
regex.inc
290 lines (268 loc) · 12 KB
/
regex.inc
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
/**
* vim: set ts=4 sw=4 tw=99 noet :
* =============================================================================
* SourceMod (C)2004-2008 AlliedModders LLC. All rights reserved.
* =============================================================================
*
* This file is part of the SourceMod/SourcePawn SDK.
*
* This program is free software; you can redistribute it and/or modify it under
* the terms of the GNU General Public License, version 3.0, as published by the
* Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
* details.
*
* You should have received a copy of the GNU General Public License along with
* this program. If not, see <http://www.gnu.org/licenses/>.
*
* As a special exception, AlliedModders LLC gives you permission to link the
* code of this program (as well as its derivative works) to "Half-Life 2," the
* "Source Engine," the "SourcePawn JIT," and any Game MODs that run on software
* by the Valve Corporation. You must obey the GNU General Public License in
* all respects for all other code used. Additionally, AlliedModders LLC grants
* this exception to all derivative works. AlliedModders LLC defines further
* exceptions, found in LICENSE.txt (as of this writing, version JULY-31-2007),
* or <http://www.sourcemod.net/license.php>.
*
* Version: $Id$
*/
// Include guard: if _regex_included is already defined, the rest of this file
// is skipped; otherwise the symbol is defined so later inclusions are skipped.
#if defined _regex_included
#endinput
#endif
#define _regex_included
/**
* @section Flags for compiling regex expressions. These come directly from the
* pcre library and are accepted by the "flags" parameter of CompileRegex, the
* Regex constructor and SimpleRegexMatch.
*
* Each value is a distinct bit, so several flags can be combined with the
* bitwise OR operator (for example PCRE_CASELESS | PCRE_MULTILINE).
*/
#define PCRE_CASELESS 0x00000001 /* Ignore Case */
#define PCRE_MULTILINE 0x00000002 /* Multilines (affects ^ and $ so that they match the start/end of a line rather than matching the start/end of the string). */
#define PCRE_DOTALL 0x00000004 /* Single line (affects . so that it matches any character, even new line characters). */
#define PCRE_EXTENDED 0x00000008 /* Pattern extension (ignore whitespace and # comments). */
#define PCRE_ANCHORED 0x00000010 /* Force pattern anchoring. */
#define PCRE_DOLLAR_ENDONLY 0x00000020 /* $ not to match newline at end. */
#define PCRE_UNGREEDY 0x00000200 /* Invert greediness of quantifiers */
#define PCRE_NOTEMPTY 0x00000400 /* An empty string is not a valid match. */
#define PCRE_UTF8 0x00000800 /* Use UTF-8 Chars */
#define PCRE_NO_UTF8_CHECK 0x00002000 /* Do not check the pattern for UTF-8 validity (only relevant if PCRE_UTF8 is set) */
#define PCRE_UCP 0x20000000 /* Use Unicode properties for \d, \w, etc. */
/**
 * Error codes reported by the regex natives through their RegexError
 * by-reference parameters ("errcode" / "ret").
 *
 * Zero means success, the positive values describe problems with the
 * expression itself, and the negative values describe problems encountered
 * while matching.
 *
 * NOTE(review): the negative values look like PCRE's PCRE_ERROR_* codes (two
 * of the existing remarks below refer to PCRE directly) - confirm against
 * pcre.h before relying on that.
 */
enum RegexError
{
	REGEX_ERROR_NONE           = 0,   /* No error */

	REGEX_ERROR_ASSERT         = 1,   /* internal error ? */
	REGEX_ERROR_BADBR          = 2,   /* invalid repeat counts in {} */
	REGEX_ERROR_BADPAT         = 3,   /* pattern error */
	REGEX_ERROR_BADRPT         = 4,   /* ? * + invalid */
	REGEX_ERROR_EBRACE         = 5,   /* unbalanced {} */
	REGEX_ERROR_EBRACK         = 6,   /* unbalanced [] */
	REGEX_ERROR_ECOLLATE       = 7,   /* collation error - not relevant */
	REGEX_ERROR_ECTYPE         = 8,   /* bad class */
	REGEX_ERROR_EESCAPE        = 9,   /* bad escape sequence */
	REGEX_ERROR_EMPTY          = 10,  /* empty expression */
	REGEX_ERROR_EPAREN         = 11,  /* unbalanced () */
	REGEX_ERROR_ERANGE         = 12,  /* bad range inside [] */
	REGEX_ERROR_ESIZE          = 13,  /* expression too big */
	REGEX_ERROR_ESPACE         = 14,  /* failed to get memory */
	REGEX_ERROR_ESUBREG        = 15,  /* bad back reference */
	REGEX_ERROR_INVARG         = 16,  /* bad argument */

	REGEX_ERROR_NOMATCH        = -1,  /* No match was found */
	REGEX_ERROR_NULL           = -2,
	REGEX_ERROR_BADOPTION      = -3,
	REGEX_ERROR_BADMAGIC       = -4,
	REGEX_ERROR_UNKNOWN_OPCODE = -5,
	REGEX_ERROR_NOMEMORY       = -6,
	REGEX_ERROR_NOSUBSTRING    = -7,
	REGEX_ERROR_MATCHLIMIT     = -8,
	REGEX_ERROR_CALLOUT        = -9,  /* Never used by PCRE itself */
	REGEX_ERROR_BADUTF8        = -10,
	REGEX_ERROR_BADUTF8_OFFSET = -11,
	REGEX_ERROR_PARTIAL        = -12,
	REGEX_ERROR_BADPARTIAL     = -13,
	REGEX_ERROR_INTERNAL       = -14,
	REGEX_ERROR_BADCOUNT       = -15,
	REGEX_ERROR_DFA_UITEM      = -16,
	REGEX_ERROR_DFA_UCOND      = -17,
	REGEX_ERROR_DFA_UMLIMIT    = -18,
	REGEX_ERROR_DFA_WSSIZE     = -19,
	REGEX_ERROR_DFA_RECURSE    = -20,
	REGEX_ERROR_RECURSIONLIMIT = -21,
	REGEX_ERROR_NULLWSLIMIT    = -22, /* No longer actually used */
	REGEX_ERROR_BADNEWLINE     = -23,
	REGEX_ERROR_BADOFFSET      = -24,
	REGEX_ERROR_SHORTUTF8      = -25,
	REGEX_ERROR_RECURSELOOP    = -26,
	REGEX_ERROR_JIT_STACKLIMIT = -27,
	REGEX_ERROR_BADMODE        = -28,
	REGEX_ERROR_BADENDIANNESS  = -29,
	REGEX_ERROR_DFA_BADRESTART = -30,
	REGEX_ERROR_JIT_BADOPTION  = -31,
	REGEX_ERROR_BADLENGTH      = -32
};
// Regular expression objects are used to match or decompose strings based on
// patterns. A Regex is a Handle; SimpleRegexMatch in this file shows the
// expected lifetime (create with "new", release with "delete").
methodmap Regex < Handle
{
// Compile a regular expression.
//
// @param pattern The regular expression pattern.
// @param flags General flags for the regular expression (PCRE_* values).
// @param error Error message encountered, if applicable.
// @param maxLen Maximum string length of the error buffer.
// @param errcode Regex type error code encountered, if applicable.
// @return The compiled Regex. SimpleRegexMatch in this file treats
// a null result as a failed compilation.
public native Regex(const char[] pattern, int flags = 0, char[] error="", int maxLen = 0, RegexError &errcode = REGEX_ERROR_NONE);
// Matches a string against a pre-compiled regular expression pattern.
//
// @param str The string to check.
// @param ret Error code, if applicable.
// @param offset Offset in the string to start searching from. Use MatchOffset()
// afterwards to retrieve the offset of the match.
// @return Number of captures found or -1 on failure.
//
// @note Use the regex handle this method was called on to extract
// matches with GetSubString().
public native int Match(const char[] str, RegexError &ret = REGEX_ERROR_NONE, int offset = 0);
// Gets all matches from a string against a pre-compiled regular expression pattern.
//
// @param str The string to check.
// @param ret Error code, if applicable.
// @return Number of matches found or -1 on failure.
//
// @note Use GetSubString() and loop its "match" parameter from
// 0 to (number of matches - 1) to read each match.
public native int MatchAll(const char[] str, RegexError &ret = REGEX_ERROR_NONE);
// Returns a matched substring from a regex handle.
//
// Substring ids start at 0 and end at captures-1, where captures is the
// number returned by Regex.Match or Regex.CaptureCount.
//
// @param str_id The index of the expression to get - starts at 0, and ends at captures - 1.
// @param buffer The buffer to set to the matching substring.
// @param maxlen The maximum string length of the buffer.
// @param match Match to get the captures for - starts at 0, and ends at MatchCount() - 1.
// @return True if a substring was found, false on failure/error.
//
// @note str_id = 0 is the full captured string, anything else is the capture group index.
// If Regex.Match was used, match can only be 0.
public native bool GetSubString(int str_id, char[] buffer, int maxlen, int match = 0);
// Returns the number of matches.
//
// When using Match this is always 1 or 0 (unless an error occurred).
//
// @return Total number of matches found.
public native int MatchCount();
// Returns the number of captures for a match.
//
// @param match Match to get the number of captures for. Match starts at 0, and ends at MatchCount() - 1.
// @return Number of captures in the match.
//
// @note Use GetSubString() and loop str_id from 1 to captures - 1 to get all
// capture groups (str_id 0 is the full captured string).
public native int CaptureCount(int match = 0);
// Returns the string offset of a match.
//
// @param match Match to get the offset of. Match starts at 0, and ends at MatchCount() - 1.
// @return Offset of the match in the string.
public native int MatchOffset(int match = 0);
};
/**
* Precompiles a regular expression. Use this if you intend to use the
* same expression multiple times. Pass the regex handle returned here to
* MatchRegex to check for matches.
*
* @param pattern The regular expression pattern.
* @param flags General flags for the regular expression (PCRE_* values).
* @param error Error message encountered, if applicable.
* @param maxLen Maximum string length of the error buffer.
* @param errcode Regex type error code encountered, if applicable.
* @return Valid regex handle on success, INVALID_HANDLE on failure.
*/
native Regex CompileRegex(const char[] pattern, int flags = 0, char[] error="", int maxLen = 0, RegexError &errcode = REGEX_ERROR_NONE);
/**
* Matches a string against a pre-compiled regular expression pattern.
*
* @param regex Regex handle from CompileRegex().
* @param str The string to check.
* @param ret Error code, if applicable.
* @return Number of captures found or -1 on failure.
*
* @note Use the regex handle passed to this function to extract
* matches with GetRegexSubString().
*/
native int MatchRegex(Handle regex, const char[] str, RegexError &ret = REGEX_ERROR_NONE);
/**
* Returns a matched substring from a regex handle.
*
* Substring ids start at 0 and end at captures - 1, where captures is the
* number returned by MatchRegex.
*
* @param regex The regex handle to extract data from.
* @param str_id The index of the expression to get - starts at 0, and ends at captures - 1.
* @param buffer The buffer to set to the matching substring.
* @param maxlen The maximum string length of the buffer.
* @return True if a substring was found, false on failure/error.
*
* @note str_id = 0 is the full captured string, anything else is the capture group index.
*/
native bool GetRegexSubString(Handle regex, int str_id, char[] buffer, int maxlen);
/**
 * One-shot convenience matcher: compiles the pattern, runs a single match
 * against the string and frees the compiled expression before returning.
 *
 * @note            The pattern is compiled again on every call. If the same
 *                  pattern is used repeatedly, compile it once with
 *                  CompileRegex and match with MatchRegex instead.
 *
 * @param str       The string to check.
 * @param pattern   The regular expression pattern.
 * @param flags     General flags for the regular expression.
 * @param error     Error message, if applicable.
 * @param maxLen    Maximum length of the error buffer.
 * @return          Number of substrings found or -1 on failure (including
 *                  a pattern that could not be compiled).
 */
stock int SimpleRegexMatch(const char[] str, const char[] pattern, int flags = 0, char[] error="", int maxLen = 0)
{
	// Stays -1 when the pattern cannot be compiled.
	int result = -1;

	Regex compiled = new Regex(pattern, flags, error, maxLen);
	if (compiled != null)
	{
		result = compiled.Match(str);

		// The expression is only needed for this single match.
		delete compiled;
	}

	return result;
}
/**
* @endsection
*/
/**
* Do not edit below this line!
*
* Extension descriptor for the regex extension ("regex.ext").
* "autoload" is 1 only when AUTOLOAD_EXTENSIONS is defined and "required" is 1
* only when REQUIRE_EXTENSIONS is defined at the point this file is included;
* otherwise each field is 0.
*/
public Extension __ext_regex =
{
name = "Regex Extension",
file = "regex.ext",
#if defined AUTOLOAD_EXTENSIONS
autoload = 1,
#else
autoload = 0,
#endif
#if defined REQUIRE_EXTENSIONS
required = 1,
#else
required = 0,
#endif
};
// Only compiled when REQUIRE_EXTENSIONS is not defined (i.e. when the
// extension descriptor above has required = 0).
#if !defined REQUIRE_EXTENSIONS
// Marks every native declared in this include as optional: the three global
// natives and all Regex methodmap natives (named "Regex.<method>").
// NOTE(review): presumably this lets a plugin load without the regex
// extension present - confirm against the SourceMod loader documentation.
public void __ext_regex_SetNTVOptional()
{
MarkNativeAsOptional("CompileRegex");
MarkNativeAsOptional("MatchRegex");
MarkNativeAsOptional("GetRegexSubString");
MarkNativeAsOptional("Regex.Regex");
MarkNativeAsOptional("Regex.Match");
MarkNativeAsOptional("Regex.MatchAll");
MarkNativeAsOptional("Regex.GetSubString");
MarkNativeAsOptional("Regex.MatchCount");
MarkNativeAsOptional("Regex.CaptureCount");
MarkNativeAsOptional("Regex.MatchOffset");
}
#endif