/
FileSystemName.cs
411 lines (364 loc) · 20.2 KB
/
FileSystemName.cs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
using System;
using System.Text;
namespace System.IO.Enumeration
{
/// <summary>Provides methods for matching file system names.</summary>
public static class FileSystemName
{
/// <summary>Translates the given Win32 expression. Change '*' and '?' to '<', '>' and '"' to match Win32 behavior.</summary>
/// <param name="expression">The expression to translate.</param>
/// <returns>A string with the translated Win32 expression.</returns>
/// <remarks>For compatibility, Windows changes some wildcards to provide a closer match to historical DOS 8.3 filename matching.</remarks>
public static string TranslateWin32Expression(string? expression)
{
if (string.IsNullOrEmpty(expression) || expression == "*" || expression == "*.*")
return "*";
bool modified = false;
ValueStringBuilder sb = new ValueStringBuilder(stackalloc char[32]);
int length = expression.Length;
for (int i = 0; i < length; i++)
{
char c = expression[i];
switch (c)
{
case '.':
modified = true;
if (i >= 1 && i == length - 1 && expression[i - 1] == '*')
{
sb[sb.Length - 1] = '<'; // DOS_STAR (ends in *.)
}
else if (i < length - 1 && (expression[i + 1] == '?' || expression[i + 1] == '*'))
{
sb.Append('\"'); // DOS_DOT
}
else
{
sb.Append('.');
}
break;
case '?':
modified = true;
sb.Append('>'); // DOS_QM
break;
default:
sb.Append(c);
break;
}
}
return modified ? sb.ToString() : expression;
}
/// <summary>Verifies whether the given Win32 expression matches the given name. Supports the following wildcards: '*', '?', '<', '>', '"'. The backslash character '\' escapes.</summary>
/// <param name="expression">The expression to match with, such as "*.foo".</param>
/// <param name="name">The name to check against the expression.</param>
/// <param name="ignoreCase"><see langword="true" /> to ignore case (default), <see langword="false" /> if the match should be case-sensitive.</param>
/// <returns><see langword="true" /> if the given expression matches the given name; otherwise, <see langword="false" />.</returns>
/// <remarks>The syntax of the <paramref name="expression" /> parameter is based on the syntax used by FileSystemWatcher, which is based on [RtlIsNameInExpression](/windows/win32/devnotes/rtlisnameinexpression), which defines the rules for matching DOS wildcards (`'*'`, `'?'`, `'<'`, `'>'`, `'"'`).
/// Matching will not correspond to Win32 behavior unless you transform the expression using <see cref="FileSystemName.TranslateWin32Expression(string)" />.</remarks>
public static bool MatchesWin32Expression(ReadOnlySpan<char> expression, ReadOnlySpan<char> name, bool ignoreCase = true)
{
return MatchPattern(expression, name, ignoreCase, useExtendedWildcards: true);
}
/// <summary>Verifies whether the given expression matches the given name. Supports the following wildcards: '*' and '?'. The backslash character '\\' escapes.</summary>
/// <param name="expression">The expression to match with.</param>
/// <param name="name">The name to check against the expression.</param>
/// <param name="ignoreCase"><see langword="true" /> to ignore case (default); <see langword="false" /> if the match should be case-sensitive.</param>
/// <returns><see langword="true" /> if the given expression matches the given name; otherwise, <see langword="false" />.</returns>
public static bool MatchesSimpleExpression(ReadOnlySpan<char> expression, ReadOnlySpan<char> name, bool ignoreCase = true)
{
return MatchPattern(expression, name, ignoreCase, useExtendedWildcards: false);
}
// Matching routine description
// ============================
// (copied from native impl)
//
// This routine compares a Dbcs name and an expression and tells the caller
// if the name is in the language defined by the expression. The input name
// cannot contain wildcards, while the expression may contain wildcards.
//
// Expression wild cards are evaluated as shown in the nondeterministic
// finite automata below. Note that ~* and ~? are DOS_STAR and DOS_QM.
//
// ~* is DOS_STAR, ~? is DOS_QM, and ~. is DOS_DOT
//
// S
// <-----<
// X | | e Y
// X * Y == (0)----->-(1)->-----(2)-----(3)
//
// S-.
// <-----<
// X | | e Y
// X ~* Y == (0)----->-(1)->-----(2)-----(3)
//
// X S S Y
// X ?? Y == (0)---(1)---(2)---(3)---(4)
//
// X . . Y
// X ~.~. Y == (0)---(1)----(2)------(3)---(4)
// | |________|
// | ^ |
// |_______________|
// ^EOF or .^
//
// X S-. S-. Y
// X ~?~? Y == (0)---(1)-----(2)-----(3)---(4)
// | |________|
// | ^ |
// |_______________|
// ^EOF or .^
//
// where S is any single character
// S-. is any single character except the final .
// e is a null character transition
// EOF is the end of the name string
//
// In words:
//
// * matches 0 or more characters.
// ? matches exactly 1 character.
// DOS_STAR matches 0 or more characters until encountering and matching
// the final . in the name.
// DOS_QM matches any single character, or upon encountering a period or
// end of name string, advances the expression to the end of the
// set of contiguous DOS_QMs.
// DOS_DOT matches either a . or zero characters beyond name string.
private static bool MatchPattern(ReadOnlySpan<char> expression, ReadOnlySpan<char> name, bool ignoreCase, bool useExtendedWildcards)
{
// The idea behind the algorithm is pretty simple. We keep track of all possible locations
// in the regular expression that are matching the name. When the name has been exhausted,
// if one of the locations in the expression is also just exhausted, the name is in the
// language defined by the regular expression.
if (expression.Length == 0 || name.Length == 0)
return false;
if (expression[0] == '*')
{
// Just * matches everything
if (expression.Length == 1)
return true;
ReadOnlySpan<char> expressionEnd = expression.Slice(1);
// [MS - FSA] 2.1.4.4 Algorithm for Determining if a FileName Is in an Expression
// https://msdn.microsoft.com/en-us/library/ff469270.aspx
bool hasWildcards = (useExtendedWildcards ?
expressionEnd.IndexOfAny("\"<>*?") :
expressionEnd.IndexOfAny('*', '?')) >= 0;
if (!hasWildcards)
{
// Handle the special case of a single starting *, which essentially means "ends with"
// If the name doesn't have enough characters to match the remaining expression, it can't be a match.
if (name.Length < expressionEnd.Length)
return false;
// See if we end with the expression
return name.EndsWith(expressionEnd, ignoreCase ? StringComparison.OrdinalIgnoreCase : StringComparison.Ordinal);
}
}
int nameOffset = 0;
int expressionOffset;
int priorMatch;
int currentMatch;
int priorMatchCount;
int matchCount = 1;
char nameChar = '\0';
char expressionChar;
scoped Span<int> temp = default;
Span<int> currentMatches = stackalloc int[16];
Span<int> priorMatches = stackalloc int[16];
priorMatches[0] = 0;
int maxState = expression.Length * 2;
int currentState;
bool nameFinished = false;
// Walk through the name string, picking off characters. We go one
// character beyond the end because some wild cards are able to match
// zero characters beyond the end of the string.
//
// With each new name character we determine a new set of states that
// match the name so far. We use two arrays that we swap back and forth
// for this purpose. One array lists the possible expression states for
// all name characters up to but not including the current one, and other
// array is used to build up the list of states considering the current
// name character as well. The arrays are then switched and the process
// repeated.
//
// There is not a one-to-one correspondence between state number and
// offset into the expression. State numbering is not continuous.
// This allows a simple conversion between state number and expression
// offset. Each character in the expression can represent one or two
// states. * and DOS_STAR generate two states: expressionOffset * 2 and
// expressionOffset * 2 + 1. All other expression characters can produce
// only a single state. Thus expressionOffset = currentState / 2.
while (!nameFinished)
{
if (nameOffset < name.Length)
{
// Not at the end of the name. Grab the current character and move the offset forward.
nameChar = name[nameOffset++];
}
else
{
// At the end of the name. If the expression is exhausted, exit.
if (priorMatches[matchCount - 1] == maxState)
break;
nameFinished = true;
}
// Now, for each of the previous stored expression matches, see what
// we can do with this name character.
priorMatch = 0;
currentMatch = 0;
priorMatchCount = 0;
while (priorMatch < matchCount)
{
// We have to carry on our expression analysis as far as possible for each
// character of name, so we loop here until the expression stops matching.
expressionOffset = (priorMatches[priorMatch++] + 1) / 2;
while (expressionOffset < expression.Length)
{
currentState = expressionOffset * 2;
expressionChar = expression[expressionOffset];
// We may be about to exhaust the local space for matches,
// so we have to reallocate if this is the case.
if (currentMatch >= currentMatches.Length - 2)
{
int newSize = currentMatches.Length * 2;
temp = new int[newSize];
currentMatches.CopyTo(temp);
currentMatches = temp;
temp = new int[newSize];
priorMatches.CopyTo(temp);
priorMatches = temp;
}
if (expressionChar == '*')
{
// '*' matches any character zero or more times.
goto MatchZeroOrMore;
}
else if (useExtendedWildcards && expressionChar == '<')
{
// '<' (DOS_STAR) matches any character except '.' zero or more times.
// If we are at a period, determine if we are allowed to
// consume it, i.e. make sure it is not the last one.
bool notLastPeriod = false;
if (!nameFinished && nameChar == '.')
{
for (int offset = nameOffset; offset < name.Length; offset++)
{
if (name[offset] == '.')
{
notLastPeriod = true;
break;
}
}
}
if (nameFinished || nameChar != '.' || notLastPeriod)
{
goto MatchZeroOrMore;
}
else
{
// We are at a period. We can only match zero
// characters (i.e. the epsilon transition).
goto MatchZero;
}
}
else
{
// The remaining expression characters all match by consuming a character,
// so we need to force the expression and state forward.
currentState += 2;
if (useExtendedWildcards && expressionChar == '>')
{
// '>' (DOS_QM) is the most complicated. If the name is finished,
// we can match zero characters. If this name is a '.', we
// don't match, but look at the next expression. Otherwise
// we match a single character.
if (nameFinished || nameChar == '.')
goto NextExpressionCharacter;
currentMatches[currentMatch++] = currentState;
goto ExpressionFinished;
}
else if (useExtendedWildcards && expressionChar == '"')
{
// A '"' (DOS_DOT) can match either a period, or zero characters
// beyond the end of name.
if (nameFinished)
{
goto NextExpressionCharacter;
}
else if (nameChar == '.')
{
currentMatches[currentMatch++] = currentState;
}
goto ExpressionFinished;
}
else
{
if (expressionChar == '\\')
{
// Escape character, try to move the expression forward again and match literally.
if (++expressionOffset == expression.Length)
{
currentMatches[currentMatch++] = maxState;
goto ExpressionFinished;
}
currentState = expressionOffset * 2 + 2;
expressionChar = expression[expressionOffset];
}
// From this point on a name character is required to even
// continue, let alone make a match.
if (nameFinished) goto ExpressionFinished;
if (expressionChar == '?')
{
// If this expression was a '?' we can match it once.
currentMatches[currentMatch++] = currentState;
}
else if (ignoreCase
? char.ToUpperInvariant(expressionChar) == char.ToUpperInvariant(nameChar)
: expressionChar == nameChar)
{
// Matched a non-wildcard character
currentMatches[currentMatch++] = currentState;
}
goto ExpressionFinished;
}
}
MatchZeroOrMore:
currentMatches[currentMatch++] = currentState;
MatchZero:
currentMatches[currentMatch++] = currentState + 1;
NextExpressionCharacter:
if (++expressionOffset == expression.Length)
currentMatches[currentMatch++] = maxState;
} // while (expressionOffset < expression.Length)
ExpressionFinished:
// Prevent duplication in the destination array.
//
// Each of the arrays is monotonically increasing and non-duplicating, thus we skip
// over any source element in the source array if we just added the same element to
// the destination array. This guarantees non-duplication in the destination array.
if ((priorMatch < matchCount) && (priorMatchCount < currentMatch))
{
while (priorMatchCount < currentMatch)
{
int previousLength = priorMatches.Length;
while ((priorMatch < previousLength) && (priorMatches[priorMatch] < currentMatches[priorMatchCount]))
{
priorMatch++;
}
priorMatchCount++;
}
}
} // while (sourceCount < matchesCount)
// If we found no matches in the just finished iteration it's time to bail.
if (currentMatch == 0)
return false;
// Swap the meaning the two arrays
temp = priorMatches;
priorMatches = currentMatches;
currentMatches = temp;
matchCount = currentMatch;
} // while (!nameFinished)
currentState = priorMatches[matchCount - 1];
return currentState == maxState;
}
}
}