From edbe8f5de75a975275ed1cba1d75d266c9bbf8c9 Mon Sep 17 00:00:00 2001 From: maca88 Date: Sat, 7 Oct 2017 20:25:56 +0200 Subject: [PATCH] Added case-insensitive support for the wildcard algorithm and corrected case-insensitive comparison for excludes modified: src/Shared/FileMatcher.cs --- src/Shared/FileMatcher.cs | 79 ++++++-- src/Shared/UnitTests/FileMatcher_Tests.cs | 233 +++++++++++++++++++++- 2 files changed, 298 insertions(+), 14 deletions(-) diff --git a/src/Shared/FileMatcher.cs b/src/Shared/FileMatcher.cs index e48adb4c969..e7cbe484a1c 100644 --- a/src/Shared/FileMatcher.cs +++ b/src/Shared/FileMatcher.cs @@ -152,7 +152,7 @@ private static ImmutableArray GetAccessibleFilesAndDirectories(string pa { return (ShouldEnforceMatching(pattern) ? Directory.EnumerateFileSystemEntries(path, pattern) - .Where(o => IsMatch(Path.GetFileName(o), pattern)) + .Where(o => IsMatch(Path.GetFileName(o), pattern, true)) : Directory.EnumerateFileSystemEntries(path, pattern) ).ToImmutableArray(); } @@ -231,7 +231,7 @@ bool stripProjectDirectory files = Directory.EnumerateFiles(dir, filespec); if (ShouldEnforceMatching(filespec)) { - files = files.Where(o => IsMatch(Path.GetFileName(o), filespec)); + files = files.Where(o => IsMatch(Path.GetFileName(o), filespec, true)); } } // If the Item is based on a relative path we need to strip @@ -292,7 +292,7 @@ string pattern directories = Directory.EnumerateDirectories((path.Length == 0) ? 
s_thisDirectory : path, pattern); if (ShouldEnforceMatching(pattern)) { - directories = directories.Where(o => IsMatch(Path.GetFileName(o), pattern)); + directories = directories.Where(o => IsMatch(Path.GetFileName(o), pattern, true)); } } @@ -818,7 +818,7 @@ TaskOptions taskOptions for (int i = 0; i < excludeNextSteps.Length; i++) { if (excludeNextSteps[i].NeedsDirectoryRecursion && - (excludeNextSteps[i].DirectoryPattern == null || IsMatch(Path.GetFileName(subdir), excludeNextSteps[i].DirectoryPattern))) + (excludeNextSteps[i].DirectoryPattern == null || IsMatch(Path.GetFileName(subdir), excludeNextSteps[i].DirectoryPattern, true))) { RecursionState thisExcludeStep = searchesToExclude[i]; thisExcludeStep.BaseDirectory = subdir; @@ -938,7 +938,7 @@ private static bool MatchFileRecursionStep(RecursionState recursionState, string { if (recursionState.SearchData.Filespec != null) { - return IsMatch(Path.GetFileName(file), recursionState.SearchData.Filespec); + return IsMatch(Path.GetFileName(file), recursionState.SearchData.Filespec, true); } // if no file-spec provided, match the file to the regular expression @@ -1428,7 +1428,8 @@ internal Result() /// /// String which is matched against the pattern. /// Pattern against which string is matched. - internal static bool IsMatch(string input, string pattern) + /// Determines whether ignoring case when comparing two characters + internal static bool IsMatch(string input, string pattern, bool ignoreCase) { if (input == null) { @@ -1454,6 +1455,44 @@ internal static bool IsMatch(string input, string pattern) // Store the information whether the tail was checked when a pattern "*?" occurred bool tailChecked = false; +#if MONO // MONO doesn't support local functions + Func CompareIgnoreCase = (inputChar, patternChar, iIndex, pIndex) => +#else + // Function for comparing two characters, ignoring case + // PERF NOTE: + // Having a local function instead of a variable increases the speed by approx. 2 times. 
+ // Passing inputChar and patternChar increases the speed by approx. 10%, when comparing + // to using the string indexer. The iIndex and pIndex parameters are only used + // when we have to compare two non ASCII characters. Using just string.Compare for + // character comparison, would reduce the speed by approx. 5 times. + bool CompareIgnoreCase(char inputChar, char patternChar, int iIndex, int pIndex) +#endif + { + // We will mostly be comparing ASCII characters, check this first + if (inputChar < 128 && patternChar < 128) + { + if (inputChar >= 'A' && inputChar <= 'Z' && patternChar >= 'a' && patternChar <= 'z') + { + return inputChar + 32 == patternChar; + } + if (inputChar >= 'a' && inputChar <= 'z' && patternChar >= 'A' && patternChar <= 'Z') + { + return inputChar == patternChar + 32; + } + return inputChar == patternChar; + } + if (inputChar >= 128 && patternChar >= 128) + { + return string.Compare(input, iIndex, pattern, pIndex, 1, StringComparison.OrdinalIgnoreCase) == 0; + } + // We don't need to compare, an ASCII character cannot have its lowercase/uppercase outside the ASCII table + // and a non ASCII character cannot have its lowercase/uppercase inside the ASCII table + return false; + } +#if MONO + ; // The end of the CompareIgnoreCase anonymous function +#endif + while (inputIndex < inputLength) { if (patternIndex < patternLength) @@ -1483,10 +1522,19 @@ internal static bool IsMatch(string input, string pattern) inputTailIndex--; // If we encountered a * wildcard we are not sure if it matches as there can be zero or more than one characters // so we have to fallback to the standard procedure e.g. ("aaaabaaad", "*?b*d") - if (pattern[patternTailIndex] == '*' || pattern[patternTailIndex] != input[inputTailIndex] && pattern[patternTailIndex] != '?') + if (pattern[patternTailIndex] == '*') { break; } + // If the tail doesn't match, we can safely return e.g. 
("aaa", "*b") + if (( + (!ignoreCase && input[inputTailIndex] != pattern[patternTailIndex]) || + (ignoreCase && !CompareIgnoreCase(input[inputTailIndex], pattern[patternTailIndex], inputTailIndex, patternTailIndex)) + ) && + pattern[patternTailIndex] != '?') + { + return false; + } if (patternIndex == patternTailIndex) { return true; @@ -1502,7 +1550,9 @@ internal static bool IsMatch(string input, string pattern) // The ? wildcard cannot be skipped as we will have a wrong result for e.g. ("aab" "*?b") if (pattern[patternIndex] != '?') { - while (input[inputIndex] != pattern[patternIndex]) + while ( + (!ignoreCase && input[inputIndex] != pattern[patternIndex]) || + (ignoreCase && !CompareIgnoreCase(input[inputIndex], pattern[patternIndex], inputIndex, patternIndex))) { // Return if there is no character that match e.g. ("aa", "*b") if (++inputIndex >= inputLength) @@ -1517,7 +1567,10 @@ internal static bool IsMatch(string input, string pattern) } // If we have a match, step to the next character - if (pattern[patternIndex] == input[inputIndex] || pattern[patternIndex] == '?') + if ( + (!ignoreCase && input[inputIndex] == pattern[patternIndex]) || + (ignoreCase && CompareIgnoreCase(input[inputIndex], pattern[patternIndex], inputIndex, patternIndex)) || + pattern[patternIndex] == '?') { patternIndex++; inputIndex++; @@ -2040,7 +2093,7 @@ DirectoryExists directoryExists var excludeBaseDirectory = excludeState.BaseDirectory; var includeBaseDirectory = state.BaseDirectory; - if (excludeBaseDirectory != includeBaseDirectory) + if (string.Compare(excludeBaseDirectory, includeBaseDirectory, StringComparison.OrdinalIgnoreCase) != 0) { // What to do if the BaseDirectory for the exclude search doesn't match the one for inclusion? // - If paths don't match (one isn't a prefix of the other), then ignore the exclude search. 
Examples: @@ -2054,7 +2107,7 @@ DirectoryExists directoryExists } else if (excludeBaseDirectory.Length > includeBaseDirectory.Length) { - if (!excludeBaseDirectory.StartsWith(includeBaseDirectory)) + if (!excludeBaseDirectory.StartsWith(includeBaseDirectory, StringComparison.OrdinalIgnoreCase)) { // Exclude path is longer, but doesn't start with include path. So ignore it. continue; @@ -2069,7 +2122,7 @@ DirectoryExists directoryExists if (searchesToExcludeInSubdirs == null) { - searchesToExcludeInSubdirs = new Dictionary>(); + searchesToExcludeInSubdirs = new Dictionary>(StringComparer.OrdinalIgnoreCase); } List listForSubdir; if (!searchesToExcludeInSubdirs.TryGetValue(excludeBaseDirectory, out listForSubdir)) @@ -2083,7 +2136,7 @@ DirectoryExists directoryExists else { // Exclude base directory length is less than include base directory length. - if (!state.BaseDirectory.StartsWith(excludeState.BaseDirectory)) + if (!state.BaseDirectory.StartsWith(excludeState.BaseDirectory, StringComparison.OrdinalIgnoreCase)) { // Include path is longer, but doesn't start with the exclude path. 
So ignore exclude path // (since it won't match anything under the include path) diff --git a/src/Shared/UnitTests/FileMatcher_Tests.cs b/src/Shared/UnitTests/FileMatcher_Tests.cs index 87fa6a57e6e..10ce51d4cf7 100644 --- a/src/Shared/UnitTests/FileMatcher_Tests.cs +++ b/src/Shared/UnitTests/FileMatcher_Tests.cs @@ -78,6 +78,231 @@ public void GetFilesPatternMatching() } } + [Theory] + [InlineData( + @"src\**\inner\**\*.cs", // Include + new string[] { }, // Excludes + new[] // Expected matchings + { + @"src\foo\inner\foo.cs", + @"src\foo\inner\foo\foo.cs", + @"src\foo\inner\bar\bar.cs", + @"src\bar\inner\baz.cs", + @"src\bar\inner\baz\baz.cs", + @"src\bar\inner\foo\foo.cs" + } + )] + [InlineData( + @"src\**\inner\**\*.cs", // Include + new[] // Excludes + { + @"**\foo\**" + }, + new[] // Expected matchings + { + @"src\bar\inner\baz.cs", + @"src\bar\inner\baz\baz.cs" + } + )] + [InlineData( + @"src\**\inner\**\*.cs", // Include + new[] // Excludes + { + @"src\bar\inner\baz\**" + }, + new[] // Expected matchings + { + @"src\foo\inner\foo.cs", + @"src\foo\inner\foo\foo.cs", + @"src\foo\inner\bar\bar.cs", + @"src\bar\inner\baz.cs", + @"src\bar\inner\foo\foo.cs" + } + )] + [InlineData( + @"src\foo\**\*.cs", // Include + new[] // Excludes + { + @"src\foo\**\foo\**" + }, + new[] // Expected matchings + { + @"src\foo\foo.cs", + @"src\foo\inner\foo.cs", + @"src\foo\inner\bar\bar.cs" + } + )] + [InlineData( + @"src\foo\inner\**\*.cs", // Include + new[] // Excludes + { + @"src\foo\**\???\**" + }, + new[] // Expected matchings + { + @"src\foo\inner\foo.cs" + } + )] + [InlineData( + @"**\???\**\*.cs", // Include + new string[] { }, // Excludes + new[] // Expected matchings + { + @"src\foo.cs", + @"src\bar.cs", + @"src\baz.cs", + @"src\foo\foo.cs", + @"src\bar\bar.cs", + @"src\baz\baz.cs", + @"src\foo\inner\foo.cs", + @"src\foo\inner\foo\foo.cs", + @"src\foo\inner\bar\bar.cs", + @"src\bar\inner\baz.cs", + @"src\bar\inner\baz\baz.cs", + @"src\bar\inner\foo\foo.cs", + 
@"build\baz\foo.cs" + } + )] + [InlineData( + @"**\*.*", // Include + new[] // Excludes + { + @"**\???\**\*.cs" + }, + new[] // Expected matchings + { + @"readme.txt", + @"licence.md" + } + )] + [InlineData( + @"**\?a?\**\?a?\*.c?", // Include + new string[] { }, // Excludes + new[] // Expected matchings + { + @"src\bar\inner\baz\baz.cs" + } + )] + [InlineData( + @"**\?a?\**\?a?.c?", // Include + new[] // Excludes + { + @"**\?a?\**\?a?\*.c?" + }, + new[] // Expected matchings + { + @"src\bar\bar.cs", + @"src\baz\baz.cs", + @"src\foo\inner\bar\bar.cs", + @"src\bar\inner\baz.cs" + } + )] + public void GetFilesComplexGlobbingMatching(string includePattern, string[] excludePatterns, string[] expectedMatchings) + { + var workingPath = Path.Combine(Path.GetTempPath(), "Globbing"); + var files = new [] + { + @"src\foo.cs", + @"src\bar.cs", + @"src\baz.cs", + @"src\foo\foo.cs", + @"src\bar\bar.cs", + @"src\baz\baz.cs", + @"src\foo\inner\foo.cs", + @"src\foo\inner\foo\foo.cs", + @"src\foo\inner\bar\bar.cs", + @"src\bar\inner\baz.cs", + @"src\bar\inner\baz\baz.cs", + @"src\bar\inner\foo\foo.cs", + @"build\baz\foo.cs", + @"readme.txt", + @"licence.md" + }; + Action match = (include, excludes, hasNoMatches) => + { + string[] matchedFiles = null; + try + { + matchedFiles = FileMatcher.GetFiles(workingPath, include, excludes); + if (hasNoMatches) + { + Assert.Equal(0, matchedFiles.Length); + return; + } + // We have to lower file paths as the result could be in uppercase e.g. 
"SRC\foo.cs" + var normMatchedFiles = matchedFiles + .Select(o => o.Replace(Path.DirectorySeparatorChar, '\\').ToLowerInvariant()).ToArray(); + foreach (var matchedFile in normMatchedFiles) + { + Assert.Contains(matchedFile, expectedMatchings); + } + Assert.Equal(expectedMatchings.Length, matchedFiles.Length); + } + catch (Exception) + { + Console.WriteLine($"Globbing failed for include {include} with excludes {string.Join(",", excludes)}," + + $"should have no matches: {hasNoMatches}, " + + $"returned files: {(matchedFiles != null ? string.Join(",", matchedFiles) : null)}"); + throw; + } + }; + + try + { + // Create directories and files + foreach (var file in files) + { + var normFile = file.Replace('\\', Path.DirectorySeparatorChar); + var dirPath = Path.Combine(workingPath, Path.GetDirectoryName(normFile)); + + Directory.CreateDirectory(dirPath); + File.WriteAllBytes(Path.Combine(dirPath, Path.GetFileName(normFile)), new byte[5000]); + } + + // Normal matching + match(includePattern, excludePatterns, false); + // Include forward slash + match(includePattern.Replace('\\', '/'), excludePatterns, false); + // Excludes forward slash + match(includePattern, excludePatterns.Select(o => o.Replace('\\', '/')).ToArray(), false); + + // Backward compatibilities: + // 1. When an include or exclude starts with a fixed directory part e.g. "src/foo/**", + // then matching should be case-sensitive on Linux, as the directory was checked for its existance + // by using Directory.Exists, which is case-sensitive on Linux (on OSX is not). + // 2. On Unix, a file pattern e.g. "*.cs", should be matched case-sensitive, as they were retrieved + // by using the searchPattern parameter of Directory.GetFiles, which is case-sensitive on Unix. 
+ // + var shouldHaveNoMatches = false; + + // Do not test uppercase excludes on Linux, as it is not simple to figure out which files shall + // be excluded + if (!NativeMethodsShared.IsLinux) + { + // Excludes uppercase + match(includePattern, excludePatterns.Select(o => o.ToUpperInvariant()).ToArray(), false); + } + else + { + // On Linux, we will have no matches for an uppercase include, when it starts with a fixed directory part + // e.g. "SRC/FOO" + shouldHaveNoMatches = !includePattern.StartsWith("**"); + } + if (NativeMethodsShared.IsUnixLike) + { + // On Unix, we will have no matches for an uppercase file pattern e.g. "*.CS" + shouldHaveNoMatches |= char.IsLetter(includePattern.Last()); + } + + // Include uppercase + match(includePattern.ToUpperInvariant(), excludePatterns, shouldHaveNoMatches); + } + finally + { + FileUtilities.DeleteWithoutTrailingBackslash(workingPath, true); + } + } + [Fact] public void WildcardMatching() { @@ -88,6 +313,9 @@ public void WildcardMatching() new Tuple("a", "", false), new Tuple("", "a", false), + // Non ASCII characters + new Tuple("šđčćž", "šđčćž", true), + // * wildcard new Tuple("abc", "*bc", true), new Tuple("abc", "a*c", true), @@ -148,7 +376,10 @@ public void WildcardMatching() { try { - Assert.Equal(input.Item3, FileMatcher.IsMatch(input.Item1, input.Item2)); + Assert.Equal(input.Item3, FileMatcher.IsMatch(input.Item1, input.Item2, false)); + Assert.Equal(input.Item3, FileMatcher.IsMatch(input.Item1, input.Item2, true)); + Assert.Equal(input.Item3, FileMatcher.IsMatch(input.Item1.ToUpperInvariant(), input.Item2, true)); + Assert.Equal(input.Item3, FileMatcher.IsMatch(input.Item1, input.Item2.ToUpperInvariant(), true)); } catch (Exception) {