From edbe8f5de75a975275ed1cba1d75d266c9bbf8c9 Mon Sep 17 00:00:00 2001
From: maca88 <bostjan.markezic@siol.net>
Date: Sat, 7 Oct 2017 20:25:56 +0200
Subject: [PATCH] Added case-insensitive support for the wildcard algorithm and
 corrected case-insensitive comparison for excludes

	modified:   src/Shared/FileMatcher.cs
---
 src/Shared/FileMatcher.cs                 |  79 ++++++--
 src/Shared/UnitTests/FileMatcher_Tests.cs | 233 +++++++++++++++++++++-
 2 files changed, 298 insertions(+), 14 deletions(-)
diff --git a/src/Shared/FileMatcher.cs b/src/Shared/FileMatcher.cs
index e48adb4c969..e7cbe484a1c 100644
--- a/src/Shared/FileMatcher.cs
+++ b/src/Shared/FileMatcher.cs
@@ -152,7 +152,7 @@ private static ImmutableArray<string> GetAccessibleFilesAndDirectories(string pa
                 {
                     return (ShouldEnforceMatching(pattern)
                         ? Directory.EnumerateFileSystemEntries(path, pattern)
-                            .Where(o => IsMatch(Path.GetFileName(o), pattern))
+                            .Where(o => IsMatch(Path.GetFileName(o), pattern, true))
                         : Directory.EnumerateFileSystemEntries(path, pattern)
                     ).ToImmutableArray();
                 }
@@ -231,7 +231,7 @@ bool stripProjectDirectory
                     files = Directory.EnumerateFiles(dir, filespec);
                     if (ShouldEnforceMatching(filespec))
                     {
-                        files = files.Where(o => IsMatch(Path.GetFileName(o), filespec));
+                        files = files.Where(o => IsMatch(Path.GetFileName(o), filespec, true));
                     }
                 }
                 // If the Item is based on a relative path we need to strip
@@ -292,7 +292,7 @@ string pattern
                     directories = Directory.EnumerateDirectories((path.Length == 0) ? s_thisDirectory : path, pattern);
                     if (ShouldEnforceMatching(pattern))
                     {
-                        directories = directories.Where(o => IsMatch(Path.GetFileName(o), pattern));
+                        directories = directories.Where(o => IsMatch(Path.GetFileName(o), pattern, true));
                     }
                 }
 
@@ -818,7 +818,7 @@ TaskOptions taskOptions
                     for (int i = 0; i < excludeNextSteps.Length; i++)
                     {
                         if (excludeNextSteps[i].NeedsDirectoryRecursion &&
-                            (excludeNextSteps[i].DirectoryPattern == null || IsMatch(Path.GetFileName(subdir), excludeNextSteps[i].DirectoryPattern)))
+                            (excludeNextSteps[i].DirectoryPattern == null || IsMatch(Path.GetFileName(subdir), excludeNextSteps[i].DirectoryPattern, true)))
                         {
                             RecursionState thisExcludeStep = searchesToExclude[i];
                             thisExcludeStep.BaseDirectory = subdir;
@@ -938,7 +938,7 @@ private static bool MatchFileRecursionStep(RecursionState recursionState, string
         {
             if (recursionState.SearchData.Filespec != null)
             {
-                return IsMatch(Path.GetFileName(file), recursionState.SearchData.Filespec);
+                return IsMatch(Path.GetFileName(file), recursionState.SearchData.Filespec, true);
             }
 
             // if no file-spec provided, match the file to the regular expression
@@ -1428,7 +1428,8 @@ internal Result()
         /// </summary>
         /// <param name="input">String which is matched against the pattern.</param>
         /// <param name="pattern">Pattern against which string is matched.</param>
-        internal static bool IsMatch(string input, string pattern)
+        /// <param name="ignoreCase">True to ignore case when comparing characters; false to compare them case-sensitively.</param>
+        internal static bool IsMatch(string input, string pattern, bool ignoreCase)
         {
             if (input == null)
             {
@@ -1454,6 +1455,44 @@ internal static bool IsMatch(string input, string pattern)
             // Store the information whether the tail was checked when a pattern "*?" occurred
             bool tailChecked = false;
 
+#if MONO    // MONO doesn't support local functions
+            Func<char, char, int, int, bool> CompareIgnoreCase = (inputChar, patternChar, iIndex, pIndex) =>
+#else
+            // Compares one character of input with one character of pattern, ignoring case.
+            // iIndex is the position of inputChar in input; pIndex is the position of patternChar in pattern.
+            // PERF NOTE:
+            // Having a local function instead of a variable increases the speed by approx. 2 times.
+            // Passing inputChar and patternChar increases the speed by approx. 10%, when comparing
+            // to using the string indexer. The iIndex and pIndex parameters are only used when we have to compare
+            // two non ASCII characters. Using just string.Compare would reduce the speed by approx. 5 times.
+            bool CompareIgnoreCase(char inputChar, char patternChar, int iIndex, int pIndex)
+#endif
+            {
+                // We will mostly be comparing ASCII characters, check this first
+                if (inputChar < 128 && patternChar < 128)
+                {
+                    if (inputChar >= 'A' && inputChar <= 'Z' && patternChar >= 'a' && patternChar <= 'z')
+                    {
+                        return inputChar + 32 == patternChar; // 32 is the distance between 'A' and 'a'
+                    }
+                    if (inputChar >= 'a' && inputChar <= 'z' && patternChar >= 'A' && patternChar <= 'Z')
+                    {
+                        return inputChar == patternChar + 32;
+                    }
+                    return inputChar == patternChar;
+                }
+                if (inputChar >= 128 && patternChar >= 128) // 128 (U+0080) is itself non ASCII, so it must be compared here
+                {
+                    return string.Compare(input, iIndex, pattern, pIndex, 1, StringComparison.OrdinalIgnoreCase) == 0;
+                }
+                // One character is ASCII and the other is not: they are treated as different without comparing,
+                // on the assumption that case mapping never crosses the ASCII boundary
+                return false;
+            }
+#if MONO
+            ; // The end of the CompareIgnoreCase anonymous function
+#endif
+
             while (inputIndex < inputLength)
             {
                 if (patternIndex < patternLength)
@@ -1483,10 +1522,19 @@ internal static bool IsMatch(string input, string pattern)
                                 inputTailIndex--;
                                 // If we encountered a * wildcard we are not sure if it matches as there can be zero or more than one characters
                                 // so we have to fallback to the standard procedure e.g. ("aaaabaaad", "*?b*d")
-                                if (pattern[patternTailIndex] == '*' || pattern[patternTailIndex] != input[inputTailIndex] && pattern[patternTailIndex] != '?')
+                                if (pattern[patternTailIndex] == '*')
                                 {
                                     break;
                                 }
+                                // If the tail doesn't match, we can safely return e.g. ("aaa", "*b"); indexes are passed in (input, pattern) order
+                                if ((
+                                        (!ignoreCase && input[inputTailIndex] != pattern[patternTailIndex]) ||
+                                        (ignoreCase && !CompareIgnoreCase(input[inputTailIndex], pattern[patternTailIndex], inputTailIndex, patternTailIndex))
+                                    ) &&
+                                    pattern[patternTailIndex] != '?')
+                                {
+                                    return false;
+                                }
                                 if (patternIndex == patternTailIndex)
                                 {
                                     return true;
@@ -1502,7 +1550,9 @@ internal static bool IsMatch(string input, string pattern)
                         // The ? wildcard cannot be skipped as we will have a wrong result for e.g. ("aab" "*?b")
                         if (pattern[patternIndex] != '?')
                         {
-                            while (input[inputIndex] != pattern[patternIndex])
+                            while (
+                                (!ignoreCase && input[inputIndex] != pattern[patternIndex]) ||
+                                (ignoreCase && !CompareIgnoreCase(input[inputIndex], pattern[patternIndex], inputIndex, patternIndex)))
                             {
                                 // Return if there is no character that match e.g. ("aa", "*b")
                                 if (++inputIndex >= inputLength)
@@ -1517,7 +1567,10 @@ internal static bool IsMatch(string input, string pattern)
                     }
 
                     // If we have a match, step to the next character
-                    if (pattern[patternIndex] == input[inputIndex] || pattern[patternIndex] == '?')
+                    if (
+                        (!ignoreCase && input[inputIndex] == pattern[patternIndex]) ||
+                        (ignoreCase && CompareIgnoreCase(input[inputIndex], pattern[patternIndex], inputIndex, patternIndex)) ||
+                        pattern[patternIndex] == '?')
                     {
                         patternIndex++;
                         inputIndex++;
@@ -2040,7 +2093,7 @@ DirectoryExists directoryExists
                     var excludeBaseDirectory = excludeState.BaseDirectory;
                     var includeBaseDirectory = state.BaseDirectory;
 
-                    if (excludeBaseDirectory != includeBaseDirectory)
+                    if (string.Compare(excludeBaseDirectory, includeBaseDirectory, StringComparison.OrdinalIgnoreCase) != 0)
                     {
                         //  What to do if the BaseDirectory for the exclude search doesn't match the one for inclusion?
                         //  - If paths don't match (one isn't a prefix of the other), then ignore the exclude search.  Examples:
@@ -2054,7 +2107,7 @@ DirectoryExists directoryExists
                         }
                         else if (excludeBaseDirectory.Length > includeBaseDirectory.Length)
                         {
-                            if (!excludeBaseDirectory.StartsWith(includeBaseDirectory))
+                            if (!excludeBaseDirectory.StartsWith(includeBaseDirectory, StringComparison.OrdinalIgnoreCase))
                             {
                                 //  Exclude path is longer, but doesn't start with include path.  So ignore it.
                                 continue;
@@ -2069,7 +2122,7 @@ DirectoryExists directoryExists
 
                             if (searchesToExcludeInSubdirs == null)
                             {
-                                searchesToExcludeInSubdirs = new Dictionary<string, List<RecursionState>>();
+                                searchesToExcludeInSubdirs = new Dictionary<string, List<RecursionState>>(StringComparer.OrdinalIgnoreCase);
                             }
                             List<RecursionState> listForSubdir;
                             if (!searchesToExcludeInSubdirs.TryGetValue(excludeBaseDirectory, out listForSubdir))
@@ -2083,7 +2136,7 @@ DirectoryExists directoryExists
                         else
                         {
                             //  Exclude base directory length is less than include base directory length.
-                            if (!state.BaseDirectory.StartsWith(excludeState.BaseDirectory))
+                            if (!state.BaseDirectory.StartsWith(excludeState.BaseDirectory, StringComparison.OrdinalIgnoreCase))
                             {
                                 //  Include path is longer, but doesn't start with the exclude path.  So ignore exclude path
                                 //  (since it won't match anything under the include path)
diff --git a/src/Shared/UnitTests/FileMatcher_Tests.cs b/src/Shared/UnitTests/FileMatcher_Tests.cs
index 87fa6a57e6e..10ce51d4cf7 100644
--- a/src/Shared/UnitTests/FileMatcher_Tests.cs
+++ b/src/Shared/UnitTests/FileMatcher_Tests.cs
@@ -78,6 +78,231 @@ public void GetFilesPatternMatching()
             }
         }
 
+        [Theory]
+        [InlineData(
+            @"src\**\inner\**\*.cs", // Include
+            new string[] { }, // Excludes
+            new[] // Expected matchings
+            {
+                @"src\foo\inner\foo.cs",
+                @"src\foo\inner\foo\foo.cs",
+                @"src\foo\inner\bar\bar.cs",
+                @"src\bar\inner\baz.cs",
+                @"src\bar\inner\baz\baz.cs",
+                @"src\bar\inner\foo\foo.cs"
+            }
+        )]
+        [InlineData(
+            @"src\**\inner\**\*.cs", // Include
+            new[] // Excludes
+            {
+                @"**\foo\**"
+            },
+            new[] // Expected matchings
+            {
+                @"src\bar\inner\baz.cs",
+                @"src\bar\inner\baz\baz.cs"
+            }
+        )]
+        [InlineData(
+            @"src\**\inner\**\*.cs", // Include
+            new[] // Excludes
+            {
+                @"src\bar\inner\baz\**"
+            },
+            new[] // Expected matchings
+            {
+                @"src\foo\inner\foo.cs",
+                @"src\foo\inner\foo\foo.cs",
+                @"src\foo\inner\bar\bar.cs",
+                @"src\bar\inner\baz.cs",
+                @"src\bar\inner\foo\foo.cs"
+            }
+        )]
+        [InlineData(
+            @"src\foo\**\*.cs", // Include
+            new[] // Excludes
+            {
+                @"src\foo\**\foo\**"
+            },
+            new[] // Expected matchings
+            {
+                @"src\foo\foo.cs",
+                @"src\foo\inner\foo.cs",
+                @"src\foo\inner\bar\bar.cs"
+            }
+        )]
+        [InlineData(
+            @"src\foo\inner\**\*.cs", // Include
+            new[] // Excludes
+            {
+                @"src\foo\**\???\**"
+            },
+            new[] // Expected matchings
+            {
+                @"src\foo\inner\foo.cs"
+            }
+        )]
+        [InlineData(
+            @"**\???\**\*.cs", // Include
+            new string[] { }, // Excludes
+            new[] // Expected matchings
+            {
+                @"src\foo.cs",
+                @"src\bar.cs",
+                @"src\baz.cs",
+                @"src\foo\foo.cs",
+                @"src\bar\bar.cs",
+                @"src\baz\baz.cs",
+                @"src\foo\inner\foo.cs",
+                @"src\foo\inner\foo\foo.cs",
+                @"src\foo\inner\bar\bar.cs",
+                @"src\bar\inner\baz.cs",
+                @"src\bar\inner\baz\baz.cs",
+                @"src\bar\inner\foo\foo.cs",
+                @"build\baz\foo.cs"
+            }
+        )]
+        [InlineData(
+            @"**\*.*", // Include
+            new[] // Excludes
+            {
+                @"**\???\**\*.cs"
+            },
+            new[] // Expected matchings
+            {
+                @"readme.txt",
+                @"licence.md"
+            }
+        )]
+        [InlineData(
+            @"**\?a?\**\?a?\*.c?", // Include
+            new string[] { }, // Excludes
+            new[] // Expected matchings
+            {
+                @"src\bar\inner\baz\baz.cs"
+            }
+        )]
+        [InlineData(
+            @"**\?a?\**\?a?.c?", // Include
+            new[] // Excludes
+            {
+                @"**\?a?\**\?a?\*.c?"
+            },
+            new[] // Expected matchings
+            {
+                @"src\bar\bar.cs",
+                @"src\baz\baz.cs",
+                @"src\foo\inner\bar\bar.cs",
+                @"src\bar\inner\baz.cs"
+            }
+        )]
+        public void GetFilesComplexGlobbingMatching(string includePattern, string[] excludePatterns, string[] expectedMatchings)
+        {
+            // Creates a fixed file tree under the temp path, runs FileMatcher.GetFiles on it for several slash/case variants of the patterns, then deletes the tree.
+            var workingPath = Path.Combine(Path.GetTempPath(), "Globbing");
+            var files = new []
+            {
+                @"src\foo.cs",
+                @"src\bar.cs",
+                @"src\baz.cs",
+                @"src\foo\foo.cs",
+                @"src\bar\bar.cs",
+                @"src\baz\baz.cs",
+                @"src\foo\inner\foo.cs",
+                @"src\foo\inner\foo\foo.cs",
+                @"src\foo\inner\bar\bar.cs",
+                @"src\bar\inner\baz.cs",
+                @"src\bar\inner\baz\baz.cs",
+                @"src\bar\inner\foo\foo.cs",
+                @"build\baz\foo.cs",
+                @"readme.txt",
+                @"licence.md"
+            };
+            // Runs one GetFiles call and asserts either that nothing matched (hasNoMatches) or that the result equals expectedMatchings.
+            Action<string, string[], bool> match = (include, excludes, hasNoMatches) =>
+            {
+                string[] matchedFiles = null;
+                try
+                {
+                    matchedFiles = FileMatcher.GetFiles(workingPath, include, excludes);
+                    if (hasNoMatches)
+                    {
+                        Assert.Equal(0, matchedFiles.Length);
+                        return;
+                    }
+                    // Normalize separators to '\' and lower the file paths, as the result could be in uppercase e.g. "SRC\foo.cs"
+                    var normMatchedFiles = matchedFiles
+                        .Select(o => o.Replace(Path.DirectorySeparatorChar, '\\').ToLowerInvariant()).ToArray();
+                    foreach (var matchedFile in normMatchedFiles)
+                    {
+                        Assert.Contains(matchedFile, expectedMatchings);
+                    }
+                    Assert.Equal(expectedMatchings.Length, matchedFiles.Length);
+                }
+                catch (Exception)
+                {
+                    Console.WriteLine($"Globbing failed for include {include} with excludes {string.Join(",", excludes)}," +
+                                      $"should have no matches: {hasNoMatches}, " +
+                                      $"returned files: {(matchedFiles != null ? string.Join(",", matchedFiles) : null)}");
+                    throw;
+                }
+            };
+
+            try
+            {
+                // Create directories and files
+                foreach (var file in files)
+                {
+                    var normFile = file.Replace('\\', Path.DirectorySeparatorChar);
+                    var dirPath = Path.Combine(workingPath, Path.GetDirectoryName(normFile));
+
+                    Directory.CreateDirectory(dirPath);
+                    File.WriteAllBytes(Path.Combine(dirPath, Path.GetFileName(normFile)), new byte[5000]);
+                }
+
+                // Normal matching
+                match(includePattern, excludePatterns, false);
+                // Include forward slash
+                match(includePattern.Replace('\\', '/'), excludePatterns, false);
+                // Excludes forward slash
+                match(includePattern, excludePatterns.Select(o => o.Replace('\\', '/')).ToArray(), false);
+
+                // Backward compatibility rules that decide whether the uppercase include below is expected to match nothing:
+                // 1. When an include or exclude starts with a fixed directory part e.g. "src/foo/**",
+                //    then matching should be case-sensitive on Linux, as the directory was checked for its existence
+                //    by using Directory.Exists, which is case-sensitive on Linux (on OSX it is not).
+                // 2. On Unix, a file pattern e.g. "*.cs", should be matched case-sensitive, as they were retrieved
+                //    by using the searchPattern parameter of Directory.GetFiles, which is case-sensitive on Unix.
+                var shouldHaveNoMatches = false;
+
+                // Do not test uppercase excludes on Linux, as it is not simple to figure out which files shall be excluded
+                if (!NativeMethodsShared.IsLinux)
+                {
+                    // Excludes uppercase
+                    match(includePattern, excludePatterns.Select(o => o.ToUpperInvariant()).ToArray(), false);
+                }
+                else
+                {
+                    // On Linux, we will have no matches for an uppercase include, when it starts with a fixed directory part
+                    // e.g. "SRC/FOO"
+                    shouldHaveNoMatches = !includePattern.StartsWith("**");
+                }
+                if (NativeMethodsShared.IsUnixLike)
+                {
+                    // On Unix, we will have no matches for an uppercase file pattern e.g. "*.CS"
+                    shouldHaveNoMatches |= char.IsLetter(includePattern.Last());
+                }
+
+                // Include uppercase
+                match(includePattern.ToUpperInvariant(), excludePatterns, shouldHaveNoMatches);
+            }
+            finally
+            {
+                FileUtilities.DeleteWithoutTrailingBackslash(workingPath, true);
+            }
+        }
+
         [Fact]
         public void WildcardMatching()
         {
@@ -88,6 +313,9 @@ public void WildcardMatching()
                 new Tuple<string, string, bool>("a", "", false),
                 new Tuple<string, string, bool>("", "a", false),
 
+                // Non ASCII characters
+                new Tuple<string, string, bool>("šđčćž", "šđčćž", true),
+
                 // * wildcard
                 new Tuple<string, string, bool>("abc", "*bc", true),
                 new Tuple<string, string, bool>("abc", "a*c", true),
@@ -148,7 +376,10 @@ public void WildcardMatching()
             {
                 try
                 {
-                    Assert.Equal(input.Item3, FileMatcher.IsMatch(input.Item1, input.Item2));
+                    Assert.Equal(input.Item3, FileMatcher.IsMatch(input.Item1, input.Item2, false));
+                    Assert.Equal(input.Item3, FileMatcher.IsMatch(input.Item1, input.Item2, true));
+                    Assert.Equal(input.Item3, FileMatcher.IsMatch(input.Item1.ToUpperInvariant(), input.Item2, true));
+                    Assert.Equal(input.Item3, FileMatcher.IsMatch(input.Item1, input.Item2.ToUpperInvariant(), true));
                 }
                 catch (Exception)
                 {