From 40bd4b03addd562f3e93ac87adb9cca7c7efc14f Mon Sep 17 00:00:00 2001 From: jrfnl Date: Sat, 21 Aug 2021 15:20:16 +0200 Subject: [PATCH] ClassMapGenerator: stabilize the heredoc/nowdoc stripping I've looked into 10067 and have come to the conclusion that using a single regex to strip the heredoc/nowdocs is always going to run into trouble as: * Either the matching will be too greedy (issue 10067); * Or the matching will run into backtrack limits for large heredoc/nowdocs. We cannot solve both within a single regex. So, I'm proposing a slightly different solution which should support both and should also improve performance for files containing large heredoc/nowdocs. The `stripHereNowDocs()` function will find a start marker and remember the offset of the start marker. It will then find the end marker and strip the contents between the two (replace with `null`). The function will then recurse onto itself until all heredocs/nowdocs in a file have been removed. --- src/Composer/Autoload/ClassMapGenerator.php | 33 ++++++++++++++++++++- 1 file changed, 32 insertions(+), 1 deletion(-) diff --git a/src/Composer/Autoload/ClassMapGenerator.php b/src/Composer/Autoload/ClassMapGenerator.php index 085136f0e151..6d9c00338958 100644 --- a/src/Composer/Autoload/ClassMapGenerator.php +++ b/src/Composer/Autoload/ClassMapGenerator.php @@ -246,7 +246,7 @@ private static function findClasses($path) } // strip heredocs/nowdocs - $contents = preg_replace('{<<<[ \t]*([\'"]?)(\w+)\\1(?:\r\n|\n|\r)(?:.*(?=[\r\n]+[ \t]*\\2))[\r\n]+[ \t]*\\2(?=\s*[;,.)])}s', 'null', $contents); + $contents = self::stripHereNowDocs($contents); // strip strings $contents = preg_replace('{"[^"\\\\]*+(\\\\.[^"\\\\]*+)*+"|\'[^\'\\\\]*+(\\\\.[^\'\\\\]*+)*+\'}s', 'null', $contents); // strip leading non-php code if needed @@ -303,4 +303,35 @@ private static function findClasses($path) return $classes; } + + /** + * Strip heredoc and nowdoc blocks from the contents of a file. 
+ * + * @param string $contents File contents. + * + * @return string The cleaned up file contents. + */ + private static function stripHereNowDocs($contents) + { + // Find a heredoc/nowdoc start marker an its offset in the file. + $result = preg_match('{<<<[ \t]*([\'"]?)(?P\w+)\\1[\r\n]}', $contents, $startMatches, PREG_OFFSET_CAPTURE); + if ($result < 1) { + return $contents; + } + + $offset = ($startMatches['marker'][1] + strlen($startMatches['marker'][0])); + $pattern = '`[\r\n]+[ \t]*' . preg_quote($startMatches['marker'][0], '`') . '(?=\s*[;,.)])`'; + + // Find the corresponding heredoc/nowdoc end marker an its offset in the file. + $result = preg_match($pattern, $contents, $endMatches, PREG_OFFSET_CAPTURE, $offset); + if ($result < 1) { + return $contents; + } + + // Strip the complete heredoc/nowdoc and replace it with "null". + $contents = substr_replace($contents, 'null', $startMatches[0][1], (($endMatches[0][1] + strlen($endMatches[0][0])) - $startMatches[0][1])); + + // Recurse to strip the next heredoc/nowdoc until there are none left. + return self::stripHereNowDocs($contents); + } }