From 68b5944de3a245c12011feb22899ef640c8dadbe Mon Sep 17 00:00:00 2001 From: Charles Lyding <19598772+clydin@users.noreply.github.com> Date: Fri, 18 Nov 2022 19:22:57 -0500 Subject: [PATCH] fix(@angular-devkit/build-angular): use url function lexer to rebase Sass URLs When rebasing URLs found within Sass files (sass/scss), the previous regular-expression-based search has been replaced with a lexer that scans the Sass files for CSS url() functions and extracts URL values. This change allows for more accurate discovery of URLs and reduces the number of content traversals per file. The lexer logic is based on CSS Syntax Module Level 3 (https://www.w3.org/TR/css-syntax-3/). --- .../src/sass/rebasing-importer.ts | 222 +++++++++++++++--- 1 file changed, 186 insertions(+), 36 deletions(-) diff --git a/packages/angular_devkit/build_angular/src/sass/rebasing-importer.ts b/packages/angular_devkit/build_angular/src/sass/rebasing-importer.ts index eb4476a9b825..e1faac8f2547 100644 --- a/packages/angular_devkit/build_angular/src/sass/rebasing-importer.ts +++ b/packages/angular_devkit/build_angular/src/sass/rebasing-importer.ts @@ -13,12 +13,6 @@ import { basename, dirname, extname, join, relative } from 'node:path'; import { fileURLToPath, pathToFileURL } from 'node:url'; import type { FileImporter, Importer, ImporterResult, Syntax } from 'sass'; -/** - * A Regular expression used to find all `url()` functions within a stylesheet. - * From packages/angular_devkit/build_angular/src/webpack/plugins/postcss-cli-resources.ts - */ -const URL_REGEXP = /url(?:\(\s*(['"]?))(.*?)(?:\1\s*\))/g; - /** * A preprocessed cache entry for the files and directories within a previously searched * directory when performing Sass import resolution. 
@@ -54,44 +48,42 @@ abstract class UrlRebasingImporter implements Importer<'sync'> { load(canonicalUrl: URL): ImporterResult | null { const stylesheetPath = fileURLToPath(canonicalUrl); + const stylesheetDirectory = dirname(stylesheetPath); let contents = readFileSync(stylesheetPath, 'utf-8'); // Rebase any URLs that are found - if (contents.includes('url(')) { - const stylesheetDirectory = dirname(stylesheetPath); - - let match; - URL_REGEXP.lastIndex = 0; - let updatedContents; - while ((match = URL_REGEXP.exec(contents))) { - const originalUrl = match[2]; + let updatedContents; + for (const { start, end, value } of findUrls(contents)) { + // Skip if value is empty or a Sass variable + if (value.length === 0 || value.startsWith('$')) { + continue; + } - // If root-relative, absolute or protocol relative url, leave as-is - if (/^((?:\w+:)?\/\/|data:|chrome:|#|\/)/.test(originalUrl)) { - continue; - } + // Skip if root-relative, absolute or protocol relative url + if (/^((?:\w+:)?\/\/|data:|chrome:|#|\/)/.test(value)) { + continue; + } - const rebasedPath = relative(this.entryDirectory, join(stylesheetDirectory, originalUrl)); + const rebasedPath = relative(this.entryDirectory, join(stylesheetDirectory, value)); - // Normalize path separators and escape characters - // https://developer.mozilla.org/en-US/docs/Web/CSS/url#syntax - const rebasedUrl = './' + rebasedPath.replace(/\\/g, '/').replace(/[()\s'"]/g, '\\$&'); + // Normalize path separators and escape characters + // https://developer.mozilla.org/en-US/docs/Web/CSS/url#syntax + const rebasedUrl = './' + rebasedPath.replace(/\\/g, '/').replace(/[()\s'"]/g, '\\$&'); - updatedContents ??= new MagicString(contents); - updatedContents.update(match.index, match.index + match[0].length, `url(${rebasedUrl})`); - } + updatedContents ??= new MagicString(contents); + updatedContents.update(start, end, rebasedUrl); + } - if (updatedContents) { - contents = updatedContents.toString(); - if (this.rebaseSourceMaps) { - // 
Generate an intermediate source map for the rebasing changes - const map = updatedContents.generateMap({ - hires: true, - includeContent: true, - source: canonicalUrl.href, - }); - this.rebaseSourceMaps.set(canonicalUrl.href, map as RawSourceMap); - } + if (updatedContents) { + contents = updatedContents.toString(); + if (this.rebaseSourceMaps) { + // Generate an intermediate source map for the rebasing changes + const map = updatedContents.generateMap({ + hires: true, + includeContent: true, + source: canonicalUrl.href, + }); + this.rebaseSourceMaps.set(canonicalUrl.href, map as RawSourceMap); } } @@ -116,6 +108,164 @@ abstract class UrlRebasingImporter implements Importer<'sync'> { } } +/** + * Determines if a unicode code point is a CSS whitespace character. + * @param code The unicode code point to test. + * @returns true, if the code point is CSS whitespace; false, otherwise. + */ +function isWhitespace(code: number): boolean { + // Based on https://www.w3.org/TR/css-syntax-3/#whitespace + switch (code) { + case 0x0009: // tab + case 0x0020: // space + case 0x000a: // line feed + case 0x000c: // form feed + case 0x000d: // carriage return + return true; + default: + return false; + } +} + +/** + * Scans a CSS or Sass file and locates all valid url function values as defined by the CSS + * syntax specification. + * @param contents A string containing a CSS or Sass file to scan. + * @returns An iterable that yields each CSS url function value found. + */ +function* findUrls(contents: string): Iterable<{ start: number; end: number; value: string }> { + let pos = 0; + let width = 1; + let current = -1; + const next = () => { + pos += width; + current = contents.codePointAt(pos) ?? -1; + width = current > 0xffff ? 
2 : 1; + + return current; + }; + + // Based on https://www.w3.org/TR/css-syntax-3/#consume-ident-like-token + while ((pos = contents.indexOf('url(', pos)) !== -1) { + // Set to position of the ( + pos += 3; + width = 1; + + // Consume all leading whitespace + while (isWhitespace(next())) { + /* empty */ + } + + // Initialize URL state + const url = { start: pos, end: -1, value: '' }; + let complete = false; + + // If " or ', then consume the value as a string + if (current === 0x0022 || current === 0x0027) { + const ending = current; + // Based on https://www.w3.org/TR/css-syntax-3/#consume-string-token + while (!complete) { + switch (next()) { + case -1: // EOF + return; + case 0x000a: // line feed + case 0x000c: // form feed + case 0x000d: // carriage return + // Invalid + complete = true; + break; + case 0x005c: // \ -- character escape + // If not EOF or newline, add the character after the escape + switch (next()) { + case -1: + return; + case 0x000a: // line feed + case 0x000c: // form feed + case 0x000d: // carriage return + // Skip when inside a string + break; + default: + // TODO: Handle hex escape codes + url.value += String.fromCodePoint(current); + break; + } + break; + case ending: + // Full string position should include the quotes for replacement + url.end = pos + 1; + complete = true; + yield url; + break; + default: + url.value += String.fromCodePoint(current); + break; + } + } + + next(); + continue; + } + + // Based on https://www.w3.org/TR/css-syntax-3/#consume-url-token + while (!complete) { + switch (current) { + case -1: // EOF + return; + case 0x0022: // " + case 0x0027: // ' + case 0x0028: // ( + // Invalid + complete = true; + break; + case 0x0029: // ) + // URL is valid and complete + url.end = pos; + complete = true; + break; + case 0x005c: // \ -- character escape + // If not EOF or newline, add the character after the escape + switch (next()) { + case -1: // EOF + return; + case 0x000a: // line feed + case 0x000c: // form feed + case 
0x000d: // carriage return + // Invalid + complete = true; + break; + default: + // TODO: Handle hex escape codes + url.value += String.fromCodePoint(current); + break; + } + break; + default: + if (isWhitespace(current)) { + while (isWhitespace(next())) { + /* empty */ + } + // Unescaped whitespace is only valid before the closing ) + if (current === 0x0029) { + // URL is valid + url.end = pos; + } + complete = true; + } else { + // Add the character to the url value + url.value += String.fromCodePoint(current); + } + break; + } + next(); + } + + // An end position indicates a URL was found + if (url.end !== -1) { + yield url; + } + } +} + /** * Provides the Sass importer logic to resolve relative stylesheet imports via both import and use rules * and also rebase any `url()` function usage within those stylesheets. The rebasing will ensure that