From d5671cfb50a45ff2c1e45d52f2869776016bc6c7 Mon Sep 17 00:00:00 2001 From: Alan Agius Date: Fri, 2 Oct 2020 14:07:31 +0200 Subject: [PATCH 1/2] refactor(@angular-devkit/build-angular): use parse5-html-rewriting-stream instead of parse5-htmlparser2-tree-adapter Closes: #17019 --- package.json | 3 +- .../angular_devkit/build_angular/BUILD.bazel | 4 +- .../angular_devkit/build_angular/package.json | 3 +- .../utils/index-file/augment-index-html.ts | 196 ++++++++---------- .../utils/index-file/html-rewriting-stream.ts | 41 ++++ yarn.lock | 37 ++-- 6 files changed, 159 insertions(+), 125 deletions(-) create mode 100644 packages/angular_devkit/build_angular/src/utils/index-file/html-rewriting-stream.ts diff --git a/package.json b/package.json index 02485f9e4ab2..459322e2e3ac 100644 --- a/package.json +++ b/package.json @@ -111,6 +111,7 @@ "@types/node": "10.12.30", "@types/node-fetch": "^2.1.6", "@types/npm-package-arg": "^6.1.0", + "@types/parse5-html-rewriting-stream": "^5.1.2", "@types/pidusage": "^2.0.1", "@types/progress": "^2.0.3", "@types/request": "^2.47.1", @@ -180,9 +181,7 @@ "open": "7.3.0", "ora": "5.1.0", "pacote": "11.1.4", - "parse5": "6.0.1", "parse5-html-rewriting-stream": "6.0.1", - "parse5-htmlparser2-tree-adapter": "6.0.1", "pidtree": "^0.5.0", "pidusage": "^2.0.17", "pnp-webpack-plugin": "1.6.4", diff --git a/packages/angular_devkit/build_angular/BUILD.bazel b/packages/angular_devkit/build_angular/BUILD.bazel index 6894c5d6129a..c0ca0c55895f 100644 --- a/packages/angular_devkit/build_angular/BUILD.bazel +++ b/packages/angular_devkit/build_angular/BUILD.bazel @@ -125,6 +125,7 @@ ts_library( "@npm//@types/loader-utils", "@npm//@types/minimatch", "@npm//@types/node", + "@npm//@types/parse5-html-rewriting-stream", "@npm//@types/rimraf", "@npm//@types/semver", "@npm//@types/speed-measure-webpack-plugin", @@ -160,8 +161,7 @@ ts_library( "@npm//ng-packagr", "@npm//open", "@npm//ora", - "@npm//parse5", - "@npm//parse5-htmlparser2-tree-adapter", + "@npm//parse5-html-rewriting-stream", "@npm//pnp-webpack-plugin", "@npm//postcss", "@npm//postcss-import", diff --git a/packages/angular_devkit/build_angular/package.json b/packages/angular_devkit/build_angular/package.json index afd8be5d37c3..bc5088b3f721 100644 --- a/packages/angular_devkit/build_angular/package.json +++ b/packages/angular_devkit/build_angular/package.json @@ -44,8 +44,7 @@ "minimatch": "3.0.4", "open": "7.3.0", "ora": "5.1.0", - "parse5": "6.0.1", - "parse5-htmlparser2-tree-adapter": "6.0.1", + "parse5-html-rewriting-stream": "6.0.1", "pnp-webpack-plugin": "1.6.4", "postcss": "7.0.32", "postcss-import": "12.0.1", diff --git a/packages/angular_devkit/build_angular/src/utils/index-file/augment-index-html.ts b/packages/angular_devkit/build_angular/src/utils/index-file/augment-index-html.ts index 5c040f281aea..26b3ca7b42c1 100644 --- a/packages/angular_devkit/build_angular/src/utils/index-file/augment-index-html.ts +++ b/packages/angular_devkit/build_angular/src/utils/index-file/augment-index-html.ts @@ -7,10 +7,7 @@ */ import { createHash } from 'crypto'; -import { RawSource, ReplaceSource } from 'webpack-sources'; - -const parse5 = require('parse5'); -const treeAdapter = require('parse5-htmlparser2-tree-adapter'); +import { htmlRewritingStream } from './html-rewriting-stream'; export type LoadOutputFileFunctionType = (file: string) => Promise; @@ -59,12 +56,14 @@ export interface FileInfo { * after processing several configurations in order to build different sets of * bundles for differential serving. */ -// tslint:disable-next-line: no-big-function export async function augmentIndexHtml(params: AugmentIndexHtmlOptions): Promise { - const { loadOutputFile, files, noModuleFiles = [], moduleFiles = [], entrypoints } = params; + const { + loadOutputFile, files, noModuleFiles = [], moduleFiles = [], entrypoints, + sri, deployUrl = '', lang, baseHref, inputContent, + } = params; let { crossOrigin = 'none' } = params; - if (params.sri && crossOrigin === 'none') { + if (sri && crossOrigin === 'none') { crossOrigin = 'anonymous'; } @@ -90,33 +89,12 @@ export async function augmentIndexHtml(params: AugmentIndexHtmlOptions): Promise } } - // Find the head and body elements - const document = parse5.parse(params.inputContent, { - treeAdapter, - sourceCodeLocationInfo: true, - }); - - // tslint:disable: no-any - const htmlElement = document.children.find((c: any) => c.name === 'html'); - const headElement = htmlElement.children.find((c: any) => c.name === 'head'); - const bodyElement = htmlElement.children.find((c: any) => c.name === 'body'); - // tslint:enable: no-any - - if (!headElement || !bodyElement) { - throw new Error('Missing head and/or body elements'); - } - - // Inject into the html - const indexSource = new ReplaceSource(new RawSource(params.inputContent), params.input); - - const scriptsElements = treeAdapter.createDocumentFragment(); + const scriptTags: string[] = []; for (const script of scripts) { - const attrs: { name: string; value: string }[] = [ - { name: 'src', value: (params.deployUrl || '') + script }, - ]; + const attrs = [`src="${deployUrl}${script}"`]; if (crossOrigin !== 'none') { - attrs.push({ name: 'crossorigin', value: crossOrigin }); + attrs.push(`crossorigin="${crossOrigin}"`); } // We want to include nomodule or module when a file is not common amongs all @@ -130,111 +108,115 @@ export async function augmentIndexHtml(params: AugmentIndexHtmlOptions): Promise const isModuleType = moduleFiles.some(scriptPredictor); if (isNoModuleType && !isModuleType) { - attrs.push( - { name: 'nomodule', value: '' }, - { name: 'defer', value: '' }, - ); + attrs.push('nomodule', 'defer'); } else if (isModuleType && !isNoModuleType) { - attrs.push({ name: 'type', value: 'module' }); + attrs.push('type="module"'); } else { - attrs.push({ name: 'defer', value: '' }); + attrs.push('defer'); } } else { - attrs.push({ name: 'defer', value: '' }); + attrs.push('defer'); } - if (params.sri) { + if (sri) { const content = await loadOutputFile(script); - attrs.push(_generateSriAttributes(content)); + attrs.push(generateSriAttributes(content)); } - const baseElement = treeAdapter.createElement('script', undefined, attrs); - treeAdapter.setTemplateContent(scriptsElements, baseElement); + scriptTags.push(``); } - indexSource.insert( - // parse5 does not provide locations if malformed html is present - bodyElement.sourceCodeLocation?.endTag?.startOffset || params.inputContent.indexOf(''), - parse5.serialize(scriptsElements, { treeAdapter }).replace(/\=""/g, ''), - ); - - // Adjust base href if specified - if (typeof params.baseHref == 'string') { - // tslint:disable-next-line: no-any - let baseElement = headElement.children.find((t: any) => t.name === 'base'); - const baseFragment = treeAdapter.createDocumentFragment(); - - if (!baseElement) { - baseElement = treeAdapter.createElement('base', undefined, [ - { name: 'href', value: params.baseHref }, - ]); - - treeAdapter.setTemplateContent(baseFragment, baseElement); - indexSource.insert( - headElement.sourceCodeLocation.startTag.endOffset, - parse5.serialize(baseFragment, { treeAdapter }), - ); - } else { - baseElement.attribs['href'] = params.baseHref; - treeAdapter.setTemplateContent(baseFragment, baseElement); - indexSource.replace( - baseElement.sourceCodeLocation.startOffset, - baseElement.sourceCodeLocation.endOffset - 1, - parse5.serialize(baseFragment, { treeAdapter }), - ); - } - } - - const styleElements = treeAdapter.createDocumentFragment(); + const linkTags: string[] = []; for (const stylesheet of stylesheets) { const attrs = [ - { name: 'rel', value: 'stylesheet' }, - { name: 'href', value: (params.deployUrl || '') + stylesheet }, + `rel="stylesheet"`, + `href="${deployUrl}${stylesheet}"`, ]; if (crossOrigin !== 'none') { - attrs.push({ name: 'crossorigin', value: crossOrigin }); + attrs.push(`crossorigin="${crossOrigin}"`); } - if (params.sri) { + if (sri) { const content = await loadOutputFile(stylesheet); - attrs.push(_generateSriAttributes(content)); + attrs.push(generateSriAttributes(content)); } - const element = treeAdapter.createElement('link', undefined, attrs); - treeAdapter.setTemplateContent(styleElements, element); + linkTags.push(``); } - indexSource.insert( - // parse5 does not provide locations if malformed html is present - headElement.sourceCodeLocation?.endTag?.startOffset || params.inputContent.indexOf(''), - parse5.serialize(styleElements, { treeAdapter }), - ); - - // Adjust document locale if specified - if (typeof params.lang == 'string') { - const htmlFragment = treeAdapter.createDocumentFragment(); - htmlElement.attribs['lang'] = params.lang; - - // we want only openning tag - htmlElement.children = []; - - treeAdapter.setTemplateContent(htmlFragment, htmlElement); - indexSource.replace( - htmlElement.sourceCodeLocation.startTag.startOffset, - htmlElement.sourceCodeLocation.startTag.endOffset - 1, - parse5.serialize(htmlFragment, { treeAdapter }).replace('', ''), - ); - } + const { rewriter, transformedContent } = await htmlRewritingStream(inputContent); + const baseTagExists = inputContent.includes(' { + switch (tag.tagName) { + case 'html': + // Adjust document locale if specified + if (isString(lang)) { + updateAttribute(tag, 'lang', lang); + } + break; + case 'head': + // Base href should be added before any link, meta tags + if (!baseTagExists && isString(baseHref)) { + rewriter.emitStartTag(tag); + rewriter.emitRaw(``); + + return; + } + break; + case 'base': + // Adjust base href if specified + if (isString(baseHref)) { + updateAttribute(tag, 'href', baseHref); + } + break; + } + + rewriter.emitStartTag(tag); + }) + .on('endTag', tag => { + switch (tag.tagName) { + case 'head': + for (const linkTag of linkTags) { + rewriter.emitRaw(linkTag); + } + break; + case 'body': + // Add script tags + for (const scriptTag of scriptTags) { + rewriter.emitRaw(scriptTag); + } + break; + } + + rewriter.emitEndTag(tag); + }); - return indexSource.source(); + return transformedContent; } -function _generateSriAttributes(content: string) { +function generateSriAttributes(content: string): string { const algo = 'sha384'; const hash = createHash(algo) .update(content, 'utf8') .digest('base64'); - return { name: 'integrity', value: `${algo}-${hash}` }; + return `integrity="${algo}-${hash}"`; +} + +function updateAttribute(tag: { attrs: { name: string, value: string }[] }, name: string, value: string): void { + const index = tag.attrs.findIndex(a => a.name === name); + const newValue = { name, value }; + + if (index === -1) { + tag.attrs.push(newValue); + } else { + tag.attrs[index] = newValue; + } +} + +function isString(value: unknown): value is string { + return typeof value === 'string'; } diff --git a/packages/angular_devkit/build_angular/src/utils/index-file/html-rewriting-stream.ts b/packages/angular_devkit/build_angular/src/utils/index-file/html-rewriting-stream.ts new file mode 100644 index 000000000000..8cdf857d5cef --- /dev/null +++ b/packages/angular_devkit/build_angular/src/utils/index-file/html-rewriting-stream.ts @@ -0,0 +1,41 @@ +/** + * @license + * Copyright Google Inc. All Rights Reserved. + * + * Use of this source code is governed by an MIT-style license that can be + * found in the LICENSE file at https://angular.io/license + */ + +import { Readable, Writable } from 'stream'; + +export async function htmlRewritingStream(content: string): Promise<{ + rewriter: import('parse5-html-rewriting-stream'), + transformedContent: Promise, +}> { + const chunks: Buffer[] = []; + const rewriter = new (await import('parse5-html-rewriting-stream'))(); + + return { + rewriter, + transformedContent: new Promise(resolve => { + new Readable({ + encoding: 'utf8', + read(): void { + this.push(Buffer.from(content)); + this.push(null); + }, + }) + .pipe(rewriter) + .pipe(new Writable({ + write(chunk: string | Buffer, encoding: string | undefined, callback: Function): void { + chunks.push(typeof chunk === 'string' ? Buffer.from(chunk, encoding) : chunk); + callback(); + }, + final(callback: (error?: Error) => void): void { + callback(); + resolve(Buffer.concat(chunks).toString()); + }, + })); + }), + }; +} diff --git a/yarn.lock b/yarn.lock index 8e0211be8872..934dd8639619 100644 --- a/yarn.lock +++ b/yarn.lock @@ -1570,6 +1570,26 @@ resolved "https://registry.yarnpkg.com/@types/parse-json/-/parse-json-4.0.0.tgz#2f8bb441434d163b35fb8ffdccd7138927ffb8c0" integrity sha512-//oorEZjL6sbPcKUaCdIGlIUeH26mgzimjBB77G6XRgnDl/L5wOnpyBGRe/Mmf5CVW3PwEBE1NjiMZ/ssFh4wA== +"@types/parse5-html-rewriting-stream@^5.1.2": + version "5.1.2" + resolved "https://registry.yarnpkg.com/@types/parse5-html-rewriting-stream/-/parse5-html-rewriting-stream-5.1.2.tgz#919d5bbf69ef61e11d873e7195891c3811491a03" + integrity sha512-7CHY6QlayurvYRST5xatE/ipIueph5V+EW2xU12P0CsNucuwygnuiE4foYsdQUEkhnKrTU62KmikANPnoxiGrg== + dependencies: + "@types/parse5-sax-parser" "*" + +"@types/parse5-sax-parser@*": + version "5.0.1" + resolved "https://registry.yarnpkg.com/@types/parse5-sax-parser/-/parse5-sax-parser-5.0.1.tgz#f1e26e82bb09e48cb0c16ff6d1e88aea1e538fd5" + integrity sha512-wBEwg10aACLggnb44CwzAA27M1Jrc/8TR16zA61/rKO5XZoi7JSfLjdpXbshsm7wOlM6hpfvwygh40rzM2RsQQ== + dependencies: + "@types/node" "*" + "@types/parse5" "*" + +"@types/parse5@*": + version "5.0.3" + resolved "https://registry.yarnpkg.com/@types/parse5/-/parse5-5.0.3.tgz#e7b5aebbac150f8b5fdd4a46e7f0bd8e65e19109" + integrity sha512-kUNnecmtkunAoQ3CnjmMkzNU/gtxG8guhi+Fk2U/kOpIKjIMKnXGp4IJCgQJrXSgMsWYimYG4TGjz/UzbGEBTw== + "@types/pidusage@^2.0.1": version "2.0.1" resolved "https://registry.yarnpkg.com/@types/pidusage/-/pidusage-2.0.1.tgz#45eb309be947dcfa177957ef662ce2a0a2311d48" @@ -8855,13 +8875,6 @@ parse5-html-rewriting-stream@6.0.1: parse5 "^6.0.1" parse5-sax-parser "^6.0.1" -parse5-htmlparser2-tree-adapter@6.0.1: - version "6.0.1" - resolved "https://registry.yarnpkg.com/parse5-htmlparser2-tree-adapter/-/parse5-htmlparser2-tree-adapter-6.0.1.tgz#2cdf9ad823321140370d4dbf5d3e92c7c8ddc6e6" - integrity sha512-qPuWvbLgvDGilKc5BoicRovlT4MtYT6JfJyBOMDsKoiT+GiuP5qyrPCnR9HcPECIJJmZh5jRndyNThnhhb/vlA== - dependencies: - parse5 "^6.0.1" - parse5-sax-parser@^6.0.1: version "6.0.1" resolved "https://registry.yarnpkg.com/parse5-sax-parser/-/parse5-sax-parser-6.0.1.tgz#98b4d366b5b266a7cd90b4b58906667af882daba" @@ -8874,16 +8887,16 @@ parse5@5.1.0: resolved "https://registry.yarnpkg.com/parse5/-/parse5-5.1.0.tgz#c59341c9723f414c452975564c7c00a68d58acd2" integrity sha512-fxNG2sQjHvlVAYmzBZS9YlDp6PTSSDwa98vkD4QgVDDCAo84z5X1t5XyJQ62ImdLXx5NdIIfihey6xpum9/gRQ== -parse5@6.0.1, parse5@^6.0.1: - version "6.0.1" - resolved "https://registry.yarnpkg.com/parse5/-/parse5-6.0.1.tgz#e1a1c085c569b3dc08321184f19a39cc27f7c30b" - integrity sha512-Ofn/CTFzRGTTxwpNEs9PP93gXShHcTq255nzRYSKe8AkVpZY7e1fpmTfOyoIvjP5HG7Z2ZM7VS9PPhQGW2pOpw== - parse5@^5.0.0: version "5.1.1" resolved "https://registry.yarnpkg.com/parse5/-/parse5-5.1.1.tgz#f68e4e5ba1852ac2cadc00f4555fff6c2abb6178" integrity sha512-ugq4DFI0Ptb+WWjAdOK16+u/nHfiIrcE+sh8kZMaM0WllQKLI9rOUq6c2b7cwPkXdzfQESqvoqK6ug7U/Yyzug== +parse5@^6.0.1: + version "6.0.1" + resolved "https://registry.yarnpkg.com/parse5/-/parse5-6.0.1.tgz#e1a1c085c569b3dc08321184f19a39cc27f7c30b" + integrity sha512-Ofn/CTFzRGTTxwpNEs9PP93gXShHcTq255nzRYSKe8AkVpZY7e1fpmTfOyoIvjP5HG7Z2ZM7VS9PPhQGW2pOpw== + parseqs@0.0.5: version "0.0.5" resolved "https://registry.yarnpkg.com/parseqs/-/parseqs-0.0.5.tgz#d5208a3738e46766e291ba2ea173684921a8b89d" From 525a7e25726d2eae30dfefe6eb67bfe82f32db90 Mon Sep 17 00:00:00 2001 From: Alan Agius Date: Fri, 2 Oct 2020 14:58:40 +0200 Subject: [PATCH 2/2] refactor(@angular/pwa): use typed version of parse5-html-rewriting-stream --- packages/angular/pwa/BUILD.bazel | 3 ++- packages/angular/pwa/pwa/index.ts | 10 ++++------ 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/packages/angular/pwa/BUILD.bazel b/packages/angular/pwa/BUILD.bazel index b3c856a22cb0..525339f9254c 100644 --- a/packages/angular/pwa/BUILD.bazel +++ b/packages/angular/pwa/BUILD.bazel @@ -42,7 +42,7 @@ ts_library( "//packages/angular_devkit/schematics", "//packages/schematics/angular", "@npm//@types/node", - "@npm//parse5-html-rewriting-stream", + "@npm//@types/parse5-html-rewriting-stream", "@npm//rxjs", ], ) @@ -60,6 +60,7 @@ ts_library( deps = [ ":pwa", "//packages/angular_devkit/schematics/testing", + "@npm//parse5-html-rewriting-stream", ], ) diff --git a/packages/angular/pwa/pwa/index.ts b/packages/angular/pwa/pwa/index.ts index f5ad877c73c8..3fac23935f01 100644 --- a/packages/angular/pwa/pwa/index.ts +++ b/packages/angular/pwa/pwa/index.ts @@ -22,18 +22,16 @@ import { getWorkspace, updateWorkspace } from '@schematics/angular/utility/works import { Readable, Writable } from 'stream'; import { Schema as PwaOptions } from './schema'; -const RewritingStream = require('parse5-html-rewriting-stream'); - function updateIndexFile(path: string): Rule { - return (host: Tree) => { + return async (host: Tree) => { const buffer = host.read(path); if (buffer === null) { throw new SchematicsException(`Could not read index file: ${path}`); } - const rewriter = new RewritingStream(); + const rewriter = new (await import('parse5-html-rewriting-stream'))(); let needsNoScript = true; - rewriter.on('startTag', (startTag: { tagName: string }) => { + rewriter.on('startTag', startTag => { if (startTag.tagName === 'noscript') { needsNoScript = false; } @@ -41,7 +39,7 @@ function updateIndexFile(path: string): Rule { rewriter.emitStartTag(startTag); }); - rewriter.on('endTag', (endTag: { tagName: string }) => { + rewriter.on('endTag', endTag => { if (endTag.tagName === 'head') { rewriter.emitRaw(' \n'); rewriter.emitRaw(' \n');