Skip to content

Commit 181bea6

Browse files
committed
fix(core): don't crash if a content document has an '.xml' extension
Puppeteer won't load documents with extension `.xml` as XHTML. When a Content Document’s extension is not `.xhtml` or `.html`, we first copy the document to a temp `.xhtml` file before opening it in Puppeteer. Fixes #122
1 parent 63cc88b commit 181bea6

File tree

10 files changed

+93
-2
lines changed

10 files changed

+93
-2
lines changed

packages/ace-core/package.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
"@daisy/epub-utils": "^0.8.0",
2323
"@daisy/puppeteer-utils": "^0.7.0",
2424
"axe-core": "~2.6.1",
25+
"file-url": "^2.0.2",
2526
"h5o": "^0.11.3",
2627
"p-map": "^1.2.0",
2728
"puppeteer": "^1.0.0",

packages/ace-core/src/checker/checker-chromium.js

Lines changed: 20 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,19 @@
11
'use strict';
22

3-
const fs = require('fs');
3+
const fileUrl = require('file-url');
4+
const fs = require('fs-extra');
45
const path = require('path');
56
const pMap = require('p-map');
67
const puppeteer = require('puppeteer');
78
const os = require('os');
9+
const tmp = require('tmp');
810
const winston = require('winston');
911

1012
const axe2ace = require('@daisy/ace-report-axe');
1113
const utils = require('@daisy/puppeteer-utils');
1214

15+
tmp.setGracefulCleanup();
16+
1317
const scripts = [
1418
path.resolve(require.resolve('axe-core'), '../axe.min.js'),
1519
require.resolve('../scripts/vendor/outliner.min.js'),
@@ -21,8 +25,22 @@ const scripts = [
2125
async function checkSingle(spineItem, epub, browser) {
2226
winston.verbose(`- Processing ${spineItem.relpath}`);
2327
try {
28+
let url = spineItem.url;
29+
let ext = path.extname(spineItem.filepath);
30+
31+
// File extensions other than 'xhtml' or 'html' are not properly loaded
32+
// by puppeteer, so we copy the file to a new `.xhtml` temp file.
33+
if (ext !== 'xhtml' && ext !== 'html') {
34+
winston.warn(`Copying document with extension '${ext}' to a temporary '.xhtml' file…`);
35+
const tmpdir = tmp.dirSync({ unsafeCleanup: true }).name;
36+
const tmpFile = path.join(tmpdir, `${path.basename(spineItem.filepath, ext)}.xhtml`)
37+
fs.copySync(spineItem.filepath, tmpFile);
38+
url = fileUrl(tmpFile);
39+
winston.debug(`checking copied file at ${url}`)
40+
}
41+
2442
const page = await browser.newPage();
25-
await page.goto(spineItem.url);
43+
await page.goto(url);
2644
await utils.addScripts(scripts, page);
2745

2846
const results = await page.evaluate(() => new Promise((resolve, reject) => {

tests/__tests__/regression.test.js

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,3 +65,8 @@ test('issue #114: Description list item does not have a <dl> parent element', as
6565
const report = await ace('../data/issue-114');
6666
expect(report['earl:result']['earl:outcome']).toEqual('pass');
6767
});
68+
69+
test('issue #122: Failed to check HTML content with `.xml` extension', async () => {
70+
const report = await ace('../data/issue-122');
71+
expect(report['earl:result']['earl:outcome']).toEqual('pass');
72+
});
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
<html xmlns="http://www.w3.org/1999/xhtml" xmlns:epub="http://www.idpf.org/2007/ops" xml:lang="en">
2+
<head>
3+
<title>Minimal EPUB</title>
4+
</head>
5+
<body>
6+
<h1>Loomings</h1>
7+
<p>Call me Ishmael.</p>
8+
</body>
9+
</html>
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
<html xmlns="http://www.w3.org/1999/xhtml" xmlns:epub="http://www.idpf.org/2007/ops" xml:lang="en">
2+
<head>
3+
<title>Minimal EPUB</title>
4+
</head>
5+
<body>
6+
<h1>Loomings</h1>
7+
<p>Call me Ishmael.</p>
8+
</body>
9+
</html>

tests/data/issue-122/EPUB/nav.xhtml

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
<html xmlns="http://www.w3.org/1999/xhtml" xmlns:epub="http://www.idpf.org/2007/ops" xml:lang="en">
2+
<head>
3+
<title>Minimal Nav</title>
4+
</head>
5+
<body>
6+
<nav epub:type="toc">
7+
<ol>
8+
<li><a href="content_001.xml">content 001</a></li>
9+
<li><a href="content_002.ace">content 002</a></li>
10+
</ol>
11+
</nav>
12+
</body>
13+
</html>

tests/data/issue-122/EPUB/package.opf

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
<?xml version="1.0" encoding="UTF-8"?>
2+
<package xmlns="http://www.idpf.org/2007/opf" version="3.0" xml:lang="en" unique-identifier="uid">
3+
<metadata xmlns:dc="http://purl.org/dc/elements/1.1/">
4+
<dc:title id="title">Minimal EPUB 3.0</dc:title>
5+
<dc:language>en</dc:language>
6+
<dc:identifier id="uid">NOID</dc:identifier>
7+
<meta property="dcterms:modified">2017-01-01T00:00:01Z</meta>
8+
<meta property="schema:accessibilityFeature">structuralNavigation</meta>
9+
<meta property="schema:accessibilitySummary">everything OK!</meta>
10+
<meta property="schema:accessibilityHazard">noFlashingHazard</meta>
11+
<meta property="schema:accessibilityHazard">noSoundHazard</meta>
12+
<meta property="schema:accessibilityHazard">noMotionSimulationHazard</meta>
13+
<meta property="schema:accessMode">textual</meta>
14+
<meta property="schema:accessModeSufficient">textual</meta>
15+
</metadata>
16+
<manifest>
17+
<item id="nav" href="nav.xhtml" media-type="application/xhtml+xml" properties="nav"/>
18+
<item id="content_001" href="content_001.xml" media-type="application/xhtml+xml"/>
19+
<item id="content_002" href="content_002.ace" media-type="application/xhtml+xml"/>
20+
</manifest>
21+
<spine>
22+
<itemref idref="content_001" />
23+
<itemref idref="content_002" />
24+
</spine>
25+
</package>
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
<?xml version="1.0" encoding="UTF-8" ?>
2+
<container version="1.0" xmlns="urn:oasis:names:tc:opendocument:xmlns:container">
3+
<rootfiles>
4+
<rootfile full-path="EPUB/package.opf" media-type="application/oebps-package+xml"/>
5+
</rootfiles>
6+
</container>

tests/data/issue-122/mimetype

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
application/epub+zip

yarn.lock

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2174,6 +2174,10 @@ file-exists-promise@^1.0.2:
21742174
dependencies:
21752175
es6-promise "^3.1.2"
21762176

2177+
file-url@^2.0.2:
2178+
version "2.0.2"
2179+
resolved "https://registry.yarnpkg.com/file-url/-/file-url-2.0.2.tgz#e951784d79095127d3713029ab063f40818ca2ae"
2180+
21772181
filename-regex@^2.0.0:
21782182
version "2.0.1"
21792183
resolved "https://registry.yarnpkg.com/filename-regex/-/filename-regex-2.0.1.tgz#c1c4b9bee3e09725ddb106b75c1e301fe2f18b26"

0 commit comments

Comments
 (0)