From 127d61f30eba0584a268cd80dc4ec4f37f7e2cb2 Mon Sep 17 00:00:00 2001 From: Jack <8209433+jacktuck@users.noreply.github.com> Date: Sun, 23 Oct 2022 04:28:46 +0100 Subject: [PATCH] fix: support for CDATA in oembed (#98) Co-authored-by @adrian-seijo --- Procfile | 1 + example.js | 44 +++++ package-lock.json | 292 +++++++++++++++++++++++------- package.json | 5 +- src/index.ts | 207 ++++++++++++--------- test/oembed/oembed-cdata.xml | 19 ++ test/oembed/oembed-xml-cdata.html | 8 + test/oembed/test.ts | 27 +++ 8 files changed, 440 insertions(+), 163 deletions(-) create mode 100644 Procfile create mode 100644 example.js create mode 100644 test/oembed/oembed-cdata.xml create mode 100644 test/oembed/oembed-xml-cdata.html diff --git a/Procfile b/Procfile new file mode 100644 index 0000000..fd4d942 --- /dev/null +++ b/Procfile @@ -0,0 +1 @@ +web: node example.js diff --git a/example.js b/example.js new file mode 100644 index 0000000..6d72b89 --- /dev/null +++ b/example.js @@ -0,0 +1,44 @@ +const http = require("http"); +const { parse } = require("url"); +const { unfurl } = require("./dist"); + +http + .createServer(function (req, res) { + const isUrl = /https?:\/\//; + const { url } = parse(req.url, true).query; + + if (!url) { + res.writeHead(400, "Please submit a url with querystring"); + res.end(); + return; + } + + if (!isUrl.test(url)) { + res.writeHead(400, "Please only submit http(s) urls"); + res.end(); + return; + } + + res.setHeader("Access-Control-Allow-Origin", "*"); + res.setHeader( + "Access-Control-Allow-Methods", + "GET, PUT, POST, DELETE, OPTIONS" + ); + res.setHeader( + "Access-Control-Allow-Headers", + "Content-Type, Access-Control-Allow-Headers, Authorization, X-Requested-With" + ); + + unfurl(url) + .then((data) => { + res.setHeader("Content-Type", "application/json"); + res.writeHead(200); + res.end(JSON.stringify(data)); + }) + .catch((err) => { + console.error(err); + res.writeHead(500, err); + res.end(); + }); + }) + .listen(process.env.PORT || 3000); //eslint-disable-line diff --git a/package-lock.json b/package-lock.json index 1c43c63..fe4151b 100644 --- a/package-lock.json +++ b/package-lock.json @@ -11,10 +11,9 @@ "dependencies": { "debug": "^3.1.0", "he": "^1.2.0", - "htmlparser2": "^3.9.2", + "htmlparser2": "^8.0.1", "iconv-lite": "^0.4.24", - "node-fetch": "^2.6.7", - "source-map-support": "^0.5.9" + "node-fetch": "^2.6.7" }, "devDependencies": { "@types/htmlparser2": "^3.10.3", @@ -1850,7 +1849,8 @@ "node_modules/buffer-from": { "version": "1.1.1", "resolved": "https://registry.npmjs.org/buffer-from/-/buffer-from-1.1.1.tgz", - "integrity": "sha512-MQcXEUbCKtEo7bhqEs6560Hyd4XaovZlO/k9V3hjVUF/zwW7KBVdSK4gIt/bzwS9MbR5qob+F5jusZsb0YQK2A==" + "integrity": "sha512-MQcXEUbCKtEo7bhqEs6560Hyd4XaovZlO/k9V3hjVUF/zwW7KBVdSK4gIt/bzwS9MbR5qob+F5jusZsb0YQK2A==", + "dev": true }, "node_modules/builtin-modules": { "version": "1.1.1", @@ -2095,7 +2095,8 @@ "node_modules/core-util-is": { "version": "1.0.2", "resolved": "https://registry.npmjs.org/core-util-is/-/core-util-is-1.0.2.tgz", - "integrity": "sha1-tf1UIgqivFq1eqtxQMlAdUUDwac=" + "integrity": "sha1-tf1UIgqivFq1eqtxQMlAdUUDwac=", + "dev": true }, "node_modules/coveralls": { "version": "3.0.2", @@ -2426,23 +2427,48 @@ } }, "node_modules/dom-serializer": { - "version": "0.1.0", - "resolved": "https://registry.npmjs.org/dom-serializer/-/dom-serializer-0.1.0.tgz", - "integrity": "sha1-BzxpdUbOB4DOI75KKOKT5AvDDII=", + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/dom-serializer/-/dom-serializer-2.0.0.tgz", + "integrity": "sha512-wIkAryiqt/nV5EQKqQpo3SToSOV9J0DnbJqwK7Wv/Trc92zIAYZ4FlMu+JPFW1DfGFt81ZTCGgDEabffXeLyJg==", "dependencies": { - "domelementtype": "~1.1.1", - "entities": "~1.1.1" + "domelementtype": "^2.3.0", + "domhandler": "^5.0.2", + "entities": "^4.2.0" + }, + "funding": { + "url": "https://github.com/cheeriojs/dom-serializer?sponsor=1" } }, "node_modules/dom-serializer/node_modules/domelementtype": { - "version": "1.1.3", - "resolved": "https://registry.npmjs.org/domelementtype/-/domelementtype-1.1.3.tgz", - "integrity": "sha1-vSh3PiZCiBrsUVRJJCmcXNgiGFs=" + "version": "2.3.0", + "resolved": "https://registry.npmjs.org/domelementtype/-/domelementtype-2.3.0.tgz", + "integrity": "sha512-OLETBj6w0OsagBwdXnPdN0cnMfF9opN69co+7ZrbfPGrdpPVNBUj02spi6B1N7wChLQiPn4CSH/zJvXw56gmHw==", + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/fb55" + } + ] + }, + "node_modules/dom-serializer/node_modules/domhandler": { + "version": "5.0.3", + "resolved": "https://registry.npmjs.org/domhandler/-/domhandler-5.0.3.tgz", + "integrity": "sha512-cgwlv/1iFQiFnU96XXgROh8xTeetsnJiDsTc7TYCLFd9+/WNkIqPTxiM/8pSd8VIrhXGTf1Ny1q1hquVqDJB5w==", + "dependencies": { + "domelementtype": "^2.3.0" + }, + "engines": { + "node": ">= 4" + }, + "funding": { + "url": "https://github.com/fb55/domhandler?sponsor=1" + } }, "node_modules/domelementtype": { "version": "1.3.0", "resolved": "https://registry.npmjs.org/domelementtype/-/domelementtype-1.3.0.tgz", - "integrity": "sha1-sXrtguirWeUt2cGbF1bg/BhyBMI=" + "integrity": "sha1-sXrtguirWeUt2cGbF1bg/BhyBMI=", + "dev": true }, "node_modules/domexception": { "version": "1.0.1", @@ -2457,17 +2483,47 @@ "version": "2.4.2", "resolved": "https://registry.npmjs.org/domhandler/-/domhandler-2.4.2.tgz", "integrity": "sha512-JiK04h0Ht5u/80fdLMCEmV4zkNh2BcoMFBmZ/91WtYZ8qVXSKjiw7fXMgFPnHcSZgOo3XdinHvmnDUeMf5R4wA==", + "dev": true, "dependencies": { "domelementtype": "1" } }, "node_modules/domutils": { - "version": "1.7.0", - "resolved": "https://registry.npmjs.org/domutils/-/domutils-1.7.0.tgz", - "integrity": "sha512-Lgd2XcJ/NjEw+7tFvfKxOzCYKZsdct5lczQ2ZaQY8Djz7pfAD3Gbp8ySJWtreII/vDlMVmxwa6pHmdxIYgttDg==", + "version": "3.0.1", + "resolved": "https://registry.npmjs.org/domutils/-/domutils-3.0.1.tgz", + "integrity": "sha512-z08c1l761iKhDFtfXO04C7kTdPBLi41zwOZl00WS8b5eiaebNpY00HKbztwBq+e3vyqWNwWF3mP9YLUeqIrF+Q==", "dependencies": { - "dom-serializer": "0", - "domelementtype": "1" + "dom-serializer": "^2.0.0", + "domelementtype": "^2.3.0", + "domhandler": "^5.0.1" + }, + "funding": { + "url": "https://github.com/fb55/domutils?sponsor=1" + } + }, + "node_modules/domutils/node_modules/domelementtype": { + "version": "2.3.0", + "resolved": "https://registry.npmjs.org/domelementtype/-/domelementtype-2.3.0.tgz", + "integrity": "sha512-OLETBj6w0OsagBwdXnPdN0cnMfF9opN69co+7ZrbfPGrdpPVNBUj02spi6B1N7wChLQiPn4CSH/zJvXw56gmHw==", + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/fb55" + } + ] + }, + "node_modules/domutils/node_modules/domhandler": { + "version": "5.0.3", + "resolved": "https://registry.npmjs.org/domhandler/-/domhandler-5.0.3.tgz", + "integrity": "sha512-cgwlv/1iFQiFnU96XXgROh8xTeetsnJiDsTc7TYCLFd9+/WNkIqPTxiM/8pSd8VIrhXGTf1Ny1q1hquVqDJB5w==", + "dependencies": { + "domelementtype": "^2.3.0" + }, + "engines": { + "node": ">= 4" + }, + "funding": { + "url": "https://github.com/fb55/domhandler?sponsor=1" } }, "node_modules/ecc-jsbn": { @@ -2482,9 +2538,15 @@ } }, "node_modules/entities": { - "version": "1.1.1", - "resolved": "https://registry.npmjs.org/entities/-/entities-1.1.1.tgz", - "integrity": "sha1-blwtClYhtdra7O+AuQ7ftc13cvA=" + "version": "4.4.0", + "resolved": "https://registry.npmjs.org/entities/-/entities-4.4.0.tgz", + "integrity": "sha512-oYp7156SP8LkeGD0GF85ad1X9Ai79WtRsZ2gxJqtBuzH+98YUV6jkHEKlZkMbcrjJjIVJNIDP/3WL9wQkoPbWA==", + "engines": { + "node": ">=0.12" + }, + "funding": { + "url": "https://github.com/fb55/entities?sponsor=1" + } }, "node_modules/error-ex": { "version": "1.3.2", @@ -4648,16 +4710,46 @@ } }, "node_modules/htmlparser2": { - "version": "3.9.2", - "resolved": "https://registry.npmjs.org/htmlparser2/-/htmlparser2-3.9.2.tgz", - "integrity": "sha1-G9+HrMoPP55T+k/M6w9LTLsAszg=", + "version": "8.0.1", + "resolved": "https://registry.npmjs.org/htmlparser2/-/htmlparser2-8.0.1.tgz", + "integrity": "sha512-4lVbmc1diZC7GUJQtRQ5yBAeUCL1exyMwmForWkRLnwyzWBFxN633SALPMGYaWZvKe9j1pRZJpauvmxENSp/EA==", + "funding": [ + "https://github.com/fb55/htmlparser2?sponsor=1", + { + "type": "github", + "url": "https://github.com/sponsors/fb55" + } + ], + "dependencies": { + "domelementtype": "^2.3.0", + "domhandler": "^5.0.2", + "domutils": "^3.0.1", + "entities": "^4.3.0" + } + }, + "node_modules/htmlparser2/node_modules/domelementtype": { + "version": "2.3.0", + "resolved": "https://registry.npmjs.org/domelementtype/-/domelementtype-2.3.0.tgz", + "integrity": "sha512-OLETBj6w0OsagBwdXnPdN0cnMfF9opN69co+7ZrbfPGrdpPVNBUj02spi6B1N7wChLQiPn4CSH/zJvXw56gmHw==", + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/fb55" + } + ] + }, + "node_modules/htmlparser2/node_modules/domhandler": { + "version": "5.0.3", + "resolved": "https://registry.npmjs.org/domhandler/-/domhandler-5.0.3.tgz", + "integrity": "sha512-cgwlv/1iFQiFnU96XXgROh8xTeetsnJiDsTc7TYCLFd9+/WNkIqPTxiM/8pSd8VIrhXGTf1Ny1q1hquVqDJB5w==", "dependencies": { - "domelementtype": "^1.3.0", - "domhandler": "^2.3.0", - "domutils": "^1.5.1", - "entities": "^1.1.1", - "inherits": "^2.0.1", - "readable-stream": "^2.0.2" + "domelementtype": "^2.3.0" + }, + "engines": { + "node": ">= 4" + }, + "funding": { + "url": "https://github.com/fb55/domhandler?sponsor=1" } }, "node_modules/http-signature": { @@ -4773,7 +4865,8 @@ "node_modules/inherits": { "version": "2.0.3", "resolved": "https://registry.npmjs.org/inherits/-/inherits-2.0.3.tgz", - "integrity": "sha1-Yzwsg+PaQqUC9SRmAiSA9CCCYd4=" + "integrity": "sha1-Yzwsg+PaQqUC9SRmAiSA9CCCYd4=", + "dev": true }, "node_modules/invariant": { "version": "2.2.4", @@ -5089,7 +5182,8 @@ "node_modules/isarray": { "version": "1.0.0", "resolved": "https://registry.npmjs.org/isarray/-/isarray-1.0.0.tgz", - "integrity": "sha1-u5NdSFgsuhaMBoNJV6VKPgcSTxE=" + "integrity": "sha1-u5NdSFgsuhaMBoNJV6VKPgcSTxE=", + "dev": true }, "node_modules/isexe": { "version": "2.0.0", @@ -6766,7 +6860,8 @@ "node_modules/process-nextick-args": { "version": "2.0.0", "resolved": "https://registry.npmjs.org/process-nextick-args/-/process-nextick-args-2.0.0.tgz", - "integrity": "sha512-MtEC1TqN0EU5nephaJ4rAtThHtC86dNN9qCuEhtshvpVBkAW5ZO7BASN9REnF9eoXGcRub+pFuKEpOHE+HbEMw==" + "integrity": "sha512-MtEC1TqN0EU5nephaJ4rAtThHtC86dNN9qCuEhtshvpVBkAW5ZO7BASN9REnF9eoXGcRub+pFuKEpOHE+HbEMw==", + "dev": true }, "node_modules/prompts": { "version": "0.1.14", @@ -6873,6 +6968,7 @@ "version": "2.3.6", "resolved": "https://registry.npmjs.org/readable-stream/-/readable-stream-2.3.6.tgz", "integrity": "sha512-tQtKA9WIAhBF3+VLAseyMqZeBjW0AHJoxOtYqSUZNJxauErmLbVm2FW1y+J/YA9dUrAC39ITejlZWhVIwawkKw==", + "dev": true, "dependencies": { "core-util-is": "~1.0.0", "inherits": "~2.0.3", @@ -7148,7 +7244,8 @@ "node_modules/safe-buffer": { "version": "5.1.2", "resolved": "https://registry.npmjs.org/safe-buffer/-/safe-buffer-5.1.2.tgz", - "integrity": "sha512-Gd2UZBJDkXlY7GbJxfsE8/nvKkUEU1G38c1siN6QP6a9PT9MmHB8GnpscSmMJSoF8LOIrt8ud/wPtojys4G6+g==" + "integrity": "sha512-Gd2UZBJDkXlY7GbJxfsE8/nvKkUEU1G38c1siN6QP6a9PT9MmHB8GnpscSmMJSoF8LOIrt8ud/wPtojys4G6+g==", + "dev": true }, "node_modules/safe-regex": { "version": "1.1.0", @@ -7803,6 +7900,7 @@ "version": "0.6.1", "resolved": "https://registry.npmjs.org/source-map/-/source-map-0.6.1.tgz", "integrity": "sha512-UjgapumWlbMhkBgzT7Ykc5YXUT46F0iKu8SGXq0bcwP5dz/h0Plj6enJqjz1Zbq2l5WaqYnrVbwWOWMyF3F47g==", + "dev": true, "engines": { "node": ">=0.10.0" } @@ -7824,6 +7922,7 @@ "version": "0.5.9", "resolved": "https://registry.npmjs.org/source-map-support/-/source-map-support-0.5.9.tgz", "integrity": "sha512-gR6Rw4MvUlYy83vP0vxoVNzM6t8MUXqNuRsuBmBHQDu1Fh6X015FrLdgoDKcNdkwGubozq0P4N0Q37UyFVr1EA==", + "dev": true, "dependencies": { "buffer-from": "^1.0.0", "source-map": "^0.6.0" @@ -7954,6 +8053,7 @@ "version": "1.1.1", "resolved": "https://registry.npmjs.org/string_decoder/-/string_decoder-1.1.1.tgz", "integrity": "sha512-n/ShnvDi6FHbbVfviro+WojiFzv+s8MPMHBczVePfUpDJLwoLT0ht1l4YwBCbi8pJAveEEdnkHyPyTP/mzRfwg==", + "dev": true, "dependencies": { "safe-buffer": "~5.1.0" } @@ -8505,7 +8605,8 @@ "node_modules/util-deprecate": { "version": "1.0.2", "resolved": "https://registry.npmjs.org/util-deprecate/-/util-deprecate-1.0.2.tgz", - "integrity": "sha1-RQ1Nyfpw3nMnYvvS1KKJgUGaDM8=" + "integrity": "sha1-RQ1Nyfpw3nMnYvvS1KKJgUGaDM8=", + "dev": true }, "node_modules/util.promisify": { "version": "1.0.0", @@ -10258,7 +10359,8 @@ "buffer-from": { "version": "1.1.1", "resolved": "https://registry.npmjs.org/buffer-from/-/buffer-from-1.1.1.tgz", - "integrity": "sha512-MQcXEUbCKtEo7bhqEs6560Hyd4XaovZlO/k9V3hjVUF/zwW7KBVdSK4gIt/bzwS9MbR5qob+F5jusZsb0YQK2A==" + "integrity": "sha512-MQcXEUbCKtEo7bhqEs6560Hyd4XaovZlO/k9V3hjVUF/zwW7KBVdSK4gIt/bzwS9MbR5qob+F5jusZsb0YQK2A==", + "dev": true }, "builtin-modules": { "version": "1.1.1", @@ -10464,7 +10566,8 @@ "core-util-is": { "version": "1.0.2", "resolved": "https://registry.npmjs.org/core-util-is/-/core-util-is-1.0.2.tgz", - "integrity": "sha1-tf1UIgqivFq1eqtxQMlAdUUDwac=" + "integrity": "sha1-tf1UIgqivFq1eqtxQMlAdUUDwac=", + "dev": true }, "coveralls": { "version": "3.0.2", @@ -10725,25 +10828,35 @@ } }, "dom-serializer": { - "version": "0.1.0", - "resolved": "https://registry.npmjs.org/dom-serializer/-/dom-serializer-0.1.0.tgz", - "integrity": "sha1-BzxpdUbOB4DOI75KKOKT5AvDDII=", + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/dom-serializer/-/dom-serializer-2.0.0.tgz", + "integrity": "sha512-wIkAryiqt/nV5EQKqQpo3SToSOV9J0DnbJqwK7Wv/Trc92zIAYZ4FlMu+JPFW1DfGFt81ZTCGgDEabffXeLyJg==", "requires": { - "domelementtype": "~1.1.1", - "entities": "~1.1.1" + "domelementtype": "^2.3.0", + "domhandler": "^5.0.2", + "entities": "^4.2.0" }, "dependencies": { "domelementtype": { - "version": "1.1.3", - "resolved": "https://registry.npmjs.org/domelementtype/-/domelementtype-1.1.3.tgz", - "integrity": "sha1-vSh3PiZCiBrsUVRJJCmcXNgiGFs=" + "version": "2.3.0", + "resolved": "https://registry.npmjs.org/domelementtype/-/domelementtype-2.3.0.tgz", + "integrity": "sha512-OLETBj6w0OsagBwdXnPdN0cnMfF9opN69co+7ZrbfPGrdpPVNBUj02spi6B1N7wChLQiPn4CSH/zJvXw56gmHw==" + }, + "domhandler": { + "version": "5.0.3", + "resolved": "https://registry.npmjs.org/domhandler/-/domhandler-5.0.3.tgz", + "integrity": "sha512-cgwlv/1iFQiFnU96XXgROh8xTeetsnJiDsTc7TYCLFd9+/WNkIqPTxiM/8pSd8VIrhXGTf1Ny1q1hquVqDJB5w==", + "requires": { + "domelementtype": "^2.3.0" + } } } }, "domelementtype": { "version": "1.3.0", "resolved": "https://registry.npmjs.org/domelementtype/-/domelementtype-1.3.0.tgz", - "integrity": "sha1-sXrtguirWeUt2cGbF1bg/BhyBMI=" + "integrity": "sha1-sXrtguirWeUt2cGbF1bg/BhyBMI=", + "dev": true }, "domexception": { "version": "1.0.1", @@ -10758,17 +10871,34 @@ "version": "2.4.2", "resolved": "https://registry.npmjs.org/domhandler/-/domhandler-2.4.2.tgz", "integrity": "sha512-JiK04h0Ht5u/80fdLMCEmV4zkNh2BcoMFBmZ/91WtYZ8qVXSKjiw7fXMgFPnHcSZgOo3XdinHvmnDUeMf5R4wA==", + "dev": true, "requires": { "domelementtype": "1" } }, "domutils": { - "version": "1.7.0", - "resolved": "https://registry.npmjs.org/domutils/-/domutils-1.7.0.tgz", - "integrity": "sha512-Lgd2XcJ/NjEw+7tFvfKxOzCYKZsdct5lczQ2ZaQY8Djz7pfAD3Gbp8ySJWtreII/vDlMVmxwa6pHmdxIYgttDg==", + "version": "3.0.1", + "resolved": "https://registry.npmjs.org/domutils/-/domutils-3.0.1.tgz", + "integrity": "sha512-z08c1l761iKhDFtfXO04C7kTdPBLi41zwOZl00WS8b5eiaebNpY00HKbztwBq+e3vyqWNwWF3mP9YLUeqIrF+Q==", "requires": { - "dom-serializer": "0", - "domelementtype": "1" + "dom-serializer": "^2.0.0", + "domelementtype": "^2.3.0", + "domhandler": "^5.0.1" + }, + "dependencies": { + "domelementtype": { + "version": "2.3.0", + "resolved": "https://registry.npmjs.org/domelementtype/-/domelementtype-2.3.0.tgz", + "integrity": "sha512-OLETBj6w0OsagBwdXnPdN0cnMfF9opN69co+7ZrbfPGrdpPVNBUj02spi6B1N7wChLQiPn4CSH/zJvXw56gmHw==" + }, + "domhandler": { + "version": "5.0.3", + "resolved": "https://registry.npmjs.org/domhandler/-/domhandler-5.0.3.tgz", + "integrity": "sha512-cgwlv/1iFQiFnU96XXgROh8xTeetsnJiDsTc7TYCLFd9+/WNkIqPTxiM/8pSd8VIrhXGTf1Ny1q1hquVqDJB5w==", + "requires": { + "domelementtype": "^2.3.0" + } + } } }, "ecc-jsbn": { @@ -10783,9 +10913,9 @@ } }, "entities": { - "version": "1.1.1", - "resolved": "https://registry.npmjs.org/entities/-/entities-1.1.1.tgz", - "integrity": "sha1-blwtClYhtdra7O+AuQ7ftc13cvA=" + "version": "4.4.0", + "resolved": "https://registry.npmjs.org/entities/-/entities-4.4.0.tgz", + "integrity": "sha512-oYp7156SP8LkeGD0GF85ad1X9Ai79WtRsZ2gxJqtBuzH+98YUV6jkHEKlZkMbcrjJjIVJNIDP/3WL9wQkoPbWA==" }, "error-ex": { "version": "1.3.2", @@ -12503,16 +12633,29 @@ } }, "htmlparser2": { - "version": "3.9.2", - "resolved": "https://registry.npmjs.org/htmlparser2/-/htmlparser2-3.9.2.tgz", - "integrity": "sha1-G9+HrMoPP55T+k/M6w9LTLsAszg=", + "version": "8.0.1", + "resolved": "https://registry.npmjs.org/htmlparser2/-/htmlparser2-8.0.1.tgz", + "integrity": "sha512-4lVbmc1diZC7GUJQtRQ5yBAeUCL1exyMwmForWkRLnwyzWBFxN633SALPMGYaWZvKe9j1pRZJpauvmxENSp/EA==", "requires": { - "domelementtype": "^1.3.0", - "domhandler": "^2.3.0", - "domutils": "^1.5.1", - "entities": "^1.1.1", - "inherits": "^2.0.1", - "readable-stream": "^2.0.2" + "domelementtype": "^2.3.0", + "domhandler": "^5.0.2", + "domutils": "^3.0.1", + "entities": "^4.3.0" + }, + "dependencies": { + "domelementtype": { + "version": "2.3.0", + "resolved": "https://registry.npmjs.org/domelementtype/-/domelementtype-2.3.0.tgz", + "integrity": "sha512-OLETBj6w0OsagBwdXnPdN0cnMfF9opN69co+7ZrbfPGrdpPVNBUj02spi6B1N7wChLQiPn4CSH/zJvXw56gmHw==" + }, + "domhandler": { + "version": "5.0.3", + "resolved": "https://registry.npmjs.org/domhandler/-/domhandler-5.0.3.tgz", + "integrity": "sha512-cgwlv/1iFQiFnU96XXgROh8xTeetsnJiDsTc7TYCLFd9+/WNkIqPTxiM/8pSd8VIrhXGTf1Ny1q1hquVqDJB5w==", + "requires": { + "domelementtype": "^2.3.0" + } + } } }, "http-signature": { @@ -12599,7 +12742,8 @@ "inherits": { "version": "2.0.3", "resolved": "https://registry.npmjs.org/inherits/-/inherits-2.0.3.tgz", - "integrity": "sha1-Yzwsg+PaQqUC9SRmAiSA9CCCYd4=" + "integrity": "sha1-Yzwsg+PaQqUC9SRmAiSA9CCCYd4=", + "dev": true }, "invariant": { "version": "2.2.4", @@ -12838,7 +12982,8 @@ "isarray": { "version": "1.0.0", "resolved": "https://registry.npmjs.org/isarray/-/isarray-1.0.0.tgz", - "integrity": "sha1-u5NdSFgsuhaMBoNJV6VKPgcSTxE=" + "integrity": "sha1-u5NdSFgsuhaMBoNJV6VKPgcSTxE=", + "dev": true }, "isexe": { "version": "2.0.0", @@ -14272,7 +14417,8 @@ "process-nextick-args": { "version": "2.0.0", "resolved": "https://registry.npmjs.org/process-nextick-args/-/process-nextick-args-2.0.0.tgz", - "integrity": "sha512-MtEC1TqN0EU5nephaJ4rAtThHtC86dNN9qCuEhtshvpVBkAW5ZO7BASN9REnF9eoXGcRub+pFuKEpOHE+HbEMw==" + "integrity": "sha512-MtEC1TqN0EU5nephaJ4rAtThHtC86dNN9qCuEhtshvpVBkAW5ZO7BASN9REnF9eoXGcRub+pFuKEpOHE+HbEMw==", + "dev": true }, "prompts": { "version": "0.1.14", @@ -14349,6 +14495,7 @@ "version": "2.3.6", "resolved": "https://registry.npmjs.org/readable-stream/-/readable-stream-2.3.6.tgz", "integrity": "sha512-tQtKA9WIAhBF3+VLAseyMqZeBjW0AHJoxOtYqSUZNJxauErmLbVm2FW1y+J/YA9dUrAC39ITejlZWhVIwawkKw==", + "dev": true, "requires": { "core-util-is": "~1.0.0", "inherits": "~2.0.3", @@ -14554,7 +14701,8 @@ "safe-buffer": { "version": "5.1.2", "resolved": "https://registry.npmjs.org/safe-buffer/-/safe-buffer-5.1.2.tgz", - "integrity": "sha512-Gd2UZBJDkXlY7GbJxfsE8/nvKkUEU1G38c1siN6QP6a9PT9MmHB8GnpscSmMJSoF8LOIrt8ud/wPtojys4G6+g==" + "integrity": "sha512-Gd2UZBJDkXlY7GbJxfsE8/nvKkUEU1G38c1siN6QP6a9PT9MmHB8GnpscSmMJSoF8LOIrt8ud/wPtojys4G6+g==", + "dev": true }, "safe-regex": { "version": "1.1.0", @@ -15090,7 +15238,8 @@ "source-map": { "version": "0.6.1", "resolved": "https://registry.npmjs.org/source-map/-/source-map-0.6.1.tgz", - "integrity": "sha512-UjgapumWlbMhkBgzT7Ykc5YXUT46F0iKu8SGXq0bcwP5dz/h0Plj6enJqjz1Zbq2l5WaqYnrVbwWOWMyF3F47g==" + "integrity": "sha512-UjgapumWlbMhkBgzT7Ykc5YXUT46F0iKu8SGXq0bcwP5dz/h0Plj6enJqjz1Zbq2l5WaqYnrVbwWOWMyF3F47g==", + "dev": true }, "source-map-resolve": { "version": "0.5.2", @@ -15109,6 +15258,7 @@ "version": "0.5.9", "resolved": "https://registry.npmjs.org/source-map-support/-/source-map-support-0.5.9.tgz", "integrity": "sha512-gR6Rw4MvUlYy83vP0vxoVNzM6t8MUXqNuRsuBmBHQDu1Fh6X015FrLdgoDKcNdkwGubozq0P4N0Q37UyFVr1EA==", + "dev": true, "requires": { "buffer-from": "^1.0.0", "source-map": "^0.6.0" @@ -15221,6 +15371,7 @@ "version": "1.1.1", "resolved": "https://registry.npmjs.org/string_decoder/-/string_decoder-1.1.1.tgz", "integrity": "sha512-n/ShnvDi6FHbbVfviro+WojiFzv+s8MPMHBczVePfUpDJLwoLT0ht1l4YwBCbi8pJAveEEdnkHyPyTP/mzRfwg==", + "dev": true, "requires": { "safe-buffer": "~5.1.0" } @@ -15658,7 +15809,8 @@ "util-deprecate": { "version": "1.0.2", "resolved": "https://registry.npmjs.org/util-deprecate/-/util-deprecate-1.0.2.tgz", - "integrity": "sha1-RQ1Nyfpw3nMnYvvS1KKJgUGaDM8=" + "integrity": "sha1-RQ1Nyfpw3nMnYvvS1KKJgUGaDM8=", + "dev": true }, "util.promisify": { "version": "1.0.0", diff --git a/package.json b/package.json index d4c02a6..395bbd2 100644 --- a/package.json +++ b/package.json @@ -10,10 +10,9 @@ "dependencies": { "debug": "^3.1.0", "he": "^1.2.0", - "htmlparser2": "^3.9.2", + "htmlparser2": "^8.0.1", "iconv-lite": "^0.4.24", - "node-fetch": "^2.6.7", - "source-map-support": "^0.5.9" + "node-fetch": "^2.6.7" }, "devDependencies": { "@types/htmlparser2": "^3.10.3", diff --git a/src/index.ts b/src/index.ts index 6691acd..b2b3b69 100644 --- a/src/index.ts +++ b/src/index.ts @@ -7,6 +7,15 @@ import { Metadata, Opts } from "./types"; import { decode as he_decode } from "he"; import { decode as iconv_decode } from "iconv-lite"; +type ParserContext = { + isHtml?: boolean; + isOembed?: boolean; + favicon?: string; + text: string; + title?: string; + tagName?: string; +}; + const defaultHeaders = { Accept: "text/html, application/xhtml+xml", "User-Agent": "facebookexternalhit", @@ -29,17 +38,10 @@ function unfurl(url: string, opts?: Opts): Promise { Number.isInteger(opts.timeout) || (opts.timeout = 0); Number.isInteger(opts.size) || (opts.size = 0); - const ctx: { - url: string; - oembedUrl?: string; - } = { - url, - }; - return getPage(url, opts) - .then(getMetadata(ctx, opts)) - .then(getRemoteMetadata(ctx)) - .then(parse(ctx)); + .then(getMetadata(url, opts)) + .then(getRemoteMetadata(url)) + .then(parse(url)); } async function getPage(url: string, opts: Opts) { @@ -119,13 +121,13 @@ async function getPage(url: string, opts: Opts) { return buf.toString(); } -function getRemoteMetadata(ctx) { - return async function (metadata) { - if (!ctx._oembed) { +function getRemoteMetadata(url: string) { + return async function ({ oembed, metadata }) { + if (!oembed) { return metadata; } - const target = new URL(he_decode(ctx._oembed.href), ctx.url); + const target = new URL(he_decode(oembed.href), url); let res = await fetch(target.href); let contentType = res.headers.get("Content-Type"); @@ -142,71 +144,91 @@ function getRemoteMetadata(ctx) { let ret; if ( - ctx._oembed.type === "application/json+oembed" && + oembed.type === "application/json+oembed" && /application\/json/.test(contentType) ) { ret = await res.json(); } else if ( - ctx._oembed.type === "text/xml+oembed" && + oembed.type === "text/xml+oembed" && /(text|application)\/xml/.test(contentType) ) { - const data = await res.text(); + const data = (await res.text()) + .replace(/>/g, ">") + .replace(/</g, "<"); const content: { [key: string]: string } = {}; + const parserContext: ParserContext = { text: "" }; + ret = await new Promise((resolve) => { - const parser = new Parser({ - onopentag: function (name, attribs) { - if (this._is_html) { - if (!content.html) { - content.html = ""; + const parser = new Parser( + { + oncdataend: () => { + if ( + !content.html && + parserContext.text.trim().startsWith("<") && + parserContext.text.trim().endsWith(">") + ) { + content.html = parserContext.text.trim(); + } + }, + // eslint-disable-next-line + onopentag: function (name: string, attribs: any) { + if (parserContext.isHtml) { + if (!content.html) { + content.html = ""; + } + + content.html += `<${name} `; + content.html += Object.keys(attribs) + .reduce( + (str, k) => + str + + (attribs[k] ? `${k}="${attribs[k]}"` : `${k}`) + + " ", + "" + ) + .trim(); + content.html += ">"; } - content.html += `<${name} `; - content.html += Object.keys(attribs) - .reduce( - (str, k) => - str + (attribs[k] ? `${k}="${attribs[k]}"` : `${k}`) + " ", - "" - ) - .trim(); - content.html += ">"; - } - - if (name === "html") { - this._is_html = true; - } + if (name === "html") { + parserContext.isHtml = true; + } - this._tagname = name; - }, - ontext: function (text) { - if (!this._text) this._text = ""; - this._text += text; - }, - onclosetag: function (tagname) { - if (tagname === "oembed") { - return; - } + parserContext.tagName = name; + }, + ontext: function (text: string) { + parserContext.text += text; + }, + onclosetag: function (tagname: string) { + if (tagname === "oembed") { + return; + } - if (tagname === "html") { - this._is_html = false; - return; - } + if (tagname === "html") { + parserContext.isHtml = false; + return; + } - if (this._is_html) { - content.html += this._text.trim(); - content.html += ``; - } + if (parserContext.isHtml) { + content.html += parserContext.text.trim(); + content.html += ``; + } - content[tagname] = this._text.trim(); + content[tagname] = parserContext.text.trim(); - this._tagname = ""; - this._text = ""; + parserContext.tagName = ""; + parserContext.text = ""; + }, + onend: function () { + resolve(content); + }, }, - onend: function () { - resolve(content); - }, - }); + { + recognizeCDATA: true, + } + ); parser.write(data); parser.end(); @@ -226,37 +248,42 @@ function getRemoteMetadata(ctx) { }; } -function getMetadata(ctx, opts: Opts) { - return function (text) { +function getMetadata(url: string, opts: Opts) { + return function (text: string) { const metadata = []; + const parserContext: ParserContext = { text: "" }; + + let oembed: { type?: string; href?: string }; + let distanceFromRoot = 0; return new Promise((resolve) => { const parser = new Parser({ - _nodes_from_root: 0, - onend: function () { - if (this._favicon === undefined) { - metadata.push(["favicon", new URL("/favicon.ico", ctx.url).href]); + if (parserContext.favicon === undefined) { + metadata.push(["favicon", new URL("/favicon.ico", url).href]); } else { - metadata.push(["favicon", new URL(this._favicon, ctx.url).href]); + metadata.push([ + "favicon", + new URL(parserContext.favicon, url).href, + ]); } - resolve(metadata); + resolve({ oembed, metadata }); }, - onopentagname: function (tag) { - this._tagname = tag; + onopentagname: function (tag: string) { + parserContext.tagName = tag; }, - ontext: function (text) { - if (this._tagname === "title") { + ontext: function (text: string) { + if (parserContext.tagName === "title") { // makes sure we haven't already seen the title - if (this._title !== null) { - if (this._title === undefined) { - this._title = ""; + if (parserContext.title !== null) { + if (parserContext.title === undefined) { + parserContext.title = ""; } - this._title += text; + parserContext.title += text; } } }, @@ -265,7 +292,7 @@ function getMetadata(ctx, opts: Opts) { tagname: string, attribs: { [key: string]: string } ) { - this._nodes_from_root++; + distanceFromRoot++; if (opts.oembed && attribs.href) { // handle XML and JSON with a preference towards JSON since its more efficient for us @@ -274,9 +301,9 @@ function getMetadata(ctx, opts: Opts) { (attribs.type === "text/xml+oembed" || attribs.type === "application/json+oembed") ) { - if (!ctx._oembed || ctx._oembed.type === "text/xml+oembed") { + if (!oembed || oembed.type === "text/xml+oembed") { // prefer json - ctx._oembed = attribs; + oembed = attribs; } } } @@ -285,7 +312,7 @@ function getMetadata(ctx, opts: Opts) { attribs.href && (attribs.rel === "icon" || attribs.rel === "shortcut icon") ) { - this._favicon = attribs.href; + parserContext.favicon = attribs.href; } let pair: [string, string | string[]]; @@ -316,18 +343,18 @@ function getMetadata(ctx, opts: Opts) { }, onclosetag: function (tag: string) { - this._nodes_from_root--; - this._tagname = ""; + distanceFromRoot--; + parserContext.tagName = ""; - if (this._nodes_from_root <= 2 && tag === "title") { - metadata.push(["title", this._title]); - this._title = ""; + if (distanceFromRoot <= 2 && tag === "title") { + metadata.push(["title", parserContext.title]); + parserContext.title = ""; } // We want to parse as little as possible so finish once we see // if we have not seen a title tag within the head, we scan the entire // document instead - if (tag === "head" && this._title) { + if (tag === "head" && parserContext.title) { parser.reset(); } }, @@ -339,13 +366,13 @@ function getMetadata(ctx, opts: Opts) { }; } -function parse(ctx) { +function parse(url: string) { return function (metadata) { // eslint-disable-next-line const parsed: any = {}; - const ogVideoTags = []; const articleTags = []; + let lastParent; for (const meta of metadata) { @@ -379,7 +406,7 @@ function parse(ctx) { if (item.type === "number") { metaValue = parseInt(metaValue, 10); } else if (item.type === "url" && metaValue) { - metaValue = new URL(metaValue, ctx.url).href; + metaValue = new URL(metaValue, url).href; } if (parsed[item.entry] === undefined) { diff --git a/test/oembed/oembed-cdata.xml b/test/oembed/oembed-cdata.xml new file mode 100644 index 0000000..b85cd15 --- /dev/null +++ b/test/oembed/oembed-cdata.xml @@ -0,0 +1,19 @@ + + + +