From 4aa3d3902d9cfd0976749ab620f0f671bf58943f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20B=C3=B6hm?= Date: Thu, 4 Feb 2021 13:15:34 -0800 Subject: [PATCH] refactor(utils): Simplify `quickExpr` (#1716) --- lib/utils.js | 21 ++++++++------------- test/cheerio.js | 8 +++++++- 2 files changed, 15 insertions(+), 14 deletions(-) diff --git a/lib/utils.js b/lib/utils.js index f7d6bc7a77..25252134b1 100644 --- a/lib/utils.js +++ b/lib/utils.js @@ -84,8 +84,13 @@ exports.cloneDom = function (dom) { return clone; }; -/** A simple way to check for HTML strings or ID strings. */ -var quickExpr = /^(?:[^#<]*(<[\w\W]+>)[^>]*$|#([\w-]*)$)/; +/** + * A simple way to check for HTML strings. Tests for a `<` within a string, + * immediately followed by a letter and eventually followed by a `>`. + * + * @private + */ +var quickExpr = /<[a-zA-Z][^]*>/; /** * Check if string is HTML. @@ -96,16 +101,6 @@ var quickExpr = /^(?:[^#<]*(<[\w\W]+>)[^>]*$|#([\w-]*)$)/; * @returns {boolean} Indicates if `str` is HTML. */ exports.isHtml = function (str) { - // Faster than running regex, if str starts with `<` and ends with `>`, assume it's HTML - if ( - str.charAt(0) === '<' && - str.charAt(str.length - 1) === '>' && - str.length >= 3 - ) { - return true; - } - // Run the regex - var match = quickExpr.exec(str); - return !!(match && match[1]); + return quickExpr.test(str); }; diff --git a/test/cheerio.js b/test/cheerio.js index bbf23528f7..27bb9b7769 100644 --- a/test/cheerio.js +++ b/test/cheerio.js @@ -220,7 +220,7 @@ describe('cheerio', function () { }); it('should gracefully degrade on complex, unmatched queries', function () { - var $elem = cheerio('Eastern States Cup #8-fin 
Downhill '); + var $elem = cheerio('Eastern States Cup #8-fin <1br>Downhill '); expect($elem).toHaveLength(0); }); @@ -407,6 +407,12 @@ describe('cheerio', function () { expect(utils.isHtml('')).toBe(true); expect(utils.isHtml('\n\n')).toBe(true); expect(utils.isHtml('#main')).toBe(false); + expect(utils.isHtml('\n

foo

bar\n')).toBe(true); + expect(utils.isHtml('dog

fox

cat')).toBe(true); + expect(utils.isHtml('

fox

cat')).toBe(true); + expect(utils.isHtml('\n

fox

cat\n')).toBe(true); + expect(utils.isHtml('#

fox

cat#')).toBe(true); + expect(utils.isHtml('<123>')).toBe(false); }); }); });