Skip to content

Commit

Permalink
fix a regular expression escape and fix a bug to pass encoding test
Browse files Browse the repository at this point in the history
  • Loading branch information
cuixiping committed Mar 27, 2019
1 parent 277e882 commit f6f6298
Showing 1 changed file with 2 additions and 2 deletions.
4 changes: 2 additions & 2 deletions lib/crawler.js
Original file line number Diff line number Diff line change
Expand Up @@ -425,14 +425,14 @@ Crawler.prototype._parseCharset = function(res){
//Browsers treat gb2312 as gbk, but iconv-lite does not.
//Replace gb2312 with gbk in order to parse pages that declare gb2312 but are actually encoded as gbk.
/**
 * Extract the charset name from a string containing a `charset=...` token
 * (for example a Content-Type header value or an HTML <meta> tag).
 *
 * A name that normalises to "gb2312" is reported as "gbk", so that pages
 * declaring gb2312 are decoded with the gbk table.
 *
 * @param {?string} str - Text to search; any falsy value yields null.
 * @returns {?string} The charset exactly as written in `str`, "gbk" for any
 *     spelling of gb2312, or null when no charset token is present.
 */
function getCharset(str){
    const match = str && str.match(/charset=['"]?([\w.-]+)/i);
    if (!match) {
        return null;
    }

    const charset = match[1];
    // Charset labels are case-insensitive, so lowercase before stripping
    // punctuation; otherwise "GB2312" would lose its letters and never match.
    const normalized = charset.toLowerCase().replace(/:\d{4}$|[^0-9a-z]/g, '');
    return normalized === 'gb2312' ? 'gbk' : charset;
}
/**
 * Pick a charset, preferring the one found in `header`, then the one found
 * in `binary`, and finally falling back to `default_charset`.
 *
 * @param {?string} header - Text searched first for a charset token.
 * @param {?string} binary - Text searched only when `header` yields nothing.
 * @param {?string} [default_charset='utf-8'] - Value returned when neither
 *     source yields a charset. The default applies only when the argument is
 *     omitted or undefined; an explicit null is returned as-is.
 * @returns {?string} The first charset found, or `default_charset`.
 */
function charsetParser(header, binary, default_charset = 'utf-8') {
    const fromHeader = getCharset(header);
    if (fromHeader) {
        return fromHeader;
    }

    const fromBinary = getCharset(binary);
    if (fromBinary) {
        return fromBinary;
    }

    return default_charset;
}

var charset = charsetParser(contentType(res));
var charset = charsetParser(contentType(res), null, null);
if(charset)
return charset;

Expand Down

0 comments on commit f6f6298

Please sign in to comment.