Skip to content
This repository has been archived by the owner on Jun 28, 2021. It is now read-only.

Commit

Permalink
fix: Detecting BOM when data is not enough
Browse files Browse the repository at this point in the history
/close #246
  • Loading branch information
jinliming2 authored and wdavidw committed Aug 6, 2019
1 parent c28279e commit 620125e
Show file tree
Hide file tree
Showing 4 changed files with 78 additions and 21 deletions.
37 changes: 29 additions & 8 deletions lib/es5/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,9 @@ function _iterableToArray(iter) { if (Symbol.iterator in Object(iter) || Object.

/**
 * Copies a real array into a new array of the same length, element by element.
 *
 * Every index of the result is assigned explicitly, so a hole in the input
 * becomes an own `undefined` entry in the copy.
 *
 * @param {*} arr - Value to copy.
 * @returns {Array|undefined} The copy when `arr` is an array, otherwise `undefined`.
 */
function _arrayWithoutHoles(arr) {
  if (!Array.isArray(arr)) {
    return undefined;
  }

  var copy = new Array(arr.length);

  for (var index = 0; index < arr.length; index++) {
    copy[index] = arr[index];
  }

  return copy;
}

/**
 * Copies the enumerable own properties (string keys and, when the runtime
 * supports them, symbol keys) of every argument after `target` onto `target`.
 *
 * `null` / `undefined` sources are treated as empty objects. Each property is
 * written through `_defineProperty`, so later sources overwrite earlier ones.
 *
 * @param {Object} target - Object that receives the properties (mutated in place).
 * @returns {Object} The same `target` object.
 */
function _objectSpread(target) {
  for (var argIndex = 1; argIndex < arguments.length; argIndex++) {
    var src = arguments[argIndex];

    if (src == null) {
      src = {};
    }

    var props = Object.keys(src);

    if (typeof Object.getOwnPropertySymbols === 'function') {
      // Only enumerable symbols are copied, mirroring what Object.keys does for strings.
      var visibleSymbols = Object.getOwnPropertySymbols(src).filter(function (sym) {
        return Object.getOwnPropertyDescriptor(src, sym).enumerable;
      });
      props = props.concat(visibleSymbols);
    }

    for (var p = 0; p < props.length; p++) {
      _defineProperty(target, props[p], src[props[p]]);
    }
  }

  return target;
}
/**
 * Lists the own keys of `object`: its enumerable string keys followed by its
 * own symbol keys (when the runtime provides `Object.getOwnPropertySymbols`).
 *
 * @param {Object} object - Object to inspect.
 * @param {boolean} [enumerableOnly] - When truthy, non-enumerable symbols are left out.
 * @returns {Array<string|symbol>} A new array of keys.
 */
function ownKeys(object, enumerableOnly) {
  var names = Object.keys(object);

  if (!Object.getOwnPropertySymbols) {
    return names;
  }

  var syms = Object.getOwnPropertySymbols(object);

  if (enumerableOnly) {
    syms = syms.filter(function (sym) {
      return Object.getOwnPropertyDescriptor(object, sym).enumerable;
    });
  }

  Array.prototype.push.apply(names, syms);
  return names;
}

/**
 * Merges every argument after `target` into `target` and returns it.
 *
 * Sources are handled differently depending on their argument position:
 * - odd positions (1, 3, ...): each enumerable own key (strings and symbols,
 *   as reported by `ownKeys(source, true)`) is written by value through
 *   `_defineProperty`;
 * - even positions (2, 4, ...): full property descriptors are copied, using
 *   `Object.getOwnPropertyDescriptors` when available and a per-key
 *   `Object.defineProperty` fallback otherwise.
 *
 * `null` / `undefined` sources are treated as empty objects.
 *
 * @param {Object} target - Object that receives the properties (mutated in place).
 * @returns {Object} The same `target` object.
 */
function _objectSpread(target) {
  for (var argIndex = 1; argIndex < arguments.length; argIndex++) {
    var src = arguments[argIndex] != null ? arguments[argIndex] : {};
    var isOddPosition = argIndex % 2 !== 0;

    if (isOddPosition) {
      ownKeys(src, true).forEach(function (key) {
        _defineProperty(target, key, src[key]);
      });
      continue;
    }

    if (Object.getOwnPropertyDescriptors) {
      Object.defineProperties(target, Object.getOwnPropertyDescriptors(src));
      continue;
    }

    // Fallback for runtimes without Object.getOwnPropertyDescriptors.
    ownKeys(src).forEach(function (key) {
      Object.defineProperty(target, key, Object.getOwnPropertyDescriptor(src, key));
    });
  }

  return target;
}

/**
 * Sets `obj[key]` to `value` and returns `obj`.
 *
 * When `key` is already reachable on `obj` (own or inherited), the property is
 * (re)defined as an enumerable, configurable, writable own data property so
 * that no existing setter or read-only descriptor gets in the way. Otherwise a
 * plain assignment is used.
 *
 * @param {Object} obj - Object to modify in place.
 * @param {string|symbol} key - Property key.
 * @param {*} value - Value to store.
 * @returns {Object} The same `obj`.
 */
function _defineProperty(obj, key, value) {
  if (!(key in obj)) {
    obj[key] = value;
    return obj;
  }

  var descriptor = {
    value: value,
    enumerable: true,
    configurable: true,
    writable: true
  };
  Object.defineProperty(obj, key, descriptor);
  return obj;
}

Expand Down Expand Up @@ -69,7 +71,7 @@ function (_Transform) {

_this = _possibleConstructorReturn(this, _getPrototypeOf(Parser).call(this, _objectSpread({}, {
readableObjectMode: true
}, opts)));
}, {}, opts)));
var options = {}; // Merge with user options

for (var opt in opts) {
Expand Down Expand Up @@ -396,6 +398,7 @@ function (_Transform) {
};
_this.options = options;
_this.state = {
bomSkipped: false,
castField: fnCastField,
commenting: false,
enabled: options.from_line === 1,
Expand Down Expand Up @@ -472,6 +475,7 @@ function (_Transform) {
to_line = _this$options.to_line;
var record_delimiter = this.options.record_delimiter;
var _this$state = this.state,
bomSkipped = _this$state.bomSkipped,
previousBuf = _this$state.previousBuf,
rawBuffer = _this$state.rawBuffer,
escapeIsQuote = _this$state.escapeIsQuote,
Expand All @@ -484,17 +488,34 @@ function (_Transform) {
this.push(null);
return;
} else {
// Handle UTF BOM
if (bom === true && bom_utf8.compare(nextBuf, 0, 3) === 0) {
buf = nextBuf.slice(3);
} else {
buf = nextBuf;
}
buf = nextBuf;
}
} else if (previousBuf !== undefined && nextBuf === undefined) {
buf = previousBuf;
} else {
buf = Buffer.concat([previousBuf, nextBuf]);
} // Handle UTF BOM


if (bomSkipped === false) {
if (bom === false) {
this.state.bomSkipped = true;
} else if (buf.length < 3) {
// Not enough data
if (end === false) {
// Wait for more data
this.state.previousBuf = buf;
return;
} // skip BOM detect because data length < 3

} else {
if (bom_utf8.compare(buf, 0, 3) === 0) {
// Skip BOM
buf = buf.slice(3);
}

this.state.bomSkipped = true;
}
}

var bufLen = buf.length;
Expand Down
34 changes: 25 additions & 9 deletions lib/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -310,6 +310,7 @@ class Parser extends Transform {
}
this.options = options
this.state = {
bomSkipped: false,
castField: fnCastField,
commenting: false,
enabled: options.from_line === 1,
Expand Down Expand Up @@ -355,26 +356,41 @@ class Parser extends Transform {
__parse(nextBuf, end){
const {bom, comment, escape, from, from_line, info, ltrim, max_record_size, quote, raw, relax, rtrim, skip_empty_lines, to, to_line} = this.options
let {record_delimiter} = this.options
const {previousBuf, rawBuffer, escapeIsQuote, trimChars} = this.state
const {bomSkipped, previousBuf, rawBuffer, escapeIsQuote, trimChars} = this.state
let buf
if(previousBuf === undefined){
if(nextBuf === undefined){
// Handle empty string
this.push(null)
return
}else{
// Handle UTF BOM
if(bom === true && bom_utf8.compare(nextBuf, 0, 3) === 0){
buf = nextBuf.slice(3)
}else{
buf = nextBuf
}
buf = nextBuf
}
}else if(previousBuf !== undefined && nextBuf === undefined){
buf = previousBuf
}else{
buf = Buffer.concat([previousBuf, nextBuf])
}
// Handle UTF BOM
if(bomSkipped === false){
if(bom === false){
this.state.bomSkipped = true
}else if(buf.length < 3){
// Not enough data
if(end === false){
// Wait for more data
this.state.previousBuf = buf
return
}
// Skip BOM detection because fewer than 3 bytes are available
}else{
if(bom_utf8.compare(buf, 0, 3) === 0){
// Skip BOM
buf = buf.slice(3)
}
this.state.bomSkipped = true
}
}
const bufLen = buf.length
let pos
for(pos = 0; pos < bufLen; pos++){
Expand Down Expand Up @@ -529,7 +545,7 @@ class Parser extends Transform {
if(err !== undefined) return err
}
}

const lappend = ltrim === false || this.state.quoting === true || this.state.field.length !== 0 || !this.__isCharTrimable(chr)
// rtrim outside of quoting is handled in __onField
const rappend = rtrim === false || this.state.wasQuoting === false
Expand Down Expand Up @@ -763,7 +779,7 @@ class Parser extends Transform {
const numOfCharLeft = bufLen - i - 1
const requiredLength = Math.max(
// Skip if the remaining buffer is smaller than the comment
comment ? comment.length : 0,
comment ? comment.length : 0,
// Skip if the remaining buffer is smaller than the row delimiter
recordDelimiterMaxLength,
// Skip if the remaining buffer can be row delimiter following the closing quote
Expand Down
1 change: 0 additions & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,6 @@
"@babel/preset-env": "^7.4.3",
"@types/mocha": "^5.2.6",
"@types/node": "^11.13.0",
"@types/should": "^13.0.0",
"coffeescript": "^2.4.0",
"csv-generate": "^3.2.0",
"csv-spectrum": "^1.0.0",
Expand Down
27 changes: 24 additions & 3 deletions test/option.bom.coffee
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
parse = require '../lib'

describe 'Option `bom`', ->

it 'preserve bom if not defined', (next) ->
parser = parse (err, data) ->
data.should.eql [
Expand All @@ -24,7 +24,7 @@ describe 'Option `bom`', ->
parser.write Buffer.from "\ufeffa,b,c\n"
parser.write Buffer.from 'd,e,f'
parser.end()

it 'throw parsing error if quote follow bom', (next) ->
parser = parse (err, data) ->
err.message.should.eql 'Invalid opening quote at line 1'
Expand Down Expand Up @@ -55,4 +55,25 @@ describe 'Option `bom`', ->
parser.write Buffer.from 'd,e,f'
parser.end()


# With `bom: true`, feeds the bytes 239, 187, 191 (0xEF 0xBB 0xBF, the UTF-8
# encoding of U+FEFF) one byte per write, followed by two CSV lines, and
# expects the parsed records to contain only the CSV fields.
it 'handle BOM even if no enough data in the first package', (next) ->
parser = parse bom: true, (err, data) ->
data.should.eql [
['a', 'b', 'c']
['d', 'e', 'f']
]
next()
# Each of the first three writes is shorter than the 3 bytes needed to compare against a BOM
parser.write Buffer.from [239]
parser.write Buffer.from [187]
parser.write Buffer.from [191]
parser.write Buffer.from "a,b,c\n"
parser.write Buffer.from "d,e,f"
parser.end()

# With `bom: true`, writes only two bytes (239, 187) and then ends the stream.
# The test expects those bytes to be kept as data: a single record with a
# single field equal to '\ufffd'.
# NOTE(review): presumably '\ufffd' is what decoding the truncated 2-byte
# sequence produces — confirm against the parser's field decoding.
it 'preserve data if no enough data to detect BOM', (next) ->
parser = parse bom: true, (err, data) ->
data.should.eql [
['\ufffd']
]
next()
parser.write Buffer.from [239, 187]
parser.end()

0 comments on commit 620125e

Please sign in to comment.