Skip to content

Commit

Permalink
Handle parsing utf-8 bom encoded files (mholt#961)
Browse files Browse the repository at this point in the history
Co-authored-by: peteruithoven <peter@metabolic.nl>
  • Loading branch information
peteruithoven and peteruithoven committed Nov 25, 2022
1 parent c1cbe16 commit aa00468
Show file tree
Hide file tree
Showing 3 changed files with 22 additions and 0 deletions.
9 changes: 9 additions & 0 deletions papaparse.js
Original file line number Diff line number Diff line change
Expand Up @@ -235,6 +235,7 @@ License: MIT
}
else if (typeof _input === 'string')
{
_input = stripBom(_input);
if (_config.download)
streamer = new NetworkStreamer(_config);
else
Expand All @@ -248,6 +249,14 @@ License: MIT
streamer = new FileStreamer(_config);

return streamer.stream(_input);

// Remove a single leading byte-order mark (code unit 0xFEFF) from `string`.
// A string that does not start with that character, including the empty
// string, is returned unchanged.
function stripBom(string) {
	var startsWithBom = string.charCodeAt(0) === 0xfeff;
	return startsWithBom ? string.slice(1) : string;
}
}


Expand Down
11 changes: 11 additions & 0 deletions tests/node-tests.js
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ var Papa = require("../papaparse.js");
var fs = require('fs');
var assert = require('assert');
var longSampleRawCsv = fs.readFileSync(__dirname + '/long-sample.csv', 'utf8');
var utf8BomSampleRawCsv = fs.readFileSync(__dirname + '/utf-8-bom-sample.csv', 'utf8');

function assertLongSampleParsedCorrectly(parsedCsv) {
assert.equal(8, parsedCsv.data.length);
Expand Down Expand Up @@ -287,4 +288,14 @@ describe('PapaParse', function() {
}
});
});

it('handles utf-8 BOM encoded files', function(done) {
	// The utf-8-bom-sample.csv fixture is parsed with `header: true`; its first
	// data row must come back keyed by the plain header names A, B and C.
	var expectedFirstRow = { A: 'X', B: 'Y', C: 'Z' };

	function onComplete(parsedCsv) {
		assert.deepEqual(parsedCsv.data[0], expectedFirstRow);
		done();
	}

	Papa.parse(utf8BomSampleRawCsv, { header: true, complete: onComplete });
});
});
2 changes: 2 additions & 0 deletions tests/utf-8-bom-sample.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
A,B,C
X,Y,Z

0 comments on commit aa00468

Please sign in to comment.