-
Notifications
You must be signed in to change notification settings - Fork 279
Closed
Description
Describe the bug
I am using csv-parse@5.5.3, and it does not handle comments well. A line that starts with # should be treated as a comment. However, later rows that contain # inside a field then fail to parse.
To Reproduce
Try to parse https://download.geonames.org/export/dump/countryInfo.txt, whose first 50 lines are comments. The Postal Code Format column contains many # characters.
// Baseline csv-parse options. Where the parser is created they are spread before
// `parseOptions`, so any caller-supplied option overrides the value given here.
const defaultParseOptions: ParseOptions = {
bom: true,
cast: true,
columns: false,
// Built-in comment handling is left switched off; comment lines are filtered out
// by RemoveCommentTransform before the text reaches the parser.
// comment: "#",
// comment_no_infix: true,
// NOTE(review): countryInfo.txt appears to be tab-separated; this literal may have
// been a tab character before the snippet was pasted — confirm.
delimiter: " ",
escape: null,
groupColumnsByName: false,
quote: null,
record_delimiter: ["\n", "\r", "\r\n"],
relax_quotes: true,
skip_empty_lines: true,
};Additional context
Workaround
/**
 * Transform stream that filters out blank lines and lines beginning with "#".
 *
 * Every incoming chunk is handled as one line of text: it is stringified,
 * and if kept, re-emitted with a trailing "\n". Skipped lines produce no
 * output at all.
 */
class RemoveCommentTransform extends Transform {
  override _transform(
    chunk: any,
    _encoding: BufferEncoding,
    callback: TransformCallback,
  ): void {
    const text = String(chunk);
    const isBlank = text.trim() === "";
    // Only a "#" in the very first column marks a comment; a "#" further
    // along the line is ordinary data.
    const isComment = text.startsWith("#");

    if (isBlank || isComment) {
      // Signal completion without pushing anything downstream.
      callback(null);
      return;
    }

    callback(null, text + "\n");
  }
}
// Read the file line by line so that each line can be filtered on its own.
const readableStream = fs.createReadStream(filePath);
const readlineIterator = readline.createInterface({
  input: readableStream,
  // With an infinite delay, "\r" followed by "\n" always counts as one line break.
  crlfDelay: Infinity,
});
// Wrap the line iterator in a Readable so it can be piped.
const readlineStream = Readable.from(readlineIterator);
const removeCommentTransform = new RemoveCommentTransform();
// Later sources win, so caller-supplied options take precedence over the defaults.
const parser = parse(Object.assign({}, defaultParseOptions, parseOptions));
readlineStream.pipe(removeCommentTransform).pipe(parser);
Metadata
Metadata
Assignees
Labels
No labels