From caca5c3044541acfc9fe4a7f32167bb1179b6253 Mon Sep 17 00:00:00 2001 From: David Worms Date: Fri, 25 Aug 2023 13:04:40 +0200 Subject: [PATCH] feat(csv-parse): new comment_no_infix option (fix #325) --- packages/csv-parse/dist/cjs/index.cjs | 14 ++++++- packages/csv-parse/dist/cjs/index.d.cts | 6 +++ packages/csv-parse/dist/cjs/sync.cjs | 14 ++++++- packages/csv-parse/dist/esm/index.d.ts | 6 +++ packages/csv-parse/dist/esm/index.js | 14 ++++++- packages/csv-parse/dist/esm/sync.js | 14 ++++++- packages/csv-parse/dist/iife/index.js | 14 ++++++- packages/csv-parse/dist/iife/sync.js | 14 ++++++- packages/csv-parse/dist/umd/index.js | 14 ++++++- packages/csv-parse/dist/umd/sync.js | 14 ++++++- packages/csv-parse/lib/api/index.js | 4 +- .../csv-parse/lib/api/normalize_options.js | 10 +++++ packages/csv-parse/lib/index.d.ts | 6 +++ packages/csv-parse/test/api.types.ts | 3 +- .../test/option.comment_no_infix.coffee | 38 +++++++++++++++++++ 15 files changed, 166 insertions(+), 19 deletions(-) create mode 100644 packages/csv-parse/test/option.comment_no_infix.coffee diff --git a/packages/csv-parse/dist/cjs/index.cjs b/packages/csv-parse/dist/cjs/index.cjs index 4336b2744..3fffa1444 100644 --- a/packages/csv-parse/dist/cjs/index.cjs +++ b/packages/csv-parse/dist/cjs/index.cjs @@ -278,6 +278,16 @@ const normalize_options = function(opts){ ], options); } } + // Normalize option `comment_no_infix` + if(options.comment_no_infix === undefined || options.comment_no_infix === null || options.comment_no_infix === false){ + options.comment_no_infix = false; + }else if(options.comment_no_infix !== true){ + throw new CsvError('CSV_INVALID_OPTION_COMMENT', [ + 'Invalid option comment_no_infix:', + 'value must be a boolean,', + `got ${JSON.stringify(options.comment_no_infix)}` + ], options); + } // Normalize option `delimiter` const delimiter_json = JSON.stringify(options.delimiter); if(!Array.isArray(options.delimiter)) options.delimiter = [options.delimiter]; @@ -639,7 +649,7 @@ const 
transform = function(original_options = {}) { }, // Central parser implementation parse: function(nextBuf, end, push, close){ - const {bom, encoding, from_line, ltrim, max_record_size,raw, relax_quotes, rtrim, skip_empty_lines, to, to_line} = this.options; + const {bom, comment_no_infix, encoding, from_line, ltrim, max_record_size,raw, relax_quotes, rtrim, skip_empty_lines, to, to_line} = this.options; let {comment, escape, quote, record_delimiter} = this.options; const {bomSkipped, previousBuf, rawBuffer, escapeIsQuote} = this.state; let buf; @@ -838,7 +848,7 @@ const transform = function(original_options = {}) { continue; } const commentCount = comment === null ? 0 : this.__compareBytes(comment, buf, pos, chr); - if(commentCount !== 0){ + if(commentCount !== 0 && (comment_no_infix === false || this.state.field.length === 0)){ this.state.commenting = true; continue; } diff --git a/packages/csv-parse/dist/cjs/index.d.cts b/packages/csv-parse/dist/cjs/index.d.cts index d082c7133..ec8aa027e 100644 --- a/packages/csv-parse/dist/cjs/index.d.cts +++ b/packages/csv-parse/dist/cjs/index.d.cts @@ -88,6 +88,12 @@ export interface Options { * Treat all the characters after this one as a comment, default to '' (disabled). */ comment?: string; + /** + * Restrict the definition of comments to a full line. Comment characters + * defined in the middle of the line are not interpreted as such. The + * option require the activation of comments. + */ + comment_no_infix?: boolean; /** * Set the field delimiter. One character only, defaults to comma. 
*/ diff --git a/packages/csv-parse/dist/cjs/sync.cjs b/packages/csv-parse/dist/cjs/sync.cjs index bb5ee4dfb..4d0058d38 100644 --- a/packages/csv-parse/dist/cjs/sync.cjs +++ b/packages/csv-parse/dist/cjs/sync.cjs @@ -276,6 +276,16 @@ const normalize_options = function(opts){ ], options); } } + // Normalize option `comment_no_infix` + if(options.comment_no_infix === undefined || options.comment_no_infix === null || options.comment_no_infix === false){ + options.comment_no_infix = false; + }else if(options.comment_no_infix !== true){ + throw new CsvError('CSV_INVALID_OPTION_COMMENT', [ + 'Invalid option comment_no_infix:', + 'value must be a boolean,', + `got ${JSON.stringify(options.comment_no_infix)}` + ], options); + } // Normalize option `delimiter` const delimiter_json = JSON.stringify(options.delimiter); if(!Array.isArray(options.delimiter)) options.delimiter = [options.delimiter]; @@ -637,7 +647,7 @@ const transform = function(original_options = {}) { }, // Central parser implementation parse: function(nextBuf, end, push, close){ - const {bom, encoding, from_line, ltrim, max_record_size,raw, relax_quotes, rtrim, skip_empty_lines, to, to_line} = this.options; + const {bom, comment_no_infix, encoding, from_line, ltrim, max_record_size,raw, relax_quotes, rtrim, skip_empty_lines, to, to_line} = this.options; let {comment, escape, quote, record_delimiter} = this.options; const {bomSkipped, previousBuf, rawBuffer, escapeIsQuote} = this.state; let buf; @@ -836,7 +846,7 @@ const transform = function(original_options = {}) { continue; } const commentCount = comment === null ? 
0 : this.__compareBytes(comment, buf, pos, chr); - if(commentCount !== 0){ + if(commentCount !== 0 && (comment_no_infix === false || this.state.field.length === 0)){ this.state.commenting = true; continue; } diff --git a/packages/csv-parse/dist/esm/index.d.ts b/packages/csv-parse/dist/esm/index.d.ts index d082c7133..ec8aa027e 100644 --- a/packages/csv-parse/dist/esm/index.d.ts +++ b/packages/csv-parse/dist/esm/index.d.ts @@ -88,6 +88,12 @@ export interface Options { * Treat all the characters after this one as a comment, default to '' (disabled). */ comment?: string; + /** + * Restrict the definition of comments to a full line. Comment characters + * defined in the middle of the line are not interpreted as such. The + * option require the activation of comments. + */ + comment_no_infix?: boolean; /** * Set the field delimiter. One character only, defaults to comma. */ diff --git a/packages/csv-parse/dist/esm/index.js b/packages/csv-parse/dist/esm/index.js index 732835411..1371e2d39 100644 --- a/packages/csv-parse/dist/esm/index.js +++ b/packages/csv-parse/dist/esm/index.js @@ -5400,6 +5400,16 @@ const normalize_options = function(opts){ ], options); } } + // Normalize option `comment_no_infix` + if(options.comment_no_infix === undefined || options.comment_no_infix === null || options.comment_no_infix === false){ + options.comment_no_infix = false; + }else if(options.comment_no_infix !== true){ + throw new CsvError('CSV_INVALID_OPTION_COMMENT', [ + 'Invalid option comment_no_infix:', + 'value must be a boolean,', + `got ${JSON.stringify(options.comment_no_infix)}` + ], options); + } // Normalize option `delimiter` const delimiter_json = JSON.stringify(options.delimiter); if(!Array.isArray(options.delimiter)) options.delimiter = [options.delimiter]; @@ -5761,7 +5771,7 @@ const transform = function(original_options = {}) { }, // Central parser implementation parse: function(nextBuf, end, push, close){ - const {bom, encoding, from_line, ltrim, max_record_size,raw, 
relax_quotes, rtrim, skip_empty_lines, to, to_line} = this.options; + const {bom, comment_no_infix, encoding, from_line, ltrim, max_record_size,raw, relax_quotes, rtrim, skip_empty_lines, to, to_line} = this.options; let {comment, escape, quote, record_delimiter} = this.options; const {bomSkipped, previousBuf, rawBuffer, escapeIsQuote} = this.state; let buf; @@ -5960,7 +5970,7 @@ const transform = function(original_options = {}) { continue; } const commentCount = comment === null ? 0 : this.__compareBytes(comment, buf, pos, chr); - if(commentCount !== 0){ + if(commentCount !== 0 && (comment_no_infix === false || this.state.field.length === 0)){ this.state.commenting = true; continue; } diff --git a/packages/csv-parse/dist/esm/sync.js b/packages/csv-parse/dist/esm/sync.js index 09cf2f247..405662846 100644 --- a/packages/csv-parse/dist/esm/sync.js +++ b/packages/csv-parse/dist/esm/sync.js @@ -2246,6 +2246,16 @@ const normalize_options = function(opts){ ], options); } } + // Normalize option `comment_no_infix` + if(options.comment_no_infix === undefined || options.comment_no_infix === null || options.comment_no_infix === false){ + options.comment_no_infix = false; + }else if(options.comment_no_infix !== true){ + throw new CsvError('CSV_INVALID_OPTION_COMMENT', [ + 'Invalid option comment_no_infix:', + 'value must be a boolean,', + `got ${JSON.stringify(options.comment_no_infix)}` + ], options); + } // Normalize option `delimiter` const delimiter_json = JSON.stringify(options.delimiter); if(!Array.isArray(options.delimiter)) options.delimiter = [options.delimiter]; @@ -2607,7 +2617,7 @@ const transform = function(original_options = {}) { }, // Central parser implementation parse: function(nextBuf, end, push, close){ - const {bom, encoding, from_line, ltrim, max_record_size,raw, relax_quotes, rtrim, skip_empty_lines, to, to_line} = this.options; + const {bom, comment_no_infix, encoding, from_line, ltrim, max_record_size,raw, relax_quotes, rtrim, skip_empty_lines, to, 
to_line} = this.options; let {comment, escape, quote, record_delimiter} = this.options; const {bomSkipped, previousBuf, rawBuffer, escapeIsQuote} = this.state; let buf; @@ -2806,7 +2816,7 @@ const transform = function(original_options = {}) { continue; } const commentCount = comment === null ? 0 : this.__compareBytes(comment, buf, pos, chr); - if(commentCount !== 0){ + if(commentCount !== 0 && (comment_no_infix === false || this.state.field.length === 0)){ this.state.commenting = true; continue; } diff --git a/packages/csv-parse/dist/iife/index.js b/packages/csv-parse/dist/iife/index.js index 1638bc9fe..ea6b9afb1 100644 --- a/packages/csv-parse/dist/iife/index.js +++ b/packages/csv-parse/dist/iife/index.js @@ -5403,6 +5403,16 @@ var csv_parse = (function (exports) { ], options); } } + // Normalize option `comment_no_infix` + if(options.comment_no_infix === undefined || options.comment_no_infix === null || options.comment_no_infix === false){ + options.comment_no_infix = false; + }else if(options.comment_no_infix !== true){ + throw new CsvError('CSV_INVALID_OPTION_COMMENT', [ + 'Invalid option comment_no_infix:', + 'value must be a boolean,', + `got ${JSON.stringify(options.comment_no_infix)}` + ], options); + } // Normalize option `delimiter` const delimiter_json = JSON.stringify(options.delimiter); if(!Array.isArray(options.delimiter)) options.delimiter = [options.delimiter]; @@ -5764,7 +5774,7 @@ var csv_parse = (function (exports) { }, // Central parser implementation parse: function(nextBuf, end, push, close){ - const {bom, encoding, from_line, ltrim, max_record_size,raw, relax_quotes, rtrim, skip_empty_lines, to, to_line} = this.options; + const {bom, comment_no_infix, encoding, from_line, ltrim, max_record_size,raw, relax_quotes, rtrim, skip_empty_lines, to, to_line} = this.options; let {comment, escape, quote, record_delimiter} = this.options; const {bomSkipped, previousBuf, rawBuffer, escapeIsQuote} = this.state; let buf; @@ -5963,7 +5973,7 @@ var csv_parse 
= (function (exports) { continue; } const commentCount = comment === null ? 0 : this.__compareBytes(comment, buf, pos, chr); - if(commentCount !== 0){ + if(commentCount !== 0 && (comment_no_infix === false || this.state.field.length === 0)){ this.state.commenting = true; continue; } diff --git a/packages/csv-parse/dist/iife/sync.js b/packages/csv-parse/dist/iife/sync.js index bbd9a751a..b4e3de6cc 100644 --- a/packages/csv-parse/dist/iife/sync.js +++ b/packages/csv-parse/dist/iife/sync.js @@ -2249,6 +2249,16 @@ var csv_parse_sync = (function (exports) { ], options); } } + // Normalize option `comment_no_infix` + if(options.comment_no_infix === undefined || options.comment_no_infix === null || options.comment_no_infix === false){ + options.comment_no_infix = false; + }else if(options.comment_no_infix !== true){ + throw new CsvError('CSV_INVALID_OPTION_COMMENT', [ + 'Invalid option comment_no_infix:', + 'value must be a boolean,', + `got ${JSON.stringify(options.comment_no_infix)}` + ], options); + } // Normalize option `delimiter` const delimiter_json = JSON.stringify(options.delimiter); if(!Array.isArray(options.delimiter)) options.delimiter = [options.delimiter]; @@ -2610,7 +2620,7 @@ var csv_parse_sync = (function (exports) { }, // Central parser implementation parse: function(nextBuf, end, push, close){ - const {bom, encoding, from_line, ltrim, max_record_size,raw, relax_quotes, rtrim, skip_empty_lines, to, to_line} = this.options; + const {bom, comment_no_infix, encoding, from_line, ltrim, max_record_size,raw, relax_quotes, rtrim, skip_empty_lines, to, to_line} = this.options; let {comment, escape, quote, record_delimiter} = this.options; const {bomSkipped, previousBuf, rawBuffer, escapeIsQuote} = this.state; let buf; @@ -2809,7 +2819,7 @@ var csv_parse_sync = (function (exports) { continue; } const commentCount = comment === null ? 
0 : this.__compareBytes(comment, buf, pos, chr); - if(commentCount !== 0){ + if(commentCount !== 0 && (comment_no_infix === false || this.state.field.length === 0)){ this.state.commenting = true; continue; } diff --git a/packages/csv-parse/dist/umd/index.js b/packages/csv-parse/dist/umd/index.js index d691d0fe1..36b36060a 100644 --- a/packages/csv-parse/dist/umd/index.js +++ b/packages/csv-parse/dist/umd/index.js @@ -5406,6 +5406,16 @@ ], options); } } + // Normalize option `comment_no_infix` + if(options.comment_no_infix === undefined || options.comment_no_infix === null || options.comment_no_infix === false){ + options.comment_no_infix = false; + }else if(options.comment_no_infix !== true){ + throw new CsvError('CSV_INVALID_OPTION_COMMENT', [ + 'Invalid option comment_no_infix:', + 'value must be a boolean,', + `got ${JSON.stringify(options.comment_no_infix)}` + ], options); + } // Normalize option `delimiter` const delimiter_json = JSON.stringify(options.delimiter); if(!Array.isArray(options.delimiter)) options.delimiter = [options.delimiter]; @@ -5767,7 +5777,7 @@ }, // Central parser implementation parse: function(nextBuf, end, push, close){ - const {bom, encoding, from_line, ltrim, max_record_size,raw, relax_quotes, rtrim, skip_empty_lines, to, to_line} = this.options; + const {bom, comment_no_infix, encoding, from_line, ltrim, max_record_size,raw, relax_quotes, rtrim, skip_empty_lines, to, to_line} = this.options; let {comment, escape, quote, record_delimiter} = this.options; const {bomSkipped, previousBuf, rawBuffer, escapeIsQuote} = this.state; let buf; @@ -5966,7 +5976,7 @@ continue; } const commentCount = comment === null ? 
0 : this.__compareBytes(comment, buf, pos, chr); - if(commentCount !== 0){ + if(commentCount !== 0 && (comment_no_infix === false || this.state.field.length === 0)){ this.state.commenting = true; continue; } diff --git a/packages/csv-parse/dist/umd/sync.js b/packages/csv-parse/dist/umd/sync.js index 1c4fe5822..4b3c2b72d 100644 --- a/packages/csv-parse/dist/umd/sync.js +++ b/packages/csv-parse/dist/umd/sync.js @@ -2252,6 +2252,16 @@ ], options); } } + // Normalize option `comment_no_infix` + if(options.comment_no_infix === undefined || options.comment_no_infix === null || options.comment_no_infix === false){ + options.comment_no_infix = false; + }else if(options.comment_no_infix !== true){ + throw new CsvError('CSV_INVALID_OPTION_COMMENT', [ + 'Invalid option comment_no_infix:', + 'value must be a boolean,', + `got ${JSON.stringify(options.comment_no_infix)}` + ], options); + } // Normalize option `delimiter` const delimiter_json = JSON.stringify(options.delimiter); if(!Array.isArray(options.delimiter)) options.delimiter = [options.delimiter]; @@ -2613,7 +2623,7 @@ }, // Central parser implementation parse: function(nextBuf, end, push, close){ - const {bom, encoding, from_line, ltrim, max_record_size,raw, relax_quotes, rtrim, skip_empty_lines, to, to_line} = this.options; + const {bom, comment_no_infix, encoding, from_line, ltrim, max_record_size,raw, relax_quotes, rtrim, skip_empty_lines, to, to_line} = this.options; let {comment, escape, quote, record_delimiter} = this.options; const {bomSkipped, previousBuf, rawBuffer, escapeIsQuote} = this.state; let buf; @@ -2812,7 +2822,7 @@ continue; } const commentCount = comment === null ? 
0 : this.__compareBytes(comment, buf, pos, chr); - if(commentCount !== 0){ + if(commentCount !== 0 && (comment_no_infix === false || this.state.field.length === 0)){ this.state.commenting = true; continue; } diff --git a/packages/csv-parse/lib/api/index.js b/packages/csv-parse/lib/api/index.js index b71843ba9..9833a68fc 100644 --- a/packages/csv-parse/lib/api/index.js +++ b/packages/csv-parse/lib/api/index.js @@ -61,7 +61,7 @@ const transform = function(original_options = {}) { }, // Central parser implementation parse: function(nextBuf, end, push, close){ - const {bom, encoding, from_line, ltrim, max_record_size,raw, relax_quotes, rtrim, skip_empty_lines, to, to_line} = this.options; + const {bom, comment_no_infix, encoding, from_line, ltrim, max_record_size,raw, relax_quotes, rtrim, skip_empty_lines, to, to_line} = this.options; let {comment, escape, quote, record_delimiter} = this.options; const {bomSkipped, previousBuf, rawBuffer, escapeIsQuote} = this.state; let buf; @@ -260,7 +260,7 @@ const transform = function(original_options = {}) { continue; } const commentCount = comment === null ? 
0 : this.__compareBytes(comment, buf, pos, chr); - if(commentCount !== 0){ + if(commentCount !== 0 && (comment_no_infix === false || this.state.field.length === 0)){ this.state.commenting = true; continue; } diff --git a/packages/csv-parse/lib/api/normalize_options.js b/packages/csv-parse/lib/api/normalize_options.js index 98925943e..fa5838804 100644 --- a/packages/csv-parse/lib/api/normalize_options.js +++ b/packages/csv-parse/lib/api/normalize_options.js @@ -108,6 +108,16 @@ const normalize_options = function(opts){ ], options); } } + // Normalize option `comment_no_infix` + if(options.comment_no_infix === undefined || options.comment_no_infix === null || options.comment_no_infix === false){ + options.comment_no_infix = false; + }else if(options.comment_no_infix !== true){ + throw new CsvError('CSV_INVALID_OPTION_COMMENT', [ + 'Invalid option comment_no_infix:', + 'value must be a boolean,', + `got ${JSON.stringify(options.comment_no_infix)}` + ], options); + } // Normalize option `delimiter` const delimiter_json = JSON.stringify(options.delimiter); if(!Array.isArray(options.delimiter)) options.delimiter = [options.delimiter]; diff --git a/packages/csv-parse/lib/index.d.ts b/packages/csv-parse/lib/index.d.ts index d082c7133..ec8aa027e 100644 --- a/packages/csv-parse/lib/index.d.ts +++ b/packages/csv-parse/lib/index.d.ts @@ -88,6 +88,12 @@ export interface Options { * Treat all the characters after this one as a comment, default to '' (disabled). */ comment?: string; + /** + * Restrict the definition of comments to a full line. Comment characters + * defined in the middle of the line are not interpreted as such. The + * option requires the activation of comments. + */ + comment_no_infix?: boolean; /** * Set the field delimiter. One character only, defaults to comma. 
*/ diff --git a/packages/csv-parse/test/api.types.ts b/packages/csv-parse/test/api.types.ts index 82fbeb7a5..c57c34f27 100644 --- a/packages/csv-parse/test/api.types.ts +++ b/packages/csv-parse/test/api.types.ts @@ -28,7 +28,8 @@ describe('API Types', () => { const options: Options = parser.options const keys: string[] = Object.keys(options) keys.sort().should.eql([ - 'bom', 'cast', 'cast_date', 'cast_first_line_to_header', 'cast_function', 'columns', 'comment', 'delimiter', + 'bom', 'cast', 'cast_date', 'cast_first_line_to_header', + 'cast_function', 'columns', 'comment', 'comment_no_infix', 'delimiter', 'encoding', 'escape', 'from', 'from_line', 'group_columns_by_name', 'ignore_last_delimiters', 'info', 'ltrim', 'max_record_size', 'objname', 'on_record', 'on_skip', 'quote', 'raw', 'record_delimiter', diff --git a/packages/csv-parse/test/option.comment_no_infix.coffee b/packages/csv-parse/test/option.comment_no_infix.coffee new file mode 100644 index 000000000..e3ccf76dc --- /dev/null +++ b/packages/csv-parse/test/option.comment_no_infix.coffee @@ -0,0 +1,38 @@ + +import { parse } from '../lib/index.js' + +describe 'Option `comment_no_infix`', -> + + it 'validation', -> + parse '', comment_no_infix: undefined, (->) + parse '', comment_no_infix: null, (->) + parse '', comment_no_infix: false, (->) + parse '', comment_no_infix: true, (->) + (-> + parse '', comment_no_infix: '', (->) + ).should.throw + message: 'Invalid option comment_no_infix: value must be a boolean, got ""' + code: 'CSV_INVALID_OPTION_COMMENT' + (-> + parse '', comment_no_infix: 2, (->) + ).should.throw + message: 'Invalid option comment_no_infix: value must be a boolean, got 2' + code: 'CSV_INVALID_OPTION_COMMENT' + + it 'with `true`', (next) -> + parse ''' + a,b#,c + ''', comment: '#', comment_no_infix: true, (err, records) -> + records.should.eql [ + ['a', 'b#', 'c'] + ] unless err + next err + + it 'with `false`', (next) -> + parse ''' + a,b#,c + ''', comment: '#', comment_no_infix: false, 
(err, records) -> + records.should.eql [ + ['a', 'b'] + ] unless err + next err