diff --git a/dev/modules/text_csv_bundled_tests_plan.md b/dev/modules/text_csv_bundled_tests_plan.md
new file mode 100644
index 000000000..6a7053f4a
--- /dev/null
+++ b/dev/modules/text_csv_bundled_tests_plan.md
@@ -0,0 +1,117 @@
+# Text::CSV Bundled Module Tests Plan
+
+## Goal
+
+Add the original CPAN Text::CSV 2.06 test suite to `src/test/resources/module/Text-CSV/`
+and make all tests pass against a bundled Text::CSV stack that uses Java as the XS backend.
+
+## Architecture (Revised)
+
+Instead of reimplementing hundreds of methods in our simplified CSV.pm, we now use the
+**original CPAN modules** with Java replacing XS:
+
+```
+Text::CSV (CPAN 2.06 wrapper)
+ └─ tries Text::CSV_XS first, falls back to Text::CSV_PP
+ │
+ ├─ Text::CSV_XS (our Java-backed module)
+ │ └─ inherits from Text::CSV_PP
+ │ └─ $VERSION = "1.61" (satisfies >= 1.60 check)
+ │ └─ XSLoader::load('TextCsv') for Java parse/combine
+ │ └─ overrides parse()/combine() with Java acceleration
+ │
+ └─ Text::CSV_PP (CPAN 2.06, 6454 lines, pure Perl)
+ └─ complete implementation: all accessors, meta_info,
+ callbacks, types, formula, etc.
+```
+
+**Files:**
+- `src/main/perl/lib/Text/CSV.pm` — CPAN 2.06 wrapper (146 lines of code)
+- `src/main/perl/lib/Text/CSV_PP.pm` — CPAN 2.06 pure-Perl backend (6454 lines)
+- `src/main/perl/lib/Text/CSV_XS.pm` — our Java-backed XS replacement
+- `src/main/java/org/perlonjava/runtime/perlmodule/TextCsv.java` — Java parse/combine
+
+**Why this approach:**
+- The CPAN `Text::CSV_PP` already passes 39/40 tests (52,356/52,360 subtests) on PerlOnJava
+ (documented in `dev/modules/text_csv_fix_plan.md`)
+- All complex logic (accessors, meta_info, callbacks, types, formula, etc.) is handled
+ by the battle-tested CPAN code
+- Java only needs to implement the performance-critical parse/combine operations
+- No need to reimplement hundreds of methods
+
+## Test Files
+
+40 test files + `t/util.pl` helper + 2 CSV data files copied from Text-CSV-2.06 CPAN
+distribution to `src/test/resources/module/Text-CSV/`.
+
+## Implementation Plan
+
+### Phase 1: Bundle CPAN Modules
+
+1. Replace `src/main/perl/lib/Text/CSV.pm` with CPAN 2.06 wrapper
+2. Copy `Text/CSV_PP.pm` from CPAN to `src/main/perl/lib/Text/`
+3. Create `Text/CSV_XS.pm` that inherits from `Text::CSV_PP`:
+ - `$VERSION = "1.61"` (passes Text::CSV's `>= 1.60` check)
+ - Inherits all methods from CSV_PP via `@ISA`
+ - Exports same constants/functions
+ - Later: override parse/combine with Java acceleration
+
+### Phase 2: Fix TextCsv.java Registration
+
+The existing `TextCsv.java` registers methods on `Text::CSV` package. After the refactor:
+- Either update it to register on `Text::CSV_XS` package
+- Or disable it if Text::CSV_PP handles everything
+- The XSLoader::load('TextCsv') call in old CSV.pm needs to be removed/updated
+
+### Phase 3: Run Tests and Debug
+
+With CPAN modules bundled, most tests should pass immediately (39/40 based on prior work).
+Known remaining issue from `text_csv_fix_plan.md`:
+- t/70_rt.t: 4 subtest failures (raw non-UTF-8 bytes, IO::Handle edge cases)
+
+### Phase 4: Java Acceleration (Optional, Future)
+
+Override `parse()` and `combine()` in `Text::CSV_XS` to delegate to Java:
+- `Parse($str, $fields, $fflags)` — Java via XSLoader
+- `Combine(\$str, \@fields, $useIO)` — Java via XSLoader
+- `SetDiag($code, $msg)` — Java error management
+- `_cache_set` / `_cache_get_eolt` — Java cache
+
+This is optional since CSV_PP already works. Add it for performance.
+
+## XS API Contract (for future Java implementation)
+
+The XS module must provide these C-level functions (via XSLoader):
+
+| XS Method | Called From | Purpose |
+|-----------|------------|---------|
+| `Parse($str, $fields, $fflags)` | `parse()` | Core CSV parsing |
+| `Combine(\$str, \@fields, $useIO)` | `combine()`, `print()` | Core CSV combining |
+| `SetDiag($code, $msg?)` | everywhere | Error diagnostics |
+| `_cache_set($idx, $val)` | accessor setters | XS state cache |
+| `_cache_get_eolt()` | `eol_type()` | EOL type detection |
+| `_cache_diag()` | debugging | Cache dump |
+| `print($io, $fields)` | `print()` | Direct IO print |
+| `getline($io)` | `getline()` | Direct IO getline |
+| `getline_all($io, ...)` | `getline_all()` | Batch IO getline |
+| `error_input()` | `error_input()` | Last error input |
+
+## Progress Tracking
+
+### Current Status: Phase 1 in progress
+
+### Completed
+- [x] Copy CPAN test files to module/Text-CSV/ (2026-04-08)
+- [x] Architecture decision: use CPAN Text::CSV + CSV_PP + Java-backed CSV_XS
+- [x] Analyzed XS API contract from Text-CSV_XS-1.61 source
+
+### In Progress
+- [ ] Bundle CPAN Text::CSV.pm (replace our custom one)
+- [ ] Bundle CPAN Text::CSV_PP.pm
+- [ ] Create Text::CSV_XS.pm (inherits from CSV_PP)
+
+### Next Steps
+1. Handle TextCsv.java registration (update or disable)
+2. Build and run tests
+3. Debug any remaining failures
+4. (Future) Add Java acceleration for parse/combine
diff --git a/src/main/java/org/perlonjava/app/scriptengine/PerlLanguageProvider.java b/src/main/java/org/perlonjava/app/scriptengine/PerlLanguageProvider.java
index 33c87b8af..b987a7af9 100644
--- a/src/main/java/org/perlonjava/app/scriptengine/PerlLanguageProvider.java
+++ b/src/main/java/org/perlonjava/app/scriptengine/PerlLanguageProvider.java
@@ -57,6 +57,7 @@ public class PerlLanguageProvider {
public static void resetAll() {
globalInitialized = false;
resetAllGlobals();
+ DataSection.reset();
}
/**
diff --git a/src/main/java/org/perlonjava/core/Configuration.java b/src/main/java/org/perlonjava/core/Configuration.java
index 074716a33..d9d99414c 100644
--- a/src/main/java/org/perlonjava/core/Configuration.java
+++ b/src/main/java/org/perlonjava/core/Configuration.java
@@ -33,7 +33,7 @@ public final class Configuration {
* Automatically populated by Gradle/Maven during build.
* DO NOT EDIT MANUALLY - this value is replaced at build time.
*/
- public static final String gitCommitId = "cc2df66a9";
+ public static final String gitCommitId = "5eecf59d6";
/**
* Git commit date of the build (ISO format: YYYY-MM-DD).
@@ -48,7 +48,7 @@ public final class Configuration {
* Parsed by App::perlbrew and other tools via: perl -V | grep "Compiled at"
* DO NOT EDIT MANUALLY - this value is replaced at build time.
*/
- public static final String buildTimestamp = "Apr 8 2026 09:46:56";
+ public static final String buildTimestamp = "Apr 8 2026 10:45:34";
// Prevent instantiation
private Configuration() {
diff --git a/src/main/java/org/perlonjava/frontend/parser/DataSection.java b/src/main/java/org/perlonjava/frontend/parser/DataSection.java
index 22374a576..4f86787fb 100644
--- a/src/main/java/org/perlonjava/frontend/parser/DataSection.java
+++ b/src/main/java/org/perlonjava/frontend/parser/DataSection.java
@@ -28,6 +28,15 @@ public class DataSection {
*/
private static final Set placeholderCreated = new HashSet<>();
+ /**
+ * Resets all static state for DataSection.
+ * Called between test runs to prevent stale state from interfering.
+ */
+ public static void reset() {
+ processedPackages.clear();
+ placeholderCreated.clear();
+ }
+
/**
* Creates a placeholder DATA filehandle for a package early in parsing.
* This ensures the DATA filehandle exists during BEGIN block execution.
diff --git a/src/main/java/org/perlonjava/runtime/perlmodule/TextCsv.java b/src/main/java/org/perlonjava/runtime/perlmodule/TextCsv.java
index a017b006c..a769c9e50 100644
--- a/src/main/java/org/perlonjava/runtime/perlmodule/TextCsv.java
+++ b/src/main/java/org/perlonjava/runtime/perlmodule/TextCsv.java
@@ -33,16 +33,15 @@ public TextCsv() {
/**
* Initializes and registers all Text::CSV methods.
+ *
+ * NOTE: Registration is intentionally disabled because Text::CSV now
+ * delegates to Text::CSV_XS (which inherits from Text::CSV_PP).
+ * Registering Java-backed parse/combine on "Text::CSV" would override
+ * the pure-Perl implementations inherited through the CPAN wrapper.
*/
public static void initialize() {
- TextCsv csv = new TextCsv();
- try {
- // Register core CSV methods (high-level methods now in Perl)
- csv.registerMethod("parse", null);
- csv.registerMethod("combine", null);
- } catch (NoSuchMethodException e) {
- System.err.println("Warning: Missing Text::CSV method: " + e.getMessage());
- }
+ // No-op: Java-backed CSV methods are no longer used.
+ // The CPAN Text::CSV wrapper + Text::CSV_PP handle everything.
}
/**
diff --git a/src/main/perl/lib/Text/CSV.pm b/src/main/perl/lib/Text/CSV.pm
index c426e7fd8..1463fd3fc 100644
--- a/src/main/perl/lib/Text/CSV.pm
+++ b/src/main/perl/lib/Text/CSV.pm
@@ -1,557 +1,146 @@
package Text::CSV;
-#
-# Original Text::CSV module by ISHIGAKI (Kenichi Ishigaki)
-# A comma-separated values manipulator
-#
-# PerlOnJava implementation by Flavio S. Glock.
-# The implementation is in: src/main/java/org/perlonjava/perlmodule/TextCsv.java
-#
-
use strict;
-use warnings;
-
-our $VERSION = '2.06';
-
-XSLoader::load( 'TextCsv' );
-
-use constant cacheKey => "_CSVFormat";
-
-# NOTE: Core functionality is implemented in:
-# src/main/java/org/perlonjava/perlmodule/TextCsv.java
-
-# Additional pure-Perl convenience methods
-
-sub new {
- my $class = shift;
- my %args = @_ == 1 && ref $_[0] eq 'HASH' ? %{$_[0]} : @_;
-
- # Set default attributes
- my $self = {
- sep_char => ',',
- quote_char => '"',
- escape_char => undef,
- binary => 0,
- auto_diag => 0,
- always_quote => 0,
- eol => undef,
- allow_loose_quotes => 0,
- allow_whitespace => 0,
- blank_is_undef => 0,
- empty_is_undef => 0,
- quote_empty => 0,
- quote_space => 1,
- quote_binary => 1,
- decode_utf8 => 1,
- keep_meta_info => 0,
- strict => 0,
- formula => 'none',
- column_names => [],
-
- # Clear error state
- _ERROR_CODE => 0,
- _ERROR_STR => '',
- _ERROR_POS => 0,
- _ERROR_FIELD => 0,
-
- %args
- };
-
- return bless $self, $class;
-}
-
-sub sep_char {
- my ($self, $sep) = @_;
+use Exporter;
+use Carp ();
+use vars qw( $VERSION $DEBUG @ISA @EXPORT_OK %EXPORT_TAGS );
+@ISA = qw( Exporter );
+
+BEGIN {
+ $VERSION = '2.06';
+ $DEBUG = 0;
+}
+
+# if use CSV_XS, requires version
+my $Module_XS = 'Text::CSV_XS';
+my $Module_PP = 'Text::CSV_PP';
+my $XS_Version = '1.60';
+
+my $Is_Dynamic = 0;
+
+my @PublicMethods = qw/
+ version error_diag error_input
+ known_attributes
+ PV IV NV CSV_TYPE_PV CSV_TYPE_IV CSV_TYPE_NV
+ CSV_FLAGS_IS_QUOTED CSV_FLAGS_IS_BINARY CSV_FLAGS_ERROR_IN_FIELD CSV_FLAGS_IS_MISSING
+ /;
+
+%EXPORT_TAGS = (
+ CONSTANTS => [qw(
+ CSV_FLAGS_IS_QUOTED
+ CSV_FLAGS_IS_BINARY
+ CSV_FLAGS_ERROR_IN_FIELD
+ CSV_FLAGS_IS_MISSING
+ CSV_TYPE_PV
+ CSV_TYPE_IV
+ CSV_TYPE_NV
+ )],
+);
+@EXPORT_OK = (qw(csv PV IV NV), @{$EXPORT_TAGS{CONSTANTS}});
- if (defined $sep) {
- die "sep_char must be a single character" unless length($sep) == 1;
- $self->{sep_char} = $sep;
- delete $self->{+cacheKey}; # Invalidate cache if needed
- }
+#
- return $self->{sep_char};
-}
+# Check the environment variable to decide worker module.
-sub quote_char {
- my ($self, $quote) = @_;
+unless ($Text::CSV::Worker) {
+ $Text::CSV::DEBUG and Carp::carp("Check used worker module...");
- if (defined $quote) {
- die "quote_char must be a single character" unless length($quote) == 1;
- $self->{quote_char} = $quote;
- delete $self->{+cacheKey}; # Invalidate cache if needed
+ if (exists $ENV{PERL_TEXT_CSV}) {
+ if ($ENV{PERL_TEXT_CSV} eq '0' or $ENV{PERL_TEXT_CSV} eq 'Text::CSV_PP') {
+ _load_pp() or Carp::croak $@;
+ }
+ elsif ($ENV{PERL_TEXT_CSV} eq '1' or $ENV{PERL_TEXT_CSV} =~ /Text::CSV_XS\s*,\s*Text::CSV_PP/) {
+ _load_xs() or _load_pp() or Carp::croak $@;
+ }
+ elsif ($ENV{PERL_TEXT_CSV} eq '2' or $ENV{PERL_TEXT_CSV} eq 'Text::CSV_XS') {
+ _load_xs() or Carp::croak $@;
+ }
+ else {
+ Carp::croak "The value of environmental variable 'PERL_TEXT_CSV' is invalid.";
+ }
}
-
- return $self->{quote_char};
-}
-
-sub escape_char {
- my ($self, $escape) = @_;
-
- if (@_ > 1) {
- $self->{escape_char} = $escape;
- delete $self->{+cacheKey}; # Invalidate cache if needed
+ else {
+ _load_xs() or _load_pp() or Carp::croak $@;
}
- return $self->{escape_char};
}
-sub binary {
- my ($self, $binary) = @_;
+sub new { # normal mode
+ my $proto = shift;
+ my $class = ref($proto) || $proto;
- if (defined $binary) {
- $self->{binary} = $binary ? 1 : 0;
- delete $self->{+cacheKey}; # Invalidate cache if needed
+ unless ($proto) { # for Text::CSV_XS/PP::new(0);
+ return eval qq| $Text::CSV::Worker\::new( \$proto ) |;
}
- return $self->{binary};
-}
-
-sub auto_diag {
- my ($self, $auto_diag) = @_;
+ #if (ref $_[0] and $_[0]->{module}) {
+ # Carp::croak("Can't set 'module' in non dynamic mode.");
+ #}
- if (defined $auto_diag) {
- $self->{auto_diag} = $auto_diag ? 1 : 0;
+ if (my $obj = $Text::CSV::Worker->new(@_)) {
+ $obj->{_MODULE} = $Text::CSV::Worker;
+ bless $obj, $class;
+ return $obj;
}
-
- return $self->{auto_diag};
-}
-
-sub always_quote {
- my ($self, $always_quote) = @_;
-
- if (defined $always_quote) {
- $self->{always_quote} = $always_quote ? 1 : 0;
- delete $self->{+cacheKey}; # Invalidate cache if needed
+ else {
+ return;
}
- return $self->{always_quote};
}
-sub eol {
- my ($self, $eol) = @_;
-
- if (@_ > 1) {
- $self->{eol} = $eol;
- delete $self->{+cacheKey}; # Invalidate cache if needed
+sub csv {
+ if (@_ && ref $_[0] eq __PACKAGE__ or ref $_[0] eq __PACKAGE__->backend) {
+ splice @_, 0, 0, "csv";
}
-
- return $self->{eol};
+ my $backend = __PACKAGE__->backend;
+ no strict 'refs';
+ &{"$backend\::csv"}(@_);
}
-sub string {
- my $self = shift;
- return $self->{_string};
-}
+sub require_xs_version { $XS_Version; }
-sub fields {
- my $self = shift;
- return @{$self->{_fields} || []};
+sub module {
+ my $proto = shift;
+ return !ref($proto) ? $Text::CSV::Worker
+ : ref($proto->{_MODULE}) ? ref($proto->{_MODULE}) : $proto->{_MODULE};
}
-# Add this method after the fields() method:
-sub getline {
- my ($self, $fh) = @_;
-
- # Read a line from the filehandle
- my $line = <$fh>;
-
- return undef unless defined $line;
+*backend = *module;
- # Parse the line
- if ($self->parse($line)) {
- return $self->fields;
- }
-
- return undef;
+sub is_xs {
+ return $_[0]->module eq $Module_XS;
}
-sub column_names {
- my ($self, @names) = @_;
-
- if (@names) {
- # Flatten array ref if provided (e.g., $csv->column_names(\@headers))
- @names = @{$names[0]} if (scalar(@names) == 1 && ref($names[0]) eq 'ARRAY');
- $self->{column_names} = \@names;
- }
-
- return @{$self->{column_names} || []};
+sub is_pp {
+ return $_[0]->module eq $Module_PP;
}
-sub getline_hr {
- my ($self, $fh) = @_;
+sub is_dynamic { $Is_Dynamic; }
- # Check if column names are set
- my $col_names = $self->{column_names};
- unless ($col_names && @$col_names) {
- $self->_set_error(3002, "getline_hr() called before column_names()", 0, 0);
- return undef;
- }
+sub _load_xs { _load($Module_XS, $XS_Version) }
- # Get a line
- my $fields = $self->getline($fh);
- return undef unless $fields;
+sub _load_pp { _load($Module_PP) }
- # Convert to hash
- my %hash;
- for my $i (0 .. $#$col_names) {
- $hash{$col_names->[$i]} = $fields->[$i] // undef;
- }
+sub _load {
+ my ($module, $version) = @_;
+ $version ||= '';
- return \%hash;
-}
+ $Text::CSV::DEBUG and Carp::carp "Load $module.";
-sub error_diag {
- my $self = shift;
+ eval qq| use $module $version |;
- unless (ref $self) {
- # Class method call - return last global error
- return "";
- }
+ return if $@;
- # Instance method call
- if (wantarray) {
- return (
- $self->{_ERROR_CODE} // 0,
- $self->{_ERROR_STR} // "",
- $self->{_ERROR_POS} // 0,
- 0, # record number
- $self->{_ERROR_FIELD} // 0
- );
- }
- else {
- # Scalar context - return error string
- return $self->{_ERROR_STR} // "";
- }
-}
+ push @Text::CSV::ISA, $module;
+ $Text::CSV::Worker = $module;
-sub _set_error {
- my ($self, $code, $message, $pos, $field) = @_;
+ local $^W;
+ no strict qw(refs);
- $self->{_ERROR_CODE} = $code;
- $self->{_ERROR_STR} = $message;
- $self->{_ERROR_POS} = $pos;
- $self->{_ERROR_FIELD} = $field;
-
- # Handle auto_diag
- if ($self->{auto_diag}) {
- warn "# CSV ERROR: $code - $message\n";
+ for my $method (@PublicMethods) {
+ *{"Text::CSV::$method"} = \&{"$module\::$method"};
}
-}
-
-sub _clear_error {
- my $self = shift;
- $self->{_ERROR_CODE} = 0;
- $self->{_ERROR_STR} = '';
- $self->{_ERROR_POS} = 0;
- $self->{_ERROR_FIELD} = 0;
-}
-
-sub print {
- my ($self, $fh, $fields) = @_;
-
- # Validate arguments
- return 0 unless defined $fh && ref($fields) eq 'ARRAY';
-
- # Combine fields into a CSV string
- my $status = $self->combine(@$fields);
- return 0 unless $status;
-
- # Add EOL if configured
- my $output = $self->string;
- $output .= $self->{eol} if defined $self->{eol};
-
- # Print to filehandle
- print $fh $output;
-
return 1;
}
-sub say {
- my ($self, $fh, $fields) = @_;
-
- # Save current eol setting
- my $saved_eol = $self->eol;
-
- # Set eol to $/ if not defined
- $self->eol($/) unless defined $saved_eol;
-
- # Print the fields
- my $result = $self->print($fh, $fields);
-
- # Restore eol setting
- $self->eol($saved_eol);
-
- return $result;
-}
-
-sub getline_all {
- my ($self, $fh, $offset, $length) = @_;
- my @rows;
-
- # Handle offset
- if (defined $offset && $offset > 0) {
- for (1 .. $offset) {
- last unless $self->getline($fh);
- }
- }
-
- # Read rows
- my $count = 0;
- while (my $row = $self->getline($fh)) {
- push @rows, $row;
- $count++;
- last if defined $length && $count >= $length;
- }
-
- return \@rows;
-}
-
-sub header {
- my ($self, $fh, $opts) = @_;
- $opts ||= {};
-
- # Read first line
- my $row = $self->getline($fh);
- return unless $row;
-
- # Set column names
- $self->column_names(@$row);
-
- # Return column names in list context
- return @$row if wantarray;
-
- # Return self in scalar context
- return $self;
-}
-
-sub csv {
- # Function interface implementation
- my %opts = @_;
-
- my $in = delete $opts{in} or die "csv: missing 'in' parameter";
- my $out = delete $opts{out};
- my $headers = delete $opts{headers};
-
- # Create CSV object
- my $csv = Text::CSV->new(\%opts) or die Text::CSV->error_diag;
-
- # Handle input
- my $data;
- if (ref $in eq 'SCALAR') {
- # Parse string
- open my $fh, '<', $in or die $!;
- $data = _read_csv($csv, $fh, $headers);
- close $fh;
- }
- elsif (ref $in || -f $in) {
- # File or filehandle
- my $fh;
- if (ref $in) {
- $fh = $in;
- }
- else {
- open $fh, '<', $in or die "$in: $!";
- }
- $data = _read_csv($csv, $fh, $headers);
- close $fh unless ref $in;
- }
-
- # Handle output
- if ($out) {
- _write_csv($csv, $out, $data, $headers);
- }
-
- return $data;
-}
-
-sub _read_csv {
- my ($csv, $fh, $headers) = @_;
-
- if ($headers && $headers eq 'auto') {
- $csv->header($fh);
- my @rows;
- while (my $row = $csv->getline_hr($fh)) {
- push @rows, $row;
- }
- return \@rows;
- }
- else {
- return $csv->getline_all($fh);
- }
-}
-
-sub _write_csv {
- my ($csv, $out, $data, $headers) = @_;
-
- my $fh;
- if (ref $out eq 'SCALAR') {
- open $fh, '>', $out or die $!;
- }
- elsif (ref $out || $out) {
- $fh = ref $out ? $out : do {
- open my $fh, '>', $out or die "$out: $!";
- $fh;
- };
- }
-
- # Write header if needed
- if ($headers && ref $data eq 'ARRAY' && @$data && ref $data->[0] eq 'HASH') {
- my @cols = $csv->column_names;
- @cols = keys %{$data->[0]} unless @cols;
- $csv->print($fh, \@cols);
- }
-
- # Write data
- for my $row (@$data) {
- if (ref $row eq 'HASH') {
- my @cols = $csv->column_names;
- $csv->print($fh, [ @{$row}{@cols} ]);
- }
- else {
- $csv->print($fh, $row);
- }
- }
-
- close $fh unless ref $out;
-}
-
-# Re-export constants
-use constant {
- CSV_FLAGS_IS_QUOTED => 0x0001,
- CSV_FLAGS_IS_BINARY => 0x0002,
- CSV_FLAGS_ERROR_IN_FIELD => 0x0004,
- CSV_FLAGS_IS_MISSING => 0x0010,
-};
-
1;
-
-__END__
-
-=head1 NAME
-
-Text::CSV - comma-separated values manipulator
-
-=head1 SYNOPSIS
-
- use Text::CSV;
-
- my $csv = Text::CSV->new({ binary => 1 });
-
- # Parse CSV string
- if ($csv->parse($line)) {
- my @fields = $csv->fields();
- }
-
- # Combine fields into CSV
- if ($csv->combine(@fields)) {
- my $string = $csv->string();
- }
-
- # Read from file
- open my $fh, '<', 'file.csv' or die $!;
- while (my $row = $csv->getline($fh)) {
- # Process row
- }
-
- # Read with headers
- $csv->column_names($csv->getline($fh));
- while (my $hr = $csv->getline_hr($fh)) {
- print $hr->{column_name};
- }
-
-=head1 DESCRIPTION
-
-Text::CSV provides facilities for the composition and decomposition of
-comma-separated values using Text::CSV compatible API.
-
-This is a PerlOnJava implementation that uses Apache Commons CSV internally.
-
-=head1 METHODS
-
-=head2 new
-
-Create a new Text::CSV object with optional attributes.
-
-=head2 parse
-
-Parse a CSV string into fields.
-
-=head2 fields
-
-Return the fields from the last successful parse.
-
-=head2 combine
-
-Combine fields into a CSV string.
-
-=head2 string
-
-Return the CSV string from the last successful combine.
-
-=head2 getline
-
-Read and parse a line from a filehandle.
-
-=head2 getline_hr
-
-Read and parse a line, returning a hashref using column names.
-
-=head2 getline_all
-
-Read all remaining lines from a filehandle.
-
-=head2 print
-
-Print fields as CSV to a filehandle.
-
-=head2 say
-
-Print fields as CSV to a filehandle with record separator.
-
-=head2 column_names
-
-Get or set column names for hash-based operations.
-
-=head2 header
-
-Read the first line and use it as column names.
-
-=head2 error_diag
-
-Get error information from the last operation.
-
-=head2 csv
-
-Function interface for simple CSV operations.
-
-=head1 ATTRIBUTES
-
-=head2 sep_char
-
-Field separator character (default: ',')
-
-=head2 quote_char
-
-Quote character (default: '"')
-
-=head2 escape_char
-
-Escape character (default: undef)
-
-=head2 binary
-
-Allow binary characters (default: 0)
-
-=head2 auto_diag
-
-Automatic error diagnostics (default: 0)
-
-=head2 always_quote
-
-Always quote fields (default: 0)
-
-=head2 eol
-
-End of line string (default: undef)
-
-=cut
diff --git a/src/main/perl/lib/Text/CSV_PP.pm b/src/main/perl/lib/Text/CSV_PP.pm
new file mode 100644
index 000000000..9f8fc228c
--- /dev/null
+++ b/src/main/perl/lib/Text/CSV_PP.pm
@@ -0,0 +1,6454 @@
+package Text::CSV_PP;
+
+################################################################################
+#
+# Text::CSV_PP - Text::CSV_XS compatible pure-Perl module
+#
+################################################################################
+require 5.006001;
+
+use strict;
+use Exporter ();
+use vars qw($VERSION @ISA @EXPORT_OK %EXPORT_TAGS);
+use Carp;
+
+$VERSION = '2.06';
+@ISA = qw(Exporter);
+
+sub PV { 0 }
+sub IV { 1 }
+sub NV { 2 }
+
+sub CSV_TYPE_PV { PV }
+sub CSV_TYPE_IV { IV }
+sub CSV_TYPE_NV { NV }
+
+sub IS_QUOTED () { 0x0001; }
+sub IS_BINARY () { 0x0002; }
+sub IS_ERROR () { 0x0004; }
+sub IS_MISSING () { 0x0010; }
+
+sub CSV_FLAGS_IS_QUOTED { IS_QUOTED }
+sub CSV_FLAGS_IS_BINARY { IS_BINARY }
+sub CSV_FLAGS_ERROR_IN_FIELD { IS_ERROR }
+sub CSV_FLAGS_IS_MISSING { IS_MISSING }
+
+sub HOOK_ERROR () { 0x0001; }
+sub HOOK_AFTER_PARSE () { 0x0002; }
+sub HOOK_BEFORE_PRINT () { 0x0004; }
+
+sub EOL_TYPE_UNDEF () { 0 }
+sub EOL_TYPE_NL () { 1 }
+sub EOL_TYPE_CR () { 2 }
+sub EOL_TYPE_CRNL () { 3 }
+sub EOL_TYPE_OTHER () { 4 }
+
+sub useIO_EOF () { 0x0010; }
+
+%EXPORT_TAGS = (
+ CONSTANTS => [qw(
+ CSV_FLAGS_IS_QUOTED
+ CSV_FLAGS_IS_BINARY
+ CSV_FLAGS_ERROR_IN_FIELD
+ CSV_FLAGS_IS_MISSING
+
+ CSV_TYPE_PV
+ CSV_TYPE_IV
+ CSV_TYPE_NV
+ )],
+);
+@EXPORT_OK = (qw(csv PV IV NV), @{$EXPORT_TAGS{'CONSTANTS'}});
+
+my $ERRORS = {
+ # Generic errors
+ 1000 => "INI - constructor failed",
+ 1001 => "INI - sep_char is equal to quote_char or escape_char",
+ 1002 => "INI - allow_whitespace with escape_char or quote_char SP or TAB",
+ 1003 => "INI - \\r or \\n in main attr not allowed",
+ 1004 => "INI - callbacks should be undef or a hashref",
+ 1005 => "INI - EOL too long",
+ 1006 => "INI - SEP too long",
+ 1007 => "INI - QUOTE too long",
+ 1008 => "INI - SEP undefined",
+
+ 1010 => "INI - the header is empty",
+ 1011 => "INI - the header contains more than one valid separator",
+ 1012 => "INI - the header contains an empty field",
+ 1013 => "INI - the header contains nun-unique fields",
+ 1014 => "INI - header called on undefined stream",
+
+ # Syntax errors
+ 1500 => "PRM - Invalid/unsupported arguments(s)",
+ 1501 => "PRM - The key attribute is passed as an unsupported type",
+ 1502 => "PRM - The value attribute is passed without the key attribute",
+ 1503 => "PRM - The value attribute is passed as an unsupported type",
+
+ # Parse errors
+ 2010 => "ECR - QUO char inside quotes followed by CR not part of EOL",
+ 2011 => "ECR - Characters after end of quoted field",
+ 2012 => "EOF - End of data in parsing input stream",
+ 2013 => "ESP - Specification error for fragments RFC7111",
+ 2014 => "ENF - Inconsistent number of fields",
+ 2015 => "ERW - Empty row",
+ 2016 => "EOL - Inconsistent EOL",
+
+ # EIQ - Error Inside Quotes
+ 2021 => "EIQ - NL char inside quotes, binary off",
+ 2022 => "EIQ - CR char inside quotes, binary off",
+ 2023 => "EIQ - QUO character not allowed",
+ 2024 => "EIQ - EOF cannot be escaped, not even inside quotes",
+ 2025 => "EIQ - Loose unescaped escape",
+ 2026 => "EIQ - Binary character inside quoted field, binary off",
+ 2027 => "EIQ - Quoted field not terminated",
+
+ # EIF - Error Inside Field
+ 2030 => "EIF - NL char inside unquoted verbatim, binary off",
+ 2031 => "EIF - CR char is first char of field, not part of EOL",
+ 2032 => "EIF - CR char inside unquoted, not part of EOL",
+ 2034 => "EIF - Loose unescaped quote",
+ 2035 => "EIF - Escaped EOF in unquoted field",
+ 2036 => "EIF - ESC error",
+ 2037 => "EIF - Binary character in unquoted field, binary off",
+
+ # Combine errors
+ 2110 => "ECB - Binary character in Combine, binary off",
+
+ # IO errors
+ 2200 => "EIO - print to IO failed. See errno",
+
+ # Hash-Ref errors
+ 3001 => "EHR - Unsupported syntax for column_names ()",
+ 3002 => "EHR - getline_hr () called before column_names ()",
+ 3003 => "EHR - bind_columns () and column_names () fields count mismatch",
+ 3004 => "EHR - bind_columns () only accepts refs to scalars",
+ 3006 => "EHR - bind_columns () did not pass enough refs for parsed fields",
+ 3007 => "EHR - bind_columns needs refs to writable scalars",
+ 3008 => "EHR - unexpected error in bound fields",
+ 3009 => "EHR - print_hr () called before column_names ()",
+ 3010 => "EHR - print_hr () called with invalid arguments",
+
+ 4001 => "PRM - The key does not exist as field in the data",
+
+ 5001 => "PRM - The result does not match the output to append to",
+ 5002 => "PRM - Unsupported output",
+
+ 0 => "",
+};
+
+BEGIN {
+ if ($] < 5.006) {
+ $INC{'bytes.pm'} = 1 unless $INC{'bytes.pm'}; # dummy
+ no strict 'refs';
+ *{"utf8::is_utf8"} = sub { 0; };
+ *{"utf8::decode"} = sub { };
+ }
+ elsif ($] < 5.008) {
+ no strict 'refs';
+ *{"utf8::is_utf8"} = sub { 0; };
+ *{"utf8::decode"} = sub { };
+ *{"utf8::encode"} = sub { };
+ }
+ elsif (!defined &utf8::is_utf8) {
+ require Encode;
+ *utf8::is_utf8 = *Encode::is_utf8;
+ }
+
+ eval q| require Scalar::Util |;
+ if ($@) {
+ eval q| require B |;
+ if ($@) {
+ Carp::croak $@;
+ }
+ else {
+ my %tmap = qw(
+ B::NULL SCALAR
+ B::HV HASH
+ B::AV ARRAY
+ B::CV CODE
+ B::IO IO
+ B::GV GLOB
+ B::REGEXP REGEXP
+ );
+ *Scalar::Util::reftype = sub (\$) {
+ my $r = shift;
+ return undef unless length(ref($r));
+ my $t = ref(B::svref_2object($r));
+ return
+ exists $tmap{$t} ? $tmap{$t}
+ : length(ref($$r)) ? 'REF'
+ : 'SCALAR';
+ };
+ *Scalar::Util::readonly = sub (\$) {
+ my $b = B::svref_2object($_[0]);
+ $b->FLAGS & 0x00800000; # SVf_READONLY?
+ };
+ }
+ }
+}
+
+################################################################################
+#
+# Common pure perl methods, taken almost directly from Text::CSV_XS.
+# (These should be moved into a common class eventually, so that
+# both XS and PP don't need to apply the same changes.)
+#
+################################################################################
+
+################################################################################
+# version
+################################################################################
+
+sub version {
+ return $VERSION;
+}
+
+################################################################################
+# new
+################################################################################
+
+my %def_attr = (
+ eol => '',
+ sep_char => ',',
+ quote_char => '"',
+ escape_char => '"',
+ binary => 0,
+ decode_utf8 => 1,
+ auto_diag => 0,
+ diag_verbose => 0,
+ strict => 0,
+ strict_eol => 0,
+ blank_is_undef => 0,
+ empty_is_undef => 0,
+ allow_whitespace => 0,
+ allow_loose_quotes => 0,
+ allow_loose_escapes => 0,
+ allow_unquoted_escape => 0,
+ always_quote => 0,
+ quote_empty => 0,
+ quote_space => 1,
+ quote_binary => 1,
+ escape_null => 1,
+ keep_meta_info => 0,
+ verbatim => 0,
+ formula => 0,
+ skip_empty_rows => 0,
+ undef_str => undef,
+ comment_str => undef,
+ types => undef,
+ callbacks => undef,
+
+ _EOF => "",
+ _RECNO => 0,
+ _STATUS => undef,
+ _FIELDS => undef,
+ _FFLAGS => undef,
+ _STRING => undef,
+ _ERROR_INPUT => undef,
+ _COLUMN_NAMES => undef,
+ _BOUND_COLUMNS => undef,
+ _AHEAD => undef,
+ _FORMULA_CB => undef,
+ _EMPTROW_CB => undef,
+
+ ENCODING => undef,
+);
+
+my %attr_alias = (
+ quote_always => "always_quote",
+ verbose_diag => "diag_verbose",
+ quote_null => "escape_null",
+ escape => "escape_char",
+ comment => "comment_str",
+);
+
+my $last_err = Text::CSV_PP->SetDiag(0);
+my $ebcdic = ord("A") == 0xC1; # Faster than $Config{'ebcdic'}
+my @internal_kh;
+
+# NOT a method: is also used before bless
+sub _unhealthy_whitespace {
+ my ($self, $aw) = @_;
+ $aw or return 0; # no checks needed without allow_whitespace
+
+ my $quo = $self->{quote};
+ defined $quo && length($quo) or $quo = $self->{quote_char};
+ my $esc = $self->{escape_char};
+
+ defined $quo && $quo =~ m/^[ \t]/ and return 1002;
+ defined $esc && $esc =~ m/^[ \t]/ and return 1002;
+
+ return 0;
+}
+
+sub _check_sanity {
+ my $self = shift;
+
+ my $eol = $self->{eol};
+ my $sep = $self->{sep};
+ defined $sep && length($sep) or $sep = $self->{sep_char};
+ my $quo = $self->{quote};
+ defined $quo && length($quo) or $quo = $self->{quote_char};
+ my $esc = $self->{escape_char};
+
+# use DP;::diag ("SEP: '", DPeek ($sep),
+# "', QUO: '", DPeek ($quo),
+# "', ESC: '", DPeek ($esc),"'");
+
+ # sep_char should not be undefined
+ $sep ne "" or return 1008;
+ length($sep) > 16 and return 1006;
+ $sep =~ m/[\r\n]/ and return 1003;
+
+ if (defined $quo) {
+ $quo eq $sep and return 1001;
+ length($quo) > 16 and return 1007;
+ $quo =~ m/[\r\n]/ and return 1003;
+ }
+ if (defined $esc) {
+ $esc eq $sep and return 1001;
+ $esc =~ m/[\r\n]/ and return 1003;
+ }
+ if (defined $eol) {
+ length($eol) > 16 and return 1005;
+ }
+
+ return _unhealthy_whitespace($self, $self->{allow_whitespace});
+}
+
+sub known_attributes {
+ sort grep !m/^_/ => "sep", "quote", keys %def_attr;
+}
+
+sub new {
+ $last_err = Text::CSV_PP->SetDiag(1000,
+ "usage: my \$csv = Text::CSV_PP->new ([{ option => value, ... }]);");
+
+ my $proto = shift;
+ my $class = ref $proto || $proto or return;
+ @_ > 0 && ref $_[0] ne "HASH" and return;
+ my $attr = shift || {};
+ my %attr = map {
+ my $k = m/^[a-zA-Z]\w+$/ ? lc $_ : $_;
+ exists $attr_alias{$k} and $k = $attr_alias{$k};
+ ($k => $attr->{$_});
+ } keys %{$attr};
+
+ my $sep_aliased = 0;
+ if (exists $attr{sep}) {
+ $attr{sep_char} = delete $attr{sep};
+ $sep_aliased = 1;
+ }
+ my $quote_aliased = 0;
+ if (exists $attr{quote}) {
+ $attr{quote_char} = delete $attr{quote};
+ $quote_aliased = 1;
+ }
+ exists $attr{formula_handling} and
+ $attr{formula} = delete $attr{formula_handling};
+ my $attr_formula = delete $attr{formula};
+
+ for (keys %attr) {
+ if (m/^[a-z]/ && exists $def_attr{$_}) {
+ # uncoverable condition false
+ defined $attr{$_} && m/_char$/ and utf8::decode($attr{$_});
+ next;
+ }
+# croak?
+ $last_err = Text::CSV_PP->SetDiag(1000, "INI - Unknown attribute '$_'");
+ $attr{auto_diag} and error_diag();
+ return;
+ }
+ if ($sep_aliased) {
+ my @b = unpack "U0C*", $attr{sep_char};
+ if (@b > 1) {
+ $attr{sep} = $attr{sep_char};
+ $attr{sep_char} = "\0";
+ }
+ else {
+ $attr{sep} = undef;
+ }
+ }
+ if ($quote_aliased and defined $attr{quote_char}) {
+ my @b = unpack "U0C*", $attr{quote_char};
+ if (@b > 1) {
+ $attr{quote} = $attr{quote_char};
+ $attr{quote_char} = "\0";
+ }
+ else {
+ $attr{quote} = undef;
+ }
+ }
+
+ my $self = {%def_attr, %attr};
+ if (my $ec = _check_sanity($self)) {
+ $last_err = Text::CSV_PP->SetDiag($ec);
+ $attr{auto_diag} and error_diag();
+ return;
+ }
+ if (defined $self->{callbacks} && ref $self->{callbacks} ne "HASH") {
+ carp("The 'callbacks' attribute is set but is not a hash: ignored\n");
+ $self->{callbacks} = undef;
+ }
+
+ $last_err = Text::CSV_PP->SetDiag(0);
+ defined $\ && !exists $attr{eol} and $self->{eol} = $\;
+ bless $self, $class;
+ defined $self->{'types'} and $self->types($self->{'types'});
+ defined $self->{'skip_empty_rows'} and $self->{'skip_empty_rows'} = _supported_skip_empty_rows($self, $self->{'skip_empty_rows'});
+ defined $attr_formula and $self->{'formula'} = _supported_formula($self, $attr_formula);
+ $self;
+}
+
+# Keep in sync with XS!
+my %_cache_id = ( # Only expose what is accessed from within PM
+ quote_char => 0,
+ escape_char => 1,
+ sep_char => 2,
+ always_quote => 4,
+ quote_empty => 5,
+ quote_space => 6,
+ quote_binary => 7,
+ allow_loose_quotes => 8,
+ allow_loose_escapes => 9,
+ allow_unquoted_escape => 10,
+ allow_whitespace => 11,
+ blank_is_undef => 12,
+ empty_is_undef => 13,
+ auto_diag => 14,
+ diag_verbose => 15,
+ escape_null => 16,
+ formula => 18,
+ decode_utf8 => 21,
+ verbatim => 23,
+ strict_eol => 24,
+ eol_type => 27,
+ strict => 28,
+ skip_empty_rows => 29,
+ binary => 30,
+ keep_meta_info => 31,
+ _has_hooks => 32,
+ _has_ahead => 33,
+ _is_bound => 44,
+ eol => 100,
+ sep => 116,
+ quote => 132,
+ undef_str => 148,
+ comment_str => 156,
+ types => 92,
+);
+
+my %_hidden_cache_id = (
+ has_error_input => 20,
+ eol_is_cr => 26,
+ eol_len => 36,
+ sep_len => 37,
+ quo_len => 38,
+);
+
+my %_reverse_cache_id = (
+ map({ $_cache_id{$_} => $_ } keys %_cache_id),
+ map({ $_hidden_cache_id{$_} => $_ } keys %_hidden_cache_id),
+);
+
+# A `character'
+sub _set_attr_C {
+ my ($self, $name, $val, $ec) = @_;
+ defined $val and utf8::decode($val);
+ $self->{$name} = $val;
+ $ec = _check_sanity($self) and croak($self->SetDiag($ec));
+ $self->_cache_set($_cache_id{$name}, $val);
+}
+
+# A flag
+sub _set_attr_X {
+ my ($self, $name, $val) = @_;
+ defined $val or $val = 0;
+ $self->{$name} = $val;
+ $self->_cache_set($_cache_id{$name}, 0 + $val);
+}
+
+# A number
+sub _set_attr_N {
+ my ($self, $name, $val) = @_;
+ $self->{$name} = $val;
+ $self->_cache_set($_cache_id{$name}, 0 + $val);
+}
+
+# Accessor methods.
+# It is unwise to change them halfway through a single file!
+sub quote_char {
+ my $self = shift;
+ if (@_) {
+ $self->_set_attr_C("quote_char", shift);
+ $self->_cache_set($_cache_id{quote}, "");
+ }
+ $self->{quote_char};
+}
+
+sub quote {
+ my $self = shift;
+ if (@_) {
+ my $quote = shift;
+ defined $quote or $quote = "";
+ utf8::decode($quote);
+ my @b = unpack "U0C*", $quote;
+ if (@b > 1) {
+ @b > 16 and croak($self->SetDiag(1007));
+ $self->quote_char("\0");
+ }
+ else {
+ $self->quote_char($quote);
+ $quote = "";
+ }
+ $self->{quote} = $quote;
+
+ my $ec = _check_sanity($self);
+ $ec and croak($self->SetDiag($ec));
+
+ $self->_cache_set($_cache_id{quote}, $quote);
+ }
+ my $quote = $self->{quote};
+ defined $quote && length($quote) ? $quote : $self->{quote_char};
+}
+
+sub escape_char {
+ my $self = shift;
+ if (@_) {
+ my $ec = shift;
+ $self->_set_attr_C("escape_char", $ec);
+ $ec or $self->_set_attr_X("escape_null", 0);
+ }
+ $self->{escape_char};
+}
+
+sub sep_char {
+ my $self = shift;
+ if (@_) {
+ $self->_set_attr_C("sep_char", shift);
+ $self->_cache_set($_cache_id{sep}, "");
+ }
+ $self->{sep_char};
+}
+
+sub sep {
+ my $self = shift;
+ if (@_) {
+ my $sep = shift;
+ defined $sep or $sep = "";
+ utf8::decode($sep);
+ my @b = unpack "U0C*", $sep;
+ if (@b > 1) {
+ @b > 16 and croak($self->SetDiag(1006));
+ $self->sep_char("\0");
+ }
+ else {
+ $self->sep_char($sep);
+ $sep = "";
+ }
+ $self->{sep} = $sep;
+
+ my $ec = _check_sanity($self);
+ $ec and croak($self->SetDiag($ec));
+
+ $self->_cache_set($_cache_id{sep}, $sep);
+ }
+ my $sep = $self->{sep};
+ defined $sep && length($sep) ? $sep : $self->{sep_char};
+}
+
+sub eol {
+ my $self = shift;
+ if (@_) {
+ my $eol = shift;
+ defined $eol or $eol = ""; # Also reset strict_eol?
+ length($eol) > 16 and croak($self->SetDiag(1005));
+ $self->{eol} = $eol;
+ $self->_cache_set($_cache_id{eol}, $eol);
+ }
+ $self->{eol};
+}
+
+sub eol_type {
+ my $self = shift;
+ $self->_cache_get_eolt;
+}
+
+sub always_quote {
+ my $self = shift;
+ @_ and $self->_set_attr_X("always_quote", shift);
+ $self->{always_quote};
+}
+
+sub quote_space {
+ my $self = shift;
+ @_ and $self->_set_attr_X("quote_space", shift);
+ $self->{quote_space};
+}
+
+sub quote_empty {
+ my $self = shift;
+ @_ and $self->_set_attr_X("quote_empty", shift);
+ $self->{quote_empty};
+}
+
+sub escape_null {
+ my $self = shift;
+ @_ and $self->_set_attr_X("escape_null", shift);
+ $self->{escape_null};
+}
+
+sub quote_null { goto &escape_null; }
+
+sub quote_binary {
+ my $self = shift;
+ @_ and $self->_set_attr_X("quote_binary", shift);
+ $self->{quote_binary};
+}
+
+sub binary {
+ my $self = shift;
+ @_ and $self->_set_attr_X("binary", shift);
+ $self->{binary};
+}
+
+sub strict {
+ my $self = shift;
+ @_ and $self->_set_attr_X("strict", shift);
+ $self->{strict};
+}
+
+sub strict_eol {
+ my $self = shift;
+ @_ and $self->_set_attr_X("strict_eol", shift);
+ $self->{'strict_eol'};
+}
+
+sub _supported_skip_empty_rows {
+ my ($self, $f) = @_;
+ defined $f or return 0;
+ if ($self && $f && ref $f && ref $f eq "CODE") {
+ $self->{'_EMPTROW_CB'} = $f;
+ return 6;
+ }
+ $f =~ m/^(?: 0 | undef )$/xi ? 0 :
+ $f =~ m/^(?: 1 | skip )$/xi ? 1 :
+ $f =~ m/^(?: 2 | eof | stop )$/xi ? 2 :
+ $f =~ m/^(?: 3 | die )$/xi ? 3 :
+ $f =~ m/^(?: 4 | croak )$/xi ? 4 :
+ $f =~ m/^(?: 5 | error )$/xi ? 5 :
+ $f =~ m/^(?: 6 | cb )$/xi ? 6 : do {
+ $self ||= "Text::CSV_PP";
+ croak($self->_SetDiagInfo(1500, "skip_empty_rows '$f' is not supported"));
+ };
+}
+
+sub skip_empty_rows {
+ my $self = shift;
+ @_ and $self->_set_attr_N("skip_empty_rows", _supported_skip_empty_rows($self, shift));
+ my $ser = $self->{'skip_empty_rows'};
+ $ser == 6 or $self->{'_EMPTROW_CB'} = undef;
+ $ser <= 1 ? $ser : $ser == 2 ? "eof" : $ser == 3 ? "die" :
+ $ser == 4 ? "croak" : $ser == 5 ? "error" :
+ $self->{'_EMPTROW_CB'};
+}
+
+sub _SetDiagInfo {
+ my ($self, $err, $msg) = @_;
+ $self->SetDiag($err);
+ my $em = $self->error_diag();
+ $em =~ s/^\d+$// and $msg =~ s/^/# /;
+ my $sep = $em =~ m/[;\n]$/ ? "\n\t" : ": ";
+ join $sep => grep m/\S\S\S/ => $em, $msg;
+}
+
+sub _supported_formula {
+ my ($self, $f) = @_;
+ defined $f or return 5;
+ if ($self && $f && ref $f && ref $f eq "CODE") {
+ $self->{_FORMULA_CB} = $f;
+ return 6;
+ }
+ $f =~ m/^(?: 0 | none )$/xi ? 0 :
+ $f =~ m/^(?: 1 | die )$/xi ? 1 :
+ $f =~ m/^(?: 2 | croak )$/xi ? 2 :
+ $f =~ m/^(?: 3 | diag )$/xi ? 3 :
+ $f =~ m/^(?: 4 | empty | )$/xi ? 4 :
+ $f =~ m/^(?: 5 | undef )$/xi ? 5 :
+ $f =~ m/^(?: 6 | cb )$/xi ? 6 : do {
+ $self ||= "Text::CSV_PP";
+ croak($self->_SetDiagInfo(1500, "formula-handling '$f' is not supported"));
+ };
+}
+
+sub formula {
+ my $self = shift;
+ @_ and $self->_set_attr_N("formula", _supported_formula($self, shift));
+ $self->{formula} == 6 or $self->{_FORMULA_CB} = undef;
+ [qw( none die croak diag empty undef cb )]->[_supported_formula($self, $self->{formula})];
+}
+sub formula_handling {
+ my $self = shift;
+ $self->formula(@_);
+}
+
+sub decode_utf8 {
+ my $self = shift;
+ @_ and $self->_set_attr_X("decode_utf8", shift);
+ $self->{decode_utf8};
+}
+
+sub keep_meta_info {
+ my $self = shift;
+ if (@_) {
+ my $v = shift;
+ !defined $v || $v eq "" and $v = 0;
+ $v =~ m/^[0-9]/ or $v = lc $v eq "false" ? 0 : 1; # true/truth = 1
+ $self->_set_attr_X("keep_meta_info", $v);
+ }
+ $self->{keep_meta_info};
+}
+
+sub allow_loose_quotes {
+ my $self = shift;
+ @_ and $self->_set_attr_X("allow_loose_quotes", shift);
+ $self->{allow_loose_quotes};
+}
+
+sub allow_loose_escapes {
+ my $self = shift;
+ @_ and $self->_set_attr_X("allow_loose_escapes", shift);
+ $self->{allow_loose_escapes};
+}
+
+sub allow_whitespace {
+ my $self = shift;
+ if (@_) {
+ my $aw = shift;
+ _unhealthy_whitespace($self, $aw) and
+ croak($self->SetDiag(1002));
+ $self->_set_attr_X("allow_whitespace", $aw);
+ }
+ $self->{allow_whitespace};
+}
+
+sub allow_unquoted_escape {
+ my $self = shift;
+ @_ and $self->_set_attr_X("allow_unquoted_escape", shift);
+ $self->{allow_unquoted_escape};
+}
+
+sub blank_is_undef {
+ my $self = shift;
+ @_ and $self->_set_attr_X("blank_is_undef", shift);
+ $self->{blank_is_undef};
+}
+
+sub empty_is_undef {
+ my $self = shift;
+ @_ and $self->_set_attr_X("empty_is_undef", shift);
+ $self->{empty_is_undef};
+}
+
+sub verbatim {
+ my $self = shift;
+ @_ and $self->_set_attr_X("verbatim", shift);
+ $self->{verbatim};
+}
+
+sub undef_str {
+ my $self = shift;
+ if (@_) {
+ my $v = shift;
+ $self->{undef_str} = defined $v ? "$v" : undef;
+ $self->_cache_set($_cache_id{undef_str}, $self->{undef_str});
+ }
+ $self->{undef_str};
+}
+
+sub comment_str {
+ my $self = shift;
+ if (@_) {
+ my $v = shift;
+ $self->{comment_str} = defined $v ? "$v" : undef;
+ $self->_cache_set($_cache_id{comment_str}, $self->{comment_str});
+ }
+ $self->{comment_str};
+}
+
+sub auto_diag {
+ my $self = shift;
+ if (@_) {
+ my $v = shift;
+ !defined $v || $v eq "" and $v = 0;
+ $v =~ m/^[0-9]/ or $v = lc $v eq "false" ? 0 : 1; # true/truth = 1
+ $self->_set_attr_X("auto_diag", $v);
+ }
+ $self->{auto_diag};
+}
+
+sub diag_verbose {
+ my $self = shift;
+ if (@_) {
+ my $v = shift;
+ !defined $v || $v eq "" and $v = 0;
+ $v =~ m/^[0-9]/ or $v = lc $v eq "false" ? 0 : 1; # true/truth = 1
+ $self->_set_attr_X("diag_verbose", $v);
+ }
+ $self->{diag_verbose};
+}
+
+################################################################################
+# status
+################################################################################
+
+sub status {
+ my $self = shift;
+ return $self->{_STATUS};
+}
+
+sub eof {
+ my $self = shift;
+ return $self->{_EOF};
+}
+
+sub types {
+ my $self = shift;
+
+ if (@_) {
+ if (my $types = shift) {
+ $self->{'_types'} = join "", map { chr } @{$types};
+ $self->{'types'} = $types;
+ $self->_cache_set($_cache_id{'types'}, $self->{'_types'});
+ }
+ else {
+ delete $self->{'types'};
+ delete $self->{'_types'};
+ $self->_cache_set($_cache_id{'types'}, undef);
+ undef;
+ }
+ }
+ else {
+ $self->{'types'};
+ }
+}
+
+sub callbacks {
+ my $self = shift;
+ if (@_) {
+ my $cb;
+ my $hf = 0x00;
+ if (defined $_[0]) {
+ grep { !defined } @_ and croak($self->SetDiag(1004));
+ $cb = @_ == 1 && ref $_[0] eq "HASH" ? shift
+ : @_ % 2 == 0 ? {@_}
+ : croak($self->SetDiag(1004));
+ foreach my $cbk (keys %{$cb}) {
+ # A key cannot be a ref. That would be stored as the *string
+ # 'SCALAR(0x1f3e710)' or 'ARRAY(0x1a5ae18)'
+ $cbk =~ m/^[\w.]+$/ && ref $cb->{$cbk} eq "CODE" or
+ croak($self->SetDiag(1004));
+ }
+ exists $cb->{error} and $hf |= 0x01;
+ exists $cb->{after_parse} and $hf |= 0x02;
+ exists $cb->{before_print} and $hf |= 0x04;
+ }
+ elsif (@_ > 1) {
+ # (undef, whatever)
+ croak($self->SetDiag(1004));
+ }
+ $self->_set_attr_X("_has_hooks", $hf);
+ $self->{callbacks} = $cb;
+ }
+ $self->{callbacks};
+}
+
+################################################################################
+# error_diag
+################################################################################
+
+sub error_diag {
+ my $self = shift;
+ my @diag = (0 + $last_err, $last_err, 0, 0, 0, 0);
+
+ # Docs state to NEVER use UNIVERSAL::isa, because it will *never* call an
+ # overridden isa method in any class. Well, that is exacly what I want here
+ if ($self && ref $self and # Not a class method or direct call
+ UNIVERSAL::isa($self, __PACKAGE__) && exists $self->{_ERROR_DIAG}) {
+ $diag[0] = 0 + $self->{_ERROR_DIAG};
+ $diag[1] = $self->{_ERROR_DIAG};
+ $diag[2] = 1 + $self->{_ERROR_POS} if exists $self->{_ERROR_POS};
+ $diag[3] = $self->{_RECNO};
+ $diag[4] = $self->{_ERROR_FLD} if exists $self->{_ERROR_FLD};
+ $diag[5] = $self->{_ERROR_SRC} if exists $self->{_ERROR_SRC} && $self->{diag_verbose};
+
+ $diag[0] && $self->{callbacks} && $self->{callbacks}{error} and
+ return $self->{callbacks}{error}->(@diag);
+ }
+
+ my $context = wantarray;
+
+ unless (defined $context) { # Void context, auto-diag
+ if ($diag[0] && $diag[0] != 2012) {
+ my $msg = "# CSV_PP ERROR: $diag[0] - $diag[1] \@ rec $diag[3] pos $diag[2]\n";
+ $diag[4] and $msg =~ s/$/ field $diag[4]/;
+ $diag[5] and $msg =~ s/$/ (PP#$diag[5])/;
+
+ unless ($self && ref $self) { # auto_diag
+ # called without args in void context
+ warn $msg;
+ return;
+ }
+
+ $self->{diag_verbose} && $self->{_ERROR_INPUT} and
+ $msg .= $self->{_ERROR_INPUT} . "\n" .
+ (" " x ($diag[2] - 1)) . "^\n";
+
+ my $lvl = $self->{auto_diag};
+ if ($lvl < 2) {
+ my @c = caller(2);
+ if (@c >= 11 && $c[10] && ref $c[10] eq "HASH") {
+ my $hints = $c[10];
+ (exists $hints->{autodie} && $hints->{autodie} or
+ exists $hints->{'guard Fatal'} &&
+ !exists $hints->{'no Fatal'}) and
+ $lvl++;
+ # Future releases of autodie will probably set $^H{autodie}
+ # to "autodie @args", like "autodie :all" or "autodie open"
+ # so we can/should check for "open" or "new"
+ }
+ }
+ $lvl > 1 ? die $msg : warn $msg;
+ }
+ return;
+ }
+
+ return $context ? @diag : $diag[1];
+}
+
+sub record_number {
+ my $self = shift;
+ return $self->{_RECNO};
+}
+
+################################################################################
+# string
+################################################################################
+
+*string = \&_string;
+sub _string {
+ my $self = shift;
+ return ref $self->{_STRING} ? ${$self->{_STRING}} : undef;
+}
+
+################################################################################
+# fields
+################################################################################
+
+*fields = \&_fields;
+sub _fields {
+ my $self = shift;
+ return ref $self->{_FIELDS} ? @{$self->{_FIELDS}} : undef;
+}
+
+################################################################################
+# meta_info
+################################################################################
+
+sub meta_info {
+ my $self = shift;
+ return ref $self->{_FFLAGS} ? @{$self->{_FFLAGS}} : undef;
+}
+
+sub is_quoted {
+ my ($self, $idx) = @_;
+ ref $self->{_FFLAGS} &&
+ $idx >= 0 && $idx < @{$self->{_FFLAGS}} or return;
+ $self->{_FFLAGS}[$idx] & CSV_FLAGS_IS_QUOTED() ? 1 : 0;
+}
+
+sub is_binary {
+ my ($self, $idx) = @_;
+ ref $self->{_FFLAGS} &&
+ $idx >= 0 && $idx < @{$self->{_FFLAGS}} or return;
+ $self->{_FFLAGS}[$idx] & CSV_FLAGS_IS_BINARY() ? 1 : 0;
+}
+
+sub is_missing {
+ my ($self, $idx) = @_;
+ $idx < 0 || !ref $self->{_FFLAGS} and return;
+ $idx >= @{$self->{_FFLAGS}} and return 1;
+ $self->{_FFLAGS}[$idx] & CSV_FLAGS_IS_MISSING() ? 1 : 0;
+}
+
+################################################################################
+# combine
+################################################################################
+*combine = \&_combine;
+sub _combine {
+ my $self = shift;
+ my $str = "";
+ $self->{_FIELDS} = \@_;
+ $self->{_STATUS} = (@_ > 0) && $self->__combine(\$str, \@_, 0);
+ $self->{_STRING} = \$str;
+ $self->{_STATUS};
+}
+
+################################################################################
+# parse
+################################################################################
+*parse = \&_parse;
+sub _parse {
+ my ($self, $str) = @_;
+
+ ref $str and croak($self->SetDiag(1500));
+
+ my $fields = [];
+ my $fflags = [];
+ $self->{_STRING} = \$str;
+ if (defined $str && $self->__parse($fields, $fflags, $str, 0)) {
+ $self->{_FIELDS} = $fields;
+ $self->{_FFLAGS} = $fflags;
+ $self->{_STATUS} = 1;
+ }
+ else {
+ $self->{_FIELDS} = undef;
+ $self->{_FFLAGS} = undef;
+ $self->{_STATUS} = 0;
+ }
+ $self->{_STATUS};
+}
+
+sub column_names {
+ my ($self, @keys) = @_;
+
+ @keys or
+ return defined $self->{_COLUMN_NAMES} ? @{$self->{_COLUMN_NAMES}} : ();
+ @keys == 1 && !defined $keys[0] and
+ return $self->{_COLUMN_NAMES} = undef;
+
+ if (@keys == 1 && ref $keys[0] eq "ARRAY") {
+ @keys = @{$keys[0]};
+ }
+ elsif (join "", map { defined $_ ? ref $_ : "" } @keys) {
+ croak($self->SetDiag(3001));
+ }
+
+ $self->{_BOUND_COLUMNS} && @keys != @{$self->{_BOUND_COLUMNS}} and
+ croak($self->SetDiag(3003));
+
+ $self->{_COLUMN_NAMES} = [map { defined $_ ? $_ : "\cAUNDEF\cA" } @keys];
+ @{$self->{_COLUMN_NAMES}};
+}
+
+sub header {
+ my ($self, $fh, @args) = @_;
+
+ $fh or croak($self->SetDiag(1014));
+
+ my (@seps, %args);
+ for (@args) {
+ if (ref $_ eq "ARRAY") {
+ push @seps, @{$_};
+ next;
+ }
+ if (ref $_ eq "HASH") {
+ %args = %{$_};
+ next;
+ }
+ croak('usage: $csv->header ($fh, [ seps ], { options })');
+ }
+
+ defined $args{munge} && !defined $args{munge_column_names} and
+ $args{munge_column_names} = $args{munge}; # munge as alias
+ defined $args{detect_bom} or $args{detect_bom} = 1;
+ defined $args{set_column_names} or $args{set_column_names} = 1;
+ defined $args{munge_column_names} or $args{munge_column_names} = "lc";
+
+ # Reset any previous leftovers
+ $self->{_RECNO} = 0;
+ $self->{_AHEAD} = undef;
+ $self->{_COLUMN_NAMES} = undef if $args{set_column_names};
+ $self->{_BOUND_COLUMNS} = undef if $args{set_column_names};
+ $self->_cache_set($_cache_id{'_has_ahead'}, 0);
+
+ if (defined $args{sep_set}) {
+ ref $args{sep_set} eq "ARRAY" or
+ croak($self->_SetDiagInfo(1500, "sep_set should be an array ref"));
+ @seps = @{$args{sep_set}};
+ }
+
+ $^O eq "MSWin32" and binmode $fh;
+ my $hdr = <$fh>;
+ # check if $hdr can be empty here, I don't think so
+ defined $hdr && $hdr ne "" or croak($self->SetDiag(1010));
+
+ my %sep;
+ @seps or @seps = (",", ";");
+ foreach my $sep (@seps) {
+ index($hdr, $sep) >= 0 and $sep{$sep}++;
+ }
+
+ keys %sep >= 2 and croak($self->SetDiag(1011));
+
+ $self->sep(keys %sep);
+ my $enc = "";
+ if ($args{detect_bom}) { # UTF-7 is not supported
+ if ($hdr =~ s/^\x00\x00\xfe\xff//) { $enc = "utf-32be" }
+ elsif ($hdr =~ s/^\xff\xfe\x00\x00//) { $enc = "utf-32le" }
+ elsif ($hdr =~ s/^\xfe\xff//) { $enc = "utf-16be" }
+ elsif ($hdr =~ s/^\xff\xfe//) { $enc = "utf-16le" }
+ elsif ($hdr =~ s/^\xef\xbb\xbf//) { $enc = "utf-8" }
+ elsif ($hdr =~ s/^\xf7\x64\x4c//) { $enc = "utf-1" }
+ elsif ($hdr =~ s/^\xdd\x73\x66\x73//) { $enc = "utf-ebcdic" }
+ elsif ($hdr =~ s/^\x0e\xfe\xff//) { $enc = "scsu" }
+ elsif ($hdr =~ s/^\xfb\xee\x28//) { $enc = "bocu-1" }
+ elsif ($hdr =~ s/^\x84\x31\x95\x33//) { $enc = "gb-18030" }
+ elsif ($hdr =~ s/^\x{feff}//) { $enc = "" }
+
+ $self->{ENCODING} = $enc ? uc $enc : undef;
+
+ $hdr eq "" and croak($self->SetDiag(1010));
+
+ if ($enc) {
+ $ebcdic && $enc eq "utf-ebcdic" and $enc = "";
+ if ($enc =~ m/([13]).le$/) {
+ my $l = 0 + $1;
+ my $x;
+ $hdr .= "\0" x $l;
+ read $fh, $x, $l;
+ }
+ if ($enc) {
+ if ($enc ne "utf-8") {
+ require Encode;
+ $hdr = Encode::decode($enc, $hdr);
+ }
+ binmode $fh, ":encoding($enc)";
+ }
+ }
+ }
+
+ my ($ahead, $eol);
+ if ($hdr and $hdr =~ s/\Asep=(\S)([\r\n]+)//i) { # Also look in xs:Parse
+ $self->sep($1);
+ length $hdr or $hdr = <$fh>;
+ }
+
+ if ($hdr =~ s/^([^\r\n]+)([\r\n]+)([^\r\n].+)\z/$1/s) {
+ $eol = $2;
+ $ahead = $3;
+ }
+
+ my $hr = \$hdr; # Will cause croak on perl-5.6.x
+ open my $h, "<", $hr or croak($self->SetDiag(1010));
+
+ my $row = $self->getline($h) or croak();
+ close $h;
+
+ if ($args{'munge_column_names'} eq "lc") {
+ $_ = lc for @{$row};
+ }
+ elsif ($args{'munge_column_names'} eq "uc") {
+ $_ = uc for @{$row};
+ }
+ elsif ($args{'munge_column_names'} eq "db") {
+ for (@{$row}) {
+ s/\W+/_/g;
+ s/^_+//;
+ $_ = lc;
+ }
+ }
+
+ if ($ahead) { # Must be after getline, which creates the cache
+ $self->_cache_set($_cache_id{_has_ahead}, 1);
+ $self->{_AHEAD} = $ahead;
+ $eol =~ m/^\r([^\n]|\z)/ and $self->eol($eol);
+ }
+
+ my @hdr = @{$row};
+ ref $args{munge_column_names} eq "CODE" and
+ @hdr = map { $args{munge_column_names}->($_) } @hdr;
+ ref $args{munge_column_names} eq "HASH" and
+ @hdr = map { $args{munge_column_names}->{$_} || $_ } @hdr;
+ my %hdr; $hdr{$_}++ for @hdr;
+ exists $hdr{''} and croak($self->SetDiag(1012));
+ unless (keys %hdr == @hdr) {
+ croak($self->_SetDiagInfo(1013, join ", " =>
+ map { "$_ ($hdr{$_})" } grep { $hdr{$_} > 1 } keys %hdr));
+ }
+ $args{set_column_names} and $self->column_names(@hdr);
+ wantarray ? @hdr : $self;
+}
+
+sub bind_columns {
+ my ($self, @refs) = @_;
+
+ @refs or
+ return defined $self->{_BOUND_COLUMNS} ? @{$self->{_BOUND_COLUMNS}} : undef;
+ if (@refs == 1 && !defined $refs[0]) {
+ $self->{_COLUMN_NAMES} = undef;
+ return $self->{_BOUND_COLUMNS} = undef;
+ }
+
+ $self->{_COLUMN_NAMES} && @refs != @{$self->{_COLUMN_NAMES}} and
+ croak($self->SetDiag(3003));
+ join "", map { ref $_ eq "SCALAR" ? "" : "*" } @refs and
+ croak($self->SetDiag(3004));
+
+ $self->_set_attr_N("_is_bound", scalar @refs);
+ $self->{_BOUND_COLUMNS} = [@refs];
+ @refs;
+}
+
+sub getline_hr {
+ my ($self, @args, %hr) = @_;
+ $self->{_COLUMN_NAMES} or croak($self->SetDiag(3002));
+ my $fr = $self->getline(@args) or return;
+ if (ref $self->{_FFLAGS}) { # missing
+ $self->{_FFLAGS}[$_] = CSV_FLAGS_IS_MISSING()
+ for (@{$fr} ? $#{$fr} + 1 : 0) .. $#{$self->{_COLUMN_NAMES}};
+ @{$fr} == 1 && (!defined $fr->[0] || $fr->[0] eq "") and
+ $self->{_FFLAGS}[0] ||= CSV_FLAGS_IS_MISSING();
+ }
+ @hr{@{$self->{_COLUMN_NAMES}}} = @{$fr};
+ \%hr;
+}
+
+sub getline_hr_all {
+ my ($self, @args) = @_;
+
+ $self->{_COLUMN_NAMES} or croak($self->SetDiag(3002));
+
+ my @cn = @{$self->{_COLUMN_NAMES}};
+
+ [map { my %h; @h{@cn} = @{$_}; \%h } @{$self->getline_all(@args)}];
+}
+
+sub say {
+ my ($self, $io, @f) = @_;
+ my $eol = $self->eol();
+ # say ($fh, undef) does not propage actual undef to print ()
+ my $state = $self->print($io, @f == 1 && !defined $f[0] ? undef : @f);
+ unless (length $eol) {
+ $eol = $self->eol_type() || $\ || $/;
+ print $io $eol;
+ }
+ return $state;
+}
+
+sub print_hr {
+ my ($self, $io, $hr) = @_;
+ $self->{_COLUMN_NAMES} or croak($self->SetDiag(3009));
+ ref $hr eq "HASH" or croak($self->SetDiag(3010));
+ $self->print($io, [map { $hr->{$_} } $self->column_names()]);
+}
+
+sub fragment {
+ my ($self, $io, $spec) = @_;
+
+ my $qd = qr{\s* [0-9]+ \s* }x; # digit
+ my $qs = qr{\s* (?: [0-9]+ | \* ) \s*}x; # digit or star
+ my $qr = qr{$qd (?: - $qs )?}x; # range
+ my $qc = qr{$qr (?: ; $qr )*}x; # list
+ defined $spec && $spec =~ m{^ \s*
+ \x23 ? \s* # optional leading #
+ ( row | col | cell ) \s* =
+ ( $qc # for row and col
+ | $qd , $qd (?: - $qs , $qs)? # for cell (ranges)
+ (?: ; $qd , $qd (?: - $qs , $qs)? )* # and cell (range) lists
+ ) \s* $}xi or croak($self->SetDiag(2013));
+ my ($type, $range) = (lc $1, $2);
+
+ my @h = $self->column_names();
+
+ my @c;
+ if ($type eq "cell") {
+ my @spec;
+ my $min_row;
+ my $max_row = 0;
+ for (split m/\s*;\s*/ => $range) {
+ my ($tlr, $tlc, $brr, $brc) = (m{
+ ^ \s* ([0-9]+ ) \s* , \s* ([0-9]+ ) \s*
+ (?: - \s* ([0-9]+ | \*) \s* , \s* ([0-9]+ | \*) \s* )?
+ $}x) or croak($self->SetDiag(2013));
+ defined $brr or ($brr, $brc) = ($tlr, $tlc);
+ $tlr == 0 || $tlc == 0 ||
+ ($brr ne "*" && ($brr == 0 || $brr < $tlr)) ||
+ ($brc ne "*" && ($brc == 0 || $brc < $tlc))
+ and croak($self->SetDiag(2013));
+ $tlc--;
+ $brc-- unless $brc eq "*";
+ defined $min_row or $min_row = $tlr;
+ $tlr < $min_row and $min_row = $tlr;
+ $brr eq "*" || $brr > $max_row and
+ $max_row = $brr;
+ push @spec, [$tlr, $tlc, $brr, $brc];
+ }
+ my $r = 0;
+ while (my $row = $self->getline($io)) {
+ ++$r < $min_row and next;
+ my %row;
+ my $lc;
+ foreach my $s (@spec) {
+ my ($tlr, $tlc, $brr, $brc) = @{$s};
+ $r < $tlr || ($brr ne "*" && $r > $brr) and next;
+ !defined $lc || $tlc < $lc and $lc = $tlc;
+ my $rr = $brc eq "*" ? $#{$row} : $brc;
+ $row{$_} = $row->[$_] for $tlc .. $rr;
+ }
+ push @c, [@row{sort { $a <=> $b } keys %row}];
+ if (@h) {
+ my %h; @h{@h} = @{$c[-1]};
+ $c[-1] = \%h;
+ }
+ $max_row ne "*" && $r == $max_row and last;
+ }
+ return \@c;
+ }
+
+ # row or col
+ my @r;
+ my $eod = 0;
+ for (split m/\s*;\s*/ => $range) {
+ my ($from, $to) = m/^\s* ([0-9]+) (?: \s* - \s* ([0-9]+ | \* ))? \s* $/x
+ or croak($self->SetDiag(2013));
+ $to ||= $from;
+ $to eq "*" and ($to, $eod) = ($from, 1);
+ # $to cannot be <= 0 due to regex and ||=
+ $from <= 0 || $to < $from and croak($self->SetDiag(2013));
+ $r[$_] = 1 for $from .. $to;
+ }
+
+ my $r = 0;
+ $type eq "col" and shift @r;
+ $_ ||= 0 for @r;
+ while (my $row = $self->getline($io)) {
+ $r++;
+ if ($type eq "row") {
+ if (($r > $#r && $eod) || $r[$r]) {
+ push @c, $row;
+ if (@h) {
+ my %h; @h{@h} = @{$c[-1]};
+ $c[-1] = \%h;
+ }
+ }
+ next;
+ }
+ push @c, [map { ($_ > $#r && $eod) || $r[$_] ? $row->[$_] : () } 0 .. $#{$row}];
+ if (@h) {
+ my %h; @h{@h} = @{$c[-1]};
+ $c[-1] = \%h;
+ }
+ }
+
+ return \@c;
+}
+
+my $csv_usage = q{usage: my $aoa = csv (in => $file);};
+
+sub _csv_attr {
+ my %attr = (@_ == 1 && ref $_[0] eq "HASH" ? %{$_[0]} : @_) or croak();
+
+ $attr{binary} = 1;
+ $attr{strict_eol} = 1;
+
+ my $enc = delete $attr{enc} || delete $attr{encoding} || "";
+ $enc eq "auto" and ($attr{detect_bom}, $enc) = (1, "");
+ my $stack = $enc =~ s/(:\w.*)// ? $1 : "";
+ $enc =~ m/^[-\w.]+$/ and $enc = ":encoding($enc)";
+ $enc .= $stack;
+
+ my $hdrs = delete $attr{'headers'};
+ my $frag = delete $attr{'fragment'};
+ my $key = delete $attr{'key'};
+ my $val = delete $attr{'value'};
+ my $kh = delete $attr{'keep_headers'} ||
+ delete $attr{'keep_column_names'} ||
+ delete $attr{'kh'};
+
+ my $cbai = delete $attr{'callbacks'}{'after_in'} ||
+ delete $attr{'after_in'} ||
+ delete $attr{'callbacks'}{'after_parse'} ||
+ delete $attr{'after_parse'};
+ my $cbbo = delete $attr{'callbacks'}{'before_out'} ||
+ delete $attr{'before_out'};
+ my $cboi = delete $attr{'callbacks'}{'on_in'} ||
+ delete $attr{'on_in'};
+ my $cboe = delete $attr{'callbacks'}{'on_error'} ||
+ delete $attr{'on_error'};
+
+ my $hd_s = delete $attr{'sep_set'} ||
+ delete $attr{'seps'};
+ my $hd_b = delete $attr{'detect_bom'} ||
+ delete $attr{'bom'};
+ my $hd_m = delete $attr{'munge'} ||
+ delete $attr{'munge_column_names'};
+ my $hd_c = delete $attr{'set_column_names'};
+
+ my $fh;
+ my $sink = 0;
+ my $cls = 0; # If I open a file, I have to close it
+ my $in = delete $attr{in} || delete $attr{file} or croak($csv_usage);
+ my $out = exists $attr{out} && !$attr{out} ? \"skip"
+ : delete $attr{out} || delete $attr{file};
+
+ ref $in eq "CODE" || ref $in eq "ARRAY" and $out ||= \*STDOUT;
+
+ my ($fho, $fho_cls);
+ if ($in && $out and (!ref $in || ref $in eq "GLOB" || ref \$in eq "GLOB")
+ and (!ref $out || ref $out eq "GLOB" || ref \$out eq "GLOB")) {
+ if (ref $out or "GLOB" eq ref \$out) {
+ $fho = $out;
+ }
+ else {
+ open $fho, ">", $out or croak "$out: $!\n";
+ if (my $e = $attr{'encoding'}) {
+ binmode $fho, ":encoding($e)";
+ $hd_b and print $fho "\x{feff}";
+ }
+ $fho_cls = 1;
+ }
+ if ($cboi && !$cbai) {
+ $cbai = $cboi;
+ $cboi = undef;
+ }
+ if ($cbai) {
+ my $cb = $cbai;
+ $cbai = sub { $cb->(@_); $_[0]->say($fho, $_[1]); 0 };
+ }
+ else {
+ $cbai = sub { $_[0]->say($fho, $_[1]); 0 };
+ }
+
+ # Put all callbacks back in place for streaming behavior
+ $attr{'callbacks'}{'after_parse'} = $cbai; $cbai = undef;
+ $attr{'callbacks'}{'before_out'} = $cbbo; $cbbo = undef;
+ $attr{'callbacks'}{'on_in'} = $cboi; $cboi = undef;
+ $attr{'callbacks'}{'on_error'} = $cboe; $cboe = undef;
+ $out = undef;
+ $sink = 1;
+ }
+
+ if ($out) {
+ if (ref $out and ("ARRAY" eq ref $out or "HASH" eq ref $out)) {
+ delete $attr{out};
+ $sink = 1;
+ }
+ elsif ((ref $out and "SCALAR" ne ref $out) or "GLOB" eq ref \$out) {
+ $fh = $out;
+ }
+ elsif (ref $out and "SCALAR" eq ref $out and defined ${$out} and ${$out} eq "skip") {
+ delete $attr{out};
+ $sink = 1;
+ }
+ else {
+ open $fh, ">", $out or croak("$out: $!");
+ $cls = 1;
+ }
+ if ($fh) {
+ if ($enc) {
+ binmode $fh, $enc;
+ my $fn = fileno $fh; # This is a workaround for a bug in PerlIO::via::gzip
+ }
+ unless (defined $attr{eol} || defined $fho) {
+ my @layers = eval { PerlIO::get_layers($fh) };
+ $attr{eol} = (grep m/crlf/ => @layers) ? "\n" : "\r\n";
+ }
+ }
+ }
+
+ if (ref $in eq "CODE" or ref $in eq "ARRAY") {
+ # All done
+ }
+ elsif (ref $in eq "SCALAR") {
+ # Strings with code points over 0xFF may not be mapped into in-memory file handles
+ # "<$enc" does not change that :(
+ open $fh, "<", $in or croak("Cannot open from SCALAR using PerlIO");
+ $cls = 1;
+ }
+ elsif (ref $in or "GLOB" eq ref \$in) {
+ if (!ref $in && $] < 5.008005) {
+ $fh = \*{$in}; # uncoverable statement ancient perl version required
+ }
+ else {
+ $fh = $in;
+ }
+ }
+ else {
+ open $fh, "<$enc", $in or croak("$in: $!");
+ $cls = 1;
+ }
+ $fh || $sink or croak(qq{No valid source passed. "in" is required});
+
+ for ([quo => "quote"],
+ [esc => "escape"],
+ [escape => "escape_char"],
+ ) {
+ my ($f, $t) = @{$_};
+ exists $attr{$f} and !exists $attr{$t} and $attr{$t} = delete $attr{$f};
+ }
+
+ my $fltr = delete $attr{filter};
+ my %fltr = (
+ not_blank => sub { @{$_[1]} > 1 or defined $_[1][0] && $_[1][0] ne "" },
+ not_empty => sub { grep { defined && $_ ne "" } @{$_[1]} },
+ filled => sub { grep { defined && m/\S/ } @{$_[1]} },
+ );
+ defined $fltr && !ref $fltr && exists $fltr{$fltr} and
+ $fltr = {0 => $fltr{$fltr}};
+ ref $fltr eq "CODE" and $fltr = {0 => $fltr};
+ ref $fltr eq "HASH" or $fltr = undef;
+
+ my $form = delete $attr{formula};
+
+ defined $attr{auto_diag} or $attr{auto_diag} = 1;
+ defined $attr{escape_null} or $attr{escape_null} = 0;
+ my $csv = delete $attr{csv} || Text::CSV_PP->new(\%attr)
+ or croak($last_err);
+ defined $form and $csv->formula($form);
+ defined $cboe and $csv->callbacks(error => $cboe);
+
+ $kh && !ref $kh && $kh =~ m/^(?:1|yes|true|internal|auto)$/i and
+ $kh = \@internal_kh;
+
+ return {
+ csv => $csv,
+ attr => {%attr},
+ fh => $fh,
+ cls => $cls,
+ in => $in,
+ sink => $sink,
+ out => $out,
+ enc => $enc,
+ fho => $fho,
+ fhoc => $fho_cls,
+ hdrs => $hdrs,
+ key => $key,
+ val => $val,
+ kh => $kh,
+ frag => $frag,
+ fltr => $fltr,
+ cbai => $cbai,
+ cbbo => $cbbo,
+ cboi => $cboi,
+ hd_s => $hd_s,
+ hd_b => $hd_b,
+ hd_m => $hd_m,
+ hd_c => $hd_c,
+ };
+}
+
+sub csv {
+ @_ && (ref $_[0] eq __PACKAGE__ or ref $_[0] eq 'Text::CSV') and splice @_, 0, 0, "csv";
+ @_ or croak($csv_usage);
+
+ my $c = _csv_attr(@_);
+
+ my ($csv, $in, $fh, $hdrs) = @{$c}{qw( csv in fh hdrs )};
+ my %hdr;
+ if (ref $hdrs eq "HASH") {
+ %hdr = %{$hdrs};
+ $hdrs = "auto";
+ }
+
+ if ($c->{out} && !$c->{sink}) {
+ !$hdrs && ref $c->{'kh'} && $c->{'kh'} == \@internal_kh and
+ $hdrs = $c->{'kh'};
+
+ if (ref $in eq "CODE") {
+ my $hdr = 1;
+ while (my $row = $in->($csv)) {
+ if (ref $row eq "ARRAY") {
+ $csv->print($fh, $row);
+ next;
+ }
+ if (ref $row eq "HASH") {
+ if ($hdr) {
+ $hdrs ||= [map { $hdr{$_} || $_ } keys %{$row}];
+ $csv->print($fh, $hdrs);
+ $hdr = 0;
+ }
+ $csv->print($fh, [@{$row}{@{$hdrs}}]);
+ }
+ }
+ }
+ elsif (@{$in} == 0 or ref $in->[0] eq "ARRAY") { # aoa
+ ref $hdrs and $csv->print($fh, $hdrs);
+ for (@{$in}) {
+ $c->{cboi} and $c->{cboi}->($csv, $_);
+ $c->{cbbo} and $c->{cbbo}->($csv, $_);
+ $csv->print($fh, $_);
+ }
+ }
+ else { # aoh
+ my @hdrs = ref $hdrs ? @{$hdrs} : keys %{$in->[0]};
+ defined $hdrs or $hdrs = "auto";
+ ref $hdrs || $hdrs eq "auto" and @hdrs and
+ $csv->print($fh, [map { $hdr{$_} || $_ } @hdrs]);
+ for (@{$in}) {
+ local %_;
+ *_ = $_;
+ $c->{cboi} and $c->{cboi}->($csv, $_);
+ $c->{cbbo} and $c->{cbbo}->($csv, $_);
+ $csv->print($fh, [@{$_}{@hdrs}]);
+ }
+ }
+
+ $c->{cls} and close $fh;
+ $c->{fho_cls} and close $c->{fho};
+ return 1;
+ }
+
+ my @row1;
+ if (defined $c->{hd_s} || defined $c->{hd_b} || defined $c->{hd_m} || defined $c->{hd_c}) {
+ my %harg;
+ !defined $c->{'hd_s'} && $c->{'attr'}{'sep_char'} and
+ $c->{'hd_s'} = [$c->{'attr'}{'sep_char'}];
+ !defined $c->{'hd_s'} && $c->{'attr'}{'sep'} and
+ $c->{'hd_s'} = [$c->{'attr'}{'sep'}];
+ defined $c->{'hd_s'} and $harg{'sep_set'} = $c->{'hd_s'};
+ defined $c->{'hd_b'} and $harg{'detect_bom'} = $c->{'hd_b'};
+ defined $c->{'hd_m'} and $harg{'munge_column_names'} = $hdrs ? "none" : $c->{'hd_m'};
+ defined $c->{'hd_c'} and $harg{'set_column_names'} = $hdrs ? 0 : $c->{'hd_c'};
+ @row1 = $csv->header($fh, \%harg);
+ my @hdr = $csv->column_names();
+ @hdr and $hdrs ||= \@hdr;
+ }
+
+ if ($c->{kh}) {
+ @internal_kh = ();
+ ref $c->{kh} eq "ARRAY" or croak($csv->SetDiag(1501));
+ $hdrs ||= "auto";
+ }
+
+ my $key = $c->{key};
+ if ($key) {
+ !ref $key or ref $key eq "ARRAY" && @{$key} > 1 or croak($csv->SetDiag(1501));
+ $hdrs ||= "auto";
+ }
+ my $val = $c->{val};
+ if ($val) {
+ $key or croak($csv->SetDiag(1502));
+ !ref $val or ref $val eq "ARRAY" && @{$val} > 0 or croak($csv->SetDiag(1503));
+ }
+
+ $c->{fltr} && grep m/\D/ => keys %{$c->{fltr}} and $hdrs ||= "auto";
+ if (defined $hdrs) {
+ if (!ref $hdrs or ref $hdrs eq "CODE") {
+ my $h = $c->{'hd_b'}
+ ? [$csv->column_names()]
+ : $csv->getline($fh);
+ my $has_h = $h && @$h;
+
+ if (ref $hdrs) {
+ $has_h or return;
+ my $cr = $hdrs;
+ $hdrs = [map { $cr->($hdr{$_} || $_) } @{$h}];
+ }
+ elsif ($hdrs eq "skip") {
+ # discard;
+ }
+ elsif ($hdrs eq "auto") {
+ $has_h or return;
+ $hdrs = [map { $hdr{$_} || $_ } @{$h}];
+ }
+ elsif ($hdrs eq "lc") {
+ $has_h or return;
+ $hdrs = [map { lc($hdr{$_} || $_) } @{$h}];
+ }
+ elsif ($hdrs eq "uc") {
+ $has_h or return;
+ $hdrs = [map { uc($hdr{$_} || $_) } @{$h}];
+ }
+ }
+ $c->{kh} and $hdrs and @{$c->{kh}} = @{$hdrs};
+ }
+
+ if ($c->{fltr}) {
+ my %f = %{$c->{fltr}};
+ # convert headers to index
+ my @hdr;
+ if (ref $hdrs) {
+ @hdr = @{$hdrs};
+ for (0 .. $#hdr) {
+ exists $f{$hdr[$_]} and $f{$_ + 1} = delete $f{$hdr[$_]};
+ }
+ }
+ $csv->callbacks(after_parse => sub {
+ my ($CSV, $ROW) = @_; # lexical sub-variables in caps
+ foreach my $FLD (sort keys %f) {
+ local $_ = $ROW->[$FLD - 1];
+ local %_;
+ @hdr and @_{@hdr} = @{$ROW};
+ $f{$FLD}->($CSV, $ROW) or return \"skip";
+ $ROW->[$FLD - 1] = $_;
+ }
+ });
+ }
+
+ my $frag = $c->{frag};
+ my $ref = ref $hdrs
+ ? # aoh
+ do {
+ my @h = $csv->column_names($hdrs);
+ my %h; $h{$_}++ for @h;
+ exists $h{''} and croak($csv->SetDiag(1012));
+ unless (keys %h == @h) {
+ croak($csv->_SetDiagInfo(1013, join ", " =>
+ map { "$_ ($h{$_})" } grep { $h{$_} > 1 } keys %h));
+ }
+ $frag ? $csv->fragment($fh, $frag) :
+ $key ? do {
+ my ($k, $j, @f) = ref $key ? (undef, @{$key}) : ($key);
+ if (my @mk = grep { !exists $h{$_} } grep { defined } $k, @f) {
+ croak($csv->_SetDiagInfo(4001, join ", " => @mk));
+ }
+ +{map {
+ my $r = $_;
+ my $K = defined $k ? $r->{$k} : join $j => @{$r}{@f};
+ ($K => (
+ $val
+ ? ref $val
+ ? {map { $_ => $r->{$_} } @{$val}}
+ : $r->{$val}
+ : $r));
+ } @{$csv->getline_hr_all($fh)}};
+ }
+ : $csv->getline_hr_all($fh);
+ }
+ : # aoa
+ $frag ? $csv->fragment($fh, $frag)
+ : $csv->getline_all($fh);
+ if ($ref) {
+ @row1 && !$c->{hd_c} && !ref $hdrs and unshift @{$ref}, \@row1;
+ }
+ else {
+ Text::CSV_PP->auto_diag();
+ }
+ $c->{cls} and close $fh;
+ $c->{fho_cls} and close $c->{fho};
+ if ($ref and $c->{cbai} || $c->{cboi}) {
+ # Default is ARRAYref, but with key =>, you'll get a hashref
+ foreach my $r (ref $ref eq "ARRAY" ? @{$ref} : values %{$ref}) {
+ local %_;
+ ref $r eq "HASH" and *_ = $r;
+ $c->{cbai} and $c->{cbai}->($csv, $r);
+ $c->{cboi} and $c->{cboi}->($csv, $r);
+ }
+ }
+
+ if ($c->{sink}) {
+ my $ro = ref $c->{out} or return;
+
+ $ro eq "SCALAR" && ${$c->{out}} eq "skip" and
+ return;
+
+ $ro eq ref $ref or
+ croak($csv->_SetDiagInfo(5001, "Output type mismatch"));
+
+ if ($ro eq "ARRAY") {
+ if (@{$c->{out}} and @$ref and ref $c->{out}[0] eq ref $ref->[0]) {
+ push @{$c->{out}} => @$ref;
+ return $c->{out};
+ }
+ croak($csv->_SetDiagInfo(5001, "Output type mismatch"));
+ }
+
+ if ($ro eq "HASH") {
+ @{$c->{out}}{keys %{$ref}} = values %{$ref};
+ return $c->{out};
+ }
+
+ croak($csv->_SetDiagInfo(5002, "Unsupported output type"));
+ }
+
+ defined wantarray or
+ return csv(
+ in => $ref,
+ headers => $hdrs,
+ %{$c->{attr}},
+ );
+
+ $last_err ||= $csv->{_ERROR_DIAG};
+ return $ref;
+}
+
+# The end of the common pure perl part.
+
+################################################################################
+#
+# The following are methods implemented in XS in Text::CSV_XS or
+# helper methods for Text::CSV_PP only
+#
+################################################################################
+
+my $last_error;
+sub _setup_ctx {
+ my $self = shift;
+
+ $last_error = undef;
+
+ my $ctx;
+ if ($self->{_CACHE}) {
+ %$ctx = %{$self->{_CACHE}};
+ } else {
+ $ctx->{sep} = ',';
+ if (defined $self->{sep_char}) {
+ $ctx->{sep} = $self->{sep_char};
+ }
+ if (defined $self->{sep} and $self->{sep} ne '') {
+ use bytes;
+ $ctx->{sep} = $self->{sep};
+ my $sep_len = length($ctx->{sep});
+ $ctx->{sep_len} = $sep_len if $sep_len > 1;
+ }
+
+ $ctx->{quo} = '"';
+ if (exists $self->{quote_char}) {
+ my $quote_char = $self->{quote_char};
+ if (defined $quote_char and length $quote_char) {
+ $ctx->{quo} = $quote_char;
+ } else {
+ $ctx->{quo} = "\0";
+ }
+ }
+ if (defined $self->{quote} and $self->{quote} ne '') {
+ use bytes;
+ $ctx->{quo} = $self->{quote};
+ my $quote_len = length($ctx->{quo});
+ $ctx->{quo_len} = $quote_len if $quote_len > 1;
+ }
+
+ $ctx->{escape_char} = '"';
+ if (exists $self->{escape_char}) {
+ my $escape_char = $self->{escape_char};
+ if (defined $escape_char and length $escape_char) {
+ $ctx->{escape_char} = $escape_char;
+ } else {
+ $ctx->{escape_char} = "\0";
+ }
+ }
+
+ if (defined $self->{eol}) {
+ my $eol = $self->{eol};
+ my $eol_len = length($eol);
+ $ctx->{eol} = $eol;
+ $ctx->{eol_len} = $eol_len;
+ if ($eol_len == 1 and $eol eq "\015") {
+ $ctx->{eol_is_cr} = 1;
+ $ctx->{eol_type} = EOL_TYPE_CR;
+ }
+ elsif ($eol_len == 1 && $eol eq "\012") {
+ $ctx->{eol_type} = EOL_TYPE_NL;
+ }
+ elsif ($eol_len == 2 && $eol eq "\015\012") {
+ $ctx->{eol_type} = EOL_TYPE_CRNL;
+ }
+ }
+
+ $ctx->{undef_flg} = 0;
+ if (defined $self->{undef_str}) {
+ $ctx->{undef_str} = $self->{undef_str};
+ $ctx->{undef_flg} = 3 if utf8::is_utf8($self->{undef_str});
+ } else {
+ $ctx->{undef_str} = undef;
+ }
+ if (defined $self->{comment_str}) {
+ $ctx->{comment_str} = $self->{comment_str};
+ }
+
+ if (defined $self->{_types}) {
+ $ctx->{types} = $self->{_types};
+ $ctx->{types_len} = length($ctx->{types});
+ }
+
+ if (defined $self->{_is_bound}) {
+ $ctx->{is_bound} = $self->{_is_bound};
+ }
+
+ if (defined $self->{callbacks}) {
+ my $cb = $self->{callbacks};
+ $ctx->{has_hooks} = 0;
+ if (defined $cb->{after_parse} and ref $cb->{after_parse} eq 'CODE') {
+ $ctx->{has_hooks} |= HOOK_AFTER_PARSE;
+ }
+ if (defined $cb->{before_print} and ref $cb->{before_print} eq 'CODE') {
+ $ctx->{has_hooks} |= HOOK_BEFORE_PRINT;
+ }
+ }
+
+ for (qw/
+ binary decode_utf8 always_quote strict strict_eol quote_empty
+ allow_loose_quotes allow_loose_escapes
+ allow_unquoted_escape allow_whitespace blank_is_undef
+ empty_is_undef verbatim auto_diag diag_verbose
+ keep_meta_info formula skip_empty_rows
+ /) {
+ $ctx->{$_} = defined $self->{$_} ? $self->{$_} : 0;
+ }
+ for (qw/quote_space escape_null quote_binary/) {
+ $ctx->{$_} = defined $self->{$_} ? $self->{$_} : 1;
+ }
+ if ($ctx->{escape_char} eq "\0") {
+ $ctx->{escape_null} = 0;
+ }
+
+ # FIXME: readonly
+ %{$self->{_CACHE}} = %$ctx;
+ }
+
+ $ctx->{utf8} = 0;
+ $ctx->{size} = 0;
+ $ctx->{used} = 0;
+
+ if ($ctx->{is_bound}) {
+ my $bound = $self->{_BOUND_COLUMNS};
+ if ($bound and ref $bound eq 'ARRAY') {
+ $ctx->{bound} = $bound;
+ } else {
+ $ctx->{is_bound} = 0;
+ }
+ }
+
+ $ctx->{eol_pos} = -1;
+ $ctx->{eolx} = $ctx->{eol_len}
+ ? $ctx->{verbatim} || $ctx->{eol_len} >= 2
+ ? 1
+ : $ctx->{eol} =~ /\A[\015\012]/ ? 0 : 1
+ : 0;
+ if ($ctx->{eol_type} && $ctx->{strict_eol} && !$ctx->{eol}) {
+ $ctx->{eol_is_cr} = 0;
+ }
+ if ($ctx->{sep_len} and $ctx->{sep_len} > 1 and _is_valid_utf8($ctx->{sep})) {
+ $ctx->{utf8} = 1;
+ }
+ if ($ctx->{quo_len} and $ctx->{quo_len} > 1 and _is_valid_utf8($ctx->{quo})) {
+ $ctx->{utf8} = 1;
+ }
+
+ if ($ctx->{strict} && !$ctx->{strict_n} && $self->{_COLUMN_NAMES} && ref $self->{_COLUMN_NAMES} eq 'ARRAY') {
+ $ctx->{strict_n} = @{$self->{_COLUMN_NAMES}};
+ }
+ $ctx;
+}
+
+sub _eol_type {
+ my $c = shift;
+ return EOL_TYPE_NL if $c eq "\012";
+ return EOL_TYPE_CR if $c eq "\015";
+ return EOL_TYPE_OTHER;
+}
+
+sub _set_eol_type {
+ my ($self, $ctx, $type) = @_;
+ if (!$ctx->{eol_type}) {
+ $ctx->{eol_type} = $type;
+ $self->_cache_set($_cache_id{eol_type} => $type);
+ }
+}
+
+sub _cache_get_eolt {
+ my $self = shift;
+ return unless exists $self->{_CACHE};
+ my $cache = $self->{_CACHE};
+
+ my $eol_type = $cache->{eol_type} || 0;
+ return "\012" if $eol_type == EOL_TYPE_NL;
+ return "\015" if $eol_type == EOL_TYPE_CR;
+ return "\015\012" if $eol_type == EOL_TYPE_CRNL;
+ return $cache->{eol} if $eol_type == EOL_TYPE_OTHER;
+ return;
+}
+
+sub _cache_set {
+ my ($self, $idx, $value) = @_;
+ return unless exists $self->{_CACHE};
+ my $cache = $self->{_CACHE};
+
+ my $key = $_reverse_cache_id{$idx};
+ if (!defined $key) {
+ warn(sprintf "Unknown cache index %d ignored\n", $idx);
+ } elsif ($key eq 'sep_char') {
+ $cache->{sep} = $value;
+ $cache->{sep_len} = 0;
+ }
+ elsif ($key eq 'quote_char') {
+ $cache->{quo} = $value;
+ $cache->{quo_len} = 0;
+ }
+ elsif ($key eq '_has_ahead') {
+ $cache->{has_ahead} = $value;
+ }
+ elsif ($key eq '_has_hooks') {
+ $cache->{has_hooks} = $value;
+ }
+ elsif ($key eq '_is_bound') {
+ $cache->{is_bound} = $value;
+ }
+ elsif ($key eq 'sep') {
+ use bytes;
+ my $len = bytes::length($value);
+ $cache->{sep} = $value if $len;
+ $cache->{sep_len} = $len == 1 ? 0 : $len;
+ }
+ elsif ($key eq 'quote') {
+ use bytes;
+ my $len = bytes::length($value);
+ $cache->{quo} = $value if $len;
+ $cache->{quo_len} = $len == 1 ? 0 : $len;
+ }
+ elsif ($key eq 'eol') {
+ $cache->{eol} = $value;
+ $cache->{eol_len} = my $len = defined $value ? length($value) : 0;
+ $cache->{eol_type} = $len == 0 ? EOL_TYPE_UNDEF
+ : $len == 1 && $value eq "\012" ? EOL_TYPE_NL
+ : $len == 1 && $value eq "\015" ? EOL_TYPE_CR
+ : $len == 2 && $value eq "\015\012" ? EOL_TYPE_CRNL
+ : EOL_TYPE_OTHER;
+ $cache->{eol_is_cr} = $cache->{eol_type} == EOL_TYPE_CR ? 1 : 0;
+ }
+ elsif ($key eq 'undef_str') {
+ if (defined $value) {
+ $cache->{undef_str} = $value;
+ $cache->{undef_flg} = 3 if utf8::is_utf8($value);
+ } else {
+ $cache->{undef_str} = undef;
+ $cache->{undef_flg} = 0;
+ }
+ }
+ else {
+ $cache->{$key} = $value;
+ }
+ return 1;
+}
+
+sub _cache_diag {
+ my $self = shift;
+ unless (exists $self->{_CACHE}) {
+ warn("CACHE: invalid\n");
+ return;
+ }
+
+ my $cache = $self->{_CACHE};
+ warn("CACHE:\n");
+ $self->__cache_show_char(quote_char => $cache->{quo});
+ $self->__cache_show_char(escape_char => $cache->{escape_char});
+ $self->__cache_show_char(sep_char => $cache->{sep});
+ for (qw/
+ binary decode_utf8 allow_loose_escapes allow_loose_quotes allow_unquoted_escape
+ allow_whitespace always_quote quote_empty quote_space
+ escape_null quote_binary auto_diag diag_verbose formula strict strict_n strict_eol eol_type skip_empty_rows
+ has_error_input blank_is_undef empty_is_undef has_ahead
+ keep_meta_info verbatim useIO has_hooks eol_is_cr eol_len
+ /) {
+ $self->__cache_show_byte($_ => $cache->{$_});
+ }
+ $self->__cache_show_str(eol => $cache->{eol_len}, $cache->{eol});
+ $self->__cache_show_byte(sep_len => $cache->{sep_len});
+ if ($cache->{sep_len} and $cache->{sep_len} > 1) {
+ $self->__cache_show_str(sep => $cache->{sep_len}, $cache->{sep});
+ }
+ $self->__cache_show_byte(quo_len => $cache->{quo_len});
+ if ($cache->{quo_len} and $cache->{quo_len} > 1) {
+ $self->__cache_show_str(quote => $cache->{quo_len}, $cache->{quo});
+ }
+ if ($cache->{types_len}) {
+ $self->__cache_show_str(types => $cache->{types_len}, $cache->{types});
+ } else {
+ $self->__cache_show_str(types => 0, "");
+ }
+ if ($cache->{bptr}) {
+ $self->__cache_show_str(bptr => length($cache->{bptr}), $cache->{bptr});
+ }
+ if ($cache->{tmp}) {
+ $self->__cache_show_str(tmp => length($cache->{tmp}), $cache->{tmp});
+ }
+}
+
+sub __cache_show_byte {
+ my ($self, $key, $value) = @_;
+ warn(sprintf " %-21s %02x:%3d\n", $key, defined $value ? ord($value) : 0, defined $value ? $value : 0);
+}
+
+sub __cache_show_char {
+ my ($self, $key, $value) = @_;
+ my $v = $value;
+ if (defined $value) {
+ my @b = unpack "U0C*", $value;
+ $v = pack "U*", $b[0];
+ }
+ warn(sprintf " %-21s %02x:%s\n", $key, defined $v ? ord($v) : 0, $self->__pretty_str($v, 1));
+}
+
+sub __cache_show_str {
+ my ($self, $key, $len, $value) = @_;
+ warn(sprintf " %-21s %02d:%s\n", $key, $len, $self->__pretty_str($value, $len));
+}
+
+sub __pretty_str { # FIXME
+ my ($self, $str, $len) = @_;
+ return '' unless defined $str;
+ $str = substr($str, 0, $len);
+ $str =~ s/"/\\"/g;
+ $str =~ s/([^\x09\x20-\x7e])/sprintf '\\x{%x}', ord($1)/eg;
+ qq{"$str"};
+}
+
+sub _hook {
+ my ($self, $name, $fields) = @_;
+ return 0 unless $self->{callbacks};
+
+ my $cb = $self->{callbacks}{$name};
+ return 0 unless $cb && ref $cb eq 'CODE';
+
+ my (@res) = $cb->($self, $fields);
+ if (@res) {
+ return 0 if ref $res[0] eq 'SCALAR' and ${$res[0]} eq "skip";
+ }
+ scalar @res;
+}
+
+################################################################################
+# methods for combine
+################################################################################
+
+sub __combine {
+ my ($self, $dst, $fields, $useIO) = @_;
+
+ my $ctx = $self->_setup_ctx;
+
+ my ($binary, $quot, $sep, $esc, $quote_space) = @{$ctx}{qw/binary quo sep escape_char quote_space/};
+
+ if (!defined $quot or $quot eq "\0") { $quot = ''; }
+
+ my $re_esc;
+ if ($esc ne '' and $esc ne "\0") {
+ if ($quot ne '') {
+ $re_esc = $self->{_re_comb_escape}->{$quot}->{$esc} ||= qr/(\Q$quot\E|\Q$esc\E)/;
+ } else {
+ $re_esc = $self->{_re_comb_escape}->{$quot}->{$esc} ||= qr/(\Q$esc\E)/;
+ }
+ }
+
+ my $bound = 0;
+ my $n = @$fields - 1;
+ if ($n < 0 and $ctx->{is_bound}) {
+ $n = $ctx->{is_bound} - 1;
+ $bound = 1;
+ }
+
+ my $check_meta = ($ctx->{keep_meta_info} >= 10 and @{$self->{_FFLAGS} || []} >= $n) ? 1 : 0;
+
+ my $must_be_quoted;
+ my @results;
+ for (my $i = 0; $i <= $n; $i++) {
+ my $v_ref;
+ if ($bound) {
+ $v_ref = $self->__bound_field($ctx, $i, 1);
+ } else {
+ if (@$fields > $i) {
+ $v_ref = \($fields->[$i]);
+ }
+ }
+ next unless $v_ref;
+
+ my $value = $$v_ref;
+
+ if (!defined $value) {
+ if ($ctx->{undef_str}) {
+ if ($ctx->{undef_flg}) {
+ $ctx->{utf8} = 1;
+ $ctx->{binary} = 1;
+ }
+ push @results, $ctx->{undef_str};
+ } else {
+ push @results, '';
+ }
+ next;
+ }
+
+ if (substr($value, 0, 1) eq '=' && $ctx->{formula}) {
+ $value = $self->_formula($ctx, $value, $i);
+ if (!defined $value) {
+ push @results, '';
+ next;
+ }
+ }
+
+ $must_be_quoted = $ctx->{always_quote} ? 1 : 0;
+ if ($value eq '') {
+ $must_be_quoted++ if $ctx->{quote_empty} or ($check_meta && $self->is_quoted($i));
+ }
+ else {
+
+ if (utf8::is_utf8 $value) {
+ $ctx->{utf8} = 1;
+ $ctx->{binary} = 1;
+ }
+
+ $must_be_quoted++ if $check_meta && $self->is_quoted($i);
+
+ if (!$must_be_quoted and $quot ne '') {
+ use bytes;
+ $must_be_quoted++ if
+ ($value =~ /\Q$quot\E/) ||
+ ($sep ne '' and $sep ne "\0" and $value =~ /\Q$sep\E/) ||
+ ($esc ne '' and $esc ne "\0" and $value =~ /\Q$esc\E/) ||
+ ($ctx->{quote_binary} && $value =~ /[\x00-\x1f\x7f-\xa0]/) ||
+ ($ctx->{quote_space} && $value =~ /[\x09\x20]/);
+ }
+
+ if (!$ctx->{binary} and $value =~ /[^\x09\x20-\x7E]/) {
+ # an argument contained an invalid character...
+ $self->{_ERROR_INPUT} = $value;
+ $self->SetDiag(2110);
+ return 0;
+ }
+
+ if ($re_esc) {
+ $value =~ s/($re_esc)/$esc$1/g;
+ }
+ if ($ctx->{escape_null}) {
+ $value =~ s/\0/${esc}0/g;
+ }
+ }
+
+ if ($must_be_quoted) {
+ $value = $quot . $value . $quot;
+ }
+ push @results, $value;
+ }
+
+ $$dst = join($sep, @results) . (defined $ctx->{eol} ? $ctx->{eol} : '');
+
+ return 1;
+}
+
+sub _formula {
+ my ($self, $ctx, $value, $i) = @_;
+
+ my $fa = $ctx->{formula} or return;
+ if ($fa == 1) { die "Formulas are forbidden\n" }
+ if ($fa == 2) { die "Formulas are forbidden\n" } # XS croak behaves like PP's "die"
+
+ if ($fa == 3) {
+ my $rec = '';
+ if ($ctx->{recno}) {
+ $rec = sprintf " in record %lu", $ctx->{recno} + 1;
+ }
+ my $field = '';
+ my $column_names = $self->{_COLUMN_NAMES};
+ if (ref $column_names eq 'ARRAY' and @$column_names >= $i - 1) {
+ my $column_name = $column_names->[$i - 1];
+ $field = sprintf " (column: '%.100s')", $column_name if defined $column_name;
+ }
+ warn sprintf("Field %d%s%s contains formula '%s'\n", $i, $field, $rec, $value);
+ return $value;
+ }
+
+ if ($fa == 4) {
+ return '';
+ }
+ if ($fa == 5) {
+ return undef;
+ }
+
+ if ($fa == 6) {
+ if (ref $self->{_FORMULA_CB} eq 'CODE') {
+ local $_ = $value;
+ return $self->{_FORMULA_CB}->();
+ }
+ }
+ return;
+}
+
+sub print {
+ my ($self, $io, $fields) = @_;
+
+ require IO::Handle;
+
+ if (!defined $fields) {
+ $fields = [];
+ } elsif (ref($fields) ne 'ARRAY') {
+ Carp::croak("Expected fields to be an array ref");
+ }
+
+ $self->_hook(before_print => $fields);
+
+ my $str = "";
+ $self->__combine(\$str, $fields, 1) or return '';
+
+ local $\ = '';
+
+ $io->print($str) or $self->_set_error_diag(2200);
+}
+
+################################################################################
+# methods for parse
+################################################################################
+
+sub __parse { # cx_xsParse
+ my ($self, $fields, $fflags, $src, $useIO) = @_;
+
+ my $ctx = $self->_setup_ctx;
+
+ my $state = $self->___parse($ctx, $fields, $fflags, $src, $useIO);
+ if ($state and ($ctx->{has_hooks} || 0) & HOOK_AFTER_PARSE) {
+ $self->_hook(after_parse => $fields);
+ }
+ return $state || !$last_error;
+}
+
+sub ___parse { # cx_c_xsParse
+ my ($self, $ctx, $fields, $fflags, $src, $useIO) = @_;
+
+ local $/ = $ctx->{eol} if $ctx->{eolx} or $ctx->{eol_is_cr};
+
+ if ($ctx->{useIO} = $useIO) {
+ require IO::Handle;
+
+ $ctx->{tmp} = undef;
+ if ($ctx->{has_ahead} and defined $self->{_AHEAD}) {
+ $ctx->{tmp} = $self->{_AHEAD};
+ $ctx->{size} = length $ctx->{tmp};
+ $ctx->{used} = 0;
+ }
+ } else {
+ $ctx->{tmp} = $src;
+ $ctx->{size} = length $src;
+ $ctx->{used} = 0;
+ $ctx->{utf8} = utf8::is_utf8($src);
+ }
+ if ($ctx->{has_error_input}) {
+ $self->{_ERROR_INPUT} = undef;
+ $ctx->{has_error_input} = 0;
+ }
+
+ my $result = $self->____parse($ctx, $src, $fields, $fflags);
+ $self->{_RECNO} = ++($ctx->{recno});
+ $self->{_EOF} = '';
+
+ if ($ctx->{strict}) {
+ my $nf = $ctx->{is_bound} ? $ctx->{fld_idx} : @$fields;
+ if ($nf and !$ctx->{strict_n}) {
+ $ctx->{strict_n} = $nf;
+ }
+ if ($ctx->{strict_n} > 0 and $nf != $ctx->{strict_n}) {
+ unless ($ctx->{useIO} & useIO_EOF) {
+ unless ($last_error || (!$ctx->{useIO} and $ctx->{has_ahead})) {
+ $self->__parse_error($ctx, 2014, $ctx->{used});
+ }
+ }
+ if ($last_error) {
+ $result = undef;
+ }
+ }
+ }
+
+ if ($ctx->{useIO}) {
+ if (defined $ctx->{tmp} and $ctx->{used} < $ctx->{size} and $ctx->{has_ahead}) {
+ $self->{_AHEAD} = substr($ctx->{tmp}, $ctx->{used}, $ctx->{size} - $ctx->{used});
+ } else {
+ $ctx->{has_ahead} = 0;
+ if ($ctx->{useIO} & useIO_EOF) {
+ $self->{_EOF} = 1;
+ }
+ }
+ %{$self->{_CACHE}} = %$ctx;
+
+ if ($fflags) {
+ if ($ctx->{keep_meta_info}) {
+ $self->{_FFLAGS} = $fflags;
+ } else {
+ undef $fflags;
+ }
+ }
+ } else {
+ %{$self->{_CACHE}} = %$ctx;
+ }
+
+ if ($result and $ctx->{types}) {
+ my $len = @$fields;
+ for (my $i = 0; $i <= $len && $i <= $ctx->{types_len}; $i++) {
+ my $value = $fields->[$i];
+ next unless defined $value;
+ my $type = ord(substr($ctx->{types}, $i, 1));
+ if ($type == IV) {
+ $fields->[$i] = int($value);
+ } elsif ($type == NV) {
+ $fields->[$i] = $value + 0.0;
+ }
+ }
+ }
+
+ $result;
+}
+
+sub ____parse { # cx_Parse
+ my ($self, $ctx, $src, $fields, $fflags) = @_;
+
+ my ($quot, $sep, $esc, $eol) = @{$ctx}{qw/quo sep escape_char eol/};
+
+ utf8::encode($sep) if !$ctx->{utf8} and $ctx->{sep_len};
+ utf8::encode($quot) if !$ctx->{utf8} and $ctx->{quo_len};
+ utf8::encode($eol) if !$ctx->{utf8} and $ctx->{eol_len};
+
+ my $seenSomething = 0;
+ my $spl = -1;
+ my $waitingForField = 1;
+ my ($value, $v_ref, $c0);
+ $ctx->{fld_idx} = my $fnum = 0;
+ $ctx->{flag} = 0;
+
+ my $re_str = join '|', map({ $_ eq "\0" ? '[\\0]' : quotemeta($_) } sort { length $b <=> length $a } grep { defined $_ and $_ ne '' } $sep, $quot, $esc, $eol), "\015", "\012", "\x09", " ";
+ $ctx->{_re} = qr/$re_str/;
+ my $re = qr/$re_str|[^\x09\x20-\x7E]|$/;
+
+LOOP:
+ while ($self->__get_from_src($ctx, $src)) {
+ while ($ctx->{tmp} =~ /\G(.*?)($re)/gs) {
+ my ($hit, $c) = ($1, $2);
+ $ctx->{used} = pos($ctx->{tmp});
+ if (!$waitingForField and $c eq '' and $hit ne '' and $ctx->{useIO} and !($ctx->{useIO} & useIO_EOF)) {
+ $self->{_AHEAD} = $hit;
+ $ctx->{has_ahead} = 1;
+ $ctx->{has_leftover} = 1;
+ last;
+ }
+ last if $seenSomething and $hit eq '' and $c eq ''; # EOF
+
+ # new field
+ if (!$v_ref) {
+ if ($ctx->{is_bound}) {
+ $v_ref = $self->__bound_field($ctx, $fnum, 0);
+ } else {
+ $value = '';
+ $v_ref = \$value;
+ }
+ $fnum++;
+ return unless $v_ref;
+ $ctx->{flag} = 0;
+ $ctx->{fld_idx}++;
+ $c0 = '';
+ }
+
+ $seenSomething = 1;
+ $spl++;
+
+ if (defined $hit and $hit ne '') {
+ if ($waitingForField) {
+ if (!$spl && $ctx->{comment_str} && $ctx->{tmp} =~ /\A\Q$ctx->{comment_str}/) {
+ $ctx->{used} = $ctx->{size};
+ $ctx->{fld_idx} = $ctx->{strict_n} ? $ctx->{strict_n} : 0;
+ $seenSomething = 0;
+ unless ($ctx->{useIO}) {
+ $ctx->{has_ahead} = 214;
+ }
+ next LOOP;
+ }
+ $waitingForField = 0;
+ }
+ if ($hit =~ /[^\x09\x20-\x7E]/) {
+ $ctx->{flag} |= IS_BINARY;
+ }
+ $$v_ref .= $hit;
+ }
+
+ RESTART:
+ if (defined $c and defined $sep and $c eq $sep) {
+ if ($waitingForField) {
+ # ,1,"foo, 3",,bar,
+ # ^ ^
+ if ($ctx->{blank_is_undef} or $ctx->{empty_is_undef}) {
+ $$v_ref = undef;
+ } else {
+ $$v_ref = "";
+ }
+ unless ($ctx->{is_bound}) {
+ push @$fields, $$v_ref;
+ }
+ $v_ref = undef;
+ if ($ctx->{keep_meta_info} and $fflags) {
+ push @$fflags, $ctx->{flag};
+ }
+ } elsif ($ctx->{flag} & IS_QUOTED) {
+ # ,1,"foo, 3",,bar,
+ # ^
+ $$v_ref .= $c;
+ } else {
+ # ,1,"foo, 3",,bar,
+ # ^ ^ ^
+ $self->__push_value($ctx, $v_ref, $fields, $fflags, $ctx->{flag}, $fnum);
+ $v_ref = undef;
+ $waitingForField = 1;
+ }
+ }
+ elsif (defined $c and defined $quot and $quot ne "\0" and $c eq $quot) {
+ if ($waitingForField) {
+ # ,1,"foo, 3",,bar,\r\n
+ # ^
+ $ctx->{flag} |= IS_QUOTED;
+ $waitingForField = 0;
+ next;
+ }
+ if ($ctx->{flag} & IS_QUOTED) {
+ # ,1,"foo, 3",,bar,\r\n
+ # ^
+ my $quoesc = 0;
+ my $c2 = $self->__get($ctx, $src);
+
+ if ($ctx->{allow_whitespace}) {
+ # , 1 , "foo, 3" , , bar , \r\n
+ # ^
+ while ($self->__is_whitespace($ctx, $c2)) {
+ if ($ctx->{allow_loose_quotes} and !(defined $esc and $c2 eq $esc)) {
+ $$v_ref .= $c;
+ $c = $c2;
+ }
+ $c2 = $self->__get($ctx, $src);
+ }
+ }
+
+ if (!defined $c2) { # EOF
+ # ,1,"foo, 3"
+ # ^
+ $self->__push_value($ctx, $v_ref, $fields, $fflags, $ctx->{flag}, $fnum);
+ return 1;
+ }
+
+ if (defined $c2 and defined $sep and $c2 eq $sep) {
+ # ,1,"foo, 3",,bar,\r\n
+ # ^
+ $self->__push_value($ctx, $v_ref, $fields, $fflags, $ctx->{flag}, $fnum);
+ $v_ref = undef;
+ $waitingForField = 1;
+ next;
+ }
+ if (defined $c2 and ($c2 eq "\012" or (defined $eol and $c2 eq $eol))) { # FIXME: EOLX
+ # ,1,"foo, 3",,"bar"\n
+ # ^
+ my $eolt = _eol_type($c2);
+ if ($ctx->{strict_eol} and $ctx->{eol_type} and $ctx->{eol_type} != $eolt) {
+ $self->__error_eol($ctx) or return;
+ }
+ $self->_set_eol_type($ctx, $eolt);
+ $self->__push_value($ctx, $v_ref, $fields, $fflags, $ctx->{flag}, $fnum);
+ return 1;
+ }
+
+ if (defined $esc and $c eq $esc) {
+ $quoesc = 1;
+ if (defined $c2 and $c2 eq '0') {
+ # ,1,"foo, 3"056",,bar,\r\n
+ # ^
+ $$v_ref .= "\0";
+ next;
+ }
+ if (defined $c2 and defined $quot and $c2 eq $quot) {
+ # ,1,"foo, 3""56",,bar,\r\n
+ # ^
+ if ($ctx->{utf8}) {
+ $ctx->{flag} |= IS_BINARY;
+ }
+ $$v_ref .= $c2;
+ next;
+ }
+ if ($ctx->{allow_loose_escapes} and defined $c2 and $c2 ne "\015") {
+ # ,1,"foo, 3"56",,bar,\r\n
+ # ^
+ $$v_ref .= $c;
+ $c = $c2;
+ goto RESTART;
+ }
+ }
+ if (defined $c2 and $c2 eq "\015") {
+ if ($ctx->{eol_is_cr}) {
+ # ,1,"foo, 3"\r
+ # ^
+ $self->__push_value($ctx, $v_ref, $fields, $fflags, $ctx->{flag}, $fnum);
+ return 1;
+ }
+
+ my $c3 = $self->__get($ctx, $src);
+ if (defined $c3 and $c3 eq "\012") {
+ # ,1,"foo, 3"\r\n
+ # ^
+ if ($ctx->{strict_eol} and $ctx->{eol_type} and $ctx->{eol_type} != EOL_TYPE_CRNL) {
+ $self->__error_eol($ctx) or return;
+ }
+ $self->_set_eol_type($ctx, EOL_TYPE_CRNL);
+ $self->__push_value($ctx, $v_ref, $fields, $fflags, $ctx->{flag}, $fnum);
+ return 1;
+ }
+
+ if ($ctx->{useIO} and !$ctx->{eol_len}) {
+ if ($c3 eq "\015") { # \r followed by an empty line
+ # ,1,"foo, 3"\r\r
+ # ^
+ if ($ctx->{strict_eol} and $ctx->{eol_type}) {
+ unless ($ctx->{eol_type} == EOL_TYPE_CR) {
+ $self->__error_eol($ctx) or return;
+ }
+ $ctx->{used}--;
+ $ctx->{has_ahead}++;
+ $self->__push_value($ctx, $v_ref, $fields, $fflags, $ctx->{flag}, $fnum);
+ return 1;
+ }
+ $self->__set_eol_is_cr($ctx);
+ if ($ctx->{flag} & IS_QUOTED) {
+ $ctx->{flag} ^= IS_QUOTED;
+ }
+ $c = $c0 = "\015";
+ goto EOLX;
+ }
+ if ($c3 !~ /[^\x09\x20-\x7E]/) {
+ # ,1,"foo\n 3",,"bar"\r
+ # baz,4
+ # ^
+ if ($ctx->{strict_eol} and $ctx->{eol_type}) {
+ unless ($ctx->{eol_type} == EOL_TYPE_CR) {
+ $self->__error_eol($ctx) or return;
+ }
+ $ctx->{eol_is_cr} = 1;
+ } else {
+ $self->__set_eol_is_cr($ctx);
+ }
+ $ctx->{used}--;
+ $ctx->{has_ahead} = 1;
+ $self->__push_value($ctx, $v_ref, $fields, $fflags, $ctx->{flag}, $fnum);
+ return 1;
+ }
+ }
+
+ $self->__parse_error($ctx, $quoesc ? 2023 : 2010, $ctx->{used} - 2);
+ return;
+ }
+
+ if ($ctx->{allow_loose_quotes} and !$quoesc) {
+ # ,1,"foo, 3"456",,bar,\r\n
+ # ^
+ $$v_ref .= $c;
+ $c = $c2;
+ goto RESTART;
+ }
+ # 1,"foo" ",3
+ # ^
+ if ($quoesc) {
+ $ctx->{used}--;
+ $self->__error_inside_quotes($ctx, 2023);
+ return;
+ }
+ $self->__error_inside_quotes($ctx, 2011);
+ return;
+ }
+ # !waitingForField, !InsideQuotes
+ if ($ctx->{allow_loose_quotes}) { # 1,foo "boo" d'uh,1
+ $ctx->{flag} |= IS_ERROR;
+ $$v_ref .= $c;
+ } else {
+ $self->__error_inside_field($ctx, 2034);
+ return;
+ }
+ }
+ elsif (defined $c and defined $esc and $esc ne "\0" and $c eq $esc) {
+ # This means quote_char != escape_char
+ if ($waitingForField) {
+ $waitingForField = 0;
+ if ($ctx->{allow_unquoted_escape}) {
+ # The escape character is the first character of an
+ # unquoted field
+ # ... get and store next character
+ my $c2 = $self->__get($ctx, $src);
+ $$v_ref = "";
+
+ if (!defined $c2) { # EOF
+ $ctx->{used}--;
+ $self->__error_inside_field($ctx, 2035);
+ return;
+ }
+ if ($c2 eq '0') {
+ $$v_ref .= "\0";
+ }
+ elsif (
+ (defined $quot and $c2 eq $quot) or
+ (defined $sep and $c2 eq $sep) or
+ (defined $esc and $c2 eq $esc) or
+ $ctx->{allow_loose_escapes}
+ ) {
+ if ($ctx->{utf8}) {
+ $ctx->{flag} |= IS_BINARY;
+ }
+ $$v_ref .= $c2;
+ } else {
+ $self->__parse_inside_quotes($ctx, 2025);
+ return;
+ }
+ }
+ }
+ elsif ($ctx->{flag} & IS_QUOTED) {
+ my $c2 = $self->__get($ctx, $src);
+ if (!defined $c2) { # EOF
+ $ctx->{used}--;
+ $self->__error_inside_quotes($ctx, 2024);
+ return;
+ }
+ if ($c2 eq '0') {
+ $$v_ref .= "\0";
+ }
+ elsif (
+ (defined $quot and $c2 eq $quot) or
+ (defined $sep and $c2 eq $sep) or
+ (defined $esc and $c2 eq $esc) or
+ $ctx->{allow_loose_escapes}
+ ) {
+ if ($ctx->{utf8}) {
+ $ctx->{flag} |= IS_BINARY;
+ }
+ $$v_ref .= $c2;
+ } else {
+ $ctx->{used}--;
+ $self->__error_inside_quotes($ctx, 2025);
+ return;
+ }
+ }
+ elsif ($v_ref) {
+ my $c2 = $self->__get($ctx, $src);
+ if (!defined $c2) { # EOF
+ $ctx->{used}--;
+ $self->__error_inside_field($ctx, 2035);
+ return;
+ }
+ $$v_ref .= $c2;
+ }
+ else {
+ $self->__error_inside_field($ctx, 2036);
+ return;
+ }
+ }
+ elsif (defined $c and ($c eq "\012" or $c eq '' or (defined $eol and $c eq $eol and $eol ne "\015"))) { # EOL
+ EOLX:
+ my $eolt = (($c eq "\012" || $c eq "\015") && $c0 eq "\015") ? EOL_TYPE_CRNL : _eol_type($c);
+ $c0 = '';
+ unless ($ctx->{flag} & CSV_FLAGS_IS_QUOTED) {
+ if ($ctx->{strict_eol} and $ctx->{eol_type} and $ctx->{eol_type} != $eolt) {
+ $self->__error_eol($ctx) or return;
+ }
+ $self->_set_eol_type($ctx, $eolt);
+ }
+ if ($fnum == 1 && $ctx->{flag} == 0 && (!$v_ref || $$v_ref eq '') && $ctx->{skip_empty_rows}) {
+ ### SkipEmptyRow
+ my $ser = $ctx->{skip_empty_rows};
+ if ($ser == 3) { $self->SetDiag(2015); die "Empty row\n"; }
+ if ($ser == 4) { $self->SetDiag(2015); die "Empty row\n"; }
+ if ($ser == 5) { $self->SetDiag(2015); return undef; }
+
+ if ($ser <= 2) { # skip & eof
+ $ctx->{fld_idx} = 0;
+ $c = $self->__get($ctx, $src);
+ if (!defined $c or $ser == 2) { # EOF
+ $v_ref = undef;
+ $seenSomething = 0;
+ if ($ser == 2) { return undef; }
+ last LOOP;
+ }
+ }
+
+ if ($ser == 6) {
+ my $cb = $self->{_EMPTROW_CB};
+ unless ($cb && ref $cb eq 'CODE') {
+ return undef; # A callback is wanted, but none found
+ }
+ local $_ = $v_ref;
+ my $rv = $cb->();
+ # Result should be a ref to a list.
+ unless (ref $rv eq 'ARRAY') {
+ return undef;
+ }
+ my $n = @$rv;
+ if ($n <= 0) {
+ return 1;
+ }
+ if ($ctx->{is_bound} && $ctx->{is_bound} < $n) {
+ $n = $ctx->{is_bound} - 1;
+ }
+ for (my $i = 0; $i < $n; $i++) {
+ my $rvi = $rv->[$i];
+ $self->__push_value($ctx, \$rvi, $fields, $fflags, $ctx->{flag}, $fnum);
+ }
+ return 1;
+ }
+ goto RESTART;
+ }
+
+ if ($waitingForField) {
+ # ,1,"foo, 3",,bar,
+ # ^
+ if ($ctx->{blank_is_undef} or $ctx->{empty_is_undef}) {
+ $$v_ref = undef;
+ } else {
+ $$v_ref = "";
+ }
+ unless ($ctx->{is_bound}) {
+ push @$fields, $$v_ref;
+ }
+ if ($ctx->{keep_meta_info} and $fflags) {
+ push @$fflags, $ctx->{flag};
+ }
+ return 1;
+ }
+ if ($ctx->{flag} & IS_QUOTED) {
+ # ,1,"foo\n 3",,bar,
+ # ^
+ $ctx->{flag} |= IS_BINARY;
+ unless ($ctx->{binary}) {
+ $self->__error_inside_quotes($ctx, 2021);
+ return;
+ }
+ $$v_ref .= $c;
+ }
+ elsif ($ctx->{verbatim}) {
+ # ,1,foo\n 3,,bar,
+ # This feature should be deprecated
+ $ctx->{flag} |= IS_BINARY;
+ unless ($ctx->{binary}) {
+ $self->__error_inside_field($ctx, 2030);
+ return;
+ }
+ $$v_ref .= $c unless $ctx->{eol} eq $c and $ctx->{useIO};
+ }
+ else {
+ # sep=,
+ # ^
+ if (!$ctx->{recno} and $ctx->{fld_idx} == 1 and $ctx->{useIO} and $hit =~ /^sep=(.{1,16})$/i) {
+ $ctx->{sep} = $1;
+ use bytes;
+ my $len = length $ctx->{sep};
+ if ($len <= 16) {
+ $ctx->{sep_len} = $len == 1 ? 0 : $len;
+ return $self->____parse($ctx, $src, $fields, $fflags);
+ }
+ }
+
+ # ,1,"foo\n 3",,bar
+ # ^
+ $self->__push_value($ctx, $v_ref, $fields, $fflags, $ctx->{flag}, $fnum);
+ return 1;
+ }
+ }
+ elsif (defined $c and $c eq "\015" and !$ctx->{verbatim}) {
+ $c0 = "\015";
+ if ($waitingForField) {
+ if ($ctx->{eol_is_cr}) {
+ # ,1,"foo\n 3",,bar,\r
+ # ^
+ $c = "\012";
+ goto EOLX;
+ }
+
+ my $c2 = $self->__get($ctx, $src);
+ if (!defined $c2) { # EOF
+ # ,1,"foo\n 3",,bar,\r
+ # ^
+ $c = undef;
+ last unless $seenSomething;
+ goto RESTART;
+ }
+ if ($c2 eq "\012") { # \r is not optional before EOLX!
+ # ,1,"foo\n 3",,bar,\r\n
+ # ^
+ if ($ctx->{strict_eol} and $ctx->{eol_type} and $ctx->{eol_type} != EOL_TYPE_CRNL) {
+ $self->__error_eol($ctx) or return;
+ }
+ $self->_set_eol_type($ctx, EOL_TYPE_CRNL);
+ $c = $c2;
+ goto EOLX;
+ }
+
+ if ($ctx->{useIO} and !$ctx->{eol_len}) {
+ if ($c2 eq "\012") { # \r followed by an empty line
+ # ,1,"foo\n 3",,bar,\r\r
+ # ^
+ if ($ctx->{strict_eol} and $ctx->{eol_type}) {
+ unless ($ctx->{eol_type} == EOL_TYPE_CR) {
+ $self->__error_eol($ctx) or return;
+ }
+ $ctx->{eol_is_cr} = 1;
+ } else {
+ $self->__set_eol_is_cr($ctx);
+ }
+ goto EOLX;
+ }
+ $waitingForField = 0;
+ if ($c2 !~ /[^\x09\x20-\x7E]/) {
+ # ,1,"foo\n 3",,bar,\r
+ # baz,4
+ # ^
+ if ($ctx->{strict_eol} and $ctx->{eol_type}) {
+ unless ($ctx->{eol_type} == EOL_TYPE_CR) {
+ $self->__error_eol($ctx) or return;
+ }
+ } else {
+ $self->__set_eol_is_cr($ctx);
+ }
+ $ctx->{used}--;
+ $ctx->{has_ahead} = 1;
+ if ($fnum == 1 && $ctx->{flag} == 0 && (!$v_ref or $$v_ref eq '') && $ctx->{skip_empty_rows}) {
+ ### SkipEmptyRow
+ my $ser = $ctx->{skip_empty_rows};
+ if ($ser == 3) { $self->SetDiag(2015); die "Empty row\n"; }
+ if ($ser == 4) { $self->SetDiag(2015); die "Empty row\n"; }
+ if ($ser == 5) { $self->SetDiag(2015); return undef; }
+
+ if ($ser <= 2) { # skip & eof
+ $ctx->{fld_idx} = 0;
+ $c = $self->__get($ctx, $src);
+ if (!defined $c) { # EOF
+ $v_ref = undef;
+ $waitingForField = 1;
+ $seenSomething = 0;
+ last LOOP;
+ }
+ }
+
+ if ($ser == 6) {
+ my $cb = $self->{_EMPTROW_CB};
+ unless ($cb && ref $cb eq 'CODE') {
+ return undef; # A callback is wanted, but none found
+ }
+ local $_ = $v_ref;
+ my $rv = $cb->();
+ # Result should be a ref to a list.
+ unless (ref $rv eq 'ARRAY') {
+ return undef;
+ }
+ my $n = @$rv;
+ if ($n <= 0) {
+ return 1;
+ }
+ if ($ctx->{is_bound} && $ctx->{is_bound} < $n) {
+ $n = $ctx->{is_bound} - 1;
+ }
+ for (my $i = 0; $i < $n; $i++) {
+ my $rvi = $rv->[$i];
+ $self->__push_value($ctx, \$rvi, $fields, $fflags, $ctx->{flag}, $fnum);
+ }
+ return 1;
+ }
+
+ $$v_ref = $c2;
+ goto RESTART;
+ }
+ $self->__push_value($ctx, $v_ref, $fields, $fflags, $ctx->{flag}, $fnum);
+ return 1;
+ }
+ }
+
+ # ,1,"foo\n 3",,bar,\r\t
+ # ^
+ $ctx->{used}--;
+ $self->__error_inside_field($ctx, 2031);
+ return;
+ }
+ if ($ctx->{flag} & IS_QUOTED) {
+ # ,1,"foo\r 3",,bar,\r\t
+ # ^
+ $ctx->{flag} |= IS_BINARY;
+ unless ($ctx->{binary}) {
+ $self->__error_inside_quotes($ctx, 2022);
+ return;
+ }
+ $$v_ref .= $c;
+ }
+ else {
+ if ($ctx->{eol_is_cr}) {
+ # ,1,"foo\n 3",,bar\r
+ # ^
+ goto EOLX;
+ }
+
+ my $c2 = $self->__get($ctx, $src);
+ if (defined $c2 and $c2 eq "\012") { # \r is not optional before EOLX!
+ # ,1,"foo\n 3",,bar\r\n
+ # ^
+ if ($ctx->{strict_eol} and $ctx->{eol_type} and $ctx->{eol_type} != EOL_TYPE_CRNL) {
+ $self->__error_eol($ctx) or return;
+ }
+ $self->_set_eol_type($ctx, EOL_TYPE_CRNL);
+ goto EOLX;
+ }
+
+ if ($ctx->{useIO} and !$ctx->{eol_len}) {
+ if ($c2 !~ /[^\x09\x20-\x7E]/
+ # ,1,"foo\n 3",,bar\r
+ # baz,4
+ # ^
+ or $c2 eq "\015"
+ # ,1,"foo\n 3",,bar,\r\r
+ # ^
+ ) {
+ if ($ctx->{strict_eol} and $ctx->{eol_type}) {
+ unless ($ctx->{eol_type} == EOL_TYPE_CR) {
+ $self->__error_eol($ctx) or return;
+ }
+ } else {
+ $self->__set_eol_is_cr($ctx);
+ }
+ $ctx->{used}--;
+ $ctx->{has_ahead} = 1;
+ if ($fnum == 1 && $ctx->{flag} == 0 && (!$v_ref or $$v_ref eq '') && $ctx->{skip_empty_rows}) {
+ ### SKipEmptyRow
+ my $ser = $ctx->{skip_empty_rows};
+ if ($ser == 3) { $self->SetDiag(2015); die "Empty row\n"; }
+ if ($ser == 4) { $self->SetDiag(2015); die "Empty row\n"; }
+ if ($ser == 5) { $self->SetDiag(2015); return undef; }
+
+ if ($ser <= 2) { # skip & eof
+ $ctx->{fld_idx} = 0;
+ $c = $self->__get($ctx, $src);
+ if (!defined $c) { # EOL
+ $v_ref = undef;
+ $seenSomething = 0;
+ last LOOP;
+ }
+ }
+
+ if ($ser == 6) {
+ my $cb = $self->{_EMPTROW_CB};
+ unless ($cb && ref $cb eq 'CODE') {
+ return undef; # A callback is wanted, but none found
+ }
+ local $_ = $v_ref;
+ my $rv = $cb->();
+ # Result should be a ref to a list.
+ unless (ref $rv eq 'ARRAY') {
+ return undef;
+ }
+ my $n = @$rv;
+ if ($n <= 0) {
+ return 1;
+ }
+ if ($ctx->{is_bound} && $ctx->{is_bound} < $n) {
+ $n = $ctx->{is_bound} - 1;
+ }
+ for (my $i = 0; $i < $n; $i++) {
+ my $rvi = $rv->[$i];
+ $self->__push_value($ctx, \$rvi, $fields, $fflags, $ctx->{flag}, $fnum);
+ }
+ return 1;
+ }
+ goto RESTART;
+ }
+ $self->__push_value($ctx, $v_ref, $fields, $fflags, $ctx->{flag}, $fnum);
+ return 1;
+ }
+ }
+
+ # ,1,"foo\n 3",,bar\r\t
+ # ^
+ $self->__error_inside_field($ctx, 2032);
+ return;
+ }
+ }
+ else {
+ if ($ctx->{eolx} and $c eq $eol) {
+ $c = '';
+ goto EOLX;
+ }
+
+ if ($waitingForField) {
+ if (!$spl && $ctx->{comment_str} && $ctx->{tmp} =~ /\A$ctx->{comment_str}/) {
+ $ctx->{used} = $ctx->{size};
+ $ctx->{fld_idx} = $ctx->{strict_n} ? $ctx->{strict_n} - 1 : 0;
+ $seenSomething = 0;
+ unless ($ctx->{useIO}) {
+ $ctx->{has_ahead} = 214; # abuse
+ }
+ next LOOP;
+ }
+ if ($ctx->{allow_whitespace} and $self->__is_whitespace($ctx, $c)) {
+ do {
+ $c = $self->__get($ctx, $src);
+ last if !defined $c;
+ } while $self->__is_whitespace($ctx, $c);
+ goto RESTART;
+ }
+ $waitingForField = 0;
+ goto RESTART;
+ }
+ if ($ctx->{flag} & IS_QUOTED) {
+ if (!defined $c or $c =~ /[^\x09\x20-\x7E]/) {
+ $ctx->{flag} |= IS_BINARY;
+ unless ($ctx->{binary} or $ctx->{utf8}) {
+ $self->__error_inside_quotes($ctx, 2026);
+ return;
+ }
+ }
+ $$v_ref .= $c;
+ } else {
+ if (!defined $c or $c =~ /[^\x09\x20-\x7E]/) {
+ last if $ctx->{useIO} && !defined $c;
+ $ctx->{flag} |= IS_BINARY;
+ unless ($ctx->{binary} or $ctx->{utf8}) {
+ $self->__error_inside_field($ctx, 2037);
+ return;
+ }
+ }
+ $$v_ref .= $c;
+ }
+ }
+ last LOOP if $ctx->{useIO} and $ctx->{verbatim} and $ctx->{used} == $ctx->{size};
+ }
+ }
+
+ if ($waitingForField) {
+ unless ($ctx->{useIO}) {
+ if ($ctx->{has_ahead} and $ctx->{has_ahead} == 214) {
+ return 1;
+ }
+ $seenSomething++;
+ }
+ if ($seenSomething) {
+ # new field
+ if (!$v_ref) {
+ if ($ctx->{is_bound}) {
+ $v_ref = $self->__bound_field($ctx, $fnum, 0);
+ } else {
+ $value = '';
+ $v_ref = \$value;
+ }
+ $fnum++;
+ return unless $v_ref;
+ $ctx->{flag} = 0;
+ $ctx->{fld_idx}++;
+ }
+ if ($ctx->{blank_is_undef} or $ctx->{empty_is_undef}) {
+ $$v_ref = undef;
+ } else {
+ $$v_ref = "";
+ }
+ unless ($ctx->{is_bound}) {
+ push @$fields, $$v_ref;
+ }
+ if ($ctx->{keep_meta_info} and $fflags) {
+ push @$fflags, $ctx->{flag};
+ }
+ return 1;
+ }
+ $self->SetDiag(2012);
+ return;
+ }
+
+ if ($ctx->{flag} & IS_QUOTED) {
+ $self->__error_inside_quotes($ctx, 2027);
+ return;
+ }
+
+ if ($v_ref) {
+ $self->__push_value($ctx, $v_ref, $fields, $fflags, $ctx->{flag}, $fnum);
+ } elsif ($ctx->{flag} == 0 && $fnum == 1 && $ctx->{skip_empty_rows} == 1) {
+ return undef;
+ }
+ return 1;
+}
+
+sub __get_from_src {
+ my ($self, $ctx, $src) = @_;
+ return 1 if defined $ctx->{tmp} and $ctx->{used} <= 0;
+ return 1 if $ctx->{used} < $ctx->{size};
+ return unless $ctx->{useIO};
+ my $res = $src->getline;
+ if (defined $res) {
+ if ($ctx->{has_ahead}) {
+ $ctx->{tmp} = $self->{_AHEAD};
+ $ctx->{tmp} .= $ctx->{eol} if $ctx->{eol_len};
+ $ctx->{tmp} .= $res;
+ $ctx->{has_ahead} = 0;
+ } else {
+ $ctx->{tmp} = $res;
+ }
+ if ($ctx->{size} = length $ctx->{tmp}) {
+ $ctx->{used} = -1;
+ $ctx->{utf8} = 1 if utf8::is_utf8($ctx->{tmp});
+ pos($ctx->{tmp}) = 0;
+ return 1;
+ }
+ } elsif (delete $ctx->{has_leftover}) {
+ $ctx->{tmp} = $self->{_AHEAD};
+ $ctx->{has_ahead} = 0;
+ $ctx->{useIO} |= useIO_EOF;
+ if ($ctx->{size} = length $ctx->{tmp}) {
+ $ctx->{used} = -1;
+ $ctx->{utf8} = 1 if utf8::is_utf8($ctx->{tmp});
+ pos($ctx->{tmp}) = 0;
+ return 1;
+ }
+ }
+ $ctx->{tmp} = '' unless defined $ctx->{tmp};
+ $ctx->{useIO} |= useIO_EOF;
+ return;
+}
+
+sub __set_eol_is_cr {
+ my ($self, $ctx) = @_;
+ $ctx->{eol_is_cr} = 1;
+ $ctx->{eol_len} = 1;
+ $ctx->{eol} = "\015";
+ $ctx->{eol_type} = EOL_TYPE_CR;
+ %{$self->{_CACHE}} = %$ctx;
+
+ $self->{eol} = $ctx->{eol};
+}
+
+sub __bound_field {
+ my ($self, $ctx, $i, $keep) = @_;
+ if ($i >= $ctx->{is_bound}) {
+ $self->SetDiag(3006);
+ return;
+ }
+ if (ref $ctx->{bound} eq 'ARRAY') {
+ my $ref = $ctx->{bound}[$i];
+ if (ref $ref) {
+ if ($keep) {
+ return $ref;
+ }
+ unless (Scalar::Util::readonly($$ref)) {
+ $$ref = "";
+ return $ref;
+ }
+ }
+ }
+ $self->SetDiag(3008);
+ return;
+}
+
+sub __get {
+ my ($self, $ctx, $src) = @_;
+ return unless defined $ctx->{used};
+ if ($ctx->{used} >= $ctx->{size}) {
+ if ($self->__get_from_src($ctx, $src)) {
+ return $self->__get($ctx, $src);
+ }
+ return;
+ }
+ my $pos = pos($ctx->{tmp});
+ if ($ctx->{tmp} =~ /\G($ctx->{_re}|.)/gs) {
+ my $c = $1;
+ if ($c =~ /[^\x09\012\015\x20-\x7e]/) {
+ $ctx->{flag} |= IS_BINARY;
+ }
+ $ctx->{used} = pos($ctx->{tmp});
+ return $c;
+ } else {
+ if ($self->__get_from_src($ctx, $src)) {
+ return $self->__get($ctx, $src);
+ }
+ pos($ctx->{tmp}) = $pos;
+ return;
+ }
+}
+
+sub __error_inside_quotes {
+ my ($self, $ctx, $error) = @_;
+ $self->__parse_error($ctx, $error, $ctx->{used} - 1);
+}
+
+sub __error_inside_field {
+ my ($self, $ctx, $error) = @_;
+ $self->__parse_error($ctx, $error, $ctx->{used} - 1);
+}
+
+sub __parse_error {
+ my ($self, $ctx, $error, $pos, $line) = @_;
+ $line ||= (caller(1))[2];
+ $self->{_ERROR_POS} = $pos;
+ $self->{_ERROR_FLD} = $ctx->{fld_idx};
+ $self->{_ERROR_INPUT} = $ctx->{tmp} if $ctx->{tmp};
+ $self->_set_diag($ctx, $error, $line);
+ return;
+}
+
+sub __error_eol {
+ my ($self, $ctx) = @_;
+ unless ($ctx->{strict_eol} & 0x40) {
+ $self->__parse_error($ctx, 2016, $ctx->{used} - 1);
+ }
+ if ($ctx->{strict_eol} & 0x0e) {
+ if (!$ctx->{is_bound}) {
+ return;
+ }
+ }
+ $ctx->{strict_eol} |= 0x40;
+}
+
+sub __is_whitespace {
+ my ($self, $ctx, $c) = @_;
+ return unless defined $c;
+ return (
+ (!defined $ctx->{sep} or $c ne $ctx->{sep}) &&
+ (!defined $ctx->{quo} or $c ne $ctx->{quo}) &&
+ (!defined $ctx->{escape_char} or $c ne $ctx->{escape_char}) &&
+ ($c eq " " or $c eq "\t")
+ );
+}
+
+sub __push_value { # AV_PUSH (part of)
+ my ($self, $ctx, $v_ref, $fields, $fflags, $flag, $fnum) = @_;
+ utf8::encode($$v_ref) if $ctx->{utf8};
+ if ($ctx->{formula} && defined $$v_ref && substr($$v_ref, 0, 1) eq '=') {
+ my $value = $self->_formula($ctx, $$v_ref, $fnum);
+ push @$fields, defined $value ? $value : undef;
+ return;
+ }
+ if (
+ (!defined $$v_ref or $$v_ref eq '') and
+ ($ctx->{empty_is_undef} or (!($flag & IS_QUOTED) and $ctx->{blank_is_undef}))
+ ) {
+ $$v_ref = undef;
+ } else {
+ if ($ctx->{allow_whitespace} && !($flag & IS_QUOTED)) {
+ $$v_ref =~ s/[ \t]+$//;
+ }
+ if ($flag & IS_BINARY and $ctx->{decode_utf8} and ($ctx->{utf8} || _is_valid_utf8($$v_ref))) {
+ utf8::decode($$v_ref);
+ }
+ }
+ unless ($ctx->{is_bound}) {
+ push @$fields, $$v_ref;
+ }
+ if ($ctx->{keep_meta_info} and $fflags) {
+ push @$fflags, $flag;
+ }
+}
+
+sub getline {
+ my ($self, $io) = @_;
+
+ my (@fields, @fflags);
+ my $res = $self->__parse(\@fields, \@fflags, $io, 1);
+ $res ? \@fields : undef;
+}
+
+sub getline_all {
+ my ($self, $io, $offset, $len) = @_;
+
+ my $ctx = $self->_setup_ctx;
+
+ my $tail = 0;
+ my $n = 0;
+ $offset ||= 0;
+
+ if ($offset < 0) {
+ $tail = -$offset;
+ $offset = -1;
+ }
+
+ my (@row, @list);
+ while ($self->___parse($ctx, \@row, undef, $io, 1)) {
+ $ctx = $self->_setup_ctx;
+
+ if ($offset > 0) {
+ $offset--;
+ @row = ();
+ next;
+ }
+ if ($n++ >= $tail and $tail) {
+ shift @list;
+ $n--;
+ }
+ if (($ctx->{has_hooks} || 0) & HOOK_AFTER_PARSE) {
+ unless ($self->_hook(after_parse => \@row)) {
+ @row = ();
+ next;
+ }
+ }
+ push @list, [@row];
+ @row = ();
+
+ last if defined $len && $n >= $len and $offset >= 0; # exceeds limit size
+ }
+
+ if (defined $len && $n > $len) {
+ @list = splice(@list, 0, $len);
+ }
+
+ return \@list;
+}
+
+sub _is_valid_utf8 {
+ return ($_[0] =~ /^(?:
+ [\x00-\x7F]
+ |[\xC2-\xDF][\x80-\xBF]
+ |[\xE0][\xA0-\xBF][\x80-\xBF]
+ |[\xE1-\xEC][\x80-\xBF][\x80-\xBF]
+ |[\xED][\x80-\x9F][\x80-\xBF]
+ |[\xEE-\xEF][\x80-\xBF][\x80-\xBF]
+ |[\xF0][\x90-\xBF][\x80-\xBF][\x80-\xBF]
+ |[\xF1-\xF3][\x80-\xBF][\x80-\xBF][\x80-\xBF]
+ |[\xF4][\x80-\x8F][\x80-\xBF][\x80-\xBF]
+ )+$/x) ? 1 : 0;
+}
+
+################################################################################
+# methods for errors
+################################################################################
+
+sub _set_error_diag {
+ my ($self, $error, $pos) = @_;
+
+ $self->SetDiag($error);
+
+ if (defined $pos) {
+ $_[0]->{_ERROR_POS} = $pos;
+ }
+
+ return;
+}
+
+sub error_input {
+ my $self = shift;
+ if ($self and ((Scalar::Util::reftype($self) || '') eq 'HASH' or (ref $self) =~ /^Text::CSV/)) {
+ return $self->{_ERROR_INPUT};
+ }
+ return;
+}
+
+sub _sv_diag {
+ my ($self, $error) = @_;
+ bless [$error, $ERRORS->{$error}], 'Text::CSV::ErrorDiag';
+}
+
+sub _set_diag {
+ my ($self, $ctx, $error, $line) = @_;
+
+ $last_error = $self->_sv_diag($error);
+ $self->{_ERROR_DIAG} = $last_error;
+ if ($error == 0) {
+ $self->{_ERROR_POS} = 0;
+ $self->{_ERROR_FLD} = 0;
+ $self->{_ERROR_INPUT} = undef;
+ $ctx->{has_error_input} = 0;
+ }
+ if ($line) {
+ $self->{_ERROR_SRC} = $line;
+ }
+ if ($error == 2012) { # EOF
+ $self->{_EOF} = 1;
+ }
+ if ($ctx->{auto_diag}) {
+ $self->error_diag;
+ }
+ return $last_error;
+}
+
+sub SetDiag {
+ my ($self, $error, $errstr) = @_;
+ my $res;
+ if (ref $self) {
+ my $ctx = $self->_setup_ctx;
+ $res = $self->_set_diag($ctx, $error);
+ } else {
+ $last_error = $error;
+ $res = $self->_sv_diag($error);
+ }
+ if (defined $errstr) {
+ $res->[1] = $errstr;
+ }
+ $res;
+}
+
+################################################################################
+package Text::CSV::ErrorDiag;
+
+use strict;
+use overload (
+ '""' => \&stringify,
+ '+' => \&numeric,
+ '-' => \&numeric,
+ '*' => \&numeric,
+ '/' => \&numeric,
+ fallback => 1,
+);
+
+sub numeric {
+ my ($left, $right) = @_;
+ return ref $left ? $left->[0] : $right->[0];
+}
+
+sub stringify {
+ $_[0]->[1];
+}
+################################################################################
+1;
+__END__
+
+=head1 NAME
+
+Text::CSV_PP - Text::CSV_XS compatible pure-Perl module
+
+
+=head1 SYNOPSIS
+
+This section is taken from Text::CSV_XS.
+
+ # Functional interface
+ use Text::CSV_PP qw( csv );
+
+ # Read whole file in memory
+ my $aoa = csv (in => "data.csv"); # as array of array
+ my $aoh = csv (in => "data.csv",
+ headers => "auto"); # as array of hash
+
+ # Write array of arrays as csv file
+ csv (in => $aoa, out => "file.csv", sep_char => ";");
+
+ # Only show lines where "code" is odd
+ csv (in => "data.csv", filter => { code => sub { $_ % 2 }});
+
+ # Object interface
+ use Text::CSV_PP;
+
+ my @rows;
+ # Read/parse CSV
+ my $csv = Text::CSV_PP->new ({ binary => 1, auto_diag => 1 });
+ open my $fh, "<:encoding(utf8)", "test.csv" or die "test.csv: $!";
+ while (my $row = $csv->getline ($fh)) {
+ $row->[2] =~ m/pattern/ or next; # 3rd field should match
+ push @rows, $row;
+ }
+ close $fh;
+
+ # and write as CSV
+ open $fh, ">:encoding(utf8)", "new.csv" or die "new.csv: $!";
+ $csv->say ($fh, $_) for @rows;
+ close $fh or die "new.csv: $!";
+
+=head1 DESCRIPTION
+
+Text::CSV_PP is a pure-perl module that provides facilities for the
+composition and decomposition of comma-separated values. This is
+(almost) compatible with much faster L, and mainly
+used as its fallback module when you use L module without
+having installed Text::CSV_XS. If you don't have any reason to use
+this module directly, use Text::CSV for speed boost and portability
+(or maybe Text::CSV_XS when you write an one-off script and don't need
+to care about portability).
+
+The following caveats are taken from the doc of Text::CSV_XS.
+
+=head2 Embedded newlines
+
+B: The default behavior is to accept only ASCII characters
+in the range from C<0x20> (space) to C<0x7E> (tilde). This means that the
+fields can not contain newlines. If your data contains newlines embedded in
+fields, or characters above C<0x7E> (tilde), or binary data, you B>
+set C<< binary => 1 >> in the call to L. To cover the widest range of
+parsing options, you will always want to set binary.
+
+But you still have the problem that you have to pass a correct line to the
+L method, which is more complicated from the usual point of usage:
+
+ my $csv = Text::CSV_PP->new ({ binary => 1, eol => $/ });
+ while (<>) { # WRONG!
+ $csv->parse ($_);
+ my @fields = $csv->fields ();
+ }
+
+this will break, as the C might read broken lines: it does not care
+about the quoting. If you need to support embedded newlines, the way to go
+is to B pass L|/eol> in the parser (it accepts C<\n>, C<\r>,
+B C<\r\n> by default) and then
+
+ my $csv = Text::CSV_PP->new ({ binary => 1 });
+ open my $fh, "<", $file or die "$file: $!";
+ while (my $row = $csv->getline ($fh)) {
+ my @fields = @$row;
+ }
+
+The old(er) way of using global file handles is still supported
+
+ while (my $row = $csv->getline (*ARGV)) { ... }
+
+=head2 Unicode
+
+Unicode is only tested to work with perl-5.8.2 and up.
+
+See also L.
+
+The simplest way to ensure the correct encoding is used for in- and output
+is by either setting layers on the filehandles, or setting the L
+argument for L.
+
+ open my $fh, "<:encoding(UTF-8)", "in.csv" or die "in.csv: $!";
+or
+ my $aoa = csv (in => "in.csv", encoding => "UTF-8");
+
+ open my $fh, ">:encoding(UTF-8)", "out.csv" or die "out.csv: $!";
+or
+ csv (in => $aoa, out => "out.csv", encoding => "UTF-8");
+
+On parsing (both for L and L), if the source is marked
+being UTF8, then all fields that are marked binary will also be marked UTF8.
+
+On combining (L and L): if any of the combining fields
+was marked UTF8, the resulting string will be marked as UTF8. Note however
+that all fields I the first field marked UTF8 and contained 8-bit
+characters that were not upgraded to UTF8, these will be C in the
+resulting string too, possibly causing unexpected errors. If you pass data
+of different encoding, or you don't know if there is different encoding,
+force it to be upgraded before you pass them on:
+
+ $csv->print ($fh, [ map { utf8::upgrade (my $x = $_); $x } @data ]);
+
+For complete control over encoding, please use L:
+
+ use Text::CSV::Encoded;
+ my $csv = Text::CSV::Encoded->new ({
+ encoding_in => "iso-8859-1", # the encoding comes into Perl
+ encoding_out => "cp1252", # the encoding comes out of Perl
+ });
+
+ $csv = Text::CSV::Encoded->new ({ encoding => "utf8" });
+ # combine () and print () accept *literally* utf8 encoded data
+ # parse () and getline () return *literally* utf8 encoded data
+
+ $csv = Text::CSV::Encoded->new ({ encoding => undef }); # default
+ # combine () and print () accept UTF8 marked data
+ # parse () and getline () return UTF8 marked data
+
+=head2 BOM
+
+BOM (or Byte Order Mark) handling is available only inside the L
+method. This method supports the following encodings: C, C,
+C, C, C, C, C, C,
+C, and C. See L.
+
+If a file has a BOM, the easiest way to deal with that is
+
+ my $aoh = csv (in => $file, detect_bom => 1);
+
+All records will be encoded based on the detected BOM.
+
+This implies a call to the L method, which defaults to also set
+the L. So this is B the same as
+
+ my $aoh = csv (in => $file, headers => "auto");
+
+which only reads the first record to set L but ignores any
+meaning of possible present BOM.
+
+=head1 METHODS
+
+This section is also taken from Text::CSV_XS.
+
+=head2 version
+
+(Class method) Returns the current module version.
+
+=head2 new
+
+(Class method) Returns a new instance of class Text::CSV_PP. The attributes
+are described by the (optional) hash ref C<\%attr>.
+
+ my $csv = Text::CSV_PP->new ({ attributes ... });
+
+The following attributes are available:
+
+=head3 eol
+
+ my $csv = Text::CSV_PP->new ({ eol => $/ });
+ $csv->eol (undef);
+ my $eol = $csv->eol;
+
+The end-of-line string to add to rows for L or the record separator
+for L.
+
+When not passed in a B instance, the default behavior is to accept
+C<\n>, C<\r>, and C<\r\n>, so it is probably safer to not specify C at
+all. Passing C or the empty string behave the same.
+
+When not passed in a B instance, records are not terminated at
+all, so it is probably wise to pass something you expect. A safe choice for
+C on output is either C<$/> or C<\r\n>.
+
+Common values for C are C<"\012"> (C<\n> or Line Feed), C<"\015\012">
+(C<\r\n> or Carriage Return, Line Feed), and C<"\015"> (C<\r> or Carriage
+Return). The L|/eol> attribute cannot exceed 7 (ASCII) characters.
+
+If both C<$/> and L|/eol> equal C<"\015">, parsing lines that end on
+only a Carriage Return without Line Feed, will be Ld correct.
+
+=head3 eol_type
+
+ my $eol = $csv->eol_type;
+
+This read-only method returns the internal state of what is considered the
+valid EOL for parsing.
+
+=head3 sep_char
+
+ my $csv = Text::CSV_PP->new ({ sep_char => ";" });
+ $csv->sep_char (";");
+ my $c = $csv->sep_char;
+
+The char used to separate fields, by default a comma. (C<,>). Limited to a
+single-byte character, usually in the range from C<0x20> (space) to C<0x7E>
+(tilde). When longer sequences are required, use L|/sep>.
+
+The separation character can not be equal to the quote character or to the
+escape character.
+
+=head3 sep
+
+ my $csv = Text::CSV_PP->new ({ sep => "\N{FULLWIDTH COMMA}" });
+ $csv->sep (";");
+ my $sep = $csv->sep;
+
+The chars used to separate fields, by default undefined. Limited to 8 bytes.
+
+When set, overrules L|/sep_char>. If its length is one byte it
+acts as an alias to L|/sep_char>.
+
+=head3 quote_char
+
+ my $csv = Text::CSV_PP->new ({ quote_char => "'" });
+ $csv->quote_char (undef);
+ my $c = $csv->quote_char;
+
+The character to quote fields containing blanks or binary data, by default
+the double quote character (C<">). A value of undef suppresses quote chars
+(for simple cases only). Limited to a single-byte character, usually in the
+range from C<0x20> (space) to C<0x7E> (tilde). When longer sequences are
+required, use L|/quote>.
+
+C can not be equal to L|/sep_char>.
+
+=head3 quote
+
+ my $csv = Text::CSV_PP->new ({ quote => "\N{FULLWIDTH QUOTATION MARK}" });
+ $csv->quote ("'");
+ my $quote = $csv->quote;
+
+The chars used to quote fields, by default undefined. Limited to 8 bytes.
+
+When set, overrules L|/quote_char>. If its length is one byte
+it acts as an alias to L|/quote_char>.
+
+This method does not support C. Use L|/quote_char> to
+disable quotation.
+
+=head3 escape_char
+
+ my $csv = Text::CSV_PP->new ({ escape_char => "\\" });
+ $csv->escape_char (":");
+ my $c = $csv->escape_char;
+
+The character to escape certain characters inside quoted fields. This is
+limited to a single-byte character, usually in the range from C<0x20>
+(space) to C<0x7E> (tilde).
+
+The C defaults to being the double-quote mark (C<">). In other
+words the same as the default L|/quote_char>. This means that
+doubling the quote mark in a field escapes it:
+
+ "foo","bar","Escape ""quote mark"" with two ""quote marks""","baz"
+
+If you change the L|/quote_char> without changing the
+C, the C will still be the double-quote (C<">).
+If instead you want to escape the L|/quote_char> by doubling
+it you will need to also change the C to be the same as what
+you have changed the L|/quote_char> to.
+
+Setting C to C or C<""> will completely disable escapes
+and is greatly discouraged. This will also disable C.
+
+The escape character can not be equal to the separation character.
+
+=head3 binary
+
+ my $csv = Text::CSV_PP->new ({ binary => 1 });
+ $csv->binary (0);
+ my $f = $csv->binary;
+
+If this attribute is C<1>, you may use binary characters in quoted fields,
+including line feeds, carriage returns and C bytes. (The latter could
+be escaped as C<"0>.) By default this feature is off.
+
+If a string is marked UTF8, C will be turned on automatically when
+binary characters other than C and C are encountered. Note that a
+simple string like C<"\x{00a0}"> might still be binary, but not marked UTF8,
+so setting C<< { binary => 1 } >> is still a wise option.
+
+=head3 strict
+
+ my $csv = Text::CSV_PP->new ({ strict => 1 });
+ $csv->strict (0);
+ my $f = $csv->strict;
+
+If this attribute is set to C<1>, any row that parses to a different number
+of fields than the previous row will cause the parser to throw error 2014.
+
+Empty rows or rows that result in no fields (like comment lines) are exempt
+from these checks.
+
+=head3 strict_eol
+
+ my $csv = Text::CSV_PP->new ({ strict_eol => 1 });
+ $csv->strict_eol (0);
+ my $f = $csv->strict_eol;
+
+If this attribute is set to C<0>, no EOL consistency checks are done.
+
+If this attribute is set to C<1>, any row that parses with a EOL other than
+the EOL from the first row will cause a warning. The error will be ignored
+and parsing continues. This warning is only thrown once. Note that in data
+with various different line endings, C<\r\r> will still throw an error that
+cannot be ignored.
+
+If this attribute is set to C<2> or higher, any row that parses with a EOL
+other than the EOL from the first row will cause error C<2016> to be thrown.
+The line being parsed to this error might not be stored in the result.
+
+=head3 skip_empty_rows
+
+ my $csv = Text::CSV_PP->new ({ skip_empty_rows => 1 });
+ $csv->skip_empty_rows ("eof");
+ my $f = $csv->skip_empty_rows;
+
+This attribute defines the behavior for empty rows: an L immediately
+following the start of line. Default behavior is to return one single empty
+field.
+
+This attribute is only used in parsing. This attribute is ineffective when
+using L and L.
+
+Possible values for this attribute are
+
+=over 2
+
+=item 0 | undef
+
+ my $csv = Text::CSV_PP->new ({ skip_empty_rows => 0 });
+ $csv->skip_empty_rows (undef);
+
+No special action is taken. The result will be one single empty field.
+
+=item 1 | "skip"
+
+ my $csv = Text::CSV_PP->new ({ skip_empty_rows => 1 });
+ $csv->skip_empty_rows ("skip");
+
+The row will be skipped.
+
+=item 2 | "eof" | "stop"
+
+ my $csv = Text::CSV_PP->new ({ skip_empty_rows => 2 });
+ $csv->skip_empty_rows ("eof");
+
+The parsing will stop as if an L was detected.
+
+=item 3 | "die"
+
+ my $csv = Text::CSV_PP->new ({ skip_empty_rows => 3 });
+ $csv->skip_empty_rows ("die");
+
+The parsing will stop. The internal error code will be set to 2015 and the
+parser will C.
+
+=item 4 | "croak"
+
+ my $csv = Text::CSV_PP->new ({ skip_empty_rows => 4 });
+ $csv->skip_empty_rows ("croak");
+
+The parsing will stop. The internal error code will be set to 2015 and the
+parser will C.
+
+=item 5 | "error"
+
+ my $csv = Text::CSV_PP->new ({ skip_empty_rows => 5 });
+ $csv->skip_empty_rows ("error");
+
+The parsing will fail. The internal error code will be set to 2015.
+
+=item callback
+
+ my $csv = Text::CSV_PP->new ({ skip_empty_rows => sub { [] } });
+ $csv->skip_empty_rows (sub { [ 42, $., undef, "empty" ] });
+
+The callback is invoked and its result used instead. If you want the parse
+to stop after the callback, make sure to return a false value.
+
+The returned value from the callback should be an array-ref. Any other type
+will cause the parse to stop, so these are equivalent in behavior:
+
+ csv (in => $fh, skip_empty_rows => "stop");
+ csv (in => $fh. skip_empty_rows => sub { 0; });
+
+=back
+
+Without arguments, the current value is returned: C<0>, C<1>, C, C,
+C or the callback.
+
+=head3 formula_handling
+
+Alias for L
+
+=head3 formula
+
+ my $csv = Text::CSV_PP->new ({ formula => "none" });
+ $csv->formula ("none");
+ my $f = $csv->formula;
+
+This defines the behavior of fields containing I. As formulas are
+considered dangerous in spreadsheets, this attribute can define an optional
+action to be taken if a field starts with an equal sign (C<=>).
+
+For purpose of code-readability, this can also be written as
+
+ my $csv = Text::CSV_PP->new ({ formula_handling => "none" });
+ $csv->formula_handling ("none");
+ my $f = $csv->formula_handling;
+
+Possible values for this attribute are
+
+=over 2
+
+=item none
+
+Take no specific action. This is the default.
+
+ $csv->formula ("none");
+
+=item die
+
+Cause the process to C whenever a leading C<=> is encountered.
+
+ $csv->formula ("die");
+
+=item croak
+
+Cause the process to C whenever a leading C<=> is encountered. (See
+L)
+
+ $csv->formula ("croak");
+
+=item diag
+
+Report position and content of the field whenever a leading C<=> is found.
+The value of the field is unchanged.
+
+ $csv->formula ("diag");
+
+=item empty
+
+Replace the content of fields that start with a C<=> with the empty string.
+
+ $csv->formula ("empty");
+ $csv->formula ("");
+
+=item undef
+
+Replace the content of fields that start with a C<=> with C.
+
+ $csv->formula ("undef");
+ $csv->formula (undef);
+
+=item a callback
+
+Modify the content of fields that start with a C<=> with the return-value
+of the callback. The original content of the field is available inside the
+callback as C<$_>;
+
+ # Replace all formula's with 42
+ $csv->formula (sub { 42; });
+
+ # same as $csv->formula ("empty") but slower
+ $csv->formula (sub { "" });
+
+ # Allow =4+12
+ $csv->formula (sub { s/^=(\d+\+\d+)$/$1/eer });
+
+ # Allow more complex calculations
+ $csv->formula (sub { eval { s{^=([-+*/0-9()]+)$}{$1}ee }; $_ });
+
+=back
+
+All other values will give a warning and then fallback to C.
+
+=head3 decode_utf8
+
+ my $csv = Text::CSV_PP->new ({ decode_utf8 => 1 });
+ $csv->decode_utf8 (0);
+ my $f = $csv->decode_utf8;
+
+This attributes defaults to TRUE.
+
+While I, fields that are valid UTF-8, are automatically set to be
+UTF-8, so that
+
+ $csv->parse ("\xC4\xA8\n");
+
+results in
+
+ PV("\304\250"\0) [UTF8 "\x{128}"]
+
+Sometimes it might not be a desired action. To prevent those upgrades, set
+this attribute to false, and the result will be
+
+ PV("\304\250"\0)
+
+=head3 auto_diag
+
+ my $csv = Text::CSV_PP->new ({ auto_diag => 1 });
+ $csv->auto_diag (2);
+ my $l = $csv->auto_diag;
+
+Set this attribute to a number between C<1> and C<9> causes L
+to be automatically called in void context upon errors.
+
+In case of error C<2012 - EOF>, this call will be void.
+
+If C is set to a numeric value greater than C<1>, it will C
+on errors instead of C. If set to anything unrecognized, it will be
+silently ignored.
+
+Future extensions to this feature will include more reliable auto-detection
+of C being active in the scope of which the error occurred which
+will increment the value of C with C<1> the moment the error is
+detected.
+
+=head3 diag_verbose
+
+ my $csv = Text::CSV_PP->new ({ diag_verbose => 1 });
+ $csv->diag_verbose (2);
+ my $l = $csv->diag_verbose;
+
+Set the verbosity of the output triggered by C. Currently only
+adds the current input-record-number (if known) to the diagnostic output
+with an indication of the position of the error.
+
+=head3 blank_is_undef
+
+ my $csv = Text::CSV_PP->new ({ blank_is_undef => 1 });
+ $csv->blank_is_undef (0);
+ my $f = $csv->blank_is_undef;
+
+Under normal circumstances, C data makes no distinction between quoted-
+and unquoted empty fields. These both end up in an empty string field once
+read, thus
+
+ 1,"",," ",2
+
+is read as
+
+ ("1", "", "", " ", "2")
+
+When I C files with either L|/always_quote>
+or L|/quote_empty> set, the unquoted I field is the
+result of an undefined value. To enable this distinction when I
+C data, the C attribute will cause unquoted empty
+fields to be set to C, causing the above to be parsed as
+
+ ("1", "", undef, " ", "2")
+
+Note that this is specifically important when loading C fields into a
+database that allows C values, as the perl equivalent for C is
+C in L land.
+
+=head3 empty_is_undef
+
+ my $csv = Text::CSV_PP->new ({ empty_is_undef => 1 });
+ $csv->empty_is_undef (0);
+ my $f = $csv->empty_is_undef;
+
+Going one step further than L|/blank_is_undef>, this
+attribute converts all empty fields to C, so
+
+ 1,"",," ",2
+
+is read as
+
+ (1, undef, undef, " ", 2)
+
+Note that this affects only fields that are originally empty, not fields
+that are empty after stripping allowed whitespace. YMMV.
+
+=head3 allow_whitespace
+
+ my $csv = Text::CSV_PP->new ({ allow_whitespace => 1 });
+ $csv->allow_whitespace (0);
+ my $f = $csv->allow_whitespace;
+
+When this option is set to true, the whitespace (C's and C's)
+surrounding the separation character is removed when parsing. If either
+C or C is one of the three characters L|/sep_char>,
+L|/quote_char>, or L|/escape_char> it will not
+be considered whitespace.
+
+Now lines like:
+
+ 1 , "foo" , bar , 3 , zapp
+
+are parsed as valid C, even though it violates the C specs.
+
+Note that B whitespace is stripped from both start and end of each
+field. That would make it I than a I to enable parsing bad
+C lines, as
+
+ 1, 2.0, 3, ape , monkey
+
+will now be parsed as
+
+ ("1", "2.0", "3", "ape", "monkey")
+
+even if the original line was perfectly acceptable C.
+
+=head3 allow_loose_quotes
+
+ my $csv = Text::CSV_PP->new ({ allow_loose_quotes => 1 });
+ $csv->allow_loose_quotes (0);
+ my $f = $csv->allow_loose_quotes;
+
+By default, parsing unquoted fields containing L|/quote_char>
+characters like
+
+ 1,foo "bar" baz,42
+
+would result in parse error 2034. Though it is still bad practice to allow
+this format, we cannot help the fact that some vendors make their
+applications spit out lines styled this way.
+
+If there is B bad C data, like
+
+ 1,"foo "bar" baz",42
+
+or
+
+ 1,""foo bar baz"",42
+
+there is a way to get this data-line parsed and leave the quotes inside the
+quoted field as-is. This can be achieved by setting C
+B making sure that the L|/escape_char> is I equal
+to L|/quote_char>.
+
+=head3 allow_loose_escapes
+
+ my $csv = Text::CSV_PP->new ({ allow_loose_escapes => 1 });
+ $csv->allow_loose_escapes (0);
+ my $f = $csv->allow_loose_escapes;
+
+Parsing fields that have L|/escape_char> characters that
+escape characters that do not need to be escaped, like:
+
+ my $csv = Text::CSV_PP->new ({ escape_char => "\\" });
+ $csv->parse (qq{1,"my bar\'s",baz,42});
+
+would result in parse error 2025. Though it is bad practice to allow this
+format, this attribute enables you to treat all escape character sequences
+equal.
+
+=head3 allow_unquoted_escape
+
+ my $csv = Text::CSV_PP->new ({ allow_unquoted_escape => 1 });
+ $csv->allow_unquoted_escape (0);
+ my $f = $csv->allow_unquoted_escape;
+
+A backward compatibility issue where L|/escape_char> differs
+from L|/quote_char> prevents L|/escape_char>
+to be in the first position of a field. If L|/quote_char> is
+equal to the default C<"> and L|/escape_char> is set to C<\>,
+this would be illegal:
+
+ 1,\0,2
+
+Setting this attribute to C<1> might help to overcome issues with backward
+compatibility and allow this style.
+
+=head3 always_quote
+
+ my $csv = Text::CSV_PP->new ({ always_quote => 1 });
+ $csv->always_quote (0);
+ my $f = $csv->always_quote;
+
+By default the generated fields are quoted only if they I to be. For
+example, if they contain the separator character. If you set this attribute
+to C<1> then I defined fields will be quoted. (C fields are not
+quoted, see L). This makes it quite often easier to handle
+exported data in external applications.
+
+=head3 quote_space
+
+ my $csv = Text::CSV_PP->new ({ quote_space => 1 });
+ $csv->quote_space (0);
+ my $f = $csv->quote_space;
+
+By default, a space in a field would trigger quotation. As no rule exists
+this to be forced in C, nor any for the opposite, the default is true
+for safety. You can exclude the space from this trigger by setting this
+attribute to 0.
+
+=head3 quote_empty
+
+ my $csv = Text::CSV_PP->new ({ quote_empty => 1 });
+ $csv->quote_empty (0);
+ my $f = $csv->quote_empty;
+
+By default the generated fields are quoted only if they I to be. An
+empty (defined) field does not need quotation. If you set this attribute to
+C<1> then I defined fields will be quoted. (C fields are not
+quoted, see L). See also L|/always_quote>.
+
+=head3 quote_binary
+
+ my $csv = Text::CSV_PP->new ({ quote_binary => 1 });
+ $csv->quote_binary (0);
+ my $f = $csv->quote_binary;
+
+By default, all "unsafe" bytes inside a string cause the combined field to
+be quoted. By setting this attribute to C<0>, you can disable that trigger
+for bytes C<< >= 0x7F >>.
+
+=head3 escape_null
+
+ my $csv = Text::CSV_PP->new ({ escape_null => 1 });
+ $csv->escape_null (0);
+ my $f = $csv->escape_null;
+
+By default, a C byte in a field would be escaped. This option enables
+you to treat the C byte as a simple binary character in binary mode
+(the C<< { binary => 1 } >> is set). The default is true. You can prevent
+C escapes by setting this attribute to C<0>.
+
+When the C attribute is set to undefined, this attribute will
+be set to false.
+
+The default setting will encode "=\x00=" as
+
+ "="0="
+
+With C set, this will result in
+
+ "=\x00="
+
+The default when using the C function is C.
+
+For backward compatibility reasons, the deprecated old name C
+is still recognized.
+
+=head3 keep_meta_info
+
+ my $csv = Text::CSV_PP->new ({ keep_meta_info => 1 });
+ $csv->keep_meta_info (0);
+ my $f = $csv->keep_meta_info;
+
+By default, the parsing of input records is as simple and fast as possible.
+However, some parsing information - like quotation of the original field -
+is lost in that process. Setting this flag to true enables retrieving that
+information after parsing with the methods L, L,
+and L described below. Default is false for performance.
+
+If you set this attribute to a value greater than 9, then you can control
+output quotation style like it was used in the input of the the last parsed
+record (unless quotation was added because of other reasons).
+
+ my $csv = Text::CSV_PP->new ({
+ binary => 1,
+ keep_meta_info => 1,
+ quote_space => 0,
+ });
+
+ my $row = $csv->parse (q{1,,"", ," ",f,"g","h""h",help,"help"});
+
+ $csv->print (*STDOUT, \@row);
+ # 1,,, , ,f,g,"h""h",help,help
+ $csv->keep_meta_info (11);
+ $csv->print (*STDOUT, \@row);
+ # 1,,"", ," ",f,"g","h""h",help,"help"
+
+=head3 undef_str
+
+ my $csv = Text::CSV_PP->new ({ undef_str => "\\N" });
+ $csv->undef_str (undef);
+ my $s = $csv->undef_str;
+
+This attribute optionally defines the output of undefined fields. The value
+passed is not changed at all, so if it needs quotation, the quotation needs
+to be included in the value of the attribute. Use with caution, as passing
+a value like C<",",,,,"""> will for sure mess up your output. The default
+for this attribute is C, meaning no special treatment.
+
+This attribute is useful when exporting CSV data to be imported in custom
+loaders, like for MySQL, that recognize special sequences for C data.
+
+This attribute has no meaning when parsing CSV data.
+
+=head3 comment_str
+
+ my $csv = Text::CSV_PP->new ({ comment_str => "#" });
+ $csv->comment_str (undef);
+ my $s = $csv->comment_str;
+
+This attribute optionally defines a string to be recognized as comment. If
+this attribute is defined, all lines starting with this sequence will not
+be parsed as CSV but skipped as comment.
+
+This attribute has no meaning when generating CSV.
+
+Comment strings that start with any of the special characters/sequences are
+not supported (so it cannot start with any of L, L,
+L, L, L, or L).
+
+For convenience, C is an alias for C.
+
+=head3 verbatim
+
+ my $csv = Text::CSV_PP->new ({ verbatim => 1 });
+ $csv->verbatim (0);
+ my $f = $csv->verbatim;
+
+This is a quite controversial attribute to set, but makes some hard things
+possible.
+
+The rationale behind this attribute is to tell the parser that the normally
+special characters newline (C) and Carriage Return (C) will not be
+special when this flag is set, and be dealt with as being ordinary binary
+characters. This will ease working with data with embedded newlines.
+
+When C is used with L, L auto-C's
+every line.
+
+Imagine a file format like
+
+ M^^Hans^Janssen^Klas 2\n2A^Ja^11-06-2007#\r\n
+
+where, the line ending is a very specific C<"#\r\n">, and the sep_char is a
+C<^> (caret). None of the fields is quoted, but embedded binary data is
+likely to be present. With the specific line ending, this should not be too
+hard to detect.
+
+By default, Text::CSV_PP' parse function is instructed to only know about
+C<"\n"> and C<"\r"> to be legal line endings, and so has to deal with the
+embedded newline as a real C, so it can scan the next line if
+binary is true, and the newline is inside a quoted field. With this option,
+we tell L to parse the line as if C<"\n"> is just nothing more than
+a binary character.
+
+For L this means that the parser has no more idea about line ending
+and L Cs line endings on reading.
+
+=head3 types
+
+A set of column types; the attribute is immediately passed to the L
+method.
+
+=head3 callbacks
+
+See the L section below.
+
+=head3 accessors
+
+To sum it up,
+
+ $csv = Text::CSV_PP->new ();
+
+is equivalent to
+
+ $csv = Text::CSV_PP->new ({
+ eol => undef, # \r, \n, or \r\n
+ sep_char => ',',
+ sep => undef,
+ quote_char => '"',
+ quote => undef,
+ escape_char => '"',
+ binary => 0,
+ decode_utf8 => 1,
+ auto_diag => 0,
+ diag_verbose => 0,
+ blank_is_undef => 0,
+ empty_is_undef => 0,
+ allow_whitespace => 0,
+ allow_loose_quotes => 0,
+ allow_loose_escapes => 0,
+ allow_unquoted_escape => 0,
+ always_quote => 0,
+ quote_empty => 0,
+ quote_space => 1,
+ escape_null => 1,
+ quote_binary => 1,
+ keep_meta_info => 0,
+ strict => 0,
+ skip_empty_rows => 0,
+ formula => 0,
+ verbatim => 0,
+ undef_str => undef,
+ comment_str => undef,
+ types => undef,
+ callbacks => undef,
+ });
+
+For all of the above mentioned flags, an accessor method is available where
+you can inquire the current value, or change the value
+
+ my $quote = $csv->quote_char;
+ $csv->binary (1);
+
+It is not wise to change these settings halfway through writing C data
+to a stream. If however you want to create a new stream using the available
+C object, there is no harm in changing them.
+
+If the L constructor call fails, it returns C, and makes the
+fail reason available through the L method.
+
+ $csv = Text::CSV_PP->new ({ ecs_char => 1 }) or
+ die "".Text::CSV_PP->error_diag ();
+
+L will return a string like
+
+ "INI - Unknown attribute 'ecs_char'"
+
+=head2 known_attributes
+
+ @attr = Text::CSV_PP->known_attributes;
+ @attr = Text::CSV_PP::known_attributes;
+ @attr = $csv->known_attributes;
+
+This method will return an ordered list of all the supported attributes as
+described above. This can be useful for knowing what attributes are valid
+in classes that use or extend Text::CSV_PP.
+
+=head2 print
+
+ $status = $csv->print ($fh, $colref);
+
+Similar to L + L + L, but much more efficient.
+It expects an array ref as input (not an array!) and the resulting string
+is not really created, but immediately written to the C<$fh> object,
+typically an IO handle or any other object that offers a L method.
+
+For performance reasons C does not create a result string, so all
+L, L, L, and L methods will return
+undefined information after executing this method.
+
+If C<$colref> is C (explicit, not through a variable argument) and
+L was used to specify fields to be printed, it is possible
+to make performance improvements, as otherwise data would have to be copied
+as arguments to the method call:
+
+ $csv->bind_columns (\($foo, $bar));
+ $status = $csv->print ($fh, undef);
+
+A short benchmark
+
+ my @data = ("aa" .. "zz");
+ $csv->bind_columns (\(@data));
+
+ $csv->print ($fh, [ @data ]); # 11800 recs/sec
+ $csv->print ($fh, \@data ); # 57600 recs/sec
+ $csv->print ($fh, undef ); # 48500 recs/sec
+
+=head2 say
+
+ $status = $csv->say ($fh, $colref);
+
+Like L|/print>, but L|/eol> defaults to C<$\>.
+
+=head2 print_hr
+
+ $csv->print_hr ($fh, $ref);
+
+Provides an easy way to print a C<$ref> (as fetched with L)
+provided the column names are set with L.
+
+It is just a wrapper method with basic parameter checks over
+
+ $csv->print ($fh, [ map { $ref->{$_} } $csv->column_names ]);
+
+=head2 combine
+
+ $status = $csv->combine (@fields);
+
+This method constructs a C record from C<@fields>, returning success
+or failure. Failure can result from lack of arguments or an argument that
+contains an invalid character. Upon success, L can be called to
+retrieve the resultant C string. Upon failure, the value returned by
+L is undefined and L could be called to retrieve the
+invalid argument.
+
+=head2 string
+
+ $line = $csv->string ();
+
+This method returns the input to L or the resultant C string
+of L, whichever was called more recently.
+
+=head2 getline
+
+ $colref = $csv->getline ($fh);
+
+This is the counterpart to L, as L is the counterpart to
+L: it parses a row from the C<$fh> handle using the L
+method associated with C<$fh> and parses this row into an array ref. This
+array ref is returned by the function or C for failure. When C<$fh>
+does not support C, you are likely to hit errors.
+
+When fields are bound with L the return value is a reference
+to an empty list.
+
+The L, L, and L methods are meaningless again.
+
+=head2 getline_all
+
+ $arrayref = $csv->getline_all ($fh);
+ $arrayref = $csv->getline_all ($fh, $offset);
+ $arrayref = $csv->getline_all ($fh, $offset, $length);
+
+This will return a reference to a list of L results.
+In this call, C is disabled. If C<$offset> is negative, as
+with C, only the last C records of C<$fh> are taken
+into consideration. Parameters C<$offset> and C<$length> are expected to be
+integers. Non-integer values are interpreted as integer without check.
+
+Given a CSV file with 10 lines:
+
+ lines call
+ ----- ---------------------------------------------------------
+ 0..9 $csv->getline_all ($fh) # all
+ 0..9 $csv->getline_all ($fh, 0) # all
+ 8..9 $csv->getline_all ($fh, 8) # start at 8
+ - $csv->getline_all ($fh, 0, 0) # start at 0 first 0 rows
+ 0..4 $csv->getline_all ($fh, 0, 5) # start at 0 first 5 rows
+ 4..5 $csv->getline_all ($fh, 4, 2) # start at 4 first 2 rows
+ 8..9 $csv->getline_all ($fh, -2) # last 2 rows
+ 6..7 $csv->getline_all ($fh, -4, 2) # first 2 of last 4 rows
+
+=head2 getline_hr
+
+The L and L methods work together to allow you
+to have rows returned as hashrefs. You must call L first to
+declare your column names.
+
+ $csv->column_names (qw( code name price description ));
+ $hr = $csv->getline_hr ($fh);
+ print "Price for $hr->{name} is $hr->{price} EUR\n";
+
+L will croak if called before L.
+
+Note that L creates a hashref for every row and will be much
+slower than the combined use of L and L but still
+offering the same easy to use hashref inside the loop:
+
+ my @cols = @{$csv->getline ($fh)};
+ $csv->column_names (@cols);
+ while (my $row = $csv->getline_hr ($fh)) {
+ print $row->{price};
+ }
+
+Could easily be rewritten to the much faster:
+
+ my @cols = @{$csv->getline ($fh)};
+ my $row = {};
+ $csv->bind_columns (\@{$row}{@cols});
+ while ($csv->getline ($fh)) {
+ print $row->{price};
+ }
+
+Your mileage may vary for the size of the data and the number of rows. With
+perl-5.14.2 the comparison for a 100_000 line file with 14 columns:
+
+ Rate hashrefs getlines
+ hashrefs 1.00/s -- -76%
+ getlines 4.15/s 313% --
+
+=head2 getline_hr_all
+
+ $arrayref = $csv->getline_hr_all ($fh);
+ $arrayref = $csv->getline_hr_all ($fh, $offset);
+ $arrayref = $csv->getline_hr_all ($fh, $offset, $length);
+
+This will return a reference to a list of L
+results. In this call, L|/keep_meta_info> is disabled.
+
+=head2 parse
+
+ $status = $csv->parse ($line);
+
+This method decomposes a C string into fields, returning success or
+failure. Failure can result from a lack of argument or the given C
+string is improperly formatted. Upon success, L can be called to
+retrieve the decomposed fields. Upon failure calling L will return
+undefined data and L can be called to retrieve the invalid
+argument.
+
+You may use the L method for setting column types. See L'
+description below.
+
+The C<$line> argument is supposed to be a simple scalar. Everything else is
+supposed to croak and set error 1500.
+
+=head2 fragment
+
+This function tries to implement RFC7111 (URI Fragment Identifiers for the
+text/csv Media Type) - https://datatracker.ietf.org/doc/html/rfc7111
+
+ my $AoA = $csv->fragment ($fh, $spec);
+
+In specifications, C<*> is used to specify the I item, a dash (C<->)
+to indicate a range. All indices are C<1>-based: the first row or column
+has index C<1>. Selections can be combined with the semi-colon (C<;>).
+
+When using this method in combination with L, the returned
+reference will point to a list of hashes instead of a list of lists. A
+disjointed cell-based combined selection might return rows with different
+number of columns making the use of hashes unpredictable.
+
+ $csv->column_names ("Name", "Age");
+ my $AoH = $csv->fragment ($fh, "col=3;8");
+
+If the L callback is active, it is also called on every line
+parsed and skipped before the fragment.
+
+=over 2
+
+=item row
+
+ row=4
+ row=5-7
+ row=6-*
+ row=1-2;4;6-*
+
+=item col
+
+ col=2
+ col=1-3
+ col=4-*
+ col=1-2;4;7-*
+
+=item cell
+
+In cell-based selection, the comma (C<,>) is used to pair row and column
+
+ cell=4,1
+
+The range operator (C<->) using Cs can be used to define top-left and
+bottom-right C location
+
+ cell=3,1-4,6
+
+The C<*> is only allowed in the second part of a pair
+
+ cell=3,2-*,2 # row 3 till end, only column 2
+ cell=3,2-3,* # column 2 till end, only row 3
+ cell=3,2-*,* # strip row 1 and 2, and column 1
+
+Cells and cell ranges may be combined with C<;>, possibly resulting in rows
+with different numbers of columns
+
+ cell=1,1-2,2;3,3-4,4;1,4;4,1
+
+Disjointed selections will only return selected cells. The cells that are
+not specified will not be included in the returned set, not even as
+C. As an example given a C like
+
+ 11,12,13,...19
+ 21,22,...28,29
+ : :
+ 91,...97,98,99
+
+with C will return:
+
+ 11,12,14
+ 21,22
+ 33,34
+ 41,43,44
+
+Overlapping cell-specs will return those cells only once, So
+C will return:
+
+ 11,12,13
+ 21,22,23,24
+ 31,32,33,34
+ 42,43,44
+
+=back
+
+L does B allow different
+types of specs to be combined (either C I C I C| ).
+Passing an invalid fragment specification will croak and set error 2013.
+
+=head2 column_names
+
+Set the "keys" that will be used in the L | | | | | calls. If no keys
+(column names) are passed, it will return the current setting as a list.
+
+L accepts a list of scalars (the column names) or a single
+array_ref, so you can pass the return value from L too:
+
+ $csv->column_names ($csv->getline ($fh));
+
+L does B checking on duplicates at all, which might lead
+to unexpected results. Undefined entries will be replaced with the string
+C<"\cAUNDEF\cA">, so
+
+ $csv->column_names (undef, "", "name", "name");
+ $hr = $csv->getline_hr ($fh);
+
+will set C<< $hr->{"\cAUNDEF\cA"} >> to the 1st field, C<< $hr->{""} >> to
+the 2nd field, and C<< $hr->{name} >> to the 4th field, discarding the 3rd
+field.
+
+L croaks on invalid arguments.
+
+=head2 header
+
+This method does NOT work in perl-5.6.x
+
+Parse the CSV header and set L|/sep>, column_names and encoding.
+
+ my @hdr = $csv->header ($fh);
+ $csv->header ($fh, { sep_set => [ ";", ",", "|", "\t" ] });
+ $csv->header ($fh, { detect_bom => 1, munge_column_names => "lc" });
+
+The first argument should be a file handle.
+
+This method resets some object properties, as it is supposed to be invoked
+only once per file or stream. It will leave attributes C and
+C alone if setting column names is disabled. Reading headers
+on previously process objects might fail on perl-5.8.0 and older.
+
+Assuming that the file opened for parsing has a header, and the header does
+not contain problematic characters like embedded newlines, read the first
+line from the open handle then auto-detect whether the header separates the
+column names with a character from the allowed separator list.
+
+If any of the allowed separators matches, and none of the I allowed
+separators match, set L|/sep> to that separator for the current
+CSV_PP instance and use it to parse the first line, map those to lowercase,
+and use that to set the instance L:
+
+ my $csv = Text::CSV_PP->new ({ binary => 1, auto_diag => 1 });
+ open my $fh, "<", "file.csv";
+ binmode $fh; # for Windows
+ $csv->header ($fh);
+ while (my $row = $csv->getline_hr ($fh)) {
+ ...
+ }
+
+If the header is empty, contains more than one unique separator out of the
+allowed set, contains empty fields, or contains identical fields (after
+folding), it will croak with error 1010, 1011, 1012, or 1013 respectively.
+
+If the header contains embedded newlines or is not valid CSV in any other
+way, this method will croak and leave the parse error untouched.
+
+A successful call to C will always set the L|/sep> of the
+C<$csv> object. This behavior can not be disabled.
+
+=head3 return value
+
+On error this method will croak.
+
+In list context, the headers will be returned whether they are used to set
+L or not.
+
+In scalar context, the instance itself is returned. B: the values as
+found in the header will effectively be B if C is
+false.
+
+=head3 Options
+
+=over 2
+
+=item sep_set
+
+ $csv->header ($fh, { sep_set => [ ";", ",", "|", "\t" ] });
+
+The list of legal separators defaults to C<[ ";", "," ]> and can be changed
+by this option. As this is probably the most often used option, it can be
+passed on its own as an unnamed argument:
+
+ $csv->header ($fh, [ ";", ",", "|", "\t", "::", "\x{2063}" ]);
+
+Multi-byte sequences are allowed, both multi-character and Unicode. See
+L|/sep>.
+
+=item detect_bom
+
+ $csv->header ($fh, { detect_bom => 1 });
+
+The default behavior is to detect if the header line starts with a BOM. If
+the header has a BOM, use that to set the encoding of C<$fh>. This default
+behavior can be disabled by passing a false value to C.
+
+Supported encodings from BOM are: UTF-8, UTF-16BE, UTF-16LE, UTF-32BE, and
+UTF-32LE. BOM also supports UTF-1, UTF-EBCDIC, SCSU, BOCU-1, and GB-18030
+but L does not (yet). UTF-7 is not supported.
+
+If a supported BOM was detected as start of the stream, it is stored in the
+object attribute C.
+
+ my $enc = $csv->{ENCODING};
+
+The encoding is used with C on C<$fh>.
+
+If the handle was opened in a (correct) encoding, this method will B
+alter the encoding, as it checks the leading B of the first line. In
+case the stream starts with a decoded BOM (C), C<{ENCODING}> will be
+C<""> (empty) instead of the default C.
+
+=item munge_column_names
+
+This option offers the means to modify the column names into something that
+is most useful to the application. The default is to map all column names
+to lower case.
+
+ $csv->header ($fh, { munge_column_names => "lc" });
+
+The following values are available:
+
+ lc - lower case
+ uc - upper case
+ db - valid DB field names
+ none - do not change
+ \%hash - supply a mapping
+ \&cb - supply a callback
+
+=over 2
+
+=item Lower case
+
+ $csv->header ($fh, { munge_column_names => "lc" });
+
+The header is changed to all lower-case
+
+ $_ = lc;
+
+=item Upper case
+
+ $csv->header ($fh, { munge_column_names => "uc" });
+
+The header is changed to all upper-case
+
+ $_ = uc;
+
+=item Literal
+
+ $csv->header ($fh, { munge_column_names => "none" });
+
+=item Hash
+
+ $csv->header ($fh, { munge_column_names => { foo => "sombrero" });
+
+if a value does not exist, the original value is used unchanged
+
+=item Database
+
+ $csv->header ($fh, { munge_column_names => "db" });
+
+=over 2
+
+=item -
+
+lower-case
+
+=item -
+
+all sequences of non-word characters are replaced with an underscore
+
+=item -
+
+all leading underscores are removed
+
+=back
+
+ $_ = lc (s/\W+/_/gr =~ s/^_+//r);
+
+=item Callback
+
+ $csv->header ($fh, { munge_column_names => sub { fc } });
+ $csv->header ($fh, { munge_column_names => sub { "column_".$col++ } });
+ $csv->header ($fh, { munge_column_names => sub { lc (s/\W+/_/gr) } });
+
+As this callback is called in a C
+will be invoked on the opened stream to check if there is a BOM and set the
+encoding accordingly. This is equal to passing a true value in the option
+L|/detect_bom>.
+
+Encodings can be stacked, as supported by C:
+
+ # Using PerlIO::via::gzip
+ csv (in => \@csv,
+ out => "test.csv:via.gz",
+ encoding => ":via(gzip):encoding(utf-8)",
+ );
+ $aoa = csv (in => "test.csv:via.gz", encoding => ":via(gzip)");
+
+ # Using PerlIO::gzip
+ csv (in => \@csv,
+ out => "test.csv:via.gz",
+ encoding => ":gzip:encoding(utf-8)",
+ );
+ $aoa = csv (in => "test.csv:gzip.gz", encoding => ":gzip");
+
+=head3 detect_bom
+
+If C is given, the method L will be invoked on the
+opened stream to check if there is a BOM and set the encoding accordingly.
+
+C can be abbreviated to C.
+
+This is the same as setting L|/encoding> to C<"auto">.
+
+Note that as the method L is invoked, its default is to also set
+the headers.
+
+=head3 headers
+
+If this attribute is not given, the default behavior is to produce an array
+of arrays.
+
+If C is supplied, it should be an anonymous list of column names,
+an anonymous hashref, a coderef, or a literal flag: C, C, C,
+or C.
+
+=over 2
+
+=item skip
+
+When C is used, the header will not be included in the output.
+
+ my $aoa = csv (in => $fh, headers => "skip");
+
+C is invalid/ignored in combinations with L|/detect_bom>.
+
+=item auto
+
+If C is used, the first line of the C source will be read as the
+list of field headers and used to produce an array of hashes.
+
+ my $aoh = csv (in => $fh, headers => "auto");
+
+=item lc
+
+If C is used, the first line of the C source will be read as the
+list of field headers mapped to lower case and used to produce an array of
+hashes. This is a variation of C.
+
+ my $aoh = csv (in => $fh, headers => "lc");
+
+=item uc
+
+If C is used, the first line of the C source will be read as the
+list of field headers mapped to upper case and used to produce an array of
+hashes. This is a variation of C.
+
+ my $aoh = csv (in => $fh, headers => "uc");
+
+=item CODE
+
+If a coderef is used, the first line of the C source will be read as
+the list of mangled field headers in which each field is passed as the only
+argument to the coderef. This list is used to produce an array of hashes.
+
+ my $aoh = csv (in => $fh,
+ headers => sub { lc ($_[0]) =~ s/kode/code/gr });
+
+this example is a variation of using C where all occurrences of C
+are replaced with C.
+
+=item ARRAY
+
+If C is an anonymous list, the entries in the list will be used
+as field names. The first line is considered data instead of headers.
+
+ my $aoh = csv (in => $fh, headers => [qw( Foo Bar )]);
+ csv (in => $aoa, out => $fh, headers => [qw( code description price )]);
+
+=item HASH
+
+If C is a hash reference, this implies C, but header fields
+that exist as key in the hashref will be replaced by the value for that
+key. Given a CSV file like
+
+ post-kode,city,name,id number,fubble
+ 1234AA,Duckstad,Donald,13,"X313DF"
+
+using
+
+ csv (headers => { "post-kode" => "pc", "id number" => "ID" }, ...
+
+will return an entry like
+
+ { pc => "1234AA",
+ city => "Duckstad",
+ name => "Donald",
+ ID => "13",
+ fubble => "X313DF",
+ }
+
+=back
+
+See also L|/munge_column_names> and
+L|/set_column_names>.
+
+=head3 munge_column_names
+
+If C is set, the method L is invoked on the
+opened stream with all matching arguments to detect and set the headers.
+
+C can be abbreviated to C.
+
+=head3 key
+
+If passed, will default L|/headers> to C<"auto"> and return a
+hashref instead of an array of hashes. Allowed values are simple scalars or
+array-references where the first element is the joiner and the rest are the
+fields to join to combine the key.
+
+ my $ref = csv (in => "test.csv", key => "code");
+ my $ref = csv (in => "test.csv", key => [ ":" => "code", "color" ]);
+
+with test.csv like
+
+ code,product,price,color
+ 1,pc,850,gray
+ 2,keyboard,12,white
+ 3,mouse,5,black
+
+the first example will return
+
+ { 1 => {
+ code => 1,
+ color => 'gray',
+ price => 850,
+ product => 'pc'
+ },
+ 2 => {
+ code => 2,
+ color => 'white',
+ price => 12,
+ product => 'keyboard'
+ },
+ 3 => {
+ code => 3,
+ color => 'black',
+ price => 5,
+ product => 'mouse'
+ }
+ }
+
+the second example will return
+
+ { "1:gray" => {
+ code => 1,
+ color => 'gray',
+ price => 850,
+ product => 'pc'
+ },
+ "2:white" => {
+ code => 2,
+ color => 'white',
+ price => 12,
+ product => 'keyboard'
+ },
+ "3:black" => {
+ code => 3,
+ color => 'black',
+ price => 5,
+ product => 'mouse'
+ }
+ }
+
+The C attribute can be combined with L|/headers> for C
+date that has no header line, like
+
+ my $ref = csv (
+ in => "foo.csv",
+ headers => [qw( c_foo foo bar description stock )],
+ key => "c_foo",
+ );
+
+=head3 value
+
+Used to create key-value hashes.
+
+Only allowed when C is valid. A C can be either a single column
+label or an anonymous list of column labels. In the first case, the value
+will be a simple scalar value, in the latter case, it will be a hashref.
+
+ my $ref = csv (in => "test.csv", key => "code",
+ value => "price");
+ my $ref = csv (in => "test.csv", key => "code",
+ value => [ "product", "price" ]);
+ my $ref = csv (in => "test.csv", key => [ ":" => "code", "color" ],
+ value => "price");
+ my $ref = csv (in => "test.csv", key => [ ":" => "code", "color" ],
+ value => [ "product", "price" ]);
+
+with test.csv like
+
+ code,product,price,color
+ 1,pc,850,gray
+ 2,keyboard,12,white
+ 3,mouse,5,black
+
+the first example will return
+
+ { 1 => 850,
+ 2 => 12,
+ 3 => 5,
+ }
+
+the second example will return
+
+ { 1 => {
+ price => 850,
+ product => 'pc'
+ },
+ 2 => {
+ price => 12,
+ product => 'keyboard'
+ },
+ 3 => {
+ price => 5,
+ product => 'mouse'
+ }
+ }
+
+the third example will return
+
+ { "1:gray" => 850,
+ "2:white" => 12,
+ "3:black" => 5,
+ }
+
+the fourth example will return
+
+ { "1:gray" => {
+ price => 850,
+ product => 'pc'
+ },
+ "2:white" => {
+ price => 12,
+ product => 'keyboard'
+ },
+ "3:black" => {
+ price => 5,
+ product => 'mouse'
+ }
+ }
+
+=head3 keep_headers
+
+When using hashes, keep the column names into the arrayref passed, so all
+headers are available after the call in the original order.
+
+ my $aoh = csv (in => "file.csv", keep_headers => \my @hdr);
+
+This attribute can be abbreviated to C or passed as C.
+
+This attribute implies a default of C for the C attribute.
+
+The headers can also be kept internally to keep stable header order:
+
+ csv (in => csv (in => "file.csv", kh => "internal"),
+ out => "new.csv",
+ kh => "internal");
+
+where C can also be C<1>, C, or C. This is similar to
+
+ my @h;
+ csv (in => csv (in => "file.csv", kh => \@h),
+ out => "new.csv",
+ headers => \@h);
+
+=head3 fragment
+
+Only output the fragment as defined in the L method. This option
+is ignored when I C. See L.
+
+Combining all of them could give something like
+
+ use Text::CSV_PP qw( csv );
+ my $aoh = csv (
+ in => "test.txt",
+ encoding => "utf-8",
+ headers => "auto",
+ sep_char => "|",
+ fragment => "row=3;6-9;15-*",
+ );
+ say $aoh->[15]{Foo};
+
+=head3 sep_set
+
+If C is set, the method L is invoked on the opened stream
+to detect and set L|/sep_char> with the given set.
+
+C can be abbreviated to C. If neither C not C
+is given, but C is defined, C defaults to C<[ sep ]>. This is
+only supported for perl version 5.10 and up.
+
+Note that as the L method is invoked, its default is to also set
+the headers.
+
+=head3 set_column_names
+
+If C is passed, the method L is invoked on the
+opened stream with all arguments meant for L.
+
+If C is passed as a false value, the content of the first
+row is only preserved if the output is AoA:
+
+With an input-file like
+
+ bAr,foo
+ 1,2
+ 3,4,5
+
+This call
+
+ my $aoa = csv (in => $file, set_column_names => 0);
+
+will result in
+
+ [[ "bar", "foo" ],
+ [ "1", "2" ],
+ [ "3", "4", "5" ]]
+
+and
+
+ my $aoa = csv (in => $file, set_column_names => 0, munge => "none");
+
+will result in
+
+ [[ "bAr", "foo" ],
+ [ "1", "2" ],
+ [ "3", "4", "5" ]]
+
+=head3 csv
+
+The I L can also be called as a method or with an existing
+Text::CSV_PP object. This could help if the function is to be invoked a lot
+of times and the overhead of creating the object internally over and over
+again would be prevented by passing an existing instance.
+
+ my $csv = Text::CSV_PP->new ({ binary => 1, auto_diag => 1 });
+
+ my $aoa = $csv->csv (in => $fh);
+ my $aoa = csv (in => $fh, csv => $csv);
+
+both act the same. Running this 20000 times on a 20 lines CSV file, showed
+a 53% speedup.
+
+=head2 Callbacks
+
+Callbacks enable actions triggered from the I of Text::CSV_PP.
+
+While most of what this enables can easily be done in an unrolled loop as
+described in the L callbacks can be used to meet special demands
+or enhance the L function.
+
+=over 2
+
+=item error
+
+ $csv->callbacks (error => sub { $csv->SetDiag (0) });
+
+the C callback is invoked when an error occurs, but I when
+L is set to a true value. A callback is invoked with the values
+returned by L:
+
+ my ($c, $s);
+
+ sub ignore3006 {
+ my ($err, $msg, $pos, $recno, $fldno) = @_;
+ if ($err == 3006) {
+ # ignore this error
+ ($c, $s) = (undef, undef);
+ Text::CSV_PP->SetDiag (0);
+ }
+ # Any other error
+ return;
+ } # ignore3006
+
+ $csv->callbacks (error => \&ignore3006);
+ $csv->bind_columns (\$c, \$s);
+ while ($csv->getline ($fh)) {
+ # Error 3006 will not stop the loop
+ }
+
+=item after_parse
+
+ $csv->callbacks (after_parse => sub { push @{$_[1]}, "NEW" });
+ while (my $row = $csv->getline ($fh)) {
+ $row->[-1] eq "NEW";
+ }
+
+This callback is invoked after parsing with L only if no error
+occurred. The callback is invoked with two arguments: the current C
+parser object and an array reference to the fields parsed.
+
+The return code of the callback is ignored unless it is a reference to the
+string "skip", in which case the record will be skipped in L.
+
+ sub add_from_db {
+ my ($csv, $row) = @_;
+ $sth->execute ($row->[4]);
+ push @$row, $sth->fetchrow_array;
+ } # add_from_db
+
+ my $aoa = csv (in => "file.csv", callbacks => {
+ after_parse => \&add_from_db });
+
+This hook can be used for validation:
+
+=over 2
+
+=item FAIL
+
+Die if any of the records does not validate a rule:
+
+ after_parse => sub {
+ $_[1][4] =~ m/^[0-9]{4}\s?[A-Z]{2}$/ or
+ die "5th field does not have a valid Dutch zipcode";
+ }
+
+=item DEFAULT
+
+Replace invalid fields with a default value:
+
+ after_parse => sub { $_[1][2] =~ m/^\d+$/ or $_[1][2] = 0 }
+
+=item SKIP
+
+Skip records that have invalid fields (only applies to L):
+
+ after_parse => sub { $_[1][0] =~ m/^\d+$/ or return \"skip"; }
+
+=back
+
+=item before_print
+
+ my $idx = 1;
+ $csv->callbacks (before_print => sub { $_[1][0] = $idx++ });
+ $csv->print (*STDOUT, [ 0, $_ ]) for @members;
+
+This callback is invoked before printing with L only if no error
+occurred. The callback is invoked with two arguments: the current C
+parser object and an array reference to the fields passed.
+
+The return code of the callback is ignored.
+
+ sub max_4_fields {
+ my ($csv, $row) = @_;
+ @$row > 4 and splice @$row, 4;
+ } # max_4_fields
+
+ csv (in => csv (in => "file.csv"), out => *STDOUT,
+ callbacks => { before_print => \&max_4_fields });
+
+This callback is not active for L.
+
+=back
+
+=head3 Callbacks for csv ()
+
+The L allows for some callbacks that do not integrate in XS internals
+but only feature the L function.
+
+ csv (in => "file.csv",
+ callbacks => {
+ filter => { 6 => sub { $_ > 15 } }, # first
+ after_parse => sub { say "AFTER PARSE"; }, # first
+ after_in => sub { say "AFTER IN"; }, # second
+ on_in => sub { say "ON IN"; }, # third
+ },
+ );
+
+ csv (in => $aoh,
+ out => "file.csv",
+ callbacks => {
+ on_in => sub { say "ON IN"; }, # first
+ before_out => sub { say "BEFORE OUT"; }, # second
+ before_print => sub { say "BEFORE PRINT"; }, # third
+ },
+ );
+
+=over 2
+
+=item filter
+
+This callback can be used to filter records. It is called just after a new
+record has been scanned. The callback accepts a:
+
+=over 2
+
+=item hashref
+
+The keys are the index to the row (the field name or field number, 1-based)
+and the values are subs to return a true or false value.
+
+ csv (in => "file.csv", filter => {
+ 3 => sub { m/a/ }, # third field should contain an "a"
+ 5 => sub { length > 4 }, # length of the 5th field minimal 5
+ });
+
+ csv (in => "file.csv", filter => { foo => sub { $_ > 4 }});
+
+If the keys to the filter hash contain any character that is not a digit it
+will also implicitly set L to C<"auto"> unless L was
+already passed as argument. When headers are active, returning an array of
+hashes, the filter is not applicable to the header itself.
+
+All sub results should match, as in AND.
+
+The context of the callback sets C<$_> localized to the field indicated by
+the filter. The two arguments are as with all other callbacks, so the other
+fields in the current row can be seen:
+
+ filter => { 3 => sub { $_ > 100 ? $_[1][1] =~ m/A/ : $_[1][6] =~ m/B/ }}
+
+If the context is set to return a list of hashes (L is defined),
+the current record will also be available in the localized C<%_>:
+
+ filter => { 3 => sub { $_ > 100 && $_{foo} =~ m/A/ && $_{bar} < 1000 }}
+
+If the filter is used to I the content by changing C<$_>, make sure
+that the sub returns true in order not to have that record skipped:
+
+ filter => { 2 => sub { $_ = uc }}
+
+will upper-case the second field, and then skip it if the resulting content
+evaluates to false. To always accept, end with truth:
+
+ filter => { 2 => sub { $_ = uc; 1 }}
+
+=item coderef
+
+ csv (in => "file.csv", filter => sub { $n++; 0; });
+
+If the argument to C is a coderef, it is an alias or shortcut to a
+filter on column 0:
+
+ csv (filter => sub { $n++; 0 });
+
+is equal to
+
+ csv (filter => { 0 => sub { $n++; 0 });
+
+=item filter-name
+
+ csv (in => "file.csv", filter => "not_blank");
+ csv (in => "file.csv", filter => "not_empty");
+ csv (in => "file.csv", filter => "filled");
+
+These are predefined filters
+
+Given a file like (line numbers prefixed for doc purpose only):
+
+ 1:1,2,3
+ 2:
+ 3:,
+ 4:""
+ 5:,,
+ 6:, ,
+ 7:"",
+ 8:" "
+ 9:4,5,6
+
+=over 2
+
+=item not_blank
+
+Filter out the blank lines
+
+This filter is a shortcut for
+
+ filter => { 0 => sub { @{$_[1]} > 1 or
+ defined $_[1][0] && $_[1][0] ne "" } }
+
+Due to the implementation, it is currently impossible to also filter lines
+that consists only of a quoted empty field. These lines are also considered
+blank lines.
+
+With the given example, lines 2 and 4 will be skipped.
+
+=item not_empty
+
+Filter out lines where all the fields are empty.
+
+This filter is a shortcut for
+
+ filter => { 0 => sub { grep { defined && $_ ne "" } @{$_[1]} } }
+
+A space is not regarded being empty, so given the example data, lines 2, 3,
+4, 5, and 7 are skipped.
+
+=item filled
+
+Filter out lines that have no visible data
+
+This filter is a shortcut for
+
+ filter => { 0 => sub { grep { defined && m/\S/ } @{$_[1]} } }
+
+This filter rejects all lines that I have at least one field that does
+not evaluate to the empty string.
+
+With the given example data, this filter would skip lines 2 through 8.
+
+=back
+
+=back
+
+One could also use modules like L:
+
+ use Types::Standard -types;
+
+ my $type = Tuple[Str, Str, Int, Bool, Optional[Num]];
+ my $check = $type->compiled_check;
+
+ # filter with compiled check and warnings
+ my $aoa = csv (
+ in => \$data,
+ filter => {
+ 0 => sub {
+ my $ok = $check->($_[1]) or
+ warn $type->get_message ($_[1]), "\n";
+ return $ok;
+ },
+ },
+ );
+
+=item after_in
+
+This callback is invoked for each record after all records have been parsed
+but before returning the reference to the caller. The hook is invoked with
+two arguments: the current C parser object and a reference to the
+record. The reference can be a reference to a HASH or a reference to an
+ARRAY as determined by the arguments.
+
+This callback can also be passed as an attribute without the C
+wrapper.
+
+=item before_out
+
+This callback is invoked for each record before the record is printed. The
+hook is invoked with two arguments: the current C parser object and a
+reference to the record. The reference can be a reference to a HASH or a
+reference to an ARRAY as determined by the arguments.
+
+This callback can also be passed as an attribute without the C
+wrapper.
+
+This callback makes the row available in C<%_> if the row is a hashref. In
+this case C<%_> is writable and will change the original row.
+
+=item on_in
+
+This callback acts exactly as the L or the L hooks.
+
+This callback can also be passed as an attribute without the C
+wrapper.
+
+This callback makes the row available in C<%_> if the row is a hashref. In
+this case C<%_> is writable and will change the original row. So e.g. with
+
+ my $aoh = csv (
+ in => \"foo\n1\n2\n",
+ headers => "auto",
+ on_in => sub { $_{bar} = 2; },
+ );
+
+C<$aoh> will be:
+
+ [ { foo => 1,
+ bar => 2,
+ }
+ { foo => 2,
+ bar => 2,
+ }
+ ]
+
+=item on_error
+
+This callback acts exactly as the L hook.
+
+ my @err;
+ my $aoa = csv (in => $fh, on_error => sub { @err = @_ });
+
+is identical to
+
+ my $aoa = csv (in => $fh, callbacks => {
+ error => sub { @err = @_ },
+ });
+
+It can be used for ignoring errors as well as for just keeping the error in
+case of analysis after the C function has returned.
+
+ my @err;
+ my $aoa = csv (in => "bad.csv, on_error => sub { @err = @_ });
+ die Text::CSV_PP->error_diag if @err or !$aoa;
+
+=back
+
+=head1 DIAGNOSTICS
+
+This section is also taken from Text::CSV_XS.
+
+Still under construction ...
+
+If an error occurs, C<< $csv->error_diag >> can be used to get information
+on the cause of the failure. Note that for speed reasons the internal value
+is never cleared on success, so using the value returned by L
+in normal cases - when no error occurred - may cause unexpected results.
+
+If the constructor failed, the cause can be found using L as a
+class method, like C<< Text::CSV_PP->error_diag >>.
+
+The C<< $csv->error_diag >> method is automatically invoked upon error when
+the contractor was called with L|/auto_diag> set to C<1> or
+C<2>, or when L is in effect. When set to C<1>, this will cause a
+C with the error message, when set to C<2>, it will C. C<2012 -
+EOF> is excluded from L|/auto_diag> reports.
+
+Errors can be (individually) caught using the L callback.
+
+The errors as described below are available. I have tried to make the error
+itself explanatory enough, but more descriptions will be added. For most of
+these errors, the first three capitals describe the error category:
+
+=over 2
+
+=item *
+INI
+
+Initialization error or option conflict.
+
+=item *
+ECR
+
+Carriage-Return related parse error.
+
+=item *
+EOF
+
+End-Of-File related parse error.
+
+=item *
+EIQ
+
+Parse error inside quotation.
+
+=item *
+EIF
+
+Parse error inside field.
+
+=item *
+ECB
+
+Combine error.
+
+=item *
+EHR
+
+HashRef parse related error.
+
+=back
+
+And below should be the complete list of error codes that can be returned:
+
+=over 2
+
+=item *
+1001 "INI - sep_char is equal to quote_char or escape_char"
+
+The L cannot be equal to L or to L, as this
+would invalidate all parsing rules.
+
+=item *
+1002 "INI - allow_whitespace with escape_char or quote_char SP or TAB"
+
+Using the L|/allow_whitespace> attribute when either
+L|/quote_char> or L|/escape_char> is equal to
+C or C is too ambiguous to allow.
+
+=item *
+1003 "INI - \r or \n in main attr not allowed"
+
+Using default L|/eol> characters in either L|/sep_char>,
+L|/quote_char>, or L|/escape_char> is not
+allowed.
+
+=item *
+1004 "INI - callbacks should be undef or a hashref"
+
+The L|/Callbacks> attribute only allows one to be C or
+a hash reference.
+
+=item *
+1005 "INI - EOL too long"
+
+The value passed for EOL is exceeding its maximum length (16).
+
+=item *
+1006 "INI - SEP too long"
+
+The value passed for SEP is exceeding its maximum length (16).
+
+=item *
+1007 "INI - QUOTE too long"
+
+The value passed for QUOTE is exceeding its maximum length (16).
+
+=item *
+1008 "INI - SEP undefined"
+
+The value passed for SEP should be defined and not empty.
+
+=item *
+1010 "INI - the header is empty"
+
+The header line parsed in the L is empty.
+
+=item *
+1011 "INI - the header contains more than one valid separator"
+
+The header line parsed in the L contains more than one (unique)
+separator character out of the allowed set of separators.
+
+=item *
+1012 "INI - the header contains an empty field"
+
+The header line parsed in the L contains an empty field.
+
+=item *
+1013 "INI - the header contains nun-unique fields"
+
+The header line parsed in the L contains at least two identical
+fields.
+
+=item *
+1014 "INI - header called on undefined stream"
+
+The header line cannot be parsed from an undefined source.
+
+=item *
+1500 "PRM - Invalid/unsupported argument(s)"
+
+Function or method called with invalid argument(s) or parameter(s).
+
+=item *
+1501 "PRM - The key attribute is passed as an unsupported type"
+
+The C attribute is of an unsupported type.
+
+=item *
+1502 "PRM - The value attribute is passed without the key attribute"
+
+The C attribute is only allowed when a valid key is given.
+
+=item *
+1503 "PRM - The value attribute is passed as an unsupported type"
+
+The C attribute is of an unsupported type.
+
+=item *
+2010 "ECR - QUO char inside quotes followed by CR not part of EOL"
+
+When L|/eol> has been set to anything but the default, like
+C<"\r\t\n">, and the C<"\r"> is following the B (closing)
+L|/quote_char>, where the characters following the C<"\r"> do
+not make up the L|/eol> sequence, this is an error.
+
+=item *
+2011 "ECR - Characters after end of quoted field"
+
+Sequences like C<1,foo,"bar"baz,22,1> are not allowed. C<"bar"> is a quoted
+field and after the closing double-quote, there should be either a new-line
+sequence or a separation character.
+
+=item *
+2012 "EOF - End of data in parsing input stream"
+
+Self-explaining. End-of-file while inside parsing a stream. Can happen only
+when reading from streams with L, as using L is done on
+strings that are not required to have a trailing L|/eol>.
+
+=item *
+2013 "INI - Specification error for fragments RFC7111"
+
+Invalid specification for URI L specification.
+
+=item *
+2014 "ENF - Inconsistent number of fields"
+
+Inconsistent number of fields under strict parsing.
+
+=item *
+2015 "ERW - Empty row"
+
+An empty row was not allowed.
+
+=item *
+2016 "EOL - Inconsistent EOL"
+
+Inconsistent End-Of-Line detected under strict_eol parsing.
+
+=item *
+2021 "EIQ - NL char inside quotes, binary off"
+
+Sequences like C<1,"foo\nbar",22,1> are allowed only when the binary option
+has been selected with the constructor.
+
+=item *
+2022 "EIQ - CR char inside quotes, binary off"
+
+Sequences like C<1,"foo\rbar",22,1> are allowed only when the binary option
+has been selected with the constructor.
+
+=item *
+2023 "EIQ - QUO character not allowed"
+
+Sequences like C<"foo "bar" baz",qu> and C<2023,",2008-04-05,"Foo, Bar",\n>
+will cause this error.
+
+=item *
+2024 "EIQ - EOF cannot be escaped, not even inside quotes"
+
+The escape character is not allowed as last character in an input stream.
+
+=item *
+2025 "EIQ - Loose unescaped escape"
+
+An escape character should escape only characters that need escaping.
+
+Allowing the escape for other characters is possible with the attribute
+L.
+
+=item *
+2026 "EIQ - Binary character inside quoted field, binary off"
+
+Binary characters are not allowed by default. Exceptions are fields that
+contain valid UTF-8, that will automatically be upgraded if the content is
+valid UTF-8. Set L|/binary> to C<1> to accept binary data.
+
+=item *
+2027 "EIQ - Quoted field not terminated"
+
+When parsing a field that started with a quotation character, the field is
+expected to be closed with a quotation character. When the parsed line is
+exhausted before the quote is found, that field is not terminated.
+
+=item *
+2030 "EIF - NL char inside unquoted verbatim, binary off"
+
+=item *
+2031 "EIF - CR char is first char of field, not part of EOL"
+
+=item *
+2032 "EIF - CR char inside unquoted, not part of EOL"
+
+=item *
+2034 "EIF - Loose unescaped quote"
+
+=item *
+2035 "EIF - Escaped EOF in unquoted field"
+
+=item *
+2036 "EIF - ESC error"
+
+=item *
+2037 "EIF - Binary character in unquoted field, binary off"
+
+=item *
+2110 "ECB - Binary character in Combine, binary off"
+
+=item *
+2200 "EIO - print to IO failed. See errno"
+
+=item *
+3001 "EHR - Unsupported syntax for column_names ()"
+
+=item *
+3002 "EHR - getline_hr () called before column_names ()"
+
+=item *
+3003 "EHR - bind_columns () and column_names () fields count mismatch"
+
+=item *
+3004 "EHR - bind_columns () only accepts refs to scalars"
+
+=item *
+3006 "EHR - bind_columns () did not pass enough refs for parsed fields"
+
+=item *
+3007 "EHR - bind_columns needs refs to writable scalars"
+
+=item *
+3008 "EHR - unexpected error in bound fields"
+
+=item *
+3009 "EHR - print_hr () called before column_names ()"
+
+=item *
+3010 "EHR - print_hr () called with invalid arguments"
+
+=back
+
+=head1 SEE ALSO
+
+L, L
+
+Older versions took many regexp from L
+
+=head1 AUTHOR
+
+Kenichi Ishigaki, Eishigaki[at]cpan.orgE
+Makamaka Hannyaharamitu, Emakamaka[at]cpan.orgE
+
+Text::CSV_XS was written by Ejoe[at]ispsoft.deE
+and maintained by Eh.m.brand[at]xs4all.nlE.
+
+Text::CSV was written by Ealan[at]mfgrtl.comE.
+
+=head1 COPYRIGHT AND LICENSE
+
+Copyright 2017- by Kenichi Ishigaki, Eishigaki[at]cpan.orgE
+Copyright 2005-2015 by Makamaka Hannyaharamitu, Emakamaka[at]cpan.orgE
+
+Most of the code and doc is directly taken from the pure perl part of
+Text::CSV_XS.
+
+Copyright (C) 2007-2016 H.Merijn Brand. All rights reserved.
+Copyright (C) 1998-2001 Jochen Wiedmann. All rights reserved.
+Copyright (C) 1997 Alan Citterman. All rights reserved.
+
+This library is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself.
+
+=cut
diff --git a/src/main/perl/lib/Text/CSV_XS.pm b/src/main/perl/lib/Text/CSV_XS.pm
new file mode 100644
index 000000000..2c5f4af81
--- /dev/null
+++ b/src/main/perl/lib/Text/CSV_XS.pm
@@ -0,0 +1,43 @@
+package Text::CSV_XS;
+
+# Text::CSV_XS - Pure-Perl XS replacement for PerlOnJava
+#
+# This module inherits from Text::CSV_PP and provides the Text::CSV_XS
+# interface. Text::CSV will detect this module and prefer it over CSV_PP.
+#
+# The CPAN Text::CSV wrapper uses \&{"Text::CSV_XS::$method"} to alias
+# PublicMethods. Since inherited methods aren't in the stash, we must
+# explicitly install them so the alias lookup succeeds.
+
+use strict;
+use warnings;
+
+use Text::CSV_PP;
+use vars qw( $VERSION @ISA @EXPORT_OK %EXPORT_TAGS );
+
+$VERSION = "1.61";
+@ISA = qw( Text::CSV_PP );
+
+# Re-export everything from CSV_PP
+@EXPORT_OK = @Text::CSV_PP::EXPORT_OK;
+%EXPORT_TAGS = %Text::CSV_PP::EXPORT_TAGS;
+
+# Text::CSV's _load() does \&{"Text::CSV_XS::$method"} for each PublicMethod.
+# Since we inherit from CSV_PP, those symbols aren't in our stash directly.
+# Install them so the alias resolves.
+{
+ no strict 'refs';
+ for my $method (qw(
+ version error_diag error_input known_attributes
+ PV IV NV CSV_TYPE_PV CSV_TYPE_IV CSV_TYPE_NV
+ CSV_FLAGS_IS_QUOTED CSV_FLAGS_IS_BINARY
+ CSV_FLAGS_ERROR_IN_FIELD CSV_FLAGS_IS_MISSING
+ )) {
+ next if defined &{"Text::CSV_XS::$method"};
+ if (my $ref = Text::CSV_PP->can($method)) {
+ *{"Text::CSV_XS::$method"} = $ref;
+ }
+ }
+}
+
+1;
diff --git a/src/test/resources/module/Text-CSV/files/macosx.csv b/src/test/resources/module/Text-CSV/files/macosx.csv
new file mode 100644
index 000000000..3e4dc5340
--- /dev/null
+++ b/src/test/resources/module/Text-CSV/files/macosx.csv
@@ -0,0 +1 @@
+"'\'\\'\\\'""\""\\""\\\""",,,,,,,,,,,,,,
Exported 12/16/2008 10:30 AM,,,,,,Category,Category name,,,,,Category name 2,,
Username,Last Name,First Name M.,Section/Group,Status,Notes,Assignment,Category name 1,Category name 2,Category name 3,woot!,dqwdqwd,Category name 2 1,Total Score,Class Grade
,,,,,,Grading scale,Points,Points,Points,Points,Points,Points,,
,,,,,,Points possible,11,11,11,11,11,11,,
dcwalker,,,,Dropped,,,1,34,1,,,,109,
jdr99,,,devs,Active,"qwd
qwd
qwd",,12,0,1,,,,39,
jlaney,,,devs,Active,,,,2,23,,,,114,
mcrawfor,,,devs,Active,"line 1
line 2
line 3 XX fwe
and
so
on
yea!",,,,,,,,,
,,,,,,,,,,,,,,
,,,,,,Mean,6.5,12.0,8.33,#DIV/0!,#DIV/0!,#DIV/0!,87.33,
,,,,,,Median,6.5,2.0,1.0,#NUM!,#NUM!,#NUM!,109.0,
,,,,,,Mode,#N/A,#N/A,1.0,#N/A,#N/A,#N/A,#N/A,
,,,,,,Min,1.0,0.0,1.0,0.0,0.0,0.0,39.0,
,,,,,,Max,12.0,34.0,23.0,0.0,0.0,0.0,114.0,
,,,,,,Std. Dev.,7.78,19.08,12.7,#DIV/0!,#DIV/0!,#DIV/0!,41.93,
\ No newline at end of file
diff --git a/src/test/resources/module/Text-CSV/files/utf8.csv b/src/test/resources/module/Text-CSV/files/utf8.csv
new file mode 100644
index 000000000..29a75a6d2
--- /dev/null
+++ b/src/test/resources/module/Text-CSV/files/utf8.csv
@@ -0,0 +1 @@
+"Øl/Vin",0
diff --git a/src/test/resources/module/Text-CSV/lib/Text/CSV_PP.pm b/src/test/resources/module/Text-CSV/lib/Text/CSV_PP.pm
new file mode 120000
index 000000000..c6e19df68
--- /dev/null
+++ b/src/test/resources/module/Text-CSV/lib/Text/CSV_PP.pm
@@ -0,0 +1 @@
+../../../../../../main/perl/lib/Text/CSV_PP.pm
\ No newline at end of file
diff --git a/src/test/resources/module/Text-CSV/t/01_is_pp.t b/src/test/resources/module/Text-CSV/t/01_is_pp.t
new file mode 100644
index 000000000..59525b914
--- /dev/null
+++ b/src/test/resources/module/Text-CSV/t/01_is_pp.t
@@ -0,0 +1,22 @@
+#!/usr/bin/perl
+
+use strict;
+$^W = 1; # use warnings core since 5.6
+
+use Test::More tests => 4;
+
+BEGIN {
+ $ENV{PERL_TEXT_CSV} = $ENV{TEST_PERL_TEXT_CSV} || 0;
+ use_ok "Text::CSV";
+ plan skip_all => "Cannot load Text::CSV" if $@;
+ }
+
+if (!$ENV{PERL_TEXT_CSV} or $ENV{PERL_TEXT_CSV} eq 'Text::CSV_PP' or !eval { require Text::CSV_XS; 1 }) {
+ ok my $csv = Text::CSV->new;
+ ok $csv->is_pp;
+ is $csv->module => 'Text::CSV_PP';
+} else {
+ ok my $csv = Text::CSV->new;
+ ok $csv->is_xs;
+ is $csv->module => 'Text::CSV_XS';
+}
diff --git a/src/test/resources/module/Text-CSV/t/10_base.t b/src/test/resources/module/Text-CSV/t/10_base.t
new file mode 100644
index 000000000..89ffa1038
--- /dev/null
+++ b/src/test/resources/module/Text-CSV/t/10_base.t
@@ -0,0 +1,111 @@
+#!/usr/bin/perl
+
+use strict;
+$^W = 1; # use warnings core since 5.6
+
+use Test::More tests => 64;
+
+BEGIN {
+ $ENV{PERL_TEXT_CSV} = $ENV{TEST_PERL_TEXT_CSV} || 0;
+ use_ok "Text::CSV";
+ plan skip_all => "Cannot load Text::CSV" if $@;
+ }
+
+# empty subclass test
+#
+package Empty_Subclass;
+
+@Empty_Subclass::ISA = qw( Text::CSV );
+
+package main;
+
+ok (new Text::CSV, "Indirect object notation");
+
+# Important: Do not modify these tests unless you have a good
+# reason. This file ought to guarantee compatibility to Text::CSV.
+#
+my $empty = Empty_Subclass->new ();
+is (ref $empty, "Empty_Subclass", "Empty Subclass");
+is ($empty->version (), Text::CSV->version (), "Version");
+ok ($empty->parse (""), "Subclass parse ()");
+ok ($empty->combine (""), "Subclass combine ()");
+
+ok ($empty->new, "new () based on object");
+
+my $csv;
+ok ($csv = Text::CSV->new, "new ()");
+is ($csv->fields, undef, "fields () before parse ()");
+is ($csv->string, undef, "string () undef before combine");
+
+# Important: Do not modify these tests unless you have a good
+# reason. This file ought to guarantee compatibility to Text::CSV.
+#
+ok (1, "combine () & string () tests");
+ok (!$csv->combine (), "Missing arguments");
+ok (!$csv->combine ("abc", "def\n", "ghi"), "Bad character");
+is ( $csv->error_input, "def\n", "Error_input ()");
+ok ( $csv->combine (""), "Empty string - combine ()");
+is ( $csv->string, '', "Empty string - string ()");
+ok ( $csv->combine ("", " "), "Two fields, one space - combine ()");
+is ( $csv->string, '," "', "Two fields, one space - string ()");
+ok ( $csv->combine ("", 'I said, "Hi!"', ""), "Hi! - combine ()");
+is ( $csv->string, ',"I said, ""Hi!""",', "Hi! - string ()");
+ok ( $csv->combine ('"', "abc"), "abc - combine ()");
+is ( $csv->string, '"""",abc', "abc - string ()");
+ok ( $csv->combine (","), "comma - combine ()");
+is ( $csv->string, '","', "comma - string ()");
+ok ( $csv->combine ("abc", '"'), "abc + \" - combine ()");
+is ( $csv->string, 'abc,""""', "abc + \" - string ()");
+ok ( $csv->combine ("abc", "def", "ghi", "j,k"), "abc .. j,k - combine ()");
+is ( $csv->string, 'abc,def,ghi,"j,k"', "abc .. j,k - string ()");
+ok ( $csv->combine ("abc\tdef", "ghi"), "abc + TAB - combine ()");
+is ( $csv->string, qq("abc\tdef",ghi), "abc + TAB - string ()");
+
+ok (1, "parse () tests");
+ok (!$csv->parse (), "Missing arguments");
+ok ( $csv->parse ("\n"), "Single newline");
+ok (!$csv->parse ('"abc'), "Missing closing \"");
+ok (!$csv->parse ('ab"c'), "\" outside of \"'s");
+ok (!$csv->parse ('"ab"c"'), "Bad character sequence");
+ok (!$csv->parse (qq("abc\nc")), "Bad character (NL)");
+ok (!$csv->status (), "Wrong status ()");
+ok ( $csv->parse ('","'), "comma - parse ()");
+is ( scalar $csv->fields (), 1, "comma - fields () - count");
+is (($csv->fields ())[0], ",", "comma - fields () - content");
+ok ( $csv->parse (qq("","I said,\t""Hi!""","")), "Hi! - parse ()");
+is ( scalar $csv->fields (), 3, "Hi! - fields () - count");
+
+is (($csv->fields ())[0], "", "Hi! - fields () - field 1");
+is (($csv->fields ())[1], qq(I said,\t"Hi!"), "Hi! - fields () - field 2");
+is (($csv->fields ())[2], "", "Hi! - fields () - field 3");
+ok ( $csv->status (), "status ()");
+
+ok ( $csv->parse (""), "Empty line");
+is ( scalar $csv->fields (), 1, "Empty - count");
+is (($csv->fields ())[0], "", "One empty field");
+
+# Are Integers and Reals quoted?
+#
+# Important: Do not modify these tests unless you have a good
+# reason. This file ought to guarantee compatibility to Text::CSV.
+#
+ok (1, "Integers and Reals");
+ok ( $csv->combine ("", 2, 3.25, "a", "a b"), "Mixed - combine ()");
+is ( $csv->string, ',2,3.25,a,"a b"', "Mixed - string ()");
+
+# New from object
+ok ($csv->new (), "\$csv->new ()");
+
+my $state;
+for ( [ 0, 0 ],
+ [ 0, "foo" ],
+ [ 0, {} ],
+ [ 0, \0 ],
+ [ 0, *STDOUT ],
+ ) {
+ eval { $state = $csv->print (@$_) };
+ ok (!$state, "print needs (IO, ARRAY_REF)");
+ ok ($@ =~ m/^Expected fields to be an array ref/, "Error msg");
+ }
+
+1;
diff --git a/src/test/resources/module/Text-CSV/t/12_acc.t b/src/test/resources/module/Text-CSV/t/12_acc.t
new file mode 100644
index 000000000..56539317e
--- /dev/null
+++ b/src/test/resources/module/Text-CSV/t/12_acc.t
@@ -0,0 +1,284 @@
+#!/usr/bin/perl
+
+use strict;
+$^W = 1; # use warnings core since 5.6
+
+use Test::More tests => 245;
+
+BEGIN {
+ $ENV{PERL_TEXT_CSV} = $ENV{TEST_PERL_TEXT_CSV} || 0;
+ use_ok "Text::CSV";
+ plan skip_all => "Cannot load Text::CSV" if $@;
+ }
+
+my $Backend = Text::CSV->backend;
+
+my $csv;
+ok ($csv = Text::CSV->new, "new ()");
+
+is ($csv->quote_char, '"', "quote_char");
+is ($csv->quote, '"', "quote");
+is ($csv->escape_char, '"', "escape_char");
+is ($csv->sep_char, ",", "sep_char");
+is ($csv->sep, ",", "sep");
+is ($csv->eol, "", "eol");
+is ($csv->always_quote, 0, "always_quote");
+is ($csv->binary, 0, "binary");
+is ($csv->keep_meta_info, 0, "keep_meta_info");
+is ($csv->allow_loose_quotes, 0, "allow_loose_quotes");
+is ($csv->allow_loose_escapes, 0, "allow_loose_escapes");
+is ($csv->allow_unquoted_escape, 0, "allow_unquoted_escape");
+is ($csv->allow_whitespace, 0, "allow_whitespace");
+is ($csv->blank_is_undef, 0, "blank_is_undef");
+is ($csv->empty_is_undef, 0, "empty_is_undef");
+is ($csv->auto_diag, 0, "auto_diag");
+is ($csv->diag_verbose, 0, "diag_verbose");
+is ($csv->verbatim, 0, "verbatim");
+is ($csv->formula, "none", "formula");
+is ($csv->strict, 0, "strict");
+is ($csv->strict_eol, 0, "strict_eol");
+is ($csv->skip_empty_rows, 0, "skip_empty_rows");
+is ($csv->quote_space, 1, "quote_space");
+is ($csv->quote_empty, 0, "quote_empty");
+is ($csv->escape_null, 1, "escape_null");
+is ($csv->quote_null, 1, "quote_null");
+is ($csv->quote_binary, 1, "quote_binary");
+is ($csv->record_number, 0, "record_number");
+is ($csv->decode_utf8, 1, "decode_utf8");
+is ($csv->undef_str, undef, "undef_str");
+is ($csv->comment_str, undef, "comment_str");
+
+is ($csv->binary (1), 1, "binary (1)");
+my @fld = ( 'txt =, "Hi!"', "Yes", "", 2, undef, "1.09", "\r", undef );
+ok ($csv->combine (@fld), "combine");
+is ($csv->string,
+ qq{"txt =, ""Hi!""",Yes,,2,,1.09,"\r",}, "string");
+
+is ($csv->sep_char (";"), ";", "sep_char (;)");
+is ($csv->sep ("**"), "**", "sep (**)");
+is ($csv->sep (";"), ";", "sep (;)");
+is ($csv->sep_char (), ";", "sep_char ()");
+is ($csv->quote_char ("="), "=", "quote_char (=)");
+is ($csv->quote_char (undef), undef, "quote_char (undef)");
+is ($csv->{quote_char}, undef, "{quote_char} (undef)");
+is ($csv->quote (undef), "", "quote (undef)");
+is ($csv->quote (""), "", "quote (undef)");
+is ($csv->quote ("**"), "**", "quote (**)");
+is ($csv->quote ("="), "=", "quote (=)");
+is ($csv->eol (undef), "", "eol (undef)");
+is ($csv->eol (""), "", "eol ('')");
+is ($csv->eol ("\r"), "\r", "eol (\\r)");
+is ($csv->keep_meta_info (1), 1, "keep_meta_info (1)");
+is ($csv->keep_meta_info (0), 0, "keep_meta_info (0)");
+is ($csv->keep_meta_info (""), 0, "keep_meta_info ('')");
+is ($csv->keep_meta_info (undef), 0, "keep_meta_info (undef)");
+is ($csv->keep_meta_info ("false"), 0, "keep_meta_info (undef)");
+is ($csv->keep_meta_info ("true"), 1, "keep_meta_info (undef)");
+is ($csv->always_quote (undef), 0, "always_quote (undef)");
+is ($csv->always_quote (1), 1, "always_quote (1)");
+is ($csv->allow_loose_quotes (1), 1, "allow_loose_quotes (1)");
+is ($csv->allow_loose_escapes (1), 1, "allow_loose_escapes (1)");
+is ($csv->allow_unquoted_escape (1), 1, "allow_unquoted_escape (1)");
+is ($csv->allow_whitespace (1), 1, "allow_whitespace (1)");
+is ($csv->blank_is_undef (1), 1, "blank_is_undef (1)");
+is ($csv->empty_is_undef (1), 1, "empty_is_undef (1)");
+is ($csv->auto_diag (1), 1, "auto_diag (1)");
+is ($csv->auto_diag (2), 2, "auto_diag (2)");
+is ($csv->auto_diag (9), 9, "auto_diag (9)");
+is ($csv->auto_diag ("true"), 1, "auto_diag (\"true\")");
+is ($csv->auto_diag ("false"), 0, "auto_diag (\"false\")");
+is ($csv->auto_diag (undef), 0, "auto_diag (undef)");
+is ($csv->auto_diag (""), 0, "auto_diag (\"\")");
+is ($csv->diag_verbose (1), 1, "diag_verbose (1)");
+is ($csv->diag_verbose (2), 2, "diag_verbose (2)");
+is ($csv->diag_verbose (9), 9, "diag_verbose (9)");
+is ($csv->diag_verbose ("true"), 1, "diag_verbose (\"true\")");
+is ($csv->diag_verbose ("false"), 0, "diag_verbose (\"false\")");
+is ($csv->diag_verbose (undef), 0, "diag_verbose (undef)");
+is ($csv->diag_verbose (""), 0, "diag_verbose (\"\")");
+is ($csv->verbatim (1), 1, "verbatim (1)");
+is ($csv->formula ("diag"), "diag", "formula (\"diag\")");
+is ($csv->strict (1), 1, "strict (1)");
+is ($csv->strict_eol (1), 1, "strict_eol (1)");
+is ($csv->skip_empty_rows (1), 1, "skip_empty_rows (1)");
+is ($csv->quote_space (1), 1, "quote_space (1)");
+is ($csv->quote_empty (1), 1, "quote_empty (1)");
+is ($csv->escape_null (1), 1, "escape_null (1)");
+is ($csv->quote_null (1), 1, "quote_null (1)");
+is ($csv->quote_binary (1), 1, "quote_binary (1)");
+is ($csv->escape_char (undef), undef, "escape_char (undef)");
+is ($csv->{escape_char}, undef, "{escape_char} (undef)");
+is ($csv->escape_char ("\\"), "\\", "escape_char (\\)");
+ok ($csv->combine (@fld), "combine");
+is ($csv->string,
+ qq{=txt \\=, "Hi!"=;=Yes=;==;=2=;;=1.09=;=\r=;\r}, "string");
+is ($csv->undef_str ("-"), "-", "undef_str");
+is ($csv->comment_str ("#"), "#", "comment_str");
+
+is ($csv->allow_whitespace (0), 0, "allow_whitespace (0)");
+is ($csv->quote_space (0), 0, "quote_space (0)");
+is ($csv->quote_empty (0), 0, "quote_empty (0)");
+is ($csv->escape_null (0), 0, "escape_null (0)");
+is ($csv->quote_null (0), 0, "quote_null (0)");
+is ($csv->quote_binary (0), 0, "quote_binary (0)");
+is ($csv->decode_utf8 (0), 0, "decode_utf8 (0)");
+is ($csv->sep ("--"), "--", "sep (\"--\")");
+is ($csv->sep_char (), "\0", "sep_char");
+is ($csv->quote ("++"), "++", "quote (\"++\")");
+is ($csv->quote_char (), "\0", "quote_char");
+is ($csv->undef_str (undef), undef, "undef_str");
+is ($csv->comment_str (undef), undef, "comment_str");
+
+# Test single-byte specials in UTF-8 mode
+is ($csv->sep ("|"), "|", "sep |");
+is ($csv->sep_char (), "|", "sep_char");
+chop (my $s = "|\x{20ac}");
+is ($csv->sep ($s), "|", "sep |");
+is ($csv->sep (), "|", "sep_char");
+is ($csv->sep_char (), "|", "sep_char");
+is ($csv->quote ("'"), "'", "quote '");
+is ($csv->quote_char (), "'", "quote_char");
+chop (my $q = "'\x{20ac}");
+is ($csv->quote ($q), "'", "quote '");
+is ($csv->quote (), "'", "quote_char");
+is ($csv->quote_char (), "'", "quote_char");
+
+# Funny settings, all three translate to \0 internally
+ok ($csv = Text::CSV->new ({
+ sep => "::::::::::",
+ quote_char => undef,
+ escape_char => undef,
+ }), "new (undef ...)");
+is ($csv->sep_char, "\0", "sep_char undef");
+is ($csv->sep, "::::::::::", "sep long");
+is ($csv->quote_char, undef, "quote_char undef");
+is ($csv->quote, undef, "quote undef");
+is ($csv->escape_char, undef, "escape_char undef");
+ok ($csv->parse ("foo"), "parse (foo)");
+$csv->sep_char (",");
+is ($csv->record_number, 1, "record_number");
+ok ($csv->parse ("foo"), "parse (foo)");
+is ($csv->record_number, 2, "record_number");
+ok (!$csv->parse ("foo,foo\0bar"), "parse (foo)");
+$csv->escape_char ("\\");
+ok (!$csv->parse ("foo,foo\0bar"), "parse (foo)");
+$csv->binary (1);
+ok ( $csv->parse ("foo,foo\0bar"), "parse (foo)");
+
+# Attribute aliasses
+ok ($csv = Text::CSV->new ({ quote_always => 1, verbose_diag => 1}));
+is ($csv->always_quote, 1, "always_quote = quote_always");
+is ($csv->diag_verbose, 1, "diag_verbose = verbose_diag");
+ok ($csv = Text::CSV->new ({ escape_char => undef }), "undef escape aliases");
+is ($csv->escape_char, undef, "escape_char is undef");
+ok ($csv = Text::CSV->new ({ quote => undef }), "undef quote aliases");
+is ($csv->quote_char, undef, "quote_char is undef");
+is ($csv->quote, undef, "quote is undef");
+
+# Some forbidden combinations
+foreach my $ws (" ", "\t") {
+ ok ($csv = Text::CSV->new ({ escape_char => $ws }), "New blank escape");
+ eval { ok ($csv->allow_whitespace (1), "Allow ws") };
+ is (($csv->error_diag)[0], 1002, "Wrong combo");
+ ok ($csv = Text::CSV->new ({ quote_char => $ws }), "New blank quote");
+ eval { ok ($csv->allow_whitespace (1), "Allow ws") };
+ is (($csv->error_diag)[0], 1002, "Wrong combo");
+ ok ($csv = Text::CSV->new ({ allow_whitespace => 1 }), "New ws 1");
+ eval { ok ($csv->escape_char ($ws), "esc") };
+ is (($csv->error_diag)[0], 1002, "Wrong combo");
+ ok ($csv = Text::CSV->new ({ allow_whitespace => 1 }), "New ws 1");
+ eval { ok ($csv->quote_char ($ws), "esc") };
+ is (($csv->error_diag)[0], 1002, "Wrong combo");
+ }
+foreach my $esc (undef, "", " ", "\t", "!!!!!!") {
+ foreach my $quo (undef, "", " ", "\t", "!!!!!!") {
+ defined $esc && $esc =~ m/[ \t]/ or
+ defined $quo && $quo =~ m/[ \t]/ or next;
+ my $wc = join " " => map {
+ !defined $_ ? "" :
+ $_ eq "" ? "" :
+ $_ eq " " ? "" :
+ $_ eq "\t" ? "" : $_ }
+ "esc:", $esc, "quo:", $quo;
+ eval { $csv = Text::CSV->new ({
+ escape => $esc,
+ quote => $quo,
+ allow_whitespace => 1,
+ }) };
+ like ((Text::CSV::error_diag)[1], qr{^INI - allow_whitespace}, "Wrong combo - error message: $wc");
+ is ((Text::CSV::error_diag)[0], 1002, "Wrong combo - numeric error: $wc");
+ }
+ }
+
+# Test 1003 in constructor
+foreach my $x ("\r", "\n", "\r\n", "x\n", "\rx") {
+ foreach my $attr (qw( sep_char quote_char escape_char )) {
+ #ok (1, "attr: $attr => ", $x =~ s/\n/\\n/gr =~ s/\r/\\r/gr);
+ eval { $csv = Text::CSV->new ({ $attr => $x }) };
+ is ((Text::CSV::error_diag)[0], 1003, "eol in $attr");
+ }
+ }
+# Test 1003 in methods
+foreach my $attr (qw( sep_char quote_char escape_char )) {
+ ok ($csv = Text::CSV->new, "New");
+ eval { ok ($csv->$attr ("\n"), "$attr => \\n") };
+ is (($csv->error_diag)[0], 1003, "not allowed");
+ }
+
+# Too long attr (max 16)
+$csv = Text::CSV->new ({ quote => "'" });
+my $xl = "X" x 32;
+eval { $csv->eol ($xl); };
+is (($csv->error_diag)[0], 1005, "eol too long");
+is ($csv->eol (), "", "eol unchanged");
+eval { $csv->sep ($xl); };
+is (($csv->error_diag)[0], 1006, "sep too long");
+is ($csv->sep (), ",", "sep unchanged");
+eval { $csv->quote ($xl); };
+is (($csv->error_diag)[0], 1007, "quo too long");
+is ($csv->quote (), "'", "quo unchanged");
+eval { $csv = Text::CSV->new ({ eol => $xl }); };
+is ($csv, undef, "new with EOL too long");
+is ((Text::CSV::error_diag)[0], 1005, "error set");
+eval { $csv = Text::CSV->new ({ sep => $xl }); };
+is ($csv, undef, "new with SEP too long");
+is ((Text::CSV::error_diag)[0], 1006, "error set");
+eval { $csv = Text::CSV->new ({ quote => $xl }); };
+is ($csv, undef, "new with QUO too long");
+is ((Text::CSV::error_diag)[0], 1007, "error set");
+
+# And test erroneous calls
+is (Text::CSV::new (0), undef, "new () as function");
+is (Text::CSV::error_diag (), "usage: my \$csv = $Backend->new ([{ option => value, ... }]);",
+ "Generic usage () message");
+is (Text::CSV->new ({ oel => "" }), undef, "typo in attr");
+is (Text::CSV::error_diag (), "INI - Unknown attribute 'oel'", "Unsupported attr");
+is (Text::CSV->new ({ _STATUS => "" }), undef, "private attr");
+is (Text::CSV::error_diag (), "INI - Unknown attribute '_STATUS'", "Unsupported private attr");
+
+foreach my $arg (undef, 0, "", " ", 1, [], [ 0 ], *STDOUT) {
+ is (Text::CSV->new ($arg), undef, "Illegal type for first arg");
+ is ((Text::CSV::error_diag)[0], 1000, "Should be a hashref - numeric error");
+ }
+
+my $attr = [ sort qw(
+ eol
+ sep_char sep quote_char quote escape_char
+ binary decode_utf8
+ auto_diag diag_verbose
+ blank_is_undef empty_is_undef
+ allow_whitespace allow_loose_quotes allow_loose_escapes allow_unquoted_escape
+ always_quote quote_space quote_empty quote_binary
+ escape_null
+ keep_meta_info
+ verbatim strict strict_eol skip_empty_rows formula
+ undef_str comment_str
+ types
+ callbacks
+ ENCODING
+ )];
+is_deeply ([ Text::CSV::known_attributes () ], $attr, "Known attributes (function)");
+is_deeply ([ Text::CSV->known_attributes () ], $attr, "Known attributes (class method)");
+is_deeply ([ Text::CSV->new->known_attributes () ], $attr, "Known attributes (method)");
+
+1;
diff --git a/src/test/resources/module/Text-CSV/t/15_flags.t b/src/test/resources/module/Text-CSV/t/15_flags.t
new file mode 100644
index 000000000..ae75d8c70
--- /dev/null
+++ b/src/test/resources/module/Text-CSV/t/15_flags.t
@@ -0,0 +1,287 @@
+#!/usr/bin/perl
+
+use strict;
+$^W = 1; # use warnings core since 5.6
+
+use Test::More tests => 229;
+
+BEGIN {
+ $ENV{PERL_TEXT_CSV} = $ENV{TEST_PERL_TEXT_CSV} || 0;
+ use_ok "Text::CSV";
+ plan skip_all => "Cannot load Text::CSV" if $@;
+ }
+
+sub crnlsp {
+ my $csv = shift;
+ ok (!$csv->parse (), "Missing arguments");
+ ok ( $csv->parse ("\n"), "NL");
+ if ($csv->eol eq "\r") {
+ ok ( $csv->parse ("\r"), "CR");
+ ok ( $csv->parse ("\r\r"), "CR CR");
+ ok ( $csv->parse ("\r "), "CR + Space");
+ ok ( $csv->parse (" \r"), "Space + CR");
+ }
+ else {
+ ok (!$csv->parse ("\r"), "CR");
+ ok (!$csv->parse ("\r\r"), "CR CR");
+ if ($csv->binary) {
+ ok ( $csv->parse ("\r "), "CR + Space");
+ ok ( $csv->parse (" \r"), "Space + CR");
+ }
+ else {
+ ok (!$csv->parse ("\r "), "CR + Space");
+ ok (!$csv->parse (" \r"), "Space + CR");
+ }
+ }
+ ok ( $csv->parse ("\r\n"), "CR NL");
+ ok ( $csv->parse ("\n "), "NL + Space");
+ ok ( $csv->parse ("\r\n "), "CR NL + Space");
+ if ($csv->binary) {
+ ok ( $csv->parse (qq{"\n"}), "Quoted NL");
+ ok ( $csv->parse (qq{"\r"}), "Quoted CR");
+ ok ( $csv->parse (qq{"\r\n"}), "Quoted CR NL");
+ ok ( $csv->parse (qq{"\n "}), "Quoted NL + Space");
+ ok ( $csv->parse (qq{"\r "}), "Quoted CR + Space");
+ ok ( $csv->parse (qq{"\r\n "}), "Quoted CR NL + Space");
+ }
+ else {
+ ok (!$csv->parse (qq{"\n"}), "Quoted NL");
+ ok (!$csv->parse (qq{"\r"}), "Quoted CR");
+ ok (!$csv->parse (qq{"\r\n"}), "Quoted CR NL");
+ ok (!$csv->parse (qq{"\n "}), "Quoted NL + Space");
+ ok (!$csv->parse (qq{"\r "}), "Quoted CR + Space");
+ ok (!$csv->parse (qq{"\r\n "}), "Quoted CR NL + Space");
+ }
+ ok (!$csv->parse (qq{"\r\r\n"\r}), "Quoted CR CR NL >CR");
+ ok (!$csv->parse (qq{"\r\r\n"\r\r}), "Quoted CR CR NL >CR CR");
+ ok (!$csv->parse (qq{"\r\r\n"\r\r\n}), "Quoted CR CR NL >CR CR NL");
+ ok (!$csv->parse (qq{"\r\r\n"\t \r}), "Quoted CR CR NL >TAB Space CR");
+ ok (!$csv->parse (qq{"\r\r\n"\t \r\r}), "Quoted CR CR NL >TAB Space CR CR");
+ ok (!$csv->parse (qq{"\r\r\n"\t \r\r\n}), "Quoted CR CR NL >TAB Space CR CR NL");
+ } # crnlsp
+
+{ my $csv = Text::CSV->new ();
+ my $cb6 = chr (0xb6); # A random binary character
+
+ is ($csv->meta_info, undef, "meta_info () before parse ()");
+
+ ok (1, "parse () tests - No meta_info");
+ crnlsp ($csv);
+ ok (!$csv->parse ('"abc'), "Missing closing \"");
+ ok (!$csv->parse ('ab"c'), "\" outside of \"'s");
+ ok (!$csv->parse ('"ab"c"'), "Bad character sequence");
+ ok (!$csv->parse ("ab${cb6}c"), "Binary character");
+ ok (!$csv->parse (qq{"ab${cb6}c"}), "Binary character in quotes");
+ ok (!$csv->parse (qq("abc\nc")), "Bad character (NL)");
+ ok (!$csv->status (), "Wrong status ()");
+ ok ( $csv->parse ('","'), "comma - parse ()");
+ is ( scalar $csv->fields (), 1, "comma - fields () - count");
+ is ( scalar $csv->meta_info (), 0, "comma - meta_info () - count");
+ is (($csv->fields ())[0], ",", "comma - fields () - content");
+ is (($csv->meta_info ())[0], undef, "comma - meta_info () - content");
+ ok ( $csv->parse (qq("","I said,\t""Hi!""","")), "Hi! - parse ()");
+ is ( scalar $csv->fields (), 3, "Hi! - fields () - count");
+ is ( scalar $csv->meta_info (), 0, "Hi! - meta_info () - count");
+ }
+
+{ my $csv = Text::CSV->new ({ keep_meta_info => 1 });
+
+ ok (1, "parse () tests - With flags");
+ is ( $csv->meta_info, undef, "meta_info before parse");
+
+ ok (!$csv->parse (), "Missing arguments");
+ is ( $csv->meta_info, undef, "meta_info after failing parse");
+ crnlsp ($csv);
+ ok (!$csv->parse ('"abc'), "Missing closing \"");
+ ok (!$csv->parse ('ab"c'), "\" outside of \"'s");
+ ok (!$csv->parse ('"ab"c"'), "Bad character sequence");
+ ok (!$csv->parse (qq("abc\nc")), "Bad character (NL)");
+ ok (!$csv->status (), "Wrong status ()");
+ ok ( $csv->parse ('","'), "comma - parse ()");
+ is ( scalar $csv->fields (), 1, "comma - fields () - count");
+ is ( scalar $csv->meta_info (), 1, "comma - meta_info () - count");
+ is (($csv->fields ())[0], ",", "comma - fields () - content");
+ is (($csv->meta_info ())[0], 1, "comma - meta_info () - content");
+ ok ( $csv->parse (qq("","I said,\t""Hi!""",)), "Hi! - parse ()");
+ is ( scalar $csv->fields (), 3, "Hi! - fields () - count");
+ is ( scalar $csv->meta_info (), 3, "Hi! - meta_info () - count");
+
+ is (($csv->fields ())[0], "", "Hi! - fields () - field 1");
+ is (($csv->meta_info ())[0], 1, "Hi! - meta_info () - field 1");
+ is (($csv->fields ())[1], qq(I said,\t"Hi!"), "Hi! - fields () - field 2");
+ is (($csv->meta_info ())[1], 1, "Hi! - meta_info () - field 2");
+ is (($csv->fields ())[2], "", "Hi! - fields () - field 3");
+ is (($csv->meta_info ())[2], 0, "Hi! - meta_info () - field 3");
+ }
+
+{ my $csv = Text::CSV->new ({
+ keep_meta_info => 1,
+ binary => 1,
+ quote_space => 0,
+ });
+ ok ($csv->parse (qq{1,,"", ," ",f,"g","h""h",h\xb6lp,"h\xb6lp"}), "Parse");
+ ok (my @f = $csv->fields, "fields");
+ is_deeply (\@f, [ 1, "", "", " ", " ", "f", "g", "h\"h",
+ "h\xb6lp", "h\xb6lp" ], "fields content");
+ ok ($csv->combine (@f), "combine");
+ is ($csv->string,
+ qq{1,,, , ,f,g,"h""h",h\xb6lp,h\xb6lp}, "string 1");
+ ok ($csv->parse (qq{1,,"", ," ",f,"g","h""h",h\xb6lp,"h\xb6lp"}), "Parse");
+ is ($csv->keep_meta_info (11), 11, "keep meta on out");
+ ok ($csv->combine (@f), "combine");
+ is ($csv->string,
+ qq{1,,"", ," ",f,"g","h""h",h\xb6lp,"h\xb6lp"}, "string 11");
+ ok ($csv->parse (qq{1,,"1193-1",4,"",,6}), "parse under 11");
+ ok ($csv->combine ($csv->fields), "combine");
+ is ($csv->string, qq{1,,"1193-1",4,"",,6}, "return same");
+ }
+
+{ my $csv = Text::CSV->new ({ keep_meta_info => 1, eol => "\r" });
+
+ ok (1, "parse () tests - With flags");
+ is ( $csv->meta_info, undef, "meta_info before parse");
+
+ ok (!$csv->parse (), "Missing arguments");
+ is ( $csv->meta_info, undef, "meta_info after failing parse");
+ crnlsp ($csv);
+ ok (!$csv->parse ('"abc'), "Missing closing \"");
+ ok (!$csv->parse ('ab"c'), "\" outside of \"'s");
+ ok (!$csv->parse ('"ab"c"'), "Bad character sequence");
+ ok (!$csv->parse (qq("abc\nc")), "Bad character (NL)");
+ ok (!$csv->status (), "Wrong status ()");
+ ok ( $csv->parse ('","'), "comma - parse ()");
+ is ( scalar $csv->fields (), 1, "comma - fields () - count");
+ is ( scalar $csv->meta_info (), 1, "comma - meta_info () - count");
+ is (($csv->fields ())[0], ",", "comma - fields () - content");
+ is (($csv->meta_info ())[0], 1, "comma - meta_info () - content");
+ ok ( $csv->parse (qq("","I said,\t""Hi!""",)), "Hi! - parse ()");
+ is ( scalar $csv->fields (), 3, "Hi! - fields () - count");
+ is ( scalar $csv->meta_info (), 3, "Hi! - meta_info () - count");
+
+ is (($csv->fields ())[0], "", "Hi! - fields () - field 1");
+ is (($csv->meta_info ())[0], 1, "Hi! - meta_info () - field 1");
+ is (($csv->fields ())[1], qq(I said,\t"Hi!"), "Hi! - fields () - field 2");
+ is (($csv->meta_info ())[1], 1, "Hi! - meta_info () - field 2");
+ is (($csv->fields ())[2], "", "Hi! - fields () - field 3");
+ is (($csv->meta_info ())[2], 0, "Hi! - meta_info () - field 3");
+ }
+
+{ my $csv = Text::CSV->new ({ keep_meta_info => 1, binary => 1 });
+
+ is ($csv->is_quoted (0), undef, "is_quoted () before parse");
+ is ($csv->is_binary (0), undef, "is_binary () before parse");
+ is ($csv->is_missing (0), undef, "is_missing () before parse");
+
+ my $bintxt = chr ($] < 5.006 ? 0xbf : 0x20ac);
+ ok ( $csv->parse (qq{,"1","a\rb",0,"a\nb",1,\x8e,"a\r\n","$bintxt","",}),
+ "parse () - mixed quoted/binary");
+ is (scalar $csv->fields, 11, "fields () - count");
+ my @fflg;
+ ok (@fflg = $csv->meta_info, "meta_info ()");
+ is (scalar @fflg, 11, "meta_info () - count");
+ is_deeply ([ @fflg ], [ 0, 1, 3, 0, 3, 0, 2, 3, 3, 1, 0 ], "meta_info ()");
+
+ is ($csv->is_quoted (0), 0, "fflag 0 - not quoted");
+ is ($csv->is_binary (0), 0, "fflag 0 - not binary");
+ is ($csv->is_missing (0), 0, "fflag 0 - not missig");
+ is ($csv->is_quoted (2), 1, "fflag 2 - quoted");
+ is ($csv->is_binary (2), 1, "fflag 2 - binary");
+ is ($csv->is_missing (2), 0, "fflag 2 - not missing");
+
+ is ($csv->is_quoted (6), 0, "fflag 5 - not quoted");
+ is ($csv->is_binary (6), 1, "fflag 5 - binary");
+ is ($csv->is_missing (6), 0, "fflag 5 - not missing");
+
+ is ($csv->is_quoted (-1), undef, "fflag -1 - undefined");
+ is ($csv->is_binary (-8), undef, "fflag -8 - undefined");
+ is ($csv->is_missing (-8), undef, "fflag -8 - undefined");
+
+ is ($csv->is_quoted (21), undef, "fflag 21 - undefined");
+ is ($csv->is_binary (98), undef, "fflag 98 - undefined");
+ is ($csv->is_missing (98), 1, "fflag 98 - missing");
+ }
+
+{ my $csv = Text::CSV->new ({ escape_char => "+" });
+
+ ok ( $csv->parse ("+"), "ESC");
+ ok (!$csv->parse ("++"), "ESC ESC");
+ ok ( $csv->parse ("+ "), "ESC Space");
+ ok ( $csv->parse ("+0"), "ESC NUL");
+ ok ( $csv->parse ("+\n"), "ESC NL");
+ ok (!$csv->parse ("+\r"), "ESC CR");
+ ok ( $csv->parse ("+\r\n"), "ESC CR NL");
+ ok (!$csv->parse (qq{"+"}), "Quo ESC");
+ ok (!$csv->parse (qq{""+}), "Quo ESC >");
+ ok ( $csv->parse (qq{"++"}), "Quo ESC ESC");
+ ok (!$csv->parse (qq{"+ "}), "Quo ESC Space");
+ ok ( $csv->parse (qq{"+0"}), "Quo ESC NUL");
+ ok (!$csv->parse (qq{"+\n"}), "Quo ESC NL");
+ ok (!$csv->parse (qq{"+\r"}), "Quo ESC CR");
+ ok (!$csv->parse (qq{"+\r\n"}), "Quo ESC CR NL");
+ }
+
+{ my $csv = Text::CSV->new ({ escape_char => "+", binary => 1 });
+
+ ok ( $csv->parse ("+"), "ESC");
+ ok (!$csv->parse ("++"), "ESC ESC");
+ ok ( $csv->parse ("+ "), "ESC Space");
+ ok ( $csv->parse ("+0"), "ESC NUL");
+ ok ( $csv->parse ("+\n"), "ESC NL");
+ ok (!$csv->parse ("+\r"), "ESC CR");
+ ok ( $csv->parse ("+\r\n"), "ESC CR NL");
+ ok (!$csv->parse (qq{"+"}), "Quo ESC");
+ ok ( $csv->parse (qq{"++"}), "Quo ESC ESC");
+ ok (!$csv->parse (qq{"+ "}), "Quo ESC Space");
+ ok ( $csv->parse (qq{"+0"}), "Quo ESC NUL");
+ ok (!$csv->parse (qq{"+\n"}), "Quo ESC NL");
+ ok (!$csv->parse (qq{"+\r"}), "Quo ESC CR");
+ ok (!$csv->parse (qq{"+\r\n"}), "Quo ESC CR NL");
+ }
+
+ok (1, "Testing always_quote");
+{ ok (my $csv = Text::CSV->new ({ always_quote => 0 }), "new (aq => 0)");
+ ok ($csv->combine (1..3), "Combine");
+ is ($csv->string, q{1,2,3}, "String");
+ is ($csv->always_quote, 0, "Attr 0");
+ ok ($csv->always_quote (1), "Attr 1");
+ ok ($csv->combine (1..3), "Combine");
+ is ($csv->string, q{"1","2","3"}, "String");
+ is ($csv->always_quote, 1, "Attr 1");
+ is ($csv->always_quote (0), 0, "Attr 0");
+ ok ($csv->combine (1..3), "Combine");
+ is ($csv->string, q{1,2,3}, "String");
+ is ($csv->always_quote, 0, "Attr 0");
+ }
+
+ok (1, "Testing quote_space");
+{ ok (my $csv = Text::CSV->new ({ quote_space => 1 }), "new (qs => 1)");
+ ok ($csv->combine (1, " ", 3), "Combine");
+ is ($csv->string, q{1," ",3}, "String");
+ is ($csv->quote_space, 1, "Attr 1");
+ is ($csv->quote_space (0), 0, "Attr 0");
+ ok ($csv->combine (1, " ", 3), "Combine");
+ is ($csv->string, q{1, ,3}, "String");
+ is ($csv->quote_space, 0, "Attr 0");
+ is ($csv->quote_space (1), 1, "Attr 1");
+ ok ($csv->combine (1, " ", 3), "Combine");
+ is ($csv->string, q{1," ",3}, "String");
+ is ($csv->quote_space, 1, "Attr 1");
+ }
+
+ok (1, "Testing quote_empty");
+{ ok (my $csv = Text::CSV->new (), "new (default)");
+ is ($csv->quote_empty, 0, "default = 0");
+ ok ($csv->combine (1, undef, "", " ", 2), "combine qe = 0");
+ is ($csv->string, qq{1,,," ",2}, "string");
+ is ($csv->quote_empty (1), 1, "enable quote_empty");
+ ok ($csv->combine (1, undef, "", " ", 2), "combine qe = 1");
+ is ($csv->string, qq{1,,""," ",2}, "string");
+ }
+
+# https://rt.cpan.org/Public/Bug/Display.html?id=109097
+ok (1, "Testing quote_char as undef");
+{ my $csv = Text::CSV->new ({ quote_char => undef });
+ is ($csv->escape_char, '"', "Escape Char defaults to double quotes");
+ ok ($csv->combine ('space here', '"quoted"', '"quoted and spaces"'), "Combine");
+ is ($csv->string, q{space here,""quoted"",""quoted and spaces""}, "String");
+ }
diff --git a/src/test/resources/module/Text-CSV/t/16_import.t b/src/test/resources/module/Text-CSV/t/16_import.t
new file mode 100644
index 000000000..aa16cc6b9
--- /dev/null
+++ b/src/test/resources/module/Text-CSV/t/16_import.t
@@ -0,0 +1,64 @@
+#!/usr/bin/perl
+
+use strict;
+$^W = 1;
+
+use Test::More tests => 41;
+
+BEGIN {
+ $ENV{PERL_TEXT_CSV} = $ENV{TEST_PERL_TEXT_CSV} || 0;
+ use_ok "Text::CSV", qw( :CONSTANTS PV IV NV );
+ plan skip_all => "Cannot load Text::CSV" if $@;
+ }
+
+is ( PV, 0, "Type PV");
+is ( IV, 1, "Type IV");
+is ( NV, 2, "Type NV");
+
+is ( PV (), 0, "Type PV f");
+is ( IV (), 1, "Type IV f");
+is ( NV (), 2, "Type NV f");
+
+is (Text::CSV::PV, 0, "Type T:C:PV");
+is (Text::CSV::IV, 1, "Type T:C:IV");
+is (Text::CSV::NV, 2, "Type T:C:NV");
+
+is (Text::CSV::PV (), 0, "Type T:C:PV f");
+is (Text::CSV::IV (), 1, "Type T:C:IV f");
+is (Text::CSV::NV (), 2, "Type T:C:NV f");
+
+is ( CSV_TYPE_PV, 0, "Type CT_PV");
+is ( CSV_TYPE_IV, 1, "Type CT_IV");
+is ( CSV_TYPE_NV, 2, "Type CT_NV");
+
+is ( CSV_TYPE_PV (), 0, "Type CT_PV f");
+is ( CSV_TYPE_IV (), 1, "Type CT_IV f");
+is ( CSV_TYPE_NV (), 2, "Type CT_NV f");
+
+is (Text::CSV::CSV_TYPE_PV, 0, "Type T:C:CT_PV");
+is (Text::CSV::CSV_TYPE_IV, 1, "Type T:C:CT_IV");
+is (Text::CSV::CSV_TYPE_NV, 2, "Type T:C:CT_NV");
+
+is (Text::CSV::CSV_TYPE_PV (), 0, "Type T:C:CT_PV f");
+is (Text::CSV::CSV_TYPE_IV (), 1, "Type T:C:CT_IV f");
+is (Text::CSV::CSV_TYPE_NV (), 2, "Type T:C:CT_NV f");
+
+is ( CSV_FLAGS_IS_QUOTED, 1, "is_Q");
+is ( CSV_FLAGS_IS_BINARY, 2, "is_B");
+is ( CSV_FLAGS_ERROR_IN_FIELD, 4, "is_E");
+is ( CSV_FLAGS_IS_MISSING, 16, "is_M");
+
+is ( CSV_FLAGS_IS_QUOTED (), 1, "is_Q f");
+is ( CSV_FLAGS_IS_BINARY (), 2, "is_B f");
+is ( CSV_FLAGS_ERROR_IN_FIELD (), 4, "is_E f");
+is ( CSV_FLAGS_IS_MISSING (), 16, "is_M f");
+
+is (Text::CSV::CSV_FLAGS_IS_QUOTED, 1, "is_Q");
+is (Text::CSV::CSV_FLAGS_IS_BINARY, 2, "is_B");
+is (Text::CSV::CSV_FLAGS_ERROR_IN_FIELD, 4, "is_E");
+is (Text::CSV::CSV_FLAGS_IS_MISSING, 16, "is_M");
+
+is (Text::CSV::CSV_FLAGS_IS_QUOTED (), 1, "T:C:is_Q f");
+is (Text::CSV::CSV_FLAGS_IS_BINARY (), 2, "T:C:is_B f");
+is (Text::CSV::CSV_FLAGS_ERROR_IN_FIELD (), 4, "T:C:is_E f");
+is (Text::CSV::CSV_FLAGS_IS_MISSING (), 16, "T:C:is_M f");
diff --git a/src/test/resources/module/Text-CSV/t/20_file.t b/src/test/resources/module/Text-CSV/t/20_file.t
new file mode 100644
index 000000000..30d22da6b
--- /dev/null
+++ b/src/test/resources/module/Text-CSV/t/20_file.t
@@ -0,0 +1,153 @@
+#!/usr/bin/perl
+
+use strict;
+$^W = 1; # use warnings;
+
+use Test::More tests => 109;
+
+BEGIN {
+ $ENV{PERL_TEXT_CSV} = $ENV{TEST_PERL_TEXT_CSV} || 0;
+ use_ok "Text::CSV";
+ plan skip_all => "Cannot load Text::CSV" if $@;
+ require "./t/util.pl";
+ }
+
+$| = 1;
+$/ = "\n";
+$\ = undef;
+
+my $tfn = "_20test.csv"; END { -f $tfn and unlink $tfn; }
+
+my $csv = Text::CSV->new ();
+
+my $UTF8 = ($ENV{LANG} || "C").($ENV{LC_ALL} || "C") =~ m/utf-?8/i ? 1 : 0;
+
+open FH, ">", $tfn or die "$tfn: $!";
+ok (!$csv->print (*FH, ["abc", "def\007", "ghi"]), "print bad character");
+close FH;
+
+for ( [ 1, 1, 1, '""' ],
+ [ 2, 1, 1, '', '' ],
+ [ 3, 1, 0, '', 'I said, "Hi!"', '' ],
+ [ 4, 1, 0, '"', 'abc' ],
+ [ 5, 1, 0, 'abc', '"' ],
+ [ 6, 1, 1, 'abc', 'def', 'ghi' ],
+ [ 7, 1, 1, "abc\tdef", 'ghi' ],
+ [ 8, 1, 0, '"abc' ],
+ [ 9, 1, 0, 'ab"c' ],
+ [ 10, 1, 0, '"ab"c"' ],
+ [ 11, 0, 0, qq("abc\nc") ],
+ [ 12, 1, 1, q(","), ',' ],
+ [ 13, 1, 0, qq("","I said,\t""Hi!""",""), '', qq(I said,\t"Hi!"), '' ],
+ ) {
+ my ($tst, $validp, $validg, @arg, $row) = @$_;
+
+ open FH, ">", $tfn or die "$tfn: $!";
+ is ($csv->print (*FH, \@arg), $validp||"", "$tst - print ()");
+ close FH;
+
+ open FH, ">", $tfn or die "$tfn: $!";
+ print FH join ",", @arg;
+ close FH;
+
+ open FH, "<", $tfn or die "$tfn: $!";
+ $row = $csv->getline (*FH);
+ unless ($validg) {
+ is ($row, undef, "$tst - false getline ()");
+ next;
+ }
+ ok ($row, "$tst - good getline ()");
+ $tst == 12 and @arg = (",", "", "");
+ foreach my $a (0 .. $#arg) {
+ (my $exp = $arg[$a]) =~ s/^"(.*)"$/$1/;
+ is ($row->[$a], $exp, "$tst - field $a");
+ }
+ }
+
+unlink $tfn;
+
+# This test because of a problem with DBD::CSV
+
+ok (1, "Tests for DBD::CSV");
+open FH, ">", $tfn or die "$tfn: $!";
+$csv->binary (1);
+$csv->eol ("\r\n");
+ok ($csv->print (*FH, [ "id", "name" ]), "Bad character");
+ok ($csv->print (*FH, [ 1, "Alligator Descartes" ]), "Name 1");
+ok ($csv->print (*FH, [ "3", "Jochen Wiedmann" ]), "Name 2");
+ok ($csv->print (*FH, [ 2, "Tim Bunce" ]), "Name 3");
+ok ($csv->print (*FH, [ " 4", "Andreas König" ]), "Name 4");
+ok ($csv->print (*FH, [ 5 ]), "Name 5");
+close FH;
+
+my $expected = <<"CONTENTS";
+id,name\015
+1,"Alligator Descartes"\015
+3,"Jochen Wiedmann"\015
+2,"Tim Bunce"\015
+" 4","Andreas König"\015
+5\015
+CONTENTS
+
+open FH, "<", $tfn or die "$tfn: $!";
+my $content = do { local $/; };
+close FH;
+is ($content, $expected, "Content");
+open FH, ">", $tfn or die "$tfn: $!";
+print FH $content;
+close FH;
+open FH, "<", $tfn or die "$tfn: $!";
+
+my $fields;
+print "# Retrieving data\n";
+for (0 .. 5) {
+ ok ($fields = $csv->getline (*FH), "Fetch field $_");
+ is ($csv->eof, "", "EOF");
+ print "# Row $_: $fields (@$fields)\n";
+ }
+is ($csv->getline (*FH), undef, "Fetch field 6");
+is ($csv->eof, 1, "EOF");
+
+# Edge cases
+for ([ 1, 1, 0, "\n" ],
+ [ 2, 1, 0, "+\n" ],
+ [ 3, 1, 0, "+" ],
+ [ 4, 0, 2021, qq{"+"\n} ],
+ [ 5, 0, 2025, qq{"+\n} ],
+ [ 6, 0, 2011, qq{""+\n} ],
+ [ 7, 0, 2027, qq{"+"} ],
+ [ 8, 0, 2024, qq{"+} ],
+ [ 9, 0, 2011, qq{""+} ],
+ [ 10, 1, 0, "\r" ],
+ [ 11, 0, 2031, "\r\b" ],
+ [ 12, 0, 2032, "+\r\b" ],
+ [ 13, 0, 2032, "+\r\b+" ],
+ [ 14, 0, 2022, qq{"\r"} ],
+ [ 15, 0, 2022, qq{"\r\b" } ],
+ [ 16, 0, 2022, qq{"\r\b"\t} ],
+ [ 17, 0, 2025, qq{"+\r\b"} ],
+ [ 18, 0, 2025, qq{"+\r\b+"} ],
+ [ 19, 0, 2022, qq{"\r"\b} ],
+ [ 20, 0, 2022, qq{"\r\b"\b} ],
+ [ 21, 0, 2025, qq{"+\r\b"\b} ],
+ [ 22, 0, 2025, qq{"+\r\b+"\b} ],
+ [ 23, 0, 2037, qq{\b} ],
+ [ 24, 0, 2026, qq{"\b"} ],
+ ) {
+ my ($tst, $valid, $err, $str) = @$_;
+ my $raw = $] < 5.008 ? "" : ":raw";
+ open FH, ">$raw", $tfn or die "$tfn: $!";
+ print FH $str;
+ close FH;
+
+ $csv = Text::CSV->new ({ escape_char => "+" });
+ open FH, "<$raw", $tfn or die "$tfn: $!";
+ my $row = $csv->getline (*FH);
+ close FH;
+ my @err = $csv->error_diag;
+ my $sstr = _readable ($str);
+ SKIP: {
+ ok ($valid ? $row : !$row, "$tst - getline ESC +, '$sstr'");
+ is ($err[0], $err, "Error expected $err");
+ }
+ }
diff --git a/src/test/resources/module/Text-CSV/t/21_lexicalio.t b/src/test/resources/module/Text-CSV/t/21_lexicalio.t
new file mode 100644
index 000000000..8cab0eeb9
--- /dev/null
+++ b/src/test/resources/module/Text-CSV/t/21_lexicalio.t
@@ -0,0 +1,164 @@
+#!/usr/bin/perl
+
+use strict;
+$^W = 1; # use warnings;
+
+use Test::More;
+
+BEGIN {
+ if ($] < 5.006) {
+ plan skip_all => "No lexical file handles in in this ancient perl version";
+ }
+ else {
+ plan tests => 109;
+ }
+}
+
+BEGIN {
+ $ENV{PERL_TEXT_CSV} = $ENV{TEST_PERL_TEXT_CSV} || 0;
+ use_ok "Text::CSV";
+ plan skip_all => "Cannot load Text::CSV" if $@;
+ require "./t/util.pl";
+ }
+
+$| = 1;
+$/ = "\n";
+$\ = undef;
+
+my $io;
+my $tfn = "_21test.csv"; END { -f $tfn and unlink $tfn; }
+my $csv = Text::CSV->new ();
+
+my $UTF8 = ($ENV{LANG} || "C").($ENV{LC_ALL} || "C") =~ m/utf-?8/i ? 1 : 0;
+
+open $io, ">", $tfn or die "$tfn: $!";
+ok (!$csv->print ($io, ["abc", "def\007", "ghi"]), "print bad character");
+close $io;
+
+for ( [ 1, 1, 1, '""' ],
+ [ 2, 1, 1, '', '' ],
+ [ 3, 1, 0, '', 'I said, "Hi!"', '' ],
+ [ 4, 1, 0, '"', 'abc' ],
+ [ 5, 1, 0, 'abc', '"' ],
+ [ 6, 1, 1, 'abc', 'def', 'ghi' ],
+ [ 7, 1, 1, "abc\tdef", 'ghi' ],
+ [ 8, 1, 0, '"abc' ],
+ [ 9, 1, 0, 'ab"c' ],
+ [ 10, 1, 0, '"ab"c"' ],
+ [ 11, 0, 0, qq("abc\nc") ],
+ [ 12, 1, 1, q(","), ',' ],
+ [ 13, 1, 0, qq("","I said,\t""Hi!""",""), '', qq(I said,\t"Hi!"), '' ],
+ ) {
+ my ($tst, $validp, $validg, @arg, $row) = @$_;
+
+ open $io, ">", $tfn or die "$tfn: $!";
+ is ($csv->print ($io, \@arg), $validp||"", "$tst - print ()");
+ close $io;
+
+ open $io, ">", $tfn or die "$tfn: $!";
+ print $io join ",", @arg;
+ close $io;
+
+ open $io, "<", $tfn or die "$tfn: $!";
+ $row = $csv->getline ($io);
+ unless ($validg) {
+ is ($row, undef, "$tst - false getline ()");
+ next;
+ }
+ ok ($row, "$tst - good getline ()");
+ $tst == 12 and @arg = (",", "", "");
+ foreach my $a (0 .. $#arg) {
+ (my $exp = $arg[$a]) =~ s/^"(.*)"$/$1/;
+ is ($row->[$a], $exp, "$tst - field $a");
+ }
+ }
+
+unlink $tfn;
+
+# This test because of a problem with DBD::CSV
+
+ok (1, "Tests for DBD::CSV");
+open $io, ">", $tfn or die "$tfn: $!";
+$csv->binary (1);
+$csv->eol ("\r\n");
+ok ($csv->print ($io, [ "id", "name" ]), "Bad character");
+ok ($csv->print ($io, [ 1, "Alligator Descartes" ]), "Name 1");
+ok ($csv->print ($io, [ "3", "Jochen Wiedmann" ]), "Name 2");
+ok ($csv->print ($io, [ 2, "Tim Bunce" ]), "Name 3");
+ok ($csv->print ($io, [ " 4", "Andreas König" ]), "Name 4");
+ok ($csv->print ($io, [ 5 ]), "Name 5");
+close $io;
+
+my $expected = <<"CONTENTS";
+id,name\015
+1,"Alligator Descartes"\015
+3,"Jochen Wiedmann"\015
+2,"Tim Bunce"\015
+" 4","Andreas König"\015
+5\015
+CONTENTS
+
+open $io, "<", $tfn or die "$tfn: $!";
+my $content = do { local $/; <$io> };
+close $io;
+is ($content, $expected, "Content");
+open $io, ">", $tfn or die "$tfn: $!";
+print $io $content;
+close $io;
+open $io, "<", $tfn or die "$tfn: $!";
+
+my $fields;
+print "# Retrieving data\n";
+for (0 .. 5) {
+ ok ($fields = $csv->getline ($io), "Fetch field $_");
+ is ($csv->eof, "", "EOF");
+ print "# Row $_: $fields (@$fields)\n";
+ }
+is ($csv->getline ($io), undef, "Fetch field 6");
+is ($csv->eof, 1, "EOF");
+
+# Edge cases
+for ([ 1, 1, 0, "\n" ],
+ [ 2, 1, 0, "+\n" ],
+ [ 3, 1, 0, "+" ],
+ [ 4, 0, 2021, qq{"+"\n} ],
+ [ 5, 0, 2025, qq{"+\n} ],
+ [ 6, 0, 2011, qq{""+\n} ],
+ [ 7, 0, 2027, qq{"+"} ],
+ [ 8, 0, 2024, qq{"+} ],
+ [ 9, 0, 2011, qq{""+} ],
+ [ 10, 1, 0, "\r" ],
+ [ 11, 0, 2031, "\r\b" ],
+ [ 12, 0, 2032, "+\r\b" ],
+ [ 13, 0, 2032, "+\r\b+" ],
+ [ 14, 0, 2022, qq{"\r"} ],
+ [ 15, 0, 2022, qq{"\r\b" } ],
+ [ 16, 0, 2022, qq{"\r\b"\t} ],
+ [ 17, 0, 2025, qq{"+\r\b"} ],
+ [ 18, 0, 2025, qq{"+\r\b+"} ],
+ [ 19, 0, 2022, qq{"\r"\b} ],
+ [ 20, 0, 2022, qq{"\r\b"\b} ],
+ [ 21, 0, 2025, qq{"+\r\b"\b} ],
+ [ 22, 0, 2025, qq{"+\r\b+"\b} ],
+ [ 23, 0, 2037, qq{\b} ],
+ [ 24, 0, 2026, qq{"\b"} ],
+ ) {
+ my ($tst, $valid, $err, $str) = @$_;
+ my $raw = $] < 5.008 ? "" : ":raw";
+ open my $io, ">$raw", $tfn or die "$tfn: $!";
+ print $io $str;
+ close $io;
+
+ $csv = Text::CSV->new ({ escape_char => "+" });
+ open $io, "<$raw", $tfn or die "$tfn: $!";
+ my $row = $csv->getline ($io);
+ close $io;
+ my @err = $csv->error_diag;
+ my $sstr = _readable ($str);
+ SKIP: {
+ $tst == 10 && $] >= 5.008 && $] < 5.008003 && $UTF8 and
+ skip "Be reasonable, this perl version does not do Unicode reliable", 2;
+ ok ($valid ? $row : !$row, "$tst - getline ESC +, '$sstr'");
+ is ($err[0], $err, "Error expected $err");
+ }
+ }
diff --git a/src/test/resources/module/Text-CSV/t/22_scalario.t b/src/test/resources/module/Text-CSV/t/22_scalario.t
new file mode 100644
index 000000000..2132b15c6
--- /dev/null
+++ b/src/test/resources/module/Text-CSV/t/22_scalario.t
@@ -0,0 +1,208 @@
+#!/usr/bin/perl
+
+use strict;
+$^W = 1; # use warnings;
+$| = 1;
+
+use Config;
+use Test::More;
+
+BEGIN {
+ unless (exists $Config{useperlio} &&
+ defined $Config{useperlio} &&
+ $] >= 5.008 && # perlio was experimental in 5.6.2, but not reliable
+ $Config{useperlio} eq "define") {
+ plan skip_all => "No reliable perlIO available";
+ }
+ else {
+ plan tests => 136;
+ }
+ }
+
+BEGIN {
+ $ENV{PERL_TEXT_CSV} = $ENV{TEST_PERL_TEXT_CSV} || 0;
+ use_ok "Text::CSV";
+ plan skip_all => "Cannot load Text::CSV" if $@;
+ require "./t/util.pl";
+ }
+
+$/ = "\n";
+$\ = undef;
+
+my $io;
+my $io_str = "";
+my $csv = Text::CSV->new ();
+
+open $io, ">", \$io_str or die "IO: $!";
+ok (!$csv->print ($io, ["abc", "def\007", "ghi"]), "print bad character");
+close $io;
+
+for ( [ 1, 1, 1, '""' ],
+ [ 2, 1, 1, '', '' ],
+ [ 3, 1, 0, '', 'I said, "Hi!"', '' ],
+ [ 4, 1, 0, '"', 'abc' ],
+ [ 5, 1, 0, 'abc', '"' ],
+ [ 6, 1, 1, 'abc', 'def', 'ghi' ],
+ [ 7, 1, 1, "abc\tdef", 'ghi' ],
+ [ 8, 1, 0, '"abc' ],
+ [ 9, 1, 0, 'ab"c' ],
+ [ 10, 1, 0, '"ab"c"' ],
+ [ 11, 0, 0, qq("abc\nc") ],
+ [ 12, 1, 1, q(","), ',' ],
+ [ 13, 1, 0, qq("","I said,\t""Hi!""",""), '', qq(I said,\t"Hi!"), '' ],
+ ) {
+ my ($tst, $validp, $validg, @arg, $row) = @$_;
+
+ open $io, ">", \$io_str or die "IO: $!";
+ is ($csv->print ($io, \@arg), $validp||"", "$tst - print ()");
+ close $io;
+
+ open $io, ">", \$io_str or die "IO: $!";
+ print $io join ",", @arg;
+ close $io;
+
+ open $io, "<", \$io_str or die "IO: $!";
+ $row = $csv->getline ($io);
+ unless ($validg) {
+ is ($row, undef, "$tst - false getline ()");
+ next;
+ }
+ ok ($row, "$tst - good getline ()");
+ $tst == 12 and @arg = (",", "", "");
+ foreach my $a (0 .. $#arg) {
+ (my $exp = $arg[$a]) =~ s/^"(.*)"$/$1/;
+ is ($row->[$a], $exp, "$tst - field $a");
+ }
+ }
+
+# This test because of a problem with DBD::CSV
+
+ok (1, "Tests for DBD::CSV");
+open $io, ">", \$io_str or die "IO: $!";
+$csv->binary (1);
+$csv->eol ("\r\n");
+ok ($csv->print ($io, [ "id", "name" ]), "Bad character");
+ok ($csv->print ($io, [ 1, "Alligator Descartes" ]), "Name 1");
+ok ($csv->print ($io, [ "3", "Jochen Wiedmann" ]), "Name 2");
+ok ($csv->print ($io, [ 2, "Tim Bunce" ]), "Name 3");
+ok ($csv->print ($io, [ " 4", "Andreas König" ]), "Name 4");
+ok ($csv->print ($io, [ 5 ]), "Name 5");
+close $io;
+
+my $expected = <<"CONTENTS";
+id,name\015
+1,"Alligator Descartes"\015
+3,"Jochen Wiedmann"\015
+2,"Tim Bunce"\015
+" 4","Andreas König"\015
+5\015
+CONTENTS
+
+open $io, "<", \$io_str or die "IO: $!";
+my $content = do { local $/; <$io> };
+close $io;
+is ($content, $expected, "Content");
+open $io, ">", \$io_str or die "IO: $!";
+print $io $content;
+close $io;
+open $io, "<", \$io_str or die "IO: $!";
+
+my $fields;
+print "# Retrieving data\n";
+for (0 .. 5) {
+ ok ($fields = $csv->getline ($io), "Fetch field $_");
+ is ($csv->eof, "", "EOF");
+ print "# Row $_: $fields (@$fields)\n";
+ }
+is ($csv->getline ($io), undef, "Fetch field 6");
+is ($csv->eof, 1, "EOF");
+
+{ ok (my $csv = Text::CSV->new ({ binary => 1, eol => "\n" }), "new csv");
+ my ($out1, $out2, @fld, $fh) = ("", "", qw( 1 aa 3.14 ahhrg ));
+ open $fh, ">", \$out1 or die "IO: $!";
+ ok ($csv->print ($fh, \@fld), "Add line $_") for 1..3;
+ close $fh;
+ $csv->bind_columns (\(@fld));
+ open $fh, ">", \$out2 or die "IO: $!";
+ ok ($csv->print ($fh, \@fld), "Add line $_") for 1..3;
+ close $fh;
+ is ($out2, $out1, "ignoring bound columns");
+ $out2 = "";
+ open $fh, ">", \$out2 or die "IO: $!";
+ ok ($csv->print ($fh, undef), "Add line $_") for 1..3;
+ close $fh;
+ is ($out2, $out1, "using bound columns");
+ }
+
+# Edge cases
+for ([ 1, 1, 0, "\n" ],
+ [ 2, 1, 0, "+\n" ],
+ [ 3, 1, 0, "+" ],
+ [ 4, 0, 2021, qq{"+"\n} ],
+ [ 5, 0, 2025, qq{"+\n} ],
+ [ 6, 0, 2011, qq{""+\n} ],
+ [ 7, 0, 2027, qq{"+"} ],
+ [ 8, 0, 2024, qq{"+} ],
+ [ 9, 0, 2011, qq{""+} ],
+ [ 10, 1, 0, "\r" ],
+ [ 11, 0, 2031, "\r\b" ],
+ [ 12, 0, 2032, "+\r\b" ],
+ [ 13, 0, 2032, "+\r\b+" ],
+ [ 14, 0, 2022, qq{"\r"} ],
+ [ 15, 0, 2022, qq{"\r\b" } ],
+ [ 16, 0, 2022, qq{"\r\b"\t} ],
+ [ 17, 0, 2025, qq{"+\r\b"} ],
+ [ 18, 0, 2025, qq{"+\r\b+"} ],
+ [ 19, 0, 2022, qq{"\r"\b} ],
+ [ 20, 0, 2022, qq{"\r\b"\b} ],
+ [ 21, 0, 2025, qq{"+\r\b"\b} ],
+ [ 22, 0, 2025, qq{"+\r\b+"\b} ],
+ [ 23, 0, 2037, qq{\b} ],
+ [ 24, 0, 2026, qq{"\b"} ],
+ ) {
+ my ($tst, $valid, $err, $str) = @$_;
+ $csv = Text::CSV->new ({ escape_char => "+" });
+ $io_str = $str;
+ open $io, "<", \$io_str or die "IO: $!"; binmode $io;
+ my $row = $csv->getline ($io);
+ close $io;
+ my @err = $csv->error_diag;
+ my $sstr = _readable ($str);
+ ok ($valid ? $row : !$row, "$tst - getline ESC +, '$sstr'");
+ is ($err[0], $err, "Error expected $err");
+ }
+
+{ ok (my $csv = Text::CSV->new, "new for sep=");
+ open my $fh, "<", \qq{sep=;\n"a b";3\n} or die "IO: $!";
+ is_deeply ($csv->getline_all ($fh), [["a b", 3]], "valid sep=");
+ is (($csv->error_diag)[0], 2012, "EOF");
+ }
+
+{ ok (my $csv = Text::CSV->new, "new for sep=");
+ open my $fh, "<", \qq{sep=;\n"a b",3\n} or die "IO: $!";
+ is_deeply (eval { $csv->getline_all ($fh); }, [], "invalid sep=");
+ is (($csv->error_diag)[0], 2023, "error");
+ }
+
+{ ok (my $csv = Text::CSV->new, "new for sep=");
+ open my $fh, "<", \qq{sep=XX\n"a b"XX3\n} or die "IO: $!";
+ is_deeply (eval { $csv->getline_all ($fh); },
+ [["a b", 3]], "multibyte sep=");
+ is (($csv->error_diag)[0], 2012, "error");
+ }
+
+{ ok (my $csv = Text::CSV->new, "new for sep=");
+ # To check that it is *only* supported on the first line
+ open my $fh, "<", \qq{sep=;\n"a b";3\nsep=,\n"a b",3\n} or die "IO: $!";
+ is_deeply ($csv->getline_all ($fh),
+ [["a b","3"],["sep=,"]], "sep= not on 1st line");
+ is (($csv->error_diag)[0], 2023, "error");
+ }
+
+{ ok (my $csv = Text::CSV->new, "new for sep=");
+ my $sep = "#" x 80;
+ open my $fh, "<", \qq{sep=$sep\n"a b",3\n2,3\n} or die "IO: $!";
+ my $r = $csv->getline_all ($fh);
+ is_deeply ($r, [["sep=$sep"],["a b","3"],[2,3]], "sep= too long");
+ is (($csv->error_diag)[0], 2012, "EOF");
+ }
diff --git a/src/test/resources/module/Text-CSV/t/30_types.t b/src/test/resources/module/Text-CSV/t/30_types.t
new file mode 100644
index 000000000..0e0691192
--- /dev/null
+++ b/src/test/resources/module/Text-CSV/t/30_types.t
@@ -0,0 +1,63 @@
+#!/usr/bin/perl
+
+use strict;
+$^W = 1;
+
+use Test::More tests => 25;
+
+BEGIN {
+ $ENV{PERL_TEXT_CSV} = $ENV{TEST_PERL_TEXT_CSV} || 0;
+ use_ok "Text::CSV", ();
+ plan skip_all => "Cannot load Text::CSV" if $@;
+ }
+
+$| = 1;
+
+my $csv = Text::CSV->new ({
+ types => [
+ Text::CSV::IV (),
+ Text::CSV::PV (),
+ Text::CSV::NV (),
+ ],
+ });
+
+ok ($csv, "CSV_XS->new ()");
+
+is (@{$csv->{types}}, 3, "->{types} as hash");
+is ($csv->{types}[0], Text::CSV::IV (), "type IV");
+is ($csv->{types}[1], Text::CSV::PV (), "type PV");
+is ($csv->{types}[2], Text::CSV::NV (), "type NV");
+
+is (ref ($csv->types), "ARRAY", "->types () as method");
+is ($csv->types ()->[0], Text::CSV::IV (), "type IV");
+is ($csv->types ()->[1], Text::CSV::PV (), "type PV");
+is ($csv->types ()->[2], Text::CSV::NV (), "type NV");
+
+is (length $csv->{_types}, 3, "->{_types}");
+my $inp = join "", map { chr $_ }
+ Text::CSV::IV (), Text::CSV::PV (), Text::CSV::NV ();
+# should be "\001\000\002"
+is ($csv->{_types}, $inp, "IV PV NV");
+
+ok ($csv->parse ("2.55,CSFDATVM01,3.75"), "parse ()");
+my @fields = $csv->fields ();
+is ($fields[0], "2", "Field 1");
+is ($fields[1], "CSFDATVM01", "Field 2");
+is ($fields[2], "3.75", "Field 3");
+
+ok ($csv->combine ("", "", "1.00"), "combine ()");
+is ($csv->string, ',,1.00', "string");
+
+my $warning;
+$SIG{__WARN__} = sub { $warning = shift };
+
+ok ($csv->parse ($csv->string ()), "parse (combine ())");
+like ($warning, qr/numeric/, "numeric warning");
+
+@fields = $csv->fields ();
+is ($fields[0], "0", "Field 1");
+is ($fields[1], "", "Field 2");
+is ($fields[2], "1", "Field 3");
+
+is ($csv->types (0), undef, "delete types");
+is ($csv->types, undef, "types gone");
diff --git a/src/test/resources/module/Text-CSV/t/40_misc.t b/src/test/resources/module/Text-CSV/t/40_misc.t
new file mode 100644
index 000000000..07be272d3
--- /dev/null
+++ b/src/test/resources/module/Text-CSV/t/40_misc.t
@@ -0,0 +1,63 @@
+#!/usr/bin/perl
+
+use strict;
+$^W = 1;
+
+use Test::More tests => 24;
+
+BEGIN {
+ $ENV{PERL_TEXT_CSV} = $ENV{TEST_PERL_TEXT_CSV} || 0;
+ require_ok "Text::CSV";
+ plan skip_all => "Cannot load Text::CSV" if $@;
+ require "./t/util.pl";
+ }
+
+$| = 1;
+
+my @binField = ("abc\0def\n\rghi", "ab\"ce,\031\"'", "\266");
+
+my $csv = Text::CSV->new ({ binary => 1 });
+ok ($csv->combine (@binField), "combine ()");
+
+my $string;
+is_binary ($string = $csv->string,
+ qq("abc"0def\n\rghi","ab""ce,\031""'",\266), "string ()");
+
+ok ($csv->parse ($string), "parse ()");
+is ($csv->fields, scalar @binField, "field count");
+
+my @field = $csv->fields ();
+for (0 .. $#binField) {
+ is ($field[$_], $binField[$_], "Field $_");
+ }
+
+ok (1, "eol \\r\\n");
+$csv->eol ("\r\n");
+ok ($csv->combine (@binField), "combine ()");
+is_binary ($csv->string,
+ qq("abc"0def\n\rghi","ab""ce,\031""'",\266\r\n), "string ()");
+
+ok (1, "eol \\n");
+$csv->eol ("\n");
+ok ($csv->combine (@binField), "combine ()");
+is_binary ($csv->string,
+ qq("abc"0def\n\rghi","ab""ce,\031""'",\266\n), "string ()");
+
+ok (1, "eol ,xxxxxxx\\n");
+$csv->eol (",xxxxxxx\n");
+ok ($csv->combine (@binField), "combine ()");
+is_binary ($csv->string,
+ qq("abc"0def\n\rghi","ab""ce,\031""'",\266,xxxxxxx\n), "string ()");
+
+$csv->eol ("\n");
+ok (1, "quote_char undef");
+$csv->quote_char (undef);
+ok ($csv->combine ("abc","def","ghi"), "combine");
+is ($csv->string, "abc,def,ghi\n", "string ()");
+
+# Ken's test
+ok (1, "always_quote");
+my $csv2 = Text::CSV->new ({ always_quote => 1 });
+ok ($csv2, "new ()");
+ok ($csv2->combine ("abc","def","ghi"), "combine ()");
+is ($csv2->string, '"abc","def","ghi"', "string ()");
diff --git a/src/test/resources/module/Text-CSV/t/41_null.t b/src/test/resources/module/Text-CSV/t/41_null.t
new file mode 100644
index 000000000..4a056d5dc
--- /dev/null
+++ b/src/test/resources/module/Text-CSV/t/41_null.t
@@ -0,0 +1,113 @@
+#!/usr/bin/perl
+
+use strict;
+$^W = 1;
+
+use Test::More tests => 128;
+BEGIN { $ENV{PERL_TEXT_CSV} = $ENV{TEST_PERL_TEXT_CSV} || 0; }
+use Text::CSV;
+
+my @pat = (
+ "00",
+ "\00",
+ "0\0",
+ "\0\0",
+
+ "0\n0",
+ "\0\n0",
+ "0\n\0",
+ "\0\n\0",
+
+ "\"0\n0",
+ "\"\0\n0",
+ "\"0\n\0",
+ "\"\0\n\0",
+
+ "\"0\n\"0",
+ "\"\0\n\"0",
+ "\"0\n\"\0",
+ "\"\0\n\"\0",
+
+ "0\n0",
+ "\0\n0",
+ "0\n\0",
+ "\0\n\0",
+ );
+my %exp = map {
+ my $x = $_;
+ $x =~ s/\0/\\0/g;
+ $x =~ s/\n/\\n/g;
+ ($_ => $x);
+ } @pat;
+my $line = ["", undef, "0\n", "", "\0\0\n0"];
+my $tfn = "_41test.csv"; END { -f $tfn and unlink $tfn; }
+
+my $csv = Text::CSV->new ({
+ eol => "\n",
+ binary => 1,
+ auto_diag => 1,
+ blank_is_undef => 1,
+ });
+
+ok ($csv->combine (@$line), "combine [ ... ]");
+is ($csv->string, qq{,,"0\n",,""0"0\n0"\n}, "string");
+
+open my $fh, ">", $tfn or die "$tfn: $!\n";
+binmode $fh;
+
+ok ($csv->print ($fh, [ $_ ]), "print $exp{$_}") for @pat;
+
+$csv->always_quote (1);
+
+ok ($csv->print ($fh, $line), "print [ ... ]");
+
+close $fh;
+
+open $fh, "<", $tfn or die "$tfn: $!\n";
+binmode $fh;
+
+foreach my $pat (@pat) {
+ ok (my $row = $csv->getline ($fh), "getline $exp{$pat}");
+ is ($row->[0], $pat, "data $exp{$pat}");
+ }
+
+is_deeply ($csv->getline ($fh), $line, "read [ ... ]");
+
+close $fh;
+unlink $tfn;
+
+$csv = Text::CSV->new ({
+ eol => "\n",
+ binary => 1,
+ auto_diag => 1,
+ blank_is_undef => 1,
+ quote_null => 0,
+ });
+
+ok ($csv->combine (@$line), "combine [ ... ]");
+is ($csv->string, qq{,,"0\n",,"\0\0\n0"\n}, "string");
+
+open $fh, ">", $tfn or die "$tfn: $!\n";
+binmode $fh;
+
+for (@pat) {
+ ok ($csv->print ($fh, [ $_ ]), "print $exp{$_}");
+ }
+
+$csv->always_quote (1);
+
+ok ($csv->print ($fh, $line), "print [ ... ]");
+
+close $fh;
+
+open $fh, "<", $tfn or die "$tfn: $!\n";
+binmode $fh;
+
+foreach my $pat (@pat) {
+ ok (my $row = $csv->getline ($fh), "getline $exp{$pat}");
+ is ($row->[0], $pat, "data $exp{$pat}");
+ }
+
+is_deeply ($csv->getline ($fh), $line, "read [ ... ]");
+
+close $fh;
diff --git a/src/test/resources/module/Text-CSV/t/45_eol.t b/src/test/resources/module/Text-CSV/t/45_eol.t
new file mode 100644
index 000000000..0ae48d692
--- /dev/null
+++ b/src/test/resources/module/Text-CSV/t/45_eol.t
@@ -0,0 +1,701 @@
+#!/usr/bin/perl
+
+use strict;
+$^W = 1;
+
+use Test::More tests => 1182;
+
+BEGIN {
+ $ENV{PERL_TEXT_CSV} = $ENV{TEST_PERL_TEXT_CSV} || 0;
+ require_ok "Text::CSV";
+ plan skip_all => "Cannot load Text::CSV" if $@;
+ require "./t/util.pl";
+ }
+
+$| = 1;
+
+# Embedded newline tests
+
+my $tfn = "_45eol.csv"; END { -f $tfn and unlink $tfn; }
+my $def_rs = $/;
+
+foreach my $rs ("\n", "\r\n", "\r") {
+ for $\ (undef, $rs) {
+
+ my $csv = Text::CSV->new ({ binary => 1 });
+ $csv->eol ($/ = $rs) unless defined $\;
+
+ foreach my $pass (0, 1) {
+ my $fh;
+ if ($pass == 0) {
+ open $fh, ">", $tfn or die "$tfn: $!\n";
+ }
+ else {
+ open $fh, "<", $tfn or die "$tfn: $!\n";
+ }
+
+ foreach my $eol ("", "\r", "\n", "\r\n", "\n\r") {
+ my $s_eol = join " - ", map { defined $_ ? $_ : "" } $\, $rs, $eol;
+ $s_eol =~ s/\r/\\r/g;
+ $s_eol =~ s/\n/\\n/g;
+
+ my @p;
+ my @f = ("", 1,
+ $eol, " $eol", "$eol ", " $eol ", "'$eol'",
+ "\"$eol\"", " \" $eol \"\n ", "EOL");
+
+ if ($pass == 0) {
+ ok ($csv->combine (@f), "combine |$s_eol|");
+ ok (my $str = $csv->string, "string |$s_eol|");
+ my $state = $csv->parse ($str);
+ ok ($state, "parse |$s_eol|");
+ if ($state) {
+ ok (@p = $csv->fields, "fields |$s_eol|");
+ }
+ else{
+ is ($csv->error_input, $str, "error |$s_eol|");
+ }
+
+ print $fh $str;
+ }
+ else {
+ ok (my $row = $csv->getline ($fh), "getline |$s_eol|");
+ is (ref $row, "ARRAY", "row |$s_eol|");
+ @p = @$row;
+ }
+
+ local $, = "|";
+ is_binary ("@p", "@f", "result |$s_eol|");
+ }
+
+ close $fh;
+ }
+
+ unlink $tfn;
+ }
+ }
+$/ = $def_rs;
+
+{ my $csv = Text::CSV->new ({ escape_char => undef });
+
+ ok ($csv->parse (qq{"x"\r\n}), "Trailing \\r\\n with no escape char");
+
+ is ($csv->eol ("\r"), "\r", "eol set to \\r");
+ ok ($csv->parse (qq{"x"\r}), "Trailing \\r with no escape char");
+
+ ok ($csv->allow_whitespace (1), "Allow whitespace");
+ ok ($csv->parse (qq{"x" \r}), "Trailing \\r with no escape char");
+ }
+
+SKIP: {
+ $] < 5.008 and skip "\$\\ tests don't work in perl 5.6.x and older", 2;
+ { local $\ = "#\r\n";
+ my $csv = Text::CSV->new ();
+ open my $fh, ">", $tfn or die "$tfn: $!\n";
+ $csv->print ($fh, [ "a", 1 ]);
+ close $fh;
+ open $fh, "<", $tfn or die "$tfn: $!\n";
+ local $/;
+ is (<$fh>, "a,1#\r\n", "Strange \$\\");
+ close $fh;
+ unlink $tfn;
+ }
+ { local $\ = "#\r\n";
+ my $csv = Text::CSV->new ({ eol => $\ });
+ open my $fh, ">", $tfn or die "$tfn: $!\n";
+ $csv->print ($fh, [ "a", 1 ]);
+ close $fh;
+ open $fh, "<", $tfn or die "$tfn: $!\n";
+ local $/;
+ is (<$fh>, "a,1#\r\n", "Strange \$\\ + eol");
+ close $fh;
+ unlink $tfn;
+ }
+ }
+$/ = $def_rs;
+
+ok (1, "Auto-detecting \\r");
+{ my @row = qw( a b c ); local $" = ",";
+ for (["\n", "\\n"], ["\r\n", "\\r\\n"], ["\r", "\\r"]) {
+ my ($eol, $s_eol) = @$_;
+ open my $fh, ">", $tfn or die "$tfn: $!\n";
+ print $fh qq{@row$eol@row$eol@row$eol\x91};
+ close $fh;
+ open $fh, "<", $tfn or die "$tfn: $!\n";
+ my $c = Text::CSV->new ({ binary => 1, auto_diag => 1 });
+ is ($c->eol (), "", "default EOL");
+ is_deeply ($c->getline ($fh), [ @row ], "EOL 1 $s_eol");
+ is ($c->eol (), $eol eq "\r" ? "\r" : "", "EOL");
+ is_deeply ($c->getline ($fh), [ @row ], "EOL 2 $s_eol");
+ is_deeply ($c->getline ($fh), [ @row ], "EOL 3 $s_eol");
+ close $fh;
+ unlink $tfn;
+ }
+ }
+
+ok (1, "Specific \\r test from tfrayner");
+{ $/ = "\r";
+ open my $fh, ">", $tfn or die "$tfn: $!\n";
+ print $fh qq{a,b,c$/}, qq{"d","e","f"$/};
+ close $fh;
+ open $fh, "<", $tfn or die "$tfn: $!\n";
+ my $c = Text::CSV->new ({ eol => $/ });
+
+ my $row;
+ local $" = " ";
+ ok ($row = $c->getline ($fh), "getline 1");
+ is (scalar @$row, 3, "# fields");
+ is ("@$row", "a b c", "fields 1");
+ ok ($row = $c->getline ($fh), "getline 2");
+ is (scalar @$row, 3, "# fields");
+ is ("@$row", "d e f", "fields 2");
+ close $fh;
+ unlink $tfn;
+ }
+$/ = $def_rs;
+
+ok (1, "EOL undef");
+foreach my $se (0, 1) {
+ $/ = "\r";
+ ok (my $csv = Text::CSV->new ({
+ eol => undef,
+ strict_eol => $se,
+ }), "new csv with eol => undef");
+ open my $fh, ">", $tfn or die "$tfn: $!\n";
+ ok ($csv->print ($fh, [1, 2, 3]), "print");
+ ok ($csv->print ($fh, [4, 5, 6]), "print");
+ close $fh;
+
+ open $fh, "<", $tfn or die "$tfn: $!\n";
+ ok (my $row = $csv->getline ($fh), "getline 1");
+ is (scalar @$row, 5, "# fields");
+ is_deeply ($row, [ 1, 2, 34, 5, 6], "fields 1");
+ close $fh;
+ unlink $tfn;
+ }
+$/ = $def_rs;
+
+foreach my $eol ("!", "!!", "!\n", "!\n!", "!!!!!!!!", "!!!!!!!!!!",
+ "\n!!!!!\n!!!!!", "!!!!!\n!!!!!\n", "%^+_\n\0!X**",
+ "\r\n", "\r") {
+ (my $s_eol = $eol) =~ s/\n/\\n/g;
+ $s_eol =~ s/\r/\\r/g;
+ $s_eol =~ s/\0/\\0/g;
+ ok (1, "EOL $s_eol");
+ ok (my $csv = Text::CSV->new ({ eol => $eol }), "new csv with eol => $s_eol");
+ open my $fh, ">", $tfn or die "$tfn: $!\n";
+ ok ($csv->print ($fh, [1, 2, 3]), "print");
+ ok ($csv->print ($fh, [4, 5, 6]), "print");
+ close $fh;
+
+ foreach my $rs (undef, "", "\n", $eol, "!", "!\n", "\n!", "!\n!", "\n!\n") {
+ local $/ = $rs;
+ (my $s_rs = defined $rs ? $rs : "-- undef --") =~ s/\n/\\n/g;
+ ok (1, "with RS $s_rs");
+ open $fh, "<", $tfn or die "$tfn: $!\n";
+ ok (my $row = $csv->getline ($fh), "getline 1");
+ is (scalar @$row, 3, "field count");
+ is_deeply ($row, [ 1, 2, 3], "fields 1");
+ ok ( $row = $csv->getline ($fh), "getline 2");
+ is (scalar @$row, 3, "field count");
+ is_deeply ($row, [ 4, 5, 6], "fields 2");
+ close $fh;
+ }
+ unlink $tfn;
+ }
+$/ = $def_rs;
+
+
+foreach my $se (0, 1) {
+ my @w;
+ local $SIG{__WARN__} = sub { push @w => @_ };
+ open my $fh, "<", "files/macosx.csv" or die "files/macosx.csv: $!";
+ ok (1, "MacOSX exported file");
+ ok (my $csv = Text::CSV->new ({
+ auto_diag => 1,
+ binary => 1,
+ strict_eol => $se,
+ }), "new csv");
+ ok (my $row = $csv->getline ($fh), "getline 1");
+ is (scalar @$row, 15, "field count");
+ is ($row->[7], "", "field 8");
+ ok ( $row = $csv->getline ($fh), "getline 2");
+ is (scalar @$row, 15, "field count");
+ is ($row->[6], "Category", "field 7");
+ ok ( $row = $csv->getline ($fh), "getline 3");
+ is (scalar @$row, 15, "field count");
+ is ($row->[5], "Notes", "field 6");
+ ok ( $row = $csv->getline ($fh), "getline 4");
+ is (scalar @$row, 15, "field count");
+ is ($row->[7], "Points", "field 8");
+ ok ( $row = $csv->getline ($fh), "getline 5");
+ is (scalar @$row, 15, "field count");
+ is ($row->[7], 11, "field 8");
+ ok ( $row = $csv->getline ($fh), "getline 6");
+ is (scalar @$row, 15, "field count");
+ is ($row->[8], 34, "field 9");
+ ok ( $row = $csv->getline ($fh), "getline 7");
+ is (scalar @$row, 15, "field count");
+ is ($row->[7], 12, "field 8");
+ ok ( $row = $csv->getline ($fh), "getline 8");
+ is (scalar @$row, 15, "field count");
+ is ($row->[8], 2, "field 9");
+ ok ( $row = $csv->getline ($fh), "getline 9");
+ is (scalar @$row, 15, "field count");
+ is ($row->[3], "devs", "field 4");
+ ok ( $row = $csv->getline ($fh), "getline 10");
+ is (scalar @$row, 15, "field count");
+ is ($row->[3], "", "field 4");
+ ok ( $row = $csv->getline ($fh), "getline 11");
+ is (scalar @$row, 15, "field count");
+ is ($row->[6], "Mean", "field 7");
+ ok ( $row = $csv->getline ($fh), "getline 12");
+ is (scalar @$row, 15, "field count");
+ is ($row->[6], "Median", "field 7");
+ ok ( $row = $csv->getline ($fh), "getline 13");
+ is (scalar @$row, 15, "field count");
+ is ($row->[6], "Mode", "field 7");
+ ok ( $row = $csv->getline ($fh), "getline 14");
+ is (scalar @$row, 15, "field count");
+ is ($row->[6], "Min", "field 7");
+ ok ( $row = $csv->getline ($fh), "getline 15");
+ is (scalar @$row, 15, "field count");
+ is ($row->[6], "Max", "field 7");
+ ok ( $row = $csv->getline ($fh), "getline 16");
+ is (scalar @$row, 15, "field count");
+ is ($row->[0], "", "field 1");
+ close $fh;
+ if ($se) {
+ like ($w[0], qr{2016 - EOL}, "Got EOL warning");
+ }
+ else {
+ is_deeply (\@w, [], "No warnings");
+ }
+ }
+
+{ ok (my $csv = Text::CSV->new ({ auto_diag => 1, binary => 1 }), "new csv");
+ ok ($csv->eol ("--"), "eol = --");
+ ok ($csv->parse (qq{1,"2--3",4}), "no eol");
+ is_deeply ([$csv->fields], [ "1", "2--3", 4 ], "parse");
+ ok ($csv->parse (qq{1,"2--3",4--}), "eol");
+ is_deeply ([$csv->fields], [ "1", "2--3", 4 ], "parse");
+ ok ($csv->parse (qq{1,"2--3",4,--}), ",eol");
+ is_deeply ([$csv->fields], [ "1", "2--3", 4, "" ], "parse");
+
+ open my $fh, ">", $tfn or die "$tfn: $!\n";
+ print $fh qq{1,"2--3",4--};
+ print $fh qq{1,"2--3",4,--};
+ print $fh qq{1,"2--3",4};
+ close $fh;
+ open $fh, "<", $tfn or die "$tfn: $!\n";
+ is_deeply ($csv->getline ($fh), [ "1", "2--3", 4 ], "getline eol");
+ is_deeply ($csv->getline ($fh), [ "1", "2--3", 4, "" ], "getline ,eol");
+ is_deeply ($csv->getline ($fh), [ "1", "2--3", 4 ], "getline eof");
+ close $fh;
+ }
+
+{ ok (my $csv = Text::CSV->new (), "new csv");
+ ok ($csv->parse (qq{"a","b","c"\r\n}), "parse \\r\\n");
+ is_deeply ([$csv->fields], [qw( a b c )], "result");
+ ok ($csv->allow_loose_escapes (1), "allow loose escapes");
+ ok ($csv->parse (qq{"a","b","c"\r\n}), "parse \\r\\n");
+ is_deeply ([$csv->fields], [qw( a b c )], "result");
+ }
+
+foreach my $eol ("\n", "\r\n", "\r") {
+ my $s_eol = $eol;
+ $s_eol =~ s{\r}{\\r};
+ $s_eol =~ s{\n}{\\n};
+ foreach my $before ("1,2$eol", "") {
+ open my $fh, ">", $tfn or die "$tfn: $!\n";
+ print $fh $before; # To test if skipping the very first line works
+ print $fh $eol; # skipped
+ print $fh qq{ $eol}; # -> [ " " ]
+ print $fh qq{,$eol}; # -> [ "", "" ]
+ print $fh $eol; # skipped
+ print $fh qq{""$eol}; # -> [ "" ]
+ print $fh qq{eol$eol}; # -> [ "eol" ]
+ close $fh;
+
+ my @expect = ([ " " ], [ "", "" ], [ "" ], [ "eol" ]);
+ $before and unshift @expect => [ 1, 2 ];
+
+ open $fh, "<", $tfn or die "$tfn: $!\n";
+ my $csv = Text::CSV->new ({
+ skip_empty_rows => 1,
+ eol => $eol,
+ });
+ my @csv;
+ while (my $row = $csv->getline ($fh)) {
+ push @csv => $row;
+ }
+ close $fh;
+ is_deeply (\@csv, \@expect, "Empty lines skipped $s_eol\tEOL set");
+
+ open $fh, "<", $tfn or die "$tfn: $!\n";
+ $csv = Text::CSV->new ({ skip_empty_rows => 1 });
+ @csv = ();
+ while (my $row = $csv->getline ($fh)) {
+ push @csv => $row;
+ }
+ close $fh;
+ is_deeply (\@csv, \@expect, "Empty lines skipped $s_eol\tauto-detect");
+ }
+ }
+
+my %ers = (
+ # For backward compat :( - on 2024-12-05 XS and PP acted identical
+ # some are not OK or at least do not DWIM in hindsight
+ # strict : skip : reset : quoted
+ '0:0:0:' => [[ "Aardvark", "snort", ],
+ [ "", ],
+ [ "Alpaca", "spit", ],
+ [ "Badger", "growl", ],
+ [ "Bat", "screech", ],
+ [ "Bear", "roar", ],
+ [ "Bee", "buzz", ],
+ [ "", ],
+ [ "Camel", "grunt", ],
+ [ "", ],
+ [ "", ],
+ [ "Crow", "caw", ],
+ [ "", ],
+ [ "", ],
+ [ 2012, 0, 15, 0, "" ]], # EOF
+ '0:0:1:' => [[ "Aardvark", "snort", ],
+ [ "", ],
+ [ "Alpaca", "spit", ],
+ [ "Badger", "growl", ],
+ [ "Bat", "screech", ],
+ [ "Bear", "roar", ],
+ [ "Bee", "buzz", ],
+ [ "Camel", "grunt", ],
+ [ "Cobra", "shh", ],
+ [ "", ],
+ [ "Crow", "caw", ],
+ [ "Deer", "bellow", ],
+ [ "Dolphin", "click", ],
+ [ 2012, 0, 14, 0, "" ]], # EOF
+ '0:1:0:' => [[ "Aardvark", "snort", ],
+ [ "Alpaca", "spit", ],
+ [ "Badger", "growl", ],
+ [ "Bat", "screech", ],
+ [ "Bear", "roar", ],
+ [ "Bee", "buzz", ],
+ [ "Camel", "grunt", ],
+ [ "Cobra", "shh", ],
+ [ "Crow", "caw", ],
+ [ "Deer", "bellow", ],
+ [ 2012, 0, 11, 0, "" ]], # EOF
+ '0:1:1:' => [[ "Aardvark", "snort", ],
+ [ "Alpaca", "spit", ],
+ [ "Badger", "growl", ],
+ [ "Bat", "screech", ],
+ [ "Bear", "roar", ],
+ [ "Bee", "buzz", ],
+ [ "Camel", "grunt", ],
+ [ "Cobra", "shh", ],
+ [ "Crow", "caw", ],
+ [ "Deer", "bellow", ],
+ [ "Dolphin", "click", ],
+ [ 2012, 0, 12, 0, "" ]], # EOF
+ '0:0:0:"' => [[ "Aardvark", "snort", ],
+ [ "", ],
+ [ "Alpaca", "spit", ],
+ [ "Badger", "growl", ],
+ [ "Bat", "screech", ],
+ [ "Bear", "roar", ],
+ [ "Bee", "buzz", ],
+ [ "", ],
+ [ "Camel", "grunt", ],
+ [ "", ],
+ [ "", ],
+ [ "Crow", "caw", ],
+ [ "", ],
+ [ "", ],
+ [ 2012, 0, 15, 0, "" ]], # EOF
+ '0:0:1:"' => [[ "Aardvark", "snort", ],
+ [ "", ],
+ [ "Alpaca", "spit", ],
+ [ "Badger", "growl", ],
+ [ "Bat", "screech", ],
+ [ "Bear", "roar", ],
+ [ "Bee", "buzz", ],
+ [ "Camel", "grunt", ],
+ [ "Cobra", "shh", ],
+ #[ "Crow", "caw", ], WRONG
+ [ "Deer", "bellow", ],
+ [ "Dolphin", "click", ],
+ [ 2012, 0, 12, 0, "" ]], # EOF
+ '0:1:0:"' => [[ "Aardvark", "snort", ],
+ [ "Alpaca", "spit", ],
+ [ "Badger", "growl", ],
+ [ "Bat", "screech", ],
+ [ "Bear", "roar", ],
+ [ "Bee", "buzz", ],
+ [ "Camel", "grunt", ],
+ [ "Cobra", "shh", ],
+ [ "Crow", "caw", ],
+ [ "Deer", "bellow", ], # WRONG
+ [ 2012, 0, 11, 0, "" ]], # EOF
+ '0:1:1:"' => [[ "Aardvark", "snort", ],
+ [ "Alpaca", "spit", ],
+ [ "Badger", "growl", ],
+ [ "Bat", "screech", ],
+ [ "Bear", "roar", ],
+ [ "Bee", "buzz", ],
+ [ "Camel", "grunt", ],
+ [ "Cobra", "shh", ],
+ #[ "Crow", "caw", ], WRONG
+ [ "Deer", "bellow", ],
+ [ "Dolphin", "click", ],
+ [ 2012, 0, 11, 0, "" ]], # EOF
+
+ # Strict EOL warn / strict : skip : reset : quoted
+ '1:0:0:' => [[ "Aardvark", "snort", ],
+ [ "", ],
+ [ "Alpaca", "spit", ],
+ [ "Badger", "growl", ],
+ [ "Bat", "screech", ],
+ [ "Bear", "roar", ],
+ [ "Bee", "buzz", ],
+ [ "Camel", "grunt", ],
+ [ "Cobra", "shh", ],
+ [ "", ],
+ [ "Crow", "caw", ],
+ [ "Deer", "bellow", ],
+ [ "Dolphin", "click", ],
+ [ 2012, 13, 14, 2, "2016 - EOL" ]],
+ '1:0:1:' => [[ "Aardvark", "snort", ],
+ [ "", ],
+ [ "Alpaca", "spit", ],
+ [ "Badger", "growl", ],
+ [ "Bat", "screech", ],
+ [ "Bear", "roar", ],
+ [ "Bee", "buzz", ],
+ [ "Camel", "grunt", ],
+ [ "Cobra", "shh", ],
+ [ "", ],
+ [ "Crow", "caw", ],
+ [ "Deer", "bellow", ],
+ [ "Dolphin", "click", ],
+ [ 2012, 0, 14, 0, "" ]], # EOF
+ '1:1:0:' => [[ "Aardvark", "snort", ],
+ [ "Alpaca", "spit", ],
+ [ "Badger", "growl", ],
+ [ "Bat", "screech", ],
+ [ "Bear", "roar", ],
+ [ "Bee", "buzz", ],
+ [ "Camel", "grunt", ],
+ [ "Cobra", "shh", ],
+ [ "Crow", "caw", ],
+ [ "Deer", "bellow", ],
+ [ "Dolphin", "click", ],
+ [ 2012, 13, 12, 2, "2016 - EOL" ]],
+ '1:1:1:' => [[ "Aardvark", "snort", ],
+ [ "Alpaca", "spit", ],
+ [ "Badger", "growl", ],
+ [ "Bat", "screech", ],
+ [ "Bear", "roar", ],
+ [ "Bee", "buzz", ],
+ [ "Camel", "grunt", ],
+ [ "Cobra", "shh", ],
+ [ "Crow", "caw", ],
+ [ "Deer", "bellow", ],
+ [ "Dolphin", "click", ],
+ [ 2012, 10, 12, 1, "2016 - EOL" ]],
+ '1:0:0:"' => [[ "Aardvark", "snort", ],
+ [ "", ],
+ [ "Alpaca", "spit", ],
+ [ "Badger", "growl", ],
+ [ "Bat", "screech", ],
+ [ "Bear", "roar", ],
+ [ "Bee", "buzz", ],
+ [ "Camel", "grunt", ],
+ [ "Cobra", "shh", ],
+ [ "", ],
+ [ "Crow", "caw", ],
+ [ "Deer", "bellow", ],
+ [ "Dolphin", "click", ],
+ [ 2012, 15, 14, 2, "2016 - EOL" ]],
+ '1:0:1:"' => [[ "Aardvark", "snort", ],
+ [ "", ],
+ [ "Alpaca", "spit", ],
+ [ "Badger", "growl", ],
+ [ "Bat", "screech", ],
+ [ "Bear", "roar", ],
+ [ "Bee", "buzz", ],
+ [ "Camel", "grunt", ],
+ [ "Cobra", "shh", ],
+ #[ "Crow", "caw", ], WRONG: might change
+ [ "Deer", "bellow", ],
+ [ "Dolphin", "click", ],
+ [ 2012, 13, 12, 2, "2016 - EOL" ]],
+ '1:1:0:"' => [[ "Aardvark", "snort", ],
+ [ "Alpaca", "spit", ],
+ [ "Badger", "growl", ],
+ [ "Bat", "screech", ],
+ [ "Bear", "roar", ],
+ [ "Bee", "buzz", ],
+ [ "Camel", "grunt", ],
+ [ "Cobra", "shh", ],
+ [ "Crow", "caw", ],
+ [ "Deer", "bellow", ],
+ [ "Dolphin", "click", ],
+ [ 2012, 15, 12, 2, "2016 - EOL" ]],
+ '1:1:1:"' => [[ "Aardvark", "snort", ],
+ [ "Alpaca", "spit", ],
+ [ "Badger", "growl", ],
+ [ "Bat", "screech", ],
+ [ "Bear", "roar", ],
+ [ "Bee", "buzz", ],
+ [ "Camel", "grunt", ],
+ [ "Cobra", "shh", ],
+ #[ "Crow", "caw", ], WRONG, might change
+ [ "Deer", "bellow", ],
+ [ "Dolphin", "click", ],
+ [ 2012, 13, 11, 2, "2016 - EOL" ]],
+
+ # Strict EOL croak / strict : skip : reset : quoted
+ '2:0:0:' => [[ "Aardvark", "snort", ],
+ [ "", ],
+ [ "Alpaca", "spit", ],
+ [ 2016, 13, 4, 2, "2016 - EOL" ]],
+ '2:0:1:' => [[ "Aardvark", "snort", ],
+ [ "", ],
+ [ "Alpaca", "spit", ],
+ [ "Badger", "growl", ],
+ [ "Bat", "screech", ],
+ [ "Bear", "roar", ],
+ [ "Bee", "buzz", ],
+ [ "Camel", "grunt", ],
+ [ "Cobra", "shh", ],
+ [ "", ],
+ [ "Crow", "caw", ],
+ [ "Deer", "bellow", ],
+ [ "Dolphin", "click", ],
+ [ 2012, 0, 14, 0, "" ]], # EOF
+ '2:1:0:' => [[ "Aardvark", "snort", ],
+ [ "Alpaca", "spit", ],
+ [ 2016, 13, 3, 2, "2016 - EOL" ]],
+ '2:1:1:' => [[ "Aardvark", "snort", ],
+ [ "Alpaca", "spit", ],
+ [ "Badger", "growl", ],
+ [ "Bat", "screech", ],
+ [ "Bear", "roar", ],
+ [ "Bee", "buzz", ],
+ [ "Camel", "grunt", ],
+ [ "Cobra", "shh", ],
+ [ 2016, 10, 9, 1, "2016 - EOL" ]],
+ '2:0:0:"' => [[ "Aardvark", "snort", ],
+ [ "", ],
+ [ "Alpaca", "spit", ],
+ [ 2016, 15, 4, 2, "2016 - EOL" ]],
+ '2:0:1:"' => [[ "Aardvark", "snort", ],
+ [ "", ],
+ [ "Alpaca", "spit", ],
+ [ "Badger", "growl", ],
+ [ "Bat", "screech", ],
+ [ "Bear", "roar", ],
+ [ "Bee", "buzz", ],
+ [ "Camel", "grunt", ],
+ [ 2016, 13, 9, 2, "2016 - EOL" ]],
+ '2:1:0:"' => [[ "Aardvark", "snort", ],
+ [ "Alpaca", "spit", ],
+ [ 2016, 15, 3, 2, "2016 - EOL" ]],
+ '2:1:1:"' => [[ "Aardvark", "snort", ],
+ [ "Alpaca", "spit", ],
+ [ "Badger", "growl", ],
+ [ "Bat", "screech", ],
+ [ "Bear", "roar", ],
+ [ "Bee", "buzz", ],
+ [ "Camel", "grunt", ],
+ #[ "Cobra", "shh", ], NOT stored, documented, might change
+ [ 2016, 13, 8, 2, "2016 - EOL" ]],
+ );
+
+foreach my $q ('', '"') {
+ open my $fh, ">", $tfn or die "$tfn: $!\n";
+ print $fh "Aardvark,${q}snort${q}\r\n";
+ print $fh "\r\n"; # Empty line
+ print $fh "Alpaca,${q}spit${q}\r\n";
+ print $fh "Badger,${q}growl${q}\n"; # only newline
+ print $fh "Bat,${q}screech${q}\r\n";
+ print $fh "Bear,${q}roar${q}\r"; # only carriage return - no newline
+ print $fh "Bee,${q}buzz${q}\r\n";
+ print $fh "Camel,${q}grunt${q}\r\n";
+ print $fh "Cobra,${q}shh${q}\r\r"; # two CR's
+ print $fh "Crow,${q}caw${q}\r\n";
+ print $fh "Deer,${q}bellow${q}\n"; # only newline
+ print $fh "Dolphin,${q}click${q}\r\n";
+ close $fh;
+
+ foreach my $se (0, 1, 2) {
+ foreach my $ser (0, 1) {
+ foreach my $reset (0, 1) {
+ my $tag = join ":" => $se, $ser, $reset, $q;
+ open $fh, "<", $tfn or die "$tfn: $!\n";
+ my $csv = Text::CSV->new ({
+ strict_eol => $se,
+ skip_empty_rows => $ser,
+ auto_diag => 1,
+ diag_verbose => 1,
+ # Do NOT set binary!
+ });
+
+ my (@r, @w);
+ eval {
+ local $SIG{__WARN__} = sub { push @w => @_ };
+ while (my $row = $csv->getline ($fh)) {
+ push @r => [ @$row ];
+ $reset and $csv->eol (undef);
+ }
+ close $fh;
+ };
+ my @diag = $csv->error_diag;
+ my $warn = join " | " => map { substr $_, 16, 10 } @w;
+ my $got = [ @r, [ @diag[0, 2, 3, 4], $warn ]];
+ my $exp = $ers{$tag};
+ unless (is_deeply ($got, $exp, $tag)) {
+ # use Data::Peek;
+ #diag DDumper { got => $got, tag => $tag };
+ }
+ }
+ }
+ }
+ }
+
+# strict_eol should NOT warn/die/complain on deviating EOL inside quoted fields
+{ open my $fh, ">", $tfn or die "$tfn: $!\n";
+ print $fh qq{Abrax,Booz,Wrox\r\n};
+ print $fh qq{Foo,"x\ry",Ornf\r\n};
+ print $fh qq{Cruy,"a\nb",Hye\r\n};
+ print $fh qq{Daj,"f\r\nb",Uf\r\n};
+ close $fh;
+
+ foreach my $se (0, 1, 2) {
+ my $tag = join ":" => "SE $se";
+ open $fh, "<", $tfn or die "$tfn: $!\n";
+ my $csv = Text::CSV->new ({
+ strict_eol => $se,
+ auto_diag => 1,
+ diag_verbose => 1,
+ binary => 1,
+ });
+
+ my (@r, @w);
+ eval {
+ local $SIG{__WARN__} = sub { push @w => @_ };
+ while (my $row = $csv->getline ($fh)) {
+ push @r => [ @$row ];
+ }
+ close $fh;
+ };
+ my @diag = $csv->error_diag;
+ my $warn = join " | " => map { substr $_, 16, 10 } @w;
+ is (scalar @r, 4, "$tag: Got 4 rows");
+ is (scalar @w, 0, "$tag: Got no warnings");
+ }
+ }
+
+1;
diff --git a/src/test/resources/module/Text-CSV/t/46_eol_si.t b/src/test/resources/module/Text-CSV/t/46_eol_si.t
new file mode 100644
index 000000000..23bea6359
--- /dev/null
+++ b/src/test/resources/module/Text-CSV/t/46_eol_si.t
@@ -0,0 +1,265 @@
+#!/usr/bin/perl
+
+use strict;
+$^W = 1;
+
+use Config;
+use Test::More;
+
+BEGIN {
+ unless (exists $Config{useperlio} &&
+ defined $Config{useperlio} &&
+ $] >= 5.008 && # perlio was experimental in 5.6.2, but not reliable
+ $Config{useperlio} eq "define") {
+ plan skip_all => "No reliable perlIO available";
+ }
+ else {
+ plan tests => 562;
+ }
+ }
+
+BEGIN {
+ $ENV{PERL_TEXT_CSV} = $ENV{TEST_PERL_TEXT_CSV} || 0;
+ require_ok "Text::CSV";
+ plan skip_all => "Cannot load Text::CSV" if $@;
+ require "./t/util.pl";
+ }
+
+$| = 1;
+
+# Embedded newline tests
+
+my $file = "";
+
+my $def_rs = $/;
+
+foreach my $rs ("\n", "\r\n", "\r") {
+ for $\ (undef, $rs) {
+
+ my $csv = Text::CSV->new ({ binary => 1 });
+ $csv->eol ($/ = $rs) unless defined $\;
+
+ my $fh;
+ foreach my $pass (0, 1) {
+ if ($pass == 0) {
+ $file = "";
+ open $fh, ">", \$file or die "IO: $!\n";
+ }
+ else {
+ open $fh, "<", \$file or die "IO: $!\n";
+ }
+
+ foreach my $eol ("", "\r", "\n", "\r\n", "\n\r") {
+ my $s_eol = join " - ", map { defined $_ ? $_ : "" } $\, $rs, $eol;
+ $s_eol =~ s/\r/\\r/g;
+ $s_eol =~ s/\n/\\n/g;
+
+ my @p;
+ my @f = ("", 1,
+ $eol, " $eol", "$eol ", " $eol ", "'$eol'",
+ "\"$eol\"", " \" $eol \"\n ", "EOL");
+
+ if ($pass == 0) {
+ ok ($csv->combine (@f), "combine |$s_eol|");
+ ok (my $str = $csv->string, "string |$s_eol|");
+ my $state = $csv->parse ($str);
+ ok ($state, "parse |$s_eol|");
+ if ($state) {
+ ok (@p = $csv->fields, "fields |$s_eol|");
+ }
+ else{
+ is ($csv->error_input, $str, "error |$s_eol|");
+ }
+
+ print $fh $str;
+ }
+ else {
+ ok (my $row = $csv->getline ($fh), "getline |$s_eol|");
+ is (ref $row, "ARRAY", "row |$s_eol|");
+ @p = @$row;
+ }
+
+ local $, = "|";
+ is_binary ("@p", "@f", "result |$s_eol|");
+ }
+
+ close $fh;
+ }
+
+ }
+ }
+$/ = $def_rs;
+
+{ my $csv = Text::CSV->new ({ escape_char => undef });
+
+ ok ($csv->parse (qq{"x"\r\n}), "Trailing \\r\\n with no escape char");
+
+ is ($csv->eol ("\r"), "\r", "eol set to \\r");
+ ok ($csv->parse (qq{"x"\r}), "Trailing \\r with no escape char");
+
+ ok ($csv->allow_whitespace (1), "Allow whitespace");
+ ok ($csv->parse (qq{"x" \r}), "Trailing \\r with no escape char");
+ }
+
+SKIP: {
+ $] < 5.008 and skip "\$\\ tests don't work in perl 5.6.x and older", 2;
+ { local $\ = "#\r\n";
+ my $csv = Text::CSV->new ();
+ $file = "";
+ open my $fh, ">", \$file or die "IO: $!\n";
+ $csv->print ($fh, [ "a", 1 ]);
+ close $fh;
+ open $fh, "<", \$file or die "IO: $!\n";
+ local $/;
+ is (<$fh>, "a,1#\r\n", "Strange \$\\");
+ close $fh;
+ }
+ { local $\ = "#\r\n";
+ my $csv = Text::CSV->new ({ eol => $\ });
+ $file = "";
+ open my $fh, ">", \$file or die "IO: $!\n";
+ $csv->print ($fh, [ "a", 1 ]);
+ close $fh;
+ open $fh, "<", \$file or die "IO: $!\n";
+ local $/;
+ is (<$fh>, "a,1#\r\n", "Strange \$\\ + eol");
+ close $fh;
+ }
+ }
+$/ = $def_rs;
+
+ok (1, "Auto-detecting \\r");
+{ my @row = qw( a b c ); local $" = ",";
+ for (["\n", "\\n"], ["\r\n", "\\r\\n"], ["\r", "\\r"]) {
+ my ($eol, $s_eol) = @$_;
+ $file = "";
+ open my $fh, ">", \$file or die "IO: $!\n";
+ print $fh qq{@row$eol@row$eol@row$eol\x91};
+ close $fh;
+ open $fh, "<", \$file or die "IO: $!\n";
+ my $c = Text::CSV->new ({ binary => 1, auto_diag => 1 });
+ is ($c->eol (), "", "default EOL");
+ is_deeply ($c->getline ($fh), [ @row ], "EOL 1 $s_eol");
+ is ($c->eol (), $eol eq "\r" ? "\r" : "", "EOL");
+ is_deeply ($c->getline ($fh), [ @row ], "EOL 2 $s_eol");
+ is_deeply ($c->getline ($fh), [ @row ], "EOL 3 $s_eol");
+ close $fh;
+ }
+ }
+
+ok (1, "Specific \\r test from tfrayner");
+{ $/ = "\r";
+ $file = "";
+ open my $fh, ">", \$file or die "IO: $!\n";
+ print $fh qq{a,b,c$/}, qq{"d","e","f"$/};
+ close $fh;
+ open $fh, "<", \$file or die "IO: $!\n";
+ my $c = Text::CSV->new ({ eol => $/ });
+
+ my $row;
+ local $" = " ";
+ ok ($row = $c->getline ($fh), "getline 1");
+ is (scalar @$row, 3, "# fields");
+ is ("@$row", "a b c", "fields 1");
+ ok ($row = $c->getline ($fh), "getline 2");
+ is (scalar @$row, 3, "# fields");
+ is ("@$row", "d e f", "fields 2");
+ close $fh;
+ }
+$/ = $def_rs;
+
+ok (1, "EOL undef");
+{ $/ = "\r";
+ ok (my $csv = Text::CSV->new ({ eol => undef }), "new csv with eol => undef");
+ $file = "";
+ open my $fh, ">", \$file or die "IO: $!\n";
+ ok ($csv->print ($fh, [1, 2, 3]), "print");
+ ok ($csv->print ($fh, [4, 5, 6]), "print");
+ close $fh;
+
+ open $fh, "<", \$file or die "IO: $!\n";
+ ok (my $row = $csv->getline ($fh), "getline 1");
+ is (scalar @$row, 5, "# fields");
+ is_deeply ($row, [ 1, 2, 34, 5, 6], "fields 1");
+ close $fh;
+ }
+$/ = $def_rs;
+
+foreach my $eol ("!", "!!", "!\n", "!\n!") {
+ (my $s_eol = $eol) =~ s/\n/\\n/g;
+ ok (1, "EOL $s_eol");
+ ok (my $csv = Text::CSV->new ({ eol => $eol }), "new csv with eol => $s_eol");
+ $file = "";
+ open my $fh, ">", \$file or die "IO: $!\n";
+ ok ($csv->print ($fh, [1, 2, 3]), "print");
+ ok ($csv->print ($fh, [4, 5, 6]), "print");
+ close $fh;
+
+ foreach my $rs (undef, "", "\n", $eol, "!", "!\n", "\n!", "!\n!", "\n!\n") {
+ local $/ = $rs;
+ (my $s_rs = defined $rs ? $rs : "-- undef --") =~ s/\n/\\n/g;
+ ok (1, "with RS $s_rs");
+ open $fh, "<", \$file or die "IO: $!\n";
+ ok (my $row = $csv->getline ($fh), "getline 1");
+ is (scalar @$row, 3, "# fields");
+ is_deeply ($row, [ 1, 2, 3], "fields 1");
+ ok ( $row = $csv->getline ($fh), "getline 2");
+ is (scalar @$row, 3, "# fields");
+ is_deeply ($row, [ 4, 5, 6], "fields 2");
+ close $fh;
+ }
+ }
+$/ = $def_rs;
+
+{ ok (my $csv = Text::CSV->new, "new for say");
+ my $foo;
+ open my $fh, ">", \$foo or die "IO: $!\n";
+ ok ($csv->say ($fh, [ 1, 2 ]), "say");
+ close $fh;
+ is ($foo, "1,2$/", "content with eol \$/");
+ $foo = "";
+ $csv->eol (undef);
+ open $fh, ">", \$foo or die "IO: $!\n";
+ ok ($csv->say ($fh, [ 1, 2 ]), "say");
+ close $fh;
+ $foo = "";
+ $csv->eol ("");
+ open $fh, ">", \$foo or die "IO: $!\n";
+ ok ($csv->say ($fh, [ 1, 2 ]), "say");
+ close $fh;
+ is ($foo, "1,2$/", "content with eol \$/");
+ $foo = "";
+ $csv->eol ("#");
+ open $fh, ">", \$foo or die "IO: $!\n";
+ ok ($csv->say ($fh, [ 1, 2 ]), "say");
+ close $fh;
+ is ($foo, "1,2#", "content with eol #");
+ $foo = "";
+ $csv->eol ("0");
+ open $fh, ">", \$foo or die "IO: $!\n";
+ ok ($csv->say ($fh, [ 1, 2 ]), "say");
+ close $fh;
+ is ($foo, "1,20", "content with eol 0");
+ }
+
+{ ok (my $csv = Text::CSV->new, "new for say");
+ my $foo;
+ my $dta = "x";
+ ok ($csv->bind_columns (\$dta), "bind columns");
+
+ local $\ = undef;
+ local $/ = "\n";
+
+ open my $fh, ">", \$foo or die "IO: $!\n";
+ ok ($csv->print ($fh, undef), "print");
+ close $fh;
+ is ($foo, "x", "print, no newline");
+
+ $foo = "";
+ open $fh, ">", \$foo or die "IO: $!\n";
+ ok ($csv->say ($fh, undef), "say");
+ close $fh;
+ is ($foo, "x\n", "say, with newline");
+ }
+
+1;
diff --git a/src/test/resources/module/Text-CSV/t/47_comment.t b/src/test/resources/module/Text-CSV/t/47_comment.t
new file mode 100644
index 000000000..c9473aaa2
--- /dev/null
+++ b/src/test/resources/module/Text-CSV/t/47_comment.t
@@ -0,0 +1,138 @@
+#!/usr/bin/perl
+
+use strict;
+$^W = 1;
+
+use Test::More;
+BEGIN { $ENV{PERL_TEXT_CSV} = $ENV{TEST_PERL_TEXT_CSV} || 0; }
+use Text::CSV qw(csv);
+
+BEGIN {
+ if ($] < 5.008002) {
+ plan skip_all => "These tests require Encode and Unicode support";
+ }
+ else {
+ require Encode;
+ plan tests => 71;
+ }
+ require "./t/util.pl";
+ }
+
+$| = 1;
+
+my $tfn = "_47cmnt.csv"; END { -f $tfn and unlink $tfn; }
+
+foreach my $cstr ("#", "//", "Comment", "\xe2\x98\x83") {
+ foreach my $rest ("", " 1,2", "a,b") {
+
+ my $csv = Text::CSV->new ({ binary => 1 });
+ $csv->comment_str ($cstr);
+
+ my $fh;
+ open $fh, ">", $tfn or die "$tfn: $!\n";
+ print $fh qq{$cstr$rest\n};
+ print $fh qq{c,$cstr\n};
+ print $fh qq{ $cstr\n};
+ print $fh qq{e,$cstr,$rest\n};
+ print $fh qq{$cstr\n};
+ print $fh qq{g,i$cstr\n};
+ print $fh qq{j,"k\n${cstr}k"\n};
+ print $fh qq{$cstr\n};
+ close $fh;
+
+ open $fh, "<", $tfn or die "$tfn: $!\n";
+
+ my $cuni = Encode::decode ("utf-8", $cstr);
+ my @rest = split m/,/ => $rest, -1; @rest or push @rest => "";
+
+ is_deeply ($csv->getline ($fh), [ "c", $cuni ], "$cstr , $rest");
+ is_deeply ($csv->getline ($fh), [ " $cuni" ], "leading space");
+ is_deeply ($csv->getline ($fh), [ "e", $cuni, @rest ], "not start of line");
+ is_deeply ($csv->getline ($fh), [ "g", "i$cuni" ], "not start of field");
+ is_deeply ($csv->getline ($fh), [ "j", "k\n${cuni}k" ], "in quoted field after NL");
+
+ close $fh;
+
+ unlink $tfn;
+ }
+ }
+
+my $data = <<"EOC";
+id | name
+#
+42 | foo
+#
+EOC
+
+is_deeply (csv (
+ in => \$data,
+ sep_char => "|",
+ headers => "auto",
+ allow_whitespace => 1,
+ comment_str => "#",
+ strict => 0,
+ ), [{ id => 42, name => "foo" }], "Last record is comment");
+is_deeply (csv (
+ in => \$data,
+ sep_char => "|",
+ headers => "auto",
+ allow_whitespace => 1,
+ comment_str => "#",
+ strict => 1,
+ ), [{ id => 42, name => "foo" }], "Last record is comment, under strict");
+
+$data .= "3\n";
+is_deeply (csv (
+ in => \$data,
+ sep_char => "|",
+ headers => "auto",
+ allow_whitespace => 1,
+ comment_str => "#",
+ strict => 0,
+ ), [{ id => 42, name => "foo" },
+ { id => 3, name => undef },
+ ], "Valid record past comment");
+is_deeply (csv (
+ in => \$data,
+ sep_char => "|",
+ headers => "auto",
+ allow_whitespace => 1,
+ comment_str => "#",
+ strict => 1,
+ auto_diag => 0, # Suppress error 2014
+ ), [{ id => 42, name => "foo" }], "Invalid record past comment, under strict");
+is_deeply (csv (
+ in => \"# comment\n42 | foo\n53 | bar\n",
+ sep_char => "|",
+ allow_whitespace => 1,
+ comment_str => "#",
+ strict => 1,
+ auto_diag => 1,
+ ), [[ 42, "foo" ], [ 53, "bar" ]], "Comment on first line, under strict");
+
+foreach my $io (1, 0) {
+ my $csv = Text::CSV->new ({
+ strict => 1,
+ comment_str => "#",
+ sep_char => "|",
+ auto_diag => 2,
+ diag_verbose => 1,
+ });
+
+ # Data line is required to set field count for strict
+ if ($io) {
+ is_deeply ($csv->getline (*DATA), [ "a", "b" ], "Comment on last line IO data");
+ is_deeply ($csv->getline (*DATA), undef, "Comment on last line IO comment");
+ }
+ else {
+ ok ($csv->parse ("a|b"), "Parse data line");
+ is_deeply ([ $csv->fields ], [ "a", "b" ], "Data in parse");
+ ok ($csv->parse ("# some comment"), "Parse comment");
+ is_deeply ([ $csv->fields ], [ ], "Comment in parse");
+ }
+ }
+
+1;
+__END__
+a|b
+# some comment
diff --git a/src/test/resources/module/Text-CSV/t/50_utf8.t b/src/test/resources/module/Text-CSV/t/50_utf8.t
new file mode 100644
index 000000000..8793e48f7
--- /dev/null
+++ b/src/test/resources/module/Text-CSV/t/50_utf8.t
@@ -0,0 +1,140 @@
+#!/usr/bin/perl
+
+use strict;
+$^W = 1;
+
+use Test::More;
+use charnames ":full";
+
+BEGIN {
+ if ($] < 5.008001) {
+ plan skip_all => "UTF8 tests useless in this ancient perl version";
+ }
+ else {
+ plan tests => 93;
+ }
+ }
+
+BEGIN {
+ $ENV{PERL_TEXT_CSV} = $ENV{TEST_PERL_TEXT_CSV} || 0;
+ require_ok "Text::CSV";
+ plan skip_all => "Cannot load Text::CSV" if $@;
+ require "./t/util.pl";
+ }
+
+my $tfn = "_50test.csv"; END { -f $tfn and unlink $tfn; }
+# No binary => 1, as UTF8 is supposed to be allowed without it
+my $csv = Text::CSV->new ({
+ always_quote => 1,
+ keep_meta_info => 1,
+ });
+
+# Special characters to check:
+# 0A = \n 2C = , 20 = 22 = "
+# 0D = \r 3B = ;
+foreach my $test (
+ # Space-like characters
+ [ "\x{0000A0}", "U+0000A0 NO-BREAK SPACE" ],
+ [ "\x{00200B}", "U+00200B ZERO WIDTH SPACE" ],
+ # Some characters with possible problems in the code point
+ [ "\x{000122}", "U+000122 LATIN CAPITAL LETTER G WITH CEDILLA" ],
+ [ "\x{002C22}", "U+002C22 GLAGOLITIC CAPITAL LETTER SPIDERY HA" ],
+ [ "\x{000A2C}", "U+000A2C GURMUKHI LETTER BA" ],
+ [ "\x{000E2C}", "U+000E2C THAI CHARACTER LO CHULA" ],
+ [ "\x{010A2C}", "U+010A2C KHAROSHTHI LETTER VA" ],
+ # Characters with possible problems in the encoded representation
+ # Should not be possible. ASCII is coded in 000..127, all other
+ # characters in 128..255
+ ) {
+ my ($u, $msg) = @$test;
+ ($u = "$u\x{0123}") =~ s/.$//; # Make sure it's marked UTF8
+ my @in = ("", " ", $u, "");
+ my $exp = join ",", map { qq{"$_"} } @in;
+
+ ok ($csv->combine (@in), "combine $msg");
+
+ my $str = $csv->string;
+ is_binary ($str, $exp, "string $msg");
+
+ ok ($csv->parse ($str), "parse $msg");
+ my @out = $csv->fields;
+ # Cannot use is_deeply (), because of the binary content
+ is (scalar @in, scalar @out, "fields $msg");
+ is_binary ($in[$_], $out[$_], "field $_ $msg") for 0 .. $#in;
+ }
+
+# Test if the UTF8 part is accepted, but the \n is not
+is ($csv->parse (qq{"\x{0123}\n\x{20ac}"}), 0, "\\n still needs binary");
+is ($csv->binary, 0, "bin flag still unset");
+is ($csv->error_diag + 0, 2021, "Error 2021");
+
+open my $fh, ">:encoding(utf-8)", $tfn or die "$tfn: $!\n";
+print $fh qq{"\N{LATIN CAPITAL LETTER O WITH STROKE}l/Vin",0\n};
+close $fh;
+SKIP: {
+ open my $fh, "<:encoding(utf-8)", $tfn or
+ skip "Cannot open UTF-8 test file", 6;
+
+ my $row;
+ ok ($row = $csv->getline ($fh), "read/parse");
+
+ is ($csv->is_quoted (0), 1, "First field is quoted");
+ is ($csv->is_quoted (1), 0, "Second field is not quoted");
+ is ($csv->is_binary (0), 1, "First field is binary");
+ is ($csv->is_binary (1), 0, "Second field is not binary");
+
+ ok (utf8::valid ($row->[0]), "First field is valid utf8");
+
+ $csv->combine (@$row);
+ ok (utf8::valid ($csv->string), "Combined string is valid utf8");
+ }
+
+# Test quote_binary
+$csv->always_quote (0);
+$csv->quote_space (0);
+$csv->quote_binary (0);
+ok ($csv->combine (" ", 1, "\x{20ac} "), "Combine");
+is ($csv->string, qq{ ,1,\x{20ac} }, "String 0-0");
+$csv->quote_binary (1);
+ok ($csv->combine (" ", 1, "\x{20ac} "), "Combine");
+is ($csv->string, qq{ ,1,"\x{20ac} "}, "String 0-1");
+
+$csv->quote_space (1);
+$csv->quote_binary (0);
+ok ($csv->combine (" ", 1, "\x{20ac} "), "Combine");
+is ($csv->string, qq{" ",1,"\x{20ac} "}, "String 1-0");
+ok ($csv->quote_binary (1), "quote binary on");
+ok ($csv->combine (" ", 1, "\x{20ac} "), "Combine");
+is ($csv->string, qq{" ",1,"\x{20ac} "}, "String 1-1");
+
+ok ($csv->parse (qq{,1,"f\x{014d}o, 3""56",,bar,\r\n}), "example from XS");
+is_deeply ([$csv->fields], [
+ "", 1, qq{f\x{014d}o, 3"56}, "", "bar", "" ], "content");
+
+open $fh, ">:encoding(utf-8)", $tfn or die "$tfn: $!\n";
+print $fh "euro\n\x{20ac}\neuro\n";
+close $fh;
+open $fh, "<:encoding(utf-8)", $tfn or die "$tfn: $!\n";
+
+SKIP: {
+ my $out = "";
+ my $isutf8 = $] < 5.008001 ?
+ sub { !$_[0]; } : # utf8::is_utf8 () not available in 5.8.0
+ sub { utf8::is_utf8 ($out); };
+ ok ($csv->auto_diag (1), "auto diag");
+ ok ($csv->binary (1), "set binary");
+ ok ($csv->bind_columns (\$out), "bind");
+ ok ($csv->getline ($fh), "parse");
+ is ($csv->is_binary (0), 0, "not binary");
+ is ($out, "euro", "euro");
+ ok (!$isutf8->(1), "not utf8");
+ ok ($csv->getline ($fh), "parse");
+ is ($csv->is_binary (0), 1, "is binary");
+ is ($out, "\x{20ac}", "euro");
+ ok ($isutf8->(0), "is utf8");
+ ok ($csv->getline ($fh), "parse");
+ is ($csv->is_binary (0), 0, "not binary");
+ is ($out, "euro", "euro");
+ ok (!$isutf8->(1), "not utf8");
+ close $fh;
+ }
diff --git a/src/test/resources/module/Text-CSV/t/51_utf8.t b/src/test/resources/module/Text-CSV/t/51_utf8.t
new file mode 100644
index 000000000..2cf2a4b9c
--- /dev/null
+++ b/src/test/resources/module/Text-CSV/t/51_utf8.t
@@ -0,0 +1,270 @@
+#!/usr/bin/perl
+
+use strict;
+$^W = 1;
+use charnames ":full";
+
+use Test::More;
+$| = 1;
+
+BEGIN {
+ $] < 5.008002 and
+ plan skip_all => "UTF8 tests useless in this ancient perl version";
+ }
+
+my @tests;
+my $ebcdic = ord ("A") == 0xC1;
+
+BEGIN {
+ delete $ENV{PERLIO};
+
+ my $pu = $ENV{PERL_UNICODE};
+ $pu = defined $pu && ($pu eq "" || $pu =~ m/[oD]/ || ($pu =~ m/^[0-9]+$/ && $pu & 16));
+
+ my $euro_ch = "\x{20ac}";
+
+ utf8::encode (my $bytes = $euro_ch);
+ utf8::downgrade (my $bytes_dn = $bytes);
+ utf8::upgrade (my $bytes_up = $bytes);
+
+ @tests = (
+ # $test $perlio $data, $encoding $expect_w
+ # ---------------------------- ------------------- ----------- --------- ----------
+ [ "Unicode default", "", $euro_ch, "utf8", $pu ? "no warn" : "warn" ],
+ [ "Unicode binmode", "[binmode]", $euro_ch, "utf8", "warn", ],
+ [ "Unicode :utf8", ":utf8", $euro_ch, "utf8", "no warn", ],
+ [ "Unicode :encoding(utf8)", ":encoding(utf8)", $euro_ch, "utf8", "no warn", ],
+ [ "Unicode :encoding(UTF-8)", ":encoding(UTF-8)", $euro_ch, "utf8", "no warn", ],
+
+ [ "bytes dn default", "", $bytes_dn, "[none]", "no warn", ],
+ [ "bytes dn binmode", "[binmode]", $bytes_dn, "[none]", "no warn", ],
+ [ "bytes dn :utf8", ":utf8", $bytes_dn, "utf8", "no warn", ],
+ [ "bytes dn :encoding(utf8)", ":encoding(utf8)", $bytes_dn, "utf8", "no warn", ],
+ [ "bytes dn :encoding(UTF-8)", ":encoding(UTF-8)", $bytes_dn, "utf8", "no warn", ],
+
+ [ "bytes up default", "", $bytes_up, "[none]", "no warn", ],
+ [ "bytes up binmode", "[binmode]", $bytes_up, "[none]", "no warn", ],
+ [ "bytes up :utf8", ":utf8", $bytes_up, "utf8", "no warn", ],
+ [ "bytes up :encoding(utf8)", ":encoding(utf8)", $bytes_up, "utf8", "no warn", ],
+ [ "bytes up :encoding(UTF-8)", ":encoding(UTF-8)", $bytes_up, "utf8", "no warn", ],
+ );
+
+ my $builder = Test::More->builder;
+ binmode $builder->output, ":encoding(utf8)";
+ binmode $builder->failure_output, ":encoding(utf8)";
+ binmode $builder->todo_output, ":encoding(utf8)";
+
+ plan tests => 11 + 6 * @tests + 4 * 22 + 6 + 10 + 2;
+ }
+
+BEGIN {
+ $ENV{PERL_TEXT_CSV} = $ENV{TEST_PERL_TEXT_CSV} || 0;
+ use_ok "Text::CSV", ("csv");
+ plan skip_all => "Cannot load Text::CSV" if $@;
+ require "./t/util.pl";
+ }
+
+sub hexify { join " ", map { sprintf "%02x", $_ } unpack "C*", @_ }
+sub warned { length ($_[0]) ? "warn" : "no warn" }
+
+my $csv = Text::CSV->new ({ binary => 1, auto_diag => 1 });
+
+for (@tests) {
+ my ($test, $perlio, $data, $enc, $expect_w) = @$_;
+
+ my $expect = qq{"$data"};
+ $enc eq "utf8" and utf8::encode ($expect);
+
+ my ($p_out, $p_fh) = ("");
+ my ($c_out, $c_fh) = ("");
+
+ if ($perlio eq "[binmode]") {
+ open $p_fh, ">", \$p_out or die "IO: $!\n"; binmode $p_fh;
+ open $c_fh, ">", \$c_out or die "IO: $!\n"; binmode $c_fh;
+ }
+ else {
+ open $p_fh, ">$perlio", \$p_out or die "IO: $!\n";
+ open $c_fh, ">$perlio", \$c_out or die "IO: $!\n";
+ }
+
+ my $p_warn = "";
+ { local $SIG{__WARN__} = sub { $p_warn .= join "", @_ };
+ ok ((print $p_fh qq{"$data"}), "$test perl print");
+ close $p_fh;
+ }
+
+ my $c_warn = "";
+ { local $SIG{__WARN__} = sub { $c_warn .= join "", @_ };
+ ok ($csv->print ($c_fh, [ $data ]), "$test csv print");
+ close $c_fh;
+ }
+
+ is (hexify ($c_out), hexify ($p_out), "$test against Perl");
+ is (hexify ($c_out), hexify ($expect), "$test against expected");
+
+ is (warned ($c_warn), warned ($p_warn), "$test against Perl warning");
+ is (warned ($c_warn), $expect_w, "$test against expected warning");
+ }
+
+# Test automatic upgrades for valid UTF-8
+{ my $blob = pack "C*", 0..255; $blob =~ tr/",//d;
+ # perl-5.10.x has buggy SvCUR () on blob
+ $] >= 5.010000 && $] <= 5.012001 and $blob =~ tr/\0//d;
+ my $b1 = "\x{b6}"; # PILCROW SIGN in ISO-8859-1
+ my $b2 = $ebcdic # ARABIC COMMA in UTF-8
+ ? "\x{b8}\x{57}\x{53}"
+ : "\x{d8}\x{8c}";
+ my @data = (
+ qq[1,aap,3], # No diac
+ qq[1,a${b1}p,3], # Single-byte
+ qq[1,a${b2}p,3], # Multi-byte
+ qq[1,"$blob",3], # Binary shit
+ ) x 2;
+ my $data = join "\n" => @data;
+ my @expect = ("aap", "a\266p", "a\x{060c}p", $blob) x 2;
+
+ my $csv = Text::CSV->new ({ binary => 1, auto_diag => 1 });
+
+ foreach my $bc (undef, 3) {
+ my @read;
+
+ # Using getline ()
+ open my $fh, "<", \$data or die "IO: $!\n"; binmode $fh;
+ $bc and $csv->bind_columns (\my ($f1, $f2, $f3));
+ is (scalar $csv->bind_columns, $bc, "Columns_bound?");
+ while (my $row = $csv->getline ($fh)) {
+ push @read, $bc ? $f2 : $row->[1];
+ }
+ close $fh;
+ is_deeply (\@read, \@expect, "Set and reset UTF-8 ".($bc?"no bind":"bind_columns"));
+ is_deeply ([ map { utf8::is_utf8 ($_) } @read ],
+ [ "", "", 1, "", "", "", 1, "" ], "UTF8 flags");
+
+ # Using parse ()
+ @read = map {
+ $csv->parse ($_);
+ $bc ? $f2 : ($csv->fields)[1];
+ } @data;
+ is_deeply (\@read, \@expect, "Set and reset UTF-8 ".($bc?"no bind":"bind_columns"));
+ is_deeply ([ map { utf8::is_utf8 ($_) } @read ],
+ [ "", "", 1, "", "", "", 1, "" ], "UTF8 flags");
+ }
+ }
+
+my $sep = "\x{2665}";#"\N{INVISIBLE SEPARATOR}";
+my $quo = "\x{2661}";#"\N{FULLWIDTH QUOTATION MARK}";
+foreach my $new (0, 1, 2, 3) {
+ my %attr = (
+ binary => 1,
+ always_quote => 1,
+ );;
+ $new & 1 and $attr{sep} = $sep;
+ $new & 2 and $attr{quote} = $quo;
+ my $csv = Text::CSV->new (\%attr);
+
+ my $s = $attr{sep} || ',';
+ my $q = $attr{quote} || '"';
+
+ note ("Test SEP: '$s', QUO: '$q'") if $Test::More::VERSION > 0.81;
+ is ($csv->sep, $s, "sep");
+ is ($csv->quote, $q, "quote");
+
+ foreach my $data (
+ [ 1, 2 ],
+ [ "\N{EURO SIGN}", "\N{SNOWMAN}" ],
+# [ $sep, $quo ],
+ ) {
+
+ my $exp8 = join $s => map { qq{$q$_$q} } @$data;
+ utf8::encode (my $expb = $exp8);
+ my @exp = ($expb, $exp8);
+
+ ok ($csv->combine (@$data), "combine");
+ my $x = $csv->string;
+ is ($csv->string, $exp8, "string");
+
+ open my $fh, ">:encoding(utf8)", \(my $out = "") or die "IO: $!\n";
+ ok ($csv->print ($fh, $data), "print with UTF8 sep");
+ close $fh;
+
+ is ($out, $expb, "output");
+
+ ok ($csv->parse ($expb), "parse");
+ is_deeply ([ $csv->fields ], $data, "fields");
+
+ open $fh, "<", \$expb or die "IO: $!\n"; binmode $fh;
+ is_deeply ($csv->getline ($fh), $data, "data from getline ()");
+ close $fh;
+
+ $expb =~ tr/"//d;
+
+ ok ($csv->parse ($expb), "parse");
+ is_deeply ([ $csv->fields ], $data, "fields");
+
+ open $fh, "<", \$expb or die "IO: $!\n"; binmode $fh;
+ is_deeply ($csv->getline ($fh), $data, "data from getline ()");
+ close $fh;
+ }
+ }
+
+{ my $h = "\N{WHITE HEART SUIT}";
+ my $H = "\N{BLACK HEART SUIT}";
+ my $str = "${h}I$h$H${h}L\"${h}ve$h$H${h}Perl$h";
+ utf8::encode ($str);
+ ok (my $aoa = csv (in => \$str, sep => $H, quote => $h), "Hearts");
+ is_deeply ($aoa, [[ "I", "L${h}ve", "Perl"]], "I $H Perl");
+
+ ok (my $csv = Text::CSV->new ({
+ binary => 1, sep => $H, quote => $h }), "new hearts");
+ ok ($csv->combine (@{$aoa->[0]}), "combine");
+ ok ($str = $csv->string, "string");
+ utf8::decode ($str);
+ is ($str, "I${H}${h}L\"${h}ve${h}${H}Perl", "Correct quotation");
+ }
+
+# Tests pulled from tests in Raku
+{ my $csv = Text::CSV->new ({ binary => 1, auto_diag => 1 });
+ my $h = pack "C*", 224, 34, 204, 182;
+ ok ($csv->combine (1, $h, 3));
+ ok (my $s = $csv->string, "String");
+ my $b = $h;
+ utf8::encode ($b);
+ ok ($csv->combine (1, $b, 3));
+ ok ($s = $csv->string, "String");
+ }
+
+{ my $h = qq{\x{10fffd}xE0"}; #"
+ my $b = $h;
+ ok ($csv->combine (1, $b, 3));
+ ok (my $s = $csv->string, "String");
+ $b = $h;
+ utf8::encode ($b);
+ ok ($csv->combine (1, $b, 3));
+ ok ($s = $csv->string, "String");
+ $b = $h;
+ utf8::encode ($b);
+ ok ($csv->combine (1, $b, 3));
+ ok ($s = $csv->string, "String");
+ }
+
+{ my $file = "Eric,\N{LATIN CAPITAL LETTER E WITH ACUTE}RIC\n";
+ utf8::encode ($file);
+ open my $fh, "<", \$file or die $!;
+
+ my $csv = Text::CSV->new ({ binary => 1, auto_diag => 2 });
+ is_deeply (
+ [ $csv->header ($fh) ],
+ [ "eric", "\N{LATIN SMALL LETTER E WITH ACUTE}ric" ],
+ "Lowercase unicode header");
+ }
+
+{ my $file = "Eric,\N{LATIN SMALL LETTER E WITH ACUTE}ric\n";
+ utf8::encode ($file);
+ open my $fh, "<", \$file or die $!;
+
+ my $csv = Text::CSV->new ({ binary => 1, auto_diag => 2 });
+ is_deeply (
+ [ $csv->header ($fh, { munge => "uc" }) ],
+ [ "ERIC", "\N{LATIN CAPITAL LETTER E WITH ACUTE}RIC" ],
+ "Uppercase unicode header");
+ }
diff --git a/src/test/resources/module/Text-CSV/t/55_combi.t b/src/test/resources/module/Text-CSV/t/55_combi.t
new file mode 100644
index 000000000..87566e1be
--- /dev/null
+++ b/src/test/resources/module/Text-CSV/t/55_combi.t
@@ -0,0 +1,159 @@
+#!/usr/bin/perl
+
+use strict;
+$^W = 1;
+
+use Test::More tests => 25119;
+
+BEGIN {
+ $ENV{PERL_TEXT_CSV} = $ENV{TEST_PERL_TEXT_CSV} || 0;
+ require_ok "Text::CSV";
+ plan skip_all => "Cannot load Text::CSV" if $@;
+ require "./t/util.pl";
+ }
+
+my $csv = Text::CSV->new ({ binary => 1 });
+
+my @attrib = qw( quote_char escape_char sep_char );
+my @special = ('"', "'", ",", ";", "\t", "\\", "~");
+# Add undef, once we can return undef
+my @input = ( "", 1, "1", 1.4, "1.4", " - 1,4", "1+2=3", "' ain't it great '",
+ '"foo"! said the `bär', q{the ~ in "0 \0 this l'ne is \r ; or "'"} );
+my $ninput = scalar @input;
+my $string = join "=", "", @input, "";
+my %fail;
+
+ok (1, "-- qc ec sc ac");
+sub combi {
+ my %attr = @_;
+ my $combi = join " ", "--",
+ map { sprintf "%6s", _readable $attr{$_} } @attrib, "always_quote";
+ ok (1, $combi);
+
+ # use legal non-special characters
+ is ($csv->allow_whitespace (0), 0, "Reset allow WS");
+ is ($csv->sep_char ("\x03"), "\x03", "Reset sep");
+ is ($csv->quote_char ("\x0b"), "\x0b", "Reset quo");
+ is ($csv->escape_char ("\x0c"), "\x0c", "Reset esc");
+
+ # Set the attributes and check failure
+ my %state;
+ foreach my $attr (sort keys %attr) {
+ eval { $csv->$attr ($attr{$attr}); };
+ $@ or next;
+ $state{0 + $csv->error_diag} ||= $@;
+ }
+ if ($attr{sep_char} eq $attr{quote_char} ||
+ $attr{sep_char} eq $attr{escape_char}) {
+ ok (exists $state{1001}, "Illegal combo");
+ like ($state{1001}, qr{sep_char is equal to}, "Illegal combo");
+ }
+ else {
+ ok (!exists $state{1001}, "No char conflict");
+ }
+ if (!exists $state{1001} and
+ $attr{sep_char} =~ m/[\r\n]/ ||
+ $attr{quote_char} =~ m/[\r\n]/ ||
+ $attr{escape_char} =~ m/[\r\n]/
+ ) {
+ ok (exists $state{1003}, "Special contains eol");
+ like ($state{1003}, qr{in main attr not}, "Illegal combo");
+ }
+ if ($attr{allow_whitespace} and
+ $attr{quote_char} =~ m/^[ \t]/ ||
+ $attr{escape_char} =~ m/^[ \t]/
+ ) {
+ #diag (join " -> ** " => $combi, join ", " => sort %state);
+ ok (exists $state{1002}, "Illegal combo under allow_whitespace");
+ like ($state{1002}, qr{allow_whitespace with}, "Illegal combo");
+ }
+ %state and return;
+
+ # Check success
+ is ($csv->$_ (), $attr{$_}, "check $_") for sort keys %attr;
+
+ my $ret = $csv->combine (@input);
+
+ ok ($ret, "combine");
+ ok (my $str = $csv->string, "string");
+ SKIP: {
+ ok (my $ok = $csv->parse ($str), "parse");
+
+ unless ($ok) {
+ $fail{parse}{$combi} = $csv->error_input;
+ skip "parse () failed", 3;
+ }
+
+ ok (my @ret = $csv->fields, "fields");
+ unless (@ret) {
+ $fail{fields}{$combi} = $csv->error_input;
+ skip "fields () failed", 2;
+ }
+
+ is (scalar @ret, $ninput, "$ninput fields");
+ unless (scalar @ret == $ninput) {
+ $fail{'$#fields'}{$combi} = $str;
+ skip "# fields failed", 1;
+ }
+
+ my $ret = join "=", "", @ret, "";
+ is ($ret, $string, "content");
+ }
+ } # combi
+
+foreach my $aw (0, 1) {
+foreach my $aq (0, 1) {
+foreach my $qc (@special) {
+foreach my $ec (@special, "+") {
+foreach my $sc (@special, "\0") {
+ combi (
+ sep_char => $sc,
+ quote_char => $qc,
+ escape_char => $ec,
+ always_quote => $aq,
+ allow_whitespace => $aw,
+ );
+ }
+ }
+ }
+ }
+ }
+
+foreach my $fail (sort keys %fail) {
+ print STDERR "Failed combi for $fail ():\n",
+ "-- qc ec sc ac\n";
+ foreach my $combi (sort keys %{$fail{$fail}}) {
+ printf STDERR "%-20s - %s\n", map { _readable $_ } $combi, $fail{$fail}{$combi};
+ }
+ }
+
+{ my $err = "";
+ local $SIG{__WARN__} = sub { $err = shift; };
+ is (Text::CSV->new ({ sep => ",", quote => ",", auto_diag => 1 }),
+ undef, "New (illegal combo + auto_diag)");
+ like ($err, qr{\bERROR: 1001 - INI -}, "Error message");
+
+ $err = "";
+ ok (my $csv = Text::CSV->new ({ auto_diag => 1 }), "new auto_diag");
+ eval { $csv->sep ('"'); };
+ like ($err, qr{\bERROR: 1001 - INI -}, "Error message");
+ is ($csv->sep_char (), '"', "sep changed anyway");
+ }
+
+{ ok (my $csv = Text::CSV->new ({ binary => 1 }), "New CSV default");
+ ok ($csv->combine ("=\x00="), "combine =\\x00=");
+ is ($csv->string, qq{"="0="}, "string");
+ }
+{ ok (my $csv = Text::CSV->new ({ binary => 1, escape_null => 0 }), "New CSV no escape_null");
+ ok ($csv->combine ("=\x00="), "combine =\\x00=");
+ is ($csv->string, qq{"=\0="}, "string");
+ }
+{ ok (my $csv = Text::CSV->new ({ binary => 1, escape_char => "" }), "New CSV no escape");
+ ok ($csv->combine ("=\x00="), "combine =\\x00=");
+ is ($csv->string, qq{"=\0="}, "string");
+ }
+{ ok (my $csv = Text::CSV->new ({ binary => 1, escape_char => "", escape_null => 0 }), "New CSV no escape no escape_null");
+ ok ($csv->combine ("=\x00="), "combine =\\x00=");
+ is ($csv->string, qq{"=\0="}, "string");
+ }
+1;
diff --git a/src/test/resources/module/Text-CSV/t/60_samples.t b/src/test/resources/module/Text-CSV/t/60_samples.t
new file mode 100644
index 000000000..d78f2a83b
--- /dev/null
+++ b/src/test/resources/module/Text-CSV/t/60_samples.t
@@ -0,0 +1,35 @@
+#!/usr/bin/perl
+
+use strict;
+$^W = 1;
+
+use Test::More tests => 8;
+
+BEGIN {
+ $ENV{PERL_TEXT_CSV} = $ENV{TEST_PERL_TEXT_CSV} || 0;
+ use_ok "Text::CSV", ();
+ plan skip_all => "Cannot load Text::CSV" if $@;
+ }
+
+# Some assorted examples from the modules history
+
+# "Pavel Kotala"
+{
+ my $csv = Text::CSV->new ({
+ quote_char => '"',
+ escape_char => '\\',
+ sep_char => ';',
+ binary => 1,
+ });
+ ok ($csv, "new (\", \\\\, ;, 1)");
+
+ my @list = ("c:\\winnt", "text");
+ ok ($csv->combine (@list), "combine ()");
+ my $line = $csv->string;
+ ok ($line, "string ()");
+ ok ($csv->parse ($line), "parse ()");
+ my @olist = $csv->fields;
+ is (scalar @list, scalar @olist, "field count");
+ is ($list[0], $olist[0], "field 1");
+ is ($list[1], $olist[1], "field 2");
+ }
diff --git a/src/test/resources/module/Text-CSV/t/65_allow.t b/src/test/resources/module/Text-CSV/t/65_allow.t
new file mode 100644
index 000000000..ac20ea1c0
--- /dev/null
+++ b/src/test/resources/module/Text-CSV/t/65_allow.t
@@ -0,0 +1,428 @@
+#!/usr/bin/perl
+
+use strict;
+$^W = 1;
+
+#use Test::More "no_plan";
+ use Test::More tests => 1119;
+
+BEGIN {
+ $ENV{PERL_TEXT_CSV} = $ENV{TEST_PERL_TEXT_CSV} || 0;
+ use_ok "Text::CSV", ();
+ plan skip_all => "Cannot load Text::CSV" if $@;
+ require "./t/util.pl";
+ }
+
+my $tfn = "_65test.csv"; END { -f $tfn and unlink $tfn; }
+my $csv;
+
+ok (1, "Allow unescaped quotes");
+# Allow unescaped quotes inside an unquoted field
+{ my @bad = (
+ # valid, line
+ [ 1, 1, 0, qq{foo,bar,"baz",quux} ],
+ [ 2, 0, 2034, qq{rj,bs,r"jb"s,rjbs} ],
+ [ 3, 0, 2034, qq{some "spaced" quote data,2,3,4} ],
+ [ 4, 1, 0, qq{and an,entirely,quoted,"field"} ],
+ [ 5, 1, 0, qq{and then,"one with ""quoted"" quotes",okay,?} ],
+ );
+
+ for (@bad) {
+ my ($tst, $valid, $err, $bad) = @$_;
+ $csv = Text::CSV->new ();
+ ok ($csv, "$tst - new (alq => 0)");
+ is ($csv->parse ($bad), $valid, "$tst - parse () fail");
+ is (0 + $csv->error_diag, $err, "$tst - error $err");
+
+ $csv->allow_loose_quotes (1);
+ ok ($csv->parse ($bad), "$tst - parse () pass");
+ ok (my @f = $csv->fields, "$tst - fields");
+ }
+
+ #$csv = Text::CSV->new ({ quote_char => '"', escape_char => "=" });
+ #ok (!$csv->parse (qq{foo,d'uh"bar}), "should fail");
+ }
+
+ok (1, "Allow loose quotes inside quoted");
+# Allow unescaped quotes inside a quoted field
+{ my @bad = (
+ # valid, line
+ [ 1, 1, 0, qq{foo,bar,"baz",quux} ],
+ [ 2, 0, 2023, qq{rj,bs,"r"jb"s",rjbs} ],
+ [ 3, 0, 2023, qq{"some "spaced" quote data",2,3,4} ],
+ [ 4, 1, 0, qq{and an,entirely,quoted,"field"} ],
+ [ 5, 1, 0, qq{and then,"one with ""quoted"" quotes",okay,?} ],
+ );
+
+ for (@bad) {
+ my ($tst, $valid, $err, $bad) = @$_;
+ $csv = Text::CSV->new ();
+ ok ($csv, "$tst - new (alq => 0)");
+ is ($csv->parse ($bad), $valid, "$tst - parse () fail");
+ is (0 + $csv->error_diag, $err, "$tst - error $err");
+
+ $csv->allow_loose_quotes (1);
+ is ($csv->parse ($bad), $valid, "$tst - parse () fail with lq");
+ is (0 + $csv->error_diag, $err, "$tst - error $err");
+
+ $csv->escape_char (undef);
+ ok ($csv->parse ($bad), "$tst - parse () pass");
+ ok (my @f = $csv->fields, "$tst - fields");
+ }
+ }
+
+ok (1, "Allow loose escapes");
+# Allow escapes to escape characters that should not be escaped
+{ my @bad = (
+ # valid, line
+ [ 1, 1, 0, qq{1,foo,bar,"baz",quux} ],
+ [ 2, 1, 0, qq{2,escaped,"quote\\"s",in,"here"} ],
+ [ 3, 1, 0, qq{3,escaped,quote\\"s,in,"here"} ],
+ [ 4, 1, 0, qq{4,escap\\'d chars,allowed,in,unquoted,fields} ],
+ [ 5, 0, 2025, qq{5,42,"and it\\'s dog",} ],
+
+ [ 6, 1, 0, qq{\\,} ],
+ [ 7, 1, 0, qq{\\} ],
+ [ 8, 0, 2035, qq{foo\\} ],
+ );
+
+ for (@bad) {
+ my ($tst, $valid, $err, $bad) = @$_;
+ $csv = Text::CSV->new ({ escape_char => "\\" });
+ ok ($csv, "$tst - new (ale => 0)");
+ is ($csv->parse ($bad), $valid, "$tst - parse () fail");
+ is (0 + $csv->error_diag, $err, "$tst - error $err");
+
+ $csv->allow_loose_escapes (1);
+ if ($tst >= 8) {
+ # Should always fail
+ ok (!$csv->parse ($bad), "$tst - parse () fail");
+ is (0 + $csv->error_diag, $err, "$tst - error $err");
+ }
+ else {
+ ok ($csv->parse ($bad), "$tst - parse () pass");
+ ok (my @f = $csv->fields, "$tst - fields");
+ }
+ }
+ }
+
+ok (1, "Allow whitespace");
+# Allow whitespace to surround sep char
+{ my @bad = (
+ # valid, line
+ [ 1, 1, 0, qq{1,foo,bar,baz,quux} ],
+ [ 2, 1, 0, qq{1,foo,bar,"baz",quux} ],
+ [ 3, 1, 0, qq{1, foo,bar,"baz",quux} ],
+ [ 4, 1, 0, qq{ 1,foo,bar,"baz",quux} ],
+ [ 5, 0, 2034, qq{1,foo,bar, "baz",quux} ],
+ [ 6, 1, 0, qq{1,foo ,bar,"baz",quux} ],
+ [ 7, 1, 0, qq{1,foo,bar,"baz",quux } ],
+ [ 8, 1, 0, qq{1,foo,bar,"baz","quux"} ],
+ [ 9, 0, 2023, qq{1,foo,bar,"baz" ,quux} ],
+ [ 10, 0, 2023, qq{1,foo,bar,"baz","quux" } ],
+ [ 11, 0, 2034, qq{ 1 , foo , bar , "baz" , quux } ],
+ [ 12, 0, 2034, qq{ 1 , foo , bar , "baz" , quux } ],
+ [ 13, 0, 2034, qq{ 1 , foo , bar , "baz"\t , quux } ],
+ );
+
+ foreach my $eol ("", "\n", "\r", "\r\n") {
+ my $s_eol = _readable ($eol);
+ for (@bad) {
+ my ($tst, $ok, $err, $bad) = @$_;
+ $csv = Text::CSV->new ({ eol => $eol, binary => 1 });
+ ok ($csv, "$s_eol / $tst - new - '$bad')");
+ is ($csv->parse ($bad), $ok, "$s_eol / $tst - parse () fail");
+ is (0 + $csv->error_diag, $err, "$tst - error $err");
+
+ $csv->allow_whitespace (1);
+ ok ($csv->parse ("$bad$eol"), "$s_eol / $tst - parse () pass");
+
+ ok (my @f = $csv->fields, "$s_eol / $tst - fields");
+
+ local $" = ",";
+ is ("@f", $bad[0][-1], "$s_eol / $tst - content");
+ }
+ }
+ }
+
+ok (1, "Allow whitespace");
+# Allow whitespace to surround sep char
+{ my @bad = (
+ # test, ok, line
+ [ 1, 1, 0, qq{1,foo,bar,baz,quux} ],
+ [ 2, 1, 0, qq{1,foo,bar,"baz",quux} ],
+ [ 3, 1, 0, qq{1, foo,bar,"baz",quux} ],
+ [ 4, 1, 0, qq{ 1,foo,bar,"baz",quux} ],
+ [ 5, 0, 2034, qq{1,foo,bar, "baz",quux} ],
+ [ 6, 1, 0, qq{1,foo ,bar,"baz",quux} ],
+ [ 7, 1, 0, qq{1,foo,bar,"baz",quux } ],
+ [ 8, 1, 0, qq{1,foo,bar,"baz","quux"} ],
+ [ 9, 0, 2023, qq{1,foo,bar,"baz" ,quux} ],
+ [ 10, 0, 2023, qq{1,foo,bar,"baz","quux" } ],
+ [ 11, 0, 2023, qq{1,foo,bar,"baz","quux" } ],
+ [ 12, 0, 2034, qq{ 1 , foo , bar , "baz" , quux } ],
+ [ 13, 0, 2034, qq{ 1 , foo , bar , "baz" , quux } ],
+ [ 14, 0, 2034, qq{ 1 , foo , bar , "baz"\t , quux } ],
+ );
+
+ foreach my $eol ("", "\n", "\r", "\r\n") {
+ my $s_eol = _readable ($eol);
+ for (@bad) {
+ my ($tst, $ok, $err, $bad) = @$_;
+ $csv = Text::CSV->new ({
+ eol => $eol,
+ binary => 1,
+ });
+ ok ($csv, "$s_eol / $tst - new - '$bad')");
+ is ($csv->parse ($bad), $ok, "$s_eol / $tst - parse () fail");
+ is (0 + $csv->error_diag, $err, "$tst - error $err");
+
+ $csv->allow_whitespace (1);
+ ok ($csv->parse ("$bad$eol"), "$s_eol / $tst - parse () pass");
+
+ ok (my @f = $csv->fields, "$s_eol / $tst - fields");
+
+ local $" = ",";
+ is ("@f", $bad[0][-1], "$s_eol / $tst - content");
+ }
+ }
+ }
+
+ok (1, "blank_is_undef");
+foreach my $conf (
+ [ 0, 0, 0, 1, "", " ", '""', 2, "", "", "" ],
+ [ 0, 0, 1, 1, undef, " ", '""', 2, undef, undef, undef ],
+ [ 0, 1, 0, 1, "", " ", '""', 2, "", "", "" ],
+ [ 0, 1, 1, 1, undef, " ", '""', 2, undef, undef, undef ],
+ [ 1, 0, 0, 1, "", " ", '""', 2, "", "", "" ],
+ [ 1, 0, 1, 1, "", " ", '""', 2, undef, "", undef ],
+ [ 1, 1, 0, 1, "", " ", '""', 2, "", "", "" ],
+ [ 1, 1, 1, 1, "", " ", '""', 2, undef, "", undef ],
+ ) {
+ my ($aq, $aw, $bu, @expect, $str) = @$conf;
+ $csv = Text::CSV->new ({ always_quote => $aq, allow_whitespace => $aw, blank_is_undef => $bu });
+ ok ($csv, "new ({ aq $aq aw $aw bu $bu })");
+ ok ($csv->combine (1, "", " ", '""', 2, undef, "", undef), "combine ()");
+ ok ($str = $csv->string, "string ()");
+ foreach my $eol ("", "\n", "\r\n") {
+ my $s_eol = _readable ($eol);
+ ok ($csv->parse ($str.$eol), "parse (*$str$s_eol*)");
+ ok (my @f = $csv->fields, "fields ()");
+ is_deeply (\@f, \@expect, "result");
+ }
+ }
+
+ok (1, "empty_is_undef");
+foreach my $conf (
+ [ 0, 0, 0, 1, "", " ", '""', 2, "", "", "" ],
+ [ 0, 0, 1, 1, undef, " ", '""', 2, undef, undef, undef ],
+ [ 0, 1, 0, 1, "", " ", '""', 2, "", "", "" ],
+ [ 0, 1, 1, 1, undef, " ", '""', 2, undef, undef, undef ],
+ [ 1, 0, 0, 1, "", " ", '""', 2, "", "", "" ],
+ [ 1, 0, 1, 1, undef, " ", '""', 2, undef, undef, undef ],
+ [ 1, 1, 0, 1, "", " ", '""', 2, "", "", "" ],
+ [ 1, 1, 1, 1, undef, " ", '""', 2, undef, undef, undef ],
+ ) {
+ my ($aq, $aw, $bu, @expect, $str) = @$conf;
+ $csv = Text::CSV->new ({ always_quote => $aq, allow_whitespace => $aw, empty_is_undef => $bu });
+ ok ($csv, "new ({ aq $aq aw $aw bu $bu })");
+ ok ($csv->combine (1, "", " ", '""', 2, undef, "", undef), "combine ()");
+ ok ($str = $csv->string, "string ()");
+ foreach my $eol ("", "\n", "\r\n") {
+ my $s_eol = _readable ($eol);
+ ok ($csv->parse ($str.$eol), "parse (*$str$s_eol*)");
+ ok (my @f = $csv->fields, "fields ()");
+ is_deeply (\@f, \@expect, "result");
+ }
+ }
+
+
+ok (1, "Trailing junk");
+foreach my $bin (0, 1) {
+ foreach my $eol (undef, "\r") {
+ my $s_eol = _readable ($eol);
+ my $csv = Text::CSV->new ({ binary => $bin, eol => $eol });
+ ok ($csv, "$s_eol - new ()");
+ my @bad = (
+ # test, line
+ [ 1, qq{"\r\r\n"\r} ],
+ [ 2, qq{"\r\r\n"\r\r} ],
+ [ 3, qq{"\r\r\n"\r\r\n} ],
+ [ 4, qq{"\r\r\n"\t \r} ],
+ [ 5, qq{"\r\r\n"\t \r\r} ],
+ [ 6, qq{"\r\r\n"\t \r\r\n} ],
+ );
+ my @pass = ( 0, 0, 0, 1 );
+ my @fail = ( 2022, 2022, 2023, 0 );
+
+ foreach my $arg (@bad) {
+ my ($tst, $bad) = @$arg;
+ my $ok = ($bin << 1) | ($eol ? 1 : 0);
+ is ($csv->parse ($bad), $pass[$ok], "$tst $ok - parse () default");
+ is (0 + $csv->error_diag, $fail[$ok], "$tst $ok - error $fail[$ok]");
+
+ $csv->allow_whitespace (1);
+ is ($csv->parse ($bad), $pass[$ok], "$tst $ok - parse () allow");
+ is (0 + $csv->error_diag, $fail[$ok], "$tst $ok - error $fail[$ok]");
+ }
+ }
+ }
+
+{ ok (1, "verbatim");
+ my $csv = Text::CSV->new ({
+ sep_char => "^",
+ binary => 1,
+ });
+
+ my @str = (
+ qq{M^^Abe^Timmerman#\r\n},
+ qq{M^^Abe\nTimmerman#\r\n},
+ );
+
+ my $gc;
+
+ ok (1, "verbatim on parse ()");
+ foreach $gc (0, 1) {
+ $csv->verbatim ($gc);
+
+ ok ($csv->parse ($str[0]), "\\n $gc parse");
+ my @fld = $csv->fields;
+ is (@fld, 4, "\\n $gc fields");
+ is ($fld[2], "Abe", "\\n $gc fld 2");
+ if ($gc) { # Note line ending is still there!
+ is ($fld[3], "Timmerman#\r\n", "\\n $gc fld 3");
+ }
+ else { # Note the stripped \r!
+ is ($fld[3], "Timmerman#", "\\n $gc fld 3");
+ }
+
+ ok ($csv->parse ($str[1]), "\\n $gc parse");
+ @fld = $csv->fields;
+ is (@fld, 3, "\\n $gc fields");
+ if ($gc) { # All newlines verbatim
+ is ($fld[2], "Abe\nTimmerman#\r\n", "\\n $gc fld 2");
+ }
+ else { # Note, rest is next line
+ is ($fld[2], "Abe", "\\n $gc fld 2");
+ }
+ }
+
+ $csv->eol ($/ = "#\r\n");
+ foreach $gc (0, 1) {
+ $csv->verbatim ($gc);
+
+ ok ($csv->parse ($str[0]), "#\\r\\n $gc parse");
+ my @fld = $csv->fields;
+ is (@fld, 4, "#\\r\\n $gc fields");
+ is ($fld[2], "Abe", "#\\r\\n $gc fld 2");
+ is ($fld[3], $gc ? "Timmerman#\r\n"
+ : "Timmerman", "#\\r\\n $gc fld 3");
+
+ ok ($csv->parse ($str[1]), "#\\r\\n $gc parse");
+ @fld = $csv->fields;
+ is (@fld, 3, "#\\r\\n $gc fields");
+ is ($fld[2], $gc ? "Abe\nTimmerman#\r\n"
+ : "Abe", "#\\r\\n $gc fld 2");
+ }
+
+ my $fh;
+ ok (1, "verbatim on getline (\$fh)");
+ open $fh, ">", $tfn or die "$tfn: $!\n";
+ print $fh @str, "M^Abe^*\r\n";
+ close $fh;
+
+ foreach $gc (0, 1) {
+ $csv->verbatim ($gc);
+
+ open $fh, "<", $tfn or die "$tfn: $!\n";
+
+ my $row;
+ ok ($row = $csv->getline ($fh), "#\\r\\n $gc getline");
+ is (@$row, 4, "#\\r\\n $gc fields");
+ is ($row->[2], "Abe", "#\\r\\n $gc fld 2");
+ is ($row->[3], "Timmerman", "#\\r\\n $gc fld 3");
+
+ ok ($row = $csv->getline ($fh), "#\\r\\n $gc parse");
+ is (@$row, 3, "#\\r\\n $gc fields");
+ is ($row->[2], $gc ? "Abe\nTimmerman"
+ : "Abe", "#\\r\\n $gc fld 2");
+ }
+
+ $gc = $csv->verbatim ();
+ ok (my $row = $csv->getline ($fh), "#\\r\\n $gc parse EOF");
+ is (@$row, 3, "#\\r\\n $gc fields");
+ is ($row->[2], "*\r\n", "#\\r\\n $gc fld 2");
+
+ close $fh;
+
+ $csv = Text::CSV->new ({
+ binary => 0,
+ verbatim => 1,
+ eol => "#\r\n",
+ });
+ open $fh, ">", $tfn or die "$tfn: $!\n";
+ print $fh $str[1];
+ close $fh;
+ open $fh, "<", $tfn or die "$tfn: $!\n";
+ is ($csv->getline ($fh), undef, "#\\r\\n $gc getline 2030");
+ is (0 + $csv->error_diag, 2030, "Got 2030");
+ close $fh;
+ unlink $tfn;
+ }
+
+{ ok (1, "keep_meta_info on getline ()");
+
+ my $csv = Text::CSV->new ({ eol => "\n" });
+
+ open my $fh, ">", $tfn or die "$tfn: $!\n";
+ print $fh qq{1,"",,"Q",2\n};
+ close $fh;
+
+ is ($csv->keep_meta_info (0), 0, "No meta info");
+ open $fh, "<", $tfn or die "$tfn: $!\n";
+ my $row = $csv->getline ($fh);
+ ok ($row, "Get 1st line");
+ $csv->error_diag ();
+ is ($csv->is_quoted (2), undef, "Is field 2 quoted?");
+ is ($csv->is_quoted (3), undef, "Is field 3 quoted?");
+ close $fh;
+
+ open $fh, ">", $tfn or die "$tfn: $!\n";
+ print $fh qq{1,"",,"Q",2\n};
+ close $fh;
+
+ is ($csv->keep_meta_info (1), 1, "Keep meta info");
+ open $fh, "<", $tfn or die "$tfn: $!\n";
+ $row = $csv->getline ($fh);
+ ok ($row, "Get 2nd line");
+ $csv->error_diag ();
+ is ($csv->is_quoted (2), 0, "Is field 2 quoted?");
+ is ($csv->is_quoted (3), 1, "Is field 3 quoted?");
+ close $fh;
+ unlink $tfn;
+ }
+
+{ my $csv = Text::CSV->new ({});
+
+ my $s2023 = qq{2023,",2008-04-05," \tFoo, Bar",\n}; # "
+ # ^
+
+ is ( $csv->parse ($s2023), 0, "Parse 2023");
+ is (($csv->error_diag)[0], 2023, "Fail code 2023");
+ is (($csv->error_diag)[2], 19, "Fail position");
+
+ is ( $csv->allow_whitespace (1), 1, "Allow whitespace");
+ is ( $csv->parse ($s2023), 0, "Parse 2023");
+ is (($csv->error_diag)[0], 2023, "Fail code 2023");
+ is (($csv->error_diag)[2], 22, "Space is eaten now");
+ }
+
+{ my $csv = Text::CSV->new ({ allow_unquoted_escape => 1, escape_char => "=" });
+ my $str = q{1,3,=};
+ is ( $csv->parse ($str), 0, "Parse trailing ESC");
+ is (($csv->error_diag)[0], 2035, "Fail code 2035");
+
+ $str .= "0";
+ is ( $csv->parse ($str), 1, "Parse trailing ESC");
+ is_deeply ([ $csv->fields ], [ 1,3,"\0" ], "Parse passed");
+ }
diff --git a/src/test/resources/module/Text-CSV/t/66_formula.t b/src/test/resources/module/Text-CSV/t/66_formula.t
new file mode 100644
index 000000000..dfd990a25
--- /dev/null
+++ b/src/test/resources/module/Text-CSV/t/66_formula.t
@@ -0,0 +1,207 @@
+#!/usr/bin/perl
+
+use strict;
+$^W = 1;
+
+use Test::More tests => 119;
+
+BEGIN {
+ $ENV{PERL_TEXT_CSV} = $ENV{TEST_PERL_TEXT_CSV} || 0;
+ use_ok "Text::CSV", ();
+ plan skip_all => "Cannot load Text::CSV" if $@;
+ }
+my $tfn = "_66test.csv"; END { -f $tfn and unlink $tfn; }
+
+ok (my $csv = Text::CSV->new, "new");
+
+is ($csv->formula, "none", "default");
+is ($csv->formula (1), "die", "die");
+is ($csv->formula ("die"), "die", "die");
+is ($csv->formula (2), "croak", "croak");
+is ($csv->formula ("croak"), "croak", "croak");
+is ($csv->formula (3), "diag", "diag");
+is ($csv->formula ("diag"), "diag", "diag");
+is ($csv->formula (4), "empty", "empty");
+is ($csv->formula ("empty"), "empty", "empty");
+is ($csv->formula (""), "empty", "explicit empty");
+is ($csv->formula (5), "undef", "undef");
+is ($csv->formula ("undef"), "undef", "undef");
+is ($csv->formula (undef), "undef", "explicit undef");
+is ($csv->formula (sub { }), "cb", "callback");
+is ($csv->formula (0), "none", "none");
+is ($csv->formula ("none"), "none", "none");
+
+is ($csv->formula_handling, "none", "default");
+is ($csv->formula_handling ("DIE"), "die", "die");
+is ($csv->formula_handling ("CROAK"), "croak", "croak");
+is ($csv->formula_handling ("DIAG"), "diag", "diag");
+is ($csv->formula_handling ("EMPTY"), "empty", "empty");
+is ($csv->formula_handling ("UNDEF"), "undef", "undef");
+is ($csv->formula_handling ("NONE"), "none", "none");
+
+foreach my $f (-1, 9, "xxx", "DIAX", [], {}) {
+ eval { $csv->formula ($f); };
+ like ($@, qr/\bformula-handling '\Q$f\E' is not supported/, "$f in invalid");
+ }
+
+my %f = qw(
+ 0 none none none
+ 1 die die die
+ 2 croak croak croak
+ 3 diag diag diag
+ 4 empty empty empty
+ 5 undef undef undef
+ );
+foreach my $f (sort keys %f) {
+ ok (my $p = Text::CSV->new ({ formula => $f }), "new with $f");
+ is ($p->formula, $f{$f}, "Set to $f{$f}");
+ }
+eval { Text::CSV->new ({ formula => "xxx" }); };
+like ($@, qr/\bformula-handling 'xxx' is not supported/, "xxx is invalid");
+
+# TODO : $csv->formula (sub { 42; });
+
+# Parser
+
+my @data = split m/\n/ => <<"EOC";
+a,b,c
+1,2,3
+=1+2,3,4
+1,=2+3,4
+1,2,=3+4
+EOC
+
+sub parse {
+ my $f = shift;
+ my @d;
+ ok (my $csv = Text::CSV->new ({ formula => $f }), "new $f");
+ #diag ("Formula: ". $csv->formula);
+ for (@data) {
+ $csv->parse ($_);
+ push @d, [ $csv->fields ];
+ }
+ \@d;
+ } # parse
+
+is_deeply (parse (0), [
+ [ "a", "b", "c", ],
+ [ "1", "2", "3", ],
+ [ "=1+2", "3", "4", ],
+ [ "1", "=2+3", "4", ],
+ [ "1", "2", "=3+4", ],
+ ], "Default");
+
+my $r = eval { parse (1) };
+is ($r, undef, "Die on formulas");
+is ($@, "Formulas are forbidden\n", "Message");
+$@ = undef;
+
+ $r = eval { parse (2) };
+is ($r, undef, "Croak on formulas");
+is ($@, "Formulas are forbidden\n", "Message");
+$@ = undef;
+
+my @m;
+local $SIG{__WARN__} = sub { push @m, @_ };
+
+is_deeply (parse (3), [
+ [ "a", "b", "c", ],
+ [ "1", "2", "3", ],
+ [ "=1+2", "3", "4", ],
+ [ "1", "=2+3", "4", ],
+ [ "1", "2", "=3+4", ],
+ ], "Default");
+is ($@, undef, "Legal with warnings");
+is_deeply (\@m, [
+ "Field 1 in record 3 contains formula '=1+2'\n",
+ "Field 2 in record 4 contains formula '=2+3'\n",
+ "Field 3 in record 5 contains formula '=3+4'\n",
+ ], "Warnings");
+@m = ();
+
+is_deeply (parse (4), [
+ [ "a", "b", "c", ],
+ [ "1", "2", "3", ],
+ [ "", "3", "4", ],
+ [ "1", "", "4", ],
+ [ "1", "2", "", ],
+ ], "Empty");
+
+is_deeply (parse (5), [
+ [ "a", "b", "c", ],
+ [ "1", "2", "3", ],
+ [ undef, "3", "4", ],
+ [ "1", undef, "4", ],
+ [ "1", "2", undef, ],
+ ], "Undef");
+
+for ([ "Callback return", sub { 42; } ],
+ [ "Callback assign", sub { $_ = 42; } ],
+ [ "Callback subst", sub { s/.*/42/; $_ } ], # s///r requires 5.13.2
+ ) {
+ my ($msg, $cb) = @$_;
+ is_deeply (parse ($cb), [
+ [ "a", "b", "c", ],
+ [ "1", "2", "3", ],
+ [ "42", "3", "4", ],
+ [ "1", "42", "4", ],
+ [ "1", "2", "42", ],
+ ], $msg);
+ }
+is_deeply (parse (sub { eval { s{^=([-+*/0-9()]+)$}{$1}ee }; $_ }), [
+ [ "a", "b", "c", ],
+ [ "1", "2", "3", ],
+ [ "3", "3", "4", ],
+ [ "1", "5", "4", ],
+ [ "1", "2", "7", ],
+ ], "Callback calculations");
+
+{ @m = ();
+ ok (my $csv = Text::CSV->new ({ formula => 3 }), "new 3 hr");
+ ok ($csv->column_names ("code", "value", "desc"), "Set column names");
+ ok ($csv->parse ("1,=2+3,4"), "Parse");
+ is_deeply (\@m,
+ [ qq{Field 2 (column: 'value') contains formula '=2+3'\n} ],
+ "Warning for HR");
+ }
+
+# Writer
+
+sub writer {
+ my $f = shift;
+ ok (my $csv = Text::CSV->new ({
+ formula_handling => $f, quote_empty => 1 }), "new $f");
+ ok ($csv->combine ("1", "=2+3", "4"), "combine $f");
+ $csv->string;
+ } # writer
+
+@m = ();
+is ( writer (0), q{1,=2+3,4}, "Out 0");
+is (eval { writer (1) }, undef, "Out 1");
+is (eval { writer (2) }, undef, "Out 2");
+is ( writer (3), q{1,=2+3,4}, "Out 3");
+is ( writer (4), q{1,"",4}, "Out 4");
+is ( writer (5), q{1,,4}, "Out 5");
+is_deeply (\@m, [ "Field 1 contains formula '=2+3'\n" ], "Warning 3");
+
+@m = ();
+is ( writer ("none"), q{1,=2+3,4}, "Out none");
+is (eval { writer ("die") }, undef, "Out die");
+is (eval { writer ("croak") }, undef, "Out croak");
+is ( writer ("diag"), q{1,=2+3,4}, "Out diag");
+is ( writer ("empty"), q{1,"",4}, "Out empty");
+is ( writer ("undef"), q{1,,4}, "Out undef");
+is_deeply (\@m, [ "Field 1 contains formula '=2+3'\n" ], "Warning diag");
+
+open my $fh, ">", $tfn;
+printf $fh <<"EOC";
+1,2,3
+=1+2,3,4
+1,=12-6,5
+1,2,=4+(9-1)/2
+EOC
+close $fh;
+
+is_deeply (Text::CSV::csv (in => $tfn,
+ formula => sub { eval { s{^=([-+*/0-9()]+)$}{$1}ee }; $_ }),
+ [[1,2,3],[3,3,4],[1,6,5],[1,2,8]], "Formula calc from csv function");
diff --git a/src/test/resources/module/Text-CSV/t/67_emptrow.t b/src/test/resources/module/Text-CSV/t/67_emptrow.t
new file mode 100644
index 000000000..6d4ccbf11
--- /dev/null
+++ b/src/test/resources/module/Text-CSV/t/67_emptrow.t
@@ -0,0 +1,144 @@
+#!/usr/bin/perl
+
+use strict;
+$^W = 1;
+
+use Test::More;
+
+BEGIN {
+ if ($] < 5.008001) {
+ plan skip_all => "This test unit requires perl-5.8.1 or higher";
+ }
+ else {
+ plan tests => 56;
+ }
+
+ $ENV{PERL_TEXT_CSV} = $ENV{TEST_PERL_TEXT_CSV} || 0;
+
+ use_ok "Text::CSV", ("csv");
+ plan skip_all => "Cannot load Text::CSV" if $@;
+ }
+my $tfn = "_67test.csv"; END { -f $tfn and unlink $tfn; }
+
+ok (my $csv = Text::CSV->new, "new");
+
+is ($csv->skip_empty_rows, 0, "default");
+is ($csv->skip_empty_rows (1), 1, "+1");
+is ($csv->skip_empty_rows ("skip"), 1, "skip");
+is ($csv->skip_empty_rows ("SKIP"), 1, "SKIP");
+is ($csv->skip_empty_rows (2), "eof", "+2");
+is ($csv->skip_empty_rows ("eof"), "eof", "eof");
+is ($csv->skip_empty_rows ("EOF"), "eof", "EOF");
+is ($csv->skip_empty_rows ("stop"), "eof", "stop");
+is ($csv->skip_empty_rows ("STOP"), "eof", "STOP");
+is ($csv->skip_empty_rows (3), "die", "+3");
+is ($csv->skip_empty_rows ("die"), "die", "die");
+is ($csv->skip_empty_rows ("DIE"), "die", "DIE");
+is ($csv->skip_empty_rows (4), "croak", "+4");
+is ($csv->skip_empty_rows ("croak"), "croak", "croak");
+is ($csv->skip_empty_rows ("CROAK"), "croak", "CROAK");
+is ($csv->skip_empty_rows (5), "error", "+5");
+is ($csv->skip_empty_rows ("error"), "error", "error");
+is ($csv->skip_empty_rows ("ERROR"), "error", "ERROR");
+
+sub cba { [ 3, 42, undef, 3 ] }
+sub cbh { { a => 3, b => 42, c => undef, d => 3 } }
+
+is ($csv->skip_empty_rows (\&cba), \&cba, "callback");
+
+is ($csv->skip_empty_rows (0), 0, "+0");
+is ($csv->skip_empty_rows (undef), 0, "undef");
+
+open my $fh, ">", $tfn or BAIL_OUT "$tfn: $!\n";
+print $fh "a,b,c,d\n";
+print $fh "1,2,0,4\n";
+print $fh "4,0,9,1\n";
+print $fh "\n";
+print $fh "8,2,7,1\n";
+print $fh "\n";
+print $fh "\n";
+print $fh "5,7,9,3\n";
+print $fh "\n";
+close $fh;
+
+my @parg = (auto_diag => 0, in => $tfn);
+my @head = ([qw( a b c d )], [1,2,0,4], [4,0,9,1]);
+my @repl = (1..4);
+my $ea = \@repl;
+
+# Array behavior
+is_deeply (csv (@parg, skip_empty_rows => 0), [ @head,
+ [""],[8,2,7,1],[""],[""],[5,7,9,3],[""]], "A Default");
+
+is_deeply (csv (@parg, skip_empty_rows => 1), [ @head,
+ [8,2,7,1],[5,7,9,3]], "A Skip");
+
+is_deeply (csv (@parg, skip_empty_rows => 2), \@head, "A EOF");
+
+is (eval { csv (@parg, skip_empty_rows => 3); }, undef, "A die");
+like ($@, qr{^Empty row}, "A msg");
+
+is (eval { csv (@parg, skip_empty_rows => 4); }, undef, "A croak");
+like ($@, qr{^Empty row}, "A msg");
+
+$@ = "";
+$csv = Text::CSV->new ({ skip_empty_rows => 5 });
+is_deeply ($csv->csv (@parg), \@head, "A error");
+is ($@, "", "A msg");
+is (0 + $csv->error_diag, 2015, "A code");
+
+is_deeply (csv (@parg, skip_empty_rows => sub {\@repl}), [ @head,
+ $ea,[8,2,7,1],$ea,$ea,[5,7,9,3],$ea], "A Callback");
+is_deeply (csv (@parg, skip_empty_rows => sub {0}), \@head, "A Callback 0");
+
+# Array behavior (line by line)
+open $fh, "<", $tfn;
+$csv = Text::CSV->new ({ skip_empty_rows => 1 });
+while (my $row = $csv->getline ($fh)) {
+ ok (@$row, "Row has columns");
+ }
+close $fh;
+
+# Hash behavior
+push @parg => bom => 1;
+my $eh = { a => "", b => undef, c => undef, d => undef },
+@head = ({ a => 1, b => 2, c => 0, d => 4 },
+ { a => 4, b => 0, c => 9, d => 1 });
+is_deeply (csv (@parg, skip_empty_rows => 0), [ @head, $eh,
+ { a => 8, b => 2, c => 7, d => 1 },$eh,$eh,
+ { a => 5, b => 7, c => 9, d => 3 },$eh], "H Default");
+
+is_deeply (csv (@parg, skip_empty_rows => 1), [ @head,
+ { a => 8, b => 2, c => 7, d => 1 },
+ { a => 5, b => 7, c => 9, d => 3 }], "H Skip");
+
+is_deeply (csv (@parg, skip_empty_rows => 2), \@head, "H EOF");
+
+is (eval { csv (@parg, skip_empty_rows => 3); }, undef, "H die");
+like ($@, qr{^Empty row}, "H msg");
+
+is (eval { csv (@parg, skip_empty_rows => 4); }, undef, "H croak");
+like ($@, qr{^Empty row}, "H msg");
+
+$@ = "";
+$csv = Text::CSV->new ({ skip_empty_rows => 5 });
+is_deeply ($csv->csv (@parg), \@head, "H error");
+is ($@, "", "H msg");
+is (0 + $csv->error_diag, 2015, "H code");
+
+$eh = { a => 1, b => 2, c => 3, d => 4 };
+is_deeply (csv (@parg, skip_empty_rows => sub {\@repl}), [ @head, $eh,
+ { a => 8, b => 2, c => 7, d => 1 },$eh,$eh,
+ { a => 5, b => 7, c => 9, d => 3 },$eh], "H Callback");
+
+is_deeply (csv (@parg, skip_empty_rows => sub {0}), \@head, "H Callback 0");
+
+# Hash behavior (line by line)
+open $fh, "<", $tfn;
+$csv = Text::CSV->new ({ skip_empty_rows => 1 });
+my $cols = $csv->getline ($fh);
+$csv->column_names (@$cols);
+while (my $row = $csv->getline_hr ($fh)) {
+ isnt ($row->{a}, undef, "Column 'a' is defined");
+ }
+close $fh;
diff --git a/src/test/resources/module/Text-CSV/t/68_header.t b/src/test/resources/module/Text-CSV/t/68_header.t
new file mode 100644
index 000000000..96319d42f
--- /dev/null
+++ b/src/test/resources/module/Text-CSV/t/68_header.t
@@ -0,0 +1,64 @@
+#!/usr/bin/perl
+
+use strict;
+$^W = 1;
+
+use Test::More;
+
+BEGIN {
+ if ($] < 5.008001) {
+ plan skip_all => "This test unit requires perl-5.8.1 or higher";
+ }
+ else {
+ plan tests => 32;
+ }
+
+ $ENV{PERL_TEXT_CSV} = $ENV{TEST_PERL_TEXT_CSV} || 0;
+
+ use_ok "Text::CSV", "csv";
+ require "./t/util.pl";
+ }
+
+my $tfn = "_68test.csv"; END { unlink $tfn, "_$tfn"; }
+
+my @dta = (
+ [qw( foo bar zap )],
+ [qw( mars venus pluto )],
+ [qw( 1 2 3 )],
+ );
+my @dth = (
+ { foo => "mars", bar => "venus", zap => "pluto" },
+ { foo => 1, bar => 2, zap => 3 },
+ );
+
+{ open my $fh, ">", $tfn or die "$tfn: $!\n";
+ local $" = ",";
+ print $fh "@$_\n" for @dta;
+ close $fh;
+ }
+
+is_deeply (csv (in => $tfn), \@dta, "csv ()");
+is_deeply (csv (in => $tfn, bom => 1), \@dth, "csv (bom)");
+is_deeply (csv (in => $tfn, headers => "auto"), \@dth, "csv (headers)");
+is_deeply (csv (in => $tfn, bom => 1, headers => "auto"), \@dth, "csv (bom, headers)");
+
+foreach my $arg ("", "bom", "auto", "bom, auto") {
+ open my $fh, "<", $tfn or die "$tfn: $!\n";
+ my %attr;
+ $arg =~ m/bom/ and $attr{bom} = 1;
+ $arg =~ m/auto/ and $attr{headers} = "auto";
+ ok (my $csv = Text::CSV->new (), "New ($arg)");
+ is ($csv->record_number, 0, "start");
+ if ($arg) {
+ is_deeply ([ $csv->header ($fh, \%attr) ], $dta[0], "Header") if $arg;
+ is ($csv->record_number, 1, "first data-record");
+ is_deeply ($csv->getline_hr ($fh), $dth[$_], "getline $_") for 0..$#dth;
+ }
+ else {
+ is_deeply ($csv->getline ($fh), $dta[$_], "getline $_") for 0..$#dta;
+ }
+ is ($csv->record_number, 3, "done");
+ close $fh;
+ }
+
+
diff --git a/src/test/resources/module/Text-CSV/t/71_pp.t b/src/test/resources/module/Text-CSV/t/71_pp.t
new file mode 100644
index 000000000..953f8d1b5
--- /dev/null
+++ b/src/test/resources/module/Text-CSV/t/71_pp.t
@@ -0,0 +1,393 @@
+#!/usr/bin/perl
+
+# tests for bug report fixes or patches.
+
+use strict;
+$^W = 1;
+
+use Test::More tests => 104;
+
+
+BEGIN { $ENV{PERL_TEXT_CSV} = $ENV{TEST_PERL_TEXT_CSV} || 0; }
+
+BEGIN {
+ require_ok "Text::CSV";
+ plan skip_all => "Cannot load Text::CSV" if $@;
+}
+
+#warn Text::CSV->backend;
+
+my $csv = Text::CSV->new( { sep_char => "\t", blank_is_undef => 1, allow_whitespace => 1 } );
+
+ok $csv->parse(qq|John\t\t"my notes"|);
+
+is_deeply ([ $csv->fields ], [ "John", undef, "my notes" ], "Tab with allow_white_space");
+
+
+
+# 2009-04-23 rt#45215
+
+my $str = "this,is,some,csv,data\n";
+
+$csv = Text::CSV->new;
+$csv->parse($str);
+
+is( $csv->string, $str );
+
+#=pod
+
+# 2009-05-16
+# getline() handles having escaped null
+
+my $opts = {
+ 'escape_char' => '"',
+ 'quote_char' => '"',
+ 'binary' => 1,
+ 'sep_char' => ','
+};
+
+my $eol = "\r\n";
+my $blob = ( join "", map { chr $_ } 0 .. 255 ) x 1;
+#my $blob = ( join "", map { chr $_ } 0 .. 2 ) x 1;
+
+$csv = Text::CSV->new( $opts );
+
+open( FH, '>__test.csv' ) or die $!;
+binmode FH;
+
+# writting
+ok( $csv->print( *FH, [ $blob ] ) );
+close( FH );
+
+# reading
+open( FH, "__test.csv" ) or die $!;
+binmode FH;
+
+$opts->{eol} = $eol;
+$csv = Text::CSV->new( $opts );
+
+ok( my $colref = $csv->getline( *FH ) );
+
+is( $colref->[0], $blob, "blob" );
+
+close( FH );
+
+#exit;
+unlink( '__test.csv' );
+
+#=cut
+
+# 2009-07-30
+# getline() handles a 0 staring multiline
+
+
+# writting
+open( FH, '>__test.csv' ) or die $!;
+binmode FH;
+
+
+ok( $csv->print( *FH, [ "00" ] ) );
+ok( $csv->print( *FH, [ "\00" ] ) );
+ok( $csv->print( *FH, [ "0\0" ] ) );
+ok( $csv->print( *FH, [ "\0\0" ] ) );
+
+ok( $csv->print( *FH, [ "0\n0" ] ) );
+ok( $csv->print( *FH, [ "\0\n0" ] ) );
+ok( $csv->print( *FH, [ "0\n\0" ] ) );
+ok( $csv->print( *FH, [ "\0\n\0" ] ) );
+
+ok( $csv->print( *FH, [ "\"0\n0" ] ) );
+ok( $csv->print( *FH, [ "\"\0\n0" ] ) );
+ok( $csv->print( *FH, [ "\"0\n\0" ] ) );
+ok( $csv->print( *FH, [ "\"\0\n\0" ] ) );
+
+ok( $csv->print( *FH, [ "\"0\n\"0" ] ) );
+ok( $csv->print( *FH, [ "\"\0\n\"0" ] ) );
+ok( $csv->print( *FH, [ "\"0\n\"\0" ] ) );
+ok( $csv->print( *FH, [ "\"\0\n\"\0" ] ) );
+
+ok( $csv->print( *FH, [ "0\n0", "0\n0" ] ) );
+ok( $csv->print( *FH, [ "\0\n0", "\0\n0" ] ) );
+ok( $csv->print( *FH, [ "0\n\0", "0\n\0" ] ) );
+ok( $csv->print( *FH, [ "\0\n\0", "\0\n\0" ] ) );
+
+$csv->always_quote(1);
+
+ok( $csv->print( *FH, [ "", undef, "0\n", "", "\0\n0" ] ) );
+
+
+close( FH );
+
+# reading
+open( FH, "__test.csv" ) or die $!;
+binmode FH;
+
+is( $csv->getline( *FH )->[0], "00", '*00' ); # Test::More warns 00
+is( $csv->getline( *FH )->[0], "\00", '\00' );
+is( $csv->getline( *FH )->[0], "0\0", '0\0' );
+is( $csv->getline( *FH )->[0], "\0\0", '\0\0' );
+
+is( $csv->getline( *FH )->[0], "0\n0", '*0\n0' ); # Test::More warns 00
+is( $csv->getline( *FH )->[0], "\0\n0", '\0\n0' );
+is( $csv->getline( *FH )->[0], "0\n\0", '0\n\0' );
+is( $csv->getline( *FH )->[0], "\0\n\0", '\0\n\0' );
+
+is( $csv->getline( *FH )->[0], "\"0\n0", '\"0\n0' );
+is( $csv->getline( *FH )->[0], "\"\0\n0", '\"\0\n0' );
+is( $csv->getline( *FH )->[0], "\"0\n\0", '\"0\n\0' );
+is( $csv->getline( *FH )->[0], "\"\0\n\0", '\"\0\n\0' );
+
+is( $csv->getline( *FH )->[0], "\"0\n\"0", '\"0\n\"0' );
+is( $csv->getline( *FH )->[0], "\"\0\n\"0", '\"\0\n\"0' );
+is( $csv->getline( *FH )->[0], "\"0\n\"\0", '\"0\n\"\0' );
+is( $csv->getline( *FH )->[0], "\"\0\n\"\0", '\"\0\n\"\0' );
+
+is( $csv->getline( *FH )->[1], "0\n0", '*0\n0' ); # Test::More warns 00
+is( $csv->getline( *FH )->[1], "\0\n0", '\0\n0' );
+is( $csv->getline( *FH )->[1], "0\n\0", '0\n\0' );
+is( $csv->getline( *FH )->[1], "\0\n\0", '\0\n\0' );
+
+$csv->blank_is_undef(1);
+
+my $col = $csv->getline( *FH );
+
+is( $col->[0], "", '' );
+is( $col->[1], undef, '' );
+is( $col->[2], "0\n", '' );
+is( $col->[3], "", '' );
+is( $col->[4], "\0\n0", '' );
+
+close( FH );
+
+unlink( '__test.csv' );
+
+# 2010-06-18 reported by https://rt.cpan.org/Public/Bug/Display.html?id=58356
+
+$csv = Text::CSV->new ({ binary => 1, quote_space => 0 });
+my @list = (
+ "a a",
+ "b,b",
+ "c ,c",
+);
+
+ok( $csv->combine( @list ) );
+is( $csv->string, q{a a,"b,b","c ,c"} );
+
+
+# 2010-06-22 reported
+{
+ $csv = Text::CSV->new ({ binary => 1, sep_char => ';', always_quote => 1 });
+
+ open( FH, '>__test.csv' ) or die $!;
+ binmode FH;
+
+ ok( $csv->print( *FH, [ 0, qq{t"t"\n} ] ) );
+
+ close( FH );
+
+ open( FH, "__test.csv" ) or die $!;
+ binmode FH;
+
+ my $col = $csv->getline( *FH );
+
+ is( $col->[0], "0" );
+ is( $col->[1], qq{t"t"\n} );
+ close( FH );
+
+ unlink( '__test.csv' );
+}
+
+
+# 2010-10-13 reported by hiratara
+{
+ $csv = Text::CSV->new ({ binary => 1, eol => $/, always_quote => 1 });
+
+ open( FH, '>__test.csv' ) or die $!;
+ binmode FH;
+
+ ok( $csv->print( *FH, [qw/A 01/] ) );
+ ok( $csv->print( *FH, [qw/B 02/] ) );
+ close( FH );
+
+ open( FH, "__test.csv" ) or die $!;
+ binmode FH;
+
+ my $col = $csv->getline( *FH );
+
+ is( $col->[0], 'A' );
+ is( $col->[1], '01' );
+
+ $col = $csv->getline( *FH );
+
+ is( $col->[0], 'B' );
+ is( $col->[1], '02' );
+ close( FH );
+
+ unlink( '__test.csv' );
+}
+
+
+# 2010-10-13 reported(2) by hiratara
+{
+ $csv = Text::CSV->new ({ binary => 1, eol => $/ });
+
+ open( FH, '>__test.csv' ) or die $!;
+ binmode FH;
+
+ ok( $csv->print( *FH, [qw/1 0"/] ) );
+ ok( $csv->print( *FH, [qw/2 0"/] ) );
+ close( FH );
+
+ open( FH, "__test.csv" ) or die $!;
+ binmode FH;
+
+ my $col = $csv->getline( *FH );
+
+ is( $col->[0], '1' );
+ is( $col->[1], '0"' );
+
+ $col = $csv->getline( *FH );
+
+ is( $col->[0], '2' );
+ is( $col->[1], '0"' );
+
+ close( FH );
+
+ unlink( '__test.csv' );
+}
+
+
+{ # previous three test merged
+ $csv = Text::CSV->new ({ binary => 1, eol => $/ });
+
+ open( FH, '>__test.csv' ) or die $!;
+ binmode FH;
+
+ ok( $csv->print( *FH, [ 0, qq{t"t"\n} ] ) );
+ ok( $csv->print( *FH, [qw/A 01/] ) );
+ ok( $csv->print( *FH, [qw/1 0"/] ) );
+ ok( $csv->print( *FH, [undef,undef] ) );
+ ok( $csv->print( *FH, [qw/1 0"/] ) );
+ ok( $csv->print( *FH, [qw/A 01/] ) );
+ close( FH );
+
+ open( FH, "__test.csv" ) or die $!;
+ binmode FH;
+
+ my $col = $csv->getline( *FH );
+ is( $col->[0], "0" );
+ is( $col->[1], qq{t"t"\n} );
+
+ $col = $csv->getline( *FH );
+ is( $col->[0], 'A' );
+ is( $col->[1], '01' );
+
+ $col = $csv->getline( *FH );
+ is( $col->[0], '1' );
+ is( $col->[1], '0"' );
+
+ $col = $csv->getline( *FH );
+ is( $col->[0], '' );
+ is( $col->[1], '' );
+
+ $col = $csv->getline( *FH );
+ is( $col->[0], '1' );
+ is( $col->[1], '0"' );
+
+ $col = $csv->getline( *FH );
+ is( $col->[0], 'A' );
+ is( $col->[1], '01' );
+ close( FH );
+
+ unlink( '__test.csv' );
+}
+
+
+SKIP: { # https://rt.cpan.org/Ticket/Display.html?id=83705
+skip "pp only for now", 3 unless Text::CSV->is_pp;
+
+my $csv = Text::CSV->new(
+ {
+ binary => 1,
+ allow_loose_escapes => 1,
+ allow_loose_quotes => 1,
+ sep_char => q{;},
+ escape_char => q{"},
+ quote_char => q{"}
+ }
+);
+
+$csv->parse(q{"6RE";"EINKAUF";"5";"";"2,5" HD"});
+is_deeply([$csv->fields], ["6RE","EINKAUF","5","",'2,5" HD']);
+
+my $csv_dump = q{"6RE";"EINKAUF";"5";"";"2,5" HD"
+"LIDL";"-2"};
+
+open( FH, '>__test.csv' ) or die $!;
+print FH $csv_dump;
+close FH;
+
+open FH, '<__test.csv';
+
+is_deeply( $csv->getline(*FH), ["6RE","EINKAUF","5","",'2,5" HD'] );
+is_deeply( $csv->getline(*FH), ['LIDL','-2'] );
+
+close FH;
+
+unlink( '__test.csv' );
+
+}
+
+{ # imported from t/70_rt.t
+my $csv = Text::CSV->new ({ escape_char => "\\", auto_diag => 1 });
+
+ok( $csv->parse(q{1,"\,",3}) );
+is_deeply ([ $csv->fields ], [ 1, ",", 3 ], "escaped sep in quoted field");
+ok( $csv->parse(q{1,"2\,4",3}) );
+is_deeply ([ $csv->fields ], [ 1, "2,4", 3 ], "escaped sep in quoted field");
+
+$csv->allow_unquoted_escape(1);
+ok( $csv->parse(q{1,\,,3}) );
+is_deeply ([ $csv->fields ], [ 1, ",", 3 ], "escaped sep in quoted field");
+ok( $csv->parse(q{1,2\,4,3}) );
+is_deeply ([ $csv->fields ], [ 1, "2,4", 3 ], "escaped sep in quoted field");
+}
+
+{ # https://github.com/makamaka/Text-CSV/pull/3
+
+ {
+ package FakeFileHandleForEOF;
+
+ sub new { return bless { line => "foo,bar,baz\n" }, shift }
+
+ sub getline {
+ my $self = shift;
+ return delete $self->{line};
+ }
+
+ sub eof {
+ my $self = shift;
+ return not exists $self->{line};
+ }
+ }
+
+ my $csv = Text::CSV->new({binary => 1});
+ my $fh = FakeFileHandleForEOF->new;
+ ok(!$fh->eof);
+ eval { is_deeply( $csv->getline($fh), [qw[ foo bar baz ]]) };
+ is($@, '', "no exception thrown");
+ ok($fh->eof);
+}
+
+{ # https://github.com/makamaka/Text-CSV/issues/14
+ # https://rt.cpan.org/Ticket/Display.html?id=109719
+ SKIP: {
+ skip "requires Encode", 1 unless eval "require Encode";
+ my $csv = Text::CSV->new({empty_is_undef => 1, blank_is_undef => 1});
+ my $line = "foo,,bar,";
+ Encode::_utf8_on($line);
+ $csv->parse($line);
+ my @fields = $csv->fields;
+ is_deeply \@fields => ['foo', undef, 'bar', undef];
+ }
+}
diff --git a/src/test/resources/module/Text-CSV/t/71_strict.t b/src/test/resources/module/Text-CSV/t/71_strict.t
new file mode 100644
index 000000000..41dcdab48
--- /dev/null
+++ b/src/test/resources/module/Text-CSV/t/71_strict.t
@@ -0,0 +1,231 @@
+#!/usr/bin/perl
+
+use strict;
+$^W = 1;
+
+ use Test::More tests => 75;
+#use Test::More "no_plan";
+
+my %err;
+
+BEGIN {
+ $ENV{PERL_TEXT_CSV} = $ENV{TEST_PERL_TEXT_CSV} || 0;
+ require_ok "Text::CSV";
+ plan skip_all => "Cannot load Text::CSV" if $@;
+ require "./t/util.pl";
+
+ open my $fh, "<", "lib/Text/CSV_PP.pm" or die "Cannot read error messages from PP\n";
+ while (<$fh>) {
+ m/^ ([0-9]{4}) => "([^"]+)",/ and $err{$1} = $2;
+ }
+ close $fh;
+ }
+
+my $tfn = "_80test.csv"; END { -f $tfn and unlink $tfn; }
+$| = 1;
+
+my $csv = Text::CSV->new ();
+
+{ my $csv = Text::CSV->new ({ strict => 1 });
+ ok ($csv->parse ("1,2,3"), "Set strict to 3 columns");
+ ok ($csv->parse ("a,b,c"), "3 columns should be correct");
+ is ($csv->parse ("3,4"), 0, "Not enough columns");
+ is (0 + $csv->error_diag, 2014, "Error set correctly");
+ }
+{ my $csv = Text::CSV->new ({ strict => 1 });
+ ok ($csv->parse ("1,2,3"), "Set strict to 3 columns");
+ is ($csv->parse ("3,4,5,6"), 0, "Too many columns");
+ is (0 + $csv->error_diag, 2014, "Error set correctly");
+ }
+{ my $csv = Text::CSV->new ({ strict => 1 });
+ open my $fh, ">", $tfn or die "$tfn: $!\n";
+ ok ($csv->say ($fh, [ 1, 2, 3 ]), "Write line 1");
+ ok ($csv->say ($fh, [ 1, 2, 3 ]), "Write line 2");
+ close $fh;
+ open $fh, "<", $tfn or die "$tfn: $!\n";
+ ok ((my $r = $csv->getline ($fh)), "Get line 1 under strict");
+ ok (( $r = $csv->getline ($fh)), "Get line 2 under strict");
+ is ($csv->getline ($fh), undef, "EOF under strict");
+ is (0 + $csv->error_diag, 2012, "Error is 2012 instead of 2014");
+ ok ($csv->eof, "EOF is set");
+ close $fh;
+ }
+{ my $csv = Text::CSV->new ({ strict => 1 });
+ open my $fh, ">", $tfn or die "$tfn: $!\n";
+ ok ($csv->say ($fh, [ 1, 2, 3 ]), "Write line 1");
+ ok ($csv->print ($fh, [ 1, 2, 3 ]), "Write line 2 no newline");
+ close $fh;
+ open $fh, "<", $tfn or die "$tfn: $!\n";
+ ok ((my $r = $csv->getline ($fh)), "Get line 1 under strict");
+ ok (( $r = $csv->getline ($fh)), "Get line 2 under strict no newline");
+ is ($csv->getline ($fh), undef, "EOF under strict");
+ is (0 + $csv->error_diag, 2012, "Error is 2012 instead of 2014");
+ ok ($csv->eof, "EOF is set");
+ close $fh;
+ }
+{ my $csv = Text::CSV->new ();
+ open my $fh, ">", $tfn or die "$tfn: $!\n";
+ ok ($csv->say ($fh, [ 1 .. 3 ]), "Write line 1 (headers)");
+ ok ($csv->say ($fh, [ 1 .. 4 ]), "Write line 2 (data)");
+ close $fh;
+ my $aoh = Text::CSV::csv (in => $tfn, headers => "auto");
+ is_deeply ($aoh, [{ 1 => 1, 2 => 2, 3 => 3 }], "Column dropped");
+ my @e;
+ eval {
+ local $SIG{__WARN__} = sub { push @e => @_ };
+ $aoh = Text::CSV::csv (in => $tfn, headers => "auto", strict => 1);
+ };
+ is_deeply ($aoh, [], "Fail under strict");
+ is (scalar @e, 1, "Got error");
+ like ($e[0], qr{ 2014 }, "Error 2014");
+
+ open $fh, ">", $tfn or die "$tfn: $!\n";
+ ok ($csv->say ($fh, [ 1 .. 4 ]), "Write line 1 (headers)");
+ ok ($csv->say ($fh, [ 1 .. 3 ]), "Write line 2 (data)");
+ close $fh;
+ $aoh = Text::CSV::csv (in => $tfn, headers => "auto");
+ is_deeply ($aoh, [{ 1 => 1, 2 => 2, 3 => 3, 4 => undef }], "Column added");
+ @e = ();
+ eval {
+ local $SIG{__WARN__} = sub { push @e => @_ };
+ $aoh = Text::CSV::csv (in => $tfn, headers => "auto", strict => 1);
+ };
+ is_deeply ($aoh, [], "Fail under strict");
+ is (scalar @e, 1, "Got error");
+ like ($e[0], qr{ 2014 }, "Error 2014");
+ }
+
+foreach my $strict (0, 1) {
+ my $csv = Text::CSV->new ({
+ binary => 1,
+ comment_str => "#",
+ eol => "\n",
+ escape_char => '"',
+ quote_char => '"',
+ sep_char => "|",
+ strict => $strict,
+ });
+
+ my $status = $csv->parse ('a|b|"d"');
+ is (0 + $csv->error_diag, 0, "No fail under strict = $strict");
+ $status = $csv->parse ('a|b|c"d"e'); # Loose unescaped quote
+ is (0 + $csv->error_diag, 2034, "Previous error still actual");
+ }
+
+open my $fh, ">", $tfn or die "$tfn: $!\n";
+print $fh <<"EOC";
+1,foo
+2,bar,fail
+3,baz
+4
+5,eox
+EOC
+close $fh;
+
+open $fh, "<", $tfn or die "$tfn: $!\n";
+my @rpt;
+$csv = Text::CSV->new ({ strict => 1, auto_diag => 1 });
+$csv->callbacks (error => sub {
+ my ($err, $msg, $pos, $recno, $fldno) = @_;
+ if ($err == 2014) {
+ push @rpt => [ $recno, $fldno, $pos ];
+ $csv->SetDiag (0);
+ }
+ });
+is_deeply ([ $csv->getline_all ($fh), @rpt ],
+ [[[ 1, "foo" ], [ 2, "bar", "fail" ], [ 3, "baz" ], [ 4 ], [ 5, "eox" ]],
+ [ 2, 3, 12 ], [ 4, 1, 3 ]], "Can catch strict 2014 with \$csv");
+close $fh;
+
+open $fh, "<", $tfn or die "$tfn: $!\n";
+@rpt = ();
+$csv = Text::CSV->new ({ strict => 1, auto_diag => 1, callbacks => {
+ error => sub {
+ my ($err, $msg, $pos, $recno, $fldno) = @_;
+ if ($err == 2014) {
+ push @rpt => [ $recno, $fldno, $pos ];
+ Text::CSV->SetDiag (0);
+ }
+ }}});
+is_deeply ([ $csv->getline_all ($fh), @rpt ],
+ [[[ 1, "foo" ], [ 2, "bar", "fail" ], [ 3, "baz" ], [ 4 ], [ 5, "eox" ]],
+ [ 2, 3, 12 ], [ 4, 1, 3 ]], "Can catch strict 2014 with class");
+close $fh;
+
+# Under strcict, fail un not enough fields.
+# Under non-strict expect the value of the previous record
+foreach my $test (
+ [ "a,b,c\n" . "d,e,f\n". "g,h\n". "i,j,k\n",
+ "a,b,c\n" . "d,e,f\n". "g,h,f\n". "i,j,k\n", 2, 5 ],
+ [ "a,b,c\n" . "d,e,f\n". "g,h\n" ,
+ "a,b,c\n" . "d,e,f\n". "g,h,f\n" , 2, 5 ],
+ [ "a,b,c\n" . "g,h\n". "i,j,k\n",
+ "a,b,c\n" . "g,h,c\n". "i,j,k\n", 1, 5 ],
+ [ "a,b\n" . "d,e,f\n". "g,h\n". "i,j,k\n",
+ "a,b,*\n" . "d,e,f\n". "g,h,f\n". "i,j,k\n", 1, 5 ],
+ ) {
+ my ($dta, $dta0, $err_line, $pos) = @$test;
+ open $fh, ">", $tfn or die "$tfn: $!\n";
+ print $fh $dta;
+ close $fh;
+ my $expect = [ map {[ split m/,/ => $_ ]} grep m/\S/ => split "\n" => $dta0 ];
+ foreach my $strict (0, 1) {
+ open $fh, "<", $tfn or die "$tfn: $!\n";
+ my $csv = Text::CSV->new ({ strict => $strict });
+ my ($r1, $r2, $r3) = ("-", "+", "*");
+ $csv->bind_columns (\($r1, $r2, $r3));
+ my @out;
+ eval {
+ while ($csv->getline ($fh)) {
+ push @out => [ $r1, $r2, $r3 ];
+ }
+ };
+ close $fh;
+ my @err = $csv->error_diag;
+ if ($strict) {
+ is ($err[0], 2014, "ENF");
+ splice @$expect, $err_line;
+ }
+ else {
+ is ($err[0], 2012, "EOF");
+ }
+ is_deeply (\@out, $expect, "Bound + strict = $strict");
+ }
+ }
+
+{ ok (my $csv = Text::CSV->new ({ strict => 1 }), "Issue#58 data first");
+ ok ($csv->column_names (qw( A B C )), "Expect 3 colums");
+ is_deeply ($csv->getline_hr (*DATA), { A => 1, B => 2, C => 42 }, "Stream OK");
+ ok ($csv->parse ("1,2,42"), "Parse");
+ is_deeply ([ $csv->fields ], [ 1, 2, 42 ], "Parse OK");
+ is ($csv->parse ("2,42"), 0, "Parse not enough");
+ my @err = $csv->error_diag; # error-code, str, pos, rec, fld
+ is ($err[0], 2014, "Error 2014");
+ is ($err[4], 2, "Just got 2");
+ }
+{ ok (my $csv = Text::CSV->new ({ strict => 1 }), "Issue#58 no data first");
+ ok ($csv->column_names (qw( A B C )), "Expect 3 colums");
+ is ($csv->parse ("2,42"), 0, "Parse not enough");
+ my @err = $csv->error_diag; # error-code, str, pos, rec, fld
+ is ($err[0], 2014, "Error 2014");
+ is ($err[4], 2, "Just got 2");
+ }
+{ ok (my $csv = Text::CSV->new ({ strict => 1 }), "Issue#62 no data first");
+ my $tf = "issue-62-$$.csv";
+ END { -e $tf and unlink $tf }
+ open my $fh, ">", $tf;
+ print $fh "A,B\n1,2\n";
+ close $fh;
+ open $fh, "<", $tf;
+ ok (my @col = @{$csv->getline ($fh)}, "Get header");
+ my $val = {};
+ ok ($csv->bind_columns (\@{$val}{@col}), "Bind columns");
+ ok ($csv->getline ($fh), "Values into bound hash entries");
+ my @err = $csv->error_diag; # error-code, str, pos, rec, fld
+ is ($err[0], 0, "No error 2014");
+ is_deeply ($val, { A => 1, B => 2 }, "Content");
+ close $fh;
+ unlink $tf;
+ }
+__END__
+1,2,42
diff --git a/src/test/resources/module/Text-CSV/t/75_hashref.t b/src/test/resources/module/Text-CSV/t/75_hashref.t
new file mode 100644
index 000000000..6f18d9874
--- /dev/null
+++ b/src/test/resources/module/Text-CSV/t/75_hashref.t
@@ -0,0 +1,184 @@
+#!/usr/bin/perl
+
+use strict;
+$^W = 1;
+
+#use Test::More "no_plan";
+ use Test::More tests => 102;
+
+BEGIN {
+ $ENV{PERL_TEXT_CSV} = $ENV{TEST_PERL_TEXT_CSV} || 0;
+ use_ok "Text::CSV", ();
+ plan skip_all => "Cannot load Text::CSV" if $@;
+ }
+
+my $tfn = "_75hashref.csv"; END { -f $tfn and unlink $tfn; }
+
+open my $fh, ">", $tfn or die "$tfn: $!\n";
+print $fh <new (), "new");
+is ($csv->column_names, undef, "No headers yet");
+
+foreach my $args ([\1], ["foo", \1], [{ 1 => 2 }]) {
+ eval { $csv->column_names (@$args) };
+ like ($@, qr/^EHR/, "croak");
+ is ($csv->error_diag () + 0, 3001, "Bad args to column_names");
+ }
+
+ok ($csv->column_names ("name"), "One single name");
+is ($csv->column_names (undef), undef, "reset column_names");
+eval { $csv->column_names (\undef) };
+is ($csv->error_diag () + 0, 3001, "No hash please");
+eval { $csv->column_names ({ 1 => 2 }) };
+is ($csv->error_diag () + 0, 3001, "No hash please");
+
+my $hr;
+eval { $hr = $csv->getline_hr ($fh) };
+is ($hr, undef, "getline_hr before column_names");
+like ($@, qr/^EHR/, "croak");
+is ($csv->error_diag () + 0, 3002, "error code");
+
+ok ($csv->column_names ("name", "code"), "column_names (list)");
+is_deeply ([ $csv->column_names ], [ "name", "code" ], "well set");
+
+open $fh, "<", $tfn or die "$tfn: $!\n";
+my $row;
+ok ($row = $csv->getline ($fh), "getline headers");
+is ($row->[0], "code", "Header line");
+ok ($csv->column_names ($row), "column_names from array_ref");
+is_deeply ([ $csv->column_names ], [ @$row ], "Keys set");
+while (my $hr = $csv->getline_hr ($fh)) {
+ ok (exists $hr->{code}, "Line has a code field");
+ like ($hr->{code}, qr/^[0-9]+$/, "Code is numeric");
+ ok (exists $hr->{name}, "Line has a name field");
+ like ($hr->{name}, qr/^[A-Z][a-z]+$/, "Name");
+ }
+close $fh;
+
+my ($code, $name, $price, $desc) = (1..4);
+is ($csv->bind_columns (), undef, "No bound columns yet");
+eval { $csv->bind_columns (\$code) };
+is ($csv->error_diag () + 0, 3003, "Arg cound mismatch");
+eval { $csv->bind_columns ({}, {}, {}, {}) };
+is ($csv->error_diag () + 0, 3004, "bad arg types");
+is ($csv->column_names (undef), undef, "reset column_names");
+ok ($csv->bind_columns (\($code, $name, $price)), "Bind columns");
+
+eval { $csv->column_names ("foo") };
+is ($csv->error_diag () + 0, 3003, "Arg cound mismatch");
+$csv->bind_columns (undef);
+eval { $csv->bind_columns ([undef]) };
+is ($csv->error_diag () + 0, 3004, "legal header defenition");
+
+my @bcr = \($code, $name, $price, $desc);
+open $fh, "<", $tfn or die "$tfn: $!\n";
+ok ($row = $csv->getline ($fh), "getline headers");
+ok ($csv->bind_columns (@bcr), "Bind columns");
+ok ($csv->column_names ($row), "column_names from array_ref");
+is_deeply ([ $csv->column_names ], [ @$row ], "Keys set");
+
+$row = $csv->getline ($fh);
+is_deeply ([ $csv->bind_columns ], [ @bcr ], "check refs");
+is_deeply ($row, [], "return from getline with bind_columns");
+
+is ($csv->column_names (undef), undef, "reset column headers");
+is ($csv->bind_columns (undef), undef, "reset bound columns");
+
+my $foo;
+ok ($csv->bind_columns (@bcr, \$foo), "bind too many columns");
+($code, $name, $price, $desc, $foo) = (101 .. 105);
+ok ($csv->getline ($fh), "fetch less than expected");
+is_deeply ([ $code, $name, $price, $desc, $foo ],
+ [ 2, "Drinks", "82.78", "Drinks", 105 ], "unfetched not reset");
+
+my @foo = (0) x 0x012345;
+ok ($csv->bind_columns (\(@foo)), "bind a lot of columns");
+
+ok ($csv->bind_columns (\1, \2, \3, \""), "bind too constant columns");
+is ($csv->getline ($fh), undef, "fetch to read-only ref");
+is ($csv->error_diag () + 0, 3008, "Read-only");
+
+ok ($csv->bind_columns (\$code), "bind not enough columns");
+eval { $row = $csv->getline ($fh) };
+is ($csv->error_diag () + 0, 3006, "cannot read all fields");
+
+close $fh;
+
+open $fh, "<", $tfn or die "$tfn: $!\n";
+
+is ($csv->column_names (undef), undef, "reset column headers");
+is ($csv->bind_columns (undef), undef, "reset bound columns");
+is_deeply ([ $csv->column_names (undef, "", "name", "name") ],
+ [ "\cAUNDEF\cA", "", "name", "name" ], "undefined column header");
+ok ($hr = $csv->getline_hr ($fh), "getline_hr ()");
+is (ref $hr, "HASH", "returned a hashref");
+is_deeply ($hr, { "\cAUNDEF\cA" => "code", "" => "name", "name" => "description" },
+ "Discarded 3rd field");
+
+close $fh;
+
+open $fh, ">", $tfn or die "$tfn: $!\n";
+$hr = { c_foo => 1, foo => "poison", zebra => "Of course" };
+is ($csv->column_names (undef), undef, "reset column headers");
+ok ($csv->column_names (sort keys %$hr), "set column names");
+ok ($csv->eol ("\n"), "set eol for output");
+ok ($csv->print ($fh, [ $csv->column_names ]), "print header");
+ok ($csv->print_hr ($fh, $hr), "print_hr");
+ok ($csv->print ($fh, []), "empty print");
+close $fh;
+ok ($csv->keep_meta_info (1), "keep meta info");
+open $fh, "<", $tfn or die "$tfn: $!\n";
+ok ($csv->column_names ($csv->getline ($fh)), "get column names");
+is_deeply ($csv->getline_hr ($fh), $hr, "compare to written hr");
+
+is_deeply ($csv->getline_hr ($fh),
+ { c_foo => "", foo => undef, zebra => undef }, "compare to written hr");
+is ($csv->is_missing (1), 1, "No col 1");
+close $fh;
+
+open $fh, ">", $tfn or die "$tfn: $!\n";
+print $fh <<"EOC";
+a,b
+
+2
+EOC
+close $fh;
+
+ok ($csv = Text::CSV->new (), "new");
+
+open $fh, "<", $tfn or die "$tfn: $!\n";
+ok ($csv->column_names ("code", "foo"), "set column names");
+ok ($hr = $csv->getline_hr ($fh), "get header line");
+is ($csv->is_missing (0), undef, "not is_missing () - no meta");
+is ($csv->is_missing (1), undef, "not is_missing () - no meta");
+ok ($hr = $csv->getline_hr ($fh), "get empty line");
+is ($csv->is_missing (0), undef, "not is_missing () - no meta");
+is ($csv->is_missing (1), undef, "not is_missing () - no meta");
+ok ($hr = $csv->getline_hr ($fh), "get partial data line");
+is (int $hr->{code}, 2, "code == 2");
+is ($csv->is_missing (0), undef, "not is_missing () - no meta");
+is ($csv->is_missing (1), undef, "not is_missing () - no meta");
+close $fh;
+
+open $fh, "<", $tfn or die "$tfn: $!\n";
+$csv->keep_meta_info (1);
+ok ($csv->column_names ("code", "foo"), "set column names");
+ok ($hr = $csv->getline_hr ($fh), "get header line");
+is ($csv->is_missing (0), 0, "not is_missing () - with meta");
+is ($csv->is_missing (1), 0, "not is_missing () - with meta");
+ok ($hr = $csv->getline_hr ($fh), "get empty line");
+is ($csv->is_missing (0), 1, "not is_missing () - with meta");
+is ($csv->is_missing (1), 1, "not is_missing () - with meta");
+ok ($hr = $csv->getline_hr ($fh), "get partial data line");
+is (int $hr->{code}, 2, "code == 2");
+is ($csv->is_missing (0), 0, "not is_missing () - with meta");
+is ($csv->is_missing (1), 1, "not is_missing () - with meta");
+close $fh;
diff --git a/src/test/resources/module/Text-CSV/t/76_magic.t b/src/test/resources/module/Text-CSV/t/76_magic.t
new file mode 100644
index 000000000..dbcc1fb50
--- /dev/null
+++ b/src/test/resources/module/Text-CSV/t/76_magic.t
@@ -0,0 +1,129 @@
+#!/usr/bin/perl
+
+use strict;
+$^W = 1;
+
+#use Test::More "no_plan";
+ use Test::More tests => 44;
+
+BEGIN {
+ $ENV{PERL_TEXT_CSV} = $ENV{TEST_PERL_TEXT_CSV} || 0;
+ use_ok "Text::CSV", ();
+ plan skip_all => "Cannot load Text::CSV" if $@;
+ }
+
+my $tfn = "_76test.csv"; END { -f $tfn and unlink $tfn; }
+my $csv = Text::CSV->new ({ binary => 1, eol => "\n" });
+
+my $fh;
+my $foo;
+my $bar;
+my @foo = ("#", 1..3);
+
+tie $foo, "Foo";
+ok ($csv->combine (@$foo), "combine () from magic");
+untie $foo;
+is_deeply ([$csv->fields], \@foo, "column_names ()");
+
+tie $bar, "Bar";
+$bar = "#";
+ok ($csv->combine ($bar, @{$foo}[1..3]),"combine () from magic");
+untie $bar;
+is_deeply ([$csv->fields], \@foo, "column_names ()");
+
+tie $foo, "Foo";
+open $fh, ">", $tfn or die "$tfn: $!\n";
+ok ($csv->print ($fh, $foo), "print with unused magic scalar");
+close $fh;
+untie $foo;
+
+open $fh, "<", $tfn or die "$tfn: $!\n";
+is_deeply ($csv->getline ($fh), \@foo, "Content read-back");
+close $fh;
+
+tie $foo, "Foo";
+ok ($csv->column_names ($foo), "column_names () from magic");
+untie $foo;
+is_deeply ([$csv->column_names], \@foo, "column_names ()");
+
+open $fh, "<", $tfn or die "$tfn: $!\n";
+tie $bar, "Bar";
+ok ($csv->bind_columns (\$bar, \my ($f0, $f1, $f2)), "bind");
+ok ($csv->getline ($fh), "fetch with magic");
+is_deeply ([$bar,$f0,$f1,$f2], \@foo, "columns fetched on magic");
+# free any refs
+is ($csv->bind_columns (undef), undef, "bind column clear");
+untie $bar;
+close $fh;
+
+$csv->eol (undef);
+ok ($csv->combine ("us", undef, 3), "Combine with undef");
+is ($csv->string, "us,,3", "Default");
+foreach my $us ("\\N", 1, ",,,", "", "\xe2\x80\xa2", "\x{2205}") {
+ ok (defined ($csv->undef_str ($us)),"Set undef_str with method");
+ ok ($csv->combine ("us", undef, 3), "Combine with undef");
+ is ($csv->string, "us,$us,3", "String after method");
+ }
+
+tie my $us, "Bar";
+$us = "NULL";
+ok ($csv->undef_str ($us), "Set undef_str from tied scalar");
+ok ($csv->combine ("us", undef, 3), "Combine with undef");
+is ($csv->string, "us,NULL,3", "String after method");
+$us = "\\N";
+ok ($csv->undef_str ($us), "Set undef_str from tied scalar");
+ok ($csv->combine ("us", undef, 3), "Combine with undef");
+is ($csv->string, "us,\\N,3", "String after method");
+$us = undef;
+is ($csv->undef_str ($us), undef, "Set undef_str from tied scalar");
+ok ($csv->combine ("us", undef, 3), "Combine with undef");
+is ($csv->string, "us,,3", "String after method");
+untie $us;
+
+$csv = Text::CSV->new ({ undef_str => "\\N" });
+ok ($csv->combine ("us", undef, 3), "Combine with undef");
+is ($csv->string, "us,\\N,3", "String after undef_str from constructor");
+
+{ package Foo;
+ use strict;
+ use warnings;
+
+ require Tie::Scalar;
+ use vars qw( @ISA );
+ @ISA = qw(Tie::Scalar);
+
+ sub FETCH {
+ [ "#", 1 .. 3 ];
+ } # FETCH
+
+ sub TIESCALAR {
+ bless [], "Foo";
+ } # TIESCALAR
+
+ 1;
+ }
+
+{ package Bar;
+
+ use strict;
+ use warnings;
+
+ require Tie::Scalar;
+ use vars qw( @ISA );
+ @ISA = qw(Tie::Scalar);
+
+ sub FETCH {
+ return ${$_[0]};
+ } # FETCH
+
+ sub STORE {
+ ${$_[0]} = $_[1];
+ } # STORE
+
+ sub TIESCALAR {
+ my $bar;
+ bless \$bar, "Bar";
+ } # TIESCALAR
+
+ 1;
+ }
diff --git a/src/test/resources/module/Text-CSV/t/77_getall.t b/src/test/resources/module/Text-CSV/t/77_getall.t
new file mode 100644
index 000000000..0cf50a80a
--- /dev/null
+++ b/src/test/resources/module/Text-CSV/t/77_getall.t
@@ -0,0 +1,86 @@
+#!/usr/bin/perl
+
+use strict;
+$^W = 1;
+
+use Test::More tests => 81;
+
+BEGIN {
+ $ENV{PERL_TEXT_CSV} = $ENV{TEST_PERL_TEXT_CSV} || 0;
+ require_ok "Text::CSV";
+ plan skip_all => "Cannot load Text::CSV" if $@;
+ require "./t/util.pl";
+ }
+
+$| = 1;
+
+my $tfn = "_77test.csv"; END { -f $tfn and unlink $tfn; }
+my @testlist = (
+ [ 1, "a", "\x01", "A" ],
+ [ 2, "b", "\x02", "B" ],
+ [ 3, "c", "\x03", "C" ],
+ [ 4, "d", "\x04", "D" ],
+ );
+
+my @list;
+sub do_tests {
+ my $sub = shift;
+
+ $sub->(\@list);
+ $sub->(\@list, 0);
+ $sub->([@list[2,3]], 2);
+ $sub->([], 0, 0);
+ $sub->(\@list, 0, 10);
+ $sub->([@list[0,1]], 0, 2);
+ $sub->([@list[1,2]], 1, 2);
+ $sub->([@list[1,2]], 1e0, 2);
+ $sub->([@list[1,2]], "1", 2);
+ $sub->([@list[1..3]], -3);
+ $sub->([@list[1,2]], -3, 2);
+ $sub->([@list[1..3]], -3, 3);
+
+ $sub->([$list[0]], 0, 1);
+ $sub->([$list[0]], 0, 1e0);
+ $sub->([$list[0]], 0, "1");
+ } # do_tests
+
+foreach my $eol ("\n", "\r") {
+
+ @list = @testlist;
+
+ { ok (my $csv = Text::CSV->new ({ binary => 1, eol => $eol }), "csv out EOL "._readable ($eol));
+ open my $fh, ">", $tfn or die "$tfn: $!";
+ ok ($csv->print ($fh, $_), "write $_->[0]") for @list;
+ close $fh;
+ }
+
+ { ok (my $csv = Text::CSV->new ({ binary => 1 }), "csv in");
+
+ do_tests (sub {
+ my ($expect, @args) = @_;
+ open my $fh, "<", $tfn or die "$tfn: $!";
+ my $s_args = join ", " => @args;
+ is_deeply ($csv->getline_all ($fh, @args), $expect, "getline_all ($s_args)");
+ close $fh;
+ });
+ }
+
+ { ok (my $csv = Text::CSV->new ({ binary => 1 }), "csv in");
+ ok ($csv->column_names (my @cn = qw( foo bar bin baz )), "Set column names");
+ @list = map { my %h; @h{@cn} = @$_; \%h } @list;
+
+ do_tests (sub {
+ my ($expect, @args) = @_;
+ open my $fh, "<", $tfn or die "$tfn: $!";
+ my $s_args = join ", " => @args;
+ is_deeply ($csv->getline_hr_all ($fh, @args), $expect, "getline_hr_all ($s_args)");
+ close $fh;
+ });
+ }
+
+ { ok (my $csv = Text::CSV->new ({ binary => 1 }), "csv in");
+ open my $fh, "<", $tfn or die "$tfn: $!";
+ eval { my $row = $csv->getline_hr_all ($fh); };
+ is ($csv->error_diag () + 0, 3002, "Use _hr before colnames ()");
+ }
+ }
diff --git a/src/test/resources/module/Text-CSV/t/78_fragment.t b/src/test/resources/module/Text-CSV/t/78_fragment.t
new file mode 100644
index 000000000..758fff6d5
--- /dev/null
+++ b/src/test/resources/module/Text-CSV/t/78_fragment.t
@@ -0,0 +1,125 @@
+#!/pro/bin/perl
+
+use strict;
+$^W = 1;
+$| = 1;
+
+use Config;
+use Test::More;
+
+BEGIN {
+ unless (exists $Config{useperlio} &&
+ defined $Config{useperlio} &&
+ $] >= 5.008 && # perlio was experimental in 5.6.2, but not reliable
+ $Config{useperlio} eq "define") {
+ plan skip_all => "No reliable perlIO available";
+ }
+ else {
+ plan tests => 38;
+ }
+ }
+
+BEGIN { $ENV{PERL_TEXT_CSV} = $ENV{TEST_PERL_TEXT_CSV} || 0; }
+use Text::CSV;
+my $csv = Text::CSV->new ();
+
+my @test = (
+ "row=1" => [[ 11,12,13,14,15,16,17,18,19 ]],
+ "row=2-3" => [[ 21,22,23,24,25,26,27,28,29 ],
+ [ 31,32,33,34,35,36,37,38,39 ]],
+ "row=2;4;6" => [[ 21,22,23,24,25,26,27,28,29 ],
+ [ 41,42,43,44,45,46,47,48,49 ],
+ [ 61,62,63,64,65,66,67,68,69 ]],
+ "row=1-2;4;6-*" => [[ 11,12,13,14,15,16,17,18,19 ],
+ [ 21,22,23,24,25,26,27,28,29 ],
+ [ 41,42,43,44,45,46,47,48,49 ],
+ [ 61,62,63,64,65,66,67,68,69 ],
+ [ 71,72,73,74,75,76,77,78,79 ],
+ [ 81,82,83,84,85,86,87,88,89 ],
+ [ 91,92,93,94,95,96,97,98,99 ]],
+ "col=1" => [[11],[21],[31],[41],[51],[61],[71],[81],[91]],
+ "col=2-3" => [[12,13],[22,23],[32,33],[42,43],[52,53],
+ [62,63],[72,73],[82,83],[92,93]],
+ "col=2;4;6" => [[12,14,16],[22,24,26],[32,34,36],[42,44,46],[52,54,56],
+ [62,64,66],[72,74,76],[82,84,86],[92,94,96]],
+ "col=1-2;4;6-*" => [[11,12,14,16,17,18,19], [21,22,24,26,27,28,29],
+ [31,32,34,36,37,38,39], [41,42,44,46,47,48,49],
+ [51,52,54,56,57,58,59], [61,62,64,66,67,68,69],
+ [71,72,74,76,77,78,79], [81,82,84,86,87,88,89],
+ [91,92,94,96,97,98,99]],
+ #cell=R,C
+ "cell=7,7" => [[ 77 ]],
+ "cell=7,7-8,8" => [[ 77,78 ], [ 87,88 ]],
+ "cell=7,7-*,8" => [[ 77,78 ], [ 87,88 ], [ 97,98 ]],
+ "cell=7,7-8,*" => [[ 77,78,79 ], [ 87,88,89 ]],
+ "cell=7,7-*,*" => [[ 77,78,79 ], [ 87,88,89 ], [ 97,98,99 ]],
+
+ "cell=7,7;7,8;8,7;8,8" => [[ 77,78 ], [ 87,88 ]],
+ "cell=8,8;8,7;7,8;7,7" => [[ 77,78 ], [ 87,88 ]],
+
+ "cell=1,1-2,2;3,3-4,4" => [
+ [11,12],
+ [21,22],
+ [33,34],
+ [43,44]],
+ "cell=1,1-3,3;2,3-4,4" => [
+ [11,12,13],
+ [21,22,23,24],
+ [31,32,33,34],
+ [43,44]],
+ "cell=1,1-3,3;2,2-4,4;2,3;4,2" => [
+ [11,12,13],
+ [21,22,23,24],
+ [31,32,33,34],
+ [42,43,44]],
+ "cell=1,1-2,2;3,3-4,4;1,4;4,1" => [
+ [11,12, 14],
+ [21,22],
+ [33,34],
+ [41, 43,44]],
+ );
+my $todo = "";
+my $data = join "" => ;
+while (my ($spec, $expect) = splice @test, 0, 2) {
+ open my $io, "<", \$data or die "IO: $!\n";
+ my $aoa = $csv->fragment ($io, $spec);
+ is_deeply ($aoa, $expect, "${todo}Fragment $spec");
+ }
+
+{ $csv->column_names ("c3", "c4");
+ open my $io, "<", \$data or die "IO: $!\n";
+ is_deeply ($csv->fragment ($io, "cell=3,2-4,3"),
+ [ { c3 => 32, c4 => 33 }, { c3 => 42, c4 => 43 }], "Fragment to AoH");
+ }
+{ $csv->column_names ("C1", "C2");
+ open my $io, "<", \$data or die "IO: $!\n";
+ is_deeply ($csv->fragment ($io, "row=3"),
+ [ { C1 => 31, C2 => 32 }], "Fragment row with headers to AoH");
+ }
+{ $csv->column_names ("C1");
+ open my $io, "<", \$data or die "IO: $!\n";
+ is_deeply ($csv->fragment ($io, "col=2"),
+ [ map +{ C1 => $_.2 } => 1 .. 9 ], "Fragment col with headers to AoH");
+ }
+
+$csv->column_names (undef);
+foreach my $spec ("col=1;3=2", "col=1,3-2", "col=-3", "col=0", "col=2--5",
+ "col=0-2", "col=2-0", "col=2;;3") {
+ open my $io, "<", \$data or die "IO: $!\n";
+ my $ref = eval { $csv->fragment ($io, "col=2;3=2"); };
+ is ($ref, undef, "Bad fragment spec");
+ is (0 + $csv->error_diag, 2013, "Error in spec");
+ }
+
+#$csv->eol ("\n");
+#foreach my $r (1..9){$csv->print(*STDOUT,[map{$r.$_}1..9])}
+__END__
+11,12,13,14,15,16,17,18,19
+21,22,23,24,25,26,27,28,29
+31,32,33,34,35,36,37,38,39
+41,42,43,44,45,46,47,48,49
+51,52,53,54,55,56,57,58,59
+61,62,63,64,65,66,67,68,69
+71,72,73,74,75,76,77,78,79
+81,82,83,84,85,86,87,88,89
+91,92,93,94,95,96,97,98,99
diff --git a/src/test/resources/module/Text-CSV/t/79_callbacks.t b/src/test/resources/module/Text-CSV/t/79_callbacks.t
new file mode 100644
index 000000000..bc39d9081
--- /dev/null
+++ b/src/test/resources/module/Text-CSV/t/79_callbacks.t
@@ -0,0 +1,175 @@
+#!/usr/bin/perl
+
+use strict;
+$^W = 1;
+
+ use Test::More tests => 111;
+#use Test::More "no_plan";
+
+BEGIN {
+ $ENV{PERL_TEXT_CSV} = $ENV{TEST_PERL_TEXT_CSV} || 0;
+ require_ok "Text::CSV";
+ plan skip_all => "Cannot load Text::CSV" if $@;
+ require "./t/util.pl";
+ }
+
+$| = 1;
+
+my $csv;
+my $tfn = "_79test.csv"; END { -f $tfn and unlink $tfn; }
+
+# These tests are for the constructor
+{ my $warn;
+ local $SIG{__WARN__} = sub { $warn = shift; };
+ ok ($csv = Text::CSV->new ({ callbacks => undef }), "new");
+ is ($warn, undef, "no warn for undef");
+ is ($csv->callbacks, $warn = undef, "no callbacks for undef");
+ ok ($csv = Text::CSV->new ({ callbacks => 0 }), "new");
+ like ($warn, qr{: ignored\n}, "warn for 0");
+ is ($csv->callbacks, $warn = undef, "no callbacks for 0");
+ ok ($csv = Text::CSV->new ({ callbacks => 1 }), "new");
+ like ($warn, qr{: ignored\n}, "warn for 1");
+ is ($csv->callbacks, $warn = undef, "no callbacks for 1");
+ ok ($csv = Text::CSV->new ({ callbacks => \1 }), "new");
+ like ($warn, qr{: ignored\n}, "warn for \\1");
+ is ($csv->callbacks, $warn = undef, "no callbacks for \\1");
+ ok ($csv = Text::CSV->new ({ callbacks => "" }), "new");
+ like ($warn, qr{: ignored\n}, "warn for ''");
+ is ($csv->callbacks, $warn = undef, "no callbacks for ''");
+ ok ($csv = Text::CSV->new ({ callbacks => [] }), "new");
+ like ($warn, qr{: ignored\n}, "warn for []");
+ is ($csv->callbacks, $warn = undef, "no callbacks for []");
+ ok ($csv = Text::CSV->new ({ callbacks => sub {} }), "new");
+ like ($warn, qr{: ignored\n}, "warn for sub {}");
+ is ($csv->callbacks, $warn = undef, "no callbacks for sub {}");
+ }
+
+ok ($csv = Text::CSV->new (), "new");
+is ($csv->callbacks, undef, "no callbacks");
+ok ($csv->bind_columns (\my ($c, $s)), "bind");
+ok ($csv->getline (*DATA), "parse ok");
+is ($c, 1, "key");
+is ($s, "foo", "value");
+$s = "untouched";
+ok ($csv->getline (*DATA), "parse bad");
+is ($c, 1, "key");
+is ($s, "untouched", "untouched");
+ok ($csv->getline (*DATA), "parse bad");
+is ($c, "foo", "key");
+is ($s, "untouched", "untouched");
+ok ($csv->getline (*DATA), "parse good");
+is ($c, 2, "key");
+is ($s, "bar", "value");
+eval { is ($csv->getline (*DATA), undef,"parse bad"); };
+my @diag = $csv->error_diag;
+is ($diag[0], 3006, "too many values");
+
+# These tests are for the method
+foreach my $args ([""], [1], [[]], [sub{}], [1,2], [1,2,3],
+ [undef,"error"], ["error",undef],
+ ["%23bad",sub {}], ["error",sub{0;},undef,1],
+ ["error",[]], ["error","error"], ["",sub{0;}],
+ [sub{0;},0], [[],""]) {
+ eval { $csv->callbacks (@$args); };
+ my @diag = $csv->error_diag;
+ is ($diag[0], 1004, "invalid callbacks");
+ is ($csv->callbacks, undef, "not set");
+ }
+
+# These tests are for invalid arguments *inside* the hash
+foreach my $arg (undef, 0, 1, \1, "", [], $csv) {
+ eval { $csv->callbacks ({ error => $arg }); };
+ my @diag = $csv->error_diag;
+ is ($diag[0], 1004, "invalid callbacks");
+ is ($csv->callbacks, undef, "not set");
+ }
+ok ($csv->callbacks (bogus => sub { 0; }), "useless callback");
+
+my $error = 3006;
+sub ignore {
+ is ($_[0], $error, "Caught error $error");
+ $csv->SetDiag (0); # Ignore this error
+ } # ignore
+
+my $idx = 1;
+ok ($csv->auto_diag (1), "set auto_diag");
+my $callbacks = {
+ error => \&ignore,
+ after_parse => sub {
+ my ($c, $av) = @_;
+ # Just add a field
+ push @$av, "NEW";
+ },
+ before_print => sub {
+ my ($c, $av) = @_;
+ # First field set to line number
+ $av->[0] = $idx++;
+ # Maximum 2 fields
+ @{$av} > 2 and splice @{$av}, 2;
+ # Minimum 2 fields
+ @{$av} < 2 and push @{$av}, "";
+ },
+ };
+is (ref $csv->callbacks ($callbacks), "HASH", "callbacks set");
+ok ($csv->getline (*DATA), "parse ok");
+is ($c, 1, "key");
+is ($s, "foo", "value");
+ok ($csv->getline (*DATA), "parse bad, skip 3006");
+ok ($csv->getline (*DATA), "parse good");
+is ($c, 2, "key");
+is ($s, "bar", "value");
+
+$csv->bind_columns (undef);
+ok (my $row = $csv->getline (*DATA), "get row");
+is_deeply ($row, [ 1, 2, 3, "NEW" ], "fetch + value from hook");
+
+$error = 2012; # EOF
+ok ($csv->getline (*DATA), "parse past eof");
+
+ok ($csv->eol ("\n"), "eol for output");
+open my $fh, ">", $tfn or die "$tfn: $!";
+ok ($csv->print ($fh, [ 0, "foo" ]), "print OK");
+ok ($csv->print ($fh, [ 0, "bar", 3 ]), "print too many");
+ok ($csv->print ($fh, [ 0 ]), "print too few");
+close $fh;
+
+open $fh, "<", $tfn or die "$tfn: $!";
+is (do { local $/; <$fh> }, "1,foo\n2,bar\n3,\n", "Modified output");
+close $fh;
+
+# Test the non-IO interface
+ok ($csv->parse ("10,blah,33\n"), "parse");
+is_deeply ([ $csv->fields ], [ 10, "blah", 33, "NEW" ], "fields");
+
+ok ($csv->combine (11, "fri", 22, 18), "combine - no hook");
+is ($csv->string, qq{11,fri,22,18\n}, "string");
+
+is ($csv->callbacks (undef), undef, "clear callbacks");
+
+is_deeply (Text::CSV::csv (in => $tfn, callbacks => $callbacks),
+ [[1,"foo","NEW"],[2,"bar","NEW"],[3,"","NEW"]], "using getline_all");
+
+open $fh, ">", $tfn or die "$tfn: $!\n";
+print $fh <<"EOC";
+1,foo
+2,bar
+3,baz
+4,zoo
+EOC
+close $fh;
+
+open $fh, "<", $tfn or die "$tfn: $!\n";
+$csv->callbacks (after_parse => sub { $_[1][0] eq 3 and return \"skip" });
+is_deeply ($csv->getline_all ($fh), [[1,"foo"],[2,"bar"],[4,"zoo"]], "skip");
+close $fh;
+
+__END__
+1,foo
+1
+foo
+2,bar
+3,baz,2
+1,foo
+3,baz,2
+2,bar
+1,2,3
diff --git a/src/test/resources/module/Text-CSV/t/80_diag.t b/src/test/resources/module/Text-CSV/t/80_diag.t
new file mode 100644
index 000000000..0bd05fbc4
--- /dev/null
+++ b/src/test/resources/module/Text-CSV/t/80_diag.t
@@ -0,0 +1,337 @@
+#!/usr/bin/perl
+
+use strict;
+$^W = 1;
+
+ use Test::More tests => 316;
+#use Test::More "no_plan";
+
+my %err;
+
+BEGIN {
+ $ENV{PERL_TEXT_CSV} = $ENV{TEST_PERL_TEXT_CSV} || 0;
+ require_ok "Text::CSV";
+ plan skip_all => "Cannot load Text::CSV" if $@;
+ require "./t/util.pl";
+
+ open my $fh, "<", "lib/Text/CSV_PP.pm" or die "Cannot read error messages from PP\n";
+ while (<$fh>) {
+ m/^\s+([0-9]{4}) => "([^"]+)"/ and $err{$1} = $2;
+ }
+ close $fh;
+ }
+
+my $tfn = "_80test.csv"; END { -f $tfn and unlink $tfn; }
+$| = 1;
+
+my $csv = Text::CSV->new ();
+is (Text::CSV::error_diag (), "", "Last failure for new () - OK");
+is_deeply ([ $csv->error_diag ], [ 0, "", 0, 0, 0, 0 ], "OK in list context");
+
+sub parse_err {
+ my ($n_err, $p_err, $r_err, $f_err, $str) = @_;
+ my $s_err = $err{$n_err};
+ my $STR = _readable ($str);
+ is ($csv->parse ($str), 0, "$n_err - Err for parse ('$STR')");
+ is ($csv->error_diag () + 0, $n_err, "$n_err - Diag in numerical context");
+ is ($csv->error_diag (), $s_err, "$n_err - Diag in string context");
+ my ($c_diag, $s_diag, $p_diag, $r_diag, $f_diag) = $csv->error_diag ();
+ is ($c_diag, $n_err, "$n_err - Num diag in list context");
+ is ($s_diag, $s_err, "$n_err - Str diag in list context");
+ is ($p_diag, $p_err, "$n_err - Pos diag in list context");
+ is ($r_diag, $r_err, "$n_err - Rec diag in list context");
+ is ($f_diag, $f_err, "$n_err - Fld diag in list context");
+ } # parse_err
+
+parse_err 2023, 19, 1, 2, qq{2023,",2008-04-05,"Foo, Bar",\n}; # "
+
+$csv = Text::CSV->new ({ escape_char => "+", eol => "\n" });
+is ($csv->error_diag (), "", "No errors yet");
+
+parse_err 2010, 3, 1, 1, qq{"x"\r};
+parse_err 2011, 4, 2, 1, qq{"x"x};
+
+parse_err 2021, 2, 3, 1, qq{"\n"};
+parse_err 2022, 2, 4, 1, qq{"\r"};
+parse_err 2025, 2, 5, 1, qq{"+ "};
+parse_err 2026, 2, 6, 1, qq{"\0 "};
+parse_err 2027, 1, 7, 1, '"';
+parse_err 2031, 1, 8, 1, qq{\r };
+parse_err 2032, 2, 9, 1, qq{ \r};
+parse_err 2034, 4, 10, 2, qq{1, "bar",2};
+parse_err 2037, 1, 11, 1, qq{\0 };
+
+{ my @warn;
+ local $SIG{__WARN__} = sub { push @warn => @_ };
+ $csv->error_diag ();
+ ok (@warn == 1, "Got error message");
+ like ($warn[0], qr{^# CSV_(?:PP|XS) ERROR: 2037 - EIF}, "error content");
+ }
+
+is ($csv->eof, "", "No EOF");
+$csv->SetDiag (2012);
+is ($csv->eof, 1, "EOF caused by 2012");
+
+is (Text::CSV->new ({ ecs_char => ":" }), undef, "Unsupported option");
+
+{ my @warn;
+ local $SIG{__WARN__} = sub { push @warn => @_ };
+ Text::CSV::error_diag ();
+ ok (@warn == 1, "Error_diag in void context ::");
+ like ($warn[0], qr{^# CSV_(?:PP|XS) ERROR: 1000 - INI}, "error content");
+ }
+{ my @warn;
+ local $SIG{__WARN__} = sub { push @warn => @_ };
+ Text::CSV->error_diag ();
+ ok (@warn == 1, "Error_diag in void context ->");
+ like ($warn[0], qr{^# CSV_(?:PP|XS) ERROR: 1000 - INI}, "error content");
+ }
+
+{ my @warn;
+ local $SIG{__WARN__} = sub { push @warn => @_ };
+ is (Text::CSV->new ({ auto_diag => 0, ecs_char => ":" }), undef,
+ "Unsupported option");
+ ok (@warn == 0, "Error_diag in from new ({ auto_diag => 0})");
+ }
+{ my @warn;
+ local $SIG{__WARN__} = sub { push @warn => @_ };
+ is (Text::CSV->new ({ auto_diag => 1, ecs_char => ":" }), undef,
+ "Unsupported option");
+ ok (@warn == 1, "Error_diag in from new ({ auto_diag => 1})");
+ like ($warn[0], qr{^# CSV_(?:PP|XS) ERROR: 1000 - INI}, "error content");
+ }
+
+is (Text::CSV::error_diag (), "INI - Unknown attribute 'ecs_char'",
+ "Last failure for new () - FAIL");
+is (Text::CSV->error_diag (), "INI - Unknown attribute 'ecs_char'",
+ "Last failure for new () - FAIL");
+is (Text::CSV::error_diag (bless {}, "Foo"), "INI - Unknown attribute 'ecs_char'",
+ "Last failure for new () - FAIL");
+$csv->SetDiag (1000);
+is (0 + $csv->error_diag (), 1000, "Set error NUM");
+is ( $csv->error_diag (), "INI - constructor failed","Set error STR");
+$csv->SetDiag (0);
+is (0 + $csv->error_diag (), 0, "Reset error NUM");
+is ( $csv->error_diag (), "", "Reset error STR");
+
+ok (1, "Test auto_diag");
+$csv = Text::CSV->new ({ auto_diag => 1 });
+{ my @warn;
+ local $SIG{__WARN__} = sub { push @warn => @_ };
+ is ($csv->{_RECNO}, 0, "No records read yet");
+ is ($csv->parse ('"","'), 0, "1 - bad parse");
+ ok (@warn == 1, "1 - One error");
+ like ($warn[0], qr '^# CSV_(?:PP|XS) ERROR: 2027 -', "1 - error message");
+ is ($csv->{_RECNO}, 1, "One record read");
+ }
+{ my @warn;
+ local $SIG{__WARN__} = sub { push @warn => @_ };
+ is ($csv->diag_verbose (3), 3, "Set diag_verbose");
+ is ($csv->parse ('"","'), 0, "1 - bad parse");
+ ok (@warn == 1, "1 - One error");
+ @warn = split m/\n/ => $warn[0];
+ ok (@warn == 3, "1 - error plus two lines");
+ like ($warn[0], qr '^# CSV_(?:PP|XS) ERROR: 2027 -', "1 - error message");
+ like ($warn[1], qr '^"","', "1 - input line");
+ like ($warn[2], qr '^ \^', "1 - position indicator");
+ is ($csv->{_RECNO}, 2, "Another record read");
+ }
+{ ok ($csv->{auto_diag} = 2, "auto_diag = 2 to die");
+ eval { $csv->parse ('"","') };
+ like ($@, qr '^# CSV_(?:PP|XS) ERROR: 2027 -', "2 - error message");
+ }
+
+{ my @warn;
+ local $SIG{__WARN__} = sub { push @warn => @_ };
+
+ # Invalid error_input calls
+ is (Text::CSV::error_input (undef), undef, "Bad error_input call");
+ is (Text::CSV::error_input (""), undef, "Bad error_input call");
+ is (Text::CSV::error_input ([]), undef, "Bad error_input call");
+ is (Text::CSV->error_input, undef, "Bad error_input call");
+
+ ok (my $csv = Text::CSV->new (), "new for cache diag");
+ $csv->_cache_diag ();
+ ok (@warn == 1, "Got warn");
+ is ($warn[0], "CACHE: invalid\n", "Uninitialized cache");
+
+ @warn = ();
+ ok ($csv->parse ("1"), "parse"); # initialize cache
+ $csv->_cache_set (987, 10);
+ ok (@warn == 1, "Got warn");
+ is ($warn[0], "Unknown cache index 987 ignored\n", "Ignore bad cache calls");
+
+ is ($csv->parse ('"'), 0, "Bad parse");
+ is ($csv->error_input, '"', "Error input");
+ ok ($csv->_cache_set (34, 0), "Reset error input (dangerous!)");
+ is ($csv->error_input, '"', "Error input not reset");
+ }
+
+{ my $csv = Text::CSV->new ();
+ ok ($csv->parse (q{1,"abc"}), "Valid parse");
+ is ($csv->error_input (), undef, "Undefined error_input");
+ is ($csv->{_ERROR_INPUT}, undef, "Undefined error_input");
+ }
+
+foreach my $spec (
+ undef, # No spec at all
+ "", # No spec at all
+ "row=0", # row > 0
+ "col=0", # col > 0
+ "cell=0", # cell = r,c
+ "cell=0,0", # TL col > 0
+ "cell=1,0", # TL row > 0
+ "cell=1,1;0,1", # BR col > 0
+ "cell=1,1;1,0", # BR row > 0
+ "row=*", # * only after n-
+ "col=3-1", # to >= from
+ "cell=4,1;1", # cell has no ;
+ "cell=3,3-2,1", # bottom-right should be right to and below top-left
+ "cell=3,3-2,*", # bottom-right should be right to and below top-left
+ "cell=3,3-4,1", # bottom-right should be right to and below top-left
+ "cell=3,3-*,1", # bottom-right should be right to and below top-left
+ "cell=1,*", # * in single cell col
+ "cell=*,1", # * in single cell row
+ "cell=*,*", # * in single cell row and column
+ "cell=1,*-8,9", # * in cell range top-left cell col
+ "cell=*,1-8,9", # * in cell range top-left cell row
+ "cell=*,*-8,9", # * in cell range top-left cell row and column
+ "row=/", # illegal character
+ "col=4;row=3", # cannot combine rows and columns
+ ) {
+ my $csv = Text::CSV->new ();
+ my $r;
+ eval { $r = $csv->fragment (undef, $spec); };
+ is ($r, undef, "Cannot do fragment with bad RFC7111 spec");
+ my ($c_diag, $s_diag, $p_diag) = $csv->error_diag ();
+ is ($c_diag, 2013, "Illegal RFC7111 spec");
+ is ($p_diag, 0, "Position");
+ }
+
+my $diag_file = "_$$.out";
+open EH, ">&STDERR" or die "STDERR: $!\n";
+open STDERR, ">", $diag_file or die "STDERR: $!\n";
+# Trigger extra output for longer quote and sep
+is ($csv->sep ("--"), "--", "set longer sep");
+is ($csv->quote ("^^"), "^^", "set longer quote");
+ok ($csv->_cache_diag, "Cache debugging output");
+close STDERR;
+open STDERR, ">&EH" or die "STDERR: $!\n";
+open EH, "<", $diag_file or die "STDERR: $!\n";
+is (scalar , "CACHE:\n", "Title");
+while () {
+ m/^\s+(?:tmp|bptr|cache)\b/ and next;
+ like ($_, qr{^ \w+\s+[0-9a-f]+:(?:".*"|\s*[0-9]+)$}, "Content");
+ }
+close EH;
+unlink $diag_file;
+
+{ my $err = "";
+ local $SIG{__DIE__} = sub { $err = shift; };
+ ok (my $csv = Text::CSV->new, "new");
+ eval { $csv->print_hr (*STDERR, {}); };
+ is (0 + $csv->error_diag, 3009, "Missing column names");
+ ok ($csv->column_names ("foo"), "set columns");
+ eval { $csv->print_hr (*STDERR, []); };
+ is (0 + $csv->error_diag, 3010, "print_hr needs a hashref");
+ }
+
+{ my $csv = Text::CSV->new ({ sep_char => "=" });
+ eval { $csv->quote ("::::::::::::::"); };
+ is (0 + $csv->error_diag, 0, "Can set quote to something long");
+ eval { $csv->quote ("="); };
+ is (0 + $csv->error_diag, 1001, "Cannot set quote to current sep");
+ }
+
+{ my $csv = Text::CSV->new ({ quote_char => "=" });
+ eval { $csv->sep ("::::::::::::::"); };
+ is (0 + $csv->error_diag, 0, "Can set sep to something long");
+ eval { $csv->sep (undef); };
+ is (0 + $csv->error_diag, 1008, "Can set sep to undef");
+ eval { $csv->sep (""); };
+ is (0 + $csv->error_diag, 1008, "Can set sep to empty");
+ eval { $csv->sep ("="); };
+ is (0 + $csv->error_diag, 1001, "Cannot set sep to current sep");
+ }
+
+{ my $csv = Text::CSV->new;
+ eval { $csv->header (undef, "foo"); };
+ is (0 + $csv->error_diag, 1014, "Cannot read header from undefined source");
+ eval { $csv->header (*STDIN, "foo"); };
+ like ($@, qr/^usage:/, "Illegal header call");
+ }
+
+{ my $csv = Text::CSV->new;
+ foreach my $arg ([], sub {}, Text::CSV->new, {}) {
+ eval { $csv->parse ($arg) };
+ my @diag = $csv->error_diag;
+ is ($diag[0], 1500, "Invalid parameters (code)");
+ like ($diag[1], qr{^PRM - Invalid/unsupported argument}, "Invalid parameters (msg)");
+ }
+ }
+
+SKIP: {
+ $] < 5.008 and skip qq{$] does not support ScalarIO}, 24;
+ foreach my $key ({}, sub {}, []) {
+ my $csv = Text::CSV->new;
+ my $x = eval { $csv->csv (in => \"a,b", key => $key) };
+ is ($x, undef, "Invalid key");
+ my @diag = $csv->error_diag;
+ is ($diag[0], 1501, "Invalid key type");
+ }
+
+ { my $csv = Text::CSV->new;
+ my $x = eval { $csv->csv (in => \"a,b", value => "b") };
+ is ($x, undef, "Value without key");
+ my @diag = $csv->error_diag;
+ is ($diag[0], 1502, "No key");
+ }
+
+ foreach my $val ({}, sub {}, []) {
+ my $csv = Text::CSV->new;
+ my $x = eval { $csv->csv (in => \"a,b", key => "a", value => $val) };
+ is ($x, undef, "Invalid value");
+ my @diag = $csv->error_diag;
+ is ($diag[0], 1503, "Invalid value type");
+ }
+
+ foreach my $ser ("die", 4) {
+ ok (my $csv = Text::CSV->new ({ skip_empty_rows => $ser }),
+ "New CSV for SER $ser");
+ is (eval { $csv->csv (in => \"\n") }, undef,
+ "Parse empty line for SER $ser");
+ like ($@, qr{^Empty row}, "Message");
+ my @diag = $csv->error_diag;
+ is ($diag[0], 2015, "Empty row");
+ like ($diag[1], qr{^ERW - Empty row}, "Error description");
+ }
+ }
+
+# Issue 19: auto_diag > 1 does not die if ->header () is used
+if ($] >= 5.008002) {
+ open my $fh, ">", $tfn or die "$tfn: $!\n";
+ print $fh qq{foo,bar,baz\n};
+ print $fh qq{a,xxx,1\n};
+ print $fh qq{b,"xx"xx", 2"\n};
+ print $fh qq{c, foo , 3\n};
+ close $fh;
+ foreach my $h (0, 1) {
+ $@ = "";
+ my @row;
+ my $ok = eval {
+ open $fh, "<", $tfn or die "$tfn: $!\n";
+ my $csv = Text::CSV->new ({ auto_diag => 2 });
+ $h and push @row => [ $csv->header ($fh) ];
+ while (my $row = $csv->getline ($fh)) { push @row => $row }
+ close $fh;
+ 1;
+ };
+ is_deeply (\@row, [[qw(foo bar baz)],[qw(a xxx 1)]], "2 valid rows");
+ like ($@, qr '^# CSV_(?:PP|XS) ERROR: 2023 -', "3rd row dies error 2023");
+ }
+ }
+else {
+ ok (1, "Test skipped in this version of perl") for 1..4;
+ }
+
+1;
diff --git a/src/test/resources/module/Text-CSV/t/81_subclass.t b/src/test/resources/module/Text-CSV/t/81_subclass.t
new file mode 100644
index 000000000..bd5fbac16
--- /dev/null
+++ b/src/test/resources/module/Text-CSV/t/81_subclass.t
@@ -0,0 +1,33 @@
+#!/usr/bin/perl
+
+package Text::CSV::Subclass;
+
+BEGIN {
+ $ENV{PERL_TEXT_CSV} = $ENV{TEST_PERL_TEXT_CSV} || 0;
+}
+
+BEGIN { require Text::CSV; } # needed for perl5.005
+
+use strict;
+$^W = 1;
+
+use base "Text::CSV";
+
+use Test::More tests => 6;
+
+ok (1, "Subclassed");
+
+my $csvs = Text::CSV::Subclass->new ();
+is ($csvs->error_diag (), "", "Last failure for new () - OK");
+
+my $sc_csv;
+eval { $sc_csv = Text::CSV::Subclass->new ({ ecs_char => ":" }); };
+is ($sc_csv, undef, "Unsupported option");
+is ($@, "", "error");
+
+is (Text::CSV::Subclass->error_diag (),
+ "INI - Unknown attribute 'ecs_char'", "Last failure for new () - FAIL");
+
+is (Text::CSV::Subclass->new ({ fail_me => "now" }), undef, "bad new ()");
+
+1;
diff --git a/src/test/resources/module/Text-CSV/t/85_util.t b/src/test/resources/module/Text-CSV/t/85_util.t
new file mode 100644
index 000000000..641457990
--- /dev/null
+++ b/src/test/resources/module/Text-CSV/t/85_util.t
@@ -0,0 +1,358 @@
+#!/usr/bin/perl
+
+use strict;
+$^W = 1;
+
+use Test::More;
+
+my $ebcdic = ord ("A") == 0xC1;
+my $pu;
+BEGIN {
+ $pu = $ENV{PERL_UNICODE};
+ $pu = defined $pu && ($pu eq "" || $pu =~ m/[oD]/ || ($pu =~ m/^[0-9]+$/ && $pu & 16));
+
+ if ($] < 5.008002) {
+ plan skip_all => "This test unit requires perl-5.8.2 or higher";
+ }
+ else {
+ my $n = 1448;
+ $pu and $n -= 120;
+ plan tests => $n;
+ }
+
+ $ENV{PERL_TEXT_CSV} = $ENV{TEST_PERL_TEXT_CSV} || 0;
+
+ use_ok "Text::CSV", "csv";
+ # Encode up to and including 2.01 have an error in a regex:
+ # False [] range "\s-" in regex; marked by <-- HERE in m/\bkoi8[\s- <-- HERE _]*([ru])$/
+ # in Encode::Alias. This however does not influence this test, as then *all* encodings
+ # are skipped as unsupported
+ require Encode;
+ require "./t/util.pl";
+ }
+
+$| = 1;
+
+ok (my $csv = Text::CSV->new, "new for header tests");
+is ($csv->sep_char, ",", "Sep = ,");
+
+my $hdr_lc = [qw( bar foo )];
+
+foreach my $sep (",", ";") {
+ my $data = "bAr,foo\n1,2\n3,4,5\n";
+ $data =~ s/,/$sep/g;
+
+ $csv->column_names (undef);
+ { open my $fh, "<", \$data;
+ ok (my $slf = $csv->header ($fh), "header");
+ is ($slf, $csv, "Return self");
+ is ($csv->sep_char, $sep, "Sep = $sep");
+ is_deeply ([ $csv->column_names ], $hdr_lc, "headers");
+ is_deeply ($csv->getline ($fh), [ 1, 2 ], "Line 1");
+ is_deeply ($csv->getline ($fh), [ 3, 4, 5 ], "Line 2");
+ close $fh;
+ }
+
+ $csv->column_names (undef);
+ { open my $fh, "<", \$data;
+ ok (my @hdr = $csv->header ($fh), "header");
+ is_deeply (\@hdr, $hdr_lc, "Return headers");
+ close $fh;
+ }
+
+ $csv->column_names (undef);
+ { open my $fh, "<", \$data;
+ ok (my $slf = $csv->header ($fh), "header");
+ is ($slf, $csv, "Return self");
+ is ($csv->sep_char, $sep, "Sep = $sep");
+ is_deeply ([ $csv->column_names ], $hdr_lc, "headers");
+ is_deeply ($csv->getline_hr ($fh), { bar => 1, foo => 2 }, "Line 1");
+ is_deeply ($csv->getline_hr ($fh), { bar => 3, foo => 4 }, "Line 2");
+ close $fh;
+ }
+
+ { open my $fh, "<", \$data;
+ is_deeply (csv (in => $fh, bom => 1),
+ [{ bar => 1, foo => 2 }, { bar => 3, foo => 4 }],
+ "use header () from csv () with $sep");
+ }
+
+ { open my $fh, "<", \$data;
+ is_deeply (csv (in => $fh, seps => [ ",", ";" ]),
+ [{ bar => 1, foo => 2 }, { bar => 3, foo => 4 }],
+ "use header () from csv () with $sep");
+ }
+
+ { open my $fh, "<", \$data;
+ is_deeply (csv (in => $fh, bom => 1, key => "bar"),
+ { 1 => { bar => 1, foo => 2 }, 3 => { bar => 3, foo => 4 }},
+ "use header () from csv (key) with $sep");
+ }
+
+ { open my $fh, "<", \$data;
+ is_deeply (csv (in => $fh, munge => "uc", key => "BAR"),
+ { 1 => { BAR => 1, FOO => 2 }, 3 => { BAR => 3, FOO => 4 }},
+ "use header () from csv (key, uc) with $sep");
+ }
+
+ { open my $fh, "<", \$data;
+ is_deeply (csv (in => $fh, set_column_names => 0),
+ [[ "bar", "foo" ], [ 1, 2 ], [ 3, 4, 5 ]],
+ "use header () from csv () with $sep to ARRAY not setting column names");
+ }
+ { open my $fh, "<", \$data;
+ is_deeply (csv (in => $fh, set_column_names => 0, munge => "none"),
+ [[ "bAr", "foo" ], [ 1, 2 ], [ 3, 4, 5 ]],
+ "use header () from csv () with $sep to ARRAY not setting column names not lc");
+ }
+ }
+
+my $sep_utf = byte_utf8a_to_utf8n ("\xe2\x81\xa3"); # U+2063 INVISIBLE SEPARATOR
+my $sep_ok = [ "\t", "|", ",", ";", "##", $sep_utf ];
+unless ($pu) {
+ foreach my $sep (@$sep_ok) {
+ my $data = "bAr,foo\n1,2\n3,4,5\n";
+ $data =~ s/,/$sep/g;
+
+ $csv->column_names (undef);
+ { open my $fh, "<", \$data;
+ ok (my $slf = $csv->header ($fh, $sep_ok), "header with specific sep set");
+ is ($slf, $csv, "Return self");
+ is (Encode::encode ("utf-8", $csv->sep), $sep, "Sep = $sep");
+ is_deeply ([ $csv->column_names ], $hdr_lc, "headers");
+ is_deeply ($csv->getline ($fh), [ 1, 2 ], "Line 1");
+ is_deeply ($csv->getline ($fh), [ 3, 4, 5 ], "Line 2");
+ close $fh;
+ }
+
+ $csv->column_names (undef);
+ { open my $fh, "<", \$data;
+ ok (my @hdr = $csv->header ($fh, $sep_ok), "header with specific sep set");
+ is_deeply (\@hdr, $hdr_lc, "Return headers");
+ close $fh;
+ }
+
+ $csv->column_names (undef);
+ { open my $fh, "<", \$data;
+ ok (my $slf = $csv->header ($fh, { sep_set => $sep_ok }), "header with specific sep set as opt");
+ is ($slf, $csv, "Return self");
+ is (Encode::encode ("utf-8", $csv->sep), $sep, "Sep = $sep");
+ is_deeply ([ $csv->column_names ], $hdr_lc, "headers");
+ is_deeply ($csv->getline ($fh), [ 1, 2 ], "Line 1");
+ is_deeply ($csv->getline ($fh), [ 3, 4, 5 ], "Line 2");
+ close $fh;
+ }
+
+ $csv->column_names (undef);
+ { open my $fh, "<", \$data;
+ ok (my $slf = $csv->header ($fh, $sep_ok), "header with specific sep set");
+ is ($slf, $csv, "Return self");
+ is (Encode::encode ("utf-8", $csv->sep), $sep, "Sep = $sep");
+ is_deeply ([ $csv->column_names ], $hdr_lc, "headers");
+ is_deeply ($csv->getline_hr ($fh), { bar => 1, foo => 2 }, "Line 1");
+ is_deeply ($csv->getline_hr ($fh), { bar => 3, foo => 4 }, "Line 2");
+ close $fh;
+ }
+ }
+ }
+
+for ( [ 1010, 0, qq{} ], # Empty header
+ [ 1011, 0, qq{a,b;c,d} ], # Multiple allowed separators
+ [ 1012, 0, qq{a,,b} ], # Empty header field
+ [ 1013, 0, qq{a,a,b} ], # Non-unique headers
+ [ 2027, 1, qq{a,"b\nc",c} ], # Embedded newline binary on
+ [ 2021, 0, qq{a,"b\nc",c} ], # Embedded newline binary off
+ ) {
+ my ($err, $bin, $data) = @$_;
+ $csv->binary ($bin);
+ open my $fh, "<", \$data;
+ my $self = eval { $csv->header ($fh); };
+ is ($self, undef, "FAIL for '$data'");
+ ok ($@, "Error");
+ is (0 + $csv->error_diag, $err, "Error code $err");
+ close $fh;
+ }
+{ open my $fh, "<", \"bar,bAr,bAR,BAR\n1,2,3,4";
+ $csv->column_names (undef);
+ ok ($csv->header ($fh, { munge_column_names => "none", detect_bom => 0 }), "non-unique unfolded headers");
+ is_deeply ([ $csv->column_names ], [qw( bar bAr bAR BAR )], "Headers");
+ close $fh;
+ }
+{ open my $fh, "<", \"bar,bAr,bAR,BAR\n1,2,3,4";
+ $csv->column_names (undef);
+ ok (my @hdr = $csv->header ($fh, { munge_column_names => "none" }), "non-unique unfolded headers");
+ is_deeply (\@hdr, [qw( bar bAr bAR BAR )], "Headers from method");
+ is_deeply ([ $csv->column_names ], [qw( bar bAr bAR BAR )], "Headers from column_names");
+ close $fh;
+ }
+
+foreach my $sep (",", ";") {
+ my $data = "bAr,foo\n1,2\n3,4,5\n";
+ $data =~ s/,/$sep/g;
+
+ $csv->column_names (undef);
+ { open my $fh, "<", \$data;
+ ok (my $slf = $csv->header ($fh, { set_column_names => 0 }), "Header without column setting");
+ is ($slf, $csv, "Return self");
+ is ($csv->sep_char, $sep, "Sep = $sep");
+ is_deeply ([ $csv->column_names ], [], "headers");
+ is_deeply ($csv->getline ($fh), [ 1, 2 ], "Line 1");
+ is_deeply ($csv->getline ($fh), [ 3, 4, 5 ], "Line 2");
+ close $fh;
+ }
+ $csv->column_names (undef);
+ { open my $fh, "<", \$data;
+ ok (my @hdr = $csv->header ($fh, { set_column_names => 0 }), "Header without column setting");
+ is_deeply (\@hdr, $hdr_lc, "Headers from method");
+ is_deeply ([ $csv->column_names ], [], "Headers from column_names");
+ close $fh;
+ }
+ }
+
+foreach my $ss ("", "bad", sub { 1; }, \*STDOUT, +{}) {
+ my $dta = "a,b\n1,2\n";
+ open my $fh, "<", \$dta;
+ my @hdr = eval { $csv->header ($fh, { sep_set => $ss }) };
+ is (scalar @hdr, 0, "No header on invalid sep_set");
+ is (0 + $csv->error_diag, 1500, "Error code");
+ }
+
+foreach my $dta ("", "\xfe\xff", "\xf7\x64\x4c", "\xdd\x73\x66\x73",
+ "\x0e\xfe\xff", "\xfb\xee\x28", "\x84\x31\x95\x33") {
+ open my $fh, "<", \$dta;
+ my @hdr = eval { $csv->header ($fh) };
+ is (scalar @hdr, 0, "No header on empty stream");
+ is (0 + $csv->error_diag, 1010, "Error code");
+ }
+
+my $n;
+for ([ undef, "_bar" ], [ "lc", "_bar" ], [ "uc", "_BAR" ], [ "none", "_bAr" ],
+ [ sub { "column_".$n++ }, "column_0" ], [ "db", "bar" ]) {
+ my ($munge, $hdr) = @$_;
+
+ my $data = "_bAr,foo\n1,2\n3,4,5\n";
+ my $how = defined $munge ? ref $munge ? "CB" : $munge : "undef";
+
+ $n = 0;
+ $csv->column_names (undef);
+ open my $fh, "<", \$data;
+ ok (my $slf = $csv->header ($fh, { munge_column_names => $munge }), "munge header with $how");
+ is (($csv->column_names)[0], $hdr, "folded header to $hdr");
+ close $fh;
+
+ $n = 0;
+ $csv->column_names (undef);
+ open $fh, "<", \$data;
+ ok (my @hdr = $csv->header ($fh, { munge_column_names => $munge }), "munge header with $how");
+ is ($hdr[0], $hdr, "folded header to $hdr");
+ close $fh;
+ }
+
+my $fnm = "_85hdr.csv"; END { unlink $fnm; }
+
+my $a_ring = chr (utf8::unicode_to_native (0xe5));
+foreach my $irs ("\n", chr (utf8::unicode_to_native (0xaa))) {
+ local $/ = $irs;
+ foreach my $eol ("\n", "\r\n", "\r") {
+ my $str = join $eol =>
+ qq{zoo,b${a_ring}r},
+ qq{1,"1 \x{20ac} each"},
+ "";
+ for ( [ "none" => "" ],
+ [ "utf-8" => "\xef\xbb\xbf" ],
+ [ "utf-16be" => "\xfe\xff" ],
+ [ "utf-16le" => "\xff\xfe" ],
+ [ "utf-32be" => "\x00\x00\xfe\xff" ],
+ [ "utf-32le" => "\xff\xfe\x00\x00" ],
+ # Below 5 not (yet) supported by Encode
+ [ "utf-1" => "\xf7\x64\x4c" ],
+ [ "utf-ebcdic" => "\xdd\x73\x66\x73" ],
+ [ "scsu" => "\x0e\xfe\xff" ],
+ [ "bocu-1" => "\xfb\xee\x28" ],
+ [ "gb-18030" => "\x84\x31\x95" ],
+ #
+ [ "UTF-8" => "\x{feff}" ],
+ ) {
+ my ($enc, $bom) = @$_;
+ my ($enx, $box, $has_enc) = ($enc, $bom, 0);
+ $enc eq "UTF-8" || $enc eq "none" or
+ $box = eval { Encode::encode ($enc, chr (0xfeff)) };
+ $enc eq "none" and $enx = "utf-8";
+
+ # On os390, Encode only supports the following EBCDIC
+ # cp37, cp500, cp875, cp1026, cp1047, and posix-bc
+ # utf-ebcdic is not in the list
+ eval {
+ no warnings "utf8";
+ open my $fh, ">", $fnm;
+ binmode $fh;
+ if (defined $box) {
+ print $fh byte_utf8a_to_utf8n ($box);
+ print $fh Encode::encode ($enx, $str);
+ $has_enc = 1;
+ }
+ else {
+ print $fh Encode::encode ("utf-8", $str);
+ }
+
+ close $fh;
+ };
+ #$ebcdic and $has_enc = 0; # TODO
+
+ $csv = Text::CSV->new ({ binary => 1, auto_diag => 9 });
+
+ SKIP: {
+ $has_enc or skip "Encoding $enc not supported", $enc =~ m/^utf/ ? 10 : 9;
+ $csv->column_names (undef);
+ open my $fh, "<", $fnm;
+ binmode $fh;
+ ok (1, "$fnm opened for enc $enc");
+ ok ($csv->header ($fh), "headers with BOM for $enc");
+ $enc =~ m/^utf/ and is ($csv->{ENCODING}, uc $enc, "Encoding inquirable");
+
+ is (($csv->column_names)[1], "b${a_ring}r", "column name was decoded");
+ ok (my $row = $csv->getline_hr ($fh), "getline_hr");
+ is ($row->{"b${a_ring}r"}, "1 \x{20ac} each", "Returned in Unicode");
+ close $fh;
+
+ my $aoh;
+ ok ($aoh = csv (in => $fnm, bom => 1), "csv (bom => 1)");
+ is_deeply ($aoh,
+ [{ zoo => 1, "b${a_ring}r" => "1 \x{20ac} each" }], "Returned data bom = 1");
+
+ ok ($aoh = csv (in => $fnm, encoding => "auto"), "csv (encoding => auto)");
+ is_deeply ($aoh,
+ [{ zoo => 1, "b${a_ring}r" => "1 \x{20ac} each" }], "Returned data auto");
+ }
+
+ SKIP: {
+ $has_enc or skip "Encoding $enc not supported", 7;
+ $csv->column_names (undef);
+ open my $fh, "<", $fnm;
+ $enc eq "none" or binmode $fh, ":encoding($enc)";
+ ok (1, "$fnm opened for enc $enc");
+ ok ($csv->header ($fh), "headers with BOM for $enc");
+ is (($csv->column_names)[1], "b${a_ring}r", "column name was decoded");
+ ok (my $row = $csv->getline_hr ($fh), "getline_hr");
+ is ($row->{"b${a_ring}r"}, "1 \x{20ac} each", "Returned in Unicode");
+ close $fh;
+
+ ok (my $aoh = csv (in => $fnm, bom => 1), "csv (bom => 1)");
+ is_deeply ($aoh,
+ [{ zoo => 1, "b${a_ring}r" => "1 \x{20ac} each" }], "Returned data");
+ }
+
+ unlink $fnm;
+ }
+ }
+ }
+
+{ # Header after first line with sep=
+ open my $fh, ">", $fnm or die "$fnm: $!";
+ print $fh "sep=;\n";
+ print $fh "a;b 1;c\n";
+ print $fh "1;2;3\n";
+ close $fh;
+ ok (my $aoh = csv (in => $fnm, munge => "db"), "Read header with sep=;");
+ is_deeply ($aoh, [{ a => 1, "b_1" => 2, c => 3 }], "Munged to db with sep");
+ }
diff --git a/src/test/resources/module/Text-CSV/t/90_csv.t b/src/test/resources/module/Text-CSV/t/90_csv.t
new file mode 100644
index 000000000..0d22dafe8
--- /dev/null
+++ b/src/test/resources/module/Text-CSV/t/90_csv.t
@@ -0,0 +1,412 @@
+#!/usr/bin/perl
+
+use strict;
+$^W = 1;
+use Config;
+
+#use Test::More "no_plan";
+ use Test::More tests => 127;
+
+BEGIN {
+ $ENV{PERL_TEXT_CSV} = $ENV{TEST_PERL_TEXT_CSV} || 0;
+ use_ok "Text::CSV", ("csv");
+ plan skip_all => "Cannot load Text::CSV" if $@;
+ require "./t/util.pl";
+ }
+
+my $tfn = "_90test.csv"; END { -f $tfn and unlink $tfn }
+my $data =
+ "foo,bar,baz\n".
+ "1,2,3\n".
+ "2,a b,\n";
+open FH, ">", $tfn or die "$tfn: $!";
+print FH $data;
+close FH;
+
+my @hdr = qw( foo bar baz );
+my $aoa = [
+ \@hdr,
+ [ 1, 2, 3 ],
+ [ 2, "a b", "" ],
+ ];
+my $aoh = [
+ { foo => 1, bar => 2, baz => 3 },
+ { foo => 2, bar => "a b", baz => "" },
+ ];
+
+SKIP: for my $io ([ $tfn, "file" ], [ \*FH, "globref" ], [ *FH, "glob" ], [ \$data, "ScalarIO"] ) {
+ $] < 5.008 && ref $io->[0] eq "SCALAR" and skip "No ScalarIO support for $]", 1;
+ open FH, "<", $tfn or die "$tfn: $!\n";
+ is_deeply (csv ({ in => $io->[0] }), $aoa, "AOA $io->[1]");
+ close FH;
+ }
+
+SKIP: for my $io ([ $tfn, "file" ], [ \*FH, "globref" ], [ *FH, "glob" ], [ \$data, "ScalarIO"] ) {
+ $] < 5.008 && ref $io->[0] eq "SCALAR" and skip "No ScalarIO support for $]", 1;
+ open FH, "<", $tfn or die "$tfn: $!\n";
+ is_deeply (csv (in => $io->[0], headers => "auto"), $aoh, "AOH $io->[1]");
+ close FH;
+ }
+
+is_deeply (csv (in => $tfn, headers => { bar => "tender" }), [
+ { foo => 1, tender => 2, baz => 3 },
+ { foo => 2, tender => "a b", baz => "" },
+ ], "AOH with header map");
+
+my @aoa = @{$aoa}[1,2];
+is_deeply (csv (file => $tfn, headers => "skip"), \@aoa, "AOA skip");
+is_deeply (csv (file => $tfn, fragment => "row=2-3"), \@aoa, "AOA fragment");
+
+if ($] >= 5.008001) {
+ my @hdr;
+ ok (my $ref = csv (in => $tfn, bom => 1), "csv (-- not keeping header)");
+ is_deeply (\@hdr, [], "Should still be empty");
+ foreach my $alias (qw( keep_headers keep_column_names kh )) {
+ @hdr = ();
+ ok (my $ref = csv (in => $tfn, bom => 1, $alias => \@hdr), "csv ($alias => ...)");
+ is_deeply (\@hdr, [qw( foo bar baz )], "Headers kept for $alias");
+ }
+ foreach my $alias (qw( keep_headers keep_column_names kh )) {
+ @hdr = ();
+ ok (my $ref = csv (in => $tfn, $alias => \@hdr), "csv ($alias => ... -- implied headers)");
+ is_deeply (\@hdr, [qw( foo bar baz )], "Headers kept for $alias");
+ }
+ foreach my $alias (qw( internal true yes 1 )) {
+ my $buf = "";
+ ok (my $ref = csv (in => $tfn, kh => $alias), "csv (kh => $alias)");
+ ok (csv (in => $ref, out => \$buf, kh => $alias, quote_space => 0, eol => "\n"), "get it back");
+ is ($buf, $data, "Headers kept for $alias");
+ }
+ }
+else {
+ ok (1, q{This perl cannot do scalar IO}) for 1..26;
+ }
+
+if ($] >= 5.008001) {
+ is_deeply (csv (in => $tfn, encoding => "utf-8", headers => ["a", "b", "c"],
+ fragment => "row=2", sep_char => ","),
+ [{ a => 1, b => 2, c => 3 }], "AOH headers fragment");
+ is_deeply (csv (in => $tfn, enc => "utf-8", headers => ["a", "b", "c"],
+ fragment => "row=2", sep_char => ","),
+ [{ a => 1, b => 2, c => 3 }], "AOH headers fragment");
+ }
+else {
+ ok (1, q{This perl does not support open with "<:encoding(...)"});
+ ok (1, q{This perl does not support open with "<:encoding(...)"});
+ }
+
+ok (csv (in => $aoa, out => $tfn), "AOA out file");
+is_deeply (csv (in => $tfn), $aoa, "AOA parse out");
+
+ok (csv (in => $aoh, out => $tfn, headers => "auto"), "AOH out file");
+is_deeply (csv (in => $tfn, headers => "auto"), $aoh, "AOH parse out");
+
+if ($Config{usecperl} && $Config{usecperl} eq "define") {
+ ok (1, "cperl has a different view on stable sorting of hash keys");
+ ok (1, "not doing this (silly) test");
+ }
+else {
+ ok (csv (in => $aoh, out => $tfn, headers => "skip"), "AOH out file no header");
+ is_deeply (csv (in => $tfn, headers => [keys %{$aoh->[0]}]),
+ $aoh, "AOH parse out no header");
+ }
+
+my $idx = 0;
+sub getrowa { return $aoa->[$idx++]; }
+sub getrowh { return $aoh->[$idx++]; }
+
+ok (csv (in => \&getrowa, file => $tfn), "out via file from CODE/AR");
+is_deeply (csv (in => $tfn), $aoa, "data from CODE/AR");
+
+$idx = 0;
+ok (csv (in => \&getrowh, out => $tfn, headers => \@hdr), "out from CODE/HR");
+is_deeply (csv (in => $tfn, headers => "auto"), $aoh, "data from CODE/HR");
+
+$idx = 0;
+ok (csv (in => \&getrowh, out => $tfn), "out from CODE/HR (auto headers)");
+is_deeply (csv (in => $tfn, headers => "auto"), $aoh, "data from CODE/HR");
+unlink $tfn;
+
+# Basic "key" checks
+SKIP: {
+ $] < 5.008 and skip "No ScalarIO support for $]", 4;
+ # Simple key
+ is_deeply (csv (in => \"key,value\n1,2\n", key => "key"),
+ { 1 => { key => 1, value => 2 }}, "key");
+ is_deeply (csv (in => \"1,2\n", key => "key", headers => [qw( key value )]),
+ { 1 => { key => 1, value => 2 }}, "key");
+ # Combined key
+ is_deeply (csv (in => \"a,b,value\n1,1,2\n", key => [ ":" => "a", "b" ]),
+ { "1:1" => { a => 1, b => 1, value => 2 }}, "key list");
+ is_deeply (csv (in => \"2,3,2\n", key => [ ":" => "a", "b" ], headers => [qw( a b value )]),
+ { "2:3" => { a => 2, b => 3, value => 2 }}, "key list");
+ }
+# Basic "value" checks
+SKIP: {
+ $] < 5.008001 and skip "No ScalarIO support for 'value's in $]", 5;
+ # Simple key simple value
+ is_deeply (csv (in => \"key,value\n1,2\n", key => "key", value => "value"),
+ { 1 => 2 }, "key:value");
+ is_deeply (csv (in => \"1,2\n", key => "key", headers => [qw( key value )], value => "value"),
+ { 1 => 2 }, "key:value");
+ # Simple key combined value
+ is_deeply (csv (in => \"key,v1,v2\n1,2,3\n", key => "key", value => [ "v1", "v2" ]),
+ { 1 => { v1 => 2, v2 => 3 }}, "key:value");
+ # Combined key simple value
+ is_deeply (csv (in => \"a,b,value\n1,1,2\n", key => [ ":" => "a", "b" ], value => "value"),
+ { "1:1" => 2 }, "[key]:value");
+ # Combined key combined value
+ is_deeply (csv (in => \"a,b,v1,v2\n1,1,2,2\n", key => [ ":" => "a", "b" ], value => [ "v1", "v2" ]),
+ { "1:1" => { v1 => 2, v2 => 2 }}, "[key]:[value]");
+ }
+
+# Some "out" checks
+my $crnl;
+open my $fh, ">", $tfn or die "$tfn: $!\n";
+csv (in => [{ a => 1 }], out => $fh);
+csv (in => [{ a => 1 }], out => $fh, headers => undef);
+csv (in => [{ a => 1 }], out => $fh, headers => "auto");
+csv (in => [{ a => 1 }], out => $fh, headers => ["a"]);
+csv (in => [{ b => 1 }], out => $fh, headers => { b => "a" });
+close $fh;
+{ open $fh, "<", $tfn or die "$tfn: $!\n";
+ my $dta = do {local $/; <$fh>};
+ my @layers = eval { PerlIO::get_layers ($fh); };
+ close $fh;
+ if (grep m/crlf/ => @layers) {
+ $dta =~ s/\n/\r\n/g;
+ $crnl++;
+ }
+ is ($dta, "a\r\n1\r\n" x 5, "AoH to out");
+ }
+
+# check internal defaults
+{
+ my $ad = 1;
+
+ sub check
+ {
+ my ($csv, $ar) = @_;
+ is ($csv->auto_diag, $ad, "default auto_diag ($ad)");
+ is ($csv->binary, 1, "default binary");
+ is ($csv->eol, "\r\n", "default eol");
+ } # check
+
+ # Note that 5.6.x writes to a *file* named SCALAR(0x50414A10)
+ open my $fh, ">", \my $out or die "IO: $!\n";
+ csv (in => [[1,2]], out => $fh, on_in => \&check);
+
+ # Check that I can overrule auto_diag
+ $ad = 0;
+ csv (in => [[1,2]], out => $fh, on_in => \&check, auto_diag => 0,
+ ($] >= 5.008004 ? (encoding => "utf-8") : ()));
+ }
+$] < 5.008 and unlink glob "SCALAR(*)";
+
+# errors
+{ my $err = "";
+ local $SIG{__DIE__} = sub { $err = shift; };
+ my $r = eval { csv (in => undef); };
+ is ($r, undef, "csv needs in or file");
+ like ($err, qr{^usage:}, "error");
+ $err = "";
+
+ $r = eval { csv (in => $tfn, key => [ ":" ], auto_diag => 0); };
+ $err =~ s{\s+at\s+\S+\s+line\s+\d+\.\r?\n?\Z}{};
+ is ($r, undef, "Fail call with key with not enough fields");
+ like ($err, qr{PRM.*unsupported type}, $err);
+ $err = "";
+
+ $r = eval { csv (in => $tfn, key => { "fx" => 1 }, auto_diag => 0); };
+ $err =~ s{\s+at\s+\S+\s+line\s+\d+\.\r?\n?\Z}{};
+ is ($r, undef, "Fail call with unsupported key type");
+ like ($err, qr{PRM.*unsupported type}, $err);
+ $err = "";
+
+ $r = eval { csv (in => $tfn, key => sub { "foo" }, auto_diag => 0); };
+ $err =~ s{\s+at\s+\S+\s+line\s+\d+\.\r?\n?\Z}{};
+ is ($r, undef, "Fail call with bad unsupported type");
+ like ($err, qr{PRM.*unsupported type}, $err);
+ $err = "";
+
+ $r = eval { csv (in => $tfn, key => "xyz", auto_diag => 0); };
+ $err =~ s{\s+at\s+\S+\s+line\s+\d+\.\r?\n?\Z}{};
+ is ($r, undef, "Fail call with nonexisting key");
+ like ($err, qr{PRM.*xyz}, $err);
+ $err = "";
+
+ $r = eval { csv (in => $tfn, key => [ "x" ], auto_diag => 0); };
+ $err =~ s{\s+at\s+\S+\s+line\s+\d+\.\r?\n?\Z}{};
+ is ($r, undef, "Fail call with no key in keylist");
+ like ($err, qr{PRM.*unsupported type}, $err);
+ $err = "";
+
+ $r = eval { csv (in => $tfn, key => [ ":", "a", "xyz" ], auto_diag => 0); };
+ $err =~ s{\s+at\s+\S+\s+line\s+\d+\.\r?\n?\Z}{};
+ is ($r, undef, "Fail call with nonexisting key in keylist");
+ like ($err, qr{PRM.*xyz}, $err);
+ $err = "";
+
+ local $SIG{__DIE__} = sub { $err = shift; };
+ local $SIG{__WARN__} = sub { $err = shift; };
+ foreach my $hr (42, "foo", \my %hr, sub { 42; }, *STDOUT) {
+ $r = eval { csv (in => $tfn, kh => $hr, auto_diag => 0); };
+ $err =~ s{\s+at\s+\S+\s+line\s+\d+\.\r?\n?\Z}{};
+ is ($r, undef, "Fail call with bad keep_header type");
+ like ($err, qr{PRM.*unsupported type}, $err);
+ $err = "";
+ }
+
+# $r = eval { csv (in => +{}, auto_diag => 0); };
+# is ($r, undef, "Cannot read from hashref");
+# like ($err, qr{No such file}i, "No such file or directory");
+# undef $err;
+
+ $r = eval { csv (in => undef, auto_diag => 0); };
+ $err =~ s{\s+at\s+\S+\s+line\s+\d+\.\r?\n?\Z}{};
+ is ($r, undef, "Cannot read from undef");
+ like ($err, qr{^usage}, "Remind them of correct syntax");
+ $err = "";
+
+ $r = eval { csv (in => "", auto_diag => 0); };
+ $err =~ s{\s+at\s+\S+\s+line\s+\d+\.\r?\n?\Z}{};
+ is ($r, undef, "Cannot read from empty");
+ like ($err, qr{^usage}, "Remind them of correct syntax");
+ $err = "";
+
+ my $fn = "./dev/foo/bar/\x99\x99/\x88\x88/".
+ (join "\x99" => map { chr (128 + int rand 128) } 0..100).".csv";
+ $r = eval { csv (in => $fn, auto_diag => 0); };
+ $err =~ s{\s+at\s+\S+\s+line\s+\d+\.\r?\n?\Z}{};
+ is ($r, undef, "Cannot read from impossible file");
+ like ($err, qr{/foo/bar}, "No such file or directory");
+ $err = "";
+
+ $r = eval { csv (in => [[1,2]], out => $fn, auto_diag => 0); };
+ $err =~ s{\s+at\s+\S+\s+line\s+\d+\.\r?\n?\Z}{};
+ is ($r, undef, "Cannot write to impossible file");
+ like ($err, qr{/foo/bar}, "No such file or directory");
+ $err = "";
+
+ $r = eval { csv (); };
+ is ($r, undef, "Needs arguments");
+ like ($err, qr{^usage}i, "Don't know what to do");
+ $err = "";
+
+ my $x = sub { 42; };
+ $r = eval { csv (in => $tfn, out => \$x, auto_diag => 0); };
+ $err =~ s{\s+at\s+\S+\s+line\s+\d+\.\r?\n?\Z}{};
+ is ($r, undef, "Cannot write to subref");
+ like ($err, qr{Not a GLOB}i, "Not a GLOB");
+ $err = "";
+
+ SKIP: {
+ $] < 5.008 and skip "$] does not support bom here", 2;
+ $x = [[ 1, 2 ]]; # Add hashes to arrays
+ $r = eval { csv (in => $tfn, out => $x, bom => 1); };
+ $err =~ s{\s+at\s+\S+\s+line\s+\d+\.\r?\n?\Z}{};
+ is ($r, undef, "Cannot add hashes to arrays");
+ like ($err, qr{type mismatch}, "HASH != ARRAY");
+ $err = "";
+ }
+
+ $x = [{ a => 1, b => 2 }]; # Add arrays to hashes
+ $r = eval { csv (in => $tfn, out => $x); };
+ $err =~ s{\s+at\s+\S+\s+line\s+\d+\.\r?\n?\Z}{};
+ is ($r, undef, "Cannot add arrays to hashes");
+ like ($err, qr{type mismatch}i, "ARRAY != HASH");
+ $err = "";
+ }
+
+eval {
+ exists $Config{useperlio} &&
+ defined $Config{useperlio} &&
+ $] >= 5.008 &&
+ $Config{useperlio} eq "define" or skip "No scalar ref in this perl", 5;
+ my $out = "";
+ open my $fh, ">", \$out or die "IO: $!\n";
+ ok (csv (in => [[ 1, 2, 3 ]], out => $fh), "out to fh to scalar ref");
+ is ($out, "1,2,3\r\n", "Scalar out");
+ $out = "";
+ ok (csv (in => [[ 1, 2, 3 ]], out => \$out), "out to scalar ref");
+ is ($out, "1,2,3\r\n", "Scalar out");
+
+ is_deeply (csv (in => \qq{1,"2 3"}, quo => undef, esc => undef),
+ [["1", q{"2 3"}]], "quo => undef");
+ };
+
+{ my $csv = Text::CSV->new ({ binary => 1, auto_diag => 1 });
+ my $expect = [["a"],[1],["a"],[1],["a"],[1],["a"],[1],["a"],[1]];
+ is_deeply ($csv->csv (in => $tfn), $expect, "csv from object");
+ is_deeply (csv (in => $tfn, csv => $csv), $expect, "csv from attribute");
+ }
+
+{ local *STDOUT;
+ my $ofn = "_STDOUT.csv";
+
+ open STDOUT, ">", $ofn or die "$ofn: $!\n";
+ { my @w;
+ local $SIG{__WARN__} = sub { push @w => @_ };
+ csv (in => $tfn, quote_always => 1, fragment => "row=1-2",
+ on_in => sub { splice @{$_[1]}, 1; }, eol => "\n");
+ if ($crnl) {
+ is (scalar @w, 0, "CRNL layer found");
+ }
+ else {
+ like ($w[0], qr/2016 - EOL/, "EOL mismatch");
+ }
+ }
+ close STDOUT;
+ my $dta = do { local (@ARGV, $/) = $ofn; <> };
+ is ($dta, qq{"a"\n"1"\n}, "Chained csv call inherited attributes");
+ unlink $ofn;
+
+ open STDOUT, ">", $ofn;
+ csv (in => [[1,2]], out => *STDOUT, eol => "\n");
+ close STDOUT;
+ $dta = do { local (@ARGV, $/) = $ofn; <> };
+ is ($dta, qq{1,2\n}, "out to *STDOUT");
+ unlink $ofn;
+
+ open STDOUT, ">", $ofn;
+ csv (in => [[1,2]], out => \*STDOUT, eol => "\n");
+ close STDOUT;
+ $dta = do { local (@ARGV, $/) = $ofn; <> };
+ is ($dta, qq{1,2\n}, "out to \\*STDOUT");
+ unlink $ofn;
+
+ open STDOUT, ">", $ofn;
+ csv (in => []);
+ close STDOUT;
+ is (-s $ofn, 0, "No data results in an empty file");
+ unlink $ofn;
+
+ SKIP: {
+ $] <= 5.008 and skip qq{$] does not support ScalarIO}, 6;
+ my $aoa = [[ 1, 2 ]];
+ is (csv (in => \"3,4", out => $aoa), $aoa, "return AOA");
+ is_deeply ($aoa, [[ 1, 2 ], [ 3, 4 ]], "Add to AOA");
+
+ my $aoh = [{ a => 1, b => 2 }];
+ is (csv (in => \"a,b\n3,4", out => $aoh, bom => 1), $aoh, "return AOH");
+ is_deeply ($aoa, [[ 1, 2 ], [ 3, 4 ]], "Add to AOH");
+
+ my $ref = { 1 => { a => 1, b => 2 }};
+ is (csv (in => \"a,b\n3,4", out => $ref, key => "a"), $ref, "return REF");
+ is_deeply ($ref, { 1 => { a => 1, b => 2},
+ 3 => { a => 3, b => 4},
+ }, "Add to keyed hash");
+ }
+
+ SKIP: {
+ $] <= 5.008003 and skip qq{$] does not support ">:crlf"}, 1;
+ open STDOUT, ">", $ofn; binmode STDOUT, ":crlf";
+ csv (in => [[1,2]], out => \*STDOUT);
+ close STDOUT;
+ open my $oh, "<", $ofn or die "$ofn: $!\n";
+ binmode $oh;
+ $dta = do { local $/; <$oh> };
+ is ($dta, qq{1,2\r\n}, "out to \\*STDOUT");
+ unlink $ofn;
+ }
+ }
diff --git a/src/test/resources/module/Text-CSV/t/91_csv_cb.t b/src/test/resources/module/Text-CSV/t/91_csv_cb.t
new file mode 100644
index 000000000..7a79ec100
--- /dev/null
+++ b/src/test/resources/module/Text-CSV/t/91_csv_cb.t
@@ -0,0 +1,308 @@
+#!/usr/bin/perl
+
+use strict;
+$^W = 1;
+
+#use Test::More "no_plan";
+ use Test::More tests => 82;
+
+BEGIN {
+ $ENV{PERL_TEXT_CSV} = $ENV{TEST_PERL_TEXT_CSV} || 0;
+ use_ok "Text::CSV", ("csv");
+ plan skip_all => "Cannot load Text::CSV" if $@;
+ require "./t/util.pl";
+ }
+
+my $tfn = "_91test.csv"; END { -f $tfn and unlink $tfn }
+my $data =
+ "foo,bar,baz\n".
+ "1,2,3\n".
+ "2,a b,\n";
+open my $fh, ">", $tfn or die "$tfn: $!";
+print $fh $data;
+close $fh;
+
+my $aoa = [
+ [qw( foo bar baz )],
+ [ 1, 2, 3 ],
+ [ 2, "a b", "" ],
+ ];
+my $aoh = [
+ { foo => 1, bar => 2, baz => 3 },
+ { foo => 2, bar => "a b", baz => "" },
+ ];
+
+for (qw( after_in on_in before_out )) {
+ is_deeply (csv (in => $tfn, $_ => sub {}), $aoa, "callback $_ on AOA with empty sub");
+ is_deeply (csv (in => $tfn, callbacks => { $_ => sub {} }), $aoa, "callback $_ on AOA with empty sub");
+ }
+is_deeply (csv (in => $tfn, after_in => sub {},
+ callbacks => { on_in => sub {} }), $aoa, "callback after_in and on_in on AOA");
+
+for (qw( after_in on_in before_out )) {
+ is_deeply (csv (in => $tfn, headers => "auto", $_ => sub {}), $aoh, "callback $_ on AOH with empty sub");
+ is_deeply (csv (in => $tfn, headers => "auto", callbacks => { $_ => sub {} }), $aoh, "callback $_ on AOH with empty sub");
+ }
+is_deeply (csv (in => $tfn, headers => "auto", after_in => sub {},
+ callbacks => { on_in => sub {} }), $aoh, "callback after_in and on_in on AOH");
+
+is_deeply (csv (in => $tfn, after_in => sub { push @{$_[1]}, "A" }), [
+ [qw( foo bar baz A )],
+ [ 1, 2, 3, "A" ],
+ [ 2, "a b", "", "A" ],
+ ], "AOA ith after_in callback");
+
+is_deeply (csv (in => $tfn, headers => "auto", after_in => sub { $_[1]{baz} = "A" }), [
+ { foo => 1, bar => 2, baz => "A" },
+ { foo => 2, bar => "a b", baz => "A" },
+ ], "AOH with after_in callback");
+
+is_deeply (csv (in => $tfn, filter => { 2 => sub { /a/ }}), [
+ [qw( foo bar baz )],
+ [ 2, "a b", "" ],
+ ], "AOA with filter on col 2");
+is_deeply (csv (in => $tfn, filter => { 2 => sub { /a/ },
+ 1 => sub { length > 1 }}), [
+ [qw( foo bar baz )],
+ ], "AOA with filter on col 1 and 2");
+is_deeply (csv (in => $tfn, filter => { foo => sub { $_ > 1 }}), [
+ { foo => 2, bar => "a b", baz => "" },
+ ], "AOH with filter on column name");
+
+SKIP: {
+ $] < 5.008001 and skip "No HOH/xx support in $]", 3;
+ is_deeply (csv (in => $tfn, headers => "lc"),
+ [ { foo => 1, bar => 2, baz => 3 },
+ { foo => 2, bar => "a b", baz => "" }],
+ "AOH with lc headers");
+ is_deeply (csv (in => $tfn, headers => "uc"),
+ [ { FOO => 1, BAR => 2, BAZ => 3 },
+ { FOO => 2, BAR => "a b", BAZ => "" }],
+ "AOH with lc headers");
+ is_deeply (csv (in => $tfn, headers => sub { lcfirst uc $_[0] }),
+ [ { fOO => 1, bAR => 2, bAZ => 3 },
+ { fOO => 2, bAR => "a b", bAZ => "" }],
+ "AOH with mangled headers");
+ }
+
+SKIP: {
+ $] < 5.008001 and skip "No BOM support in $]", 1;
+ is_deeply (csv (in => $tfn, munge => { bar => "boo" }),
+ [{ baz => 3, boo => 2, foo => 1 },
+ { baz => "", boo => "a b", foo => 2 }], "Munge with hash");
+ }
+
+open $fh, ">>", $tfn or die "$tfn: $!";
+print $fh <<"EOD";
+3,3,3
+4,5,6
+5,7,9
+6,9,12
+7,11,15
+8,13,18
+EOD
+close $fh;
+
+is_deeply (csv (in => $tfn,
+ filter => { foo => sub { $_ > 2 && $_[1][2] - $_[1][1] < 4 }}), [
+ { foo => 3, bar => 3, baz => 3 },
+ { foo => 4, bar => 5, baz => 6 },
+ { foo => 5, bar => 7, baz => 9 },
+ { foo => 6, bar => 9, baz => 12 },
+ ], "AOH with filter on column name + on other numbered fields");
+
+is_deeply (csv (in => $tfn,
+ filter => { foo => sub { $_ > 2 && $_{baz} - $_{bar} < 4 }}), [
+ { foo => 3, bar => 3, baz => 3 },
+ { foo => 4, bar => 5, baz => 6 },
+ { foo => 5, bar => 7, baz => 9 },
+ { foo => 6, bar => 9, baz => 12 },
+ ], "AOH with filter on column name + on other named fields");
+
+# Check content ref in on_in AOA
+{ my $aoa = csv (
+ in => $tfn,
+ filter => { 1 => sub { m/^[3-9]/ }},
+ on_in => sub {
+ is ($_[1][1], 2 * $_[1][0] - 3, "AOA $_[1][0]: b = 2a - 3 \$_[1][]");
+ });
+ }
+# Check content ref in on_in AOH
+{ my $aoa = csv (
+ in => $tfn,
+ headers => "auto",
+ filter => { foo => sub { m/^[3-9]/ }},
+ after_parse => sub {
+ is ($_[1]{bar}, 2 * $_[1]{foo} - 3, "AOH $_[1]{foo}: b = 2a - 3 \$_[1]{}");
+ });
+ }
+# Check content ref in on_in AOH with aliases %_
+SKIP: {
+ $] < 5.008001 and skip "No AOH/alias support in $]", 7; # 6 in on_in, 1 is_deeply
+ %_ = ( brt => 42 );
+ my $aoa = csv (
+ in => $tfn,
+ headers => "auto",
+ filter => { foo => sub { m/^[3-9]/ }},
+ on_in => sub {
+ is ($_{bar}, 2 * $_{foo} - 3, "AOH $_{foo}: b = 2a - 3 \$_{}");
+ });
+ is_deeply (\%_, { brt => 42 }, "%_ restored");
+ }
+
+SKIP: {
+ $] < 5.008001 and skip "Too complicated test for $]", 2;
+ # Add to %_ in callback
+ # And test bizarre (but allowed) attribute combinations
+ # Most of them can be either left out or done more efficiently in
+ # a different way
+ my $xcsv = Text::CSV->new;
+ is_deeply (csv (in => $tfn,
+ seps => [ ",", ";" ],
+ munge => "uc",
+ quo => '"',
+ esc => '"',
+ csv => $xcsv,
+ filter => { 1 => sub { $_ eq "4" }},
+ on_in => sub { $_{BRT} = 42; }),
+ [{ FOO => 4, BAR => 5, BAZ => 6, BRT => 42 }],
+ "AOH with addition to %_ in on_in");
+ is_deeply ($xcsv->csv (
+ file => $tfn,
+ sep_set => [ ";", "," ],
+ munge_column_names => "uc",
+ quote_char => '"',
+ quote => '"',
+ escape_char => '"',
+ escape => '"',
+ filter => { 1 => sub { $_ eq "4" }},
+ after_in => sub { $_{BRT} = 42; }),
+ [{ FOO => 4, BAR => 5, BAZ => 6, BRT => 42 }],
+ "AOH with addition to %_ in on_in");
+ }
+
+
+SKIP: {
+ $] < 5.008001 and skip "Too complicated test for $]", 2;
+ ok (my $hr = csv (in => $tfn, key => "foo", on_in => sub {
+ $_[1]{quz} = "B"; $_{ziq} = 2; }),
+ "Get into hashref with key and on_in");
+ is_deeply ($hr->{8}, {qw( bar 13 baz 18 foo 8 quz B ziq 2 )},
+ "on_in with key works");
+ }
+
+open $fh, ">", $tfn or die "$tfn: $!";
+print $fh <<"EOD";
+3,3,3
+
+5,7,9
+,
+"",
+,, ,
+,"",
+,," ",
+""
+8,13,18
+EOD
+close $fh;
+
+SKIP: {
+ $] < 5.008001 and skip "Too complicated test for $]", 4;
+ is_deeply (csv (in => $tfn, filter => "not_blank"),
+ [[3,3,3],[5,7,9],["",""],["",""],["",""," ",""],
+ ["","",""],["",""," ",""],[8,13,18]],
+ "filter => not_blank");
+ is_deeply (csv (in => $tfn, filter => "not_empty"),
+ [[3,3,3],[5,7,9],["",""," ",""],["",""," ",""],[8,13,18]],
+ "filter => not_empty");
+ is_deeply (csv (in => $tfn, filter => "filled"),
+ [[3,3,3],[5,7,9],[8,13,18]],
+ "filter => filled");
+
+ is_deeply (csv (in => $tfn, filter => sub {
+ grep { defined && m/\S/ } @{$_[1]} }),
+ [[3,3,3],[5,7,9],[8,13,18]],
+ "filter => filled");
+ }
+
+{ my @err;
+ my $aoa = csv (in => $tfn, strict => 1, on_error => sub { @err = @_ });
+ is_deeply ($aoa, [[3,3,3]], "Bad CSV still returns ref");
+ is ($err[0], 2014, "ENF - Inconsistent number of fields");
+ is (0 + Text::CSV->error_diag, 2014, "Error is kept");
+ }
+
+# Count rows in different ways
+open $fh, ">", $tfn or die "$tfn: $!";
+print $fh <<"EOD";
+foo,bar,baz
+1,,3
+0,"d
+€",4
+999,999,
+EOD
+close $fh;
+
+{ my $n = 0;
+ open my $fh, "<", $tfn;
+ my $csv = Text::CSV->new ({ binary => 1 });
+ while (my $row = $csv->getline ($fh)) { $n++; }
+ close $fh;
+ is ($n, 4, "Count rows with getline");
+ }
+{ my $n = 0;
+ my $aoa = csv (in => $tfn, on_in => sub { $n++ });
+ is ($n, 4, "Count rows with on_in");
+ }
+{ my $n = 0;
+ my $aoa = csv (in => $tfn, filter => { 0 => sub { $n++; 0; }});
+ is ($n, 4, "Count rows with filter hash");
+ }
+SKIP: {
+ $] < 5.008001 and skip "Too complicated test for $]", 1;
+ my $n = 0;
+ my $aoa = csv (in => $tfn, filter => sub { $n++; 0; });
+ is ($n, 4, "Count rows with filter sub");
+ }
+SKIP: {
+ $] < 5.008001 and skip "Too complicated test for $]", 1;
+ my $n = 0;
+ csv (in => $tfn, on_in => sub { $n++; 0; }, out => \"skip");
+ is ($n, 4, "Count rows with on_in and skipped out");
+ }
+
+# sep_set, seps, sep on problematic header
+foreach my $sep (",", ";", "\t") {
+ my $ph = "Problematic header";
+
+ open $fh, ">", $tfn or die "$tfn: $!";
+ print $fh qq{foo${sep}"bar: a, b"${sep}"c;d"${sep}"e"\n};
+ print $fh qq{1${sep}2${sep}3${sep}4\n};
+ close $fh;
+
+ my $exp = [{
+ "foo" => 1,
+ "bar: a, b" => 2,
+ "c;d" => 3,
+ "e" => 4,
+ }];
+
+ ok (csv (in => $tfn, allow_loose_quotes => 1), "$ph, AoA");
+
+ if ($] < 5.010000) {
+ ok (1, "Unsupported header feature for $] - sep: $sep") for 1..6;
+ next;
+ }
+
+ my @err;
+ is (eval {
+ local $SIG{__WARN__} = sub { push @err => @_ };
+ csv (in => $tfn, bom => 1);
+ }, undef, "$ph: cannot decide on sep");
+ like ($err[0], qr{ERROR: 1011\b}, "$ph: error 1011");
+
+ is_deeply (csv (in => $tfn, bom => 1, sep_set => [ $sep ]), $exp, "$ph: sep_set");
+ is_deeply (csv (in => $tfn, bom => 1, seps => [ $sep ]), $exp, "$ph: seps");
+ is_deeply (csv (in => $tfn, bom => 1, sep_char => $sep ), $exp, "$ph: sep_char");
+ is_deeply (csv (in => $tfn, bom => 1, sep => $sep ), $exp, "$ph: sep");
+ }
diff --git a/src/test/resources/module/Text-CSV/t/92_stream.t b/src/test/resources/module/Text-CSV/t/92_stream.t
new file mode 100644
index 000000000..58fc51067
--- /dev/null
+++ b/src/test/resources/module/Text-CSV/t/92_stream.t
@@ -0,0 +1,126 @@
+#!/usr/bin/perl
+
+use strict;
+$^W = 1;
+
+#use Test::More "no_plan";
+ use Test::More tests => 21;
+
+BEGIN {
+ $ENV{PERL_TEXT_CSV} = $ENV{TEST_PERL_TEXT_CSV} || 0;
+ use_ok "Text::CSV", ("csv");
+ plan skip_all => "Cannot load Text::CSV" if $@;
+ require "./t/util.pl";
+ }
+
+my $tfni = "_92test-i.csv"; END { -f $tfni and unlink $tfni } # CRNL
+my $tfnn = "_92test-n.csv"; END { -f $tfnn and unlink $tfnn } # CRNL + NL
+my $tfno = "_92test-o.csv"; END { -f $tfno and unlink $tfno } # out
+
+my $data =
+ "foo,bar,baz,quux\r\n".
+ "1,2,3,25\r\n".
+ "2,a b,,14\r\n";
+open my $fhi, ">", $tfni or die "$tfni: $!";
+print $fhi $data;
+close $fhi;
+open my $fhn, ">", $tfnn or die "$tfnn: $!";
+{ my $d = $data;
+ $d =~ s/5\r\n/5\n/;
+ print $fhn $d;
+ }
+close $fhn;
+ok (my $aoa = csv (in => $tfni), "Read default data");;
+
+{ my ($I, $O, @W);
+ ok (my $co = Text::CSV->new ({
+ eol => "\n",
+ auto_diag => 1,
+ callbacks => {
+ before_print => sub {
+ warn ++$O, "\n";
+ $_[1][3] =~ s/x$/y/ or $_[1][3] *= 4;
+ },
+ },
+ }), "Create external CSV object");
+ open my $fho, ">", $tfno or die "$tfno: $!\n";
+ { local $SIG{__WARN__} = sub { push @W => @_ };
+ csv (
+ in => $tfni,
+ out => undef,
+ callbacks => {
+ after_parse => sub {
+ warn ++$I, "\n";
+ $co->print ($fho, $_[1]);
+ },
+ },
+ );
+ }
+ close $tfno;
+ chomp @W;
+ is ("@W", "1 1 2 2 3 3", "Old-fashioned streaming");
+ }
+
+# Basic straight-forward streaming, no filters/modifiers
+unlink $tfno if -e $tfno;
+csv (in => $tfni, out => $tfno, quote_space => 0);
+ok (-s $tfno, "FILE -> FILE");
+is_deeply (csv (in => $tfno), $aoa, "Data is equal");
+
+unlink $tfno if -e $tfno;
+open my $fho, ">", $tfno;
+csv (in => $tfni, out => $fho, quote_space => 0);
+close $fho;
+ok (-s $tfno, "FILE -> FH");
+is_deeply (csv (in => $tfno), $aoa, "Data is equal");
+
+unlink $tfno if -e $tfno;
+open $fhi, "<", $tfni;
+csv (in => $fhi, out => $tfno, quote_space => 0);
+close $fhi;
+ok (-s $tfno, "FH -> FILE");
+is_deeply (csv (in => $tfno), $aoa, "Data is equal");
+
+unlink $tfno if -e $tfno;
+open $fhi, "<", $tfni;
+open $fho, ">", $tfno;
+csv (in => $fhi, out => $fho, quote_space => 0);
+close $fho;
+close $fhi;
+ok (-s $tfno, "FH -> FH");
+is_deeply (csv (in => $tfno), $aoa, "Data is equal");
+
+unlink $tfno if -e $tfno;
+my @W;
+eval {
+ local $SIG{__WARN__} = sub { push @W => @_ };
+ csv (in => $tfnn, out => $tfno, quote_space => 0);
+ };
+like ($W[0], qr{\b2016 - EOL\b}, "Inconsistent use of EOL");
+ok (-s $tfno, "FH -> FILE (NL => CRNL)");
+is_deeply (csv (in => $tfno), $aoa, "Data is equal");
+is (do { local (@ARGV, $/) = ($tfno); <> }, $data, "Consistent CRNL");
+
+unlink $tfno if -e $tfno;
+csv (
+ in => $tfni,
+ out => $tfno,
+ quote_space => 0,
+ after_parse => sub { $_[1][1] .= "X" },
+ );
+ok (-s $tfno, "With after_parse");
+my @new = map { my @x = @$_; $x[1] .= "X"; \@x } @$aoa;
+is_deeply (csv (in => $tfno), \@new, "Data is equal");
+
+# Prove streaming behavior
+my $io = "";
+unlink $tfno if -e $tfno;
+csv (
+ in => $tfni,
+ out => $tfno,
+ on_in => sub { $io .= "I" },
+ callbacks => { before_print => sub { $io .= "O" }},
+ );
+ok (-s $tfno, "FILE -> FILE");
+is_deeply (csv (in => $tfno), $aoa, "Data is equal");
+like ($io, qr{^(?:IO)+\z}, "IOIOIO...");
diff --git a/src/test/resources/module/Text-CSV/t/csv_method.t b/src/test/resources/module/Text-CSV/t/csv_method.t
new file mode 100644
index 000000000..3c5273c27
--- /dev/null
+++ b/src/test/resources/module/Text-CSV/t/csv_method.t
@@ -0,0 +1,35 @@
+use strict;
+use warnings;
+use File::Spec;
+use Test::More tests => 5;
+
+BEGIN {
+ $ENV{PERL_TEXT_CSV} = $ENV{TEST_PERL_TEXT_CSV} || 0;
+ require_ok "Text::CSV";
+ plan skip_all => "Cannot load Text::CSV" if $@;
+ }
+
+{
+ my $file = prepare('1,2,3');
+ my $csv = Text::CSV->new ();
+ ok my $aoa = eval { $csv->csv (in => $file) };
+ is_deeply($aoa, [[1,2,3]]) or note explain $aoa;
+ unlink $file;
+}
+
+{
+ my $file = prepare('col1;col2;col3','1;2;3');
+ my $csv = Text::CSV->new ({ sep_char => ";" });
+ ok my $aoh = eval { $csv->csv (in => $file, bom => 1) };
+ is_deeply($aoh, [{col1 => 1, col2 => 2, col3 => 3}]) or note explain $aoh;
+ unlink $file;
+}
+
+sub prepare {
+ my @lines = @_;
+ my $file = File::Spec->catfile(File::Spec->tmpdir, "file.csv");
+ open my $fh, '>', $file;
+ print $fh "$_\n" for @lines;
+ close $fh;
+ $file;
+}
diff --git a/src/test/resources/module/Text-CSV/t/fields_containing_0.t b/src/test/resources/module/Text-CSV/t/fields_containing_0.t
new file mode 100644
index 000000000..d726b12e1
--- /dev/null
+++ b/src/test/resources/module/Text-CSV/t/fields_containing_0.t
@@ -0,0 +1,91 @@
+#!/usr/bin/perl
+
+=head1 DESCRIPTION
+
+This is a test program that succeeds with Text::CSV_PP and fails with
+Text::CSV_XS. The Text::CSV_XS behaviour is the correct one.
+
+=head1 CREDITS AND LICENSE
+
+The sample data (now anonymised) and a test program were contributed by
+blue_sky on Freenode’s
+#perl channel as part of a problem report with Text::CSV_PP based on the
+Text::CSV documentation. License is open source and compatible with the license
+of Text::CSV.
+
+Converted into a test program by Shlomi Fish ( L )
+, while disclaiming all explicit or implicit copyright ownership on the
+modifications.
+
+==head1 MODIFICATION
+
+modified by makamaka for old perl.
+
+=cut
+
+#use warnings;
+$^W = 1;
+use strict;
+
+use Test::More tests => 4;
+
+my $FALSE = 0;
+# my $USE_XS = $ENV{'USE_TEXT_CSV_XS'};
+my $USE_XS = $FALSE;
+
+use Text::CSV_PP;
+use Data::Dumper qw(Dumper);
+
+END { unlink '_fc0_test.csv'; }
+
+if ($USE_XS)
+{
+ require Text::CSV_XS;
+}
+
+{
+ my $csv_text = <<'EOF';
+"DIVISION CODE", "DIVISION DESCRIPTION", "CUSTOMER CODE", "CUSTOMER NAME", "SHORT NAME", "ADDRESS LINE 1", "ADDRESS LINE 2", "ADDRESS LINE 3", "TOWN", "COUNTY", "POST CODE", "COUNTRY", "GRID REF", "TELEPHONE", "AGENT CODE", "YEAR TO DATE SALES"
+"1", "UK", "Lambda", "Gambda Noo", "Foo", "Quad", "Rectum", "", "Eingoon", "Land", "Simplex", "", "", "099 999", "", 0.00
+EOF
+
+# open my $IF, "<", \$csv_text;
+ my $IF;
+ open $IF, ">_fc0_test.csv" or die "_fc0_test.csv: $!";
+ print $IF $csv_text;
+ close $IF;
+
+ open $IF, "<_fc0_test.csv" or die "_fc0_test.csv: $!";
+
+ my $csv = ($USE_XS ? "Text::CSV_XS" : "Text::CSV_PP")->new({
+ allow_whitespace => 1,
+ allow_loose_escapes => 1,
+ }) or die "Cannot use CSV: ".Text::CSV->error_diag();
+
+ $csv->column_names( $csv->getline($IF) );
+
+ {
+ my $first_line = $csv->getline_hr($IF);
+
+ # TEST
+ is ($first_line->{'POST CODE'}, 'Simplex',
+ "First line POST CODE"
+ );
+
+ # TEST
+ is ($first_line->{'COUNTRY'}, '',
+ "First line COUNTRY",
+ );
+
+ # TEST
+ is ($first_line->{'GRID REF'}, '',
+ "First line GRID REF",
+ );
+
+ # TEST
+ is ($first_line->{'TELEPHONE'}, '099 999',
+ "First line TELEPHONE",
+ );
+ }
+ close($IF);
+}
diff --git a/src/test/resources/module/Text-CSV/t/rt99774.t b/src/test/resources/module/Text-CSV/t/rt99774.t
new file mode 100644
index 000000000..8406fb599
--- /dev/null
+++ b/src/test/resources/module/Text-CSV/t/rt99774.t
@@ -0,0 +1,42 @@
+
+$^W = 1;
+use strict;
+
+use Test::More tests => 8;
+
+
+BEGIN {
+ $ENV{PERL_TEXT_CSV} = $ENV{TEST_PERL_TEXT_CSV} || 0;
+ require_ok "Text::CSV";
+ plan skip_all => "Cannot load Text::CSV" if $@;
+ require "./t/util.pl";
+ }
+
+my $csv = Text::CSV->new ( { binary => 1, sep_char => ';', allow_whitespace => 1, quote_char => '"' } );
+
+# https://rt.cpan.org/Public/Bug/Display.html?id=99774
+
+while ( my $line = ) {
+ my $text = $line;
+ chomp($text); $text =~ s/"//g;
+ my $expect = [ split/;/, $text ];
+
+ $csv->parse($line);
+ is_deeply( [$csv->fields], $expect, $line );
+}
+
+# https://rt.cpan.org/Public/Bug/Display.html?id=92509
+
+for my $allow_whitespace ( 0, 1 ) {
+ $csv = Text::CSV->new ( { allow_whitespace => $allow_whitespace } );
+ $csv->parse(q{"value1","0","value3"});
+ is_deeply( [$csv->fields], ["value1","0","value3"], 'allow_whitespace:' . $allow_whitespace );
+}
+
+
+__DATA__
+"data_quality_id";"language_version_id";"name"
+"0";"2";"0%"
+"10";"2";"33%"
+"20";"2";"66%"
+"30";"2";"100%"
diff --git a/src/test/resources/module/Text-CSV/t/util.pl b/src/test/resources/module/Text-CSV/t/util.pl
new file mode 100644
index 000000000..346ba9c89
--- /dev/null
+++ b/src/test/resources/module/Text-CSV/t/util.pl
@@ -0,0 +1,141 @@
+use strict;
+
+my %special = ( 9 => "\\t", 10 => "\\n", 13 => "\\r" );
+my $ebcdic = ord ("A") == 0xc1;
+my @ebcdic = (# Convert EBCDIC 2 ASCII
+ 0x00, 0x01, 0x02, 0x03, 0x9c, 0x09, 0x86, 0x7f, 0x97, 0x8d, 0x8e, 0x0b,
+ 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x9d, 0x0a, 0x08, 0x87,
+ 0x18, 0x19, 0x92, 0x8f, 0x1c, 0x1d, 0x1e, 0x1f, 0x80, 0x81, 0x82, 0x83,
+ 0x84, 0x85, 0x17, 0x1b, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x05, 0x06, 0x07,
+ 0x90, 0x91, 0x16, 0x93, 0x94, 0x95, 0x96, 0x04, 0x98, 0x99, 0x9a, 0x9b,
+ 0x14, 0x15, 0x9e, 0x1a, 0x20, 0xa0, 0xe2, 0xe4, 0xe0, 0xe1, 0xe3, 0xe5,
+ 0xe7, 0xf1, 0xa2, 0x2e, 0x3c, 0x28, 0x2b, 0x7c, 0x26, 0xe9, 0xea, 0xeb,
+ 0xe8, 0xed, 0xee, 0xef, 0xec, 0xdf, 0x21, 0x24, 0x2a, 0x29, 0x3b, 0x5e,
+ 0x2d, 0x2f, 0xc2, 0xc4, 0xc0, 0xc1, 0xc3, 0xc5, 0xc7, 0xd1, 0xa6, 0x2c,
+ 0x25, 0x5f, 0x3e, 0x3f, 0xf8, 0xc9, 0xca, 0xcb, 0xc8, 0xcd, 0xce, 0xcf,
+ 0xcc, 0x60, 0x3a, 0x23, 0x40, 0x27, 0x3d, 0x22, 0xd8, 0x61, 0x62, 0x63,
+ 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0xab, 0xbb, 0xf0, 0xfd, 0xfe, 0xb1,
+ 0xb0, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0xaa, 0xba,
+ 0xe6, 0xb8, 0xc6, 0xa4, 0xb5, 0x7e, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
+ 0x79, 0x7a, 0xa1, 0xbf, 0xd0, 0x5b, 0xde, 0xae, 0xac, 0xa3, 0xa5, 0xb7,
+ 0xa9, 0xa7, 0xb6, 0xbc, 0xbd, 0xbe, 0xdd, 0xa8, 0xaf, 0x5d, 0xb4, 0xd7,
+ 0x7b, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0xad, 0xf4,
+ 0xf6, 0xf2, 0xf3, 0xf5, 0x7d, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, 0x50,
+ 0x51, 0x52, 0xb9, 0xfb, 0xfc, 0xf9, 0xfa, 0xff, 0x5c, 0xf7, 0x53, 0x54,
+ 0x55, 0x56, 0x57, 0x58, 0x59, 0x5a, 0xb2, 0xd4, 0xd6, 0xd2, 0xd3, 0xd5,
+ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0xb3, 0xdb,
+ 0xdc, 0xd9, 0xda, 0x9f );
+
+sub _readable {
+ defined $_[0] or return "--undef--";
+ join "", map {
+ my $cp = ord $_;
+ $ebcdic and $cp = $ebcdic[$cp];
+ $cp >= 0x20 && $cp <= 0x7e
+ ? $_
+ : $special{$cp} || sprintf "\\x{%02x}", $cp
+ } split m//, $_[0];
+ } # _readable
+
+sub is_binary {
+ my ($str, $exp, $tst) = @_;
+ if ($str eq $exp) {
+ ok (1, $tst);
+ }
+ else {
+ my ($hs, $he) = map { _readable $_ } $str, $exp;
+ is ($hs, $he, $tst);
+ }
+ } # is_binary
+
+# The rest is a modified copy of CORE's t/charset_tools.pl
+my @utf8_skip = $ebcdic ? (
+ # This translates a utf-8-encoded byte into how many
+ # bytes the full utf8 character occupies.
+
+ # 0 1 2 3 4 5 6 7 8 9 A B C D E F
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, # 0
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, # 1
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, # 2
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, # 3
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, # 4
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, # 5
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, # 6
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, # 7
+ -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, # 8
+ -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, # 9
+ -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, # A
+ -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, # B
+ -1,-1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, # C
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, # D
+ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, # E
+ 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 7,13, # F
+ ) : ();
+
+# Used for BOM testing
+*byte_utf8a_to_utf8n = $ebcdic ? sub {
+ # Convert a UTF-8 byte sequence into the platform's native UTF-8
+ # equivalent, currently only UTF-8 and UTF-EBCDIC.
+
+ my $string = shift;
+ utf8::is_utf8 ($string) and return $string;
+
+ my $length = length $string;
+ #diag ($string);
+ #diag ($length);
+ my $out = "";
+ for (my $i = 0; $i < $length; $i++) {
+ my $byte = ord substr $string, $i, 1;
+ my $byte_count = $utf8_skip[$byte];
+ #diag ($byte);
+ #diag ($byte_count);
+
+ $byte_count < 0 and die "Illegal start byte";
+ ($i + $byte_count) > $length and
+ die "Attempt to read " . ($i + $byte_count - $length) . " beyond end-of-string";
+
+ # Just translate UTF-8 invariants directly.
+ if ($byte_count == 1) {
+ $out .= chr utf8::unicode_to_native ($byte);
+ next;
+ }
+
+ # Otherwise calculate the code point ordinal represented by the
+ # sequence beginning with this byte, using the algorithm adapted from
+ # utf8.c. We absorb each byte in the sequence as we go along
+ my $ord = $byte & (0x1F >> ($byte_count - 2));
+ my $bytes_remaining = $byte_count - 1;
+ while ($bytes_remaining > 0) {
+ $byte = ord substr $string, ++$i, 1;
+ ($byte & 0xC0) == 0x80 or
+ die sprintf "byte '%X' is not a valid continuation", $byte;
+ $ord = $ord << 6 | ($byte & 0x3f);
+ $bytes_remaining--;
+ }
+ #diag ($byte);
+ #diag ($ord);
+
+ my $expected_bytes =
+ $ord < 0x00000080 ? 1 :
+ $ord < 0x00000800 ? 2 :
+ $ord < 0x00010000 ? 3 :
+ $ord < 0x00200000 ? 4 :
+ $ord < 0x04000000 ? 5 :
+ $ord < 0x80000000 ? 6 : 7; #: (uv) < UTF8_QUAD_MAX ? 7 : 13 )
+
+ # Make sure is not an overlong sequence
+ $byte_count == $expected_bytes or
+ die sprintf "character U+%X should occupy %d bytes, not %d",
+ $ord, $expected_bytes, $byte_count;
+
+ # Now that we have found the code point the original UTF-8 meant, we
+ # use the native chr function to get its native string equivalent.
+ $out .= chr utf8::unicode_to_native ($ord);
+ }
+
+ utf8::encode ($out); # Turn off utf8 flag.
+ #diag ($out);
+ return $out;
+ } : sub { return shift };
+
+1;