Skip to content

Commit

Permalink
Import of HMBRAND/Text-CSV_XS-0.43 from CPAN.
Browse files Browse the repository at this point in the history
gitpan-cpan-distribution: Text-CSV_XS
gitpan-cpan-version:      0.43
gitpan-cpan-path:         HMBRAND/Text-CSV_XS-0.43.tgz
gitpan-cpan-author:       HMBRAND
gitpan-cpan-maturity:     released
  • Loading branch information
H.Merijn Brand authored and Gitpan committed Oct 22, 2014
1 parent 089a9ce commit 28c14e0
Show file tree
Hide file tree
Showing 11 changed files with 265 additions and 56 deletions.
42 changes: 32 additions & 10 deletions CSV_XS.pm
Expand Up @@ -30,7 +30,7 @@ use DynaLoader ();
use Carp;

use vars qw( $VERSION @ISA );
$VERSION = "0.42";
$VERSION = "0.43";
@ISA = qw( DynaLoader );

sub PV { 0 }
Expand Down Expand Up @@ -267,13 +267,14 @@ sub error_input
sub error_diag
{
my $self = shift;
my @diag = (0, $last_new_err);
my @diag = (0, $last_new_err, 0);

unless ($self && ref $self) { # Class method or direct call
$last_new_err and $diag[0] = 1000;
}
elsif ($self->isa (__PACKAGE__) && exists $self->{_ERROR_DIAG}) {
@diag = (0 + $self->{_ERROR_DIAG}, $self->{_ERROR_DIAG});
exists $self->{_ERROR_POS} and $diag[2] = 1 + $self->{_ERROR_POS};
}
my $context = wantarray;
unless (defined $context) { # Void context
Expand Down Expand Up @@ -724,6 +725,18 @@ would result in a parse error. Though it is still bad practice to
allow this format, we cannot help there are some vendors that make
their applications spit out lines styled like this.
In case there is B<really> bad CSV data, like
1,"foo "bar" baz",42
or
1,""foo bar baz"",42
there is a way to get that parsed, and leave the quotes inside the quoted
field as-is. This can be achieved by setting C<allow_loose_quotes> B<AND>
making sure that the C<escape_char> is I<not> equal to C<quote_char>.
=item escape_char
The character used for escaping certain characters inside quoted fields.
Expand Down Expand Up @@ -1089,9 +1102,9 @@ C<combine ()> or C<parse ()>, whichever was called more recently.
Text::CSV_XS->error_diag ();
$csv->error_diag ();
$error_code = 0 + $csv->error_diag ();
$error_str = "" . $csv->error_diag ();
($cde, $str) = $csv->error_diag ();
$error_code = 0 + $csv->error_diag ();
$error_str = "" . $csv->error_diag ();
($cde, $str, $pos) = $csv->error_diag ();
If (and only if) an error occured, this function returns the diagnostics
of that error.
Expand All @@ -1100,7 +1113,9 @@ If called in void context, it will print the internal error code and the
associated error message to STDERR.
If called in list context, it will return the error code and the error
message in that order.
message in that order. If the last error was from parsing, the third
value returned is the best guess at the location within the line that was
being parsed. It's value is 1-based.
If called in scalar context, it will return the diagnostics in a single
scalar, a-la $!. It will contain the error code in numeric context, and
Expand All @@ -1109,6 +1124,12 @@ the diagnostics message in string context.
When called as a class method or a direct function call, the error diag
is that of the last C<new ()> call.
=head2 SetDiag
$csv->SetDiag (0);
Use to reset the diagnosticts if you are dealing with errors.
=head1 INTERNALS
=over 4
Expand All @@ -1117,8 +1138,6 @@ is that of the last C<new ()> call.
=item Parse (...)
=item SetDiag (...)
=back
The arguments to these two internal functions are deliberately not
Expand Down Expand Up @@ -1187,6 +1206,9 @@ Reading a CSV file line by line:
}
close $fh;
For more extended examples, see the C<examples/> subdirectory in the
original distribution.
=head1 TODO
=over 2
Expand Down Expand Up @@ -1395,8 +1417,8 @@ has been selected with the constructor.
=item 2023 "EIQ - QUO ..."
I have not been able yet to generate this error. Please inform me how you
got it when you get it.
Sequences like C<"foo "bar" baz",quux> and C<2023,",2008-04-05,"Foo, Bar",\n>
will cause this error.
=item 2024 "EIQ - EOF cannot be escaped, not even inside quotes"
Expand Down
64 changes: 47 additions & 17 deletions CSV_XS.xs
Expand Up @@ -128,7 +128,7 @@ xs_error_t xs_errors[] = {
/* EIQ - Error Inside Quotes */
{ 2021, "EIQ - NL char inside quotes, binary off" },
{ 2022, "EIQ - CR char inside quotes, binary off" },
{ 2023, "EIQ - QUO ..." },
{ 2023, "EIQ - QUO character not allowed" },
{ 2024, "EIQ - EOF cannot be escaped, not even inside quotes" },
{ 2025, "EIQ - Loose unescaped escape" },
{ 2026, "EIQ - Binary character inside quoted field, binary off" },
Expand Down Expand Up @@ -182,6 +182,8 @@ static SV *SetDiag (csv_t *csv, int xse)
SvIOK_on (err);
hv_store (csv->self, "_ERROR_DIAG", 11, err, 0);
}
if (xse == 0)
hv_store (csv->self, "_ERROR_POS", 10, newSViv (0), 0);
return (err);
} /* SetDiag */

Expand Down Expand Up @@ -377,11 +379,15 @@ static int Combine (csv_t *csv, SV *dst, AV *fields)

if (i > 0)
CSV_PUT (csv, dst, csv->sep_char);
if ((svp = av_fetch (fields, i, 0)) && *svp && SvOK (*svp)) {
if ((svp = av_fetch (fields, i, 0)) && *svp) {
STRLEN len;
char *ptr = SvPV (*svp, len);
char *ptr;
int quoteMe = csv->always_quote;

unless ((SvOK (*svp) || (
(SvMAGICAL (*svp) && (mg_get (*svp), 1) && SvOK (*svp)))
)) continue;
ptr = SvPV (*svp, len);
/* Do we need quoting? We do quote, if the user requested
* (always_quote), if binary or blank characters are found
* and if the string contains quote or escape characters.
Expand Down Expand Up @@ -450,8 +456,9 @@ static int Combine (csv_t *csv, SV *dst, AV *fields)
#if MAINT_DEBUG
static char str_parsed[40];
#endif
static void ParseError (csv_t *csv, int xse)
static void ParseError (csv_t *csv, int xse, int pos)
{
hv_store (csv->self, "_ERROR_POS", 10, newSViv (pos), 0);
if (csv->tmp) {
if (hv_store (csv->self, "_ERROR_INPUT", 12, csv->tmp, 0))
SvREFCNT_inc (csv->tmp);
Expand Down Expand Up @@ -507,12 +514,12 @@ static int CsvGet (csv_t *csv, SV *src)

#define ERROR_INSIDE_QUOTES(diag_code) { \
SvREFCNT_dec (sv); \
ParseError (csv, diag_code); \
ParseError (csv, diag_code, spl); \
return FALSE; \
}
#define ERROR_INSIDE_FIELD(diag_code) { \
SvREFCNT_dec (sv); \
ParseError (csv, diag_code); \
ParseError (csv, diag_code, spl); \
return FALSE; \
}

Expand Down Expand Up @@ -615,8 +622,8 @@ static int Parse (csv_t *csv, SV *src, AV *fields, AV *fflags)
STRLEN len;
int seenSomething = FALSE;
int fnum = 0;
#if MAINT_DEBUG
int spl = -1;
#if MAINT_DEBUG
memset (str_parsed, 0, 40);
#endif

Expand All @@ -630,8 +637,9 @@ static int Parse (csv_t *csv, SV *src, AV *fields, AV *fflags)
NewField;

seenSomething = TRUE;
spl++;
#if MAINT_DEBUG
if (++spl < 39) str_parsed[spl] = c;
if (spl < 39) str_parsed[spl] = c;
#endif
restart:
if (c == csv->sep_char) {
Expand Down Expand Up @@ -781,7 +789,6 @@ restart:

if (!csv->escape_char || c != csv->escape_char) {
/* Field is terminated */
AV_PUSH;
c2 = CSV_GET;

#if ALLOW_ALLOW
Expand All @@ -792,30 +799,46 @@ restart:
}
#endif

if (c2 == csv->sep_char)
if (c2 == csv->sep_char) {
AV_PUSH;
continue;
}

if (c2 == EOF)
if (c2 == EOF) {
AV_PUSH;
return TRUE;
}

if (c2 == CH_CR) {
int c3;

if (csv->eol_is_cr)
if (csv->eol_is_cr) {
AV_PUSH;
/* uncovered */ return TRUE;
}

c3 = CSV_GET;
if (c3 == CH_NL)
if (c3 == CH_NL) {
AV_PUSH;
/* uncovered */ return TRUE;
}

ParseError (csv, 2010);
ParseError (csv, 2010, spl);
return FALSE;
}

if (c2 == CH_NL)
if (c2 == CH_NL) {
AV_PUSH;
return TRUE;
}

if (csv->allow_loose_quotes) {
CSV_PUT_SV (sv, c);
c = c2;
goto restart;
}

ParseError (csv, 2011);
ParseError (csv, 2011, spl);
return FALSE;
}

Expand Down Expand Up @@ -865,6 +888,12 @@ restart:
return TRUE;
}
}

if (csv->allow_loose_quotes && csv->escape_char != csv->quote_char) {
CSV_PUT_SV (sv, c);
c = c2;
goto restart;
}
#if ALLOW_ALLOW
if (csv->allow_whitespace) {
while (c2 == CH_SPACE || c2 == CH_TAB) {
Expand All @@ -876,6 +905,7 @@ restart:
}
}
#endif

ERROR_INSIDE_QUOTES (2023);
}
}
Expand Down Expand Up @@ -1105,7 +1135,7 @@ Combine (self, dst, fields, useIO)
AV *av;

CSV_XS_SELF;
av = (AV*)SvRV (fields);
av = (AV *)SvRV (fields);
ST (0) = xsCombine (hv, av, dst, useIO) ? &PL_sv_yes : &PL_sv_undef;
XSRETURN (1);
/* XS Combine */
Expand Down
8 changes: 8 additions & 0 deletions ChangeLog
@@ -1,3 +1,11 @@
2008-04-21 0.43 - H.Merijn Brand <h.m.brand@xs4all.nl>

* parse errors try to remember failing position
* used valgrind to test for leaks (devel-only)
* used Test::Valgrind as alternative leak check (devel-only)
* improve documentation for error 2023
* nailed the loose quotes in quoted fields

2008-04-16 0.42 - H.Merijn Brand <h.m.brand@xs4all.nl>

* Generate META.yml myself. I won't use Build.PL
Expand Down
1 change: 1 addition & 0 deletions MANIFEST
Expand Up @@ -27,5 +27,6 @@ t/76_magic.t array_ref from magig
t/80_diag.t Error diagnostics
t/util.pl Extra test utilities
examples/csv2xls Script to onvert CSV files to M$Excel
examples/csv-check Script to check a CSV file/stream
examples/speed.pl Small benchmark script
META.yml Module meta-data (added by MakeMaker)
4 changes: 2 additions & 2 deletions META.yml
@@ -1,6 +1,6 @@
--- #YAML:1.0
name: Text-CSV_XS
version: 0.42
version: 0.43
abstract: Comma-Separated Values manipulation routines
license: perl
author:
Expand All @@ -10,7 +10,7 @@ distribution_type: module
provides:
Text::CSV_XS:
file: CSV_XS.pm
version: 0.42
version: 0.43
requires:
perl: 5.005
DynaLoader: 0
Expand Down
21 changes: 21 additions & 0 deletions Makefile.PL
Expand Up @@ -37,6 +37,7 @@ my %wm = (
CSV_XS.gcno
CSV_XS.xs.gcov
cover_db
valgrind.log
)
},
);
Expand All @@ -50,6 +51,19 @@ package MY;

sub postamble
{
my $valgrind = join " ", qw(
PERL_DESTRUCT_LEVEL=2 PERL_DL_NONLAZY=1
valgrind
--suppressions=sandbox/perl.supp
--leak-check=yes
--leak-resolution=high
--show-reachable=yes
--num-callers=50
--log-fd=3
$(FULLPERLRUN) "-MExtUtils::Command::MM" "-e"
"test_harness($(TEST_VERBOSE), '$(INST_LIB)', '$(INST_ARCHLIB)')"
$(TEST_FILES) 3>valgrind.log
);
join "\n" =>
'cover test_cover:',
' cover -delete',
Expand All @@ -59,6 +73,13 @@ sub postamble
' find * -name \*.gcov -exec gcov2perl {} \;',
' cover',
'',
'leakcheck:',
" $valgrind",
' -@tail -5 valgrind.log',
'',
'leaktest:',
q{ sandbox/leaktest $(FULLPERLRUN) "test_harness($(TEST_VERBOSE), '$(INST_LIB)', '$(INST_ARCHLIB)')" $(TEST_FILES)},
'',
'fixmeta: distmeta',
' perl genMETA.pl',
'',
Expand Down

0 comments on commit 28c14e0

Please sign in to comment.