Skip to content

Commit

Permalink
Merge pull request #76 from pali/master
Browse files Browse the repository at this point in the history
Fix handling of undef, COW and magic scalar argument in Unicode.xs
  • Loading branch information
dankogai authored Nov 22, 2016
2 parents d915450 + e26cee2 commit 3728813
Show file tree
Hide file tree
Showing 2 changed files with 66 additions and 19 deletions.
60 changes: 52 additions & 8 deletions Unicode/Unicode.xs
Original file line number Diff line number Diff line change
Expand Up @@ -143,13 +143,38 @@ CODE:
STRLEN ulen;
STRLEN resultbuflen;
U8 *resultbuf;
U8 *s = (U8 *)SvPVbyte(str,ulen);
U8 *e = (U8 *)SvEND(str);
U8 *s;
U8 *e;
bool modify = (check && !(check & ENCODE_LEAVE_SRC));
bool temp_result;

SvGETMAGIC(str);
if (!SvOK(str))
XSRETURN_UNDEF;
s = modify ? (U8 *)SvPV_force_nomg(str, ulen) : (U8 *)SvPV_nomg(str, ulen);
if (SvUTF8(str)) {
if (!modify) {
SV *tmp = sv_2mortal(newSVpvn((char *)s, ulen));
SvUTF8_on(tmp);
if (SvTAINTED(str))
SvTAINTED_on(tmp);
str = tmp;
s = (U8 *)SvPVX(str);
}
if (ulen) {
if (!utf8_to_bytes(s, &ulen))
croak("Wide character");
SvCUR_set(str, ulen);
}
SvUTF8_off(str);
}
e = s+ulen;

/* Optimise for the common case of being called from PerlIOEncode_fill()
with a standard length buffer. In this case the result SV's buffer is
only used temporarily, so we can afford to allocate the maximum needed
and not care about unused space. */
const bool temp_result = (ulen == PERLIO_BUFSIZ);
temp_result = (ulen == PERLIO_BUFSIZ);

ST(0) = sv_2mortal(result);
SvUTF8_on(result);
Expand Down Expand Up @@ -308,6 +333,7 @@ CODE:
SvCUR_set(str,0);
}
*SvEND(str) = '\0';
SvSETMAGIC(str);
}

if (!temp_result) shrink_buffer(result);
Expand All @@ -328,13 +354,32 @@ CODE:
const STRLEN usize = (size > 0 ? size : 1);
SV *result = newSVpvn("", 0);
STRLEN ulen;
U8 *s = (U8 *) SvPVutf8(utf8, ulen);
const U8 *e = (U8 *) SvEND(utf8);
U8 *s;
U8 *e;
bool modify = (check && !(check & ENCODE_LEAVE_SRC));
bool temp_result;

SvGETMAGIC(utf8);
if (!SvOK(utf8))
XSRETURN_UNDEF;
s = modify ? (U8 *)SvPV_force_nomg(utf8, ulen) : (U8 *)SvPV_nomg(utf8, ulen);
if (!SvUTF8(utf8)) {
if (!modify) {
SV *tmp = sv_2mortal(newSVpvn((char *)s, ulen));
if (SvTAINTED(utf8))
SvTAINTED_on(tmp);
utf8 = tmp;
}
sv_utf8_upgrade_nomg(utf8);
s = (U8 *)SvPV_nomg(utf8, ulen);
}
e = s+ulen;

/* Optimise for the common case of being called from PerlIOEncode_flush()
with a standard length buffer. In this case the result SV's buffer is
only used temporarily, so we can afford to allocate the maximum needed
and not care about unused space. */
const bool temp_result = (ulen == PERLIO_BUFSIZ);
temp_result = (ulen == PERLIO_BUFSIZ);

ST(0) = sv_2mortal(result);

Expand Down Expand Up @@ -408,12 +453,11 @@ CODE:
SvCUR_set(utf8,0);
}
*SvEND(utf8) = '\0';
SvSETMAGIC(utf8);
}

if (!temp_result) shrink_buffer(result);
if (SvTAINTED(utf8)) SvTAINTED_on(result); /* propagate taintedness */

SvSETMAGIC(utf8);

XSRETURN(1);
}
25 changes: 14 additions & 11 deletions t/magic.t
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,12 @@ use warnings;

use Encode qw(find_encoding encode decode encode_utf8 decode_utf8 is_utf8 _utf8_on _utf8_off FB_CROAK);

use Test::More tests => 3*(2*(3*(4*4)+4)+4+3*3);
use Test::More tests => 3*(2*(4*(4*4)+4)+4+3*3);

my $ascii = find_encoding('ASCII');
my $latin1 = find_encoding('Latin1');
my $utf8 = find_encoding('UTF-8');
my $utf16 = find_encoding('UTF-16LE');

my $undef = undef;
my $ascii_str = 'ascii_str';
Expand All @@ -34,44 +35,46 @@ _utf8_on($utf8_str);
{
foreach my $str ($undef, $ascii_str, $utf8_str) {
foreach my $croak (0, 1) {
foreach my $enc ('ASCII', 'Latin1', 'UTF-8') {
foreach my $enc ('ASCII', 'Latin1', 'UTF-8', 'UTF-16LE') {
my $mod = defined $str && $croak;
my $func = "encode('" . $enc . "', " . (!defined $str ? 'undef' : is_utf8($str) ? '$utf8_str' : '$ascii_str') . ($croak ? ', FB_CROAK' : '') . ')';
tie my $input, 'TieScalarCounter', $str;
my $output = encode($enc, $input, $croak ? FB_CROAK : 0);
is(tied($input)->{fetch}, 1, "$func processes get magic only once");
is(tied($input)->{store}, $mod ? 1 : 0, "$func " . ($mod ? 'processes set magic only once' : 'does not process set magic'));
is($input, $mod ? '' : $str, "$func " . ($mod ? 'modifies' : 'does not modify') . ' $input string');
is($output, $str, "$func returns correct \$output string");
is($output, ((defined $str and $enc eq 'UTF-16LE') ? encode("UTF-16LE", $str) : $str), "$func returns correct \$output string");
}
foreach my $enc ('ASCII', 'Latin1', 'UTF-8') {
foreach my $enc ('ASCII', 'Latin1', 'UTF-8', 'UTF-16LE') {
my $mod = defined $str && $croak;
my $func = "decode('" . $enc . "', " . (!defined $str ? 'undef' : is_utf8($str) ? '$utf8_str' : '$ascii_str') . ($croak ? ', FB_CROAK' : '') . ')';
tie my $input, 'TieScalarCounter', $str;
my $input_str = ((defined $str and $enc eq 'UTF-16LE') ? encode("UTF-16LE", $str) : $str);
tie my $input, 'TieScalarCounter', $input_str;
my $output = decode($enc, $input, $croak ? FB_CROAK : 0);
is(tied($input)->{fetch}, 1, "$func processes get magic only once");
is(tied($input)->{store}, $mod ? 1 : 0, "$func " . ($mod ? 'processes set magic only once' : 'does not process set magic'));
is($input, $mod ? '' : $str, "$func " . ($mod ? 'modifies' : 'does not modify') . ' $input string');
is($input, $mod ? '' : $input_str, "$func " . ($mod ? 'modifies' : 'does not modify') . ' $input string');
is($output, $str, "$func returns correct \$output string");
}
foreach my $obj ($ascii, $latin1, $utf8) {
foreach my $obj ($ascii, $latin1, $utf8, $utf16) {
my $mod = defined $str && $croak;
my $func = '$' . $obj->name() . '->encode(' . (!defined $str ? 'undef' : is_utf8($str) ? '$utf8_str' : '$ascii_str') . ($croak ? ', FB_CROAK' : '') . ')';
tie my $input, 'TieScalarCounter', $str;
my $output = $obj->encode($input, $croak ? FB_CROAK : 0);
is(tied($input)->{fetch}, 1, "$func processes get magic only once");
is(tied($input)->{store}, $mod ? 1 : 0, "$func " . ($mod ? 'processes set magic only once' : 'does not process set magic'));
is($input, $mod ? '' : $str, "$func " . ($mod ? 'modifies' : 'does not modify') . ' $input string');
is($output, $str, "$func returns correct \$output string");
is($output, ((defined $str and $obj == $utf16) ? encode("UTF-16LE", $str) : $str), "$func returns correct \$output string");
}
foreach my $obj ($ascii, $latin1, $utf8) {
foreach my $obj ($ascii, $latin1, $utf8, $utf16) {
my $mod = defined $str && $croak;
my $func = '$' . $obj->name() . '->decode(' . (!defined $str ? 'undef' : is_utf8($str) ? '$utf8_str' : '$ascii_str') . ($croak ? ', FB_CROAK' : '') . ')';
tie my $input, 'TieScalarCounter', $str;
my $input_str = ((defined $str and $obj == $utf16) ? encode("UTF-16LE", $str) : $str);
tie my $input, 'TieScalarCounter', $input_str;
my $output = $obj->decode($input, $croak ? FB_CROAK : 0);
is(tied($input)->{fetch}, 1, "$func processes get magic only once");
is(tied($input)->{store}, $mod ? 1 : 0, "$func " . ($mod ? 'processes set magic only once' : 'does not process set magic'));
is($input, $mod ? '' : $str, "$func " . ($mod ? 'modifies' : 'does not modify') . ' $input string');
is($input, $mod ? '' : $input_str, "$func " . ($mod ? 'modifies' : 'does not modify') . ' $input string');
is($output, $str, "$func returns correct \$output string");
}
{
Expand Down

0 comments on commit 3728813

Please sign in to comment.