Skip to content

Commit

Permalink
handy.h: Fix isBLANK_uni and isBLANK_utf8
Browse files Browse the repository at this point in the history
These macros have never worked outside the Latin1 range, so this extends
them to work.

There are no tests I could find for things in handy.h, except that many
of them are called all over the place during the normal course of
events.  This commit adds a new file for such testing, containing for
now only with a few tests for the isBLANK's
  • Loading branch information
Karl Williamson committed Jun 30, 2012
1 parent f74da94 commit bdd8600
Show file tree
Hide file tree
Showing 13 changed files with 76 additions and 5 deletions.
1 change: 1 addition & 0 deletions MANIFEST
Expand Up @@ -3972,6 +3972,7 @@ ext/XS-APItest/t/gv_fetchmeth_autoload.t XS::APItest: tests for gv_fetchmeth_aut
ext/XS-APItest/t/gv_fetchmethod_flags.t XS::APItest: tests for gv_fetchmethod_flags() and variants
ext/XS-APItest/t/gv_fetchmeth.t XS::APItest: tests for gv_fetchmeth() and variants
ext/XS-APItest/t/gv_init.t XS::APItest: tests for gv_init and variants
ext/XS-APItest/t/handy.t XS::APItest: tests for handy.h
ext/XS-APItest/t/hash.t XS::APItest: tests for hash related APIs
ext/XS-APItest/t/keyword_multiline.t test keyword plugin parsing across lines
ext/XS-APItest/t/keyword_plugin.t test keyword plugin mechanism
Expand Down
2 changes: 2 additions & 0 deletions embed.fnc
Expand Up @@ -594,6 +594,7 @@ ApPR |bool |is_uni_alnum |UV c
ApPR |bool |is_uni_idfirst |UV c
ApPR |bool |is_uni_alpha |UV c
ApPR |bool |is_uni_ascii |UV c
ApPR |bool |is_uni_blank |UV c
ApPR |bool |is_uni_space |UV c
ApPR |bool |is_uni_cntrl |UV c
ApPR |bool |is_uni_graph |UV c
Expand Down Expand Up @@ -645,6 +646,7 @@ ApR |bool |is_utf8_idcont |NN const U8 *p
ApR |bool |is_utf8_xidcont |NN const U8 *p
ApR |bool |is_utf8_alpha |NN const U8 *p
ApR |bool |is_utf8_ascii |NN const U8 *p
ApR |bool |is_utf8_blank |NN const U8 *p
ApR |bool |is_utf8_space |NN const U8 *p
ApR |bool |is_utf8_perl_space |NN const U8 *p
ApR |bool |is_utf8_perl_word |NN const U8 *p
Expand Down
2 changes: 2 additions & 0 deletions embed.h
Expand Up @@ -226,6 +226,7 @@
#define is_uni_alpha_lc(a) Perl_is_uni_alpha_lc(aTHX_ a)
#define is_uni_ascii(a) Perl_is_uni_ascii(aTHX_ a)
#define is_uni_ascii_lc(a) Perl_is_uni_ascii_lc(aTHX_ a)
#define is_uni_blank(a) Perl_is_uni_blank(aTHX_ a)
#define is_uni_cntrl(a) Perl_is_uni_cntrl(aTHX_ a)
#define is_uni_cntrl_lc(a) Perl_is_uni_cntrl_lc(aTHX_ a)
#define is_uni_digit(a) Perl_is_uni_digit(aTHX_ a)
Expand All @@ -249,6 +250,7 @@
#define is_utf8_alnum(a) Perl_is_utf8_alnum(aTHX_ a)
#define is_utf8_alpha(a) Perl_is_utf8_alpha(aTHX_ a)
#define is_utf8_ascii(a) Perl_is_utf8_ascii(aTHX_ a)
#define is_utf8_blank(a) Perl_is_utf8_blank(aTHX_ a)
#define is_utf8_char Perl_is_utf8_char
#define is_utf8_char_buf Perl_is_utf8_char_buf
#define is_utf8_cntrl(a) Perl_is_utf8_cntrl(aTHX_ a)
Expand Down
1 change: 1 addition & 0 deletions embedvar.h
Expand Up @@ -368,6 +368,7 @@
#define PL_utf8_X_prepend (vTHX->Iutf8_X_prepend)
#define PL_utf8_alnum (vTHX->Iutf8_alnum)
#define PL_utf8_alpha (vTHX->Iutf8_alpha)
#define PL_utf8_blank (vTHX->Iutf8_blank)
#define PL_utf8_digit (vTHX->Iutf8_digit)
#define PL_utf8_foldable (vTHX->Iutf8_foldable)
#define PL_utf8_foldclosures (vTHX->Iutf8_foldclosures)
Expand Down
2 changes: 1 addition & 1 deletion ext/XS-APItest/APItest.pm
Expand Up @@ -5,7 +5,7 @@ use strict;
use warnings;
use Carp;

our $VERSION = '0.40';
our $VERSION = '0.41';

require XSLoader;

Expand Down
14 changes: 14 additions & 0 deletions ext/XS-APItest/APItest.xs
Expand Up @@ -3456,3 +3456,17 @@ test_get_vtbl()
RETVAL = PTR2UV(get_vtbl(-1));
OUTPUT:
RETVAL

bool
test_isBLANK_uni(UV ord)
CODE:
RETVAL = isBLANK_uni(ord);
OUTPUT:
RETVAL

bool
test_isBLANK_utf8(char * p)
CODE:
RETVAL = isBLANK_utf8((U8 *) p);
OUTPUT:
RETVAL
14 changes: 14 additions & 0 deletions ext/XS-APItest/t/handy.t
@@ -0,0 +1,14 @@
#!perl -w

use strict;
use Test::More;

use XS::APItest;

ok(test_isBLANK_uni(ord("\N{EM SPACE}")), "EM SPACE is blank in isBLANK_uni()");
ok(test_isBLANK_utf8("\N{EM SPACE}"), "EM SPACE is blank in isBLANK_utf8()");

ok(! test_isBLANK_uni(ord("\N{GREEK DASIA}")), "GREEK DASIA is not a blank in isBLANK_uni()");
ok(! test_isBLANK_utf8("\N{GREEK DASIA}"), "GREEK DASIA is not a blank in isBLANK_utf8()");

done_testing;
7 changes: 3 additions & 4 deletions handy.h
Expand Up @@ -912,6 +912,7 @@ EXTCONST U32 PL_charclass[];
/* Note that all ignore 'use bytes' */

#define isALNUM_uni(c) generic_uni(isWORDCHAR, is_uni_alnum, c)
#define isBLANK_uni(c) generic_uni(isBLANK, is_uni_blank, c)
#define isIDFIRST_uni(c) generic_uni(isIDFIRST, is_uni_idfirst, c)
#define isALPHA_uni(c) generic_uni(isALPHA, is_uni_alpha, c)
#define isSPACE_uni(c) generic_uni(isSPACE, is_uni_space, c)
Expand All @@ -932,7 +933,6 @@ EXTCONST U32 PL_charclass[];

/* Posix and regular space differ only in U+000B, which is in Latin1 */
#define isPSXSPC_uni(c) ((c) < 256 ? isPSXSPC_L1(c) : isSPACE_uni(c))
#define isBLANK_uni(c) isBLANK(c) /* could be wrong */

#define isALNUM_LC_uvchr(c) (c < 256 ? isALNUM_LC(c) : is_uni_alnum_lc(c))
#define isIDFIRST_LC_uvchr(c) (c < 256 ? isIDFIRST_LC(c) : is_uni_idfirst_lc(c))
Expand Down Expand Up @@ -981,6 +981,7 @@ EXTCONST U32 PL_charclass[];
: Perl__is_utf8__perl_idstart(aTHX_ p))
#define isIDCONT_utf8(p) generic_utf8(isWORDCHAR, is_utf8_xidcont, p)
#define isALPHA_utf8(p) generic_utf8(isALPHA, is_utf8_alpha, p)
#define isBLANK_utf8(p) generic_utf8(isBLANK, is_utf8_blank, p)
#define isSPACE_utf8(p) generic_utf8(isSPACE, is_utf8_space, p)
#define isDIGIT_utf8(p) generic_utf8(isDIGIT, is_utf8_digit, p)
#define isUPPER_utf8(p) generic_utf8(isUPPER, is_utf8_upper, p)
Expand All @@ -1004,11 +1005,10 @@ EXTCONST U32 PL_charclass[];
? isPSXSPC_L1(TWO_BYTE_UTF8_TO_UNI(*(p), \
*((p)+1)))\
: isSPACE_utf8(p)))
#define isBLANK_utf8(c) isBLANK(c) /* could be wrong */

#define isALNUM_LC_utf8(p) isALNUM_LC_uvchr(valid_utf8_to_uvchr(p, 0))
#define isIDFIRST_LC_utf8(p) isIDFIRST_LC_uvchr(valid_utf8_to_uvchr(p, 0))
#define isALPHA_LC_utf8(p) isALPHA_LC_uvchr(valid_utf8_to_uvchr(p, 0))
#define isBLANK_LC_utf8(p) isBLANK_LC_uvchr(valid_utf8_to_uvchr(p, 0))
#define isSPACE_LC_utf8(p) isSPACE_LC_uvchr(valid_utf8_to_uvchr(p, 0))
#define isDIGIT_LC_utf8(p) isDIGIT_LC_uvchr(valid_utf8_to_uvchr(p, 0))
#define isUPPER_LC_utf8(p) isUPPER_LC_uvchr(valid_utf8_to_uvchr(p, 0))
Expand All @@ -1020,7 +1020,6 @@ EXTCONST U32 PL_charclass[];
#define isPUNCT_LC_utf8(p) isPUNCT_LC_uvchr(valid_utf8_to_uvchr(p, 0))

#define isPSXSPC_LC_utf8(c) (isSPACE_LC_utf8(c) ||(c) == '\f')
#define isBLANK_LC_utf8(c) isBLANK(c) /* could be wrong */

/* This conversion works both ways, strangely enough. On EBCDIC platforms,
* CTRL-@ is 0, CTRL-A is 1, etc, just like on ASCII */
Expand Down
1 change: 1 addition & 0 deletions intrpvar.h
Expand Up @@ -614,6 +614,7 @@ PERLVAR(I, VertSpace, SV *)
/* utf8 character class swashes */
PERLVAR(I, utf8_alnum, SV *)
PERLVAR(I, utf8_alpha, SV *)
PERLVAR(I, utf8_blank, SV *)
PERLVAR(I, utf8_space, SV *)
PERLVAR(I, utf8_graph, SV *)
PERLVAR(I, utf8_digit, SV *)
Expand Down
2 changes: 2 additions & 0 deletions perl.c
Expand Up @@ -991,6 +991,7 @@ perl_destruct(pTHXx)
/* clear utf8 character classes */
SvREFCNT_dec(PL_utf8_alnum);
SvREFCNT_dec(PL_utf8_alpha);
SvREFCNT_dec(PL_utf8_blank);
SvREFCNT_dec(PL_utf8_space);
SvREFCNT_dec(PL_utf8_graph);
SvREFCNT_dec(PL_utf8_digit);
Expand All @@ -1009,6 +1010,7 @@ perl_destruct(pTHXx)
SvREFCNT_dec(PL_utf8_foldclosures);
PL_utf8_alnum = NULL;
PL_utf8_alpha = NULL;
PL_utf8_blank = NULL;
PL_utf8_space = NULL;
PL_utf8_graph = NULL;
PL_utf8_digit = NULL;
Expand Down
10 changes: 10 additions & 0 deletions proto.h
Expand Up @@ -1673,6 +1673,10 @@ PERL_CALLCONV bool Perl_is_uni_ascii_lc(pTHX_ UV c)
__attribute__warn_unused_result__
__attribute__pure__;

PERL_CALLCONV bool Perl_is_uni_blank(pTHX_ UV c)
__attribute__warn_unused_result__
__attribute__pure__;

PERL_CALLCONV bool Perl_is_uni_cntrl(pTHX_ UV c)
__attribute__warn_unused_result__
__attribute__pure__;
Expand Down Expand Up @@ -1831,6 +1835,12 @@ PERL_CALLCONV bool Perl_is_utf8_ascii(pTHX_ const U8 *p)
#define PERL_ARGS_ASSERT_IS_UTF8_ASCII \
assert(p)

PERL_CALLCONV bool Perl_is_utf8_blank(pTHX_ const U8 *p)
__attribute__warn_unused_result__
__attribute__nonnull__(pTHX_1);
#define PERL_ARGS_ASSERT_IS_UTF8_BLANK \
assert(p)

PERL_CALLCONV STRLEN Perl_is_utf8_char(const U8 *s)
__attribute__deprecated__
__attribute__nonnull__(1);
Expand Down
1 change: 1 addition & 0 deletions sv.c
Expand Up @@ -13443,6 +13443,7 @@ perl_clone_using(PerlInterpreter *proto_perl, UV flags,
/* utf8 character class swashes */
PL_utf8_alnum = sv_dup_inc(proto_perl->Iutf8_alnum, param);
PL_utf8_alpha = sv_dup_inc(proto_perl->Iutf8_alpha, param);
PL_utf8_blank = sv_dup_inc(proto_perl->Iutf8_blank, param);
PL_utf8_space = sv_dup_inc(proto_perl->Iutf8_space, param);
PL_utf8_graph = sv_dup_inc(proto_perl->Iutf8_graph, param);
PL_utf8_digit = sv_dup_inc(proto_perl->Iutf8_digit, param);
Expand Down
24 changes: 24 additions & 0 deletions utf8.c
Expand Up @@ -1509,6 +1509,14 @@ Perl_is_uni_ascii(pTHX_ UV c)
return isASCII(c);
}

bool
Perl_is_uni_blank(pTHX_ UV c)
{
U8 tmpbuf[UTF8_MAXBYTES+1];
uvchr_to_utf8(tmpbuf, c);
return is_utf8_blank(tmpbuf);
}

bool
Perl_is_uni_space(pTHX_ UV c)
{
Expand Down Expand Up @@ -1829,6 +1837,12 @@ Perl_is_uni_ascii_lc(pTHX_ UV c)
return is_uni_ascii(c); /* XXX no locale support yet */
}

bool
Perl_is_uni_blank_lc(pTHX_ UV c)
{
return is_uni_blank(c); /* XXX no locale support yet */
}

bool
Perl_is_uni_space_lc(pTHX_ UV c)
{
Expand Down Expand Up @@ -2035,6 +2049,16 @@ Perl_is_utf8_ascii(pTHX_ const U8 *p)
return isASCII(*p);
}

bool
Perl_is_utf8_blank(pTHX_ const U8 *p)
{
dVAR;

PERL_ARGS_ASSERT_IS_UTF8_BLANK;

return is_utf8_common(p, &PL_utf8_blank, "XPosixBlank");
}

bool
Perl_is_utf8_space(pTHX_ const U8 *p)
{
Expand Down

0 comments on commit bdd8600

Please sign in to comment.