diff --git a/MANIFEST b/MANIFEST index 38f5da2857f..6f0e95e8c7e 100644 --- a/MANIFEST +++ b/MANIFEST @@ -3972,6 +3972,7 @@ ext/XS-APItest/t/gv_fetchmeth_autoload.t XS::APItest: tests for gv_fetchmeth_aut ext/XS-APItest/t/gv_fetchmethod_flags.t XS::APItest: tests for gv_fetchmethod_flags() and variants ext/XS-APItest/t/gv_fetchmeth.t XS::APItest: tests for gv_fetchmeth() and variants ext/XS-APItest/t/gv_init.t XS::APItest: tests for gv_init and variants +ext/XS-APItest/t/handy.t XS::APItest: tests for handy.h ext/XS-APItest/t/hash.t XS::APItest: tests for hash related APIs ext/XS-APItest/t/keyword_multiline.t test keyword plugin parsing across lines ext/XS-APItest/t/keyword_plugin.t test keyword plugin mechanism diff --git a/embed.fnc b/embed.fnc index c16dde81083..fa147504283 100644 --- a/embed.fnc +++ b/embed.fnc @@ -594,6 +594,7 @@ ApPR |bool |is_uni_alnum |UV c ApPR |bool |is_uni_idfirst |UV c ApPR |bool |is_uni_alpha |UV c ApPR |bool |is_uni_ascii |UV c +ApPR |bool |is_uni_blank |UV c ApPR |bool |is_uni_space |UV c ApPR |bool |is_uni_cntrl |UV c ApPR |bool |is_uni_graph |UV c @@ -645,6 +646,7 @@ ApR |bool |is_utf8_idcont |NN const U8 *p ApR |bool |is_utf8_xidcont |NN const U8 *p ApR |bool |is_utf8_alpha |NN const U8 *p ApR |bool |is_utf8_ascii |NN const U8 *p +ApR |bool |is_utf8_blank |NN const U8 *p ApR |bool |is_utf8_space |NN const U8 *p ApR |bool |is_utf8_perl_space |NN const U8 *p ApR |bool |is_utf8_perl_word |NN const U8 *p diff --git a/embed.h b/embed.h index 720e2531b2a..a4f7e45814a 100644 --- a/embed.h +++ b/embed.h @@ -226,6 +226,7 @@ #define is_uni_alpha_lc(a) Perl_is_uni_alpha_lc(aTHX_ a) #define is_uni_ascii(a) Perl_is_uni_ascii(aTHX_ a) #define is_uni_ascii_lc(a) Perl_is_uni_ascii_lc(aTHX_ a) +#define is_uni_blank(a) Perl_is_uni_blank(aTHX_ a) #define is_uni_cntrl(a) Perl_is_uni_cntrl(aTHX_ a) #define is_uni_cntrl_lc(a) Perl_is_uni_cntrl_lc(aTHX_ a) #define is_uni_digit(a) Perl_is_uni_digit(aTHX_ a) @@ -249,6 +250,7 @@ #define is_utf8_alnum(a) Perl_is_utf8_alnum(aTHX_ a) #define is_utf8_alpha(a) Perl_is_utf8_alpha(aTHX_ a) #define is_utf8_ascii(a) Perl_is_utf8_ascii(aTHX_ a) +#define is_utf8_blank(a) Perl_is_utf8_blank(aTHX_ a) #define is_utf8_char Perl_is_utf8_char #define is_utf8_char_buf Perl_is_utf8_char_buf #define is_utf8_cntrl(a) Perl_is_utf8_cntrl(aTHX_ a) diff --git a/embedvar.h b/embedvar.h index 3922855eda9..98efa6f22c8 100644 --- a/embedvar.h +++ b/embedvar.h @@ -368,6 +368,7 @@ #define PL_utf8_X_prepend (vTHX->Iutf8_X_prepend) #define PL_utf8_alnum (vTHX->Iutf8_alnum) #define PL_utf8_alpha (vTHX->Iutf8_alpha) +#define PL_utf8_blank (vTHX->Iutf8_blank) #define PL_utf8_digit (vTHX->Iutf8_digit) #define PL_utf8_foldable (vTHX->Iutf8_foldable) #define PL_utf8_foldclosures (vTHX->Iutf8_foldclosures) diff --git a/ext/XS-APItest/APItest.pm b/ext/XS-APItest/APItest.pm index 0eff22eea14..929bf490a5d 100644 --- a/ext/XS-APItest/APItest.pm +++ b/ext/XS-APItest/APItest.pm @@ -5,7 +5,7 @@ use strict; use warnings; use Carp; -our $VERSION = '0.40'; +our $VERSION = '0.41'; require XSLoader; diff --git a/ext/XS-APItest/APItest.xs b/ext/XS-APItest/APItest.xs index 69b706613b7..8138ad585dd 100644 --- a/ext/XS-APItest/APItest.xs +++ b/ext/XS-APItest/APItest.xs @@ -3456,3 +3456,17 @@ test_get_vtbl() RETVAL = PTR2UV(get_vtbl(-1)); OUTPUT: RETVAL + +bool +test_isBLANK_uni(UV ord) + CODE: + RETVAL = isBLANK_uni(ord); + OUTPUT: + RETVAL + +bool +test_isBLANK_utf8(char * p) + CODE: + RETVAL = isBLANK_utf8((U8 *) p); + OUTPUT: + RETVAL diff --git a/ext/XS-APItest/t/handy.t b/ext/XS-APItest/t/handy.t new file mode 100644 index 00000000000..48eb5b9494a --- /dev/null +++ b/ext/XS-APItest/t/handy.t @@ -0,0 +1,14 @@ +#!perl -w + +use strict; +use Test::More; + +use XS::APItest; + +ok(test_isBLANK_uni(ord("\N{EM SPACE}")), "EM SPACE is blank in isBLANK_uni()"); +ok(test_isBLANK_utf8("\N{EM SPACE}"), "EM SPACE is blank in isBLANK_utf8()"); + +ok(! test_isBLANK_uni(ord("\N{GREEK DASIA}")), "GREEK DASIA is not a blank in isBLANK_uni()"); +ok(! test_isBLANK_utf8("\N{GREEK DASIA}"), "GREEK DASIA is not a blank in isBLANK_utf8()"); + +done_testing; diff --git a/handy.h b/handy.h index abfc2c2f46f..198ea0c7089 100644 --- a/handy.h +++ b/handy.h @@ -912,6 +912,7 @@ EXTCONST U32 PL_charclass[]; /* Note that all ignore 'use bytes' */ #define isALNUM_uni(c) generic_uni(isWORDCHAR, is_uni_alnum, c) +#define isBLANK_uni(c) generic_uni(isBLANK, is_uni_blank, c) #define isIDFIRST_uni(c) generic_uni(isIDFIRST, is_uni_idfirst, c) #define isALPHA_uni(c) generic_uni(isALPHA, is_uni_alpha, c) #define isSPACE_uni(c) generic_uni(isSPACE, is_uni_space, c) @@ -932,7 +933,6 @@ EXTCONST U32 PL_charclass[]; /* Posix and regular space differ only in U+000B, which is in Latin1 */ #define isPSXSPC_uni(c) ((c) < 256 ? isPSXSPC_L1(c) : isSPACE_uni(c)) -#define isBLANK_uni(c) isBLANK(c) /* could be wrong */ #define isALNUM_LC_uvchr(c) (c < 256 ? isALNUM_LC(c) : is_uni_alnum_lc(c)) #define isIDFIRST_LC_uvchr(c) (c < 256 ? isIDFIRST_LC(c) : is_uni_idfirst_lc(c)) @@ -981,6 +981,7 @@ EXTCONST U32 PL_charclass[]; : Perl__is_utf8__perl_idstart(aTHX_ p)) #define isIDCONT_utf8(p) generic_utf8(isWORDCHAR, is_utf8_xidcont, p) #define isALPHA_utf8(p) generic_utf8(isALPHA, is_utf8_alpha, p) +#define isBLANK_utf8(p) generic_utf8(isBLANK, is_utf8_blank, p) #define isSPACE_utf8(p) generic_utf8(isSPACE, is_utf8_space, p) #define isDIGIT_utf8(p) generic_utf8(isDIGIT, is_utf8_digit, p) #define isUPPER_utf8(p) generic_utf8(isUPPER, is_utf8_upper, p) @@ -1004,11 +1005,10 @@ EXTCONST U32 PL_charclass[]; ? isPSXSPC_L1(TWO_BYTE_UTF8_TO_UNI(*(p), \ *((p)+1)))\ : isSPACE_utf8(p))) -#define isBLANK_utf8(c) isBLANK(c) /* could be wrong */ - #define isALNUM_LC_utf8(p) isALNUM_LC_uvchr(valid_utf8_to_uvchr(p, 0)) #define isIDFIRST_LC_utf8(p) isIDFIRST_LC_uvchr(valid_utf8_to_uvchr(p, 0)) #define isALPHA_LC_utf8(p) isALPHA_LC_uvchr(valid_utf8_to_uvchr(p, 0)) +#define isBLANK_LC_utf8(p) isBLANK_LC_uvchr(valid_utf8_to_uvchr(p, 0)) #define isSPACE_LC_utf8(p) isSPACE_LC_uvchr(valid_utf8_to_uvchr(p, 0)) #define isDIGIT_LC_utf8(p) isDIGIT_LC_uvchr(valid_utf8_to_uvchr(p, 0)) #define isUPPER_LC_utf8(p) isUPPER_LC_uvchr(valid_utf8_to_uvchr(p, 0)) @@ -1020,7 +1020,6 @@ EXTCONST U32 PL_charclass[]; #define isPUNCT_LC_utf8(p) isPUNCT_LC_uvchr(valid_utf8_to_uvchr(p, 0)) #define isPSXSPC_LC_utf8(c) (isSPACE_LC_utf8(c) ||(c) == '\f') -#define isBLANK_LC_utf8(c) isBLANK(c) /* could be wrong */ /* This conversion works both ways, strangely enough. On EBCDIC platforms, * CTRL-@ is 0, CTRL-A is 1, etc, just like on ASCII */ diff --git a/intrpvar.h b/intrpvar.h index ffcac0833a8..3e9600f2d43 100644 --- a/intrpvar.h +++ b/intrpvar.h @@ -614,6 +614,7 @@ PERLVAR(I, VertSpace, SV *) /* utf8 character class swashes */ PERLVAR(I, utf8_alnum, SV *) PERLVAR(I, utf8_alpha, SV *) +PERLVAR(I, utf8_blank, SV *) PERLVAR(I, utf8_space, SV *) PERLVAR(I, utf8_graph, SV *) PERLVAR(I, utf8_digit, SV *) diff --git a/perl.c b/perl.c index 4348954dec6..71e958a0fb7 100644 --- a/perl.c +++ b/perl.c @@ -991,6 +991,7 @@ perl_destruct(pTHXx) /* clear utf8 character classes */ SvREFCNT_dec(PL_utf8_alnum); SvREFCNT_dec(PL_utf8_alpha); + SvREFCNT_dec(PL_utf8_blank); SvREFCNT_dec(PL_utf8_space); SvREFCNT_dec(PL_utf8_graph); SvREFCNT_dec(PL_utf8_digit); @@ -1009,6 +1010,7 @@ perl_destruct(pTHXx) SvREFCNT_dec(PL_utf8_foldclosures); PL_utf8_alnum = NULL; PL_utf8_alpha = NULL; + PL_utf8_blank = NULL; PL_utf8_space = NULL; PL_utf8_graph = NULL; PL_utf8_digit = NULL; diff --git a/proto.h b/proto.h index 272f4868086..97524902a59 100644 --- a/proto.h +++ b/proto.h @@ -1673,6 +1673,10 @@ PERL_CALLCONV bool Perl_is_uni_ascii_lc(pTHX_ UV c) __attribute__warn_unused_result__ __attribute__pure__; +PERL_CALLCONV bool Perl_is_uni_blank(pTHX_ UV c) + __attribute__warn_unused_result__ + __attribute__pure__; + PERL_CALLCONV bool Perl_is_uni_cntrl(pTHX_ UV c) __attribute__warn_unused_result__ __attribute__pure__; @@ -1831,6 +1835,12 @@ PERL_CALLCONV bool Perl_is_utf8_ascii(pTHX_ const U8 *p) #define PERL_ARGS_ASSERT_IS_UTF8_ASCII \ assert(p) +PERL_CALLCONV bool Perl_is_utf8_blank(pTHX_ const U8 *p) + __attribute__warn_unused_result__ + __attribute__nonnull__(pTHX_1); +#define PERL_ARGS_ASSERT_IS_UTF8_BLANK \ + assert(p) + PERL_CALLCONV STRLEN Perl_is_utf8_char(const U8 *s) __attribute__deprecated__ __attribute__nonnull__(1); diff --git a/sv.c b/sv.c index 7146f386880..8b054c1b173 100644 --- a/sv.c +++ b/sv.c @@ -13443,6 +13443,7 @@ perl_clone_using(PerlInterpreter *proto_perl, UV flags, /* utf8 character class swashes */ PL_utf8_alnum = sv_dup_inc(proto_perl->Iutf8_alnum, param); PL_utf8_alpha = sv_dup_inc(proto_perl->Iutf8_alpha, param); + PL_utf8_blank = sv_dup_inc(proto_perl->Iutf8_blank, param); PL_utf8_space = sv_dup_inc(proto_perl->Iutf8_space, param); PL_utf8_graph = sv_dup_inc(proto_perl->Iutf8_graph, param); PL_utf8_digit = sv_dup_inc(proto_perl->Iutf8_digit, param); diff --git a/utf8.c b/utf8.c index 510db6c2ffc..2592728e6b5 100644 --- a/utf8.c +++ b/utf8.c @@ -1509,6 +1509,14 @@ Perl_is_uni_ascii(pTHX_ UV c) return isASCII(c); } +bool +Perl_is_uni_blank(pTHX_ UV c) +{ + U8 tmpbuf[UTF8_MAXBYTES+1]; + uvchr_to_utf8(tmpbuf, c); + return is_utf8_blank(tmpbuf); +} + bool Perl_is_uni_space(pTHX_ UV c) { @@ -1829,6 +1837,12 @@ Perl_is_uni_ascii_lc(pTHX_ UV c) return is_uni_ascii(c); /* XXX no locale support yet */ } +bool +Perl_is_uni_blank_lc(pTHX_ UV c) +{ + return is_uni_blank(c); /* XXX no locale support yet */ +} + bool Perl_is_uni_space_lc(pTHX_ UV c) { @@ -2035,6 +2049,16 @@ Perl_is_utf8_ascii(pTHX_ const U8 *p) return isASCII(*p); } +bool +Perl_is_utf8_blank(pTHX_ const U8 *p) +{ + dVAR; + + PERL_ARGS_ASSERT_IS_UTF8_BLANK; + + return is_utf8_common(p, &PL_utf8_blank, "XPosixBlank"); +} + bool Perl_is_utf8_space(pTHX_ const U8 *p) {