Skip to content

Commit

Permalink
Re: [PATCH] Change implementation of %+ to use a proper tied hash int…
Browse files Browse the repository at this point in the history
…erface and add support for %-

Message-ID: <9b18b3110612291245q792fe91cu69422d2b81bb4f0b@mail.gmail.com>

p4raw-id: //depot/perl@29682
  • Loading branch information
demerphq authored and rgs committed Jan 4, 2007
1 parent 1f2e791 commit 44a2ac7
Show file tree
Hide file tree
Showing 21 changed files with 678 additions and 280 deletions.
1 change: 1 addition & 0 deletions MANIFEST
Expand Up @@ -980,6 +980,7 @@ ext/re/re_comp.h re extension wrapper for regcomp.h
ext/re/re.pm re extension Perl module
ext/re/re_top.h re extension symbol hiding header
ext/re/re.xs re extension external subroutines
ext/re/lib/re/Tie/Hash/NamedCapture.pm Implements %- and %+ behaviour
ext/re/t/lexical_debug.pl generate debug output for lexical re 'debug'
ext/re/t/lexical_debug.t test that lexical re 'debug' works
ext/re/t/re_funcs.t see if exportable funcs from re.pm work
Expand Down
3 changes: 1 addition & 2 deletions doop.c
Expand Up @@ -1434,8 +1434,7 @@ Perl_do_kv(pTHX)
RETURN;
}

if (! SvTIED_mg((SV*)keys, PERL_MAGIC_tied)
&& ! SvTIED_mg((SV*)keys, PERL_MAGIC_regdata_names))
if (! SvTIED_mg((SV*)keys, PERL_MAGIC_tied) )
{
i = HvKEYS(keys);
}
Expand Down
50 changes: 31 additions & 19 deletions dump.c
Expand Up @@ -192,6 +192,10 @@ sequence. Thus the output will either be a single char,
an octal escape sequence, a special escape like C<\n> or a 3 or
more digit hex value.
If PERL_PV_ESCAPE_RE is set then the escape char used will be a '%' and
not a '\\'. This is because regexes very often contain backslashed
sequences, whereas '%' is not a particularly common character in patterns.
Returns a pointer to the escaped text as held by dsv.
=cut
Expand All @@ -203,14 +207,16 @@ Perl_pv_escape( pTHX_ SV *dsv, char const * const str,
const STRLEN count, const STRLEN max,
STRLEN * const escaped, const U32 flags )
{
char dq = (flags & PERL_PV_ESCAPE_QUOTE) ? '"' : '\\';
char octbuf[PV_ESCAPE_OCTBUFSIZE] = "\\123456789ABCDF";
char esc = (flags & PERL_PV_ESCAPE_RE) ? '%' : '\\';
char dq = (flags & PERL_PV_ESCAPE_QUOTE) ? '"' : esc;
char octbuf[PV_ESCAPE_OCTBUFSIZE] = "%123456789ABCDF";
STRLEN wrote = 0; /* chars written so far */
STRLEN chsize = 0; /* size of data to be written */
STRLEN readsize = 1; /* size of data just read */
bool isuni= flags & PERL_PV_ESCAPE_UNI ? 1 : 0; /* is this unicode */
const char *pv = str;
const char *end = pv + count; /* end of string */
octbuf[0] = esc;

if (!flags & PERL_PV_ESCAPE_NOCLEAR)
sv_setpvn(dsv, "", 0);
Expand All @@ -228,42 +234,49 @@ Perl_pv_escape( pTHX_ SV *dsv, char const * const str,
"%"UVxf, u);
else
chsize = my_snprintf( octbuf, PV_ESCAPE_OCTBUFSIZE,
"\\x{%"UVxf"}", u);
"%cx{%"UVxf"}", esc, u);
} else if (flags & PERL_PV_ESCAPE_NOBACKSLASH) {
chsize = 1;
} else {
if ( (c == dq) || (c == '\\') || !isPRINT(c) ) {
chsize = 2;
if ( (c == dq) || (c == esc) || !isPRINT(c) ) {
chsize = 2;
switch (c) {
case '\\' : octbuf[1] = '\\'; break;

case '\\' : /* fallthrough */
case '%' : if ( c == esc ) {
octbuf[1] = esc;
} else {
chsize = 1;
}
break;
case '\v' : octbuf[1] = 'v'; break;
case '\t' : octbuf[1] = 't'; break;
case '\r' : octbuf[1] = 'r'; break;
case '\n' : octbuf[1] = 'n'; break;
case '\f' : octbuf[1] = 'f'; break;
case '"' :
case '"' :
if ( dq == '"' )
octbuf[1] = '"';
else
chsize = 1;
break;
break;
default:
if ( (pv < end) && isDIGIT((U8)*(pv+readsize)) )
chsize = my_snprintf( octbuf, PV_ESCAPE_OCTBUFSIZE,
"\\%03o", c);
else
"%c%03o", esc, c);
else
chsize = my_snprintf( octbuf, PV_ESCAPE_OCTBUFSIZE,
"\\%o", c);
"%c%o", esc, c);
}
} else {
chsize=1;
chsize = 1;
}
}
if ( max && (wrote + chsize > max) ) {
break;
}
if ( max && (wrote + chsize > max) ) {
break;
} else if (chsize > 1) {
sv_catpvn(dsv, octbuf, chsize);
wrote += chsize;
sv_catpvn(dsv, octbuf, chsize);
wrote += chsize;
} else {
Perl_sv_catpvf( aTHX_ dsv, "%c", c);
wrote++;
Expand Down Expand Up @@ -308,7 +321,7 @@ Perl_pv_pretty( pTHX_ SV *dsv, char const * const str, const STRLEN count,
const STRLEN max, char const * const start_color, char const * const end_color,
const U32 flags )
{
U8 dq = (flags & PERL_PV_PRETTY_QUOTE) ? '"' : '\\';
U8 dq = (flags & PERL_PV_PRETTY_QUOTE) ? '"' : '%';
STRLEN escaped;

if ( dq == '"' )
Expand Down Expand Up @@ -1129,7 +1142,6 @@ static const struct { const char type; const char *name; } magic_names[] = {
{ PERL_MAGIC_sv, "sv(\\0)" },
{ PERL_MAGIC_arylen, "arylen(#)" },
{ PERL_MAGIC_rhash, "rhash(%)" },
{ PERL_MAGIC_regdata_names, "regdata_names(+)" },
{ PERL_MAGIC_pos, "pos(.)" },
{ PERL_MAGIC_symtab, "symtab(:)" },
{ PERL_MAGIC_backref, "backref(<)" },
Expand Down
6 changes: 4 additions & 2 deletions embed.fnc
Expand Up @@ -684,7 +684,8 @@ Ap |I32 |regexec_flags |NN regexp* prog|NN char* stringarg \
|NN char* strend|NN char* strbeg|I32 minend \
|NN SV* screamer|NULLOK void* data|U32 flags
ApR |regnode*|regnext |NN regnode* p
Ep |SV*|reg_named_buff_sv |NN SV* namesv
EXp |SV*|reg_named_buff_get |NN SV* namesv|NULLOK const REGEXP * const from_re|U32 flags
EXp |SV*|reg_numbered_buff_get|I32 paren|NN const REGEXP * const rx|NULLOK SV* usesv|U32 flags
Ep |void |regprop |NULLOK const regexp *prog|NN SV* sv|NN const regnode* o
Ap |void |repeatcpy |NN char* to|NN const char* from|I32 len|I32 count
ApP |char* |rninstr |NN const char* big|NN const char* bigend \
Expand Down Expand Up @@ -1100,7 +1101,8 @@ sR |I32 |do_trans_complex_utf8 |NN SV * const sv

#if defined(PERL_IN_GV_C) || defined(PERL_DECL_PROT)
s |void |gv_init_sv |NN GV *gv|I32 sv_type
s |void |require_errno |NN GV *gv
s |HV* |require_tie_mod|NN GV *gv|NN const char *varpv|NN SV* namesv \
|NN const char *methpv|const U32 flags
#endif

: #if defined(PERL_IN_HV_C) || defined(PERL_IN_SV_C) || defined(PERL_DECL_PROT)
Expand Down
10 changes: 6 additions & 4 deletions embed.h
Expand Up @@ -692,7 +692,8 @@
#define regexec_flags Perl_regexec_flags
#define regnext Perl_regnext
#if defined(PERL_CORE) || defined(PERL_EXT)
#define reg_named_buff_sv Perl_reg_named_buff_sv
#define reg_named_buff_get Perl_reg_named_buff_get
#define reg_numbered_buff_get Perl_reg_numbered_buff_get
#define regprop Perl_regprop
#endif
#define repeatcpy Perl_repeatcpy
Expand Down Expand Up @@ -1098,7 +1099,7 @@
#if defined(PERL_IN_GV_C) || defined(PERL_DECL_PROT)
#ifdef PERL_CORE
#define gv_init_sv S_gv_init_sv
#define require_errno S_require_errno
#define require_tie_mod S_require_tie_mod
#endif
#endif
#if defined(PERL_IN_HV_C) || defined(PERL_DECL_PROT)
Expand Down Expand Up @@ -2904,7 +2905,8 @@
#define regexec_flags(a,b,c,d,e,f,g,h) Perl_regexec_flags(aTHX_ a,b,c,d,e,f,g,h)
#define regnext(a) Perl_regnext(aTHX_ a)
#if defined(PERL_CORE) || defined(PERL_EXT)
#define reg_named_buff_sv(a) Perl_reg_named_buff_sv(aTHX_ a)
#define reg_named_buff_get(a,b,c) Perl_reg_named_buff_get(aTHX_ a,b,c)
#define reg_numbered_buff_get(a,b,c,d) Perl_reg_numbered_buff_get(aTHX_ a,b,c,d)
#define regprop(a,b,c) Perl_regprop(aTHX_ a,b,c)
#endif
#define repeatcpy(a,b,c,d) Perl_repeatcpy(aTHX_ a,b,c,d)
Expand Down Expand Up @@ -3301,7 +3303,7 @@
#if defined(PERL_IN_GV_C) || defined(PERL_DECL_PROT)
#ifdef PERL_CORE
#define gv_init_sv(a,b) S_gv_init_sv(aTHX_ a,b)
#define require_errno(a) S_require_errno(aTHX_ a)
#define require_tie_mod(a,b,c,d,e) S_require_tie_mod(aTHX_ a,b,c,d,e)
#endif
#endif
#ifdef PERL_CORE
Expand Down
111 changes: 111 additions & 0 deletions ext/re/lib/re/Tie/Hash/NamedCapture.pm
@@ -0,0 +1,111 @@
package re::Tie::Hash::NamedCapture;
use strict;
use warnings;
our $VERSION = "0.01";
use re qw(is_regexp
regname
regnames
regnames_count
regnames_iterinit
regnames_iternext);

sub TIEHASH {
my $classname = shift;
my $hash = {@_};

if ($hash->{re} && !is_regexp($hash->{re})) {
die "'re' parameter to ",__PACKAGE__,"->TIEHASH must be a qr//"
}

return bless $hash, $classname;
}

sub FETCH {
return regname($_[1],$_[0]->{re},$_[0]->{all});
}

sub STORE {
require Carp;
Carp::croak("STORE forbidden: Hashes tied to ",__PACKAGE__," are read/only.");
}

sub FIRSTKEY {
regnames_iterinit($_[0]->{re});
return $_[0]->NEXTKEY;
}

sub NEXTKEY {
return regnames_iternext($_[0]->{re},$_[0]->{all});
}

sub EXISTS {
return defined regname( $_[1], $_[0]->{re},$_[0]->{all});
}

sub DELETE {
require Carp;
Carp::croak("DELETE forbidden: Hashes tied to ",__PACKAGE__," are read/only");
}

sub CLEAR {
require Carp;
Carp::croak("CLEAR forbidden: Hashes tied to ",__PACKAGE__," are read/only");
}

sub SCALAR {
return scalar regnames($_[0]->{re},$_[0]->{all});
}

1;

__END__
=head1 NAME
re::Tie::Hash::NamedCapture - Perl module to support named regex capture buffers
=head1 SYNOPSIS
tie my %hash,"re::Tie::Hash::NamedCapture";
# %hash now behaves like %-
tie my %hash,"re::Tie::Hash::NamedCapture",re => $qr, all=> 1,
# %hash now access buffers from regex in $qr like %+
=head1 DESCRIPTION
Implements the behaviour required for C<%+> and C<%-> but can be used
independently.
When the C<re> parameter is provided, and the value is the result of
a C<qr//> expression then the hash is bound to that particular regexp
and will return the results of its last successful match. If the
parameter is omitted then the hash behaves just as C<$1> does by
referencing the last successful match.
When the C<all> parameter is provided then the result of a fetch
is an array ref containing the contents of each buffer whose name
was the same as the key used for the access. If the buffer wasn't
involved in the match then an undef will be stored. When the all
parameter is omitted or not a true value then the return will be
a the content of the left most defined buffer with the given name.
If there is no buffer with the desired name defined then C<undef>
is returned.
For instance:
my $qr = qr/(?<foo>bar)/;
if ( 'bar' =~ /$qr/ ) {
tie my %hash,"re::Tie::Hash::NamedCapture",re => $qr, all => 1;
if ('bar'=~/bar/) {
# last successful match is now different
print $hash{foo}; # prints foo
}
}
=head1 SEE ALSO
L<re>, L<perlmodlib/Pragmatic Modules>.
=cut
46 changes: 44 additions & 2 deletions ext/re/re.pm
Expand Up @@ -4,9 +4,11 @@ package re;
use strict;
use warnings;

our $VERSION = "0.07";
our $VERSION = "0.08";
our @ISA = qw(Exporter);
our @EXPORT_OK = qw(is_regexp regexp_pattern regmust);
our @EXPORT_OK = qw(is_regexp regexp_pattern regmust
regname regnames
regnames_count regnames_iterinit regnames_iternext);
our %EXPORT_OK = map { $_ => 1 } @EXPORT_OK;

# *** WARNING *** WARNING *** WARNING *** WARNING *** WARNING ***
Expand Down Expand Up @@ -464,6 +466,46 @@ floating string. This will be what the optimiser of the Perl that you
are using thinks is the longest. If you believe that the result is wrong
please report it via the L<perlbug> utility.
=item regname($name,$qr,$all)
Returns the contents of a named buffer. If $qr is missing, or is not the
result of a qr// then returns the result of the last successful match. If
$all is true then returns an array ref containing one entry per buffer,
otherwise returns the first defined buffer.
=item regnames($qr,$all)
Returns a list of all of the named buffers defined in a pattern. If
$all is true then it returns all names defined, if not returns only
names which were involved in the last successful match. If $qr is omitted
or is not the result of a qr// then returns the details for the last
successful match.
=item regnames_iterinit($qr)
Initializes the internal hash iterator associated to a regexps named capture
buffers. If $qr is omitted resets the iterator associated with the regexp used
in the last successful match.
=item regnames_iternext($qr,$all)
Gets the next key from the hash associated with a regexp. If $qr
is omitted resets the iterator associated with the regexp used in the
last successful match. If $all is true returns the keys of all of the
distinct named buffers in the pattern, if not returns only those names
used in the last successful match.
=item regnames_count($qr)
Returns the number of distinct names defined in the regexp $qr. If
$qr is omitted or not a regexp returns the count of names in the
last successful match.
B<Note:> that this result is always the actual number of distinct
named buffers defined, it may not actually match that which is
returned by C<regnames()> and related routines when those routines
have not been called with the $all parameter set..
=back
=head1 SEE ALSO
Expand Down

0 comments on commit 44a2ac7

Please sign in to comment.