Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with
or
.
Download ZIP
Branch: master
Fetching contributors…

Cannot retrieve contributors at this time

executable file 330 lines (266 sloc) 5.581 kB
#!/usr/local/bin/perl -w
#
# $Revision: 1.1.1.1 $
#
# $Date: 2003-07-27 11:07:11 $
use XML::Parser;
# Help text printed for -h and appended to the error message when an
# unknown option is given or no input file is named.
my $Usage =<<'End_of_Usage;';
Usage is:
xmlfilter [-h] [-nl] [{-+}root] [{-+}el=elname] [{-+}el:elnamepat]
[{-+}att:attname] [{-+}att:attname:attvalpat] xmlfile
Prints on standard output the result of filtering the given xmlfile
for elements according to the switches. A '-' option will drop the
element from the output; a '+' will keep it. The output should also
be a well-formed XML document.
-h Print this message
-nl Emit a newline prior to every start tag.
[-+]root Drop (or keep) the root element. Defaults to keep.
If the root element were named "foo", then -root
would be equivalent to -el=foo. Note that even if
you're dropping the root element, it's start and
end tag are kept in order that the output remains
a well-formed XML document.
[-+]el=elname
Drop (or keep) elements of type elname.
[-+]el:elnamepat
Drop (or keep) element whose type name matches elnamepat.
[-+]att:attname
Drop (or keep) elements which have an attribute = attname.
[-+]att:attname:attvalpat
Drop (or keep) elements which have an attribute = attname
and for which the attribute value matches attvalpat.
End_of_Usage;
# True while output is switched on.  Starts true unless -root is given;
# the start and end handlers flip it whenever a drop/keep rule matches.
my $pass = 1;
# Set by -nl: start_handler prints a newline before each start tag.
my $do_newline = 0;
# Set to 1 once any att: option is seen; while 0, check_atts returns
# immediately without looking at the attributes.
my $attcheck = 0;
# Exact element names given with -el=name (keys; value is always 1).
my %drop_el;
# Element name patterns given with -el:pattern.
my @drop_elpat;
# Exact element names given with +el=name (keys; value is always 1).
my %keep_el;
# Element name patterns given with +el:pattern.
my @keep_elpat;
# Attribute name => code ref that tests the attribute's value, filled
# from -att: options.
my %drop_att;
# Attribute name => code ref that tests the attribute's value, filled
# from +att: options.
my %keep_att;
# Value test used for att: options given without a value pattern.
my $always_true = sub {1;};
# Name of the document's root element; start_handler records it at the
# first start tag, and it is used to print the closing root tag when the
# root element is dropped.
my $root_element = '';
# True between cdata_start and cdata_end; char_handler then prints the
# text without escaping it.
my $in_cdata = 0;
# Process options.
#
# Leading arguments that start with '-' or '+' are consumed as switches;
# the first argument that does not is left in @ARGV for the caller (it is
# the input file name).  Switches update the file-level filter state:
#   -root / +root         initial value of $pass
#   -nl                   $do_newline
#   [-+]el=NAME           %drop_el / %keep_el
#   [-+]el:PATTERN        @drop_elpat / @keep_elpat
#   [-+]att:NAME[:PAT]    %drop_att / %keep_att, and sets $attcheck
# -h prints the usage text and exits.  An unknown switch, or an attribute
# value pattern that is not a valid regular expression, is fatal.
while (defined($ARGV[0]) and $ARGV[0] =~ /^[-+]/)
{
    my $opt = shift;
    if ($opt eq '-root')
    {
        $pass = 0;
    }
    elsif ($opt eq '+root')
    {
        $pass = 1;
    }
    elsif ($opt eq '-h')
    {
        print $Usage;
        exit;
    }
    elsif ($opt eq '-nl')
    {
        $do_newline = 1;
    }
    elsif ($opt =~ /^([-+])el([:=])(\S*)/)
    {
        my ($disp, $kind, $pattern) = ($1, $2, $3);
        my ($hashref, $aref);
        if ($disp eq '-')
        {
            $hashref = \%drop_el;
            $aref    = \@drop_elpat;
        }
        else
        {
            $hashref = \%keep_el;
            $aref    = \@keep_elpat;
        }
        if ($kind eq '=')
        {
            # '=' means an exact element name.
            $hashref->{$pattern} = 1;
        }
        else
        {
            # ':' means a pattern; the patterns are combined later.
            push(@$aref, $pattern);
        }
    }
    elsif ($opt =~ /^([-+])att:(\w+)(?::(\S*))?/)
    {
        my ($disp, $id, $pattern) = ($1, $2, $3);
        my $ref = ($disp eq '-') ? \%drop_att : \%keep_att;
        if (defined($pattern) and length($pattern))
        {
            # Compile the pattern with qr// instead of splicing it into
            # a string eval: the command-line text can then never be
            # executed as Perl code, '/' needs no escaping, and a
            # malformed pattern is reported instead of silently
            # disabling the option.
            my $re = eval { qr/$pattern/ };
            die "Invalid attribute value pattern in option '$opt': $@"
                unless defined($re);
            $ref->{$id} = sub { $_[0] =~ $re };
        }
        else
        {
            # No value pattern (or an empty one): the mere presence of
            # the attribute is enough.
            $ref->{$id} = $always_true;
        }
        $attcheck = 1;
    }
    else
    {
        die "Unknown option: $opt\n$Usage";
    }
}
# Build the element-name matchers used by start_handler.
#
# All -el:PATTERN (respectively +el:PATTERN) values are combined into one
# alternation.  $drop_sub / $keep_sub take an element name and return
# true when it matches; with no patterns they never match.
my $drop_el_pattern = join('|', @drop_elpat);
my $keep_el_pattern = join('|', @keep_elpat);

# Patterns are compiled with qr// rather than a string eval so that a '/'
# in a pattern is harmless and a malformed pattern is reported here
# instead of leaving the matcher undefined.  length() is used so that a
# pattern such as "0" is not mistaken for "no pattern".
my $drop_sub = sub { 0 };
if (length($drop_el_pattern))
{
    my $re = eval { qr/$drop_el_pattern/ };
    die "Invalid -el: pattern '$drop_el_pattern': $@"
        unless defined($re);
    $drop_sub = sub { $_[0] =~ $re };
}

my $keep_sub = sub { 0 };
if (length($keep_el_pattern))
{
    my $re = eval { qr/$keep_el_pattern/ };
    die "Invalid +el: pattern '$keep_el_pattern': $@"
        unless defined($re);
    $keep_sub = sub { $_[0] =~ $re };
}
# The first argument left after the switches names the XML document.
my $doc = shift;
defined($doc)
    or die "No file specified\n$Usage";

# Parser depths at which start_handler flipped $pass; end_handler flips
# it back when the matching end tag is reached at the same depth.
my @togglestack = ();

my $p = XML::Parser->new(
    ErrorContext => 2,
    Handlers     => {
        Start => \&start_handler,
        End   => \&end_handler,
    },
);

# Text and CDATA handlers are only installed while output is switched on.
$p->setHandlers(
    Char       => \&char_handler,
    CdataStart => \&cdata_start,
    CdataEnd   => \&cdata_end,
) if $pass;

$p->parsefile($doc);

# With the root dropped, start_handler printed a bare root start tag;
# close it here.
if (!$pass)
{
    print "</$root_element>\n";
}
################
## End of main
################
# Start-tag handler.
#
# Arguments: the parser object, the element name, then the element's
# attributes as a flat name/value list.
#
# Records the root element on the first call (printing a bare start tag
# for it when output is off), flips $pass when the element matches the
# rule set opposite to the current state, and prints the start tag with
# its attributes while output is on.
sub start_handler
{
    my ($xp, $el, @atts) = @_;

    if (!$root_element)
    {
        $root_element = $el;
        print "<$el>\n" if !$pass;
    }

    # While passing we look for reasons to drop; while dropping we look
    # for reasons to keep.
    my ($names, $att_tests, $matcher) = $pass
        ? (\%drop_el, \%drop_att, $drop_sub)
        : (\%keep_el, \%keep_att, $keep_sub);

    if (   defined($names->{$el})
        || $matcher->($el)
        || check_atts($att_tests, @atts))
    {
        $pass = !$pass;

        # Text and CDATA callbacks are installed only while passing.
        my @callbacks = $pass
            ? (\&char_handler, \&cdata_start, \&cdata_end)
            : (0, 0, 0);
        $xp->setHandlers(
            Char       => $callbacks[0],
            CdataStart => $callbacks[1],
            CdataEnd   => $callbacks[2],
        );

        # Remember the depth so end_handler knows where to flip back.
        push(@togglestack, $xp->depth);
    }

    return unless $pass;

    my $tag = $do_newline ? "\n" : '';
    $tag .= "<$el";
    while (my ($id, $val) = splice(@atts, 0, 2))
    {
        # Values are written in single quotes, so "'" is escaped too.
        $tag .= " $id='" . $xp->xml_escape($val, "'") . "'";
    }
    print "$tag>";
} # End start_handler
# End-tag handler.
#
# Arguments: the parser object and the element name.
#
# Prints the end tag while output is on, then, if start_handler flipped
# $pass at this depth, flips it back and installs or removes the text
# and CDATA callbacks to match.
sub end_handler
{
    my ($xp, $el) = @_;

    print "</$el>" if $pass;

    # Nothing more to do unless this element is the one that toggled.
    return unless (@togglestack && $togglestack[-1] == $xp->depth);

    $pass = !$pass;

    my @callbacks = $pass
        ? (\&char_handler, \&cdata_start, \&cdata_end)
        : (0, 0, 0);
    $xp->setHandlers(
        Char       => $callbacks[0],
        CdataStart => $callbacks[1],
        CdataEnd   => $callbacks[2],
    );

    pop(@togglestack);
} # End end_handler
# Character-data handler.
#
# Arguments: the parser object and a chunk of text.
#
# Prints non-empty text.  Outside a CDATA section the text is escaped
# first (with '>' escaped as well); inside one it is printed unchanged.
sub char_handler
{
    my ($xp, $text) = @_;

    return unless length($text);

    my $out = $in_cdata ? $text : $xp->xml_escape($text, '>');
    print $out;
} # End char_handler
# CDATA-start handler: prints the section opener and sets $in_cdata so
# that char_handler stops escaping text.
sub cdata_start
{
    my ($xp) = @_;

    print '<![CDATA[';
    $in_cdata = 1;
}
# CDATA-end handler: prints the section terminator and clears $in_cdata
# so that char_handler resumes escaping text.
sub cdata_end
{
    my ($xp) = @_;

    print ']]>';
    $in_cdata = 0;
}
# Test an element's attributes against a set of attribute rules.
#
# Arguments: a hash ref mapping attribute names to code refs that test
# the attribute's value, followed by the attributes as a flat
# name/value list.
#
# Returns the first true result produced by a value test, or 0 when no
# rule matches.  When no att: option was given ($attcheck is false) the
# attributes are not examined at all.
sub check_atts
{
    return $attcheck if !$attcheck;

    my ($tests, @pairs) = @_;

    while (my ($id, $val) = splice(@pairs, 0, 2))
    {
        my $test = $tests->{$id};
        next unless defined($test);

        my $hit = $test->($val);
        return $hit if $hit;
    }

    return 0;
} # End check_atts
# Tell Emacs that this is really a perl script
# Local Variables:
# mode:perl
# End:
Jump to Line
Something went wrong with that request. Please try again.