Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
Use regular expressions directly taken from the SVG standard.
Version to 0.27.
  • Loading branch information
benkasminbullock committed Oct 12, 2016
1 parent 0bcfa59 commit cf89bcb
Show file tree
Hide file tree
Showing 7 changed files with 168 additions and 43 deletions.
4 changes: 4 additions & 0 deletions Changes
@@ -1,5 +1,9 @@
Revision history for perl module Image::SVG::Path

0.27 2016-10-12

* Use number and whitespace regexes from the official SVG grammar

0.26 2016-10-11

* Fix broken README
Expand Down
2 changes: 1 addition & 1 deletion Makefile.PL
Expand Up @@ -20,7 +20,7 @@ WriteMakefile (
bugtracker => "$repo/issues",
},
x_contributors => [
'Colin Kuskie <ckuskie@cpan.org>',
'Colin Kuskie <colink@cpan.org>',
],
},
MIN_PERL_VERSION => '5.006001',
Expand Down
141 changes: 117 additions & 24 deletions lib/Image/SVG/Path.pm
Expand Up @@ -3,12 +3,26 @@ use warnings;
use strict;
require Exporter;
our @ISA = qw(Exporter);
our @EXPORT_OK = qw/extract_path_info reverse_path create_path_string/;
our %EXPORT_TAGS = (all => \@EXPORT_OK);
our $VERSION = '0.26';
our @SVG_REGEX = qw/
$sign
$wsp
$comma_wsp
$digit_sequence
$integer_constant
$fractional_constant
$exponent
$floating_point_constant
$number
/;
our @FUNCTIONS = qw/extract_path_info reverse_path create_path_string/;
our @EXPORT_OK = (@FUNCTIONS, @SVG_REGEX);
our %EXPORT_TAGS = (all => \@FUNCTIONS, regex => \@SVG_REGEX);
our $VERSION = '0.27';

use Carp;

# These are fields in the "arc" hash.
# These are the fields in the "arc" hash which is returned when an "A"
# command is processed.

my @arc_fields = qw/rx ry x_axis_rotation large_arc_flag sweep_flag x y/;

Expand Down Expand Up @@ -116,31 +130,108 @@ sub create_path_string
return $path;
}

# The following regular expression splits the path into pieces
# Note we only split on '-' or '+' when not preceeded by 'e'
# Match the e or E in an exponent.

my $e = qr/[eE]/;

# These regular expressions are directly taken from the SVG grammar,
# https://www.w3.org/TR/SVG/paths.html#PathDataBNF

our $sign = qr/\+|\-/;

our $wsp = qr/[\x20\x09\x0D\x0A]/;

our $comma_wsp = qr/(?:$wsp+,?$wsp*|,$wsp*)/;

# The following regular expression splits the path into pieces Note we
# only split on '-' or '+' when not preceeded by 'e'. This regular
# expression is not following the SVG grammar, it is going our own
# way.

my $split_re = qr/
(?:
\s*,\s*
$wsp*,$wsp*
|
(?<!e)(?=-)
(?<!$e)(?=-)
|
(?<!e)(?:\+)
(?<!$e)(?:\+)
|
\s+
$wsp+
)
/x;

# Match a number

my $number_re = qr/[\+\-0-9.e]+/i;
# From SVG grammar, https://www.w3.org/TR/SVG/paths.html#PathDataBNF

# $ds is "digit sequence", and it accounts for all the uses of "digit"
# in the SVG path grammar, so there is no "digit" here.


my $ds = qr/[0-9]+/;
our $digit_sequence = $ds;

# From SVG grammar, https://www.w3.org/TR/SVG/paths.html#PathDataBNF

# Aside to whoever wrote the SVG standards: this is not an integer,
# it's a whole number!

our $integer_constant = qr/$ds/;

# From SVG grammar, https://www.w3.org/TR/SVG/paths.html#PathDataBNF

our $fractional_constant = qr/$ds? \. $ds/x;

# From SVG grammar, https://www.w3.org/TR/SVG/paths.html#PathDataBNF

our $exponent = qr/
$e
$sign?
$ds
/x;

# From SVG grammar, https://www.w3.org/TR/SVG/paths.html#PathDataBNF

our $floating_point_constant = qr/
$fractional_constant
$exponent?
|
$ds
$exponent
/x;

# This is unused.
# my $nonnegative_number = qr/
# $integer_constant
# |
# $floating_point_constant
# /x;

# From SVG grammar, https://www.w3.org/TR/SVG/paths.html#PathDataBNF

# $floating_point_constant needs to go before $integer_constant,
# otherwise it matches the shorter $integer_constant every time.

our $number = qr/
$sign?
$floating_point_constant
|
$sign?
$integer_constant
/x;

# Old regex.

#my $number_re = qr/(?:[\+\-0-9.]|$e)+/i;

# This is where we depart from the SVG grammar and go our own way.

my $numbers_re = qr/(?:$number_re|(?:\s|,)+)*/;
my $numbers_re = qr/(?:$number|$comma_wsp+)*/;

sub extract_path_info
{
my ($path, $options_ref) = @_;
# Error/message reporting thing. Not sure why I did this now.
my $me = 'extract_path_info';
if (! $path) {
croak "$me: no input";
Expand All @@ -164,7 +255,7 @@ sub extract_path_info
print "$me: I am trying to split up '$path'.\n";
}
my @path_info;
my $has_moveto = ($path =~ /^\s*([Mm])\s*($numbers_re)(.*)$/s);
my $has_moveto = ($path =~ /^$wsp*([Mm])$wsp*($numbers_re)(.*)$/s);
if (! $has_moveto) {
croak "No moveto at start of path '$path'";
}
Expand All @@ -176,7 +267,7 @@ sub extract_path_info
my $position = position_type ($moveto_type);
my @coords = split $split_re, $move_to;
if (@coords < 2) {
croak "Not enough numerical values for the initial M command in '$path'";
croak "$me: Not enough numerical values for the initial M command in '$path'";
}
push @path_info, {
type => 'moveto',
Expand All @@ -189,7 +280,7 @@ sub extract_path_info
if (@coords > 2) {
# Check the number of coordinates is valid.
if (@coords % 2 != 0) {
croak "Odd number of values for an implicit L command " .
croak "$me: Odd number of values for an implicit L command " .
scalar (@coords) . " in '$path'";
}
if ($verbose) {
Expand All @@ -202,20 +293,22 @@ sub extract_path_info
}
# Deal with the rest of the path.
my @curves;
while ($curves =~ /\G([cslqtahvzm])\s*($numbers_re)/gi) {
while ($curves =~ /\G([cslqtahvzm])$wsp*($numbers_re)/gi) {
push @curves, [$1, $2];
}
for my $curve_data (@curves) {
my ($curve_type, $curve) = @$curve_data;
$curve =~ s/^,//;
# print "$curve\n";
my @numbers = split $split_re, $curve;
# print "@numbers\n";
if ($verbose) {
print "Extracted numbers: @numbers\n";
print "$me: Extracted numbers: @numbers\n";
}
if (uc $curve_type eq 'C') {
my $expect_numbers = 6;
if (@numbers % 6 != 0) {
croak "Wrong number of values for a C curve " .
croak "$me: Wrong number of values for a C curve " .
scalar @numbers . " in '$path'";
}
my $position = position_type ($curve_type);
Expand All @@ -240,7 +333,7 @@ sub extract_path_info
elsif (uc $curve_type eq 'S') {
my $expect_numbers = 4;
if (@numbers % $expect_numbers != 0) {
croak "Wrong number of values for an S curve " .
croak "$me: Wrong number of values for an S curve " .
scalar @numbers . " in '$path'";
}
my $position = position_type ($curve_type);
Expand Down Expand Up @@ -304,7 +397,7 @@ sub extract_path_info
elsif (uc $curve_type eq 'T') {
my $expect_numbers = 2;
if (@numbers % $expect_numbers != 0) {
croak "Wrong number of values for an T command " .
croak "$me: Wrong number of values for an T command " .
scalar @numbers . " in '$path'";
}
my $position = position_type ($curve_type);
Expand Down Expand Up @@ -347,7 +440,7 @@ sub extract_path_info
my $position = position_type ($curve_type);
my $expect_numbers = 7;
if (@numbers % $expect_numbers != 0) {
croak "Need 7 parameters for arc";
croak "$me: Need 7 parameters for arc";
}
for (my $i = 0; $i < @numbers / $expect_numbers; $i++) {
my $o = $expect_numbers * $i;
Expand All @@ -364,10 +457,10 @@ sub extract_path_info
my $expect_numbers = 2;
my $position = position_type ($curve_type);
if (@numbers < $expect_numbers) {
croak "Need at least $expect_numbers numbers for move to";
croak "$me: Need at least $expect_numbers numbers for move to";
}
if (@numbers % $expect_numbers != 0) {
croak "Odd number of values for an M command " .
croak "$me: Odd number of values for an M command " .
scalar (@numbers) . " in '$path'";
}
push @path_info, {
Expand Down Expand Up @@ -409,7 +502,7 @@ sub extract_path_info
if ($ip) {
if (ref $ip ne 'ARRAY' ||
scalar @$ip != 2) {
croak "The initial position supplied doesn't look like a pair of coordinates";
croak "$me: The initial position supplied doesn't look like a pair of coordinates";
}
add_coords ($element->{point}, $ip);
}
Expand Down
32 changes: 25 additions & 7 deletions lib/Image/SVG/Path.pod.tmpl
Expand Up @@ -400,12 +400,16 @@ also exists.

=over

=item Does not use the grammar
=item Mostly does not use the grammar

There is a grammar for the paths in the W3 specification. See
L<https://www.w3.org/TR/SVG/paths.html#PathDataBNF>. However, this
module does not use that grammar. Instead it hacks up the path using
regexes.
L<https://www.w3.org/TR/SVG/paths.html#PathDataBNF>. Up to version
0.27, this module did not use that grammar. From version 0.27 the
module is gradually being converted to use the SVG grammar, starting
with whitespace and number parsing. I made this change to doing things
"the right way" because so many minor errors were found (mostly by
Colin Kuskie, <COLINK>). In the course of doing this, yet more minor
errors were discovered.

=item L</no_shortcuts> does not work with quadratic bezier curves

Expand All @@ -420,8 +424,18 @@ None of the functions is exported by default.

use Image::SVG::Path ':all';

exports all of L</extract_path_info>, L</reverse_path> and
L</create_path_string>.
exports all of the module's functions, L</extract_path_info>,
L</reverse_path> and L</create_path_string>. For backward
compatibility, this does not export the regular expressions.

The SVG-parsing regular expressions which correspond to the SVG
standard can also be exported. Please refer to the source code of the
module for a list. To export these, use

use Image::SVG::Path ':regex';

As of version 0.27, not all of the SVG standard grammar is implemented
yet.

=head1 SEE ALSO

Expand Down Expand Up @@ -464,7 +478,11 @@ Render SVG via a Gnome library.
L<The full specification|https://www.w3.org/TR/SVG/> contains all the
details. The L<SVG path
specification|https://www.w3.org/TR/SVG/paths.html> contains the
specifications for paths.
specifications for paths. The grammar of paths is described in L<The
grammar for path
data|https://www.w3.org/TR/SVG/paths.html#PathDataBNF> within that
section of the document. See also L</Mostly does not use the grammar>
under L</BUGS>.

=item SVG Tiny

Expand Down
6 changes: 2 additions & 4 deletions t/Image-SVG-Path.t
Expand Up @@ -29,6 +29,8 @@ is ($path2_info_abs[1]->{point}->[1], 8);

my $path3 = 'M6.93,103.36c3.61-2.46,6.65-6.21,6.65-13.29c0-1.68-1.36-3e-3-3.03-3.03s-3.03,1.36-3.03,3.03s1.36,3.03,3.03,3.03C15.17,93.1,10.4,100.18,6.93,103.36z';

#my $path3 = 'M6.93,103.36c0-1.68-1.36-3e-3-3.03-3.03z';

eval {
my @path3_info = extract_path_info ($path3);
};
Expand Down Expand Up @@ -151,7 +153,3 @@ like ($path, qr/A\s+23\.0+,20\.0+,0\.0+,0\.0+,1\.0+,147\.0+,172\.0+/,

done_testing ();
exit;

# Local variables:
# mode: perl
# End:
12 changes: 5 additions & 7 deletions t/number-paths.t
@@ -1,7 +1,6 @@
use warnings;
use strict;
use utf8;
use FindBin '$Bin';
use Test::More;
my $builder = Test::More->builder;
binmode $builder->output, ":utf8";
Expand All @@ -12,10 +11,9 @@ binmode STDERR, ":encoding(utf8)";
use warnings;
use strict;
use Test::More;
#use Test::Exception;
use Image::SVG::Path qw/extract_path_info/;

##Test these different syntaxes to check for path parsing
# Test these different syntaxes to check for path parsing

my @strings = (
'M150 0 L75 200 L225 200 Z', 'Condensed',
Expand All @@ -31,13 +29,13 @@ my @strings = (
'M 150 0, L 75 200, 225 200, Z', 'Commas and whitespace between pairs and commands',
);

while(my ($string,$comment) = splice @strings, 0, 2) {
while (my ($string, $comment) = splice @strings, 0, 2) {
eval {
my @foo = extract_path_info($string);
my @foo = extract_path_info ($string);
};
if ($@) {
note ($@);
}
note ($@);
}
ok (! $@);
}

Expand Down
14 changes: 14 additions & 0 deletions t/regex-export.t
@@ -0,0 +1,14 @@
use warnings;
use strict;
use utf8;
use FindBin '$Bin';
use Test::More;
my $builder = Test::More->builder;
binmode $builder->output, ":utf8";
binmode $builder->failure_output, ":utf8";
binmode $builder->todo_output, ":utf8";
binmode STDOUT, ":encoding(utf8)";
binmode STDERR, ":encoding(utf8)";
use Image::SVG::Path ':regex';
ok ($number);
done_testing ();

0 comments on commit cf89bcb

Please sign in to comment.