Skip to content

Commit

Permalink
Optimize OptimizeArray
Browse files Browse the repository at this point in the history
This function took 571 seconds to execute with 10k domains in the
configuration file. After optimization it now takes about 20ms for the
same workload.
  • Loading branch information
bdraco committed Apr 25, 2018
1 parent 9ebc709 commit 184e14b
Show file tree
Hide file tree
Showing 2 changed files with 121 additions and 51 deletions.
106 changes: 106 additions & 0 deletions test/testunit/testoptimizearray.pl
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
#!/usr/bin/perl -w
#
use Test::More tests => 4;
use Test::Deep;

my @arr = ( qr/ABc/, qr/abc/, qr/ccc/, qr/abc/, qr/ABc/ );

sub OptimizeArray {
my ( $array, $notcasesensitive ) = @_;
my %seen;

if ($notcasesensitive) {

# Case insensitive
my $uncompiled_regex;
return map {
$uncompiled_regex = UnCompileRegex($_);
!$seen{ lc $uncompiled_regex }++ ? qr/$uncompiled_regex/i : ()
} @$array;
}

# Case sensitive
return map { !$seen{$_}++ ? $_ : () } @$array;
}

sub UnCompileRegex {
shift =~ /\(\?[-^\w]*:(.*)\)/; # Works with all perl
# shift =~ /\(\?[-\w]*:(.*)\)/; < perl 5.14
return $1;
}

sub OptimizeArray_old {
my $array = shift;
my @arrayunreg = map { UnCompileRegex($_) } @$array;
my $notcasesensitive = shift;
my $searchlist = 0;
if ($Debug) {
debug( "OptimizeArray (notcasesensitive=$notcasesensitive)", 4 );
}
while ( $searchlist > -1 && @arrayunreg ) {
my $elemtoremove = -1;
OPTIMIZELOOP:
foreach my $i ( $searchlist .. ( scalar @arrayunreg ) - 1 ) {

# Search if $i elem is already treated by another elem
foreach my $j ( 0 .. ( scalar @arrayunreg ) - 1 ) {
if ( $i == $j ) { next; }
my $parami =
$notcasesensitive ? lc( $arrayunreg[$i] ) : $arrayunreg[$i];
my $paramj =
$notcasesensitive ? lc( $arrayunreg[$j] ) : $arrayunreg[$j];
if ($Debug) {
debug( " Compare $i ($parami) to $j ($paramj)", 4 );
}
if ( index( $parami, $paramj ) > -1 ) {
if ($Debug) {
debug(
" Elem $i ($arrayunreg[$i]) already treated with elem $j ($arrayunreg[$j])",
4
);
}
$elemtoremove = $i;
last OPTIMIZELOOP;
}
}
}
if ( $elemtoremove > -1 ) {
if ($Debug) {
debug(
" Remove elem $elemtoremove - $arrayunreg[$elemtoremove]",
4
);
}
splice @arrayunreg, $elemtoremove, 1;
$searchlist = $elemtoremove;
}
else {
$searchlist = -1;
}
}
if ($notcasesensitive) {
return map { qr/$_/i } @arrayunreg;
}
return map { qr/$_/ } @arrayunreg;
}

is_deeply(
[ OptimizeArray( \@arr, 0 ) ],
[
qr/ABc/,
qr/abc/,
qr/ccc/,
],
"case senitive OptimizeArray returns the expected results"
);
cmp_bag( [ OptimizeArray( \@arr, 0 ) ], [ OptimizeArray_old( \@arr, 0 ) ], "..and it matches the old method" );
is_deeply(
[ OptimizeArray( \@arr, 1 ) ],
[
qr/ABc/i,
qr/ccc/i,
],
"case insenitive OptimizeArray returns the expected results"
);
cmp_bag( [ OptimizeArray( \@arr, 1 ) ], [ OptimizeArray_old( \@arr, 1 ) ], "..and it matches the old method" ) or diag explain [ OptimizeArray( \@arr, 1 ) ], [ OptimizeArray_old( \@arr, 1 ) ];

66 changes: 15 additions & 51 deletions wwwroot/cgi-bin/awstats.pl
Original file line number Diff line number Diff line change
Expand Up @@ -1351,57 +1351,21 @@ sub debug {
# Return: None
#------------------------------------------------------------------------------
sub OptimizeArray {
my $array = shift;
my @arrayunreg = map { UnCompileRegex($_) } @$array;
my $notcasesensitive = shift;
my $searchlist = 0;
if ($Debug) {
debug( "OptimizeArray (notcasesensitive=$notcasesensitive)", 4 );
}
while ( $searchlist > -1 && @arrayunreg ) {
my $elemtoremove = -1;
OPTIMIZELOOP:
foreach my $i ( $searchlist .. ( scalar @arrayunreg ) - 1 ) {

# Search if $i elem is already treated by another elem
foreach my $j ( 0 .. ( scalar @arrayunreg ) - 1 ) {
if ( $i == $j ) { next; }
my $parami =
$notcasesensitive ? lc( $arrayunreg[$i] ) : $arrayunreg[$i];
my $paramj =
$notcasesensitive ? lc( $arrayunreg[$j] ) : $arrayunreg[$j];
if ($Debug) {
debug( " Compare $i ($parami) to $j ($paramj)", 4 );
}
if ( index( $parami, $paramj ) > -1 ) {
if ($Debug) {
debug(
" Elem $i ($arrayunreg[$i]) already treated with elem $j ($arrayunreg[$j])",
4
);
}
$elemtoremove = $i;
last OPTIMIZELOOP;
}
}
}
if ( $elemtoremove > -1 ) {
if ($Debug) {
debug(
" Remove elem $elemtoremove - $arrayunreg[$elemtoremove]",
4 );
}
splice @arrayunreg, $elemtoremove, 1;
$searchlist = $elemtoremove;
}
else {
$searchlist = -1;
}
}
if ($notcasesensitive) {
return map { qr/$_/i } @arrayunreg;
}
return map { qr/$_/ } @arrayunreg;
my ( $array, $notcasesensitive ) = @_;
my %seen;

if ($notcasesensitive) {

# Case insensitive
my $uncompiled_regex;
return map {
$uncompiled_regex = UnCompileRegex($_);
!$seen{ lc $uncompiled_regex }++ ? qr/$uncompiled_regex/i : ()
} @$array;
}

# Case sensitive
return map { !$seen{$_}++ ? $_ : () } @$array;
}

#------------------------------------------------------------------------------
Expand Down

0 comments on commit 184e14b

Please sign in to comment.