Skip to content
This repository has been archived by the owner on Apr 12, 2020. It is now read-only.

Commit

Permalink
Make file names and zset names the same, build index of dists
Browse files: browse the repository at this point in the history
  • Loading branch information
dgl committed Mar 30, 2012
1 parent 01c1fe1 commit cc5be45
Show file tree
Hide file tree
Showing 4 changed files with 27 additions and 20 deletions.
8 changes: 4 additions & 4 deletions bin/cpangrep-matcher
Original file line number | Original file line | Diff line number | Diff line change
Expand Up @@ -42,14 +42,14 @@ sub do_match {
$re = qr/$re/m; $re = qr/$re/m;


my $i = 0; my $i = 0;
my $fh_next = open_cached($dir . "/" . $process->[$i++]->{file}); my $fh_next = open_cached($dir . "/" . $process->[$i++]);


for my $file(@$process) { for my $file(@$process) {
my $fh = $fh_next; my $fh = $fh_next;
mmap my $pm, -s $fh, IO::AIO::PROT_READ, IO::AIO::MAP_SHARED, $fh or die $!; mmap my $pm, -s $fh, IO::AIO::PROT_READ, IO::AIO::MAP_SHARED, $fh or die $!;


if(my $next = $process->[$i++]) { if(my $next = $process->[$i++]) {
$fh_next = open_cached($dir . "/" . $next->{file}); $fh_next = open_cached($dir . "/" . $next);
# On machines with spare IO bandwidth this seemed to help, however I'm now # On machines with spare IO bandwidth this seemed to help, however I'm now
# running on VMs and this seems less of a help. # running on VMs and this seems less of a help.
#aio_readahead $fh_next, 0, -s $next; #aio_readahead $fh_next, 0, -s $next;
Expand Down Expand Up @@ -78,7 +78,7 @@ sub do_match {
} }


push @results, { push @results, {
zset => $file->{zset}, zset => $file,
text => substr($pm, $previous, $next - $previous), text => substr($pm, $previous, $next - $previous),
snippet => [$previous, $next], snippet => [$previous, $next],
match => [$-[0], $+[0]] match => [$-[0], $+[0]]
Expand All @@ -100,7 +100,7 @@ while(1) {
last unless $item->[0]; last unless $item->[0];
print "$$: processing job: $item->[1]\n"; print "$$: processing job: $item->[1]\n";
my $job = decode_json $item->[1]; my $job = decode_json $item->[1];
my $slabs = [map decode_json($_), @{$redis->lrange($job->{slablist}, @{$job->{slabs}})->recv}]; my $slabs = [@{$redis->lrange($job->{slablist}, @{$job->{slabs}})->recv}];
my $max = 500; my $max = 500;
my $start = time; my $start = time;
do_match($job->{re}, $max, $job->{notify}, $slabs); do_match($job->{re}, $max, $job->{notify}, $slabs);
Expand Down
4 changes: 2 additions & 2 deletions lib/WWW/CPANGrep/Index/Worker.pm
Original file line number | Original file line | Diff line number | Diff line change
Expand Up @@ -108,12 +108,12 @@ sub index_dist {
} }
}, "."; }, ".";


my $redis = tied %{$self->redis}; my $redis_conn = (tied %{$self->redis})->{_conn};


for my $file(@files) { for my $file(@files) {
next if $file eq 'MANIFEST'; next if $file eq 'MANIFEST';
my $mime_type = $self->_mmagic->get_mime($file); my $mime_type = $self->_mmagic->get_mime($file);
#$redis->hincrby("mime_stats", $mime_type, 1); $redis_conn->hincrby("mime_stats", $mime_type, 1);


if($mime_type !~ /^text/) { if($mime_type !~ /^text/) {
warn "Ignoring binary file $file ($mime_type, in $dist)\n"; warn "Ignoring binary file $file ($mime_type, in $dist)\n";
Expand Down
19 changes: 9 additions & 10 deletions lib/WWW/CPANGrep/Slab/Writer.pm
Original file line number | Original file line | Diff line number | Diff line change
Expand Up @@ -21,10 +21,10 @@ has redis => (
required => 1, required => 1,
); );


has zset_name => ( has name => (
is => 'ro', is => 'ro',
isa => 'Str', isa => 'Str',
default => sub { "slab:zset:process:$$-" . ++$COUNTER }, default => sub { "slab:zset:$$-" . ++$COUNTER },
); );


has rotate_size => ( has rotate_size => (
Expand All @@ -33,13 +33,10 @@ has rotate_size => (
default => sub { 10 * 1024 * 1024 }, # 10mb default => sub { 10 * 1024 * 1024 }, # 10mb
); );


has file_name => ( has seen_dists => (
is => 'ro', is => 'ro',
isa => 'Str', isa => 'HashRef',
default => sub { default => sub { {} },
my($self) = @_;
"$$-" . time . "-" . ++$COUNTER;
},
); );


has _size => ( has _size => (
Expand All @@ -54,7 +51,7 @@ has _fh => (
lazy => 1, lazy => 1,
default => sub { default => sub {
my($self) = @_; my($self) = @_;
open my $fh, ">", $self->dir . "/" . $self->file_name or die $!; open my $fh, ">", $self->dir . "/" . $self->name or die $!;
binmode $fh; binmode $fh;
$fh; $fh;
}, },
Expand All @@ -80,12 +77,14 @@ sub index {


print {$self->_fh} $content, SLAB_SEPERATOR; print {$self->_fh} $content, SLAB_SEPERATOR;


$self->redis->zadd($self->zset_name, $self->_size, encode_json { $self->redis->zadd($self->name, $self->_size, encode_json {
size => length($content), size => length($content),
dist => $dist, dist => $dist,
file => $file file => $file
}); });


$self->{seen_dists}{$dist}++;

$self->_size($self->_size + length($content) + length SLAB_SEPERATOR); $self->_size($self->_size + length($content) + length SLAB_SEPERATOR);
} }


Expand Down
16 changes: 12 additions & 4 deletions lib/WWW/CPANGrep/Slabs.pm
Original file line number | Original file line | Diff line number | Diff line change
Expand Up @@ -48,13 +48,21 @@ sub index {
sub finish { sub finish {
my($self) = @_; my($self) = @_;


my $r = (tied %{$self->redis})->{_conn};

# Tie::Redis won't autovivify yet :( # Tie::Redis won't autovivify yet :(
$self->redis->{$self->name} ||= []; $self->redis->{$self->name} ||= [];


push @{$self->redis->{$self->name}}, encode_json { push @{$self->redis->{$self->name}}, $self->_slab->name;
file => $self->_slab->file_name,
zset => $self->_slab->zset_name for my $dist(keys %{$self->_slab->seen_dists}) {
}; my($author, $dist) = split m{/}, $dist, 2;

$self->redis->{"cpangrep:author:$author"} ||= [];
push @{$self->redis->{"cpangrep:author:$author"}}, $dist;

$r->hset("cpangrep:dists", $dist, $self->_slab->name);
}


$self->_slab(undef); $self->_slab(undef);


Expand Down

0 comments on commit cc5be45

Please sign in to comment.