Permalink
Browse files

Make file names and zset names the same, build index of dists

  • Loading branch information...
1 parent 01c1fe1 commit cc5be454a34f44eefb8743b913057d39c02b2834 @dgl committed Mar 30, 2012
Showing with 27 additions and 20 deletions.
  1. +4 −4 bin/cpangrep-matcher
  2. +2 −2 lib/WWW/CPANGrep/Index/Worker.pm
  3. +9 −10 lib/WWW/CPANGrep/Slab/Writer.pm
  4. +12 −4 lib/WWW/CPANGrep/Slabs.pm
View
@@ -42,14 +42,14 @@ sub do_match {
$re = qr/$re/m;
my $i = 0;
- my $fh_next = open_cached($dir . "/" . $process->[$i++]->{file});
+ my $fh_next = open_cached($dir . "/" . $process->[$i++]);
for my $file(@$process) {
my $fh = $fh_next;
mmap my $pm, -s $fh, IO::AIO::PROT_READ, IO::AIO::MAP_SHARED, $fh or die $!;
if(my $next = $process->[$i++]) {
- $fh_next = open_cached($dir . "/" . $next->{file});
+ $fh_next = open_cached($dir . "/" . $next);
# On machines with spare IO bandwidth this seemed to help; however, I'm now
# running on VMs, where it seems to be less of a help.
#aio_readahead $fh_next, 0, -s $next;
@@ -78,7 +78,7 @@ sub do_match {
}
push @results, {
- zset => $file->{zset},
+ zset => $file,
text => substr($pm, $previous, $next - $previous),
snippet => [$previous, $next],
match => [$-[0], $+[0]]
@@ -100,7 +100,7 @@ while(1) {
last unless $item->[0];
print "$$: processing job: $item->[1]\n";
my $job = decode_json $item->[1];
- my $slabs = [map decode_json($_), @{$redis->lrange($job->{slablist}, @{$job->{slabs}})->recv}];
+ my $slabs = [@{$redis->lrange($job->{slablist}, @{$job->{slabs}})->recv}];
my $max = 500;
my $start = time;
do_match($job->{re}, $max, $job->{notify}, $slabs);
@@ -108,12 +108,12 @@ sub index_dist {
}
}, ".";
- my $redis = tied %{$self->redis};
+ my $redis_conn = (tied %{$self->redis})->{_conn};
for my $file(@files) {
next if $file eq 'MANIFEST';
my $mime_type = $self->_mmagic->get_mime($file);
- #$redis->hincrby("mime_stats", $mime_type, 1);
+ $redis_conn->hincrby("mime_stats", $mime_type, 1);
if($mime_type !~ /^text/) {
warn "Ignoring binary file $file ($mime_type, in $dist)\n";
@@ -21,10 +21,10 @@ has redis => (
required => 1,
);
-has zset_name => (
+has name => (
is => 'ro',
isa => 'Str',
- default => sub { "slab:zset:process:$$-" . ++$COUNTER },
+ default => sub { "slab:zset:$$-" . ++$COUNTER },
);
has rotate_size => (
@@ -33,13 +33,10 @@ has rotate_size => (
default => sub { 10 * 1024 * 1024 }, # 10mb
);
-has file_name => (
+has seen_dists => (
is => 'ro',
- isa => 'Str',
- default => sub {
- my($self) = @_;
- "$$-" . time . "-" . ++$COUNTER;
- },
+ isa => 'HashRef',
+ default => sub { {} },
);
has _size => (
@@ -54,7 +51,7 @@ has _fh => (
lazy => 1,
default => sub {
my($self) = @_;
- open my $fh, ">", $self->dir . "/" . $self->file_name or die $!;
+ open my $fh, ">", $self->dir . "/" . $self->name or die $!;
binmode $fh;
$fh;
},
@@ -80,12 +77,14 @@ sub index {
print {$self->_fh} $content, SLAB_SEPERATOR;
- $self->redis->zadd($self->zset_name, $self->_size, encode_json {
+ $self->redis->zadd($self->name, $self->_size, encode_json {
size => length($content),
dist => $dist,
file => $file
});
+ $self->{seen_dists}{$dist}++;
+
$self->_size($self->_size + length($content) + length SLAB_SEPERATOR);
}
View
@@ -48,13 +48,21 @@ sub index {
sub finish {
my($self) = @_;
+ my $r = (tied %{$self->redis})->{_conn};
+
# Tie::Redis won't autovivify yet :(
$self->redis->{$self->name} ||= [];
- push @{$self->redis->{$self->name}}, encode_json {
- file => $self->_slab->file_name,
- zset => $self->_slab->zset_name
- };
+ push @{$self->redis->{$self->name}}, $self->_slab->name;
+
+ for my $dist(keys %{$self->_slab->seen_dists}) {
+ my($author, $dist) = split m{/}, $dist, 2;
+
+ $self->redis->{"cpangrep:author:$author"} ||= [];
+ push @{$self->redis->{"cpangrep:author:$author"}}, $dist;
+
+ $r->hset("cpangrep:dists", $dist, $self->_slab->name);
+ }
$self->_slab(undef);

0 comments on commit cc5be45

Please sign in to comment.