Permalink
Browse files

Changed the mapping for UIDs to use the full path name, not 'just_name'.

This fixes a major bug where all Doc class attributes in an object were
being indexed in the same fields, so it was impossible to distinguish
a search for (mother => $user) from (father => $user).

Unfortunately, this means it is no longer possible to search for just
any UID using queryb($uid), as it now needs the full path name for
the attribute, eg queryb( 'user.partner' => $user ).

Consequently, I've also had to change the Index->repoint_uids to search
in all possible UID attributes.

Fixes #8
  • Loading branch information...
1 parent 0246658 commit e351eebd4d1e85c4d2011f32bbd5001f84ea2606 @clintongormley committed Aug 14, 2012
View
@@ -105,9 +105,9 @@ sub repoint_uids {
my $bulk_size = $args{bulk_size} || $size;
my $model = $self->model;
my $index_name = $self->name;
- my $uids = $args{uids} || [];
+ my $uids = $args{uids} || {};
- unless (@$uids) {
+ unless (%$uids) {
print "No UIDs to repoint\n" if $verbose;
return 1;
}
@@ -120,6 +120,12 @@ sub repoint_uids {
return 1;
}
+ my @uid_attrs = $self->_uid_attrs_for_indices(@indices);
+ unless (@uid_attrs) {
+ print "No UIDs to repoint\n" if $verbose;
+ return 1;
+ }
+
my $view = $model->view->domain( \@indices )->size($size);
my $doc_updater = sub {
@@ -131,47 +137,100 @@ sub repoint_uids {
my %map;
my $uid_updater = sub {
my $uid = shift;
- my $key = join "\0", @{$uid}{ 'index', 'type', 'id' };
- $uid->{index} = $index_name
- if $map{$key};
+ return unless $uids->{ $uid->{index} }{ $uid->{type} }{ $uid->{id} };
+ $uid->{index} = $index_name;
};
my $updater = $self->doc_updater( $doc_updater, $uid_updater );
- printf( "Repointing %d UIDs\n", scalar @$uids ) if $verbose;
local $| = $verbose;
- while (@$uids) {
- %map = ();
- print "." if $verbose;
-
- my @clauses;
- for ( splice @$uids, 0, $size ) {
- $map{ join( "\0", @$_ ) } = 1;
- push @clauses,
- {
- 'uid.index' => $_->[0],
- 'uid.type' => $_->[1],
- 'uid.id' => $_->[2],
- };
+ for my $index ( keys %$uids ) {
+ my $types = $uids->{$index};
+ for my $type ( keys %$types ) {
+ my @ids = keys %{ $types->{$type} };
+
+ printf( "Repointing %d UIDs from %s/%s ",
+ 0 + @ids, $index, $type )
+ if $verbose;
+
+ while (@ids) {
+ print "." if $verbose;
+
+ my $clauses
+ = $self->_build_uid_clauses( \@uid_attrs, $index, $type,
+ [ splice @ids, 0, $size ] );
+
+ my $source = $view->filter( or => $clauses )->scan($scan);
+ $model->es->reindex(
+ source => $source,
+ _method_name => 'shift_element',
+ bulk_size => $bulk_size,
+ quiet => 1,
+ transform => $updater,
+ on_conflict => $args{on_conflict},
+ on_error => $args{on_error},
+ );
+ }
+ print "\n" if $verbose;
}
+ }
+
+ print "\nDone\n" if $verbose;
+ return 1;
+}
- my $source = $view->filterb( \@clauses )->scan($scan);
+#===================================
+sub _uid_attrs_for_indices {
+#===================================
+ my $self = shift;
+ my @indices = @_;
+ my $mapping = $self->model->es->mapping( index => \@indices );
+ my %attrs = map { $_ => 1 }
+ map { _find_uid_attrs( $_->{properties} ) }
+ map { values %$_ } values %$mapping;
+ return keys %attrs;
- $model->es->reindex(
- source => $source,
- _method_name => 'shift_element',
- bulk_size => $bulk_size,
- quiet => 1,
- transform => $updater,
- on_conflict => $args{on_conflict},
- on_error => $args{on_error},
- );
+}
+#===================================
+sub _find_uid_attrs {
+#===================================
+ my ( $mapping, $level ) = @_;
+
+ my @attrs;
+ $level = '' unless $level;
+
+ keys %$mapping;
+ while ( my ( $k, $v ) = each %$mapping ) {
+ next unless $v->{properties};
+ my $attr = $level ? "$level.$k" : $k;
+
+ if ( $k eq 'uid' and $v->{properties} and $v->{properties}{index} ) {
+ push @attrs, $attr;
+ next;
+ }
+ push @attrs, _find_uid_attrs( $v->{properties} || {}, $attr );
}
+ return @attrs;
+}
- print "\nDone\n" if $verbose;
- return 1;
+#===================================
+sub _build_uid_clauses {
+#===================================
+ my ( $self, $uid_attrs, $index, $type, $ids ) = @_;
+ my @clauses;
+ for my $id (@$ids) {
+ push @clauses, map {
+ +{ and => [
+ { term => { "$_.index" => $index } },
+ { term => { "$_.type" => $type } },
+ { term => { "$_.id" => $id } }
+ ]
+ }
+ } @$uid_attrs;
+ }
+ return \@clauses;
}
#===================================
@@ -386,19 +445,23 @@ Parameters:
=item uids
-C<uids> is an array ref, containing a list of the stale
-L<UIDs|Elastic::Model::UID> which should be updated.
+C<uids> is a hash ref containing the stale L<UIDs|Elastic::Model::UID> which
+should be updated, keyed by index, then type, then ID.
For instance: you have reindexed C<myapp_v1> to C<myapp_v2>, but domain
C<other> has documents with UIDs which point to C<myapp_v1>. You
can update these by passing a hash of the old UIDs, as follows:
$index = $namespace->index('myapp_v2');
$index->repoint_uids(
- uids => [
- ['myapp_v1','user',1], # ie old_index, type, ID
- ['myapp_v1','user',2],
- ]
+ uids => {
+ myapp_v1 => { # old index
+ user => { # type
+ 1 => 1, # ids
+ 2 => 1,
+ }
+ }
+ }
);
=item exclude
@@ -6,20 +6,6 @@ use parent 'ElasticSearch::SearchBuilder';
use Carp;
#===================================
-sub _top_ElasticDocREF {
-#===================================
- my ( $self, $type, $doc ) = @_;
- $self->_uid_to_terms( $type, '', $doc->uid );
-}
-
-#===================================
-sub _top_ElasticUIDREF {
-#===================================
- my ( $self, $type, $uid ) = @_;
- $self->_uid_to_terms( $type, '', $uid );
-}
-
-#===================================
sub _hashpair_ElasticDocREF {
#===================================
my ( $self, $type, $k, $v ) = @_;
@@ -76,11 +62,10 @@ sub _query_field_text {
sub _uid_to_terms {
#===================================
my ( $self, $type, $k, $uid ) = @_;
- $k = length $k ? $k . '.' : '';
my @clauses;
for (qw(index type id)) {
my $val = $uid->$_ or croak "UID missing ($_)";
- push @clauses, { term => { "${k}uid.$_" => $val } };
+ push @clauses, { term => { "${k}.uid.$_" => $val } };
}
return $type eq 'query'
? { bool => { must => \@clauses } }
@@ -233,13 +218,3 @@ You can use either the doc/object itself, or an L<Elastic::Model::UID> object:
$view->queryb ( user => { '!=' => \@users })->search;
$view->filterb( user => { '!=' => \@users })->search;
-=head2 Docs that contain the C<$user> in any field
-
- $view->queryb ( $user )->search;
- $view->filterb( $user )->search;
-
-=head2 Docs where C<status> is C<active> and any field contains C<$user>:
-
- $view->queryb ( '' => $user, status => 'active' )->search
- $view->filterb( '' => $user, status => 'active' )->search
-
@@ -30,7 +30,6 @@ has_type 'Elastic::Model::Types::UID',
index => 'not_analyzed',
omit_norms => 1,
omit_term_freq_and_positions => 1,
- index_name => "uid.${_}",
}
} qw(index type id routing);
@@ -41,7 +40,7 @@ has_type 'Elastic::Model::Types::UID',
type => 'object',
dynamic => 'strict',
properties => \%props,
- path => 'just_name'
+ path => 'full'
);
};
@@ -132,26 +131,23 @@ via L<Elastic::Model::UID/"new_from_store()">. It is mapped as:
{
type => 'object',
dynamic => 'strict',
- path => 'just_name',
+ path => 'full',
properties => {
index => {
type => 'string',
index => 'not_analyzed',
- index_name => 'uid.index',
omit_norms => 1,
omit_term_freq_and_positions => 1,
},
type => {
type => 'string',
index => 'not_analyzed',
- index_name => 'uid.type',
omit_norms => 1,
omit_term_freq_and_positions => 1,
},
id => {
type => 'string',
index => 'not_analyzed',
- index_name => 'uid.id',
omit_norms => 1,
omit_term_freq_and_positions => 1,
},
@@ -99,21 +99,19 @@ sub uid {
#===================================
+{ type => "object",
dynamic => "strict",
- path => 'just_name',
+ path => 'full',
properties => {
id => {
index => "not_analyzed",
omit_norms => 1,
omit_term_freq_and_positions => 1,
type => "string",
- index_name => 'uid.id',
},
index => {
index => "not_analyzed",
omit_norms => 1,
omit_term_freq_and_positions => 1,
type => "string",
- index_name => 'uid.index',
},
routing => {
index => "no",
@@ -126,7 +124,6 @@ sub uid {
omit_norms => 1,
omit_term_freq_and_positions => 1,
type => "string",
- index_name => 'uid.type',
},
},
};
@@ -88,7 +88,7 @@ sub index_count {
my $terms = $model->es->search(
@_,
size => 0,
- facets => { index => { terms => { field => 'uid.index' } } }
+ facets => { index => { terms => { field => 'user.uid.index' } } }
)->{facets}{index}{terms};
return +{ map { $_->{term} => $_->{count} } @$terms };
@@ -33,48 +33,6 @@ my @objs = ( $user, $uid );
my $a = Elastic::Model::SearchBuilder->new;
test_filters(
- 'SCALAR',
-
- 'Object', $user,
- { and => [
- { term => { 'uid.index' => 'myapp' } },
- { term => { 'uid.type' => 'user' } },
- { term => { 'uid.id' => 1 } },
- ]
- },
-
- 'UID', $uid,
- { and => [
- { term => { 'uid.index' => 'myapp' } },
- { term => { 'uid.type' => 'user' } },
- { term => { 'uid.id' => 1 } },
- ]
- },
-);
-
-test_filters(
- 'HASHREF - no key',
- 'Object',
- { '' => $user },
- { and => [
- { term => { 'uid.index' => 'myapp' } },
- { term => { 'uid.type' => 'user' } },
- { term => { 'uid.id' => 1 } },
- ]
- },
-
- 'UID',
- { '' => $uid },
- { and => [
- { term => { 'uid.index' => 'myapp' } },
- { term => { 'uid.type' => 'user' } },
- { term => { 'uid.id' => 1 } },
- ]
- },
-
-);
-
-test_filters(
'HASHREF - key',
'Object',
{ 'user' => $user },
Oops, something went wrong.

0 comments on commit e351eeb

Please sign in to comment.