Skip to content

Commit

Permalink
SERVER-2001 calculate query bounds using more general key expressions
Browse files Browse the repository at this point in the history
In sharding, given a key expression like {a : 1} or {a : -1} we
must translate a query to a set of bounds to figure out which shards
are relevant. This patch amends the keyBounds calculation function
so that patterns which start with "hashed" fields calculate the
right bounds.
  • Loading branch information
Kevin Matulef committed Oct 15, 2012
1 parent 732f351 commit 5a28905
Show file tree
Hide file tree
Showing 2 changed files with 129 additions and 36 deletions.
119 changes: 88 additions & 31 deletions src/mongo/db/keypattern.cpp
Expand Up @@ -47,64 +47,78 @@ namespace mongo {
return false;
}

typedef vector<pair<BSONObj,BSONObj> >::const_iterator BoundListIter;

BoundList KeyPattern::keyBounds( const FieldRangeSet& queryConstraints ) const {
// To construct our bounds we will generate intervals based on constraints for
// the first field, then compound intervals based on constraints for the first
// 2 fields, then compound intervals for the first 3 fields, etc.
// As we loop through the fields, we start generating new intervals that will later
// get extended in another iteration of the loop. We define these partially constructed
// intervals using pairs of BSONObjBuilders (shared_ptrs, since after one iteration of the
// loop they still must exist outside their scope).
typedef vector< pair< shared_ptr<BSONObjBuilder> ,
shared_ptr<BSONObjBuilder> > > BoundBuilders;
BoundBuilders builders;
builders.push_back( make_pair( shared_ptr<BSONObjBuilder>( new BSONObjBuilder() ),
shared_ptr<BSONObjBuilder>( new BSONObjBuilder() ) ) );
BSONObjIterator i( _pattern );
// until equalityOnly is false, we are just dealing with equality (no range or $in querys).
// until equalityOnly is false, we are just dealing with equality (no range or $in queries).
bool equalityOnly = true;
while( i.more() ) {
BSONElement e = i.next();

// get the relevant intervals for this field, but we may have to transform the
// list of what's relevant according to the expression for this field
const FieldRange &fr = queryConstraints.range( e.fieldName() );
int number = (int) e.number(); // returns 0.0 if not numeric
bool forward = ( number >= 0 );
const vector<FieldInterval> &oldIntervals = fr.intervals();
BoundList fieldBounds = _transformFieldBounds( oldIntervals , e );

if ( equalityOnly ) {
if ( fr.equality() ) {
for( BoundBuilders::const_iterator j = builders.begin(); j != builders.end(); ++j ) {
j->first->appendAs( fr.min(), "" );
j->second->appendAs( fr.min(), "" );
if ( fieldBounds.size() == 1 &&
( fieldBounds.front().first == fieldBounds.front().second ) ){
// this field is only a single point-interval
BoundBuilders::const_iterator j;
for( j = builders.begin(); j != builders.end(); ++j ) {
j->first->appendElements( fieldBounds.front().first );
j->second->appendElements( fieldBounds.front().first );
}
}
else {
// This clause is the first to generate more than a single point.
// We only execute this clause once. After that, we simplify the bound
// extensions to prevent combinatorial explosion.
equalityOnly = false;

BoundBuilders newBuilders;
const vector<FieldInterval> &intervals = fr.intervals();
for( BoundBuilders::const_iterator i = builders.begin(); i != builders.end(); ++i ) {
BoundBuilders::const_iterator i;
for( i = builders.begin(); i != builders.end(); ++i ) {
BSONObj first = i->first->obj();
BSONObj second = i->second->obj();

if ( forward ) {
for( vector<FieldInterval>::const_iterator j = intervals.begin(); j != intervals.end(); ++j ) {
uassert( 16449, "combinatorial limit of $in partitioning of result set exceeded", newBuilders.size() < MAX_IN_COMBINATIONS );
newBuilders.push_back( make_pair( shared_ptr<BSONObjBuilder>( new BSONObjBuilder() ), shared_ptr<BSONObjBuilder>( new BSONObjBuilder() ) ) );
newBuilders.back().first->appendElements( first );
newBuilders.back().second->appendElements( second );
newBuilders.back().first->appendAs( j->_lower._bound, "" );
newBuilders.back().second->appendAs( j->_upper._bound, "" );
}
}
else {
for( vector<FieldInterval>::const_reverse_iterator j = intervals.rbegin(); j != intervals.rend(); ++j ) {
uassert( 16450, "combinatorial limit of $in partitioning of result set exceeded", newBuilders.size() < MAX_IN_COMBINATIONS );
newBuilders.push_back( make_pair( shared_ptr<BSONObjBuilder>( new BSONObjBuilder() ), shared_ptr<BSONObjBuilder>( new BSONObjBuilder() ) ) );
newBuilders.back().first->appendElements( first );
newBuilders.back().second->appendElements( second );
newBuilders.back().first->appendAs( j->_upper._bound, "" );
newBuilders.back().second->appendAs( j->_lower._bound, "" );
}
for(BoundListIter j = fieldBounds.begin(); j != fieldBounds.end(); ++j ) {
uassert( 16452,
"combinatorial limit of $in partitioning of results exceeded" ,
newBuilders.size() < MAX_IN_COMBINATIONS );
newBuilders.push_back(
make_pair( shared_ptr<BSONObjBuilder>( new BSONObjBuilder() ),
shared_ptr<BSONObjBuilder>( new BSONObjBuilder())));
newBuilders.back().first->appendElements( first );
newBuilders.back().second->appendElements( second );
newBuilders.back().first->appendElements( j->first );
newBuilders.back().second->appendElements( j->second );
}
}
builders = newBuilders;
}
}
else {
for( BoundBuilders::const_iterator j = builders.begin(); j != builders.end(); ++j ) {
j->first->appendAs( forward ? fr.min() : fr.max(), "" );
j->second->appendAs( forward ? fr.max() : fr.min(), "" );
// if we've already generated a range or multiple point-intervals
// just extend what we've generated with min/max bounds for this field
BoundBuilders::const_iterator j;
for( j = builders.begin(); j != builders.end(); ++j ) {
j->first->appendElements( fieldBounds.front().first );
j->second->appendElements( fieldBounds.back().second );
}
}
}
Expand All @@ -114,4 +128,47 @@ namespace mongo {
return ret;
}

/* Converts the query intervals for a single key-pattern field into a BoundList of
 * (lower, upper) single-field objects, according to the expression given for 'field':
 *   - ascending  : every interval [a,b] is emitted as [a,b], in the order given
 *   - descending : every interval [a,b] is emitted as [b,a], in the opposite order
 *   - hashed     : every point interval [a,a] is emitted as [hash(a),hash(a)] and the
 *                  resulting list is sorted; the first non-point interval encountered
 *                  replaces everything with a single [MinKey,MaxKey] bound
 * For any other kind of expression nothing is emitted and the returned list is empty.
 */
BoundList KeyPattern::_transformFieldBounds( const vector<FieldInterval>& oldIntervals ,
                                             const BSONElement& field ) const {

    typedef vector<FieldInterval>::const_iterator IntervalIter;
    typedef vector<FieldInterval>::const_reverse_iterator ReverseIntervalIter;

    // the expression kind does not depend on the interval, so classify it up front
    const bool ascending = isAscending( field );
    const bool descending = isDescending( field );
    const bool hashed = isHashed( field );

    BoundList bounds;

    if ( ascending ) {
        // straightforward map [a,b] --> [a,b]
        for ( IntervalIter it = oldIntervals.begin(); it != oldIntervals.end(); ++it ) {
            bounds.push_back( make_pair( BSON( field.fieldName() << it->_lower._bound ) ,
                                         BSON( field.fieldName() << it->_upper._bound ) ) );
        }
    }
    else if ( descending ) {
        // flip each interval [a,b] --> [b,a]; walking the input backwards yields the
        // flipped intervals in decreasing order, i.e. [ [n,n-1] ... [4,3] , [2,1] ]
        for ( ReverseIntervalIter it = oldIntervals.rbegin(); it != oldIntervals.rend(); ++it ) {
            bounds.push_back( make_pair( BSON( field.fieldName() << it->_upper._bound ) ,
                                         BSON( field.fieldName() << it->_lower._bound ) ) );
        }
    }
    else if ( hashed ) {
        for ( IntervalIter it = oldIntervals.begin(); it != oldIntervals.end(); ++it ) {
            if ( ! it->equality() ) {
                // a range over a hashed field could hash to anywhere, so discard what
                // has been collected so far and cover everything from MinKey to MaxKey
                bounds.clear();
                bounds.push_back( make_pair( BSON( field.fieldName() << MINKEY ) ,
                                             BSON( field.fieldName() << MAXKEY ) ) );
                break;
            }

            // point interval: hash [a,a] --> [hash(a),hash(a)]
            long long int hashedValue =
                BSONElementHasher::hash64( it->_lower._bound ,
                                           BSONElementHasher::DEFAULT_HASH_SEED );
            bounds.push_back( make_pair( BSON( field.fieldName() << hashedValue ) ,
                                         BSON( field.fieldName() << hashedValue ) ) );
        }
    }

    if ( hashed && ! descending ) {
        // [ hash(a) , hash(b) , hash(c) ...] is no longer in order, so sort before returning
        sort( bounds.begin() , bounds.end() );
    }

    return bounds;
}

} // namespace mongo
46 changes: 41 additions & 5 deletions src/mongo/db/keypattern.h
Expand Up @@ -19,9 +19,11 @@
#pragma once

#include "mongo/db/jsobj.h"
#include "mongo/util/mongoutils/str.h"

namespace mongo {

class FieldInterval;
class FieldRangeSet;

/**
Expand Down Expand Up @@ -90,18 +92,52 @@ namespace mongo {
*/
BSONObj extractSingleKey( const BSONObj& doc ) const;

/**@param fromQuery a FieldRangeSet formed from parsing a query
* @return an ordered list of bounds generated using this KeyPattern
* and the constraints from the FieldRangeSet

/**@param queryConstraints a FieldRangeSet, usually formed from parsing a query
* @return an ordered list of bounds generated using this KeyPattern and the
* constraints from the FieldRangeSet. This function is used in sharding to
* determine where to route queries according to the shard key pattern.
*
* Examples:
* If this KeyPattern is { a : 1 }
* FieldRangeSet( {a : 5 } ) --> returns [{a : 5}, {a : 5 } ]
* FieldRangeSet( {a : {$gt : 3}} ) --> returns [({a : 3} , { a : MaxInt})]
*
* The value of frsp->matchPossibleForSingleKeyFRS(fromQuery) should be true,
* otherwise this function could throw.
* If this KeyPattern is { a : "hashed" }
* FieldRangeSet( {a : 5 } ) --> returns [ ({ a : hash(5) }, {a : hash(5) }) ]
*
* The bounds returned by this function may be a superset of those defined
* by the constraints. For instance, if this KeyPattern is {a : 1 , b : 1}
* FieldRangeSet( { a : {$in : [1,2]} , b : {$in : [3,4,5]} } )
* --> returns [ ({a : 1 , b : 3} , {a : 1 , b : 5}) ,
* ({a : 2 , b : 3} , {a : 2 , b : 5}) ]
*
* The queryConstraints should be defined for all the fields in this keypattern
* (i.e. the value of frsp->matchPossibleForSingleKeyFRS(_pattern) should be true,
* otherwise this function could throw).
*
*/
BoundList keyBounds( const FieldRangeSet& queryConstraints ) const;

private:
BSONObj _pattern;
/** @return true when 'fieldExpression' is numeric and its integer value is exactly 1 */
bool isAscending( const BSONElement& fieldExpression ) const {
    if ( ! fieldExpression.isNumber() ) {
        return false;
    }
    return fieldExpression.numberInt() == 1;
}
/** @return true when 'fieldExpression' is numeric and its integer value is exactly -1 */
bool isDescending( const BSONElement& fieldExpression ) const {
    if ( ! fieldExpression.isNumber() ) {
        return false;
    }
    return fieldExpression.numberInt() == -1;
}
/** @return true when the string value reported by valuestrsafe() for
 *  'fieldExpression' equals "hashed"
 */
bool isHashed( const BSONElement& fieldExpression ) const {
    const char* const expressionValue = fieldExpression.valuestrsafe();
    return mongoutils::str::equals( expressionValue , "hashed" );
}

/* Takes a list of intervals corresponding to constraints on a given field
* in this keypattern, and transforms them into a list of bounds
* based on the expression for 'field'
*/
BoundList _transformFieldBounds( const vector<FieldInterval>& oldIntervals ,
const BSONElement& field ) const;

};


Expand Down

0 comments on commit 5a28905

Please sign in to comment.