Permalink
Browse files

Optimize reduced/grouped queries through lazy parsing

Avoid parsing the keys and values from the index unless it's necessary.
This can speed up some queries a lot, if the reduce function doesn't access the keys or the values.
  • Loading branch information...
1 parent 8b3cb50 commit 2ad9cc877a95122adc515244cb2ad63188558f03 @snej snej committed Oct 31, 2012
Showing with 208 additions and 81 deletions.
  1. +7 −0 Source/TDCollateJSON.h
  2. +37 −5 Source/TDCollateJSON.m
  3. +10 −0 Source/TDJSON.h
  4. +33 −0 Source/TDJSON.m
  5. +110 −75 Source/TDView.m
  6. +10 −1 Source/TDView_Tests.m
  7. +1 −0 TouchDB.xcodeproj/project.pbxproj
View
@@ -16,6 +16,13 @@ int TDCollateJSON(void *context,
int len1, const void * chars1,
int len2, const void * chars2);
+/** Collation that compares only a limited number of top-level collection items.
+ If the first 'arrayLimit' items of the top-level array/object have been parsed and are equal, it will stop and return 0. (This is useful for view result grouping.) */
+int TDCollateJSONLimited(void *context,
+ int len1, const void * chars1,
+ int len2, const void * chars2,
+ unsigned arrayLimit);
+
// CouchDB's default collation rules, including Unicode collation for strings
#define kTDCollateJSON_Unicode ((void*)0)
View
@@ -205,20 +205,26 @@ static double readNumber(const char* start, const char* end, char** endOfNumber)
}
-int TDCollateJSON(void *context,
- int len1, const void * chars1,
- int len2, const void * chars2)
+int TDCollateJSONLimited(void *context,
+ int len1, const void * chars1,
+ int len2, const void * chars2,
+ unsigned arrayLimit)
{
const char* str1 = chars1;
const char* str2 = chars2;
int depth = 0;
+ unsigned arrayIndex = 0;
do {
// Get the types of the next token in each string:
ValueType type1 = valueTypeOf(*str1);
ValueType type2 = valueTypeOf(*str2);
// If types don't match, stop and return their relative ordering:
if (type1 != type2) {
+ if (depth == 1 && (type1 == kComma || type2 == kComma)) {
+ if (++arrayIndex >= arrayLimit)
+ return 0;
+ }
if (context != kTDCollateJSON_Raw)
return cmp(type1, type2);
else
@@ -272,9 +278,14 @@ int TDCollateJSON(void *context,
case kEndObject:
++str1;
++str2;
+ if (depth == 1 && (++arrayIndex >= arrayLimit))
+ return 0;
--depth;
break;
case kComma:
+ if (depth == 1 && (++arrayIndex >= arrayLimit))
+ return 0;
+ // else fall through:
case kColon:
++str1;
++str2;
@@ -287,6 +298,14 @@ int TDCollateJSON(void *context,
}
+/** Collates two JSON strings in full by forwarding to TDCollateJSONLimited
+    with an item limit of UINT_MAX.
+    The context, lengths and character pointers are passed through unchanged. */
+int TDCollateJSON(void *context,
+                  int len1, const void * chars1,
+                  int len2, const void * chars2)
+{
+    // UINT_MAX as the limit: the top-level item counter is not expected to reach it.
+    const unsigned noItemLimit = UINT_MAX;
+    int ordering = TDCollateJSONLimited(context, len1, chars1, len2, chars2, noItemLimit);
+    return ordering;
+}
+
+
#pragma mark - UNIT TESTS:
@@ -313,15 +332,19 @@ static void testEscape(const char* source, char decoded) {
testEscape("\\u0000", 0);
}
-static int collate(void *mode, const void * str1, const void * str2) {
+// Test helper: collates two NUL-terminated JSON strings with TDCollateJSONLimited,
+// passing their strlen() as the lengths and `arrayLimit` as the item limit.
+static int collateLimited(void *mode, const void * str1, const void * str2, unsigned arrayLimit) {
// Be evil and put numeric garbage past the ends of str1 and str2 (see bug #138):
// Each buffer has room for the string, the two garbage digits and the NUL.
size_t len1 = strlen(str1), len2 = strlen(str2);
char buf1[len1 + 3], buf2[len2 + 3];
strlcpy(buf1, str1, sizeof(buf1));
strlcat(buf1, "99", sizeof(buf1));
// Bound the copies into buf2 by buf2's own size. Using sizeof(buf1) here truncated
// str2 whenever str1 was shorter, leaving bytes inside len2 uninitialized.
strlcpy(buf2, str2, sizeof(buf2));
strlcat(buf2, "88", sizeof(buf2));
- return TDCollateJSON(mode, (int)len1, buf1, (int)len2, buf2);
+ return TDCollateJSONLimited(mode, (int)len1, buf1, (int)len2, buf2, arrayLimit);
+}
+
+// Test helper: same as collateLimited, with UINT_MAX passed as the item limit.
+static int collate(void *mode, const void * str1, const void * str2) {
+    const unsigned noItemLimit = UINT_MAX;
+    int ordering = collateLimited(mode, str1, str2, noItemLimit);
+    return ordering;
}
TestCase(TDCollateScalars) {
@@ -407,4 +430,13 @@ static int collate(void *mode, const void * str1, const void * str2) {
CAssertEq(collate(mode, encode(@"\t"), encode(@" ")), -1);
CAssertEq(collate(mode, encode(@"\001"), encode(@" ")), -1);
}
+
+TestCase(TDCollateLimited) { // Checks collateLimited results for item limits of 1 and 2
+ void* mode = kTDCollateJSON_Unicode; // all cases below use the Unicode collation mode
+ CAssertEq(collateLimited(mode, "[5,\"wow\"]", "[4,\"wow\"]", 1), 1); // first items differ (5 vs 4), so the result is nonzero
+ CAssertEq(collateLimited(mode, "[5,\"wow\"]", "[5,\"wow\"]", 1), 0); // identical arrays
+ CAssertEq(collateLimited(mode, "[5,\"wow\"]", "[5,\"MOM\"]", 1), 0); // only the second items differ; with limit 1 they are not compared
+ CAssertEq(collateLimited(mode, "[5,\"wow\"]", "[5]", 1), 0); // arrays differ in length, but the first items match
+ CAssertEq(collateLimited(mode, "[5,\"wow\"]", "[5,\"MOM\"]", 2), 1); // limit 2 brings the second items into the comparison
+}
#endif
View
@@ -70,3 +70,13 @@ typedef NSUInteger TDJSONWritingOptions;
+ (NSData*) appendDictionary: (NSDictionary*)dict
toJSONDictionaryData: (NSData*)json;
@end
+
+
+/** Wrapper for an NSArray of JSON data, that avoids having to parse the data if it's not used.
+ NSData objects in the array will be parsed into native objects before being returned to the caller from -objectAtIndex.
+ The wrapped mutable array is retained rather than copied, and each parsed object replaces
+ its NSData in that same array, so the array passed to the initializer is modified as
+ items are accessed. */
+@interface TDLazyArrayOfJSON : NSArray
+{
+ NSMutableArray* _array; // backing store; holds NSData until an item is first accessed
+}
+- (id) initWithArray: (NSMutableArray*)array; // retains `array` (no copy is made)
+@end
View
@@ -107,3 +107,36 @@ + (NSData*) appendDictionary: (NSDictionary*)dict
@end
+
+
+/** NSArray subclass over a mutable backing array whose NSData items are parsed as JSON
+    the first time they are requested. Implements -count and -objectAtIndex:. */
+@implementation TDLazyArrayOfJSON
+
+/** Initializes the wrapper around `array`. The array is retained, not copied, and
+    parsed objects are written back into it by -objectAtIndex:. */
+- (id) initWithArray: (NSMutableArray*)array {
+    self = [super init];
+    if (self) {
+        _array = [array retain];
+    }
+    return self;
+}
+
+- (void)dealloc
+{
+    [_array release];
+    [super dealloc];
+}
+
+/** Number of items in the backing array. */
+- (NSUInteger)count {
+    return _array.count;
+}
+
+/** Returns the item at `index`. An NSData item is parsed as JSON (fragments allowed)
+    and the parsed object is stored back in the backing array, so the parse is not
+    repeated. If parsing fails, NSNull is stored and returned instead. Items that are
+    not NSData are returned unchanged. */
+- (id)objectAtIndex:(NSUInteger)index {
+    id obj = [_array objectAtIndex: index];
+    if ([obj isKindOfClass: [NSData class]]) {
+        id parsed = [TDJSON JSONObjectWithData: obj
+                                       options: TDJSONReadingAllowFragments
+                                         error: NULL];
+        // -replaceObjectAtIndex:withObject: raises on a nil object, so malformed JSON
+        // must not reach it as nil; NSNull serves as the placeholder.
+        obj = parsed ?: [NSNull null];
+        [_array replaceObjectAtIndex: index withObject: obj];
+    }
+    return obj;
+}
+
+@end
View
@@ -15,6 +15,7 @@
#import "TDView.h"
#import "TDInternal.h"
+#import "TDCollateJSON.h"
#import "FMDatabase.h"
#import "FMDatabaseAdditions.h"
@@ -410,28 +411,6 @@ - (FMResultSet*) resultSetWithOptions: (const TDQueryOptions*)options
}
-// Are key1 and key2 grouped together at this groupLevel?
-static bool groupTogether(id key1, id key2, unsigned groupLevel) {
- if (groupLevel == 0 || ![key1 isKindOfClass: [NSArray class]]
- || ![key2 isKindOfClass: [NSArray class]])
- return [key1 isEqual: key2];
- unsigned end = MIN(groupLevel, MIN([key1 count], [key2 count]));
- for (unsigned i = 0; i< end; ++i) {
- if (![key1[i] isEqual: key2[i]])
- return false;
- }
- return true;
-}
-
-// Returns the prefix of the key to use in the result row, at this groupLevel
-static id groupKey(id key, unsigned groupLevel) {
- if (groupLevel > 0 && [key isKindOfClass: [NSArray class]] && [key count] > groupLevel)
- return [key subarrayWithRange: NSMakeRange(0, groupLevel)];
- else
- return key;
-}
-
-
- (NSArray*) queryWithOptions: (const TDQueryOptions*)options
status: (TDStatus*)outStatus
{
@@ -442,47 +421,28 @@ - (NSArray*) queryWithOptions: (const TDQueryOptions*)options
if (!r)
return nil;
+ NSMutableArray* rows;
+
unsigned groupLevel = options->groupLevel;
bool group = options->group || groupLevel > 0;
- bool reduce = options->reduce || group;
+ if (options->reduce || group) {
+ // Reduced or grouped query:
+ // (A plain reduce requires a reduce block; a grouped query is let through without one.)
+ if (!_reduceBlock && !group) {
+ Warn(@"Cannot use reduce option in view %@ which has no reduce block defined", _name);
+ *outStatus = kTDStatusBadParam;
+ return nil;
+ }
+ rows = [self reducedQuery: r group: group groupLevel: groupLevel];
- if (reduce && !_reduceBlock && !group) {
- Warn(@"Cannot use reduce option in view %@ which has no reduce block defined", _name);
- *outStatus = kTDStatusBadParam;
- return nil;
- }
-
- NSMutableArray* rows = $marray();
- NSMutableArray* keysToReduce=nil, *valuesToReduce=nil;
- id lastKey = nil;
- if (reduce) {
- keysToReduce = [[NSMutableArray alloc] initWithCapacity: 100];
- valuesToReduce = [[NSMutableArray alloc] initWithCapacity: 100];
- }
-
- while ([r next]) {
- @autoreleasepool {
- id key = fromJSON([r dataNoCopyForColumnIndex: 0]);
- id value = fromJSON([r dataNoCopyForColumnIndex: 1]);
- Assert(key);
- if (reduce) {
- // Reduced or grouped query:
- if (group && !groupTogether(key, lastKey, groupLevel) && lastKey) {
- // This pair starts a new group, so reduce & record the last one:
- id reduced = _reduceBlock ? _reduceBlock(keysToReduce, valuesToReduce,NO) : nil;
- [rows addObject: $dict({@"key", groupKey(lastKey, groupLevel)},
- {@"value", (reduced ?: $null)})];
- [keysToReduce removeAllObjects];
- [valuesToReduce removeAllObjects];
- }
- LogTo(ViewVerbose, @"Query %@: Will reduce row with key=%@, value=%@",
- _name, toJSONString(key), toJSONString(value));
- [keysToReduce addObject: key];
- [valuesToReduce addObject: value ?: $null];
- lastKey = key;
-
- } else {
- // Regular query:
+ } else {
+ // Regular query:
+ rows = $marray();
+ while ([r next]) {
+ @autoreleasepool {
+ id key = fromJSON([r dataNoCopyForColumnIndex: 0]);
+ id value = fromJSON([r dataNoCopyForColumnIndex: 1]);
+ Assert(key);
NSString* docID = [r stringForColumnIndex: 2];
id docContents = nil;
if (options->includeDocs) {
@@ -514,28 +474,103 @@ - (NSArray*) queryWithOptions: (const TDQueryOptions*)options
}
}
}
-
- if (reduce) {
- if (keysToReduce.count > 0) {
- // Finish the last group (or the entire list, if no grouping):
- id key = group ? groupKey(lastKey, groupLevel) : $null;
- id reduced = _reduceBlock ? _reduceBlock(keysToReduce, valuesToReduce,NO) : nil;
- LogTo(ViewVerbose, @"Query %@: Reduced to key=%@, value=%@",
- _name, toJSONString(key), toJSONString(reduced));
- [rows addObject: $dict({@"key", key},
- {@"value", (reduced ?: $null)})];
- }
- [keysToReduce release];
- [valuesToReduce release];
- }
-
+
[r close];
*outStatus = kTDStatusOK;
LogTo(View, @"Query %@: Returning %u rows", _name, (unsigned)rows.count);
return rows;
}
+#pragma mark - REDUCING/GROUPING:
+
+
+// Decides whether two JSON-encoded keys belong to the same group.
+// Returns NO if either key is nil. Otherwise the keys are collated with
+// TDCollateJSONLimited (Unicode rules) using groupLevel as the item limit,
+// or UINT_MAX when groupLevel is 0, and they group together when the result is 0.
+static bool groupTogether(NSData* key1, NSData* key2, unsigned groupLevel) {
+    if (key1 == nil || key2 == nil)
+        return NO;
+    const unsigned itemLimit = (groupLevel != 0) ? groupLevel : UINT_MAX;
+    int ordering = TDCollateJSONLimited(kTDCollateJSON_Unicode,
+                                        (int)key1.length, key1.bytes,
+                                        (int)key2.length, key2.bytes,
+                                        itemLimit);
+    return ordering == 0;
+}
+
+// Parses the JSON key and returns the key to report for its group.
+// When groupLevel is nonzero and the parsed key is an array with more than
+// groupLevel items, only the first groupLevel items are returned; in every other
+// case the parsed key is returned as-is.
+static id groupKey(NSData* keyJSON, unsigned groupLevel) {
+    id parsedKey = fromJSON(keyJSON);
+    BOOL isGroupableArray = groupLevel > 0 && [parsedKey isKindOfClass: [NSArray class]];
+    if (!isGroupableArray || [parsedKey count] <= groupLevel)
+        return parsedKey;
+    NSRange prefix = NSMakeRange(0, groupLevel);
+    return [parsedKey subarrayWithRange: prefix];
+}
+
+
+// Invokes the reduce function on the parallel arrays of keys and values.
+// Both arrays are wrapped in TDLazyArrayOfJSON before being handed to the block.
+// The block is called with NO as its third argument.
+// Always returns a non-nil object: $null is returned when the view has no reduce
+// block or when the block itself returns nil, so callers can store the result
+// directly as a row's value.
+- (id) reduceKeys: (NSMutableArray*)keys values: (NSMutableArray*)values {
+    if (!_reduceBlock)
+        return $null;   // previously nil, which left the row without a usable value
+    TDLazyArrayOfJSON* lazyKeys = [[TDLazyArrayOfJSON alloc] initWithArray: keys];
+    TDLazyArrayOfJSON* lazyVals = [[TDLazyArrayOfJSON alloc] initWithArray: values];
+    id result = _reduceBlock(lazyKeys, lazyVals, NO);
+    [lazyKeys release];
+    [lazyVals release];
+    return result ?: $null;
+}
+
+
+// Runs a reduced and/or grouped query over the result set `r` and returns the rows.
+// Column 0 of each result is read as the key data and column 1 as the value data;
+// both are kept as unparsed NSData and passed to -reduceKeys:values:.
+// When `group` is set, a row is emitted each time the key leaves the current group
+// (as decided by groupTogether at `groupLevel`), plus one row for the final group.
+// When `group` is not set, a single row with a null key is emitted if any data was
+// collected. Each row is a dictionary with "key" and "value" entries.
+- (NSMutableArray*) reducedQuery: (FMResultSet*)r group: (BOOL)group groupLevel: (unsigned)groupLevel
+{
+    // Keys and values are only accumulated when there is a reduce block to consume them.
+    NSMutableArray* keysToReduce = nil, *valuesToReduce = nil;
+    if (_reduceBlock) {
+        keysToReduce = [[NSMutableArray alloc] initWithCapacity: 100];
+        valuesToReduce = [[NSMutableArray alloc] initWithCapacity: 100];
+    }
+    NSData* lastKeyData = nil;      // owned copy of the first key of the current group
+
+    NSMutableArray* rows = $marray();
+    while ([r next]) {
+        @autoreleasepool {
+            NSData* keyData = [r dataForColumnIndex: 0];
+            NSData* valueData = [r dataForColumnIndex: 1];
+            Assert(keyData);
+            if (group && !groupTogether(keyData, lastKeyData, groupLevel)) {
+                if (lastKeyData) {
+                    // This pair starts a new group, so reduce & record the last one:
+                    id reduced = [self reduceKeys: keysToReduce values: valuesToReduce];
+                    [rows addObject: $dict({@"key", groupKey(lastKeyData, groupLevel)},
+                                           {@"value", (reduced ?: $null)})];
+                    [keysToReduce removeAllObjects];
+                    [valuesToReduce removeAllObjects];
+                    [lastKeyData release];
+                }
+                lastKeyData = [keyData copy];
+            }
+            LogTo(ViewVerbose, @"Query %@: Will reduce row with key=%@, value=%@",
+                  _name, [keyData my_UTF8ToString], [valueData my_UTF8ToString]);
+            [keysToReduce addObject: keyData];
+            [valuesToReduce addObject: valueData ?: $null];
+        }
+    }
+
+    // Finish the last group (or the entire list, if no grouping).
+    // lastKeyData is also checked because, without a reduce block, keysToReduce is nil
+    // and its count is always 0; testing the count alone dropped the final group.
+    if (keysToReduce.count > 0 || lastKeyData) {
+        id key = group ? groupKey(lastKeyData, groupLevel) : $null;
+        id reduced = [self reduceKeys: keysToReduce values: valuesToReduce];
+        LogTo(ViewVerbose, @"Query %@: Reduced to key=%@, value=%@",
+              _name, toJSONString(key), toJSONString(reduced));
+        [rows addObject: $dict({@"key", key}, {@"value", (reduced ?: $null)})];
+    }
+    [keysToReduce release];
+    [valuesToReduce release];
+    [lastKeyData release];
+    return rows;
+}
+
+
+#pragma mark - OTHER:
+
// This is really just for unit tests & debugging
- (NSArray*) dump {
if (self.viewID <= 0)
Oops, something went wrong.

0 comments on commit 2ad9cc8

Please sign in to comment.