Skip to content
This repository has been archived by the owner on Mar 9, 2022. It is now read-only.

Commit

Permalink
Optimize reduced/grouped queries through lazy parsing
Browse files Browse the repository at this point in the history
Avoid parsing the keys and values from the index unless it's necessary.
This can speed up some queries a lot, if the reduce function doesn't access the keys or the values.
  • Loading branch information
snej committed Oct 31, 2012
1 parent 8b3cb50 commit 2ad9cc8
Show file tree
Hide file tree
Showing 7 changed files with 208 additions and 81 deletions.
7 changes: 7 additions & 0 deletions Source/TDCollateJSON.h
Expand Up @@ -16,6 +16,13 @@ int TDCollateJSON(void *context,
int len1, const void * chars1,
int len2, const void * chars2);

/** Collation that compares only a limited number of top-level collection items.
If the first 'arrayLimit' items of the top-level array/object have been parsed and are equal, it will stop and return 0. (This is useful for view result grouping.)
@param context     Collation mode, e.g. kTDCollateJSON_Unicode.
@param len1        Length in bytes of the first JSON string.
@param chars1      The first JSON string.
@param len2        Length in bytes of the second JSON string.
@param chars2      The second JSON string.
@param arrayLimit  Maximum number of top-level items to compare. TDCollateJSON passes UINT_MAX, i.e. effectively no limit.
@return  0 if the inputs are equal within the compared items; otherwise a nonzero value whose sign gives the order of the first string relative to the second. */
int TDCollateJSONLimited(void *context,
int len1, const void * chars1,
int len2, const void * chars2,
unsigned arrayLimit);

// CouchDB's default collation rules, including Unicode collation for strings
#define kTDCollateJSON_Unicode ((void*)0)

Expand Down
42 changes: 37 additions & 5 deletions Source/TDCollateJSON.m
Expand Up @@ -205,20 +205,26 @@ static double readNumber(const char* start, const char* end, char** endOfNumber)
}


int TDCollateJSON(void *context,
int len1, const void * chars1,
int len2, const void * chars2)
int TDCollateJSONLimited(void *context,
int len1, const void * chars1,
int len2, const void * chars2,
unsigned arrayLimit)
{
const char* str1 = chars1;
const char* str2 = chars2;
int depth = 0;
unsigned arrayIndex = 0;

do {
// Get the types of the next token in each string:
ValueType type1 = valueTypeOf(*str1);
ValueType type2 = valueTypeOf(*str2);
// If types don't match, stop and return their relative ordering:
if (type1 != type2) {
if (depth == 1 && (type1 == kComma || type2 == kComma)) {
if (++arrayIndex >= arrayLimit)
return 0;
}
if (context != kTDCollateJSON_Raw)
return cmp(type1, type2);
else
Expand Down Expand Up @@ -272,9 +278,14 @@ int TDCollateJSON(void *context,
case kEndObject:
++str1;
++str2;
if (depth == 1 && (++arrayIndex >= arrayLimit))
return 0;
--depth;
break;
case kComma:
if (depth == 1 && (++arrayIndex >= arrayLimit))
return 0;
// else fall through:
case kColon:
++str1;
++str2;
Expand All @@ -287,6 +298,14 @@ int TDCollateJSON(void *context,
}


/* Full (unlimited) JSON collation: delegates to TDCollateJSONLimited with the largest
   possible item limit, so every top-level item takes part in the comparison. */
int TDCollateJSON(void *context,
                  int len1, const void * chars1,
                  int len2, const void * chars2)
{
    const unsigned noLimit = UINT_MAX;
    int ordering = TDCollateJSONLimited(context, len1, chars1, len2, chars2, noLimit);
    return ordering;
}


#pragma mark - UNIT TESTS:


Expand All @@ -313,15 +332,19 @@ static void testEscape(const char* source, char decoded) {
testEscape("\\u0000", 0);
}

static int collate(void *mode, const void * str1, const void * str2) {
// Test helper: collates two NUL-terminated JSON strings with TDCollateJSONLimited,
// comparing at most `arrayLimit` top-level items.
// Each input is copied into a scratch buffer and followed by extra digits, while only the
// original length is passed to the collator; this checks that the collator does not read
// past the length it was given.
static int collateLimited(void *mode, const void * str1, const void * str2, unsigned arrayLimit) {
    // Be evil and put numeric garbage past the ends of str1 and str2 (see bug #138):
    size_t len1 = strlen(str1), len2 = strlen(str2);
    char buf1[len1 + 3], buf2[len2 + 3];    // room for the string, two garbage digits and NUL
    strlcpy(buf1, str1, sizeof(buf1));
    strlcat(buf1, "99", sizeof(buf1));
    // Each buffer must be bounded by its OWN size: bounding buf2 by sizeof(buf1) silently
    // truncated str2 whenever str1 was the shorter string.
    strlcpy(buf2, str2, sizeof(buf2));
    strlcat(buf2, "88", sizeof(buf2));
    return TDCollateJSONLimited(mode, (int)len1, buf1, (int)len2, buf2, arrayLimit);
}

// Test helper: collation of two JSON strings with no limit on the number of
// top-level items compared.
static int collate(void *mode, const void * str1, const void * str2) {
    const unsigned unlimited = UINT_MAX;
    int result = collateLimited(mode, str1, str2, unlimited);
    return result;
}

TestCase(TDCollateScalars) {
Expand Down Expand Up @@ -407,4 +430,13 @@ static int collate(void *mode, const void * str1, const void * str2) {
CAssertEq(collate(mode, encode(@"\t"), encode(@" ")), -1);
CAssertEq(collate(mode, encode(@"\001"), encode(@" ")), -1);
}

// Exercises TDCollateJSONLimited (via collateLimited) on two-item arrays.
TestCase(TDCollateLimited) {
void* mode = kTDCollateJSON_Unicode;
// Limit 1: the first items differ (5 vs 4), so the ordering is reported.
CAssertEq(collateLimited(mode, "[5,\"wow\"]", "[4,\"wow\"]", 1), 1);
// Limit 1: identical arrays compare equal.
CAssertEq(collateLimited(mode, "[5,\"wow\"]", "[5,\"wow\"]", 1), 0);
// Limit 1: the arrays differ only in the second item, which is beyond the limit.
CAssertEq(collateLimited(mode, "[5,\"wow\"]", "[5,\"MOM\"]", 1), 0);
// Limit 1: one array ends after the first item; still equal within the limit.
CAssertEq(collateLimited(mode, "[5,\"wow\"]", "[5]", 1), 0);
// Limit 2: the second item is now compared, so the difference is reported.
CAssertEq(collateLimited(mode, "[5,\"wow\"]", "[5,\"MOM\"]", 2), 1);
}
#endif
10 changes: 10 additions & 0 deletions Source/TDJSON.h
Expand Up @@ -70,3 +70,13 @@ typedef NSUInteger TDJSONWritingOptions;
+ (NSData*) appendDictionary: (NSDictionary*)dict
toJSONDictionaryData: (NSData*)json;
@end


/** Wrapper for an NSArray of JSON data, that avoids having to parse the data if it's not used.
NSData objects in the array will be parsed into native objects before being returned to the caller from -objectAtIndex.
The parsed object replaces the NSData in the wrapped array, so later accesses to the same index return it without parsing again. */
@interface TDLazyArrayOfJSON : NSArray
{
NSMutableArray* _array; // Backing store; retained (not copied) and modified in place as items are parsed
}
/** Initializes the wrapper around 'array'.
    The array is retained rather than copied, and its NSData items are replaced in place as they are accessed, so the caller's array is modified. */
- (id) initWithArray: (NSMutableArray*)array;
@end
33 changes: 33 additions & 0 deletions Source/TDJSON.m
Expand Up @@ -107,3 +107,36 @@ + (NSData*) appendDictionary: (NSDictionary*)dict


@end


@implementation TDLazyArrayOfJSON

/** Initializes the wrapper around 'array'.
    The array is retained, not copied: items are parsed in place, so the caller's array is
    modified as elements are accessed. */
- (id) initWithArray: (NSMutableArray*)array {
    self = [super init];
    if (self) {
        _array = [array retain];
    }
    return self;
}

- (void)dealloc
{
    [_array release];
    [super dealloc];
}

/** Number of items in the wrapped array. */
- (NSUInteger)count {
    return _array.count;
}

/** Returns the item at 'index'. An item still stored as NSData is parsed as JSON
    (fragments allowed) on first access, and the parsed object replaces the NSData in the
    wrapped array so later accesses return it directly.
    Data that cannot be parsed yields NSNull. */
- (id)objectAtIndex:(NSUInteger)index {
    id obj = [_array objectAtIndex: index];
    if ([obj isKindOfClass: [NSData class]]) {
        id parsed = [TDJSON JSONObjectWithData: obj options: TDJSONReadingAllowFragments
                                         error: nil];
        // A failed parse returns nil. Storing nil in an NSMutableArray raises
        // NSInvalidArgumentException, and an NSArray must never hand out nil elements,
        // so substitute NSNull for unparseable data.
        obj = parsed ?: [NSNull null];
        [_array replaceObjectAtIndex: index withObject: obj];
    }
    return obj;
}

@end
185 changes: 110 additions & 75 deletions Source/TDView.m
Expand Up @@ -15,6 +15,7 @@

#import "TDView.h"
#import "TDInternal.h"
#import "TDCollateJSON.h"

#import "FMDatabase.h"
#import "FMDatabaseAdditions.h"
Expand Down Expand Up @@ -410,28 +411,6 @@ - (FMResultSet*) resultSetWithOptions: (const TDQueryOptions*)options
}


// Are key1 and key2 grouped together at this groupLevel?
// With groupLevel == 0, or when either key is not an array, the keys must be fully equal.
// Otherwise the keys belong together when their prefixes of up to 'groupLevel' items are
// identical, which also requires those prefixes to have the same length.
static bool groupTogether(id key1, id key2, unsigned groupLevel) {
    if (groupLevel == 0 || ![key1 isKindOfClass: [NSArray class]]
                        || ![key2 isKindOfClass: [NSArray class]])
        return [key1 isEqual: key2];
    NSArray* array1 = key1;
    NSArray* array2 = key2;
    NSUInteger count1 = array1.count, count2 = array2.count;
    NSUInteger shorter = MIN(count1, count2);
    // If one key has fewer than groupLevel items, its whole array is its group key, so a
    // key of different length cannot be in the same group (e.g. [1] vs [1,2] at level 3).
    if (count1 != count2 && shorter < groupLevel)
        return false;
    NSUInteger end = MIN((NSUInteger)groupLevel, shorter);
    for (NSUInteger i = 0; i < end; ++i) {
        if (![array1[i] isEqual: array2[i]])
            return false;
    }
    return true;
}

// Computes the key reported for a group: an array key longer than groupLevel is cut down
// to its first groupLevel items; any other key (or groupLevel == 0) is returned unchanged.
static id groupKey(id key, unsigned groupLevel) {
    BOOL needsTruncation = groupLevel > 0
                        && [key isKindOfClass: [NSArray class]]
                        && [key count] > groupLevel;
    if (!needsTruncation)
        return key;
    NSRange prefix = NSMakeRange(0, groupLevel);
    return [key subarrayWithRange: prefix];
}


- (NSArray*) queryWithOptions: (const TDQueryOptions*)options
status: (TDStatus*)outStatus
{
Expand All @@ -442,47 +421,28 @@ - (NSArray*) queryWithOptions: (const TDQueryOptions*)options
if (!r)
return nil;

NSMutableArray* rows;

unsigned groupLevel = options->groupLevel;
bool group = options->group || groupLevel > 0;
bool reduce = options->reduce || group;
if (options->reduce || group) {
// Reduced or grouped query:
// Reduced or grouped query:
if (!_reduceBlock && !group) {
Warn(@"Cannot use reduce option in view %@ which has no reduce block defined", _name);
*outStatus = kTDStatusBadParam;
return nil;
}
rows = [self reducedQuery: r group: group groupLevel: groupLevel];

if (reduce && !_reduceBlock && !group) {
Warn(@"Cannot use reduce option in view %@ which has no reduce block defined", _name);
*outStatus = kTDStatusBadParam;
return nil;
}

NSMutableArray* rows = $marray();
NSMutableArray* keysToReduce=nil, *valuesToReduce=nil;
id lastKey = nil;
if (reduce) {
keysToReduce = [[NSMutableArray alloc] initWithCapacity: 100];
valuesToReduce = [[NSMutableArray alloc] initWithCapacity: 100];
}

while ([r next]) {
@autoreleasepool {
id key = fromJSON([r dataNoCopyForColumnIndex: 0]);
id value = fromJSON([r dataNoCopyForColumnIndex: 1]);
Assert(key);
if (reduce) {
// Reduced or grouped query:
if (group && !groupTogether(key, lastKey, groupLevel) && lastKey) {
// This pair starts a new group, so reduce & record the last one:
id reduced = _reduceBlock ? _reduceBlock(keysToReduce, valuesToReduce,NO) : nil;
[rows addObject: $dict({@"key", groupKey(lastKey, groupLevel)},
{@"value", (reduced ?: $null)})];
[keysToReduce removeAllObjects];
[valuesToReduce removeAllObjects];
}
LogTo(ViewVerbose, @"Query %@: Will reduce row with key=%@, value=%@",
_name, toJSONString(key), toJSONString(value));
[keysToReduce addObject: key];
[valuesToReduce addObject: value ?: $null];
lastKey = key;

} else {
// Regular query:
} else {
// Regular query:
rows = $marray();
while ([r next]) {
@autoreleasepool {
id key = fromJSON([r dataNoCopyForColumnIndex: 0]);
id value = fromJSON([r dataNoCopyForColumnIndex: 1]);
Assert(key);
NSString* docID = [r stringForColumnIndex: 2];
id docContents = nil;
if (options->includeDocs) {
Expand Down Expand Up @@ -514,28 +474,103 @@ - (NSArray*) queryWithOptions: (const TDQueryOptions*)options
}
}
}

if (reduce) {
if (keysToReduce.count > 0) {
// Finish the last group (or the entire list, if no grouping):
id key = group ? groupKey(lastKey, groupLevel) : $null;
id reduced = _reduceBlock ? _reduceBlock(keysToReduce, valuesToReduce,NO) : nil;
LogTo(ViewVerbose, @"Query %@: Reduced to key=%@, value=%@",
_name, toJSONString(key), toJSONString(reduced));
[rows addObject: $dict({@"key", key},
{@"value", (reduced ?: $null)})];
}
[keysToReduce release];
[valuesToReduce release];
}


[r close];
*outStatus = kTDStatusOK;
LogTo(View, @"Query %@: Returning %u rows", _name, (unsigned)rows.count);
return rows;
}


#pragma mark - REDUCING/GROUPING:


// Decides whether two JSON-encoded keys fall into the same group at this groupLevel.
// The keys are compared as raw JSON with TDCollateJSONLimited, so they are never parsed.
// A missing key never matches; groupLevel 0 means the keys are compared in full.
static bool groupTogether(NSData* key1, NSData* key2, unsigned groupLevel) {
    if (key1 == nil || key2 == nil)
        return NO;
    unsigned itemLimit = (groupLevel == 0) ? UINT_MAX : groupLevel;
    int ordering = TDCollateJSONLimited(kTDCollateJSON_Unicode,
                                        (int)key1.length, key1.bytes,
                                        (int)key2.length, key2.bytes,
                                        itemLimit);
    return ordering == 0;
}

// Parses a JSON-encoded key and computes the key to report in the result row: an array key
// longer than groupLevel is cut down to its first groupLevel items; any other key (or
// groupLevel == 0) is returned as parsed.
static id groupKey(NSData* keyJSON, unsigned groupLevel) {
    id parsedKey = fromJSON(keyJSON);
    BOOL needsTruncation = groupLevel > 0
                        && [parsedKey isKindOfClass: [NSArray class]]
                        && [parsedKey count] > groupLevel;
    if (!needsTruncation)
        return parsedKey;
    NSRange prefix = NSMakeRange(0, groupLevel);
    return [parsedKey subarrayWithRange: prefix];
}


// Runs the view's reduce block over parallel arrays of keys and values.
// Both arrays are wrapped in TDLazyArrayOfJSON so that any NSData items are only parsed
// if the reduce block actually reads them.
// Returns nil if the view has no reduce block; otherwise the block's result, or $null if
// the block returned nil.
- (id) reduceKeys: (NSMutableArray*)keys values: (NSMutableArray*)values {
    id reduced = nil;
    if (_reduceBlock) {
        TDLazyArrayOfJSON* keyWrapper = [[TDLazyArrayOfJSON alloc] initWithArray: keys];
        TDLazyArrayOfJSON* valueWrapper = [[TDLazyArrayOfJSON alloc] initWithArray: values];
        reduced = _reduceBlock(keyWrapper, valueWrapper, NO);
        [keyWrapper release];
        [valueWrapper release];
        if (!reduced)
            reduced = $null;
    }
    return reduced;
}


// Runs a reduced and/or grouped query over the rows of result set 'r'.
// Column 0 of each row is the JSON-encoded key and column 1 the JSON-encoded value. Keys and
// values are kept as unparsed NSData; group boundaries are detected by collating the raw JSON.
// Returns one {"key", "value"} dictionary per group, or a single row with a null key when
// not grouping. Without a reduce block, each group's value is null.
- (NSMutableArray*) reducedQuery: (FMResultSet*)r group: (BOOL)group groupLevel: (unsigned)groupLevel
{
    NSMutableArray* keysToReduce = nil, *valuesToReduce = nil;
    if (_reduceBlock) {
        // Rows are only accumulated when there is a reduce block to feed them to.
        keysToReduce = [[NSMutableArray alloc] initWithCapacity: 100];
        valuesToReduce = [[NSMutableArray alloc] initWithCapacity: 100];
    }
    NSData* lastKeyData = nil;      // Owned copy of the first key of the current group

    NSMutableArray* rows = $marray();
    while ([r next]) {
        @autoreleasepool {
            NSData* keyData = [r dataForColumnIndex: 0];
            NSData* valueData = [r dataForColumnIndex: 1];
            Assert(keyData);
            if (group && !groupTogether(keyData, lastKeyData, groupLevel)) {
                if (lastKeyData) {
                    // This pair starts a new group, so reduce & record the last one:
                    id reduced = [self reduceKeys: keysToReduce values: valuesToReduce];
                    // 'reduced' is nil when there is no reduce block; record null instead.
                    [rows addObject: $dict({@"key", groupKey(lastKeyData, groupLevel)},
                                           {@"value", (reduced ?: $null)})];
                    [keysToReduce removeAllObjects];
                    [valuesToReduce removeAllObjects];
                    [lastKeyData release];
                }
                lastKeyData = [keyData copy];
            }
            LogTo(ViewVerbose, @"Query %@: Will reduce row with key=%@, value=%@",
                  _name, [keyData my_UTF8ToString], [valueData my_UTF8ToString]);
            [keysToReduce addObject: keyData];
            [valuesToReduce addObject: valueData ?: $null];
        }
    }

    // Without a reduce block keysToReduce is nil, so also check lastKeyData; otherwise the
    // final group would be dropped even though the earlier groups were emitted.
    if (keysToReduce.count > 0 || lastKeyData) {
        // Finish the last group (or the entire list, if no grouping):
        id key = group ? groupKey(lastKeyData, groupLevel) : $null;
        id reduced = [self reduceKeys: keysToReduce values: valuesToReduce];
        LogTo(ViewVerbose, @"Query %@: Reduced to key=%@, value=%@",
              _name, toJSONString(key), toJSONString(reduced));
        [rows addObject: $dict({@"key", key}, {@"value", (reduced ?: $null)})];
    }
    [keysToReduce release];
    [valuesToReduce release];
    [lastKeyData release];
    return rows;
}


#pragma mark - OTHER:

// This is really just for unit tests & debugging
- (NSArray*) dump {
if (self.viewID <= 0)
Expand Down

0 comments on commit 2ad9cc8

Please sign in to comment.