Skip to content

Commit

Permalink
Merge pull request #1060 from thomasvl/drop_spinlocks
Browse files Browse the repository at this point in the history
Drop all use of OSSpinLock
  • Loading branch information
thomasvl committed Dec 17, 2015
2 parents 9e1777f + d6590d6 commit 6b228f3
Show file tree
Hide file tree
Showing 11 changed files with 53 additions and 874 deletions.
1 change: 0 additions & 1 deletion Makefile.am
Expand Up @@ -468,7 +468,6 @@ objectivec_EXTRA_DIST= \
objectivec/Tests/GPBMessageTests.m \
objectivec/Tests/GPBObjectiveCPlusPlusTest.mm \
objectivec/Tests/GPBPerfTests.m \
objectivec/Tests/GPBStringTests.m \
objectivec/Tests/GPBSwiftTests.swift \
objectivec/Tests/GPBTestUtilities.h \
objectivec/Tests/GPBTestUtilities.m \
Expand Down
329 changes: 11 additions & 318 deletions objectivec/GPBCodedInputStream.m
Expand Up @@ -216,7 +216,17 @@ int32_t GPBCodedInputStreamReadTag(GPBCodedInputStreamState *state) {
result = @"";
} else {
CheckSize(state, size);
result = GPBCreateGPBStringWithUTF8(&state->bytes[state->bufferPos], size);
result = [[NSString alloc] initWithBytes:&state->bytes[state->bufferPos]
length:size
encoding:NSUTF8StringEncoding];
if (!result) {
result = @"";
#ifdef DEBUG
// https://developers.google.com/protocol-buffers/docs/proto#scalar
NSLog(@"UTF8 failure, is some field type 'string' when it should be "
@"'bytes'?");
#endif
}
state->bufferPos += size;
}
return result;
Expand Down Expand Up @@ -478,320 +488,3 @@ - (int64_t)readSInt64 {
}

@end

@implementation GPBString {
@package
CFStringRef string_;
unsigned char *utf8_;
NSUInteger utf8Len_;

// This lock is used to gate access to utf8_. Once GPBStringInitStringValue()
// has been called, string_ will be filled in, and utf8_ will be NULL.
OSSpinLock lock_;

BOOL hasBOM_;
BOOL is7BitAscii_;
}

// Returns true if the passed in bytes are 7 bit ascii.
// This routine needs to be fast.
static bool AreBytesIn7BitASCII(const uint8_t *bytes, NSUInteger len) {
// In the loops below, it's more efficient to collect rather than do
// conditional at every step.
#if __LP64__
// Align bytes. This is especially important in case of 3 byte BOM.
while (len > 0 && ((size_t)bytes & 0x07)) {
if (*bytes++ & 0x80) return false;
len--;
}
while (len >= 32) {
uint64_t val = *(const uint64_t *)bytes;
uint64_t hiBits = (val & 0x8080808080808080ULL);
bytes += 8;
val = *(const uint64_t *)bytes;
hiBits |= (val & 0x8080808080808080ULL);
bytes += 8;
val = *(const uint64_t *)bytes;
hiBits |= (val & 0x8080808080808080ULL);
bytes += 8;
val = *(const uint64_t *)bytes;
if (hiBits | (val & 0x8080808080808080ULL)) return false;
bytes += 8;
len -= 32;
}

while (len >= 16) {
uint64_t val = *(const uint64_t *)bytes;
uint64_t hiBits = (val & 0x8080808080808080ULL);
bytes += 8;
val = *(const uint64_t *)bytes;
if (hiBits | (val & 0x8080808080808080ULL)) return false;
bytes += 8;
len -= 16;
}

while (len >= 8) {
uint64_t val = *(const uint64_t *)bytes;
if (val & 0x8080808080808080ULL) return false;
bytes += 8;
len -= 8;
}
#else // __LP64__
// Align bytes. This is especially important in case of 3 byte BOM.
while (len > 0 && ((size_t)bytes & 0x03)) {
if (*bytes++ & 0x80) return false;
len--;
}
while (len >= 16) {
uint32_t val = *(const uint32_t *)bytes;
uint32_t hiBits = (val & 0x80808080U);
bytes += 4;
val = *(const uint32_t *)bytes;
hiBits |= (val & 0x80808080U);
bytes += 4;
val = *(const uint32_t *)bytes;
hiBits |= (val & 0x80808080U);
bytes += 4;
val = *(const uint32_t *)bytes;
if (hiBits | (val & 0x80808080U)) return false;
bytes += 4;
len -= 16;
}

while (len >= 8) {
uint32_t val = *(const uint32_t *)bytes;
uint32_t hiBits = (val & 0x80808080U);
bytes += 4;
val = *(const uint32_t *)bytes;
if (hiBits | (val & 0x80808080U)) return false;
bytes += 4;
len -= 8;
}
#endif // __LP64__

while (len >= 4) {
uint32_t val = *(const uint32_t *)bytes;
if (val & 0x80808080U) return false;
bytes += 4;
len -= 4;
}

while (len--) {
if (*bytes++ & 0x80) return false;
}

return true;
}

static void GPBStringInitStringValue(GPBString *string) {
OSSpinLockLock(&string->lock_);
GPBStringInitStringValueAlreadyLocked(string);
OSSpinLockUnlock(&string->lock_);
}

static void GPBStringInitStringValueAlreadyLocked(GPBString *string) {
if (string->string_ == NULL && string->utf8_ != NULL) {
// Using kCFAllocatorMalloc for contentsDeallocator, as buffer in
// string->utf8_ is being handed off.
string->string_ = CFStringCreateWithBytesNoCopy(
NULL, string->utf8_, string->utf8Len_, kCFStringEncodingUTF8, false,
kCFAllocatorMalloc);
if (!string->string_) {
#ifdef DEBUG
// https://developers.google.com/protocol-buffers/docs/proto#scalar
NSLog(@"UTF8 failure, is some field type 'string' when it should be "
@"'bytes'?");
#endif
string->string_ = CFSTR("");
string->utf8Len_ = 0;
// On failure, we have to clean up the buffer.
free(string->utf8_);
}
string->utf8_ = NULL;
}
}

GPBString *GPBCreateGPBStringWithUTF8(const void *bytes, NSUInteger length) {
GPBString *result = [[GPBString alloc] initWithBytes:bytes length:length];
return result;
}

- (instancetype)initWithBytes:(const void *)bytes length:(NSUInteger)length {
self = [super init];
if (self) {
utf8_ = malloc(length);
memcpy(utf8_, bytes, length);
utf8Len_ = length;
lock_ = OS_SPINLOCK_INIT;
is7BitAscii_ = AreBytesIn7BitASCII(bytes, length);
if (length >= 3 && memcmp(utf8_, "\xef\xbb\xbf", 3) == 0) {
// We can't just remove the BOM from the string here, because in the case
// where we have > 1 BOM at the beginning of the string, we will remove one,
// and the internal NSString we create will remove the next one, and we will
// end up with a GPBString != NSString issue.
// We also just can't remove all the BOMs because then we would end up with
// potential cases where a GPBString and an NSString made with the same
// UTF8 buffer would in fact be different.
// We record the fact we have a BOM, and use it as necessary to simulate
// what NSString would return for various calls.
hasBOM_ = YES;
#if DEBUG
// Sending BOMs across the line is just wasting bits.
NSLog(@"Bad data? String should not have BOM!");
#endif // DEBUG
}
}
return self;
}

- (void)dealloc {
if (string_ != NULL) {
CFRelease(string_);
}
if (utf8_ != NULL) {
free(utf8_);
}
[super dealloc];
}

// Required NSString overrides.
- (NSUInteger)length {
if (is7BitAscii_) {
return utf8Len_;
} else {
GPBStringInitStringValue(self);
return CFStringGetLength(string_);
}
}

- (unichar)characterAtIndex:(NSUInteger)anIndex {
OSSpinLockLock(&lock_);
if (is7BitAscii_ && utf8_) {
unichar result = utf8_[anIndex];
OSSpinLockUnlock(&lock_);
return result;
} else {
GPBStringInitStringValueAlreadyLocked(self);
OSSpinLockUnlock(&lock_);
return CFStringGetCharacterAtIndex(string_, anIndex);
}
}

// Override a couple of methods that typically want high performance.

- (id)copyWithZone:(NSZone *)zone {
GPBStringInitStringValue(self);
return [(NSString *)string_ copyWithZone:zone];
}

- (id)mutableCopyWithZone:(NSZone *)zone {
GPBStringInitStringValue(self);
return [(NSString *)string_ mutableCopyWithZone:zone];
}

- (NSUInteger)hash {
// Must convert to string here to make sure that the hash is always
// consistent no matter what state the GPBString is in.
GPBStringInitStringValue(self);
return CFHash(string_);
}

- (BOOL)isEqual:(id)object {
if (self == object) {
return YES;
}
if ([object isKindOfClass:[NSString class]]) {
GPBStringInitStringValue(self);
return CFStringCompare(string_, (CFStringRef)object, 0) ==
kCFCompareEqualTo;
}
return NO;
}

- (void)getCharacters:(unichar *)buffer range:(NSRange)aRange {
OSSpinLockLock(&lock_);
if (is7BitAscii_ && utf8_) {
unsigned char *bytes = &(utf8_[aRange.location]);
for (NSUInteger i = 0; i < aRange.length; ++i) {
buffer[i] = bytes[i];
}
OSSpinLockUnlock(&lock_);
} else {
GPBStringInitStringValueAlreadyLocked(self);
OSSpinLockUnlock(&lock_);
CFStringGetCharacters(string_, CFRangeMake(aRange.location, aRange.length),
buffer);
}
}

- (NSUInteger)lengthOfBytesUsingEncoding:(NSStringEncoding)encoding {
if ((encoding == NSUTF8StringEncoding) ||
(encoding == NSASCIIStringEncoding && is7BitAscii_)) {
return utf8Len_ - (hasBOM_ ? 3 : 0);
} else {
GPBStringInitStringValue(self);
return [(NSString *)string_ lengthOfBytesUsingEncoding:encoding];
}
}

- (BOOL)getBytes:(void *)buffer
maxLength:(NSUInteger)maxLength
usedLength:(NSUInteger *)usedLength
encoding:(NSStringEncoding)encoding
options:(NSStringEncodingConversionOptions)options
range:(NSRange)range
remainingRange:(NSRangePointer)remainingRange {
// [NSString getBytes:maxLength:usedLength:encoding:options:range:remainingRange]
// does not return reliable results if the maxLength argument is 0
// (Radar 16385183). Therefore we have special cased it as a slow case so
// that it behaves however Apple intends it to behave. It should be a rare
// case.
//
// [NSString getBytes:maxLength:usedLength:encoding:options:range:remainingRange]
// does not return reliable results if the range is outside of the strings
// length (Radar 16396177). Therefore we have special cased it as a slow
// case so that it behaves however Apple intends it to behave. It should
// be a rare case.
//
// We can optimize the UTF8StringEncoding and NSASCIIStringEncoding with no
// options cases.
if ((options == 0) &&
(encoding == NSUTF8StringEncoding || encoding == NSASCIIStringEncoding) &&
(maxLength != 0) &&
(NSMaxRange(range) <= utf8Len_)) {
// Might be able to optimize it.
OSSpinLockLock(&lock_);
if (is7BitAscii_ && utf8_) {
NSUInteger length = range.length;
length = (length < maxLength) ? length : maxLength;
memcpy(buffer, utf8_ + range.location, length);
if (usedLength) {
*usedLength = length;
}
if (remainingRange) {
remainingRange->location = range.location + length;
remainingRange->length = range.length - length;
}
OSSpinLockUnlock(&lock_);
if (length > 0) {
return YES;
} else {
return NO;
}
} else {
GPBStringInitStringValueAlreadyLocked(self);
OSSpinLockUnlock(&lock_);
}
} else {
GPBStringInitStringValue(self);
}
return [(NSString *)string_ getBytes:buffer
maxLength:maxLength
usedLength:usedLength
encoding:encoding
options:options
range:range
remainingRange:remainingRange];
}

@end
17 changes: 0 additions & 17 deletions objectivec/GPBCodedInputStream_PackagePrivate.h
Expand Up @@ -39,19 +39,6 @@
@class GPBUnknownFieldSet;
@class GPBFieldDescriptor;

// GPBString is a string subclass that avoids the overhead of initializing
// a full NSString until it is actually needed. Lots of protocol buffers contain
// strings, and instantiating all of those strings and having them parsed to
// verify correctness when the message was being read was expensive, when many
// of the strings were never being used.
//
// Note for future-self. I tried implementing this using a NSProxy.
// Turned out the performance was horrible in client apps because folks
// like to use libraries like SBJSON that grab characters one at a time.
// The proxy overhead was a killer.
@interface GPBString : NSString
@end

typedef struct GPBCodedInputStreamState {
const uint8_t *bytes;
size_t bufferSize;
Expand Down Expand Up @@ -92,10 +79,6 @@ typedef struct GPBCodedInputStreamState {

CF_EXTERN_C_BEGIN

// Returns a GPBString with a +1 retain count.
GPBString *GPBCreateGPBStringWithUTF8(const void *bytes, NSUInteger length)
__attribute__((ns_returns_retained));

int32_t GPBCodedInputStreamReadTag(GPBCodedInputStreamState *state);

double GPBCodedInputStreamReadDouble(GPBCodedInputStreamState *state);
Expand Down

0 comments on commit 6b228f3

Please sign in to comment.