Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Drop all use of OSSpinLock #1060

Merged
merged 1 commit into from
Dec 17, 2015
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -468,7 +468,6 @@ objectivec_EXTRA_DIST= \
objectivec/Tests/GPBMessageTests.m \
objectivec/Tests/GPBObjectiveCPlusPlusTest.mm \
objectivec/Tests/GPBPerfTests.m \
objectivec/Tests/GPBStringTests.m \
objectivec/Tests/GPBSwiftTests.swift \
objectivec/Tests/GPBTestUtilities.h \
objectivec/Tests/GPBTestUtilities.m \
Expand Down
329 changes: 11 additions & 318 deletions objectivec/GPBCodedInputStream.m
Original file line number Diff line number Diff line change
Expand Up @@ -216,7 +216,17 @@ int32_t GPBCodedInputStreamReadTag(GPBCodedInputStreamState *state) {
result = @"";
} else {
CheckSize(state, size);
result = GPBCreateGPBStringWithUTF8(&state->bytes[state->bufferPos], size);
result = [[NSString alloc] initWithBytes:&state->bytes[state->bufferPos]
length:size
encoding:NSUTF8StringEncoding];
if (!result) {
result = @"";
#ifdef DEBUG
// https://developers.google.com/protocol-buffers/docs/proto#scalar
NSLog(@"UTF8 failure, is some field type 'string' when it should be "
@"'bytes'?");
#endif
}
state->bufferPos += size;
}
return result;
Expand Down Expand Up @@ -478,320 +488,3 @@ - (int64_t)readSInt64 {
}

@end

@implementation GPBString {
@package
CFStringRef string_;
unsigned char *utf8_;
NSUInteger utf8Len_;

// This lock is used to gate access to utf8_. Once GPBStringInitStringValue()
// has been called, string_ will be filled in, and utf8_ will be NULL.
OSSpinLock lock_;

BOOL hasBOM_;
BOOL is7BitAscii_;
}

// Returns true if the passed in bytes are 7 bit ascii.
// This routine needs to be fast.
static bool AreBytesIn7BitASCII(const uint8_t *bytes, NSUInteger len) {
// In the loops below, it's more efficient to collect rather than do
// conditional at every step.
#if __LP64__
// Align bytes. This is especially important in case of 3 byte BOM.
while (len > 0 && ((size_t)bytes & 0x07)) {
if (*bytes++ & 0x80) return false;
len--;
}
while (len >= 32) {
uint64_t val = *(const uint64_t *)bytes;
uint64_t hiBits = (val & 0x8080808080808080ULL);
bytes += 8;
val = *(const uint64_t *)bytes;
hiBits |= (val & 0x8080808080808080ULL);
bytes += 8;
val = *(const uint64_t *)bytes;
hiBits |= (val & 0x8080808080808080ULL);
bytes += 8;
val = *(const uint64_t *)bytes;
if (hiBits | (val & 0x8080808080808080ULL)) return false;
bytes += 8;
len -= 32;
}

while (len >= 16) {
uint64_t val = *(const uint64_t *)bytes;
uint64_t hiBits = (val & 0x8080808080808080ULL);
bytes += 8;
val = *(const uint64_t *)bytes;
if (hiBits | (val & 0x8080808080808080ULL)) return false;
bytes += 8;
len -= 16;
}

while (len >= 8) {
uint64_t val = *(const uint64_t *)bytes;
if (val & 0x8080808080808080ULL) return false;
bytes += 8;
len -= 8;
}
#else // __LP64__
// Align bytes. This is especially important in case of 3 byte BOM.
while (len > 0 && ((size_t)bytes & 0x03)) {
if (*bytes++ & 0x80) return false;
len--;
}
while (len >= 16) {
uint32_t val = *(const uint32_t *)bytes;
uint32_t hiBits = (val & 0x80808080U);
bytes += 4;
val = *(const uint32_t *)bytes;
hiBits |= (val & 0x80808080U);
bytes += 4;
val = *(const uint32_t *)bytes;
hiBits |= (val & 0x80808080U);
bytes += 4;
val = *(const uint32_t *)bytes;
if (hiBits | (val & 0x80808080U)) return false;
bytes += 4;
len -= 16;
}

while (len >= 8) {
uint32_t val = *(const uint32_t *)bytes;
uint32_t hiBits = (val & 0x80808080U);
bytes += 4;
val = *(const uint32_t *)bytes;
if (hiBits | (val & 0x80808080U)) return false;
bytes += 4;
len -= 8;
}
#endif // __LP64__

while (len >= 4) {
uint32_t val = *(const uint32_t *)bytes;
if (val & 0x80808080U) return false;
bytes += 4;
len -= 4;
}

while (len--) {
if (*bytes++ & 0x80) return false;
}

return true;
}

static void GPBStringInitStringValue(GPBString *string) {
OSSpinLockLock(&string->lock_);
GPBStringInitStringValueAlreadyLocked(string);
OSSpinLockUnlock(&string->lock_);
}

static void GPBStringInitStringValueAlreadyLocked(GPBString *string) {
if (string->string_ == NULL && string->utf8_ != NULL) {
// Using kCFAllocatorMalloc for contentsDeallocator, as buffer in
// string->utf8_ is being handed off.
string->string_ = CFStringCreateWithBytesNoCopy(
NULL, string->utf8_, string->utf8Len_, kCFStringEncodingUTF8, false,
kCFAllocatorMalloc);
if (!string->string_) {
#ifdef DEBUG
// https://developers.google.com/protocol-buffers/docs/proto#scalar
NSLog(@"UTF8 failure, is some field type 'string' when it should be "
@"'bytes'?");
#endif
string->string_ = CFSTR("");
string->utf8Len_ = 0;
// On failure, we have to clean up the buffer.
free(string->utf8_);
}
string->utf8_ = NULL;
}
}

GPBString *GPBCreateGPBStringWithUTF8(const void *bytes, NSUInteger length) {
GPBString *result = [[GPBString alloc] initWithBytes:bytes length:length];
return result;
}

- (instancetype)initWithBytes:(const void *)bytes length:(NSUInteger)length {
self = [super init];
if (self) {
utf8_ = malloc(length);
memcpy(utf8_, bytes, length);
utf8Len_ = length;
lock_ = OS_SPINLOCK_INIT;
is7BitAscii_ = AreBytesIn7BitASCII(bytes, length);
if (length >= 3 && memcmp(utf8_, "\xef\xbb\xbf", 3) == 0) {
// We can't just remove the BOM from the string here, because in the case
// where we have > 1 BOM at the beginning of the string, we will remove one,
// and the internal NSString we create will remove the next one, and we will
// end up with a GPBString != NSString issue.
// We also just can't remove all the BOMs because then we would end up with
// potential cases where a GPBString and an NSString made with the same
// UTF8 buffer would in fact be different.
// We record the fact we have a BOM, and use it as necessary to simulate
// what NSString would return for various calls.
hasBOM_ = YES;
#if DEBUG
// Sending BOMs across the line is just wasting bits.
NSLog(@"Bad data? String should not have BOM!");
#endif // DEBUG
}
}
return self;
}

- (void)dealloc {
if (string_ != NULL) {
CFRelease(string_);
}
if (utf8_ != NULL) {
free(utf8_);
}
[super dealloc];
}

// Required NSString overrides.
- (NSUInteger)length {
if (is7BitAscii_) {
return utf8Len_;
} else {
GPBStringInitStringValue(self);
return CFStringGetLength(string_);
}
}

- (unichar)characterAtIndex:(NSUInteger)anIndex {
OSSpinLockLock(&lock_);
if (is7BitAscii_ && utf8_) {
unichar result = utf8_[anIndex];
OSSpinLockUnlock(&lock_);
return result;
} else {
GPBStringInitStringValueAlreadyLocked(self);
OSSpinLockUnlock(&lock_);
return CFStringGetCharacterAtIndex(string_, anIndex);
}
}

// Override a couple of methods that typically want high performance.

- (id)copyWithZone:(NSZone *)zone {
GPBStringInitStringValue(self);
return [(NSString *)string_ copyWithZone:zone];
}

- (id)mutableCopyWithZone:(NSZone *)zone {
GPBStringInitStringValue(self);
return [(NSString *)string_ mutableCopyWithZone:zone];
}

- (NSUInteger)hash {
// Must convert to string here to make sure that the hash is always
// consistent no matter what state the GPBString is in.
GPBStringInitStringValue(self);
return CFHash(string_);
}

- (BOOL)isEqual:(id)object {
if (self == object) {
return YES;
}
if ([object isKindOfClass:[NSString class]]) {
GPBStringInitStringValue(self);
return CFStringCompare(string_, (CFStringRef)object, 0) ==
kCFCompareEqualTo;
}
return NO;
}

- (void)getCharacters:(unichar *)buffer range:(NSRange)aRange {
OSSpinLockLock(&lock_);
if (is7BitAscii_ && utf8_) {
unsigned char *bytes = &(utf8_[aRange.location]);
for (NSUInteger i = 0; i < aRange.length; ++i) {
buffer[i] = bytes[i];
}
OSSpinLockUnlock(&lock_);
} else {
GPBStringInitStringValueAlreadyLocked(self);
OSSpinLockUnlock(&lock_);
CFStringGetCharacters(string_, CFRangeMake(aRange.location, aRange.length),
buffer);
}
}

- (NSUInteger)lengthOfBytesUsingEncoding:(NSStringEncoding)encoding {
if ((encoding == NSUTF8StringEncoding) ||
(encoding == NSASCIIStringEncoding && is7BitAscii_)) {
return utf8Len_ - (hasBOM_ ? 3 : 0);
} else {
GPBStringInitStringValue(self);
return [(NSString *)string_ lengthOfBytesUsingEncoding:encoding];
}
}

- (BOOL)getBytes:(void *)buffer
maxLength:(NSUInteger)maxLength
usedLength:(NSUInteger *)usedLength
encoding:(NSStringEncoding)encoding
options:(NSStringEncodingConversionOptions)options
range:(NSRange)range
remainingRange:(NSRangePointer)remainingRange {
// [NSString getBytes:maxLength:usedLength:encoding:options:range:remainingRange]
// does not return reliable results if the maxLength argument is 0
// (Radar 16385183). Therefore we have special cased it as a slow case so
// that it behaves however Apple intends it to behave. It should be a rare
// case.
//
// [NSString getBytes:maxLength:usedLength:encoding:options:range:remainingRange]
// does not return reliable results if the range is outside of the strings
// length (Radar 16396177). Therefore we have special cased it as a slow
// case so that it behaves however Apple intends it to behave. It should
// be a rare case.
//
// We can optimize the UTF8StringEncoding and NSASCIIStringEncoding with no
// options cases.
if ((options == 0) &&
(encoding == NSUTF8StringEncoding || encoding == NSASCIIStringEncoding) &&
(maxLength != 0) &&
(NSMaxRange(range) <= utf8Len_)) {
// Might be able to optimize it.
OSSpinLockLock(&lock_);
if (is7BitAscii_ && utf8_) {
NSUInteger length = range.length;
length = (length < maxLength) ? length : maxLength;
memcpy(buffer, utf8_ + range.location, length);
if (usedLength) {
*usedLength = length;
}
if (remainingRange) {
remainingRange->location = range.location + length;
remainingRange->length = range.length - length;
}
OSSpinLockUnlock(&lock_);
if (length > 0) {
return YES;
} else {
return NO;
}
} else {
GPBStringInitStringValueAlreadyLocked(self);
OSSpinLockUnlock(&lock_);
}
} else {
GPBStringInitStringValue(self);
}
return [(NSString *)string_ getBytes:buffer
maxLength:maxLength
usedLength:usedLength
encoding:encoding
options:options
range:range
remainingRange:remainingRange];
}

@end
17 changes: 0 additions & 17 deletions objectivec/GPBCodedInputStream_PackagePrivate.h
Original file line number Diff line number Diff line change
Expand Up @@ -39,19 +39,6 @@
@class GPBUnknownFieldSet;
@class GPBFieldDescriptor;

// GPBString is a string subclass that avoids the overhead of initializing
// a full NSString until it is actually needed. Lots of protocol buffers contain
// strings, and instantiating all of those strings and having them parsed to
// verify correctness when the message was being read was expensive, when many
// of the strings were never being used.
//
// Note for future-self. I tried implementing this using a NSProxy.
// Turned out the performance was horrible in client apps because folks
// like to use libraries like SBJSON that grab characters one at a time.
// The proxy overhead was a killer.
@interface GPBString : NSString
@end

typedef struct GPBCodedInputStreamState {
const uint8_t *bytes;
size_t bufferSize;
Expand Down Expand Up @@ -92,10 +79,6 @@ typedef struct GPBCodedInputStreamState {

CF_EXTERN_C_BEGIN

// Returns a GPBString with a +1 retain count.
GPBString *GPBCreateGPBStringWithUTF8(const void *bytes, NSUInteger length)
__attribute__((ns_returns_retained));

int32_t GPBCodedInputStreamReadTag(GPBCodedInputStreamState *state);

double GPBCodedInputStreamReadDouble(GPBCodedInputStreamState *state);
Expand Down
Loading