Permalink
Browse files

Fix for Unicode decoding issue when using incremental networking.

Summary:
This is **a critical issue**.

The issue arises when incremental networking is enabled from JS by setting `onprogress` or `onload` on an `XMLHttpRequest` object.

The results:

![example1](https://cloud.githubusercontent.com/assets/2270433/18829964/5a54ff30-83e7-11e6-9806-97857dce0430.png)

![example2](https://cloud.githubusercontent.com/assets/2270433/18829966/5bf40a66-83e7-11e6-84e6-9e4d76ba4f8b.png)

Unicode characters get corrupted, seemingly at random. The issue stems from the way Unicode decoding is handled in `RCTNetworking.mm`. When incremental networking is enabled, each chunk of data is decoded on its own and passed to JS, so a multi-byte character that is split across two chunks cannot be decoded correctly:

```objective-c
incrementalDataBlock = ^(NSData *data, int64_t progress, int64_t total) {
NSString *responseString = [RCTNetworking decodeTextData:data fromResponse:task.response];
if (!responseString) {
  RCTLogWarn(@"Received data was not a string, or was not a recognised encoding.");
  return;
}
NSArray<id> *responseJSON = @[task.requestID, responseString, @(prog
Closes #10110

Reviewed By: yungsters

Differential Revision: D4101533

Pulled By: fkgozali

fbshipit-source-id: 2674eaf0dd4568889070c6cde5cdf12edc5be521
  • Loading branch information...
LeoNatan authored and Facebook Github Bot committed Oct 31, 2016
1 parent 6d3e074 commit 3ac3749ac37bf5f202eab535528f324461029366
@@ -104,6 +104,7 @@
2DD323E91DA2DE3F000FE1B8 /* libRCTWebSocket-tvOS.a in Frameworks */ = {isa = PBXBuildFile; fileRef = 2DD323D51DA2DD8B000FE1B8 /* libRCTWebSocket-tvOS.a */; };
2DD323EA1DA2DE3F000FE1B8 /* libReact-tvOS.a in Frameworks */ = {isa = PBXBuildFile; fileRef = 2DD323D91DA2DD8B000FE1B8 /* libReact-tvOS.a */; };
3578590A1B28D2CF00341EDB /* libRCTLinking.a in Frameworks */ = {isa = PBXBuildFile; fileRef = 357859011B28D2C500341EDB /* libRCTLinking.a */; };
+ 39AA31A41DC1DFDC000F7EBB /* RCTUnicodeDecodeTests.m in Sources */ = {isa = PBXBuildFile; fileRef = 39AA31A31DC1DFDC000F7EBB /* RCTUnicodeDecodeTests.m */; };
3D13F8481D6F6AF900E69E0E /* ImageInBundle.png in Resources */ = {isa = PBXBuildFile; fileRef = 3D13F8441D6F6AF200E69E0E /* ImageInBundle.png */; };
3D13F84A1D6F6AFD00E69E0E /* OtherImages.xcassets in Resources */ = {isa = PBXBuildFile; fileRef = 3D13F8451D6F6AF200E69E0E /* OtherImages.xcassets */; };
3D299BAF1D33EBFA00FA1057 /* RCTLoggingTests.m in Sources */ = {isa = PBXBuildFile; fileRef = 3D299BAE1D33EBFA00FA1057 /* RCTLoggingTests.m */; };
@@ -388,6 +389,7 @@
2DD323A01DA2DD8B000FE1B8 /* Info.plist */ = {isa = PBXFileReference; lastKnownFileType = text.plist.xml; path = Info.plist; sourceTree = "<group>"; };
2DD323A51DA2DD8B000FE1B8 /* UIExplorer-tvOSUnitTests.xctest */ = {isa = PBXFileReference; explicitFileType = wrapper.cfbundle; includeInIndex = 0; path = "UIExplorer-tvOSUnitTests.xctest"; sourceTree = BUILT_PRODUCTS_DIR; };
357858F81B28D2C400341EDB /* RCTLinking.xcodeproj */ = {isa = PBXFileReference; lastKnownFileType = "wrapper.pb-project"; name = RCTLinking.xcodeproj; path = ../../Libraries/LinkingIOS/RCTLinking.xcodeproj; sourceTree = "<group>"; };
+ 39AA31A31DC1DFDC000F7EBB /* RCTUnicodeDecodeTests.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; path = RCTUnicodeDecodeTests.m; sourceTree = "<group>"; };
3D13F83E1D6F6AE000E69E0E /* UIExplorerBundle.bundle */ = {isa = PBXFileReference; explicitFileType = wrapper.cfbundle; includeInIndex = 0; path = UIExplorerBundle.bundle; sourceTree = BUILT_PRODUCTS_DIR; };
3D13F8401D6F6AE000E69E0E /* Info.plist */ = {isa = PBXFileReference; lastKnownFileType = text.plist.xml; name = Info.plist; path = ../Info.plist; sourceTree = "<group>"; };
3D13F8441D6F6AF200E69E0E /* ImageInBundle.png */ = {isa = PBXFileReference; lastKnownFileType = image.png; path = ImageInBundle.png; sourceTree = "<group>"; };
@@ -636,6 +638,7 @@
138D6A161B53CD440074A87E /* RCTShadowViewTests.m */,
1497CFAB1B21F5E400C1F8F2 /* RCTUIManagerTests.m */,
13BCE84E1C9C209600DD7AAD /* RCTComponentPropsTests.m */,
+ 39AA31A31DC1DFDC000F7EBB /* RCTUnicodeDecodeTests.m */,
143BC57E1B21E18100462512 /* Info.plist */,
3DD981D51D33C6FB007DC7BE /* TestBundle.js */,
14D6D7101B220EB3001FB087 /* libOCMock.a */,
@@ -1346,6 +1349,7 @@
001BFCE41D838343008E587E /* RCTMultipartStreamReaderTests.m in Sources */,
13DF61B61B67A45000EDB188 /* RCTMethodArgumentTests.m in Sources */,
138D6A181B53CD440074A87E /* RCTShadowViewTests.m in Sources */,
+ 39AA31A41DC1DFDC000F7EBB /* RCTUnicodeDecodeTests.m in Sources */,
13B6C1A31C34225900D3FAF5 /* RCTURLUtilsTests.m in Sources */,
8385CF041B87479200C6273E /* RCTImageLoaderHelpers.m in Sources */,
68FF44381CF6111500720EFD /* RCTBundleURLProviderTests.m in Sources */,
@@ -0,0 +1,77 @@
+/**
+ * The examples provided by Facebook are for non-commercial testing and
+ * evaluation purposes only.
+ *
+ * Facebook reserves all rights not expressly granted.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON INFRINGEMENT. IN NO EVENT SHALL
+ * FACEBOOK BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#import <XCTest/XCTest.h>
+
+#import "RCTNetworking.h"
+
+// Base64 of UTF-8 encoded Hebrew text with niqqud (vowel points). Every letter and every
+// combining mark is a multi-byte sequence, so almost any split offset lands inside a character
+// or inside a combining character sequence.
+static NSString* const niqqudStringB64 = @"15HWsNa816jWtdeQ16nWtNeB15nXqiwg15HWuNa816jWuNeQINeQ1rHXnNa515TWtNeZ150sINeQ1rXXqiDXlNa316nWuNa814HXnta315nWtNedLCDXldaw15DWtdeqINeU1rjXkNa416jWttelLg==";
+
+// Two copies of U+1F602, a character outside the BMP (4 bytes each in UTF-8).
+static NSString* const emojiString = @"\U0001F602\U0001F602";
+
+// Declares the decoding class method so that the tests can call it directly.
+@interface RCTNetworking ()
+
++ (NSString *)decodeTextData:(NSData *)data fromResponse:(NSURLResponse *)response withCarryData:(NSMutableData*)inputCarryData;
+
+@end
+
+@interface RCTUnicodeDecodeTests : XCTestCase
+
+@end
+
+@implementation RCTUnicodeDecodeTests
+
+/**
+ * Encodes `unicodeString` with the IANA charset `encodingName`, splits the bytes in two at
+ * byte offset `cutPoint`, feeds both halves to the decoder with a shared carry buffer, and
+ * verifies that the concatenated output equals the input and that no bytes are left over.
+ */
+- (void)runTestForString:(NSString*)unicodeString usingEncoding:(NSString*)encodingName cutAt:(NSUInteger)cutPoint
+{
+  CFStringEncoding cfEncoding = CFStringConvertIANACharSetNameToEncoding((CFStringRef)encodingName);
+  XCTAssertTrue(cfEncoding != kCFStringEncodingInvalidId, @"Unknown charset name: %@", encodingName);
+  NSStringEncoding encoding = CFStringConvertEncodingToNSStringEncoding(cfEncoding);
+
+  NSData *unicodeBytes = [unicodeString dataUsingEncoding:encoding];
+  XCTAssertNotNil(unicodeBytes, @"Test string cannot be represented in %@", encodingName);
+
+  // XCTest assertions do not abort the test, so bail out explicitly instead of letting
+  // -subdataWithRange: raise NSRangeException on an out-of-bounds split offset.
+  if (unicodeBytes == nil || cutPoint > unicodeBytes.length) {
+    XCTFail(@"Cut point %lu is outside the %lu encoded bytes",
+            (unsigned long)cutPoint, (unsigned long)unicodeBytes.length);
+    return;
+  }
+
+  NSURLResponse *fakeResponse = [[NSHTTPURLResponse alloc] initWithURL:[NSURL URLWithString:@"testurl://"]
+                                                            statusCode:200
+                                                           HTTPVersion:@"1.1"
+                                                          headerFields:@{@"content-type": [NSString stringWithFormat:@"text/plain; charset=%@", encodingName]}];
+  XCTAssertEqualObjects(fakeResponse.textEncodingName, encodingName);
+
+  NSMutableData *carryStorage = [NSMutableData new];
+  NSMutableString *parsedString = [NSMutableString new];
+
+  NSData *firstChunk = [unicodeBytes subdataWithRange:NSMakeRange(0, cutPoint)];
+  NSData *secondChunk = [unicodeBytes subdataWithRange:NSMakeRange(cutPoint, unicodeBytes.length - cutPoint)];
+
+  // A nil result is treated as "nothing decoded yet"; the final comparison catches lost text.
+  [parsedString appendString:[RCTNetworking decodeTextData:firstChunk
+                                              fromResponse:fakeResponse
+                                             withCarryData:carryStorage] ?: @""];
+
+  [parsedString appendString:[RCTNetworking decodeTextData:secondChunk
+                                              fromResponse:fakeResponse
+                                             withCarryData:carryStorage] ?: @""];
+
+  XCTAssertEqual(carryStorage.length, (NSUInteger)0,
+                 @"Undecoded bytes left in the carry buffer (cut at %lu)", (unsigned long)cutPoint);
+  XCTAssertEqualObjects(parsedString, unicodeString, @"Cut at %lu", (unsigned long)cutPoint);
+}
+
+/** Decodes the base64 fixture into the Hebrew test string. */
+- (NSString *)niqqudString
+{
+  NSData *utf8Bytes = [[NSData alloc] initWithBase64EncodedString:niqqudStringB64
+                                                          options:(NSDataBase64DecodingOptions)0];
+  return [[NSString alloc] initWithData:utf8Bytes encoding:NSUTF8StringEncoding];
+}
+
+- (void)testNiqqud
+{
+  NSString *unicodeString = [self niqqudString];
+  XCTAssertNotNil(unicodeString, @"The base64 fixture is not valid UTF-8");
+
+  // Byte offset 25 falls in the middle of a two-byte letter.
+  [self runTestForString:unicodeString usingEncoding:@"utf-8" cutAt:25];
+}
+
+- (void)testEmojis
+{
+  // Byte offset 7 leaves three of the four bytes of the second emoji in the first chunk.
+  [self runTestForString:emojiString usingEncoding:@"utf-8" cutAt:7];
+}
+
+- (void)testEveryCutPoint
+{
+  NSString *niqqudString = [self niqqudString];
+  XCTAssertNotNil(niqqudString, @"The base64 fixture is not valid UTF-8");
+
+  // Array literals throw on nil, so only include the fixture when it decoded.
+  NSArray<NSString *> *samples = niqqudString ? @[niqqudString, emojiString] : @[emojiString];
+
+  for (NSString *sample in samples) {
+    NSUInteger byteLength = [sample lengthOfBytesUsingEncoding:NSUTF8StringEncoding];
+    // Includes 0 and byteLength, i.e. an empty first or second chunk.
+    for (NSUInteger cutPoint = 0; cutPoint <= byteLength; cutPoint++) {
+      [self runTestForString:sample usingEncoding:@"utf-8" cutAt:cutPoint];
+    }
+  }
+}
+
+@end
@@ -333,22 +333,60 @@ - (RCTURLRequestCancellationBlock)processDataForHTTPQuery:(nullable NSDictionary
return callback(nil, nil);
}
-+ (NSString *)decodeTextData:(NSData *)data fromResponse:(NSURLResponse *)response
+/**
+ * Decodes a chunk of response bytes into text.
+ *
+ * The encoding is taken from `response.textEncodingName` and defaults to UTF-8.
+ *
+ * @param data           The bytes received in this chunk.
+ * @param response       The response whose `textEncodingName` selects the encoding.
+ * @param inputCarryData Optional buffer shared between successive calls for the same request.
+ *                       On entry it holds the bytes that could not be decoded by the previous
+ *                       call; they are decoded together with `data`. On return it holds the
+ *                       bytes that could not be decoded by this call. Pass nil when `data` is
+ *                       the complete body.
+ * @return The decoded text (possibly empty when the chunk ends inside a character), or nil
+ *         if nothing could be decoded.
+ */
++ (NSString *)decodeTextData:(NSData *)data fromResponse:(NSURLResponse *)response withCarryData:(NSMutableData*)inputCarryData
{
NSStringEncoding encoding = NSUTF8StringEncoding;
if (response.textEncodingName) {
CFStringEncoding cfEncoding = CFStringConvertIANACharSetNameToEncoding((CFStringRef)response.textEncodingName);
encoding = CFStringConvertEncodingToNSStringEncoding(cfEncoding);
}
+
+  NSMutableData *currentCarryData = inputCarryData ?: [NSMutableData new];
+  [currentCarryData appendData:data];
+
+  // Number of leading bytes of currentCarryData represented in the returned string.
+  // This is tracked directly instead of re-encoding the result and measuring it: re-encoding
+  // can yield a different length (UTF-16 output is prefixed with a BOM, and a guessed encoding
+  // is unrelated to `encoding`), which made the carry range run past the end of the buffer.
+  NSUInteger consumedLength = 0;
+
// Attempt to decode text
- NSString *encodedResponse = [[NSString alloc] initWithData:data encoding:encoding];
- if (!encodedResponse && data.length) {
- // We don't have an encoding, or the encoding is incorrect, so now we try to guess
- [NSString stringEncodingForData:data
- encodingOptions:nil
- convertedString:&encodedResponse
- usedLossyConversion:NULL];
+  NSString *encodedResponse = [[NSString alloc] initWithData:currentCarryData encoding:encoding];
+
+  if (encodedResponse) {
+    consumedLength = currentCarryData.length;
+  } else if (data.length > 0) {
+    if (encoding == NSUTF8StringEncoding && inputCarryData) {
+      // If decode failed, we attempt to trim broken character bytes from the data.
+      // At this time, only UTF-8 support is enabled. Multibyte encodings, such as UTF-16 and UTF-32, require a lot of additional work
+      // to determine whether BOM was included in the first data packet. If so, save it, and attach it to each new data packet. If not,
+      // an encoding has to be selected with a suitable byte order (for ARM iOS, it would be little endianness).
+
+      CFStringEncoding cfEncoding = CFStringConvertNSStringEncodingToEncoding(encoding);
+      // Taking a single unichar is not good enough, due to Unicode combining character sequences or characters outside the BMP.
+      // See https://www.objc.io/issues/9-strings/unicode/#common-pitfalls
+      // We'll attempt with a sequence of two characters, the most common combining character sequence and characters outside the BMP (emojis).
+      CFIndex maxCharLength = CFStringGetMaximumSizeForEncoding(2, cfEncoding);
+
+      // Drop one more trailing byte per iteration until the remaining prefix decodes.
+      // The second condition keeps the prefix length from underflowing.
+      for (NSUInteger removedBytes = 1;
+           removedBytes < (NSUInteger)maxCharLength && removedBytes <= currentCarryData.length;
+           removedBytes++) {
+        NSUInteger candidateLength = currentCarryData.length - removedBytes;
+        encodedResponse = [[NSString alloc] initWithData:[currentCarryData subdataWithRange:NSMakeRange(0, candidateLength)]
+                                                encoding:encoding];
+
+        if (encodedResponse != nil) {
+          consumedLength = candidateLength;
+          break;
+        }
+      }
+    } else {
+      // We don't have an encoding, or the encoding is incorrect, so now we try to guess.
+      // The guess runs over all pending bytes so that previously carried bytes are not lost.
+      [NSString stringEncodingForData:currentCarryData
+                      encodingOptions:@{ NSStringEncodingDetectionSuggestedEncodingsKey: @[ @(encoding) ] }
+                      convertedString:&encodedResponse
+                  usedLossyConversion:NULL];
+
+      if (encodedResponse != nil) {
+        consumedLength = currentCarryData.length;
+      }
+    }
+  }
+
+  if (inputCarryData) {
+    // Keep only the undecoded tail for the next call. consumedLength never exceeds the buffer length.
+    NSData *newCarryData = [currentCarryData subdataWithRange:NSMakeRange(consumedLength, currentCarryData.length - consumedLength)];
+    [inputCarryData setData:newCarryData];
}
+
return encodedResponse;
}
@@ -364,7 +402,8 @@ - (void)sendData:(NSData *)data
NSString *responseString;
if ([responseType isEqualToString:@"text"]) {
- responseString = [RCTNetworking decodeTextData:data fromResponse:task.response];
+ // No carry storage is required here because the entire data has been loaded.
+ responseString = [RCTNetworking decodeTextData:data fromResponse:task.response withCarryData:nil];
if (!responseString) {
RCTLogWarn(@"Received data was not a string, or was not a recognised encoding.");
return;
@@ -417,13 +456,28 @@ - (void)sendRequest:(NSURLRequest *)request
RCTURLRequestProgressBlock downloadProgressBlock = nil;
if (incrementalUpdates) {
if ([responseType isEqualToString:@"text"]) {
+
+ // We need this to carry over bytes, which could not be decoded into text (such as broken UTF-8 characters).
+ // The incremental data block holds the ownership of this object, and will be released upon release of the block.
+ NSMutableData* incrementalDataCarry = [NSMutableData new];
+
incrementalDataBlock = ^(NSData *data, int64_t progress, int64_t total) {
- NSString *responseString = [RCTNetworking decodeTextData:data fromResponse:task.response];
+ NSUInteger initialCarryLength = incrementalDataCarry.length;
+
+ NSString *responseString = [RCTNetworking decodeTextData:data
+ fromResponse:task.response
+ withCarryData:incrementalDataCarry];
if (!responseString) {
RCTLogWarn(@"Received data was not a string, or was not a recognised encoding.");
return;
}
- NSArray<id> *responseJSON = @[task.requestID, responseString, @(progress), @(total)];
+
+ // Update progress to include the previous carry length and reduce the current carry length.
+ NSArray<id> *responseJSON = @[task.requestID,
+ responseString,
+ @(progress + initialCarryLength - incrementalDataCarry.length),
+ @(total)];
+
[self sendEventWithName:@"didReceiveNetworkIncrementalData" body:responseJSON];
};
} else {

0 comments on commit 3ac3749

Please sign in to comment.