From 7ae7a8440cd96f9cb464579123ac9ffb3eb2e620 Mon Sep 17 00:00:00 2001 From: Alex Rickabaugh Date: Fri, 28 Apr 2017 11:54:40 -0700 Subject: [PATCH] fix(http): introduce encodingHint for text() for better ArrayBuffer support Currently, if a Response has an ArrayBuffer body and text() is called, Angular attempts to convert the ArrayBuffer to a string. Doing this requires knowing the encoding of the bytes in the buffer, which is context that we don't have. Instead, we assume that the buffer is encoded in UTF-16, and attempt to process it that way. Unfortunately the approach chosen (interpret buffer as Uint16Array and create a Javascript string from each entry using String.fromCharCode) is incorrect as it does not handle UTF-16 surrogate pairs. What Angular actually implements, then, is UCS-2 decoding, which is equivalent to UTF-16 with characters restricted to the base plane. No standard way of decoding UTF-8 or UTF-16 exists in the browser today. APIs like TextDecoder are only supported in a few browsers, and although hacks like using the FileReader API with a Blob to force browsers to do content encoding detection and decoding exist, they're slow and not compatible with the synchronous text() API. Thus, this bug is fixed by introducing an encodingHint parameter to text(). The default value of this parameter is 'legacy', indicating that the existing broken behavior should be used - this prevents breaking existing apps. The only other possible value of the hint is 'iso-8859' which interprets each byte of the buffer with String.fromCharCode. UTF-8 and UTF-16 are not supported - it is up to the consumer to get the ArrayBuffer and decode it themselves. The parameter is a hint, as it's not always used (for example, if the conversion to text doesn't involve an ArrayBuffer source). Additionally, this leaves the door open for future implementations to perform more sophisticated encoding detection and ignore the user-provided value if it can be proven to be incorrect. Fixes #15932. PR Close #16420 --- packages/http/src/body.ts | 22 ++++++++++++++++++++-- packages/http/src/http_utils.ts | 9 +++++++++ packages/http/src/static_request.ts | 3 ++- packages/http/test/static_request_spec.ts | 17 ++++++++++++++++- 4 files changed, 47 insertions(+), 4 deletions(-) diff --git a/packages/http/src/body.ts b/packages/http/src/body.ts index 053a7e94754d1..4b091fb5b9814 100644 --- a/packages/http/src/body.ts +++ b/packages/http/src/body.ts @@ -37,14 +37,32 @@ export abstract class Body { /** * Returns the body as a string, presuming `toString()` can be called on the response body. + * + * When decoding an `ArrayBuffer`, the optional `encodingHint` parameter determines how the + * bytes in the buffer will be interpreted. Valid values are: + * + * - `legacy` - incorrectly interpret the bytes as UTF-16 (technically, UCS-2). Only characters + * in the Basic Multilingual Plane are supported, surrogate pairs are not handled correctly. + * In addition, the endianness of the 16-bit octet pairs in the `ArrayBuffer` is not taken + * into consideration. This is the default behavior to avoid breaking apps, but should be + * considered deprecated. + * + * - `iso-8859` - interpret the bytes as ISO-8859 (which can be used for ASCII encoded text). */ - text(): string { + text(encodingHint: 'legacy'|'iso-8859' = 'legacy'): string { if (this._body instanceof URLSearchParams) { return this._body.toString(); } if (this._body instanceof ArrayBuffer) { - return String.fromCharCode.apply(null, new Uint16Array(this._body)); + switch (encodingHint) { + case 'legacy': + return String.fromCharCode.apply(null, new Uint16Array(this._body as ArrayBuffer)); + case 'iso-8859': + return String.fromCharCode.apply(null, new Uint8Array(this._body as ArrayBuffer)); + default: + throw new Error(`Invalid value for encodingHint: ${encodingHint}`); + } } if (this._body == null) { diff --git a/packages/http/src/http_utils.ts b/packages/http/src/http_utils.ts index a03f52d2f05b6..694d40eebd3b6 100644 --- a/packages/http/src/http_utils.ts +++ b/packages/http/src/http_utils.ts @@ -42,6 +42,15 @@ export function getResponseURL(xhr: any): string|null { return null; } +export function stringToArrayBuffer8(input: String): ArrayBuffer { + const view = new Uint8Array(input.length); + for (let i = 0, strLen = input.length; i < strLen; i++) { + view[i] = input.charCodeAt(i); + } + return view.buffer; +} + + export function stringToArrayBuffer(input: String): ArrayBuffer { const view = new Uint16Array(input.length); for (let i = 0, strLen = input.length; i < strLen; i++) { diff --git a/packages/http/src/static_request.ts b/packages/http/src/static_request.ts index fb48126e1a775..7005bb4cb0065 100644 --- a/packages/http/src/static_request.ts +++ b/packages/http/src/static_request.ts @@ -186,4 +186,5 @@ const noop = function() {}; const w = typeof window == 'object' ? window : noop; const FormData = (w as any /** TODO #9100 */)['FormData'] || noop; const Blob = (w as any /** TODO #9100 */)['Blob'] || noop; -export const ArrayBuffer = (w as any /** TODO #9100 */)['ArrayBuffer'] || noop; +export const ArrayBuffer: ArrayBufferConstructor = + (w as any /** TODO #9100 */)['ArrayBuffer'] || noop; diff --git a/packages/http/test/static_request_spec.ts b/packages/http/test/static_request_spec.ts index c95dfb527f984..79878966daba9 100644 --- a/packages/http/test/static_request_spec.ts +++ b/packages/http/test/static_request_spec.ts @@ -7,10 +7,12 @@ */ import {describe, expect, it} from '@angular/core/testing/src/testing_internal'; +import {ɵgetDOM as getDOM} from '@angular/platform-browser'; import {RequestOptions} from '../src/base_request_options'; import {ContentType} from '../src/enums'; import {Headers} from '../src/headers'; +import {stringToArrayBuffer, stringToArrayBuffer8} from '../src/http_utils'; import {ArrayBuffer, Request} from '../src/static_request'; export function main() { @@ -118,6 +120,19 @@ export function main() { it('should use search if present', () => { const req = new Request({url: 'http://test.com', search: 'a=1&b=2'}); expect(req.url).toBe('http://test.com?a=1&b=2'); - }) + }); + + if (getDOM().supportsWebAnimation()) { + it('should serialize an ArrayBuffer to string via legacy encoding', () => { + const str = '\u89d2\u5ea6'; + expect(new Request({body: stringToArrayBuffer(str), url: '/'}).text()).toEqual(str); + }); + + it('should serialize an ArrayBuffer to string via iso-8859 encoding', () => { + const str = 'abcd'; + expect(new Request({body: stringToArrayBuffer8(str), url: '/'}).text('iso-8859')) + .toEqual(str); + }); + } }); }