angular · chuckjaz · Mar 16, 2017 · Mar 14, 2017
diff --git a/modules/@angular/compiler/src/i18n/digest.ts b/modules/@angular/compiler/src/i18n/digest.ts
@@ -206,47 +206,38 @@ enum Endian {
   Big,
 }
 
-function utf8Encode(str: string): string {
-  let encoded: string = '';
-
+export function utf8Encode(str: string): string {
+  let encoded = '';
   for (let index = 0; index < str.length; index++) {
-    const codePoint = decodeSurrogatePairs(str, index);
+    let codePoint = str.charCodeAt(index);
+
+    // decode a UTF-16 surrogate pair (high + low) into a single code point
+    // see https://mathiasbynens.be/notes/javascript-encoding#surrogate-formulae
+    if (codePoint >= 0xd800 && codePoint <= 0xdbff && str.length > (index + 1)) {
+      const low = str.charCodeAt(index + 1);
+      if (low >= 0xdc00 && low <= 0xdfff) {
+        index++;
+        codePoint = ((codePoint - 0xd800) << 10) + low - 0xdc00 + 0x10000;
+      }
+    }
 
     if (codePoint <= 0x7f) {
       encoded += String.fromCharCode(codePoint);
     } else if (codePoint <= 0x7ff) {
-      encoded += String.fromCharCode(0xc0 | codePoint >>> 6, 0x80 | codePoint & 0x3f);
+      encoded += String.fromCharCode(((codePoint >> 6) & 0x1F) | 0xc0, (codePoint & 0x3f) | 0x80);
     } else if (codePoint <= 0xffff) {
       encoded += String.fromCharCode(
-          0xe0 | codePoint >>> 12, 0x80 | codePoint >>> 6 & 0x3f, 0x80 | codePoint & 0x3f);
+          (codePoint >> 12) | 0xe0, ((codePoint >> 6) & 0x3f) | 0x80, (codePoint & 0x3f) | 0x80);
     } else if (codePoint <= 0x1fffff) {
       encoded += String.fromCharCode(
-          0xf0 | codePoint >>> 18, 0x80 | codePoint >>> 12 & 0x3f, 0x80 | codePoint >>> 6 & 0x3f,
-          0x80 | codePoint & 0x3f);
+          ((codePoint >> 18) & 0x07) | 0xf0, ((codePoint >> 12) & 0x3f) | 0x80,
+          ((codePoint >> 6) & 0x3f) | 0x80, (codePoint & 0x3f) | 0x80);
     }
   }
 
   return encoded;
 }
 
-// see https://mathiasbynens.be/notes/javascript-encoding#surrogate-formulae
-function decodeSurrogatePairs(str: string, index: number): number {
-  if (index < 0 || index >= str.length) {
-    throw new Error(`index=${index} is out of range in "${str}"`);
-  }
-
-  const high = str.charCodeAt(index);
-
-  if (high >= 0xd800 && high <= 0xdfff && str.length > index + 1) {
-    const low = byteAt(str, index + 1);
-    if (low >= 0xdc00 && low <= 0xdfff) {
-      return (high - 0xd800) * 0x400 + low - 0xdc00 + 0x10000;
-    }
-  }
-
-  return high;
-}
-
 function add32(a: number, b: number): number {
   return add32to64(a, b)[1];
 }

diff --git a/modules/@angular/compiler/test/i18n/digest_spec.ts b/modules/@angular/compiler/test/i18n/digest_spec.ts
@@ -6,7 +6,7 @@
  * found in the LICENSE file at https://angular.io/license
  */
 
-import {computeMsgId, sha1} from '../../src/i18n/digest';
+import {computeMsgId, sha1, utf8Encode} from '../../src/i18n/digest';
 
 export function main(): void {
   describe('digest', () => {
@@ -100,7 +100,46 @@ export function main(): void {
         }
         expect(computeMsgId(result, '')).toEqual('2122606631351252558');
       });
+    });
 
+    describe('utf8encode', () => {
+      // tests from https://github.com/mathiasbynens/wtf-8
+      it('should encode to utf8', () => {
+        const tests = [
+          ['abc', 'abc'],
+          // 1-byte
+          ['\0', '\0'],
+          // 2-byte
+          ['\u0080', '\xc2\x80'],
+          ['\u05ca', '\xd7\x8a'],
+          ['\u07ff', '\xdf\xbf'],
+          // 3-byte
+          ['\u0800', '\xe0\xa0\x80'],
+          ['\u2c3c', '\xe2\xb0\xbc'],
+          ['\uffff', '\xef\xbf\xbf'],
+          // 4-byte
+          ['\uD800\uDC00', '\xF0\x90\x80\x80'],
+          ['\uD834\uDF06', '\xF0\x9D\x8C\x86'],
+          ['\uDBFF\uDFFF', '\xF4\x8F\xBF\xBF'],
+          // unmatched surrogate halves (each encoded on its own as a 3-byte sequence)
+          // high surrogates: 0xD800 to 0xDBFF
+          ['\uD800', '\xED\xA0\x80'],
+          ['\uD800\uD800', '\xED\xA0\x80\xED\xA0\x80'],
+          ['\uD800A', '\xED\xA0\x80A'],
+          ['\uD800\uD834\uDF06\uD800', '\xED\xA0\x80\xF0\x9D\x8C\x86\xED\xA0\x80'],
+          ['\uD9AF', '\xED\xA6\xAF'],
+          ['\uDBFF', '\xED\xAF\xBF'],
+          // low surrogates: 0xDC00 to 0xDFFF
+          ['\uDC00', '\xED\xB0\x80'],
+          ['\uDC00\uDC00', '\xED\xB0\x80\xED\xB0\x80'],
+          ['\uDC00A', '\xED\xB0\x80A'],
+          ['\uDC00\uD834\uDF06\uDC00', '\xED\xB0\x80\xF0\x9D\x8C\x86\xED\xB0\x80'],
+          ['\uDEEE', '\xED\xBB\xAE'],
+          ['\uDFFF', '\xED\xBF\xBF'],
+        ];
+        tests.forEach(
+            ([input, output]: [string, string]) => { expect(utf8Encode(input)).toEqual(output); });
+      });
     });
   });
 }