Skip to content

Commit

Permalink
Add goog.crypt.base64.{en,de}codeStringUtf8
Browse files Browse the repository at this point in the history
The non-`Utf8` version has lossy behavior when passed strings with characters outside the Latin-1 range (i.e. charCode > 255).  This cannot be fixed in-place because the correct behavior depends on whether the input string is intended as text or binary.  If it's binary, then the correct behavior is to throw (and a future change will start throwing asynchronously, so that this case should start showing up in logs).  If it's text, then it's appropriate to first encode non-ASCII characters (i.e. charCode > 127) with UTF-8, but note that this is wholly inappropriate for binary input, since it changes the encoding of bytes in the [128..255] range.

RELNOTES[NEW]: Added `goog.crypt.base64.{en,de}codeStringUtf8`

PiperOrigin-RevId: 469817557
Change-Id: I8accaa98859aa05ec337cd0aaa2f1e2f7d185fa8
  • Loading branch information
shicks authored and Copybara-Service committed Aug 24, 2022
1 parent 50a2ae0 commit 19ae2f1
Show file tree
Hide file tree
Showing 2 changed files with 96 additions and 23 deletions.
47 changes: 44 additions & 3 deletions closure/goog/crypt/base64.js
Original file line number Diff line number Diff line change
Expand Up @@ -211,9 +211,10 @@ goog.crypt.base64.encodeByteArray = function(input, alphabet) {


/**
* Base64-encode a string.
* Base64-encode a binary string.
*
* @param {string} input A string to encode.
* @param {string} input A string to encode. Must not contain characters
* outside of the Latin-1 range (i.e. charCode > 255).
* @param {!goog.crypt.base64.Alphabet=} alphabet Base 64 alphabet to
* use in encoding. Alphabet.DEFAULT is used by default.
* @return {string} The base64 encoded string.
Expand All @@ -231,7 +232,28 @@ goog.crypt.base64.encodeString = function(input, alphabet) {


/**
* Base64-decode a string.
* Base64-encode a text string. Non-ASCII characters (charCode > 127) will be
* encoded as UTF-8.
*
* @param {string} input A string to encode.
* @param {!goog.crypt.base64.Alphabet=} alphabet Base 64 alphabet to
* use in encoding. Alphabet.DEFAULT is used by default.
* @return {string} The base64 encoded string.
*/
goog.crypt.base64.encodeStringUtf8 = function(input, alphabet) {
  'use strict';
  // The native encoder only knows the default alphabet, so any explicitly
  // requested alphabet (or a missing native implementation) goes through the
  // byte-array encoder after converting the text to UTF-8 bytes.
  if (alphabet || !goog.crypt.base64.HAS_NATIVE_ENCODE_) {
    return goog.crypt.base64.encodeByteArray(
        goog.crypt.stringToUtf8ByteArray(input), alphabet);
  }
  // Native shortcut: encodeURIComponent emits the UTF-8 bytes as %XX escapes
  // and unescape turns each escape back into a single char, producing the
  // one-char-per-byte string that btoa requires.
  // NOTE(review): encodeURIComponent throws URIError on lone surrogates; the
  // byte-array path above may treat such input differently -- confirm.
  return goog.global.btoa(unescape(encodeURIComponent(input)));
};


/**
* Base64-decode a string into a binary bytestring.
*
* @param {string} input Input to decode. Any whitespace is ignored, and the
* input may be encoded with either supported alphabet (or a mix thereof).
Expand All @@ -258,6 +280,25 @@ goog.crypt.base64.decodeString = function(input, useCustomDecoder) {
};


/**
 * Base64-decode a string into text. The input should be the result of
 * double-encoding a Unicode string: first the non-ASCII characters
 * (charCode > 127) are encoded as UTF-8 bytes, and then the resulting bytes
 * are base64-encoded.
 *
 * @param {string} input Input to decode. Any whitespace is ignored, and the
 *     input may be encoded with either supported alphabet (or a mix thereof).
 * @param {boolean=} useCustomDecoder True indicates the custom decoder is used,
 *     which supports alternative alphabets. Note that passing false may still
 *     use the custom decoder on browsers without native support.
 * @return {string} The decoded text.
 */
goog.crypt.base64.decodeStringUtf8 = function(input, useCustomDecoder) {
  'use strict';
  // First strip the base64 layer; what remains is the UTF-8 encoded form of
  // the text, held in a string.
  const utf8Encoded = goog.crypt.base64.decodeString(input, useCustomDecoder);
  // escape() rewrites the non-ASCII chars as %XX sequences, which
  // decodeURIComponent then interprets as UTF-8. decodeURIComponent throws
  // URIError when those sequences are not valid UTF-8.
  return decodeURIComponent(escape(utf8Encoded));
};


/**
* Base64-decode a string to an Array of numbers.
*
Expand Down
72 changes: 52 additions & 20 deletions closure/goog/crypt/base64_test.js
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,11 @@ const tests = [

// Testing non-ASCII characters (1-10 in Chinese)
[
'\xe4\xb8\x80\xe4\xba\x8c\xe4\xb8\x89\xe5\x9b\x9b\xe4\xba\x94\xe5' +
'\x85\xad\xe4\xb8\x83\xe5\x85\xab\xe4\xb9\x9d\xe5\x8d\x81',
{
binary: '\xe4\xb8\x80\xe4\xba\x8c\xe4\xb8\x89\xe5\x9b\x9b\xe4\xba\x94' +
'\xe5\x85\xad\xe4\xb8\x83\xe5\x85\xab\xe4\xb9\x9d\xe5\x8d\x81',
text: '一二三四五六七八九十',
},
[
'5LiA5LqM5LiJ5Zub5LqU5YWt5LiD5YWr5Lmd5Y2B',
'5LiA5LqM5LiJ5Zub5LqU5YWt5LiD5YWr5Lmd5Y2B',
Expand All @@ -40,73 +43,100 @@ const tests = [
// Testing for web-safe alphabets
[
'>>>???>>>???=/',
['Pj4+Pz8/Pj4+Pz8/PS8=', 'Pj4+Pz8/Pj4+Pz8/PS8', 'Pj4-Pz8_Pj4-Pz8_PS8=', 'Pj4-Pz8_Pj4-Pz8_PS8.', 'Pj4-Pz8_Pj4-Pz8_PS8'],
[
'Pj4+Pz8/Pj4+Pz8/PS8=',
'Pj4+Pz8/Pj4+Pz8/PS8',
'Pj4-Pz8_Pj4-Pz8_PS8=',
'Pj4-Pz8_Pj4-Pz8_PS8.',
'Pj4-Pz8_Pj4-Pz8_PS8',
],
],
];
// clang-format on

/**
* Asserts encodings
* @param {string} input an input string.
* @param {string|{binary: string, text: string}} input an input string.
* @param {!Array<string>} expectedOutputs expected outputs in the order of
* base64.Alphabet enum.
*/
function assertEncodings(input, expectedOutputs) {
const arr = crypt.stringToByteArray(input);
const {text, binary} =
typeof input === 'string' ? {text: input, binary: input} : input;
const arr = crypt.stringToByteArray(binary);

// quick validity test
assertArrayEquals(arr, crypt.stringToUtf8ByteArray(text));

// encodeString
for (const name in base64.Alphabet) {
const alphabet = base64.Alphabet[name];
assertEquals(
base64.encodeString(input, alphabet), expectedOutputs[alphabet]);
expectedOutputs[alphabet], base64.encodeStringUtf8(text, alphabet));
assertEquals(
expectedOutputs[alphabet], base64.encodeString(binary, alphabet));
}
// default case
assertEquals(
expectedOutputs[base64.Alphabet.DEFAULT], base64.encodeStringUtf8(text));
assertEquals(
base64.encodeString(input), // default case
expectedOutputs[base64.Alphabet.DEFAULT]);
expectedOutputs[base64.Alphabet.DEFAULT], base64.encodeString(binary));

// encodeByteArray with Array<number>
for (const name in base64.Alphabet) {
const alphabet = base64.Alphabet[name];
assertEquals(
base64.encodeByteArray(arr, alphabet), expectedOutputs[alphabet]);
expectedOutputs[alphabet], base64.encodeByteArray(arr, alphabet));
}
// default case
assertEquals(
base64.encodeByteArray(arr), // default case
expectedOutputs[base64.Alphabet.DEFAULT]);
expectedOutputs[base64.Alphabet.DEFAULT], base64.encodeByteArray(arr));

// encodeByteArray with Uint8Array
if (SUPPORT_TYPED_ARRAY) {
const uint8Arr = new Uint8Array(arr);
for (const name in base64.Alphabet) {
const alphabet = base64.Alphabet[name];
assertEquals(
base64.encodeByteArray(uint8Arr, alphabet),
expectedOutputs[alphabet]);
expectedOutputs[alphabet],
base64.encodeByteArray(uint8Arr, alphabet));
}
// default case
assertEquals(
base64.encodeByteArray(uint8Arr), // default case
expectedOutputs[base64.Alphabet.DEFAULT]);
expectedOutputs[base64.Alphabet.DEFAULT],
base64.encodeByteArray(uint8Arr));
}
}

/**
* Assert decodings
* @param {!Array<string>} inputs input strings in various encodings.
* @param {string} stringOutput expected output in string.
* @param {string|{text: string, binary: string}} expectedOutput expected output
* in string (optionally split out for text/binary).
*/
function assertDecodings(inputs, stringOutput) {
const arrOutput = crypt.stringToByteArray(stringOutput);
function assertDecodings(inputs, expectedOutput) {
const textOutput =
typeof expectedOutput === 'string' ? expectedOutput : expectedOutput.text;
const binaryOutput = typeof expectedOutput === 'string' ?
expectedOutput :
expectedOutput.binary;
const arrOutput = crypt.stringToByteArray(binaryOutput);
const uint8ArrOutput = SUPPORT_TYPED_ARRAY ? new Uint8Array(arrOutput) : null;

// Quick validity check that decoding the text version is equivalent.
assertArrayEquals(arrOutput, crypt.stringToUtf8ByteArray(textOutput));

for (let i = 0; i < inputs.length; i++) {
const input = inputs[i];

// decodeString
assertEquals(base64.decodeString(input, true), stringOutput);
assertEquals(textOutput, base64.decodeStringUtf8(input, true));
assertEquals(binaryOutput, base64.decodeString(input, true));

if (i === 0) {
// For Alphabet.DEFAULT, test with native decoder too
assertEquals(base64.decodeString(input), stringOutput);
assertEquals(textOutput, base64.decodeStringUtf8(input));
assertEquals(binaryOutput, base64.decodeString(input));
}

// decodeStringToByteArray
Expand Down Expand Up @@ -163,7 +193,9 @@ testSuite({
const decodedArr = crypt.stringToByteArray(decoded);

assertEquals(base64.decodeString(encoded), decoded); // native
assertEquals(base64.decodeStringUtf8(encoded), decoded);
assertEquals(base64.decodeString(encoded, true), decoded); // custom
assertEquals(base64.decodeStringUtf8(encoded, true), decoded);
assertArrayEquals(base64.decodeStringToByteArray(encoded), decodedArr);

if (SUPPORT_TYPED_ARRAY) {
Expand Down

0 comments on commit 19ae2f1

Please sign in to comment.