添加基准测试优化encode

cnwhy · Jun 24, 2019 · d6249aa · d6249aa
1 parent b8ce65e
commit d6249aa
Show file tree

Hide file tree

Showing 12 changed files with 985 additions and 114 deletions.
diff --git a/README.md b/README.md
@@ -3,38 +3,33 @@
 [![Coverage Status](https://coveralls.io/repos/github/cnwhy/Base64.js/badge.svg?branch=master)](https://coveralls.io/github/cnwhy/Base64.js?branch=master)  
 > **Base64** `编码`,`解码` 库;
 
-### 为什么重复造轮子?
-1. `btoa` , `atob` 只支持 `Latin1` 字符.
-2. 常用的的 Base64 编码库处理字符串时会**主动**修改错误(空)编码字符, 导致解码的数据与原数据不一至.  
-   比如用 nodejs 中的 `Buffer`:
-    ```js
-    var s = '\ud800';
-    var b64 = Buffer.from(s).toString('base64');
-    var _s = Buffer.from(b64, 'base64').toString();
-    console.log(s == _s); //false
-    ```
-3. `Base64`编/解码本该与字符串无关, 但几乎所有 Base64 的`decode`方法都输出字符串, 限制了使用场景.
-4. 可自定义生成自编/解码函数, 适应自定义 Base64 编码表,以及自定义编码字符串的场景.
-
-### 本库方案
-默认对于字符串的转换用`UTF-8`编码, 但无视无效符(解码按同一规则), 保证 js 的字符([UCS-2](https://zh.wikipedia.org/wiki/UTF-16#UTF-16%E8%88%87UCS-2%E7%9A%84%E9%97%9C%E4%BF%82))串可以无损转换.
-`decode()` 单纯将`Base64`解析`Byte[]`; 但重写返回字节数组的`toString()`方法, 以`UTF-8`编码解析为字符串.  
-本库还暴露 `createEncode`, `createDecode` 两个 API, 支持生成非标准的 Base64 方案, 可自定义(`table`, `pad`, `encoding`);
-
-### 适用场景
-
-1. 二进制数据与 Base64 互转
-2. 字符串与 Base64 互转
-
-### 兼容性
-
-通用, 对于不支持`ArrayBuffer`的环境将会用`Array`代替`Uint8Array`.
-
-## 安装
+## 适用场景
+
+- 二进制数据与 Base64 互转
+- 字符串与 Base64 互转
+
+## install
 ```
 npm i @cnwhy/base64
 ```
 
+## 为何重复造轮子?
+1. 需要单纯的Base64的库,而且能在浏览器上使用; (利用node的 `Buffer` 对象的方法出局)
+2. 支持字符串; (`btoa` , `atob` 只支持 [Latin1](https://zh.wikipedia.org/wiki/ISO/IEC_8859-1));
+4. `Base64`编/解码本该与字符串无关, 但现有库几乎只支持字符串;
+5. 能用上`Tree-shaking`, 项目一般只需要用到其中一个(`encode` 或 `decode`), 我可不想copy代码;
+3. javascript 字符串无损转换 (因为这一点, 现有库几乎全军覆没), [具体例子](https://github.com/cnwhy/Base64.js/wiki/javascript%E5%AD%97%E7%AC%A6%E4%B8%B2%E6%97%A0%E6%8D%9F%E8%BD%AC%E6%8D%A2%E6%8E%A2%E8%AE%A8);
+6. 能应付异型`Base64`方案;
+
+
+
+## 兼容性
+通用, 对于不支持`ArrayBuffer`的环境将会用`Array`代替`Uint8Array`.  
+> 什么! 你要兼容IE6?  
+> 也不是不行, 把 `dist/Base64.umd.js` 最后那句 `Object.defineProperty(exports, '__esModule', { value: true });` 删了就可以了.
+
+
+
 ## 使用
 ```js
 const Base64 = require('base64.js');
@@ -64,6 +59,7 @@ true
 QmFzZTY05bqT8JCQgO+/vQ==
 false
 ```
+> 更多使用例子可以参看[这篇](https://blog.whyoop.com/2019/06/03/new-base64/#demo);
 
 ## API
 
@@ -91,3 +87,6 @@ Base64 = {
 	createDecode(table?: string[] | string, pad?: string, strDecode?: Function): (base64str: string) => Uint8Array | number[];
 }
 ```
+
+## 参考资料
+[https://tools.ietf.org/html/rfc4648](https://tools.ietf.org/html/rfc4648);
diff --git a/package.json b/package.json
@@ -31,7 +31,9 @@
   "license": "MIT",
   "devDependencies": {
     "@babel/preset-env": "^7.4.3",
+    "@types/benchmark": "^1.0.31",
     "ava": "^1.4.1",
+    "benchmark": "^2.1.4",
     "gbk.js": "^0.2.4",
     "nyc": "^14.0.0",
     "rimraf": "^2.6.3",

diff --git a/src/main.ts b/src/main.ts
@@ -1,4 +1,4 @@
-import { isArray, MyArrayBuffer, myUint8arrayClass, getUint8Array } from './poliyfill';
+import {isArray, MyLikeUint8array , isUint8Array, isArrayBuffer } from './poliyfill';
 const BASE64_TABLE = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/';
 const PAD = '=';
 
@@ -72,40 +72,45 @@ function createEncode(
 	}
 	const TABLE = getTable(table);
 	const PAD = getPad(pad, TABLE);
-	return function(u8arr: ArrayBuffer | Uint8Array | number[] | string): string {
+	return function(input: ArrayBuffer | Uint8Array | number[] | string): string {
 		let _u8arr;
-		if (u8arr instanceof myUint8arrayClass) {
-			_u8arr = u8arr;
-		} else if (u8arr instanceof MyArrayBuffer || isArray(u8arr)) {
-			_u8arr = getUint8Array(u8arr);
+		if (isArray(input) || isUint8Array(input)) {
+			_u8arr = input;
+		} else if (isArrayBuffer(input)) {
+			_u8arr = new Uint8Array(input);
 		} else if (typeof strEncode == 'function') {
-			_u8arr = strEncode(String(u8arr));
+			// 其它都当成 string 处理
+			_u8arr = strEncode(String(input));
 		} else {
 			// 未初始化 strEncode 函数则不支持string类型
-			throw TypeError('"strEncode" is not function');
+			throw TypeError(`Input type is not supported, "strEncode" is not function`);
 		}
-		let bitLength = Math.ceil((_u8arr.length * 8) / 6);
-		let str64Length = Math.ceil(_u8arr.length / 3) * 4;
-		let codes = new Array(str64Length);
-		let index = 0;
-		for (let i = 0; i < _u8arr.length; ) {
-			let a0 = _u8arr[i++];
-			let a1 = _u8arr[i++];
-			let a2 = _u8arr[i++];
-			codes[index++] = a0 >> 2;
-			codes[index++] = ((a0 << 4) | (a1 >> 4)) & 0x3f;
-			codes[index++] = ((a1 << 2) | (a2 >> 6)) & 0x3f;
-			codes[index++] = a2 & 0x3f;
+		var base64 = '';
+		var _l = _u8arr.length % 3;
+		var padLength = _l ? _l === 2 ? 1 : 2 : 0;
+		var loopLength = _u8arr.length - _l;
+		var a0, a1, a2, i = 0;
+		while (i < loopLength) {
+			a0 = _u8arr[i++];
+			a1 = _u8arr[i++];
+			a2 = _u8arr[i++];
+			base64 =
+				base64 +
+				TABLE[a0 >> 2] +
+				TABLE[((a0 << 4) | (a1 >> 4)) & 0x3f] +
+				TABLE[((a1 << 2) | (a2 >> 6)) & 0x3f] +
+				TABLE[a2 & 0x3f];
 		}
-		let base64 = '';
-		for (let i = 0; i < codes.length; i++) {
-			const code = codes[i];
-			base64 += i > bitLength - 1 ? PAD : TABLE[code];
+		if (padLength) {
+			a0 = _u8arr[i++];
+			a1 = _u8arr[i++] || 0;
+			base64 =
+				base64 +
+				TABLE[a0 >> 2] +
+				TABLE[((a0 << 4) | (a1 >> 4)) & 0x3f] +
+				(padLength === 2 ? PAD + PAD : TABLE[(a1 << 2) & 0x3f] + PAD);
 		}
 		return base64;
-		// return codes.reduce((d, code, i) => {
-		// 	return (d += i > bitLength - 1 ? pad : table[code]);
-		// }, '');
 	};
 }
 
@@ -171,7 +176,7 @@ function createDecode(
 		let indexMax = length - getPads(base64Str);
 		let mc4 = indexMax % 4;
 		if (mc4 === 1) throw new TypeError('The parameter is not a base64 string!');
-		let buffer = new myUint8arrayClass(Math.floor((indexMax * 6) / 8));
+		let buffer = new MyLikeUint8array(Math.floor((indexMax * 6) / 8));
 		let index = 0;
 		let i = 0;
 		const next = function() {

diff --git a/src/poliyfill.ts b/src/poliyfill.ts
@@ -1,16 +1,25 @@
 // 手动 poliyfill 以最小的代码量兼容IE6(ES3);
+// const emptyFn = function(){};
 export const isArray =
 	Array.isArray ||
-	function(obj) {
-		Object.prototype.toString.call(obj) == '[object Array]';
+	function(obj): obj is Array<any> {
+		return Object.prototype.toString.call(obj) == '[object Array]';
 	};
 export const hasArrayBuffer = typeof ArrayBuffer === 'function';
-export const MyArrayBuffer = hasArrayBuffer ? ArrayBuffer : function() {};
-export const myUint8arrayClass = hasArrayBuffer ? Uint8Array : Array;
-export const getUint8Array = hasArrayBuffer
-	? function(arr: any) {
-			return new Uint8Array(arr);
-	  }
-	: function(arr: any) {
-			return typeof arr === 'number' ? new Array(arr) : arr;
-	  };
+// export const MyArrayBuffer = hasArrayBuffer ? ArrayBuffer : emptyFn;
+// export const MyUint8Array = hasArrayBuffer ? Uint8Array : emptyFn;
+export const MyLikeUint8array = hasArrayBuffer ? Uint8Array : Array;
+// export const myUint8arrayClass = hasArrayBuffer ? Uint8Array : Array;
+// export const getUint8Array = hasArrayBuffer
+// 	? function(arg: any) {
+// 			return new Uint8Array(arg);
+// 	  }
+// 	: function(arg: any) {
+// 			return typeof arg === 'number' ? new Array(arg) : arg;
+// 	  };
+export const isUint8Array = function(obj: any): obj is Uint8Array {
+	return hasArrayBuffer && obj instanceof Uint8Array;
+};
+export const isArrayBuffer = function(obj: any): obj is ArrayBuffer {
+	return hasArrayBuffer && obj instanceof ArrayBuffer;
+};
diff --git a/src/utf8.ts b/src/utf8.ts
@@ -1,55 +1,73 @@
-import { isArray, MyArrayBuffer, myUint8arrayClass, getUint8Array } from './poliyfill';
+import {isArray, isArrayBuffer, isUint8Array} from './poliyfill';
 const ERR_CODE = '\ufffd';
-
-function u2utf8(codePoint: number): number[] {
-	// 未暴露的方法, 内部调用无需判断;
-	// if (codePoint < 0 || codePoint > 0x7fffffff) throw new SyntaxError('Undefined Unicode code-point');
-	if (codePoint < 0x80) return [codePoint];
-	let n = 11;
-	while (codePoint >= 2 ** n) {
-		n += 5;
-	}
-	let length = Math.ceil(n / 6);
-	let u8 = new Array(length);
-	let i = 0;
-	u8[0] = (0xff ^ (2 ** (8 - length) - 1)) | (codePoint >> (6 * (length - 1)));
-	while (i < length - 1) {
-		u8[length - 1 - i] = 0x80 | ((codePoint >> (i * 6)) & 0x3f);
-		i++;
-	}
-	return u8;
-}
+type LikeUint8Array = number[] | Uint8Array;
 
 /**
  * 字符串utf8编码
  *
  * @param {string} str
  * @returns
  */
-function utf8Encode(str: string): Uint8Array {
-	let utf8: number[] = [];
-	let codePoints: number[] = [];
-	//将字符串(ucs2)转为Unicode
-	for (let i = 0; i < str.length; i++) {
+function utf8Encode(str: string): LikeUint8Array {
+	let bf: number[] = [];
+	let length = str.length;
+	let add = function(codePoint: number) {
+		if (codePoint < 0x80) {
+			return bf.push(codePoint);
+		}
+		if (codePoint < 0x800) {
+			return bf.push(0xc0 | (codePoint >> 6), 0x80 | (codePoint & 0x3f));
+		}
+		if (codePoint < 0x10000) {
+			return bf.push(
+				0xe0 | (codePoint >> 12),
+				0x80 | ((codePoint >> 6) & 0x3f),
+				0x80 | (codePoint & 0x3f)
+			);
+		}
+		if (codePoint < 0x200000) {
+			return bf.push(
+				0xf0 | (codePoint >> 18),
+				0x80 | ((codePoint >> 12) & 0x3f),
+				0x80 | ((codePoint >> 6) & 0x3f),
+				0x80 | (codePoint & 0x3f)
+			);
+		}
+		// 肯定不会用到的 注释掉 减少打包代码量
+		// if (codePoint < 0x4000000) {
+		// 	return bf.push(
+		// 		0xf8 | (codePoint >> 24),
+		// 		0x80 | ((codePoint >> 18) & 0x3f),
+		// 		0x80 | ((codePoint >> 12) & 0x3f),
+		// 		0x80 | ((codePoint >> 6) & 0x3f),
+		// 		0x80 | (codePoint & 0x3f)
+		// 	);
+		// }
+		// return bf.push(
+		// 	0xfc | (codePoint >> 30),
+		// 	0x80 | ((codePoint >> 24) & 0x3f),
+		// 	0x80 | ((codePoint >> 18) & 0x3f),
+		// 	0x80 | ((codePoint >> 12) & 0x3f),
+		// 	0x80 | ((codePoint >> 6) & 0x3f),
+		// 	0x80 | (codePoint & 0x3f)
+		// );
+	};
+
+	for (let i = 0; i < length; i++) {
 		let code = str.charCodeAt(i);
 		let cod1;
 		if (code < 0xd800 || code > 0xdfff) {
-			codePoints.push(code);
+			add(code);
 		} else if (code < 0xdc00 && (cod1 = str.charCodeAt(i + 1)) >= 0xdc00 && cod1 < 0xe000) {
 			//四字节字符处理
 			i++;
-			codePoints.push(0x10000 + (((code & 0x3ff) << 10) | (cod1 & 0x3ff)));
+			add(0x10000 + (((code & 0x3ff) << 10) | (cod1 & 0x3ff)));
 		} else {
 			//不自行处理 不正常编码
-			codePoints.push(code);
+			add(code);
 		}
 	}
-	//UTF8编码Unicode
-	for (let i = 0; i < codePoints.length; i++) {
-		let v = codePoints[i];
-		utf8.push.apply(utf8, u2utf8(v));
-	}
-	return getUint8Array(utf8);
+	return bf;
 }
 
 /**
@@ -62,12 +80,10 @@ function utf8Decode(buffer: ArrayBuffer | Uint8Array | number[]): string {
 	let u8: Uint8Array | number[];
 	let str = '';
 	let index = 0;
-	if (buffer instanceof myUint8arrayClass) {
-		// Uint8Array & Buffer
+	if (isArray(buffer) || isUint8Array(buffer)) {
 		u8 = buffer;
-	} else if (buffer instanceof MyArrayBuffer || isArray(buffer)) {
-		// ArrayBuffer & number[]
-		u8 = getUint8Array(buffer);
+	} else if (isArrayBuffer(buffer)) {
+		u8 = new Uint8Array(buffer);
 	} else {
 		return String(buffer);
 	}
@@ -111,7 +127,7 @@ function utf8Decode(buffer: ArrayBuffer | Uint8Array | number[]): string {
 				return _i;
 			}
 		} catch (e) {
-			// 不正常的UTF8字节数据, 替换为 � 
+			// 不正常的UTF8字节数据, 替换为 �
 			// 注:此处与utf8Encode的不正常编码不同;
 			// UTF8编码时不考虑代理区, UTF16需要考虑代理区;
 			str += ERR_CODE;
@@ -124,7 +140,4 @@ function utf8Decode(buffer: ArrayBuffer | Uint8Array | number[]): string {
 	return str;
 }
 
-export{
-	utf8Encode,
-	utf8Decode
-}
+export { utf8Encode, utf8Decode };