Skip to content

Commit

Permalink
添加基准测试 优化encode
Browse files Browse the repository at this point in the history
  • Loading branch information
cnwhy committed Jun 24, 2019
1 parent b8ce65e commit d6249aa
Show file tree
Hide file tree
Showing 12 changed files with 985 additions and 114 deletions.
55 changes: 27 additions & 28 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,38 +3,33 @@
[![Coverage Status](https://coveralls.io/repos/github/cnwhy/Base64.js/badge.svg?branch=master)](https://coveralls.io/github/cnwhy/Base64.js?branch=master)
> **Base64** `编码`,`解码` 库;
### 为什么重复造轮子?
1. `btoa` , `atob` 只支持 `Latin1` 字符.
2. 常用的 Base64 编码库处理字符串时会**主动**修改错误(空)编码字符, 导致解码的数据与原数据不一致.
比如用 nodejs 中的 `Buffer`:
```js
var s = '\ud800';
var b64 = Buffer.from(s).toString('base64');
var _s = Buffer.from(b64, 'base64').toString();
console.log(s == _s); //false
```
3. `Base64`编/解码本该与字符串无关, 但几乎所有 Base64 的`decode`方法都输出字符串, 限制了使用场景.
4. 可自定义生成自编/解码函数, 适应自定义 Base64 编码表,以及自定义编码字符串的场景.

### 本库方案
默认对于字符串的转换用`UTF-8`编码, 但无视无效符(解码按同一规则), 保证 js 的字符([UCS-2](https://zh.wikipedia.org/wiki/UTF-16#UTF-16%E8%88%87UCS-2%E7%9A%84%E9%97%9C%E4%BF%82))串可以无损转换.
`decode()` 单纯将`Base64`解析为`Byte[]`; 但重写返回字节数组的`toString()`方法, 以`UTF-8`编码解析为字符串.
本库还暴露 `createEncode`, `createDecode` 两个 API, 支持生成非标准的 Base64 方案, 可自定义(`table`, `pad`, `encoding`);

### 适用场景

1. 二进制数据与 Base64 互转
2. 字符串与 Base64 互转

### 兼容性

通用, 对于不支持`ArrayBuffer`的环境将会用`Array`代替`Uint8Array`.

## 安装
## 适用场景

- 二进制数据与 Base64 互转
- 字符串与 Base64 互转

## install
```
npm i @cnwhy/base64
```

## 为何重复造轮子?
1. 需要单纯的Base64的库,而且能在浏览器上使用; (利用node的 `Buffer` 对象的方法出局)
2. 支持字符串; (`btoa` , `atob` 只支持 [Latin1](https://zh.wikipedia.org/wiki/ISO/IEC_8859-1));
3. `Base64`编/解码本该与字符串无关, 但现有库几乎只支持字符串;
4. 能用上`Tree-shaking`, 项目一般只需要用到(`encode`, `decode`), 我可不想copy代码;
5. javascript 字符串无损转换 (因为这一点, 现有库几乎全军覆没), [具体例子](https://github.com/cnwhy/Base64.js/wiki/javascript%E5%AD%97%E7%AC%A6%E4%B8%B2%E6%97%A0%E6%8D%9F%E8%BD%AC%E6%8D%A2%E6%8E%A2%E8%AE%A8);
6. 能应付异型`Base64`方案;



## 兼容性
通用, 对于不支持`ArrayBuffer`的环境将会用`Array`代替`Uint8Array`.
> 什么! 你要兼容IE6?
> 也不是不行, 把 `dist/Base64.umd.js` 最后那句 'Object.defineProperty(exports, '__esModule', { value: true });' 删了就可以了.


## 使用
```js
const Base64 = require('base64.js');
Expand Down Expand Up @@ -64,6 +59,7 @@ true
QmFzZTY05bqT8JCQgO+/vQ==
false
```
> 更多使用例子可以参看[这篇](https://blog.whyoop.com/2019/06/03/new-base64/#demo);
## API
Expand Down Expand Up @@ -91,3 +87,6 @@ Base64 = {
createDecode(table?: string[] | string, pad?: string, strDecode?: Function): (base64str: string) => Uint8Array | number[];
}
```
## 参考资料
[https://tools.ietf.org/html/rfc4648](https://tools.ietf.org/html/rfc4648);
2 changes: 2 additions & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,9 @@
"license": "MIT",
"devDependencies": {
"@babel/preset-env": "^7.4.3",
"@types/benchmark": "^1.0.31",
"ava": "^1.4.1",
"benchmark": "^2.1.4",
"gbk.js": "^0.2.4",
"nyc": "^14.0.0",
"rimraf": "^2.6.3",
Expand Down
61 changes: 33 additions & 28 deletions src/main.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import { isArray, MyArrayBuffer, myUint8arrayClass, getUint8Array } from './poliyfill';
import {isArray, MyLikeUint8array , isUint8Array, isArrayBuffer } from './poliyfill';
const BASE64_TABLE = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/';
const PAD = '=';

Expand Down Expand Up @@ -72,40 +72,45 @@ function createEncode(
}
const TABLE = getTable(table);
const PAD = getPad(pad, TABLE);
return function(u8arr: ArrayBuffer | Uint8Array | number[] | string): string {
return function(input: ArrayBuffer | Uint8Array | number[] | string): string {
let _u8arr;
if (u8arr instanceof myUint8arrayClass) {
_u8arr = u8arr;
} else if (u8arr instanceof MyArrayBuffer || isArray(u8arr)) {
_u8arr = getUint8Array(u8arr);
if (isArray(input) || isUint8Array(input)) {
_u8arr = input;
} else if (isArrayBuffer(input)) {
_u8arr = new Uint8Array(input);
} else if (typeof strEncode == 'function') {
_u8arr = strEncode(String(u8arr));
// 其它都当成 string 处理
_u8arr = strEncode(String(input));
} else {
// 未初始化 strEncode 函数则不支持string类型
throw TypeError('"strEncode" is not function');
throw TypeError(`Input type is not supported, "strEncode" is not function`);
}
let bitLength = Math.ceil((_u8arr.length * 8) / 6);
let str64Length = Math.ceil(_u8arr.length / 3) * 4;
let codes = new Array(str64Length);
let index = 0;
for (let i = 0; i < _u8arr.length; ) {
let a0 = _u8arr[i++];
let a1 = _u8arr[i++];
let a2 = _u8arr[i++];
codes[index++] = a0 >> 2;
codes[index++] = ((a0 << 4) | (a1 >> 4)) & 0x3f;
codes[index++] = ((a1 << 2) | (a2 >> 6)) & 0x3f;
codes[index++] = a2 & 0x3f;
var base64 = '';
var _l = _u8arr.length % 3;
var padLength = _l ? _l === 2 ? 1 : 2 : 0;
var loopLength = _u8arr.length - _l;
var a0, a1, a2, i = 0;
while (i < loopLength) {
a0 = _u8arr[i++];
a1 = _u8arr[i++];
a2 = _u8arr[i++];
base64 =
base64 +
TABLE[a0 >> 2] +
TABLE[((a0 << 4) | (a1 >> 4)) & 0x3f] +
TABLE[((a1 << 2) | (a2 >> 6)) & 0x3f] +
TABLE[a2 & 0x3f];
}
let base64 = '';
for (let i = 0; i < codes.length; i++) {
const code = codes[i];
base64 += i > bitLength - 1 ? PAD : TABLE[code];
if (padLength) {
a0 = _u8arr[i++];
a1 = _u8arr[i++] || 0;
base64 =
base64 +
TABLE[a0 >> 2] +
TABLE[((a0 << 4) | (a1 >> 4)) & 0x3f] +
(padLength === 2 ? PAD + PAD : TABLE[(a1 << 2) & 0x3f] + PAD);
}
return base64;
// return codes.reduce((d, code, i) => {
// return (d += i > bitLength - 1 ? pad : table[code]);
// }, '');
};
}

Expand Down Expand Up @@ -171,7 +176,7 @@ function createDecode(
let indexMax = length - getPads(base64Str);
let mc4 = indexMax % 4;
if (mc4 === 1) throw new TypeError('The parameter is not a base64 string!');
let buffer = new myUint8arrayClass(Math.floor((indexMax * 6) / 8));
let buffer = new MyLikeUint8array(Math.floor((indexMax * 6) / 8));
let index = 0;
let i = 0;
const next = function() {
Expand Down
31 changes: 20 additions & 11 deletions src/poliyfill.ts
Original file line number Diff line number Diff line change
@@ -1,16 +1,25 @@
// 手动 poliyfill 以最小的代码量兼容IE6(ES3);
// const emptyFn = function(){};
export const isArray =
Array.isArray ||
function(obj) {
Object.prototype.toString.call(obj) == '[object Array]';
function(obj): obj is Array<any> {
return Object.prototype.toString.call(obj) == '[object Array]';
};
// True when the global `ArrayBuffer` is a function in the current runtime.
export const hasArrayBuffer = typeof ArrayBuffer === 'function';
export const MyArrayBuffer = hasArrayBuffer ? ArrayBuffer : function() {};
export const myUint8arrayClass = hasArrayBuffer ? Uint8Array : Array;
export const getUint8Array = hasArrayBuffer
? function(arr: any) {
return new Uint8Array(arr);
}
: function(arr: any) {
return typeof arr === 'number' ? new Array(arr) : arr;
};
// export const MyArrayBuffer = hasArrayBuffer ? ArrayBuffer : emptyFn;
// export const MyUint8Array = hasArrayBuffer ? Uint8Array : emptyFn;
// Byte-container constructor: `Uint8Array` when `hasArrayBuffer` is true, otherwise plain `Array`.
export const MyLikeUint8array = hasArrayBuffer ? Uint8Array : Array;
// export const myUint8arrayClass = hasArrayBuffer ? Uint8Array : Array;
// export const getUint8Array = hasArrayBuffer
// ? function(arg: any) {
// return new Uint8Array(arg);
// }
// : function(arg: any) {
// return typeof arg === 'number' ? new Array(arg) : arg;
// };
/**
 * Type guard: reports whether `obj` is an instance of `Uint8Array`.
 *
 * Returns false without evaluating `Uint8Array` at all when
 * `hasArrayBuffer` is false.
 *
 * @param obj - value to inspect
 * @returns true only when `hasArrayBuffer` is true and `obj instanceof Uint8Array`
 */
export const isUint8Array = function(obj: any): obj is Uint8Array {
    if (!hasArrayBuffer) {
        return false;
    }
    return obj instanceof Uint8Array;
};
/**
 * Type guard: reports whether `obj` is an instance of `ArrayBuffer`.
 *
 * Returns false without evaluating `ArrayBuffer` in the `instanceof`
 * check when `hasArrayBuffer` is false.
 *
 * @param obj - value to inspect
 * @returns true only when `hasArrayBuffer` is true and `obj instanceof ArrayBuffer`
 */
export const isArrayBuffer = function(obj: any): obj is ArrayBuffer {
    if (!hasArrayBuffer) {
        return false;
    }
    return obj instanceof ArrayBuffer;
};
101 changes: 57 additions & 44 deletions src/utf8.ts
Original file line number Diff line number Diff line change
@@ -1,55 +1,73 @@
import { isArray, MyArrayBuffer, myUint8arrayClass, getUint8Array } from './poliyfill';
import {isArray, isArrayBuffer, isUint8Array} from './poliyfill';
const ERR_CODE = '\ufffd';

function u2utf8(codePoint: number): number[] {
// 未暴露的方法, 内部调用无需判断;
// if (codePoint < 0 || codePoint > 0x7fffffff) throw new SyntaxError('Undefined Unicode code-point');
if (codePoint < 0x80) return [codePoint];
let n = 11;
while (codePoint >= 2 ** n) {
n += 5;
}
let length = Math.ceil(n / 6);
let u8 = new Array(length);
let i = 0;
u8[0] = (0xff ^ (2 ** (8 - length) - 1)) | (codePoint >> (6 * (length - 1)));
while (i < length - 1) {
u8[length - 1 - i] = 0x80 | ((codePoint >> (i * 6)) & 0x3f);
i++;
}
return u8;
}
type LikeUint8Array = number[] | Uint8Array;

/**
* 字符串utf8编码
*
* @param {string} str
* @returns
*/
function utf8Encode(str: string): Uint8Array {
let utf8: number[] = [];
let codePoints: number[] = [];
//将字符串(ucs2)转为Unicode
for (let i = 0; i < str.length; i++) {
function utf8Encode(str: string): LikeUint8Array {
let bf: number[] = [];
let length = str.length;
let add = function(codePoint: number) {
if (codePoint < 0x80) {
return bf.push(codePoint);
}
if (codePoint < 0x800) {
return bf.push(0xc0 | (codePoint >> 6), 0x80 | (codePoint & 0x3f));
}
if (codePoint < 0x10000) {
return bf.push(
0xe0 | (codePoint >> 12),
0x80 | ((codePoint >> 6) & 0x3f),
0x80 | (codePoint & 0x3f)
);
}
if (codePoint < 0x200000) {
return bf.push(
0xf0 | (codePoint >> 18),
0x80 | ((codePoint >> 12) & 0x3f),
0x80 | ((codePoint >> 6) & 0x3f),
0x80 | (codePoint & 0x3f)
);
}
// 肯定不会用到的 注释掉 减少打包代码量
// if (codePoint < 0x4000000) {
// return bf.push(
// 0xf8 | (codePoint >> 24),
// 0x80 | ((codePoint >> 18) & 0x3f),
// 0x80 | ((codePoint >> 12) & 0x3f),
// 0x80 | ((codePoint >> 6) & 0x3f),
// 0x80 | (codePoint & 0x3f)
// );
// }
// return bf.push(
// 0xfc | (codePoint >> 30),
// 0x80 | ((codePoint >> 24) & 0x3f),
// 0x80 | ((codePoint >> 18) & 0x3f),
// 0x80 | ((codePoint >> 12) & 0x3f),
// 0x80 | ((codePoint >> 6) & 0x3f),
// 0x80 | (codePoint & 0x3f)
// );
};

for (let i = 0; i < length; i++) {
let code = str.charCodeAt(i);
let cod1;
if (code < 0xd800 || code > 0xdfff) {
codePoints.push(code);
add(code);
} else if (code < 0xdc00 && (cod1 = str.charCodeAt(i + 1)) >= 0xdc00 && cod1 < 0xe000) {
//四字节字符处理
i++;
codePoints.push(0x10000 + (((code & 0x3ff) << 10) | (cod1 & 0x3ff)));
add(0x10000 + (((code & 0x3ff) << 10) | (cod1 & 0x3ff)));
} else {
//不自行处理 不正常编码
codePoints.push(code);
add(code);
}
}
//UTF8编码Unicode
for (let i = 0; i < codePoints.length; i++) {
let v = codePoints[i];
utf8.push.apply(utf8, u2utf8(v));
}
return getUint8Array(utf8);
return bf;
}

/**
Expand All @@ -62,12 +80,10 @@ function utf8Decode(buffer: ArrayBuffer | Uint8Array | number[]): string {
let u8: Uint8Array | number[];
let str = '';
let index = 0;
if (buffer instanceof myUint8arrayClass) {
// Uint8Array & Buffer
if (isArray(buffer) || isUint8Array(buffer)) {
u8 = buffer;
} else if (buffer instanceof MyArrayBuffer || isArray(buffer)) {
// ArrayBuffer & number[]
u8 = getUint8Array(buffer);
} else if (isArrayBuffer(buffer)) {
u8 = new Uint8Array(buffer);
} else {
return String(buffer);
}
Expand Down Expand Up @@ -111,7 +127,7 @@ function utf8Decode(buffer: ArrayBuffer | Uint8Array | number[]): string {
return _i;
}
} catch (e) {
// 不正常的UTF8字节数据, 替换为 �
// 不正常的UTF8字节数据, 替换为 �
// 注:此处与utf8Encode的不正常编码不同;
// UTF8编码时不考虑代理区, UTF16需要考虑代理区;
str += ERR_CODE;
Expand All @@ -124,7 +140,4 @@ function utf8Decode(buffer: ArrayBuffer | Uint8Array | number[]): string {
return str;
}

export{
utf8Encode,
utf8Decode
}
export { utf8Encode, utf8Decode };

0 comments on commit d6249aa

Please sign in to comment.