Skip to content

Commit

Permalink
优化 utf8Decode 增加基准测试
Browse files Browse the repository at this point in the history
  • Loading branch information
cnwhy committed Jun 25, 2019
1 parent 87bd610 commit 64a5aae
Show file tree
Hide file tree
Showing 5 changed files with 520 additions and 92 deletions.
7 changes: 3 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -22,13 +22,12 @@ npm i @cnwhy/base64
6. 能应付异型`Base64`方案;

## 兼容性
通用, 对于不支持`ArrayBuffer`的环境将会用`Array`代替`Uint8Array`.
> 什么! 你要兼容IE6?
> 也不是不行, 把 `dist/Base64.umd.js` 最后那句 'Object.defineProperty(exports, '__esModule', { value: true });' 删了就可以了.
通用, 对于不支持`ArrayBuffer`的环境将会用`Array`代替`Uint8Array`.

## 使用
```js
const { encode, decode, createEncode, createDecode } = require('@cnwhy/base64');
// import { encode, decode, createEncode, createDecode } from '@cnwhy/base64';

// 1. 字符串
let str = '中国𐄡美国';
Expand All @@ -51,7 +50,7 @@ const TABLE = 'xQh}s7*y~A|nkj4Bf%z1R,P+)mMS{(&EWCKegp6r!OX</LuY-l9^ZJ#cTU[vHda$'
const PAD = '.';

// 自定义字符串编码/解码方法
const Utf16Encode = function(str) { //
const Utf16Encode = function(str) {
let cods = str.split('').map(s => s.charCodeAt(0));
return new Uint8Array(new Uint16Array(cods).buffer);
}
Expand Down
92 changes: 47 additions & 45 deletions src/utf8.ts
Original file line number Diff line number Diff line change
Expand Up @@ -88,56 +88,58 @@ function utf8Decode(buffer: ArrayBuffer | Uint8Array | number[]): string {
} else {
return String(buffer);
}
function setChar(i: number): number {
let _i = i;
let c0 = u8[_i++];
try {
if (c0 < 0x80) {
str += String.fromCharCode(c0);
return _i;
} else if (c0 < 0xc2 || c0 > 0xfd) {
// 多字节 `u+0080` 转第一位最小值是 1100 0010 , 0000 0000
// 多字节 第一字节 最大位是 `1111 1101`
throw 'code err';
while (index < u8.length) {
let c0 = u8[index++];
if (c0 < 0x80) {
str += String.fromCharCode(c0);
} else if (c0 < 0xc2 || c0 > 0xfd) {
// 多字节 `u+0080` 转第一位最小值是 1100 0010 , 0000 0000
// 多字节 第一字节 最大位是 `1111 1101`
// throw 'code err';
str += ERR_CODE;
continue;
} else {
let _i = index;
let code = 0;
let n = 0;
if (c0 < 0xe0) {
code |= (c0 & 31) << 6;
n = 1;
} else if (c0 < 0xf0) {
n = 2;
code |= (c0 & 15) << 12;
} else if (c0 < 0xf8) {
n = 3;
code |= (c0 & 7) << 18;
} else if (c0 < 0xfc) {
n = 4;
code |= (c0 & 3) << 24;
} else {
let mk = 0x80;
let w = 6;
let cs: number[] = [];
let code = 0;
while (c0 >= (mk | (2 ** w))) {
let cn = u8[_i++];
// if(cn < 0x80 || cn > 0xfb) throw 'code err';
if ((cn & 0xc0) ^ 0x80) throw 'code err';
cs.push(cn);
mk = mk | (2 ** w);
w--;
}
cs = cs.reverse();
for (let k = 0; k < cs.length; k++) {
let _c = cs[k] & 0x3f;
code |= _c << (k * 6);
}
code |= (c0 & (2 ** w - 1)) << (cs.length * 6);
if (code > 0xffff) {
let _code = code - 0x10000;
str += String.fromCharCode(0xd800 | (_code >> 10));
str += String.fromCharCode(0xdc00 | (_code & 0x3ff));
} else {
str += String.fromCharCode(code & 0xffff);
n = 5;
code |= (c0 & 1) << 30;
}
while (n--) {
let c = u8[_i++];
if (c >> 6 != 2) {
code = -1;
break;
}
return _i;
code |= (c & 0x3f) << (n * 6);
}
} catch (e) {
// 不正常的UTF8字节数据, 替换为 �
// 注:此处与utf8Encode的不正常编码不同;
// UTF8编码时不考虑代理区, UTF16需要考虑代理区;
str += ERR_CODE;
return i + 1;
// Unicode -> Utf16
if (code > 0xffff) {
let _code = code - 0x10000;
str += String.fromCharCode(0xd800 | (_code >> 10));
str += String.fromCharCode(0xdc00 | (_code & 0x3ff));
} else if (code > 0) {
str += String.fromCharCode(code);
} else {
str += ERR_CODE;
continue;
}
index = _i;
}
}
while (index < u8.length) {
index = setChar(index);
}
return str;
}

Expand Down
5 changes: 5 additions & 0 deletions test/benchmark/test/arrVScharAt.ts
Original file line number Diff line number Diff line change
Expand Up @@ -39,4 +39,9 @@ getSuite()
var k = STR.charAt(v);
});
})
.add('str[index]', function() {
getIndexs.forEach(v => {
var k = STR[v];
});
})
.run();
171 changes: 171 additions & 0 deletions test/benchmark/test/base64Decode.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,171 @@
import Benchmark from 'benchmark';
import { encode, decode } from '../../../src/Base64';
/**
 * Builds a Benchmark suite with console reporting already attached.
 *
 * - `cycle`: logs the string form of the benchmark that just finished.
 * - `complete`: logs the fixture size `Max`, then prints a table holding each
 *   benchmark's `name` and `hzk` (its `hz` divided by 1000, fixed to two
 *   decimals, with a `K` suffix). `hzk` is written onto the benchmark objects.
 *
 * @returns the suite returned by the chained `.on(...)` calls, ready for `.add(...)` / `.run()`.
 */
function getSuite() {
  // Logs one summary line per completed benchmark.
  function reportCycle(event: any) {
    console.log(event.target.toString());
  }

  // Prints the final comparison table; `this` is the suite that emitted the event.
  function reportSummary(this: any) {
    console.log(`字节长度 ${Max} :`);
    const rows = Array.from(this).map(bench => {
      // @ts-ignore
      bench.hzk = (bench.hz / 1000).toFixed(2) + 'K';
      return bench;
    });
    console.table(rows, ['name', 'hzk']);
  }

  return new Benchmark.Suite().on('cycle', reportCycle).on('complete', reportSummary);
}

// Base64 alphabet: once as a single string (used for indexOf lookups) and once
// split into an array of single characters.
const TABLE_JOIN = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/';
const TABLE = TABLE_JOIN.split('');
const PAD = '=';

// Size in bytes of the smallest fixture.
const Max = 64;

// Three random byte fixtures of Max, Max + 1 and Max + 2 bytes. Their lengths
// modulo 3 are 1, 2 and 0 - presumably chosen so the encoded strings cover
// every padding length (verify against `encode`).
const Data: number[] = Array.from({ length: Max }, () => Math.floor(Math.random() * 256));
const Data1: number[] = Array.from({ length: Max + 1 }, () => Math.floor(Math.random() * 256));
const Data2: number[] = Array.from({ length: Max + 2 }, () => Math.floor(Math.random() * 256));

// Encoded form of each fixture; these are the inputs handed to the decoders.
const DataB64 = encode(Data);
const DataB64_1 = encode(Data1);
const DataB64_2 = encode(Data2);

/**
 * Looks up the 6-bit value of a base64 symbol.
 *
 * @param char the symbol to look up in `TABLE_JOIN`.
 * @returns the position of `char` within `TABLE_JOIN`.
 * @throws TypeError when `char` does not occur in `TABLE_JOIN`.
 */
const getV = (char: string): number => {
  const position = TABLE_JOIN.indexOf(char);
  if (position !== -1) {
    return position;
  }
  throw new TypeError(`"${char}" not base64 char`);
};
/**
 * Counts the run of `PAD` characters at the end of a string.
 *
 * @param base64Str the string to inspect.
 * @returns how many consecutive `PAD` characters terminate `base64Str` (0 when there are none).
 */
const getPads = (base64Str: string): number => {
  const total = base64Str.length;
  let end = total;
  // Walk backwards while the character just before `end` is padding.
  while (end > 0 && base64Str.charAt(end - 1) === PAD) {
    end -= 1;
  }
  return total - end;
};
/**
 * Decodes a base64 string into bytes, validating every symbol through `getV`.
 *
 * Trailing padding (as counted by `getPads`) is ignored. The input is not trimmed.
 *
 * @param base64Str the base64 text to decode.
 * @returns a `Uint8Array` holding the decoded bytes.
 * @throws TypeError when the unpadded length leaves a single dangling symbol
 *         (length % 4 === 1), or when `getV` rejects a symbol.
 */
function B64DecodeV0_2_3(base64Str: string): Uint8Array | number[] {
  const payloadLength = base64Str.length - getPads(base64Str);
  const tail = payloadLength % 4;
  if (tail === 1) {
    throw new TypeError('The parameter is not a base64 string!');
  }

  // Every symbol carries 6 bits; only whole bytes are kept.
  const bytes = new Uint8Array(Math.floor((payloadLength * 6) / 8));
  const fullGroupsEnd = payloadLength - tail;
  let readPos = 0;
  let writePos = 0;
  const sextet = (): number => getV(base64Str.charAt(readPos++));

  // Complete groups: 4 symbols -> 3 bytes.
  while (readPos < fullGroupsEnd) {
    const s0 = sextet();
    const s1 = sextet();
    const s2 = sextet();
    const s3 = sextet();
    bytes[writePos++] = ((s0 << 2) | (s1 >> 4)) & 0xff;
    bytes[writePos++] = ((s1 << 4) | (s2 >> 2)) & 0xff;
    bytes[writePos++] = ((s2 << 6) | s3) & 0xff;
  }

  // Trailing partial group: 2 symbols -> 1 byte, 3 symbols -> 2 bytes.
  if (tail !== 0) {
    const s0 = sextet();
    const s1 = sextet();
    bytes[writePos++] = ((s0 << 2) | (s1 >> 4)) & 0xff;
    if (tail === 3) {
      const s2 = sextet();
      bytes[writePos++] = ((s1 << 4) | (s2 >> 2)) & 0xff;
    }
  }
  return bytes;
}

/**
 * Non-throwing symbol lookup.
 *
 * @param char the symbol to look up.
 * @returns -2 when `char` is the `PAD` character; otherwise the result of
 *          `TABLE_JOIN.indexOf(char)` (which is -1 for a symbol not in the table).
 */
const getV1 = (char: string): number => (char === PAD ? -2 : TABLE_JOIN.indexOf(char));

/**
 * Decodes a base64 string into a plain array of byte values without throwing.
 *
 * Symbols are consumed four at a time. Decoding stops at the first position
 * that is padding, is not in `TABLE_JOIN`, or lies past the end of the input;
 * bytes completed before that point are returned.
 *
 * Reads past the end of the string are mapped to -1 explicitly. Without that
 * guard `charAt` yields `''` and `indexOf('')` is 0, so an unpadded input such
 * as `'TQ'` would decode to `[77, 0, 0]` instead of `[77]`.
 *
 * @param base64Str the base64 text to decode (padded or unpadded).
 * @returns the decoded bytes, each in the range 0-255.
 */
function B64Decode(base64Str: string): number[] {
  const sLength = base64Str.length;
  const buffer: number[] = [];
  let index = 0;
  let i = 0;
  while (index < sLength) {
    const c0 = TABLE_JOIN.indexOf(base64Str.charAt(index++));
    const c1 = index < sLength ? TABLE_JOIN.indexOf(base64Str.charAt(index++)) : -1;
    // A byte needs two valid symbols; never fold a -1 into the output.
    if (c0 === -1 || c1 === -1) break;
    buffer[i++] = ((c0 << 2) | (c1 >> 4)) & 0xff;

    const c2 = index < sLength ? TABLE_JOIN.indexOf(base64Str.charAt(index++)) : -1;
    if (c2 === -1) break;
    buffer[i++] = ((c1 << 4) | (c2 >> 2)) & 0xff;

    const c3 = index < sLength ? TABLE_JOIN.indexOf(base64Str.charAt(index++)) : -1;
    if (c3 === -1) break;
    buffer[i++] = ((c2 << 6) | c3) & 0xff;
  }
  return buffer;
}

/**
 * Decodes a base64 string into bytes, with the symbol lookup inlined instead
 * of going through a separate helper.
 *
 * Trailing padding (as counted by `getPads`) is ignored. The input is not trimmed.
 *
 * @param base64Str the base64 text to decode.
 * @returns a `Uint8Array` holding the decoded bytes.
 * @throws TypeError when the unpadded length leaves a single dangling symbol
 *         (length % 4 === 1), or when a symbol is not found in `TABLE_JOIN`.
 */
function B64Decode1(base64Str: string): Uint8Array | number[] {
  const payloadLength = base64Str.length - getPads(base64Str);
  const tail = payloadLength % 4;
  if (tail === 1) {
    throw new TypeError('The parameter is not a base64 string!');
  }

  // Every symbol carries 6 bits; only whole bytes are kept.
  const out = new Uint8Array(Math.floor((payloadLength * 6) / 8));
  const fullGroupsEnd = payloadLength - tail;
  let cursor = 0;
  let written = 0;

  // Reads the next symbol and returns its 6-bit value, rejecting unknown symbols.
  const readSextet = (): number => {
    const symbol = base64Str.charAt(cursor++);
    const value = TABLE_JOIN.indexOf(symbol);
    if (value == -1) {
      throw new TypeError(`"${symbol}" not base64 char`);
    }
    return value;
  };

  // Complete groups: 4 symbols -> 3 bytes.
  while (cursor < fullGroupsEnd) {
    const s0 = readSextet();
    const s1 = readSextet();
    const s2 = readSextet();
    const s3 = readSextet();
    // No mask needed here: two 6-bit values combined this way never exceed 255.
    out[written++] = (s0 << 2) | (s1 >> 4);
    out[written++] = ((s1 << 4) | (s2 >> 2)) & 0xff;
    out[written++] = ((s2 << 6) | s3) & 0xff;
  }

  // Trailing partial group: 2 symbols -> 1 byte, 3 symbols -> 2 bytes.
  if (tail !== 0) {
    const s0 = readSextet();
    const s1 = readSextet();
    out[written++] = ((s0 << 2) | (s1 >> 4)) & 0xff;
    if (tail === 3) {
      const s2 = readSextet();
      out[written++] = ((s1 << 4) | (s2 >> 2)) & 0xff;
    }
  }
  return out;
}


/**
 * Checks that a decoder round-trips all three fixtures.
 *
 * Each comparison uses loose equality between the comma-joined source bytes
 * and whatever `fn` returns, so the decoder's result is compared through its
 * string form. Evaluation stops at the first mismatch.
 *
 * @param fn decoder under test; called with `DataB64`, `DataB64_1`, `DataB64_2` in that order.
 * @returns `true` when every fixture decodes back to its source bytes, otherwise `false`.
 */
function test(fn: any) {
  if (!(Data.join(',') == fn(DataB64))) {
    return false;
  }
  if (!(Data1.join(',') == fn(DataB64_1))) {
    return false;
  }
  return Data2.join(',') == fn(DataB64_2);
}

// Before benchmarking, log whether each decoder reproduces the fixture bytes.
const sanityChecks = [
  { label: 'B64DecodeV0_2_3', decoder: B64DecodeV0_2_3 },
  { label: 'B64Decode', decoder: B64Decode },
  { label: 'B64Decode1', decoder: B64Decode1 },
];
sanityChecks.forEach(({ label, decoder }) => {
  console.log(label, test(decoder));
});
// console.log(Data,B64Decode(DataB64));

// Benchmark the three decoder implementations against each other. Every case
// decodes all three encoded fixtures (DataB64, DataB64_1, DataB64_2) once per
// iteration; the results are assigned to otherwise unused locals.
getSuite()
// Decoder that validates each symbol through the shared getV helper.
.add('B64DecodeV0_2_3', function() {
var a1 = B64DecodeV0_2_3(DataB64);
var a2 = B64DecodeV0_2_3(DataB64_1);
var a3 = B64DecodeV0_2_3(DataB64_2);
})
// Non-throwing decoder that collects bytes into a plain array.
.add('B64Decode', function() {
var a1 = B64Decode(DataB64);
var a2 = B64Decode(DataB64_1);
var a3 = B64Decode(DataB64_2);
})
// Same structure as B64DecodeV0_2_3 but with the symbol lookup inlined.
.add('B64Decode1', function() {
var a1 = B64Decode1(DataB64);
var a2 = B64Decode1(DataB64_1);
var a3 = B64Decode1(DataB64_2);
})
// Start the run; reporting is handled by the listeners attached in getSuite().
.run();

0 comments on commit 64a5aae

Please sign in to comment.