forked from kevinAlbs/Base122
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Base122.js
148 lines (135 loc) · 5.24 KB
/
Base122.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
// Tags used by encode() to remember whether its input is a string or an indexable byte container.
const kString = 0;
const kUint8Array = 1;

// When true, debugLog() forwards its arguments to console.log.
const kDebug = false;

// Seven-bit values that encode() never emits as a single byte. Each one is folded,
// together with the following seven bits, into a two-byte UTF-8 sequence instead.
// The position in this array is the index stored in that two-byte sequence.
const kIllegals = [
  0, // null
  10, // newline
  13, // carriage return
  34, // double quote
  38, // ampersand
  92, // backslash
];

// Stored in place of an illegal index to signify that the last two-byte char encodes <= 7 bits.
const kShortened = 0b111;
/**
 * Encodes raw data into base-122.
 *
 * The input is consumed seven bits at a time, most significant bit first. A seven-bit value
 * that is not listed in kIllegals is emitted as a single byte. A value that is listed is merged
 * with the seven bits that follow it into one two-byte UTF-8 sequence `110iii1b 10bbbbbb`, where
 * `iii` is the index into kIllegals and the `b` bits are the following seven bits. When nothing
 * follows, `iii` is kShortened and the `b` bits carry the illegal value itself.
 *
 * @param {Uint8Array|Buffer|Array|String} rawData - The data to be encoded. Either an indexable
 * container of byte values or a string of bytes (i.e. the type of argument to btoa()).
 * @returns {string} The base-122 bytes decoded as UTF-8 text.
 */
function encode(rawData) {
  const inputKind = typeof rawData === 'string' ? kString : kUint8Array;
  const readByte = inputKind === kString
    ? (pos) => rawData.codePointAt(pos)
    : (pos) => rawData[pos];
  const encoded = [];
  let bytePos = 0;
  let bitPos = 0; // Offset of the next unread bit inside the current byte.

  // Pulls the next seven input bits. Yields false once the input is exhausted.
  const nextSeptet = () => {
    if (bytePos >= rawData.length) return false;
    const current = readByte(bytePos);
    // Keep the unread bits of the current byte, left-align them, then narrow to seven bits.
    const head = (((0b11111110 >>> bitPos) & current) << bitPos) >> 1;
    bitPos += 7;
    if (bitPos < 8) return head; // Everything came from the current byte.
    bitPos -= 8;
    bytePos += 1;
    // The remaining low bits are the top `bitPos` bits of the next byte, when there is one.
    if (bytePos >= rawData.length) return head;
    const following = readByte(bytePos);
    const tail = (((0xFF00 >>> bitPos) & following) & 0xFF) >> (8 - bitPos);
    return head | tail;
  };

  for (let septet = nextSeptet(); septet !== false; septet = nextSeptet()) {
    debugLog('Seven input bits', print7Bits(septet), septet);
    const illegalIdx = kIllegals.indexOf(septet);
    if (illegalIdx === -1) {
      encoded.push(septet);
      continue;
    }

    // A two-byte character has room for the next seven bits as well, so fetch them now.
    let payload = nextSeptet();
    debugLog('Handle illegal sequence', print7Bits(septet), print7Bits(payload));
    let marker = illegalIdx;
    if (payload === false) {
      debugLog('Last seven bits are an illegal sequence.');
      marker = kShortened;
      payload = septet; // Encode these bits after the shortened signifier.
    }

    // The top payload bit rides in the lead byte, the other six in the continuation byte.
    const lead = 0b11000010 | ((0b111 & marker) << 2) | ((payload >> 6) & 1);
    const trail = 0b10000000 | (payload & 0b00111111);
    encoded.push(lead, trail);
  }

  return new TextDecoder().decode(Uint8Array.from(encoded));
}
/**
 * Decodes base-122 encoded data back to the original data.
 *
 * Every character contributes seven bits. A character above 127 is a two-byte sequence: bits
 * 8-10 of its code point hold an index into kIllegals (or kShortened) and its low seven bits
 * hold a second seven-bit value. Bits are packed most significant first; trailing bits that do
 * not fill a whole byte are dropped.
 *
 * @param {String|Uint8Array|Buffer|ArrayBuffer|Array} base122Data - The encoded data, either as
 * a string or as the UTF-8 bytes of that string (typed array, Buffer, ArrayBuffer or plain array
 * of byte values).
 * @returns {Uint8Array} The decoded bytes.
 */
function decode(base122Data) {
  let strData;
  if (typeof base122Data === 'string') {
    strData = base122Data;
  } else {
    // TextDecoder only accepts buffer sources, so copy plain arrays into a Uint8Array first.
    const isBinary = ArrayBuffer.isView(base122Data) || base122Data instanceof ArrayBuffer;
    const utf8Bytes = isBinary ? base122Data : Uint8Array.from(base122Data);
    strData = new TextDecoder().decode(utf8Bytes);
  }

  const decoded = [];
  let curByte = 0; // Output byte currently being assembled.
  let bitOfByte = 0; // Number of bits already filled in curByte.

  // Appends seven bits to the output stream, emitting a byte whenever eight are available.
  function push7(septet) {
    const aligned = septet << 1; // Left-align the seven bits inside an eight-bit window.
    // Drop the leading part into the free low bits of the byte being assembled.
    curByte |= (aligned >>> bitOfByte);
    bitOfByte += 7;
    if (bitOfByte >= 8) {
      decoded.push(curByte);
      bitOfByte -= 8;
      // Start the next byte with whatever bits did not fit, shifted to the top.
      curByte = (aligned << (7 - bitOfByte)) & 255;
    }
  }

  for (let i = 0; i < strData.length; i++) {
    const c = strData.charCodeAt(i);
    if (c > 127) {
      // Two-byte character: charCodeAt yields the code point, so
      // 0b110xxxxx 0b10yyyyyy arrives here as xxxxxyyyyyy.
      const illegalIndex = (c >>> 8) & 7; // 7 = 0b111.
      // A shortened character carries only its trailing seven bits, so there is no
      // illegal value to restore in front of them.
      if (illegalIndex !== kShortened) push7(kIllegals[illegalIndex]);
      // Always push the rest.
      push7(c & 127);
    } else {
      // One-byte characters can be pushed directly.
      push7(c);
    }
  }
  return new Uint8Array(decoded);
}
// For debugging: forwards every argument to console.log, but only while kDebug is truthy.
function debugLog(...messages) {
  if (!kDebug) return;
  console.log(...messages);
}
// For debugging: renders num in binary, left-padded with zeros to at least seven digits.
function print7Bits(num) {
  const binary = num.toString(2);
  return binary.padStart(7, "0");
}
// For debugging: renders num in binary, left-padded with zeros to at least eight digits.
function print8Bits(num) {
  const binary = num.toString(2);
  return binary.padStart(8, "0");
}
/**
 * Wraps a string in backticks so it reads as JavaScript template-literal source.
 * Backslashes, dollar signs and backticks inside the string are each prefixed with a backslash.
 * @param {string} s - The text to quote.
 * @returns {string} The backtick-delimited, escaped text.
 */
function encodeJSString(s) {
  const escaped = s.replace(/[\\$`]/g, (ch) => "\\" + ch);
  return "`" + escaped + "`";
}

/**
 * Runs the input through encode() and quotes the result with encodeJSString().
 * @param {*} s - Passed unchanged to encode().
 * @returns {string} The quoted result of encode().
 */
function encodeJS(s) {
  const encoded = encode(s);
  return encodeJSString(encoded);
}

export const Base122 = { encode, decode, encodeJS };