-
Notifications
You must be signed in to change notification settings - Fork 0
/
text.ts
149 lines (140 loc) · 5.67 KB
/
text.ts
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
/**
* Encode a string into its UTF-8 byte sequence.
* @param data Text to encode.
* @returns Newly allocated byte array holding the UTF-8 representation of `data`.
* @example
* ```ts
* const text = "HelloWorld!";
* const encode = u8Encode(text);
* const decode = u8Decode(encode);
* ```
*/
export function u8Encode(data:string):Uint8Array{
    // TextEncoder only ever produces UTF-8, so no encoding label is needed.
    const encoder = new TextEncoder();
    const bytes = encoder.encode(data);

    return bytes;
}
/**
* Decode a UTF-8 byte sequence back into a string.
* @param data Bytes that hold UTF-8 encoded text.
* @returns The decoded text.
* @example
* ```ts
* const text = "HelloWorld!";
* const encode = u8Encode(text);
* const decode = u8Decode(encode);
* ```
*/
export function u8Decode(data:Uint8Array):string{
    // A TextDecoder constructed without a label decodes UTF-8.
    const decoder = new TextDecoder();
    const text = decoder.decode(data);

    return text;
}
/**
* Decode bytes in an arbitrary text encoding into a string.
* When no codec is given the bytes are interpreted as SHIFT-JIS.
* @param data Encoded bytes.
* @param codec Encoding label passed to `TextDecoder`; falls back to `"shift-jis"` when omitted.
* @returns The decoded text.
* @example
* ```ts
* const bin = await Deno.readFile("./file");
* const decode = textDecode(bin);
* ```
*/
export function textDecode(data:Uint8Array, codec?:string):string{
    const label = codec ?? "shift-jis";
    const decoder = new TextDecoder(label);

    return decoder.decode(data);
}
/**
* Render a byte array as an upper-case hexadecimal string, two digits per byte.
* @param data Bytes to render.
* @returns Concatenated hex digits with no separator; empty string for empty input.
* @example
* ```ts
* const bin = await Deno.readFile("./file");
* const encode = hexEncode(bin);
* const decode = hexDecode(encode);
* ```
*/
export function hexEncode(data:Uint8Array):string{
    let hex = "";

    for(const byte of data){
        // Each byte becomes exactly two base-16 digits (zero padded).
        hex += pad0(byte, 2, 16);
    }

    return hex;
}
/**
* Parse a hexadecimal string into bytes.
* The input is scanned left to right for non-overlapping pairs of hex digits (either case);
* anything that is not part of such a pair is skipped.
* @param data Hex text.
* @returns One byte per hex pair found; an empty array when no pair is present.
* @example
* ```ts
* const bin = await Deno.readFile("./file");
* const encode = hexEncode(bin);
* const decode = hexDecode(encode);
* ```
*/
export function hexDecode(data:string):Uint8Array{
    const pairs = data.match(/[0-9a-fA-F]{2}/g);

    // match() yields null (not an empty array) when nothing matched.
    if(pairs === null){
        return new Uint8Array(0);
    }

    return Uint8Array.from(pairs, pair => Number.parseInt(pair, 16));
}
/**
* Normalize whitespace in a text.
* Leading and trailing whitespace is trimmed, every CR is removed, runs of spaces, tabs or LFs
* are each collapsed to a single one, and a single space at the start or end of any line is dropped.
* @param data Text to normalize.
* @returns The normalized text.
* @example
* ```ts
* const format = trimExtend(" Lorem ipsum\r dolor sit \r\r amet. ");
* ```
*/
export function trimExtend(data:string):string{
    // Applied strictly in this order: later rules rely on the collapsing done by earlier ones.
    const rules:[RegExp, string][] = [
        [/\r/g, ""],
        [/ +/g, " "],
        [/\t+/g, "\t"],
        [/\n+/g, "\n"],
        [/^ /mg, ""],
        [/ $/mg, ""]
    ];

    let text = data.trim();

    for(const [pattern, replacement] of rules){
        text = text.replace(pattern, replacement);
    }

    return text;
}
/**
* Unify character width: half-width katakana and punctuation (U+FF61..U+FF9D) become full-width,
* while full-width ASCII letters, digits and symbols (U+FF01..U+FF5E) and the ideographic space
* (U+3000) become their half-width ASCII counterparts.
*
* A half-width kana followed by a half-width voiced (U+FF9E) or semi-voiced (U+FF9F) sound mark is
* merged into the single composed full-width kana when one exists in the supported set; otherwise
* the kana is widened and the mark is left as it is. A sound mark standing alone is not touched.
* All other characters pass through unchanged.
*
* Matching is done by code point, so ASCII characters that are regex metacharacters are never
* interpreted as patterns, and the whole conversion is one pass over the input.
* @param data Text to convert.
* @returns Converted text.
* @example
* ```ts
* const format = fixWidth("\uFF11\uFF0B\uFF11\uFF1D\uFF12"); // "1+1=2"
* ```
*/
export function fixWidth(data:string):string{
    // Full-width replacement for each half-width code point, indexed by (code point - 0xFF61).
    const wide = "。「」、・ヲァィゥェォャュョッーアイウエオカキクケコサシスセソタチツテトナニヌネノハヒフヘホマミムメモヤユヨラリルレロワン";
    // Kana whose voiced form is the next code point (e.g. カ U+30AB -> ガ U+30AC).
    const voiced = "カキクケコサシスセソタチツテトハヒフヘホ";
    // Kana whose semi-voiced form is two code points further (e.g. ハ U+30CF -> パ U+30D1).
    const semiVoiced = "ハヒフヘホ";

    const pattern = /([\uFF61-\uFF9D])([\uFF9E\uFF9F]?)|[\uFF01-\uFF5E]|\u3000/g;

    return data.replace(pattern, (hit:string, kana:string|undefined, mark:string|undefined):string => {
        if(kana === undefined){
            // Full-width ASCII sits exactly 0xFEE0 above its ASCII twin; U+3000 is the odd one out.
            return hit === "\u3000" ? " " : String.fromCharCode(hit.charCodeAt(0) - 0xFEE0);
        }

        const base = wide.charAt(kana.charCodeAt(0) - 0xFF61);

        if(mark === "\uFF9E"){
            if(base === "ウ"){
                return "ヴ";
            }
            if(voiced.includes(base)){
                return String.fromCharCode(base.charCodeAt(0) + 1);
            }
        }
        else if(mark === "\uFF9F" && semiVoiced.includes(base)){
            return String.fromCharCode(base.charCodeAt(0) + 2);
        }

        // No composed form: widen the kana only and keep any sound mark verbatim.
        return base + (mark ?? "");
    });
}
/**
* Tidy a text by first unifying character width with `fixWidth()` and then
* normalizing whitespace with `trimExtend()`.
* @param data Text to clean.
* @returns The cleaned text.
* @example
* ```ts
* const format = cleanText("1 + 1 = 2 ");
* ```
*/
export function cleanText(data:string):string{
    // Width conversion runs first so that spaces it produces are also collapsed and trimmed.
    const unified = fixWidth(data);

    return trimExtend(unified);
}
/**
* Split a string into the segments reported by a default `Intl.Segmenter`, one array element per segment.
* Unlike indexing or `.length`, this keeps characters above `0x010000` (such as emoji) in one piece,
* which makes it suitable for counting the characters of such strings.
* @param data Text to split.
* @returns Segments in their original order; an empty array for an empty string.
* @example
* ```ts
* const characters = accurateSegment("😀😃😄😁😆😅😂🤣");
* ```
*/
export function accurateSegment(data:string):string[]{
    const characters:string[] = [];

    for(const {segment} of new Intl.Segmenter().segment(data)){
        characters.push(segment);
    }

    return characters;
}
/**
* Format a number as upper-case text, left-padded with zeros to a minimum number of digits.
* For negative values the minus sign is kept in front and only the part after it is padded,
* so `pad0(-5, 3)` yields `"-005"` rather than `"0-5"`.
* @param data Number to format.
* @param digit Minimum count of characters after the optional sign; defaults to 2.
* @param radix Base handed to `Number.prototype.toString()`; decimal when omitted.
* @returns The padded text. Values already wider than `digit` are returned unshortened.
* @example
* ```ts
* const pad = pad0(8);
* ```
*/
export function pad0(data:number, digit?:number, radix?:number):string{
    const width = digit ?? 2;
    const text = data.toString(radix).toUpperCase();

    // Padding the raw text would put zeros in front of the sign, so split the sign off first.
    if(text.startsWith("-")){
        return `-${text.slice(1).padStart(width, "0")}`;
    }

    return text.padStart(width, "0");
}