-
Notifications
You must be signed in to change notification settings - Fork 84
/
tokenizer.spec.ts
82 lines (81 loc) · 1.58 KB
/
tokenizer.spec.ts
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
import { MatchMetadata } from "../../shared/interfaces";
import { tokenizer } from "./tokenizer";
/**
 * Table-driven tests for `tokenizer`.
 *
 * Each row is `[input, metadata, expectedTokens]`: `tokenizer(input, metadata)`
 * is called and its result is compared to `expectedTokens` with `toEqual`
 * (recursive equality, so plain object literals are sufficient as fixtures).
 */
describe("tokenizer", () => {
  test.each<[string | string[] | null | undefined, MatchMetadata, unknown[]]>([
    // A null input yields no tokens.
    [null, {}, []],

    // An input that is already an array of strings: one token per element,
    // each carrying the (empty) metadata that was passed in.
    [
      ["already", "tokenized"],
      {},
      [
        {
          str: "already",
          metadata: {},
        },
        {
          str: "tokenized",
          metadata: {},
        },
      ],
    ],

    // Mixed Latin/CJK text. As the fixtures below show, the expected output:
    //  - contains the compound word "api_gateway" plus its parts "api" and
    //    "gateway";
    //  - contains each Chinese character as its own token;
    //  - omits punctuation (":", "。", ".") and lower-cases "Good";
    //  - numbers tokens with a running `index`, and records `position` as a
    //    [start offset, length] pair into the original input string.
    [
      "api_gateway: 很好用。Good.",
      {},
      [
        {
          metadata: {
            index: 0,
            position: [0, 11],
          },
          str: "api_gateway",
        },
        {
          metadata: {
            index: 1,
            position: [0, 3],
          },
          str: "api",
        },
        {
          metadata: {
            index: 2,
            position: [4, 7],
          },
          str: "gateway",
        },
        {
          metadata: {
            index: 3,
            position: [13, 1],
          },
          str: "很",
        },
        {
          metadata: {
            index: 4,
            position: [14, 1],
          },
          str: "好",
        },
        {
          metadata: {
            index: 5,
            position: [15, 1],
          },
          str: "用",
        },
        {
          metadata: {
            index: 6,
            position: [17, 4],
          },
          str: "good",
        },
      ],
    ],
  ])(
    // Placeholders are filled positionally from the row, so there must be one
    // per column; otherwise the "should return" part prints the metadata
    // argument instead of the expected tokens.
    "tokenizer(%j, %j) should return %j",
    (input, metadata, tokens) => {
      expect(tokenizer(input, metadata)).toEqual(tokens);
    }
  );
});