-
Notifications
You must be signed in to change notification settings - Fork 2
/
types.ts
114 lines (89 loc) · 2.08 KB
/
types.ts
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
export type OutputFormatType =
| 'wakati'
| 'yomi'
| 'chasen'
| 'dump'
| 'simple'
| 'none'
| 'em';
// Ref 1: `mecab --help`
// Ref 2: http://www.mwsoft.jp/programming/munou/mecab_command.html (Japanese)
export type MecabOptions = {
rcfile?: string;
dicdir?: string;
userdic?: string;
latticeLevel?: number;
dictionaryInfo?: boolean;
outputFormatType?: OutputFormatType;
allMorphs?: boolean;
nbest?: number;
partial?: boolean;
marginal?: boolean;
maxGroupingSize?: number;
nodeFormat?: string;
unkFormat?: string;
bosFormat?: string;
eosFormat?: string;
eonFormat?: string;
unkFeature?: string;
inputBufferSize?: number;
dumpConfig?: boolean;
allocateSentence?: boolean;
theta?: number;
costFactor?: number;
output?: string;
};
// Ref: https://taku910.github.io/mecab/#usage-tools (Japanese)
export type Feature = {
// 品詞 (part of speech)
pos?: string;
// 品詞細分類1, 品詞細分類2, 品詞細分類3
posSubs: [string | undefined, string | undefined, string | undefined];
// 活用型
conjugatedType?: string;
// 活用形
conjugatedForm?: string;
// 原形
basicForm?: string;
// 読み
reading?: string;
// 発音
pronunciation?: string;
};
// Ref: https://github.com/taku910/mecab/blob/046fa78b2ed56fbd4fac312040f6d62fc1bc31e3/mecab/src/mecab.h#L218-L243
export type Stats = readonly ['NORMAL', 'UNKNOWN', 'BOS', 'EOS', 'EON'];
export type Stat = Stats[number];
// Ref: http://taku910.github.io/mecab/bindings.html (Japanese)
export type Token = {
// Node id
id: number;
// 形態素の文字列情報
surface: string;
// 素性情報
feature: Feature;
// 形態素の始端
startPosition: number;
// 形態素の終端
endPosition: number;
// 右文脈 id
rcAttr: number;
// 左文脈 id
lcAttr: number;
// 形態素 id
posid: number;
// 文字種情報
charType: number;
// 形態素の種類
stat: Stat;
// ベスト解かどうか
isbest: boolean;
// Forward log 確率
alpha: number;
// Backward log 確率
beta: number;
// 周辺確率
prob: number;
// 単語生起コスト
cost: number;
_: string[];
};