This repository has been archived by the owner on Dec 16, 2022. It is now read-only.
/
util.py
87 lines (85 loc) · 2.55 KB
/
util.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
# Copied from spaCy version 2.x
from spacy.symbols import (
POS,
PUNCT,
SYM,
ADJ,
CCONJ,
NUM,
DET,
ADV,
ADP,
X,
VERB,
NOUN,
PROPN,
PART,
INTJ,
SPACE,
PRON,
)
TAG_MAP = {
".": {POS: PUNCT, "PunctType": "peri"},
",": {POS: PUNCT, "PunctType": "comm"},
"-LRB-": {POS: PUNCT, "PunctType": "brck", "PunctSide": "ini"},
"-RRB-": {POS: PUNCT, "PunctType": "brck", "PunctSide": "fin"},
"``": {POS: PUNCT, "PunctType": "quot", "PunctSide": "ini"},
'""': {POS: PUNCT, "PunctType": "quot", "PunctSide": "fin"},
"''": {POS: PUNCT, "PunctType": "quot", "PunctSide": "fin"},
":": {POS: PUNCT},
"$": {POS: SYM},
"#": {POS: SYM},
"AFX": {POS: ADJ, "Hyph": "yes"},
"CC": {POS: CCONJ, "ConjType": "comp"},
"CD": {POS: NUM, "NumType": "card"},
"DT": {POS: DET},
"EX": {POS: PRON, "AdvType": "ex"},
"FW": {POS: X, "Foreign": "yes"},
"HYPH": {POS: PUNCT, "PunctType": "dash"},
"IN": {POS: ADP},
"JJ": {POS: ADJ, "Degree": "pos"},
"JJR": {POS: ADJ, "Degree": "comp"},
"JJS": {POS: ADJ, "Degree": "sup"},
"LS": {POS: X, "NumType": "ord"},
"MD": {POS: VERB, "VerbType": "mod"},
"NIL": {POS: X},
"NN": {POS: NOUN, "Number": "sing"},
"NNP": {POS: PROPN, "NounType": "prop", "Number": "sing"},
"NNPS": {POS: PROPN, "NounType": "prop", "Number": "plur"},
"NNS": {POS: NOUN, "Number": "plur"},
"PDT": {POS: DET},
"POS": {POS: PART, "Poss": "yes"},
"PRP": {POS: PRON, "PronType": "prs"},
"PRP$": {POS: DET, "PronType": "prs", "Poss": "yes"},
"RB": {POS: ADV, "Degree": "pos"},
"RBR": {POS: ADV, "Degree": "comp"},
"RBS": {POS: ADV, "Degree": "sup"},
"RP": {POS: ADP},
"SP": {POS: SPACE},
"SYM": {POS: SYM},
"TO": {POS: PART, "PartType": "inf", "VerbForm": "inf"},
"UH": {POS: INTJ},
"VB": {POS: VERB, "VerbForm": "inf"},
"VBD": {POS: VERB, "VerbForm": "fin", "Tense": "past"},
"VBG": {POS: VERB, "VerbForm": "part", "Tense": "pres", "Aspect": "prog"},
"VBN": {POS: VERB, "VerbForm": "part", "Tense": "past", "Aspect": "perf"},
"VBP": {POS: VERB, "VerbForm": "fin", "Tense": "pres"},
"VBZ": {
POS: VERB,
"VerbForm": "fin",
"Tense": "pres",
"Number": "sing",
"Person": "three",
},
"WDT": {POS: DET},
"WP": {POS: PRON},
"WP$": {POS: DET, "Poss": "yes"},
"WRB": {POS: ADV},
"ADD": {POS: X},
"NFP": {POS: PUNCT},
"GW": {POS: X},
"XX": {POS: X},
"BES": {POS: VERB},
"HVS": {POS: VERB},
"_SP": {POS: SPACE},
}