-
Notifications
You must be signed in to change notification settings - Fork 15
/
features.go
156 lines (124 loc) · 3.09 KB
/
features.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
package pos
import (
"bytes"
"fmt"
"github.com/chewxy/lingo"
)
// featureType identifies one kind of feature. It is a byte-sized enumeration
// whose values are the constants declared below.
type featureType byte

//go:generate stringer -type=featureType

// Feature type identifiers, numbered consecutively from bias via iota.
//
// The declaration order is significant: every constant before
// prevLemma_prevPOSTag is stored as a singleFeature (sfFeatures is sized by
// prevLemma_prevPOSTag), and the constants from prevLemma_prevPOSTag up to,
// but excluding, MAXFEATURETYPE are stored as tupleFeatures (tfFeatures is
// sized by MAXFEATURETYPE - prevLemma_prevPOSTag). Inserting or reordering
// constants changes those sizes and indices.
const (
	// bias is the first feature type; it has no entry in featCtxMap.
	bias featureType = iota
	// Single-valued feature types; each has a matching entry in featCtxMap.
	ithWord_
	nextWord_
	next2Word_
	ithSuffix3_
	ithPrefix1_
	prevPOSTag_
	prev2POSTag_
	prevSuffix3_
	nextSuffix3_
	ithShape_
	ithCluster_
	nextCluster_
	next2Cluster_
	prevCluster_
	prev2Cluster_
	ithFlags_
	nextFlags_
	next2Flags_
	prevFlags_
	prev2Flags_
	// Two-valued feature types; fillFromContext fills each one from a pair of
	// context values.
	prevLemma_prevPOSTag
	prevPOSTag_ithWord
	prevPOSTag_prev2POSTag
	prev2Lemma_prev2POSTag
	// MAXFEATURETYPE is one past the last feature type, i.e. the number of
	// feature types declared above.
	MAXFEATURETYPE
)
// featCtxMap names, for each single-valued feature type, the contextType
// whose value in a contextMap supplies that feature. bias and the tuple
// feature types have no entry here.
var featCtxMap = map[featureType]contextType{
	// Features read from the current (ith) token.
	ithWord_:    ithWord,
	ithSuffix3_: ithSuffix3,
	ithPrefix1_: ithPrefix1,
	ithShape_:   ithShape,
	ithCluster_: ithCluster,
	ithFlags_:   ithFlags,

	// Features read from the one or two following tokens.
	nextWord_:     nextWord,
	next2Word_:    next2Word,
	nextSuffix3_:  nextSuffix3,
	nextCluster_:  nextCluster,
	next2Cluster_: next2Cluster,
	nextFlags_:    nextFlags,
	next2Flags_:   next2Flags,

	// Features read from the one or two preceding tokens.
	prevPOSTag_:   prevPOSTag,
	prev2POSTag_:  prev2POSTag,
	prevSuffix3_:  prevSuffix3,
	prevCluster_:  prevCluster,
	prev2Cluster_: prev2Cluster,
	prevFlags_:    prevFlags,
	prev2Flags_:   prev2Flags,
}
// feature is implemented by every concrete feature value. A feature reports
// which featureType it belongs to and can render itself as a string.
type feature interface {
	fmt.Stringer

	// FeatType returns the featureType this feature was built for.
	FeatType() featureType
}
// singleFeature is a feature that carries exactly one string value.
type singleFeature struct {
	// featureType is embedded and records which feature the value belongs to.
	featureType
	// value is the string recorded for that feature.
	value string
}

// FeatType returns the embedded featureType.
func (f singleFeature) FeatType() featureType {
	return f.featureType
}

// String formats the feature as singleFeature{<type>, "<value>"}, with the
// type printed via %v and the value quoted via %q.
func (f singleFeature) String() string {
	const layout = "singleFeature{%v, %q}"
	return fmt.Sprintf(layout, f.featureType, f.value)
}
// tupleFeature is a feature that carries a pair of string values.
type tupleFeature struct {
	// featureType is embedded and records which feature the pair belongs to.
	featureType
	// value1 is the first string of the pair.
	value1 string
	// value2 is the second string of the pair.
	value2 string
}

// FeatType returns the embedded featureType.
func (f tupleFeature) FeatType() featureType {
	return f.featureType
}

// String formats the feature as tupleFeature {<type>, "<value1>", "<value2>"},
// with the type printed via %v and both values quoted via %q.
func (f tupleFeature) String() string {
	const layout = "tupleFeature {%v, %q, %q}"
	return fmt.Sprintf(layout, f.featureType, f.value1, f.value2)
}
// featureMap associates features with float64 values; add treats the value
// as a running count.
type featureMap map[feature]float64

// String writes one "<feature>: 1," line per key. Lines appear in map
// iteration order, which Go leaves unspecified.
//
// NOTE(review): the stored float64 is never printed; every key is rendered
// with a literal 1 — confirm this is intended.
func (fm featureMap) String() string {
	out := new(bytes.Buffer)
	for key := range fm {
		fmt.Fprintf(out, "%s: 1,\n", key)
	}
	return out.String()
}

// add increments the value stored for f by one; a feature not yet present
// starts from the zero value. The map behind fm must already be initialised,
// since writing to a nil map panics.
func (fm *featureMap) add(f feature) {
	counts := *fm
	counts[f]++
}
type (
	// sfFeatures holds one singleFeature for each feature type below
	// prevLemma_prevPOSTag, indexed by the feature type's own value.
	sfFeatures [prevLemma_prevPOSTag]singleFeature

	// tfFeatures holds one tupleFeature for each feature type from
	// prevLemma_prevPOSTag up to, but excluding, MAXFEATURETYPE. An entry is
	// indexed by its feature type minus prevLemma_prevPOSTag.
	tfFeatures [MAXFEATURETYPE - prevLemma_prevPOSTag]tupleFeature
)
// fillFromContext turns a contextMap into the two fixed-size feature arrays.
//
// sf receives one singleFeature per feature type below prevLemma_prevPOSTag;
// each value is read from c at the contextType that featCtxMap gives for that
// feature type. tf receives the four tupleFeatures, each built from a pair of
// context values.
func fillFromContext(c contextMap) (sf sfFeatures, tf tfFeatures) {
	// NOTE(review): bias has no entry in featCtxMap, so its lookup yields the
	// zero contextType and sf[bias] takes whatever c holds at that index —
	// confirm this is intended.
	for idx := range sf {
		ft := featureType(idx)
		sf[idx] = singleFeature{ft, c[featCtxMap[ft]]}
	}

	// Tuple features are stored relative to the first tuple feature type.
	const base = prevLemma_prevPOSTag

	// Both POS tags take part in more than one pair, so read them once.
	prevTag, prev2Tag := c[prevPOSTag], c[prev2POSTag]

	tf[prevLemma_prevPOSTag-base] = tupleFeature{
		featureType: prevLemma_prevPOSTag,
		value1:      c[prevLemma],
		value2:      prevTag,
	}
	tf[prevPOSTag_ithWord-base] = tupleFeature{
		featureType: prevPOSTag_ithWord,
		value1:      prevTag,
		value2:      c[ithWord],
	}
	tf[prevPOSTag_prev2POSTag-base] = tupleFeature{
		featureType: prevPOSTag_prev2POSTag,
		value1:      prevTag,
		value2:      prev2Tag,
	}
	tf[prev2Lemma_prev2POSTag-base] = tupleFeature{
		featureType: prev2Lemma_prev2POSTag,
		value1:      c[prev2Lemma],
		value2:      prev2Tag,
	}

	return sf, tf
}
// getFeatures builds the feature arrays for the annotation at index i of s,
// using a window of up to two annotations on either side of it.
//
// Window positions that fall outside the sentence are filled with
// lingo.NullAnnotation(). s[i] itself is indexed unconditionally, so i must
// be a valid index into s.
func getFeatures(s lingo.AnnotatedSentence, i int) (sfFeatures, tfFeatures) {
	// Every neighbour starts as the null annotation; only the ones that exist
	// in the sentence are overwritten below.
	var (
		prev2 = lingo.NullAnnotation()
		prev  = lingo.NullAnnotation()
		ith   = s[i]
		next  = lingo.NullAnnotation()
		next2 = lingo.NullAnnotation()
	)

	// Neighbours to the left of i.
	if i >= 1 {
		prev = s[i-1]
	}
	if i >= 2 {
		prev2 = s[i-2]
	}

	// Neighbours to the right of i; last is the final valid index of s.
	last := len(s) - 1
	if i < last {
		next = s[i+1]
	}
	if i+1 < last {
		next2 = s[i+2]
	}

	return fillFromContext(getContext(prev2, prev, ith, next, next2))
}