/
eval.go
120 lines (99 loc) · 3.15 KB
/
eval.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
// Copyright 2016 The Citar Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package common
import (
"fmt"
"github.com/danieldk/citar/model"
"github.com/danieldk/citar/tagger"
"github.com/danieldk/conllx"
)
// Evaluator accumulates tagging statistics. It holds the tagger under
// evaluation, the model used to decide whether a word form is known, and
// four counters that split the processed tokens by known/unknown and by
// correctly/incorrectly tagged.
type Evaluator struct {
	// tagger produces the tag predictions that are scored.
	tagger tagger.HMMTagger
	// model supplies the word-tag frequency table consulted for the
	// known/unknown distinction.
	model model.Model

	// Counts for tokens whose form is present in the model.
	knownCorrect, knownIncorrect uint
	// Counts for tokens whose form is absent from the model.
	unknownCorrect, unknownIncorrect uint
}
// NewEvaluator returns a fresh Evaluator whose counters all start at zero.
// The given tagger is the one whose output is scored; the given model is
// consulted to tell known tokens from unknown ones.
func NewEvaluator(tagger tagger.HMMTagger, model model.Model) *Evaluator {
	evaluator := new(Evaluator)
	evaluator.tagger = tagger
	evaluator.model = model
	return evaluator
}
// Process tags a sentence with the Evaluator's tagger and adds the outcome
// for every token to the known/unknown, correct/incorrect counters.
//
// A token counts as known when its form has an entry in the model's
// word-tag frequency table, and as correct when the predicted tag equals
// the token's own part-of-speech tag.
//
// An error is returned when a token lacks a form or a tag, or when the
// tagger yields fewer tags than there are tokens. The counters are only
// updated after the whole sentence has been examined, so a sentence that
// produces an error contributes nothing to the totals.
func (e *Evaluator) Process(sent []conllx.Token) error {
	words := make([]string, 0, len(sent))
	for _, token := range sent {
		form, ok := token.Form()
		if !ok {
			return fmt.Errorf("Token does not have a form: %s", token)
		}
		words = append(words, form)
	}

	// Only the tag sequence is used here; the second result of Tags() is
	// deliberately discarded.
	tags, _ := e.tagger.Tag(words).Tags()
	if len(tags) < len(words) {
		// Indexing tags below would panic; report the mismatch instead.
		return fmt.Errorf("tagger returned %d tags for %d tokens", len(tags), len(words))
	}

	// Fetch the frequency table once rather than once per token.
	lexicon := e.model.WordTagFreqs()

	// Tally into locals first so that an error part-way through the
	// sentence does not leave the Evaluator with partial counts.
	var knownCorrect, knownIncorrect, unknownCorrect, unknownIncorrect uint
	for idx, token := range sent {
		correctTag, ok := token.PosTag()
		if !ok {
			return fmt.Errorf("Token does not have a tag: %s", token)
		}

		_, inLexicon := lexicon[words[idx]]
		correct := tags[idx] == correctTag

		switch {
		case correct && inLexicon:
			knownCorrect++
		case correct:
			unknownCorrect++
		case inLexicon:
			knownIncorrect++
		default:
			unknownIncorrect++
		}
	}

	e.knownCorrect += knownCorrect
	e.knownIncorrect += knownIncorrect
	e.unknownCorrect += unknownCorrect
	e.unknownIncorrect += unknownIncorrect

	return nil
}
// KnownCorrect returns the number of tokens counted so far whose form was
// found in the model's word-tag frequency table and whose predicted tag
// equalled the token's own POS tag.
func (e *Evaluator) KnownCorrect() uint {
return e.knownCorrect
}
// KnownIncorrect returns the number of tokens counted so far whose form was
// found in the model's word-tag frequency table but whose predicted tag
// differed from the token's own POS tag.
func (e *Evaluator) KnownIncorrect() uint {
return e.knownIncorrect
}
// UnknownCorrect returns the number of tokens counted so far whose form was
// not found in the model's word-tag frequency table and whose predicted tag
// equalled the token's own POS tag.
func (e *Evaluator) UnknownCorrect() uint {
return e.unknownCorrect
}
// UnknownIncorrect returns the number of tokens counted so far whose form
// was not found in the model's word-tag frequency table and whose predicted
// tag differed from the token's own POS tag.
func (e *Evaluator) UnknownIncorrect() uint {
return e.unknownIncorrect
}
// OverallCorrect returns the total number of correctly tagged tokens,
// i.e. the known and unknown correct counts added together.
func (e *Evaluator) OverallCorrect() uint {
	return e.KnownCorrect() + e.UnknownCorrect()
}
// OverallIncorrect returns the total number of incorrectly tagged tokens,
// i.e. the known and unknown incorrect counts added together.
func (e *Evaluator) OverallIncorrect() uint {
	return e.KnownIncorrect() + e.UnknownIncorrect()
}
// KnownAccuracy returns the fraction of known tokens that were tagged
// correctly: the known correct count divided by the number of known tokens.
// If no known token has been counted the division is 0/0, which yields NaN
// under IEEE 754 arithmetic.
func (e *Evaluator) KnownAccuracy() float64 {
	correct := e.knownCorrect
	total := correct + e.knownIncorrect
	return float64(correct) / float64(total)
}
// Accuracy returns the fraction of all counted tokens that were tagged
// correctly: the overall correct count divided by the number of counted
// tokens. If no token has been counted the division is 0/0, which yields
// NaN under IEEE 754 arithmetic.
func (e *Evaluator) Accuracy() float64 {
	correct := e.knownCorrect + e.unknownCorrect
	incorrect := e.knownIncorrect + e.unknownIncorrect
	total := correct + incorrect
	return float64(correct) / float64(total)
}
// UnknownAccuracy returns the fraction of unknown tokens that were tagged
// correctly: the unknown correct count divided by the number of unknown
// tokens. If no unknown token has been counted the division is 0/0, which
// yields NaN under IEEE 754 arithmetic.
func (e *Evaluator) UnknownAccuracy() float64 {
	correct := e.unknownCorrect
	total := correct + e.unknownIncorrect
	return float64(correct) / float64(total)
}