Skip to content

Commit

Permalink
fix: use the same tokenizer in CheckPOS as the online demo
Browse files: browse the repository at this point in the history
  • Loading branch information
jdkato committed Nov 19, 2020
1 parent 8991e09 commit 0ff6f1f
Showing 1 changed file with 8 additions and 10 deletions.
18 changes: 8 additions & 10 deletions core/util.go
Original file line number | Diff line number | Diff line change
Expand Up @@ -145,20 +145,18 @@ func TextToTokens(text string, needsTagging bool) []tag.Token {
// CheckPOS determines if a match (as found by an extension point) also matches
// the expected part-of-speech in text.
func CheckPOS(loc []int, expected, text string) bool {
var word string

pos := 1

observed := []string{}
for _, tok := range Tag(TextToWords(text, false)) {
for _, tok := range TextToTokens(text, true) {
if InRange(pos, loc) {
if len(tok.Text) > 1 {
word = strings.ToLower(strings.TrimRight(tok.Text, ",.!?:;"))
} else {
word = tok.Text
}
observed = append(observed, (word + "/" + tok.Tag))
observed = append(observed, (tok.Text + "/" + tok.Tag))
}
pos += len(tok.Text)
if !StringInSlice(tok.Tag, []string{"POS", ".", ",", ":", ";", "?"}) {
// Space-bounded ...
pos++
}
pos += len(tok.Text) + 1
}

match, _ := regexp.MatchString(expected, strings.Join(observed, " "))
Expand Down

0 comments on commit 0ff6f1f

Please sign in to comment.