Skip to content
This repository has been archived by the owner on May 14, 2023. It is now read-only.

Commit

Permalink
Merge branch 'v2' of ssh://github.com/jdkato/prose into v2
Browse files Browse the repository at this point in the history
  • Loading branch information
jdkato committed Aug 14, 2019
2 parents 2d9cbbf + 767a230 commit 35f9021
Show file tree
Hide file tree
Showing 3 changed files with 41 additions and 30 deletions.
32 changes: 17 additions & 15 deletions extract.go
Expand Up @@ -42,9 +42,11 @@ func (m *mappedProbDist) max() string {

func newMappedProbDist(dict map[string]float64, normalize bool) *mappedProbDist {
if normalize {
values := []float64{}
values := make([]float64, len(dict))
i := 0
for _, v := range dict {
values = append(values, v)
values[i] = v
i++
}
sum := sumLogs(values)
if sum <= math.Inf(-1) {
Expand Down Expand Up @@ -246,20 +248,19 @@ func adjustPos(text string, start, end int) (int, int) {
}

// extractFeatures pairs each token's label from history with the feature
// set computed by extract, producing exactly one feature entry per token.
//
// NOTE(review): assumes len(history) >= len(tokens) — confirm against
// callers (assignLabels appears to build history from the same tokens).
func extractFeatures(tokens []*Token, history []string) []feature {
	// Pre-size the slice: the result length is known up front, so avoid
	// repeated append growth/copies.
	features := make([]feature, len(tokens))
	for i := range tokens {
		features[i] = feature{
			label:    history[i],
			features: extract(i, tokens, history),
		}
	}
	return features
}

func assignLabels(tokens []*Token, entity *EntityContext) []string {
history := []string{}
for range tokens {
history = append(history, "O")
history := make([]string, len(tokens))
for i := range tokens {
history[i] = "O"
}

if entity.Accept {
Expand Down Expand Up @@ -404,11 +405,12 @@ func parseEntities(ents []string) string {
}

func coalesce(parts []*Token) Entity {
labels := []string{}
tokens := []string{}
for _, tok := range parts {
tokens = append(tokens, tok.Text)
labels = append(labels, tok.Label)
length := len(parts)
labels := make([]string, length)
tokens := make([]string, length)
for i, tok := range parts {
tokens[i] = tok.Text
labels[i] = tok.Label
}
return Entity{
Label: parseEntities(labels),
Expand Down
7 changes: 4 additions & 3 deletions segment.go
Expand Up @@ -49,9 +49,10 @@ func newPunktSentenceTokenizer() *punktSentenceTokenizer {

// segment splits text into sentences.
func (p punktSentenceTokenizer) segment(text string) []Sentence {
sents := []Sentence{}
for _, s := range p.tokenizer.Tokenize(text) {
sents = append(sents, Sentence{Text: strings.TrimSpace(s.Text)})
tokens := p.tokenizer.Tokenize(text)
sents := make([]Sentence, len(tokens))
for i := range tokens {
sents[i] = Sentence{Text: strings.TrimSpace(tokens[i].Text)}
}
return sents
}
Expand Down
32 changes: 20 additions & 12 deletions tag.go
Expand Up @@ -41,16 +41,20 @@ func (t TupleSlice) Swap(i, j int) { t[i], t[j] = t[j], t[i] }

// ReadTagged converts pre-tagged input into a TupleSlice suitable for training.
func ReadTagged(text, sep string) TupleSlice {
t := TupleSlice{}
for _, sent := range strings.Split(text, "\n") {
tokens := []string{}
tags := []string{}
for _, token := range strings.Split(sent, " ") {
lines := strings.Split(text, "\n")
length := len(lines)
t := make(TupleSlice, length)
for i, sent := range lines {
set := strings.Split(sent, " ")
length = len(set)
tokens := make([]string, length)
tags := make([]string, length)
for j, token := range set {
parts := strings.Split(token, sep)
tokens = append(tokens, parts[0])
tags = append(tags, parts[1])
tokens[j] = parts[0]
tags[j] = parts[1]
}
t = append(t, [][]string{tokens, tags})
t[i] = [][]string{tokens, tags}
}
return t
}
Expand Down Expand Up @@ -171,11 +175,15 @@ func (pt *perceptronTagger) tag(tokens []*Token) []*Token {
var found bool

p1, p2 := "-START-", "-START2-"
context := []string{p1, p2}
for _, t := range tokens {
context = append(context, normalize(t.Text))
length := len(tokens) + 4
context := make([]string, length)
context[0] = p1
context[1] = p2
for i, t := range tokens {
context[i+2] = normalize(t.Text)
}
context = append(context, []string{"-END-", "-END2-"}...)
context[length-2] = "-END-"
context[length-1] = "-END2-"
for i := 0; i < len(tokens); i++ {
word := tokens[i].Text
if word == "-" {
Expand Down

0 comments on commit 35f9021

Please sign in to comment.