Skip to content

Commit

Permalink
update ranker
Browse files Browse the repository at this point in the history
  • Loading branch information
boyter committed Jun 10, 2020
1 parent e4efb5e commit 26f8936
Show file tree
Hide file tree
Showing 3 changed files with 15 additions and 1 deletion.
6 changes: 6 additions & 0 deletions main.go
Original file line number Diff line number Diff line change
Expand Up @@ -167,6 +167,12 @@ func main() {
"text",
"set output format [text, json]",
)
flags.StringVar(
&processor.Ranker,
"ranker",
"tfidfl",
"set ranking algorithm [tfidf, tfidfl]",
)

// the below flags we want but are not enabled as yet

Expand Down
3 changes: 3 additions & 0 deletions processor/arguments.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,9 @@ var IncludeBinaryFiles = false
// Format names the output format the formatter should produce.
var Format string

// Ranker names the ranking algorithm used to score results.
var Ranker string

// FileOutput is the file that output should be written to.
var FileOutput string

Expand Down
7 changes: 6 additions & 1 deletion processor/result_ranker.go
Original file line number Diff line number Diff line change
Expand Up @@ -157,7 +157,12 @@ func rankResultsTFIDF(corpusCount int, results []*fileJob, documentFrequencies m
tf := float64(len(wordCount)) / words
idf := math.Log10(float64(corpusCount) / float64(documentFrequencies[word]))

weight += tf * idf
// The "tfidfl" ranker dampens tf with math.Sqrt and scales the term by 1/math.Sqrt(words);
// background: https://opensourceconnections.com/blog/2015/10/16/bm25-the-next-generation-of-lucene-relevation/
if Ranker == "tfidfl" {
weight += math.Sqrt(tf) * idf * (1/math.Sqrt(words))
} else {
weight += tf * idf
}
}

// Override the score here because we don't want whatever we got originally
Expand Down

0 comments on commit 26f8936

Please sign in to comment.