Skip to content
This repository has been archived by the owner on Oct 20, 2018. It is now read-only.

Make annotation probability configurable via a parameter #188

Closed
wants to merge 3 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
5 changes: 3 additions & 2 deletions core/src/main/scala/org/dbpedia/spotlight/db/DBSpotter.scala
Expand Up @@ -15,7 +15,8 @@ import java.util.regex.Pattern
abstract class DBSpotter(
surfaceFormStore: SurfaceFormStore,
spotFeatureWeights: Option[Seq[Double]],
stopwords: Set[String]
stopwords: Set[String],
annotationThreshold:Float
) extends Spotter {

var tokenizer: TextTokenizer = null
Expand Down Expand Up @@ -111,7 +112,7 @@ abstract class DBSpotter(

/** Decides whether `spot` qualifies as a surface-form match.
  *
  * The acceptance threshold depends on whether trained spot-feature
  * weights are available: with weights, the configurable
  * `annotationThreshold` applies; without them, a fixed fallback of
  * 0.25 is used.
  *
  * @param spot the candidate surface form text
  * @return true if the spot's score meets the applicable threshold
  */
protected def surfaceFormMatch(spot: String): Boolean = {
  // Pick the threshold first, then compare once — identical outcome
  // to branching on the comparison itself.
  val threshold = if (spotFeatureWeightVector.isDefined) annotationThreshold else 0.25
  spotScore(spot) >= threshold
}
Expand Down
5 changes: 3 additions & 2 deletions core/src/main/scala/org/dbpedia/spotlight/db/FSASpotter.scala
Expand Up @@ -17,8 +17,9 @@ class FSASpotter(
fsaDictionary: FSADictionary,
surfaceFormStore: SurfaceFormStore,
spotFeatureWeights: Option[Seq[Double]],
stopwords: Set[String]
) extends DBSpotter(surfaceFormStore, spotFeatureWeights, stopwords) {
stopwords: Set[String],
annotationThreshold:Float
) extends DBSpotter(surfaceFormStore, spotFeatureWeights, stopwords, annotationThreshold) {

def generateCandidates(sentence: List[Token]): Seq[Span] = {

Expand Down
Expand Up @@ -24,9 +24,10 @@ class OpenNLPSpotter(
surfaceFormStore: SurfaceFormStore,
stopwords: Set[String],
spotFeatureWeights: Option[Seq[Double]],
annotationThreshold:Float,
phraseTags: Set[String] = Set("NP"),
nnTag: String = "NN"
) extends DBSpotter(surfaceFormStore, spotFeatureWeights, stopwords) {
) extends DBSpotter(surfaceFormStore, spotFeatureWeights, stopwords, annotationThreshold) {

val chunker = chunkerModel match {
case Some(m) => Some(new ChunkerME(m))
Expand Down
Expand Up @@ -75,6 +75,8 @@ object SpotlightModel {
val c = properties.getProperty("opennlp_parallel", Runtime.getRuntime.availableProcessors().toString).toInt
val cores = (1 to c)

val annotationThreshold = properties.getProperty("annotation_threshold", "0.5").toFloat

val tokenizer: TextTokenizer = if(new File(modelFolder, "opennlp").exists()) {

//Create the tokenizer:
Expand Down Expand Up @@ -112,6 +114,7 @@ object SpotlightModel {
new GenerativeContextSimilarity(tokenTypeStore)
))


//If there is at least one NE model or a chunker, use the OpenNLP spotter:
val spotter = if( new File(modelFolder, "opennlp").exists() && new File(modelFolder, "opennlp").list().exists(f => f.startsWith("ner-") || f.startsWith("chunker")) ) {
val nerModels = new File(modelFolder, "opennlp").list().filter(_.startsWith("ner-")).map { f: String =>
Expand All @@ -129,7 +132,8 @@ object SpotlightModel {
nerModels,
sfStore,
stopwords,
Some(loadSpotterThresholds(new File(modelFolder, "spotter_thresholds.txt")))
Some(loadSpotterThresholds(new File(modelFolder, "spotter_thresholds.txt"))),
annotationThreshold
).asInstanceOf[Spotter]

if(cores.size == 1)
Expand All @@ -146,7 +150,8 @@ object SpotlightModel {
dict,
sfStore,
Some(loadSpotterThresholds(new File(modelFolder, "spotter_thresholds.txt"))),
stopwords
stopwords,
annotationThreshold
).asInstanceOf[Spotter]
}

Expand Down