Skip to content
This repository has been archived by the owner on Oct 20, 2018. It is now read-only.

Make annotation probability configurable via a parameter #188

Closed
wants to merge 3 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
5 changes: 3 additions & 2 deletions core/src/main/scala/org/dbpedia/spotlight/db/DBSpotter.scala
Expand Up @@ -15,7 +15,8 @@ import java.util.regex.Pattern
abstract class DBSpotter(
surfaceFormStore: SurfaceFormStore,
spotFeatureWeights: Option[Seq[Double]],
stopwords: Set[String]
stopwords: Set[String],
annotationThreshold:Float
) extends Spotter {

var tokenizer: TextTokenizer = null
Expand Down Expand Up @@ -111,7 +112,7 @@ abstract class DBSpotter(

/** Decides whether `spot` qualifies as a surface-form match.
  *
  * The acceptance threshold depends on whether trained spot-feature
  * weights are available: with weights, the configurable
  * `annotationThreshold` applies; without them, a fixed fallback of
  * 0.25 is used.
  *
  * @param spot the candidate surface form text
  * @return true if the spot's score meets the applicable threshold
  */
protected def surfaceFormMatch(spot: String): Boolean = {
  // Pick the threshold first, then compare once — identical outcome
  // to branching on the comparison itself.
  val threshold = if (spotFeatureWeightVector.isDefined) annotationThreshold else 0.25
  spotScore(spot) >= threshold
}
Expand Down
5 changes: 3 additions & 2 deletions core/src/main/scala/org/dbpedia/spotlight/db/FSASpotter.scala
Expand Up @@ -17,8 +17,9 @@ class FSASpotter(
fsaDictionary: FSADictionary,
surfaceFormStore: SurfaceFormStore,
spotFeatureWeights: Option[Seq[Double]],
stopwords: Set[String]
) extends DBSpotter(surfaceFormStore, spotFeatureWeights, stopwords) {
stopwords: Set[String],
annotationThreshold:Float
) extends DBSpotter(surfaceFormStore, spotFeatureWeights, stopwords, annotationThreshold) {

def generateCandidates(sentence: List[Token]): Seq[Span] = {

Expand Down
Expand Up @@ -24,9 +24,10 @@ class OpenNLPSpotter(
surfaceFormStore: SurfaceFormStore,
stopwords: Set[String],
spotFeatureWeights: Option[Seq[Double]],
annotationThreshold:Float,
phraseTags: Set[String] = Set("NP"),
nnTag: String = "NN"
) extends DBSpotter(surfaceFormStore, spotFeatureWeights, stopwords) {
) extends DBSpotter(surfaceFormStore, spotFeatureWeights, stopwords, annotationThreshold) {

val chunker = chunkerModel match {
case Some(m) => Some(new ChunkerME(m))
Expand Down
Expand Up @@ -75,6 +75,8 @@ object SpotlightModel {
val c = properties.getProperty("opennlp_parallel", Runtime.getRuntime.availableProcessors().toString).toInt
val cores = (1 to c)

val annotationThreshold = properties.getProperty("annotation_threshold", "0.5").toFloat

val tokenizer: TextTokenizer = if(new File(modelFolder, "opennlp").exists()) {

//Create the tokenizer:
Expand Down Expand Up @@ -112,6 +114,7 @@ object SpotlightModel {
new GenerativeContextSimilarity(tokenTypeStore)
))


//If there is at least one NE model or a chunker, use the OpenNLP spotter:
val spotter = if( new File(modelFolder, "opennlp").exists() && new File(modelFolder, "opennlp").list().exists(f => f.startsWith("ner-") || f.startsWith("chunker")) ) {
val nerModels = new File(modelFolder, "opennlp").list().filter(_.startsWith("ner-")).map { f: String =>
Expand All @@ -129,7 +132,8 @@ object SpotlightModel {
nerModels,
sfStore,
stopwords,
Some(loadSpotterThresholds(new File(modelFolder, "spotter_thresholds.txt")))
Some(loadSpotterThresholds(new File(modelFolder, "spotter_thresholds.txt"))),
annotationThreshold
).asInstanceOf[Spotter]

if(cores.size == 1)
Expand All @@ -146,7 +150,8 @@ object SpotlightModel {
dict,
sfStore,
Some(loadSpotterThresholds(new File(modelFolder, "spotter_thresholds.txt"))),
stopwords
stopwords,
annotationThreshold
).asInstanceOf[Spotter]
}

Expand Down