Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.
Download ZIP
Browse files

Bugfix in model creation.

  • Loading branch information...
commit d7ab6f763a538c57546e0e43ce4d4d0d6a1b6d76 1 parent 8c26d2b
@jodaiber jodaiber authored
View
2  core/src/main/scala/org/dbpedia/spotlight/db/FSASpotter.scala
@@ -70,7 +70,7 @@ object FSASpotter {
//State ID for None
val REJECTING_STATE = -2
- def buildDictionary(sfStore: SurfaceFormStore, tokenizer: StringTokenizer): FSADictionary = {
+ def buildDictionary(sfStore: SurfaceFormStore, tokenizer: TextTokenizer): FSADictionary = {
//Temporary FSA DSs:
val transitions: ArrayBuffer[Map[Int, Int]] = ArrayBuffer[Map[Int, Int]]()
View
23 index/src/main/scala/org/dbpedia/spotlight/db/CreateSpotlightModel.scala
@@ -3,7 +3,7 @@ package org.dbpedia.spotlight.db
import io._
import java.io.{FileOutputStream, FileInputStream, File}
import memory.MemoryStore
-import model.{StringTokenizer, Stemmer}
+import model.{TextTokenizer, StringTokenizer, Stemmer}
import scala.io.Source
import org.tartarus.snowball.SnowballProgram
import java.util.{Locale, Properties}
@@ -205,7 +205,26 @@ object CreateSpotlightModel {
)
memoryIndexer.writeTokenOccurrences()
- val fsaDict = FSASpotter.buildDictionary(sfStore, rawTokenizer)
+
+ val tokenizer: TextTokenizer = if (opennlpFolder.isDefined) {
+ val opennlpOut = new File(outputFolder, OPENNLP_FOLDER)
+ val oToken = new TokenizerME(new TokenizerModel(new FileInputStream(new File(opennlpOut, "token.bin"))))
+ val oSent = new SentenceDetectorME(new SentenceModel(new FileInputStream(new File(opennlpOut, "sent.bin"))))
+
+ new OpenNLPTokenizer(
+ oToken,
+ Set[String](),
+ stemmer,
+ oSent,
+ null,
+ tokenStore
+ )
+
+ } else {
+ new LanguageIndependentTokenizer(Set[String](), stemmer, locale, tokenStore)
+ }
+ val fsaDict = FSASpotter.buildDictionary(sfStore, tokenizer)
+
MemoryStore.dump(fsaDict, new File(outputFolder, "fsa_dict.mem"))
FileUtils.write(
Please sign in to comment.
Something went wrong with that request. Please try again.