Skip to content
Browse files

rolling back topical, moving to branch

  • Loading branch information...
1 parent 9c0d806 commit 2ffa1d8d6078b4bce6577c13568eb8ec95323b54 @pablomendes pablomendes committed Jul 10, 2012
View
1 .gitignore
@@ -1,3 +1,4 @@
*.iml
.idea
target
+*.log
View
122 core/src/main/scala/org/dbpedia/spotlight/db/model/TopicalPriorStore.scala
@@ -1,122 +0,0 @@
-package org.dbpedia.spotlight.db.model
-
-import org.dbpedia.spotlight.model.{Topic, DBpediaResource}
-import io.Source
-import java.io.{IOException, File}
-import com.officedepot.cdap2.collection.CompactHashMap
-import org.apache.commons.logging.LogFactory
-
-/**
- *
- * @author pablomendes
- */
-
-trait TopicalPriorStore {
-
-// def getTopicalPriorCounts(resource:DBpediaResource): Map[Topic,Int]
-// def getTopicalPriorCount(resource:DBpediaResource, topic: Topic): Int
-// def getTotalCounts(): Map[Topic,Int]
-
- def getTopicalPriorCounts(resource:DBpediaResource): Map[String,Int]
- def getTopicalPriorCount(resource:DBpediaResource, topic: String): Int
- def getTotalCounts(): Map[String,Int]
-
-}
-
-object HashMapTopicalPriorStore extends TopicalPriorStore {
- private val LOG = LogFactory.getLog(this.getClass)
-
- val totalCounts = new CompactHashMap[String,Int]() // topic -> total
- val topicalPriors = new CompactHashMap[String,CompactHashMap[String,Int]]() // topic -> (resource -> count)
-
- val ExtractCountAndResource = """^\s+(\d+)\s+(\S+)$""".r
-
-// def getTotalCounts() = {
-// totalCounts.map{ case (topic,count) => new Topic(topic) -> count }.toMap
-// }
-//
-// def getTopicalPriorCounts(resource:DBpediaResource): Map[Topic,Int] = {
-// topicalPriors.keys.map( topic => {
-// new Topic(topic) -> getTopicalPriorCount(resource.uri,topic)
-// }).toMap
-// }
-//
-// def getTopicalPriorCount(resource: DBpediaResource, topic: Topic): Int = getTopicalPriorCount(resource.uri, topic.name)
-
- def getTotalCounts() = {
- totalCounts.toMap
- }
- def getTopicalPriorCounts(resource:DBpediaResource): Map[String,Int] = {
- topicalPriors.keys.map( topic => {
- topic -> getTopicalPriorCount(resource.uri,topic)
- }).toMap
- }
- def getTopicalPriorCount(resource: DBpediaResource, topic: String): Int = getTopicalPriorCount(resource.uri, topic)
-
-
- private def getTopicalPriorCount(uri: String, topic: String): Int = {
- val statsForTopic = topicalPriors.getOrElse(topic, new CompactHashMap[String,Int])
- statsForTopic.getOrElse(uri, 0)
- }
-
- def fromDir(dir: File) : TopicalPriorStore = {
- LOG.info("Loading topical priors.")
- if (dir.exists() && dir.isDirectory) {
- dir.listFiles().foreach( file => {
- if (file.getName.endsWith(".count")) {
- var total = 0
- val topic = file.getName.replaceAll(".count","").trim
- val statsForTopic = new CompactHashMap[String,Int]
- Source.fromFile(file).getLines()
- //.take(5)
- .foreach( line => {
- line match {
- case ExtractCountAndResource(count,uri) => {
- val c = count.toInt
- statsForTopic.put(uri.trim,c)
- total = total + c
- }
- case _ => println("no match")
- }
- })
- topicalPriors.put(topic,statsForTopic)
- totalCounts.put(topic, total)
- }
- })
- } else {
- throw new IOException("Could not load directory with topics.")
- }
- LOG.info("Done.")
- this
- }
-
- def main(args: Array[String]) {
- val dir = if (args.size>0) new File(args(0)) else new File("data/topics")
- fromDir(dir)
- stats()
- test()
- }
-
- def stats() {
-
- println("topics "+topicalPriors.keys.size)
-
- topicalPriors.keys.foreach( topic => {
- println("topic: %s; resources: %d".format(topic,topicalPriors.getOrElse(topic, new CompactHashMap[String,Int]).size))
- })
-
- println("# topics: "+topicalPriors.keys.size)
-
- }
-
- def test() {
-// val mjCount = getTopicalPriorCount(new DBpediaResource("Michael_Jackson"), new Topic("other"))
-// val otherCount = getTotalCounts().getOrElse(new Topic("other"),0)
-// println("MJ distribution: "+getTopicalPriorCounts(new DBpediaResource("Michael_Jackson")))
-// println("c(MJ,other): "+mjCount)
-// println("c(other): "+otherCount)
-// println("p(MJ|other): "+ mjCount.toDouble / otherCount)
-// println("log(p(MJ|other)): "+ scala.math.log(mjCount.toDouble / otherCount))
- }
-
-}
View
226 core/src/main/scala/org/dbpedia/spotlight/disambiguate/TopicBiasedDisambiguator.scala
@@ -1,226 +0,0 @@
-/*
- * Copyright 2012 DBpedia Spotlight Development Team
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- *
- * Check our project website for information on how to acknowledge the authors and how to contribute to the project: http://spotlight.dbpedia.org
- */
-
-package org.dbpedia.spotlight.disambiguate
-
-import org.apache.commons.logging.LogFactory
-import java.lang.UnsupportedOperationException
-import scalaj.collection.Imports._
-import org.apache.lucene.search.similar.MoreLikeThis
-import org.dbpedia.spotlight.exceptions.{ItemNotFoundException, SearchException, InputException}
-import org.apache.lucene.search.{ScoreDoc, Explanation}
-import org.dbpedia.spotlight.model._
-import org.apache.lucene.index.Term
-import com.officedepot.cdap2.collection.CompactHashSet
-import org.dbpedia.spotlight.lucene.search.MergedOccurrencesContextSearcher
-import org.dbpedia.spotlight.lucene.LuceneManager.DBpediaResourceField
-import java.io.StringReader
-import org.dbpedia.spotlight.db.model.TopicalPriorStore
-import org.dbpedia.spotlight.topics.TopicExtractor
-
-/**
- *
- * @author pablomendes
- */
-class TopicBiasedDisambiguator(val candidateSearcher: CandidateSearcher,
- val contextSearcher: MergedOccurrencesContextSearcher, //TODO should be ContextSearcher. Need a generic disambiguator to enable this.
- val topicalPriorStore: TopicalPriorStore)
- extends ParagraphDisambiguator {
-
- private val LOG = LogFactory.getLog(this.getClass)
-
- @throws(classOf[InputException])
- def disambiguate(paragraph: Paragraph): List[DBpediaResourceOccurrence] = {
- // return first from each candidate set
- bestK(paragraph, 5)
- .filter(kv =>
- kv._2.nonEmpty)
- .map(kv =>
- kv._2.head)
- .toList
- }
-
- //WARNING: this is a repetition of BaseSearcher.getHits
- //TODO move to subclass of BaseSearcher
- def query(text: Text, allowedUris: Array[DBpediaResource]) = {
- LOG.debug("Setting up query.")
-
- val context = if (text.text.size < 250) text.text.concat(" " + text.text) else text.text //HACK for text that is too short
- //LOG.debug(context)
- val nHits = allowedUris.size
- val filter = new org.apache.lucene.search.TermsFilter() //TODO can use caching? val filter = new FieldCacheTermsFilter(DBpediaResourceField.CONTEXT.toString,allowedUris)
- allowedUris.foreach(u => filter.addTerm(new Term(DBpediaResourceField.URI.toString, u.uri)))
-
- val mlt = new MoreLikeThis(contextSearcher.mReader);
- mlt.setFieldNames(Array(DBpediaResourceField.CONTEXT.toString))
- mlt.setAnalyzer(contextSearcher.getLuceneManager.defaultAnalyzer)
- //LOG.debug("Analyzer %s".format(contextLuceneManager.defaultAnalyzer))
- //val inputStream = new ByteArrayInputStream(context.getBytes("UTF-8"));
- val query = mlt.like(new StringReader(context), DBpediaResourceField.CONTEXT.toString);
- LOG.debug("Running query.")
- contextSearcher.getHits(query, nHits, 50000, filter)
- }
-
- //If you want us to extract allCandidates from occs while we're generating it, you have to pass in the allCandidates parameter
- //If you don't pass anything down, we will just fill in a dummy hashset and let the garbage collector deal with it
- def getCandidates(paragraph: Paragraph, allCandidates: CompactHashSet[DBpediaResource] = CompactHashSet[DBpediaResource]()) = {
- val s1 = System.nanoTime()
- // step1: get candidates for all surface forms (TODO here building allCandidates directly, but could extract from occs)
- val occs = paragraph.occurrences
- .foldLeft(Map[SurfaceFormOccurrence, List[DBpediaResource]]())(
- (acc, sfOcc) => {
- LOG.debug("searching...")
- var candidates = new java.util.HashSet[DBpediaResource]().asScala
- try {
- candidates = candidateSearcher.getCandidates(sfOcc.surfaceForm).asScala //.map(r => r.uri)
- } catch {
- case e: ItemNotFoundException => LOG.debug(e);
- }
- //ATTENTION there is no r.support at this point
- //TODO if support comes from candidate index, it means c(sf,r).
-
- //LOG.debug("# candidates for: %s = %s (%s)".format(sfOcc.surfaceForm,candidates.size,candidates))
- LOG.debug("# candidates for: %s = %s.".format(sfOcc.surfaceForm, candidates.size))
- candidates.foreach(r => allCandidates.add(r))
- acc + (sfOcc -> candidates.toList)
- });
- val e1 = System.nanoTime()
- //LOG.debug("Time with %s: %f.".format(m1, (e1-s1) / 1000000.0 ))
- occs
- }
-
- def getTopicalScore(textTopics: Map[String, Double], resource: DBpediaResource): Double = {
- //TODO Topic->Double
- val resourceTopicCounts = topicalPriorStore.getTopicalPriorCounts(resource)
- val topicTotals = topicalPriorStore.getTotalCounts()
- val score = textTopics.map {
- case (topic, textScore) => {
- val total = topicTotals.get(topic) match {
- case Some(n) => n
- case None => throw new SearchException("Topic set was not loaded correctly.")
- }
- val resourcePrior = resourceTopicCounts.getOrElse(topic, 0).toDouble / total.toDouble
- math.log(resourcePrior) + math.log(textScore)
- }
- }.sum
- math.exp(score)
- }
-
- def bestK(paragraph: Paragraph, k: Int): Map[SurfaceFormOccurrence, List[DBpediaResourceOccurrence]] = {
-
- LOG.debug("Running bestK for paragraph %s.".format(paragraph.id))
-
- if (paragraph.occurrences.size == 0) return Map[SurfaceFormOccurrence, List[DBpediaResourceOccurrence]]()
-
- val topics = TopicExtractor.getTopics(paragraph.text.text)
-
- // val m1 = if (candLuceneManager.getDBpediaResourceFactory == null) "lucene" else "jdbc"
- // val m2 = if (contextLuceneManager.getDBpediaResourceFactory == null) "lucene" else "jdbc"
-
- // step1: get candidates for all surface forms
- // (TODO here building allCandidates directly, but could extract from occs)
- var allCandidates = CompactHashSet[DBpediaResource]();
- val occs = getCandidates(paragraph, allCandidates)
-
- val s2 = System.nanoTime()
- // step2: query once for the paragraph context, get scores for each candidate resource
- var hits: Array[ScoreDoc] = null
- try {
- hits = query(paragraph.text, allCandidates.toArray)
- } catch {
- case e: Exception => throw new SearchException(e);
- case r: RuntimeException => throw new SearchException(r);
- case _ => LOG.error("Unknown really scary error happened. You can cry now.")
- }
- // LOG.debug("Hits (%d): %s".format(hits.size, hits.map( sd => "%s=%s".format(sd.doc,sd.score) ).mkString(",")))
-
- // LOG.debug("Reading DBpediaResources.")
- val scores = hits
- .foldRight(Map[String, Tuple2[DBpediaResource, Double]]())((hit, acc) => {
- var resource: DBpediaResource = contextSearcher.getDBpediaResource(hit.doc) //this method returns resource.support=c(r)
- val topicalScore = getTopicalScore(topics, resource)
- var score = if (topicalScore > 0) hit.score * topicalScore else hit.score
- //TODO can mix here the scores: c(s,r) / c(r)
- acc + (resource.uri ->(resource, score))
- });
- val e2 = System.nanoTime()
- //LOG.debug("Scores (%d): %s".format(scores.size, scores))
-
- //LOG.debug("Time with %s: %f.".format(m2, (e2-s2) / 1000000.0 ))
-
- // pick the best k for each surface form
- val r = occs.keys.foldLeft(Map[SurfaceFormOccurrence, List[DBpediaResourceOccurrence]]())((acc, aSfOcc) => {
- val candOccs = occs.getOrElse(aSfOcc, List[DBpediaResource]())
- .map(shallowResource => {
- val (resource: DBpediaResource, supportConfidence: (Int, Double)) = scores.get(shallowResource.uri) match {
- case Some((fullResource, contextualScore)) => {
- (fullResource, (fullResource.support, contextualScore))
- }
- case _ => (shallowResource, (shallowResource.support, 0.0))
- }
- Factory.DBpediaResourceOccurrence.from(aSfOcc,
- resource, //TODO this resource may contain the c(s,r) that can be used for conditional prob.
- supportConfidence)
- })
- .sortBy(o => o.contextualScore) //TODO should be final score
- .reverse
- .take(k)
- acc + (aSfOcc -> candOccs)
- });
-
- // LOG.debug("Reranked (%d)".format(r.size))
-
- r
- }
-
- def name(): String = {
- "TopicBiasedDisambiguator"
- }
-
- def ambiguity(sf: SurfaceForm): Int = {
- candidateSearcher.getAmbiguity(sf)
- }
-
- def support(resource: DBpediaResource): Int = {
- throw new UnsupportedOperationException("Not implemented.")
- }
-
- @throws(classOf[SearchException])
- def explain(goldStandardOccurrence: DBpediaResourceOccurrence, nExplanations: Int): List[Explanation] = {
- throw new UnsupportedOperationException("Not implemented.")
- }
-
- def contextTermsNumber(resource: DBpediaResource): Int = {
- throw new UnsupportedOperationException("Not implemented.")
- }
-
- def averageIdf(context: Text): Double = {
- throw new UnsupportedOperationException("Not implemented.")
- }
-
-
- //TODO rather than throwing an exception, we should split the Disambiguator interface accordingly
- def disambiguate(sfOccurrence: SurfaceFormOccurrence): DBpediaResourceOccurrence = {
- throw new UnsupportedOperationException("Cannot disambiguate single occurrence. This disambiguator uses multiple occurrences in the same paragraph as disambiguation context.")
- }
-
- def bestK(sfOccurrence: SurfaceFormOccurrence, k: Int): java.util.List[DBpediaResourceOccurrence] = {
- throw new UnsupportedOperationException("Cannot disambiguate single occurrence. This disambiguator uses multiple occurrences in the same paragraph as disambiguation context.")
- }
-
-}
View
178 core/src/main/scala/org/dbpedia/spotlight/disambiguate/TopicalDisambiguator.scala
@@ -1,178 +0,0 @@
-/*
- * Copyright 2012 DBpedia Spotlight Development Team
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- *
- * Check our project website for information on how to acknowledge the authors and how to contribute to the project: http://spotlight.dbpedia.org
- */
-
-package org.dbpedia.spotlight.disambiguate
-
-import org.apache.commons.logging.LogFactory
-import org.dbpedia.spotlight.lucene.disambiguate.MergedOccurrencesDisambiguator
-import java.lang.UnsupportedOperationException
-import scalaj.collection.Imports._
-import org.apache.lucene.search.similar.MoreLikeThis
-import org.dbpedia.spotlight.exceptions.{ItemNotFoundException, SearchException, InputException}
-import org.apache.lucene.search.{ScoreDoc, Explanation}
-import org.dbpedia.spotlight.model._
-import org.apache.lucene.index.Term
-import com.officedepot.cdap2.collection.CompactHashSet
-import org.dbpedia.spotlight.lucene.search.MergedOccurrencesContextSearcher
-import org.dbpedia.spotlight.lucene.LuceneManager.DBpediaResourceField
-import java.io.StringReader
-import org.dbpedia.spotlight.db.model.TopicalPriorStore
-import org.dbpedia.spotlight.topics.TopicExtractor
-
-/**
- * Uses only topic prior to decide on disambiguation.
- * Baseline disambiguator. For evaluation only.
- *
- * @author pablomendes
- */
-class TopicalDisambiguator(val candidateSearcher: CandidateSearcher,val topicalPriorStore: TopicalPriorStore)
- extends ParagraphDisambiguator {
-
- private val LOG = LogFactory.getLog(this.getClass)
-
- @throws(classOf[InputException])
- def disambiguate(paragraph: Paragraph): List[DBpediaResourceOccurrence] = {
- // return first from each candidate set
- bestK(paragraph, 5)
- .filter(kv =>
- kv._2.nonEmpty)
- .map(kv =>
- kv._2.head)
- .toList
- }
-
- //If you want us to extract allCandidates from occs while we're generating it, you have to pass in the allCandidates parameter
- //If you don't pass anything down, we will just fill in a dummy hashset and let the garbage collector deal with it
- def getCandidates(paragraph: Paragraph, allCandidates: CompactHashSet[DBpediaResource] = CompactHashSet[DBpediaResource]()) = {
- val s1 = System.nanoTime()
- // step1: get candidates for all surface forms (TODO here building allCandidates directly, but could extract from occs)
- val occs = paragraph.occurrences
- .foldLeft(Map[SurfaceFormOccurrence, List[DBpediaResource]]())(
- (acc, sfOcc) => {
- LOG.debug("searching...")
- var candidates = new java.util.HashSet[DBpediaResource]().asScala
- try {
- candidates = candidateSearcher.getCandidates(sfOcc.surfaceForm).asScala //.map(r => r.uri)
- } catch {
- case e: ItemNotFoundException => LOG.debug(e);
- }
- //ATTENTION there is no r.support at this point
- //TODO if support comes from candidate index, it means c(sf,r).
-
- LOG.trace("# candidates for: %s = %s.".format(sfOcc.surfaceForm, candidates.size))
- candidates.foreach(r => allCandidates.add(r))
- acc + (sfOcc -> candidates.toList)
- });
- val e1 = System.nanoTime()
- //LOG.debug("Time with %s: %f.".format(m1, (e1-s1) / 1000000.0 ))
- occs
- }
-
- def getTopicalScore(textTopics: Map[String,Double], resource: DBpediaResource) : Double = {//TODO Topic->Double
- val resourceTopicCounts = topicalPriorStore.getTopicalPriorCounts(resource)
- val topicTotals = topicalPriorStore.getTotalCounts()
- LOG.trace("resource: %s".format(resource.uri))
- val score = textTopics.map{ case (topic,textScore) => {
- val total = topicTotals.get(topic) match {
- case Some(n) => n
- case None => throw new SearchException("Topic set was not loaded correctly.")
- }
- val resourcePrior = resourceTopicCounts.getOrElse(topic,0).toDouble / total.toDouble
- if (resourcePrior>0.0)
- LOG.trace("\t\ttopic: %s, rescource prior: %.5f".format(topic,resourcePrior))
- math.log(resourcePrior) + math.log(textScore)
- }
- }.sum
- math.exp(score)
- }
-
- def bestK(paragraph: Paragraph, k: Int): Map[SurfaceFormOccurrence, List[DBpediaResourceOccurrence]] = {
-
- LOG.debug("Running bestK for paragraph %s.".format(paragraph.id))
-
- if (paragraph.occurrences.size == 0) return Map[SurfaceFormOccurrence, List[DBpediaResourceOccurrence]]()
-
- val topics = TopicExtractor.getTopics(paragraph.text.text)
- LOG.trace("text: %s".format(topics.filter(_._2>0).toMap.toString))
-
- // val m1 = if (candLuceneManager.getDBpediaResourceFactory == null) "lucene" else "jdbc"
- // val m2 = if (contextLuceneManager.getDBpediaResourceFactory == null) "lucene" else "jdbc"
-
- // step1: get candidates for all surface forms
- // (TODO here building allCandidates directly, but could extract from occs)
- var allCandidates = CompactHashSet[DBpediaResource]();
- val occs = getCandidates(paragraph, allCandidates)
-
- // pick the best k for each surface form
- val r = occs.keys.foldLeft(Map[SurfaceFormOccurrence, List[DBpediaResourceOccurrence]]())((acc, aSfOcc) => {
- val candOccs = occs.getOrElse(aSfOcc, List[DBpediaResource]())
- .map(shallowResource => {
-
- val supportConfidence = (1, getTopicalScore(topics,shallowResource))
-
- Factory.DBpediaResourceOccurrence.from(aSfOcc,
- shallowResource,
- supportConfidence)
- })
- .sortBy(o => o.contextualScore) //TODO should be final score
- .reverse
- .take(k)
- acc + (aSfOcc -> candOccs)
- });
-
- // LOG.debug("Reranked (%d)".format(r.size))
-
- r
- }
-
- def name(): String = {
- "TopicalDisambiguator"
- }
-
- def ambiguity(sf: SurfaceForm): Int = {
- candidateSearcher.getAmbiguity(sf)
- }
-
- def support(resource: DBpediaResource): Int = {
- throw new UnsupportedOperationException("Not implemented.")
- }
-
- @throws(classOf[SearchException])
- def explain(goldStandardOccurrence: DBpediaResourceOccurrence, nExplanations: Int): List[Explanation] = {
- throw new UnsupportedOperationException("Not implemented.")
- }
-
- def contextTermsNumber(resource: DBpediaResource): Int = {
- throw new UnsupportedOperationException("Not implemented.")
- }
-
- def averageIdf(context: Text): Double = {
- throw new UnsupportedOperationException("Not implemented.")
- }
-
-
- //TODO rather than throwing an exception, we should split the Disambiguator interface accordingly
- def disambiguate(sfOccurrence: SurfaceFormOccurrence): DBpediaResourceOccurrence = {
- throw new UnsupportedOperationException("Cannot disambiguate single occurrence. This disambiguator uses multiple occurrences in the same paragraph as disambiguation context.")
- }
-
- def bestK(sfOccurrence: SurfaceFormOccurrence, k: Int): java.util.List[DBpediaResourceOccurrence] = {
- throw new UnsupportedOperationException("Cannot disambiguate single occurrence. This disambiguator uses multiple occurrences in the same paragraph as disambiguation context.")
- }
-
-}
View
40 core/src/main/scala/org/dbpedia/spotlight/model/Topic.scala
@@ -1,40 +0,0 @@
-/*
- * Copyright 2011 DBpedia Spotlight Development Team
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- *
- * Check our project website for information on how to acknowledge the authors and how to contribute to the project: http://spotlight.dbpedia.org
- */
-
-package org.dbpedia.spotlight.model
-
-/**
- * A topic: music, sports, politics...
- * @author pablomendes
- */
-
-class Topic(val name: String) {
-
- def canEqual(other: Any): Boolean =
- other.isInstanceOf[Topic]
-
- override def hashCode() : Int = {
- this.name.hashCode
- }
-
- override def equals(other: Any) : Boolean = {
- this.name.equals(other.toString)
- }
-
- override def toString() = this.name
-}
View
87 core/src/main/scala/org/dbpedia/spotlight/topics/TopicExtractor.scala
@@ -1,87 +0,0 @@
-/*
- * *
- * * Copyright 2011 Pablo Mendes, Max Jakob
- * *
- * * Licensed under the Apache License, Version 2.0 (the "License");
- * * you may not use this file except in compliance with the License.
- * * You may obtain a copy of the License at
- * *
- * * http://www.apache.org/licenses/LICENSE-2.0
- * *
- * * Unless required by applicable law or agreed to in writing, software
- * * distributed under the License is distributed on an "AS IS" BASIS,
- * * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * * See the License for the specific language governing permissions and
- * * limitations under the License.
- *
- */
-
-package org.dbpedia.spotlight.topics
-
-import org.apache.commons.httpclient.{HttpStatus, DefaultHttpMethodRetryHandler, HttpClient}
-import org.apache.commons.httpclient.params.HttpMethodParams
-import org.apache.http.HttpException
-import java.io.IOException
-import org.apache.commons.httpclient.methods.GetMethod
-import java.net.URLEncoder
-import net.liftweb.json._
-
-
-/**
- *
- * @author pablomendes
- */
-
-object TopicExtractor {
-
- val client = new HttpClient
- val url_pattern = "http://160.45.137.73:2222/rest/topic?text=%s"
-
- def getTopics(text: String) : Map[String,Double] = {
-
- val url = String.format(url_pattern, URLEncoder.encode(text, "UTF8"))
- val method = new GetMethod(url)
- method.getParams.setParameter(HttpMethodParams.RETRY_HANDLER, new DefaultHttpMethodRetryHandler(3, false))
-
- var response = ""
- try {
- val statusCode: Int = client.executeMethod(method)
- if (statusCode != HttpStatus.SC_OK) {
- println("Method failed: " + method.getStatusLine)
- }
- val responseBody: Array[Byte] = method.getResponseBody
- response = new String(responseBody)
- }
- catch {
- case e: HttpException => {
- println("Fatal protocol violation: " + e.getMessage)
- }
- case e: IOException => {
- println("Fatal transport error: " + e.getMessage)
- println(method.getQueryString)
- }
- }
- finally {
- method.releaseConnection
- }
-
- val parsed = parse(response)
- val pairs = (parsed \\ "topic" \\ classOf[JField])
- val topics = pairs.filter(_._1.equals("@topic")).map(p => p._2.toString)
- val scores = pairs.filter(_._1.equals("@score")).map(p => p._2.toString.toDouble)
- val map = topics.zip(scores).toMap[String,Double]
- map
- }
-
- def main(args: Array[String]) {
-
- val text = "basketball michael jordan"
-
- val response = getTopics(text)
- println("Response: "+response)
-
-
-
- }
-
-}

0 comments on commit 2ffa1d8

Please sign in to comment.
Something went wrong with that request. Please try again.