Skip to content
This repository
Browse code

rolling back topical, moving to branch

  • Loading branch information...
commit 2ffa1d8d6078b4bce6577c13568eb8ec95323b54 1 parent 9c0d806
Pablo Mendes pablomendes authored
1  .gitignore
... ... @@ -1,3 +1,4 @@
1 1 *.iml
2 2 .idea
3 3 target
  4 +*.log
122 core/src/main/scala/org/dbpedia/spotlight/db/model/TopicalPriorStore.scala
... ... @@ -1,122 +0,0 @@
1   -package org.dbpedia.spotlight.db.model
2   -
3   -import org.dbpedia.spotlight.model.{Topic, DBpediaResource}
4   -import io.Source
5   -import java.io.{IOException, File}
6   -import com.officedepot.cdap2.collection.CompactHashMap
7   -import org.apache.commons.logging.LogFactory
8   -
9   -/**
10   - *
11   - * @author pablomendes
12   - */
13   -
14   -trait TopicalPriorStore {
15   -
16   -// def getTopicalPriorCounts(resource:DBpediaResource): Map[Topic,Int]
17   -// def getTopicalPriorCount(resource:DBpediaResource, topic: Topic): Int
18   -// def getTotalCounts(): Map[Topic,Int]
19   -
20   - def getTopicalPriorCounts(resource:DBpediaResource): Map[String,Int]
21   - def getTopicalPriorCount(resource:DBpediaResource, topic: String): Int
22   - def getTotalCounts(): Map[String,Int]
23   -
24   -}
25   -
26   -object HashMapTopicalPriorStore extends TopicalPriorStore {
27   - private val LOG = LogFactory.getLog(this.getClass)
28   -
29   - val totalCounts = new CompactHashMap[String,Int]() // topic -> total
30   - val topicalPriors = new CompactHashMap[String,CompactHashMap[String,Int]]() // topic -> (resource -> count)
31   -
32   - val ExtractCountAndResource = """^\s+(\d+)\s+(\S+)$""".r
33   -
34   -// def getTotalCounts() = {
35   -// totalCounts.map{ case (topic,count) => new Topic(topic) -> count }.toMap
36   -// }
37   -//
38   -// def getTopicalPriorCounts(resource:DBpediaResource): Map[Topic,Int] = {
39   -// topicalPriors.keys.map( topic => {
40   -// new Topic(topic) -> getTopicalPriorCount(resource.uri,topic)
41   -// }).toMap
42   -// }
43   -//
44   -// def getTopicalPriorCount(resource: DBpediaResource, topic: Topic): Int = getTopicalPriorCount(resource.uri, topic.name)
45   -
46   - def getTotalCounts() = {
47   - totalCounts.toMap
48   - }
49   - def getTopicalPriorCounts(resource:DBpediaResource): Map[String,Int] = {
50   - topicalPriors.keys.map( topic => {
51   - topic -> getTopicalPriorCount(resource.uri,topic)
52   - }).toMap
53   - }
54   - def getTopicalPriorCount(resource: DBpediaResource, topic: String): Int = getTopicalPriorCount(resource.uri, topic)
55   -
56   -
57   - private def getTopicalPriorCount(uri: String, topic: String): Int = {
58   - val statsForTopic = topicalPriors.getOrElse(topic, new CompactHashMap[String,Int])
59   - statsForTopic.getOrElse(uri, 0)
60   - }
61   -
62   - def fromDir(dir: File) : TopicalPriorStore = {
63   - LOG.info("Loading topical priors.")
64   - if (dir.exists() && dir.isDirectory) {
65   - dir.listFiles().foreach( file => {
66   - if (file.getName.endsWith(".count")) {
67   - var total = 0
68   - val topic = file.getName.replaceAll(".count","").trim
69   - val statsForTopic = new CompactHashMap[String,Int]
70   - Source.fromFile(file).getLines()
71   - //.take(5)
72   - .foreach( line => {
73   - line match {
74   - case ExtractCountAndResource(count,uri) => {
75   - val c = count.toInt
76   - statsForTopic.put(uri.trim,c)
77   - total = total + c
78   - }
79   - case _ => println("no match")
80   - }
81   - })
82   - topicalPriors.put(topic,statsForTopic)
83   - totalCounts.put(topic, total)
84   - }
85   - })
86   - } else {
87   - throw new IOException("Could not load directory with topics.")
88   - }
89   - LOG.info("Done.")
90   - this
91   - }
92   -
93   - def main(args: Array[String]) {
94   - val dir = if (args.size>0) new File(args(0)) else new File("data/topics")
95   - fromDir(dir)
96   - stats()
97   - test()
98   - }
99   -
100   - def stats() {
101   -
102   - println("topics "+topicalPriors.keys.size)
103   -
104   - topicalPriors.keys.foreach( topic => {
105   - println("topic: %s; resources: %d".format(topic,topicalPriors.getOrElse(topic, new CompactHashMap[String,Int]).size))
106   - })
107   -
108   - println("# topics: "+topicalPriors.keys.size)
109   -
110   - }
111   -
112   - def test() {
113   -// val mjCount = getTopicalPriorCount(new DBpediaResource("Michael_Jackson"), new Topic("other"))
114   -// val otherCount = getTotalCounts().getOrElse(new Topic("other"),0)
115   -// println("MJ distribution: "+getTopicalPriorCounts(new DBpediaResource("Michael_Jackson")))
116   -// println("c(MJ,other): "+mjCount)
117   -// println("c(other): "+otherCount)
118   -// println("p(MJ|other): "+ mjCount.toDouble / otherCount)
119   -// println("log(p(MJ|other)): "+ scala.math.log(mjCount.toDouble / otherCount))
120   - }
121   -
122   -}
226 core/src/main/scala/org/dbpedia/spotlight/disambiguate/TopicBiasedDisambiguator.scala
... ... @@ -1,226 +0,0 @@
1   -/*
2   - * Copyright 2012 DBpedia Spotlight Development Team
3   - *
4   - * Licensed under the Apache License, Version 2.0 (the "License");
5   - * you may not use this file except in compliance with the License.
6   - * You may obtain a copy of the License at
7   - *
8   - * http://www.apache.org/licenses/LICENSE-2.0
9   - *
10   - * Unless required by applicable law or agreed to in writing, software
11   - * distributed under the License is distributed on an "AS IS" BASIS,
12   - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13   - * See the License for the specific language governing permissions and
14   - * limitations under the License.
15   - *
16   - * Check our project website for information on how to acknowledge the authors and how to contribute to the project: http://spotlight.dbpedia.org
17   - */
18   -
19   -package org.dbpedia.spotlight.disambiguate
20   -
21   -import org.apache.commons.logging.LogFactory
22   -import java.lang.UnsupportedOperationException
23   -import scalaj.collection.Imports._
24   -import org.apache.lucene.search.similar.MoreLikeThis
25   -import org.dbpedia.spotlight.exceptions.{ItemNotFoundException, SearchException, InputException}
26   -import org.apache.lucene.search.{ScoreDoc, Explanation}
27   -import org.dbpedia.spotlight.model._
28   -import org.apache.lucene.index.Term
29   -import com.officedepot.cdap2.collection.CompactHashSet
30   -import org.dbpedia.spotlight.lucene.search.MergedOccurrencesContextSearcher
31   -import org.dbpedia.spotlight.lucene.LuceneManager.DBpediaResourceField
32   -import java.io.StringReader
33   -import org.dbpedia.spotlight.db.model.TopicalPriorStore
34   -import org.dbpedia.spotlight.topics.TopicExtractor
35   -
36   -/**
37   - *
38   - * @author pablomendes
39   - */
40   -class TopicBiasedDisambiguator(val candidateSearcher: CandidateSearcher,
41   - val contextSearcher: MergedOccurrencesContextSearcher, //TODO should be ContextSearcher. Need a generic disambiguator to enable this.
42   - val topicalPriorStore: TopicalPriorStore)
43   - extends ParagraphDisambiguator {
44   -
45   - private val LOG = LogFactory.getLog(this.getClass)
46   -
47   - @throws(classOf[InputException])
48   - def disambiguate(paragraph: Paragraph): List[DBpediaResourceOccurrence] = {
49   - // return first from each candidate set
50   - bestK(paragraph, 5)
51   - .filter(kv =>
52   - kv._2.nonEmpty)
53   - .map(kv =>
54   - kv._2.head)
55   - .toList
56   - }
57   -
58   - //WARNING: this is repetition of BaseSearcher.getHits
59   - //TODO move to subclass of BaseSearcher
60   - def query(text: Text, allowedUris: Array[DBpediaResource]) = {
61   - LOG.debug("Setting up query.")
62   -
63   - val context = if (text.text.size < 250) text.text.concat(" " + text.text) else text.text //HACK for text that is too short
64   - //LOG.debug(context)
65   - val nHits = allowedUris.size
66   - val filter = new org.apache.lucene.search.TermsFilter() //TODO can use caching? val filter = new FieldCacheTermsFilter(DBpediaResourceField.CONTEXT.toString,allowedUris)
67   - allowedUris.foreach(u => filter.addTerm(new Term(DBpediaResourceField.URI.toString, u.uri)))
68   -
69   - val mlt = new MoreLikeThis(contextSearcher.mReader);
70   - mlt.setFieldNames(Array(DBpediaResourceField.CONTEXT.toString))
71   - mlt.setAnalyzer(contextSearcher.getLuceneManager.defaultAnalyzer)
72   - //LOG.debug("Analyzer %s".format(contextLuceneManager.defaultAnalyzer))
73   - //val inputStream = new ByteArrayInputStream(context.getBytes("UTF-8"));
74   - val query = mlt.like(new StringReader(context), DBpediaResourceField.CONTEXT.toString);
75   - LOG.debug("Running query.")
76   - contextSearcher.getHits(query, nHits, 50000, filter)
77   - }
78   -
79   - //If you want us to extract allCandidates from occs while we're generating it, you have to pass in the allCandidates parameter
80   - //If you don't pass anything down, we will just fill in a dummy hashset and let the garbage collector deal with it
81   - def getCandidates(paragraph: Paragraph, allCandidates: CompactHashSet[DBpediaResource] = CompactHashSet[DBpediaResource]()) = {
82   - val s1 = System.nanoTime()
83   - // step1: get candidates for all surface forms (TODO here building allCandidates directly, but could extract from occs)
84   - val occs = paragraph.occurrences
85   - .foldLeft(Map[SurfaceFormOccurrence, List[DBpediaResource]]())(
86   - (acc, sfOcc) => {
87   - LOG.debug("searching...")
88   - var candidates = new java.util.HashSet[DBpediaResource]().asScala
89   - try {
90   - candidates = candidateSearcher.getCandidates(sfOcc.surfaceForm).asScala //.map(r => r.uri)
91   - } catch {
92   - case e: ItemNotFoundException => LOG.debug(e);
93   - }
94   - //ATTENTION there is no r.support at this point
95   - //TODO if support comes from candidate index, it means c(sf,r).
96   -
97   - //LOG.debug("# candidates for: %s = %s (%s)".format(sfOcc.surfaceForm,candidates.size,candidates))
98   - LOG.debug("# candidates for: %s = %s.".format(sfOcc.surfaceForm, candidates.size))
99   - candidates.foreach(r => allCandidates.add(r))
100   - acc + (sfOcc -> candidates.toList)
101   - });
102   - val e1 = System.nanoTime()
103   - //LOG.debug("Time with %s: %f.".format(m1, (e1-s1) / 1000000.0 ))
104   - occs
105   - }
106   -
107   - def getTopicalScore(textTopics: Map[String, Double], resource: DBpediaResource): Double = {
108   - //TODO Topic->Double
109   - val resourceTopicCounts = topicalPriorStore.getTopicalPriorCounts(resource)
110   - val topicTotals = topicalPriorStore.getTotalCounts()
111   - val score = textTopics.map {
112   - case (topic, textScore) => {
113   - val total = topicTotals.get(topic) match {
114   - case Some(n) => n
115   - case None => throw new SearchException("Topic set was not loaded correctly.")
116   - }
117   - val resourcePrior = resourceTopicCounts.getOrElse(topic, 0).toDouble / total.toDouble
118   - math.log(resourcePrior) + math.log(textScore)
119   - }
120   - }.sum
121   - math.exp(score)
122   - }
123   -
124   - def bestK(paragraph: Paragraph, k: Int): Map[SurfaceFormOccurrence, List[DBpediaResourceOccurrence]] = {
125   -
126   - LOG.debug("Running bestK for paragraph %s.".format(paragraph.id))
127   -
128   - if (paragraph.occurrences.size == 0) return Map[SurfaceFormOccurrence, List[DBpediaResourceOccurrence]]()
129   -
130   - val topics = TopicExtractor.getTopics(paragraph.text.text)
131   -
132   - // val m1 = if (candLuceneManager.getDBpediaResourceFactory == null) "lucene" else "jdbc"
133   - // val m2 = if (contextLuceneManager.getDBpediaResourceFactory == null) "lucene" else "jdbc"
134   -
135   - // step1: get candidates for all surface forms
136   - // (TODO here building allCandidates directly, but could extract from occs)
137   - var allCandidates = CompactHashSet[DBpediaResource]();
138   - val occs = getCandidates(paragraph, allCandidates)
139   -
140   - val s2 = System.nanoTime()
141   - // step2: query once for the paragraph context, get scores for each candidate resource
142   - var hits: Array[ScoreDoc] = null
143   - try {
144   - hits = query(paragraph.text, allCandidates.toArray)
145   - } catch {
146   - case e: Exception => throw new SearchException(e);
147   - case r: RuntimeException => throw new SearchException(r);
148   - case _ => LOG.error("Unknown really scary error happened. You can cry now.")
149   - }
150   - // LOG.debug("Hits (%d): %s".format(hits.size, hits.map( sd => "%s=%s".format(sd.doc,sd.score) ).mkString(",")))
151   -
152   - // LOG.debug("Reading DBpediaResources.")
153   - val scores = hits
154   - .foldRight(Map[String, Tuple2[DBpediaResource, Double]]())((hit, acc) => {
155   - var resource: DBpediaResource = contextSearcher.getDBpediaResource(hit.doc) //this method returns resource.support=c(r)
156   - val topicalScore = getTopicalScore(topics, resource)
157   - var score = if (topicalScore > 0) hit.score * topicalScore else hit.score
158   - //TODO can mix here the scores: c(s,r) / c(r)
159   - acc + (resource.uri ->(resource, score))
160   - });
161   - val e2 = System.nanoTime()
162   - //LOG.debug("Scores (%d): %s".format(scores.size, scores))
163   -
164   - //LOG.debug("Time with %s: %f.".format(m2, (e2-s2) / 1000000.0 ))
165   -
166   - // pick the best k for each surface form
167   - val r = occs.keys.foldLeft(Map[SurfaceFormOccurrence, List[DBpediaResourceOccurrence]]())((acc, aSfOcc) => {
168   - val candOccs = occs.getOrElse(aSfOcc, List[DBpediaResource]())
169   - .map(shallowResource => {
170   - val (resource: DBpediaResource, supportConfidence: (Int, Double)) = scores.get(shallowResource.uri) match {
171   - case Some((fullResource, contextualScore)) => {
172   - (fullResource, (fullResource.support, contextualScore))
173   - }
174   - case _ => (shallowResource, (shallowResource.support, 0.0))
175   - }
176   - Factory.DBpediaResourceOccurrence.from(aSfOcc,
177   - resource, //TODO this resource may contain the c(s,r) that can be used for conditional prob.
178   - supportConfidence)
179   - })
180   - .sortBy(o => o.contextualScore) //TODO should be final score
181   - .reverse
182   - .take(k)
183   - acc + (aSfOcc -> candOccs)
184   - });
185   -
186   - // LOG.debug("Reranked (%d)".format(r.size))
187   -
188   - r
189   - }
190   -
191   - def name(): String = {
192   - "TopicBiasedDisambiguator"
193   - }
194   -
195   - def ambiguity(sf: SurfaceForm): Int = {
196   - candidateSearcher.getAmbiguity(sf)
197   - }
198   -
199   - def support(resource: DBpediaResource): Int = {
200   - throw new UnsupportedOperationException("Not implemented.")
201   - }
202   -
203   - @throws(classOf[SearchException])
204   - def explain(goldStandardOccurrence: DBpediaResourceOccurrence, nExplanations: Int): List[Explanation] = {
205   - throw new UnsupportedOperationException("Not implemented.")
206   - }
207   -
208   - def contextTermsNumber(resource: DBpediaResource): Int = {
209   - throw new UnsupportedOperationException("Not implemented.")
210   - }
211   -
212   - def averageIdf(context: Text): Double = {
213   - throw new UnsupportedOperationException("Not implemented.")
214   - }
215   -
216   -
217   - //TODO better than throw exception, we should split the interface Disambiguator accordingly
218   - def disambiguate(sfOccurrence: SurfaceFormOccurrence): DBpediaResourceOccurrence = {
219   - throw new UnsupportedOperationException("Cannot disambiguate single occurrence. This disambiguator uses multiple occurrences in the same paragraph as disambiguation context.")
220   - }
221   -
222   - def bestK(sfOccurrence: SurfaceFormOccurrence, k: Int): java.util.List[DBpediaResourceOccurrence] = {
223   - throw new UnsupportedOperationException("Cannot disambiguate single occurrence. This disambiguator uses multiple occurrences in the same paragraph as disambiguation context.")
224   - }
225   -
226   -}
178 core/src/main/scala/org/dbpedia/spotlight/disambiguate/TopicalDisambiguator.scala
... ... @@ -1,178 +0,0 @@
1   -/*
2   - * Copyright 2012 DBpedia Spotlight Development Team
3   - *
4   - * Licensed under the Apache License, Version 2.0 (the "License");
5   - * you may not use this file except in compliance with the License.
6   - * You may obtain a copy of the License at
7   - *
8   - * http://www.apache.org/licenses/LICENSE-2.0
9   - *
10   - * Unless required by applicable law or agreed to in writing, software
11   - * distributed under the License is distributed on an "AS IS" BASIS,
12   - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13   - * See the License for the specific language governing permissions and
14   - * limitations under the License.
15   - *
16   - * Check our project website for information on how to acknowledge the authors and how to contribute to the project: http://spotlight.dbpedia.org
17   - */
18   -
19   -package org.dbpedia.spotlight.disambiguate
20   -
21   -import org.apache.commons.logging.LogFactory
22   -import org.dbpedia.spotlight.lucene.disambiguate.MergedOccurrencesDisambiguator
23   -import java.lang.UnsupportedOperationException
24   -import scalaj.collection.Imports._
25   -import org.apache.lucene.search.similar.MoreLikeThis
26   -import org.dbpedia.spotlight.exceptions.{ItemNotFoundException, SearchException, InputException}
27   -import org.apache.lucene.search.{ScoreDoc, Explanation}
28   -import org.dbpedia.spotlight.model._
29   -import org.apache.lucene.index.Term
30   -import com.officedepot.cdap2.collection.CompactHashSet
31   -import org.dbpedia.spotlight.lucene.search.MergedOccurrencesContextSearcher
32   -import org.dbpedia.spotlight.lucene.LuceneManager.DBpediaResourceField
33   -import java.io.StringReader
34   -import org.dbpedia.spotlight.db.model.TopicalPriorStore
35   -import org.dbpedia.spotlight.topics.TopicExtractor
36   -
37   -/**
38   - * Uses only topic prior to decide on disambiguation.
40   - * Baseline disambiguator. For evaluation only.
40   - *
41   - * @author pablomendes
42   - */
43   -class TopicalDisambiguator(val candidateSearcher: CandidateSearcher,val topicalPriorStore: TopicalPriorStore)
44   - extends ParagraphDisambiguator {
45   -
46   - private val LOG = LogFactory.getLog(this.getClass)
47   -
48   - @throws(classOf[InputException])
49   - def disambiguate(paragraph: Paragraph): List[DBpediaResourceOccurrence] = {
50   - // return first from each candidate set
51   - bestK(paragraph, 5)
52   - .filter(kv =>
53   - kv._2.nonEmpty)
54   - .map(kv =>
55   - kv._2.head)
56   - .toList
57   - }
58   -
59   - //If you want us to extract allCandidates from occs while we're generating it, you have to pass in the allCandidates parameter
60   - //If you don't pass anything down, we will just fill in a dummy hashset and let the garbage collector deal with it
61   - def getCandidates(paragraph: Paragraph, allCandidates: CompactHashSet[DBpediaResource] = CompactHashSet[DBpediaResource]()) = {
62   - val s1 = System.nanoTime()
63   - // step1: get candidates for all surface forms (TODO here building allCandidates directly, but could extract from occs)
64   - val occs = paragraph.occurrences
65   - .foldLeft(Map[SurfaceFormOccurrence, List[DBpediaResource]]())(
66   - (acc, sfOcc) => {
67   - LOG.debug("searching...")
68   - var candidates = new java.util.HashSet[DBpediaResource]().asScala
69   - try {
70   - candidates = candidateSearcher.getCandidates(sfOcc.surfaceForm).asScala //.map(r => r.uri)
71   - } catch {
72   - case e: ItemNotFoundException => LOG.debug(e);
73   - }
74   - //ATTENTION there is no r.support at this point
75   - //TODO if support comes from candidate index, it means c(sf,r).
76   -
77   - LOG.trace("# candidates for: %s = %s.".format(sfOcc.surfaceForm, candidates.size))
78   - candidates.foreach(r => allCandidates.add(r))
79   - acc + (sfOcc -> candidates.toList)
80   - });
81   - val e1 = System.nanoTime()
82   - //LOG.debug("Time with %s: %f.".format(m1, (e1-s1) / 1000000.0 ))
83   - occs
84   - }
85   -
86   - def getTopicalScore(textTopics: Map[String,Double], resource: DBpediaResource) : Double = {//TODO Topic->Double
87   - val resourceTopicCounts = topicalPriorStore.getTopicalPriorCounts(resource)
88   - val topicTotals = topicalPriorStore.getTotalCounts()
89   - LOG.trace("resource: %s".format(resource.uri))
90   - val score = textTopics.map{ case (topic,textScore) => {
91   - val total = topicTotals.get(topic) match {
92   - case Some(n) => n
93   - case None => throw new SearchException("Topic set was not loaded correctly.")
94   - }
95   - val resourcePrior = resourceTopicCounts.getOrElse(topic,0).toDouble / total.toDouble
96   - if (resourcePrior>0.0)
97   - LOG.trace("\t\ttopic: %s, rescource prior: %.5f".format(topic,resourcePrior))
98   - math.log(resourcePrior) + math.log(textScore)
99   - }
100   - }.sum
101   - math.exp(score)
102   - }
103   -
104   - def bestK(paragraph: Paragraph, k: Int): Map[SurfaceFormOccurrence, List[DBpediaResourceOccurrence]] = {
105   -
106   - LOG.debug("Running bestK for paragraph %s.".format(paragraph.id))
107   -
108   - if (paragraph.occurrences.size == 0) return Map[SurfaceFormOccurrence, List[DBpediaResourceOccurrence]]()
109   -
110   - val topics = TopicExtractor.getTopics(paragraph.text.text)
111   - LOG.trace("text: %s".format(topics.filter(_._2>0).toMap.toString))
112   -
113   - // val m1 = if (candLuceneManager.getDBpediaResourceFactory == null) "lucene" else "jdbc"
114   - // val m2 = if (contextLuceneManager.getDBpediaResourceFactory == null) "lucene" else "jdbc"
115   -
116   - // step1: get candidates for all surface forms
117   - // (TODO here building allCandidates directly, but could extract from occs)
118   - var allCandidates = CompactHashSet[DBpediaResource]();
119   - val occs = getCandidates(paragraph, allCandidates)
120   -
121   - // pick the best k for each surface form
122   - val r = occs.keys.foldLeft(Map[SurfaceFormOccurrence, List[DBpediaResourceOccurrence]]())((acc, aSfOcc) => {
123   - val candOccs = occs.getOrElse(aSfOcc, List[DBpediaResource]())
124   - .map(shallowResource => {
125   -
126   - val supportConfidence = (1, getTopicalScore(topics,shallowResource))
127   -
128   - Factory.DBpediaResourceOccurrence.from(aSfOcc,
129   - shallowResource,
130   - supportConfidence)
131   - })
132   - .sortBy(o => o.contextualScore) //TODO should be final score
133   - .reverse
134   - .take(k)
135   - acc + (aSfOcc -> candOccs)
136   - });
137   -
138   - // LOG.debug("Reranked (%d)".format(r.size))
139   -
140   - r
141   - }
142   -
143   - def name(): String = {
144   - "TopicalDisambiguator"
145   - }
146   -
147   - def ambiguity(sf: SurfaceForm): Int = {
148   - candidateSearcher.getAmbiguity(sf)
149   - }
150   -
151   - def support(resource: DBpediaResource): Int = {
152   - throw new UnsupportedOperationException("Not implemented.")
153   - }
154   -
155   - @throws(classOf[SearchException])
156   - def explain(goldStandardOccurrence: DBpediaResourceOccurrence, nExplanations: Int): List[Explanation] = {
157   - throw new UnsupportedOperationException("Not implemented.")
158   - }
159   -
160   - def contextTermsNumber(resource: DBpediaResource): Int = {
161   - throw new UnsupportedOperationException("Not implemented.")
162   - }
163   -
164   - def averageIdf(context: Text): Double = {
165   - throw new UnsupportedOperationException("Not implemented.")
166   - }
167   -
168   -
169   - //TODO better than throw exception, we should split the interface Disambiguator accordingly
170   - def disambiguate(sfOccurrence: SurfaceFormOccurrence): DBpediaResourceOccurrence = {
171   - throw new UnsupportedOperationException("Cannot disambiguate single occurrence. This disambiguator uses multiple occurrences in the same paragraph as disambiguation context.")
172   - }
173   -
174   - def bestK(sfOccurrence: SurfaceFormOccurrence, k: Int): java.util.List[DBpediaResourceOccurrence] = {
175   - throw new UnsupportedOperationException("Cannot disambiguate single occurrence. This disambiguator uses multiple occurrences in the same paragraph as disambiguation context.")
176   - }
177   -
178   -}
40 core/src/main/scala/org/dbpedia/spotlight/model/Topic.scala
... ... @@ -1,40 +0,0 @@
1   -/*
2   - * Copyright 2011 DBpedia Spotlight Development Team
3   - *
4   - * Licensed under the Apache License, Version 2.0 (the "License");
5   - * you may not use this file except in compliance with the License.
6   - * You may obtain a copy of the License at
7   - *
8   - * http://www.apache.org/licenses/LICENSE-2.0
9   - *
10   - * Unless required by applicable law or agreed to in writing, software
11   - * distributed under the License is distributed on an "AS IS" BASIS,
12   - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13   - * See the License for the specific language governing permissions and
14   - * limitations under the License.
15   - *
16   - * Check our project website for information on how to acknowledge the authors and how to contribute to the project: http://spotlight.dbpedia.org
17   - */
18   -
19   -package org.dbpedia.spotlight.model
20   -
21   -/**
22   - * A topic: music, sports, politics...
23   - * @author pablomendes
24   - */
25   -
26   -class Topic(val name: String) {
27   -
28   - def canEqual(other: Any): Boolean =
29   - other.isInstanceOf[Topic]
30   -
31   - override def hashCode() : Int = {
32   - this.name.hashCode
33   - }
34   -
35   - override def equals(other: Any) : Boolean = {
36   - this.name.equals(other.toString)
37   - }
38   -
39   - override def toString() = this.name
40   -}
87 core/src/main/scala/org/dbpedia/spotlight/topics/TopicExtractor.scala
... ... @@ -1,87 +0,0 @@
1   -/*
2   - * *
3   - * * Copyright 2011 Pablo Mendes, Max Jakob
4   - * *
5   - * * Licensed under the Apache License, Version 2.0 (the "License");
6   - * * you may not use this file except in compliance with the License.
7   - * * You may obtain a copy of the License at
8   - * *
9   - * * http://www.apache.org/licenses/LICENSE-2.0
10   - * *
11   - * * Unless required by applicable law or agreed to in writing, software
12   - * * distributed under the License is distributed on an "AS IS" BASIS,
13   - * * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   - * * See the License for the specific language governing permissions and
15   - * * limitations under the License.
16   - *
17   - */
18   -
19   -package org.dbpedia.spotlight.topics
20   -
21   -import org.apache.commons.httpclient.{HttpStatus, DefaultHttpMethodRetryHandler, HttpClient}
22   -import org.apache.commons.httpclient.params.HttpMethodParams
23   -import org.apache.http.HttpException
24   -import java.io.IOException
25   -import org.apache.commons.httpclient.methods.GetMethod
26   -import java.net.URLEncoder
27   -import net.liftweb.json._
28   -
29   -
30   -/**
31   - *
32   - * @author pablomendes
33   - */
34   -
35   -object TopicExtractor {
36   -
37   - val client = new HttpClient
38   - val url_pattern = "http://160.45.137.73:2222/rest/topic?text=%s"
39   -
40   - def getTopics(text: String) : Map[String,Double] = {
41   -
42   - val url = String.format(url_pattern, URLEncoder.encode(text, "UTF8"))
43   - val method = new GetMethod(url)
44   - method.getParams.setParameter(HttpMethodParams.RETRY_HANDLER, new DefaultHttpMethodRetryHandler(3, false))
45   -
46   - var response = ""
47   - try {
48   - val statusCode: Int = client.executeMethod(method)
49   - if (statusCode != HttpStatus.SC_OK) {
50   - println("Method failed: " + method.getStatusLine)
51   - }
52   - val responseBody: Array[Byte] = method.getResponseBody
53   - response = new String(responseBody)
54   - }
55   - catch {
56   - case e: HttpException => {
57   - println("Fatal protocol violation: " + e.getMessage)
58   - }
59   - case e: IOException => {
60   - println("Fatal transport error: " + e.getMessage)
61   - println(method.getQueryString)
62   - }
63   - }
64   - finally {
65   - method.releaseConnection
66   - }
67   -
68   - val parsed = parse(response)
69   - val pairs = (parsed \\ "topic" \\ classOf[JField])
70   - val topics = pairs.filter(_._1.equals("@topic")).map(p => p._2.toString)
71   - val scores = pairs.filter(_._1.equals("@score")).map(p => p._2.toString.toDouble)
72   - val map = topics.zip(scores).toMap[String,Double]
73   - map
74   - }
75   -
76   - def main(args: Array[String]) {
77   -
78   - val text = "basketball michael jordan"
79   -
80   - val response = getTopics(text)
81   - println("Response: "+response)
82   -
83   -
84   -
85   - }
86   -
87   -}

0 comments on commit 2ffa1d8

Please sign in to comment.
Something went wrong with that request. Please try again.