Permalink
Browse files

Slim version of the live and topical modules; removed everything that was no longer necessary.
  • Loading branch information...
1 parent 65bff5e commit 48c53d5cbbe357319b8a56bc2f371909005f00d1 @dirkweissenborn dirkweissenborn committed Jul 8, 2013
@@ -65,5 +65,6 @@ org.dbpedia.spotlight.yahoo.region = us
# Topical configuration
org.dbpedia.spotlight.data.sortedArticlesCategories=/media/dirk/Data/Wikipedia/sorted.article_categories_en.nt
+# NaiveBayesTopicalClassifier is the only classifier type available so far
org.dbpedia.spotlight.topic.classifier.type=NaiveBayesTopicalClassifier
org.dbpedia.spotlight.topic.description=conf/topic_descriptions.xml
@@ -6,98 +6,79 @@
<iptc mediatopic="20000003"/>
<iptc mediatopic="20000004"/>
<categories>Animation,Cartooning</categories>
- <keywords></keywords>
</topic>
<topic name="cinema">
<iptc mediatopic="20000005"/>
<categories>Film</categories>
- <keywords></keywords>
</topic>
<topic name="literature">
<iptc mediatopic="20000013"/>
<categories>Literature</categories>
- <keywords></keywords>
</topic>
<topic name="music">
<iptc mediatopic="20000018"/>
- <categories>Music</categories>
- <keywords></keywords>
+ <categories>Music,Music_genres,20th-century_music_genres</categories>
</topic>
<topic name="performing_arts"> <!--"theatre_dance_opera" renamed because many things from all performing arts occurred-->
<iptc mediatopic="20000028"/>
<iptc mediatopic="20000029"/>
<iptc mediatopic="20000007"/>
<categories>Opera,Opera_genres,Dance,Theatre</categories>
- <keywords></keywords>
</topic>
<!-- Visual arts -->
<topic name="architecture">
<iptc mediatopic="20000032"/>
<categories>Architecture</categories>
- <keywords></keywords>
<!--feed url="http://topics.nytimes.com/top/reference/timestopics/subjects/a/architecture/index.html?rss=1"/>
<feed url="http://www.architectsjournal.co.uk/XmlServers/navsectionRSS.aspx?navsectioncode=3"/-->
</topic>
- <!--topic name="fashion">
- <iptc mediatopic="20000011"/>
- <categories>Fashion,Clothing</categories>
- <keywords></keywords>
- </topic DID NOT WORK WELL-->
-
<topic name="painting_drawing">
<iptc mediatopic="20000035"/>
<iptc mediatopic="20000034"/>
<categories>Painting,Drawing</categories>
- <keywords></keywords>
</topic>
<topic name="sculpture">
<iptc mediatopic="20000037"/>
<categories>Sculpture</categories>
- <keywords></keywords>
</topic>
<!-- economy, business, finance-->
<topic name="economy_business_finance">
<iptc mediatopic="20000344"/>
<categories>Business,Finance</categories>
- <keywords></keywords>
</topic>
<!-- natural science -->
<topic name="biology">
<iptc mediatopic="20000719"/>
<categories>Biology</categories>
- <keywords></keywords>
<!--feed url="http://feeds.biologynews.net/biologynews/headlines?format=xml"/-->
</topic>
<topic name="chemistry">
<iptc mediatopic="20000725"/>
<categories>Chemistry</categories>
- <keywords></keywords>
</topic>
<topic name="geology_prehistoriclife"><!--"geology"-->
<iptc mediatopic="20000727"/>
<categories>Geology</categories>
- <keywords></keywords>
</topic>
<topic name="physics">
<iptc mediatopic="20000731"/>
<categories>Physics</categories>
- <keywords></keywords>
</topic>
@@ -106,28 +87,24 @@
<topic name="technology_engineering">
<iptc mediatopic="20000756"/>
<categories>Technology,Engineering</categories>
- <keywords></keywords>
</topic>
<!-- Crime, Law, Justice -->
<topic name="crime">
<iptc mediatopic="20000082"/>
<categories>Criminology,Crime</categories>
- <keywords></keywords>
</topic>
<topic name="law">
<iptc mediatopic="20000121"/>
<categories>Law</categories>
- <keywords></keywords>
</topic>
<!-- Education -->
<topic name="education">
<iptc mediatopic="05000000"/>
<categories>Education</categories>
- <keywords></keywords>
</topic>
@@ -136,51 +113,43 @@
<iptc mediatopic="20000248"/>
<iptc mediatopic="20000244"/>
<categories>Food_and_drink,Cuisine</categories>
- <keywords></keywords>
</topic>
<!-- Social Sciences -->
<topic name="anthropology">
<iptc mediatopic="20000743"/>
<categories>Anthropology</categories>
- <keywords></keywords>
</topic>
<topic name="archaeology">
<iptc mediatopic="20000744"/>
<categories>Archaeology</categories>
- <keywords></keywords>
</topic>
<topic name="economics">
<iptc mediatopic="20000745"/>
<categories>Economics</categories>
- <keywords></keywords>
</topic>
<topic name="geography">
<iptc mediatopic="20000746"/>
<categories>Geography,Places</categories>
- <keywords></keywords>
</topic>
<topic name="history">
<iptc mediatopic="20000747"/>
<categories>History,Chronology</categories>
- <keywords></keywords>
</topic>
<topic name="philosophy">
<iptc mediatopic="20000751"/>
<categories>Philosophy</categories>
- <keywords></keywords>
</topic>
<topic name="psychology">
<iptc mediatopic="20000753"/>
<categories>Psychology</categories>
- <keywords></keywords>
</topic>
<!-- politics -->
@@ -189,78 +158,67 @@
<iptc mediatopic="11000000"/>
<iptc mediatopic="20000752"/>
<categories>Politics,Political_science</categories>
- <keywords></keywords>
</topic>
<!-- Health -->
<topic name="health">
<iptc mediatopic="07000000"/>
<categories>Health,Diseases_and_disorders,Health_sciences</categories>
- <keywords></keywords>
</topic>
<!-- Structural Science -->
<topic name="mathematics">
<iptc mediatopic="20000715"/>
<categories>Mathematics</categories>
- <keywords></keywords>
</topic>
<topic name="computer_science">
<iptc mediatopic="20000763"/>
<categories>Computer_science,Computing</categories>
- <keywords></keywords>
</topic>
<!-- Sport -->
<topic name="sport">
<iptc mediatopic="15000000"/>
<categories>Sports,Sports_by_year</categories>
- <keywords></keywords>
</topic>
<!-- Mass media -->
<topic name="mass_media">
<iptc mediatopic="20000045"/>
<categories>Television,Radio,Mass_media,News,Journalism</categories>
- <keywords></keywords>
</topic>
<!-- Culture -->
<topic name="religion_belief">
<iptc mediatopic="12000000"/>
<categories>Religion,Belief</categories>
- <keywords></keywords>
</topic>
<topic name="transport">
<iptc mediatopic="20000337"/>
<categories>Transport</categories>
- <keywords></keywords>
</topic>
<!--topic name="agriculture">
<iptc mediatopic="20000210"/>
<categories>Agriculture</categories>
- <keywords></keywords>
</topic DID NOT WORK WELL-->
<topic name="video_game">
<iptc mediatopic="20000548"/>
<categories>Video_game_culture,Video_games</categories>
- <keywords></keywords>
</topic>
<topic name="war">
<iptc mediatopic="20000056"/>
<categories>War,Military</categories>
- <keywords></keywords>
</topic>
</topics>
View
@@ -50,6 +50,8 @@
<module>eval</module>
<module>uima</module>
<module>dist</module>
+ <module>live</module>
+ <module>topical</module>
</modules>
<build>
@@ -288,6 +290,8 @@
</configuration>
</execution>
+ <!--Dependencies for dbpedia spotlight live-->
+
<execution>
<id>install-hunposchain0.6_mod-jar</id>
<phase>install</phase>
@@ -363,6 +367,8 @@
</configuration>
</execution>
+ <!--Spotlight live dependencies-->
+
</executions>
</plugin>
View
@@ -169,7 +169,7 @@
<dependency>
<groupId>cc.factorie</groupId>
<artifactId>factorie</artifactId>
- <version>1.0.0-M3</version>
+ <version>1.0.0-M4</version>
</dependency>
</dependencies>
@@ -24,7 +24,7 @@ object TopicDescription {
for (topicItem <- xml \\ "topic") yield {
val topic = new Topic((topicItem \\ "@name").head.text) // HACK: bug fix Computer_science got read with more than 16 characters
val categories = (topicItem \\ "categories").head.text.split(",").map(category => category.toCharArray.subSequence(0, category.length).toString.trim)
- val keywords = (topicItem \\ "keywords").head.text.split(",").map(category => category.toCharArray.subSequence(0, category.length).toString.trim)
+ //val keywords = (topicItem \\ "keywords").head.text.split(",").map(category => category.toCharArray.subSequence(0, category.length).toString.trim)
var iptcTopics = Set[String]()
for (iptcItem <- topicItem \\ "iptc")
@@ -34,9 +34,9 @@ object TopicDescription {
for (feedItem <- topicItem \\ "feed")
feeds += new URL((feedItem \\ "@url").head.text)
- new TopicDescription(topic, categories, keywords, iptcTopics, feeds)
+ TopicDescription(topic, categories, iptcTopics, feeds)
}
}
}
-class TopicDescription(val topic: Topic, val categories: Seq[String], val keywords: Seq[String], val iptcTopics: Set[String], val rssFeeds: Set[URL])
+case class TopicDescription(topic: Topic, categories: Seq[String],iptcTopics: Set[String], rssFeeds: Set[URL])
Oops, something went wrong.

0 comments on commit 48c53d5

Please sign in to comment.