Skip to content

Commit

Permalink
Some cleanup
Browse files Browse the repository at this point in the history
  • Loading branch information
chile12 committed Oct 9, 2017
1 parent 677cd4e commit 35582e9
Show file tree
Hide file tree
Showing 12 changed files with 104 additions and 82 deletions.
Expand Up @@ -11,6 +11,7 @@ import org.dbpedia.extraction.config.dataparser.DataParserConfig

import scala.collection.mutable
import scala.language.reflectiveCalls
import scala.reflect.ClassTag

class IntermediateNodeMapping (
val nodeClass : OntologyClass, // public for rml mappings
Expand All @@ -19,7 +20,7 @@ class IntermediateNodeMapping (
context : {
def ontology : Ontology
def language : Language
def recorder : ExtractionRecorder[TemplateNode]
def recorder[T: ClassTag] : ExtractionRecorder[T]
}
)
extends PropertyMapping
Expand All @@ -46,7 +47,7 @@ extends PropertyMapping
if(affectedTemplatePropertyNodes.size > 1)
{
if(valueNodes.forall(_.size <= 1))
context.recorder.record(new RecordEntry[TemplateNode](node, node.title.encoded, RecordSeverity.Info, context.language, "IntermediateNodeMapping for multiple properties have multiple values in: " + subjectUri))
context.recorder[TemplateNode].record(new RecordEntry[TemplateNode](node, node.title.encoded, RecordSeverity.Info, context.language, "IntermediateNodeMapping for multiple properties have multiple values in: " + subjectUri))

createInstance(graph, node, subjectUri)
}
Expand Down
@@ -1,15 +1,18 @@
package org.dbpedia.extraction.mappings

import org.dbpedia.extraction.ontology.datatypes.Datatype

import scala.collection.mutable.HashMap
import scala.collection.mutable.ArrayBuffer
import java.util.logging.{Logger, Level, LogRecord}
import java.util.logging.{Level, LogRecord, Logger}

import org.dbpedia.extraction.wikiparser._
import org.dbpedia.extraction.dataparser.StringParser
import org.dbpedia.extraction.ontology.{Ontology, OntologyClass, OntologyProperty}
import java.lang.IllegalArgumentException
import org.dbpedia.extraction.util.Language
import org.dbpedia.extraction.util.{ExtractionRecorder, Language}

import scala.language.reflectiveCalls
import scala.reflect.ClassTag

/**
* Loads the mappings from the configuration and builds a MappingExtractor instance.
Expand All @@ -24,7 +27,9 @@ object MappingsLoader
def ontology : Ontology
def language : Language
def redirects : Redirects
def mappingPageSource : Traversable[WikiPage] } ) : Mappings =
def mappingPageSource : Traversable[WikiPage]
def recorder[T: ClassTag] : ExtractionRecorder[T]
} ) : Mappings =
{
logger.info("Loading mappings ("+context.language.wikiCode+")")

Expand Down Expand Up @@ -89,9 +94,11 @@ object MappingsLoader
}

private def loadTemplateMapping(tnode : TemplateNode, context : {
def ontology : Ontology
def redirects : Redirects
def language : Language } ) =
def ontology : Ontology
def redirects : Redirects
def language : Language
def recorder[T: ClassTag] : ExtractionRecorder[T]
} ) =
{
new TemplateMapping( loadOntologyClass(tnode, "mapToClass", true, context.ontology),
loadOntologyClass(tnode, "correspondingClass", false, context.ontology),
Expand All @@ -101,9 +108,11 @@ object MappingsLoader
}

private def loadPropertyMappings(node : TemplateNode, propertyName : String, context : {
def ontology : Ontology
def redirects : Redirects
def language : Language } ) : List[PropertyMapping] =
def ontology : Ontology
def redirects : Redirects
def language : Language
def recorder[T: ClassTag] : ExtractionRecorder[T]
} ) : List[PropertyMapping] =
{
var mappings = List[PropertyMapping]()

Expand All @@ -124,9 +133,11 @@ object MappingsLoader
}

private def loadPropertyMapping(tnode : TemplateNode, context : {
def ontology : Ontology
def redirects : Redirects
def language : Language } ) = tnode.title.decoded match
def ontology : Ontology
def redirects : Redirects
def language : Language
def recorder[T: ClassTag] : ExtractionRecorder[T]}
) = tnode.title.decoded match
{
case "PropertyMapping" =>
{
Expand Down Expand Up @@ -204,9 +215,11 @@ object MappingsLoader
}

private def loadConditionalMapping(tnode : TemplateNode, context : {
def ontology : Ontology
def redirects : Redirects
def language : Language } ) =
def ontology : Ontology
def redirects : Redirects
def language : Language
def recorder[T: ClassTag] : ExtractionRecorder[T]
} ) =
{
val conditionMappings =
for( casesProperty <- tnode.property("cases").toList;
Expand All @@ -217,9 +230,11 @@ object MappingsLoader
}

private def loadConditionMapping(tnode : TemplateNode, context : {
def ontology : Ontology
def redirects : Redirects
def language : Language } ) =
def ontology : Ontology
def redirects : Redirects
def language : Language
def recorder[T: ClassTag] : ExtractionRecorder[T]
} ) =
{
//Search for the template mapping in the first template node of the mapping property
val mapping = tnode.property("mapping").flatMap(mappingNode =>
Expand Down
Expand Up @@ -2,14 +2,15 @@ package org.dbpedia.extraction.mappings.rml.util

import java.io.File


import org.dbpedia.extraction.mappings.Redirects
import org.dbpedia.extraction.ontology.Ontology
import org.dbpedia.extraction.ontology.io.OntologyReader
import org.dbpedia.extraction.sources.XMLSource
import org.dbpedia.extraction.util.Language
import org.dbpedia.extraction.util.{ExtractionRecorder, Language}
import org.dbpedia.extraction.wikiparser.WikiPage

import scala.reflect.ClassTag

/**
* Generate context for xml and rml
*/
Expand All @@ -26,6 +27,7 @@ object ContextCreator {
def language: Language
def redirects: Redirects
def mappingPageSource: Traversable[WikiPage]
def recorder[T: ClassTag]: ExtractionRecorder[T]
} =
{
val xmlMappingFile = new File(pathToXML)
Expand All @@ -38,6 +40,8 @@ object ContextCreator {
def redirects: Redirects = null

def mappingPageSource: Traversable[WikiPage] = xmlMapping

def recorder[T: ClassTag] = null.asInstanceOf[ExtractionRecorder[T]]
}
}

Expand Down
Expand Up @@ -77,7 +77,7 @@ class ConfigLoader(config: Config)

def language: Language = input._1

def recorder[_]: ExtractionRecorder[_] = getExtractionRecorder(input._1)
def recorder[T: ClassTag]: ExtractionRecorder[T] = getExtractionRecorder[T](input._1)

private lazy val _mappingPageSource =
{
Expand All @@ -98,7 +98,7 @@ class ConfigLoader(config: Config)

def mappingPageSource : Traversable[WikiPage] = _mappingPageSource

private lazy val _mappings =
private lazy val _mappings: Mappings =
{
MappingsLoader.load(this)
}
Expand Down
@@ -1,18 +1,11 @@
package org.dbpedia.extraction.live.util;

import java.net.URI;
import java.net.URLEncoder;
import org.dbpedia.extraction.live.record.*;
import org.w3c.dom.Node;

import javax.xml.xpath.XPathExpressionException;


import org.dbpedia.extraction.live.record.DeletionRecord;
import org.dbpedia.extraction.live.record.IRecord;
import org.dbpedia.extraction.live.record.MediawikiTitle;
import org.dbpedia.extraction.live.record.Record;
import org.dbpedia.extraction.live.record.RecordContent;
import org.dbpedia.extraction.live.record.RecordMetadata;
import org.w3c.dom.Node;
import java.net.URI;
import java.net.URLEncoder;



Expand Down Expand Up @@ -48,7 +41,7 @@ public static Record exportToRecord(Node node, String baseWikiUri, String oaiUri
String text = XPathUtil.evalToString(node, DBPediaXPathUtil.getTextExpr());

RecordMetadata metadata = new RecordMetadata(language, title, oaiId,
new URI(wikipediaUri), revision, username, ip, userId);
URI.create(wikipediaUri), revision, username, ip, userId);

RecordContent content = new RecordContent(text, revision, XMLUtil.toString(node));

Expand Down
@@ -1,12 +1,14 @@
package org.dbpedia.extraction.live.extractor

import org.dbpedia.extraction.ontology.io.OntologyReader
import org.dbpedia.extraction.util.Language
import org.dbpedia.extraction.util.{ExtractionRecorder, Language}
import org.dbpedia.extraction.ontology.Ontology
import org.dbpedia.extraction.wikiparser._
import org.dbpedia.extraction.sources.Source
import org.dbpedia.extraction.mappings._

import collection.mutable.ListBuffer
import scala.reflect.ClassTag

/**
* Created by IntelliJ IDEA.
Expand Down Expand Up @@ -59,7 +61,7 @@ object LiveExtractor

def mappingPageSource : Traversable[WikiPage] = _mappingsSource

private lazy val _mappings = MappingsLoader.load(this)
private lazy val _mappings: Mappings = MappingsLoader.load(this)
def mappings : Mappings = _mappings

def articlesSource : Source = _articlesSource
Expand All @@ -71,6 +73,8 @@ object LiveExtractor
def disambiguations : Disambiguations = new Disambiguations(Set[Long]())

def commonsSource : Source = _commonsSource

def recorder[T: ClassTag]: ExtractionRecorder[T] = null.asInstanceOf[ ExtractionRecorder[T]]
}
}

Expand Down
Expand Up @@ -52,7 +52,7 @@ object CreateIriSameAsUriLinks {

new QuadMapper().mapQuads(language, inputFile, destination, required = true) { quad =>
val iri = quad.subject
val uri = new URI(iri).toASCIIString //in this case we actually want to use an URI not an IRI
val uri = URI.create(iri).toASCIIString //in this case we actually want to use an URI not an IRI
if (uri == iri) List.empty
else List(new Quad(null, null, iri, sameAs, uri, null, null: String))
}
Expand Down
Expand Up @@ -10,6 +10,7 @@ import org.jsoup.Jsoup

import scala.collection.mutable.ListBuffer
import scala.collection.convert.decorateAsScala._
import scala.util.{Failure, Success}
import scala.util.matching.Regex

/**
Expand Down Expand Up @@ -48,12 +49,15 @@ object ISNILinkExtractor {
if(error.isEmpty || !error.text().contains("data limit")){
val links = Jsoup.parse(html.replaceAll("\n", "")).select(linksQuery).asScala
for(node <- links){
val link = new URI(node.attr("href"))
baseUriMap.get(link.getScheme + "://" + link.getAuthority) match{
case Some(regex) =>
val target = regex._1.replaceAllIn(link.toString, regex._2)
list.append(SourceLink(id, error = false, target))
case None =>
URI.create(node.attr("href")) match {
case Success(link) =>
baseUriMap.get(link.getScheme + "://" + link.getAuthority) match {
case Some(regex) =>
val target = regex._1.replaceAllIn(link.toString, regex._2)
list.append(SourceLink(id, error = false, target))
case None =>
}
case Failure(f) =>
}
}
}
Expand Down
Expand Up @@ -82,17 +82,8 @@ abstract class ExtractionManager(
logHandler.setLevel(Level.WARNING)
logger.addHandler(logHandler)

// context object that has only this mappingSource
val context = new {
val ontology: Ontology = self.ontology()
val language: Language = lang
val redirects: Redirects = new Redirects(Map())
val mappingPageSource: Traversable[WikiPage] = mappingsPages
val disambiguations: Disambiguations = self.disambiguations
}

//Load mappings
val mappings = MappingsLoader.load(context)
val mappings = loadMappings(lang)

if (mappings.templateMappings.isEmpty && mappings.tableMappings.isEmpty)
logger.severe("no mappings found")
Expand Down Expand Up @@ -201,39 +192,23 @@ abstract class ExtractionManager(
{
CompositeParseExtractor.load(classes,self.getExtractionContext(lang))
}
private val extractionRecorder = new mutable.HashMap[ClassTag[_], mutable.HashMap[Language, ExtractionRecorder[_]]]()
/**
  * Returns the ExtractionRecorder cached for the record type T and the given language,
  * creating, registering and initializing a new one on first request.
  *
  * @param lang    language used as the cache key; also passed to `initialize` for a new recorder
  * @param dataset optional dataset (null means none); seeds the dataset list of a new recorder,
  *                or is appended to an existing recorder's datasets when not already present
  * @tparam T record type; its ClassTag is the outer cache key
  * @return the cached or newly created recorder, typed as ExtractionRecorder[T]
  */
def getExtractionRecorder[T: ClassTag](lang: Language, dataset : Dataset = null): org.dbpedia.extraction.util.ExtractionRecorder[T] = {
  // one inner map per record type, created lazily on first access
  val recordersByLanguage = extractionRecorder.getOrElseUpdate(classTag[T], new mutable.HashMap[Language, ExtractionRecorder[_]]())
  recordersByLanguage.get(lang) match {
    case Some(existing) =>
      if (dataset != null && !existing.datasets.contains(dataset))
        existing.datasets += dataset
      // the map stores recorders existentially typed, hence the cast back to T
      existing.asInstanceOf[ExtractionRecorder[T]]
    case None =>
      val created = new ExtractionRecorder[T](null, 2000, null, null, if(dataset != null) ListBuffer(dataset) else ListBuffer())
      // register before initializing, so the recorder is cached even if initialize fails
      recordersByLanguage(lang) = created
      created.initialize(lang)
      created
  }
}

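/**
* Builds the context object for all extractors involved,
* including the server configuration itself.
* @param lang the language the context is built for
* @return an anonymous context object exposing, among others, the ontology, language, mappings, redirects, disambiguations, config file and recorder
*/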
protected def getExtractionContext(lang: Language) = {
new { val ontology: Ontology = self.ontology
new { val ontology: Ontology = self.ontology()
val language: Language = lang
val mappings: Mappings = self.mappings(lang)
val redirects: Redirects = self.redirects.getOrElse(lang, new Redirects(Map()))
val disambiguations: Disambiguations = self.disambiguations
val configFile: ServerConfiguration = Server.config
val nonFreeImages = Seq()
val freeImages = Seq()
def recorder: ExtractionRecorder[_] = getExtractionRecorder(lang)
def recorder[T: ClassTag]: ExtractionRecorder[T] = Server.getExtractionRecorder[T](lang)
}
}

Expand All @@ -253,7 +228,7 @@ abstract class ExtractionManager(
val configFile: ServerConfiguration = Server.config
val nonFreeImages = Seq()
val freeImages = Seq()
def recorder: ExtractionRecorder[_] = getExtractionRecorder(lang)
def recorder[T: ClassTag]: ExtractionRecorder[T] = Server.getExtractionRecorder[T](lang)
}

MappingsLoader.load(context)
Expand Down

0 comments on commit 35582e9

Please sign in to comment.