Skip to content

Commit

Permalink
#1362 - NifWriter does not write out NE identifier
Browse files Browse the repository at this point in the history
- Fix handling of NE identifier
- Added unit test
  • Loading branch information
reckart committed Jun 3, 2019
1 parent 9a6519f commit a7926f3
Show file tree
Hide file tree
Showing 7 changed files with 202 additions and 9 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -22,11 +22,11 @@
import static org.apache.uima.fit.util.JCasUtil.select;
import static org.apache.uima.fit.util.JCasUtil.selectCovered;

import org.apache.commons.lang3.StringUtils;
import org.apache.jena.ontology.Individual;
import org.apache.jena.ontology.OntModel;
import org.apache.jena.rdf.model.Property;
import org.apache.jena.rdf.model.Resource;
import org.apache.jena.riot.system.IRIResolver;
import org.apache.uima.jcas.JCas;

import de.tudarmstadt.ukp.dkpro.core.api.metadata.type.DocumentMetaData;
Expand Down Expand Up @@ -188,13 +188,12 @@ public static void convert(JCas aJCas, OntModel aTarget)
// not have the concept of a NE category.
for (NamedEntity uimaNamedEntity : select(aJCas, NamedEntity.class)) {
String neClass = uimaNamedEntity.getValue();
String neIdentifier = uimaNamedEntity.getValue();
String neIdentifier = uimaNamedEntity.getIdentifier();

boolean neClassIsUri = StringUtils.startsWith(neClass, "http://");
boolean neIdentifierIsUri = StringUtils.startsWith(neIdentifier, "http://");
// checkIRI returns true if there are violations, so we need to negate it
boolean neClassIsUri = neClass != null && !IRIResolver.checkIRI(neClass);
boolean neIdentifierIsUri = neIdentifier != null && !IRIResolver.checkIRI(neIdentifier);

// The crudest form of checking for a URI, but since "http://" appears to be the default
// prefix in the semantic web, let's just stick with it for the moment.
if (!neClassIsUri && !neIdentifierIsUri) {
continue;
}
Expand All @@ -214,7 +213,7 @@ public static void convert(JCas aJCas, OntModel aTarget)
}

if (neIdentifierIsUri) {
nifNamedEntity.addProperty(pTaClassRef, m.createResource(neIdentifier));
nifNamedEntity.addProperty(pTaIdentRef, m.createResource(neIdentifier));
}
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ public void convert()
{
convert("src/test/resources/nif/brown/a01.ttl", "src/test/resources/nif/brown/a01-cooked.ttl");
convert("src/test/resources/nif/kore50/kore50.ttl", "src/test/resources/nif/kore50/kore50-cooked.ttl");
convert("src/test/resources/nif/freme/freme.ttl", "src/test/resources/nif/freme/freme-cooked.ttl");
}

@Test
Expand Down Expand Up @@ -75,6 +76,17 @@ public void testKore50()
new TestOptions().resultAssertor(this::assertModelEquals));
}

/**
 * Runs the FREME sample through {@code testOneWay} with {@link NifReader} as the reader and
 * {@link NifWriter} as the writer, comparing the result via {@code assertModelEquals}.
 *
 * @throws Exception
 *             if the one-way test run fails.
 */
@Test
public void testFreme()
    throws Exception
{
    // NOTE(review): presumably ref.ttl is the expected output and freme-cooked.ttl the input
    // that gets read - confirm against the parameter order of testOneWay.
    String referencePath = "nif/freme/ref.ttl";
    String cookedPath = "nif/freme/freme-cooked.ttl";

    testOneWay(NifReader.class, NifWriter.class, referencePath, cookedPath,
            new TestOptions().resultAssertor(this::assertModelEquals));
}

private void assertModelEquals(File expected, File actual)
{
Model mExpected = ModelFactory.createDefaultModel();
Expand Down
2 changes: 2 additions & 0 deletions dkpro-core-io-nif-asl/src/test/resources/nif/freme/README.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Resource: https://github.com/freme-project/e-Entity/issues/56

Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
@prefix dbc: <http://dbpedia.org/resource/Category:> .
@prefix dbpedia-fr: <http://fr.dbpedia.org/resource/> .
@prefix owl: <http://www.w3.org/2002/07/owl#> .
@prefix dbpedia-es: <http://es.dbpedia.org/resource/> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
@prefix itsrdf: <http://www.w3.org/2005/11/its/rdf#> .
@prefix nif: <http://persistence.uni-leipzig.org/nlp2rdf/ontologies/nif-core#> .
@prefix dbpedia: <http://dbpedia.org/resource/> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
@prefix dbpedia-de: <http://de.dbpedia.org/resource/> .
@prefix dbpedia-ru: <http://ru.dbpedia.org/resource/> .
@prefix freme-onto: <http://freme-project.eu/ns#> .
@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
@prefix dbpedia-nl: <http://nl.dbpedia.org/resource/> .
@prefix dcterms: <http://purl.org/dc/terms/> .
@prefix dbpedia-it: <http://it.dbpedia.org/resource/> .

<http://freme-project.eu/#char=0,140>
a nif:RFC5147String , nif:Context , nif:String ;
nif:beginIndex "0"^^xsd:int ;
nif:endIndex "140"^^xsd:int ;
nif:isString "This meant the Aos Sí (pronounced ees shee), the 'spirits' or 'fairies', could more easily come into our world and were particularly active." .

<http://freme-project.eu/#char=15,21>
a nif:RFC5147String , nif:String , nif:Word , nif:Phrase ;
nif:anchorOf "Aos Sí" ;
nif:beginIndex "15"^^xsd:int ;
nif:endIndex "21"^^xsd:int ;
nif:referenceContext <http://freme-project.eu/#char=0,140> ;
itsrdf:taClassRef owl:Thing ;
itsrdf:taConfidence "0.4859081423223223"^^xsd:double ;
itsrdf:taIdentRef <http://dbpedia.org/resource/Aos_S%25C3%25AD> .
30 changes: 30 additions & 0 deletions dkpro-core-io-nif-asl/src/test/resources/nif/freme/freme.ttl
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
@prefix dbpedia-fr: <http://fr.dbpedia.org/resource/> .
@prefix dbc: <http://dbpedia.org/resource/Category:> .
@prefix dbpedia-es: <http://es.dbpedia.org/resource/> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
@prefix itsrdf: <http://www.w3.org/2005/11/its/rdf#> .
@prefix dbpedia: <http://dbpedia.org/resource/> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
@prefix nif: <http://persistence.uni-leipzig.org/nlp2rdf/ontologies/nif-core#> .
@prefix dbpedia-de: <http://de.dbpedia.org/resource/> .
@prefix dbpedia-ru: <http://ru.dbpedia.org/resource/> .
@prefix freme-onto: <http://freme-project.eu/ns#> .
@prefix dbpedia-nl: <http://nl.dbpedia.org/resource/> .
@prefix dcterms: <http://purl.org/dc/terms/> .
@prefix dbpedia-it: <http://it.dbpedia.org/resource/> .

<http://freme-project.eu/#char=0,140>
a nif:String , nif:Context , nif:RFC5147String ;
nif:beginIndex "0"^^xsd:int ;
nif:endIndex "140"^^xsd:int ;
nif:isString "This meant the Aos Sí (pronounced ees shee), the 'spirits' or 'fairies', could more easily come into our world and were particularly active."^^xsd:string .

<http://freme-project.eu/#char=15,21>
a nif:RFC5147String , nif:String , nif:Word , nif:Phrase ;
nif:anchorOf "Aos Sí"^^xsd:string ;
nif:beginIndex "15"^^xsd:int ;
nif:endIndex "21"^^xsd:int ;
nif:referenceContext <http://freme-project.eu/#char=0,140> ;
itsrdf:taClassRef <http://www.w3.org/2002/07/owl#Thing> ;
itsrdf:taConfidence "0.4859081423223223"^^xsd:double ;
itsrdf:taIdentRef <http://dbpedia.org/resource/Aos_S%25C3%25AD> .
118 changes: 118 additions & 0 deletions dkpro-core-io-nif-asl/src/test/resources/nif/freme/ref.ttl
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
@prefix owl: <http://www.w3.org/2002/07/owl#> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
@prefix itsrdf: <http://www.w3.org/2005/11/its/rdf#> .
@prefix nif: <http://persistence.uni-leipzig.org/nlp2rdf/ontologies/nif-core#> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .

rdf:rest a rdf:Property , rdfs:Resource ;
rdfs:domain rdf:List ;
rdfs:range rdf:List ;
rdfs:subPropertyOf rdf:rest .

rdf:List a rdfs:Class , rdfs:Resource ;
rdfs:subClassOf rdfs:Resource , rdf:List .

rdf:predicate a rdf:Property , rdfs:Resource ;
rdfs:domain rdf:Statement ;
rdfs:subPropertyOf rdf:predicate .

rdf:Property a rdfs:Class , rdfs:Resource ;
rdfs:subClassOf rdfs:Resource , rdf:Property .

rdfs:label a rdf:Property , rdfs:Resource ;
rdfs:range rdfs:Literal .

rdf:Statement a rdfs:Class , rdfs:Resource ;
rdfs:subClassOf rdfs:Resource , rdf:Statement .

rdfs:Class a rdfs:Class , rdfs:Resource ;
rdfs:subClassOf rdfs:Resource , rdfs:Class .

rdf:type a rdf:Property , rdfs:Resource ;
rdfs:range rdfs:Class .

rdfs:Resource a rdfs:Class , rdfs:Resource ;
rdfs:subClassOf rdfs:Resource .

rdf:subject a rdf:Property , rdfs:Resource ;
rdfs:domain rdf:Statement ;
rdfs:subPropertyOf rdf:subject .

rdf:XMLLiteral a rdfs:Datatype , rdfs:Resource , rdfs:Class .

rdfs:comment a rdf:Property , rdfs:Resource ;
rdfs:range rdfs:Literal .

rdfs:range a rdf:Property , rdfs:Resource ;
rdfs:domain rdf:Property ;
rdfs:range rdfs:Class .

rdfs:subPropertyOf a rdf:Property , rdfs:Resource ;
rdfs:domain rdf:Property ;
rdfs:range rdf:Property .

rdf:object a rdf:Property , rdfs:Resource ;
rdfs:domain rdf:Statement ;
rdfs:subPropertyOf rdf:object .

rdf:nil a rdf:List , rdfs:Resource .

rdfs:domain a rdf:Property , rdfs:Resource ;
rdfs:domain rdf:Property ;
rdfs:range rdfs:Class .

rdfs:Literal a rdfs:Class , rdfs:Resource ;
rdfs:subClassOf rdfs:Resource , rdfs:Literal .

rdf:first a rdf:Property , rdfs:Resource ;
rdfs:domain rdf:List ;
rdfs:subPropertyOf rdf:first .

rdfs:subClassOf a rdf:Property , rdfs:Resource ;
rdfs:domain rdfs:Class ;
rdfs:range rdfs:Class .

<urn:freme-cooked.ttl#offset_0_140>
a nif:Context ;
nif:beginIndex "0"^^xsd:nonNegativeInteger ;
nif:endIndex "140"^^xsd:nonNegativeInteger ;
nif:isString "This meant the Aos Sí (pronounced ees shee), the 'spirits' or 'fairies', could more easily come into our world and were particularly active." .

<urn:freme-cooked.ttl#offset_15_21>
a nif:EntityOccurrence ;
nif:anchorOf "Aos Sí" ;
nif:beginIndex "15"^^xsd:nonNegativeInteger ;
nif:endIndex "21"^^xsd:nonNegativeInteger ;
nif:referenceContext <urn:freme-cooked.ttl#offset_0_140> ;
itsrdf:taClassRef owl:Thing ;
itsrdf:taIdentRef <http://dbpedia.org/resource/Aos_S%25C3%25AD> .

rdf:Bag a rdfs:Class , rdfs:Resource ;
rdfs:subClassOf rdf:Bag , rdfs:Container .

rdf:Seq a rdfs:Class , rdfs:Resource ;
rdfs:subClassOf rdf:Seq , rdfs:Container .

rdfs:Datatype a rdfs:Class , rdfs:Resource ;
rdfs:subClassOf rdfs:Datatype , rdfs:Resource , rdfs:Class .

rdf:Alt a rdfs:Class , rdfs:Resource ;
rdfs:subClassOf rdf:Alt , rdfs:Container .

rdfs:Container a rdfs:Class , rdfs:Resource ;
rdfs:subClassOf rdfs:Container .

rdfs:ContainerMembershipProperty
a rdfs:Class , rdfs:Resource ;
rdfs:subClassOf rdfs:ContainerMembershipProperty , rdfs:Resource , rdf:Property .

rdfs:isDefinedBy a rdf:Property , rdfs:Resource ;
rdfs:subPropertyOf rdfs:isDefinedBy , rdfs:seeAlso .

rdfs:seeAlso a rdf:Property , rdfs:Resource ;
rdfs:subPropertyOf rdfs:seeAlso .

nif:EntityOccurrence a rdfs:Class , rdfs:Resource .

nif:Context a rdfs:Class , rdfs:Resource .
Original file line number Diff line number Diff line change
Expand Up @@ -562,7 +562,7 @@
a nif:RFC5147String , nif:Context , nif:String ;
nif:beginIndex "0"^^xsd:nonNegativeInteger ;
nif:endIndex "3779"^^xsd:nonNegativeInteger ;
nif:isString "David and Victoria named their children Brooklyn, Romeo, Cruz, and Harper Seven.\nDavid and Victoria added spice to their marriage.\nTiger was lost in the woods when he got divorced from Elin.\nTiger lost the US Open.\nMadonna played Eva and was seen with Carlos.\nIn this musical, Madonna played the role of the First Lady.\nAngelina, her father Jon, and her partner Brad never played together in the same movie.\nHeidi and her husband Seal live in Vegas.\nParis and Kim are both wealthy It Girls who had sex tapes on the Internet.\nJustin, Stefani, and Kate are among the most popular people on both MTV and Twitter.\nDylan performed Hurricane about the black fighter Carter, from his album Desire.\nDesire contains a duet with Harris in the song Joey.\nThree of the greatest guitarists started their career in a single band : Clapton, Beck, and Page.\nAllen founded the EMP in Seattle, which featured exhibitions about Hendrix and Dylan, but also about various science fiction movies.\nDespite featuring some of the most promininent musicians of their decade --- like Sinatra, Dylan, Joel, and Santana --- Columbia was aquired by Sony in the 1980s.\nAfter unsuccessful years, aging country star Cash made a grandiose comeback with his American Recordings, recorded at his home with the help of Rubin.\nThe group formed by Homme, Grohl, and Jones was supposed to be named Caligula, but the name was already taken.\nJobs and Baez dated in the late 1970s, and she performed at his Stanford memorial.\nThe Isle of Wight festival in 1970 was the biggest at its time, surpassing Woodstock with acts like Davis, Chicago, and Mitchell.\nEric preferred to play Blues instead of Rock, so he joined Mayall 's band.\nAfter the death of Steve, the former CEO of Apple, his commencement speech at Stanford was watched thousands of times.\nIn 1980, Steve dropped out of Stanford to join Microsoft, the company behind the Windows operating system.\nCairo was the code name for a project at 
Microsoft from 1991 to 1996. Its charter was to build technologies for a next generation operating system that would fulfill the vision of Bill.\nSteve, Bill, Sergey, and Larry have drawn a great deal of admiration these days for their pioneering successes that changed the world we live in.\nKarl and Theo made their extreme fortunes selling low-price groceries.\nWhile Apple is an electronics company, Mango is a clothing one and Orange is a communication one.\nSam, the co-founder of Equity International, was given the nickname of \\\"the grave dancer\\\" because of his ability to buy businesses that others thought were dead.\nPixar produced Cars, and John directed it.\nMars, Galaxy, and Bounty are all chocolate.\nBosch and Sharp are both home appliances producing companies.\nCity won 3:2 against the Spurs.\nThe Gunners now play their home matches at the Emirates.\nAtletico has beaten its archrival Real.\nMüller scored a hattrick against England.\nThomas and Mario are strikers playing in Munich.\nHaug congratulated Red Bull.\nVöller will never forget the match against Oranje in San Siro.\nLandgraf and Meijer played at the Tivoli.\nYabo plays for Aachen.\nHertha won against Dortmund.\nNixon resigned after Watergate despite his success in the Ping-Pong Diplomacy with China.\nThe Sun and The Times reported that Greece will have to leave the Euro soon.\nThe Enola Gay bombed Hiroshima at the end of Second World War.\nThe RAF was a terrorist group led by Baader and Meinhof that killed Schleyer.\nOnassis married Kennedy on October 20, 1968.\nArmstrong was the first man on the Moon.\nErich was born in Neunkirchen.\nMacedonia is a province of Greece.\nObama welcomed Merkel upon her arrival at JFK.\nKennedy was also an active politician, yet he is most known for his writings, some of which he published under the name of Mark Littleton." ;
nif:isString "David and Victoria named their children Brooklyn, Romeo, Cruz, and Harper Seven.\nDavid and Victoria added spice to their marriage.\nTiger was lost in the woods when he got divorced from Elin.\nTiger lost the US Open.\nMadonna played Eva and was seen with Carlos.\nIn this musical, Madonna played the role of the First Lady.\nAngelina, her father Jon, and her partner Brad never played together in the same movie.\nHeidi and her husband Seal live in Vegas.\nParis and Kim are both wealthy It Girls who had sex tapes on the Internet.\nJustin, Stefani, and Kate are among the most popular people on both MTV and Twitter.\nDylan performed Hurricane about the black fighter Carter, from his album Desire.\nDesire contains a duet with Harris in the song Joey.\nThree of the greatest guitarists started their career in a single band : Clapton, Beck, and Page.\nAllen founded the EMP in Seattle, which featured exhibitions about Hendrix and Dylan, but also about various science fiction movies.\nDespite featuring some of the most promininent musicians of their decade --- like Sinatra, Dylan, Joel, and Santana --- Columbia was aquired by Sony in the 1980s.\nAfter unsuccessful years, aging country star Cash made a grandiose comeback with his American Recordings, recorded at his home with the help of Rubin.\nThe group formed by Homme, Grohl, and Jones was supposed to be named Caligula, but the name was already taken.\nJobs and Baez dated in the late 1970s, and she performed at his Stanford memorial.\nThe Isle of Wight festival in 1970 was the biggest at its time, surpassing Woodstock with acts like Davis, Chicago, and Mitchell.\nEric preferred to play Blues instead of Rock, so he joined Mayall 's band.\nAfter the death of Steve, the former CEO of Apple, his commencement speech at Stanford was watched thousands of times.\nIn 1980, Steve dropped out of Stanford to join Microsoft, the company behind the Windows operating system.\nCairo was the code name for a project at 
Microsoft from 1991 to 1996. Its charter was to build technologies for a next generation operating system that would fulfill the vision of Bill.\nSteve, Bill, Sergey, and Larry have drawn a great deal of admiration these days for their pioneering successes that changed the world we live in.\nKarl and Theo made their extreme fortunes selling low-price groceries.\nWhile Apple is an electronics company, Mango is a clothing one and Orange is a communication one.\nSam, the co-founder of Equity International, was given the nickname of \"the grave dancer\" because of his ability to buy businesses that others thought were dead.\nPixar produced Cars, and John directed it.\nMars, Galaxy, and Bounty are all chocolate.\nBosch and Sharp are both home appliances producing companies.\nCity won 3:2 against the Spurs.\nThe Gunners now play their home matches at the Emirates.\nAtletico has beaten its archrival Real.\nMüller scored a hattrick against England.\nThomas and Mario are strikers playing in Munich.\nHaug congratulated Red Bull.\nVöller will never forget the match against Oranje in San Siro.\nLandgraf and Meijer played at the Tivoli.\nYabo plays for Aachen.\nHertha won against Dortmund.\nNixon resigned after Watergate despite his success in the Ping-Pong Diplomacy with China.\nThe Sun and The Times reported that Greece will have to leave the Euro soon.\nThe Enola Gay bombed Hiroshima at the end of Second World War.\nThe RAF was a terrorist group led by Baader and Meinhof that killed Schleyer.\nOnassis married Kennedy on October 20, 1968.\nArmstrong was the first man on the Moon.\nErich was born in Neunkirchen.\nMacedonia is a province of Greece.\nObama welcomed Merkel upon her arrival at JFK.\nKennedy was also an active politician, yet he is most known for his writings, some of which he published under the name of Mark Littleton." ;
nif:sourceUrl <http://www.mpi-inf.mpg.de/yago-naga/aida/download/KORE50.tar.gz/AIDA.tsv> .

<http://www.mpi-inf.mpg.de/yago-naga/aida/download/KORE50.tar.gz/AIDA.tsv#char=450,455>
Expand Down Expand Up @@ -1089,7 +1089,7 @@

<http://www.mpi-inf.mpg.de/yago-naga/aida/download/KORE50.tar.gz/AIDA.tsv#char=2415,2578>
a nif:RFC5147String , nif:Sentence , nif:String ;
nif:anchorOf "Sam, the co-founder of Equity International, was given the nickname of \\\"the grave dancer\\\" because of his ability to buy businesses that others thought were dead." ;
nif:anchorOf "Sam, the co-founder of Equity International, was given the nickname of \"the grave dancer\" because of his ability to buy businesses that others thought were dead." ;
nif:beginIndex "2415"^^xsd:nonNegativeInteger ;
nif:endIndex "2578"^^xsd:nonNegativeInteger ;
nif:referenceContext <http://www.mpi-inf.mpg.de/yago-naga/aida/download/KORE50.tar.gz/AIDA.tsv#char=0,3779> .
Expand Down

0 comments on commit a7926f3

Please sign in to comment.