Skip to content

Commit

Permalink
added parsing of SSSOM to eval client
Browse files Browse the repository at this point in the history
  • Loading branch information
sven-h committed Sep 18, 2023
1 parent 1f9b601 commit d0e1aa9
Show file tree
Hide file tree
Showing 12 changed files with 308 additions and 8 deletions.
1 change: 1 addition & 0 deletions documentation/release_notes.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
- [Wikidata tests are fixed](https://github.com/dwslab/melt/commit/ae824d9#diff-471d5bfa39673c940c5c7c2f7450ffe61545c052efd6ccf85080e0e589cbd7c9) because Wikidata entry for `EU` changed
- [Testcasevalidation is more memory friendly](https://github.com/dwslab/melt/commit/fe6915287637895c3fee63eec2e28934218ba0bd)
- fixed macro F1 computation (it is **not** the harmonic mean of macro averaged precision and recall but now the macro averaged f1 scores)
- track `Popconference[0-100]` works now even though this track has no reference alignments (see [commit 1f9b601](https://github.com/dwslab/melt/commit/1f9b60102c14131a97228f6d84cf84ffe29df024))

**New Tracks**
- [pgx Track](http://oaei.ontologymatching.org/2023/pharmacogenomics/index.html)
Expand Down
113 changes: 113 additions & 0 deletions examples/SSSOMMatcher/pom.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>de.uni-mannheim.informatik.dws.melt</groupId>
<artifactId>SSSOMMatcher</artifactId><!-- id used as matcher id in descriptor file of seals package -->
<packaging>jar</packaging>
<version>1.0</version><!-- version appearing in descriptor file of seals package -->
<description>simple SSSOM Matcher</description><!-- description appearing in descriptor file of seals package -->

<properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<maven.compiler.source>1.8</maven.compiler.source>
<maven.compiler.target>1.8</maven.compiler.target>

<oaei.mainClass>de.uni_mannheim.informatik.dws.melt.demomatcher.SSSOMMatcher</oaei.mainClass><!-- mandatory: this class has to implement IOntologyMatchingToolBridge -->

<oaei.copyright>(C) Mannheim, 2019</oaei.copyright> <!--optional copyright appearing in the seals descriptor file -->
<oaei.license>GNU Lesser General Public License 2.1 or above</oaei.license> <!--optional license appearing in the seals descriptor file -->

<matching.version>3.4-SNAPSHOT</matching.version> <!-- version for all matching related packages -->
</properties>

<!-- Accessing the resources:
- all files in "oaei-resources" folder are stored in the current working directory and can be accessed with
Files.readAllLines(Paths.get("oaei-resources", "configuration_oaei.txt"));
- all files in "src/main/resources" folder are compiled to the resulting jar and can be accessed with
getClass().getClassLoader().getResourceAsStream("configuration_jar.txt");
-->

<dependencies>
<!-- dependency for jena matchers - for other matchers you can replace it with artifactId: matching-base -->
<dependency>
<groupId>de.uni-mannheim.informatik.dws.melt</groupId>
<artifactId>matching-jena</artifactId>
<version>${matching.version}</version>
</dependency>

<!-- This dependency is necessary for seals_external. It contains the cli wrapper. -->
<dependency>
<groupId>de.uni-mannheim.informatik.dws.melt</groupId>
<artifactId>receiver-cli</artifactId>
<version>${matching.version}</version>
</dependency>

<dependency>
<groupId>de.uni-mannheim.informatik.dws.melt</groupId>
<artifactId>matching-eval</artifactId>
<version>${matching.version}</version>
<scope>test</scope>
<!-- only in test scope, can only be used when running tests.
You can also remove the scope but then remove the whole dependency when packaging the matcher.
Otherwise you get a lot of unused dependencies in your matcher.-->
</dependency>

<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-log4j12</artifactId>
<version>1.7.36</version>
</dependency>


<dependency>
<groupId>org.junit.jupiter</groupId>
<artifactId>junit-jupiter-api</artifactId>
<version>5.9.0</version>
<scope>test</scope>
</dependency>

<dependency>
<groupId>org.junit.jupiter</groupId>
<artifactId>junit-jupiter-engine</artifactId>
<version>5.9.0</version>
<scope>test</scope>
</dependency>

</dependencies>

<build>
<plugins>

<!-- the following plugin will generate a seals assembly -->
<plugin>
<artifactId>maven-assembly-plugin</artifactId>
<version>3.4.2</version>
<dependencies>
<dependency>
<groupId>de.uni-mannheim.informatik.dws.melt</groupId>
<artifactId>matching-assembly</artifactId>
<version>${matching.version}</version>
</dependency>
</dependencies>
<executions>
<execution>
<phase>package</phase>
<goals><goal>single</goal></goals>
<!-- there exist three descriptors:
1) "seals" - removes all dependencies available in the seals client regardless of their version.
2) "seals_all_deps" - adds all dependencies of your project (please note that this can cause problems when using jena, for example)
3) "seals_external" - creates a wrapper around the matcher so that no interference with seals happens (the matcher has to log to std err only).-->
<configuration><descriptorRefs><descriptorRef>seals_external</descriptorRef></descriptorRefs></configuration>
</execution>
</executions>
</plugin>

<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-surefire-plugin</artifactId>
<version>3.0.0-M7</version>
</plugin>
</plugins>
</build>

</project>
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
package de.uni_mannheim.informatik.dws.melt.demomatcher;

import de.uni_mannheim.informatik.dws.melt.matching_base.MatcherURL;
import de.uni_mannheim.informatik.dws.melt.matching_jena.OntologyCacheJena;
import de.uni_mannheim.informatik.dws.melt.yet_another_alignment_api.Alignment;
import de.uni_mannheim.informatik.dws.melt.yet_another_alignment_api.DefaultExtensions.SSSOM;
import de.uni_mannheim.informatik.dws.melt.yet_another_alignment_api.SSSOMSerializer;
import java.io.File;
import java.net.URL;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import org.apache.jena.ontology.OntModel;
import org.apache.jena.ontology.OntResource;
import org.apache.jena.rdf.model.NodeIterator;
import org.apache.jena.rdf.model.RDFNode;
import org.apache.jena.util.iterator.ExtendedIterator;
import org.apache.jena.vocabulary.RDFS;

/**
 * A very simple SSSOM matcher.
 * <p>
 * Classes of the source and the target ontology are matched whenever they share a
 * lower-cased {@code rdfs:label} or a lower-cased URI fragment (with {@code _} replaced
 * by a blank). The resulting alignment is written with {@link SSSOMSerializer}.
 */
public class SSSOMMatcher extends MatcherURL {

    /**
     * Matches the classes of both ontologies and serializes the alignment to a temporary file.
     *
     * @param sourceURL URL of the source ontology
     * @param targetURL URL of the target ontology
     * @param inputAlignment input alignment; not used by this matcher
     * @return URL of the temporary file that contains the serialized alignment
     * @throws Exception propagated from ontology loading, temp file creation or serialization
     */
    @Override
    public URL match(URL sourceURL, URL targetURL, URL inputAlignment) throws Exception {

        OntModel source = OntologyCacheJena.get(sourceURL, OntologyCacheJena.DEFAULT_JENA_ONT_MODEL_SPEC);
        OntModel target = OntologyCacheJena.get(targetURL, OntologyCacheJena.DEFAULT_JENA_ONT_MODEL_SPEC);

        Alignment alignment = new Alignment();
        matchResources(source.listClasses(), target.listClasses(), alignment);//match only classes

        // NOTE(review): the file keeps the ".rdf" suffix although it is written by the
        // SSSOM serializer - confirm that no consumer relies on the suffix before renaming it.
        File alignmentFile = File.createTempFile("alignment", ".rdf");

        alignment.addExtensionValue(SSSOM.MAPPING_SET_TITLE, "A simple SSSOM mapping.");

        SSSOMSerializer.serialize(alignment, alignmentFile);
        return alignmentFile.toURI().toURL();
    }


    /**
     * Adds a correspondence for every target resource that shares a text with a source resource.
     *
     * @param sourceResources resources of the source ontology (consumed completely)
     * @param targetResources resources of the target ontology (consumed completely)
     * @param alignment alignment that receives the correspondences
     */
    private void matchResources(ExtendedIterator<? extends OntResource> sourceResources, ExtendedIterator<? extends OntResource> targetResources, Alignment alignment) {
        // Index: normalized text -> URI of a source resource carrying that text.
        // If several source resources share a text, the one seen last wins.
        Map<String, String> text2URI = new HashMap<>();
        while (sourceResources.hasNext()) {
            OntResource source = sourceResources.next();
            for (String s : getStringRepresentation(source)) {
                text2URI.put(s, source.getURI());
            }
        }
        while (targetResources.hasNext()) {
            OntResource target = targetResources.next();
            for (String s : getStringRepresentation(target)) {
                String sourceURI = text2URI.get(s);
                if (sourceURI != null) {
                    alignment.add(sourceURI, target.getURI());
                }
            }
        }
    }

    /**
     * Collects the normalized texts describing a resource: all literal {@code rdfs:label}
     * values and the URI fragment.
     *
     * @param resource the resource to describe
     * @return the set of non-empty normalized texts; empty for resources without URI
     */
    private Set<String> getStringRepresentation(OntResource resource) {
        Set<String> texts = new HashSet<>();
        String uri = resource.getURI();
        if (uri == null) {
            return texts;
        }
        NodeIterator labelIterator = resource.listPropertyValues(RDFS.label);
        while (labelIterator.hasNext()) {
            RDFNode label = labelIterator.next();
            if (label.isLiteral()) {
                // Locale.ROOT keeps the normalization independent of the JVM default locale.
                addIfNotEmpty(texts, label.asLiteral().getLexicalForm().toLowerCase(Locale.ROOT));
            }
        }
        String fragment = getUriFragment(uri);
        if (fragment != null) {
            addIfNotEmpty(texts, fragment.toLowerCase(Locale.ROOT).replace("_", " "));
        }
        return texts;
    }

    /**
     * Adds the text unless it is empty. An empty text (e.g. the fragment of a URI that
     * ends in {@code #} or {@code /}) would otherwise match every other empty text.
     */
    private static void addIfNotEmpty(Set<String> texts, String text) {
        if (!text.isEmpty()) {
            texts.add(text);
        }
    }

    /**
     * Returns the part of the URI after the last {@code #} or, if there is no {@code #},
     * after the last {@code /}.
     *
     * @param uri the URI to inspect; may be {@code null}
     * @return the fragment (possibly empty), or {@code null} if the URI is {@code null}
     *         or contains neither {@code #} nor {@code /}
     */
    public static String getUriFragment(String uri){
        if (uri == null) {
            return null;
        }
        int lastIndex = uri.lastIndexOf('#');
        if (lastIndex >= 0) {
            return uri.substring(lastIndex + 1);
        }
        lastIndex = uri.lastIndexOf('/');
        if (lastIndex >= 0) {
            return uri.substring(lastIndex + 1);
        }
        return null;
    }
}

8 changes: 8 additions & 0 deletions examples/SSSOMMatcher/src/main/resources/log4j.properties
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Root logger option
log4j.rootLogger=INFO, stderr

# Direct log messages to stderr: this matcher is packaged with the "seals_external"
# descriptor, for which the matcher has to log to std err only (see pom.xml).
log4j.appender.stderr=org.apache.log4j.ConsoleAppender
log4j.appender.stderr.Target=System.err
log4j.appender.stderr.layout=org.apache.log4j.PatternLayout
log4j.appender.stderr.layout.ConversionPattern=%d{yyyy-MM-dd HH:mm:ss} %-5p [%-30.30t] %-4L in %-20.20c{1} - %m%n
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
package de.uni_mannheim.informatik.dws.melt.demomatcher;

import de.uni_mannheim.informatik.dws.melt.matching_data.TrackRepository;
import de.uni_mannheim.informatik.dws.melt.matching_eval.ExecutionResult;
import de.uni_mannheim.informatik.dws.melt.matching_eval.ExecutionResultSet;
import de.uni_mannheim.informatik.dws.melt.matching_eval.Executor;
import org.junit.jupiter.api.Test;


/**
 * Runs the {@link SSSOMMatcher} on the default Anatomy track and prints the
 * system alignment of the first execution result.
 */
public class EvaluateMatcher {

    @Test
    public void evalSimpleMatcher(){
        ExecutionResultSet executionResults = Executor.run(TrackRepository.Anatomy.Default, new SSSOMMatcher());
        // Only the first result of the set is inspected.
        ExecutionResult firstResult = executionResults.iterator().next();
        System.out.print(firstResult.getSystemAlignment());
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
package de.uni_mannheim.informatik.dws.melt.demomatcher;

import de.uni_mannheim.informatik.dws.melt.matching_base.external.seals.MatcherSeals;
import de.uni_mannheim.informatik.dws.melt.matching_data.TrackRepository;
import de.uni_mannheim.informatik.dws.melt.matching_eval.ExecutionResult;
import de.uni_mannheim.informatik.dws.melt.matching_eval.Executor;
import java.io.File;
import java.nio.file.Paths;

/**
 * This class tests the generated seals package.
 * The exit code is important because it is used in the continuous integration
 * pipeline (CI) on GitHub to test the package.
 */
public class TestPackage {
    /**
     * Runs the packaged matcher on the first test case of the Anatomy track and exits
     * with code 1 if no (or an empty) system alignment was produced.
     *
     * @param args not used
     */
    public static void main(String[] args) {
        // The seals client is cached in CI (hence the location .m2, which is cached already).
        File sealsClient = Paths.get(System.getProperty("user.home"), ".m2", "seals_client.jar").toFile();
        // The assembly is named <artifactId>-<version>-<descriptor>.zip; the artifactId
        // of this project is SSSOMMatcher and its version is 1.0.
        MatcherSeals sealsMatcher = new MatcherSeals(new File("target/SSSOMMatcher-1.0-seals_external.zip"), sealsClient);
        ExecutionResult result = Executor.runSingle(TrackRepository.Anatomy.Default.getFirstTestCase(), sealsMatcher);

        // A missing result counts as a failure, just like an empty alignment.
        if (result == null || result.getSystemAlignment() == null || result.getSystemAlignment().isEmpty()) {
            System.exit(1);
        }
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ public static void main(String[] args) {
// help option
if (cmd.hasOption(HELP_OPTION_STRING)) {
HelpFormatter helpFormatter = new HelpFormatter();
helpFormatter.printHelp("ant", options);
helpFormatter.printHelp("java -jar matching-eval-client-latest.jar", options);
return;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,15 @@
import de.uni_mannheim.informatik.dws.melt.matching_eval.refinement.Refiner;
import de.uni_mannheim.informatik.dws.melt.yet_another_alignment_api.Alignment;
import de.uni_mannheim.informatik.dws.melt.yet_another_alignment_api.AlignmentParser;
import de.uni_mannheim.informatik.dws.melt.yet_another_alignment_api.SSSOMFormatException;
import de.uni_mannheim.informatik.dws.melt.yet_another_alignment_api.SSSOMParser;
import java.io.*;
import java.net.URL;
import java.util.Comparator;
import java.util.HashSet;
import java.util.Objects;
import java.util.Set;
import java.util.logging.Level;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.xml.sax.SAXException;
Expand Down Expand Up @@ -152,14 +155,22 @@ private static Set<Refiner> addRefinementToNewSet(Set<Refiner> initialRefinement
* @param url url which represents the alignment
* @return Parsed alignment.
*/
private static Alignment silentlyParseAlignment(URL url){
private static Alignment silentlyParseAlignment(URL url) {
try {
return AlignmentParser.parse(url);
}catch(FileNotFoundException ex){
LOGGER.error("The system alignment file with URL {} does not exist. Returning empty system alignment.", url);
}
catch (SAXException | IOException | NullPointerException ex) {
LOGGER.error("The system alignment given by following URL could not be parsed: " + url.toString(), ex);
}catch(IOException | NullPointerException ex){
LOGGER.error("The system alignment given by following URL could not be parsed with Alignment Format: " + url.toString(), ex);
}catch(SAXException ex){
try {
return SSSOMParser.parse(AlignmentParser.getInputStreamFromURL(url));
} catch (IOException ex1) {
LOGGER.error("The system alignment given by following URL could not be parsed with SSSOM: " + url.toString(), ex);
} catch (SSSOMFormatException ex1) {
LOGGER.error("The system alignment given by URL {} could not be parsed as alignemntFormat and SSSOM: error from alignment: {} error from SSSOM: {} ",
url.toString(), ex.getMessage(), ex1.getMessage());
}
}
return new Alignment();
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -560,7 +560,7 @@ public Alignment cut(double threshold){
Alignment m = new Alignment(this, false);
ResultSet<Correspondence> result = this.retrieve(QueryFactory.greaterThanOrEqualTo(Correspondence.CONFIDENCE, threshold));
for(Correspondence c : result){
m.add(c);
m.add(new Correspondence(c));
}
return m;
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
package de.uni_mannheim.informatik.dws.melt.yet_another_alignment_api;

/**
 * Checked exception signalling that data does not fit the SSSOM schema.
 */
public class SSSOMFormatException extends Exception {

    private static final long serialVersionUID = 5451825154955645498L;

    /**
     * Creates the exception with a detail message and the underlying cause.
     *
     * @param msg the detail message
     * @param inner the cause of this exception
     */
    public SSSOMFormatException(String msg, Throwable inner) {
        super(msg, inner);
    }

    /**
     * Creates the exception with a detail message only.
     *
     * @param msg the detail message
     */
    public SSSOMFormatException(String msg) {
        super(msg);
    }

}
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ public class SSSOMParser {

private static final Logger LOGGER = LoggerFactory.getLogger(SSSOMParser.class);

public static Alignment parse(InputStream s) throws IOException {
public static Alignment parse(InputStream s) throws IOException, SSSOMFormatException {

Alignment a = new Alignment();
BufferedReader reader = new BufferedReader(new InputStreamReader(s, StandardCharsets.UTF_8));
Expand Down Expand Up @@ -69,6 +69,10 @@ public static Alignment parse(InputStream s) throws IOException {
} // else no metadata found and skipping it

try(CSVParser csvParser = CSVFormat.DEFAULT.withDelimiter('\t').withFirstRecordAsHeader().parse(reader)){
if(csvParser.getHeaderNames().contains("subject_id") == false || csvParser.getHeaderNames().contains("object_id") == false){
throw new SSSOMFormatException("SSSOM header does not contain subject_id and/or object_id");
}

Set<String> usedKeys = new HashSet<>(Arrays.asList("subject_id", "object_id", "predicate_id", "confidence"));
for (CSVRecord record : csvParser) {
String source;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
class SSSOMParserTest {

@Test
public void SSSOMParserTest() throws IOException{
public void SSSOMParserTest() throws IOException, SSSOMFormatException{
InputStream i = this.getClass().getClassLoader().getResourceAsStream("mp_hp_mgi_all.sssom.tsv");
Alignment a = SSSOMParser.parse(i);

Expand Down Expand Up @@ -44,4 +44,13 @@ public void SSSOMParserTest() throws IOException{
assertEquals(LocalDate.parse("2022-09-02"), mappingDate);
}


@Test
public void SSSOMParserWithInvalidInput() throws IOException{
    // The ".rdf" resource is presumably not an SSSOM file; the parser is expected to reject it.
    InputStream nonSssomInput = this.getClass().getClassLoader().getResourceAsStream("LogMap-cmt-conference.rdf");
    assertThrows(SSSOMFormatException.class, () -> SSSOMParser.parse(nonSssomInput));
}

}

0 comments on commit d0e1aa9

Please sign in to comment.